// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011-2012 Red Hat UK.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"
#include "dm-bio-prison-v1.h"
#include "dm.h"

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/log2.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>

#define	DM_MSG_PREFIX	"thin"

/*
 * Tunable constants
 */
#define ENDIO_HOOK_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define COMMIT_PERIOD HZ
#define NO_SPACE_TIMEOUT_SECS 60

static unsigned int no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");

/*
 * The block size of the device holding pool data must be
 * between 64KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
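/*
 * With SECTOR_SHIFT == 9 (512 byte sectors) these work out to 128 sectors
 * and 2097152 sectors respectively.
 */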

/*
 * Device id is restricted to 24 bits.
 */
#define MAX_DEV_ID ((1 << 24) - 1)

/*
 * How do we handle breaking sharing of data blocks?
 * =================================================
 *
 * We use a standard copy-on-write btree to store the mappings for the
 * devices (note I'm talking about copy-on-write of the metadata here, not
 * the data).  When you take an internal snapshot you clone the root node
 * of the origin btree.  After this there is no concept of an origin or a
 * snapshot.  They are just two device trees that happen to point to the
 * same data blocks.
 *
 * When we get a write in we decide if it's to a shared data block using
 * some timestamp magic.  If it is, we have to break sharing.
 *
 * Let's say we write to a shared block in what was the origin.  The
 * steps are:
 *
 * i) plug further io to this physical block. (see bio_prison code).
 *
 * ii) quiesce any read io to that shared data block.  Obviously
 * including all devices that share this block.  (see dm_deferred_set code)
 *
 * iii) copy the data block to a newly allocated block.  This step can be
 * skipped if the io covers the whole block. (schedule_copy).
 *
 * iv) insert the new mapping into the origin's btree
 * (process_prepared_mapping).  This act of inserting breaks some
 * sharing of btree nodes between the two devices.  Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
 * devices that share the block never change.  The btree for the origin
 * device as it was after the last commit is untouched, i.e. we're using
 * persistent data structures in the functional programming sense.
 *
 * v) unplug io to this physical block, including the io that triggered
 * the breaking of sharing.
 *
 * Steps (ii) and (iii) occur in parallel.
 *
 * The metadata _doesn't_ need to be committed before the io continues.  We
 * get away with this because the io is always written to a _new_ block.
 * If there's a crash, then:
 *
 * - The origin mapping will point to the old origin block (the shared
 * one).  This will contain the data as it was before the io that triggered
 * the breaking of sharing came in.
 *
 * - The snap mapping still points to the old block.  As it would after
 * the commit.
 *
 * The downside of this scheme is that the timestamp magic isn't perfect,
 * and will continue to think that the data block in the snapshot device is
 * shared even after the write to the origin has broken sharing.  I suspect
 * data blocks will typically be shared by many different devices, so we're
 * breaking sharing n + 1 times, rather than n, where n is the number of
 * devices that reference this data block.  At the moment I think the
 * benefits far, far outweigh the disadvantages.
 */
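
/*
 * (Roughly, the "timestamp magic" above: each mapping recorded by
 * dm-thin-metadata carries the transaction time at which it was created,
 * and a block is treated as shared if that time predates the device's last
 * snapshot time.  See dm-thin-metadata.c for the details.)
 */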

/*----------------------------------------------------------------*/

/*
 * Key building.
 */
enum lock_space {
	VIRTUAL,
	PHYSICAL
};

static bool build_key(struct dm_thin_device *td, enum lock_space ls,
		      dm_block_t b, dm_block_t e, struct dm_cell_key *key)
{
	key->virtual = (ls == VIRTUAL);
	key->dev = dm_thin_dev_id(td);
	key->block_begin = b;
	key->block_end = e;

	return dm_cell_key_has_valid_range(key);
}

static void build_data_key(struct dm_thin_device *td, dm_block_t b,
			   struct dm_cell_key *key)
{
	(void) build_key(td, PHYSICAL, b, b + 1llu, key);
}

static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
			      struct dm_cell_key *key)
{
	(void) build_key(td, VIRTUAL, b, b + 1llu, key);
}
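
/*
 * build_data_key() and build_virtual_key() are convenience wrappers that
 * lock a single block, i.e. the range [b, b + 1), so their return value can
 * be ignored.  Callers that need to lock a larger range (e.g. the discard
 * path) use build_key() directly and must check its return value.
 */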

/*----------------------------------------------------------------*/

#define THROTTLE_THRESHOLD (1 * HZ)

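/*
 * The pool worker brackets each pass with throttle_work_start() and
 * throttle_work_complete(), calling throttle_work_update() between stages.
 * If a pass runs for longer than THROTTLE_THRESHOLD the worker takes the
 * rw_semaphore for write, which stalls the bio submission paths (they take
 * it for read via throttle_lock()) until the backlog has been processed.
 */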
struct throttle {
	struct rw_semaphore lock;
	unsigned long threshold;
	bool throttle_applied;
};

static void throttle_init(struct throttle *t)
{
	init_rwsem(&t->lock);
	t->throttle_applied = false;
}

static void throttle_work_start(struct throttle *t)
{
	t->threshold = jiffies + THROTTLE_THRESHOLD;
}

static void throttle_work_update(struct throttle *t)
{
	if (!t->throttle_applied && time_is_before_jiffies(t->threshold)) {
		down_write(&t->lock);
		t->throttle_applied = true;
	}
}

static void throttle_work_complete(struct throttle *t)
{
	if (t->throttle_applied) {
		t->throttle_applied = false;
		up_write(&t->lock);
	}
}

static void throttle_lock(struct throttle *t)
{
	down_read(&t->lock);
}

static void throttle_unlock(struct throttle *t)
{
	up_read(&t->lock);
}

/*----------------------------------------------------------------*/

/*
 * A pool device ties together a metadata device and a data device.  It
 * also provides the interface for creating and destroying internal
 * devices.
 */
struct dm_thin_new_mapping;

/*
 * The pool runs in various modes.  Ordered from least to most degraded
 * so that modes can be compared.
 */
enum pool_mode {
	PM_WRITE,		/* metadata may be changed */
	PM_OUT_OF_DATA_SPACE,	/* metadata may be changed, though data may not be allocated */

	/*
	 * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
	 */
	PM_OUT_OF_METADATA_SPACE,
	PM_READ_ONLY,		/* metadata may not be changed */

	PM_FAIL,		/* all I/O fails */
};

struct pool_features {
	enum pool_mode mode;

	bool zero_new_blocks:1;
	bool discard_enabled:1;
	bool discard_passdown:1;
	bool error_if_no_space:1;
};

struct thin_c;
typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell);
typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);

#define CELL_SORT_ARRAY_SIZE 8192

struct pool {
	struct list_head list;
	struct dm_target *ti;	/* Only set if a pool target is bound */

	struct mapped_device *pool_md;
	struct block_device *data_dev;
	struct block_device *md_dev;
	struct dm_pool_metadata *pmd;

	dm_block_t low_water_blocks;
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	struct pool_features pf;
	bool low_water_triggered:1;	/* A dm event has been sent */
	bool suspended:1;
	bool out_of_data_space:1;

	struct dm_bio_prison *prison;
	struct dm_kcopyd_client *copier;

	struct work_struct worker;
	struct workqueue_struct *wq;
	struct throttle throttle;
	struct delayed_work waker;
	struct delayed_work no_space_timeout;

	unsigned long last_commit_jiffies;
	unsigned int ref_count;

	spinlock_t lock;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_flush_completions;
	struct list_head prepared_mappings;
	struct list_head prepared_discards;
	struct list_head prepared_discards_pt2;
	struct list_head active_thins;

	struct dm_deferred_set *shared_read_ds;
	struct dm_deferred_set *all_io_ds;

	struct dm_thin_new_mapping *next_mapping;

	process_bio_fn process_bio;
	process_bio_fn process_discard;

	process_cell_fn process_cell;
	process_cell_fn process_discard_cell;

	process_mapping_fn process_prepared_mapping;
	process_mapping_fn process_prepared_discard;
	process_mapping_fn process_prepared_discard_pt2;

	struct dm_bio_prison_cell **cell_sort_array;

	mempool_t mapping_pool;
};

static void metadata_operation_failed(struct pool *pool, const char *op, int r);

static enum pool_mode get_pool_mode(struct pool *pool)
{
	return pool->pf.mode;
}

static void notify_of_pool_mode_change(struct pool *pool)
{
	static const char *descs[] = {
		"write",
		"out-of-data-space",
		"read-only",
		"read-only",
		"fail"
	};
	const char *extra_desc = NULL;
	enum pool_mode mode = get_pool_mode(pool);

	if (mode == PM_OUT_OF_DATA_SPACE) {
		if (!pool->pf.error_if_no_space)
			extra_desc = " (queue IO)";
		else
			extra_desc = " (error IO)";
	}

	dm_table_event(pool->ti->table);
	DMINFO("%s: switching pool to %s%s mode",
	       dm_device_name(pool->pool_md),
	       descs[(int)mode], extra_desc ? : "");
}

/*
 * Target context for a pool.
 */
struct pool_c {
	struct dm_target *ti;
	struct pool *pool;
	struct dm_dev *data_dev;
	struct dm_dev *metadata_dev;

	dm_block_t low_water_blocks;
	struct pool_features requested_pf; /* Features requested during table load */
	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
};

/*
 * Target context for a thin.
 */
struct thin_c {
	struct list_head list;
	struct dm_dev *pool_dev;
	struct dm_dev *origin_dev;
	sector_t origin_size;
	dm_thin_id dev_id;

	struct pool *pool;
	struct dm_thin_device *td;
	struct mapped_device *thin_md;

	bool requeue_mode:1;
	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bio_list;
	struct bio_list retry_on_resume_list;
	struct rb_root sort_bio_list; /* sorted list of deferred bios */

	/*
	 * Ensures the thin is not destroyed until the worker has finished
	 * iterating the active_thins list.
	 */
	refcount_t refcount;
	struct completion can_destroy;
};

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct pool *pool)
{
	return pool->sectors_per_block_shift >= 0;
}

static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
{
	return block_size_is_power_of_two(pool) ?
		(b << pool->sectors_per_block_shift) :
		(b * pool->sectors_per_block);
}
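
/*
 * For example, with a 64 KiB block size (sectors_per_block = 128,
 * sectors_per_block_shift = 7), block 10 starts at sector 1280.
 */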

/*----------------------------------------------------------------*/

struct discard_op {
	struct thin_c *tc;
	struct blk_plug plug;
	struct bio *parent_bio;
	struct bio *bio;
};

static void begin_discard(struct discard_op *op, struct thin_c *tc, struct bio *parent)
{
	BUG_ON(!parent);

	op->tc = tc;
	blk_start_plug(&op->plug);
	op->parent_bio = parent;
	op->bio = NULL;
}

static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e)
{
	struct thin_c *tc = op->tc;
	sector_t s = block_to_sectors(tc->pool, data_b);
	sector_t len = block_to_sectors(tc->pool, data_e - data_b);

	return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
}

static void end_discard(struct discard_op *op, int r)
{
	if (op->bio) {
		/*
		 * Even if one of the calls to issue_discard failed, we
		 * need to wait for the chain to complete.
		 */
		bio_chain(op->bio, op->parent_bio);
		op->bio->bi_opf = REQ_OP_DISCARD;
		submit_bio(op->bio);
	}

	blk_finish_plug(&op->plug);

	/*
	 * Even if r is set, there could be sub discards in flight that we
	 * need to wait for.
	 */
	if (r && !op->parent_bio->bi_status)
		op->parent_bio->bi_status = errno_to_blk_status(r);
	bio_endio(op->parent_bio);
}
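
/*
 * A discard_op is used as a simple three step sequence, as in
 * passdown_double_checking_shared_status() below:
 *
 *	begin_discard(&op, tc, parent_bio);
 *	r = issue_discard(&op, data_begin, data_end);	(possibly repeated)
 *	end_discard(&op, r);
 *
 * end_discard() must always be called so that parent_bio is completed,
 * even if an issue_discard() call failed.
 */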

/*----------------------------------------------------------------*/

/*
 * wake_worker() is used when new work is queued and when pool_resume is
 * ready to continue deferred IO processing.
 */
static void wake_worker(struct pool *pool)
{
	queue_work(pool->wq, &pool->worker);
}

/*----------------------------------------------------------------*/

static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_bio_prison_cell *cell_prealloc;

	/*
	 * Allocate a cell from the prison's mempool.
	 * This might block but it can't fail.
	 */
	cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);

	r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
	if (r)
		/*
		 * We reused an old cell; we can get rid of
		 * the new one.
		 */
		dm_bio_prison_free_cell(pool->prison, cell_prealloc);

	return r;
}

static void cell_release(struct pool *pool,
			 struct dm_bio_prison_cell *cell,
			 struct bio_list *bios)
{
	dm_cell_release(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_visit_release(struct pool *pool,
			       void (*fn)(void *, struct dm_bio_prison_cell *),
			       void *context,
			       struct dm_bio_prison_cell *cell)
{
	dm_cell_visit_release(pool->prison, fn, context, cell);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_release_no_holder(struct pool *pool,
				   struct dm_bio_prison_cell *cell,
				   struct bio_list *bios)
{
	dm_cell_release_no_holder(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_error_with_code(struct pool *pool,
		struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
	dm_cell_error(pool->prison, cell, error_code);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static blk_status_t get_pool_io_error_code(struct pool *pool)
{
	return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}

static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}

static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, 0);
}

static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}

/*----------------------------------------------------------------*/

/*
 * A global list of pools that uses a struct mapped_device as a key.
 */
static struct dm_thin_pool_table {
	struct mutex mutex;
	struct list_head pools;
} dm_thin_pool_table;

static void pool_table_init(void)
{
	mutex_init(&dm_thin_pool_table.mutex);
	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
}

static void pool_table_exit(void)
{
	mutex_destroy(&dm_thin_pool_table.mutex);
}

static void __pool_table_insert(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_add(&pool->list, &dm_thin_pool_table.pools);
}

static void __pool_table_remove(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_del(&pool->list);
}

static struct pool *__pool_table_lookup(struct mapped_device *md)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->pool_md == md) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->md_dev == md_dev) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

/*----------------------------------------------------------------*/

struct dm_thin_endio_hook {
	struct thin_c *tc;
	struct dm_deferred_entry *shared_read_entry;
	struct dm_deferred_entry *all_io_entry;
	struct dm_thin_new_mapping *overwrite_mapping;
	struct rb_node rb_node;
	struct dm_bio_prison_cell *cell;
};

static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
{
	bio_list_merge(bios, master);
	bio_list_init(master);
}

static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
	struct bio *bio;

	while ((bio = bio_list_pop(bios))) {
		bio->bi_status = error;
		bio_endio(bio);
	}
}

static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
		blk_status_t error)
{
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&tc->lock);
	__merge_bio_list(&bios, master);
	spin_unlock_irq(&tc->lock);

	error_bio_list(&bios, error);
}

static void requeue_deferred_cells(struct thin_c *tc)
{
	struct pool *pool = tc->pool;
	struct list_head cells;
	struct dm_bio_prison_cell *cell, *tmp;

	INIT_LIST_HEAD(&cells);

	spin_lock_irq(&tc->lock);
	list_splice_init(&tc->deferred_cells, &cells);
	spin_unlock_irq(&tc->lock);

	list_for_each_entry_safe(cell, tmp, &cells, user_list)
		cell_requeue(pool, cell);
}

static void requeue_io(struct thin_c *tc)
{
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&tc->lock);
	__merge_bio_list(&bios, &tc->deferred_bio_list);
	__merge_bio_list(&bios, &tc->retry_on_resume_list);
	spin_unlock_irq(&tc->lock);

	error_bio_list(&bios, BLK_STS_DM_REQUEUE);
	requeue_deferred_cells(tc);
}

static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
	struct thin_c *tc;

	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active_thins, list)
		error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
	rcu_read_unlock();
}

static void error_retry_list(struct pool *pool)
{
	error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}

/*
 * This section of code contains the logic for processing a thin device's IO.
 * Much of the code depends on pool object resources (lists, workqueues, etc)
 * but most is exclusively called from the thin target rather than the thin-pool
 * target.
 */

static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (block_size_is_power_of_two(pool))
		block_nr >>= pool->sectors_per_block_shift;
	else
		(void) sector_div(block_nr, pool->sectors_per_block);

	return block_nr;
}
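
/*
 * E.g. with sectors_per_block = 128, a bio starting at sector 1000 maps to
 * virtual block 7 (1000 / 128, rounded down).
 */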

/*
 * Returns the _complete_ blocks that this bio covers.
 */
static void get_bio_block_range(struct thin_c *tc, struct bio *bio,
				dm_block_t *begin, dm_block_t *end)
{
	struct pool *pool = tc->pool;
	sector_t b = bio->bi_iter.bi_sector;
	sector_t e = b + (bio->bi_iter.bi_size >> SECTOR_SHIFT);

	b += pool->sectors_per_block - 1ull; /* so we round up */

	if (block_size_is_power_of_two(pool)) {
		b >>= pool->sectors_per_block_shift;
		e >>= pool->sectors_per_block_shift;
	} else {
		(void) sector_div(b, pool->sectors_per_block);
		(void) sector_div(e, pool->sectors_per_block);
	}

	if (e < b)
		/* Can happen if the bio is within a single block. */
		e = b;

	*begin = b;
	*end = e;
}
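
/*
 * E.g. with sectors_per_block = 128, a bio covering sectors [100, 600)
 * yields *begin = 1 and *end = 4: only blocks 1-3 are completely covered,
 * the partial blocks at either end are excluded.
 */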

static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
	struct pool *pool = tc->pool;
	sector_t bi_sector = bio->bi_iter.bi_sector;

	bio_set_dev(bio, tc->pool_dev->bdev);
	if (block_size_is_power_of_two(pool))
		bio->bi_iter.bi_sector =
			(block << pool->sectors_per_block_shift) |
			(bi_sector & (pool->sectors_per_block - 1));
	else
		bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
				 sector_div(bi_sector, pool->sectors_per_block);
}

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
{
	bio_set_dev(bio, tc->origin_dev->bdev);
}

static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
{
	return op_is_flush(bio->bi_opf) &&
		dm_thin_changed_this_transaction(tc->td);
}

static void inc_all_io_entry(struct pool *pool, struct bio *bio)
{
	struct dm_thin_endio_hook *h;

	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
}

static void issue(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;

	if (!bio_triggers_commit(tc, bio)) {
		dm_submit_bio_remap(bio, NULL);
		return;
	}

	/*
	 * Complete bio with an error if earlier I/O caused changes to
	 * the metadata that can't be committed, e.g. due to I/O errors
	 * on the metadata device.
	 */
	if (dm_thin_aborted_changes(tc->td)) {
		bio_io_error(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in process_deferred_bios().
	 */
	spin_lock_irq(&pool->lock);
	bio_list_add(&pool->deferred_flush_bios, bio);
	spin_unlock_irq(&pool->lock);
}

static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);
	issue(tc, bio);
}

static void remap_and_issue(struct thin_c *tc, struct bio *bio,
			    dm_block_t block)
{
	remap(tc, bio, block);
	issue(tc, bio);
}

/*----------------------------------------------------------------*/

/*
 * Bio endio functions.
 */
struct dm_thin_new_mapping {
	struct list_head list;

	bool pass_discard:1;
	bool maybe_shared:1;

	/*
	 * Track quiescing, copying and zeroing preparation actions.  When this
	 * counter hits zero the block is prepared and can be inserted into the
	 * btree.
	 */
	atomic_t prepare_actions;

	blk_status_t status;
	struct thin_c *tc;
	dm_block_t virt_begin, virt_end;
	dm_block_t data_block;
	struct dm_bio_prison_cell *cell;

	/*
	 * If the bio covers the whole area of a block then we can avoid
	 * zeroing or copying.  Instead this bio is hooked.  The bio will
	 * still be in the cell, so care has to be taken to avoid issuing
	 * the bio twice.
	 */
	struct bio *bio;
	bio_end_io_t *saved_bi_end_io;
};

static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	struct pool *pool = m->tc->pool;

	if (atomic_dec_and_test(&m->prepare_actions)) {
		list_add_tail(&m->list, &pool->prepared_mappings);
		wake_worker(pool);
	}
}

static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	unsigned long flags;
	struct pool *pool = m->tc->pool;

	spin_lock_irqsave(&pool->lock, flags);
	__complete_mapping_preparation(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_thin_new_mapping *m = context;

	m->status = read_err || write_err ? BLK_STS_IOERR : 0;
	complete_mapping_preparation(m);
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct dm_thin_new_mapping *m = h->overwrite_mapping;

	bio->bi_end_io = m->saved_bi_end_io;

	m->status = bio->bi_status;
	complete_mapping_preparation(m);
}

/*----------------------------------------------------------------*/

/*
 * Workqueue.
 */

/*
 * Prepared mapping jobs.
 */

/*
 * This sends the bios in the cell, except the original holder, back
 * to the deferred_bios list.
 */
static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	struct pool *pool = tc->pool;
	unsigned long flags;
	struct bio_list bios;

	bio_list_init(&bios);
	cell_release_no_holder(pool, cell, &bios);

	if (!bio_list_empty(&bios)) {
		spin_lock_irqsave(&tc->lock, flags);
		bio_list_merge(&tc->deferred_bio_list, &bios);
		spin_unlock_irqrestore(&tc->lock, flags);
		wake_worker(pool);
	}
}

static void thin_defer_bio(struct thin_c *tc, struct bio *bio);

struct remap_info {
	struct thin_c *tc;
	struct bio_list defer_bios;
	struct bio_list issue_bios;
};

static void __inc_remap_and_issue_cell(void *context,
				       struct dm_bio_prison_cell *cell)
{
	struct remap_info *info = context;
	struct bio *bio;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
			bio_list_add(&info->defer_bios, bio);
		else {
			inc_all_io_entry(info->tc->pool, bio);

			/*
			 * We can't issue the bios with the bio prison lock
			 * held, so we add them to a list to issue on
			 * return from this function.
			 */
			bio_list_add(&info->issue_bios, bio);
		}
	}
}

static void inc_remap_and_issue_cell(struct thin_c *tc,
				     struct dm_bio_prison_cell *cell,
				     dm_block_t block)
{
	struct bio *bio;
	struct remap_info info;

	info.tc = tc;
	bio_list_init(&info.defer_bios);
	bio_list_init(&info.issue_bios);

	/*
	 * We have to be careful to inc any bios we're about to issue
	 * before the cell is released, and avoid a race with new bios
	 * being added to the cell.
	 */
	cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
			   &info, cell);

	while ((bio = bio_list_pop(&info.defer_bios)))
		thin_defer_bio(tc, bio);

	while ((bio = bio_list_pop(&info.issue_bios)))
		remap_and_issue(info.tc, bio, block);
}

static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
	cell_error(m->tc->pool, m->cell);
	list_del(&m->list);
	mempool_free(m, &m->tc->pool->mapping_pool);
}

static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;

	/*
	 * If the bio has the REQ_FUA flag set we must commit the metadata
	 * before signaling its completion.
	 */
	if (!bio_triggers_commit(tc, bio)) {
		bio_endio(bio);
		return;
	}

	/*
	 * Complete bio with an error if earlier I/O caused changes to the
	 * metadata that can't be committed, e.g. due to I/O errors on the
97962306a36Sopenharmony_ci	 * metadata device.
98062306a36Sopenharmony_ci	 */
98162306a36Sopenharmony_ci	if (dm_thin_aborted_changes(tc->td)) {
98262306a36Sopenharmony_ci		bio_io_error(bio);
98362306a36Sopenharmony_ci		return;
98462306a36Sopenharmony_ci	}
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	/*
98762306a36Sopenharmony_ci	 * Batch together any bios that trigger commits and then issue a
98862306a36Sopenharmony_ci	 * single commit for them in process_deferred_bios().
98962306a36Sopenharmony_ci	 */
99062306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
99162306a36Sopenharmony_ci	bio_list_add(&pool->deferred_flush_completions, bio);
99262306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
99362306a36Sopenharmony_ci}
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_cistatic void process_prepared_mapping(struct dm_thin_new_mapping *m)
99662306a36Sopenharmony_ci{
99762306a36Sopenharmony_ci	struct thin_c *tc = m->tc;
99862306a36Sopenharmony_ci	struct pool *pool = tc->pool;
99962306a36Sopenharmony_ci	struct bio *bio = m->bio;
100062306a36Sopenharmony_ci	int r;
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci	if (m->status) {
100362306a36Sopenharmony_ci		cell_error(pool, m->cell);
100462306a36Sopenharmony_ci		goto out;
100562306a36Sopenharmony_ci	}
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci	/*
100862306a36Sopenharmony_ci	 * Commit the prepared block into the mapping btree.
100962306a36Sopenharmony_ci	 * Any I/O for this block arriving after this point will get
101062306a36Sopenharmony_ci	 * remapped to it directly.
101162306a36Sopenharmony_ci	 */
101262306a36Sopenharmony_ci	r = dm_thin_insert_block(tc->td, m->virt_begin, m->data_block);
101362306a36Sopenharmony_ci	if (r) {
101462306a36Sopenharmony_ci		metadata_operation_failed(pool, "dm_thin_insert_block", r);
101562306a36Sopenharmony_ci		cell_error(pool, m->cell);
101662306a36Sopenharmony_ci		goto out;
101762306a36Sopenharmony_ci	}
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	/*
102062306a36Sopenharmony_ci	 * Release any bios held while the block was being provisioned.
102162306a36Sopenharmony_ci	 * If we are processing a write bio that completely covers the block,
102262306a36Sopenharmony_ci	 * we already processed it so can ignore it now when processing
102362306a36Sopenharmony_ci	 * the bios in the cell.
102462306a36Sopenharmony_ci	 */
102562306a36Sopenharmony_ci	if (bio) {
102662306a36Sopenharmony_ci		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
102762306a36Sopenharmony_ci		complete_overwrite_bio(tc, bio);
102862306a36Sopenharmony_ci	} else {
102962306a36Sopenharmony_ci		inc_all_io_entry(tc->pool, m->cell->holder);
103062306a36Sopenharmony_ci		remap_and_issue(tc, m->cell->holder, m->data_block);
103162306a36Sopenharmony_ci		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
103262306a36Sopenharmony_ci	}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ciout:
103562306a36Sopenharmony_ci	list_del(&m->list);
103662306a36Sopenharmony_ci	mempool_free(m, &pool->mapping_pool);
103762306a36Sopenharmony_ci}
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_ci/*----------------------------------------------------------------*/

static void free_discard_mapping(struct dm_thin_new_mapping *m)
{
	struct thin_c *tc = m->tc;

	if (m->cell)
		cell_defer_no_holder(tc, m->cell);
	mempool_free(m, &tc->pool->mapping_pool);
}

static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
{
	bio_io_error(m->bio);
	free_discard_mapping(m);
}

static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
{
	bio_endio(m->bio);
	free_discard_mapping(m);
}

static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;

	r = dm_thin_remove_range(tc->td, m->cell->key.block_begin, m->cell->key.block_end);
	if (r) {
		metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
		bio_io_error(m->bio);
	} else
		bio_endio(m->bio);

	cell_defer_no_holder(tc, m->cell);
	mempool_free(m, &tc->pool->mapping_pool);
}

/*----------------------------------------------------------------*/

static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m,
						   struct bio *discard_parent)
{
	/*
	 * We've already unmapped this range of blocks, but before we
	 * passdown we have to check that these blocks are now unused.
	 */
	int r = 0;
	bool shared = true;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;
	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
	struct discard_op op;

	begin_discard(&op, tc, discard_parent);
	while (b != end) {
		/* find start of unmapped run */
		for (; b < end; b++) {
			r = dm_pool_block_is_shared(pool->pmd, b, &shared);
			if (r)
				goto out;

			if (!shared)
				break;
		}

		if (b == end)
			break;

		/* find end of run */
		for (e = b + 1; e != end; e++) {
			r = dm_pool_block_is_shared(pool->pmd, e, &shared);
			if (r)
				goto out;

			if (shared)
				break;
		}

		r = issue_discard(&op, b, e);
		if (r)
			goto out;

		b = e;
	}
out:
	end_discard(&op, r);
}
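
/*
 * For illustration: if the four blocks starting at m->data_block report
 * shared flags [shared, unshared, unshared, shared], the loop above skips
 * the first block, finds the run [b + 1, b + 3) and issues a single
 * discard covering just those two blocks; the trailing shared block is
 * left alone.  Ranges handed to issue_discard() are half-open.
 */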

static void queue_passdown_pt2(struct dm_thin_new_mapping *m)
{
	unsigned long flags;
	struct pool *pool = m->tc->pool;

	spin_lock_irqsave(&pool->lock, flags);
	list_add_tail(&m->list, &pool->prepared_discards_pt2);
	spin_unlock_irqrestore(&pool->lock, flags);
	wake_worker(pool);
}

static void passdown_endio(struct bio *bio)
{
	/*
	 * It doesn't matter if the passdown discard failed, we still want
	 * to unmap (we ignore err).
	 */
	queue_passdown_pt2(bio->bi_private);
	bio_put(bio);
}

static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;
	struct bio *discard_parent;
	dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin);

	/*
	 * Only this thread allocates blocks, so we can be sure that the
	 * newly unmapped blocks will not be allocated before the end of
	 * the function.
	 */
	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
	if (r) {
		metadata_operation_failed(pool, "dm_thin_remove_range", r);
		bio_io_error(m->bio);
		cell_defer_no_holder(tc, m->cell);
		mempool_free(m, &pool->mapping_pool);
		return;
	}

	/*
	 * Increment the unmapped blocks.  This prevents a race between the
	 * passdown io and reallocation of freed blocks.
	 */
	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
		bio_io_error(m->bio);
		cell_defer_no_holder(tc, m->cell);
		mempool_free(m, &pool->mapping_pool);
		return;
	}

	discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO);
	discard_parent->bi_end_io = passdown_endio;
	discard_parent->bi_private = m;
	if (m->maybe_shared)
		passdown_double_checking_shared_status(m, discard_parent);
	else {
		struct discard_op op;

		begin_discard(&op, tc, discard_parent);
		r = issue_discard(&op, m->data_block, data_end);
		end_discard(&op, r);
	}
}

static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
{
	int r;
	struct thin_c *tc = m->tc;
	struct pool *pool = tc->pool;

	/*
	 * The passdown has completed, so now we can decrement all those
	 * unmapped blocks.
	 */
	r = dm_pool_dec_data_range(pool->pmd, m->data_block,
				   m->data_block + (m->virt_end - m->virt_begin));
	if (r) {
		metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
		bio_io_error(m->bio);
	} else
		bio_endio(m->bio);

	cell_defer_no_holder(tc, m->cell);
	mempool_free(m, &pool->mapping_pool);
}
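
/*
 * Passdown lifecycle, in order:
 *
 *   pt1: remove the virtual mapping, take an extra reference on the data
 *        range [data_block, data_end) so the freed blocks cannot be
 *        reallocated while the discard is in flight, then issue the
 *        discard chained to discard_parent.
 *   passdown_endio: runs when discard_parent completes (whether or not
 *        the discard succeeded) and queues the mapping on
 *        prepared_discards_pt2.
 *   pt2: drop the extra data-range reference and complete the original
 *        discard bio.
 */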

static void process_prepared(struct pool *pool, struct list_head *head,
			     process_mapping_fn *fn)
{
	struct list_head maps;
	struct dm_thin_new_mapping *m, *tmp;

	INIT_LIST_HEAD(&maps);
	spin_lock_irq(&pool->lock);
	list_splice_init(head, &maps);
	spin_unlock_irq(&pool->lock);

	list_for_each_entry_safe(m, tmp, &maps, list)
		(*fn)(m);
}

/*
 * Deferred bio jobs.
 */
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
	return bio->bi_iter.bi_size ==
		(pool->sectors_per_block << SECTOR_SHIFT);
}

static int io_overwrites_block(struct pool *pool, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		io_overlaps_block(pool, bio);
}
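
/*
 * Example: with 64KiB data blocks, sectors_per_block is 128, so only a bio
 * whose bi_size is exactly 128 << SECTOR_SHIFT (64KiB) "overlaps" the
 * block.  A WRITE of that size can simply overwrite the destination block,
 * letting the copy (schedule_copy) or zero (schedule_zero) step be skipped.
 */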

static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
			       bio_end_io_t *fn)
{
	*save = bio->bi_end_io;
	bio->bi_end_io = fn;
}

static int ensure_next_mapping(struct pool *pool)
{
	if (pool->next_mapping)
		return 0;

	pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);

	return pool->next_mapping ? 0 : -ENOMEM;
}

static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
{
	struct dm_thin_new_mapping *m = pool->next_mapping;

	BUG_ON(!pool->next_mapping);

	memset(m, 0, sizeof(struct dm_thin_new_mapping));
	INIT_LIST_HEAD(&m->list);
	m->bio = NULL;

	pool->next_mapping = NULL;

	return m;
}
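
/*
 * ensure_next_mapping()/get_next_mapping() form a reserve-then-consume
 * pair: the worker reserves one mapping struct up front with GFP_ATOMIC,
 * so per-bio processing never sleeps on the mempool, and get_next_mapping()
 * hands out that reservation, BUG()ing if a caller forgot to reserve
 * first.  When the atomic allocation fails, callers such as
 * process_thin_deferred_bios() push the bio back onto the deferred list
 * and retry once prepared mappings have been recycled.
 */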

static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
		    sector_t begin, sector_t end)
{
	struct dm_io_region to;

	to.bdev = tc->pool_dev->bdev;
	to.sector = begin;
	to.count = end - begin;

	dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
}

static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
				      dm_block_t data_begin,
				      struct dm_thin_new_mapping *m)
{
	struct pool *pool = tc->pool;
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

	h->overwrite_mapping = m;
	m->bio = bio;
	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
	inc_all_io_entry(pool, bio);
	remap_and_issue(tc, bio, data_begin);
}

/*
 * A partial copy also needs to zero the uncopied region.
 */
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
			  struct dm_dev *origin, dm_block_t data_origin,
			  dm_block_t data_dest,
			  struct dm_bio_prison_cell *cell, struct bio *bio,
			  sector_t len)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	m->tc = tc;
	m->virt_begin = virt_block;
	m->virt_end = virt_block + 1u;
	m->data_block = data_dest;
	m->cell = cell;

	/*
	 * quiesce action + copy action + an extra reference held for the
	 * duration of this function (we may need to inc later for a
	 * partial zero).
	 */
	atomic_set(&m->prepare_actions, 3);

	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
		complete_mapping_preparation(m); /* already quiesced */

	/*
	 * IO to pool_dev remaps to the pool target's data_dev.
	 *
	 * If the whole block of data is being overwritten, we can issue the
	 * bio immediately. Otherwise we use kcopyd to clone the data first.
	 */
	if (io_overwrites_block(pool, bio))
		remap_and_issue_overwrite(tc, bio, data_dest, m);
	else {
		struct dm_io_region from, to;

		from.bdev = origin->bdev;
		from.sector = data_origin * pool->sectors_per_block;
		from.count = len;

		to.bdev = tc->pool_dev->bdev;
		to.sector = data_dest * pool->sectors_per_block;
		to.count = len;

		dm_kcopyd_copy(pool->copier, &from, 1, &to,
			       0, copy_complete, m);

		/*
		 * Do we need to zero a tail region?
		 */
		if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
			atomic_inc(&m->prepare_actions);
			ll_zero(tc, m,
				data_dest * pool->sectors_per_block + len,
				(data_dest + 1) * pool->sectors_per_block);
		}
	}

	complete_mapping_preparation(m); /* drop our ref */
}
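
/*
 * Worked example of the reference counting above: prepare_actions starts
 * at 3 (quiesce + copy + the reference this function itself holds).  For a
 * partial copy of, say, len = 100 sectors into a 128-sector block with
 * zero_new_blocks enabled, a fourth reference is taken and the tail
 * sectors [data_dest * 128 + 100, (data_dest + 1) * 128) are zeroed via
 * kcopyd.  Only once every reference has been dropped is the mapping
 * handed on for commit (see process_prepared_mapping()).
 */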

static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_origin, dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	schedule_copy(tc, virt_block, tc->pool_dev,
		      data_origin, data_dest, cell, bio,
		      tc->pool->sectors_per_block);
}

static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
			  dm_block_t data_block, struct dm_bio_prison_cell *cell,
			  struct bio *bio)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
	m->tc = tc;
	m->virt_begin = virt_block;
	m->virt_end = virt_block + 1u;
	m->data_block = data_block;
	m->cell = cell;

	/*
	 * If the whole block of data is being overwritten or we are not
	 * zeroing pre-existing data, we can issue the bio immediately.
	 * Otherwise we use kcopyd to zero the data first.
	 */
	if (pool->pf.zero_new_blocks) {
		if (io_overwrites_block(pool, bio))
			remap_and_issue_overwrite(tc, bio, data_block, m);
		else
			ll_zero(tc, m, data_block * pool->sectors_per_block,
				(data_block + 1) * pool->sectors_per_block);
	} else
		process_prepared_mapping(m);
}

static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
				   dm_block_t data_dest,
				   struct dm_bio_prison_cell *cell, struct bio *bio)
{
	struct pool *pool = tc->pool;
	sector_t virt_block_begin = virt_block * pool->sectors_per_block;
	sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;

	if (virt_block_end <= tc->origin_size)
		schedule_copy(tc, virt_block, tc->origin_dev,
			      virt_block, data_dest, cell, bio,
			      pool->sectors_per_block);

	else if (virt_block_begin < tc->origin_size)
		schedule_copy(tc, virt_block, tc->origin_dev,
			      virt_block, data_dest, cell, bio,
			      tc->origin_size - virt_block_begin);

	else
		schedule_zero(tc, virt_block, data_dest, cell, bio);
}
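
/*
 * Illustration of the three cases above, assuming 128-sector blocks and an
 * external origin of 1000 sectors:
 *
 *   block 6 (sectors 768..895)  - wholly inside the origin: full copy.
 *   block 7 (sectors 896..1023) - straddles the end: copy only the first
 *                                 1000 - 896 = 104 sectors; schedule_copy()
 *                                 zeroes the remaining tail when
 *                                 zero_new_blocks is set.
 *   block 8 (sectors 1024..)    - entirely beyond the origin: just zero it.
 */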

static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);

static void requeue_bios(struct pool *pool);

static bool is_read_only_pool_mode(enum pool_mode mode)
{
	return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
}

static bool is_read_only(struct pool *pool)
{
	return is_read_only_pool_mode(get_pool_mode(pool));
}

static void check_for_metadata_space(struct pool *pool)
{
	int r;
	const char *ooms_reason = NULL;
	dm_block_t nr_free;

	r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
	if (r)
		ooms_reason = "Could not get free metadata blocks";
	else if (!nr_free)
		ooms_reason = "No free metadata blocks";

	if (ooms_reason && !is_read_only(pool)) {
		DMERR("%s", ooms_reason);
		set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
	}
}

static void check_for_data_space(struct pool *pool)
{
	int r;
	dm_block_t nr_free;

	if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
		return;

	r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
	if (r)
		return;

	if (nr_free) {
		set_pool_mode(pool, PM_WRITE);
		requeue_bios(pool);
	}
}

/*
 * A non-zero return indicates read_only or fail_io mode.
 * Many callers don't care about the return value.
 */
static int commit(struct pool *pool)
{
	int r;

	if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
		return -EINVAL;

	r = dm_pool_commit_metadata(pool->pmd);
	if (r)
		metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
	else {
		check_for_metadata_space(pool);
		check_for_data_space(pool);
	}

	return r;
}

static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
{
	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
		DMWARN("%s: reached low water mark for data device: sending event.",
		       dm_device_name(pool->pool_md));
		spin_lock_irq(&pool->lock);
		pool->low_water_triggered = true;
		spin_unlock_irq(&pool->lock);
		dm_table_event(pool->ti->table);
	}
}

static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
	int r;
	dm_block_t free_blocks;
	struct pool *pool = tc->pool;

	if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
		return -EINVAL;

	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
		return r;
	}

	check_low_water_mark(pool, free_blocks);

	if (!free_blocks) {
		/*
		 * Try to commit to see if that will free up some
		 * more space.
		 */
		r = commit(pool);
		if (r)
			return r;

		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
		if (r) {
			metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
			return r;
		}

		if (!free_blocks) {
			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
			return -ENOSPC;
		}
	}

	r = dm_pool_alloc_data_block(pool->pmd, result);
	if (r) {
		if (r == -ENOSPC)
			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
		else
			metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
		return r;
	}

	r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
	if (r) {
		metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
		return r;
	}

	if (!free_blocks) {
		/* Let's commit before we use up the metadata reserve. */
		r = commit(pool);
		if (r)
			return r;
	}

	return 0;
}
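
/*
 * Allocation strategy recap: when the data device looks full we first
 * commit, since blocks freed in the current transaction only become
 * reusable after a commit, and then re-check.  If there is still nothing
 * free we switch to PM_OUT_OF_DATA_SPACE and return -ENOSPC so the bio is
 * queued or errored by the caller.  After a successful allocation we also
 * watch the metadata device: once its free count drops to zero we commit
 * rather than dip into the metadata reserve.
 */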

/*
 * If we have run out of space, queue bios until the device is
 * resumed, presumably after having been reloaded with more space.
 */
static void retry_on_resume(struct bio *bio)
{
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct thin_c *tc = h->tc;

	spin_lock_irq(&tc->lock);
	bio_list_add(&tc->retry_on_resume_list, bio);
	spin_unlock_irq(&tc->lock);
}

static blk_status_t should_error_unserviceable_bio(struct pool *pool)
{
	enum pool_mode m = get_pool_mode(pool);

	switch (m) {
	case PM_WRITE:
		/* Shouldn't get here */
		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
		return BLK_STS_IOERR;

	case PM_OUT_OF_DATA_SPACE:
		return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;

	case PM_OUT_OF_METADATA_SPACE:
	case PM_READ_ONLY:
	case PM_FAIL:
		return BLK_STS_IOERR;
	default:
		/* Shouldn't get here */
		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
		return BLK_STS_IOERR;
	}
}
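
/*
 * Summary of what an unserviceable bio gets per pool mode:
 *
 *   PM_WRITE                  - BLK_STS_IOERR (should never happen here)
 *   PM_OUT_OF_DATA_SPACE      - BLK_STS_NOSPC if error_if_no_space is set,
 *                               otherwise 0, meaning "queue and retry on
 *                               resume"
 *   PM_OUT_OF_METADATA_SPACE,
 *   PM_READ_ONLY, PM_FAIL     - BLK_STS_IOERR
 *
 * handle_unserviceable_bio() and retry_bios_on_resume() below act on this:
 * a non-zero status fails the bio(s) immediately, zero parks them on the
 * thin device's retry_on_resume_list.
 */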

static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
	blk_status_t error = should_error_unserviceable_bio(pool);

	if (error) {
		bio->bi_status = error;
		bio_endio(bio);
	} else
		retry_on_resume(bio);
}

static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	struct bio *bio;
	struct bio_list bios;
	blk_status_t error;

	error = should_error_unserviceable_bio(pool);
	if (error) {
		cell_error_with_code(pool, cell, error);
		return;
	}

	bio_list_init(&bios);
	cell_release(pool, cell, &bios);

	while ((bio = bio_list_pop(&bios)))
		retry_on_resume(bio);
}

static void process_discard_cell_no_passdown(struct thin_c *tc,
					     struct dm_bio_prison_cell *virt_cell)
{
	struct pool *pool = tc->pool;
	struct dm_thin_new_mapping *m = get_next_mapping(pool);

	/*
	 * We don't need to lock the data blocks, since there's no
	 * passdown.  We only lock data blocks for allocation and breaking sharing.
	 */
	m->tc = tc;
	m->virt_begin = virt_cell->key.block_begin;
	m->virt_end = virt_cell->key.block_end;
	m->cell = virt_cell;
	m->bio = virt_cell->holder;

	if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
		pool->process_prepared_discard(m);
}

static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t end,
				 struct bio *bio)
{
	struct pool *pool = tc->pool;

	int r;
	bool maybe_shared;
	struct dm_cell_key data_key;
	struct dm_bio_prison_cell *data_cell;
	struct dm_thin_new_mapping *m;
	dm_block_t virt_begin, virt_end, data_begin, data_end;
	dm_block_t len, next_boundary;

	while (begin != end) {
		r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end,
					      &data_begin, &maybe_shared);
		if (r) {
			/*
			 * Silently fail, letting any mappings we've
			 * created complete.
			 */
			break;
		}

		data_end = data_begin + (virt_end - virt_begin);

		/*
		 * Make sure the data region obeys the bio prison restrictions.
		 */
		while (data_begin < data_end) {
			r = ensure_next_mapping(pool);
			if (r)
				return; /* we did our best */

			next_boundary = ((data_begin >> BIO_PRISON_MAX_RANGE_SHIFT) + 1)
				<< BIO_PRISON_MAX_RANGE_SHIFT;
			len = min_t(sector_t, data_end - data_begin, next_boundary - data_begin);

			/* This key is certainly within range given the above splitting */
			(void) build_key(tc->td, PHYSICAL, data_begin, data_begin + len, &data_key);
			if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
				/* contention, we'll give up with this range */
				data_begin += len;
				continue;
			}

			/*
			 * IO may still be going to the destination block.  We must
			 * quiesce before we can do the removal.
			 */
			m = get_next_mapping(pool);
			m->tc = tc;
			m->maybe_shared = maybe_shared;
			m->virt_begin = virt_begin;
			m->virt_end = virt_begin + len;
			m->data_block = data_begin;
			m->cell = data_cell;
			m->bio = bio;

			/*
			 * The parent bio must not complete before sub discard bios are
			 * chained to it (see end_discard's bio_chain)!
			 *
			 * This per-mapping bi_remaining increment is paired with
			 * the implicit decrement that occurs via bio_endio() in
			 * end_discard().
			 */
			bio_inc_remaining(bio);
			if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
				pool->process_prepared_discard(m);

			virt_begin += len;
			data_begin += len;
		}

		begin = virt_end;
	}
}
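
/*
 * The boundary arithmetic above clips each sub-range so that it never
 * crosses a (1 << BIO_PRISON_MAX_RANGE_SHIFT)-block boundary, keeping every
 * cell key within the range the bio prison can represent.  Purely as an
 * illustration, with a shift of 10 (1024-block ranges) a mapped run of
 * data blocks [1500, 3000) would be handled as two pieces: [1500, 2048)
 * (548 blocks, up to the next boundary) and then [2048, 3000) (952 blocks).
 */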

static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_prison_cell *virt_cell)
{
	struct bio *bio = virt_cell->holder;
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

	/*
	 * The virt_cell will only get freed once the origin bio completes.
	 * This means it will remain locked while all the individual
	 * passdown bios are in flight.
	 */
	h->cell = virt_cell;
	break_up_discard_bio(tc, virt_cell->key.block_begin, virt_cell->key.block_end, bio);

	/*
	 * We complete the bio now, knowing that the bi_remaining field
	 * will prevent completion until the sub range discards have
	 * completed.
	 */
	bio_endio(bio);
}

static void process_discard_bio(struct thin_c *tc, struct bio *bio)
{
	dm_block_t begin, end;
	struct dm_cell_key virt_key;
	struct dm_bio_prison_cell *virt_cell;

	get_bio_block_range(tc, bio, &begin, &end);
	if (begin == end) {
		/*
		 * The discard covers less than a block.
		 */
		bio_endio(bio);
		return;
	}

	if (unlikely(!build_key(tc->td, VIRTUAL, begin, end, &virt_key))) {
		DMERR_LIMIT("Discard doesn't respect bio prison limits");
		bio_endio(bio);
		return;
	}

	if (bio_detain(tc->pool, &virt_key, bio, &virt_cell)) {
		/*
		 * Potential starvation issue: We're relying on the
		 * fs/application being well behaved, and not trying to
		 * send IO to a region at the same time as discarding it.
		 * If they do this persistently then it's possible this
		 * cell will never be granted.
		 */
		return;
	}

	tc->pool->process_discard_cell(tc, virt_cell);
}

static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
			  struct dm_cell_key *key,
			  struct dm_thin_lookup_result *lookup_result,
			  struct dm_bio_prison_cell *cell)
{
	int r;
	dm_block_t data_block;
	struct pool *pool = tc->pool;

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		schedule_internal_copy(tc, block, lookup_result->block,
				       data_block, cell, bio);
		break;

	case -ENOSPC:
		retry_bios_on_resume(pool, cell);
		break;

	default:
		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
			    __func__, r);
		cell_error(pool, cell);
		break;
	}
}

static void __remap_and_issue_shared_cell(void *context,
					  struct dm_bio_prison_cell *cell)
{
	struct remap_info *info = context;
	struct bio *bio;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
		    bio_op(bio) == REQ_OP_DISCARD)
			bio_list_add(&info->defer_bios, bio);
		else {
			struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

			h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
			inc_all_io_entry(info->tc->pool, bio);
			bio_list_add(&info->issue_bios, bio);
		}
	}
}

static void remap_and_issue_shared_cell(struct thin_c *tc,
					struct dm_bio_prison_cell *cell,
					dm_block_t block)
{
	struct bio *bio;
	struct remap_info info;

	info.tc = tc;
	bio_list_init(&info.defer_bios);
	bio_list_init(&info.issue_bios);

	cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
			   &info, cell);

	while ((bio = bio_list_pop(&info.defer_bios)))
		thin_defer_bio(tc, bio);

	while ((bio = bio_list_pop(&info.issue_bios)))
		remap_and_issue(tc, bio, block);
}

static void process_shared_bio(struct thin_c *tc, struct bio *bio,
			       dm_block_t block,
			       struct dm_thin_lookup_result *lookup_result,
			       struct dm_bio_prison_cell *virt_cell)
{
	struct dm_bio_prison_cell *data_cell;
	struct pool *pool = tc->pool;
	struct dm_cell_key key;

	/*
	 * If cell is already occupied, then sharing is already in the process
	 * of being broken so we have nothing further to do here.
	 */
	build_data_key(tc->td, lookup_result->block, &key);
	if (bio_detain(pool, &key, bio, &data_cell)) {
		cell_defer_no_holder(tc, virt_cell);
		return;
	}

	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) {
		break_sharing(tc, bio, block, &key, lookup_result, data_cell);
		cell_defer_no_holder(tc, virt_cell);
	} else {
		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));

		h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
		inc_all_io_entry(pool, bio);
		remap_and_issue(tc, bio, lookup_result->block);

		remap_and_issue_shared_cell(tc, data_cell, lookup_result->block);
		remap_and_issue_shared_cell(tc, virt_cell, lookup_result->block);
	}
}

static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
			    struct dm_bio_prison_cell *cell)
{
	int r;
	dm_block_t data_block;
	struct pool *pool = tc->pool;

	/*
	 * Remap empty bios (flushes) immediately, without provisioning.
	 */
	if (!bio->bi_iter.bi_size) {
		inc_all_io_entry(pool, bio);
		cell_defer_no_holder(tc, cell);

		remap_and_issue(tc, bio, 0);
		return;
	}

	/*
	 * Fill read bios with zeroes and complete them immediately.
	 */
	if (bio_data_dir(bio) == READ) {
		zero_fill_bio(bio);
		cell_defer_no_holder(tc, cell);
		bio_endio(bio);
		return;
	}

	r = alloc_data_block(tc, &data_block);
	switch (r) {
	case 0:
		if (tc->origin_dev)
			schedule_external_copy(tc, block, data_block, cell, bio);
		else
			schedule_zero(tc, block, data_block, cell, bio);
		break;

	case -ENOSPC:
		retry_bios_on_resume(pool, cell);
		break;

	default:
		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
			    __func__, r);
		cell_error(pool, cell);
		break;
	}
}

static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	int r;
	struct pool *pool = tc->pool;
	struct bio *bio = cell->holder;
	dm_block_t block = get_bio_block(tc, bio);
	struct dm_thin_lookup_result lookup_result;

	if (tc->requeue_mode) {
		cell_requeue(pool, cell);
		return;
	}

	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
	switch (r) {
	case 0:
		if (lookup_result.shared)
			process_shared_bio(tc, bio, block, &lookup_result, cell);
		else {
			inc_all_io_entry(pool, bio);
			remap_and_issue(tc, bio, lookup_result.block);
			inc_remap_and_issue_cell(tc, cell, lookup_result.block);
		}
		break;

	case -ENODATA:
		if (bio_data_dir(bio) == READ && tc->origin_dev) {
			inc_all_io_entry(pool, bio);
			cell_defer_no_holder(tc, cell);

			if (bio_end_sector(bio) <= tc->origin_size)
				remap_to_origin_and_issue(tc, bio);

			else if (bio->bi_iter.bi_sector < tc->origin_size) {
				zero_fill_bio(bio);
				bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
				remap_to_origin_and_issue(tc, bio);

			} else {
				zero_fill_bio(bio);
				bio_endio(bio);
			}
		} else
			provision_block(tc, bio, block, cell);
		break;

	default:
		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
			    __func__, r);
		cell_defer_no_holder(tc, cell);
		bio_io_error(bio);
		break;
	}
}
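
/*
 * Example of the -ENODATA read path above, with an external origin of
 * 1000 sectors: a read starting at sector 990 for 32 sectors ends at 1022,
 * past the origin, but it starts inside it.  The whole bio is zero-filled,
 * its size is then clipped to (1000 - 990) sectors and it is reissued
 * against the origin, so the first 10 sectors come back with origin data
 * and the remaining 22 stay zeroed.
 */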

static void process_bio(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	dm_block_t block = get_bio_block(tc, bio);
	struct dm_bio_prison_cell *cell;
	struct dm_cell_key key;

	/*
	 * If cell is already occupied, then the block is already
	 * being provisioned so we have nothing further to do here.
	 */
	build_virtual_key(tc->td, block, &key);
	if (bio_detain(pool, &key, bio, &cell))
		return;

	process_cell(tc, cell);
}

static void __process_bio_read_only(struct thin_c *tc, struct bio *bio,
				    struct dm_bio_prison_cell *cell)
{
	int r;
	int rw = bio_data_dir(bio);
	dm_block_t block = get_bio_block(tc, bio);
	struct dm_thin_lookup_result lookup_result;

	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
	switch (r) {
	case 0:
		if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) {
			handle_unserviceable_bio(tc->pool, bio);
			if (cell)
				cell_defer_no_holder(tc, cell);
		} else {
			inc_all_io_entry(tc->pool, bio);
			remap_and_issue(tc, bio, lookup_result.block);
			if (cell)
				inc_remap_and_issue_cell(tc, cell, lookup_result.block);
		}
		break;

	case -ENODATA:
		if (cell)
			cell_defer_no_holder(tc, cell);
		if (rw != READ) {
			handle_unserviceable_bio(tc->pool, bio);
			break;
		}

		if (tc->origin_dev) {
			inc_all_io_entry(tc->pool, bio);
			remap_to_origin_and_issue(tc, bio);
			break;
		}

		zero_fill_bio(bio);
		bio_endio(bio);
		break;

	default:
		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
			    __func__, r);
		if (cell)
			cell_defer_no_holder(tc, cell);
		bio_io_error(bio);
		break;
	}
}

static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
{
	__process_bio_read_only(tc, bio, NULL);
}

static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	__process_bio_read_only(tc, cell->holder, cell);
}

static void process_bio_success(struct thin_c *tc, struct bio *bio)
{
	bio_endio(bio);
}

static void process_bio_fail(struct thin_c *tc, struct bio *bio)
{
	bio_io_error(bio);
}

static void process_cell_success(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	cell_success(tc->pool, cell);
}

static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	cell_error(tc->pool, cell);
}

/*
 * FIXME: should we also commit due to size of transaction, measured in
 * metadata blocks?
 */
static int need_commit_due_to_time(struct pool *pool)
{
	return !time_in_range(jiffies, pool->last_commit_jiffies,
			      pool->last_commit_jiffies + COMMIT_PERIOD);
}
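
/*
 * In other words: a commit becomes due once more than COMMIT_PERIOD
 * jiffies have passed since last_commit_jiffies (time_in_range() also
 * copes with jiffies wrap-around).  With the default COMMIT_PERIOD of HZ
 * this flushes dirty metadata roughly once a second, even when no explicit
 * FLUSH/FUA arrives.
 */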

#define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
#define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook))

static void __thin_bio_rb_add(struct thin_c *tc, struct bio *bio)
{
	struct rb_node **rbp, *parent;
	struct dm_thin_endio_hook *pbd;
	sector_t bi_sector = bio->bi_iter.bi_sector;

	rbp = &tc->sort_bio_list.rb_node;
	parent = NULL;
	while (*rbp) {
		parent = *rbp;
		pbd = thin_pbd(parent);

		if (bi_sector < thin_bio(pbd)->bi_iter.bi_sector)
			rbp = &(*rbp)->rb_left;
		else
			rbp = &(*rbp)->rb_right;
	}

	pbd = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	rb_link_node(&pbd->rb_node, parent, rbp);
	rb_insert_color(&pbd->rb_node, &tc->sort_bio_list);
}

static void __extract_sorted_bios(struct thin_c *tc)
{
	struct rb_node *node;
	struct dm_thin_endio_hook *pbd;
	struct bio *bio;

	for (node = rb_first(&tc->sort_bio_list); node; node = rb_next(node)) {
		pbd = thin_pbd(node);
		bio = thin_bio(pbd);

		bio_list_add(&tc->deferred_bio_list, bio);
		rb_erase(&pbd->rb_node, &tc->sort_bio_list);
	}

	WARN_ON(!RB_EMPTY_ROOT(&tc->sort_bio_list));
}

static void __sort_thin_deferred_bios(struct thin_c *tc)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &tc->deferred_bio_list);
	bio_list_init(&tc->deferred_bio_list);

	/* Sort deferred_bio_list using rb-tree */
	while ((bio = bio_list_pop(&bios)))
		__thin_bio_rb_add(tc, bio);

	/*
	 * Transfer the sorted bios in sort_bio_list back to
	 * deferred_bio_list to allow lockless submission of
	 * all bios.
	 */
	__extract_sorted_bios(tc);
}
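
/*
 * The sort costs O(n log n) rb-tree insertions followed by an in-order
 * walk, and leaves deferred_bio_list ordered by starting sector.  Issuing
 * the deferred bios in ascending sector order gives the data device a far
 * more sequential access pattern than their arrival order would.
 * Duplicate sectors are fine: equal keys simply descend into the
 * right-hand subtree.
 */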

static void process_thin_deferred_bios(struct thin_c *tc)
{
	struct pool *pool = tc->pool;
	struct bio *bio;
	struct bio_list bios;
	struct blk_plug plug;
	unsigned int count = 0;

	if (tc->requeue_mode) {
		error_thin_bio_list(tc, &tc->deferred_bio_list,
				BLK_STS_DM_REQUEUE);
		return;
	}

	bio_list_init(&bios);

	spin_lock_irq(&tc->lock);

	if (bio_list_empty(&tc->deferred_bio_list)) {
		spin_unlock_irq(&tc->lock);
		return;
	}

	__sort_thin_deferred_bios(tc);

	bio_list_merge(&bios, &tc->deferred_bio_list);
	bio_list_init(&tc->deferred_bio_list);

	spin_unlock_irq(&tc->lock);

	blk_start_plug(&plug);
	while ((bio = bio_list_pop(&bios))) {
		/*
		 * If we've got no free new_mapping structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (ensure_next_mapping(pool)) {
			spin_lock_irq(&tc->lock);
			bio_list_add(&tc->deferred_bio_list, bio);
			bio_list_merge(&tc->deferred_bio_list, &bios);
			spin_unlock_irq(&tc->lock);
			break;
		}

		if (bio_op(bio) == REQ_OP_DISCARD)
			pool->process_discard(tc, bio);
		else
			pool->process_bio(tc, bio);

		if ((count++ & 127) == 0) {
			throttle_work_update(&pool->throttle);
			dm_pool_issue_prefetches(pool->pmd);
		}
		cond_resched();
	}
	blk_finish_plug(&plug);
}
224162306a36Sopenharmony_ci
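/*
 * sort() comparator: order bio prison cells by their holder bio's start sector.
 */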
224262306a36Sopenharmony_cistatic int cmp_cells(const void *lhs, const void *rhs)
224362306a36Sopenharmony_ci{
224462306a36Sopenharmony_ci	struct dm_bio_prison_cell *lhs_cell = *((struct dm_bio_prison_cell **) lhs);
224562306a36Sopenharmony_ci	struct dm_bio_prison_cell *rhs_cell = *((struct dm_bio_prison_cell **) rhs);
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci	BUG_ON(!lhs_cell->holder);
224862306a36Sopenharmony_ci	BUG_ON(!rhs_cell->holder);
224962306a36Sopenharmony_ci
225062306a36Sopenharmony_ci	if (lhs_cell->holder->bi_iter.bi_sector < rhs_cell->holder->bi_iter.bi_sector)
225162306a36Sopenharmony_ci		return -1;
225262306a36Sopenharmony_ci
225362306a36Sopenharmony_ci	if (lhs_cell->holder->bi_iter.bi_sector > rhs_cell->holder->bi_iter.bi_sector)
225462306a36Sopenharmony_ci		return 1;
225562306a36Sopenharmony_ci
225662306a36Sopenharmony_ci	return 0;
225762306a36Sopenharmony_ci}
225862306a36Sopenharmony_ci
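/*
 * Move up to CELL_SORT_ARRAY_SIZE cells from the list into
 * pool->cell_sort_array and sort them by holder sector.  Returns the number
 * of cells taken; any remainder is left on the list for the next pass.
 */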
225962306a36Sopenharmony_cistatic unsigned int sort_cells(struct pool *pool, struct list_head *cells)
226062306a36Sopenharmony_ci{
226162306a36Sopenharmony_ci	unsigned int count = 0;
226262306a36Sopenharmony_ci	struct dm_bio_prison_cell *cell, *tmp;
226362306a36Sopenharmony_ci
226462306a36Sopenharmony_ci	list_for_each_entry_safe(cell, tmp, cells, user_list) {
226562306a36Sopenharmony_ci		if (count >= CELL_SORT_ARRAY_SIZE)
226662306a36Sopenharmony_ci			break;
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci		pool->cell_sort_array[count++] = cell;
226962306a36Sopenharmony_ci		list_del(&cell->user_list);
227062306a36Sopenharmony_ci	}
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ci	sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
227362306a36Sopenharmony_ci
227462306a36Sopenharmony_ci	return count;
227562306a36Sopenharmony_ci}
227662306a36Sopenharmony_ci
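/*
 * Drain tc->deferred_cells and process the cells in batches, sorted by the
 * holder bio's sector.  If we run out of new_mapping structs, the
 * unprocessed cells are put back on the deferred list and we bail out.
 */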
227762306a36Sopenharmony_cistatic void process_thin_deferred_cells(struct thin_c *tc)
227862306a36Sopenharmony_ci{
227962306a36Sopenharmony_ci	struct pool *pool = tc->pool;
228062306a36Sopenharmony_ci	struct list_head cells;
228162306a36Sopenharmony_ci	struct dm_bio_prison_cell *cell;
228262306a36Sopenharmony_ci	unsigned int i, j, count;
228362306a36Sopenharmony_ci
228462306a36Sopenharmony_ci	INIT_LIST_HEAD(&cells);
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_ci	spin_lock_irq(&tc->lock);
228762306a36Sopenharmony_ci	list_splice_init(&tc->deferred_cells, &cells);
228862306a36Sopenharmony_ci	spin_unlock_irq(&tc->lock);
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_ci	if (list_empty(&cells))
229162306a36Sopenharmony_ci		return;
229262306a36Sopenharmony_ci
229362306a36Sopenharmony_ci	do {
229462306a36Sopenharmony_ci		count = sort_cells(tc->pool, &cells);
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_ci		for (i = 0; i < count; i++) {
229762306a36Sopenharmony_ci			cell = pool->cell_sort_array[i];
229862306a36Sopenharmony_ci			BUG_ON(!cell->holder);
229962306a36Sopenharmony_ci
230062306a36Sopenharmony_ci			/*
230162306a36Sopenharmony_ci			 * If we've got no free new_mapping structs, and processing
230262306a36Sopenharmony_ci			 * this bio might require one, we pause until there are some
230362306a36Sopenharmony_ci			 * prepared mappings to process.
230462306a36Sopenharmony_ci			 */
230562306a36Sopenharmony_ci			if (ensure_next_mapping(pool)) {
230662306a36Sopenharmony_ci				for (j = i; j < count; j++)
230762306a36Sopenharmony_ci					list_add(&pool->cell_sort_array[j]->user_list, &cells);
230862306a36Sopenharmony_ci
230962306a36Sopenharmony_ci				spin_lock_irq(&tc->lock);
231062306a36Sopenharmony_ci				list_splice(&cells, &tc->deferred_cells);
231162306a36Sopenharmony_ci				spin_unlock_irq(&tc->lock);
231262306a36Sopenharmony_ci				return;
231362306a36Sopenharmony_ci			}
231462306a36Sopenharmony_ci
231562306a36Sopenharmony_ci			if (bio_op(cell->holder) == REQ_OP_DISCARD)
231662306a36Sopenharmony_ci				pool->process_discard_cell(tc, cell);
231762306a36Sopenharmony_ci			else
231862306a36Sopenharmony_ci				pool->process_cell(tc, cell);
231962306a36Sopenharmony_ci		}
232062306a36Sopenharmony_ci		cond_resched();
232162306a36Sopenharmony_ci	} while (!list_empty(&cells));
232262306a36Sopenharmony_ci}
232362306a36Sopenharmony_ci
232462306a36Sopenharmony_cistatic void thin_get(struct thin_c *tc);
232562306a36Sopenharmony_cistatic void thin_put(struct thin_c *tc);
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci/*
232862306a36Sopenharmony_ci * We can't hold rcu_read_lock() around code that can block.  So we
232962306a36Sopenharmony_ci * find a thin with the rcu lock held; bump a refcount; then drop
233062306a36Sopenharmony_ci * the lock.
233162306a36Sopenharmony_ci */
233262306a36Sopenharmony_cistatic struct thin_c *get_first_thin(struct pool *pool)
233362306a36Sopenharmony_ci{
233462306a36Sopenharmony_ci	struct thin_c *tc = NULL;
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_ci	rcu_read_lock();
233762306a36Sopenharmony_ci	if (!list_empty(&pool->active_thins)) {
233862306a36Sopenharmony_ci		tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
233962306a36Sopenharmony_ci		thin_get(tc);
234062306a36Sopenharmony_ci	}
234162306a36Sopenharmony_ci	rcu_read_unlock();
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	return tc;
234462306a36Sopenharmony_ci}
234562306a36Sopenharmony_ci
234662306a36Sopenharmony_cistatic struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
234762306a36Sopenharmony_ci{
234862306a36Sopenharmony_ci	struct thin_c *old_tc = tc;
234962306a36Sopenharmony_ci
235062306a36Sopenharmony_ci	rcu_read_lock();
235162306a36Sopenharmony_ci	list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
235262306a36Sopenharmony_ci		thin_get(tc);
235362306a36Sopenharmony_ci		thin_put(old_tc);
235462306a36Sopenharmony_ci		rcu_read_unlock();
235562306a36Sopenharmony_ci		return tc;
235662306a36Sopenharmony_ci	}
235762306a36Sopenharmony_ci	thin_put(old_tc);
235862306a36Sopenharmony_ci	rcu_read_unlock();
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci	return NULL;
236162306a36Sopenharmony_ci}
236262306a36Sopenharmony_ci
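/*
 * Process the deferred cells and bios for every active thin, then commit
 * the metadata (if needed) before issuing or completing any deferred flush
 * bios.
 */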
236362306a36Sopenharmony_cistatic void process_deferred_bios(struct pool *pool)
236462306a36Sopenharmony_ci{
236562306a36Sopenharmony_ci	struct bio *bio;
236662306a36Sopenharmony_ci	struct bio_list bios, bio_completions;
236762306a36Sopenharmony_ci	struct thin_c *tc;
236862306a36Sopenharmony_ci
236962306a36Sopenharmony_ci	tc = get_first_thin(pool);
237062306a36Sopenharmony_ci	while (tc) {
237162306a36Sopenharmony_ci		process_thin_deferred_cells(tc);
237262306a36Sopenharmony_ci		process_thin_deferred_bios(tc);
237362306a36Sopenharmony_ci		tc = get_next_thin(pool, tc);
237462306a36Sopenharmony_ci	}
237562306a36Sopenharmony_ci
237662306a36Sopenharmony_ci	/*
237762306a36Sopenharmony_ci	 * If there are any deferred flush bios, we must commit the metadata
237862306a36Sopenharmony_ci	 * before issuing them or signaling their completion.
237962306a36Sopenharmony_ci	 */
238062306a36Sopenharmony_ci	bio_list_init(&bios);
238162306a36Sopenharmony_ci	bio_list_init(&bio_completions);
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
238462306a36Sopenharmony_ci	bio_list_merge(&bios, &pool->deferred_flush_bios);
238562306a36Sopenharmony_ci	bio_list_init(&pool->deferred_flush_bios);
238662306a36Sopenharmony_ci
238762306a36Sopenharmony_ci	bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
238862306a36Sopenharmony_ci	bio_list_init(&pool->deferred_flush_completions);
238962306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci	if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
239262306a36Sopenharmony_ci	    !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
239362306a36Sopenharmony_ci		return;
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ci	if (commit(pool)) {
239662306a36Sopenharmony_ci		bio_list_merge(&bios, &bio_completions);
239762306a36Sopenharmony_ci
239862306a36Sopenharmony_ci		while ((bio = bio_list_pop(&bios)))
239962306a36Sopenharmony_ci			bio_io_error(bio);
240062306a36Sopenharmony_ci		return;
240162306a36Sopenharmony_ci	}
240262306a36Sopenharmony_ci	pool->last_commit_jiffies = jiffies;
240362306a36Sopenharmony_ci
240462306a36Sopenharmony_ci	while ((bio = bio_list_pop(&bio_completions)))
240562306a36Sopenharmony_ci		bio_endio(bio);
240662306a36Sopenharmony_ci
240762306a36Sopenharmony_ci	while ((bio = bio_list_pop(&bios))) {
240862306a36Sopenharmony_ci		/*
240962306a36Sopenharmony_ci		 * The data device was flushed as part of metadata commit,
241062306a36Sopenharmony_ci		 * so complete redundant flushes immediately.
241162306a36Sopenharmony_ci		 */
241262306a36Sopenharmony_ci		if (bio->bi_opf & REQ_PREFLUSH)
241362306a36Sopenharmony_ci			bio_endio(bio);
241462306a36Sopenharmony_ci		else
241562306a36Sopenharmony_ci			dm_submit_bio_remap(bio, NULL);
241662306a36Sopenharmony_ci	}
241762306a36Sopenharmony_ci}
241862306a36Sopenharmony_ci
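/*
 * The pool's main worker: issue metadata prefetches, handle prepared
 * mappings and discards, then the deferred bios, updating the throttle
 * between each stage.
 */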
241962306a36Sopenharmony_cistatic void do_worker(struct work_struct *ws)
242062306a36Sopenharmony_ci{
242162306a36Sopenharmony_ci	struct pool *pool = container_of(ws, struct pool, worker);
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci	throttle_work_start(&pool->throttle);
242462306a36Sopenharmony_ci	dm_pool_issue_prefetches(pool->pmd);
242562306a36Sopenharmony_ci	throttle_work_update(&pool->throttle);
242662306a36Sopenharmony_ci	process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
242762306a36Sopenharmony_ci	throttle_work_update(&pool->throttle);
242862306a36Sopenharmony_ci	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
242962306a36Sopenharmony_ci	throttle_work_update(&pool->throttle);
243062306a36Sopenharmony_ci	process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
243162306a36Sopenharmony_ci	throttle_work_update(&pool->throttle);
243262306a36Sopenharmony_ci	process_deferred_bios(pool);
243362306a36Sopenharmony_ci	throttle_work_complete(&pool->throttle);
243462306a36Sopenharmony_ci}
243562306a36Sopenharmony_ci
243662306a36Sopenharmony_ci/*
243762306a36Sopenharmony_ci * We want to commit periodically so that not too much
243862306a36Sopenharmony_ci * unwritten data builds up.
243962306a36Sopenharmony_ci */
244062306a36Sopenharmony_cistatic void do_waker(struct work_struct *ws)
244162306a36Sopenharmony_ci{
244262306a36Sopenharmony_ci	struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci	wake_worker(pool);
244562306a36Sopenharmony_ci	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
244662306a36Sopenharmony_ci}
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci/*
244962306a36Sopenharmony_ci * We're holding onto IO to allow userland time to react.  After the
245062306a36Sopenharmony_ci * timeout either the pool will have been resized (and thus be back in
245162306a36Sopenharmony_ci * PM_WRITE mode), or we force error_if_no_space on and error the queued IO.
245262306a36Sopenharmony_ci */
245362306a36Sopenharmony_cistatic void do_no_space_timeout(struct work_struct *ws)
245462306a36Sopenharmony_ci{
245562306a36Sopenharmony_ci	struct pool *pool = container_of(to_delayed_work(ws), struct pool,
245662306a36Sopenharmony_ci					 no_space_timeout);
245762306a36Sopenharmony_ci
245862306a36Sopenharmony_ci	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
245962306a36Sopenharmony_ci		pool->pf.error_if_no_space = true;
246062306a36Sopenharmony_ci		notify_of_pool_mode_change(pool);
246162306a36Sopenharmony_ci		error_retry_list_with_code(pool, BLK_STS_NOSPC);
246262306a36Sopenharmony_ci	}
246362306a36Sopenharmony_ci}
246462306a36Sopenharmony_ci
246562306a36Sopenharmony_ci/*----------------------------------------------------------------*/
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_cistruct pool_work {
246862306a36Sopenharmony_ci	struct work_struct worker;
246962306a36Sopenharmony_ci	struct completion complete;
247062306a36Sopenharmony_ci};
247162306a36Sopenharmony_ci
247262306a36Sopenharmony_cistatic struct pool_work *to_pool_work(struct work_struct *ws)
247362306a36Sopenharmony_ci{
247462306a36Sopenharmony_ci	return container_of(ws, struct pool_work, worker);
247562306a36Sopenharmony_ci}
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_cistatic void pool_work_complete(struct pool_work *pw)
247862306a36Sopenharmony_ci{
247962306a36Sopenharmony_ci	complete(&pw->complete);
248062306a36Sopenharmony_ci}
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_cistatic void pool_work_wait(struct pool_work *pw, struct pool *pool,
248362306a36Sopenharmony_ci			   void (*fn)(struct work_struct *))
248462306a36Sopenharmony_ci{
248562306a36Sopenharmony_ci	INIT_WORK_ONSTACK(&pw->worker, fn);
248662306a36Sopenharmony_ci	init_completion(&pw->complete);
248762306a36Sopenharmony_ci	queue_work(pool->wq, &pw->worker);
248862306a36Sopenharmony_ci	wait_for_completion(&pw->complete);
248962306a36Sopenharmony_ci}
249062306a36Sopenharmony_ci
249162306a36Sopenharmony_ci/*----------------------------------------------------------------*/
249262306a36Sopenharmony_ci
249362306a36Sopenharmony_cistruct noflush_work {
249462306a36Sopenharmony_ci	struct pool_work pw;
249562306a36Sopenharmony_ci	struct thin_c *tc;
249662306a36Sopenharmony_ci};
249762306a36Sopenharmony_ci
249862306a36Sopenharmony_cistatic struct noflush_work *to_noflush(struct work_struct *ws)
249962306a36Sopenharmony_ci{
250062306a36Sopenharmony_ci	return container_of(to_pool_work(ws), struct noflush_work, pw);
250162306a36Sopenharmony_ci}
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_cistatic void do_noflush_start(struct work_struct *ws)
250462306a36Sopenharmony_ci{
250562306a36Sopenharmony_ci	struct noflush_work *w = to_noflush(ws);
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	w->tc->requeue_mode = true;
250862306a36Sopenharmony_ci	requeue_io(w->tc);
250962306a36Sopenharmony_ci	pool_work_complete(&w->pw);
251062306a36Sopenharmony_ci}
251162306a36Sopenharmony_ci
251262306a36Sopenharmony_cistatic void do_noflush_stop(struct work_struct *ws)
251362306a36Sopenharmony_ci{
251462306a36Sopenharmony_ci	struct noflush_work *w = to_noflush(ws);
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_ci	w->tc->requeue_mode = false;
251762306a36Sopenharmony_ci	pool_work_complete(&w->pw);
251862306a36Sopenharmony_ci}
251962306a36Sopenharmony_ci
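/*
 * Run fn on the pool's workqueue and wait for it to complete.  Intended to
 * be used with do_noflush_start()/do_noflush_stop() to toggle a thin's
 * requeue_mode.
 */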
252062306a36Sopenharmony_cistatic void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
252162306a36Sopenharmony_ci{
252262306a36Sopenharmony_ci	struct noflush_work w;
252362306a36Sopenharmony_ci
252462306a36Sopenharmony_ci	w.tc = tc;
252562306a36Sopenharmony_ci	pool_work_wait(&w.pw, tc->pool, fn);
252662306a36Sopenharmony_ci}
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci/*----------------------------------------------------------------*/
252962306a36Sopenharmony_ci
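/*
 * Select the discard handlers according to whether discard passdown is
 * enabled in the adjusted pool features.
 */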
253062306a36Sopenharmony_cistatic void set_discard_callbacks(struct pool *pool)
253162306a36Sopenharmony_ci{
253262306a36Sopenharmony_ci	struct pool_c *pt = pool->ti->private;
253362306a36Sopenharmony_ci
253462306a36Sopenharmony_ci	if (pt->adjusted_pf.discard_passdown) {
253562306a36Sopenharmony_ci		pool->process_discard_cell = process_discard_cell_passdown;
253662306a36Sopenharmony_ci		pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
253762306a36Sopenharmony_ci		pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
253862306a36Sopenharmony_ci	} else {
253962306a36Sopenharmony_ci		pool->process_discard_cell = process_discard_cell_no_passdown;
254062306a36Sopenharmony_ci		pool->process_prepared_discard = process_prepared_discard_no_passdown;
254162306a36Sopenharmony_ci	}
254262306a36Sopenharmony_ci}
254362306a36Sopenharmony_ci
254462306a36Sopenharmony_cistatic void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
254562306a36Sopenharmony_ci{
254662306a36Sopenharmony_ci	struct pool_c *pt = pool->ti->private;
254762306a36Sopenharmony_ci	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
254862306a36Sopenharmony_ci	enum pool_mode old_mode = get_pool_mode(pool);
254962306a36Sopenharmony_ci	unsigned long no_space_timeout = READ_ONCE(no_space_timeout_secs) * HZ;
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	/*
255262306a36Sopenharmony_ci	 * Never allow the pool to transition to PM_WRITE mode if user
255362306a36Sopenharmony_ci	 * intervention is required to verify metadata and data consistency.
255462306a36Sopenharmony_ci	 */
255562306a36Sopenharmony_ci	if (new_mode == PM_WRITE && needs_check) {
255662306a36Sopenharmony_ci		DMERR("%s: unable to switch pool to write mode until repaired.",
255762306a36Sopenharmony_ci		      dm_device_name(pool->pool_md));
255862306a36Sopenharmony_ci		if (old_mode != new_mode)
255962306a36Sopenharmony_ci			new_mode = old_mode;
256062306a36Sopenharmony_ci		else
256162306a36Sopenharmony_ci			new_mode = PM_READ_ONLY;
256262306a36Sopenharmony_ci	}
256362306a36Sopenharmony_ci	/*
256462306a36Sopenharmony_ci	 * If we were in PM_FAIL mode, rollback of metadata failed.  We're
256562306a36Sopenharmony_ci	 * not going to recover without a thin_repair.	So we never let the
256662306a36Sopenharmony_ci	 * pool move out of the old mode.
256762306a36Sopenharmony_ci	 */
256862306a36Sopenharmony_ci	if (old_mode == PM_FAIL)
256962306a36Sopenharmony_ci		new_mode = old_mode;
257062306a36Sopenharmony_ci
257162306a36Sopenharmony_ci	switch (new_mode) {
257262306a36Sopenharmony_ci	case PM_FAIL:
257362306a36Sopenharmony_ci		dm_pool_metadata_read_only(pool->pmd);
257462306a36Sopenharmony_ci		pool->process_bio = process_bio_fail;
257562306a36Sopenharmony_ci		pool->process_discard = process_bio_fail;
257662306a36Sopenharmony_ci		pool->process_cell = process_cell_fail;
257762306a36Sopenharmony_ci		pool->process_discard_cell = process_cell_fail;
257862306a36Sopenharmony_ci		pool->process_prepared_mapping = process_prepared_mapping_fail;
257962306a36Sopenharmony_ci		pool->process_prepared_discard = process_prepared_discard_fail;
258062306a36Sopenharmony_ci
258162306a36Sopenharmony_ci		error_retry_list(pool);
258262306a36Sopenharmony_ci		break;
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci	case PM_OUT_OF_METADATA_SPACE:
258562306a36Sopenharmony_ci	case PM_READ_ONLY:
258662306a36Sopenharmony_ci		dm_pool_metadata_read_only(pool->pmd);
258762306a36Sopenharmony_ci		pool->process_bio = process_bio_read_only;
258862306a36Sopenharmony_ci		pool->process_discard = process_bio_success;
258962306a36Sopenharmony_ci		pool->process_cell = process_cell_read_only;
259062306a36Sopenharmony_ci		pool->process_discard_cell = process_cell_success;
259162306a36Sopenharmony_ci		pool->process_prepared_mapping = process_prepared_mapping_fail;
259262306a36Sopenharmony_ci		pool->process_prepared_discard = process_prepared_discard_success;
259362306a36Sopenharmony_ci
259462306a36Sopenharmony_ci		error_retry_list(pool);
259562306a36Sopenharmony_ci		break;
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci	case PM_OUT_OF_DATA_SPACE:
259862306a36Sopenharmony_ci		/*
259962306a36Sopenharmony_ci		 * Ideally we'd never hit this state; the low water mark
260062306a36Sopenharmony_ci		 * would trigger userland to extend the pool before we
260162306a36Sopenharmony_ci		 * completely run out of data space.  However, many small
260262306a36Sopenharmony_ci		 * IOs to unprovisioned space can consume data space at an
260362306a36Sopenharmony_ci		 * alarming rate.  Adjust your low water mark if you're
260462306a36Sopenharmony_ci		 * frequently seeing this mode.
260562306a36Sopenharmony_ci		 */
260662306a36Sopenharmony_ci		pool->out_of_data_space = true;
260762306a36Sopenharmony_ci		pool->process_bio = process_bio_read_only;
260862306a36Sopenharmony_ci		pool->process_discard = process_discard_bio;
260962306a36Sopenharmony_ci		pool->process_cell = process_cell_read_only;
261062306a36Sopenharmony_ci		pool->process_prepared_mapping = process_prepared_mapping;
261162306a36Sopenharmony_ci		set_discard_callbacks(pool);
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci		if (!pool->pf.error_if_no_space && no_space_timeout)
261462306a36Sopenharmony_ci			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
261562306a36Sopenharmony_ci		break;
261662306a36Sopenharmony_ci
261762306a36Sopenharmony_ci	case PM_WRITE:
261862306a36Sopenharmony_ci		if (old_mode == PM_OUT_OF_DATA_SPACE)
261962306a36Sopenharmony_ci			cancel_delayed_work_sync(&pool->no_space_timeout);
262062306a36Sopenharmony_ci		pool->out_of_data_space = false;
262162306a36Sopenharmony_ci		pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
262262306a36Sopenharmony_ci		dm_pool_metadata_read_write(pool->pmd);
262362306a36Sopenharmony_ci		pool->process_bio = process_bio;
262462306a36Sopenharmony_ci		pool->process_discard = process_discard_bio;
262562306a36Sopenharmony_ci		pool->process_cell = process_cell;
262662306a36Sopenharmony_ci		pool->process_prepared_mapping = process_prepared_mapping;
262762306a36Sopenharmony_ci		set_discard_callbacks(pool);
262862306a36Sopenharmony_ci		break;
262962306a36Sopenharmony_ci	}
263062306a36Sopenharmony_ci
263162306a36Sopenharmony_ci	pool->pf.mode = new_mode;
263262306a36Sopenharmony_ci	/*
263362306a36Sopenharmony_ci	 * The pool mode may have changed, sync it so bind_control_target()
263462306a36Sopenharmony_ci	 * doesn't cause an unexpected mode transition on resume.
263562306a36Sopenharmony_ci	 */
263662306a36Sopenharmony_ci	pt->adjusted_pf.mode = new_mode;
263762306a36Sopenharmony_ci
263862306a36Sopenharmony_ci	if (old_mode != new_mode)
263962306a36Sopenharmony_ci		notify_of_pool_mode_change(pool);
264062306a36Sopenharmony_ci}
264162306a36Sopenharmony_ci
264262306a36Sopenharmony_cistatic void abort_transaction(struct pool *pool)
264362306a36Sopenharmony_ci{
264462306a36Sopenharmony_ci	const char *dev_name = dm_device_name(pool->pool_md);
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
264762306a36Sopenharmony_ci	if (dm_pool_abort_metadata(pool->pmd)) {
264862306a36Sopenharmony_ci		DMERR("%s: failed to abort metadata transaction", dev_name);
264962306a36Sopenharmony_ci		set_pool_mode(pool, PM_FAIL);
265062306a36Sopenharmony_ci	}
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	if (dm_pool_metadata_set_needs_check(pool->pmd)) {
265362306a36Sopenharmony_ci		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
265462306a36Sopenharmony_ci		set_pool_mode(pool, PM_FAIL);
265562306a36Sopenharmony_ci	}
265662306a36Sopenharmony_ci}
265762306a36Sopenharmony_ci
265862306a36Sopenharmony_cistatic void metadata_operation_failed(struct pool *pool, const char *op, int r)
265962306a36Sopenharmony_ci{
266062306a36Sopenharmony_ci	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
266162306a36Sopenharmony_ci		    dm_device_name(pool->pool_md), op, r);
266262306a36Sopenharmony_ci
266362306a36Sopenharmony_ci	abort_transaction(pool);
266462306a36Sopenharmony_ci	set_pool_mode(pool, PM_READ_ONLY);
266562306a36Sopenharmony_ci}
266662306a36Sopenharmony_ci
266762306a36Sopenharmony_ci/*----------------------------------------------------------------*/
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci/*
267062306a36Sopenharmony_ci * Mapping functions.
267162306a36Sopenharmony_ci */
267262306a36Sopenharmony_ci
267362306a36Sopenharmony_ci/*
267462306a36Sopenharmony_ci * Called only while mapping a thin bio to hand it over to the workqueue.
267562306a36Sopenharmony_ci */
267662306a36Sopenharmony_cistatic void thin_defer_bio(struct thin_c *tc, struct bio *bio)
267762306a36Sopenharmony_ci{
267862306a36Sopenharmony_ci	struct pool *pool = tc->pool;
267962306a36Sopenharmony_ci
268062306a36Sopenharmony_ci	spin_lock_irq(&tc->lock);
268162306a36Sopenharmony_ci	bio_list_add(&tc->deferred_bio_list, bio);
268262306a36Sopenharmony_ci	spin_unlock_irq(&tc->lock);
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	wake_worker(pool);
268562306a36Sopenharmony_ci}
268662306a36Sopenharmony_ci
268762306a36Sopenharmony_cistatic void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio)
268862306a36Sopenharmony_ci{
268962306a36Sopenharmony_ci	struct pool *pool = tc->pool;
269062306a36Sopenharmony_ci
269162306a36Sopenharmony_ci	throttle_lock(&pool->throttle);
269262306a36Sopenharmony_ci	thin_defer_bio(tc, bio);
269362306a36Sopenharmony_ci	throttle_unlock(&pool->throttle);
269462306a36Sopenharmony_ci}
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_cistatic void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
269762306a36Sopenharmony_ci{
269862306a36Sopenharmony_ci	struct pool *pool = tc->pool;
269962306a36Sopenharmony_ci
270062306a36Sopenharmony_ci	throttle_lock(&pool->throttle);
270162306a36Sopenharmony_ci	spin_lock_irq(&tc->lock);
270262306a36Sopenharmony_ci	list_add_tail(&cell->user_list, &tc->deferred_cells);
270362306a36Sopenharmony_ci	spin_unlock_irq(&tc->lock);
270462306a36Sopenharmony_ci	throttle_unlock(&pool->throttle);
270562306a36Sopenharmony_ci
270662306a36Sopenharmony_ci	wake_worker(pool);
270762306a36Sopenharmony_ci}
270862306a36Sopenharmony_ci
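/*
 * Initialise the per-bio data (struct dm_thin_endio_hook) for a bio
 * entering the thin target.
 */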
270962306a36Sopenharmony_cistatic void thin_hook_bio(struct thin_c *tc, struct bio *bio)
271062306a36Sopenharmony_ci{
271162306a36Sopenharmony_ci	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci	h->tc = tc;
271462306a36Sopenharmony_ci	h->shared_read_entry = NULL;
271562306a36Sopenharmony_ci	h->all_io_entry = NULL;
271662306a36Sopenharmony_ci	h->overwrite_mapping = NULL;
271762306a36Sopenharmony_ci	h->cell = NULL;
271862306a36Sopenharmony_ci}
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci/*
272162306a36Sopenharmony_ci * Non-blocking function called from the thin target's map function.
272262306a36Sopenharmony_ci */
272362306a36Sopenharmony_cistatic int thin_bio_map(struct dm_target *ti, struct bio *bio)
272462306a36Sopenharmony_ci{
272562306a36Sopenharmony_ci	int r;
272662306a36Sopenharmony_ci	struct thin_c *tc = ti->private;
272762306a36Sopenharmony_ci	dm_block_t block = get_bio_block(tc, bio);
272862306a36Sopenharmony_ci	struct dm_thin_device *td = tc->td;
272962306a36Sopenharmony_ci	struct dm_thin_lookup_result result;
273062306a36Sopenharmony_ci	struct dm_bio_prison_cell *virt_cell, *data_cell;
273162306a36Sopenharmony_ci	struct dm_cell_key key;
273262306a36Sopenharmony_ci
273362306a36Sopenharmony_ci	thin_hook_bio(tc, bio);
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci	if (tc->requeue_mode) {
273662306a36Sopenharmony_ci		bio->bi_status = BLK_STS_DM_REQUEUE;
273762306a36Sopenharmony_ci		bio_endio(bio);
273862306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
273962306a36Sopenharmony_ci	}
274062306a36Sopenharmony_ci
274162306a36Sopenharmony_ci	if (get_pool_mode(tc->pool) == PM_FAIL) {
274262306a36Sopenharmony_ci		bio_io_error(bio);
274362306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
274462306a36Sopenharmony_ci	}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_ci	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
274762306a36Sopenharmony_ci		thin_defer_bio_with_throttle(tc, bio);
274862306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
274962306a36Sopenharmony_ci	}
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci	/*
275262306a36Sopenharmony_ci	 * We must hold the virtual cell before doing the lookup, otherwise
275362306a36Sopenharmony_ci	 * there's a race with discard.
275462306a36Sopenharmony_ci	 */
275562306a36Sopenharmony_ci	build_virtual_key(tc->td, block, &key);
275662306a36Sopenharmony_ci	if (bio_detain(tc->pool, &key, bio, &virt_cell))
275762306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
275862306a36Sopenharmony_ci
275962306a36Sopenharmony_ci	r = dm_thin_find_block(td, block, 0, &result);
276062306a36Sopenharmony_ci
276162306a36Sopenharmony_ci	/*
276262306a36Sopenharmony_ci	 * Note that we defer readahead too.
276362306a36Sopenharmony_ci	 */
276462306a36Sopenharmony_ci	switch (r) {
276562306a36Sopenharmony_ci	case 0:
276662306a36Sopenharmony_ci		if (unlikely(result.shared)) {
276762306a36Sopenharmony_ci			/*
276862306a36Sopenharmony_ci			 * We have a race condition here between the
276962306a36Sopenharmony_ci			 * result.shared value returned by the lookup and
277062306a36Sopenharmony_ci			 * snapshot creation, which may cause new
277162306a36Sopenharmony_ci			 * sharing.
277262306a36Sopenharmony_ci			 *
277362306a36Sopenharmony_ci			 * To avoid this always quiesce the origin before
277462306a36Sopenharmony_ci			 * taking the snap.  You want to do this anyway to
277562306a36Sopenharmony_ci			 * ensure a consistent application view
277662306a36Sopenharmony_ci			 * (i.e. lockfs).
277762306a36Sopenharmony_ci			 *
277862306a36Sopenharmony_ci			 * More distant ancestors are irrelevant. The
277962306a36Sopenharmony_ci			 * shared flag will be set in their case.
278062306a36Sopenharmony_ci			 */
278162306a36Sopenharmony_ci			thin_defer_cell(tc, virt_cell);
278262306a36Sopenharmony_ci			return DM_MAPIO_SUBMITTED;
278362306a36Sopenharmony_ci		}
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci		build_data_key(tc->td, result.block, &key);
278662306a36Sopenharmony_ci		if (bio_detain(tc->pool, &key, bio, &data_cell)) {
278762306a36Sopenharmony_ci			cell_defer_no_holder(tc, virt_cell);
278862306a36Sopenharmony_ci			return DM_MAPIO_SUBMITTED;
278962306a36Sopenharmony_ci		}
279062306a36Sopenharmony_ci
279162306a36Sopenharmony_ci		inc_all_io_entry(tc->pool, bio);
279262306a36Sopenharmony_ci		cell_defer_no_holder(tc, data_cell);
279362306a36Sopenharmony_ci		cell_defer_no_holder(tc, virt_cell);
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci		remap(tc, bio, result.block);
279662306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
279762306a36Sopenharmony_ci
279862306a36Sopenharmony_ci	case -ENODATA:
279962306a36Sopenharmony_ci	case -EWOULDBLOCK:
280062306a36Sopenharmony_ci		thin_defer_cell(tc, virt_cell);
280162306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
280262306a36Sopenharmony_ci
280362306a36Sopenharmony_ci	default:
280462306a36Sopenharmony_ci		/*
280562306a36Sopenharmony_ci		 * Must always call bio_io_error on failure.
280662306a36Sopenharmony_ci		 * dm_thin_find_block can fail with -EINVAL if the
280762306a36Sopenharmony_ci		 * pool is switched to fail-io mode.
280862306a36Sopenharmony_ci		 */
280962306a36Sopenharmony_ci		bio_io_error(bio);
281062306a36Sopenharmony_ci		cell_defer_no_holder(tc, virt_cell);
281162306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
281262306a36Sopenharmony_ci	}
281362306a36Sopenharmony_ci}
281462306a36Sopenharmony_ci
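/*
 * For every active thin, move the bios held on retry_on_resume_list back
 * onto the deferred list so they can be reprocessed.
 */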
281562306a36Sopenharmony_cistatic void requeue_bios(struct pool *pool)
281662306a36Sopenharmony_ci{
281762306a36Sopenharmony_ci	struct thin_c *tc;
281862306a36Sopenharmony_ci
281962306a36Sopenharmony_ci	rcu_read_lock();
282062306a36Sopenharmony_ci	list_for_each_entry_rcu(tc, &pool->active_thins, list) {
282162306a36Sopenharmony_ci		spin_lock_irq(&tc->lock);
282262306a36Sopenharmony_ci		bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
282362306a36Sopenharmony_ci		bio_list_init(&tc->retry_on_resume_list);
282462306a36Sopenharmony_ci		spin_unlock_irq(&tc->lock);
282562306a36Sopenharmony_ci	}
282662306a36Sopenharmony_ci	rcu_read_unlock();
282762306a36Sopenharmony_ci}
282862306a36Sopenharmony_ci
282962306a36Sopenharmony_ci/*
283062306a36Sopenharmony_ci *--------------------------------------------------------------
283162306a36Sopenharmony_ci * Binding of control targets to a pool object
283262306a36Sopenharmony_ci *--------------------------------------------------------------
283362306a36Sopenharmony_ci */
283462306a36Sopenharmony_cistatic bool is_factor(sector_t block_size, uint32_t n)
283562306a36Sopenharmony_ci{
283662306a36Sopenharmony_ci	return !sector_div(block_size, n);
283762306a36Sopenharmony_ci}
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_ci/*
284062306a36Sopenharmony_ci * If discard_passdown was enabled, verify that the data device
284162306a36Sopenharmony_ci * supports discards.  Disable discard_passdown if it does not.
284262306a36Sopenharmony_ci */
284362306a36Sopenharmony_cistatic void disable_discard_passdown_if_not_supported(struct pool_c *pt)
284462306a36Sopenharmony_ci{
284562306a36Sopenharmony_ci	struct pool *pool = pt->pool;
284662306a36Sopenharmony_ci	struct block_device *data_bdev = pt->data_dev->bdev;
284762306a36Sopenharmony_ci	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
284862306a36Sopenharmony_ci	const char *reason = NULL;
284962306a36Sopenharmony_ci
285062306a36Sopenharmony_ci	if (!pt->adjusted_pf.discard_passdown)
285162306a36Sopenharmony_ci		return;
285262306a36Sopenharmony_ci
285362306a36Sopenharmony_ci	if (!bdev_max_discard_sectors(pt->data_dev->bdev))
285462306a36Sopenharmony_ci		reason = "discard unsupported";
285562306a36Sopenharmony_ci
285662306a36Sopenharmony_ci	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
285762306a36Sopenharmony_ci		reason = "max discard sectors smaller than a block";
285862306a36Sopenharmony_ci
285962306a36Sopenharmony_ci	if (reason) {
286062306a36Sopenharmony_ci		DMWARN("Data device (%pg) %s: Disabling discard passdown.", data_bdev, reason);
286162306a36Sopenharmony_ci		pt->adjusted_pf.discard_passdown = false;
286262306a36Sopenharmony_ci	}
286362306a36Sopenharmony_ci}
286462306a36Sopenharmony_ci
286562306a36Sopenharmony_cistatic int bind_control_target(struct pool *pool, struct dm_target *ti)
286662306a36Sopenharmony_ci{
286762306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
286862306a36Sopenharmony_ci
286962306a36Sopenharmony_ci	/*
287062306a36Sopenharmony_ci	 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
287162306a36Sopenharmony_ci	 */
287262306a36Sopenharmony_ci	enum pool_mode old_mode = get_pool_mode(pool);
287362306a36Sopenharmony_ci	enum pool_mode new_mode = pt->adjusted_pf.mode;
287462306a36Sopenharmony_ci
287562306a36Sopenharmony_ci	/*
287662306a36Sopenharmony_ci	 * Don't change the pool's mode until set_pool_mode() below.
287762306a36Sopenharmony_ci	 * Otherwise the pool's process_* function pointers may
287862306a36Sopenharmony_ci	 * not match the desired pool mode.
287962306a36Sopenharmony_ci	 */
288062306a36Sopenharmony_ci	pt->adjusted_pf.mode = old_mode;
288162306a36Sopenharmony_ci
288262306a36Sopenharmony_ci	pool->ti = ti;
288362306a36Sopenharmony_ci	pool->pf = pt->adjusted_pf;
288462306a36Sopenharmony_ci	pool->low_water_blocks = pt->low_water_blocks;
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_ci	set_pool_mode(pool, new_mode);
288762306a36Sopenharmony_ci
288862306a36Sopenharmony_ci	return 0;
288962306a36Sopenharmony_ci}
289062306a36Sopenharmony_ci
289162306a36Sopenharmony_cistatic void unbind_control_target(struct pool *pool, struct dm_target *ti)
289262306a36Sopenharmony_ci{
289362306a36Sopenharmony_ci	if (pool->ti == ti)
289462306a36Sopenharmony_ci		pool->ti = NULL;
289562306a36Sopenharmony_ci}
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_ci/*
289862306a36Sopenharmony_ci *--------------------------------------------------------------
289962306a36Sopenharmony_ci * Pool creation
290062306a36Sopenharmony_ci *--------------------------------------------------------------
290162306a36Sopenharmony_ci */
290262306a36Sopenharmony_ci/* Initialize pool features. */
290362306a36Sopenharmony_cistatic void pool_features_init(struct pool_features *pf)
290462306a36Sopenharmony_ci{
290562306a36Sopenharmony_ci	pf->mode = PM_WRITE;
290662306a36Sopenharmony_ci	pf->zero_new_blocks = true;
290762306a36Sopenharmony_ci	pf->discard_enabled = true;
290862306a36Sopenharmony_ci	pf->discard_passdown = true;
290962306a36Sopenharmony_ci	pf->error_if_no_space = false;
291062306a36Sopenharmony_ci}
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_cistatic void __pool_destroy(struct pool *pool)
291362306a36Sopenharmony_ci{
291462306a36Sopenharmony_ci	__pool_table_remove(pool);
291562306a36Sopenharmony_ci
291662306a36Sopenharmony_ci	vfree(pool->cell_sort_array);
291762306a36Sopenharmony_ci	if (dm_pool_metadata_close(pool->pmd) < 0)
291862306a36Sopenharmony_ci		DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
291962306a36Sopenharmony_ci
292062306a36Sopenharmony_ci	dm_bio_prison_destroy(pool->prison);
292162306a36Sopenharmony_ci	dm_kcopyd_client_destroy(pool->copier);
292262306a36Sopenharmony_ci
292362306a36Sopenharmony_ci	cancel_delayed_work_sync(&pool->waker);
292462306a36Sopenharmony_ci	cancel_delayed_work_sync(&pool->no_space_timeout);
292562306a36Sopenharmony_ci	if (pool->wq)
292662306a36Sopenharmony_ci		destroy_workqueue(pool->wq);
292762306a36Sopenharmony_ci
292862306a36Sopenharmony_ci	if (pool->next_mapping)
292962306a36Sopenharmony_ci		mempool_free(pool->next_mapping, &pool->mapping_pool);
293062306a36Sopenharmony_ci	mempool_exit(&pool->mapping_pool);
293162306a36Sopenharmony_ci	dm_deferred_set_destroy(pool->shared_read_ds);
293262306a36Sopenharmony_ci	dm_deferred_set_destroy(pool->all_io_ds);
293362306a36Sopenharmony_ci	kfree(pool);
293462306a36Sopenharmony_ci}
293562306a36Sopenharmony_ci
293662306a36Sopenharmony_cistatic struct kmem_cache *_new_mapping_cache;
293762306a36Sopenharmony_ci
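/*
 * Allocate and initialise a new pool object: open the metadata, create the
 * bio prison, kcopyd client, workqueue, deferred sets, mapping mempool and
 * cell sort array, then add the pool to the pool table.
 */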
293862306a36Sopenharmony_cistatic struct pool *pool_create(struct mapped_device *pool_md,
293962306a36Sopenharmony_ci				struct block_device *metadata_dev,
294062306a36Sopenharmony_ci				struct block_device *data_dev,
294162306a36Sopenharmony_ci				unsigned long block_size,
294262306a36Sopenharmony_ci				int read_only, char **error)
294362306a36Sopenharmony_ci{
294462306a36Sopenharmony_ci	int r;
294562306a36Sopenharmony_ci	void *err_p;
294662306a36Sopenharmony_ci	struct pool *pool;
294762306a36Sopenharmony_ci	struct dm_pool_metadata *pmd;
294862306a36Sopenharmony_ci	bool format_device = read_only ? false : true;
294962306a36Sopenharmony_ci
295062306a36Sopenharmony_ci	pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
295162306a36Sopenharmony_ci	if (IS_ERR(pmd)) {
295262306a36Sopenharmony_ci		*error = "Error creating metadata object";
295362306a36Sopenharmony_ci		return (struct pool *)pmd;
295462306a36Sopenharmony_ci	}
295562306a36Sopenharmony_ci
295662306a36Sopenharmony_ci	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
295762306a36Sopenharmony_ci	if (!pool) {
295862306a36Sopenharmony_ci		*error = "Error allocating memory for pool";
295962306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
296062306a36Sopenharmony_ci		goto bad_pool;
296162306a36Sopenharmony_ci	}
296262306a36Sopenharmony_ci
296362306a36Sopenharmony_ci	pool->pmd = pmd;
296462306a36Sopenharmony_ci	pool->sectors_per_block = block_size;
296562306a36Sopenharmony_ci	if (block_size & (block_size - 1))
296662306a36Sopenharmony_ci		pool->sectors_per_block_shift = -1;
296762306a36Sopenharmony_ci	else
296862306a36Sopenharmony_ci		pool->sectors_per_block_shift = __ffs(block_size);
296962306a36Sopenharmony_ci	pool->low_water_blocks = 0;
297062306a36Sopenharmony_ci	pool_features_init(&pool->pf);
297162306a36Sopenharmony_ci	pool->prison = dm_bio_prison_create();
297262306a36Sopenharmony_ci	if (!pool->prison) {
297362306a36Sopenharmony_ci		*error = "Error creating pool's bio prison";
297462306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
297562306a36Sopenharmony_ci		goto bad_prison;
297662306a36Sopenharmony_ci	}
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
297962306a36Sopenharmony_ci	if (IS_ERR(pool->copier)) {
298062306a36Sopenharmony_ci		r = PTR_ERR(pool->copier);
298162306a36Sopenharmony_ci		*error = "Error creating pool's kcopyd client";
298262306a36Sopenharmony_ci		err_p = ERR_PTR(r);
298362306a36Sopenharmony_ci		goto bad_kcopyd_client;
298462306a36Sopenharmony_ci	}
298562306a36Sopenharmony_ci
298662306a36Sopenharmony_ci	/*
298762306a36Sopenharmony_ci	 * Create a single-threaded workqueue that will service all devices
298862306a36Sopenharmony_ci	 * that use this metadata.
298962306a36Sopenharmony_ci	 */
299062306a36Sopenharmony_ci	pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
299162306a36Sopenharmony_ci	if (!pool->wq) {
299262306a36Sopenharmony_ci		*error = "Error creating pool's workqueue";
299362306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
299462306a36Sopenharmony_ci		goto bad_wq;
299562306a36Sopenharmony_ci	}
299662306a36Sopenharmony_ci
299762306a36Sopenharmony_ci	throttle_init(&pool->throttle);
299862306a36Sopenharmony_ci	INIT_WORK(&pool->worker, do_worker);
299962306a36Sopenharmony_ci	INIT_DELAYED_WORK(&pool->waker, do_waker);
300062306a36Sopenharmony_ci	INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
300162306a36Sopenharmony_ci	spin_lock_init(&pool->lock);
300262306a36Sopenharmony_ci	bio_list_init(&pool->deferred_flush_bios);
300362306a36Sopenharmony_ci	bio_list_init(&pool->deferred_flush_completions);
300462306a36Sopenharmony_ci	INIT_LIST_HEAD(&pool->prepared_mappings);
300562306a36Sopenharmony_ci	INIT_LIST_HEAD(&pool->prepared_discards);
300662306a36Sopenharmony_ci	INIT_LIST_HEAD(&pool->prepared_discards_pt2);
300762306a36Sopenharmony_ci	INIT_LIST_HEAD(&pool->active_thins);
300862306a36Sopenharmony_ci	pool->low_water_triggered = false;
300962306a36Sopenharmony_ci	pool->suspended = true;
301062306a36Sopenharmony_ci	pool->out_of_data_space = false;
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci	pool->shared_read_ds = dm_deferred_set_create();
301362306a36Sopenharmony_ci	if (!pool->shared_read_ds) {
301462306a36Sopenharmony_ci		*error = "Error creating pool's shared read deferred set";
301562306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
301662306a36Sopenharmony_ci		goto bad_shared_read_ds;
301762306a36Sopenharmony_ci	}
301862306a36Sopenharmony_ci
301962306a36Sopenharmony_ci	pool->all_io_ds = dm_deferred_set_create();
302062306a36Sopenharmony_ci	if (!pool->all_io_ds) {
302162306a36Sopenharmony_ci		*error = "Error creating pool's all io deferred set";
302262306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
302362306a36Sopenharmony_ci		goto bad_all_io_ds;
302462306a36Sopenharmony_ci	}
302562306a36Sopenharmony_ci
302662306a36Sopenharmony_ci	pool->next_mapping = NULL;
302762306a36Sopenharmony_ci	r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
302862306a36Sopenharmony_ci				   _new_mapping_cache);
302962306a36Sopenharmony_ci	if (r) {
303062306a36Sopenharmony_ci		*error = "Error creating pool's mapping mempool";
303162306a36Sopenharmony_ci		err_p = ERR_PTR(r);
303262306a36Sopenharmony_ci		goto bad_mapping_pool;
303362306a36Sopenharmony_ci	}
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	pool->cell_sort_array =
303662306a36Sopenharmony_ci		vmalloc(array_size(CELL_SORT_ARRAY_SIZE,
303762306a36Sopenharmony_ci				   sizeof(*pool->cell_sort_array)));
303862306a36Sopenharmony_ci	if (!pool->cell_sort_array) {
303962306a36Sopenharmony_ci		*error = "Error allocating cell sort array";
304062306a36Sopenharmony_ci		err_p = ERR_PTR(-ENOMEM);
304162306a36Sopenharmony_ci		goto bad_sort_array;
304262306a36Sopenharmony_ci	}
304362306a36Sopenharmony_ci
304462306a36Sopenharmony_ci	pool->ref_count = 1;
304562306a36Sopenharmony_ci	pool->last_commit_jiffies = jiffies;
304662306a36Sopenharmony_ci	pool->pool_md = pool_md;
304762306a36Sopenharmony_ci	pool->md_dev = metadata_dev;
304862306a36Sopenharmony_ci	pool->data_dev = data_dev;
304962306a36Sopenharmony_ci	__pool_table_insert(pool);
305062306a36Sopenharmony_ci
305162306a36Sopenharmony_ci	return pool;
305262306a36Sopenharmony_ci
305362306a36Sopenharmony_cibad_sort_array:
305462306a36Sopenharmony_ci	mempool_exit(&pool->mapping_pool);
305562306a36Sopenharmony_cibad_mapping_pool:
305662306a36Sopenharmony_ci	dm_deferred_set_destroy(pool->all_io_ds);
305762306a36Sopenharmony_cibad_all_io_ds:
305862306a36Sopenharmony_ci	dm_deferred_set_destroy(pool->shared_read_ds);
305962306a36Sopenharmony_cibad_shared_read_ds:
306062306a36Sopenharmony_ci	destroy_workqueue(pool->wq);
306162306a36Sopenharmony_cibad_wq:
306262306a36Sopenharmony_ci	dm_kcopyd_client_destroy(pool->copier);
306362306a36Sopenharmony_cibad_kcopyd_client:
306462306a36Sopenharmony_ci	dm_bio_prison_destroy(pool->prison);
306562306a36Sopenharmony_cibad_prison:
306662306a36Sopenharmony_ci	kfree(pool);
306762306a36Sopenharmony_cibad_pool:
306862306a36Sopenharmony_ci	if (dm_pool_metadata_close(pmd))
306962306a36Sopenharmony_ci		DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
307062306a36Sopenharmony_ci
307162306a36Sopenharmony_ci	return err_p;
307262306a36Sopenharmony_ci}
307362306a36Sopenharmony_ci
307462306a36Sopenharmony_cistatic void __pool_inc(struct pool *pool)
307562306a36Sopenharmony_ci{
307662306a36Sopenharmony_ci	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
307762306a36Sopenharmony_ci	pool->ref_count++;
307862306a36Sopenharmony_ci}
307962306a36Sopenharmony_ci
308062306a36Sopenharmony_cistatic void __pool_dec(struct pool *pool)
308162306a36Sopenharmony_ci{
308262306a36Sopenharmony_ci	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
308362306a36Sopenharmony_ci	BUG_ON(!pool->ref_count);
308462306a36Sopenharmony_ci	if (!--pool->ref_count)
308562306a36Sopenharmony_ci		__pool_destroy(pool);
308662306a36Sopenharmony_ci}
308762306a36Sopenharmony_ci
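/*
 * Look the pool up by its metadata device first, then by the pool's mapped
 * device; verify the devices match in either case.  If no pool exists yet,
 * create one and set *created.
 */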
308862306a36Sopenharmony_cistatic struct pool *__pool_find(struct mapped_device *pool_md,
308962306a36Sopenharmony_ci				struct block_device *metadata_dev,
309062306a36Sopenharmony_ci				struct block_device *data_dev,
309162306a36Sopenharmony_ci				unsigned long block_size, int read_only,
309262306a36Sopenharmony_ci				char **error, int *created)
309362306a36Sopenharmony_ci{
309462306a36Sopenharmony_ci	struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
309562306a36Sopenharmony_ci
309662306a36Sopenharmony_ci	if (pool) {
309762306a36Sopenharmony_ci		if (pool->pool_md != pool_md) {
309862306a36Sopenharmony_ci			*error = "metadata device already in use by a pool";
309962306a36Sopenharmony_ci			return ERR_PTR(-EBUSY);
310062306a36Sopenharmony_ci		}
310162306a36Sopenharmony_ci		if (pool->data_dev != data_dev) {
310262306a36Sopenharmony_ci			*error = "data device already in use by a pool";
310362306a36Sopenharmony_ci			return ERR_PTR(-EBUSY);
310462306a36Sopenharmony_ci		}
310562306a36Sopenharmony_ci		__pool_inc(pool);
310662306a36Sopenharmony_ci
310762306a36Sopenharmony_ci	} else {
310862306a36Sopenharmony_ci		pool = __pool_table_lookup(pool_md);
310962306a36Sopenharmony_ci		if (pool) {
311062306a36Sopenharmony_ci			if (pool->md_dev != metadata_dev || pool->data_dev != data_dev) {
311162306a36Sopenharmony_ci				*error = "different pool cannot replace a pool";
311262306a36Sopenharmony_ci				return ERR_PTR(-EINVAL);
311362306a36Sopenharmony_ci			}
311462306a36Sopenharmony_ci			__pool_inc(pool);
311562306a36Sopenharmony_ci
311662306a36Sopenharmony_ci		} else {
311762306a36Sopenharmony_ci			pool = pool_create(pool_md, metadata_dev, data_dev, block_size, read_only, error);
311862306a36Sopenharmony_ci			*created = 1;
311962306a36Sopenharmony_ci		}
312062306a36Sopenharmony_ci	}
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ci	return pool;
312362306a36Sopenharmony_ci}
312462306a36Sopenharmony_ci
312562306a36Sopenharmony_ci/*
312662306a36Sopenharmony_ci *--------------------------------------------------------------
312762306a36Sopenharmony_ci * Pool target methods
312862306a36Sopenharmony_ci *--------------------------------------------------------------
312962306a36Sopenharmony_ci */
313062306a36Sopenharmony_cistatic void pool_dtr(struct dm_target *ti)
313162306a36Sopenharmony_ci{
313262306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
313362306a36Sopenharmony_ci
313462306a36Sopenharmony_ci	mutex_lock(&dm_thin_pool_table.mutex);
313562306a36Sopenharmony_ci
313662306a36Sopenharmony_ci	unbind_control_target(pt->pool, ti);
313762306a36Sopenharmony_ci	__pool_dec(pt->pool);
313862306a36Sopenharmony_ci	dm_put_device(ti, pt->metadata_dev);
313962306a36Sopenharmony_ci	dm_put_device(ti, pt->data_dev);
314062306a36Sopenharmony_ci	kfree(pt);
314162306a36Sopenharmony_ci
314262306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
314362306a36Sopenharmony_ci}
314462306a36Sopenharmony_ci
314562306a36Sopenharmony_cistatic int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
314662306a36Sopenharmony_ci			       struct dm_target *ti)
314762306a36Sopenharmony_ci{
314862306a36Sopenharmony_ci	int r;
314962306a36Sopenharmony_ci	unsigned int argc;
315062306a36Sopenharmony_ci	const char *arg_name;
315162306a36Sopenharmony_ci
315262306a36Sopenharmony_ci	static const struct dm_arg _args[] = {
315362306a36Sopenharmony_ci		{0, 4, "Invalid number of pool feature arguments"},
315462306a36Sopenharmony_ci	};
315562306a36Sopenharmony_ci
315662306a36Sopenharmony_ci	/*
315762306a36Sopenharmony_ci	 * No feature arguments supplied.
315862306a36Sopenharmony_ci	 */
315962306a36Sopenharmony_ci	if (!as->argc)
316062306a36Sopenharmony_ci		return 0;
316162306a36Sopenharmony_ci
316262306a36Sopenharmony_ci	r = dm_read_arg_group(_args, as, &argc, &ti->error);
316362306a36Sopenharmony_ci	if (r)
316462306a36Sopenharmony_ci		return -EINVAL;
316562306a36Sopenharmony_ci
316662306a36Sopenharmony_ci	while (argc && !r) {
316762306a36Sopenharmony_ci		arg_name = dm_shift_arg(as);
316862306a36Sopenharmony_ci		argc--;
316962306a36Sopenharmony_ci
317062306a36Sopenharmony_ci		if (!strcasecmp(arg_name, "skip_block_zeroing"))
317162306a36Sopenharmony_ci			pf->zero_new_blocks = false;
317262306a36Sopenharmony_ci
317362306a36Sopenharmony_ci		else if (!strcasecmp(arg_name, "ignore_discard"))
317462306a36Sopenharmony_ci			pf->discard_enabled = false;
317562306a36Sopenharmony_ci
317662306a36Sopenharmony_ci		else if (!strcasecmp(arg_name, "no_discard_passdown"))
317762306a36Sopenharmony_ci			pf->discard_passdown = false;
317862306a36Sopenharmony_ci
317962306a36Sopenharmony_ci		else if (!strcasecmp(arg_name, "read_only"))
318062306a36Sopenharmony_ci			pf->mode = PM_READ_ONLY;
318162306a36Sopenharmony_ci
318262306a36Sopenharmony_ci		else if (!strcasecmp(arg_name, "error_if_no_space"))
318362306a36Sopenharmony_ci			pf->error_if_no_space = true;
318462306a36Sopenharmony_ci
318562306a36Sopenharmony_ci		else {
318662306a36Sopenharmony_ci			ti->error = "Unrecognised pool feature requested";
318762306a36Sopenharmony_ci			r = -EINVAL;
318862306a36Sopenharmony_ci			break;
318962306a36Sopenharmony_ci		}
319062306a36Sopenharmony_ci	}
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_ci	return r;
319362306a36Sopenharmony_ci}
319462306a36Sopenharmony_ci
319562306a36Sopenharmony_cistatic void metadata_low_callback(void *context)
319662306a36Sopenharmony_ci{
319762306a36Sopenharmony_ci	struct pool *pool = context;
319862306a36Sopenharmony_ci
319962306a36Sopenharmony_ci	DMWARN("%s: reached low water mark for metadata device: sending event.",
320062306a36Sopenharmony_ci	       dm_device_name(pool->pool_md));
320162306a36Sopenharmony_ci
320262306a36Sopenharmony_ci	dm_table_event(pool->ti->table);
320362306a36Sopenharmony_ci}
320462306a36Sopenharmony_ci
320562306a36Sopenharmony_ci/*
320662306a36Sopenharmony_ci * We need to flush the data device **before** committing the metadata.
320762306a36Sopenharmony_ci *
320862306a36Sopenharmony_ci * This ensures that the data blocks of any newly inserted mappings are
320962306a36Sopenharmony_ci * properly written to non-volatile storage and won't be lost in case of a
321062306a36Sopenharmony_ci * crash.
321162306a36Sopenharmony_ci *
321262306a36Sopenharmony_ci * Failure to do so can result in data corruption in the case of internal or
321362306a36Sopenharmony_ci * external snapshots and in the case of newly provisioned blocks, when block
321462306a36Sopenharmony_ci * zeroing is enabled.
321562306a36Sopenharmony_ci */
321662306a36Sopenharmony_cistatic int metadata_pre_commit_callback(void *context)
321762306a36Sopenharmony_ci{
321862306a36Sopenharmony_ci	struct pool *pool = context;
321962306a36Sopenharmony_ci
322062306a36Sopenharmony_ci	return blkdev_issue_flush(pool->data_dev);
322162306a36Sopenharmony_ci}
322262306a36Sopenharmony_ci
322362306a36Sopenharmony_cistatic sector_t get_dev_size(struct block_device *bdev)
322462306a36Sopenharmony_ci{
322562306a36Sopenharmony_ci	return bdev_nr_sectors(bdev);
322662306a36Sopenharmony_ci}
322762306a36Sopenharmony_ci
322862306a36Sopenharmony_cistatic void warn_if_metadata_device_too_big(struct block_device *bdev)
322962306a36Sopenharmony_ci{
323062306a36Sopenharmony_ci	sector_t metadata_dev_size = get_dev_size(bdev);
323162306a36Sopenharmony_ci
323262306a36Sopenharmony_ci	if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
323362306a36Sopenharmony_ci		DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
323462306a36Sopenharmony_ci		       bdev, THIN_METADATA_MAX_SECTORS);
323562306a36Sopenharmony_ci}
323662306a36Sopenharmony_ci
323762306a36Sopenharmony_cistatic sector_t get_metadata_dev_size(struct block_device *bdev)
323862306a36Sopenharmony_ci{
323962306a36Sopenharmony_ci	sector_t metadata_dev_size = get_dev_size(bdev);
324062306a36Sopenharmony_ci
324162306a36Sopenharmony_ci	if (metadata_dev_size > THIN_METADATA_MAX_SECTORS)
324262306a36Sopenharmony_ci		metadata_dev_size = THIN_METADATA_MAX_SECTORS;
324362306a36Sopenharmony_ci
324462306a36Sopenharmony_ci	return metadata_dev_size;
324562306a36Sopenharmony_ci}
324662306a36Sopenharmony_ci
324762306a36Sopenharmony_cistatic dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev)
324862306a36Sopenharmony_ci{
324962306a36Sopenharmony_ci	sector_t metadata_dev_size = get_metadata_dev_size(bdev);
325062306a36Sopenharmony_ci
325162306a36Sopenharmony_ci	sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE);
325262306a36Sopenharmony_ci
325362306a36Sopenharmony_ci	return metadata_dev_size;
325462306a36Sopenharmony_ci}
325562306a36Sopenharmony_ci
325662306a36Sopenharmony_ci/*
325762306a36Sopenharmony_ci * When a metadata threshold is crossed, a dm event is triggered, and
325862306a36Sopenharmony_ci * userland should respond by growing the metadata device.  We could let
325962306a36Sopenharmony_ci * userland set the threshold, like we do with the data threshold, but I'm
326062306a36Sopenharmony_ci * not sure they know enough to do this well.
326162306a36Sopenharmony_ci */
326262306a36Sopenharmony_cistatic dm_block_t calc_metadata_threshold(struct pool_c *pt)
326362306a36Sopenharmony_ci{
326462306a36Sopenharmony_ci	/*
326562306a36Sopenharmony_ci	 * 4M is ample for all ops with the possible exception of thin
326662306a36Sopenharmony_ci	 * device deletion, which is harmless if it fails (just retry the
326762306a36Sopenharmony_ci	 * delete after you've grown the device).
326862306a36Sopenharmony_ci	 */
326962306a36Sopenharmony_ci	dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4;
327062306a36Sopenharmony_ci
327162306a36Sopenharmony_ci	return min((dm_block_t)1024ULL /* 4M */, quarter);
327262306a36Sopenharmony_ci}
327362306a36Sopenharmony_ci
327462306a36Sopenharmony_ci/*
327562306a36Sopenharmony_ci * thin-pool <metadata dev> <data dev>
327662306a36Sopenharmony_ci *	     <data block size (sectors)>
327762306a36Sopenharmony_ci *	     <low water mark (blocks)>
327862306a36Sopenharmony_ci *	     [<#feature args> [<arg>]*]
327962306a36Sopenharmony_ci *
328062306a36Sopenharmony_ci * Optional feature arguments are:
328162306a36Sopenharmony_ci *	     skip_block_zeroing: skip the zeroing of newly-provisioned blocks.
328262306a36Sopenharmony_ci *	     ignore_discard: disable discard support.
328362306a36Sopenharmony_ci *	     no_discard_passdown: don't pass discards down to the data device.
328462306a36Sopenharmony_ci *	     read_only: don't allow any changes to be made to the pool metadata.
328562306a36Sopenharmony_ci *	     error_if_no_space: error IOs, instead of queueing them, if no space.
328662306a36Sopenharmony_ci */
328762306a36Sopenharmony_cistatic int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
328862306a36Sopenharmony_ci{
328962306a36Sopenharmony_ci	int r, pool_created = 0;
329062306a36Sopenharmony_ci	struct pool_c *pt;
329162306a36Sopenharmony_ci	struct pool *pool;
329262306a36Sopenharmony_ci	struct pool_features pf;
329362306a36Sopenharmony_ci	struct dm_arg_set as;
329462306a36Sopenharmony_ci	struct dm_dev *data_dev;
329562306a36Sopenharmony_ci	unsigned long block_size;
329662306a36Sopenharmony_ci	dm_block_t low_water_blocks;
329762306a36Sopenharmony_ci	struct dm_dev *metadata_dev;
329862306a36Sopenharmony_ci	blk_mode_t metadata_mode;
329962306a36Sopenharmony_ci
330062306a36Sopenharmony_ci	/*
330162306a36Sopenharmony_ci	 * FIXME Remove validation from scope of lock.
330262306a36Sopenharmony_ci	 */
330362306a36Sopenharmony_ci	mutex_lock(&dm_thin_pool_table.mutex);
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_ci	if (argc < 4) {
330662306a36Sopenharmony_ci		ti->error = "Invalid argument count";
330762306a36Sopenharmony_ci		r = -EINVAL;
330862306a36Sopenharmony_ci		goto out_unlock;
330962306a36Sopenharmony_ci	}
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	as.argc = argc;
331262306a36Sopenharmony_ci	as.argv = argv;
331362306a36Sopenharmony_ci
331462306a36Sopenharmony_ci	/* make sure metadata and data are different devices */
331562306a36Sopenharmony_ci	if (!strcmp(argv[0], argv[1])) {
331662306a36Sopenharmony_ci		ti->error = "Error setting metadata or data device";
331762306a36Sopenharmony_ci		r = -EINVAL;
331862306a36Sopenharmony_ci		goto out_unlock;
331962306a36Sopenharmony_ci	}
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci	/*
332262306a36Sopenharmony_ci	 * Set default pool features.
332362306a36Sopenharmony_ci	 */
332462306a36Sopenharmony_ci	pool_features_init(&pf);
332562306a36Sopenharmony_ci
332662306a36Sopenharmony_ci	dm_consume_args(&as, 4);
332762306a36Sopenharmony_ci	r = parse_pool_features(&as, &pf, ti);
332862306a36Sopenharmony_ci	if (r)
332962306a36Sopenharmony_ci		goto out_unlock;
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci	metadata_mode = BLK_OPEN_READ |
333262306a36Sopenharmony_ci		((pf.mode == PM_READ_ONLY) ? 0 : BLK_OPEN_WRITE);
333362306a36Sopenharmony_ci	r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev);
333462306a36Sopenharmony_ci	if (r) {
333562306a36Sopenharmony_ci		ti->error = "Error opening metadata block device";
333662306a36Sopenharmony_ci		goto out_unlock;
333762306a36Sopenharmony_ci	}
333862306a36Sopenharmony_ci	warn_if_metadata_device_too_big(metadata_dev->bdev);
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci	r = dm_get_device(ti, argv[1], BLK_OPEN_READ | BLK_OPEN_WRITE, &data_dev);
334162306a36Sopenharmony_ci	if (r) {
334262306a36Sopenharmony_ci		ti->error = "Error getting data device";
334362306a36Sopenharmony_ci		goto out_metadata;
334462306a36Sopenharmony_ci	}
334562306a36Sopenharmony_ci
334662306a36Sopenharmony_ci	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
334762306a36Sopenharmony_ci	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
334862306a36Sopenharmony_ci	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
334962306a36Sopenharmony_ci	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
335062306a36Sopenharmony_ci		ti->error = "Invalid block size";
335162306a36Sopenharmony_ci		r = -EINVAL;
335262306a36Sopenharmony_ci		goto out;
335362306a36Sopenharmony_ci	}
335462306a36Sopenharmony_ci
335562306a36Sopenharmony_ci	if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
335662306a36Sopenharmony_ci		ti->error = "Invalid low water mark";
335762306a36Sopenharmony_ci		r = -EINVAL;
335862306a36Sopenharmony_ci		goto out;
335962306a36Sopenharmony_ci	}
336062306a36Sopenharmony_ci
336162306a36Sopenharmony_ci	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
336262306a36Sopenharmony_ci	if (!pt) {
336362306a36Sopenharmony_ci		r = -ENOMEM;
336462306a36Sopenharmony_ci		goto out;
336562306a36Sopenharmony_ci	}
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci	pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, data_dev->bdev,
336862306a36Sopenharmony_ci			   block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created);
336962306a36Sopenharmony_ci	if (IS_ERR(pool)) {
337062306a36Sopenharmony_ci		r = PTR_ERR(pool);
337162306a36Sopenharmony_ci		goto out_free_pt;
337262306a36Sopenharmony_ci	}
337362306a36Sopenharmony_ci
337462306a36Sopenharmony_ci	/*
337562306a36Sopenharmony_ci	 * 'pool_created' reflects whether this is the first table load.
337662306a36Sopenharmony_ci	 * Top level discard support is not allowed to be changed after
337762306a36Sopenharmony_ci	 * initial load.  This would require a pool reload to trigger thin
337862306a36Sopenharmony_ci	 * device changes.
337962306a36Sopenharmony_ci	 */
338062306a36Sopenharmony_ci	if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
338162306a36Sopenharmony_ci		ti->error = "Discard support cannot be disabled once enabled";
338262306a36Sopenharmony_ci		r = -EINVAL;
338362306a36Sopenharmony_ci		goto out_flags_changed;
338462306a36Sopenharmony_ci	}
338562306a36Sopenharmony_ci
338662306a36Sopenharmony_ci	pt->pool = pool;
338762306a36Sopenharmony_ci	pt->ti = ti;
338862306a36Sopenharmony_ci	pt->metadata_dev = metadata_dev;
338962306a36Sopenharmony_ci	pt->data_dev = data_dev;
339062306a36Sopenharmony_ci	pt->low_water_blocks = low_water_blocks;
339162306a36Sopenharmony_ci	pt->adjusted_pf = pt->requested_pf = pf;
339262306a36Sopenharmony_ci	ti->num_flush_bios = 1;
339362306a36Sopenharmony_ci	ti->limit_swap_bios = true;
339462306a36Sopenharmony_ci
339562306a36Sopenharmony_ci	/*
339662306a36Sopenharmony_ci	 * Only need to enable discards if the pool should pass
339762306a36Sopenharmony_ci	 * them down to the data device.  The thin device's discard
339862306a36Sopenharmony_ci	 * processing will cause mappings to be removed from the btree.
339962306a36Sopenharmony_ci	 */
340062306a36Sopenharmony_ci	if (pf.discard_enabled && pf.discard_passdown) {
340162306a36Sopenharmony_ci		ti->num_discard_bios = 1;
340262306a36Sopenharmony_ci		/*
340362306a36Sopenharmony_ci		 * Setting 'discards_supported' circumvents the normal
340462306a36Sopenharmony_ci		 * stacking of discard limits (this keeps the pool and
340562306a36Sopenharmony_ci		 * thin devices' discard limits consistent).
340662306a36Sopenharmony_ci		 */
340762306a36Sopenharmony_ci		ti->discards_supported = true;
340862306a36Sopenharmony_ci		ti->max_discard_granularity = true;
340962306a36Sopenharmony_ci	}
341062306a36Sopenharmony_ci	ti->private = pt;
341162306a36Sopenharmony_ci
341262306a36Sopenharmony_ci	r = dm_pool_register_metadata_threshold(pt->pool->pmd,
341362306a36Sopenharmony_ci						calc_metadata_threshold(pt),
341462306a36Sopenharmony_ci						metadata_low_callback,
341562306a36Sopenharmony_ci						pool);
341662306a36Sopenharmony_ci	if (r) {
341762306a36Sopenharmony_ci		ti->error = "Error registering metadata threshold";
341862306a36Sopenharmony_ci		goto out_flags_changed;
341962306a36Sopenharmony_ci	}
342062306a36Sopenharmony_ci
342162306a36Sopenharmony_ci	dm_pool_register_pre_commit_callback(pool->pmd,
342262306a36Sopenharmony_ci					     metadata_pre_commit_callback, pool);
342362306a36Sopenharmony_ci
342462306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
342562306a36Sopenharmony_ci
342662306a36Sopenharmony_ci	return 0;
342762306a36Sopenharmony_ci
342862306a36Sopenharmony_ciout_flags_changed:
342962306a36Sopenharmony_ci	__pool_dec(pool);
343062306a36Sopenharmony_ciout_free_pt:
343162306a36Sopenharmony_ci	kfree(pt);
343262306a36Sopenharmony_ciout:
343362306a36Sopenharmony_ci	dm_put_device(ti, data_dev);
343462306a36Sopenharmony_ciout_metadata:
343562306a36Sopenharmony_ci	dm_put_device(ti, metadata_dev);
343662306a36Sopenharmony_ciout_unlock:
343762306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
343862306a36Sopenharmony_ci
343962306a36Sopenharmony_ci	return r;
344062306a36Sopenharmony_ci}
344162306a36Sopenharmony_ci
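/*
 * The pool target itself is a simple passthrough: every bio is remapped
 * straight to the data device.  The real block mapping is done by the
 * individual thin targets, not here.
 */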
344262306a36Sopenharmony_cistatic int pool_map(struct dm_target *ti, struct bio *bio)
344362306a36Sopenharmony_ci{
344462306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
344562306a36Sopenharmony_ci	struct pool *pool = pt->pool;
344662306a36Sopenharmony_ci
344762306a36Sopenharmony_ci	/*
344862306a36Sopenharmony_ci	 * As this is a singleton target, ti->begin is always zero.
344962306a36Sopenharmony_ci	 */
345062306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
345162306a36Sopenharmony_ci	bio_set_dev(bio, pt->data_dev->bdev);
345262306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
345362306a36Sopenharmony_ci
345462306a36Sopenharmony_ci	return DM_MAPIO_REMAPPED;
345562306a36Sopenharmony_ci}
345662306a36Sopenharmony_ci
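/*
 * Compare the pool target's length (in data blocks) with the data-device
 * size recorded in the superblock, growing the data space map if the
 * target has grown.  Shrinking is not supported and is reported as an
 * error.
 */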
345762306a36Sopenharmony_cistatic int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit)
345862306a36Sopenharmony_ci{
345962306a36Sopenharmony_ci	int r;
346062306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
346162306a36Sopenharmony_ci	struct pool *pool = pt->pool;
346262306a36Sopenharmony_ci	sector_t data_size = ti->len;
346362306a36Sopenharmony_ci	dm_block_t sb_data_size;
346462306a36Sopenharmony_ci
346562306a36Sopenharmony_ci	*need_commit = false;
346662306a36Sopenharmony_ci
346762306a36Sopenharmony_ci	(void) sector_div(data_size, pool->sectors_per_block);
346862306a36Sopenharmony_ci
346962306a36Sopenharmony_ci	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
347062306a36Sopenharmony_ci	if (r) {
347162306a36Sopenharmony_ci		DMERR("%s: failed to retrieve data device size",
347262306a36Sopenharmony_ci		      dm_device_name(pool->pool_md));
347362306a36Sopenharmony_ci		return r;
347462306a36Sopenharmony_ci	}
347562306a36Sopenharmony_ci
347662306a36Sopenharmony_ci	if (data_size < sb_data_size) {
347762306a36Sopenharmony_ci		DMERR("%s: pool target (%llu blocks) too small: expected %llu",
347862306a36Sopenharmony_ci		      dm_device_name(pool->pool_md),
347962306a36Sopenharmony_ci		      (unsigned long long)data_size, sb_data_size);
348062306a36Sopenharmony_ci		return -EINVAL;
348162306a36Sopenharmony_ci
348262306a36Sopenharmony_ci	} else if (data_size > sb_data_size) {
348362306a36Sopenharmony_ci		if (dm_pool_metadata_needs_check(pool->pmd)) {
348462306a36Sopenharmony_ci			DMERR("%s: unable to grow the data device until repaired.",
348562306a36Sopenharmony_ci			      dm_device_name(pool->pool_md));
348662306a36Sopenharmony_ci			return 0;
348762306a36Sopenharmony_ci		}
348862306a36Sopenharmony_ci
348962306a36Sopenharmony_ci		if (sb_data_size)
349062306a36Sopenharmony_ci			DMINFO("%s: growing the data device from %llu to %llu blocks",
349162306a36Sopenharmony_ci			       dm_device_name(pool->pool_md),
349262306a36Sopenharmony_ci			       sb_data_size, (unsigned long long)data_size);
349362306a36Sopenharmony_ci		r = dm_pool_resize_data_dev(pool->pmd, data_size);
349462306a36Sopenharmony_ci		if (r) {
349562306a36Sopenharmony_ci			metadata_operation_failed(pool, "dm_pool_resize_data_dev", r);
349662306a36Sopenharmony_ci			return r;
349762306a36Sopenharmony_ci		}
349862306a36Sopenharmony_ci
349962306a36Sopenharmony_ci		*need_commit = true;
350062306a36Sopenharmony_ci	}
350162306a36Sopenharmony_ci
350262306a36Sopenharmony_ci	return 0;
350362306a36Sopenharmony_ci}
350462306a36Sopenharmony_ci
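/*
 * As above, but for the metadata device: grow the metadata space map if
 * the device has grown, returning the pool to write mode if it had
 * previously run out of metadata space.
 */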
350562306a36Sopenharmony_cistatic int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
350662306a36Sopenharmony_ci{
350762306a36Sopenharmony_ci	int r;
350862306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
350962306a36Sopenharmony_ci	struct pool *pool = pt->pool;
351062306a36Sopenharmony_ci	dm_block_t metadata_dev_size, sb_metadata_dev_size;
351162306a36Sopenharmony_ci
351262306a36Sopenharmony_ci	*need_commit = false;
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ci	metadata_dev_size = get_metadata_dev_size_in_blocks(pool->md_dev);
351562306a36Sopenharmony_ci
351662306a36Sopenharmony_ci	r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
351762306a36Sopenharmony_ci	if (r) {
351862306a36Sopenharmony_ci		DMERR("%s: failed to retrieve metadata device size",
351962306a36Sopenharmony_ci		      dm_device_name(pool->pool_md));
352062306a36Sopenharmony_ci		return r;
352162306a36Sopenharmony_ci	}
352262306a36Sopenharmony_ci
352362306a36Sopenharmony_ci	if (metadata_dev_size < sb_metadata_dev_size) {
352462306a36Sopenharmony_ci		DMERR("%s: metadata device (%llu blocks) too small: expected %llu",
352562306a36Sopenharmony_ci		      dm_device_name(pool->pool_md),
352662306a36Sopenharmony_ci		      metadata_dev_size, sb_metadata_dev_size);
352762306a36Sopenharmony_ci		return -EINVAL;
352862306a36Sopenharmony_ci
352962306a36Sopenharmony_ci	} else if (metadata_dev_size > sb_metadata_dev_size) {
353062306a36Sopenharmony_ci		if (dm_pool_metadata_needs_check(pool->pmd)) {
353162306a36Sopenharmony_ci			DMERR("%s: unable to grow the metadata device until repaired.",
353262306a36Sopenharmony_ci			      dm_device_name(pool->pool_md));
353362306a36Sopenharmony_ci			return 0;
353462306a36Sopenharmony_ci		}
353562306a36Sopenharmony_ci
353662306a36Sopenharmony_ci		warn_if_metadata_device_too_big(pool->md_dev);
353762306a36Sopenharmony_ci		DMINFO("%s: growing the metadata device from %llu to %llu blocks",
353862306a36Sopenharmony_ci		       dm_device_name(pool->pool_md),
353962306a36Sopenharmony_ci		       sb_metadata_dev_size, metadata_dev_size);
354062306a36Sopenharmony_ci
354162306a36Sopenharmony_ci		if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
354262306a36Sopenharmony_ci			set_pool_mode(pool, PM_WRITE);
354362306a36Sopenharmony_ci
354462306a36Sopenharmony_ci		r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
354562306a36Sopenharmony_ci		if (r) {
354662306a36Sopenharmony_ci			metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
354762306a36Sopenharmony_ci			return r;
354862306a36Sopenharmony_ci		}
354962306a36Sopenharmony_ci
355062306a36Sopenharmony_ci		*need_commit = true;
355162306a36Sopenharmony_ci	}
355262306a36Sopenharmony_ci
355362306a36Sopenharmony_ci	return 0;
355462306a36Sopenharmony_ci}
355562306a36Sopenharmony_ci
355662306a36Sopenharmony_ci/*
355762306a36Sopenharmony_ci * Retrieves the number of blocks of the data device from
355862306a36Sopenharmony_ci * the superblock and compares it to the actual device size,
355962306a36Sopenharmony_ci * resizing the data device if it has grown.
356062306a36Sopenharmony_ci *
356162306a36Sopenharmony_ci * This copes both with opening a preallocated data device in the ctr
356262306a36Sopenharmony_ci * followed by a resume
356362306a36Sopenharmony_ci * -and-
356462306a36Sopenharmony_ci * with calling the resume method on its own after userspace has
356562306a36Sopenharmony_ci * grown the data device in reaction to a table event.
356662306a36Sopenharmony_ci */
356762306a36Sopenharmony_cistatic int pool_preresume(struct dm_target *ti)
356862306a36Sopenharmony_ci{
356962306a36Sopenharmony_ci	int r;
357062306a36Sopenharmony_ci	bool need_commit1, need_commit2;
357162306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
357262306a36Sopenharmony_ci	struct pool *pool = pt->pool;
357362306a36Sopenharmony_ci
357462306a36Sopenharmony_ci	/*
357562306a36Sopenharmony_ci	 * Take control of the pool object.
357662306a36Sopenharmony_ci	 */
357762306a36Sopenharmony_ci	r = bind_control_target(pool, ti);
357862306a36Sopenharmony_ci	if (r)
357962306a36Sopenharmony_ci		goto out;
358062306a36Sopenharmony_ci
358162306a36Sopenharmony_ci	r = maybe_resize_data_dev(ti, &need_commit1);
358262306a36Sopenharmony_ci	if (r)
358362306a36Sopenharmony_ci		goto out;
358462306a36Sopenharmony_ci
358562306a36Sopenharmony_ci	r = maybe_resize_metadata_dev(ti, &need_commit2);
358662306a36Sopenharmony_ci	if (r)
358762306a36Sopenharmony_ci		goto out;
358862306a36Sopenharmony_ci
358962306a36Sopenharmony_ci	if (need_commit1 || need_commit2)
359062306a36Sopenharmony_ci		(void) commit(pool);
359162306a36Sopenharmony_ciout:
359262306a36Sopenharmony_ci	/*
359362306a36Sopenharmony_ci	 * When a thin-pool is in PM_FAIL mode it cannot be rebuilt while
359462306a36Sopenharmony_ci	 * bios sit on the deferred list, so return 0 here to allow
359562306a36Sopenharmony_ci	 * pool_resume() to flush that IO.
359662306a36Sopenharmony_ci	 */
359762306a36Sopenharmony_ci	if (r && get_pool_mode(pool) == PM_FAIL)
359862306a36Sopenharmony_ci		r = 0;
359962306a36Sopenharmony_ci
360062306a36Sopenharmony_ci	return r;
360162306a36Sopenharmony_ci}
360262306a36Sopenharmony_ci
360362306a36Sopenharmony_cistatic void pool_suspend_active_thins(struct pool *pool)
360462306a36Sopenharmony_ci{
360562306a36Sopenharmony_ci	struct thin_c *tc;
360662306a36Sopenharmony_ci
360762306a36Sopenharmony_ci	/* Suspend all active thin devices */
360862306a36Sopenharmony_ci	tc = get_first_thin(pool);
360962306a36Sopenharmony_ci	while (tc) {
361062306a36Sopenharmony_ci		dm_internal_suspend_noflush(tc->thin_md);
361162306a36Sopenharmony_ci		tc = get_next_thin(pool, tc);
361262306a36Sopenharmony_ci	}
361362306a36Sopenharmony_ci}
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_cistatic void pool_resume_active_thins(struct pool *pool)
361662306a36Sopenharmony_ci{
361762306a36Sopenharmony_ci	struct thin_c *tc;
361862306a36Sopenharmony_ci
361962306a36Sopenharmony_ci	/* Resume all active thin devices */
362062306a36Sopenharmony_ci	tc = get_first_thin(pool);
362162306a36Sopenharmony_ci	while (tc) {
362262306a36Sopenharmony_ci		dm_internal_resume(tc->thin_md);
362362306a36Sopenharmony_ci		tc = get_next_thin(pool, tc);
362462306a36Sopenharmony_ci	}
362562306a36Sopenharmony_ci}
362662306a36Sopenharmony_ci
362762306a36Sopenharmony_cistatic void pool_resume(struct dm_target *ti)
362862306a36Sopenharmony_ci{
362962306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
363062306a36Sopenharmony_ci	struct pool *pool = pt->pool;
363162306a36Sopenharmony_ci
363262306a36Sopenharmony_ci	/*
363362306a36Sopenharmony_ci	 * Must requeue active_thins' bios and then resume
363462306a36Sopenharmony_ci	 * active_thins _before_ clearing 'suspend' flag.
363562306a36Sopenharmony_ci	 */
363662306a36Sopenharmony_ci	requeue_bios(pool);
363762306a36Sopenharmony_ci	pool_resume_active_thins(pool);
363862306a36Sopenharmony_ci
363962306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
364062306a36Sopenharmony_ci	pool->low_water_triggered = false;
364162306a36Sopenharmony_ci	pool->suspended = false;
364262306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
364362306a36Sopenharmony_ci
364462306a36Sopenharmony_ci	do_waker(&pool->waker.work);
364562306a36Sopenharmony_ci}
364662306a36Sopenharmony_ci
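/*
 * Mark the pool as suspended, so no new thin devices can be activated,
 * then internally suspend all active thin devices.
 */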
364762306a36Sopenharmony_cistatic void pool_presuspend(struct dm_target *ti)
364862306a36Sopenharmony_ci{
364962306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
365062306a36Sopenharmony_ci	struct pool *pool = pt->pool;
365162306a36Sopenharmony_ci
365262306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
365362306a36Sopenharmony_ci	pool->suspended = true;
365462306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
365562306a36Sopenharmony_ci
365662306a36Sopenharmony_ci	pool_suspend_active_thins(pool);
365762306a36Sopenharmony_ci}
365862306a36Sopenharmony_ci
365962306a36Sopenharmony_cistatic void pool_presuspend_undo(struct dm_target *ti)
366062306a36Sopenharmony_ci{
366162306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
366262306a36Sopenharmony_ci	struct pool *pool = pt->pool;
366362306a36Sopenharmony_ci
366462306a36Sopenharmony_ci	pool_resume_active_thins(pool);
366562306a36Sopenharmony_ci
366662306a36Sopenharmony_ci	spin_lock_irq(&pool->lock);
366762306a36Sopenharmony_ci	pool->suspended = false;
366862306a36Sopenharmony_ci	spin_unlock_irq(&pool->lock);
366962306a36Sopenharmony_ci}
367062306a36Sopenharmony_ci
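/*
 * Stop the periodic waker and the no-space timeout, drain the worker
 * queue, then commit so the metadata is consistent on disk while the
 * pool is suspended.
 */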
367162306a36Sopenharmony_cistatic void pool_postsuspend(struct dm_target *ti)
367262306a36Sopenharmony_ci{
367362306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
367462306a36Sopenharmony_ci	struct pool *pool = pt->pool;
367562306a36Sopenharmony_ci
367662306a36Sopenharmony_ci	cancel_delayed_work_sync(&pool->waker);
367762306a36Sopenharmony_ci	cancel_delayed_work_sync(&pool->no_space_timeout);
367862306a36Sopenharmony_ci	flush_workqueue(pool->wq);
367962306a36Sopenharmony_ci	(void) commit(pool);
368062306a36Sopenharmony_ci}
368162306a36Sopenharmony_ci
368262306a36Sopenharmony_cistatic int check_arg_count(unsigned int argc, unsigned int args_required)
368362306a36Sopenharmony_ci{
368462306a36Sopenharmony_ci	if (argc != args_required) {
368562306a36Sopenharmony_ci		DMWARN("Message received with %u arguments instead of %u.",
368662306a36Sopenharmony_ci		       argc, args_required);
368762306a36Sopenharmony_ci		return -EINVAL;
368862306a36Sopenharmony_ci	}
368962306a36Sopenharmony_ci
369062306a36Sopenharmony_ci	return 0;
369162306a36Sopenharmony_ci}
369262306a36Sopenharmony_ci
369362306a36Sopenharmony_cistatic int read_dev_id(char *arg, dm_thin_id *dev_id, int warning)
369462306a36Sopenharmony_ci{
369562306a36Sopenharmony_ci	if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) &&
369662306a36Sopenharmony_ci	    *dev_id <= MAX_DEV_ID)
369762306a36Sopenharmony_ci		return 0;
369862306a36Sopenharmony_ci
369962306a36Sopenharmony_ci	if (warning)
370062306a36Sopenharmony_ci		DMWARN("Message received with invalid device id: %s", arg);
370162306a36Sopenharmony_ci
370262306a36Sopenharmony_ci	return -EINVAL;
370362306a36Sopenharmony_ci}
370462306a36Sopenharmony_ci
370562306a36Sopenharmony_cistatic int process_create_thin_mesg(unsigned int argc, char **argv, struct pool *pool)
370662306a36Sopenharmony_ci{
370762306a36Sopenharmony_ci	dm_thin_id dev_id;
370862306a36Sopenharmony_ci	int r;
370962306a36Sopenharmony_ci
371062306a36Sopenharmony_ci	r = check_arg_count(argc, 2);
371162306a36Sopenharmony_ci	if (r)
371262306a36Sopenharmony_ci		return r;
371362306a36Sopenharmony_ci
371462306a36Sopenharmony_ci	r = read_dev_id(argv[1], &dev_id, 1);
371562306a36Sopenharmony_ci	if (r)
371662306a36Sopenharmony_ci		return r;
371762306a36Sopenharmony_ci
371862306a36Sopenharmony_ci	r = dm_pool_create_thin(pool->pmd, dev_id);
371962306a36Sopenharmony_ci	if (r) {
372062306a36Sopenharmony_ci		DMWARN("Creation of new thinly-provisioned device with id %s failed.",
372162306a36Sopenharmony_ci		       argv[1]);
372262306a36Sopenharmony_ci		return r;
372362306a36Sopenharmony_ci	}
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_ci	return 0;
372662306a36Sopenharmony_ci}
372762306a36Sopenharmony_ci
372862306a36Sopenharmony_cistatic int process_create_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
372962306a36Sopenharmony_ci{
373062306a36Sopenharmony_ci	dm_thin_id dev_id;
373162306a36Sopenharmony_ci	dm_thin_id origin_dev_id;
373262306a36Sopenharmony_ci	int r;
373362306a36Sopenharmony_ci
373462306a36Sopenharmony_ci	r = check_arg_count(argc, 3);
373562306a36Sopenharmony_ci	if (r)
373662306a36Sopenharmony_ci		return r;
373762306a36Sopenharmony_ci
373862306a36Sopenharmony_ci	r = read_dev_id(argv[1], &dev_id, 1);
373962306a36Sopenharmony_ci	if (r)
374062306a36Sopenharmony_ci		return r;
374162306a36Sopenharmony_ci
374262306a36Sopenharmony_ci	r = read_dev_id(argv[2], &origin_dev_id, 1);
374362306a36Sopenharmony_ci	if (r)
374462306a36Sopenharmony_ci		return r;
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
374762306a36Sopenharmony_ci	if (r) {
374862306a36Sopenharmony_ci		DMWARN("Creation of new snapshot %s of device %s failed.",
374962306a36Sopenharmony_ci		       argv[1], argv[2]);
375062306a36Sopenharmony_ci		return r;
375162306a36Sopenharmony_ci	}
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci	return 0;
375462306a36Sopenharmony_ci}
375562306a36Sopenharmony_ci
375662306a36Sopenharmony_cistatic int process_delete_mesg(unsigned int argc, char **argv, struct pool *pool)
375762306a36Sopenharmony_ci{
375862306a36Sopenharmony_ci	dm_thin_id dev_id;
375962306a36Sopenharmony_ci	int r;
376062306a36Sopenharmony_ci
376162306a36Sopenharmony_ci	r = check_arg_count(argc, 2);
376262306a36Sopenharmony_ci	if (r)
376362306a36Sopenharmony_ci		return r;
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci	r = read_dev_id(argv[1], &dev_id, 1);
376662306a36Sopenharmony_ci	if (r)
376762306a36Sopenharmony_ci		return r;
376862306a36Sopenharmony_ci
376962306a36Sopenharmony_ci	r = dm_pool_delete_thin_device(pool->pmd, dev_id);
377062306a36Sopenharmony_ci	if (r)
377162306a36Sopenharmony_ci		DMWARN("Deletion of thin device %s failed.", argv[1]);
377262306a36Sopenharmony_ci
377362306a36Sopenharmony_ci	return r;
377462306a36Sopenharmony_ci}
377562306a36Sopenharmony_ci
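/*
 * The transaction id is only changed if the current id supplied by
 * userspace matches the one stored in the metadata (compare-and-swap).
 */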
377662306a36Sopenharmony_cistatic int process_set_transaction_id_mesg(unsigned int argc, char **argv, struct pool *pool)
377762306a36Sopenharmony_ci{
377862306a36Sopenharmony_ci	dm_thin_id old_id, new_id;
377962306a36Sopenharmony_ci	int r;
378062306a36Sopenharmony_ci
378162306a36Sopenharmony_ci	r = check_arg_count(argc, 3);
378262306a36Sopenharmony_ci	if (r)
378362306a36Sopenharmony_ci		return r;
378462306a36Sopenharmony_ci
378562306a36Sopenharmony_ci	if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) {
378662306a36Sopenharmony_ci		DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]);
378762306a36Sopenharmony_ci		return -EINVAL;
378862306a36Sopenharmony_ci	}
378962306a36Sopenharmony_ci
379062306a36Sopenharmony_ci	if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) {
379162306a36Sopenharmony_ci		DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]);
379262306a36Sopenharmony_ci		return -EINVAL;
379362306a36Sopenharmony_ci	}
379462306a36Sopenharmony_ci
379562306a36Sopenharmony_ci	r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
379662306a36Sopenharmony_ci	if (r) {
379762306a36Sopenharmony_ci		DMWARN("Failed to change transaction id from %s to %s.",
379862306a36Sopenharmony_ci		       argv[1], argv[2]);
379962306a36Sopenharmony_ci		return r;
380062306a36Sopenharmony_ci	}
380162306a36Sopenharmony_ci
380262306a36Sopenharmony_ci	return 0;
380362306a36Sopenharmony_ci}
380462306a36Sopenharmony_ci
380562306a36Sopenharmony_cistatic int process_reserve_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
380662306a36Sopenharmony_ci{
380762306a36Sopenharmony_ci	int r;
380862306a36Sopenharmony_ci
380962306a36Sopenharmony_ci	r = check_arg_count(argc, 1);
381062306a36Sopenharmony_ci	if (r)
381162306a36Sopenharmony_ci		return r;
381262306a36Sopenharmony_ci
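	/* Commit first so the reserved snapshot sees all mappings to date. */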
381362306a36Sopenharmony_ci	(void) commit(pool);
381462306a36Sopenharmony_ci
381562306a36Sopenharmony_ci	r = dm_pool_reserve_metadata_snap(pool->pmd);
381662306a36Sopenharmony_ci	if (r)
381762306a36Sopenharmony_ci		DMWARN("reserve_metadata_snap message failed.");
381862306a36Sopenharmony_ci
381962306a36Sopenharmony_ci	return r;
382062306a36Sopenharmony_ci}
382162306a36Sopenharmony_ci
382262306a36Sopenharmony_cistatic int process_release_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
382362306a36Sopenharmony_ci{
382462306a36Sopenharmony_ci	int r;
382562306a36Sopenharmony_ci
382662306a36Sopenharmony_ci	r = check_arg_count(argc, 1);
382762306a36Sopenharmony_ci	if (r)
382862306a36Sopenharmony_ci		return r;
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	r = dm_pool_release_metadata_snap(pool->pmd);
383162306a36Sopenharmony_ci	if (r)
383262306a36Sopenharmony_ci		DMWARN("release_metadata_snap message failed.");
383362306a36Sopenharmony_ci
383462306a36Sopenharmony_ci	return r;
383562306a36Sopenharmony_ci}
383662306a36Sopenharmony_ci
383762306a36Sopenharmony_ci/*
383862306a36Sopenharmony_ci * Messages supported:
383962306a36Sopenharmony_ci *   create_thin	<dev_id>
384062306a36Sopenharmony_ci *   create_snap	<dev_id> <origin_id>
384162306a36Sopenharmony_ci *   delete		<dev_id>
384262306a36Sopenharmony_ci *   set_transaction_id <current_trans_id> <new_trans_id>
384362306a36Sopenharmony_ci *   reserve_metadata_snap
384462306a36Sopenharmony_ci *   release_metadata_snap
384562306a36Sopenharmony_ci */
384662306a36Sopenharmony_cistatic int pool_message(struct dm_target *ti, unsigned int argc, char **argv,
384762306a36Sopenharmony_ci			char *result, unsigned int maxlen)
384862306a36Sopenharmony_ci{
384962306a36Sopenharmony_ci	int r = -EINVAL;
385062306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
385162306a36Sopenharmony_ci	struct pool *pool = pt->pool;
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
385462306a36Sopenharmony_ci		DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
385562306a36Sopenharmony_ci		      dm_device_name(pool->pool_md));
385662306a36Sopenharmony_ci		return -EOPNOTSUPP;
385762306a36Sopenharmony_ci	}
385862306a36Sopenharmony_ci
385962306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "create_thin"))
386062306a36Sopenharmony_ci		r = process_create_thin_mesg(argc, argv, pool);
386162306a36Sopenharmony_ci
386262306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "create_snap"))
386362306a36Sopenharmony_ci		r = process_create_snap_mesg(argc, argv, pool);
386462306a36Sopenharmony_ci
386562306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "delete"))
386662306a36Sopenharmony_ci		r = process_delete_mesg(argc, argv, pool);
386762306a36Sopenharmony_ci
386862306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "set_transaction_id"))
386962306a36Sopenharmony_ci		r = process_set_transaction_id_mesg(argc, argv, pool);
387062306a36Sopenharmony_ci
387162306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
387262306a36Sopenharmony_ci		r = process_reserve_metadata_snap_mesg(argc, argv, pool);
387362306a36Sopenharmony_ci
387462306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "release_metadata_snap"))
387562306a36Sopenharmony_ci		r = process_release_metadata_snap_mesg(argc, argv, pool);
387662306a36Sopenharmony_ci
387762306a36Sopenharmony_ci	else
387862306a36Sopenharmony_ci		DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
387962306a36Sopenharmony_ci
388062306a36Sopenharmony_ci	if (!r)
388162306a36Sopenharmony_ci		(void) commit(pool);
388262306a36Sopenharmony_ci
388362306a36Sopenharmony_ci	return r;
388462306a36Sopenharmony_ci}
388562306a36Sopenharmony_ci
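/*
 * Emit the feature-argument count followed by the feature names, in the
 * same form the pool constructor accepts them.
 */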
388662306a36Sopenharmony_cistatic void emit_flags(struct pool_features *pf, char *result,
388762306a36Sopenharmony_ci		       unsigned int sz, unsigned int maxlen)
388862306a36Sopenharmony_ci{
388962306a36Sopenharmony_ci	unsigned int count = !pf->zero_new_blocks + !pf->discard_enabled +
389062306a36Sopenharmony_ci		!pf->discard_passdown + (pf->mode == PM_READ_ONLY) +
389162306a36Sopenharmony_ci		pf->error_if_no_space;
389262306a36Sopenharmony_ci	DMEMIT("%u ", count);
389362306a36Sopenharmony_ci
389462306a36Sopenharmony_ci	if (!pf->zero_new_blocks)
389562306a36Sopenharmony_ci		DMEMIT("skip_block_zeroing ");
389662306a36Sopenharmony_ci
389762306a36Sopenharmony_ci	if (!pf->discard_enabled)
389862306a36Sopenharmony_ci		DMEMIT("ignore_discard ");
389962306a36Sopenharmony_ci
390062306a36Sopenharmony_ci	if (!pf->discard_passdown)
390162306a36Sopenharmony_ci		DMEMIT("no_discard_passdown ");
390262306a36Sopenharmony_ci
390362306a36Sopenharmony_ci	if (pf->mode == PM_READ_ONLY)
390462306a36Sopenharmony_ci		DMEMIT("read_only ");
390562306a36Sopenharmony_ci
390662306a36Sopenharmony_ci	if (pf->error_if_no_space)
390762306a36Sopenharmony_ci		DMEMIT("error_if_no_space ");
390862306a36Sopenharmony_ci}
390962306a36Sopenharmony_ci
391062306a36Sopenharmony_ci/*
391162306a36Sopenharmony_ci * Status line is:
391262306a36Sopenharmony_ci *    <transaction id> <used metadata sectors>/<total metadata sectors>
391362306a36Sopenharmony_ci *    <used data sectors>/<total data sectors> <held metadata root>
391462306a36Sopenharmony_ci *    <pool mode> <discard config> <no space config> <needs_check>
391562306a36Sopenharmony_ci */
391662306a36Sopenharmony_cistatic void pool_status(struct dm_target *ti, status_type_t type,
391762306a36Sopenharmony_ci			unsigned int status_flags, char *result, unsigned int maxlen)
391862306a36Sopenharmony_ci{
391962306a36Sopenharmony_ci	int r;
392062306a36Sopenharmony_ci	unsigned int sz = 0;
392162306a36Sopenharmony_ci	uint64_t transaction_id;
392262306a36Sopenharmony_ci	dm_block_t nr_free_blocks_data;
392362306a36Sopenharmony_ci	dm_block_t nr_free_blocks_metadata;
392462306a36Sopenharmony_ci	dm_block_t nr_blocks_data;
392562306a36Sopenharmony_ci	dm_block_t nr_blocks_metadata;
392662306a36Sopenharmony_ci	dm_block_t held_root;
392762306a36Sopenharmony_ci	enum pool_mode mode;
392862306a36Sopenharmony_ci	char buf[BDEVNAME_SIZE];
392962306a36Sopenharmony_ci	char buf2[BDEVNAME_SIZE];
393062306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
393162306a36Sopenharmony_ci	struct pool *pool = pt->pool;
393262306a36Sopenharmony_ci
393362306a36Sopenharmony_ci	switch (type) {
393462306a36Sopenharmony_ci	case STATUSTYPE_INFO:
393562306a36Sopenharmony_ci		if (get_pool_mode(pool) == PM_FAIL) {
393662306a36Sopenharmony_ci			DMEMIT("Fail");
393762306a36Sopenharmony_ci			break;
393862306a36Sopenharmony_ci		}
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci		/* Commit to ensure statistics aren't out-of-date */
394162306a36Sopenharmony_ci		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
394262306a36Sopenharmony_ci			(void) commit(pool);
394362306a36Sopenharmony_ci
394462306a36Sopenharmony_ci		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
394562306a36Sopenharmony_ci		if (r) {
394662306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_metadata_transaction_id returned %d",
394762306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
394862306a36Sopenharmony_ci			goto err;
394962306a36Sopenharmony_ci		}
395062306a36Sopenharmony_ci
395162306a36Sopenharmony_ci		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
395262306a36Sopenharmony_ci		if (r) {
395362306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_free_metadata_block_count returned %d",
395462306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
395562306a36Sopenharmony_ci			goto err;
395662306a36Sopenharmony_ci		}
395762306a36Sopenharmony_ci
395862306a36Sopenharmony_ci		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
395962306a36Sopenharmony_ci		if (r) {
396062306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_metadata_dev_size returned %d",
396162306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
396262306a36Sopenharmony_ci			goto err;
396362306a36Sopenharmony_ci		}
396462306a36Sopenharmony_ci
396562306a36Sopenharmony_ci		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
396662306a36Sopenharmony_ci		if (r) {
396762306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_free_block_count returned %d",
396862306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
396962306a36Sopenharmony_ci			goto err;
397062306a36Sopenharmony_ci		}
397162306a36Sopenharmony_ci
397262306a36Sopenharmony_ci		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
397362306a36Sopenharmony_ci		if (r) {
397462306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_data_dev_size returned %d",
397562306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
397662306a36Sopenharmony_ci			goto err;
397762306a36Sopenharmony_ci		}
397862306a36Sopenharmony_ci
397962306a36Sopenharmony_ci		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
398062306a36Sopenharmony_ci		if (r) {
398162306a36Sopenharmony_ci			DMERR("%s: dm_pool_get_metadata_snap returned %d",
398262306a36Sopenharmony_ci			      dm_device_name(pool->pool_md), r);
398362306a36Sopenharmony_ci			goto err;
398462306a36Sopenharmony_ci		}
398562306a36Sopenharmony_ci
398662306a36Sopenharmony_ci		DMEMIT("%llu %llu/%llu %llu/%llu ",
398762306a36Sopenharmony_ci		       (unsigned long long)transaction_id,
398862306a36Sopenharmony_ci		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
398962306a36Sopenharmony_ci		       (unsigned long long)nr_blocks_metadata,
399062306a36Sopenharmony_ci		       (unsigned long long)(nr_blocks_data - nr_free_blocks_data),
399162306a36Sopenharmony_ci		       (unsigned long long)nr_blocks_data);
399262306a36Sopenharmony_ci
399362306a36Sopenharmony_ci		if (held_root)
399462306a36Sopenharmony_ci			DMEMIT("%llu ", held_root);
399562306a36Sopenharmony_ci		else
399662306a36Sopenharmony_ci			DMEMIT("- ");
399762306a36Sopenharmony_ci
399862306a36Sopenharmony_ci		mode = get_pool_mode(pool);
399962306a36Sopenharmony_ci		if (mode == PM_OUT_OF_DATA_SPACE)
400062306a36Sopenharmony_ci			DMEMIT("out_of_data_space ");
400162306a36Sopenharmony_ci		else if (is_read_only_pool_mode(mode))
400262306a36Sopenharmony_ci			DMEMIT("ro ");
400362306a36Sopenharmony_ci		else
400462306a36Sopenharmony_ci			DMEMIT("rw ");
400562306a36Sopenharmony_ci
400662306a36Sopenharmony_ci		if (!pool->pf.discard_enabled)
400762306a36Sopenharmony_ci			DMEMIT("ignore_discard ");
400862306a36Sopenharmony_ci		else if (pool->pf.discard_passdown)
400962306a36Sopenharmony_ci			DMEMIT("discard_passdown ");
401062306a36Sopenharmony_ci		else
401162306a36Sopenharmony_ci			DMEMIT("no_discard_passdown ");
401262306a36Sopenharmony_ci
401362306a36Sopenharmony_ci		if (pool->pf.error_if_no_space)
401462306a36Sopenharmony_ci			DMEMIT("error_if_no_space ");
401562306a36Sopenharmony_ci		else
401662306a36Sopenharmony_ci			DMEMIT("queue_if_no_space ");
401762306a36Sopenharmony_ci
401862306a36Sopenharmony_ci		if (dm_pool_metadata_needs_check(pool->pmd))
401962306a36Sopenharmony_ci			DMEMIT("needs_check ");
402062306a36Sopenharmony_ci		else
402162306a36Sopenharmony_ci			DMEMIT("- ");
402262306a36Sopenharmony_ci
402362306a36Sopenharmony_ci		DMEMIT("%llu ", (unsigned long long)calc_metadata_threshold(pt));
402462306a36Sopenharmony_ci
402562306a36Sopenharmony_ci		break;
402662306a36Sopenharmony_ci
402762306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
402862306a36Sopenharmony_ci		DMEMIT("%s %s %lu %llu ",
402962306a36Sopenharmony_ci		       format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
403062306a36Sopenharmony_ci		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
403162306a36Sopenharmony_ci		       (unsigned long)pool->sectors_per_block,
403262306a36Sopenharmony_ci		       (unsigned long long)pt->low_water_blocks);
403362306a36Sopenharmony_ci		emit_flags(&pt->requested_pf, result, sz, maxlen);
403462306a36Sopenharmony_ci		break;
403562306a36Sopenharmony_ci
403662306a36Sopenharmony_ci	case STATUSTYPE_IMA:
403762306a36Sopenharmony_ci		*result = '\0';
403862306a36Sopenharmony_ci		break;
403962306a36Sopenharmony_ci	}
404062306a36Sopenharmony_ci	return;
404162306a36Sopenharmony_ci
404262306a36Sopenharmony_cierr:
404362306a36Sopenharmony_ci	DMEMIT("Error");
404462306a36Sopenharmony_ci}
404562306a36Sopenharmony_ci
404662306a36Sopenharmony_cistatic int pool_iterate_devices(struct dm_target *ti,
404762306a36Sopenharmony_ci				iterate_devices_callout_fn fn, void *data)
404862306a36Sopenharmony_ci{
404962306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
405062306a36Sopenharmony_ci
405162306a36Sopenharmony_ci	return fn(ti, pt->data_dev, 0, ti->len, data);
405262306a36Sopenharmony_ci}
405362306a36Sopenharmony_ci
405462306a36Sopenharmony_cistatic void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
405562306a36Sopenharmony_ci{
405662306a36Sopenharmony_ci	struct pool_c *pt = ti->private;
405762306a36Sopenharmony_ci	struct pool *pool = pt->pool;
405862306a36Sopenharmony_ci	sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
405962306a36Sopenharmony_ci
406062306a36Sopenharmony_ci	/*
406162306a36Sopenharmony_ci	 * If max_sectors is smaller than pool->sectors_per_block adjust it
406262306a36Sopenharmony_ci	 * to the highest possible power-of-2 factor of pool->sectors_per_block.
406362306a36Sopenharmony_ci	 * This is especially beneficial when the pool's data device is a RAID
406462306a36Sopenharmony_ci	 * device with a full stripe width that matches pool->sectors_per_block:
406562306a36Sopenharmony_ci	 * even though partial RAID stripe-sized IOs are issued to a single
406662306a36Sopenharmony_ci	 * RAID stripe, once aggregated they end on a full RAID stripe
406762306a36Sopenharmony_ci	 * boundary, which avoids cascading further partial RAID stripe writes.
406862306a36Sopenharmony_ci	 */
406962306a36Sopenharmony_ci	if (limits->max_sectors < pool->sectors_per_block) {
407062306a36Sopenharmony_ci		while (!is_factor(pool->sectors_per_block, limits->max_sectors)) {
407162306a36Sopenharmony_ci			if ((limits->max_sectors & (limits->max_sectors - 1)) == 0)
407262306a36Sopenharmony_ci				limits->max_sectors--;
407362306a36Sopenharmony_ci			limits->max_sectors = rounddown_pow_of_two(limits->max_sectors);
407462306a36Sopenharmony_ci		}
407562306a36Sopenharmony_ci	}
407662306a36Sopenharmony_ci
407762306a36Sopenharmony_ci	/*
407862306a36Sopenharmony_ci	 * If the system-determined stacked limits are compatible with the
407962306a36Sopenharmony_ci	 * pool's blocksize (io_opt is a factor) do not override them.
408062306a36Sopenharmony_ci	 */
408162306a36Sopenharmony_ci	if (io_opt_sectors < pool->sectors_per_block ||
408262306a36Sopenharmony_ci	    !is_factor(io_opt_sectors, pool->sectors_per_block)) {
408362306a36Sopenharmony_ci		if (is_factor(pool->sectors_per_block, limits->max_sectors))
408462306a36Sopenharmony_ci			blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT);
408562306a36Sopenharmony_ci		else
408662306a36Sopenharmony_ci			blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
408762306a36Sopenharmony_ci		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
408862306a36Sopenharmony_ci	}
408962306a36Sopenharmony_ci
409062306a36Sopenharmony_ci	/*
409162306a36Sopenharmony_ci	 * pt->adjusted_pf is a staging area for the actual features to use.
409262306a36Sopenharmony_ci	 * They get transferred to the live pool in bind_control_target()
409362306a36Sopenharmony_ci	 * called from pool_preresume().
409462306a36Sopenharmony_ci	 */
409562306a36Sopenharmony_ci
409662306a36Sopenharmony_ci	if (pt->adjusted_pf.discard_enabled) {
409762306a36Sopenharmony_ci		disable_discard_passdown_if_not_supported(pt);
409862306a36Sopenharmony_ci		if (!pt->adjusted_pf.discard_passdown)
409962306a36Sopenharmony_ci			limits->max_discard_sectors = 0;
410062306a36Sopenharmony_ci		/*
410162306a36Sopenharmony_ci		 * The pool uses the same discard limits as the underlying data
410262306a36Sopenharmony_ci		 * device.  DM core has already set this up.
410362306a36Sopenharmony_ci		 */
410462306a36Sopenharmony_ci	} else {
410562306a36Sopenharmony_ci		/*
410662306a36Sopenharmony_ci		 * Must explicitly disallow stacking discard limits otherwise the
410762306a36Sopenharmony_ci		 * block layer will stack them if pool's data device has support.
410862306a36Sopenharmony_ci		 */
410962306a36Sopenharmony_ci		limits->discard_granularity = 0;
411062306a36Sopenharmony_ci	}
411162306a36Sopenharmony_ci}
411262306a36Sopenharmony_ci
411362306a36Sopenharmony_cistatic struct target_type pool_target = {
411462306a36Sopenharmony_ci	.name = "thin-pool",
411562306a36Sopenharmony_ci	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
411662306a36Sopenharmony_ci		    DM_TARGET_IMMUTABLE,
411762306a36Sopenharmony_ci	.version = {1, 23, 0},
411862306a36Sopenharmony_ci	.module = THIS_MODULE,
411962306a36Sopenharmony_ci	.ctr = pool_ctr,
412062306a36Sopenharmony_ci	.dtr = pool_dtr,
412162306a36Sopenharmony_ci	.map = pool_map,
412262306a36Sopenharmony_ci	.presuspend = pool_presuspend,
412362306a36Sopenharmony_ci	.presuspend_undo = pool_presuspend_undo,
412462306a36Sopenharmony_ci	.postsuspend = pool_postsuspend,
412562306a36Sopenharmony_ci	.preresume = pool_preresume,
412662306a36Sopenharmony_ci	.resume = pool_resume,
412762306a36Sopenharmony_ci	.message = pool_message,
412862306a36Sopenharmony_ci	.status = pool_status,
412962306a36Sopenharmony_ci	.iterate_devices = pool_iterate_devices,
413062306a36Sopenharmony_ci	.io_hints = pool_io_hints,
413162306a36Sopenharmony_ci};
413262306a36Sopenharmony_ci
413362306a36Sopenharmony_ci/*
413462306a36Sopenharmony_ci *--------------------------------------------------------------
413562306a36Sopenharmony_ci * Thin target methods
413662306a36Sopenharmony_ci *--------------------------------------------------------------
413762306a36Sopenharmony_ci */
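/*
 * Reference counting for thin_c: thin_dtr() drops the initial reference
 * and then waits on 'can_destroy', so any code still holding a reference
 * (e.g. iteration of the pool's active_thins list) finishes before the
 * structure is freed.
 */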
413862306a36Sopenharmony_cistatic void thin_get(struct thin_c *tc)
413962306a36Sopenharmony_ci{
414062306a36Sopenharmony_ci	refcount_inc(&tc->refcount);
414162306a36Sopenharmony_ci}
414262306a36Sopenharmony_ci
414362306a36Sopenharmony_cistatic void thin_put(struct thin_c *tc)
414462306a36Sopenharmony_ci{
414562306a36Sopenharmony_ci	if (refcount_dec_and_test(&tc->refcount))
414662306a36Sopenharmony_ci		complete(&tc->can_destroy);
414762306a36Sopenharmony_ci}
414862306a36Sopenharmony_ci
414962306a36Sopenharmony_cistatic void thin_dtr(struct dm_target *ti)
415062306a36Sopenharmony_ci{
415162306a36Sopenharmony_ci	struct thin_c *tc = ti->private;
415262306a36Sopenharmony_ci
415362306a36Sopenharmony_ci	spin_lock_irq(&tc->pool->lock);
415462306a36Sopenharmony_ci	list_del_rcu(&tc->list);
415562306a36Sopenharmony_ci	spin_unlock_irq(&tc->pool->lock);
415662306a36Sopenharmony_ci	synchronize_rcu();
415762306a36Sopenharmony_ci
415862306a36Sopenharmony_ci	thin_put(tc);
415962306a36Sopenharmony_ci	wait_for_completion(&tc->can_destroy);
416062306a36Sopenharmony_ci
416162306a36Sopenharmony_ci	mutex_lock(&dm_thin_pool_table.mutex);
416262306a36Sopenharmony_ci
416362306a36Sopenharmony_ci	__pool_dec(tc->pool);
416462306a36Sopenharmony_ci	dm_pool_close_thin_device(tc->td);
416562306a36Sopenharmony_ci	dm_put_device(ti, tc->pool_dev);
416662306a36Sopenharmony_ci	if (tc->origin_dev)
416762306a36Sopenharmony_ci		dm_put_device(ti, tc->origin_dev);
416862306a36Sopenharmony_ci	kfree(tc);
416962306a36Sopenharmony_ci
417062306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
417162306a36Sopenharmony_ci}
417262306a36Sopenharmony_ci
417362306a36Sopenharmony_ci/*
417462306a36Sopenharmony_ci * Thin target parameters:
417562306a36Sopenharmony_ci *
417662306a36Sopenharmony_ci * <pool_dev> <dev_id> [origin_dev]
417762306a36Sopenharmony_ci *
417862306a36Sopenharmony_ci * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
417962306a36Sopenharmony_ci * dev_id: the internal device identifier
418062306a36Sopenharmony_ci * origin_dev: a device external to the pool that should act as the origin
418162306a36Sopenharmony_ci *
418262306a36Sopenharmony_ci * If the pool device has discards disabled, they get disabled for the thin
418362306a36Sopenharmony_ci * device as well.
418462306a36Sopenharmony_ci */
418562306a36Sopenharmony_cistatic int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
418662306a36Sopenharmony_ci{
418762306a36Sopenharmony_ci	int r;
418862306a36Sopenharmony_ci	struct thin_c *tc;
418962306a36Sopenharmony_ci	struct dm_dev *pool_dev, *origin_dev;
419062306a36Sopenharmony_ci	struct mapped_device *pool_md;
419162306a36Sopenharmony_ci
419262306a36Sopenharmony_ci	mutex_lock(&dm_thin_pool_table.mutex);
419362306a36Sopenharmony_ci
419462306a36Sopenharmony_ci	if (argc != 2 && argc != 3) {
419562306a36Sopenharmony_ci		ti->error = "Invalid argument count";
419662306a36Sopenharmony_ci		r = -EINVAL;
419762306a36Sopenharmony_ci		goto out_unlock;
419862306a36Sopenharmony_ci	}
419962306a36Sopenharmony_ci
420062306a36Sopenharmony_ci	tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL);
420162306a36Sopenharmony_ci	if (!tc) {
420262306a36Sopenharmony_ci		ti->error = "Out of memory";
420362306a36Sopenharmony_ci		r = -ENOMEM;
420462306a36Sopenharmony_ci		goto out_unlock;
420562306a36Sopenharmony_ci	}
420662306a36Sopenharmony_ci	tc->thin_md = dm_table_get_md(ti->table);
420762306a36Sopenharmony_ci	spin_lock_init(&tc->lock);
420862306a36Sopenharmony_ci	INIT_LIST_HEAD(&tc->deferred_cells);
420962306a36Sopenharmony_ci	bio_list_init(&tc->deferred_bio_list);
421062306a36Sopenharmony_ci	bio_list_init(&tc->retry_on_resume_list);
421162306a36Sopenharmony_ci	tc->sort_bio_list = RB_ROOT;
421262306a36Sopenharmony_ci
421362306a36Sopenharmony_ci	if (argc == 3) {
421462306a36Sopenharmony_ci		if (!strcmp(argv[0], argv[2])) {
421562306a36Sopenharmony_ci			ti->error = "Error setting origin device";
421662306a36Sopenharmony_ci			r = -EINVAL;
421762306a36Sopenharmony_ci			goto bad_origin_dev;
421862306a36Sopenharmony_ci		}
421962306a36Sopenharmony_ci
422062306a36Sopenharmony_ci		r = dm_get_device(ti, argv[2], BLK_OPEN_READ, &origin_dev);
422162306a36Sopenharmony_ci		if (r) {
422262306a36Sopenharmony_ci			ti->error = "Error opening origin device";
422362306a36Sopenharmony_ci			goto bad_origin_dev;
422462306a36Sopenharmony_ci		}
422562306a36Sopenharmony_ci		tc->origin_dev = origin_dev;
422662306a36Sopenharmony_ci	}
422762306a36Sopenharmony_ci
422862306a36Sopenharmony_ci	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
422962306a36Sopenharmony_ci	if (r) {
423062306a36Sopenharmony_ci		ti->error = "Error opening pool device";
423162306a36Sopenharmony_ci		goto bad_pool_dev;
423262306a36Sopenharmony_ci	}
423362306a36Sopenharmony_ci	tc->pool_dev = pool_dev;
423462306a36Sopenharmony_ci
423562306a36Sopenharmony_ci	if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) {
423662306a36Sopenharmony_ci		ti->error = "Invalid device id";
423762306a36Sopenharmony_ci		r = -EINVAL;
423862306a36Sopenharmony_ci		goto bad_common;
423962306a36Sopenharmony_ci	}
424062306a36Sopenharmony_ci
424162306a36Sopenharmony_ci	pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev);
424262306a36Sopenharmony_ci	if (!pool_md) {
424362306a36Sopenharmony_ci		ti->error = "Couldn't get pool mapped device";
424462306a36Sopenharmony_ci		r = -EINVAL;
424562306a36Sopenharmony_ci		goto bad_common;
424662306a36Sopenharmony_ci	}
424762306a36Sopenharmony_ci
424862306a36Sopenharmony_ci	tc->pool = __pool_table_lookup(pool_md);
424962306a36Sopenharmony_ci	if (!tc->pool) {
425062306a36Sopenharmony_ci		ti->error = "Couldn't find pool object";
425162306a36Sopenharmony_ci		r = -EINVAL;
425262306a36Sopenharmony_ci		goto bad_pool_lookup;
425362306a36Sopenharmony_ci	}
425462306a36Sopenharmony_ci	__pool_inc(tc->pool);
425562306a36Sopenharmony_ci
425662306a36Sopenharmony_ci	if (get_pool_mode(tc->pool) == PM_FAIL) {
425762306a36Sopenharmony_ci		ti->error = "Couldn't open thin device, Pool is in fail mode";
425862306a36Sopenharmony_ci		r = -EINVAL;
425962306a36Sopenharmony_ci		goto bad_pool;
426062306a36Sopenharmony_ci	}
426162306a36Sopenharmony_ci
426262306a36Sopenharmony_ci	r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
426362306a36Sopenharmony_ci	if (r) {
426462306a36Sopenharmony_ci		ti->error = "Couldn't open thin internal device";
426562306a36Sopenharmony_ci		goto bad_pool;
426662306a36Sopenharmony_ci	}
426762306a36Sopenharmony_ci
426862306a36Sopenharmony_ci	r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
426962306a36Sopenharmony_ci	if (r)
427062306a36Sopenharmony_ci		goto bad;
427162306a36Sopenharmony_ci
427262306a36Sopenharmony_ci	ti->num_flush_bios = 1;
427362306a36Sopenharmony_ci	ti->limit_swap_bios = true;
427462306a36Sopenharmony_ci	ti->flush_supported = true;
427562306a36Sopenharmony_ci	ti->accounts_remapped_io = true;
427662306a36Sopenharmony_ci	ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
427762306a36Sopenharmony_ci
427862306a36Sopenharmony_ci	/* In case the pool supports discards, pass them on. */
427962306a36Sopenharmony_ci	if (tc->pool->pf.discard_enabled) {
428062306a36Sopenharmony_ci		ti->discards_supported = true;
428162306a36Sopenharmony_ci		ti->num_discard_bios = 1;
428262306a36Sopenharmony_ci		ti->max_discard_granularity = true;
428362306a36Sopenharmony_ci	}
428462306a36Sopenharmony_ci
428562306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
428662306a36Sopenharmony_ci
428762306a36Sopenharmony_ci	spin_lock_irq(&tc->pool->lock);
428862306a36Sopenharmony_ci	if (tc->pool->suspended) {
428962306a36Sopenharmony_ci		spin_unlock_irq(&tc->pool->lock);
429062306a36Sopenharmony_ci		mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */
429162306a36Sopenharmony_ci		ti->error = "Unable to activate thin device while pool is suspended";
429262306a36Sopenharmony_ci		r = -EINVAL;
429362306a36Sopenharmony_ci		goto bad;
429462306a36Sopenharmony_ci	}
429562306a36Sopenharmony_ci	refcount_set(&tc->refcount, 1);
429662306a36Sopenharmony_ci	init_completion(&tc->can_destroy);
429762306a36Sopenharmony_ci	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
429862306a36Sopenharmony_ci	spin_unlock_irq(&tc->pool->lock);
429962306a36Sopenharmony_ci	/*
430062306a36Sopenharmony_ci	 * This synchronize_rcu() call is needed here otherwise we risk a
430162306a36Sopenharmony_ci	 * wake_worker() call finding no bios to process (because the newly
430262306a36Sopenharmony_ci	 * added tc isn't yet visible).  So this reduces latency since we
430362306a36Sopenharmony_ci	 * aren't then dependent on the periodic commit to wake_worker().
430462306a36Sopenharmony_ci	 */
430562306a36Sopenharmony_ci	synchronize_rcu();
430662306a36Sopenharmony_ci
430762306a36Sopenharmony_ci	dm_put(pool_md);
430862306a36Sopenharmony_ci
430962306a36Sopenharmony_ci	return 0;
431062306a36Sopenharmony_ci
431162306a36Sopenharmony_cibad:
431262306a36Sopenharmony_ci	dm_pool_close_thin_device(tc->td);
431362306a36Sopenharmony_cibad_pool:
431462306a36Sopenharmony_ci	__pool_dec(tc->pool);
431562306a36Sopenharmony_cibad_pool_lookup:
431662306a36Sopenharmony_ci	dm_put(pool_md);
431762306a36Sopenharmony_cibad_common:
431862306a36Sopenharmony_ci	dm_put_device(ti, tc->pool_dev);
431962306a36Sopenharmony_cibad_pool_dev:
432062306a36Sopenharmony_ci	if (tc->origin_dev)
432162306a36Sopenharmony_ci		dm_put_device(ti, tc->origin_dev);
432262306a36Sopenharmony_cibad_origin_dev:
432362306a36Sopenharmony_ci	kfree(tc);
432462306a36Sopenharmony_ciout_unlock:
432562306a36Sopenharmony_ci	mutex_unlock(&dm_thin_pool_table.mutex);
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci	return r;
432862306a36Sopenharmony_ci}
432962306a36Sopenharmony_ci
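/*
 * Make the bio's sector relative to the start of the thin target before
 * handing it to thin_bio_map() for lookup/provisioning.
 */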
433062306a36Sopenharmony_cistatic int thin_map(struct dm_target *ti, struct bio *bio)
433162306a36Sopenharmony_ci{
433262306a36Sopenharmony_ci	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
433362306a36Sopenharmony_ci
433462306a36Sopenharmony_ci	return thin_bio_map(ti, bio);
433562306a36Sopenharmony_ci}
433662306a36Sopenharmony_ci
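/*
 * End-of-IO processing: release any deferred entries taken in the map
 * path.  Shared-read entries complete their mapping preparation, all-io
 * entries queue prepared discards to the pool worker, and any held cell
 * is released.
 */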
static int thin_endio(struct dm_target *ti, struct bio *bio,
		blk_status_t *err)
{
	unsigned long flags;
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct list_head work;
	struct dm_thin_new_mapping *m, *tmp;
	struct pool *pool = h->tc->pool;

	if (h->shared_read_entry) {
		INIT_LIST_HEAD(&work);
		dm_deferred_entry_dec(h->shared_read_entry, &work);

		spin_lock_irqsave(&pool->lock, flags);
		list_for_each_entry_safe(m, tmp, &work, list) {
			list_del(&m->list);
			__complete_mapping_preparation(m);
		}
		spin_unlock_irqrestore(&pool->lock, flags);
	}

	if (h->all_io_entry) {
		INIT_LIST_HEAD(&work);
		dm_deferred_entry_dec(h->all_io_entry, &work);
		if (!list_empty(&work)) {
			spin_lock_irqsave(&pool->lock, flags);
			list_for_each_entry_safe(m, tmp, &work, list)
				list_add_tail(&m->list, &pool->prepared_discards);
			spin_unlock_irqrestore(&pool->lock, flags);
			wake_worker(pool);
		}
	}

	if (h->cell)
		cell_defer_no_holder(h->tc, h->cell);

	return DM_ENDIO_DONE;
}

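/*
 * For a noflush suspend, switch the thin device into requeue mode so that
 * deferred and in-flight bios are requeued rather than flushed out.
 */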
static void thin_presuspend(struct dm_target *ti)
{
	struct thin_c *tc = ti->private;

	if (dm_noflush_suspending(ti))
		noflush_work(tc, do_noflush_start);
}

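/*
 * Leave requeue mode again, if thin_presuspend entered it.
 */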
static void thin_postsuspend(struct dm_target *ti)
{
	struct thin_c *tc = ti->private;

	/*
	 * The dm_noflush_suspending flag has been cleared by now, so
	 * unfortunately we must always run this.
	 */
	noflush_work(tc, do_noflush_stop);
}

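/*
 * Refresh the cached size of the external origin (if any) before resuming,
 * since the origin may have been resized while this device was suspended.
 * The cached value bounds which reads may be remapped to the origin.
 */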
static int thin_preresume(struct dm_target *ti)
{
	struct thin_c *tc = ti->private;

	if (tc->origin_dev)
		tc->origin_size = get_dev_size(tc->origin_dev->bdev);

	return 0;
}

/*
 * <nr mapped sectors> <highest mapped sector>
 */
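/*
 * For example (illustrative values only), 'dmsetup status' on a thin device
 * might report:
 *
 *   0 2097152 thin 1024000 2097151
 *
 * and the matching 'dmsetup table' line, i.e.
 * "thin <pool dev> <dev id> [<external origin dev>]":
 *
 *   0 2097152 thin 253:0 1
 */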
static void thin_status(struct dm_target *ti, status_type_t type,
			unsigned int status_flags, char *result, unsigned int maxlen)
{
	int r;
	ssize_t sz = 0;
	dm_block_t mapped, highest;
	char buf[BDEVNAME_SIZE];
	struct thin_c *tc = ti->private;

	if (get_pool_mode(tc->pool) == PM_FAIL) {
		DMEMIT("Fail");
		return;
	}

	if (!tc->td)
		DMEMIT("-");
	else {
		switch (type) {
		case STATUSTYPE_INFO:
			r = dm_thin_get_mapped_count(tc->td, &mapped);
			if (r) {
				DMERR("dm_thin_get_mapped_count returned %d", r);
				goto err;
			}

			r = dm_thin_get_highest_mapped_block(tc->td, &highest);
			if (r < 0) {
				DMERR("dm_thin_get_highest_mapped_block returned %d", r);
				goto err;
			}

			DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
			if (r)
				DMEMIT("%llu", ((highest + 1) *
						tc->pool->sectors_per_block) - 1);
			else
				DMEMIT("-");
			break;

		case STATUSTYPE_TABLE:
			DMEMIT("%s %lu",
			       format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
			       (unsigned long) tc->dev_id);
			if (tc->origin_dev)
				DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
			break;

		case STATUSTYPE_IMA:
			*result = '\0';
			break;
		}
	}

	return;

err:
	DMEMIT("Error");
}

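/*
 * Report the thin device's pool device to the callout, clamped to the whole
 * pool blocks covered by the length currently bound to the pool target, so
 * that the device-mapper core can stack queue limits over it.
 */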
static int thin_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	sector_t blocks;
	struct thin_c *tc = ti->private;
	struct pool *pool = tc->pool;

	/*
	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
	 * we follow a more convoluted path through to the pool's target.
	 */
	if (!pool->ti)
		return 0;	/* nothing is bound */

	blocks = pool->ti->len;
	(void) sector_div(blocks, pool->sectors_per_block);
	if (blocks)
		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);

	return 0;
}

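/*
 * Export discard limits: the granularity is one pool block (only discards
 * covering whole blocks can free space), and a single discard bio is kept
 * within BIO_PRISON_MAX_RANGE blocks so it can be covered by one
 * bio-prison cell.
 */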
static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct thin_c *tc = ti->private;
	struct pool *pool = tc->pool;

	if (pool->pf.discard_enabled) {
		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
		limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;
	}
}

static struct target_type thin_target = {
	.name = "thin",
	.version = {1, 23, 0},
	.module	= THIS_MODULE,
	.ctr = thin_ctr,
	.dtr = thin_dtr,
	.map = thin_map,
	.end_io = thin_endio,
	.preresume = thin_preresume,
	.presuspend = thin_presuspend,
	.postsuspend = thin_postsuspend,
	.status = thin_status,
	.iterate_devices = thin_iterate_devices,
	.io_hints = thin_io_hints,
};

/*----------------------------------------------------------------*/

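/*
 * Module init/exit: set up the pool table and the mapping cache, then
 * register the "thin" and "pool" targets, unwinding in reverse order on
 * failure.
 */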
static int __init dm_thin_init(void)
{
	int r = -ENOMEM;

	pool_table_init();

	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
	if (!_new_mapping_cache)
		return r;

	r = dm_register_target(&thin_target);
	if (r)
		goto bad_new_mapping_cache;

	r = dm_register_target(&pool_target);
	if (r)
		goto bad_thin_target;

	return 0;

bad_thin_target:
	dm_unregister_target(&thin_target);
bad_new_mapping_cache:
	kmem_cache_destroy(_new_mapping_cache);

	return r;
}

static void dm_thin_exit(void)
{
	dm_unregister_target(&thin_target);
	dm_unregister_target(&pool_target);

	kmem_cache_destroy(_new_mapping_cache);

	pool_table_exit();
}

module_init(dm_thin_init);
module_exit(dm_thin_exit);

module_param_named(no_space_timeout, no_space_timeout_secs, uint, 0644);
MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
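
/*
 * The timeout can also be changed at runtime, typically via sysfs
 * (assuming the module is built as dm_thin_pool):
 *   echo 120 > /sys/module/dm_thin_pool/parameters/no_space_timeout
 * A value of 0 disables the timeout, leaving I/O queued indefinitely while
 * the pool is out of data space.
 */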

MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");