162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2002 Sistina Software (UK) Limited.
462306a36Sopenharmony_ci * Copyright (C) 2006 Red Hat GmbH
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This file is released under the GPL.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Kcopyd provides a simple interface for copying an area of one
962306a36Sopenharmony_ci * block-device to one or more other block-devices, with an asynchronous
1062306a36Sopenharmony_ci * completion notification.
1162306a36Sopenharmony_ci */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <linux/types.h>
1462306a36Sopenharmony_ci#include <linux/atomic.h>
1562306a36Sopenharmony_ci#include <linux/blkdev.h>
1662306a36Sopenharmony_ci#include <linux/fs.h>
1762306a36Sopenharmony_ci#include <linux/init.h>
1862306a36Sopenharmony_ci#include <linux/list.h>
1962306a36Sopenharmony_ci#include <linux/mempool.h>
2062306a36Sopenharmony_ci#include <linux/module.h>
2162306a36Sopenharmony_ci#include <linux/pagemap.h>
2262306a36Sopenharmony_ci#include <linux/slab.h>
2362306a36Sopenharmony_ci#include <linux/vmalloc.h>
2462306a36Sopenharmony_ci#include <linux/workqueue.h>
2562306a36Sopenharmony_ci#include <linux/mutex.h>
2662306a36Sopenharmony_ci#include <linux/delay.h>
2762306a36Sopenharmony_ci#include <linux/device-mapper.h>
2862306a36Sopenharmony_ci#include <linux/dm-kcopyd.h>
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include "dm-core.h"
3162306a36Sopenharmony_ci
/*
 * SPLIT_COUNT: each object in the job slab cache is sized to hold
 *              SPLIT_COUNT + 1 jobs (see dm_kcopyd_init()).
 * MIN_JOBS:    presumably the minimum number of preallocated jobs per
 *              client - TODO confirm at the mempool set-up site.
 */
#define SPLIT_COUNT			8
#define MIN_JOBS			8

/* Default and upper bound (in KB) for the sub-job size module parameter. */
#define DEFAULT_SUB_JOB_SIZE_KB		512
#define MAX_SUB_JOB_SIZE_KB		1024
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic unsigned int kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cimodule_param(kcopyd_subjob_size_kb, uint, 0644);
4162306a36Sopenharmony_ciMODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_cistatic unsigned int dm_get_kcopyd_subjob_size(void)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	unsigned int sub_job_size_kb;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
4862306a36Sopenharmony_ci						DEFAULT_SUB_JOB_SIZE_KB,
4962306a36Sopenharmony_ci						MAX_SUB_JOB_SIZE_KB);
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	return sub_job_size_kb << 1;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci/*
5562306a36Sopenharmony_ci *----------------------------------------------------------------
5662306a36Sopenharmony_ci * Each kcopyd client has its own little pool of preallocated
5762306a36Sopenharmony_ci * pages for kcopyd io.
5862306a36Sopenharmony_ci *---------------------------------------------------------------
5962306a36Sopenharmony_ci */
6062306a36Sopenharmony_cistruct dm_kcopyd_client {
6162306a36Sopenharmony_ci	struct page_list *pages;
6262306a36Sopenharmony_ci	unsigned int nr_reserved_pages;
6362306a36Sopenharmony_ci	unsigned int nr_free_pages;
6462306a36Sopenharmony_ci	unsigned int sub_job_size;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	struct dm_io_client *io_client;
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	wait_queue_head_t destroyq;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	mempool_t job_pool;
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	struct workqueue_struct *kcopyd_wq;
7362306a36Sopenharmony_ci	struct work_struct kcopyd_work;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	struct dm_kcopyd_throttle *throttle;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	atomic_t nr_jobs;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci/*
8062306a36Sopenharmony_ci * We maintain four lists of jobs:
8162306a36Sopenharmony_ci *
8262306a36Sopenharmony_ci * i)   jobs waiting for pages
8362306a36Sopenharmony_ci * ii)  jobs that have pages, and are waiting for the io to be issued.
8462306a36Sopenharmony_ci * iii) jobs that don't need to do any IO and just run a callback
8562306a36Sopenharmony_ci * iv) jobs that have completed.
8662306a36Sopenharmony_ci *
8762306a36Sopenharmony_ci * All four of these are protected by job_lock.
8862306a36Sopenharmony_ci */
8962306a36Sopenharmony_ci	spinlock_t job_lock;
9062306a36Sopenharmony_ci	struct list_head callback_jobs;
9162306a36Sopenharmony_ci	struct list_head complete_jobs;
9262306a36Sopenharmony_ci	struct list_head io_jobs;
9362306a36Sopenharmony_ci	struct list_head pages_jobs;
9462306a36Sopenharmony_ci};
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistatic struct page_list zero_page_list;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic DEFINE_SPINLOCK(throttle_spinlock);
9962306a36Sopenharmony_ci
/*
 * IO/IDLE accounting decays slowly: once total_period reaches
 * (1 << ACCOUNT_INTERVAL_SHIFT), io_job_start() shifts both the io and
 * the total counters right (by at least one bit).
 */
#define ACCOUNT_INTERVAL_SHIFT		SHIFT_HZ

/*
 * Duration of one throttling sleep in microseconds (100000 us = 100 ms);
 * the value is handed to fsleep().
 *
 * The value was decided experimentally.
 * Smaller values seem to cause an increased copy rate above the limit.
 * The reason for this is unknown but possibly due to jiffies rounding errors
 * or read/write cache inside the disk.
 */
#define SLEEP_USEC			100000

/*
 * Upper bound on the number of sleeps in one io_job_start() call.  There is
 * a theoretical livelock if more kcopyd clients do work simultaneously,
 * which this limit avoids.
 */
#define MAX_SLEEPS			10
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_cistatic void io_job_start(struct dm_kcopyd_throttle *t)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	unsigned int throttle, now, difference;
12662306a36Sopenharmony_ci	int slept = 0, skew;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	if (unlikely(!t))
12962306a36Sopenharmony_ci		return;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_citry_again:
13262306a36Sopenharmony_ci	spin_lock_irq(&throttle_spinlock);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	throttle = READ_ONCE(t->throttle);
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	if (likely(throttle >= 100))
13762306a36Sopenharmony_ci		goto skip_limit;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	now = jiffies;
14062306a36Sopenharmony_ci	difference = now - t->last_jiffies;
14162306a36Sopenharmony_ci	t->last_jiffies = now;
14262306a36Sopenharmony_ci	if (t->num_io_jobs)
14362306a36Sopenharmony_ci		t->io_period += difference;
14462306a36Sopenharmony_ci	t->total_period += difference;
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	/*
14762306a36Sopenharmony_ci	 * Maintain sane values if we got a temporary overflow.
14862306a36Sopenharmony_ci	 */
14962306a36Sopenharmony_ci	if (unlikely(t->io_period > t->total_period))
15062306a36Sopenharmony_ci		t->io_period = t->total_period;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
15362306a36Sopenharmony_ci		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci		t->total_period >>= shift;
15662306a36Sopenharmony_ci		t->io_period >>= shift;
15762306a36Sopenharmony_ci	}
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	skew = t->io_period - throttle * t->total_period / 100;
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
16262306a36Sopenharmony_ci		slept++;
16362306a36Sopenharmony_ci		spin_unlock_irq(&throttle_spinlock);
16462306a36Sopenharmony_ci		fsleep(SLEEP_USEC);
16562306a36Sopenharmony_ci		goto try_again;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ciskip_limit:
16962306a36Sopenharmony_ci	t->num_io_jobs++;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	spin_unlock_irq(&throttle_spinlock);
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_cistatic void io_job_finish(struct dm_kcopyd_throttle *t)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	unsigned long flags;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	if (unlikely(!t))
17962306a36Sopenharmony_ci		return;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	spin_lock_irqsave(&throttle_spinlock, flags);
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	t->num_io_jobs--;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	if (likely(READ_ONCE(t->throttle) >= 100))
18662306a36Sopenharmony_ci		goto skip_limit;
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	if (!t->num_io_jobs) {
18962306a36Sopenharmony_ci		unsigned int now, difference;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci		now = jiffies;
19262306a36Sopenharmony_ci		difference = now - t->last_jiffies;
19362306a36Sopenharmony_ci		t->last_jiffies = now;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci		t->io_period += difference;
19662306a36Sopenharmony_ci		t->total_period += difference;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci		/*
19962306a36Sopenharmony_ci		 * Maintain sane values if we got a temporary overflow.
20062306a36Sopenharmony_ci		 */
20162306a36Sopenharmony_ci		if (unlikely(t->io_period > t->total_period))
20262306a36Sopenharmony_ci			t->io_period = t->total_period;
20362306a36Sopenharmony_ci	}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ciskip_limit:
20662306a36Sopenharmony_ci	spin_unlock_irqrestore(&throttle_spinlock, flags);
20762306a36Sopenharmony_ci}
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_cistatic void wake(struct dm_kcopyd_client *kc)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci/*
21662306a36Sopenharmony_ci * Obtain one page for the use of kcopyd.
21762306a36Sopenharmony_ci */
21862306a36Sopenharmony_cistatic struct page_list *alloc_pl(gfp_t gfp)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	struct page_list *pl;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	pl = kmalloc(sizeof(*pl), gfp);
22362306a36Sopenharmony_ci	if (!pl)
22462306a36Sopenharmony_ci		return NULL;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	pl->page = alloc_page(gfp | __GFP_HIGHMEM);
22762306a36Sopenharmony_ci	if (!pl->page) {
22862306a36Sopenharmony_ci		kfree(pl);
22962306a36Sopenharmony_ci		return NULL;
23062306a36Sopenharmony_ci	}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	return pl;
23362306a36Sopenharmony_ci}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_cistatic void free_pl(struct page_list *pl)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	__free_page(pl->page);
23862306a36Sopenharmony_ci	kfree(pl);
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/*
24262306a36Sopenharmony_ci * Add the provided pages to a client's free page list, releasing
24362306a36Sopenharmony_ci * back to the system any beyond the reserved_pages limit.
24462306a36Sopenharmony_ci */
24562306a36Sopenharmony_cistatic void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci	struct page_list *next;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	do {
25062306a36Sopenharmony_ci		next = pl->next;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci		if (kc->nr_free_pages >= kc->nr_reserved_pages)
25362306a36Sopenharmony_ci			free_pl(pl);
25462306a36Sopenharmony_ci		else {
25562306a36Sopenharmony_ci			pl->next = kc->pages;
25662306a36Sopenharmony_ci			kc->pages = pl;
25762306a36Sopenharmony_ci			kc->nr_free_pages++;
25862306a36Sopenharmony_ci		}
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci		pl = next;
26162306a36Sopenharmony_ci	} while (pl);
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic int kcopyd_get_pages(struct dm_kcopyd_client *kc,
26562306a36Sopenharmony_ci			    unsigned int nr, struct page_list **pages)
26662306a36Sopenharmony_ci{
26762306a36Sopenharmony_ci	struct page_list *pl;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	*pages = NULL;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	do {
27262306a36Sopenharmony_ci		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
27362306a36Sopenharmony_ci		if (unlikely(!pl)) {
27462306a36Sopenharmony_ci			/* Use reserved pages */
27562306a36Sopenharmony_ci			pl = kc->pages;
27662306a36Sopenharmony_ci			if (unlikely(!pl))
27762306a36Sopenharmony_ci				goto out_of_memory;
27862306a36Sopenharmony_ci			kc->pages = pl->next;
27962306a36Sopenharmony_ci			kc->nr_free_pages--;
28062306a36Sopenharmony_ci		}
28162306a36Sopenharmony_ci		pl->next = *pages;
28262306a36Sopenharmony_ci		*pages = pl;
28362306a36Sopenharmony_ci	} while (--nr);
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci	return 0;
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ciout_of_memory:
28862306a36Sopenharmony_ci	if (*pages)
28962306a36Sopenharmony_ci		kcopyd_put_pages(kc, *pages);
29062306a36Sopenharmony_ci	return -ENOMEM;
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci/*
29462306a36Sopenharmony_ci * These three functions resize the page pool.
29562306a36Sopenharmony_ci */
29662306a36Sopenharmony_cistatic void drop_pages(struct page_list *pl)
29762306a36Sopenharmony_ci{
29862306a36Sopenharmony_ci	struct page_list *next;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	while (pl) {
30162306a36Sopenharmony_ci		next = pl->next;
30262306a36Sopenharmony_ci		free_pl(pl);
30362306a36Sopenharmony_ci		pl = next;
30462306a36Sopenharmony_ci	}
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci/*
30862306a36Sopenharmony_ci * Allocate and reserve nr_pages for the use of a specific client.
30962306a36Sopenharmony_ci */
31062306a36Sopenharmony_cistatic int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned int nr_pages)
31162306a36Sopenharmony_ci{
31262306a36Sopenharmony_ci	unsigned int i;
31362306a36Sopenharmony_ci	struct page_list *pl = NULL, *next;
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++) {
31662306a36Sopenharmony_ci		next = alloc_pl(GFP_KERNEL);
31762306a36Sopenharmony_ci		if (!next) {
31862306a36Sopenharmony_ci			if (pl)
31962306a36Sopenharmony_ci				drop_pages(pl);
32062306a36Sopenharmony_ci			return -ENOMEM;
32162306a36Sopenharmony_ci		}
32262306a36Sopenharmony_ci		next->next = pl;
32362306a36Sopenharmony_ci		pl = next;
32462306a36Sopenharmony_ci	}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	kc->nr_reserved_pages += nr_pages;
32762306a36Sopenharmony_ci	kcopyd_put_pages(kc, pl);
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	return 0;
33062306a36Sopenharmony_ci}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_cistatic void client_free_pages(struct dm_kcopyd_client *kc)
33362306a36Sopenharmony_ci{
33462306a36Sopenharmony_ci	BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
33562306a36Sopenharmony_ci	drop_pages(kc->pages);
33662306a36Sopenharmony_ci	kc->pages = NULL;
33762306a36Sopenharmony_ci	kc->nr_free_pages = kc->nr_reserved_pages = 0;
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci/*
34162306a36Sopenharmony_ci *---------------------------------------------------------------
34262306a36Sopenharmony_ci * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
34362306a36Sopenharmony_ci * for this reason we use a mempool to prevent the client from
34462306a36Sopenharmony_ci * ever having to do io (which could cause a deadlock).
34562306a36Sopenharmony_ci *---------------------------------------------------------------
34662306a36Sopenharmony_ci */
34762306a36Sopenharmony_cistruct kcopyd_job {
34862306a36Sopenharmony_ci	struct dm_kcopyd_client *kc;
34962306a36Sopenharmony_ci	struct list_head list;
35062306a36Sopenharmony_ci	unsigned int flags;
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	/*
35362306a36Sopenharmony_ci	 * Error state of the job.
35462306a36Sopenharmony_ci	 */
35562306a36Sopenharmony_ci	int read_err;
35662306a36Sopenharmony_ci	unsigned long write_err;
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	/*
35962306a36Sopenharmony_ci	 * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES.
36062306a36Sopenharmony_ci	 */
36162306a36Sopenharmony_ci	enum req_op op;
36262306a36Sopenharmony_ci	struct dm_io_region source;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	/*
36562306a36Sopenharmony_ci	 * The destinations for the transfer.
36662306a36Sopenharmony_ci	 */
36762306a36Sopenharmony_ci	unsigned int num_dests;
36862306a36Sopenharmony_ci	struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	struct page_list *pages;
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	/*
37362306a36Sopenharmony_ci	 * Set this to ensure you are notified when the job has
37462306a36Sopenharmony_ci	 * completed.  'context' is for callback to use.
37562306a36Sopenharmony_ci	 */
37662306a36Sopenharmony_ci	dm_kcopyd_notify_fn fn;
37762306a36Sopenharmony_ci	void *context;
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	/*
38062306a36Sopenharmony_ci	 * These fields are only used if the job has been split
38162306a36Sopenharmony_ci	 * into more manageable parts.
38262306a36Sopenharmony_ci	 */
38362306a36Sopenharmony_ci	struct mutex lock;
38462306a36Sopenharmony_ci	atomic_t sub_jobs;
38562306a36Sopenharmony_ci	sector_t progress;
38662306a36Sopenharmony_ci	sector_t write_offset;
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	struct kcopyd_job *master_job;
38962306a36Sopenharmony_ci};
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_cistatic struct kmem_cache *_job_cache;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ciint __init dm_kcopyd_init(void)
39462306a36Sopenharmony_ci{
39562306a36Sopenharmony_ci	_job_cache = kmem_cache_create("kcopyd_job",
39662306a36Sopenharmony_ci				sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
39762306a36Sopenharmony_ci				__alignof__(struct kcopyd_job), 0, NULL);
39862306a36Sopenharmony_ci	if (!_job_cache)
39962306a36Sopenharmony_ci		return -ENOMEM;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	zero_page_list.next = &zero_page_list;
40262306a36Sopenharmony_ci	zero_page_list.page = ZERO_PAGE(0);
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	return 0;
40562306a36Sopenharmony_ci}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_civoid dm_kcopyd_exit(void)
40862306a36Sopenharmony_ci{
40962306a36Sopenharmony_ci	kmem_cache_destroy(_job_cache);
41062306a36Sopenharmony_ci	_job_cache = NULL;
41162306a36Sopenharmony_ci}
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci/*
41462306a36Sopenharmony_ci * Functions to push and pop a job onto the head of a given job
41562306a36Sopenharmony_ci * list.
41662306a36Sopenharmony_ci */
41762306a36Sopenharmony_cistatic struct kcopyd_job *pop_io_job(struct list_head *jobs,
41862306a36Sopenharmony_ci				     struct dm_kcopyd_client *kc)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	struct kcopyd_job *job;
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	/*
42362306a36Sopenharmony_ci	 * For I/O jobs, pop any read, any write without sequential write
42462306a36Sopenharmony_ci	 * constraint and sequential writes that are at the right position.
42562306a36Sopenharmony_ci	 */
42662306a36Sopenharmony_ci	list_for_each_entry(job, jobs, list) {
42762306a36Sopenharmony_ci		if (job->op == REQ_OP_READ ||
42862306a36Sopenharmony_ci		    !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
42962306a36Sopenharmony_ci			list_del(&job->list);
43062306a36Sopenharmony_ci			return job;
43162306a36Sopenharmony_ci		}
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci		if (job->write_offset == job->master_job->write_offset) {
43462306a36Sopenharmony_ci			job->master_job->write_offset += job->source.count;
43562306a36Sopenharmony_ci			list_del(&job->list);
43662306a36Sopenharmony_ci			return job;
43762306a36Sopenharmony_ci		}
43862306a36Sopenharmony_ci	}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	return NULL;
44162306a36Sopenharmony_ci}
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_cistatic struct kcopyd_job *pop(struct list_head *jobs,
44462306a36Sopenharmony_ci			      struct dm_kcopyd_client *kc)
44562306a36Sopenharmony_ci{
44662306a36Sopenharmony_ci	struct kcopyd_job *job = NULL;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	spin_lock_irq(&kc->job_lock);
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	if (!list_empty(jobs)) {
45162306a36Sopenharmony_ci		if (jobs == &kc->io_jobs)
45262306a36Sopenharmony_ci			job = pop_io_job(jobs, kc);
45362306a36Sopenharmony_ci		else {
45462306a36Sopenharmony_ci			job = list_entry(jobs->next, struct kcopyd_job, list);
45562306a36Sopenharmony_ci			list_del(&job->list);
45662306a36Sopenharmony_ci		}
45762306a36Sopenharmony_ci	}
45862306a36Sopenharmony_ci	spin_unlock_irq(&kc->job_lock);
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	return job;
46162306a36Sopenharmony_ci}
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_cistatic void push(struct list_head *jobs, struct kcopyd_job *job)
46462306a36Sopenharmony_ci{
46562306a36Sopenharmony_ci	unsigned long flags;
46662306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	spin_lock_irqsave(&kc->job_lock, flags);
46962306a36Sopenharmony_ci	list_add_tail(&job->list, jobs);
47062306a36Sopenharmony_ci	spin_unlock_irqrestore(&kc->job_lock, flags);
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_cistatic void push_head(struct list_head *jobs, struct kcopyd_job *job)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	spin_lock_irq(&kc->job_lock);
47962306a36Sopenharmony_ci	list_add(&job->list, jobs);
48062306a36Sopenharmony_ci	spin_unlock_irq(&kc->job_lock);
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci/*
48462306a36Sopenharmony_ci * These three functions process 1 item from the corresponding
48562306a36Sopenharmony_ci * job list.
48662306a36Sopenharmony_ci *
48762306a36Sopenharmony_ci * They return:
48862306a36Sopenharmony_ci * < 0: error
48962306a36Sopenharmony_ci *   0: success
49062306a36Sopenharmony_ci * > 0: can't process yet.
49162306a36Sopenharmony_ci */
49262306a36Sopenharmony_cistatic int run_complete_job(struct kcopyd_job *job)
49362306a36Sopenharmony_ci{
49462306a36Sopenharmony_ci	void *context = job->context;
49562306a36Sopenharmony_ci	int read_err = job->read_err;
49662306a36Sopenharmony_ci	unsigned long write_err = job->write_err;
49762306a36Sopenharmony_ci	dm_kcopyd_notify_fn fn = job->fn;
49862306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci	if (job->pages && job->pages != &zero_page_list)
50162306a36Sopenharmony_ci		kcopyd_put_pages(kc, job->pages);
50262306a36Sopenharmony_ci	/*
50362306a36Sopenharmony_ci	 * If this is the master job, the sub jobs have already
50462306a36Sopenharmony_ci	 * completed so we can free everything.
50562306a36Sopenharmony_ci	 */
50662306a36Sopenharmony_ci	if (job->master_job == job) {
50762306a36Sopenharmony_ci		mutex_destroy(&job->lock);
50862306a36Sopenharmony_ci		mempool_free(job, &kc->job_pool);
50962306a36Sopenharmony_ci	}
51062306a36Sopenharmony_ci	fn(read_err, write_err, context);
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	if (atomic_dec_and_test(&kc->nr_jobs))
51362306a36Sopenharmony_ci		wake_up(&kc->destroyq);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	cond_resched();
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	return 0;
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_cistatic void complete_io(unsigned long error, void *context)
52162306a36Sopenharmony_ci{
52262306a36Sopenharmony_ci	struct kcopyd_job *job = context;
52362306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	io_job_finish(kc->throttle);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	if (error) {
52862306a36Sopenharmony_ci		if (op_is_write(job->op))
52962306a36Sopenharmony_ci			job->write_err |= error;
53062306a36Sopenharmony_ci		else
53162306a36Sopenharmony_ci			job->read_err = 1;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci		if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) {
53462306a36Sopenharmony_ci			push(&kc->complete_jobs, job);
53562306a36Sopenharmony_ci			wake(kc);
53662306a36Sopenharmony_ci			return;
53762306a36Sopenharmony_ci		}
53862306a36Sopenharmony_ci	}
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (op_is_write(job->op))
54162306a36Sopenharmony_ci		push(&kc->complete_jobs, job);
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	else {
54462306a36Sopenharmony_ci		job->op = REQ_OP_WRITE;
54562306a36Sopenharmony_ci		push(&kc->io_jobs, job);
54662306a36Sopenharmony_ci	}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	wake(kc);
54962306a36Sopenharmony_ci}
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci/*
55262306a36Sopenharmony_ci * Request io on as many buffer heads as we can currently get for
55362306a36Sopenharmony_ci * a particular job.
55462306a36Sopenharmony_ci */
55562306a36Sopenharmony_cistatic int run_io_job(struct kcopyd_job *job)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	int r;
55862306a36Sopenharmony_ci	struct dm_io_request io_req = {
55962306a36Sopenharmony_ci		.bi_opf = job->op,
56062306a36Sopenharmony_ci		.mem.type = DM_IO_PAGE_LIST,
56162306a36Sopenharmony_ci		.mem.ptr.pl = job->pages,
56262306a36Sopenharmony_ci		.mem.offset = 0,
56362306a36Sopenharmony_ci		.notify.fn = complete_io,
56462306a36Sopenharmony_ci		.notify.context = job,
56562306a36Sopenharmony_ci		.client = job->kc->io_client,
56662306a36Sopenharmony_ci	};
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	/*
56962306a36Sopenharmony_ci	 * If we need to write sequentially and some reads or writes failed,
57062306a36Sopenharmony_ci	 * no point in continuing.
57162306a36Sopenharmony_ci	 */
57262306a36Sopenharmony_ci	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
57362306a36Sopenharmony_ci	    job->master_job->write_err) {
57462306a36Sopenharmony_ci		job->write_err = job->master_job->write_err;
57562306a36Sopenharmony_ci		return -EIO;
57662306a36Sopenharmony_ci	}
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	io_job_start(job->kc->throttle);
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci	if (job->op == REQ_OP_READ)
58162306a36Sopenharmony_ci		r = dm_io(&io_req, 1, &job->source, NULL, IOPRIO_DEFAULT);
58262306a36Sopenharmony_ci	else
58362306a36Sopenharmony_ci		r = dm_io(&io_req, job->num_dests, job->dests, NULL, IOPRIO_DEFAULT);
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	return r;
58662306a36Sopenharmony_ci}
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_cistatic int run_pages_job(struct kcopyd_job *job)
58962306a36Sopenharmony_ci{
59062306a36Sopenharmony_ci	int r;
59162306a36Sopenharmony_ci	unsigned int nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
59462306a36Sopenharmony_ci	if (!r) {
59562306a36Sopenharmony_ci		/* this job is ready for io */
59662306a36Sopenharmony_ci		push(&job->kc->io_jobs, job);
59762306a36Sopenharmony_ci		return 0;
59862306a36Sopenharmony_ci	}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (r == -ENOMEM)
60162306a36Sopenharmony_ci		/* can't complete now */
60262306a36Sopenharmony_ci		return 1;
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	return r;
60562306a36Sopenharmony_ci}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci/*
60862306a36Sopenharmony_ci * Run through a list for as long as possible.  Returns the count
60962306a36Sopenharmony_ci * of successful jobs.
61062306a36Sopenharmony_ci */
61162306a36Sopenharmony_cistatic int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
61262306a36Sopenharmony_ci			int (*fn)(struct kcopyd_job *))
61362306a36Sopenharmony_ci{
61462306a36Sopenharmony_ci	struct kcopyd_job *job;
61562306a36Sopenharmony_ci	int r, count = 0;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	while ((job = pop(jobs, kc))) {
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci		r = fn(job);
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci		if (r < 0) {
62262306a36Sopenharmony_ci			/* error this rogue job */
62362306a36Sopenharmony_ci			if (op_is_write(job->op))
62462306a36Sopenharmony_ci				job->write_err = (unsigned long) -1L;
62562306a36Sopenharmony_ci			else
62662306a36Sopenharmony_ci				job->read_err = 1;
62762306a36Sopenharmony_ci			push(&kc->complete_jobs, job);
62862306a36Sopenharmony_ci			wake(kc);
62962306a36Sopenharmony_ci			break;
63062306a36Sopenharmony_ci		}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci		if (r > 0) {
63362306a36Sopenharmony_ci			/*
63462306a36Sopenharmony_ci			 * We couldn't service this job ATM, so
63562306a36Sopenharmony_ci			 * push this job back onto the list.
63662306a36Sopenharmony_ci			 */
63762306a36Sopenharmony_ci			push_head(jobs, job);
63862306a36Sopenharmony_ci			break;
63962306a36Sopenharmony_ci		}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci		count++;
64262306a36Sopenharmony_ci	}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	return count;
64562306a36Sopenharmony_ci}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci/*
64862306a36Sopenharmony_ci * kcopyd does this every time it's woken up.
64962306a36Sopenharmony_ci */
65062306a36Sopenharmony_cistatic void do_work(struct work_struct *work)
65162306a36Sopenharmony_ci{
65262306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = container_of(work,
65362306a36Sopenharmony_ci					struct dm_kcopyd_client, kcopyd_work);
65462306a36Sopenharmony_ci	struct blk_plug plug;
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	/*
65762306a36Sopenharmony_ci	 * The order that these are called is *very* important.
65862306a36Sopenharmony_ci	 * complete jobs can free some pages for pages jobs.
65962306a36Sopenharmony_ci	 * Pages jobs when successful will jump onto the io jobs
66062306a36Sopenharmony_ci	 * list.  io jobs call wake when they complete and it all
66162306a36Sopenharmony_ci	 * starts again.
66262306a36Sopenharmony_ci	 */
66362306a36Sopenharmony_ci	spin_lock_irq(&kc->job_lock);
66462306a36Sopenharmony_ci	list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
66562306a36Sopenharmony_ci	spin_unlock_irq(&kc->job_lock);
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	blk_start_plug(&plug);
66862306a36Sopenharmony_ci	process_jobs(&kc->complete_jobs, kc, run_complete_job);
66962306a36Sopenharmony_ci	process_jobs(&kc->pages_jobs, kc, run_pages_job);
67062306a36Sopenharmony_ci	process_jobs(&kc->io_jobs, kc, run_io_job);
67162306a36Sopenharmony_ci	blk_finish_plug(&plug);
67262306a36Sopenharmony_ci}
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci/*
67562306a36Sopenharmony_ci * If we are copying a small region we just dispatch a single job
67662306a36Sopenharmony_ci * to do the copy, otherwise the io has to be split up into many
67762306a36Sopenharmony_ci * jobs.
67862306a36Sopenharmony_ci */
67962306a36Sopenharmony_cistatic void dispatch_job(struct kcopyd_job *job)
68062306a36Sopenharmony_ci{
68162306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	atomic_inc(&kc->nr_jobs);
68462306a36Sopenharmony_ci	if (unlikely(!job->source.count))
68562306a36Sopenharmony_ci		push(&kc->callback_jobs, job);
68662306a36Sopenharmony_ci	else if (job->pages == &zero_page_list)
68762306a36Sopenharmony_ci		push(&kc->io_jobs, job);
68862306a36Sopenharmony_ci	else
68962306a36Sopenharmony_ci		push(&kc->pages_jobs, job);
69062306a36Sopenharmony_ci	wake(kc);
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_cistatic void segment_complete(int read_err, unsigned long write_err,
69462306a36Sopenharmony_ci			     void *context)
69562306a36Sopenharmony_ci{
69662306a36Sopenharmony_ci	/* FIXME: tidy this function */
69762306a36Sopenharmony_ci	sector_t progress = 0;
69862306a36Sopenharmony_ci	sector_t count = 0;
69962306a36Sopenharmony_ci	struct kcopyd_job *sub_job = context;
70062306a36Sopenharmony_ci	struct kcopyd_job *job = sub_job->master_job;
70162306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	mutex_lock(&job->lock);
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	/* update the error */
70662306a36Sopenharmony_ci	if (read_err)
70762306a36Sopenharmony_ci		job->read_err = 1;
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	if (write_err)
71062306a36Sopenharmony_ci		job->write_err |= write_err;
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	/*
71362306a36Sopenharmony_ci	 * Only dispatch more work if there hasn't been an error.
71462306a36Sopenharmony_ci	 */
71562306a36Sopenharmony_ci	if ((!job->read_err && !job->write_err) ||
71662306a36Sopenharmony_ci	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) {
71762306a36Sopenharmony_ci		/* get the next chunk of work */
71862306a36Sopenharmony_ci		progress = job->progress;
71962306a36Sopenharmony_ci		count = job->source.count - progress;
72062306a36Sopenharmony_ci		if (count) {
72162306a36Sopenharmony_ci			if (count > kc->sub_job_size)
72262306a36Sopenharmony_ci				count = kc->sub_job_size;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci			job->progress += count;
72562306a36Sopenharmony_ci		}
72662306a36Sopenharmony_ci	}
72762306a36Sopenharmony_ci	mutex_unlock(&job->lock);
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	if (count) {
73062306a36Sopenharmony_ci		int i;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci		*sub_job = *job;
73362306a36Sopenharmony_ci		sub_job->write_offset = progress;
73462306a36Sopenharmony_ci		sub_job->source.sector += progress;
73562306a36Sopenharmony_ci		sub_job->source.count = count;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci		for (i = 0; i < job->num_dests; i++) {
73862306a36Sopenharmony_ci			sub_job->dests[i].sector += progress;
73962306a36Sopenharmony_ci			sub_job->dests[i].count = count;
74062306a36Sopenharmony_ci		}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci		sub_job->fn = segment_complete;
74362306a36Sopenharmony_ci		sub_job->context = sub_job;
74462306a36Sopenharmony_ci		dispatch_job(sub_job);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	} else if (atomic_dec_and_test(&job->sub_jobs)) {
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci		/*
74962306a36Sopenharmony_ci		 * Queue the completion callback to the kcopyd thread.
75062306a36Sopenharmony_ci		 *
75162306a36Sopenharmony_ci		 * Some callers assume that all the completions are called
75262306a36Sopenharmony_ci		 * from a single thread and don't race with each other.
75362306a36Sopenharmony_ci		 *
75462306a36Sopenharmony_ci		 * We must not call the callback directly here because this
75562306a36Sopenharmony_ci		 * code may not be executing in the thread.
75662306a36Sopenharmony_ci		 */
75762306a36Sopenharmony_ci		push(&kc->complete_jobs, job);
75862306a36Sopenharmony_ci		wake(kc);
75962306a36Sopenharmony_ci	}
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci/*
76362306a36Sopenharmony_ci * Create some sub jobs to share the work between them.
76462306a36Sopenharmony_ci */
76562306a36Sopenharmony_cistatic void split_job(struct kcopyd_job *master_job)
76662306a36Sopenharmony_ci{
76762306a36Sopenharmony_ci	int i;
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	atomic_inc(&master_job->kc->nr_jobs);
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
77262306a36Sopenharmony_ci	for (i = 0; i < SPLIT_COUNT; i++) {
77362306a36Sopenharmony_ci		master_job[i + 1].master_job = master_job;
77462306a36Sopenharmony_ci		segment_complete(0, 0u, &master_job[i + 1]);
77562306a36Sopenharmony_ci	}
77662306a36Sopenharmony_ci}
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_civoid dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
77962306a36Sopenharmony_ci		    unsigned int num_dests, struct dm_io_region *dests,
78062306a36Sopenharmony_ci		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
78162306a36Sopenharmony_ci{
78262306a36Sopenharmony_ci	struct kcopyd_job *job;
78362306a36Sopenharmony_ci	int i;
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	/*
78662306a36Sopenharmony_ci	 * Allocate an array of jobs consisting of one master job
78762306a36Sopenharmony_ci	 * followed by SPLIT_COUNT sub jobs.
78862306a36Sopenharmony_ci	 */
78962306a36Sopenharmony_ci	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
79062306a36Sopenharmony_ci	mutex_init(&job->lock);
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci	/*
79362306a36Sopenharmony_ci	 * set up for the read.
79462306a36Sopenharmony_ci	 */
79562306a36Sopenharmony_ci	job->kc = kc;
79662306a36Sopenharmony_ci	job->flags = flags;
79762306a36Sopenharmony_ci	job->read_err = 0;
79862306a36Sopenharmony_ci	job->write_err = 0;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	job->num_dests = num_dests;
80162306a36Sopenharmony_ci	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	/*
80462306a36Sopenharmony_ci	 * If one of the destination is a host-managed zoned block device,
80562306a36Sopenharmony_ci	 * we need to write sequentially. If one of the destination is a
80662306a36Sopenharmony_ci	 * host-aware device, then leave it to the caller to choose what to do.
80762306a36Sopenharmony_ci	 */
80862306a36Sopenharmony_ci	if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
80962306a36Sopenharmony_ci		for (i = 0; i < job->num_dests; i++) {
81062306a36Sopenharmony_ci			if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
81162306a36Sopenharmony_ci				job->flags |= BIT(DM_KCOPYD_WRITE_SEQ);
81262306a36Sopenharmony_ci				break;
81362306a36Sopenharmony_ci			}
81462306a36Sopenharmony_ci		}
81562306a36Sopenharmony_ci	}
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	/*
81862306a36Sopenharmony_ci	 * If we need to write sequentially, errors cannot be ignored.
81962306a36Sopenharmony_ci	 */
82062306a36Sopenharmony_ci	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
82162306a36Sopenharmony_ci	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))
82262306a36Sopenharmony_ci		job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR);
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	if (from) {
82562306a36Sopenharmony_ci		job->source = *from;
82662306a36Sopenharmony_ci		job->pages = NULL;
82762306a36Sopenharmony_ci		job->op = REQ_OP_READ;
82862306a36Sopenharmony_ci	} else {
82962306a36Sopenharmony_ci		memset(&job->source, 0, sizeof(job->source));
83062306a36Sopenharmony_ci		job->source.count = job->dests[0].count;
83162306a36Sopenharmony_ci		job->pages = &zero_page_list;
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci		/*
83462306a36Sopenharmony_ci		 * Use WRITE ZEROES to optimize zeroing if all dests support it.
83562306a36Sopenharmony_ci		 */
83662306a36Sopenharmony_ci		job->op = REQ_OP_WRITE_ZEROES;
83762306a36Sopenharmony_ci		for (i = 0; i < job->num_dests; i++)
83862306a36Sopenharmony_ci			if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
83962306a36Sopenharmony_ci				job->op = REQ_OP_WRITE;
84062306a36Sopenharmony_ci				break;
84162306a36Sopenharmony_ci			}
84262306a36Sopenharmony_ci	}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	job->fn = fn;
84562306a36Sopenharmony_ci	job->context = context;
84662306a36Sopenharmony_ci	job->master_job = job;
84762306a36Sopenharmony_ci	job->write_offset = 0;
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	if (job->source.count <= kc->sub_job_size)
85062306a36Sopenharmony_ci		dispatch_job(job);
85162306a36Sopenharmony_ci	else {
85262306a36Sopenharmony_ci		job->progress = 0;
85362306a36Sopenharmony_ci		split_job(job);
85462306a36Sopenharmony_ci	}
85562306a36Sopenharmony_ci}
85662306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_copy);
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_civoid dm_kcopyd_zero(struct dm_kcopyd_client *kc,
85962306a36Sopenharmony_ci		    unsigned int num_dests, struct dm_io_region *dests,
86062306a36Sopenharmony_ci		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
86162306a36Sopenharmony_ci{
86262306a36Sopenharmony_ci	dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
86362306a36Sopenharmony_ci}
86462306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_zero);
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_civoid *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
86762306a36Sopenharmony_ci				 dm_kcopyd_notify_fn fn, void *context)
86862306a36Sopenharmony_ci{
86962306a36Sopenharmony_ci	struct kcopyd_job *job;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	memset(job, 0, sizeof(struct kcopyd_job));
87462306a36Sopenharmony_ci	job->kc = kc;
87562306a36Sopenharmony_ci	job->fn = fn;
87662306a36Sopenharmony_ci	job->context = context;
87762306a36Sopenharmony_ci	job->master_job = job;
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	atomic_inc(&kc->nr_jobs);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	return job;
88262306a36Sopenharmony_ci}
88362306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_prepare_callback);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_civoid dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
88662306a36Sopenharmony_ci{
88762306a36Sopenharmony_ci	struct kcopyd_job *job = j;
88862306a36Sopenharmony_ci	struct dm_kcopyd_client *kc = job->kc;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	job->read_err = read_err;
89162306a36Sopenharmony_ci	job->write_err = write_err;
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	push(&kc->callback_jobs, job);
89462306a36Sopenharmony_ci	wake(kc);
89562306a36Sopenharmony_ci}
89662306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_do_callback);
89762306a36Sopenharmony_ci
/*
 * Cancel a kcopyd job, e.g. because someone is deactivating a mirror.
 *
 * Not implemented: the stub below is compiled out and only ever
 * returns -1.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif  /*  0  */
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci/*
91162306a36Sopenharmony_ci *---------------------------------------------------------------
91262306a36Sopenharmony_ci * Client setup
91362306a36Sopenharmony_ci *---------------------------------------------------------------
91462306a36Sopenharmony_ci */
91562306a36Sopenharmony_cistruct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
91662306a36Sopenharmony_ci{
91762306a36Sopenharmony_ci	int r;
91862306a36Sopenharmony_ci	unsigned int reserve_pages;
91962306a36Sopenharmony_ci	struct dm_kcopyd_client *kc;
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	kc = kzalloc(sizeof(*kc), GFP_KERNEL);
92262306a36Sopenharmony_ci	if (!kc)
92362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	spin_lock_init(&kc->job_lock);
92662306a36Sopenharmony_ci	INIT_LIST_HEAD(&kc->callback_jobs);
92762306a36Sopenharmony_ci	INIT_LIST_HEAD(&kc->complete_jobs);
92862306a36Sopenharmony_ci	INIT_LIST_HEAD(&kc->io_jobs);
92962306a36Sopenharmony_ci	INIT_LIST_HEAD(&kc->pages_jobs);
93062306a36Sopenharmony_ci	kc->throttle = throttle;
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
93362306a36Sopenharmony_ci	if (r)
93462306a36Sopenharmony_ci		goto bad_slab;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	INIT_WORK(&kc->kcopyd_work, do_work);
93762306a36Sopenharmony_ci	kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
93862306a36Sopenharmony_ci	if (!kc->kcopyd_wq) {
93962306a36Sopenharmony_ci		r = -ENOMEM;
94062306a36Sopenharmony_ci		goto bad_workqueue;
94162306a36Sopenharmony_ci	}
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	kc->sub_job_size = dm_get_kcopyd_subjob_size();
94462306a36Sopenharmony_ci	reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	kc->pages = NULL;
94762306a36Sopenharmony_ci	kc->nr_reserved_pages = kc->nr_free_pages = 0;
94862306a36Sopenharmony_ci	r = client_reserve_pages(kc, reserve_pages);
94962306a36Sopenharmony_ci	if (r)
95062306a36Sopenharmony_ci		goto bad_client_pages;
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	kc->io_client = dm_io_client_create();
95362306a36Sopenharmony_ci	if (IS_ERR(kc->io_client)) {
95462306a36Sopenharmony_ci		r = PTR_ERR(kc->io_client);
95562306a36Sopenharmony_ci		goto bad_io_client;
95662306a36Sopenharmony_ci	}
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	init_waitqueue_head(&kc->destroyq);
95962306a36Sopenharmony_ci	atomic_set(&kc->nr_jobs, 0);
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	return kc;
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_cibad_io_client:
96462306a36Sopenharmony_ci	client_free_pages(kc);
96562306a36Sopenharmony_cibad_client_pages:
96662306a36Sopenharmony_ci	destroy_workqueue(kc->kcopyd_wq);
96762306a36Sopenharmony_cibad_workqueue:
96862306a36Sopenharmony_ci	mempool_exit(&kc->job_pool);
96962306a36Sopenharmony_cibad_slab:
97062306a36Sopenharmony_ci	kfree(kc);
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci	return ERR_PTR(r);
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_client_create);
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_civoid dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
97762306a36Sopenharmony_ci{
97862306a36Sopenharmony_ci	/* Wait for completion of all jobs submitted by this client. */
97962306a36Sopenharmony_ci	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	BUG_ON(!list_empty(&kc->callback_jobs));
98262306a36Sopenharmony_ci	BUG_ON(!list_empty(&kc->complete_jobs));
98362306a36Sopenharmony_ci	BUG_ON(!list_empty(&kc->io_jobs));
98462306a36Sopenharmony_ci	BUG_ON(!list_empty(&kc->pages_jobs));
98562306a36Sopenharmony_ci	destroy_workqueue(kc->kcopyd_wq);
98662306a36Sopenharmony_ci	dm_io_client_destroy(kc->io_client);
98762306a36Sopenharmony_ci	client_free_pages(kc);
98862306a36Sopenharmony_ci	mempool_exit(&kc->job_pool);
98962306a36Sopenharmony_ci	kfree(kc);
99062306a36Sopenharmony_ci}
99162306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_client_destroy);
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_civoid dm_kcopyd_client_flush(struct dm_kcopyd_client *kc)
99462306a36Sopenharmony_ci{
99562306a36Sopenharmony_ci	flush_workqueue(kc->kcopyd_wq);
99662306a36Sopenharmony_ci}
99762306a36Sopenharmony_ciEXPORT_SYMBOL(dm_kcopyd_client_flush);
998