162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2018 Red Hat. All rights reserved.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * This file is released under the GPL.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/device-mapper.h>
962306a36Sopenharmony_ci#include <linux/module.h>
1062306a36Sopenharmony_ci#include <linux/init.h>
1162306a36Sopenharmony_ci#include <linux/vmalloc.h>
1262306a36Sopenharmony_ci#include <linux/kthread.h>
1362306a36Sopenharmony_ci#include <linux/dm-io.h>
1462306a36Sopenharmony_ci#include <linux/dm-kcopyd.h>
1562306a36Sopenharmony_ci#include <linux/dax.h>
1662306a36Sopenharmony_ci#include <linux/pfn_t.h>
1762306a36Sopenharmony_ci#include <linux/libnvdimm.h>
1862306a36Sopenharmony_ci#include <linux/delay.h>
1962306a36Sopenharmony_ci#include "dm-io-tracker.h"
2062306a36Sopenharmony_ci
#define DM_MSG_PREFIX "writecache"

/* Built-in default tunables. */
#define HIGH_WATERMARK			50
#define LOW_WATERMARK			45
#define MAX_WRITEBACK_JOBS		min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
#define ENDIO_LATENCY			16
#define WRITEBACK_LATENCY		64
#define AUTOCOMMIT_BLOCKS_SSD		65536
#define AUTOCOMMIT_BLOCKS_PMEM		64
#define AUTOCOMMIT_MSEC			1000
#define MAX_AGE_DIV			16
/*
 * Parenthesized so the leading unary minus can never combine with an
 * operator or operand next to the macro at the point of use.
 */
#define MAX_AGE_UNSPECIFIED		(-1UL)
#define PAUSE_WRITEBACK			(HZ * 3)

/*
 * Number of metadata bytes tracked by one bit of the dirty bitmap
 * (see writecache_flush_region()); never smaller than one page.
 */
#define BITMAP_GRANULARITY	65536
#if BITMAP_GRANULARITY < PAGE_SIZE
#undef BITMAP_GRANULARITY
#define BITMAP_GRANULARITY	PAGE_SIZE
#endif
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX)
4262306a36Sopenharmony_ci#define DM_WRITECACHE_HAS_PMEM
4362306a36Sopenharmony_ci#endif
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM
4662306a36Sopenharmony_ci#define pmem_assign(dest, src)					\
4762306a36Sopenharmony_cido {								\
4862306a36Sopenharmony_ci	typeof(dest) uniq = (src);				\
4962306a36Sopenharmony_ci	memcpy_flushcache(&(dest), &uniq, sizeof(dest));	\
5062306a36Sopenharmony_ci} while (0)
5162306a36Sopenharmony_ci#else
5262306a36Sopenharmony_ci#define pmem_assign(dest, src)	((dest) = (src))
5362306a36Sopenharmony_ci#endif
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
5662306a36Sopenharmony_ci#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
5762306a36Sopenharmony_ci#endif
5862306a36Sopenharmony_ci
/*
 * Identification constants for the cache metadata; presumably compared
 * against the magic/version fields of struct wc_memory_superblock
 * (the users are not in this part of the file - confirm).
 */
#define MEMORY_SUPERBLOCK_MAGIC		0x23489321
#define MEMORY_SUPERBLOCK_VERSION	1
6162306a36Sopenharmony_ci
/*
 * Per-block metadata record stored in the cache metadata area.
 * Both fields are little-endian; they are written together by
 * write_original_sector_seq_count().
 */
struct wc_memory_entry {
	/* key under which the entry is looked up in wc->tree */
	__le64 original_sector;
	/* sequence number; clear_seq_count() stores -1 here */
	__le64 seq_count;
};
6662306a36Sopenharmony_ci
/*
 * Header found at the start of the metadata area (wc->memory_map, see sb()).
 * The union pads the named fields to eight __le64 words; the array of
 * per-block records follows immediately and is indexed by wc_entry.index
 * (see memory_entry()).
 */
struct wc_memory_superblock {
	union {
		struct {
			__le32 magic;
			__le32 version;
			__le32 block_size;
			__le32 pad;
			__le64 n_blocks;
			__le64 seq_count;
		};
		/* fixes the header size regardless of the fields above */
		__le64 padding[8];
	};
	struct wc_memory_entry entries[];
};
8162306a36Sopenharmony_ci
/*
 * In-core descriptor of one cache block.
 */
struct wc_entry {
	/* node in wc->tree (writecache_insert_entry()) or in wc->freetree */
	struct rb_node rb_node;
	/* link in wc->lru while the entry is inserted */
	struct list_head lru;
	unsigned short wc_list_contiguous;
#if BITS_PER_LONG == 64
	/* packed into one word together with the 47-bit block index */
	bool write_in_progress : 1;
	unsigned long index : 47;
#else
	bool write_in_progress;
	unsigned long index;
#endif
	/* jiffies value taken when the entry was inserted */
	unsigned long age;
#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
	/*
	 * CPU-endian copies of the stored metadata; read_original_sector()
	 * and read_seq_count() return these instead of reading the mapping.
	 */
	uint64_t original_sector;
	uint64_t seq_count;
#endif
};
9962306a36Sopenharmony_ci
/*
 * Mode predicates. Without pmem support they are the constant false, so
 * code guarded by them can be discarded at compile time.
 */
#ifdef DM_WRITECACHE_HAS_PMEM
#define WC_MODE_PMEM(wc)			((wc)->pmem_mode)
#define WC_MODE_FUA(wc)				((wc)->writeback_fua)
#else
#define WC_MODE_PMEM(wc)			false
#define WC_MODE_FUA(wc)				false
#endif
/* The free entries are kept sorted whenever the cache is not in pmem mode. */
#define WC_MODE_SORT_FREELIST(wc)		(!WC_MODE_PMEM(wc))
10862306a36Sopenharmony_ci
/*
 * Per-target state of one writecache instance.
 */
struct dm_writecache {
	/* taken and released through wc_lock()/wc_unlock() */
	struct mutex lock;
	/* inserted entries; writecache_insert_entry() adds at the head */
	struct list_head lru;
	/*
	 * Free entries. writecache_add_to_freelist() uses the rb-tree form
	 * when WC_MODE_SORT_FREELIST() is true.
	 */
	union {
		struct list_head freelist;
		struct {
			struct rb_root freetree;
			struct wc_entry *current_free;
		};
	};
	/* inserted entries ordered by original sector */
	struct rb_root tree;

	size_t freelist_size;
	size_t writeback_size;
	size_t freelist_high_watermark;
	size_t freelist_low_watermark;
	unsigned long max_age;
	unsigned long pause;

	unsigned int uncommitted_blocks;
	unsigned int autocommit_blocks;
	unsigned int max_writeback_jobs;

	/* first error recorded by writecache_error(); 0 means none */
	int error;

	unsigned long autocommit_jiffies;
	struct timer_list autocommit_timer;
	/* woken by writecache_error() */
	struct wait_queue_head freelist_wait;

	struct timer_list max_age_timer;

	/* indexed by I/O direction, see writecache_wait_for_ios() */
	atomic_t bio_in_progress[2];
	struct wait_queue_head bio_in_progress_wait[2];

	struct dm_target *ti;
	/* presumably the origin device - confirm against the constructor */
	struct dm_dev *dev;
	/* cache device: metadata is written to / DAX-mapped from it */
	struct dm_dev *ssd_dev;
	/* sector offset of the cache area inside ssd_dev */
	sector_t start_sector;
	/* start of the metadata, i.e. the superblock (see sb()) */
	void *memory_map;
	uint64_t memory_map_size;
	/* size of the metadata in sectors; data blocks follow (cache_sector()) */
	size_t metadata_sectors;
	size_t n_blocks;
	uint64_t seq_count;
	sector_t data_device_sectors;
	/* base address used by memory_data() */
	void *block_start;
	struct wc_entry *entries;
	unsigned int block_size;
	/* shift count used to turn a block index into a byte offset */
	unsigned char block_size_bits;

	bool pmem_mode:1;
	bool writeback_fua:1;

	bool overwrote_committed:1;
	/* memory_map was created with vmap() and must be vunmap()ed */
	bool memory_vmapped:1;

	/* NOTE(review): the *_set flags look like "option was given" markers - confirm */
	bool start_sector_set:1;
	bool high_wm_percent_set:1;
	bool low_wm_percent_set:1;
	bool max_writeback_jobs_set:1;
	bool autocommit_blocks_set:1;
	bool autocommit_time_set:1;
	bool max_age_set:1;
	bool writeback_fua_set:1;
	bool flush_on_suspend:1;
	bool cleaner:1;
	bool cleaner_set:1;
	bool metadata_only:1;
	bool pause_set:1;

	unsigned int high_wm_percent_value;
	unsigned int low_wm_percent_value;
	unsigned int autocommit_time_value;
	unsigned int max_age_value;
	unsigned int pause_value;

	unsigned int writeback_all;
	struct workqueue_struct *writeback_wq;
	struct work_struct writeback_work;
	struct work_struct flush_work;

	struct dm_io_tracker iot;

	/* dm-io client used for the metadata, superblock and flush requests */
	struct dm_io_client *dm_io;

	raw_spinlock_t endio_list_lock;
	struct list_head endio_list;
	struct task_struct *endio_thread;

	struct task_struct *flush_thread;
	struct bio_list flush_list;

	struct dm_kcopyd_client *dm_kcopyd;
	/* one bit per BITMAP_GRANULARITY bytes of metadata that need writing */
	unsigned long *dirty_bitmap;
	/* size of dirty_bitmap in bytes */
	unsigned int dirty_bitmap_size;

	struct bio_set bio_set;
	mempool_t copy_pool;

	/* event counters */
	struct {
		unsigned long long reads;
		unsigned long long read_hits;
		unsigned long long writes;
		unsigned long long write_hits_uncommitted;
		unsigned long long write_hits_committed;
		unsigned long long writes_around;
		unsigned long long writes_allocate;
		unsigned long long writes_blocked_on_freelist;
		unsigned long long flushes;
		unsigned long long discards;
	} stats;
};
22062306a36Sopenharmony_ci
/* Number of entry pointers embedded directly in struct writeback_struct. */
#define WB_LIST_INLINE		16

/*
 * Bookkeeping for one writeback bio.
 * NOTE(review): wc_list presumably points at wc_list_inline unless more
 * than WB_LIST_INLINE entries are needed - confirm against the allocator.
 */
struct writeback_struct {
	struct list_head endio_entry;
	struct dm_writecache *wc;
	struct wc_entry **wc_list;
	unsigned int wc_list_n;
	struct wc_entry *wc_list_inline[WB_LIST_INLINE];
	/* embedded bio */
	struct bio bio;
};
23162306a36Sopenharmony_ci
/*
 * Bookkeeping for one copy job.
 * NOTE(review): looks like the kcopyd counterpart of struct
 * writeback_struct - confirm against its users.
 */
struct copy_struct {
	struct list_head endio_entry;
	struct dm_writecache *wc;
	struct wc_entry *e;
	unsigned int n_entries;
	int error;
};
23962306a36Sopenharmony_ci
/* Declares the kcopyd throttle "dm_writecache_throttle" with the description below. */
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle,
					    "A percentage of time allocated for data copying");
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic void wc_lock(struct dm_writecache *wc)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	mutex_lock(&wc->lock);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_cistatic void wc_unlock(struct dm_writecache *wc)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	mutex_unlock(&wc->lock);
25162306a36Sopenharmony_ci}
25262306a36Sopenharmony_ci
#ifdef DM_WRITECACHE_HAS_PMEM
/*
 * Map wc->memory_map_size bytes of the cache device through DAX and store
 * the resulting address in wc->memory_map.
 *
 * If one dax_direct_access() call covers the whole range, its address is
 * used directly. Otherwise the range is walked piece by piece, the backing
 * pages are collected and mapped contiguously with vmap(), and
 * wc->memory_vmapped is set.
 *
 * On success wc->memory_map is advanced, and wc->memory_map_size reduced,
 * by wc->start_sector converted to bytes.
 *
 * Returns 0 on success or a negative errno.
 */
static int persistent_memory_claim(struct dm_writecache *wc)
{
	int r;
	loff_t s;
	long p, da;
	pfn_t pfn;
	int id;
	struct page **pages;
	sector_t offset;

	wc->memory_vmapped = false;

	s = wc->memory_map_size;
	p = s >> PAGE_SHIFT;
	if (!p) {
		r = -EINVAL;
		goto err1;
	}
	/* the page count must survive the conversion from loff_t to long */
	if (p != s >> PAGE_SHIFT) {
		r = -EOVERFLOW;
		goto err1;
	}

	/* the device's start sector must be page aligned */
	offset = get_start_sect(wc->ssd_dev->bdev);
	if (offset & (PAGE_SIZE / 512 - 1)) {
		r = -EINVAL;
		goto err1;
	}
	/* sectors -> pages */
	offset >>= PAGE_SHIFT - 9;

	id = dax_read_lock();

	da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_ACCESS,
			&wc->memory_map, &pfn);
	if (da < 0) {
		wc->memory_map = NULL;
		r = da;
		goto err2;
	}
	if (!pfn_t_has_page(pfn)) {
		wc->memory_map = NULL;
		r = -EOPNOTSUPP;
		goto err2;
	}
	/* the first call did not cover all p pages: build a vmap() mapping */
	if (da != p) {
		long i;

		wc->memory_map = NULL;
		pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL);
		if (!pages) {
			r = -ENOMEM;
			goto err2;
		}
		i = 0;
		do {
			long daa;

			daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i,
					p - i, DAX_ACCESS, NULL, &pfn);
			if (daa <= 0) {
				/* a zero-length result would loop forever; treat it as an error */
				r = daa ? daa : -EINVAL;
				goto err3;
			}
			if (!pfn_t_has_page(pfn)) {
				r = -EOPNOTSUPP;
				goto err3;
			}
			/* record the pages of this piece, never more than p in total */
			while (daa-- && i < p) {
				pages[i++] = pfn_t_to_page(pfn);
				pfn.val++;
				if (!(i & 15))
					cond_resched();
			}
		} while (i < p);
		wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
		if (!wc->memory_map) {
			r = -ENOMEM;
			goto err3;
		}
		/* the page array is only needed while creating the mapping */
		kvfree(pages);
		wc->memory_vmapped = true;
	}

	dax_read_unlock(id);

	/* skip the part of the device that precedes the cache area */
	wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT;
	wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT;

	return 0;
err3:
	kvfree(pages);
err2:
	dax_read_unlock(id);
err1:
	return r;
}
#else
/* Built without persistent-memory support: always fails with -EOPNOTSUPP. */
static int persistent_memory_claim(struct dm_writecache *wc)
{
	return -EOPNOTSUPP;
}
#endif
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_cistatic void persistent_memory_release(struct dm_writecache *wc)
35862306a36Sopenharmony_ci{
35962306a36Sopenharmony_ci	if (wc->memory_vmapped)
36062306a36Sopenharmony_ci		vunmap(wc->memory_map - ((size_t)wc->start_sector << SECTOR_SHIFT));
36162306a36Sopenharmony_ci}
36262306a36Sopenharmony_ci
/*
 * Return the struct page behind @addr, using the vmalloc lookup for
 * vmalloc-range addresses and virt_to_page() for all others.
 */
static struct page *persistent_memory_page(void *addr)
{
	return is_vmalloc_addr(addr) ? vmalloc_to_page(addr) : virt_to_page(addr);
}
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_cistatic unsigned int persistent_memory_page_offset(void *addr)
37262306a36Sopenharmony_ci{
37362306a36Sopenharmony_ci	return (unsigned long)addr & (PAGE_SIZE - 1);
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_cistatic void persistent_memory_flush_cache(void *ptr, size_t size)
37762306a36Sopenharmony_ci{
37862306a36Sopenharmony_ci	if (is_vmalloc_addr(ptr))
37962306a36Sopenharmony_ci		flush_kernel_vmap_range(ptr, size);
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic void persistent_memory_invalidate_cache(void *ptr, size_t size)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	if (is_vmalloc_addr(ptr))
38562306a36Sopenharmony_ci		invalidate_kernel_vmap_range(ptr, size);
38662306a36Sopenharmony_ci}
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_cistatic struct wc_memory_superblock *sb(struct dm_writecache *wc)
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci	return wc->memory_map;
39162306a36Sopenharmony_ci}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_cistatic struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
39462306a36Sopenharmony_ci{
39562306a36Sopenharmony_ci	return &sb(wc)->entries[e->index];
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_cistatic void *memory_data(struct dm_writecache *wc, struct wc_entry *e)
39962306a36Sopenharmony_ci{
40062306a36Sopenharmony_ci	return (char *)wc->block_start + (e->index << wc->block_size_bits);
40162306a36Sopenharmony_ci}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_cistatic sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e)
40462306a36Sopenharmony_ci{
40562306a36Sopenharmony_ci	return wc->start_sector + wc->metadata_sectors +
40662306a36Sopenharmony_ci		((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT));
40762306a36Sopenharmony_ci}
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e)
41062306a36Sopenharmony_ci{
41162306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
41262306a36Sopenharmony_ci	return e->original_sector;
41362306a36Sopenharmony_ci#else
41462306a36Sopenharmony_ci	return le64_to_cpu(memory_entry(wc, e)->original_sector);
41562306a36Sopenharmony_ci#endif
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
42162306a36Sopenharmony_ci	return e->seq_count;
42262306a36Sopenharmony_ci#else
42362306a36Sopenharmony_ci	return le64_to_cpu(memory_entry(wc, e)->seq_count);
42462306a36Sopenharmony_ci#endif
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_cistatic void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e)
42862306a36Sopenharmony_ci{
42962306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
43062306a36Sopenharmony_ci	e->seq_count = -1;
43162306a36Sopenharmony_ci#endif
43262306a36Sopenharmony_ci	pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1));
43362306a36Sopenharmony_ci}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_cistatic void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e,
43662306a36Sopenharmony_ci					    uint64_t original_sector, uint64_t seq_count)
43762306a36Sopenharmony_ci{
43862306a36Sopenharmony_ci	struct wc_memory_entry me;
43962306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
44062306a36Sopenharmony_ci	e->original_sector = original_sector;
44162306a36Sopenharmony_ci	e->seq_count = seq_count;
44262306a36Sopenharmony_ci#endif
44362306a36Sopenharmony_ci	me.original_sector = cpu_to_le64(original_sector);
44462306a36Sopenharmony_ci	me.seq_count = cpu_to_le64(seq_count);
44562306a36Sopenharmony_ci	pmem_assign(*memory_entry(wc, e), me);
44662306a36Sopenharmony_ci}
44762306a36Sopenharmony_ci
/*
 * writecache_error(wc, err, msg, ...) - record an error on @wc.
 *
 * Only the first error is stored: cmpxchg() replaces wc->error with @err
 * only while it is still 0, and the message is logged only in that case.
 * Waiters on wc->freelist_wait are woken every time.
 */
#define writecache_error(wc, err, msg, arg...)				\
do {									\
	if (!cmpxchg(&(wc)->error, 0, err))				\
		DMERR(msg, ##arg);					\
	wake_up(&(wc)->freelist_wait);					\
} while (0)

/* True once an error has been recorded; hinted as the unlikely case. */
#define writecache_has_error(wc)	(unlikely(READ_ONCE((wc)->error)))
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_cistatic void writecache_flush_all_metadata(struct dm_writecache *wc)
45862306a36Sopenharmony_ci{
45962306a36Sopenharmony_ci	if (!WC_MODE_PMEM(wc))
46062306a36Sopenharmony_ci		memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
46162306a36Sopenharmony_ci}
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_cistatic void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size)
46462306a36Sopenharmony_ci{
46562306a36Sopenharmony_ci	if (!WC_MODE_PMEM(wc))
46662306a36Sopenharmony_ci		__set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY,
46762306a36Sopenharmony_ci			  wc->dirty_bitmap);
46862306a36Sopenharmony_ci}
46962306a36Sopenharmony_ci
/* Forward declaration: ssd_commit_flushed() calls this before its definition. */
static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev);
47162306a36Sopenharmony_ci
/*
 * Completion context shared by the asynchronous metadata writes issued
 * from ssd_commit_flushed().
 */
struct io_notify {
	struct dm_writecache *wc;
	/* completed when count drops to zero */
	struct completion c;
	/* outstanding requests plus one reference held by the submitter */
	atomic_t count;
};
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_cistatic void writecache_notify_io(unsigned long error, void *context)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	struct io_notify *endio = context;
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	if (unlikely(error != 0))
48362306a36Sopenharmony_ci		writecache_error(endio->wc, -EIO, "error writing metadata");
48462306a36Sopenharmony_ci	BUG_ON(atomic_read(&endio->count) <= 0);
48562306a36Sopenharmony_ci	if (atomic_dec_and_test(&endio->count))
48662306a36Sopenharmony_ci		complete(&endio->c);
48762306a36Sopenharmony_ci}
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_cistatic void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
49062306a36Sopenharmony_ci{
49162306a36Sopenharmony_ci	wait_event(wc->bio_in_progress_wait[direction],
49262306a36Sopenharmony_ci		   !atomic_read(&wc->bio_in_progress[direction]));
49362306a36Sopenharmony_ci}
49462306a36Sopenharmony_ci
/*
 * Write every dirty chunk of metadata to the cache device, wait for the
 * writes, flush the device and clear the dirty bitmap.
 * @wait_for_ios: also wait for in-progress WRITE bios before the flush
 *
 * Write failures are recorded through writecache_error() by the
 * completion callback; nothing is returned.
 */
static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
	struct dm_io_region region;
	struct dm_io_request req;
	/*
	 * count starts at 1: this reference is held by the submitter so the
	 * completion cannot fire while requests are still being issued.
	 */
	struct io_notify endio = {
		wc,
		COMPLETION_INITIALIZER_ONSTACK(endio.c),
		ATOMIC_INIT(1),
	};
	unsigned int bitmap_bits = wc->dirty_bitmap_size * 8;
	unsigned int i = 0;

	while (1) {
		unsigned int j;

		/* [i, j) is the next run of consecutive dirty bits */
		i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i);
		if (unlikely(i == bitmap_bits))
			break;
		j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i);

		region.bdev = wc->ssd_dev->bdev;
		region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT);
		region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT);

		/* never write past the end of the metadata area */
		if (unlikely(region.sector >= wc->metadata_sectors))
			break;
		if (unlikely(region.sector + region.count > wc->metadata_sectors))
			region.count = wc->metadata_sectors - region.sector;

		region.sector += wc->start_sector;
		atomic_inc(&endio.count);
		req.bi_opf = REQ_OP_WRITE | REQ_SYNC;
		req.mem.type = DM_IO_VMA;
		req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY;
		req.client = wc->dm_io;
		req.notify.fn = writecache_notify_io;
		req.notify.context = &endio;

		/* writing via async dm-io (implied by notify.fn above) won't return an error */
		(void) dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
		i = j;
	}

	/* drop the submitter's reference, then wait for all writes */
	writecache_notify_io(0, &endio);
	wait_for_completion_io(&endio.c);

	if (wait_for_ios)
		writecache_wait_for_ios(wc, WRITE);

	writecache_disk_flush(wc, wc->ssd_dev);

	memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_cistatic void ssd_commit_superblock(struct dm_writecache *wc)
55062306a36Sopenharmony_ci{
55162306a36Sopenharmony_ci	int r;
55262306a36Sopenharmony_ci	struct dm_io_region region;
55362306a36Sopenharmony_ci	struct dm_io_request req;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci	region.bdev = wc->ssd_dev->bdev;
55662306a36Sopenharmony_ci	region.sector = 0;
55762306a36Sopenharmony_ci	region.count = max(4096U, wc->block_size) >> SECTOR_SHIFT;
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	if (unlikely(region.sector + region.count > wc->metadata_sectors))
56062306a36Sopenharmony_ci		region.count = wc->metadata_sectors - region.sector;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	region.sector += wc->start_sector;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	req.bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA;
56562306a36Sopenharmony_ci	req.mem.type = DM_IO_VMA;
56662306a36Sopenharmony_ci	req.mem.ptr.vma = (char *)wc->memory_map;
56762306a36Sopenharmony_ci	req.client = wc->dm_io;
56862306a36Sopenharmony_ci	req.notify.fn = NULL;
56962306a36Sopenharmony_ci	req.notify.context = NULL;
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	r = dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
57262306a36Sopenharmony_ci	if (unlikely(r))
57362306a36Sopenharmony_ci		writecache_error(wc, r, "error writing superblock");
57462306a36Sopenharmony_ci}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_cistatic void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
57762306a36Sopenharmony_ci{
57862306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
57962306a36Sopenharmony_ci		pmem_wmb();
58062306a36Sopenharmony_ci	else
58162306a36Sopenharmony_ci		ssd_commit_flushed(wc, wait_for_ios);
58262306a36Sopenharmony_ci}
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_cistatic void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
58562306a36Sopenharmony_ci{
58662306a36Sopenharmony_ci	int r;
58762306a36Sopenharmony_ci	struct dm_io_region region;
58862306a36Sopenharmony_ci	struct dm_io_request req;
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	region.bdev = dev->bdev;
59162306a36Sopenharmony_ci	region.sector = 0;
59262306a36Sopenharmony_ci	region.count = 0;
59362306a36Sopenharmony_ci	req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
59462306a36Sopenharmony_ci	req.mem.type = DM_IO_KMEM;
59562306a36Sopenharmony_ci	req.mem.ptr.addr = NULL;
59662306a36Sopenharmony_ci	req.client = wc->dm_io;
59762306a36Sopenharmony_ci	req.notify.fn = NULL;
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	r = dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
60062306a36Sopenharmony_ci	if (unlikely(r))
60162306a36Sopenharmony_ci		writecache_error(wc, r, "error flushing metadata: %d", r);
60262306a36Sopenharmony_ci}
60362306a36Sopenharmony_ci
/*
 * Flags for writecache_find_entry():
 * WFE_RETURN_FOLLOWING - without an exact match, return the entry with the
 *                        next higher original sector instead of NULL
 * WFE_LOWEST_SEQ       - among entries with the same original sector, return
 *                        the first one in tree order instead of the last
 */
#define WFE_RETURN_FOLLOWING	1
#define WFE_LOWEST_SEQ		2
60662306a36Sopenharmony_ci
/*
 * Look up the entry whose original sector equals @block in wc->tree.
 * @flags: combination of WFE_RETURN_FOLLOWING and WFE_LOWEST_SEQ
 *
 * Returns the matching entry, or NULL if there is none. With
 * WFE_RETURN_FOLLOWING a miss returns the entry with the smallest original
 * sector above @block, or NULL if no such entry exists.
 *
 * The tree may hold several entries for the same sector. With
 * WFE_LOWEST_SEQ the first of them in tree order is returned, otherwise
 * the last (writecache_insert_entry() places equal keys to the right, so
 * tree order among duplicates is insertion order).
 */
static struct wc_entry *writecache_find_entry(struct dm_writecache *wc,
					      uint64_t block, int flags)
{
	struct wc_entry *e;
	struct rb_node *node = wc->tree.rb_node;

	if (unlikely(!node))
		return NULL;

	/* descend until an entry for @block is found or a leaf is passed */
	while (1) {
		e = container_of(node, struct wc_entry, rb_node);
		if (read_original_sector(wc, e) == block)
			break;

		node = (read_original_sector(wc, e) >= block ?
			e->rb_node.rb_left : e->rb_node.rb_right);
		if (unlikely(!node)) {
			if (!(flags & WFE_RETURN_FOLLOWING))
				return NULL;
			/* e is the last node visited: either it follows @block ... */
			if (read_original_sector(wc, e) >= block)
				return e;

			/* ... or its in-order successor does */
			node = rb_next(&e->rb_node);
			if (unlikely(!node))
				return NULL;

			e = container_of(node, struct wc_entry, rb_node);
			return e;
		}
	}

	/* move to the first or last entry that shares the same sector */
	while (1) {
		struct wc_entry *e2;

		if (flags & WFE_LOWEST_SEQ)
			node = rb_prev(&e->rb_node);
		else
			node = rb_next(&e->rb_node);
		if (unlikely(!node))
			return e;
		e2 = container_of(node, struct wc_entry, rb_node);
		if (read_original_sector(wc, e2) != block)
			return e;
		e = e2;
	}
}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_cistatic void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins)
65562306a36Sopenharmony_ci{
65662306a36Sopenharmony_ci	struct wc_entry *e;
65762306a36Sopenharmony_ci	struct rb_node **node = &wc->tree.rb_node, *parent = NULL;
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	while (*node) {
66062306a36Sopenharmony_ci		e = container_of(*node, struct wc_entry, rb_node);
66162306a36Sopenharmony_ci		parent = &e->rb_node;
66262306a36Sopenharmony_ci		if (read_original_sector(wc, e) > read_original_sector(wc, ins))
66362306a36Sopenharmony_ci			node = &parent->rb_left;
66462306a36Sopenharmony_ci		else
66562306a36Sopenharmony_ci			node = &parent->rb_right;
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci	rb_link_node(&ins->rb_node, parent, node);
66862306a36Sopenharmony_ci	rb_insert_color(&ins->rb_node, &wc->tree);
66962306a36Sopenharmony_ci	list_add(&ins->lru, &wc->lru);
67062306a36Sopenharmony_ci	ins->age = jiffies;
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_cistatic void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e)
67462306a36Sopenharmony_ci{
67562306a36Sopenharmony_ci	list_del(&e->lru);
67662306a36Sopenharmony_ci	rb_erase(&e->rb_node, &wc->tree);
67762306a36Sopenharmony_ci}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cistatic void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e)
68062306a36Sopenharmony_ci{
68162306a36Sopenharmony_ci	if (WC_MODE_SORT_FREELIST(wc)) {
68262306a36Sopenharmony_ci		struct rb_node **node = &wc->freetree.rb_node, *parent = NULL;
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci		if (unlikely(!*node))
68562306a36Sopenharmony_ci			wc->current_free = e;
68662306a36Sopenharmony_ci		while (*node) {
68762306a36Sopenharmony_ci			parent = *node;
68862306a36Sopenharmony_ci			if (&e->rb_node < *node)
68962306a36Sopenharmony_ci				node = &parent->rb_left;
69062306a36Sopenharmony_ci			else
69162306a36Sopenharmony_ci				node = &parent->rb_right;
69262306a36Sopenharmony_ci		}
69362306a36Sopenharmony_ci		rb_link_node(&e->rb_node, parent, node);
69462306a36Sopenharmony_ci		rb_insert_color(&e->rb_node, &wc->freetree);
69562306a36Sopenharmony_ci	} else {
69662306a36Sopenharmony_ci		list_add_tail(&e->lru, &wc->freelist);
69762306a36Sopenharmony_ci	}
69862306a36Sopenharmony_ci	wc->freelist_size++;
69962306a36Sopenharmony_ci}
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_cistatic inline void writecache_verify_watermark(struct dm_writecache *wc)
70262306a36Sopenharmony_ci{
70362306a36Sopenharmony_ci	if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark))
70462306a36Sopenharmony_ci		queue_work(wc->writeback_wq, &wc->writeback_work);
70562306a36Sopenharmony_ci}
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_cistatic void writecache_max_age_timer(struct timer_list *t)
70862306a36Sopenharmony_ci{
70962306a36Sopenharmony_ci	struct dm_writecache *wc = from_timer(wc, t, max_age_timer);
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) {
71262306a36Sopenharmony_ci		queue_work(wc->writeback_wq, &wc->writeback_work);
71362306a36Sopenharmony_ci		mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV);
71462306a36Sopenharmony_ci	}
71562306a36Sopenharmony_ci}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_cistatic struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
71862306a36Sopenharmony_ci{
71962306a36Sopenharmony_ci	struct wc_entry *e;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	if (WC_MODE_SORT_FREELIST(wc)) {
72262306a36Sopenharmony_ci		struct rb_node *next;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci		if (unlikely(!wc->current_free))
72562306a36Sopenharmony_ci			return NULL;
72662306a36Sopenharmony_ci		e = wc->current_free;
72762306a36Sopenharmony_ci		if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
72862306a36Sopenharmony_ci			return NULL;
72962306a36Sopenharmony_ci		next = rb_next(&e->rb_node);
73062306a36Sopenharmony_ci		rb_erase(&e->rb_node, &wc->freetree);
73162306a36Sopenharmony_ci		if (unlikely(!next))
73262306a36Sopenharmony_ci			next = rb_first(&wc->freetree);
73362306a36Sopenharmony_ci		wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL;
73462306a36Sopenharmony_ci	} else {
73562306a36Sopenharmony_ci		if (unlikely(list_empty(&wc->freelist)))
73662306a36Sopenharmony_ci			return NULL;
73762306a36Sopenharmony_ci		e = container_of(wc->freelist.next, struct wc_entry, lru);
73862306a36Sopenharmony_ci		if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector))
73962306a36Sopenharmony_ci			return NULL;
74062306a36Sopenharmony_ci		list_del(&e->lru);
74162306a36Sopenharmony_ci	}
74262306a36Sopenharmony_ci	wc->freelist_size--;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	writecache_verify_watermark(wc);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	return e;
74762306a36Sopenharmony_ci}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_cistatic void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e)
75062306a36Sopenharmony_ci{
75162306a36Sopenharmony_ci	writecache_unlink(wc, e);
75262306a36Sopenharmony_ci	writecache_add_to_freelist(wc, e);
75362306a36Sopenharmony_ci	clear_seq_count(wc, e);
75462306a36Sopenharmony_ci	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
75562306a36Sopenharmony_ci	if (unlikely(waitqueue_active(&wc->freelist_wait)))
75662306a36Sopenharmony_ci		wake_up(&wc->freelist_wait);
75762306a36Sopenharmony_ci}
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_cistatic void writecache_wait_on_freelist(struct dm_writecache *wc)
76062306a36Sopenharmony_ci{
76162306a36Sopenharmony_ci	DEFINE_WAIT(wait);
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE);
76462306a36Sopenharmony_ci	wc_unlock(wc);
76562306a36Sopenharmony_ci	io_schedule();
76662306a36Sopenharmony_ci	finish_wait(&wc->freelist_wait, &wait);
76762306a36Sopenharmony_ci	wc_lock(wc);
76862306a36Sopenharmony_ci}
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_cistatic void writecache_poison_lists(struct dm_writecache *wc)
77162306a36Sopenharmony_ci{
77262306a36Sopenharmony_ci	/*
77362306a36Sopenharmony_ci	 * Catch incorrect access to these values while the device is suspended.
77462306a36Sopenharmony_ci	 */
77562306a36Sopenharmony_ci	memset(&wc->tree, -1, sizeof(wc->tree));
77662306a36Sopenharmony_ci	wc->lru.next = LIST_POISON1;
77762306a36Sopenharmony_ci	wc->lru.prev = LIST_POISON2;
77862306a36Sopenharmony_ci	wc->freelist.next = LIST_POISON1;
77962306a36Sopenharmony_ci	wc->freelist.prev = LIST_POISON2;
78062306a36Sopenharmony_ci}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_cistatic void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e)
78362306a36Sopenharmony_ci{
78462306a36Sopenharmony_ci	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
78562306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
78662306a36Sopenharmony_ci		writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
78762306a36Sopenharmony_ci}
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_cistatic bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e)
79062306a36Sopenharmony_ci{
79162306a36Sopenharmony_ci	return read_seq_count(wc, e) < wc->seq_count;
79262306a36Sopenharmony_ci}
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_cistatic void writecache_flush(struct dm_writecache *wc)
79562306a36Sopenharmony_ci{
79662306a36Sopenharmony_ci	struct wc_entry *e, *e2;
79762306a36Sopenharmony_ci	bool need_flush_after_free;
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	wc->uncommitted_blocks = 0;
80062306a36Sopenharmony_ci	del_timer(&wc->autocommit_timer);
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	if (list_empty(&wc->lru))
80362306a36Sopenharmony_ci		return;
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	e = container_of(wc->lru.next, struct wc_entry, lru);
80662306a36Sopenharmony_ci	if (writecache_entry_is_committed(wc, e)) {
80762306a36Sopenharmony_ci		if (wc->overwrote_committed) {
80862306a36Sopenharmony_ci			writecache_wait_for_ios(wc, WRITE);
80962306a36Sopenharmony_ci			writecache_disk_flush(wc, wc->ssd_dev);
81062306a36Sopenharmony_ci			wc->overwrote_committed = false;
81162306a36Sopenharmony_ci		}
81262306a36Sopenharmony_ci		return;
81362306a36Sopenharmony_ci	}
81462306a36Sopenharmony_ci	while (1) {
81562306a36Sopenharmony_ci		writecache_flush_entry(wc, e);
81662306a36Sopenharmony_ci		if (unlikely(e->lru.next == &wc->lru))
81762306a36Sopenharmony_ci			break;
81862306a36Sopenharmony_ci		e2 = container_of(e->lru.next, struct wc_entry, lru);
81962306a36Sopenharmony_ci		if (writecache_entry_is_committed(wc, e2))
82062306a36Sopenharmony_ci			break;
82162306a36Sopenharmony_ci		e = e2;
82262306a36Sopenharmony_ci		cond_resched();
82362306a36Sopenharmony_ci	}
82462306a36Sopenharmony_ci	writecache_commit_flushed(wc, true);
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	wc->seq_count++;
82762306a36Sopenharmony_ci	pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
82862306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
82962306a36Sopenharmony_ci		writecache_commit_flushed(wc, false);
83062306a36Sopenharmony_ci	else
83162306a36Sopenharmony_ci		ssd_commit_superblock(wc);
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	wc->overwrote_committed = false;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	need_flush_after_free = false;
83662306a36Sopenharmony_ci	while (1) {
83762306a36Sopenharmony_ci		/* Free another committed entry with lower seq-count */
83862306a36Sopenharmony_ci		struct rb_node *rb_node = rb_prev(&e->rb_node);
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci		if (rb_node) {
84162306a36Sopenharmony_ci			e2 = container_of(rb_node, struct wc_entry, rb_node);
84262306a36Sopenharmony_ci			if (read_original_sector(wc, e2) == read_original_sector(wc, e) &&
84362306a36Sopenharmony_ci			    likely(!e2->write_in_progress)) {
84462306a36Sopenharmony_ci				writecache_free_entry(wc, e2);
84562306a36Sopenharmony_ci				need_flush_after_free = true;
84662306a36Sopenharmony_ci			}
84762306a36Sopenharmony_ci		}
84862306a36Sopenharmony_ci		if (unlikely(e->lru.prev == &wc->lru))
84962306a36Sopenharmony_ci			break;
85062306a36Sopenharmony_ci		e = container_of(e->lru.prev, struct wc_entry, lru);
85162306a36Sopenharmony_ci		cond_resched();
85262306a36Sopenharmony_ci	}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci	if (need_flush_after_free)
85562306a36Sopenharmony_ci		writecache_commit_flushed(wc, false);
85662306a36Sopenharmony_ci}
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_cistatic void writecache_flush_work(struct work_struct *work)
85962306a36Sopenharmony_ci{
86062306a36Sopenharmony_ci	struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work);
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	wc_lock(wc);
86362306a36Sopenharmony_ci	writecache_flush(wc);
86462306a36Sopenharmony_ci	wc_unlock(wc);
86562306a36Sopenharmony_ci}
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_cistatic void writecache_autocommit_timer(struct timer_list *t)
86862306a36Sopenharmony_ci{
86962306a36Sopenharmony_ci	struct dm_writecache *wc = from_timer(wc, t, autocommit_timer);
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (!writecache_has_error(wc))
87262306a36Sopenharmony_ci		queue_work(wc->writeback_wq, &wc->flush_work);
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_cistatic void writecache_schedule_autocommit(struct dm_writecache *wc)
87662306a36Sopenharmony_ci{
87762306a36Sopenharmony_ci	if (!timer_pending(&wc->autocommit_timer))
87862306a36Sopenharmony_ci		mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies);
87962306a36Sopenharmony_ci}
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_cistatic void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end)
88262306a36Sopenharmony_ci{
88362306a36Sopenharmony_ci	struct wc_entry *e;
88462306a36Sopenharmony_ci	bool discarded_something = false;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ);
88762306a36Sopenharmony_ci	if (unlikely(!e))
88862306a36Sopenharmony_ci		return;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	while (read_original_sector(wc, e) < end) {
89162306a36Sopenharmony_ci		struct rb_node *node = rb_next(&e->rb_node);
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci		if (likely(!e->write_in_progress)) {
89462306a36Sopenharmony_ci			if (!discarded_something) {
89562306a36Sopenharmony_ci				if (!WC_MODE_PMEM(wc)) {
89662306a36Sopenharmony_ci					writecache_wait_for_ios(wc, READ);
89762306a36Sopenharmony_ci					writecache_wait_for_ios(wc, WRITE);
89862306a36Sopenharmony_ci				}
89962306a36Sopenharmony_ci				discarded_something = true;
90062306a36Sopenharmony_ci			}
90162306a36Sopenharmony_ci			if (!writecache_entry_is_committed(wc, e))
90262306a36Sopenharmony_ci				wc->uncommitted_blocks--;
90362306a36Sopenharmony_ci			writecache_free_entry(wc, e);
90462306a36Sopenharmony_ci		}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci		if (unlikely(!node))
90762306a36Sopenharmony_ci			break;
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci		e = container_of(node, struct wc_entry, rb_node);
91062306a36Sopenharmony_ci	}
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	if (discarded_something)
91362306a36Sopenharmony_ci		writecache_commit_flushed(wc, false);
91462306a36Sopenharmony_ci}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_cistatic bool writecache_wait_for_writeback(struct dm_writecache *wc)
91762306a36Sopenharmony_ci{
91862306a36Sopenharmony_ci	if (wc->writeback_size) {
91962306a36Sopenharmony_ci		writecache_wait_on_freelist(wc);
92062306a36Sopenharmony_ci		return true;
92162306a36Sopenharmony_ci	}
92262306a36Sopenharmony_ci	return false;
92362306a36Sopenharmony_ci}
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_cistatic void writecache_suspend(struct dm_target *ti)
92662306a36Sopenharmony_ci{
92762306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
92862306a36Sopenharmony_ci	bool flush_on_suspend;
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	del_timer_sync(&wc->autocommit_timer);
93162306a36Sopenharmony_ci	del_timer_sync(&wc->max_age_timer);
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	wc_lock(wc);
93462306a36Sopenharmony_ci	writecache_flush(wc);
93562306a36Sopenharmony_ci	flush_on_suspend = wc->flush_on_suspend;
93662306a36Sopenharmony_ci	if (flush_on_suspend) {
93762306a36Sopenharmony_ci		wc->flush_on_suspend = false;
93862306a36Sopenharmony_ci		wc->writeback_all++;
93962306a36Sopenharmony_ci		queue_work(wc->writeback_wq, &wc->writeback_work);
94062306a36Sopenharmony_ci	}
94162306a36Sopenharmony_ci	wc_unlock(wc);
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	drain_workqueue(wc->writeback_wq);
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	wc_lock(wc);
94662306a36Sopenharmony_ci	if (flush_on_suspend)
94762306a36Sopenharmony_ci		wc->writeback_all--;
94862306a36Sopenharmony_ci	while (writecache_wait_for_writeback(wc))
94962306a36Sopenharmony_ci		;
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
95262306a36Sopenharmony_ci		persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	writecache_poison_lists(wc);
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	wc_unlock(wc);
95762306a36Sopenharmony_ci}
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_cistatic int writecache_alloc_entries(struct dm_writecache *wc)
96062306a36Sopenharmony_ci{
96162306a36Sopenharmony_ci	size_t b;
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	if (wc->entries)
96462306a36Sopenharmony_ci		return 0;
96562306a36Sopenharmony_ci	wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks));
96662306a36Sopenharmony_ci	if (!wc->entries)
96762306a36Sopenharmony_ci		return -ENOMEM;
96862306a36Sopenharmony_ci	for (b = 0; b < wc->n_blocks; b++) {
96962306a36Sopenharmony_ci		struct wc_entry *e = &wc->entries[b];
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci		e->index = b;
97262306a36Sopenharmony_ci		e->write_in_progress = false;
97362306a36Sopenharmony_ci		cond_resched();
97462306a36Sopenharmony_ci	}
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci	return 0;
97762306a36Sopenharmony_ci}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_cistatic int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors)
98062306a36Sopenharmony_ci{
98162306a36Sopenharmony_ci	struct dm_io_region region;
98262306a36Sopenharmony_ci	struct dm_io_request req;
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	region.bdev = wc->ssd_dev->bdev;
98562306a36Sopenharmony_ci	region.sector = wc->start_sector;
98662306a36Sopenharmony_ci	region.count = n_sectors;
98762306a36Sopenharmony_ci	req.bi_opf = REQ_OP_READ | REQ_SYNC;
98862306a36Sopenharmony_ci	req.mem.type = DM_IO_VMA;
98962306a36Sopenharmony_ci	req.mem.ptr.vma = (char *)wc->memory_map;
99062306a36Sopenharmony_ci	req.client = wc->dm_io;
99162306a36Sopenharmony_ci	req.notify.fn = NULL;
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	return dm_io(&req, 1, &region, NULL, IOPRIO_DEFAULT);
99462306a36Sopenharmony_ci}
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_cistatic void writecache_resume(struct dm_target *ti)
99762306a36Sopenharmony_ci{
99862306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
99962306a36Sopenharmony_ci	size_t b;
100062306a36Sopenharmony_ci	bool need_flush = false;
100162306a36Sopenharmony_ci	__le64 sb_seq_count;
100262306a36Sopenharmony_ci	int r;
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	wc_lock(wc);
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	wc->data_device_sectors = bdev_nr_sectors(wc->dev->bdev);
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc)) {
100962306a36Sopenharmony_ci		persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
101062306a36Sopenharmony_ci	} else {
101162306a36Sopenharmony_ci		r = writecache_read_metadata(wc, wc->metadata_sectors);
101262306a36Sopenharmony_ci		if (r) {
101362306a36Sopenharmony_ci			size_t sb_entries_offset;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci			writecache_error(wc, r, "unable to read metadata: %d", r);
101662306a36Sopenharmony_ci			sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
101762306a36Sopenharmony_ci			memset((char *)wc->memory_map + sb_entries_offset, -1,
101862306a36Sopenharmony_ci			       (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset);
101962306a36Sopenharmony_ci		}
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	wc->tree = RB_ROOT;
102362306a36Sopenharmony_ci	INIT_LIST_HEAD(&wc->lru);
102462306a36Sopenharmony_ci	if (WC_MODE_SORT_FREELIST(wc)) {
102562306a36Sopenharmony_ci		wc->freetree = RB_ROOT;
102662306a36Sopenharmony_ci		wc->current_free = NULL;
102762306a36Sopenharmony_ci	} else {
102862306a36Sopenharmony_ci		INIT_LIST_HEAD(&wc->freelist);
102962306a36Sopenharmony_ci	}
103062306a36Sopenharmony_ci	wc->freelist_size = 0;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
103362306a36Sopenharmony_ci			      sizeof(uint64_t));
103462306a36Sopenharmony_ci	if (r) {
103562306a36Sopenharmony_ci		writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
103662306a36Sopenharmony_ci		sb_seq_count = cpu_to_le64(0);
103762306a36Sopenharmony_ci	}
103862306a36Sopenharmony_ci	wc->seq_count = le64_to_cpu(sb_seq_count);
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
104162306a36Sopenharmony_ci	for (b = 0; b < wc->n_blocks; b++) {
104262306a36Sopenharmony_ci		struct wc_entry *e = &wc->entries[b];
104362306a36Sopenharmony_ci		struct wc_memory_entry wme;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci		if (writecache_has_error(wc)) {
104662306a36Sopenharmony_ci			e->original_sector = -1;
104762306a36Sopenharmony_ci			e->seq_count = -1;
104862306a36Sopenharmony_ci			continue;
104962306a36Sopenharmony_ci		}
105062306a36Sopenharmony_ci		r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
105162306a36Sopenharmony_ci				      sizeof(struct wc_memory_entry));
105262306a36Sopenharmony_ci		if (r) {
105362306a36Sopenharmony_ci			writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
105462306a36Sopenharmony_ci					 (unsigned long)b, r);
105562306a36Sopenharmony_ci			e->original_sector = -1;
105662306a36Sopenharmony_ci			e->seq_count = -1;
105762306a36Sopenharmony_ci		} else {
105862306a36Sopenharmony_ci			e->original_sector = le64_to_cpu(wme.original_sector);
105962306a36Sopenharmony_ci			e->seq_count = le64_to_cpu(wme.seq_count);
106062306a36Sopenharmony_ci		}
106162306a36Sopenharmony_ci		cond_resched();
106262306a36Sopenharmony_ci	}
106362306a36Sopenharmony_ci#endif
106462306a36Sopenharmony_ci	for (b = 0; b < wc->n_blocks; b++) {
106562306a36Sopenharmony_ci		struct wc_entry *e = &wc->entries[b];
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci		if (!writecache_entry_is_committed(wc, e)) {
106862306a36Sopenharmony_ci			if (read_seq_count(wc, e) != -1) {
106962306a36Sopenharmony_cierase_this:
107062306a36Sopenharmony_ci				clear_seq_count(wc, e);
107162306a36Sopenharmony_ci				need_flush = true;
107262306a36Sopenharmony_ci			}
107362306a36Sopenharmony_ci			writecache_add_to_freelist(wc, e);
107462306a36Sopenharmony_ci		} else {
107562306a36Sopenharmony_ci			struct wc_entry *old;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci			old = writecache_find_entry(wc, read_original_sector(wc, e), 0);
107862306a36Sopenharmony_ci			if (!old) {
107962306a36Sopenharmony_ci				writecache_insert_entry(wc, e);
108062306a36Sopenharmony_ci			} else {
108162306a36Sopenharmony_ci				if (read_seq_count(wc, old) == read_seq_count(wc, e)) {
108262306a36Sopenharmony_ci					writecache_error(wc, -EINVAL,
108362306a36Sopenharmony_ci						 "two identical entries, position %llu, sector %llu, sequence %llu",
108462306a36Sopenharmony_ci						 (unsigned long long)b, (unsigned long long)read_original_sector(wc, e),
108562306a36Sopenharmony_ci						 (unsigned long long)read_seq_count(wc, e));
108662306a36Sopenharmony_ci				}
108762306a36Sopenharmony_ci				if (read_seq_count(wc, old) > read_seq_count(wc, e)) {
108862306a36Sopenharmony_ci					goto erase_this;
108962306a36Sopenharmony_ci				} else {
109062306a36Sopenharmony_ci					writecache_free_entry(wc, old);
109162306a36Sopenharmony_ci					writecache_insert_entry(wc, e);
109262306a36Sopenharmony_ci					need_flush = true;
109362306a36Sopenharmony_ci				}
109462306a36Sopenharmony_ci			}
109562306a36Sopenharmony_ci		}
109662306a36Sopenharmony_ci		cond_resched();
109762306a36Sopenharmony_ci	}
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	if (need_flush) {
110062306a36Sopenharmony_ci		writecache_flush_all_metadata(wc);
110162306a36Sopenharmony_ci		writecache_commit_flushed(wc, false);
110262306a36Sopenharmony_ci	}
110362306a36Sopenharmony_ci
110462306a36Sopenharmony_ci	writecache_verify_watermark(wc);
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	if (wc->max_age != MAX_AGE_UNSPECIFIED)
110762306a36Sopenharmony_ci		mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV);
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	wc_unlock(wc);
111062306a36Sopenharmony_ci}
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_cistatic int process_flush_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
111362306a36Sopenharmony_ci{
111462306a36Sopenharmony_ci	if (argc != 1)
111562306a36Sopenharmony_ci		return -EINVAL;
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	wc_lock(wc);
111862306a36Sopenharmony_ci	if (dm_suspended(wc->ti)) {
111962306a36Sopenharmony_ci		wc_unlock(wc);
112062306a36Sopenharmony_ci		return -EBUSY;
112162306a36Sopenharmony_ci	}
112262306a36Sopenharmony_ci	if (writecache_has_error(wc)) {
112362306a36Sopenharmony_ci		wc_unlock(wc);
112462306a36Sopenharmony_ci		return -EIO;
112562306a36Sopenharmony_ci	}
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	writecache_flush(wc);
112862306a36Sopenharmony_ci	wc->writeback_all++;
112962306a36Sopenharmony_ci	queue_work(wc->writeback_wq, &wc->writeback_work);
113062306a36Sopenharmony_ci	wc_unlock(wc);
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	flush_workqueue(wc->writeback_wq);
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	wc_lock(wc);
113562306a36Sopenharmony_ci	wc->writeback_all--;
113662306a36Sopenharmony_ci	if (writecache_has_error(wc)) {
113762306a36Sopenharmony_ci		wc_unlock(wc);
113862306a36Sopenharmony_ci		return -EIO;
113962306a36Sopenharmony_ci	}
114062306a36Sopenharmony_ci	wc_unlock(wc);
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	return 0;
114362306a36Sopenharmony_ci}
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_cistatic int process_flush_on_suspend_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
114662306a36Sopenharmony_ci{
114762306a36Sopenharmony_ci	if (argc != 1)
114862306a36Sopenharmony_ci		return -EINVAL;
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_ci	wc_lock(wc);
115162306a36Sopenharmony_ci	wc->flush_on_suspend = true;
115262306a36Sopenharmony_ci	wc_unlock(wc);
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci	return 0;
115562306a36Sopenharmony_ci}
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_cistatic void activate_cleaner(struct dm_writecache *wc)
115862306a36Sopenharmony_ci{
115962306a36Sopenharmony_ci	wc->flush_on_suspend = true;
116062306a36Sopenharmony_ci	wc->cleaner = true;
116162306a36Sopenharmony_ci	wc->freelist_high_watermark = wc->n_blocks;
116262306a36Sopenharmony_ci	wc->freelist_low_watermark = wc->n_blocks;
116362306a36Sopenharmony_ci}
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_cistatic int process_cleaner_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
116662306a36Sopenharmony_ci{
116762306a36Sopenharmony_ci	if (argc != 1)
116862306a36Sopenharmony_ci		return -EINVAL;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	wc_lock(wc);
117162306a36Sopenharmony_ci	activate_cleaner(wc);
117262306a36Sopenharmony_ci	if (!dm_suspended(wc->ti))
117362306a36Sopenharmony_ci		writecache_verify_watermark(wc);
117462306a36Sopenharmony_ci	wc_unlock(wc);
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	return 0;
117762306a36Sopenharmony_ci}
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_cistatic int process_clear_stats_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
118062306a36Sopenharmony_ci{
118162306a36Sopenharmony_ci	if (argc != 1)
118262306a36Sopenharmony_ci		return -EINVAL;
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci	wc_lock(wc);
118562306a36Sopenharmony_ci	memset(&wc->stats, 0, sizeof(wc->stats));
118662306a36Sopenharmony_ci	wc_unlock(wc);
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	return 0;
118962306a36Sopenharmony_ci}
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_cistatic int writecache_message(struct dm_target *ti, unsigned int argc, char **argv,
119262306a36Sopenharmony_ci			      char *result, unsigned int maxlen)
119362306a36Sopenharmony_ci{
119462306a36Sopenharmony_ci	int r = -EINVAL;
119562306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "flush"))
119862306a36Sopenharmony_ci		r = process_flush_mesg(argc, argv, wc);
119962306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "flush_on_suspend"))
120062306a36Sopenharmony_ci		r = process_flush_on_suspend_mesg(argc, argv, wc);
120162306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "cleaner"))
120262306a36Sopenharmony_ci		r = process_cleaner_mesg(argc, argv, wc);
120362306a36Sopenharmony_ci	else if (!strcasecmp(argv[0], "clear_stats"))
120462306a36Sopenharmony_ci		r = process_clear_stats_mesg(argc, argv, wc);
120562306a36Sopenharmony_ci	else
120662306a36Sopenharmony_ci		DMERR("unrecognised message received: %s", argv[0]);
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci	return r;
120962306a36Sopenharmony_ci}
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_cistatic void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
121262306a36Sopenharmony_ci{
121362306a36Sopenharmony_ci	/*
121462306a36Sopenharmony_ci	 * clflushopt performs better with block size 1024, 2048, 4096
121562306a36Sopenharmony_ci	 * non-temporal stores perform better with block size 512
121662306a36Sopenharmony_ci	 *
121762306a36Sopenharmony_ci	 * block size   512             1024            2048            4096
121862306a36Sopenharmony_ci	 * movnti       496 MB/s        642 MB/s        725 MB/s        744 MB/s
121962306a36Sopenharmony_ci	 * clflushopt   373 MB/s        688 MB/s        1.1 GB/s        1.2 GB/s
122062306a36Sopenharmony_ci	 *
122162306a36Sopenharmony_ci	 * We see that movnti performs better for 512-byte blocks, and
122262306a36Sopenharmony_ci	 * clflushopt performs better for 1024-byte and larger blocks. So, we
122362306a36Sopenharmony_ci	 * prefer clflushopt for sizes >= 768.
122462306a36Sopenharmony_ci	 *
122562306a36Sopenharmony_ci	 * NOTE: this happens to be the case now (with dm-writecache's single
122662306a36Sopenharmony_ci	 * threaded model) but re-evaluate this once memcpy_flushcache() is
122762306a36Sopenharmony_ci	 * enabled to use movdir64b which might invalidate this performance
122862306a36Sopenharmony_ci	 * advantage seen with cache-allocating-writes plus flushing.
122962306a36Sopenharmony_ci	 */
123062306a36Sopenharmony_ci#ifdef CONFIG_X86
123162306a36Sopenharmony_ci	if (static_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
123262306a36Sopenharmony_ci	    likely(boot_cpu_data.x86_clflush_size == 64) &&
123362306a36Sopenharmony_ci	    likely(size >= 768)) {
123462306a36Sopenharmony_ci		do {
123562306a36Sopenharmony_ci			memcpy((void *)dest, (void *)source, 64);
123662306a36Sopenharmony_ci			clflushopt((void *)dest);
123762306a36Sopenharmony_ci			dest += 64;
123862306a36Sopenharmony_ci			source += 64;
123962306a36Sopenharmony_ci			size -= 64;
124062306a36Sopenharmony_ci		} while (size >= 64);
124162306a36Sopenharmony_ci		return;
124262306a36Sopenharmony_ci	}
124362306a36Sopenharmony_ci#endif
124462306a36Sopenharmony_ci	memcpy_flushcache(dest, source, size);
124562306a36Sopenharmony_ci}
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_cistatic void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
124862306a36Sopenharmony_ci{
124962306a36Sopenharmony_ci	void *buf;
125062306a36Sopenharmony_ci	unsigned int size;
125162306a36Sopenharmony_ci	int rw = bio_data_dir(bio);
125262306a36Sopenharmony_ci	unsigned int remaining_size = wc->block_size;
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci	do {
125562306a36Sopenharmony_ci		struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci		buf = bvec_kmap_local(&bv);
125862306a36Sopenharmony_ci		size = bv.bv_len;
125962306a36Sopenharmony_ci		if (unlikely(size > remaining_size))
126062306a36Sopenharmony_ci			size = remaining_size;
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci		if (rw == READ) {
126362306a36Sopenharmony_ci			int r;
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci			r = copy_mc_to_kernel(buf, data, size);
126662306a36Sopenharmony_ci			flush_dcache_page(bio_page(bio));
126762306a36Sopenharmony_ci			if (unlikely(r)) {
126862306a36Sopenharmony_ci				writecache_error(wc, r, "hardware memory error when reading data: %d", r);
126962306a36Sopenharmony_ci				bio->bi_status = BLK_STS_IOERR;
127062306a36Sopenharmony_ci			}
127162306a36Sopenharmony_ci		} else {
127262306a36Sopenharmony_ci			flush_dcache_page(bio_page(bio));
127362306a36Sopenharmony_ci			memcpy_flushcache_optimized(data, buf, size);
127462306a36Sopenharmony_ci		}
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci		kunmap_local(buf);
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci		data = (char *)data + size;
127962306a36Sopenharmony_ci		remaining_size -= size;
128062306a36Sopenharmony_ci		bio_advance(bio, size);
128162306a36Sopenharmony_ci	} while (unlikely(remaining_size));
128262306a36Sopenharmony_ci}
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_cistatic int writecache_flush_thread(void *data)
128562306a36Sopenharmony_ci{
128662306a36Sopenharmony_ci	struct dm_writecache *wc = data;
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci	while (1) {
128962306a36Sopenharmony_ci		struct bio *bio;
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci		wc_lock(wc);
129262306a36Sopenharmony_ci		bio = bio_list_pop(&wc->flush_list);
129362306a36Sopenharmony_ci		if (!bio) {
129462306a36Sopenharmony_ci			set_current_state(TASK_INTERRUPTIBLE);
129562306a36Sopenharmony_ci			wc_unlock(wc);
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci			if (unlikely(kthread_should_stop())) {
129862306a36Sopenharmony_ci				set_current_state(TASK_RUNNING);
129962306a36Sopenharmony_ci				break;
130062306a36Sopenharmony_ci			}
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci			schedule();
130362306a36Sopenharmony_ci			continue;
130462306a36Sopenharmony_ci		}
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci		if (bio_op(bio) == REQ_OP_DISCARD) {
130762306a36Sopenharmony_ci			writecache_discard(wc, bio->bi_iter.bi_sector,
130862306a36Sopenharmony_ci					   bio_end_sector(bio));
130962306a36Sopenharmony_ci			wc_unlock(wc);
131062306a36Sopenharmony_ci			bio_set_dev(bio, wc->dev->bdev);
131162306a36Sopenharmony_ci			submit_bio_noacct(bio);
131262306a36Sopenharmony_ci		} else {
131362306a36Sopenharmony_ci			writecache_flush(wc);
131462306a36Sopenharmony_ci			wc_unlock(wc);
131562306a36Sopenharmony_ci			if (writecache_has_error(wc))
131662306a36Sopenharmony_ci				bio->bi_status = BLK_STS_IOERR;
131762306a36Sopenharmony_ci			bio_endio(bio);
131862306a36Sopenharmony_ci		}
131962306a36Sopenharmony_ci	}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci	return 0;
132262306a36Sopenharmony_ci}
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_cistatic void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
132562306a36Sopenharmony_ci{
132662306a36Sopenharmony_ci	if (bio_list_empty(&wc->flush_list))
132762306a36Sopenharmony_ci		wake_up_process(wc->flush_thread);
132862306a36Sopenharmony_ci	bio_list_add(&wc->flush_list, bio);
132962306a36Sopenharmony_ci}
133062306a36Sopenharmony_ci
/*
 * Result of the writecache_map_*() helpers; writecache_map() turns it into
 * the action taken on the bio and the DM_MAPIO_* return code.
 */
enum wc_map_op {
	WC_MAP_SUBMIT,		/* bio fully handled: bio_endio(), DM_MAPIO_SUBMITTED */
	WC_MAP_REMAP,		/* counted in bio_in_progress[], DM_MAPIO_REMAPPED */
	WC_MAP_REMAP_ORIGIN,	/* redirected to wc->dev, DM_MAPIO_REMAPPED */
	WC_MAP_RETURN,		/* nothing more to do here, DM_MAPIO_SUBMITTED */
	WC_MAP_ERROR,		/* bio_io_error(), DM_MAPIO_SUBMITTED */
};
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_cistatic void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
134062306a36Sopenharmony_ci					struct wc_entry *e)
134162306a36Sopenharmony_ci{
134262306a36Sopenharmony_ci	if (e) {
134362306a36Sopenharmony_ci		sector_t next_boundary =
134462306a36Sopenharmony_ci			read_original_sector(wc, e) - bio->bi_iter.bi_sector;
134562306a36Sopenharmony_ci		if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
134662306a36Sopenharmony_ci			dm_accept_partial_bio(bio, next_boundary);
134762306a36Sopenharmony_ci	}
134862306a36Sopenharmony_ci}
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_cistatic enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
135162306a36Sopenharmony_ci{
135262306a36Sopenharmony_ci	enum wc_map_op map_op;
135362306a36Sopenharmony_ci	struct wc_entry *e;
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ciread_next_block:
135662306a36Sopenharmony_ci	wc->stats.reads++;
135762306a36Sopenharmony_ci	e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
135862306a36Sopenharmony_ci	if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
135962306a36Sopenharmony_ci		wc->stats.read_hits++;
136062306a36Sopenharmony_ci		if (WC_MODE_PMEM(wc)) {
136162306a36Sopenharmony_ci			bio_copy_block(wc, bio, memory_data(wc, e));
136262306a36Sopenharmony_ci			if (bio->bi_iter.bi_size)
136362306a36Sopenharmony_ci				goto read_next_block;
136462306a36Sopenharmony_ci			map_op = WC_MAP_SUBMIT;
136562306a36Sopenharmony_ci		} else {
136662306a36Sopenharmony_ci			dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
136762306a36Sopenharmony_ci			bio_set_dev(bio, wc->ssd_dev->bdev);
136862306a36Sopenharmony_ci			bio->bi_iter.bi_sector = cache_sector(wc, e);
136962306a36Sopenharmony_ci			if (!writecache_entry_is_committed(wc, e))
137062306a36Sopenharmony_ci				writecache_wait_for_ios(wc, WRITE);
137162306a36Sopenharmony_ci			map_op = WC_MAP_REMAP;
137262306a36Sopenharmony_ci		}
137362306a36Sopenharmony_ci	} else {
137462306a36Sopenharmony_ci		writecache_map_remap_origin(wc, bio, e);
137562306a36Sopenharmony_ci		wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
137662306a36Sopenharmony_ci		map_op = WC_MAP_REMAP_ORIGIN;
137762306a36Sopenharmony_ci	}
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	return map_op;
138062306a36Sopenharmony_ci}
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_cistatic void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
138362306a36Sopenharmony_ci				    struct wc_entry *e, bool search_used)
138462306a36Sopenharmony_ci{
138562306a36Sopenharmony_ci	unsigned int bio_size = wc->block_size;
138662306a36Sopenharmony_ci	sector_t start_cache_sec = cache_sector(wc, e);
138762306a36Sopenharmony_ci	sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	while (bio_size < bio->bi_iter.bi_size) {
139062306a36Sopenharmony_ci		if (!search_used) {
139162306a36Sopenharmony_ci			struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci			if (!f)
139462306a36Sopenharmony_ci				break;
139562306a36Sopenharmony_ci			write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
139662306a36Sopenharmony_ci							(bio_size >> SECTOR_SHIFT), wc->seq_count);
139762306a36Sopenharmony_ci			writecache_insert_entry(wc, f);
139862306a36Sopenharmony_ci			wc->uncommitted_blocks++;
139962306a36Sopenharmony_ci		} else {
140062306a36Sopenharmony_ci			struct wc_entry *f;
140162306a36Sopenharmony_ci			struct rb_node *next = rb_next(&e->rb_node);
140262306a36Sopenharmony_ci
140362306a36Sopenharmony_ci			if (!next)
140462306a36Sopenharmony_ci				break;
140562306a36Sopenharmony_ci			f = container_of(next, struct wc_entry, rb_node);
140662306a36Sopenharmony_ci			if (f != e + 1)
140762306a36Sopenharmony_ci				break;
140862306a36Sopenharmony_ci			if (read_original_sector(wc, f) !=
140962306a36Sopenharmony_ci			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
141062306a36Sopenharmony_ci				break;
141162306a36Sopenharmony_ci			if (unlikely(f->write_in_progress))
141262306a36Sopenharmony_ci				break;
141362306a36Sopenharmony_ci			if (writecache_entry_is_committed(wc, f))
141462306a36Sopenharmony_ci				wc->overwrote_committed = true;
141562306a36Sopenharmony_ci			e = f;
141662306a36Sopenharmony_ci		}
141762306a36Sopenharmony_ci		bio_size += wc->block_size;
141862306a36Sopenharmony_ci		current_cache_sec += wc->block_size >> SECTOR_SHIFT;
141962306a36Sopenharmony_ci	}
142062306a36Sopenharmony_ci
142162306a36Sopenharmony_ci	bio_set_dev(bio, wc->ssd_dev->bdev);
142262306a36Sopenharmony_ci	bio->bi_iter.bi_sector = start_cache_sec;
142362306a36Sopenharmony_ci	dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_ci	wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
142662306a36Sopenharmony_ci	wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits;
142762306a36Sopenharmony_ci
142862306a36Sopenharmony_ci	if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
142962306a36Sopenharmony_ci		wc->uncommitted_blocks = 0;
143062306a36Sopenharmony_ci		queue_work(wc->writeback_wq, &wc->flush_work);
143162306a36Sopenharmony_ci	} else {
143262306a36Sopenharmony_ci		writecache_schedule_autocommit(wc);
143362306a36Sopenharmony_ci	}
143462306a36Sopenharmony_ci}
143562306a36Sopenharmony_ci
/*
 * Map a write bio, one cache block per loop iteration.
 *
 * Returns:
 *   WC_MAP_ERROR        - the cache is in an error state
 *   WC_MAP_REMAP_ORIGIN - the write bypasses the cache (direct_write path)
 *   WC_MAP_REMAP        - SSD mode: writecache_bio_copy_ssd() redirected the bio
 *   WC_MAP_SUBMIT       - PMEM mode: all blocks were copied into the cache
 */
static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
{
	struct wc_entry *e;

	do {
		/* a committed entry exists for this sector but is not reused */
		bool found_entry = false;
		/* an existing entry is overwritten; passed to writecache_bio_copy_ssd() */
		bool search_used = false;

		if (writecache_has_error(wc)) {
			wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
			return WC_MAP_ERROR;
		}
		e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
		if (e) {
			/* uncommitted entry: overwrite it in place */
			if (!writecache_entry_is_committed(wc, e)) {
				wc->stats.write_hits_uncommitted++;
				search_used = true;
				goto bio_copy;
			}
			wc->stats.write_hits_committed++;
			/* SSD mode, no write in progress: overwrite the committed entry */
			if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
				wc->overwrote_committed = true;
				search_used = true;
				goto bio_copy;
			}
			found_entry = true;
		} else {
			/* no entry: cleaner mode and non-REQ_META bios in metadata_only mode skip the cache */
			if (unlikely(wc->cleaner) ||
			    (wc->metadata_only && !(bio->bi_opf & REQ_META)))
				goto direct_write;
		}
		/* allocate a new entry for this block */
		e = writecache_pop_from_freelist(wc, (sector_t)-1);
		if (unlikely(!e)) {
			/* freelist empty: SSD mode may write around the cache, but only if no entry exists */
			if (!WC_MODE_PMEM(wc) && !found_entry) {
direct_write:
				/* send the bio to the origin, trimmed to end before the next cached block */
				e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
				writecache_map_remap_origin(wc, bio, e);
				wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits;
				wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
				return WC_MAP_REMAP_ORIGIN;
			}
			wc->stats.writes_blocked_on_freelist++;
			writecache_wait_on_freelist(wc);
			/* the bio was not advanced, so the same block is retried */
			continue;
		}
		write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
		writecache_insert_entry(wc, e);
		wc->uncommitted_blocks++;
		wc->stats.writes_allocate++;
bio_copy:
		if (WC_MODE_PMEM(wc)) {
			/* copy one block into the cache; bio_copy_block() advances the bio */
			bio_copy_block(wc, bio, memory_data(wc, e));
			wc->stats.writes++;
		} else {
			writecache_bio_copy_ssd(wc, bio, e, search_used);
			return WC_MAP_REMAP;
		}
	} while (bio->bi_iter.bi_size);

	/* PMEM only: flush now for REQ_FUA or when the autocommit threshold is reached */
	if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
		writecache_flush(wc);
	else
		writecache_schedule_autocommit(wc);

	return WC_MAP_SUBMIT;
}
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_cistatic enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio)
150462306a36Sopenharmony_ci{
150562306a36Sopenharmony_ci	if (writecache_has_error(wc))
150662306a36Sopenharmony_ci		return WC_MAP_ERROR;
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc)) {
150962306a36Sopenharmony_ci		wc->stats.flushes++;
151062306a36Sopenharmony_ci		writecache_flush(wc);
151162306a36Sopenharmony_ci		if (writecache_has_error(wc))
151262306a36Sopenharmony_ci			return WC_MAP_ERROR;
151362306a36Sopenharmony_ci		else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
151462306a36Sopenharmony_ci			return WC_MAP_REMAP_ORIGIN;
151562306a36Sopenharmony_ci		return WC_MAP_SUBMIT;
151662306a36Sopenharmony_ci	}
151762306a36Sopenharmony_ci	/* SSD: */
151862306a36Sopenharmony_ci	if (dm_bio_get_target_bio_nr(bio))
151962306a36Sopenharmony_ci		return WC_MAP_REMAP_ORIGIN;
152062306a36Sopenharmony_ci	wc->stats.flushes++;
152162306a36Sopenharmony_ci	writecache_offload_bio(wc, bio);
152262306a36Sopenharmony_ci	return WC_MAP_RETURN;
152362306a36Sopenharmony_ci}
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_cistatic enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
152662306a36Sopenharmony_ci{
152762306a36Sopenharmony_ci	wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	if (writecache_has_error(wc))
153062306a36Sopenharmony_ci		return WC_MAP_ERROR;
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc)) {
153362306a36Sopenharmony_ci		writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
153462306a36Sopenharmony_ci		return WC_MAP_REMAP_ORIGIN;
153562306a36Sopenharmony_ci	}
153662306a36Sopenharmony_ci	/* SSD: */
153762306a36Sopenharmony_ci	writecache_offload_bio(wc, bio);
153862306a36Sopenharmony_ci	return WC_MAP_RETURN;
153962306a36Sopenharmony_ci}
154062306a36Sopenharmony_ci
154162306a36Sopenharmony_cistatic int writecache_map(struct dm_target *ti, struct bio *bio)
154262306a36Sopenharmony_ci{
154362306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
154462306a36Sopenharmony_ci	enum wc_map_op map_op;
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_ci	bio->bi_private = NULL;
154762306a36Sopenharmony_ci
154862306a36Sopenharmony_ci	wc_lock(wc);
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
155162306a36Sopenharmony_ci		map_op = writecache_map_flush(wc, bio);
155262306a36Sopenharmony_ci		goto done;
155362306a36Sopenharmony_ci	}
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ci	if (unlikely((((unsigned int)bio->bi_iter.bi_sector | bio_sectors(bio)) &
155862306a36Sopenharmony_ci				(wc->block_size / 512 - 1)) != 0)) {
155962306a36Sopenharmony_ci		DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
156062306a36Sopenharmony_ci		      (unsigned long long)bio->bi_iter.bi_sector,
156162306a36Sopenharmony_ci		      bio->bi_iter.bi_size, wc->block_size);
156262306a36Sopenharmony_ci		map_op = WC_MAP_ERROR;
156362306a36Sopenharmony_ci		goto done;
156462306a36Sopenharmony_ci	}
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
156762306a36Sopenharmony_ci		map_op = writecache_map_discard(wc, bio);
156862306a36Sopenharmony_ci		goto done;
156962306a36Sopenharmony_ci	}
157062306a36Sopenharmony_ci
157162306a36Sopenharmony_ci	if (bio_data_dir(bio) == READ)
157262306a36Sopenharmony_ci		map_op = writecache_map_read(wc, bio);
157362306a36Sopenharmony_ci	else
157462306a36Sopenharmony_ci		map_op = writecache_map_write(wc, bio);
157562306a36Sopenharmony_cidone:
157662306a36Sopenharmony_ci	switch (map_op) {
157762306a36Sopenharmony_ci	case WC_MAP_REMAP_ORIGIN:
157862306a36Sopenharmony_ci		if (likely(wc->pause != 0)) {
157962306a36Sopenharmony_ci			if (bio_op(bio) == REQ_OP_WRITE) {
158062306a36Sopenharmony_ci				dm_iot_io_begin(&wc->iot, 1);
158162306a36Sopenharmony_ci				bio->bi_private = (void *)2;
158262306a36Sopenharmony_ci			}
158362306a36Sopenharmony_ci		}
158462306a36Sopenharmony_ci		bio_set_dev(bio, wc->dev->bdev);
158562306a36Sopenharmony_ci		wc_unlock(wc);
158662306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	case WC_MAP_REMAP:
158962306a36Sopenharmony_ci		/* make sure that writecache_end_io decrements bio_in_progress: */
159062306a36Sopenharmony_ci		bio->bi_private = (void *)1;
159162306a36Sopenharmony_ci		atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
159262306a36Sopenharmony_ci		wc_unlock(wc);
159362306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_ci	case WC_MAP_SUBMIT:
159662306a36Sopenharmony_ci		wc_unlock(wc);
159762306a36Sopenharmony_ci		bio_endio(bio);
159862306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
159962306a36Sopenharmony_ci
160062306a36Sopenharmony_ci	case WC_MAP_RETURN:
160162306a36Sopenharmony_ci		wc_unlock(wc);
160262306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_ci	case WC_MAP_ERROR:
160562306a36Sopenharmony_ci		wc_unlock(wc);
160662306a36Sopenharmony_ci		bio_io_error(bio);
160762306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci	default:
161062306a36Sopenharmony_ci		BUG();
161162306a36Sopenharmony_ci		wc_unlock(wc);
161262306a36Sopenharmony_ci		return DM_MAPIO_KILL;
161362306a36Sopenharmony_ci	}
161462306a36Sopenharmony_ci}
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_cistatic int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
161762306a36Sopenharmony_ci{
161862306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
161962306a36Sopenharmony_ci
162062306a36Sopenharmony_ci	if (bio->bi_private == (void *)1) {
162162306a36Sopenharmony_ci		int dir = bio_data_dir(bio);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci		if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
162462306a36Sopenharmony_ci			if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir])))
162562306a36Sopenharmony_ci				wake_up(&wc->bio_in_progress_wait[dir]);
162662306a36Sopenharmony_ci	} else if (bio->bi_private == (void *)2) {
162762306a36Sopenharmony_ci		dm_iot_io_end(&wc->iot, 1);
162862306a36Sopenharmony_ci	}
162962306a36Sopenharmony_ci	return 0;
163062306a36Sopenharmony_ci}
163162306a36Sopenharmony_ci
163262306a36Sopenharmony_cistatic int writecache_iterate_devices(struct dm_target *ti,
163362306a36Sopenharmony_ci				      iterate_devices_callout_fn fn, void *data)
163462306a36Sopenharmony_ci{
163562306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
163662306a36Sopenharmony_ci
163762306a36Sopenharmony_ci	return fn(ti, wc->dev, 0, ti->len, data);
163862306a36Sopenharmony_ci}
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_cistatic void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits)
164162306a36Sopenharmony_ci{
164262306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_ci	if (limits->logical_block_size < wc->block_size)
164562306a36Sopenharmony_ci		limits->logical_block_size = wc->block_size;
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_ci	if (limits->physical_block_size < wc->block_size)
164862306a36Sopenharmony_ci		limits->physical_block_size = wc->block_size;
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	if (limits->io_min < wc->block_size)
165162306a36Sopenharmony_ci		limits->io_min = wc->block_size;
165262306a36Sopenharmony_ci}
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_cistatic void writecache_writeback_endio(struct bio *bio)
165662306a36Sopenharmony_ci{
165762306a36Sopenharmony_ci	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
165862306a36Sopenharmony_ci	struct dm_writecache *wc = wb->wc;
165962306a36Sopenharmony_ci	unsigned long flags;
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci	raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
166262306a36Sopenharmony_ci	if (unlikely(list_empty(&wc->endio_list)))
166362306a36Sopenharmony_ci		wake_up_process(wc->endio_thread);
166462306a36Sopenharmony_ci	list_add_tail(&wb->endio_entry, &wc->endio_list);
166562306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
166662306a36Sopenharmony_ci}
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_cistatic void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
166962306a36Sopenharmony_ci{
167062306a36Sopenharmony_ci	struct copy_struct *c = ptr;
167162306a36Sopenharmony_ci	struct dm_writecache *wc = c->wc;
167262306a36Sopenharmony_ci
167362306a36Sopenharmony_ci	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
167462306a36Sopenharmony_ci
167562306a36Sopenharmony_ci	raw_spin_lock_irq(&wc->endio_list_lock);
167662306a36Sopenharmony_ci	if (unlikely(list_empty(&wc->endio_list)))
167762306a36Sopenharmony_ci		wake_up_process(wc->endio_thread);
167862306a36Sopenharmony_ci	list_add_tail(&c->endio_entry, &wc->endio_list);
167962306a36Sopenharmony_ci	raw_spin_unlock_irq(&wc->endio_list_lock);
168062306a36Sopenharmony_ci}
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_cistatic void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
168362306a36Sopenharmony_ci{
168462306a36Sopenharmony_ci	unsigned int i;
168562306a36Sopenharmony_ci	struct writeback_struct *wb;
168662306a36Sopenharmony_ci	struct wc_entry *e;
168762306a36Sopenharmony_ci	unsigned long n_walked = 0;
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_ci	do {
169062306a36Sopenharmony_ci		wb = list_entry(list->next, struct writeback_struct, endio_entry);
169162306a36Sopenharmony_ci		list_del(&wb->endio_entry);
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci		if (unlikely(wb->bio.bi_status != BLK_STS_OK))
169462306a36Sopenharmony_ci			writecache_error(wc, blk_status_to_errno(wb->bio.bi_status),
169562306a36Sopenharmony_ci					"write error %d", wb->bio.bi_status);
169662306a36Sopenharmony_ci		i = 0;
169762306a36Sopenharmony_ci		do {
169862306a36Sopenharmony_ci			e = wb->wc_list[i];
169962306a36Sopenharmony_ci			BUG_ON(!e->write_in_progress);
170062306a36Sopenharmony_ci			e->write_in_progress = false;
170162306a36Sopenharmony_ci			INIT_LIST_HEAD(&e->lru);
170262306a36Sopenharmony_ci			if (!writecache_has_error(wc))
170362306a36Sopenharmony_ci				writecache_free_entry(wc, e);
170462306a36Sopenharmony_ci			BUG_ON(!wc->writeback_size);
170562306a36Sopenharmony_ci			wc->writeback_size--;
170662306a36Sopenharmony_ci			n_walked++;
170762306a36Sopenharmony_ci			if (unlikely(n_walked >= ENDIO_LATENCY)) {
170862306a36Sopenharmony_ci				writecache_commit_flushed(wc, false);
170962306a36Sopenharmony_ci				wc_unlock(wc);
171062306a36Sopenharmony_ci				wc_lock(wc);
171162306a36Sopenharmony_ci				n_walked = 0;
171262306a36Sopenharmony_ci			}
171362306a36Sopenharmony_ci		} while (++i < wb->wc_list_n);
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci		if (wb->wc_list != wb->wc_list_inline)
171662306a36Sopenharmony_ci			kfree(wb->wc_list);
171762306a36Sopenharmony_ci		bio_put(&wb->bio);
171862306a36Sopenharmony_ci	} while (!list_empty(list));
171962306a36Sopenharmony_ci}
172062306a36Sopenharmony_ci
172162306a36Sopenharmony_cistatic void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
172262306a36Sopenharmony_ci{
172362306a36Sopenharmony_ci	struct copy_struct *c;
172462306a36Sopenharmony_ci	struct wc_entry *e;
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci	do {
172762306a36Sopenharmony_ci		c = list_entry(list->next, struct copy_struct, endio_entry);
172862306a36Sopenharmony_ci		list_del(&c->endio_entry);
172962306a36Sopenharmony_ci
173062306a36Sopenharmony_ci		if (unlikely(c->error))
173162306a36Sopenharmony_ci			writecache_error(wc, c->error, "copy error");
173262306a36Sopenharmony_ci
173362306a36Sopenharmony_ci		e = c->e;
173462306a36Sopenharmony_ci		do {
173562306a36Sopenharmony_ci			BUG_ON(!e->write_in_progress);
173662306a36Sopenharmony_ci			e->write_in_progress = false;
173762306a36Sopenharmony_ci			INIT_LIST_HEAD(&e->lru);
173862306a36Sopenharmony_ci			if (!writecache_has_error(wc))
173962306a36Sopenharmony_ci				writecache_free_entry(wc, e);
174062306a36Sopenharmony_ci
174162306a36Sopenharmony_ci			BUG_ON(!wc->writeback_size);
174262306a36Sopenharmony_ci			wc->writeback_size--;
174362306a36Sopenharmony_ci			e++;
174462306a36Sopenharmony_ci		} while (--c->n_entries);
174562306a36Sopenharmony_ci		mempool_free(c, &wc->copy_pool);
174662306a36Sopenharmony_ci	} while (!list_empty(list));
174762306a36Sopenharmony_ci}
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_cistatic int writecache_endio_thread(void *data)
175062306a36Sopenharmony_ci{
175162306a36Sopenharmony_ci	struct dm_writecache *wc = data;
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	while (1) {
175462306a36Sopenharmony_ci		struct list_head list;
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci		raw_spin_lock_irq(&wc->endio_list_lock);
175762306a36Sopenharmony_ci		if (!list_empty(&wc->endio_list))
175862306a36Sopenharmony_ci			goto pop_from_list;
175962306a36Sopenharmony_ci		set_current_state(TASK_INTERRUPTIBLE);
176062306a36Sopenharmony_ci		raw_spin_unlock_irq(&wc->endio_list_lock);
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci		if (unlikely(kthread_should_stop())) {
176362306a36Sopenharmony_ci			set_current_state(TASK_RUNNING);
176462306a36Sopenharmony_ci			break;
176562306a36Sopenharmony_ci		}
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci		schedule();
176862306a36Sopenharmony_ci
176962306a36Sopenharmony_ci		continue;
177062306a36Sopenharmony_ci
177162306a36Sopenharmony_cipop_from_list:
177262306a36Sopenharmony_ci		list = wc->endio_list;
177362306a36Sopenharmony_ci		list.next->prev = list.prev->next = &list;
177462306a36Sopenharmony_ci		INIT_LIST_HEAD(&wc->endio_list);
177562306a36Sopenharmony_ci		raw_spin_unlock_irq(&wc->endio_list_lock);
177662306a36Sopenharmony_ci
177762306a36Sopenharmony_ci		if (!WC_MODE_FUA(wc))
177862306a36Sopenharmony_ci			writecache_disk_flush(wc, wc->dev);
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci		wc_lock(wc);
178162306a36Sopenharmony_ci
178262306a36Sopenharmony_ci		if (WC_MODE_PMEM(wc)) {
178362306a36Sopenharmony_ci			__writecache_endio_pmem(wc, &list);
178462306a36Sopenharmony_ci		} else {
178562306a36Sopenharmony_ci			__writecache_endio_ssd(wc, &list);
178662306a36Sopenharmony_ci			writecache_wait_for_ios(wc, READ);
178762306a36Sopenharmony_ci		}
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_ci		writecache_commit_flushed(wc, false);
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci		wc_unlock(wc);
179262306a36Sopenharmony_ci	}
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci	return 0;
179562306a36Sopenharmony_ci}
179662306a36Sopenharmony_ci
179762306a36Sopenharmony_cistatic bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e)
179862306a36Sopenharmony_ci{
179962306a36Sopenharmony_ci	struct dm_writecache *wc = wb->wc;
180062306a36Sopenharmony_ci	unsigned int block_size = wc->block_size;
180162306a36Sopenharmony_ci	void *address = memory_data(wc, e);
180262306a36Sopenharmony_ci
180362306a36Sopenharmony_ci	persistent_memory_flush_cache(address, block_size);
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_ci	if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors))
180662306a36Sopenharmony_ci		return true;
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	return bio_add_page(&wb->bio, persistent_memory_page(address),
180962306a36Sopenharmony_ci			    block_size, persistent_memory_page_offset(address)) != 0;
181062306a36Sopenharmony_ci}
181162306a36Sopenharmony_ci
/* Batch of cache entries selected for writeback. */
struct writeback_list {
	struct list_head list;	/* entries, linked through wc_entry.lru */
	size_t size;		/* decremented each time an entry is taken off @list */
};
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_cistatic void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl)
181862306a36Sopenharmony_ci{
181962306a36Sopenharmony_ci	if (unlikely(wc->max_writeback_jobs)) {
182062306a36Sopenharmony_ci		if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) {
182162306a36Sopenharmony_ci			wc_lock(wc);
182262306a36Sopenharmony_ci			while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs)
182362306a36Sopenharmony_ci				writecache_wait_on_freelist(wc);
182462306a36Sopenharmony_ci			wc_unlock(wc);
182562306a36Sopenharmony_ci		}
182662306a36Sopenharmony_ci	}
182762306a36Sopenharmony_ci	cond_resched();
182862306a36Sopenharmony_ci}
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_cistatic void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl)
183162306a36Sopenharmony_ci{
183262306a36Sopenharmony_ci	struct wc_entry *e, *f;
183362306a36Sopenharmony_ci	struct bio *bio;
183462306a36Sopenharmony_ci	struct writeback_struct *wb;
183562306a36Sopenharmony_ci	unsigned int max_pages;
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci	while (wbl->size) {
183862306a36Sopenharmony_ci		wbl->size--;
183962306a36Sopenharmony_ci		e = container_of(wbl->list.prev, struct wc_entry, lru);
184062306a36Sopenharmony_ci		list_del(&e->lru);
184162306a36Sopenharmony_ci
184262306a36Sopenharmony_ci		max_pages = e->wc_list_contiguous;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci		bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE,
184562306a36Sopenharmony_ci				       GFP_NOIO, &wc->bio_set);
184662306a36Sopenharmony_ci		wb = container_of(bio, struct writeback_struct, bio);
184762306a36Sopenharmony_ci		wb->wc = wc;
184862306a36Sopenharmony_ci		bio->bi_end_io = writecache_writeback_endio;
184962306a36Sopenharmony_ci		bio->bi_iter.bi_sector = read_original_sector(wc, e);
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci		if (unlikely(max_pages > WB_LIST_INLINE))
185262306a36Sopenharmony_ci			wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
185362306a36Sopenharmony_ci						    GFP_NOIO | __GFP_NORETRY |
185462306a36Sopenharmony_ci						    __GFP_NOMEMALLOC | __GFP_NOWARN);
185562306a36Sopenharmony_ci
185662306a36Sopenharmony_ci		if (likely(max_pages <= WB_LIST_INLINE) || unlikely(!wb->wc_list)) {
185762306a36Sopenharmony_ci			wb->wc_list = wb->wc_list_inline;
185862306a36Sopenharmony_ci			max_pages = WB_LIST_INLINE;
185962306a36Sopenharmony_ci		}
186062306a36Sopenharmony_ci
186162306a36Sopenharmony_ci		BUG_ON(!wc_add_block(wb, e));
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_ci		wb->wc_list[0] = e;
186462306a36Sopenharmony_ci		wb->wc_list_n = 1;
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci		while (wbl->size && wb->wc_list_n < max_pages) {
186762306a36Sopenharmony_ci			f = container_of(wbl->list.prev, struct wc_entry, lru);
186862306a36Sopenharmony_ci			if (read_original_sector(wc, f) !=
186962306a36Sopenharmony_ci			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
187062306a36Sopenharmony_ci				break;
187162306a36Sopenharmony_ci			if (!wc_add_block(wb, f))
187262306a36Sopenharmony_ci				break;
187362306a36Sopenharmony_ci			wbl->size--;
187462306a36Sopenharmony_ci			list_del(&f->lru);
187562306a36Sopenharmony_ci			wb->wc_list[wb->wc_list_n++] = f;
187662306a36Sopenharmony_ci			e = f;
187762306a36Sopenharmony_ci		}
187862306a36Sopenharmony_ci		if (WC_MODE_FUA(wc))
187962306a36Sopenharmony_ci			bio->bi_opf |= REQ_FUA;
188062306a36Sopenharmony_ci		if (writecache_has_error(wc)) {
188162306a36Sopenharmony_ci			bio->bi_status = BLK_STS_IOERR;
188262306a36Sopenharmony_ci			bio_endio(bio);
188362306a36Sopenharmony_ci		} else if (unlikely(!bio_sectors(bio))) {
188462306a36Sopenharmony_ci			bio->bi_status = BLK_STS_OK;
188562306a36Sopenharmony_ci			bio_endio(bio);
188662306a36Sopenharmony_ci		} else {
188762306a36Sopenharmony_ci			submit_bio(bio);
188862306a36Sopenharmony_ci		}
188962306a36Sopenharmony_ci
189062306a36Sopenharmony_ci		__writeback_throttle(wc, wbl);
189162306a36Sopenharmony_ci	}
189262306a36Sopenharmony_ci}
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_cistatic void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl)
189562306a36Sopenharmony_ci{
189662306a36Sopenharmony_ci	struct wc_entry *e, *f;
189762306a36Sopenharmony_ci	struct dm_io_region from, to;
189862306a36Sopenharmony_ci	struct copy_struct *c;
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci	while (wbl->size) {
190162306a36Sopenharmony_ci		unsigned int n_sectors;
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci		wbl->size--;
190462306a36Sopenharmony_ci		e = container_of(wbl->list.prev, struct wc_entry, lru);
190562306a36Sopenharmony_ci		list_del(&e->lru);
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci		n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT);
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci		from.bdev = wc->ssd_dev->bdev;
191062306a36Sopenharmony_ci		from.sector = cache_sector(wc, e);
191162306a36Sopenharmony_ci		from.count = n_sectors;
191262306a36Sopenharmony_ci		to.bdev = wc->dev->bdev;
191362306a36Sopenharmony_ci		to.sector = read_original_sector(wc, e);
191462306a36Sopenharmony_ci		to.count = n_sectors;
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci		c = mempool_alloc(&wc->copy_pool, GFP_NOIO);
191762306a36Sopenharmony_ci		c->wc = wc;
191862306a36Sopenharmony_ci		c->e = e;
191962306a36Sopenharmony_ci		c->n_entries = e->wc_list_contiguous;
192062306a36Sopenharmony_ci
192162306a36Sopenharmony_ci		while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) {
192262306a36Sopenharmony_ci			wbl->size--;
192362306a36Sopenharmony_ci			f = container_of(wbl->list.prev, struct wc_entry, lru);
192462306a36Sopenharmony_ci			BUG_ON(f != e + 1);
192562306a36Sopenharmony_ci			list_del(&f->lru);
192662306a36Sopenharmony_ci			e = f;
192762306a36Sopenharmony_ci		}
192862306a36Sopenharmony_ci
192962306a36Sopenharmony_ci		if (unlikely(to.sector + to.count > wc->data_device_sectors)) {
193062306a36Sopenharmony_ci			if (to.sector >= wc->data_device_sectors) {
193162306a36Sopenharmony_ci				writecache_copy_endio(0, 0, c);
193262306a36Sopenharmony_ci				continue;
193362306a36Sopenharmony_ci			}
193462306a36Sopenharmony_ci			from.count = to.count = wc->data_device_sectors - to.sector;
193562306a36Sopenharmony_ci		}
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci		dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
193862306a36Sopenharmony_ci
193962306a36Sopenharmony_ci		__writeback_throttle(wc, wbl);
194062306a36Sopenharmony_ci	}
194162306a36Sopenharmony_ci}
194262306a36Sopenharmony_ci
/*
 * writecache_writeback - work item that selects cache entries and submits
 * them for writeback.
 *
 * @work: the writeback_work member embedded in a struct dm_writecache.
 *
 * Outline, as implemented below:
 *   1. In non-pmem (SSD) mode, flush the kcopyd client.  Then, if wc->pause
 *      is non-zero, sleep until dm_iot_idle_time() reports at least
 *      wc->pause; the delay is bypassed for cleaner mode, writeback_all and
 *      a suspended target.
 *   2. Under wc_lock(), move entries from wc->lru onto a private
 *      writeback_list, marking each one write_in_progress and coalescing
 *      entries whose original sectors are consecutive into runs of at most
 *      BIO_MAX_VECS blocks.
 *   3. After wc_unlock(), hand the list to the pmem or ssd submit helper
 *      between blk_start_plug() and blk_finish_plug().
 *   4. If wc->writeback_all is set, loop until
 *      writecache_wait_for_writeback() returns zero.
 */
194362306a36Sopenharmony_cistatic void writecache_writeback(struct work_struct *work)
194462306a36Sopenharmony_ci{
194562306a36Sopenharmony_ci	struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
194662306a36Sopenharmony_ci	struct blk_plug plug;
	/*
	 * e: current candidate / head of the run being built
	 * f: last tree node accepted into (or stepped over by) the run
	 * g: tree node currently being examined; also carried over to the
	 *    next outer iteration in writeback_all mode
	 */
194762306a36Sopenharmony_ci	struct wc_entry *f, *g, *e = NULL;
194862306a36Sopenharmony_ci	struct rb_node *node, *next_node;
194962306a36Sopenharmony_ci	struct list_head skipped;
195062306a36Sopenharmony_ci	struct writeback_list wbl;
195162306a36Sopenharmony_ci	unsigned long n_walked;
195262306a36Sopenharmony_ci
195362306a36Sopenharmony_ci	if (!WC_MODE_PMEM(wc)) {
195462306a36Sopenharmony_ci		/* Wait for any active kcopyd work on behalf of ssd writeback */
195562306a36Sopenharmony_ci		dm_kcopyd_client_flush(wc->dm_kcopyd);
195662306a36Sopenharmony_ci	}
195762306a36Sopenharmony_ci
	/*
	 * Optional start delay: keep sleeping (at most HZ jiffies per nap)
	 * until the idle time reported by dm_iot_idle_time() reaches
	 * wc->pause.  Cleaner mode, writeback_all and a suspended target
	 * leave the loop immediately.
	 */
195862306a36Sopenharmony_ci	if (likely(wc->pause != 0)) {
195962306a36Sopenharmony_ci		while (1) {
196062306a36Sopenharmony_ci			unsigned long idle;
196162306a36Sopenharmony_ci
196262306a36Sopenharmony_ci			if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) ||
196362306a36Sopenharmony_ci			    unlikely(dm_suspended(wc->ti)))
196462306a36Sopenharmony_ci				break;
196562306a36Sopenharmony_ci			idle = dm_iot_idle_time(&wc->iot);
196662306a36Sopenharmony_ci			if (idle >= wc->pause)
196762306a36Sopenharmony_ci				break;
			/* Sleep for the time still missing, capped at HZ. */
196862306a36Sopenharmony_ci			idle = wc->pause - idle;
196962306a36Sopenharmony_ci			if (idle > HZ)
197062306a36Sopenharmony_ci				idle = HZ;
197162306a36Sopenharmony_ci			schedule_timeout_idle(idle);
197262306a36Sopenharmony_ci		}
197362306a36Sopenharmony_ci	}
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci	wc_lock(wc);
197662306a36Sopenharmony_cirestart:
	/* Nothing is written back once writecache_has_error() is true. */
197762306a36Sopenharmony_ci	if (writecache_has_error(wc)) {
197862306a36Sopenharmony_ci		wc_unlock(wc);
197962306a36Sopenharmony_ci		return;
198062306a36Sopenharmony_ci	}
198162306a36Sopenharmony_ci
	/*
	 * writeback_all: go back to "restart" (re-checking the error state)
	 * for as long as writecache_wait_for_writeback() returns non-zero.
	 * NOTE(review): presumably non-zero means it actually had to wait --
	 * confirm against that helper.
	 */
198262306a36Sopenharmony_ci	if (unlikely(wc->writeback_all)) {
198362306a36Sopenharmony_ci		if (writecache_wait_for_writeback(wc))
198462306a36Sopenharmony_ci			goto restart;
198562306a36Sopenharmony_ci	}
198662306a36Sopenharmony_ci
	/* NOTE(review): presumably waits for in-flight write bios -- confirm. */
198762306a36Sopenharmony_ci	if (wc->overwrote_committed)
198862306a36Sopenharmony_ci		writecache_wait_for_ios(wc, WRITE);
198962306a36Sopenharmony_ci
199062306a36Sopenharmony_ci	n_walked = 0;
199162306a36Sopenharmony_ci	INIT_LIST_HEAD(&skipped);
199262306a36Sopenharmony_ci	INIT_LIST_HEAD(&wbl.list);
199362306a36Sopenharmony_ci	wbl.size = 0;
	/*
	 * Keep selecting entries while the LRU is non-empty and any of:
	 *  - everything must be written back (writeback_all),
	 *  - freelist_size + writeback_size is at or below
	 *    freelist_low_watermark,
	 *  - the entry at the LRU tail is at least
	 *    max_age - max_age / MAX_AGE_DIV jiffies old.
	 */
199462306a36Sopenharmony_ci	while (!list_empty(&wc->lru) &&
199562306a36Sopenharmony_ci	       (wc->writeback_all ||
199662306a36Sopenharmony_ci		wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark ||
199762306a36Sopenharmony_ci		(jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >=
199862306a36Sopenharmony_ci		 wc->max_age - wc->max_age / MAX_AGE_DIV))) {
199962306a36Sopenharmony_ci
		/*
		 * Cap the work done in one pass: once more than
		 * WRITEBACK_LATENCY entries have been walked (and this is not
		 * a writeback_all pass), requeue the work item unless the
		 * target is suspended, and go submit what was collected.
		 */
200062306a36Sopenharmony_ci		n_walked++;
200162306a36Sopenharmony_ci		if (unlikely(n_walked > WRITEBACK_LATENCY) &&
200262306a36Sopenharmony_ci		    likely(!wc->writeback_all)) {
200362306a36Sopenharmony_ci			if (likely(!dm_suspended(wc->ti)))
200462306a36Sopenharmony_ci				queue_work(wc->writeback_wq, &wc->writeback_work);
200562306a36Sopenharmony_ci			break;
200662306a36Sopenharmony_ci		}
200762306a36Sopenharmony_ci
		/*
		 * Pick the next candidate.  In writeback_all mode the rb-tree
		 * is walked in order: the first pass calls writecache_flush()
		 * and starts at rb_first(); later passes resume from g, the
		 * last node examined by the run-extension loop below.
		 * Otherwise the candidate is the entry at the LRU tail.
		 *
		 * NOTE(review): g has no initialiser and is only assigned in
		 * the inner loop; confirm it is always set before "e = g" is
		 * reached (e.g. after the "continue" on the skip path, or when
		 * rb_next() returned NULL on the first inner iteration).
		 */
200862306a36Sopenharmony_ci		if (unlikely(wc->writeback_all)) {
200962306a36Sopenharmony_ci			if (unlikely(!e)) {
201062306a36Sopenharmony_ci				writecache_flush(wc);
201162306a36Sopenharmony_ci				e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node);
201262306a36Sopenharmony_ci			} else
201362306a36Sopenharmony_ci				e = g;
201462306a36Sopenharmony_ci		} else
201562306a36Sopenharmony_ci			e = container_of(wc->lru.prev, struct wc_entry, lru);
		/* A candidate must not already be under writeback. */
201662306a36Sopenharmony_ci		BUG_ON(e->write_in_progress);
		/* Flush first if the candidate is not committed yet. */
201762306a36Sopenharmony_ci		if (unlikely(!writecache_entry_is_committed(wc, e)))
201862306a36Sopenharmony_ci			writecache_flush(wc);
201962306a36Sopenharmony_ci
		/*
		 * If the preceding tree node maps the same original sector, it
		 * is expected to be under writeback already (BUG_ON
		 * otherwise).  Park e on the "skipped" list; it is returned to
		 * the LRU after the loop.
		 */
202062306a36Sopenharmony_ci		node = rb_prev(&e->rb_node);
202162306a36Sopenharmony_ci		if (node) {
202262306a36Sopenharmony_ci			f = container_of(node, struct wc_entry, rb_node);
202362306a36Sopenharmony_ci			if (unlikely(read_original_sector(wc, f) ==
202462306a36Sopenharmony_ci				     read_original_sector(wc, e))) {
202562306a36Sopenharmony_ci				BUG_ON(!f->write_in_progress);
202662306a36Sopenharmony_ci				list_move(&e->lru, &skipped);
202762306a36Sopenharmony_ci				cond_resched();
202862306a36Sopenharmony_ci				continue;
202962306a36Sopenharmony_ci			}
203062306a36Sopenharmony_ci		}
		/* Claim e as the head of a new run of length 1. */
203162306a36Sopenharmony_ci		wc->writeback_size++;
203262306a36Sopenharmony_ci		list_move(&e->lru, &wbl.list);
203362306a36Sopenharmony_ci		wbl.size++;
203462306a36Sopenharmony_ci		e->write_in_progress = true;
203562306a36Sopenharmony_ci		e->wc_list_contiguous = 1;
203662306a36Sopenharmony_ci
203762306a36Sopenharmony_ci		f = e;
203862306a36Sopenharmony_ci
		/* Extend the run with the tree nodes that follow f. */
203962306a36Sopenharmony_ci		while (1) {
204062306a36Sopenharmony_ci			next_node = rb_next(&f->rb_node);
204162306a36Sopenharmony_ci			if (unlikely(!next_node))
204262306a36Sopenharmony_ci				break;
204362306a36Sopenharmony_ci			g = container_of(next_node, struct wc_entry, rb_node);
			/*
			 * A node mapping the same original sector as f is
			 * stepped over without being added to the run.
			 */
204462306a36Sopenharmony_ci			if (unlikely(read_original_sector(wc, g) ==
204562306a36Sopenharmony_ci			    read_original_sector(wc, f))) {
204662306a36Sopenharmony_ci				f = g;
204762306a36Sopenharmony_ci				continue;
204862306a36Sopenharmony_ci			}
			/* Stop unless g is exactly one cache block past f. */
204962306a36Sopenharmony_ci			if (read_original_sector(wc, g) !=
205062306a36Sopenharmony_ci			    read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT))
205162306a36Sopenharmony_ci				break;
			/* Stop at entries already in writeback or uncommitted. */
205262306a36Sopenharmony_ci			if (unlikely(g->write_in_progress))
205362306a36Sopenharmony_ci				break;
205462306a36Sopenharmony_ci			if (unlikely(!writecache_entry_is_committed(wc, g)))
205562306a36Sopenharmony_ci				break;
205662306a36Sopenharmony_ci
			/*
			 * In SSD mode the run must also be consecutive in the
			 * wc_entry array; __writecache_writeback_ssd() relies
			 * on this (BUG_ON(f != e + 1)).
			 */
205762306a36Sopenharmony_ci			if (!WC_MODE_PMEM(wc)) {
205862306a36Sopenharmony_ci				if (g != f + 1)
205962306a36Sopenharmony_ci					break;
206062306a36Sopenharmony_ci			}
206162306a36Sopenharmony_ci
206262306a36Sopenharmony_ci			n_walked++;
206362306a36Sopenharmony_ci			//if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all))
206462306a36Sopenharmony_ci			//	break;
206562306a36Sopenharmony_ci
			/*
			 * Add g to the run.  Only the head's
			 * wc_list_contiguous is a run length; members are set
			 * to BIO_MAX_VECS.
			 * NOTE(review): the reason for that member value is
			 * not evident from this function -- confirm.
			 */
206662306a36Sopenharmony_ci			wc->writeback_size++;
206762306a36Sopenharmony_ci			list_move(&g->lru, &wbl.list);
206862306a36Sopenharmony_ci			wbl.size++;
206962306a36Sopenharmony_ci			g->write_in_progress = true;
207062306a36Sopenharmony_ci			g->wc_list_contiguous = BIO_MAX_VECS;
207162306a36Sopenharmony_ci			f = g;
207262306a36Sopenharmony_ci			e->wc_list_contiguous++;
			/*
			 * Run is full.  In writeback_all mode advance g to the
			 * next tree node (if any) so the outer loop resumes
			 * after this run.
			 */
207362306a36Sopenharmony_ci			if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) {
207462306a36Sopenharmony_ci				if (unlikely(wc->writeback_all)) {
207562306a36Sopenharmony_ci					next_node = rb_next(&f->rb_node);
207662306a36Sopenharmony_ci					if (likely(next_node))
207762306a36Sopenharmony_ci						g = container_of(next_node, struct wc_entry, rb_node);
207862306a36Sopenharmony_ci				}
207962306a36Sopenharmony_ci				break;
208062306a36Sopenharmony_ci			}
208162306a36Sopenharmony_ci		}
208262306a36Sopenharmony_ci		cond_resched();
208362306a36Sopenharmony_ci	}
208462306a36Sopenharmony_ci
	/* Return the parked entries to the tail of the LRU list. */
208562306a36Sopenharmony_ci	if (!list_empty(&skipped)) {
208662306a36Sopenharmony_ci		list_splice_tail(&skipped, &wc->lru);
208762306a36Sopenharmony_ci		/*
208862306a36Sopenharmony_ci		 * If we didn't do any progress, we must wait until some
208962306a36Sopenharmony_ci		 * writeback finishes to avoid burning CPU in a loop
209062306a36Sopenharmony_ci		 */
209162306a36Sopenharmony_ci		if (unlikely(!wbl.size))
209262306a36Sopenharmony_ci			writecache_wait_for_writeback(wc);
209362306a36Sopenharmony_ci	}
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ci	wc_unlock(wc);
209662306a36Sopenharmony_ci
	/* Submission happens outside the lock, inside a block plug. */
209762306a36Sopenharmony_ci	blk_start_plug(&plug);
209862306a36Sopenharmony_ci
209962306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
210062306a36Sopenharmony_ci		__writecache_writeback_pmem(wc, &wbl);
210162306a36Sopenharmony_ci	else
210262306a36Sopenharmony_ci		__writecache_writeback_ssd(wc, &wbl);
210362306a36Sopenharmony_ci
210462306a36Sopenharmony_ci	blk_finish_plug(&plug);
210562306a36Sopenharmony_ci
	/*
	 * writeback_all: retake the lock and loop until
	 * writecache_wait_for_writeback() returns zero.
	 */
210662306a36Sopenharmony_ci	if (unlikely(wc->writeback_all)) {
210762306a36Sopenharmony_ci		wc_lock(wc);
210862306a36Sopenharmony_ci		while (writecache_wait_for_writeback(wc))
210962306a36Sopenharmony_ci			;
211062306a36Sopenharmony_ci		wc_unlock(wc);
211162306a36Sopenharmony_ci	}
211262306a36Sopenharmony_ci}
211362306a36Sopenharmony_ci
211462306a36Sopenharmony_cistatic int calculate_memory_size(uint64_t device_size, unsigned int block_size,
211562306a36Sopenharmony_ci				 size_t *n_blocks_p, size_t *n_metadata_blocks_p)
211662306a36Sopenharmony_ci{
211762306a36Sopenharmony_ci	uint64_t n_blocks, offset;
211862306a36Sopenharmony_ci	struct wc_entry e;
211962306a36Sopenharmony_ci
212062306a36Sopenharmony_ci	n_blocks = device_size;
212162306a36Sopenharmony_ci	do_div(n_blocks, block_size + sizeof(struct wc_memory_entry));
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci	while (1) {
212462306a36Sopenharmony_ci		if (!n_blocks)
212562306a36Sopenharmony_ci			return -ENOSPC;
212662306a36Sopenharmony_ci		/* Verify the following entries[n_blocks] won't overflow */
212762306a36Sopenharmony_ci		if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) /
212862306a36Sopenharmony_ci				 sizeof(struct wc_memory_entry)))
212962306a36Sopenharmony_ci			return -EFBIG;
213062306a36Sopenharmony_ci		offset = offsetof(struct wc_memory_superblock, entries[n_blocks]);
213162306a36Sopenharmony_ci		offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1);
213262306a36Sopenharmony_ci		if (offset + n_blocks * block_size <= device_size)
213362306a36Sopenharmony_ci			break;
213462306a36Sopenharmony_ci		n_blocks--;
213562306a36Sopenharmony_ci	}
213662306a36Sopenharmony_ci
213762306a36Sopenharmony_ci	/* check if the bit field overflows */
213862306a36Sopenharmony_ci	e.index = n_blocks;
213962306a36Sopenharmony_ci	if (e.index != n_blocks)
214062306a36Sopenharmony_ci		return -EFBIG;
214162306a36Sopenharmony_ci
214262306a36Sopenharmony_ci	if (n_blocks_p)
214362306a36Sopenharmony_ci		*n_blocks_p = n_blocks;
214462306a36Sopenharmony_ci	if (n_metadata_blocks_p)
214562306a36Sopenharmony_ci		*n_metadata_blocks_p = offset >> __ffs(block_size);
214662306a36Sopenharmony_ci	return 0;
214762306a36Sopenharmony_ci}
214862306a36Sopenharmony_ci
214962306a36Sopenharmony_cistatic int init_memory(struct dm_writecache *wc)
215062306a36Sopenharmony_ci{
215162306a36Sopenharmony_ci	size_t b;
215262306a36Sopenharmony_ci	int r;
215362306a36Sopenharmony_ci
215462306a36Sopenharmony_ci	r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL);
215562306a36Sopenharmony_ci	if (r)
215662306a36Sopenharmony_ci		return r;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci	r = writecache_alloc_entries(wc);
215962306a36Sopenharmony_ci	if (r)
216062306a36Sopenharmony_ci		return r;
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci	for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++)
216362306a36Sopenharmony_ci		pmem_assign(sb(wc)->padding[b], cpu_to_le64(0));
216462306a36Sopenharmony_ci	pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
216562306a36Sopenharmony_ci	pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size));
216662306a36Sopenharmony_ci	pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
216762306a36Sopenharmony_ci	pmem_assign(sb(wc)->seq_count, cpu_to_le64(0));
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci	for (b = 0; b < wc->n_blocks; b++) {
217062306a36Sopenharmony_ci		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
217162306a36Sopenharmony_ci		cond_resched();
217262306a36Sopenharmony_ci	}
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci	writecache_flush_all_metadata(wc);
217562306a36Sopenharmony_ci	writecache_commit_flushed(wc, false);
217662306a36Sopenharmony_ci	pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
217762306a36Sopenharmony_ci	writecache_flush_region(wc, &sb(wc)->magic, sizeof(sb(wc)->magic));
217862306a36Sopenharmony_ci	writecache_commit_flushed(wc, false);
217962306a36Sopenharmony_ci
218062306a36Sopenharmony_ci	return 0;
218162306a36Sopenharmony_ci}
218262306a36Sopenharmony_ci
218362306a36Sopenharmony_cistatic void writecache_dtr(struct dm_target *ti)
218462306a36Sopenharmony_ci{
218562306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_ci	if (!wc)
218862306a36Sopenharmony_ci		return;
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci	if (wc->endio_thread)
219162306a36Sopenharmony_ci		kthread_stop(wc->endio_thread);
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci	if (wc->flush_thread)
219462306a36Sopenharmony_ci		kthread_stop(wc->flush_thread);
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci	bioset_exit(&wc->bio_set);
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_ci	mempool_exit(&wc->copy_pool);
219962306a36Sopenharmony_ci
220062306a36Sopenharmony_ci	if (wc->writeback_wq)
220162306a36Sopenharmony_ci		destroy_workqueue(wc->writeback_wq);
220262306a36Sopenharmony_ci
220362306a36Sopenharmony_ci	if (wc->dev)
220462306a36Sopenharmony_ci		dm_put_device(ti, wc->dev);
220562306a36Sopenharmony_ci
220662306a36Sopenharmony_ci	if (wc->ssd_dev)
220762306a36Sopenharmony_ci		dm_put_device(ti, wc->ssd_dev);
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci	vfree(wc->entries);
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci	if (wc->memory_map) {
221262306a36Sopenharmony_ci		if (WC_MODE_PMEM(wc))
221362306a36Sopenharmony_ci			persistent_memory_release(wc);
221462306a36Sopenharmony_ci		else
221562306a36Sopenharmony_ci			vfree(wc->memory_map);
221662306a36Sopenharmony_ci	}
221762306a36Sopenharmony_ci
221862306a36Sopenharmony_ci	if (wc->dm_kcopyd)
221962306a36Sopenharmony_ci		dm_kcopyd_client_destroy(wc->dm_kcopyd);
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci	if (wc->dm_io)
222262306a36Sopenharmony_ci		dm_io_client_destroy(wc->dm_io);
222362306a36Sopenharmony_ci
222462306a36Sopenharmony_ci	vfree(wc->dirty_bitmap);
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci	kfree(wc);
222762306a36Sopenharmony_ci}
222862306a36Sopenharmony_ci
222962306a36Sopenharmony_cistatic int writecache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
223062306a36Sopenharmony_ci{
223162306a36Sopenharmony_ci	struct dm_writecache *wc;
223262306a36Sopenharmony_ci	struct dm_arg_set as;
223362306a36Sopenharmony_ci	const char *string;
223462306a36Sopenharmony_ci	unsigned int opt_params;
223562306a36Sopenharmony_ci	size_t offset, data_size;
223662306a36Sopenharmony_ci	int i, r;
223762306a36Sopenharmony_ci	char dummy;
223862306a36Sopenharmony_ci	int high_wm_percent = HIGH_WATERMARK;
223962306a36Sopenharmony_ci	int low_wm_percent = LOW_WATERMARK;
224062306a36Sopenharmony_ci	uint64_t x;
224162306a36Sopenharmony_ci	struct wc_memory_superblock s;
224262306a36Sopenharmony_ci
224362306a36Sopenharmony_ci	static struct dm_arg _args[] = {
224462306a36Sopenharmony_ci		{0, 18, "Invalid number of feature args"},
224562306a36Sopenharmony_ci	};
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci	as.argc = argc;
224862306a36Sopenharmony_ci	as.argv = argv;
224962306a36Sopenharmony_ci
225062306a36Sopenharmony_ci	wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL);
225162306a36Sopenharmony_ci	if (!wc) {
225262306a36Sopenharmony_ci		ti->error = "Cannot allocate writecache structure";
225362306a36Sopenharmony_ci		r = -ENOMEM;
225462306a36Sopenharmony_ci		goto bad;
225562306a36Sopenharmony_ci	}
225662306a36Sopenharmony_ci	ti->private = wc;
225762306a36Sopenharmony_ci	wc->ti = ti;
225862306a36Sopenharmony_ci
225962306a36Sopenharmony_ci	mutex_init(&wc->lock);
226062306a36Sopenharmony_ci	wc->max_age = MAX_AGE_UNSPECIFIED;
226162306a36Sopenharmony_ci	writecache_poison_lists(wc);
226262306a36Sopenharmony_ci	init_waitqueue_head(&wc->freelist_wait);
226362306a36Sopenharmony_ci	timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0);
226462306a36Sopenharmony_ci	timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0);
226562306a36Sopenharmony_ci
226662306a36Sopenharmony_ci	for (i = 0; i < 2; i++) {
226762306a36Sopenharmony_ci		atomic_set(&wc->bio_in_progress[i], 0);
226862306a36Sopenharmony_ci		init_waitqueue_head(&wc->bio_in_progress_wait[i]);
226962306a36Sopenharmony_ci	}
227062306a36Sopenharmony_ci
227162306a36Sopenharmony_ci	wc->dm_io = dm_io_client_create();
227262306a36Sopenharmony_ci	if (IS_ERR(wc->dm_io)) {
227362306a36Sopenharmony_ci		r = PTR_ERR(wc->dm_io);
227462306a36Sopenharmony_ci		ti->error = "Unable to allocate dm-io client";
227562306a36Sopenharmony_ci		wc->dm_io = NULL;
227662306a36Sopenharmony_ci		goto bad;
227762306a36Sopenharmony_ci	}
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci	wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1);
228062306a36Sopenharmony_ci	if (!wc->writeback_wq) {
228162306a36Sopenharmony_ci		r = -ENOMEM;
228262306a36Sopenharmony_ci		ti->error = "Could not allocate writeback workqueue";
228362306a36Sopenharmony_ci		goto bad;
228462306a36Sopenharmony_ci	}
228562306a36Sopenharmony_ci	INIT_WORK(&wc->writeback_work, writecache_writeback);
228662306a36Sopenharmony_ci	INIT_WORK(&wc->flush_work, writecache_flush_work);
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	dm_iot_init(&wc->iot);
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_ci	raw_spin_lock_init(&wc->endio_list_lock);
229162306a36Sopenharmony_ci	INIT_LIST_HEAD(&wc->endio_list);
229262306a36Sopenharmony_ci	wc->endio_thread = kthread_run(writecache_endio_thread, wc, "writecache_endio");
229362306a36Sopenharmony_ci	if (IS_ERR(wc->endio_thread)) {
229462306a36Sopenharmony_ci		r = PTR_ERR(wc->endio_thread);
229562306a36Sopenharmony_ci		wc->endio_thread = NULL;
229662306a36Sopenharmony_ci		ti->error = "Couldn't spawn endio thread";
229762306a36Sopenharmony_ci		goto bad;
229862306a36Sopenharmony_ci	}
229962306a36Sopenharmony_ci
230062306a36Sopenharmony_ci	/*
230162306a36Sopenharmony_ci	 * Parse the mode (pmem or ssd)
230262306a36Sopenharmony_ci	 */
230362306a36Sopenharmony_ci	string = dm_shift_arg(&as);
230462306a36Sopenharmony_ci	if (!string)
230562306a36Sopenharmony_ci		goto bad_arguments;
230662306a36Sopenharmony_ci
230762306a36Sopenharmony_ci	if (!strcasecmp(string, "s")) {
230862306a36Sopenharmony_ci		wc->pmem_mode = false;
230962306a36Sopenharmony_ci	} else if (!strcasecmp(string, "p")) {
231062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM
231162306a36Sopenharmony_ci		wc->pmem_mode = true;
231262306a36Sopenharmony_ci		wc->writeback_fua = true;
231362306a36Sopenharmony_ci#else
231462306a36Sopenharmony_ci		/*
231562306a36Sopenharmony_ci		 * If the architecture doesn't support persistent memory or
231662306a36Sopenharmony_ci		 * the kernel doesn't support any DAX drivers, this driver can
231762306a36Sopenharmony_ci		 * only be used in SSD-only mode.
231862306a36Sopenharmony_ci		 */
231962306a36Sopenharmony_ci		r = -EOPNOTSUPP;
232062306a36Sopenharmony_ci		ti->error = "Persistent memory or DAX not supported on this system";
232162306a36Sopenharmony_ci		goto bad;
232262306a36Sopenharmony_ci#endif
232362306a36Sopenharmony_ci	} else {
232462306a36Sopenharmony_ci		goto bad_arguments;
232562306a36Sopenharmony_ci	}
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc)) {
232862306a36Sopenharmony_ci		r = bioset_init(&wc->bio_set, BIO_POOL_SIZE,
232962306a36Sopenharmony_ci				offsetof(struct writeback_struct, bio),
233062306a36Sopenharmony_ci				BIOSET_NEED_BVECS);
233162306a36Sopenharmony_ci		if (r) {
233262306a36Sopenharmony_ci			ti->error = "Could not allocate bio set";
233362306a36Sopenharmony_ci			goto bad;
233462306a36Sopenharmony_ci		}
233562306a36Sopenharmony_ci	} else {
233662306a36Sopenharmony_ci		wc->pause = PAUSE_WRITEBACK;
233762306a36Sopenharmony_ci		r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
233862306a36Sopenharmony_ci		if (r) {
233962306a36Sopenharmony_ci			ti->error = "Could not allocate mempool";
234062306a36Sopenharmony_ci			goto bad;
234162306a36Sopenharmony_ci		}
234262306a36Sopenharmony_ci	}
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci	/*
234562306a36Sopenharmony_ci	 * Parse the origin data device
234662306a36Sopenharmony_ci	 */
234762306a36Sopenharmony_ci	string = dm_shift_arg(&as);
234862306a36Sopenharmony_ci	if (!string)
234962306a36Sopenharmony_ci		goto bad_arguments;
235062306a36Sopenharmony_ci	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev);
235162306a36Sopenharmony_ci	if (r) {
235262306a36Sopenharmony_ci		ti->error = "Origin data device lookup failed";
235362306a36Sopenharmony_ci		goto bad;
235462306a36Sopenharmony_ci	}
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci	/*
235762306a36Sopenharmony_ci	 * Parse cache data device (be it pmem or ssd)
235862306a36Sopenharmony_ci	 */
235962306a36Sopenharmony_ci	string = dm_shift_arg(&as);
236062306a36Sopenharmony_ci	if (!string)
236162306a36Sopenharmony_ci		goto bad_arguments;
236262306a36Sopenharmony_ci
236362306a36Sopenharmony_ci	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev);
236462306a36Sopenharmony_ci	if (r) {
236562306a36Sopenharmony_ci		ti->error = "Cache data device lookup failed";
236662306a36Sopenharmony_ci		goto bad;
236762306a36Sopenharmony_ci	}
236862306a36Sopenharmony_ci	wc->memory_map_size = bdev_nr_bytes(wc->ssd_dev->bdev);
236962306a36Sopenharmony_ci
237062306a36Sopenharmony_ci	/*
237162306a36Sopenharmony_ci	 * Parse the cache block size
237262306a36Sopenharmony_ci	 */
237362306a36Sopenharmony_ci	string = dm_shift_arg(&as);
237462306a36Sopenharmony_ci	if (!string)
237562306a36Sopenharmony_ci		goto bad_arguments;
237662306a36Sopenharmony_ci	if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 ||
237762306a36Sopenharmony_ci	    wc->block_size < 512 || wc->block_size > PAGE_SIZE ||
237862306a36Sopenharmony_ci	    (wc->block_size & (wc->block_size - 1))) {
237962306a36Sopenharmony_ci		r = -EINVAL;
238062306a36Sopenharmony_ci		ti->error = "Invalid block size";
238162306a36Sopenharmony_ci		goto bad;
238262306a36Sopenharmony_ci	}
238362306a36Sopenharmony_ci	if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
238462306a36Sopenharmony_ci	    wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
238562306a36Sopenharmony_ci		r = -EINVAL;
238662306a36Sopenharmony_ci		ti->error = "Block size is smaller than device logical block size";
238762306a36Sopenharmony_ci		goto bad;
238862306a36Sopenharmony_ci	}
238962306a36Sopenharmony_ci	wc->block_size_bits = __ffs(wc->block_size);
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
239262306a36Sopenharmony_ci	wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM;
239362306a36Sopenharmony_ci	wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC);
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ci	/*
239662306a36Sopenharmony_ci	 * Parse optional arguments
239762306a36Sopenharmony_ci	 */
239862306a36Sopenharmony_ci	r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
239962306a36Sopenharmony_ci	if (r)
240062306a36Sopenharmony_ci		goto bad;
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	while (opt_params) {
240362306a36Sopenharmony_ci		string = dm_shift_arg(&as), opt_params--;
240462306a36Sopenharmony_ci		if (!strcasecmp(string, "start_sector") && opt_params >= 1) {
240562306a36Sopenharmony_ci			unsigned long long start_sector;
240662306a36Sopenharmony_ci
240762306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
240862306a36Sopenharmony_ci			if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
240962306a36Sopenharmony_ci				goto invalid_optional;
241062306a36Sopenharmony_ci			wc->start_sector = start_sector;
241162306a36Sopenharmony_ci			wc->start_sector_set = true;
241262306a36Sopenharmony_ci			if (wc->start_sector != start_sector ||
241362306a36Sopenharmony_ci			    wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT)
241462306a36Sopenharmony_ci				goto invalid_optional;
241562306a36Sopenharmony_ci		} else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) {
241662306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
241762306a36Sopenharmony_ci			if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1)
241862306a36Sopenharmony_ci				goto invalid_optional;
241962306a36Sopenharmony_ci			if (high_wm_percent < 0 || high_wm_percent > 100)
242062306a36Sopenharmony_ci				goto invalid_optional;
242162306a36Sopenharmony_ci			wc->high_wm_percent_value = high_wm_percent;
242262306a36Sopenharmony_ci			wc->high_wm_percent_set = true;
242362306a36Sopenharmony_ci		} else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
242462306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
242562306a36Sopenharmony_ci			if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1)
242662306a36Sopenharmony_ci				goto invalid_optional;
242762306a36Sopenharmony_ci			if (low_wm_percent < 0 || low_wm_percent > 100)
242862306a36Sopenharmony_ci				goto invalid_optional;
242962306a36Sopenharmony_ci			wc->low_wm_percent_value = low_wm_percent;
243062306a36Sopenharmony_ci			wc->low_wm_percent_set = true;
243162306a36Sopenharmony_ci		} else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
243262306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
243362306a36Sopenharmony_ci			if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1)
243462306a36Sopenharmony_ci				goto invalid_optional;
243562306a36Sopenharmony_ci			wc->max_writeback_jobs_set = true;
243662306a36Sopenharmony_ci		} else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) {
243762306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
243862306a36Sopenharmony_ci			if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1)
243962306a36Sopenharmony_ci				goto invalid_optional;
244062306a36Sopenharmony_ci			wc->autocommit_blocks_set = true;
244162306a36Sopenharmony_ci		} else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) {
244262306a36Sopenharmony_ci			unsigned int autocommit_msecs;
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
244562306a36Sopenharmony_ci			if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1)
244662306a36Sopenharmony_ci				goto invalid_optional;
244762306a36Sopenharmony_ci			if (autocommit_msecs > 3600000)
244862306a36Sopenharmony_ci				goto invalid_optional;
244962306a36Sopenharmony_ci			wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
245062306a36Sopenharmony_ci			wc->autocommit_time_value = autocommit_msecs;
245162306a36Sopenharmony_ci			wc->autocommit_time_set = true;
245262306a36Sopenharmony_ci		} else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
245362306a36Sopenharmony_ci			unsigned int max_age_msecs;
245462306a36Sopenharmony_ci
245562306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
245662306a36Sopenharmony_ci			if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1)
245762306a36Sopenharmony_ci				goto invalid_optional;
245862306a36Sopenharmony_ci			if (max_age_msecs > 86400000)
245962306a36Sopenharmony_ci				goto invalid_optional;
246062306a36Sopenharmony_ci			wc->max_age = msecs_to_jiffies(max_age_msecs);
246162306a36Sopenharmony_ci			wc->max_age_set = true;
246262306a36Sopenharmony_ci			wc->max_age_value = max_age_msecs;
246362306a36Sopenharmony_ci		} else if (!strcasecmp(string, "cleaner")) {
246462306a36Sopenharmony_ci			wc->cleaner_set = true;
246562306a36Sopenharmony_ci			wc->cleaner = true;
246662306a36Sopenharmony_ci		} else if (!strcasecmp(string, "fua")) {
246762306a36Sopenharmony_ci			if (WC_MODE_PMEM(wc)) {
246862306a36Sopenharmony_ci				wc->writeback_fua = true;
246962306a36Sopenharmony_ci				wc->writeback_fua_set = true;
247062306a36Sopenharmony_ci			} else
247162306a36Sopenharmony_ci				goto invalid_optional;
247262306a36Sopenharmony_ci		} else if (!strcasecmp(string, "nofua")) {
247362306a36Sopenharmony_ci			if (WC_MODE_PMEM(wc)) {
247462306a36Sopenharmony_ci				wc->writeback_fua = false;
247562306a36Sopenharmony_ci				wc->writeback_fua_set = true;
247662306a36Sopenharmony_ci			} else
247762306a36Sopenharmony_ci				goto invalid_optional;
247862306a36Sopenharmony_ci		} else if (!strcasecmp(string, "metadata_only")) {
247962306a36Sopenharmony_ci			wc->metadata_only = true;
248062306a36Sopenharmony_ci		} else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
248162306a36Sopenharmony_ci			unsigned int pause_msecs;
248262306a36Sopenharmony_ci
248362306a36Sopenharmony_ci			if (WC_MODE_PMEM(wc))
248462306a36Sopenharmony_ci				goto invalid_optional;
248562306a36Sopenharmony_ci			string = dm_shift_arg(&as), opt_params--;
248662306a36Sopenharmony_ci			if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
248762306a36Sopenharmony_ci				goto invalid_optional;
248862306a36Sopenharmony_ci			if (pause_msecs > 60000)
248962306a36Sopenharmony_ci				goto invalid_optional;
249062306a36Sopenharmony_ci			wc->pause = msecs_to_jiffies(pause_msecs);
249162306a36Sopenharmony_ci			wc->pause_set = true;
249262306a36Sopenharmony_ci			wc->pause_value = pause_msecs;
249362306a36Sopenharmony_ci		} else {
249462306a36Sopenharmony_ciinvalid_optional:
249562306a36Sopenharmony_ci			r = -EINVAL;
249662306a36Sopenharmony_ci			ti->error = "Invalid optional argument";
249762306a36Sopenharmony_ci			goto bad;
249862306a36Sopenharmony_ci		}
249962306a36Sopenharmony_ci	}
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	if (high_wm_percent < low_wm_percent) {
250262306a36Sopenharmony_ci		r = -EINVAL;
250362306a36Sopenharmony_ci		ti->error = "High watermark must be greater than or equal to low watermark";
250462306a36Sopenharmony_ci		goto bad;
250562306a36Sopenharmony_ci	}
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc)) {
250862306a36Sopenharmony_ci		if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
250962306a36Sopenharmony_ci			r = -EOPNOTSUPP;
251062306a36Sopenharmony_ci			ti->error = "Asynchronous persistent memory not supported as pmem cache";
251162306a36Sopenharmony_ci			goto bad;
251262306a36Sopenharmony_ci		}
251362306a36Sopenharmony_ci
251462306a36Sopenharmony_ci		r = persistent_memory_claim(wc);
251562306a36Sopenharmony_ci		if (r) {
251662306a36Sopenharmony_ci			ti->error = "Unable to map persistent memory for cache";
251762306a36Sopenharmony_ci			goto bad;
251862306a36Sopenharmony_ci		}
251962306a36Sopenharmony_ci	} else {
252062306a36Sopenharmony_ci		size_t n_blocks, n_metadata_blocks;
252162306a36Sopenharmony_ci		uint64_t n_bitmap_bits;
252262306a36Sopenharmony_ci
252362306a36Sopenharmony_ci		wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT;
252462306a36Sopenharmony_ci
252562306a36Sopenharmony_ci		bio_list_init(&wc->flush_list);
252662306a36Sopenharmony_ci		wc->flush_thread = kthread_run(writecache_flush_thread, wc, "dm_writecache_flush");
252762306a36Sopenharmony_ci		if (IS_ERR(wc->flush_thread)) {
252862306a36Sopenharmony_ci			r = PTR_ERR(wc->flush_thread);
252962306a36Sopenharmony_ci			wc->flush_thread = NULL;
253062306a36Sopenharmony_ci			ti->error = "Couldn't spawn flush thread";
253162306a36Sopenharmony_ci			goto bad;
253262306a36Sopenharmony_ci		}
253362306a36Sopenharmony_ci
253462306a36Sopenharmony_ci		r = calculate_memory_size(wc->memory_map_size, wc->block_size,
253562306a36Sopenharmony_ci					  &n_blocks, &n_metadata_blocks);
253662306a36Sopenharmony_ci		if (r) {
253762306a36Sopenharmony_ci			ti->error = "Invalid device size";
253862306a36Sopenharmony_ci			goto bad;
253962306a36Sopenharmony_ci		}
254062306a36Sopenharmony_ci
254162306a36Sopenharmony_ci		n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) +
254262306a36Sopenharmony_ci				 BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY;
254362306a36Sopenharmony_ci		/* this is limitation of test_bit functions */
254462306a36Sopenharmony_ci		if (n_bitmap_bits > 1U << 31) {
254562306a36Sopenharmony_ci			r = -EFBIG;
254662306a36Sopenharmony_ci			ti->error = "Invalid device size";
254762306a36Sopenharmony_ci			goto bad;
254862306a36Sopenharmony_ci		}
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_ci		wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits);
255162306a36Sopenharmony_ci		if (!wc->memory_map) {
255262306a36Sopenharmony_ci			r = -ENOMEM;
255362306a36Sopenharmony_ci			ti->error = "Unable to allocate memory for metadata";
255462306a36Sopenharmony_ci			goto bad;
255562306a36Sopenharmony_ci		}
255662306a36Sopenharmony_ci
255762306a36Sopenharmony_ci		wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle);
255862306a36Sopenharmony_ci		if (IS_ERR(wc->dm_kcopyd)) {
255962306a36Sopenharmony_ci			r = PTR_ERR(wc->dm_kcopyd);
256062306a36Sopenharmony_ci			ti->error = "Unable to allocate dm-kcopyd client";
256162306a36Sopenharmony_ci			wc->dm_kcopyd = NULL;
256262306a36Sopenharmony_ci			goto bad;
256362306a36Sopenharmony_ci		}
256462306a36Sopenharmony_ci
256562306a36Sopenharmony_ci		wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT);
256662306a36Sopenharmony_ci		wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) /
256762306a36Sopenharmony_ci			BITS_PER_LONG * sizeof(unsigned long);
256862306a36Sopenharmony_ci		wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size);
256962306a36Sopenharmony_ci		if (!wc->dirty_bitmap) {
257062306a36Sopenharmony_ci			r = -ENOMEM;
257162306a36Sopenharmony_ci			ti->error = "Unable to allocate dirty bitmap";
257262306a36Sopenharmony_ci			goto bad;
257362306a36Sopenharmony_ci		}
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci		r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
257662306a36Sopenharmony_ci		if (r) {
257762306a36Sopenharmony_ci			ti->error = "Unable to read first block of metadata";
257862306a36Sopenharmony_ci			goto bad;
257962306a36Sopenharmony_ci		}
258062306a36Sopenharmony_ci	}
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci	r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
258362306a36Sopenharmony_ci	if (r) {
258462306a36Sopenharmony_ci		ti->error = "Hardware memory error when reading superblock";
258562306a36Sopenharmony_ci		goto bad;
258662306a36Sopenharmony_ci	}
258762306a36Sopenharmony_ci	if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) {
258862306a36Sopenharmony_ci		r = init_memory(wc);
258962306a36Sopenharmony_ci		if (r) {
259062306a36Sopenharmony_ci			ti->error = "Unable to initialize device";
259162306a36Sopenharmony_ci			goto bad;
259262306a36Sopenharmony_ci		}
259362306a36Sopenharmony_ci		r = copy_mc_to_kernel(&s, sb(wc),
259462306a36Sopenharmony_ci				      sizeof(struct wc_memory_superblock));
259562306a36Sopenharmony_ci		if (r) {
259662306a36Sopenharmony_ci			ti->error = "Hardware memory error when reading superblock";
259762306a36Sopenharmony_ci			goto bad;
259862306a36Sopenharmony_ci		}
259962306a36Sopenharmony_ci	}
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci	if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) {
260262306a36Sopenharmony_ci		ti->error = "Invalid magic in the superblock";
260362306a36Sopenharmony_ci		r = -EINVAL;
260462306a36Sopenharmony_ci		goto bad;
260562306a36Sopenharmony_ci	}
260662306a36Sopenharmony_ci
260762306a36Sopenharmony_ci	if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) {
260862306a36Sopenharmony_ci		ti->error = "Invalid version in the superblock";
260962306a36Sopenharmony_ci		r = -EINVAL;
261062306a36Sopenharmony_ci		goto bad;
261162306a36Sopenharmony_ci	}
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	if (le32_to_cpu(s.block_size) != wc->block_size) {
261462306a36Sopenharmony_ci		ti->error = "Block size does not match superblock";
261562306a36Sopenharmony_ci		r = -EINVAL;
261662306a36Sopenharmony_ci		goto bad;
261762306a36Sopenharmony_ci	}
261862306a36Sopenharmony_ci
261962306a36Sopenharmony_ci	wc->n_blocks = le64_to_cpu(s.n_blocks);
262062306a36Sopenharmony_ci
262162306a36Sopenharmony_ci	offset = wc->n_blocks * sizeof(struct wc_memory_entry);
262262306a36Sopenharmony_ci	if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) {
262362306a36Sopenharmony_cioverflow:
262462306a36Sopenharmony_ci		ti->error = "Overflow in size calculation";
262562306a36Sopenharmony_ci		r = -EINVAL;
262662306a36Sopenharmony_ci		goto bad;
262762306a36Sopenharmony_ci	}
262862306a36Sopenharmony_ci	offset += sizeof(struct wc_memory_superblock);
262962306a36Sopenharmony_ci	if (offset < sizeof(struct wc_memory_superblock))
263062306a36Sopenharmony_ci		goto overflow;
263162306a36Sopenharmony_ci	offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1);
263262306a36Sopenharmony_ci	data_size = wc->n_blocks * (size_t)wc->block_size;
263362306a36Sopenharmony_ci	if (!offset || (data_size / wc->block_size != wc->n_blocks) ||
263462306a36Sopenharmony_ci	    (offset + data_size < offset))
263562306a36Sopenharmony_ci		goto overflow;
263662306a36Sopenharmony_ci	if (offset + data_size > wc->memory_map_size) {
263762306a36Sopenharmony_ci		ti->error = "Memory area is too small";
263862306a36Sopenharmony_ci		r = -EINVAL;
263962306a36Sopenharmony_ci		goto bad;
264062306a36Sopenharmony_ci	}
264162306a36Sopenharmony_ci
264262306a36Sopenharmony_ci	wc->metadata_sectors = offset >> SECTOR_SHIFT;
264362306a36Sopenharmony_ci	wc->block_start = (char *)sb(wc) + offset;
264462306a36Sopenharmony_ci
264562306a36Sopenharmony_ci	x = (uint64_t)wc->n_blocks * (100 - high_wm_percent);
264662306a36Sopenharmony_ci	x += 50;
264762306a36Sopenharmony_ci	do_div(x, 100);
264862306a36Sopenharmony_ci	wc->freelist_high_watermark = x;
264962306a36Sopenharmony_ci	x = (uint64_t)wc->n_blocks * (100 - low_wm_percent);
265062306a36Sopenharmony_ci	x += 50;
265162306a36Sopenharmony_ci	do_div(x, 100);
265262306a36Sopenharmony_ci	wc->freelist_low_watermark = x;
265362306a36Sopenharmony_ci
265462306a36Sopenharmony_ci	if (wc->cleaner)
265562306a36Sopenharmony_ci		activate_cleaner(wc);
265662306a36Sopenharmony_ci
265762306a36Sopenharmony_ci	r = writecache_alloc_entries(wc);
265862306a36Sopenharmony_ci	if (r) {
265962306a36Sopenharmony_ci		ti->error = "Cannot allocate memory";
266062306a36Sopenharmony_ci		goto bad;
266162306a36Sopenharmony_ci	}
266262306a36Sopenharmony_ci
266362306a36Sopenharmony_ci	ti->num_flush_bios = WC_MODE_PMEM(wc) ? 1 : 2;
266462306a36Sopenharmony_ci	ti->flush_supported = true;
266562306a36Sopenharmony_ci	ti->num_discard_bios = 1;
266662306a36Sopenharmony_ci
266762306a36Sopenharmony_ci	if (WC_MODE_PMEM(wc))
266862306a36Sopenharmony_ci		persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);
266962306a36Sopenharmony_ci
267062306a36Sopenharmony_ci	return 0;
267162306a36Sopenharmony_ci
267262306a36Sopenharmony_cibad_arguments:
267362306a36Sopenharmony_ci	r = -EINVAL;
267462306a36Sopenharmony_ci	ti->error = "Bad arguments";
267562306a36Sopenharmony_cibad:
267662306a36Sopenharmony_ci	writecache_dtr(ti);
267762306a36Sopenharmony_ci	return r;
267862306a36Sopenharmony_ci}
267962306a36Sopenharmony_ci
268062306a36Sopenharmony_cistatic void writecache_status(struct dm_target *ti, status_type_t type,
268162306a36Sopenharmony_ci			      unsigned int status_flags, char *result, unsigned int maxlen)
268262306a36Sopenharmony_ci{
268362306a36Sopenharmony_ci	struct dm_writecache *wc = ti->private;
268462306a36Sopenharmony_ci	unsigned int extra_args;
268562306a36Sopenharmony_ci	unsigned int sz = 0;
268662306a36Sopenharmony_ci
268762306a36Sopenharmony_ci	switch (type) {
268862306a36Sopenharmony_ci	case STATUSTYPE_INFO:
268962306a36Sopenharmony_ci		DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
269062306a36Sopenharmony_ci		       writecache_has_error(wc),
269162306a36Sopenharmony_ci		       (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size,
269262306a36Sopenharmony_ci		       (unsigned long long)wc->writeback_size,
269362306a36Sopenharmony_ci		       wc->stats.reads,
269462306a36Sopenharmony_ci		       wc->stats.read_hits,
269562306a36Sopenharmony_ci		       wc->stats.writes,
269662306a36Sopenharmony_ci		       wc->stats.write_hits_uncommitted,
269762306a36Sopenharmony_ci		       wc->stats.write_hits_committed,
269862306a36Sopenharmony_ci		       wc->stats.writes_around,
269962306a36Sopenharmony_ci		       wc->stats.writes_allocate,
270062306a36Sopenharmony_ci		       wc->stats.writes_blocked_on_freelist,
270162306a36Sopenharmony_ci		       wc->stats.flushes,
270262306a36Sopenharmony_ci		       wc->stats.discards);
270362306a36Sopenharmony_ci		break;
270462306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
270562306a36Sopenharmony_ci		DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
270662306a36Sopenharmony_ci				wc->dev->name, wc->ssd_dev->name, wc->block_size);
270762306a36Sopenharmony_ci		extra_args = 0;
270862306a36Sopenharmony_ci		if (wc->start_sector_set)
270962306a36Sopenharmony_ci			extra_args += 2;
271062306a36Sopenharmony_ci		if (wc->high_wm_percent_set)
271162306a36Sopenharmony_ci			extra_args += 2;
271262306a36Sopenharmony_ci		if (wc->low_wm_percent_set)
271362306a36Sopenharmony_ci			extra_args += 2;
271462306a36Sopenharmony_ci		if (wc->max_writeback_jobs_set)
271562306a36Sopenharmony_ci			extra_args += 2;
271662306a36Sopenharmony_ci		if (wc->autocommit_blocks_set)
271762306a36Sopenharmony_ci			extra_args += 2;
271862306a36Sopenharmony_ci		if (wc->autocommit_time_set)
271962306a36Sopenharmony_ci			extra_args += 2;
272062306a36Sopenharmony_ci		if (wc->max_age_set)
272162306a36Sopenharmony_ci			extra_args += 2;
272262306a36Sopenharmony_ci		if (wc->cleaner_set)
272362306a36Sopenharmony_ci			extra_args++;
272462306a36Sopenharmony_ci		if (wc->writeback_fua_set)
272562306a36Sopenharmony_ci			extra_args++;
272662306a36Sopenharmony_ci		if (wc->metadata_only)
272762306a36Sopenharmony_ci			extra_args++;
272862306a36Sopenharmony_ci		if (wc->pause_set)
272962306a36Sopenharmony_ci			extra_args += 2;
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_ci		DMEMIT("%u", extra_args);
273262306a36Sopenharmony_ci		if (wc->start_sector_set)
273362306a36Sopenharmony_ci			DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
273462306a36Sopenharmony_ci		if (wc->high_wm_percent_set)
273562306a36Sopenharmony_ci			DMEMIT(" high_watermark %u", wc->high_wm_percent_value);
273662306a36Sopenharmony_ci		if (wc->low_wm_percent_set)
273762306a36Sopenharmony_ci			DMEMIT(" low_watermark %u", wc->low_wm_percent_value);
273862306a36Sopenharmony_ci		if (wc->max_writeback_jobs_set)
273962306a36Sopenharmony_ci			DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
274062306a36Sopenharmony_ci		if (wc->autocommit_blocks_set)
274162306a36Sopenharmony_ci			DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
274262306a36Sopenharmony_ci		if (wc->autocommit_time_set)
274362306a36Sopenharmony_ci			DMEMIT(" autocommit_time %u", wc->autocommit_time_value);
274462306a36Sopenharmony_ci		if (wc->max_age_set)
274562306a36Sopenharmony_ci			DMEMIT(" max_age %u", wc->max_age_value);
274662306a36Sopenharmony_ci		if (wc->cleaner_set)
274762306a36Sopenharmony_ci			DMEMIT(" cleaner");
274862306a36Sopenharmony_ci		if (wc->writeback_fua_set)
274962306a36Sopenharmony_ci			DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
275062306a36Sopenharmony_ci		if (wc->metadata_only)
275162306a36Sopenharmony_ci			DMEMIT(" metadata_only");
275262306a36Sopenharmony_ci		if (wc->pause_set)
275362306a36Sopenharmony_ci			DMEMIT(" pause_writeback %u", wc->pause_value);
275462306a36Sopenharmony_ci		break;
275562306a36Sopenharmony_ci	case STATUSTYPE_IMA:
275662306a36Sopenharmony_ci		*result = '\0';
275762306a36Sopenharmony_ci		break;
275862306a36Sopenharmony_ci	}
275962306a36Sopenharmony_ci}
276062306a36Sopenharmony_ci
276162306a36Sopenharmony_cistatic struct target_type writecache_target = {
276262306a36Sopenharmony_ci	.name			= "writecache",
276362306a36Sopenharmony_ci	.version		= {1, 6, 0},
276462306a36Sopenharmony_ci	.module			= THIS_MODULE,
276562306a36Sopenharmony_ci	.ctr			= writecache_ctr,
276662306a36Sopenharmony_ci	.dtr			= writecache_dtr,
276762306a36Sopenharmony_ci	.status			= writecache_status,
276862306a36Sopenharmony_ci	.postsuspend		= writecache_suspend,
276962306a36Sopenharmony_ci	.resume			= writecache_resume,
277062306a36Sopenharmony_ci	.message		= writecache_message,
277162306a36Sopenharmony_ci	.map			= writecache_map,
277262306a36Sopenharmony_ci	.end_io			= writecache_end_io,
277362306a36Sopenharmony_ci	.iterate_devices	= writecache_iterate_devices,
277462306a36Sopenharmony_ci	.io_hints		= writecache_io_hints,
277562306a36Sopenharmony_ci};
277662306a36Sopenharmony_cimodule_dm(writecache);
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " writecache target");
277962306a36Sopenharmony_ciMODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
278062306a36Sopenharmony_ciMODULE_LICENSE("GPL");
2781