162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2018 Red Hat. All rights reserved. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This file is released under the GPL. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/device-mapper.h> 962306a36Sopenharmony_ci#include <linux/module.h> 1062306a36Sopenharmony_ci#include <linux/init.h> 1162306a36Sopenharmony_ci#include <linux/vmalloc.h> 1262306a36Sopenharmony_ci#include <linux/kthread.h> 1362306a36Sopenharmony_ci#include <linux/dm-io.h> 1462306a36Sopenharmony_ci#include <linux/dm-kcopyd.h> 1562306a36Sopenharmony_ci#include <linux/dax.h> 1662306a36Sopenharmony_ci#include <linux/pfn_t.h> 1762306a36Sopenharmony_ci#include <linux/libnvdimm.h> 1862306a36Sopenharmony_ci#include <linux/delay.h> 1962306a36Sopenharmony_ci#include "dm-io-tracker.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#define DM_MSG_PREFIX "writecache" 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#define HIGH_WATERMARK 50 2462306a36Sopenharmony_ci#define LOW_WATERMARK 45 2562306a36Sopenharmony_ci#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16) 2662306a36Sopenharmony_ci#define ENDIO_LATENCY 16 2762306a36Sopenharmony_ci#define WRITEBACK_LATENCY 64 2862306a36Sopenharmony_ci#define AUTOCOMMIT_BLOCKS_SSD 65536 2962306a36Sopenharmony_ci#define AUTOCOMMIT_BLOCKS_PMEM 64 3062306a36Sopenharmony_ci#define AUTOCOMMIT_MSEC 1000 3162306a36Sopenharmony_ci#define MAX_AGE_DIV 16 3262306a36Sopenharmony_ci#define MAX_AGE_UNSPECIFIED -1UL 3362306a36Sopenharmony_ci#define PAUSE_WRITEBACK (HZ * 3) 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci#define BITMAP_GRANULARITY 65536 3662306a36Sopenharmony_ci#if BITMAP_GRANULARITY < PAGE_SIZE 3762306a36Sopenharmony_ci#undef BITMAP_GRANULARITY 3862306a36Sopenharmony_ci#define BITMAP_GRANULARITY PAGE_SIZE 3962306a36Sopenharmony_ci#endif 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX) 4262306a36Sopenharmony_ci#define DM_WRITECACHE_HAS_PMEM 4362306a36Sopenharmony_ci#endif 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM 4662306a36Sopenharmony_ci#define pmem_assign(dest, src) \ 4762306a36Sopenharmony_cido { \ 4862306a36Sopenharmony_ci typeof(dest) uniq = (src); \ 4962306a36Sopenharmony_ci memcpy_flushcache(&(dest), &uniq, sizeof(dest)); \ 5062306a36Sopenharmony_ci} while (0) 5162306a36Sopenharmony_ci#else 5262306a36Sopenharmony_ci#define pmem_assign(dest, src) ((dest) = (src)) 5362306a36Sopenharmony_ci#endif 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM) 5662306a36Sopenharmony_ci#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 5762306a36Sopenharmony_ci#endif 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci#define MEMORY_SUPERBLOCK_MAGIC 0x23489321 6062306a36Sopenharmony_ci#define MEMORY_SUPERBLOCK_VERSION 1 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistruct wc_memory_entry { 6362306a36Sopenharmony_ci __le64 original_sector; 6462306a36Sopenharmony_ci __le64 seq_count; 6562306a36Sopenharmony_ci}; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistruct wc_memory_superblock { 6862306a36Sopenharmony_ci union { 6962306a36Sopenharmony_ci struct { 7062306a36Sopenharmony_ci __le32 magic; 7162306a36Sopenharmony_ci __le32 version; 7262306a36Sopenharmony_ci __le32 block_size; 7362306a36Sopenharmony_ci __le32 pad; 7462306a36Sopenharmony_ci __le64 n_blocks; 7562306a36Sopenharmony_ci __le64 seq_count; 7662306a36Sopenharmony_ci }; 7762306a36Sopenharmony_ci __le64 padding[8]; 7862306a36Sopenharmony_ci }; 7962306a36Sopenharmony_ci struct wc_memory_entry entries[]; 8062306a36Sopenharmony_ci}; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_cistruct wc_entry { 8362306a36Sopenharmony_ci struct rb_node rb_node; 8462306a36Sopenharmony_ci struct list_head lru; 8562306a36Sopenharmony_ci unsigned short wc_list_contiguous; 8662306a36Sopenharmony_ci#if BITS_PER_LONG == 64 8762306a36Sopenharmony_ci bool write_in_progress : 1; 8862306a36Sopenharmony_ci unsigned long index : 47; 8962306a36Sopenharmony_ci#else 9062306a36Sopenharmony_ci bool write_in_progress; 9162306a36Sopenharmony_ci unsigned long index; 9262306a36Sopenharmony_ci#endif 9362306a36Sopenharmony_ci unsigned long age; 9462306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 9562306a36Sopenharmony_ci uint64_t original_sector; 9662306a36Sopenharmony_ci uint64_t seq_count; 9762306a36Sopenharmony_ci#endif 9862306a36Sopenharmony_ci}; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM 10162306a36Sopenharmony_ci#define WC_MODE_PMEM(wc) ((wc)->pmem_mode) 10262306a36Sopenharmony_ci#define WC_MODE_FUA(wc) ((wc)->writeback_fua) 10362306a36Sopenharmony_ci#else 10462306a36Sopenharmony_ci#define WC_MODE_PMEM(wc) false 10562306a36Sopenharmony_ci#define WC_MODE_FUA(wc) false 10662306a36Sopenharmony_ci#endif 10762306a36Sopenharmony_ci#define WC_MODE_SORT_FREELIST(wc) (!WC_MODE_PMEM(wc)) 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cistruct dm_writecache { 11062306a36Sopenharmony_ci struct mutex lock; 11162306a36Sopenharmony_ci struct list_head lru; 11262306a36Sopenharmony_ci union { 11362306a36Sopenharmony_ci struct list_head freelist; 11462306a36Sopenharmony_ci struct { 11562306a36Sopenharmony_ci struct rb_root freetree; 11662306a36Sopenharmony_ci struct wc_entry *current_free; 11762306a36Sopenharmony_ci }; 11862306a36Sopenharmony_ci }; 11962306a36Sopenharmony_ci struct rb_root tree; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci size_t freelist_size; 12262306a36Sopenharmony_ci size_t writeback_size; 12362306a36Sopenharmony_ci size_t freelist_high_watermark; 12462306a36Sopenharmony_ci size_t freelist_low_watermark; 12562306a36Sopenharmony_ci unsigned long max_age; 12662306a36Sopenharmony_ci unsigned long pause; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci unsigned int uncommitted_blocks; 12962306a36Sopenharmony_ci unsigned int autocommit_blocks; 13062306a36Sopenharmony_ci unsigned int max_writeback_jobs; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci int error; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci unsigned long autocommit_jiffies; 13562306a36Sopenharmony_ci struct timer_list autocommit_timer; 13662306a36Sopenharmony_ci struct wait_queue_head freelist_wait; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci struct timer_list max_age_timer; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci atomic_t bio_in_progress[2]; 14162306a36Sopenharmony_ci struct wait_queue_head bio_in_progress_wait[2]; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci struct dm_target *ti; 14462306a36Sopenharmony_ci struct dm_dev *dev; 14562306a36Sopenharmony_ci struct dm_dev *ssd_dev; 14662306a36Sopenharmony_ci sector_t start_sector; 14762306a36Sopenharmony_ci void *memory_map; 14862306a36Sopenharmony_ci uint64_t memory_map_size; 14962306a36Sopenharmony_ci size_t metadata_sectors; 15062306a36Sopenharmony_ci size_t n_blocks; 15162306a36Sopenharmony_ci uint64_t seq_count; 15262306a36Sopenharmony_ci sector_t data_device_sectors; 15362306a36Sopenharmony_ci void *block_start; 15462306a36Sopenharmony_ci struct wc_entry *entries; 15562306a36Sopenharmony_ci unsigned int block_size; 15662306a36Sopenharmony_ci unsigned char block_size_bits; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci bool pmem_mode:1; 15962306a36Sopenharmony_ci bool writeback_fua:1; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci bool overwrote_committed:1; 16262306a36Sopenharmony_ci bool memory_vmapped:1; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci bool start_sector_set:1; 16562306a36Sopenharmony_ci bool high_wm_percent_set:1; 16662306a36Sopenharmony_ci bool low_wm_percent_set:1; 16762306a36Sopenharmony_ci bool max_writeback_jobs_set:1; 16862306a36Sopenharmony_ci bool autocommit_blocks_set:1; 16962306a36Sopenharmony_ci bool autocommit_time_set:1; 17062306a36Sopenharmony_ci bool max_age_set:1; 17162306a36Sopenharmony_ci bool writeback_fua_set:1; 17262306a36Sopenharmony_ci bool flush_on_suspend:1; 17362306a36Sopenharmony_ci bool cleaner:1; 17462306a36Sopenharmony_ci bool cleaner_set:1; 17562306a36Sopenharmony_ci bool metadata_only:1; 17662306a36Sopenharmony_ci bool pause_set:1; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci unsigned int high_wm_percent_value; 17962306a36Sopenharmony_ci unsigned int low_wm_percent_value; 18062306a36Sopenharmony_ci unsigned int autocommit_time_value; 18162306a36Sopenharmony_ci unsigned int max_age_value; 18262306a36Sopenharmony_ci unsigned int pause_value; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci unsigned int writeback_all; 18562306a36Sopenharmony_ci struct workqueue_struct *writeback_wq; 18662306a36Sopenharmony_ci struct work_struct writeback_work; 18762306a36Sopenharmony_ci struct work_struct flush_work; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci struct dm_io_tracker iot; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci struct dm_io_client *dm_io; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci raw_spinlock_t endio_list_lock; 19462306a36Sopenharmony_ci struct list_head endio_list; 19562306a36Sopenharmony_ci struct task_struct *endio_thread; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci struct task_struct *flush_thread; 19862306a36Sopenharmony_ci struct bio_list flush_list; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci struct dm_kcopyd_client *dm_kcopyd; 20162306a36Sopenharmony_ci unsigned long *dirty_bitmap; 20262306a36Sopenharmony_ci unsigned int dirty_bitmap_size; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci struct bio_set bio_set; 20562306a36Sopenharmony_ci mempool_t copy_pool; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci struct { 20862306a36Sopenharmony_ci unsigned long long reads; 20962306a36Sopenharmony_ci unsigned long long read_hits; 21062306a36Sopenharmony_ci unsigned long long writes; 21162306a36Sopenharmony_ci unsigned long long write_hits_uncommitted; 21262306a36Sopenharmony_ci unsigned long long write_hits_committed; 21362306a36Sopenharmony_ci unsigned long long writes_around; 21462306a36Sopenharmony_ci unsigned long long writes_allocate; 21562306a36Sopenharmony_ci unsigned long long writes_blocked_on_freelist; 21662306a36Sopenharmony_ci unsigned long long flushes; 21762306a36Sopenharmony_ci unsigned long long discards; 21862306a36Sopenharmony_ci } stats; 21962306a36Sopenharmony_ci}; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci#define WB_LIST_INLINE 16 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cistruct writeback_struct { 22462306a36Sopenharmony_ci struct list_head endio_entry; 22562306a36Sopenharmony_ci struct dm_writecache *wc; 22662306a36Sopenharmony_ci struct wc_entry **wc_list; 22762306a36Sopenharmony_ci unsigned int wc_list_n; 22862306a36Sopenharmony_ci struct wc_entry *wc_list_inline[WB_LIST_INLINE]; 22962306a36Sopenharmony_ci struct bio bio; 23062306a36Sopenharmony_ci}; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_cistruct copy_struct { 23362306a36Sopenharmony_ci struct list_head endio_entry; 23462306a36Sopenharmony_ci struct dm_writecache *wc; 23562306a36Sopenharmony_ci struct wc_entry *e; 23662306a36Sopenharmony_ci unsigned int n_entries; 23762306a36Sopenharmony_ci int error; 23862306a36Sopenharmony_ci}; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ciDECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle, 24162306a36Sopenharmony_ci "A percentage of time allocated for data copying"); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cistatic void wc_lock(struct dm_writecache *wc) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci mutex_lock(&wc->lock); 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_cistatic void wc_unlock(struct dm_writecache *wc) 24962306a36Sopenharmony_ci{ 25062306a36Sopenharmony_ci mutex_unlock(&wc->lock); 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM 25462306a36Sopenharmony_cistatic int persistent_memory_claim(struct dm_writecache *wc) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci int r; 25762306a36Sopenharmony_ci loff_t s; 25862306a36Sopenharmony_ci long p, da; 25962306a36Sopenharmony_ci pfn_t pfn; 26062306a36Sopenharmony_ci int id; 26162306a36Sopenharmony_ci struct page **pages; 26262306a36Sopenharmony_ci sector_t offset; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci wc->memory_vmapped = false; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci s = wc->memory_map_size; 26762306a36Sopenharmony_ci p = s >> PAGE_SHIFT; 26862306a36Sopenharmony_ci if (!p) { 26962306a36Sopenharmony_ci r = -EINVAL; 27062306a36Sopenharmony_ci goto err1; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci if (p != s >> PAGE_SHIFT) { 27362306a36Sopenharmony_ci r = -EOVERFLOW; 27462306a36Sopenharmony_ci goto err1; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci offset = get_start_sect(wc->ssd_dev->bdev); 27862306a36Sopenharmony_ci if (offset & (PAGE_SIZE / 512 - 1)) { 27962306a36Sopenharmony_ci r = -EINVAL; 28062306a36Sopenharmony_ci goto err1; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci offset >>= PAGE_SHIFT - 9; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci id = dax_read_lock(); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_ACCESS, 28762306a36Sopenharmony_ci &wc->memory_map, &pfn); 28862306a36Sopenharmony_ci if (da < 0) { 28962306a36Sopenharmony_ci wc->memory_map = NULL; 29062306a36Sopenharmony_ci r = da; 29162306a36Sopenharmony_ci goto err2; 29262306a36Sopenharmony_ci } 29362306a36Sopenharmony_ci if (!pfn_t_has_page(pfn)) { 29462306a36Sopenharmony_ci wc->memory_map = NULL; 29562306a36Sopenharmony_ci r = -EOPNOTSUPP; 29662306a36Sopenharmony_ci goto err2; 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci if (da != p) { 29962306a36Sopenharmony_ci long i; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci wc->memory_map = NULL; 30262306a36Sopenharmony_ci pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); 30362306a36Sopenharmony_ci if (!pages) { 30462306a36Sopenharmony_ci r = -ENOMEM; 30562306a36Sopenharmony_ci goto err2; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci i = 0; 30862306a36Sopenharmony_ci do { 30962306a36Sopenharmony_ci long daa; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, 31262306a36Sopenharmony_ci p - i, DAX_ACCESS, NULL, &pfn); 31362306a36Sopenharmony_ci if (daa <= 0) { 31462306a36Sopenharmony_ci r = daa ? daa : -EINVAL; 31562306a36Sopenharmony_ci goto err3; 31662306a36Sopenharmony_ci } 31762306a36Sopenharmony_ci if (!pfn_t_has_page(pfn)) { 31862306a36Sopenharmony_ci r = -EOPNOTSUPP; 31962306a36Sopenharmony_ci goto err3; 32062306a36Sopenharmony_ci } 32162306a36Sopenharmony_ci while (daa-- && i < p) { 32262306a36Sopenharmony_ci pages[i++] = pfn_t_to_page(pfn); 32362306a36Sopenharmony_ci pfn.val++; 32462306a36Sopenharmony_ci if (!(i & 15)) 32562306a36Sopenharmony_ci cond_resched(); 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci } while (i < p); 32862306a36Sopenharmony_ci wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL); 32962306a36Sopenharmony_ci if (!wc->memory_map) { 33062306a36Sopenharmony_ci r = -ENOMEM; 33162306a36Sopenharmony_ci goto err3; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci kvfree(pages); 33462306a36Sopenharmony_ci wc->memory_vmapped = true; 33562306a36Sopenharmony_ci } 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci dax_read_unlock(id); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT; 34062306a36Sopenharmony_ci wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci return 0; 34362306a36Sopenharmony_cierr3: 34462306a36Sopenharmony_ci kvfree(pages); 34562306a36Sopenharmony_cierr2: 34662306a36Sopenharmony_ci dax_read_unlock(id); 34762306a36Sopenharmony_cierr1: 34862306a36Sopenharmony_ci return r; 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci#else 35162306a36Sopenharmony_cistatic int persistent_memory_claim(struct dm_writecache *wc) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci return -EOPNOTSUPP; 35462306a36Sopenharmony_ci} 35562306a36Sopenharmony_ci#endif 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cistatic void persistent_memory_release(struct dm_writecache *wc) 35862306a36Sopenharmony_ci{ 35962306a36Sopenharmony_ci if (wc->memory_vmapped) 36062306a36Sopenharmony_ci vunmap(wc->memory_map - ((size_t)wc->start_sector << SECTOR_SHIFT)); 36162306a36Sopenharmony_ci} 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_cistatic struct page *persistent_memory_page(void *addr) 36462306a36Sopenharmony_ci{ 36562306a36Sopenharmony_ci if (is_vmalloc_addr(addr)) 36662306a36Sopenharmony_ci return vmalloc_to_page(addr); 36762306a36Sopenharmony_ci else 36862306a36Sopenharmony_ci return virt_to_page(addr); 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_cistatic unsigned int persistent_memory_page_offset(void *addr) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci return (unsigned long)addr & (PAGE_SIZE - 1); 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic void persistent_memory_flush_cache(void *ptr, size_t size) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci if (is_vmalloc_addr(ptr)) 37962306a36Sopenharmony_ci flush_kernel_vmap_range(ptr, size); 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_cistatic void persistent_memory_invalidate_cache(void *ptr, size_t size) 38362306a36Sopenharmony_ci{ 38462306a36Sopenharmony_ci if (is_vmalloc_addr(ptr)) 38562306a36Sopenharmony_ci invalidate_kernel_vmap_range(ptr, size); 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_cistatic struct wc_memory_superblock *sb(struct dm_writecache *wc) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci return wc->memory_map; 39162306a36Sopenharmony_ci} 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_cistatic struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e) 39462306a36Sopenharmony_ci{ 39562306a36Sopenharmony_ci return &sb(wc)->entries[e->index]; 39662306a36Sopenharmony_ci} 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_cistatic void *memory_data(struct dm_writecache *wc, struct wc_entry *e) 39962306a36Sopenharmony_ci{ 40062306a36Sopenharmony_ci return (char *)wc->block_start + (e->index << wc->block_size_bits); 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_cistatic sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e) 40462306a36Sopenharmony_ci{ 40562306a36Sopenharmony_ci return wc->start_sector + wc->metadata_sectors + 40662306a36Sopenharmony_ci ((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT)); 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_cistatic uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e) 41062306a36Sopenharmony_ci{ 41162306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 41262306a36Sopenharmony_ci return e->original_sector; 41362306a36Sopenharmony_ci#else 41462306a36Sopenharmony_ci return le64_to_cpu(memory_entry(wc, e)->original_sector); 41562306a36Sopenharmony_ci#endif 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 42162306a36Sopenharmony_ci return e->seq_count; 42262306a36Sopenharmony_ci#else 42362306a36Sopenharmony_ci return le64_to_cpu(memory_entry(wc, e)->seq_count); 42462306a36Sopenharmony_ci#endif 42562306a36Sopenharmony_ci} 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_cistatic void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e) 42862306a36Sopenharmony_ci{ 42962306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 43062306a36Sopenharmony_ci e->seq_count = -1; 43162306a36Sopenharmony_ci#endif 43262306a36Sopenharmony_ci pmem_assign(memory_entry(wc, e)->seq_count, cpu_to_le64(-1)); 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistatic void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e, 43662306a36Sopenharmony_ci uint64_t original_sector, uint64_t seq_count) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci struct wc_memory_entry me; 43962306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 44062306a36Sopenharmony_ci e->original_sector = original_sector; 44162306a36Sopenharmony_ci e->seq_count = seq_count; 44262306a36Sopenharmony_ci#endif 44362306a36Sopenharmony_ci me.original_sector = cpu_to_le64(original_sector); 44462306a36Sopenharmony_ci me.seq_count = cpu_to_le64(seq_count); 44562306a36Sopenharmony_ci pmem_assign(*memory_entry(wc, e), me); 44662306a36Sopenharmony_ci} 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci#define writecache_error(wc, err, msg, arg...) \ 44962306a36Sopenharmony_cido { \ 45062306a36Sopenharmony_ci if (!cmpxchg(&(wc)->error, 0, err)) \ 45162306a36Sopenharmony_ci DMERR(msg, ##arg); \ 45262306a36Sopenharmony_ci wake_up(&(wc)->freelist_wait); \ 45362306a36Sopenharmony_ci} while (0) 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci#define writecache_has_error(wc) (unlikely(READ_ONCE((wc)->error))) 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_cistatic void writecache_flush_all_metadata(struct dm_writecache *wc) 45862306a36Sopenharmony_ci{ 45962306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc)) 46062306a36Sopenharmony_ci memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size); 46162306a36Sopenharmony_ci} 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_cistatic void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size) 46462306a36Sopenharmony_ci{ 46562306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc)) 46662306a36Sopenharmony_ci __set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY, 46762306a36Sopenharmony_ci wc->dirty_bitmap); 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_cistatic void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev); 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_cistruct io_notify { 47362306a36Sopenharmony_ci struct dm_writecache *wc; 47462306a36Sopenharmony_ci struct completion c; 47562306a36Sopenharmony_ci atomic_t count; 47662306a36Sopenharmony_ci}; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic void writecache_notify_io(unsigned long error, void *context) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci struct io_notify *endio = context; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci if (unlikely(error != 0)) 48362306a36Sopenharmony_ci writecache_error(endio->wc, -EIO, "error writing metadata"); 48462306a36Sopenharmony_ci BUG_ON(atomic_read(&endio->count) <= 0); 48562306a36Sopenharmony_ci if (atomic_dec_and_test(&endio->count)) 48662306a36Sopenharmony_ci complete(&endio->c); 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_cistatic void writecache_wait_for_ios(struct dm_writecache *wc, int direction) 49062306a36Sopenharmony_ci{ 49162306a36Sopenharmony_ci wait_event(wc->bio_in_progress_wait[direction], 49262306a36Sopenharmony_ci !atomic_read(&wc->bio_in_progress[direction])); 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_cistatic void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci struct dm_io_region region; 49862306a36Sopenharmony_ci struct dm_io_request req; 49962306a36Sopenharmony_ci struct io_notify endio = { 50062306a36Sopenharmony_ci wc, 50162306a36Sopenharmony_ci COMPLETION_INITIALIZER_ONSTACK(endio.c), 50262306a36Sopenharmony_ci ATOMIC_INIT(1), 50362306a36Sopenharmony_ci }; 50462306a36Sopenharmony_ci unsigned int bitmap_bits = wc->dirty_bitmap_size * 8; 50562306a36Sopenharmony_ci unsigned int i = 0; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci while (1) { 50862306a36Sopenharmony_ci unsigned int j; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i); 51162306a36Sopenharmony_ci if (unlikely(i == bitmap_bits)) 51262306a36Sopenharmony_ci break; 51362306a36Sopenharmony_ci j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci region.bdev = wc->ssd_dev->bdev; 51662306a36Sopenharmony_ci region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 51762306a36Sopenharmony_ci region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (unlikely(region.sector >= wc->metadata_sectors)) 52062306a36Sopenharmony_ci break; 52162306a36Sopenharmony_ci if (unlikely(region.sector + region.count > wc->metadata_sectors)) 52262306a36Sopenharmony_ci region.count = wc->metadata_sectors - region.sector; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci region.sector += wc->start_sector; 52562306a36Sopenharmony_ci atomic_inc(&endio.count); 52662306a36Sopenharmony_ci req.bi_opf = REQ_OP_WRITE | REQ_SYNC; 52762306a36Sopenharmony_ci req.mem.type = DM_IO_VMA; 52862306a36Sopenharmony_ci req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; 52962306a36Sopenharmony_ci req.client = wc->dm_io; 53062306a36Sopenharmony_ci req.notify.fn = writecache_notify_io; 53162306a36Sopenharmony_ci req.notify.context = &endio; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* writing via async dm-io (implied by notify.fn above) won't return an error */ 53462306a36Sopenharmony_ci (void) dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); 53562306a36Sopenharmony_ci i = j; 53662306a36Sopenharmony_ci } 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci writecache_notify_io(0, &endio); 53962306a36Sopenharmony_ci wait_for_completion_io(&endio.c); 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci if (wait_for_ios) 54262306a36Sopenharmony_ci writecache_wait_for_ios(wc, WRITE); 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci writecache_disk_flush(wc, wc->ssd_dev); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); 54762306a36Sopenharmony_ci} 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_cistatic void ssd_commit_superblock(struct dm_writecache *wc) 55062306a36Sopenharmony_ci{ 55162306a36Sopenharmony_ci int r; 55262306a36Sopenharmony_ci struct dm_io_region region; 55362306a36Sopenharmony_ci struct dm_io_request req; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci region.bdev = wc->ssd_dev->bdev; 55662306a36Sopenharmony_ci region.sector = 0; 55762306a36Sopenharmony_ci region.count = max(4096U, wc->block_size) >> SECTOR_SHIFT; 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci if (unlikely(region.sector + region.count > wc->metadata_sectors)) 56062306a36Sopenharmony_ci region.count = wc->metadata_sectors - region.sector; 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci region.sector += wc->start_sector; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci req.bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA; 56562306a36Sopenharmony_ci req.mem.type = DM_IO_VMA; 56662306a36Sopenharmony_ci req.mem.ptr.vma = (char *)wc->memory_map; 56762306a36Sopenharmony_ci req.client = wc->dm_io; 56862306a36Sopenharmony_ci req.notify.fn = NULL; 56962306a36Sopenharmony_ci req.notify.context = NULL; 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); 57262306a36Sopenharmony_ci if (unlikely(r)) 57362306a36Sopenharmony_ci writecache_error(wc, r, "error writing superblock"); 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_cistatic void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) 57762306a36Sopenharmony_ci{ 57862306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 57962306a36Sopenharmony_ci pmem_wmb(); 58062306a36Sopenharmony_ci else 58162306a36Sopenharmony_ci ssd_commit_flushed(wc, wait_for_ios); 58262306a36Sopenharmony_ci} 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_cistatic void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) 58562306a36Sopenharmony_ci{ 58662306a36Sopenharmony_ci int r; 58762306a36Sopenharmony_ci struct dm_io_region region; 58862306a36Sopenharmony_ci struct dm_io_request req; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci region.bdev = dev->bdev; 59162306a36Sopenharmony_ci region.sector = 0; 59262306a36Sopenharmony_ci region.count = 0; 59362306a36Sopenharmony_ci req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; 59462306a36Sopenharmony_ci req.mem.type = DM_IO_KMEM; 59562306a36Sopenharmony_ci req.mem.ptr.addr = NULL; 59662306a36Sopenharmony_ci req.client = wc->dm_io; 59762306a36Sopenharmony_ci req.notify.fn = NULL; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci r = dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); 60062306a36Sopenharmony_ci if (unlikely(r)) 60162306a36Sopenharmony_ci writecache_error(wc, r, "error flushing metadata: %d", r); 60262306a36Sopenharmony_ci} 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci#define WFE_RETURN_FOLLOWING 1 60562306a36Sopenharmony_ci#define WFE_LOWEST_SEQ 2 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_cistatic struct wc_entry *writecache_find_entry(struct dm_writecache *wc, 60862306a36Sopenharmony_ci uint64_t block, int flags) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci struct wc_entry *e; 61162306a36Sopenharmony_ci struct rb_node *node = wc->tree.rb_node; 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci if (unlikely(!node)) 61462306a36Sopenharmony_ci return NULL; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci while (1) { 61762306a36Sopenharmony_ci e = container_of(node, struct wc_entry, rb_node); 61862306a36Sopenharmony_ci if (read_original_sector(wc, e) == block) 61962306a36Sopenharmony_ci break; 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci node = (read_original_sector(wc, e) >= block ? 62262306a36Sopenharmony_ci e->rb_node.rb_left : e->rb_node.rb_right); 62362306a36Sopenharmony_ci if (unlikely(!node)) { 62462306a36Sopenharmony_ci if (!(flags & WFE_RETURN_FOLLOWING)) 62562306a36Sopenharmony_ci return NULL; 62662306a36Sopenharmony_ci if (read_original_sector(wc, e) >= block) 62762306a36Sopenharmony_ci return e; 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci node = rb_next(&e->rb_node); 63062306a36Sopenharmony_ci if (unlikely(!node)) 63162306a36Sopenharmony_ci return NULL; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci e = container_of(node, struct wc_entry, rb_node); 63462306a36Sopenharmony_ci return e; 63562306a36Sopenharmony_ci } 63662306a36Sopenharmony_ci } 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci while (1) { 63962306a36Sopenharmony_ci struct wc_entry *e2; 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci if (flags & WFE_LOWEST_SEQ) 64262306a36Sopenharmony_ci node = rb_prev(&e->rb_node); 64362306a36Sopenharmony_ci else 64462306a36Sopenharmony_ci node = rb_next(&e->rb_node); 64562306a36Sopenharmony_ci if (unlikely(!node)) 64662306a36Sopenharmony_ci return e; 64762306a36Sopenharmony_ci e2 = container_of(node, struct wc_entry, rb_node); 64862306a36Sopenharmony_ci if (read_original_sector(wc, e2) != block) 64962306a36Sopenharmony_ci return e; 65062306a36Sopenharmony_ci e = e2; 65162306a36Sopenharmony_ci } 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_cistatic void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins) 65562306a36Sopenharmony_ci{ 65662306a36Sopenharmony_ci struct wc_entry *e; 65762306a36Sopenharmony_ci struct rb_node **node = &wc->tree.rb_node, *parent = NULL; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci while (*node) { 66062306a36Sopenharmony_ci e = container_of(*node, struct wc_entry, rb_node); 66162306a36Sopenharmony_ci parent = &e->rb_node; 66262306a36Sopenharmony_ci if (read_original_sector(wc, e) > read_original_sector(wc, ins)) 66362306a36Sopenharmony_ci node = &parent->rb_left; 66462306a36Sopenharmony_ci else 66562306a36Sopenharmony_ci node = &parent->rb_right; 66662306a36Sopenharmony_ci } 66762306a36Sopenharmony_ci rb_link_node(&ins->rb_node, parent, node); 66862306a36Sopenharmony_ci rb_insert_color(&ins->rb_node, &wc->tree); 66962306a36Sopenharmony_ci list_add(&ins->lru, &wc->lru); 67062306a36Sopenharmony_ci ins->age = jiffies; 67162306a36Sopenharmony_ci} 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_cistatic void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e) 67462306a36Sopenharmony_ci{ 67562306a36Sopenharmony_ci list_del(&e->lru); 67662306a36Sopenharmony_ci rb_erase(&e->rb_node, &wc->tree); 67762306a36Sopenharmony_ci} 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_cistatic void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e) 68062306a36Sopenharmony_ci{ 68162306a36Sopenharmony_ci if (WC_MODE_SORT_FREELIST(wc)) { 68262306a36Sopenharmony_ci struct rb_node **node = &wc->freetree.rb_node, *parent = NULL; 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci if (unlikely(!*node)) 68562306a36Sopenharmony_ci wc->current_free = e; 68662306a36Sopenharmony_ci while (*node) { 68762306a36Sopenharmony_ci parent = *node; 68862306a36Sopenharmony_ci if (&e->rb_node < *node) 68962306a36Sopenharmony_ci node = &parent->rb_left; 69062306a36Sopenharmony_ci else 69162306a36Sopenharmony_ci node = &parent->rb_right; 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci rb_link_node(&e->rb_node, parent, node); 69462306a36Sopenharmony_ci rb_insert_color(&e->rb_node, &wc->freetree); 69562306a36Sopenharmony_ci } else { 69662306a36Sopenharmony_ci list_add_tail(&e->lru, &wc->freelist); 69762306a36Sopenharmony_ci } 69862306a36Sopenharmony_ci wc->freelist_size++; 69962306a36Sopenharmony_ci} 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_cistatic inline void writecache_verify_watermark(struct dm_writecache *wc) 70262306a36Sopenharmony_ci{ 70362306a36Sopenharmony_ci if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) 70462306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->writeback_work); 70562306a36Sopenharmony_ci} 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_cistatic void writecache_max_age_timer(struct timer_list *t) 70862306a36Sopenharmony_ci{ 70962306a36Sopenharmony_ci struct dm_writecache *wc = from_timer(wc, t, max_age_timer); 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci if (!dm_suspended(wc->ti) && !writecache_has_error(wc)) { 71262306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->writeback_work); 71362306a36Sopenharmony_ci mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci} 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_cistatic struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) 71862306a36Sopenharmony_ci{ 71962306a36Sopenharmony_ci struct wc_entry *e; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci if (WC_MODE_SORT_FREELIST(wc)) { 72262306a36Sopenharmony_ci struct rb_node *next; 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci if (unlikely(!wc->current_free)) 72562306a36Sopenharmony_ci return NULL; 72662306a36Sopenharmony_ci e = wc->current_free; 72762306a36Sopenharmony_ci if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 72862306a36Sopenharmony_ci return NULL; 72962306a36Sopenharmony_ci next = rb_next(&e->rb_node); 73062306a36Sopenharmony_ci rb_erase(&e->rb_node, &wc->freetree); 73162306a36Sopenharmony_ci if (unlikely(!next)) 73262306a36Sopenharmony_ci next = rb_first(&wc->freetree); 73362306a36Sopenharmony_ci wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL; 73462306a36Sopenharmony_ci } else { 73562306a36Sopenharmony_ci if (unlikely(list_empty(&wc->freelist))) 73662306a36Sopenharmony_ci return NULL; 73762306a36Sopenharmony_ci e = container_of(wc->freelist.next, struct wc_entry, lru); 73862306a36Sopenharmony_ci if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) 73962306a36Sopenharmony_ci return NULL; 74062306a36Sopenharmony_ci list_del(&e->lru); 74162306a36Sopenharmony_ci } 74262306a36Sopenharmony_ci wc->freelist_size--; 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci writecache_verify_watermark(wc); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci return e; 74762306a36Sopenharmony_ci} 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_cistatic void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e) 75062306a36Sopenharmony_ci{ 75162306a36Sopenharmony_ci writecache_unlink(wc, e); 75262306a36Sopenharmony_ci writecache_add_to_freelist(wc, e); 75362306a36Sopenharmony_ci clear_seq_count(wc, e); 75462306a36Sopenharmony_ci writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 75562306a36Sopenharmony_ci if (unlikely(waitqueue_active(&wc->freelist_wait))) 75662306a36Sopenharmony_ci wake_up(&wc->freelist_wait); 75762306a36Sopenharmony_ci} 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_cistatic void writecache_wait_on_freelist(struct dm_writecache *wc) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci DEFINE_WAIT(wait); 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci prepare_to_wait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE); 76462306a36Sopenharmony_ci wc_unlock(wc); 76562306a36Sopenharmony_ci io_schedule(); 76662306a36Sopenharmony_ci finish_wait(&wc->freelist_wait, &wait); 76762306a36Sopenharmony_ci wc_lock(wc); 76862306a36Sopenharmony_ci} 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_cistatic void writecache_poison_lists(struct dm_writecache *wc) 77162306a36Sopenharmony_ci{ 77262306a36Sopenharmony_ci /* 77362306a36Sopenharmony_ci * Catch incorrect access to these values while the device is suspended. 77462306a36Sopenharmony_ci */ 77562306a36Sopenharmony_ci memset(&wc->tree, -1, sizeof(wc->tree)); 77662306a36Sopenharmony_ci wc->lru.next = LIST_POISON1; 77762306a36Sopenharmony_ci wc->lru.prev = LIST_POISON2; 77862306a36Sopenharmony_ci wc->freelist.next = LIST_POISON1; 77962306a36Sopenharmony_ci wc->freelist.prev = LIST_POISON2; 78062306a36Sopenharmony_ci} 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_cistatic void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry)); 78562306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 78662306a36Sopenharmony_ci writecache_flush_region(wc, memory_data(wc, e), wc->block_size); 78762306a36Sopenharmony_ci} 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_cistatic bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e) 79062306a36Sopenharmony_ci{ 79162306a36Sopenharmony_ci return read_seq_count(wc, e) < wc->seq_count; 79262306a36Sopenharmony_ci} 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_cistatic void writecache_flush(struct dm_writecache *wc) 79562306a36Sopenharmony_ci{ 79662306a36Sopenharmony_ci struct wc_entry *e, *e2; 79762306a36Sopenharmony_ci bool need_flush_after_free; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci wc->uncommitted_blocks = 0; 80062306a36Sopenharmony_ci del_timer(&wc->autocommit_timer); 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci if (list_empty(&wc->lru)) 80362306a36Sopenharmony_ci return; 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci e = container_of(wc->lru.next, struct wc_entry, lru); 80662306a36Sopenharmony_ci if (writecache_entry_is_committed(wc, e)) { 80762306a36Sopenharmony_ci if (wc->overwrote_committed) { 80862306a36Sopenharmony_ci writecache_wait_for_ios(wc, WRITE); 80962306a36Sopenharmony_ci writecache_disk_flush(wc, wc->ssd_dev); 81062306a36Sopenharmony_ci wc->overwrote_committed = false; 81162306a36Sopenharmony_ci } 81262306a36Sopenharmony_ci return; 81362306a36Sopenharmony_ci } 81462306a36Sopenharmony_ci while (1) { 81562306a36Sopenharmony_ci writecache_flush_entry(wc, e); 81662306a36Sopenharmony_ci if (unlikely(e->lru.next == &wc->lru)) 81762306a36Sopenharmony_ci break; 81862306a36Sopenharmony_ci e2 = container_of(e->lru.next, struct wc_entry, lru); 81962306a36Sopenharmony_ci if (writecache_entry_is_committed(wc, e2)) 82062306a36Sopenharmony_ci break; 82162306a36Sopenharmony_ci e = e2; 82262306a36Sopenharmony_ci cond_resched(); 82362306a36Sopenharmony_ci } 82462306a36Sopenharmony_ci writecache_commit_flushed(wc, true); 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci wc->seq_count++; 82762306a36Sopenharmony_ci pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); 82862306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 82962306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 83062306a36Sopenharmony_ci else 83162306a36Sopenharmony_ci ssd_commit_superblock(wc); 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci wc->overwrote_committed = false; 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci need_flush_after_free = false; 83662306a36Sopenharmony_ci while (1) { 83762306a36Sopenharmony_ci /* Free another committed entry with lower seq-count */ 83862306a36Sopenharmony_ci struct rb_node *rb_node = rb_prev(&e->rb_node); 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci if (rb_node) { 84162306a36Sopenharmony_ci e2 = container_of(rb_node, struct wc_entry, rb_node); 84262306a36Sopenharmony_ci if (read_original_sector(wc, e2) == read_original_sector(wc, e) && 84362306a36Sopenharmony_ci likely(!e2->write_in_progress)) { 84462306a36Sopenharmony_ci writecache_free_entry(wc, e2); 84562306a36Sopenharmony_ci need_flush_after_free = true; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci } 84862306a36Sopenharmony_ci if (unlikely(e->lru.prev == &wc->lru)) 84962306a36Sopenharmony_ci break; 85062306a36Sopenharmony_ci e = container_of(e->lru.prev, struct wc_entry, lru); 85162306a36Sopenharmony_ci cond_resched(); 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci if (need_flush_after_free) 85562306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 85662306a36Sopenharmony_ci} 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_cistatic void writecache_flush_work(struct work_struct *work) 85962306a36Sopenharmony_ci{ 86062306a36Sopenharmony_ci struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work); 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci wc_lock(wc); 86362306a36Sopenharmony_ci writecache_flush(wc); 86462306a36Sopenharmony_ci wc_unlock(wc); 86562306a36Sopenharmony_ci} 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_cistatic void writecache_autocommit_timer(struct timer_list *t) 86862306a36Sopenharmony_ci{ 86962306a36Sopenharmony_ci struct dm_writecache *wc = from_timer(wc, t, autocommit_timer); 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci if (!writecache_has_error(wc)) 87262306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->flush_work); 87362306a36Sopenharmony_ci} 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_cistatic void writecache_schedule_autocommit(struct dm_writecache *wc) 87662306a36Sopenharmony_ci{ 87762306a36Sopenharmony_ci if (!timer_pending(&wc->autocommit_timer)) 87862306a36Sopenharmony_ci mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies); 87962306a36Sopenharmony_ci} 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_cistatic void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end) 88262306a36Sopenharmony_ci{ 88362306a36Sopenharmony_ci struct wc_entry *e; 88462306a36Sopenharmony_ci bool discarded_something = false; 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ); 88762306a36Sopenharmony_ci if (unlikely(!e)) 88862306a36Sopenharmony_ci return; 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci while (read_original_sector(wc, e) < end) { 89162306a36Sopenharmony_ci struct rb_node *node = rb_next(&e->rb_node); 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci if (likely(!e->write_in_progress)) { 89462306a36Sopenharmony_ci if (!discarded_something) { 89562306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc)) { 89662306a36Sopenharmony_ci writecache_wait_for_ios(wc, READ); 89762306a36Sopenharmony_ci writecache_wait_for_ios(wc, WRITE); 89862306a36Sopenharmony_ci } 89962306a36Sopenharmony_ci discarded_something = true; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci if (!writecache_entry_is_committed(wc, e)) 90262306a36Sopenharmony_ci wc->uncommitted_blocks--; 90362306a36Sopenharmony_ci writecache_free_entry(wc, e); 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci if (unlikely(!node)) 90762306a36Sopenharmony_ci break; 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci e = container_of(node, struct wc_entry, rb_node); 91062306a36Sopenharmony_ci } 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci if (discarded_something) 91362306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 91462306a36Sopenharmony_ci} 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_cistatic bool writecache_wait_for_writeback(struct dm_writecache *wc) 91762306a36Sopenharmony_ci{ 91862306a36Sopenharmony_ci if (wc->writeback_size) { 91962306a36Sopenharmony_ci writecache_wait_on_freelist(wc); 92062306a36Sopenharmony_ci return true; 92162306a36Sopenharmony_ci } 92262306a36Sopenharmony_ci return false; 92362306a36Sopenharmony_ci} 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_cistatic void writecache_suspend(struct dm_target *ti) 92662306a36Sopenharmony_ci{ 92762306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 92862306a36Sopenharmony_ci bool flush_on_suspend; 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci del_timer_sync(&wc->autocommit_timer); 93162306a36Sopenharmony_ci del_timer_sync(&wc->max_age_timer); 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci wc_lock(wc); 93462306a36Sopenharmony_ci writecache_flush(wc); 93562306a36Sopenharmony_ci flush_on_suspend = wc->flush_on_suspend; 93662306a36Sopenharmony_ci if (flush_on_suspend) { 93762306a36Sopenharmony_ci wc->flush_on_suspend = false; 93862306a36Sopenharmony_ci wc->writeback_all++; 93962306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->writeback_work); 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci wc_unlock(wc); 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci drain_workqueue(wc->writeback_wq); 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci wc_lock(wc); 94662306a36Sopenharmony_ci if (flush_on_suspend) 94762306a36Sopenharmony_ci wc->writeback_all--; 94862306a36Sopenharmony_ci while (writecache_wait_for_writeback(wc)) 94962306a36Sopenharmony_ci ; 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 95262306a36Sopenharmony_ci persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci writecache_poison_lists(wc); 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci wc_unlock(wc); 95762306a36Sopenharmony_ci} 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_cistatic int writecache_alloc_entries(struct dm_writecache *wc) 96062306a36Sopenharmony_ci{ 96162306a36Sopenharmony_ci size_t b; 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci if (wc->entries) 96462306a36Sopenharmony_ci return 0; 96562306a36Sopenharmony_ci wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); 96662306a36Sopenharmony_ci if (!wc->entries) 96762306a36Sopenharmony_ci return -ENOMEM; 96862306a36Sopenharmony_ci for (b = 0; b < wc->n_blocks; b++) { 96962306a36Sopenharmony_ci struct wc_entry *e = &wc->entries[b]; 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci e->index = b; 97262306a36Sopenharmony_ci e->write_in_progress = false; 97362306a36Sopenharmony_ci cond_resched(); 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci return 0; 97762306a36Sopenharmony_ci} 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_cistatic int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors) 98062306a36Sopenharmony_ci{ 98162306a36Sopenharmony_ci struct dm_io_region region; 98262306a36Sopenharmony_ci struct dm_io_request req; 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci region.bdev = wc->ssd_dev->bdev; 98562306a36Sopenharmony_ci region.sector = wc->start_sector; 98662306a36Sopenharmony_ci region.count = n_sectors; 98762306a36Sopenharmony_ci req.bi_opf = REQ_OP_READ | REQ_SYNC; 98862306a36Sopenharmony_ci req.mem.type = DM_IO_VMA; 98962306a36Sopenharmony_ci req.mem.ptr.vma = (char *)wc->memory_map; 99062306a36Sopenharmony_ci req.client = wc->dm_io; 99162306a36Sopenharmony_ci req.notify.fn = NULL; 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci return dm_io(&req, 1, ®ion, NULL, IOPRIO_DEFAULT); 99462306a36Sopenharmony_ci} 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_cistatic void writecache_resume(struct dm_target *ti) 99762306a36Sopenharmony_ci{ 99862306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 99962306a36Sopenharmony_ci size_t b; 100062306a36Sopenharmony_ci bool need_flush = false; 100162306a36Sopenharmony_ci __le64 sb_seq_count; 100262306a36Sopenharmony_ci int r; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci wc_lock(wc); 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci wc->data_device_sectors = bdev_nr_sectors(wc->dev->bdev); 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 100962306a36Sopenharmony_ci persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); 101062306a36Sopenharmony_ci } else { 101162306a36Sopenharmony_ci r = writecache_read_metadata(wc, wc->metadata_sectors); 101262306a36Sopenharmony_ci if (r) { 101362306a36Sopenharmony_ci size_t sb_entries_offset; 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci writecache_error(wc, r, "unable to read metadata: %d", r); 101662306a36Sopenharmony_ci sb_entries_offset = offsetof(struct wc_memory_superblock, entries); 101762306a36Sopenharmony_ci memset((char *)wc->memory_map + sb_entries_offset, -1, 101862306a36Sopenharmony_ci (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset); 101962306a36Sopenharmony_ci } 102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci wc->tree = RB_ROOT; 102362306a36Sopenharmony_ci INIT_LIST_HEAD(&wc->lru); 102462306a36Sopenharmony_ci if (WC_MODE_SORT_FREELIST(wc)) { 102562306a36Sopenharmony_ci wc->freetree = RB_ROOT; 102662306a36Sopenharmony_ci wc->current_free = NULL; 102762306a36Sopenharmony_ci } else { 102862306a36Sopenharmony_ci INIT_LIST_HEAD(&wc->freelist); 102962306a36Sopenharmony_ci } 103062306a36Sopenharmony_ci wc->freelist_size = 0; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count, 103362306a36Sopenharmony_ci sizeof(uint64_t)); 103462306a36Sopenharmony_ci if (r) { 103562306a36Sopenharmony_ci writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); 103662306a36Sopenharmony_ci sb_seq_count = cpu_to_le64(0); 103762306a36Sopenharmony_ci } 103862306a36Sopenharmony_ci wc->seq_count = le64_to_cpu(sb_seq_count); 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS 104162306a36Sopenharmony_ci for (b = 0; b < wc->n_blocks; b++) { 104262306a36Sopenharmony_ci struct wc_entry *e = &wc->entries[b]; 104362306a36Sopenharmony_ci struct wc_memory_entry wme; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci if (writecache_has_error(wc)) { 104662306a36Sopenharmony_ci e->original_sector = -1; 104762306a36Sopenharmony_ci e->seq_count = -1; 104862306a36Sopenharmony_ci continue; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci r = copy_mc_to_kernel(&wme, memory_entry(wc, e), 105162306a36Sopenharmony_ci sizeof(struct wc_memory_entry)); 105262306a36Sopenharmony_ci if (r) { 105362306a36Sopenharmony_ci writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", 105462306a36Sopenharmony_ci (unsigned long)b, r); 105562306a36Sopenharmony_ci e->original_sector = -1; 105662306a36Sopenharmony_ci e->seq_count = -1; 105762306a36Sopenharmony_ci } else { 105862306a36Sopenharmony_ci e->original_sector = le64_to_cpu(wme.original_sector); 105962306a36Sopenharmony_ci e->seq_count = le64_to_cpu(wme.seq_count); 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci cond_resched(); 106262306a36Sopenharmony_ci } 106362306a36Sopenharmony_ci#endif 106462306a36Sopenharmony_ci for (b = 0; b < wc->n_blocks; b++) { 106562306a36Sopenharmony_ci struct wc_entry *e = &wc->entries[b]; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci if (!writecache_entry_is_committed(wc, e)) { 106862306a36Sopenharmony_ci if (read_seq_count(wc, e) != -1) { 106962306a36Sopenharmony_cierase_this: 107062306a36Sopenharmony_ci clear_seq_count(wc, e); 107162306a36Sopenharmony_ci need_flush = true; 107262306a36Sopenharmony_ci } 107362306a36Sopenharmony_ci writecache_add_to_freelist(wc, e); 107462306a36Sopenharmony_ci } else { 107562306a36Sopenharmony_ci struct wc_entry *old; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci old = writecache_find_entry(wc, read_original_sector(wc, e), 0); 107862306a36Sopenharmony_ci if (!old) { 107962306a36Sopenharmony_ci writecache_insert_entry(wc, e); 108062306a36Sopenharmony_ci } else { 108162306a36Sopenharmony_ci if (read_seq_count(wc, old) == read_seq_count(wc, e)) { 108262306a36Sopenharmony_ci writecache_error(wc, -EINVAL, 108362306a36Sopenharmony_ci "two identical entries, position %llu, sector %llu, sequence %llu", 108462306a36Sopenharmony_ci (unsigned long long)b, (unsigned long long)read_original_sector(wc, e), 108562306a36Sopenharmony_ci (unsigned long long)read_seq_count(wc, e)); 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci if (read_seq_count(wc, old) > read_seq_count(wc, e)) { 108862306a36Sopenharmony_ci goto erase_this; 108962306a36Sopenharmony_ci } else { 109062306a36Sopenharmony_ci writecache_free_entry(wc, old); 109162306a36Sopenharmony_ci writecache_insert_entry(wc, e); 109262306a36Sopenharmony_ci need_flush = true; 109362306a36Sopenharmony_ci } 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci cond_resched(); 109762306a36Sopenharmony_ci } 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci if (need_flush) { 110062306a36Sopenharmony_ci writecache_flush_all_metadata(wc); 110162306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 110262306a36Sopenharmony_ci } 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci writecache_verify_watermark(wc); 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci if (wc->max_age != MAX_AGE_UNSPECIFIED) 110762306a36Sopenharmony_ci mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV); 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci wc_unlock(wc); 111062306a36Sopenharmony_ci} 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_cistatic int process_flush_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) 111362306a36Sopenharmony_ci{ 111462306a36Sopenharmony_ci if (argc != 1) 111562306a36Sopenharmony_ci return -EINVAL; 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci wc_lock(wc); 111862306a36Sopenharmony_ci if (dm_suspended(wc->ti)) { 111962306a36Sopenharmony_ci wc_unlock(wc); 112062306a36Sopenharmony_ci return -EBUSY; 112162306a36Sopenharmony_ci } 112262306a36Sopenharmony_ci if (writecache_has_error(wc)) { 112362306a36Sopenharmony_ci wc_unlock(wc); 112462306a36Sopenharmony_ci return -EIO; 112562306a36Sopenharmony_ci } 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci writecache_flush(wc); 112862306a36Sopenharmony_ci wc->writeback_all++; 112962306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->writeback_work); 113062306a36Sopenharmony_ci wc_unlock(wc); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci flush_workqueue(wc->writeback_wq); 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci wc_lock(wc); 113562306a36Sopenharmony_ci wc->writeback_all--; 113662306a36Sopenharmony_ci if (writecache_has_error(wc)) { 113762306a36Sopenharmony_ci wc_unlock(wc); 113862306a36Sopenharmony_ci return -EIO; 113962306a36Sopenharmony_ci } 114062306a36Sopenharmony_ci wc_unlock(wc); 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci return 0; 114362306a36Sopenharmony_ci} 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_cistatic int process_flush_on_suspend_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) 114662306a36Sopenharmony_ci{ 114762306a36Sopenharmony_ci if (argc != 1) 114862306a36Sopenharmony_ci return -EINVAL; 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci wc_lock(wc); 115162306a36Sopenharmony_ci wc->flush_on_suspend = true; 115262306a36Sopenharmony_ci wc_unlock(wc); 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci return 0; 115562306a36Sopenharmony_ci} 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_cistatic void activate_cleaner(struct dm_writecache *wc) 115862306a36Sopenharmony_ci{ 115962306a36Sopenharmony_ci wc->flush_on_suspend = true; 116062306a36Sopenharmony_ci wc->cleaner = true; 116162306a36Sopenharmony_ci wc->freelist_high_watermark = wc->n_blocks; 116262306a36Sopenharmony_ci wc->freelist_low_watermark = wc->n_blocks; 116362306a36Sopenharmony_ci} 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_cistatic int process_cleaner_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) 116662306a36Sopenharmony_ci{ 116762306a36Sopenharmony_ci if (argc != 1) 116862306a36Sopenharmony_ci return -EINVAL; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci wc_lock(wc); 117162306a36Sopenharmony_ci activate_cleaner(wc); 117262306a36Sopenharmony_ci if (!dm_suspended(wc->ti)) 117362306a36Sopenharmony_ci writecache_verify_watermark(wc); 117462306a36Sopenharmony_ci wc_unlock(wc); 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci return 0; 117762306a36Sopenharmony_ci} 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_cistatic int process_clear_stats_mesg(unsigned int argc, char **argv, struct dm_writecache *wc) 118062306a36Sopenharmony_ci{ 118162306a36Sopenharmony_ci if (argc != 1) 118262306a36Sopenharmony_ci return -EINVAL; 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci wc_lock(wc); 118562306a36Sopenharmony_ci memset(&wc->stats, 0, sizeof(wc->stats)); 118662306a36Sopenharmony_ci wc_unlock(wc); 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci return 0; 118962306a36Sopenharmony_ci} 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_cistatic int writecache_message(struct dm_target *ti, unsigned int argc, char **argv, 119262306a36Sopenharmony_ci char *result, unsigned int maxlen) 119362306a36Sopenharmony_ci{ 119462306a36Sopenharmony_ci int r = -EINVAL; 119562306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci if (!strcasecmp(argv[0], "flush")) 119862306a36Sopenharmony_ci r = process_flush_mesg(argc, argv, wc); 119962306a36Sopenharmony_ci else if (!strcasecmp(argv[0], "flush_on_suspend")) 120062306a36Sopenharmony_ci r = process_flush_on_suspend_mesg(argc, argv, wc); 120162306a36Sopenharmony_ci else if (!strcasecmp(argv[0], "cleaner")) 120262306a36Sopenharmony_ci r = process_cleaner_mesg(argc, argv, wc); 120362306a36Sopenharmony_ci else if (!strcasecmp(argv[0], "clear_stats")) 120462306a36Sopenharmony_ci r = process_clear_stats_mesg(argc, argv, wc); 120562306a36Sopenharmony_ci else 120662306a36Sopenharmony_ci DMERR("unrecognised message received: %s", argv[0]); 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci return r; 120962306a36Sopenharmony_ci} 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_cistatic void memcpy_flushcache_optimized(void *dest, void *source, size_t size) 121262306a36Sopenharmony_ci{ 121362306a36Sopenharmony_ci /* 121462306a36Sopenharmony_ci * clflushopt performs better with block size 1024, 2048, 4096 121562306a36Sopenharmony_ci * non-temporal stores perform better with block size 512 121662306a36Sopenharmony_ci * 121762306a36Sopenharmony_ci * block size 512 1024 2048 4096 121862306a36Sopenharmony_ci * movnti 496 MB/s 642 MB/s 725 MB/s 744 MB/s 121962306a36Sopenharmony_ci * clflushopt 373 MB/s 688 MB/s 1.1 GB/s 1.2 GB/s 122062306a36Sopenharmony_ci * 122162306a36Sopenharmony_ci * We see that movnti performs better for 512-byte blocks, and 122262306a36Sopenharmony_ci * clflushopt performs better for 1024-byte and larger blocks. So, we 122362306a36Sopenharmony_ci * prefer clflushopt for sizes >= 768. 122462306a36Sopenharmony_ci * 122562306a36Sopenharmony_ci * NOTE: this happens to be the case now (with dm-writecache's single 122662306a36Sopenharmony_ci * threaded model) but re-evaluate this once memcpy_flushcache() is 122762306a36Sopenharmony_ci * enabled to use movdir64b which might invalidate this performance 122862306a36Sopenharmony_ci * advantage seen with cache-allocating-writes plus flushing. 122962306a36Sopenharmony_ci */ 123062306a36Sopenharmony_ci#ifdef CONFIG_X86 123162306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_CLFLUSHOPT) && 123262306a36Sopenharmony_ci likely(boot_cpu_data.x86_clflush_size == 64) && 123362306a36Sopenharmony_ci likely(size >= 768)) { 123462306a36Sopenharmony_ci do { 123562306a36Sopenharmony_ci memcpy((void *)dest, (void *)source, 64); 123662306a36Sopenharmony_ci clflushopt((void *)dest); 123762306a36Sopenharmony_ci dest += 64; 123862306a36Sopenharmony_ci source += 64; 123962306a36Sopenharmony_ci size -= 64; 124062306a36Sopenharmony_ci } while (size >= 64); 124162306a36Sopenharmony_ci return; 124262306a36Sopenharmony_ci } 124362306a36Sopenharmony_ci#endif 124462306a36Sopenharmony_ci memcpy_flushcache(dest, source, size); 124562306a36Sopenharmony_ci} 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_cistatic void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data) 124862306a36Sopenharmony_ci{ 124962306a36Sopenharmony_ci void *buf; 125062306a36Sopenharmony_ci unsigned int size; 125162306a36Sopenharmony_ci int rw = bio_data_dir(bio); 125262306a36Sopenharmony_ci unsigned int remaining_size = wc->block_size; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci do { 125562306a36Sopenharmony_ci struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter); 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci buf = bvec_kmap_local(&bv); 125862306a36Sopenharmony_ci size = bv.bv_len; 125962306a36Sopenharmony_ci if (unlikely(size > remaining_size)) 126062306a36Sopenharmony_ci size = remaining_size; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci if (rw == READ) { 126362306a36Sopenharmony_ci int r; 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci r = copy_mc_to_kernel(buf, data, size); 126662306a36Sopenharmony_ci flush_dcache_page(bio_page(bio)); 126762306a36Sopenharmony_ci if (unlikely(r)) { 126862306a36Sopenharmony_ci writecache_error(wc, r, "hardware memory error when reading data: %d", r); 126962306a36Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 127062306a36Sopenharmony_ci } 127162306a36Sopenharmony_ci } else { 127262306a36Sopenharmony_ci flush_dcache_page(bio_page(bio)); 127362306a36Sopenharmony_ci memcpy_flushcache_optimized(data, buf, size); 127462306a36Sopenharmony_ci } 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci kunmap_local(buf); 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci data = (char *)data + size; 127962306a36Sopenharmony_ci remaining_size -= size; 128062306a36Sopenharmony_ci bio_advance(bio, size); 128162306a36Sopenharmony_ci } while (unlikely(remaining_size)); 128262306a36Sopenharmony_ci} 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_cistatic int writecache_flush_thread(void *data) 128562306a36Sopenharmony_ci{ 128662306a36Sopenharmony_ci struct dm_writecache *wc = data; 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci while (1) { 128962306a36Sopenharmony_ci struct bio *bio; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci wc_lock(wc); 129262306a36Sopenharmony_ci bio = bio_list_pop(&wc->flush_list); 129362306a36Sopenharmony_ci if (!bio) { 129462306a36Sopenharmony_ci set_current_state(TASK_INTERRUPTIBLE); 129562306a36Sopenharmony_ci wc_unlock(wc); 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci if (unlikely(kthread_should_stop())) { 129862306a36Sopenharmony_ci set_current_state(TASK_RUNNING); 129962306a36Sopenharmony_ci break; 130062306a36Sopenharmony_ci } 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci schedule(); 130362306a36Sopenharmony_ci continue; 130462306a36Sopenharmony_ci } 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_DISCARD) { 130762306a36Sopenharmony_ci writecache_discard(wc, bio->bi_iter.bi_sector, 130862306a36Sopenharmony_ci bio_end_sector(bio)); 130962306a36Sopenharmony_ci wc_unlock(wc); 131062306a36Sopenharmony_ci bio_set_dev(bio, wc->dev->bdev); 131162306a36Sopenharmony_ci submit_bio_noacct(bio); 131262306a36Sopenharmony_ci } else { 131362306a36Sopenharmony_ci writecache_flush(wc); 131462306a36Sopenharmony_ci wc_unlock(wc); 131562306a36Sopenharmony_ci if (writecache_has_error(wc)) 131662306a36Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 131762306a36Sopenharmony_ci bio_endio(bio); 131862306a36Sopenharmony_ci } 131962306a36Sopenharmony_ci } 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci return 0; 132262306a36Sopenharmony_ci} 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_cistatic void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) 132562306a36Sopenharmony_ci{ 132662306a36Sopenharmony_ci if (bio_list_empty(&wc->flush_list)) 132762306a36Sopenharmony_ci wake_up_process(wc->flush_thread); 132862306a36Sopenharmony_ci bio_list_add(&wc->flush_list, bio); 132962306a36Sopenharmony_ci} 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_cienum wc_map_op { 133262306a36Sopenharmony_ci WC_MAP_SUBMIT, 133362306a36Sopenharmony_ci WC_MAP_REMAP, 133462306a36Sopenharmony_ci WC_MAP_REMAP_ORIGIN, 133562306a36Sopenharmony_ci WC_MAP_RETURN, 133662306a36Sopenharmony_ci WC_MAP_ERROR, 133762306a36Sopenharmony_ci}; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_cistatic void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, 134062306a36Sopenharmony_ci struct wc_entry *e) 134162306a36Sopenharmony_ci{ 134262306a36Sopenharmony_ci if (e) { 134362306a36Sopenharmony_ci sector_t next_boundary = 134462306a36Sopenharmony_ci read_original_sector(wc, e) - bio->bi_iter.bi_sector; 134562306a36Sopenharmony_ci if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) 134662306a36Sopenharmony_ci dm_accept_partial_bio(bio, next_boundary); 134762306a36Sopenharmony_ci } 134862306a36Sopenharmony_ci} 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_cistatic enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio) 135162306a36Sopenharmony_ci{ 135262306a36Sopenharmony_ci enum wc_map_op map_op; 135362306a36Sopenharmony_ci struct wc_entry *e; 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ciread_next_block: 135662306a36Sopenharmony_ci wc->stats.reads++; 135762306a36Sopenharmony_ci e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 135862306a36Sopenharmony_ci if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { 135962306a36Sopenharmony_ci wc->stats.read_hits++; 136062306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 136162306a36Sopenharmony_ci bio_copy_block(wc, bio, memory_data(wc, e)); 136262306a36Sopenharmony_ci if (bio->bi_iter.bi_size) 136362306a36Sopenharmony_ci goto read_next_block; 136462306a36Sopenharmony_ci map_op = WC_MAP_SUBMIT; 136562306a36Sopenharmony_ci } else { 136662306a36Sopenharmony_ci dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); 136762306a36Sopenharmony_ci bio_set_dev(bio, wc->ssd_dev->bdev); 136862306a36Sopenharmony_ci bio->bi_iter.bi_sector = cache_sector(wc, e); 136962306a36Sopenharmony_ci if (!writecache_entry_is_committed(wc, e)) 137062306a36Sopenharmony_ci writecache_wait_for_ios(wc, WRITE); 137162306a36Sopenharmony_ci map_op = WC_MAP_REMAP; 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci } else { 137462306a36Sopenharmony_ci writecache_map_remap_origin(wc, bio, e); 137562306a36Sopenharmony_ci wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; 137662306a36Sopenharmony_ci map_op = WC_MAP_REMAP_ORIGIN; 137762306a36Sopenharmony_ci } 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci return map_op; 138062306a36Sopenharmony_ci} 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_cistatic void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, 138362306a36Sopenharmony_ci struct wc_entry *e, bool search_used) 138462306a36Sopenharmony_ci{ 138562306a36Sopenharmony_ci unsigned int bio_size = wc->block_size; 138662306a36Sopenharmony_ci sector_t start_cache_sec = cache_sector(wc, e); 138762306a36Sopenharmony_ci sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci while (bio_size < bio->bi_iter.bi_size) { 139062306a36Sopenharmony_ci if (!search_used) { 139162306a36Sopenharmony_ci struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci if (!f) 139462306a36Sopenharmony_ci break; 139562306a36Sopenharmony_ci write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + 139662306a36Sopenharmony_ci (bio_size >> SECTOR_SHIFT), wc->seq_count); 139762306a36Sopenharmony_ci writecache_insert_entry(wc, f); 139862306a36Sopenharmony_ci wc->uncommitted_blocks++; 139962306a36Sopenharmony_ci } else { 140062306a36Sopenharmony_ci struct wc_entry *f; 140162306a36Sopenharmony_ci struct rb_node *next = rb_next(&e->rb_node); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci if (!next) 140462306a36Sopenharmony_ci break; 140562306a36Sopenharmony_ci f = container_of(next, struct wc_entry, rb_node); 140662306a36Sopenharmony_ci if (f != e + 1) 140762306a36Sopenharmony_ci break; 140862306a36Sopenharmony_ci if (read_original_sector(wc, f) != 140962306a36Sopenharmony_ci read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) 141062306a36Sopenharmony_ci break; 141162306a36Sopenharmony_ci if (unlikely(f->write_in_progress)) 141262306a36Sopenharmony_ci break; 141362306a36Sopenharmony_ci if (writecache_entry_is_committed(wc, f)) 141462306a36Sopenharmony_ci wc->overwrote_committed = true; 141562306a36Sopenharmony_ci e = f; 141662306a36Sopenharmony_ci } 141762306a36Sopenharmony_ci bio_size += wc->block_size; 141862306a36Sopenharmony_ci current_cache_sec += wc->block_size >> SECTOR_SHIFT; 141962306a36Sopenharmony_ci } 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci bio_set_dev(bio, wc->ssd_dev->bdev); 142262306a36Sopenharmony_ci bio->bi_iter.bi_sector = start_cache_sec; 142362306a36Sopenharmony_ci dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; 142662306a36Sopenharmony_ci wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { 142962306a36Sopenharmony_ci wc->uncommitted_blocks = 0; 143062306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->flush_work); 143162306a36Sopenharmony_ci } else { 143262306a36Sopenharmony_ci writecache_schedule_autocommit(wc); 143362306a36Sopenharmony_ci } 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_cistatic enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio) 143762306a36Sopenharmony_ci{ 143862306a36Sopenharmony_ci struct wc_entry *e; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci do { 144162306a36Sopenharmony_ci bool found_entry = false; 144262306a36Sopenharmony_ci bool search_used = false; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci if (writecache_has_error(wc)) { 144562306a36Sopenharmony_ci wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; 144662306a36Sopenharmony_ci return WC_MAP_ERROR; 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); 144962306a36Sopenharmony_ci if (e) { 145062306a36Sopenharmony_ci if (!writecache_entry_is_committed(wc, e)) { 145162306a36Sopenharmony_ci wc->stats.write_hits_uncommitted++; 145262306a36Sopenharmony_ci search_used = true; 145362306a36Sopenharmony_ci goto bio_copy; 145462306a36Sopenharmony_ci } 145562306a36Sopenharmony_ci wc->stats.write_hits_committed++; 145662306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { 145762306a36Sopenharmony_ci wc->overwrote_committed = true; 145862306a36Sopenharmony_ci search_used = true; 145962306a36Sopenharmony_ci goto bio_copy; 146062306a36Sopenharmony_ci } 146162306a36Sopenharmony_ci found_entry = true; 146262306a36Sopenharmony_ci } else { 146362306a36Sopenharmony_ci if (unlikely(wc->cleaner) || 146462306a36Sopenharmony_ci (wc->metadata_only && !(bio->bi_opf & REQ_META))) 146562306a36Sopenharmony_ci goto direct_write; 146662306a36Sopenharmony_ci } 146762306a36Sopenharmony_ci e = writecache_pop_from_freelist(wc, (sector_t)-1); 146862306a36Sopenharmony_ci if (unlikely(!e)) { 146962306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc) && !found_entry) { 147062306a36Sopenharmony_cidirect_write: 147162306a36Sopenharmony_ci e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); 147262306a36Sopenharmony_ci writecache_map_remap_origin(wc, bio, e); 147362306a36Sopenharmony_ci wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits; 147462306a36Sopenharmony_ci wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; 147562306a36Sopenharmony_ci return WC_MAP_REMAP_ORIGIN; 147662306a36Sopenharmony_ci } 147762306a36Sopenharmony_ci wc->stats.writes_blocked_on_freelist++; 147862306a36Sopenharmony_ci writecache_wait_on_freelist(wc); 147962306a36Sopenharmony_ci continue; 148062306a36Sopenharmony_ci } 148162306a36Sopenharmony_ci write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); 148262306a36Sopenharmony_ci writecache_insert_entry(wc, e); 148362306a36Sopenharmony_ci wc->uncommitted_blocks++; 148462306a36Sopenharmony_ci wc->stats.writes_allocate++; 148562306a36Sopenharmony_cibio_copy: 148662306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 148762306a36Sopenharmony_ci bio_copy_block(wc, bio, memory_data(wc, e)); 148862306a36Sopenharmony_ci wc->stats.writes++; 148962306a36Sopenharmony_ci } else { 149062306a36Sopenharmony_ci writecache_bio_copy_ssd(wc, bio, e, search_used); 149162306a36Sopenharmony_ci return WC_MAP_REMAP; 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci } while (bio->bi_iter.bi_size); 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks)) 149662306a36Sopenharmony_ci writecache_flush(wc); 149762306a36Sopenharmony_ci else 149862306a36Sopenharmony_ci writecache_schedule_autocommit(wc); 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci return WC_MAP_SUBMIT; 150162306a36Sopenharmony_ci} 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_cistatic enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio) 150462306a36Sopenharmony_ci{ 150562306a36Sopenharmony_ci if (writecache_has_error(wc)) 150662306a36Sopenharmony_ci return WC_MAP_ERROR; 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 150962306a36Sopenharmony_ci wc->stats.flushes++; 151062306a36Sopenharmony_ci writecache_flush(wc); 151162306a36Sopenharmony_ci if (writecache_has_error(wc)) 151262306a36Sopenharmony_ci return WC_MAP_ERROR; 151362306a36Sopenharmony_ci else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only)) 151462306a36Sopenharmony_ci return WC_MAP_REMAP_ORIGIN; 151562306a36Sopenharmony_ci return WC_MAP_SUBMIT; 151662306a36Sopenharmony_ci } 151762306a36Sopenharmony_ci /* SSD: */ 151862306a36Sopenharmony_ci if (dm_bio_get_target_bio_nr(bio)) 151962306a36Sopenharmony_ci return WC_MAP_REMAP_ORIGIN; 152062306a36Sopenharmony_ci wc->stats.flushes++; 152162306a36Sopenharmony_ci writecache_offload_bio(wc, bio); 152262306a36Sopenharmony_ci return WC_MAP_RETURN; 152362306a36Sopenharmony_ci} 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_cistatic enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio) 152662306a36Sopenharmony_ci{ 152762306a36Sopenharmony_ci wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci if (writecache_has_error(wc)) 153062306a36Sopenharmony_ci return WC_MAP_ERROR; 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 153362306a36Sopenharmony_ci writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); 153462306a36Sopenharmony_ci return WC_MAP_REMAP_ORIGIN; 153562306a36Sopenharmony_ci } 153662306a36Sopenharmony_ci /* SSD: */ 153762306a36Sopenharmony_ci writecache_offload_bio(wc, bio); 153862306a36Sopenharmony_ci return WC_MAP_RETURN; 153962306a36Sopenharmony_ci} 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_cistatic int writecache_map(struct dm_target *ti, struct bio *bio) 154262306a36Sopenharmony_ci{ 154362306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 154462306a36Sopenharmony_ci enum wc_map_op map_op; 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci bio->bi_private = NULL; 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_ci wc_lock(wc); 154962306a36Sopenharmony_ci 155062306a36Sopenharmony_ci if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 155162306a36Sopenharmony_ci map_op = writecache_map_flush(wc, bio); 155262306a36Sopenharmony_ci goto done; 155362306a36Sopenharmony_ci } 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci if (unlikely((((unsigned int)bio->bi_iter.bi_sector | bio_sectors(bio)) & 155862306a36Sopenharmony_ci (wc->block_size / 512 - 1)) != 0)) { 155962306a36Sopenharmony_ci DMERR("I/O is not aligned, sector %llu, size %u, block size %u", 156062306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector, 156162306a36Sopenharmony_ci bio->bi_iter.bi_size, wc->block_size); 156262306a36Sopenharmony_ci map_op = WC_MAP_ERROR; 156362306a36Sopenharmony_ci goto done; 156462306a36Sopenharmony_ci } 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { 156762306a36Sopenharmony_ci map_op = writecache_map_discard(wc, bio); 156862306a36Sopenharmony_ci goto done; 156962306a36Sopenharmony_ci } 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci if (bio_data_dir(bio) == READ) 157262306a36Sopenharmony_ci map_op = writecache_map_read(wc, bio); 157362306a36Sopenharmony_ci else 157462306a36Sopenharmony_ci map_op = writecache_map_write(wc, bio); 157562306a36Sopenharmony_cidone: 157662306a36Sopenharmony_ci switch (map_op) { 157762306a36Sopenharmony_ci case WC_MAP_REMAP_ORIGIN: 157862306a36Sopenharmony_ci if (likely(wc->pause != 0)) { 157962306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_WRITE) { 158062306a36Sopenharmony_ci dm_iot_io_begin(&wc->iot, 1); 158162306a36Sopenharmony_ci bio->bi_private = (void *)2; 158262306a36Sopenharmony_ci } 158362306a36Sopenharmony_ci } 158462306a36Sopenharmony_ci bio_set_dev(bio, wc->dev->bdev); 158562306a36Sopenharmony_ci wc_unlock(wc); 158662306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci case WC_MAP_REMAP: 158962306a36Sopenharmony_ci /* make sure that writecache_end_io decrements bio_in_progress: */ 159062306a36Sopenharmony_ci bio->bi_private = (void *)1; 159162306a36Sopenharmony_ci atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); 159262306a36Sopenharmony_ci wc_unlock(wc); 159362306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci case WC_MAP_SUBMIT: 159662306a36Sopenharmony_ci wc_unlock(wc); 159762306a36Sopenharmony_ci bio_endio(bio); 159862306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 159962306a36Sopenharmony_ci 160062306a36Sopenharmony_ci case WC_MAP_RETURN: 160162306a36Sopenharmony_ci wc_unlock(wc); 160262306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 160362306a36Sopenharmony_ci 160462306a36Sopenharmony_ci case WC_MAP_ERROR: 160562306a36Sopenharmony_ci wc_unlock(wc); 160662306a36Sopenharmony_ci bio_io_error(bio); 160762306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci default: 161062306a36Sopenharmony_ci BUG(); 161162306a36Sopenharmony_ci wc_unlock(wc); 161262306a36Sopenharmony_ci return DM_MAPIO_KILL; 161362306a36Sopenharmony_ci } 161462306a36Sopenharmony_ci} 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_cistatic int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status) 161762306a36Sopenharmony_ci{ 161862306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 161962306a36Sopenharmony_ci 162062306a36Sopenharmony_ci if (bio->bi_private == (void *)1) { 162162306a36Sopenharmony_ci int dir = bio_data_dir(bio); 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci if (atomic_dec_and_test(&wc->bio_in_progress[dir])) 162462306a36Sopenharmony_ci if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir]))) 162562306a36Sopenharmony_ci wake_up(&wc->bio_in_progress_wait[dir]); 162662306a36Sopenharmony_ci } else if (bio->bi_private == (void *)2) { 162762306a36Sopenharmony_ci dm_iot_io_end(&wc->iot, 1); 162862306a36Sopenharmony_ci } 162962306a36Sopenharmony_ci return 0; 163062306a36Sopenharmony_ci} 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_cistatic int writecache_iterate_devices(struct dm_target *ti, 163362306a36Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 163462306a36Sopenharmony_ci{ 163562306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci return fn(ti, wc->dev, 0, ti->len, data); 163862306a36Sopenharmony_ci} 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_cistatic void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits) 164162306a36Sopenharmony_ci{ 164262306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci if (limits->logical_block_size < wc->block_size) 164562306a36Sopenharmony_ci limits->logical_block_size = wc->block_size; 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_ci if (limits->physical_block_size < wc->block_size) 164862306a36Sopenharmony_ci limits->physical_block_size = wc->block_size; 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci if (limits->io_min < wc->block_size) 165162306a36Sopenharmony_ci limits->io_min = wc->block_size; 165262306a36Sopenharmony_ci} 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_cistatic void writecache_writeback_endio(struct bio *bio) 165662306a36Sopenharmony_ci{ 165762306a36Sopenharmony_ci struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio); 165862306a36Sopenharmony_ci struct dm_writecache *wc = wb->wc; 165962306a36Sopenharmony_ci unsigned long flags; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci raw_spin_lock_irqsave(&wc->endio_list_lock, flags); 166262306a36Sopenharmony_ci if (unlikely(list_empty(&wc->endio_list))) 166362306a36Sopenharmony_ci wake_up_process(wc->endio_thread); 166462306a36Sopenharmony_ci list_add_tail(&wb->endio_entry, &wc->endio_list); 166562306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags); 166662306a36Sopenharmony_ci} 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_cistatic void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr) 166962306a36Sopenharmony_ci{ 167062306a36Sopenharmony_ci struct copy_struct *c = ptr; 167162306a36Sopenharmony_ci struct dm_writecache *wc = c->wc; 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci c->error = likely(!(read_err | write_err)) ? 0 : -EIO; 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci raw_spin_lock_irq(&wc->endio_list_lock); 167662306a36Sopenharmony_ci if (unlikely(list_empty(&wc->endio_list))) 167762306a36Sopenharmony_ci wake_up_process(wc->endio_thread); 167862306a36Sopenharmony_ci list_add_tail(&c->endio_entry, &wc->endio_list); 167962306a36Sopenharmony_ci raw_spin_unlock_irq(&wc->endio_list_lock); 168062306a36Sopenharmony_ci} 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_cistatic void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list) 168362306a36Sopenharmony_ci{ 168462306a36Sopenharmony_ci unsigned int i; 168562306a36Sopenharmony_ci struct writeback_struct *wb; 168662306a36Sopenharmony_ci struct wc_entry *e; 168762306a36Sopenharmony_ci unsigned long n_walked = 0; 168862306a36Sopenharmony_ci 168962306a36Sopenharmony_ci do { 169062306a36Sopenharmony_ci wb = list_entry(list->next, struct writeback_struct, endio_entry); 169162306a36Sopenharmony_ci list_del(&wb->endio_entry); 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci if (unlikely(wb->bio.bi_status != BLK_STS_OK)) 169462306a36Sopenharmony_ci writecache_error(wc, blk_status_to_errno(wb->bio.bi_status), 169562306a36Sopenharmony_ci "write error %d", wb->bio.bi_status); 169662306a36Sopenharmony_ci i = 0; 169762306a36Sopenharmony_ci do { 169862306a36Sopenharmony_ci e = wb->wc_list[i]; 169962306a36Sopenharmony_ci BUG_ON(!e->write_in_progress); 170062306a36Sopenharmony_ci e->write_in_progress = false; 170162306a36Sopenharmony_ci INIT_LIST_HEAD(&e->lru); 170262306a36Sopenharmony_ci if (!writecache_has_error(wc)) 170362306a36Sopenharmony_ci writecache_free_entry(wc, e); 170462306a36Sopenharmony_ci BUG_ON(!wc->writeback_size); 170562306a36Sopenharmony_ci wc->writeback_size--; 170662306a36Sopenharmony_ci n_walked++; 170762306a36Sopenharmony_ci if (unlikely(n_walked >= ENDIO_LATENCY)) { 170862306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 170962306a36Sopenharmony_ci wc_unlock(wc); 171062306a36Sopenharmony_ci wc_lock(wc); 171162306a36Sopenharmony_ci n_walked = 0; 171262306a36Sopenharmony_ci } 171362306a36Sopenharmony_ci } while (++i < wb->wc_list_n); 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci if (wb->wc_list != wb->wc_list_inline) 171662306a36Sopenharmony_ci kfree(wb->wc_list); 171762306a36Sopenharmony_ci bio_put(&wb->bio); 171862306a36Sopenharmony_ci } while (!list_empty(list)); 171962306a36Sopenharmony_ci} 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_cistatic void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list) 172262306a36Sopenharmony_ci{ 172362306a36Sopenharmony_ci struct copy_struct *c; 172462306a36Sopenharmony_ci struct wc_entry *e; 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci do { 172762306a36Sopenharmony_ci c = list_entry(list->next, struct copy_struct, endio_entry); 172862306a36Sopenharmony_ci list_del(&c->endio_entry); 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci if (unlikely(c->error)) 173162306a36Sopenharmony_ci writecache_error(wc, c->error, "copy error"); 173262306a36Sopenharmony_ci 173362306a36Sopenharmony_ci e = c->e; 173462306a36Sopenharmony_ci do { 173562306a36Sopenharmony_ci BUG_ON(!e->write_in_progress); 173662306a36Sopenharmony_ci e->write_in_progress = false; 173762306a36Sopenharmony_ci INIT_LIST_HEAD(&e->lru); 173862306a36Sopenharmony_ci if (!writecache_has_error(wc)) 173962306a36Sopenharmony_ci writecache_free_entry(wc, e); 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci BUG_ON(!wc->writeback_size); 174262306a36Sopenharmony_ci wc->writeback_size--; 174362306a36Sopenharmony_ci e++; 174462306a36Sopenharmony_ci } while (--c->n_entries); 174562306a36Sopenharmony_ci mempool_free(c, &wc->copy_pool); 174662306a36Sopenharmony_ci } while (!list_empty(list)); 174762306a36Sopenharmony_ci} 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_cistatic int writecache_endio_thread(void *data) 175062306a36Sopenharmony_ci{ 175162306a36Sopenharmony_ci struct dm_writecache *wc = data; 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci while (1) { 175462306a36Sopenharmony_ci struct list_head list; 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci raw_spin_lock_irq(&wc->endio_list_lock); 175762306a36Sopenharmony_ci if (!list_empty(&wc->endio_list)) 175862306a36Sopenharmony_ci goto pop_from_list; 175962306a36Sopenharmony_ci set_current_state(TASK_INTERRUPTIBLE); 176062306a36Sopenharmony_ci raw_spin_unlock_irq(&wc->endio_list_lock); 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci if (unlikely(kthread_should_stop())) { 176362306a36Sopenharmony_ci set_current_state(TASK_RUNNING); 176462306a36Sopenharmony_ci break; 176562306a36Sopenharmony_ci } 176662306a36Sopenharmony_ci 176762306a36Sopenharmony_ci schedule(); 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci continue; 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_cipop_from_list: 177262306a36Sopenharmony_ci list = wc->endio_list; 177362306a36Sopenharmony_ci list.next->prev = list.prev->next = &list; 177462306a36Sopenharmony_ci INIT_LIST_HEAD(&wc->endio_list); 177562306a36Sopenharmony_ci raw_spin_unlock_irq(&wc->endio_list_lock); 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci if (!WC_MODE_FUA(wc)) 177862306a36Sopenharmony_ci writecache_disk_flush(wc, wc->dev); 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci wc_lock(wc); 178162306a36Sopenharmony_ci 178262306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 178362306a36Sopenharmony_ci __writecache_endio_pmem(wc, &list); 178462306a36Sopenharmony_ci } else { 178562306a36Sopenharmony_ci __writecache_endio_ssd(wc, &list); 178662306a36Sopenharmony_ci writecache_wait_for_ios(wc, READ); 178762306a36Sopenharmony_ci } 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci wc_unlock(wc); 179262306a36Sopenharmony_ci } 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci return 0; 179562306a36Sopenharmony_ci} 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_cistatic bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e) 179862306a36Sopenharmony_ci{ 179962306a36Sopenharmony_ci struct dm_writecache *wc = wb->wc; 180062306a36Sopenharmony_ci unsigned int block_size = wc->block_size; 180162306a36Sopenharmony_ci void *address = memory_data(wc, e); 180262306a36Sopenharmony_ci 180362306a36Sopenharmony_ci persistent_memory_flush_cache(address, block_size); 180462306a36Sopenharmony_ci 180562306a36Sopenharmony_ci if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors)) 180662306a36Sopenharmony_ci return true; 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci return bio_add_page(&wb->bio, persistent_memory_page(address), 180962306a36Sopenharmony_ci block_size, persistent_memory_page_offset(address)) != 0; 181062306a36Sopenharmony_ci} 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_cistruct writeback_list { 181362306a36Sopenharmony_ci struct list_head list; 181462306a36Sopenharmony_ci size_t size; 181562306a36Sopenharmony_ci}; 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_cistatic void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl) 181862306a36Sopenharmony_ci{ 181962306a36Sopenharmony_ci if (unlikely(wc->max_writeback_jobs)) { 182062306a36Sopenharmony_ci if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) { 182162306a36Sopenharmony_ci wc_lock(wc); 182262306a36Sopenharmony_ci while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) 182362306a36Sopenharmony_ci writecache_wait_on_freelist(wc); 182462306a36Sopenharmony_ci wc_unlock(wc); 182562306a36Sopenharmony_ci } 182662306a36Sopenharmony_ci } 182762306a36Sopenharmony_ci cond_resched(); 182862306a36Sopenharmony_ci} 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_cistatic void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl) 183162306a36Sopenharmony_ci{ 183262306a36Sopenharmony_ci struct wc_entry *e, *f; 183362306a36Sopenharmony_ci struct bio *bio; 183462306a36Sopenharmony_ci struct writeback_struct *wb; 183562306a36Sopenharmony_ci unsigned int max_pages; 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci while (wbl->size) { 183862306a36Sopenharmony_ci wbl->size--; 183962306a36Sopenharmony_ci e = container_of(wbl->list.prev, struct wc_entry, lru); 184062306a36Sopenharmony_ci list_del(&e->lru); 184162306a36Sopenharmony_ci 184262306a36Sopenharmony_ci max_pages = e->wc_list_contiguous; 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_ci bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE, 184562306a36Sopenharmony_ci GFP_NOIO, &wc->bio_set); 184662306a36Sopenharmony_ci wb = container_of(bio, struct writeback_struct, bio); 184762306a36Sopenharmony_ci wb->wc = wc; 184862306a36Sopenharmony_ci bio->bi_end_io = writecache_writeback_endio; 184962306a36Sopenharmony_ci bio->bi_iter.bi_sector = read_original_sector(wc, e); 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci if (unlikely(max_pages > WB_LIST_INLINE)) 185262306a36Sopenharmony_ci wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), 185362306a36Sopenharmony_ci GFP_NOIO | __GFP_NORETRY | 185462306a36Sopenharmony_ci __GFP_NOMEMALLOC | __GFP_NOWARN); 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci if (likely(max_pages <= WB_LIST_INLINE) || unlikely(!wb->wc_list)) { 185762306a36Sopenharmony_ci wb->wc_list = wb->wc_list_inline; 185862306a36Sopenharmony_ci max_pages = WB_LIST_INLINE; 185962306a36Sopenharmony_ci } 186062306a36Sopenharmony_ci 186162306a36Sopenharmony_ci BUG_ON(!wc_add_block(wb, e)); 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci wb->wc_list[0] = e; 186462306a36Sopenharmony_ci wb->wc_list_n = 1; 186562306a36Sopenharmony_ci 186662306a36Sopenharmony_ci while (wbl->size && wb->wc_list_n < max_pages) { 186762306a36Sopenharmony_ci f = container_of(wbl->list.prev, struct wc_entry, lru); 186862306a36Sopenharmony_ci if (read_original_sector(wc, f) != 186962306a36Sopenharmony_ci read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) 187062306a36Sopenharmony_ci break; 187162306a36Sopenharmony_ci if (!wc_add_block(wb, f)) 187262306a36Sopenharmony_ci break; 187362306a36Sopenharmony_ci wbl->size--; 187462306a36Sopenharmony_ci list_del(&f->lru); 187562306a36Sopenharmony_ci wb->wc_list[wb->wc_list_n++] = f; 187662306a36Sopenharmony_ci e = f; 187762306a36Sopenharmony_ci } 187862306a36Sopenharmony_ci if (WC_MODE_FUA(wc)) 187962306a36Sopenharmony_ci bio->bi_opf |= REQ_FUA; 188062306a36Sopenharmony_ci if (writecache_has_error(wc)) { 188162306a36Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 188262306a36Sopenharmony_ci bio_endio(bio); 188362306a36Sopenharmony_ci } else if (unlikely(!bio_sectors(bio))) { 188462306a36Sopenharmony_ci bio->bi_status = BLK_STS_OK; 188562306a36Sopenharmony_ci bio_endio(bio); 188662306a36Sopenharmony_ci } else { 188762306a36Sopenharmony_ci submit_bio(bio); 188862306a36Sopenharmony_ci } 188962306a36Sopenharmony_ci 189062306a36Sopenharmony_ci __writeback_throttle(wc, wbl); 189162306a36Sopenharmony_ci } 189262306a36Sopenharmony_ci} 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_cistatic void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl) 189562306a36Sopenharmony_ci{ 189662306a36Sopenharmony_ci struct wc_entry *e, *f; 189762306a36Sopenharmony_ci struct dm_io_region from, to; 189862306a36Sopenharmony_ci struct copy_struct *c; 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci while (wbl->size) { 190162306a36Sopenharmony_ci unsigned int n_sectors; 190262306a36Sopenharmony_ci 190362306a36Sopenharmony_ci wbl->size--; 190462306a36Sopenharmony_ci e = container_of(wbl->list.prev, struct wc_entry, lru); 190562306a36Sopenharmony_ci list_del(&e->lru); 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT); 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci from.bdev = wc->ssd_dev->bdev; 191062306a36Sopenharmony_ci from.sector = cache_sector(wc, e); 191162306a36Sopenharmony_ci from.count = n_sectors; 191262306a36Sopenharmony_ci to.bdev = wc->dev->bdev; 191362306a36Sopenharmony_ci to.sector = read_original_sector(wc, e); 191462306a36Sopenharmony_ci to.count = n_sectors; 191562306a36Sopenharmony_ci 191662306a36Sopenharmony_ci c = mempool_alloc(&wc->copy_pool, GFP_NOIO); 191762306a36Sopenharmony_ci c->wc = wc; 191862306a36Sopenharmony_ci c->e = e; 191962306a36Sopenharmony_ci c->n_entries = e->wc_list_contiguous; 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) { 192262306a36Sopenharmony_ci wbl->size--; 192362306a36Sopenharmony_ci f = container_of(wbl->list.prev, struct wc_entry, lru); 192462306a36Sopenharmony_ci BUG_ON(f != e + 1); 192562306a36Sopenharmony_ci list_del(&f->lru); 192662306a36Sopenharmony_ci e = f; 192762306a36Sopenharmony_ci } 192862306a36Sopenharmony_ci 192962306a36Sopenharmony_ci if (unlikely(to.sector + to.count > wc->data_device_sectors)) { 193062306a36Sopenharmony_ci if (to.sector >= wc->data_device_sectors) { 193162306a36Sopenharmony_ci writecache_copy_endio(0, 0, c); 193262306a36Sopenharmony_ci continue; 193362306a36Sopenharmony_ci } 193462306a36Sopenharmony_ci from.count = to.count = wc->data_device_sectors - to.sector; 193562306a36Sopenharmony_ci } 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); 193862306a36Sopenharmony_ci 193962306a36Sopenharmony_ci __writeback_throttle(wc, wbl); 194062306a36Sopenharmony_ci } 194162306a36Sopenharmony_ci} 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_cistatic void writecache_writeback(struct work_struct *work) 194462306a36Sopenharmony_ci{ 194562306a36Sopenharmony_ci struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work); 194662306a36Sopenharmony_ci struct blk_plug plug; 194762306a36Sopenharmony_ci struct wc_entry *f, *g, *e = NULL; 194862306a36Sopenharmony_ci struct rb_node *node, *next_node; 194962306a36Sopenharmony_ci struct list_head skipped; 195062306a36Sopenharmony_ci struct writeback_list wbl; 195162306a36Sopenharmony_ci unsigned long n_walked; 195262306a36Sopenharmony_ci 195362306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc)) { 195462306a36Sopenharmony_ci /* Wait for any active kcopyd work on behalf of ssd writeback */ 195562306a36Sopenharmony_ci dm_kcopyd_client_flush(wc->dm_kcopyd); 195662306a36Sopenharmony_ci } 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_ci if (likely(wc->pause != 0)) { 195962306a36Sopenharmony_ci while (1) { 196062306a36Sopenharmony_ci unsigned long idle; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) || 196362306a36Sopenharmony_ci unlikely(dm_suspended(wc->ti))) 196462306a36Sopenharmony_ci break; 196562306a36Sopenharmony_ci idle = dm_iot_idle_time(&wc->iot); 196662306a36Sopenharmony_ci if (idle >= wc->pause) 196762306a36Sopenharmony_ci break; 196862306a36Sopenharmony_ci idle = wc->pause - idle; 196962306a36Sopenharmony_ci if (idle > HZ) 197062306a36Sopenharmony_ci idle = HZ; 197162306a36Sopenharmony_ci schedule_timeout_idle(idle); 197262306a36Sopenharmony_ci } 197362306a36Sopenharmony_ci } 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci wc_lock(wc); 197662306a36Sopenharmony_cirestart: 197762306a36Sopenharmony_ci if (writecache_has_error(wc)) { 197862306a36Sopenharmony_ci wc_unlock(wc); 197962306a36Sopenharmony_ci return; 198062306a36Sopenharmony_ci } 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_ci if (unlikely(wc->writeback_all)) { 198362306a36Sopenharmony_ci if (writecache_wait_for_writeback(wc)) 198462306a36Sopenharmony_ci goto restart; 198562306a36Sopenharmony_ci } 198662306a36Sopenharmony_ci 198762306a36Sopenharmony_ci if (wc->overwrote_committed) 198862306a36Sopenharmony_ci writecache_wait_for_ios(wc, WRITE); 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci n_walked = 0; 199162306a36Sopenharmony_ci INIT_LIST_HEAD(&skipped); 199262306a36Sopenharmony_ci INIT_LIST_HEAD(&wbl.list); 199362306a36Sopenharmony_ci wbl.size = 0; 199462306a36Sopenharmony_ci while (!list_empty(&wc->lru) && 199562306a36Sopenharmony_ci (wc->writeback_all || 199662306a36Sopenharmony_ci wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark || 199762306a36Sopenharmony_ci (jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >= 199862306a36Sopenharmony_ci wc->max_age - wc->max_age / MAX_AGE_DIV))) { 199962306a36Sopenharmony_ci 200062306a36Sopenharmony_ci n_walked++; 200162306a36Sopenharmony_ci if (unlikely(n_walked > WRITEBACK_LATENCY) && 200262306a36Sopenharmony_ci likely(!wc->writeback_all)) { 200362306a36Sopenharmony_ci if (likely(!dm_suspended(wc->ti))) 200462306a36Sopenharmony_ci queue_work(wc->writeback_wq, &wc->writeback_work); 200562306a36Sopenharmony_ci break; 200662306a36Sopenharmony_ci } 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci if (unlikely(wc->writeback_all)) { 200962306a36Sopenharmony_ci if (unlikely(!e)) { 201062306a36Sopenharmony_ci writecache_flush(wc); 201162306a36Sopenharmony_ci e = container_of(rb_first(&wc->tree), struct wc_entry, rb_node); 201262306a36Sopenharmony_ci } else 201362306a36Sopenharmony_ci e = g; 201462306a36Sopenharmony_ci } else 201562306a36Sopenharmony_ci e = container_of(wc->lru.prev, struct wc_entry, lru); 201662306a36Sopenharmony_ci BUG_ON(e->write_in_progress); 201762306a36Sopenharmony_ci if (unlikely(!writecache_entry_is_committed(wc, e))) 201862306a36Sopenharmony_ci writecache_flush(wc); 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci node = rb_prev(&e->rb_node); 202162306a36Sopenharmony_ci if (node) { 202262306a36Sopenharmony_ci f = container_of(node, struct wc_entry, rb_node); 202362306a36Sopenharmony_ci if (unlikely(read_original_sector(wc, f) == 202462306a36Sopenharmony_ci read_original_sector(wc, e))) { 202562306a36Sopenharmony_ci BUG_ON(!f->write_in_progress); 202662306a36Sopenharmony_ci list_move(&e->lru, &skipped); 202762306a36Sopenharmony_ci cond_resched(); 202862306a36Sopenharmony_ci continue; 202962306a36Sopenharmony_ci } 203062306a36Sopenharmony_ci } 203162306a36Sopenharmony_ci wc->writeback_size++; 203262306a36Sopenharmony_ci list_move(&e->lru, &wbl.list); 203362306a36Sopenharmony_ci wbl.size++; 203462306a36Sopenharmony_ci e->write_in_progress = true; 203562306a36Sopenharmony_ci e->wc_list_contiguous = 1; 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ci f = e; 203862306a36Sopenharmony_ci 203962306a36Sopenharmony_ci while (1) { 204062306a36Sopenharmony_ci next_node = rb_next(&f->rb_node); 204162306a36Sopenharmony_ci if (unlikely(!next_node)) 204262306a36Sopenharmony_ci break; 204362306a36Sopenharmony_ci g = container_of(next_node, struct wc_entry, rb_node); 204462306a36Sopenharmony_ci if (unlikely(read_original_sector(wc, g) == 204562306a36Sopenharmony_ci read_original_sector(wc, f))) { 204662306a36Sopenharmony_ci f = g; 204762306a36Sopenharmony_ci continue; 204862306a36Sopenharmony_ci } 204962306a36Sopenharmony_ci if (read_original_sector(wc, g) != 205062306a36Sopenharmony_ci read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT)) 205162306a36Sopenharmony_ci break; 205262306a36Sopenharmony_ci if (unlikely(g->write_in_progress)) 205362306a36Sopenharmony_ci break; 205462306a36Sopenharmony_ci if (unlikely(!writecache_entry_is_committed(wc, g))) 205562306a36Sopenharmony_ci break; 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci if (!WC_MODE_PMEM(wc)) { 205862306a36Sopenharmony_ci if (g != f + 1) 205962306a36Sopenharmony_ci break; 206062306a36Sopenharmony_ci } 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci n_walked++; 206362306a36Sopenharmony_ci //if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all)) 206462306a36Sopenharmony_ci // break; 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_ci wc->writeback_size++; 206762306a36Sopenharmony_ci list_move(&g->lru, &wbl.list); 206862306a36Sopenharmony_ci wbl.size++; 206962306a36Sopenharmony_ci g->write_in_progress = true; 207062306a36Sopenharmony_ci g->wc_list_contiguous = BIO_MAX_VECS; 207162306a36Sopenharmony_ci f = g; 207262306a36Sopenharmony_ci e->wc_list_contiguous++; 207362306a36Sopenharmony_ci if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) { 207462306a36Sopenharmony_ci if (unlikely(wc->writeback_all)) { 207562306a36Sopenharmony_ci next_node = rb_next(&f->rb_node); 207662306a36Sopenharmony_ci if (likely(next_node)) 207762306a36Sopenharmony_ci g = container_of(next_node, struct wc_entry, rb_node); 207862306a36Sopenharmony_ci } 207962306a36Sopenharmony_ci break; 208062306a36Sopenharmony_ci } 208162306a36Sopenharmony_ci } 208262306a36Sopenharmony_ci cond_resched(); 208362306a36Sopenharmony_ci } 208462306a36Sopenharmony_ci 208562306a36Sopenharmony_ci if (!list_empty(&skipped)) { 208662306a36Sopenharmony_ci list_splice_tail(&skipped, &wc->lru); 208762306a36Sopenharmony_ci /* 208862306a36Sopenharmony_ci * If we didn't do any progress, we must wait until some 208962306a36Sopenharmony_ci * writeback finishes to avoid burning CPU in a loop 209062306a36Sopenharmony_ci */ 209162306a36Sopenharmony_ci if (unlikely(!wbl.size)) 209262306a36Sopenharmony_ci writecache_wait_for_writeback(wc); 209362306a36Sopenharmony_ci } 209462306a36Sopenharmony_ci 209562306a36Sopenharmony_ci wc_unlock(wc); 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci blk_start_plug(&plug); 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 210062306a36Sopenharmony_ci __writecache_writeback_pmem(wc, &wbl); 210162306a36Sopenharmony_ci else 210262306a36Sopenharmony_ci __writecache_writeback_ssd(wc, &wbl); 210362306a36Sopenharmony_ci 210462306a36Sopenharmony_ci blk_finish_plug(&plug); 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci if (unlikely(wc->writeback_all)) { 210762306a36Sopenharmony_ci wc_lock(wc); 210862306a36Sopenharmony_ci while (writecache_wait_for_writeback(wc)) 210962306a36Sopenharmony_ci ; 211062306a36Sopenharmony_ci wc_unlock(wc); 211162306a36Sopenharmony_ci } 211262306a36Sopenharmony_ci} 211362306a36Sopenharmony_ci 211462306a36Sopenharmony_cistatic int calculate_memory_size(uint64_t device_size, unsigned int block_size, 211562306a36Sopenharmony_ci size_t *n_blocks_p, size_t *n_metadata_blocks_p) 211662306a36Sopenharmony_ci{ 211762306a36Sopenharmony_ci uint64_t n_blocks, offset; 211862306a36Sopenharmony_ci struct wc_entry e; 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci n_blocks = device_size; 212162306a36Sopenharmony_ci do_div(n_blocks, block_size + sizeof(struct wc_memory_entry)); 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_ci while (1) { 212462306a36Sopenharmony_ci if (!n_blocks) 212562306a36Sopenharmony_ci return -ENOSPC; 212662306a36Sopenharmony_ci /* Verify the following entries[n_blocks] won't overflow */ 212762306a36Sopenharmony_ci if (n_blocks >= ((size_t)-sizeof(struct wc_memory_superblock) / 212862306a36Sopenharmony_ci sizeof(struct wc_memory_entry))) 212962306a36Sopenharmony_ci return -EFBIG; 213062306a36Sopenharmony_ci offset = offsetof(struct wc_memory_superblock, entries[n_blocks]); 213162306a36Sopenharmony_ci offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1); 213262306a36Sopenharmony_ci if (offset + n_blocks * block_size <= device_size) 213362306a36Sopenharmony_ci break; 213462306a36Sopenharmony_ci n_blocks--; 213562306a36Sopenharmony_ci } 213662306a36Sopenharmony_ci 213762306a36Sopenharmony_ci /* check if the bit field overflows */ 213862306a36Sopenharmony_ci e.index = n_blocks; 213962306a36Sopenharmony_ci if (e.index != n_blocks) 214062306a36Sopenharmony_ci return -EFBIG; 214162306a36Sopenharmony_ci 214262306a36Sopenharmony_ci if (n_blocks_p) 214362306a36Sopenharmony_ci *n_blocks_p = n_blocks; 214462306a36Sopenharmony_ci if (n_metadata_blocks_p) 214562306a36Sopenharmony_ci *n_metadata_blocks_p = offset >> __ffs(block_size); 214662306a36Sopenharmony_ci return 0; 214762306a36Sopenharmony_ci} 214862306a36Sopenharmony_ci 214962306a36Sopenharmony_cistatic int init_memory(struct dm_writecache *wc) 215062306a36Sopenharmony_ci{ 215162306a36Sopenharmony_ci size_t b; 215262306a36Sopenharmony_ci int r; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL); 215562306a36Sopenharmony_ci if (r) 215662306a36Sopenharmony_ci return r; 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci r = writecache_alloc_entries(wc); 215962306a36Sopenharmony_ci if (r) 216062306a36Sopenharmony_ci return r; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++) 216362306a36Sopenharmony_ci pmem_assign(sb(wc)->padding[b], cpu_to_le64(0)); 216462306a36Sopenharmony_ci pmem_assign(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION)); 216562306a36Sopenharmony_ci pmem_assign(sb(wc)->block_size, cpu_to_le32(wc->block_size)); 216662306a36Sopenharmony_ci pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks)); 216762306a36Sopenharmony_ci pmem_assign(sb(wc)->seq_count, cpu_to_le64(0)); 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci for (b = 0; b < wc->n_blocks; b++) { 217062306a36Sopenharmony_ci write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); 217162306a36Sopenharmony_ci cond_resched(); 217262306a36Sopenharmony_ci } 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci writecache_flush_all_metadata(wc); 217562306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 217662306a36Sopenharmony_ci pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); 217762306a36Sopenharmony_ci writecache_flush_region(wc, &sb(wc)->magic, sizeof(sb(wc)->magic)); 217862306a36Sopenharmony_ci writecache_commit_flushed(wc, false); 217962306a36Sopenharmony_ci 218062306a36Sopenharmony_ci return 0; 218162306a36Sopenharmony_ci} 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_cistatic void writecache_dtr(struct dm_target *ti) 218462306a36Sopenharmony_ci{ 218562306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_ci if (!wc) 218862306a36Sopenharmony_ci return; 218962306a36Sopenharmony_ci 219062306a36Sopenharmony_ci if (wc->endio_thread) 219162306a36Sopenharmony_ci kthread_stop(wc->endio_thread); 219262306a36Sopenharmony_ci 219362306a36Sopenharmony_ci if (wc->flush_thread) 219462306a36Sopenharmony_ci kthread_stop(wc->flush_thread); 219562306a36Sopenharmony_ci 219662306a36Sopenharmony_ci bioset_exit(&wc->bio_set); 219762306a36Sopenharmony_ci 219862306a36Sopenharmony_ci mempool_exit(&wc->copy_pool); 219962306a36Sopenharmony_ci 220062306a36Sopenharmony_ci if (wc->writeback_wq) 220162306a36Sopenharmony_ci destroy_workqueue(wc->writeback_wq); 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_ci if (wc->dev) 220462306a36Sopenharmony_ci dm_put_device(ti, wc->dev); 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci if (wc->ssd_dev) 220762306a36Sopenharmony_ci dm_put_device(ti, wc->ssd_dev); 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci vfree(wc->entries); 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci if (wc->memory_map) { 221262306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 221362306a36Sopenharmony_ci persistent_memory_release(wc); 221462306a36Sopenharmony_ci else 221562306a36Sopenharmony_ci vfree(wc->memory_map); 221662306a36Sopenharmony_ci } 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_ci if (wc->dm_kcopyd) 221962306a36Sopenharmony_ci dm_kcopyd_client_destroy(wc->dm_kcopyd); 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci if (wc->dm_io) 222262306a36Sopenharmony_ci dm_io_client_destroy(wc->dm_io); 222362306a36Sopenharmony_ci 222462306a36Sopenharmony_ci vfree(wc->dirty_bitmap); 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci kfree(wc); 222762306a36Sopenharmony_ci} 222862306a36Sopenharmony_ci 222962306a36Sopenharmony_cistatic int writecache_ctr(struct dm_target *ti, unsigned int argc, char **argv) 223062306a36Sopenharmony_ci{ 223162306a36Sopenharmony_ci struct dm_writecache *wc; 223262306a36Sopenharmony_ci struct dm_arg_set as; 223362306a36Sopenharmony_ci const char *string; 223462306a36Sopenharmony_ci unsigned int opt_params; 223562306a36Sopenharmony_ci size_t offset, data_size; 223662306a36Sopenharmony_ci int i, r; 223762306a36Sopenharmony_ci char dummy; 223862306a36Sopenharmony_ci int high_wm_percent = HIGH_WATERMARK; 223962306a36Sopenharmony_ci int low_wm_percent = LOW_WATERMARK; 224062306a36Sopenharmony_ci uint64_t x; 224162306a36Sopenharmony_ci struct wc_memory_superblock s; 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci static struct dm_arg _args[] = { 224462306a36Sopenharmony_ci {0, 18, "Invalid number of feature args"}, 224562306a36Sopenharmony_ci }; 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci as.argc = argc; 224862306a36Sopenharmony_ci as.argv = argv; 224962306a36Sopenharmony_ci 225062306a36Sopenharmony_ci wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL); 225162306a36Sopenharmony_ci if (!wc) { 225262306a36Sopenharmony_ci ti->error = "Cannot allocate writecache structure"; 225362306a36Sopenharmony_ci r = -ENOMEM; 225462306a36Sopenharmony_ci goto bad; 225562306a36Sopenharmony_ci } 225662306a36Sopenharmony_ci ti->private = wc; 225762306a36Sopenharmony_ci wc->ti = ti; 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci mutex_init(&wc->lock); 226062306a36Sopenharmony_ci wc->max_age = MAX_AGE_UNSPECIFIED; 226162306a36Sopenharmony_ci writecache_poison_lists(wc); 226262306a36Sopenharmony_ci init_waitqueue_head(&wc->freelist_wait); 226362306a36Sopenharmony_ci timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0); 226462306a36Sopenharmony_ci timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0); 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci for (i = 0; i < 2; i++) { 226762306a36Sopenharmony_ci atomic_set(&wc->bio_in_progress[i], 0); 226862306a36Sopenharmony_ci init_waitqueue_head(&wc->bio_in_progress_wait[i]); 226962306a36Sopenharmony_ci } 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci wc->dm_io = dm_io_client_create(); 227262306a36Sopenharmony_ci if (IS_ERR(wc->dm_io)) { 227362306a36Sopenharmony_ci r = PTR_ERR(wc->dm_io); 227462306a36Sopenharmony_ci ti->error = "Unable to allocate dm-io client"; 227562306a36Sopenharmony_ci wc->dm_io = NULL; 227662306a36Sopenharmony_ci goto bad; 227762306a36Sopenharmony_ci } 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1); 228062306a36Sopenharmony_ci if (!wc->writeback_wq) { 228162306a36Sopenharmony_ci r = -ENOMEM; 228262306a36Sopenharmony_ci ti->error = "Could not allocate writeback workqueue"; 228362306a36Sopenharmony_ci goto bad; 228462306a36Sopenharmony_ci } 228562306a36Sopenharmony_ci INIT_WORK(&wc->writeback_work, writecache_writeback); 228662306a36Sopenharmony_ci INIT_WORK(&wc->flush_work, writecache_flush_work); 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci dm_iot_init(&wc->iot); 228962306a36Sopenharmony_ci 229062306a36Sopenharmony_ci raw_spin_lock_init(&wc->endio_list_lock); 229162306a36Sopenharmony_ci INIT_LIST_HEAD(&wc->endio_list); 229262306a36Sopenharmony_ci wc->endio_thread = kthread_run(writecache_endio_thread, wc, "writecache_endio"); 229362306a36Sopenharmony_ci if (IS_ERR(wc->endio_thread)) { 229462306a36Sopenharmony_ci r = PTR_ERR(wc->endio_thread); 229562306a36Sopenharmony_ci wc->endio_thread = NULL; 229662306a36Sopenharmony_ci ti->error = "Couldn't spawn endio thread"; 229762306a36Sopenharmony_ci goto bad; 229862306a36Sopenharmony_ci } 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci /* 230162306a36Sopenharmony_ci * Parse the mode (pmem or ssd) 230262306a36Sopenharmony_ci */ 230362306a36Sopenharmony_ci string = dm_shift_arg(&as); 230462306a36Sopenharmony_ci if (!string) 230562306a36Sopenharmony_ci goto bad_arguments; 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_ci if (!strcasecmp(string, "s")) { 230862306a36Sopenharmony_ci wc->pmem_mode = false; 230962306a36Sopenharmony_ci } else if (!strcasecmp(string, "p")) { 231062306a36Sopenharmony_ci#ifdef DM_WRITECACHE_HAS_PMEM 231162306a36Sopenharmony_ci wc->pmem_mode = true; 231262306a36Sopenharmony_ci wc->writeback_fua = true; 231362306a36Sopenharmony_ci#else 231462306a36Sopenharmony_ci /* 231562306a36Sopenharmony_ci * If the architecture doesn't support persistent memory or 231662306a36Sopenharmony_ci * the kernel doesn't support any DAX drivers, this driver can 231762306a36Sopenharmony_ci * only be used in SSD-only mode. 231862306a36Sopenharmony_ci */ 231962306a36Sopenharmony_ci r = -EOPNOTSUPP; 232062306a36Sopenharmony_ci ti->error = "Persistent memory or DAX not supported on this system"; 232162306a36Sopenharmony_ci goto bad; 232262306a36Sopenharmony_ci#endif 232362306a36Sopenharmony_ci } else { 232462306a36Sopenharmony_ci goto bad_arguments; 232562306a36Sopenharmony_ci } 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 232862306a36Sopenharmony_ci r = bioset_init(&wc->bio_set, BIO_POOL_SIZE, 232962306a36Sopenharmony_ci offsetof(struct writeback_struct, bio), 233062306a36Sopenharmony_ci BIOSET_NEED_BVECS); 233162306a36Sopenharmony_ci if (r) { 233262306a36Sopenharmony_ci ti->error = "Could not allocate bio set"; 233362306a36Sopenharmony_ci goto bad; 233462306a36Sopenharmony_ci } 233562306a36Sopenharmony_ci } else { 233662306a36Sopenharmony_ci wc->pause = PAUSE_WRITEBACK; 233762306a36Sopenharmony_ci r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct)); 233862306a36Sopenharmony_ci if (r) { 233962306a36Sopenharmony_ci ti->error = "Could not allocate mempool"; 234062306a36Sopenharmony_ci goto bad; 234162306a36Sopenharmony_ci } 234262306a36Sopenharmony_ci } 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci /* 234562306a36Sopenharmony_ci * Parse the origin data device 234662306a36Sopenharmony_ci */ 234762306a36Sopenharmony_ci string = dm_shift_arg(&as); 234862306a36Sopenharmony_ci if (!string) 234962306a36Sopenharmony_ci goto bad_arguments; 235062306a36Sopenharmony_ci r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev); 235162306a36Sopenharmony_ci if (r) { 235262306a36Sopenharmony_ci ti->error = "Origin data device lookup failed"; 235362306a36Sopenharmony_ci goto bad; 235462306a36Sopenharmony_ci } 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* 235762306a36Sopenharmony_ci * Parse cache data device (be it pmem or ssd) 235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ci string = dm_shift_arg(&as); 236062306a36Sopenharmony_ci if (!string) 236162306a36Sopenharmony_ci goto bad_arguments; 236262306a36Sopenharmony_ci 236362306a36Sopenharmony_ci r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev); 236462306a36Sopenharmony_ci if (r) { 236562306a36Sopenharmony_ci ti->error = "Cache data device lookup failed"; 236662306a36Sopenharmony_ci goto bad; 236762306a36Sopenharmony_ci } 236862306a36Sopenharmony_ci wc->memory_map_size = bdev_nr_bytes(wc->ssd_dev->bdev); 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* 237162306a36Sopenharmony_ci * Parse the cache block size 237262306a36Sopenharmony_ci */ 237362306a36Sopenharmony_ci string = dm_shift_arg(&as); 237462306a36Sopenharmony_ci if (!string) 237562306a36Sopenharmony_ci goto bad_arguments; 237662306a36Sopenharmony_ci if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 || 237762306a36Sopenharmony_ci wc->block_size < 512 || wc->block_size > PAGE_SIZE || 237862306a36Sopenharmony_ci (wc->block_size & (wc->block_size - 1))) { 237962306a36Sopenharmony_ci r = -EINVAL; 238062306a36Sopenharmony_ci ti->error = "Invalid block size"; 238162306a36Sopenharmony_ci goto bad; 238262306a36Sopenharmony_ci } 238362306a36Sopenharmony_ci if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) || 238462306a36Sopenharmony_ci wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) { 238562306a36Sopenharmony_ci r = -EINVAL; 238662306a36Sopenharmony_ci ti->error = "Block size is smaller than device logical block size"; 238762306a36Sopenharmony_ci goto bad; 238862306a36Sopenharmony_ci } 238962306a36Sopenharmony_ci wc->block_size_bits = __ffs(wc->block_size); 239062306a36Sopenharmony_ci 239162306a36Sopenharmony_ci wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; 239262306a36Sopenharmony_ci wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM; 239362306a36Sopenharmony_ci wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC); 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci /* 239662306a36Sopenharmony_ci * Parse optional arguments 239762306a36Sopenharmony_ci */ 239862306a36Sopenharmony_ci r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); 239962306a36Sopenharmony_ci if (r) 240062306a36Sopenharmony_ci goto bad; 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci while (opt_params) { 240362306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 240462306a36Sopenharmony_ci if (!strcasecmp(string, "start_sector") && opt_params >= 1) { 240562306a36Sopenharmony_ci unsigned long long start_sector; 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 240862306a36Sopenharmony_ci if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) 240962306a36Sopenharmony_ci goto invalid_optional; 241062306a36Sopenharmony_ci wc->start_sector = start_sector; 241162306a36Sopenharmony_ci wc->start_sector_set = true; 241262306a36Sopenharmony_ci if (wc->start_sector != start_sector || 241362306a36Sopenharmony_ci wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) 241462306a36Sopenharmony_ci goto invalid_optional; 241562306a36Sopenharmony_ci } else if (!strcasecmp(string, "high_watermark") && opt_params >= 1) { 241662306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 241762306a36Sopenharmony_ci if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1) 241862306a36Sopenharmony_ci goto invalid_optional; 241962306a36Sopenharmony_ci if (high_wm_percent < 0 || high_wm_percent > 100) 242062306a36Sopenharmony_ci goto invalid_optional; 242162306a36Sopenharmony_ci wc->high_wm_percent_value = high_wm_percent; 242262306a36Sopenharmony_ci wc->high_wm_percent_set = true; 242362306a36Sopenharmony_ci } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { 242462306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 242562306a36Sopenharmony_ci if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1) 242662306a36Sopenharmony_ci goto invalid_optional; 242762306a36Sopenharmony_ci if (low_wm_percent < 0 || low_wm_percent > 100) 242862306a36Sopenharmony_ci goto invalid_optional; 242962306a36Sopenharmony_ci wc->low_wm_percent_value = low_wm_percent; 243062306a36Sopenharmony_ci wc->low_wm_percent_set = true; 243162306a36Sopenharmony_ci } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { 243262306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 243362306a36Sopenharmony_ci if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1) 243462306a36Sopenharmony_ci goto invalid_optional; 243562306a36Sopenharmony_ci wc->max_writeback_jobs_set = true; 243662306a36Sopenharmony_ci } else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) { 243762306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 243862306a36Sopenharmony_ci if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1) 243962306a36Sopenharmony_ci goto invalid_optional; 244062306a36Sopenharmony_ci wc->autocommit_blocks_set = true; 244162306a36Sopenharmony_ci } else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) { 244262306a36Sopenharmony_ci unsigned int autocommit_msecs; 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 244562306a36Sopenharmony_ci if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1) 244662306a36Sopenharmony_ci goto invalid_optional; 244762306a36Sopenharmony_ci if (autocommit_msecs > 3600000) 244862306a36Sopenharmony_ci goto invalid_optional; 244962306a36Sopenharmony_ci wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); 245062306a36Sopenharmony_ci wc->autocommit_time_value = autocommit_msecs; 245162306a36Sopenharmony_ci wc->autocommit_time_set = true; 245262306a36Sopenharmony_ci } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { 245362306a36Sopenharmony_ci unsigned int max_age_msecs; 245462306a36Sopenharmony_ci 245562306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 245662306a36Sopenharmony_ci if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1) 245762306a36Sopenharmony_ci goto invalid_optional; 245862306a36Sopenharmony_ci if (max_age_msecs > 86400000) 245962306a36Sopenharmony_ci goto invalid_optional; 246062306a36Sopenharmony_ci wc->max_age = msecs_to_jiffies(max_age_msecs); 246162306a36Sopenharmony_ci wc->max_age_set = true; 246262306a36Sopenharmony_ci wc->max_age_value = max_age_msecs; 246362306a36Sopenharmony_ci } else if (!strcasecmp(string, "cleaner")) { 246462306a36Sopenharmony_ci wc->cleaner_set = true; 246562306a36Sopenharmony_ci wc->cleaner = true; 246662306a36Sopenharmony_ci } else if (!strcasecmp(string, "fua")) { 246762306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 246862306a36Sopenharmony_ci wc->writeback_fua = true; 246962306a36Sopenharmony_ci wc->writeback_fua_set = true; 247062306a36Sopenharmony_ci } else 247162306a36Sopenharmony_ci goto invalid_optional; 247262306a36Sopenharmony_ci } else if (!strcasecmp(string, "nofua")) { 247362306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 247462306a36Sopenharmony_ci wc->writeback_fua = false; 247562306a36Sopenharmony_ci wc->writeback_fua_set = true; 247662306a36Sopenharmony_ci } else 247762306a36Sopenharmony_ci goto invalid_optional; 247862306a36Sopenharmony_ci } else if (!strcasecmp(string, "metadata_only")) { 247962306a36Sopenharmony_ci wc->metadata_only = true; 248062306a36Sopenharmony_ci } else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) { 248162306a36Sopenharmony_ci unsigned int pause_msecs; 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 248462306a36Sopenharmony_ci goto invalid_optional; 248562306a36Sopenharmony_ci string = dm_shift_arg(&as), opt_params--; 248662306a36Sopenharmony_ci if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1) 248762306a36Sopenharmony_ci goto invalid_optional; 248862306a36Sopenharmony_ci if (pause_msecs > 60000) 248962306a36Sopenharmony_ci goto invalid_optional; 249062306a36Sopenharmony_ci wc->pause = msecs_to_jiffies(pause_msecs); 249162306a36Sopenharmony_ci wc->pause_set = true; 249262306a36Sopenharmony_ci wc->pause_value = pause_msecs; 249362306a36Sopenharmony_ci } else { 249462306a36Sopenharmony_ciinvalid_optional: 249562306a36Sopenharmony_ci r = -EINVAL; 249662306a36Sopenharmony_ci ti->error = "Invalid optional argument"; 249762306a36Sopenharmony_ci goto bad; 249862306a36Sopenharmony_ci } 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci if (high_wm_percent < low_wm_percent) { 250262306a36Sopenharmony_ci r = -EINVAL; 250362306a36Sopenharmony_ci ti->error = "High watermark must be greater than or equal to low watermark"; 250462306a36Sopenharmony_ci goto bad; 250562306a36Sopenharmony_ci } 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) { 250862306a36Sopenharmony_ci if (!dax_synchronous(wc->ssd_dev->dax_dev)) { 250962306a36Sopenharmony_ci r = -EOPNOTSUPP; 251062306a36Sopenharmony_ci ti->error = "Asynchronous persistent memory not supported as pmem cache"; 251162306a36Sopenharmony_ci goto bad; 251262306a36Sopenharmony_ci } 251362306a36Sopenharmony_ci 251462306a36Sopenharmony_ci r = persistent_memory_claim(wc); 251562306a36Sopenharmony_ci if (r) { 251662306a36Sopenharmony_ci ti->error = "Unable to map persistent memory for cache"; 251762306a36Sopenharmony_ci goto bad; 251862306a36Sopenharmony_ci } 251962306a36Sopenharmony_ci } else { 252062306a36Sopenharmony_ci size_t n_blocks, n_metadata_blocks; 252162306a36Sopenharmony_ci uint64_t n_bitmap_bits; 252262306a36Sopenharmony_ci 252362306a36Sopenharmony_ci wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT; 252462306a36Sopenharmony_ci 252562306a36Sopenharmony_ci bio_list_init(&wc->flush_list); 252662306a36Sopenharmony_ci wc->flush_thread = kthread_run(writecache_flush_thread, wc, "dm_writecache_flush"); 252762306a36Sopenharmony_ci if (IS_ERR(wc->flush_thread)) { 252862306a36Sopenharmony_ci r = PTR_ERR(wc->flush_thread); 252962306a36Sopenharmony_ci wc->flush_thread = NULL; 253062306a36Sopenharmony_ci ti->error = "Couldn't spawn flush thread"; 253162306a36Sopenharmony_ci goto bad; 253262306a36Sopenharmony_ci } 253362306a36Sopenharmony_ci 253462306a36Sopenharmony_ci r = calculate_memory_size(wc->memory_map_size, wc->block_size, 253562306a36Sopenharmony_ci &n_blocks, &n_metadata_blocks); 253662306a36Sopenharmony_ci if (r) { 253762306a36Sopenharmony_ci ti->error = "Invalid device size"; 253862306a36Sopenharmony_ci goto bad; 253962306a36Sopenharmony_ci } 254062306a36Sopenharmony_ci 254162306a36Sopenharmony_ci n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + 254262306a36Sopenharmony_ci BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY; 254362306a36Sopenharmony_ci /* this is limitation of test_bit functions */ 254462306a36Sopenharmony_ci if (n_bitmap_bits > 1U << 31) { 254562306a36Sopenharmony_ci r = -EFBIG; 254662306a36Sopenharmony_ci ti->error = "Invalid device size"; 254762306a36Sopenharmony_ci goto bad; 254862306a36Sopenharmony_ci } 254962306a36Sopenharmony_ci 255062306a36Sopenharmony_ci wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits); 255162306a36Sopenharmony_ci if (!wc->memory_map) { 255262306a36Sopenharmony_ci r = -ENOMEM; 255362306a36Sopenharmony_ci ti->error = "Unable to allocate memory for metadata"; 255462306a36Sopenharmony_ci goto bad; 255562306a36Sopenharmony_ci } 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle); 255862306a36Sopenharmony_ci if (IS_ERR(wc->dm_kcopyd)) { 255962306a36Sopenharmony_ci r = PTR_ERR(wc->dm_kcopyd); 256062306a36Sopenharmony_ci ti->error = "Unable to allocate dm-kcopyd client"; 256162306a36Sopenharmony_ci wc->dm_kcopyd = NULL; 256262306a36Sopenharmony_ci goto bad; 256362306a36Sopenharmony_ci } 256462306a36Sopenharmony_ci 256562306a36Sopenharmony_ci wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT); 256662306a36Sopenharmony_ci wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / 256762306a36Sopenharmony_ci BITS_PER_LONG * sizeof(unsigned long); 256862306a36Sopenharmony_ci wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size); 256962306a36Sopenharmony_ci if (!wc->dirty_bitmap) { 257062306a36Sopenharmony_ci r = -ENOMEM; 257162306a36Sopenharmony_ci ti->error = "Unable to allocate dirty bitmap"; 257262306a36Sopenharmony_ci goto bad; 257362306a36Sopenharmony_ci } 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_ci r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT); 257662306a36Sopenharmony_ci if (r) { 257762306a36Sopenharmony_ci ti->error = "Unable to read first block of metadata"; 257862306a36Sopenharmony_ci goto bad; 257962306a36Sopenharmony_ci } 258062306a36Sopenharmony_ci } 258162306a36Sopenharmony_ci 258262306a36Sopenharmony_ci r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock)); 258362306a36Sopenharmony_ci if (r) { 258462306a36Sopenharmony_ci ti->error = "Hardware memory error when reading superblock"; 258562306a36Sopenharmony_ci goto bad; 258662306a36Sopenharmony_ci } 258762306a36Sopenharmony_ci if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) { 258862306a36Sopenharmony_ci r = init_memory(wc); 258962306a36Sopenharmony_ci if (r) { 259062306a36Sopenharmony_ci ti->error = "Unable to initialize device"; 259162306a36Sopenharmony_ci goto bad; 259262306a36Sopenharmony_ci } 259362306a36Sopenharmony_ci r = copy_mc_to_kernel(&s, sb(wc), 259462306a36Sopenharmony_ci sizeof(struct wc_memory_superblock)); 259562306a36Sopenharmony_ci if (r) { 259662306a36Sopenharmony_ci ti->error = "Hardware memory error when reading superblock"; 259762306a36Sopenharmony_ci goto bad; 259862306a36Sopenharmony_ci } 259962306a36Sopenharmony_ci } 260062306a36Sopenharmony_ci 260162306a36Sopenharmony_ci if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) { 260262306a36Sopenharmony_ci ti->error = "Invalid magic in the superblock"; 260362306a36Sopenharmony_ci r = -EINVAL; 260462306a36Sopenharmony_ci goto bad; 260562306a36Sopenharmony_ci } 260662306a36Sopenharmony_ci 260762306a36Sopenharmony_ci if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) { 260862306a36Sopenharmony_ci ti->error = "Invalid version in the superblock"; 260962306a36Sopenharmony_ci r = -EINVAL; 261062306a36Sopenharmony_ci goto bad; 261162306a36Sopenharmony_ci } 261262306a36Sopenharmony_ci 261362306a36Sopenharmony_ci if (le32_to_cpu(s.block_size) != wc->block_size) { 261462306a36Sopenharmony_ci ti->error = "Block size does not match superblock"; 261562306a36Sopenharmony_ci r = -EINVAL; 261662306a36Sopenharmony_ci goto bad; 261762306a36Sopenharmony_ci } 261862306a36Sopenharmony_ci 261962306a36Sopenharmony_ci wc->n_blocks = le64_to_cpu(s.n_blocks); 262062306a36Sopenharmony_ci 262162306a36Sopenharmony_ci offset = wc->n_blocks * sizeof(struct wc_memory_entry); 262262306a36Sopenharmony_ci if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) { 262362306a36Sopenharmony_cioverflow: 262462306a36Sopenharmony_ci ti->error = "Overflow in size calculation"; 262562306a36Sopenharmony_ci r = -EINVAL; 262662306a36Sopenharmony_ci goto bad; 262762306a36Sopenharmony_ci } 262862306a36Sopenharmony_ci offset += sizeof(struct wc_memory_superblock); 262962306a36Sopenharmony_ci if (offset < sizeof(struct wc_memory_superblock)) 263062306a36Sopenharmony_ci goto overflow; 263162306a36Sopenharmony_ci offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1); 263262306a36Sopenharmony_ci data_size = wc->n_blocks * (size_t)wc->block_size; 263362306a36Sopenharmony_ci if (!offset || (data_size / wc->block_size != wc->n_blocks) || 263462306a36Sopenharmony_ci (offset + data_size < offset)) 263562306a36Sopenharmony_ci goto overflow; 263662306a36Sopenharmony_ci if (offset + data_size > wc->memory_map_size) { 263762306a36Sopenharmony_ci ti->error = "Memory area is too small"; 263862306a36Sopenharmony_ci r = -EINVAL; 263962306a36Sopenharmony_ci goto bad; 264062306a36Sopenharmony_ci } 264162306a36Sopenharmony_ci 264262306a36Sopenharmony_ci wc->metadata_sectors = offset >> SECTOR_SHIFT; 264362306a36Sopenharmony_ci wc->block_start = (char *)sb(wc) + offset; 264462306a36Sopenharmony_ci 264562306a36Sopenharmony_ci x = (uint64_t)wc->n_blocks * (100 - high_wm_percent); 264662306a36Sopenharmony_ci x += 50; 264762306a36Sopenharmony_ci do_div(x, 100); 264862306a36Sopenharmony_ci wc->freelist_high_watermark = x; 264962306a36Sopenharmony_ci x = (uint64_t)wc->n_blocks * (100 - low_wm_percent); 265062306a36Sopenharmony_ci x += 50; 265162306a36Sopenharmony_ci do_div(x, 100); 265262306a36Sopenharmony_ci wc->freelist_low_watermark = x; 265362306a36Sopenharmony_ci 265462306a36Sopenharmony_ci if (wc->cleaner) 265562306a36Sopenharmony_ci activate_cleaner(wc); 265662306a36Sopenharmony_ci 265762306a36Sopenharmony_ci r = writecache_alloc_entries(wc); 265862306a36Sopenharmony_ci if (r) { 265962306a36Sopenharmony_ci ti->error = "Cannot allocate memory"; 266062306a36Sopenharmony_ci goto bad; 266162306a36Sopenharmony_ci } 266262306a36Sopenharmony_ci 266362306a36Sopenharmony_ci ti->num_flush_bios = WC_MODE_PMEM(wc) ? 1 : 2; 266462306a36Sopenharmony_ci ti->flush_supported = true; 266562306a36Sopenharmony_ci ti->num_discard_bios = 1; 266662306a36Sopenharmony_ci 266762306a36Sopenharmony_ci if (WC_MODE_PMEM(wc)) 266862306a36Sopenharmony_ci persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size); 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_ci return 0; 267162306a36Sopenharmony_ci 267262306a36Sopenharmony_cibad_arguments: 267362306a36Sopenharmony_ci r = -EINVAL; 267462306a36Sopenharmony_ci ti->error = "Bad arguments"; 267562306a36Sopenharmony_cibad: 267662306a36Sopenharmony_ci writecache_dtr(ti); 267762306a36Sopenharmony_ci return r; 267862306a36Sopenharmony_ci} 267962306a36Sopenharmony_ci 268062306a36Sopenharmony_cistatic void writecache_status(struct dm_target *ti, status_type_t type, 268162306a36Sopenharmony_ci unsigned int status_flags, char *result, unsigned int maxlen) 268262306a36Sopenharmony_ci{ 268362306a36Sopenharmony_ci struct dm_writecache *wc = ti->private; 268462306a36Sopenharmony_ci unsigned int extra_args; 268562306a36Sopenharmony_ci unsigned int sz = 0; 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_ci switch (type) { 268862306a36Sopenharmony_ci case STATUSTYPE_INFO: 268962306a36Sopenharmony_ci DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu", 269062306a36Sopenharmony_ci writecache_has_error(wc), 269162306a36Sopenharmony_ci (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, 269262306a36Sopenharmony_ci (unsigned long long)wc->writeback_size, 269362306a36Sopenharmony_ci wc->stats.reads, 269462306a36Sopenharmony_ci wc->stats.read_hits, 269562306a36Sopenharmony_ci wc->stats.writes, 269662306a36Sopenharmony_ci wc->stats.write_hits_uncommitted, 269762306a36Sopenharmony_ci wc->stats.write_hits_committed, 269862306a36Sopenharmony_ci wc->stats.writes_around, 269962306a36Sopenharmony_ci wc->stats.writes_allocate, 270062306a36Sopenharmony_ci wc->stats.writes_blocked_on_freelist, 270162306a36Sopenharmony_ci wc->stats.flushes, 270262306a36Sopenharmony_ci wc->stats.discards); 270362306a36Sopenharmony_ci break; 270462306a36Sopenharmony_ci case STATUSTYPE_TABLE: 270562306a36Sopenharmony_ci DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', 270662306a36Sopenharmony_ci wc->dev->name, wc->ssd_dev->name, wc->block_size); 270762306a36Sopenharmony_ci extra_args = 0; 270862306a36Sopenharmony_ci if (wc->start_sector_set) 270962306a36Sopenharmony_ci extra_args += 2; 271062306a36Sopenharmony_ci if (wc->high_wm_percent_set) 271162306a36Sopenharmony_ci extra_args += 2; 271262306a36Sopenharmony_ci if (wc->low_wm_percent_set) 271362306a36Sopenharmony_ci extra_args += 2; 271462306a36Sopenharmony_ci if (wc->max_writeback_jobs_set) 271562306a36Sopenharmony_ci extra_args += 2; 271662306a36Sopenharmony_ci if (wc->autocommit_blocks_set) 271762306a36Sopenharmony_ci extra_args += 2; 271862306a36Sopenharmony_ci if (wc->autocommit_time_set) 271962306a36Sopenharmony_ci extra_args += 2; 272062306a36Sopenharmony_ci if (wc->max_age_set) 272162306a36Sopenharmony_ci extra_args += 2; 272262306a36Sopenharmony_ci if (wc->cleaner_set) 272362306a36Sopenharmony_ci extra_args++; 272462306a36Sopenharmony_ci if (wc->writeback_fua_set) 272562306a36Sopenharmony_ci extra_args++; 272662306a36Sopenharmony_ci if (wc->metadata_only) 272762306a36Sopenharmony_ci extra_args++; 272862306a36Sopenharmony_ci if (wc->pause_set) 272962306a36Sopenharmony_ci extra_args += 2; 273062306a36Sopenharmony_ci 273162306a36Sopenharmony_ci DMEMIT("%u", extra_args); 273262306a36Sopenharmony_ci if (wc->start_sector_set) 273362306a36Sopenharmony_ci DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); 273462306a36Sopenharmony_ci if (wc->high_wm_percent_set) 273562306a36Sopenharmony_ci DMEMIT(" high_watermark %u", wc->high_wm_percent_value); 273662306a36Sopenharmony_ci if (wc->low_wm_percent_set) 273762306a36Sopenharmony_ci DMEMIT(" low_watermark %u", wc->low_wm_percent_value); 273862306a36Sopenharmony_ci if (wc->max_writeback_jobs_set) 273962306a36Sopenharmony_ci DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); 274062306a36Sopenharmony_ci if (wc->autocommit_blocks_set) 274162306a36Sopenharmony_ci DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); 274262306a36Sopenharmony_ci if (wc->autocommit_time_set) 274362306a36Sopenharmony_ci DMEMIT(" autocommit_time %u", wc->autocommit_time_value); 274462306a36Sopenharmony_ci if (wc->max_age_set) 274562306a36Sopenharmony_ci DMEMIT(" max_age %u", wc->max_age_value); 274662306a36Sopenharmony_ci if (wc->cleaner_set) 274762306a36Sopenharmony_ci DMEMIT(" cleaner"); 274862306a36Sopenharmony_ci if (wc->writeback_fua_set) 274962306a36Sopenharmony_ci DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); 275062306a36Sopenharmony_ci if (wc->metadata_only) 275162306a36Sopenharmony_ci DMEMIT(" metadata_only"); 275262306a36Sopenharmony_ci if (wc->pause_set) 275362306a36Sopenharmony_ci DMEMIT(" pause_writeback %u", wc->pause_value); 275462306a36Sopenharmony_ci break; 275562306a36Sopenharmony_ci case STATUSTYPE_IMA: 275662306a36Sopenharmony_ci *result = '\0'; 275762306a36Sopenharmony_ci break; 275862306a36Sopenharmony_ci } 275962306a36Sopenharmony_ci} 276062306a36Sopenharmony_ci 276162306a36Sopenharmony_cistatic struct target_type writecache_target = { 276262306a36Sopenharmony_ci .name = "writecache", 276362306a36Sopenharmony_ci .version = {1, 6, 0}, 276462306a36Sopenharmony_ci .module = THIS_MODULE, 276562306a36Sopenharmony_ci .ctr = writecache_ctr, 276662306a36Sopenharmony_ci .dtr = writecache_dtr, 276762306a36Sopenharmony_ci .status = writecache_status, 276862306a36Sopenharmony_ci .postsuspend = writecache_suspend, 276962306a36Sopenharmony_ci .resume = writecache_resume, 277062306a36Sopenharmony_ci .message = writecache_message, 277162306a36Sopenharmony_ci .map = writecache_map, 277262306a36Sopenharmony_ci .end_io = writecache_end_io, 277362306a36Sopenharmony_ci .iterate_devices = writecache_iterate_devices, 277462306a36Sopenharmony_ci .io_hints = writecache_io_hints, 277562306a36Sopenharmony_ci}; 277662306a36Sopenharmony_cimodule_dm(writecache); 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " writecache target"); 277962306a36Sopenharmony_ciMODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); 278062306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 2781