162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2012 Red Hat. All rights reserved. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This file is released under the GPL. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include "dm.h" 962306a36Sopenharmony_ci#include "dm-bio-prison-v2.h" 1062306a36Sopenharmony_ci#include "dm-bio-record.h" 1162306a36Sopenharmony_ci#include "dm-cache-metadata.h" 1262306a36Sopenharmony_ci#include "dm-io-tracker.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/dm-io.h> 1562306a36Sopenharmony_ci#include <linux/dm-kcopyd.h> 1662306a36Sopenharmony_ci#include <linux/jiffies.h> 1762306a36Sopenharmony_ci#include <linux/init.h> 1862306a36Sopenharmony_ci#include <linux/mempool.h> 1962306a36Sopenharmony_ci#include <linux/module.h> 2062306a36Sopenharmony_ci#include <linux/rwsem.h> 2162306a36Sopenharmony_ci#include <linux/slab.h> 2262306a36Sopenharmony_ci#include <linux/vmalloc.h> 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#define DM_MSG_PREFIX "cache" 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ciDECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, 2762306a36Sopenharmony_ci "A percentage of time allocated for copying to and/or from cache"); 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci/* 3262306a36Sopenharmony_ci * Glossary: 3362306a36Sopenharmony_ci * 3462306a36Sopenharmony_ci * oblock: index of an origin block 3562306a36Sopenharmony_ci * cblock: index of a cache block 3662306a36Sopenharmony_ci * promotion: movement of a block from origin to cache 3762306a36Sopenharmony_ci * demotion: movement of a block from cache to origin 3862306a36Sopenharmony_ci * migration: movement of a block between the origin and cache device, 3962306a36Sopenharmony_ci * either 
direction 4062306a36Sopenharmony_ci */ 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci/* 4562306a36Sopenharmony_ci * Represents a chunk of future work. 'input' allows continuations to pass 4662306a36Sopenharmony_ci * values between themselves, typically error values. 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_cistruct continuation { 4962306a36Sopenharmony_ci struct work_struct ws; 5062306a36Sopenharmony_ci blk_status_t input; 5162306a36Sopenharmony_ci}; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic inline void init_continuation(struct continuation *k, 5462306a36Sopenharmony_ci void (*fn)(struct work_struct *)) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci INIT_WORK(&k->ws, fn); 5762306a36Sopenharmony_ci k->input = 0; 5862306a36Sopenharmony_ci} 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic inline void queue_continuation(struct workqueue_struct *wq, 6162306a36Sopenharmony_ci struct continuation *k) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci queue_work(wq, &k->ws); 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci/* 6962306a36Sopenharmony_ci * The batcher collects together pieces of work that need a particular 7062306a36Sopenharmony_ci * operation to occur before they can proceed (typically a commit). 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_cistruct batcher { 7362306a36Sopenharmony_ci /* 7462306a36Sopenharmony_ci * The operation that everyone is waiting for. 
7562306a36Sopenharmony_ci */ 7662306a36Sopenharmony_ci blk_status_t (*commit_op)(void *context); 7762306a36Sopenharmony_ci void *commit_context; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci /* 8062306a36Sopenharmony_ci * This is how bios should be issued once the commit op is complete 8162306a36Sopenharmony_ci * (accounted_request). 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci void (*issue_op)(struct bio *bio, void *context); 8462306a36Sopenharmony_ci void *issue_context; 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci /* 8762306a36Sopenharmony_ci * Queued work gets put on here after commit. 8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_ci struct workqueue_struct *wq; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci spinlock_t lock; 9262306a36Sopenharmony_ci struct list_head work_items; 9362306a36Sopenharmony_ci struct bio_list bios; 9462306a36Sopenharmony_ci struct work_struct commit_work; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci bool commit_scheduled; 9762306a36Sopenharmony_ci}; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_cistatic void __commit(struct work_struct *_ws) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci struct batcher *b = container_of(_ws, struct batcher, commit_work); 10262306a36Sopenharmony_ci blk_status_t r; 10362306a36Sopenharmony_ci struct list_head work_items; 10462306a36Sopenharmony_ci struct work_struct *ws, *tmp; 10562306a36Sopenharmony_ci struct continuation *k; 10662306a36Sopenharmony_ci struct bio *bio; 10762306a36Sopenharmony_ci struct bio_list bios; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci INIT_LIST_HEAD(&work_items); 11062306a36Sopenharmony_ci bio_list_init(&bios); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci /* 11362306a36Sopenharmony_ci * We have to grab these before the commit_op to avoid a race 11462306a36Sopenharmony_ci * condition. 
11562306a36Sopenharmony_ci */ 11662306a36Sopenharmony_ci spin_lock_irq(&b->lock); 11762306a36Sopenharmony_ci list_splice_init(&b->work_items, &work_items); 11862306a36Sopenharmony_ci bio_list_merge(&bios, &b->bios); 11962306a36Sopenharmony_ci bio_list_init(&b->bios); 12062306a36Sopenharmony_ci b->commit_scheduled = false; 12162306a36Sopenharmony_ci spin_unlock_irq(&b->lock); 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci r = b->commit_op(b->commit_context); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci list_for_each_entry_safe(ws, tmp, &work_items, entry) { 12662306a36Sopenharmony_ci k = container_of(ws, struct continuation, ws); 12762306a36Sopenharmony_ci k->input = r; 12862306a36Sopenharmony_ci INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */ 12962306a36Sopenharmony_ci queue_work(b->wq, ws); 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci while ((bio = bio_list_pop(&bios))) { 13362306a36Sopenharmony_ci if (r) { 13462306a36Sopenharmony_ci bio->bi_status = r; 13562306a36Sopenharmony_ci bio_endio(bio); 13662306a36Sopenharmony_ci } else 13762306a36Sopenharmony_ci b->issue_op(bio, b->issue_context); 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistatic void batcher_init(struct batcher *b, 14262306a36Sopenharmony_ci blk_status_t (*commit_op)(void *), 14362306a36Sopenharmony_ci void *commit_context, 14462306a36Sopenharmony_ci void (*issue_op)(struct bio *bio, void *), 14562306a36Sopenharmony_ci void *issue_context, 14662306a36Sopenharmony_ci struct workqueue_struct *wq) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci b->commit_op = commit_op; 14962306a36Sopenharmony_ci b->commit_context = commit_context; 15062306a36Sopenharmony_ci b->issue_op = issue_op; 15162306a36Sopenharmony_ci b->issue_context = issue_context; 15262306a36Sopenharmony_ci b->wq = wq; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci spin_lock_init(&b->lock); 
15562306a36Sopenharmony_ci INIT_LIST_HEAD(&b->work_items); 15662306a36Sopenharmony_ci bio_list_init(&b->bios); 15762306a36Sopenharmony_ci INIT_WORK(&b->commit_work, __commit); 15862306a36Sopenharmony_ci b->commit_scheduled = false; 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cistatic void async_commit(struct batcher *b) 16262306a36Sopenharmony_ci{ 16362306a36Sopenharmony_ci queue_work(b->wq, &b->commit_work); 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_cistatic void continue_after_commit(struct batcher *b, struct continuation *k) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci bool commit_scheduled; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci spin_lock_irq(&b->lock); 17162306a36Sopenharmony_ci commit_scheduled = b->commit_scheduled; 17262306a36Sopenharmony_ci list_add_tail(&k->ws.entry, &b->work_items); 17362306a36Sopenharmony_ci spin_unlock_irq(&b->lock); 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci if (commit_scheduled) 17662306a36Sopenharmony_ci async_commit(b); 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci/* 18062306a36Sopenharmony_ci * Bios are errored if commit failed. 18162306a36Sopenharmony_ci */ 18262306a36Sopenharmony_cistatic void issue_after_commit(struct batcher *b, struct bio *bio) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci bool commit_scheduled; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci spin_lock_irq(&b->lock); 18762306a36Sopenharmony_ci commit_scheduled = b->commit_scheduled; 18862306a36Sopenharmony_ci bio_list_add(&b->bios, bio); 18962306a36Sopenharmony_ci spin_unlock_irq(&b->lock); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (commit_scheduled) 19262306a36Sopenharmony_ci async_commit(b); 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci/* 19662306a36Sopenharmony_ci * Call this if some urgent work is waiting for the commit to complete. 
19762306a36Sopenharmony_ci */ 19862306a36Sopenharmony_cistatic void schedule_commit(struct batcher *b) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci bool immediate; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci spin_lock_irq(&b->lock); 20362306a36Sopenharmony_ci immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios); 20462306a36Sopenharmony_ci b->commit_scheduled = true; 20562306a36Sopenharmony_ci spin_unlock_irq(&b->lock); 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci if (immediate) 20862306a36Sopenharmony_ci async_commit(b); 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci/* 21262306a36Sopenharmony_ci * There are a couple of places where we let a bio run, but want to do some 21362306a36Sopenharmony_ci * work before calling its endio function. We do this by temporarily 21462306a36Sopenharmony_ci * changing the endio fn. 21562306a36Sopenharmony_ci */ 21662306a36Sopenharmony_cistruct dm_hook_info { 21762306a36Sopenharmony_ci bio_end_io_t *bi_end_io; 21862306a36Sopenharmony_ci}; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_cistatic void dm_hook_bio(struct dm_hook_info *h, struct bio *bio, 22162306a36Sopenharmony_ci bio_end_io_t *bi_end_io, void *bi_private) 22262306a36Sopenharmony_ci{ 22362306a36Sopenharmony_ci h->bi_end_io = bio->bi_end_io; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci bio->bi_end_io = bi_end_io; 22662306a36Sopenharmony_ci bio->bi_private = bi_private; 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci bio->bi_end_io = h->bi_end_io; 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci#define MIGRATION_POOL_SIZE 128 23762306a36Sopenharmony_ci#define COMMIT_PERIOD HZ 
23862306a36Sopenharmony_ci#define MIGRATION_COUNT_WINDOW 10 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci/* 24162306a36Sopenharmony_ci * The block size of the device holding cache data must be 24262306a36Sopenharmony_ci * between 32KB and 1GB. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ci#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT) 24562306a36Sopenharmony_ci#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_cienum cache_metadata_mode { 24862306a36Sopenharmony_ci CM_WRITE, /* metadata may be changed */ 24962306a36Sopenharmony_ci CM_READ_ONLY, /* metadata may not be changed */ 25062306a36Sopenharmony_ci CM_FAIL 25162306a36Sopenharmony_ci}; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_cienum cache_io_mode { 25462306a36Sopenharmony_ci /* 25562306a36Sopenharmony_ci * Data is written to cached blocks only. These blocks are marked 25662306a36Sopenharmony_ci * dirty. If you lose the cache device you will lose data. 25762306a36Sopenharmony_ci * Potential performance increase for both reads and writes. 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_ci CM_IO_WRITEBACK, 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci /* 26262306a36Sopenharmony_ci * Data is written to both cache and origin. Blocks are never 26362306a36Sopenharmony_ci * dirty. Potential performance benfit for reads only. 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_ci CM_IO_WRITETHROUGH, 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci /* 26862306a36Sopenharmony_ci * A degraded mode useful for various cache coherency situations 26962306a36Sopenharmony_ci * (eg, rolling back snapshots). Reads and writes always go to the 27062306a36Sopenharmony_ci * origin. If a write goes to a cached oblock, then the cache 27162306a36Sopenharmony_ci * block is invalidated. 
27262306a36Sopenharmony_ci */ 27362306a36Sopenharmony_ci CM_IO_PASSTHROUGH 27462306a36Sopenharmony_ci}; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistruct cache_features { 27762306a36Sopenharmony_ci enum cache_metadata_mode mode; 27862306a36Sopenharmony_ci enum cache_io_mode io_mode; 27962306a36Sopenharmony_ci unsigned int metadata_version; 28062306a36Sopenharmony_ci bool discard_passdown:1; 28162306a36Sopenharmony_ci}; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_cistruct cache_stats { 28462306a36Sopenharmony_ci atomic_t read_hit; 28562306a36Sopenharmony_ci atomic_t read_miss; 28662306a36Sopenharmony_ci atomic_t write_hit; 28762306a36Sopenharmony_ci atomic_t write_miss; 28862306a36Sopenharmony_ci atomic_t demotion; 28962306a36Sopenharmony_ci atomic_t promotion; 29062306a36Sopenharmony_ci atomic_t writeback; 29162306a36Sopenharmony_ci atomic_t copies_avoided; 29262306a36Sopenharmony_ci atomic_t cache_cell_clash; 29362306a36Sopenharmony_ci atomic_t commit_count; 29462306a36Sopenharmony_ci atomic_t discard_count; 29562306a36Sopenharmony_ci}; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_cistruct cache { 29862306a36Sopenharmony_ci struct dm_target *ti; 29962306a36Sopenharmony_ci spinlock_t lock; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci /* 30262306a36Sopenharmony_ci * Fields for converting from sectors to blocks. 30362306a36Sopenharmony_ci */ 30462306a36Sopenharmony_ci int sectors_per_block_shift; 30562306a36Sopenharmony_ci sector_t sectors_per_block; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci struct dm_cache_metadata *cmd; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci /* 31062306a36Sopenharmony_ci * Metadata is written to this device. 31162306a36Sopenharmony_ci */ 31262306a36Sopenharmony_ci struct dm_dev *metadata_dev; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* 31562306a36Sopenharmony_ci * The slower of the two data devices. Typically a spindle. 
31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_ci struct dm_dev *origin_dev; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci /* 32062306a36Sopenharmony_ci * The faster of the two data devices. Typically an SSD. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ci struct dm_dev *cache_dev; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci /* 32562306a36Sopenharmony_ci * Size of the origin device in _complete_ blocks and native sectors. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci dm_oblock_t origin_blocks; 32862306a36Sopenharmony_ci sector_t origin_sectors; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci /* 33162306a36Sopenharmony_ci * Size of the cache device in blocks. 33262306a36Sopenharmony_ci */ 33362306a36Sopenharmony_ci dm_cblock_t cache_size; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci /* 33662306a36Sopenharmony_ci * Invalidation fields. 33762306a36Sopenharmony_ci */ 33862306a36Sopenharmony_ci spinlock_t invalidation_lock; 33962306a36Sopenharmony_ci struct list_head invalidation_requests; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci sector_t migration_threshold; 34262306a36Sopenharmony_ci wait_queue_head_t migration_wait; 34362306a36Sopenharmony_ci atomic_t nr_allocated_migrations; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci /* 34662306a36Sopenharmony_ci * The number of in flight migrations that are performing 34762306a36Sopenharmony_ci * background io. eg, promotion, writeback. 34862306a36Sopenharmony_ci */ 34962306a36Sopenharmony_ci atomic_t nr_io_migrations; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci struct bio_list deferred_bios; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci struct rw_semaphore quiesce_lock; 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * origin_blocks entries, discarded if set. 
35762306a36Sopenharmony_ci */ 35862306a36Sopenharmony_ci dm_dblock_t discard_nr_blocks; 35962306a36Sopenharmony_ci unsigned long *discard_bitset; 36062306a36Sopenharmony_ci uint32_t discard_block_size; /* a power of 2 times sectors per block */ 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci /* 36362306a36Sopenharmony_ci * Rather than reconstructing the table line for the status we just 36462306a36Sopenharmony_ci * save it and regurgitate. 36562306a36Sopenharmony_ci */ 36662306a36Sopenharmony_ci unsigned int nr_ctr_args; 36762306a36Sopenharmony_ci const char **ctr_args; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci struct dm_kcopyd_client *copier; 37062306a36Sopenharmony_ci struct work_struct deferred_bio_worker; 37162306a36Sopenharmony_ci struct work_struct migration_worker; 37262306a36Sopenharmony_ci struct workqueue_struct *wq; 37362306a36Sopenharmony_ci struct delayed_work waker; 37462306a36Sopenharmony_ci struct dm_bio_prison_v2 *prison; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci /* 37762306a36Sopenharmony_ci * cache_size entries, dirty if set 37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ci unsigned long *dirty_bitset; 38062306a36Sopenharmony_ci atomic_t nr_dirty; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci unsigned int policy_nr_args; 38362306a36Sopenharmony_ci struct dm_cache_policy *policy; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci /* 38662306a36Sopenharmony_ci * Cache features such as write-through. 
38762306a36Sopenharmony_ci */ 38862306a36Sopenharmony_ci struct cache_features features; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci struct cache_stats stats; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci bool need_tick_bio:1; 39362306a36Sopenharmony_ci bool sized:1; 39462306a36Sopenharmony_ci bool invalidate:1; 39562306a36Sopenharmony_ci bool commit_requested:1; 39662306a36Sopenharmony_ci bool loaded_mappings:1; 39762306a36Sopenharmony_ci bool loaded_discards:1; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci struct rw_semaphore background_work_lock; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci struct batcher committer; 40262306a36Sopenharmony_ci struct work_struct commit_ws; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci struct dm_io_tracker tracker; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci mempool_t migration_pool; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci struct bio_set bs; 40962306a36Sopenharmony_ci}; 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_cistruct per_bio_data { 41262306a36Sopenharmony_ci bool tick:1; 41362306a36Sopenharmony_ci unsigned int req_nr:2; 41462306a36Sopenharmony_ci struct dm_bio_prison_cell_v2 *cell; 41562306a36Sopenharmony_ci struct dm_hook_info hook_info; 41662306a36Sopenharmony_ci sector_t len; 41762306a36Sopenharmony_ci}; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_cistruct dm_cache_migration { 42062306a36Sopenharmony_ci struct continuation k; 42162306a36Sopenharmony_ci struct cache *cache; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci struct policy_work *op; 42462306a36Sopenharmony_ci struct bio *overwrite_bio; 42562306a36Sopenharmony_ci struct dm_bio_prison_cell_v2 *cell; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci dm_cblock_t invalidate_cblock; 42862306a36Sopenharmony_ci dm_oblock_t invalidate_oblock; 42962306a36Sopenharmony_ci}; 43062306a36Sopenharmony_ci 
43162306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_cistatic bool writethrough_mode(struct cache *cache) 43462306a36Sopenharmony_ci{ 43562306a36Sopenharmony_ci return cache->features.io_mode == CM_IO_WRITETHROUGH; 43662306a36Sopenharmony_ci} 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_cistatic bool writeback_mode(struct cache *cache) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci return cache->features.io_mode == CM_IO_WRITEBACK; 44162306a36Sopenharmony_ci} 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_cistatic inline bool passthrough_mode(struct cache *cache) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH); 44662306a36Sopenharmony_ci} 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_cistatic void wake_deferred_bio_worker(struct cache *cache) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci queue_work(cache->wq, &cache->deferred_bio_worker); 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_cistatic void wake_migration_worker(struct cache *cache) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci if (passthrough_mode(cache)) 45862306a36Sopenharmony_ci return; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci queue_work(cache->wq, &cache->migration_worker); 46162306a36Sopenharmony_ci} 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_cistatic struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO); 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 
47062306a36Sopenharmony_cistatic void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci dm_bio_prison_free_cell_v2(cache->prison, cell); 47362306a36Sopenharmony_ci} 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_cistatic struct dm_cache_migration *alloc_migration(struct cache *cache) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci struct dm_cache_migration *mg; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci mg = mempool_alloc(&cache->migration_pool, GFP_NOIO); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci memset(mg, 0, sizeof(*mg)); 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci mg->cache = cache; 48462306a36Sopenharmony_ci atomic_inc(&cache->nr_allocated_migrations); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci return mg; 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_cistatic void free_migration(struct dm_cache_migration *mg) 49062306a36Sopenharmony_ci{ 49162306a36Sopenharmony_ci struct cache *cache = mg->cache; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci if (atomic_dec_and_test(&cache->nr_allocated_migrations)) 49462306a36Sopenharmony_ci wake_up(&cache->migration_wait); 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci mempool_free(mg, &cache->migration_pool); 49762306a36Sopenharmony_ci} 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_cistatic inline dm_oblock_t oblock_succ(dm_oblock_t b) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci return to_oblock(from_oblock(b) + 1ull); 50462306a36Sopenharmony_ci} 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_cistatic void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key) 50762306a36Sopenharmony_ci{ 50862306a36Sopenharmony_ci key->virtual = 0; 50962306a36Sopenharmony_ci key->dev = 0; 
51062306a36Sopenharmony_ci key->block_begin = from_oblock(begin); 51162306a36Sopenharmony_ci key->block_end = from_oblock(end); 51262306a36Sopenharmony_ci} 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci/* 51562306a36Sopenharmony_ci * We have two lock levels. Level 0, which is used to prevent WRITEs, and 51662306a36Sopenharmony_ci * level 1 which prevents *both* READs and WRITEs. 51762306a36Sopenharmony_ci */ 51862306a36Sopenharmony_ci#define WRITE_LOCK_LEVEL 0 51962306a36Sopenharmony_ci#define READ_WRITE_LOCK_LEVEL 1 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_cistatic unsigned int lock_level(struct bio *bio) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci return bio_data_dir(bio) == WRITE ? 52462306a36Sopenharmony_ci WRITE_LOCK_LEVEL : 52562306a36Sopenharmony_ci READ_WRITE_LOCK_LEVEL; 52662306a36Sopenharmony_ci} 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci/* 52962306a36Sopenharmony_ci *-------------------------------------------------------------- 53062306a36Sopenharmony_ci * Per bio data 53162306a36Sopenharmony_ci *-------------------------------------------------------------- 53262306a36Sopenharmony_ci */ 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cistatic struct per_bio_data *get_per_bio_data(struct bio *bio) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci BUG_ON(!pb); 53962306a36Sopenharmony_ci return pb; 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_cistatic struct per_bio_data *init_per_bio_data(struct bio *bio) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci pb->tick = false; 54762306a36Sopenharmony_ci pb->req_nr = dm_bio_get_target_bio_nr(bio); 54862306a36Sopenharmony_ci pb->cell = NULL; 54962306a36Sopenharmony_ci pb->len = 0; 
55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci return pb; 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_cistatic void defer_bio(struct cache *cache, struct bio *bio) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 55962306a36Sopenharmony_ci bio_list_add(&cache->deferred_bios, bio); 56062306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci wake_deferred_bio_worker(cache); 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_cistatic void defer_bios(struct cache *cache, struct bio_list *bios) 56662306a36Sopenharmony_ci{ 56762306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 56862306a36Sopenharmony_ci bio_list_merge(&cache->deferred_bios, bios); 56962306a36Sopenharmony_ci bio_list_init(bios); 57062306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci wake_deferred_bio_worker(cache); 57362306a36Sopenharmony_ci} 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_cistatic bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) 57862306a36Sopenharmony_ci{ 57962306a36Sopenharmony_ci bool r; 58062306a36Sopenharmony_ci struct per_bio_data *pb; 58162306a36Sopenharmony_ci struct dm_cell_key_v2 key; 58262306a36Sopenharmony_ci dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); 58362306a36Sopenharmony_ci struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci build_key(oblock, end, &key); 58862306a36Sopenharmony_ci 
r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); 58962306a36Sopenharmony_ci if (!r) { 59062306a36Sopenharmony_ci /* 59162306a36Sopenharmony_ci * Failed to get the lock. 59262306a36Sopenharmony_ci */ 59362306a36Sopenharmony_ci free_prison_cell(cache, cell_prealloc); 59462306a36Sopenharmony_ci return r; 59562306a36Sopenharmony_ci } 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci if (cell != cell_prealloc) 59862306a36Sopenharmony_ci free_prison_cell(cache, cell_prealloc); 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci pb = get_per_bio_data(bio); 60162306a36Sopenharmony_ci pb->cell = cell; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci return r; 60462306a36Sopenharmony_ci} 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic bool is_dirty(struct cache *cache, dm_cblock_t b) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci return test_bit(from_cblock(b), cache->dirty_bitset); 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_cistatic void set_dirty(struct cache *cache, dm_cblock_t cblock) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { 61662306a36Sopenharmony_ci atomic_inc(&cache->nr_dirty); 61762306a36Sopenharmony_ci policy_set_dirty(cache->policy, cblock); 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci/* 62262306a36Sopenharmony_ci * These two are called when setting after migrations to force the policy 62362306a36Sopenharmony_ci * and dirty bitset to be in sync. 
62462306a36Sopenharmony_ci */ 62562306a36Sopenharmony_cistatic void force_set_dirty(struct cache *cache, dm_cblock_t cblock) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) 62862306a36Sopenharmony_ci atomic_inc(&cache->nr_dirty); 62962306a36Sopenharmony_ci policy_set_dirty(cache->policy, cblock); 63062306a36Sopenharmony_ci} 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_cistatic void force_clear_dirty(struct cache *cache, dm_cblock_t cblock) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { 63562306a36Sopenharmony_ci if (atomic_dec_return(&cache->nr_dirty) == 0) 63662306a36Sopenharmony_ci dm_table_event(cache->ti->table); 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci policy_clear_dirty(cache->policy, cblock); 64062306a36Sopenharmony_ci} 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_cistatic bool block_size_is_power_of_two(struct cache *cache) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci return cache->sectors_per_block_shift >= 0; 64762306a36Sopenharmony_ci} 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_cistatic dm_block_t block_div(dm_block_t b, uint32_t n) 65062306a36Sopenharmony_ci{ 65162306a36Sopenharmony_ci do_div(b, n); 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci return b; 65462306a36Sopenharmony_ci} 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_cistatic dm_block_t oblocks_per_dblock(struct cache *cache) 65762306a36Sopenharmony_ci{ 65862306a36Sopenharmony_ci dm_block_t oblocks = cache->discard_block_size; 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci if (block_size_is_power_of_two(cache)) 66162306a36Sopenharmony_ci oblocks >>= cache->sectors_per_block_shift; 66262306a36Sopenharmony_ci else 66362306a36Sopenharmony_ci 
oblocks = block_div(oblocks, cache->sectors_per_block); 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci return oblocks; 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_cistatic dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci return to_dblock(block_div(from_oblock(oblock), 67162306a36Sopenharmony_ci oblocks_per_dblock(cache))); 67262306a36Sopenharmony_ci} 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_cistatic void set_discard(struct cache *cache, dm_dblock_t b) 67562306a36Sopenharmony_ci{ 67662306a36Sopenharmony_ci BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); 67762306a36Sopenharmony_ci atomic_inc(&cache->stats.discard_count); 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 68062306a36Sopenharmony_ci set_bit(from_dblock(b), cache->discard_bitset); 68162306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic void clear_discard(struct cache *cache, dm_dblock_t b) 68562306a36Sopenharmony_ci{ 68662306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 68762306a36Sopenharmony_ci clear_bit(from_dblock(b), cache->discard_bitset); 68862306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 68962306a36Sopenharmony_ci} 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_cistatic bool is_discarded(struct cache *cache, dm_dblock_t b) 69262306a36Sopenharmony_ci{ 69362306a36Sopenharmony_ci int r; 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 69662306a36Sopenharmony_ci r = test_bit(from_dblock(b), cache->discard_bitset); 69762306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci return r; 70062306a36Sopenharmony_ci} 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_cistatic bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) 
70362306a36Sopenharmony_ci{ 70462306a36Sopenharmony_ci int r; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 70762306a36Sopenharmony_ci r = test_bit(from_dblock(oblock_to_dblock(cache, b)), 70862306a36Sopenharmony_ci cache->discard_bitset); 70962306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci return r; 71262306a36Sopenharmony_ci} 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci/* 71562306a36Sopenharmony_ci * ------------------------------------------------------------- 71662306a36Sopenharmony_ci * Remapping 71762306a36Sopenharmony_ci *-------------------------------------------------------------- 71862306a36Sopenharmony_ci */ 71962306a36Sopenharmony_cistatic void remap_to_origin(struct cache *cache, struct bio *bio) 72062306a36Sopenharmony_ci{ 72162306a36Sopenharmony_ci bio_set_dev(bio, cache->origin_dev->bdev); 72262306a36Sopenharmony_ci} 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_cistatic void remap_to_cache(struct cache *cache, struct bio *bio, 72562306a36Sopenharmony_ci dm_cblock_t cblock) 72662306a36Sopenharmony_ci{ 72762306a36Sopenharmony_ci sector_t bi_sector = bio->bi_iter.bi_sector; 72862306a36Sopenharmony_ci sector_t block = from_cblock(cblock); 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci bio_set_dev(bio, cache->cache_dev->bdev); 73162306a36Sopenharmony_ci if (!block_size_is_power_of_two(cache)) 73262306a36Sopenharmony_ci bio->bi_iter.bi_sector = 73362306a36Sopenharmony_ci (block * cache->sectors_per_block) + 73462306a36Sopenharmony_ci sector_div(bi_sector, cache->sectors_per_block); 73562306a36Sopenharmony_ci else 73662306a36Sopenharmony_ci bio->bi_iter.bi_sector = 73762306a36Sopenharmony_ci (block << cache->sectors_per_block_shift) | 73862306a36Sopenharmony_ci (bi_sector & (cache->sectors_per_block - 1)); 73962306a36Sopenharmony_ci} 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_cistatic void check_if_tick_bio_needed(struct cache 
*cache, struct bio *bio) 74262306a36Sopenharmony_ci{ 74362306a36Sopenharmony_ci struct per_bio_data *pb; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 74662306a36Sopenharmony_ci if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && 74762306a36Sopenharmony_ci bio_op(bio) != REQ_OP_DISCARD) { 74862306a36Sopenharmony_ci pb = get_per_bio_data(bio); 74962306a36Sopenharmony_ci pb->tick = true; 75062306a36Sopenharmony_ci cache->need_tick_bio = false; 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 75362306a36Sopenharmony_ci} 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_cistatic void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, 75662306a36Sopenharmony_ci dm_oblock_t oblock) 75762306a36Sopenharmony_ci{ 75862306a36Sopenharmony_ci // FIXME: check_if_tick_bio_needed() is called way too much through this interface 75962306a36Sopenharmony_ci check_if_tick_bio_needed(cache, bio); 76062306a36Sopenharmony_ci remap_to_origin(cache, bio); 76162306a36Sopenharmony_ci if (bio_data_dir(bio) == WRITE) 76262306a36Sopenharmony_ci clear_discard(cache, oblock_to_dblock(cache, oblock)); 76362306a36Sopenharmony_ci} 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_cistatic void remap_to_cache_dirty(struct cache *cache, struct bio *bio, 76662306a36Sopenharmony_ci dm_oblock_t oblock, dm_cblock_t cblock) 76762306a36Sopenharmony_ci{ 76862306a36Sopenharmony_ci check_if_tick_bio_needed(cache, bio); 76962306a36Sopenharmony_ci remap_to_cache(cache, bio, cblock); 77062306a36Sopenharmony_ci if (bio_data_dir(bio) == WRITE) { 77162306a36Sopenharmony_ci set_dirty(cache, cblock); 77262306a36Sopenharmony_ci clear_discard(cache, oblock_to_dblock(cache, oblock)); 77362306a36Sopenharmony_ci } 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_cistatic dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) 77762306a36Sopenharmony_ci{ 77862306a36Sopenharmony_ci sector_t 
block_nr = bio->bi_iter.bi_sector; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci if (!block_size_is_power_of_two(cache)) 78162306a36Sopenharmony_ci (void) sector_div(block_nr, cache->sectors_per_block); 78262306a36Sopenharmony_ci else 78362306a36Sopenharmony_ci block_nr >>= cache->sectors_per_block_shift; 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci return to_oblock(block_nr); 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_cistatic bool accountable_bio(struct cache *cache, struct bio *bio) 78962306a36Sopenharmony_ci{ 79062306a36Sopenharmony_ci return bio_op(bio) != REQ_OP_DISCARD; 79162306a36Sopenharmony_ci} 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_cistatic void accounted_begin(struct cache *cache, struct bio *bio) 79462306a36Sopenharmony_ci{ 79562306a36Sopenharmony_ci struct per_bio_data *pb; 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci if (accountable_bio(cache, bio)) { 79862306a36Sopenharmony_ci pb = get_per_bio_data(bio); 79962306a36Sopenharmony_ci pb->len = bio_sectors(bio); 80062306a36Sopenharmony_ci dm_iot_io_begin(&cache->tracker, pb->len); 80162306a36Sopenharmony_ci } 80262306a36Sopenharmony_ci} 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_cistatic void accounted_complete(struct cache *cache, struct bio *bio) 80562306a36Sopenharmony_ci{ 80662306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci dm_iot_io_end(&cache->tracker, pb->len); 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_cistatic void accounted_request(struct cache *cache, struct bio *bio) 81262306a36Sopenharmony_ci{ 81362306a36Sopenharmony_ci accounted_begin(cache, bio); 81462306a36Sopenharmony_ci dm_submit_bio_remap(bio, NULL); 81562306a36Sopenharmony_ci} 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_cistatic void issue_op(struct bio *bio, void *context) 81862306a36Sopenharmony_ci{ 81962306a36Sopenharmony_ci struct 
cache *cache = context; 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci accounted_request(cache, bio); 82262306a36Sopenharmony_ci} 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci/* 82562306a36Sopenharmony_ci * When running in writethrough mode we need to send writes to clean blocks 82662306a36Sopenharmony_ci * to both the cache and origin devices. Clone the bio and send them in parallel. 82762306a36Sopenharmony_ci */ 82862306a36Sopenharmony_cistatic void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, 82962306a36Sopenharmony_ci dm_oblock_t oblock, dm_cblock_t cblock) 83062306a36Sopenharmony_ci{ 83162306a36Sopenharmony_ci struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio, 83262306a36Sopenharmony_ci GFP_NOIO, &cache->bs); 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci BUG_ON(!origin_bio); 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci bio_chain(origin_bio, bio); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci if (bio_data_dir(origin_bio) == WRITE) 83962306a36Sopenharmony_ci clear_discard(cache, oblock_to_dblock(cache, oblock)); 84062306a36Sopenharmony_ci submit_bio(origin_bio); 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci remap_to_cache(cache, bio, cblock); 84362306a36Sopenharmony_ci} 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci/* 84662306a36Sopenharmony_ci *-------------------------------------------------------------- 84762306a36Sopenharmony_ci * Failure modes 84862306a36Sopenharmony_ci *-------------------------------------------------------------- 84962306a36Sopenharmony_ci */ 85062306a36Sopenharmony_cistatic enum cache_metadata_mode get_cache_mode(struct cache *cache) 85162306a36Sopenharmony_ci{ 85262306a36Sopenharmony_ci return cache->features.mode; 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_cistatic const char *cache_device_name(struct cache *cache) 85662306a36Sopenharmony_ci{ 85762306a36Sopenharmony_ci return 
dm_table_device_name(cache->ti->table); 85862306a36Sopenharmony_ci} 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_cistatic void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode) 86162306a36Sopenharmony_ci{ 86262306a36Sopenharmony_ci static const char *descs[] = { 86362306a36Sopenharmony_ci "write", 86462306a36Sopenharmony_ci "read-only", 86562306a36Sopenharmony_ci "fail" 86662306a36Sopenharmony_ci }; 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci dm_table_event(cache->ti->table); 86962306a36Sopenharmony_ci DMINFO("%s: switching cache to %s mode", 87062306a36Sopenharmony_ci cache_device_name(cache), descs[(int)mode]); 87162306a36Sopenharmony_ci} 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_cistatic void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode) 87462306a36Sopenharmony_ci{ 87562306a36Sopenharmony_ci bool needs_check; 87662306a36Sopenharmony_ci enum cache_metadata_mode old_mode = get_cache_mode(cache); 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { 87962306a36Sopenharmony_ci DMERR("%s: unable to read needs_check flag, setting failure mode.", 88062306a36Sopenharmony_ci cache_device_name(cache)); 88162306a36Sopenharmony_ci new_mode = CM_FAIL; 88262306a36Sopenharmony_ci } 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci if (new_mode == CM_WRITE && needs_check) { 88562306a36Sopenharmony_ci DMERR("%s: unable to switch cache to write mode until repaired.", 88662306a36Sopenharmony_ci cache_device_name(cache)); 88762306a36Sopenharmony_ci if (old_mode != new_mode) 88862306a36Sopenharmony_ci new_mode = old_mode; 88962306a36Sopenharmony_ci else 89062306a36Sopenharmony_ci new_mode = CM_READ_ONLY; 89162306a36Sopenharmony_ci } 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci /* Never move out of fail mode */ 89462306a36Sopenharmony_ci if (old_mode == CM_FAIL) 89562306a36Sopenharmony_ci new_mode = CM_FAIL; 89662306a36Sopenharmony_ci 
89762306a36Sopenharmony_ci switch (new_mode) { 89862306a36Sopenharmony_ci case CM_FAIL: 89962306a36Sopenharmony_ci case CM_READ_ONLY: 90062306a36Sopenharmony_ci dm_cache_metadata_set_read_only(cache->cmd); 90162306a36Sopenharmony_ci break; 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci case CM_WRITE: 90462306a36Sopenharmony_ci dm_cache_metadata_set_read_write(cache->cmd); 90562306a36Sopenharmony_ci break; 90662306a36Sopenharmony_ci } 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci cache->features.mode = new_mode; 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci if (new_mode != old_mode) 91162306a36Sopenharmony_ci notify_mode_switch(cache, new_mode); 91262306a36Sopenharmony_ci} 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_cistatic void abort_transaction(struct cache *cache) 91562306a36Sopenharmony_ci{ 91662306a36Sopenharmony_ci const char *dev_name = cache_device_name(cache); 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 91962306a36Sopenharmony_ci return; 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); 92262306a36Sopenharmony_ci if (dm_cache_metadata_abort(cache->cmd)) { 92362306a36Sopenharmony_ci DMERR("%s: failed to abort metadata transaction", dev_name); 92462306a36Sopenharmony_ci set_cache_mode(cache, CM_FAIL); 92562306a36Sopenharmony_ci } 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci if (dm_cache_metadata_set_needs_check(cache->cmd)) { 92862306a36Sopenharmony_ci DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); 92962306a36Sopenharmony_ci set_cache_mode(cache, CM_FAIL); 93062306a36Sopenharmony_ci } 93162306a36Sopenharmony_ci} 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_cistatic void metadata_operation_failed(struct cache *cache, const char *op, int r) 93462306a36Sopenharmony_ci{ 93562306a36Sopenharmony_ci DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d", 
93662306a36Sopenharmony_ci cache_device_name(cache), op, r); 93762306a36Sopenharmony_ci abort_transaction(cache); 93862306a36Sopenharmony_ci set_cache_mode(cache, CM_READ_ONLY); 93962306a36Sopenharmony_ci} 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_cistatic void load_stats(struct cache *cache) 94462306a36Sopenharmony_ci{ 94562306a36Sopenharmony_ci struct dm_cache_statistics stats; 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci dm_cache_metadata_get_stats(cache->cmd, &stats); 94862306a36Sopenharmony_ci atomic_set(&cache->stats.read_hit, stats.read_hits); 94962306a36Sopenharmony_ci atomic_set(&cache->stats.read_miss, stats.read_misses); 95062306a36Sopenharmony_ci atomic_set(&cache->stats.write_hit, stats.write_hits); 95162306a36Sopenharmony_ci atomic_set(&cache->stats.write_miss, stats.write_misses); 95262306a36Sopenharmony_ci} 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_cistatic void save_stats(struct cache *cache) 95562306a36Sopenharmony_ci{ 95662306a36Sopenharmony_ci struct dm_cache_statistics stats; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 95962306a36Sopenharmony_ci return; 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci stats.read_hits = atomic_read(&cache->stats.read_hit); 96262306a36Sopenharmony_ci stats.read_misses = atomic_read(&cache->stats.read_miss); 96362306a36Sopenharmony_ci stats.write_hits = atomic_read(&cache->stats.write_hit); 96462306a36Sopenharmony_ci stats.write_misses = atomic_read(&cache->stats.write_miss); 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci dm_cache_metadata_set_stats(cache->cmd, &stats); 96762306a36Sopenharmony_ci} 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_cistatic void update_stats(struct cache_stats *stats, enum policy_operation op) 97062306a36Sopenharmony_ci{ 97162306a36Sopenharmony_ci switch (op) { 
97262306a36Sopenharmony_ci case POLICY_PROMOTE: 97362306a36Sopenharmony_ci atomic_inc(&stats->promotion); 97462306a36Sopenharmony_ci break; 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci case POLICY_DEMOTE: 97762306a36Sopenharmony_ci atomic_inc(&stats->demotion); 97862306a36Sopenharmony_ci break; 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci case POLICY_WRITEBACK: 98162306a36Sopenharmony_ci atomic_inc(&stats->writeback); 98262306a36Sopenharmony_ci break; 98362306a36Sopenharmony_ci } 98462306a36Sopenharmony_ci} 98562306a36Sopenharmony_ci 98662306a36Sopenharmony_ci/* 98762306a36Sopenharmony_ci *--------------------------------------------------------------------- 98862306a36Sopenharmony_ci * Migration processing 98962306a36Sopenharmony_ci * 99062306a36Sopenharmony_ci * Migration covers moving data from the origin device to the cache, or 99162306a36Sopenharmony_ci * vice versa. 99262306a36Sopenharmony_ci *--------------------------------------------------------------------- 99362306a36Sopenharmony_ci */ 99462306a36Sopenharmony_cistatic void inc_io_migrations(struct cache *cache) 99562306a36Sopenharmony_ci{ 99662306a36Sopenharmony_ci atomic_inc(&cache->nr_io_migrations); 99762306a36Sopenharmony_ci} 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_cistatic void dec_io_migrations(struct cache *cache) 100062306a36Sopenharmony_ci{ 100162306a36Sopenharmony_ci atomic_dec(&cache->nr_io_migrations); 100262306a36Sopenharmony_ci} 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_cistatic bool discard_or_flush(struct bio *bio) 100562306a36Sopenharmony_ci{ 100662306a36Sopenharmony_ci return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); 100762306a36Sopenharmony_ci} 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_cistatic void calc_discard_block_range(struct cache *cache, struct bio *bio, 101062306a36Sopenharmony_ci dm_dblock_t *b, dm_dblock_t *e) 101162306a36Sopenharmony_ci{ 101262306a36Sopenharmony_ci sector_t sb = bio->bi_iter.bi_sector; 
101362306a36Sopenharmony_ci sector_t se = bio_end_sector(bio); 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci if (se - sb < cache->discard_block_size) 101862306a36Sopenharmony_ci *e = *b; 101962306a36Sopenharmony_ci else 102062306a36Sopenharmony_ci *e = to_dblock(block_div(se, cache->discard_block_size)); 102162306a36Sopenharmony_ci} 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_cistatic void prevent_background_work(struct cache *cache) 102662306a36Sopenharmony_ci{ 102762306a36Sopenharmony_ci lockdep_off(); 102862306a36Sopenharmony_ci down_write(&cache->background_work_lock); 102962306a36Sopenharmony_ci lockdep_on(); 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_cistatic void allow_background_work(struct cache *cache) 103362306a36Sopenharmony_ci{ 103462306a36Sopenharmony_ci lockdep_off(); 103562306a36Sopenharmony_ci up_write(&cache->background_work_lock); 103662306a36Sopenharmony_ci lockdep_on(); 103762306a36Sopenharmony_ci} 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_cistatic bool background_work_begin(struct cache *cache) 104062306a36Sopenharmony_ci{ 104162306a36Sopenharmony_ci bool r; 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci lockdep_off(); 104462306a36Sopenharmony_ci r = down_read_trylock(&cache->background_work_lock); 104562306a36Sopenharmony_ci lockdep_on(); 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci return r; 104862306a36Sopenharmony_ci} 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_cistatic void background_work_end(struct cache *cache) 105162306a36Sopenharmony_ci{ 105262306a36Sopenharmony_ci lockdep_off(); 105362306a36Sopenharmony_ci up_read(&cache->background_work_lock); 105462306a36Sopenharmony_ci lockdep_on(); 
105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_cistatic bool bio_writes_complete_block(struct cache *cache, struct bio *bio) 106062306a36Sopenharmony_ci{ 106162306a36Sopenharmony_ci return (bio_data_dir(bio) == WRITE) && 106262306a36Sopenharmony_ci (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); 106362306a36Sopenharmony_ci} 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_cistatic bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) 106662306a36Sopenharmony_ci{ 106762306a36Sopenharmony_ci return writeback_mode(cache) && 106862306a36Sopenharmony_ci (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); 106962306a36Sopenharmony_ci} 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_cistatic void quiesce(struct dm_cache_migration *mg, 107262306a36Sopenharmony_ci void (*continuation)(struct work_struct *)) 107362306a36Sopenharmony_ci{ 107462306a36Sopenharmony_ci init_continuation(&mg->k, continuation); 107562306a36Sopenharmony_ci dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws); 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_cistatic struct dm_cache_migration *ws_to_mg(struct work_struct *ws) 107962306a36Sopenharmony_ci{ 108062306a36Sopenharmony_ci struct continuation *k = container_of(ws, struct continuation, ws); 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci return container_of(k, struct dm_cache_migration, k); 108362306a36Sopenharmony_ci} 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_cistatic void copy_complete(int read_err, unsigned long write_err, void *context) 108662306a36Sopenharmony_ci{ 108762306a36Sopenharmony_ci struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k); 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci if (read_err || 
write_err) 109062306a36Sopenharmony_ci mg->k.input = BLK_STS_IOERR; 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci queue_continuation(mg->cache->wq, &mg->k); 109362306a36Sopenharmony_ci} 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_cistatic void copy(struct dm_cache_migration *mg, bool promote) 109662306a36Sopenharmony_ci{ 109762306a36Sopenharmony_ci struct dm_io_region o_region, c_region; 109862306a36Sopenharmony_ci struct cache *cache = mg->cache; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci o_region.bdev = cache->origin_dev->bdev; 110162306a36Sopenharmony_ci o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block; 110262306a36Sopenharmony_ci o_region.count = cache->sectors_per_block; 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci c_region.bdev = cache->cache_dev->bdev; 110562306a36Sopenharmony_ci c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block; 110662306a36Sopenharmony_ci c_region.count = cache->sectors_per_block; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci if (promote) 110962306a36Sopenharmony_ci dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); 111062306a36Sopenharmony_ci else 111162306a36Sopenharmony_ci dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); 111262306a36Sopenharmony_ci} 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_cistatic void bio_drop_shared_lock(struct cache *cache, struct bio *bio) 111562306a36Sopenharmony_ci{ 111662306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell)) 111962306a36Sopenharmony_ci free_prison_cell(cache, pb->cell); 112062306a36Sopenharmony_ci pb->cell = NULL; 112162306a36Sopenharmony_ci} 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_cistatic void overwrite_endio(struct bio *bio) 112462306a36Sopenharmony_ci{ 112562306a36Sopenharmony_ci 
struct dm_cache_migration *mg = bio->bi_private; 112662306a36Sopenharmony_ci struct cache *cache = mg->cache; 112762306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci dm_unhook_bio(&pb->hook_info, bio); 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci if (bio->bi_status) 113262306a36Sopenharmony_ci mg->k.input = bio->bi_status; 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci queue_continuation(cache->wq, &mg->k); 113562306a36Sopenharmony_ci} 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_cistatic void overwrite(struct dm_cache_migration *mg, 113862306a36Sopenharmony_ci void (*continuation)(struct work_struct *)) 113962306a36Sopenharmony_ci{ 114062306a36Sopenharmony_ci struct bio *bio = mg->overwrite_bio; 114162306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci /* 114662306a36Sopenharmony_ci * The overwrite bio is part of the copy operation, as such it does 114762306a36Sopenharmony_ci * not set/clear discard or dirty flags. 
114862306a36Sopenharmony_ci */ 114962306a36Sopenharmony_ci if (mg->op->op == POLICY_PROMOTE) 115062306a36Sopenharmony_ci remap_to_cache(mg->cache, bio, mg->op->cblock); 115162306a36Sopenharmony_ci else 115262306a36Sopenharmony_ci remap_to_origin(mg->cache, bio); 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci init_continuation(&mg->k, continuation); 115562306a36Sopenharmony_ci accounted_request(mg->cache, bio); 115662306a36Sopenharmony_ci} 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci/* 115962306a36Sopenharmony_ci * Migration steps: 116062306a36Sopenharmony_ci * 116162306a36Sopenharmony_ci * 1) exclusive lock preventing WRITEs 116262306a36Sopenharmony_ci * 2) quiesce 116362306a36Sopenharmony_ci * 3) copy or issue overwrite bio 116462306a36Sopenharmony_ci * 4) upgrade to exclusive lock preventing READs and WRITEs 116562306a36Sopenharmony_ci * 5) quiesce 116662306a36Sopenharmony_ci * 6) update metadata and commit 116762306a36Sopenharmony_ci * 7) unlock 116862306a36Sopenharmony_ci */ 116962306a36Sopenharmony_cistatic void mg_complete(struct dm_cache_migration *mg, bool success) 117062306a36Sopenharmony_ci{ 117162306a36Sopenharmony_ci struct bio_list bios; 117262306a36Sopenharmony_ci struct cache *cache = mg->cache; 117362306a36Sopenharmony_ci struct policy_work *op = mg->op; 117462306a36Sopenharmony_ci dm_cblock_t cblock = op->cblock; 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci if (success) 117762306a36Sopenharmony_ci update_stats(&cache->stats, op->op); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci switch (op->op) { 118062306a36Sopenharmony_ci case POLICY_PROMOTE: 118162306a36Sopenharmony_ci clear_discard(cache, oblock_to_dblock(cache, op->oblock)); 118262306a36Sopenharmony_ci policy_complete_background_work(cache->policy, op, success); 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci if (mg->overwrite_bio) { 118562306a36Sopenharmony_ci if (success) 118662306a36Sopenharmony_ci force_set_dirty(cache, cblock); 
118762306a36Sopenharmony_ci else if (mg->k.input) 118862306a36Sopenharmony_ci mg->overwrite_bio->bi_status = mg->k.input; 118962306a36Sopenharmony_ci else 119062306a36Sopenharmony_ci mg->overwrite_bio->bi_status = BLK_STS_IOERR; 119162306a36Sopenharmony_ci bio_endio(mg->overwrite_bio); 119262306a36Sopenharmony_ci } else { 119362306a36Sopenharmony_ci if (success) 119462306a36Sopenharmony_ci force_clear_dirty(cache, cblock); 119562306a36Sopenharmony_ci dec_io_migrations(cache); 119662306a36Sopenharmony_ci } 119762306a36Sopenharmony_ci break; 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci case POLICY_DEMOTE: 120062306a36Sopenharmony_ci /* 120162306a36Sopenharmony_ci * We clear dirty here to update the nr_dirty counter. 120262306a36Sopenharmony_ci */ 120362306a36Sopenharmony_ci if (success) 120462306a36Sopenharmony_ci force_clear_dirty(cache, cblock); 120562306a36Sopenharmony_ci policy_complete_background_work(cache->policy, op, success); 120662306a36Sopenharmony_ci dec_io_migrations(cache); 120762306a36Sopenharmony_ci break; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci case POLICY_WRITEBACK: 121062306a36Sopenharmony_ci if (success) 121162306a36Sopenharmony_ci force_clear_dirty(cache, cblock); 121262306a36Sopenharmony_ci policy_complete_background_work(cache->policy, op, success); 121362306a36Sopenharmony_ci dec_io_migrations(cache); 121462306a36Sopenharmony_ci break; 121562306a36Sopenharmony_ci } 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci bio_list_init(&bios); 121862306a36Sopenharmony_ci if (mg->cell) { 121962306a36Sopenharmony_ci if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios)) 122062306a36Sopenharmony_ci free_prison_cell(cache, mg->cell); 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci 122362306a36Sopenharmony_ci free_migration(mg); 122462306a36Sopenharmony_ci defer_bios(cache, &bios); 122562306a36Sopenharmony_ci wake_migration_worker(cache); 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci 
background_work_end(cache); 122862306a36Sopenharmony_ci} 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_cistatic void mg_success(struct work_struct *ws) 123162306a36Sopenharmony_ci{ 123262306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci mg_complete(mg, mg->k.input == 0); 123562306a36Sopenharmony_ci} 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_cistatic void mg_update_metadata(struct work_struct *ws) 123862306a36Sopenharmony_ci{ 123962306a36Sopenharmony_ci int r; 124062306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 124162306a36Sopenharmony_ci struct cache *cache = mg->cache; 124262306a36Sopenharmony_ci struct policy_work *op = mg->op; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci switch (op->op) { 124562306a36Sopenharmony_ci case POLICY_PROMOTE: 124662306a36Sopenharmony_ci r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock); 124762306a36Sopenharmony_ci if (r) { 124862306a36Sopenharmony_ci DMERR_LIMIT("%s: migration failed; couldn't insert mapping", 124962306a36Sopenharmony_ci cache_device_name(cache)); 125062306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_insert_mapping", r); 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci mg_complete(mg, false); 125362306a36Sopenharmony_ci return; 125462306a36Sopenharmony_ci } 125562306a36Sopenharmony_ci mg_complete(mg, true); 125662306a36Sopenharmony_ci break; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci case POLICY_DEMOTE: 125962306a36Sopenharmony_ci r = dm_cache_remove_mapping(cache->cmd, op->cblock); 126062306a36Sopenharmony_ci if (r) { 126162306a36Sopenharmony_ci DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata", 126262306a36Sopenharmony_ci cache_device_name(cache)); 126362306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_remove_mapping", r); 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci mg_complete(mg, false); 
126662306a36Sopenharmony_ci return; 126762306a36Sopenharmony_ci } 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci /* 127062306a36Sopenharmony_ci * It would be nice if we only had to commit when a REQ_FLUSH 127162306a36Sopenharmony_ci * comes through. But there's one scenario that we have to 127262306a36Sopenharmony_ci * look out for: 127362306a36Sopenharmony_ci * 127462306a36Sopenharmony_ci * - vblock x in a cache block 127562306a36Sopenharmony_ci * - domotion occurs 127662306a36Sopenharmony_ci * - cache block gets reallocated and over written 127762306a36Sopenharmony_ci * - crash 127862306a36Sopenharmony_ci * 127962306a36Sopenharmony_ci * When we recover, because there was no commit the cache will 128062306a36Sopenharmony_ci * rollback to having the data for vblock x in the cache block. 128162306a36Sopenharmony_ci * But the cache block has since been overwritten, so it'll end 128262306a36Sopenharmony_ci * up pointing to data that was never in 'x' during the history 128362306a36Sopenharmony_ci * of the device. 128462306a36Sopenharmony_ci * 128562306a36Sopenharmony_ci * To avoid this issue we require a commit as part of the 128662306a36Sopenharmony_ci * demotion operation. 
128762306a36Sopenharmony_ci */ 128862306a36Sopenharmony_ci init_continuation(&mg->k, mg_success); 128962306a36Sopenharmony_ci continue_after_commit(&cache->committer, &mg->k); 129062306a36Sopenharmony_ci schedule_commit(&cache->committer); 129162306a36Sopenharmony_ci break; 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci case POLICY_WRITEBACK: 129462306a36Sopenharmony_ci mg_complete(mg, true); 129562306a36Sopenharmony_ci break; 129662306a36Sopenharmony_ci } 129762306a36Sopenharmony_ci} 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_cistatic void mg_update_metadata_after_copy(struct work_struct *ws) 130062306a36Sopenharmony_ci{ 130162306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci /* 130462306a36Sopenharmony_ci * Did the copy succeed? 130562306a36Sopenharmony_ci */ 130662306a36Sopenharmony_ci if (mg->k.input) 130762306a36Sopenharmony_ci mg_complete(mg, false); 130862306a36Sopenharmony_ci else 130962306a36Sopenharmony_ci mg_update_metadata(ws); 131062306a36Sopenharmony_ci} 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_cistatic void mg_upgrade_lock(struct work_struct *ws) 131362306a36Sopenharmony_ci{ 131462306a36Sopenharmony_ci int r; 131562306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci /* 131862306a36Sopenharmony_ci * Did the copy succeed? 131962306a36Sopenharmony_ci */ 132062306a36Sopenharmony_ci if (mg->k.input) 132162306a36Sopenharmony_ci mg_complete(mg, false); 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci else { 132462306a36Sopenharmony_ci /* 132562306a36Sopenharmony_ci * Now we want the lock to prevent both reads and writes. 
132662306a36Sopenharmony_ci */ 132762306a36Sopenharmony_ci r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell, 132862306a36Sopenharmony_ci READ_WRITE_LOCK_LEVEL); 132962306a36Sopenharmony_ci if (r < 0) 133062306a36Sopenharmony_ci mg_complete(mg, false); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci else if (r) 133362306a36Sopenharmony_ci quiesce(mg, mg_update_metadata); 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci else 133662306a36Sopenharmony_ci mg_update_metadata(ws); 133762306a36Sopenharmony_ci } 133862306a36Sopenharmony_ci} 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_cistatic void mg_full_copy(struct work_struct *ws) 134162306a36Sopenharmony_ci{ 134262306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 134362306a36Sopenharmony_ci struct cache *cache = mg->cache; 134462306a36Sopenharmony_ci struct policy_work *op = mg->op; 134562306a36Sopenharmony_ci bool is_policy_promote = (op->op == POLICY_PROMOTE); 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || 134862306a36Sopenharmony_ci is_discarded_oblock(cache, op->oblock)) { 134962306a36Sopenharmony_ci mg_upgrade_lock(ws); 135062306a36Sopenharmony_ci return; 135162306a36Sopenharmony_ci } 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci init_continuation(&mg->k, mg_upgrade_lock); 135462306a36Sopenharmony_ci copy(mg, is_policy_promote); 135562306a36Sopenharmony_ci} 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_cistatic void mg_copy(struct work_struct *ws) 135862306a36Sopenharmony_ci{ 135962306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci if (mg->overwrite_bio) { 136262306a36Sopenharmony_ci /* 136362306a36Sopenharmony_ci * No exclusive lock was held when we last checked if the bio 136462306a36Sopenharmony_ci * was optimisable. 
So we have to check again in case things 136562306a36Sopenharmony_ci * have changed (eg, the block may no longer be discarded). 136662306a36Sopenharmony_ci */ 136762306a36Sopenharmony_ci if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) { 136862306a36Sopenharmony_ci /* 136962306a36Sopenharmony_ci * Fallback to a real full copy after doing some tidying up. 137062306a36Sopenharmony_ci */ 137162306a36Sopenharmony_ci bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */ 137462306a36Sopenharmony_ci mg->overwrite_bio = NULL; 137562306a36Sopenharmony_ci inc_io_migrations(mg->cache); 137662306a36Sopenharmony_ci mg_full_copy(ws); 137762306a36Sopenharmony_ci return; 137862306a36Sopenharmony_ci } 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci /* 138162306a36Sopenharmony_ci * It's safe to do this here, even though it's new data 138262306a36Sopenharmony_ci * because all IO has been locked out of the block. 138362306a36Sopenharmony_ci * 138462306a36Sopenharmony_ci * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL 138562306a36Sopenharmony_ci * so _not_ using mg_upgrade_lock() as continutation. 
138662306a36Sopenharmony_ci */ 138762306a36Sopenharmony_ci overwrite(mg, mg_update_metadata_after_copy); 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci } else 139062306a36Sopenharmony_ci mg_full_copy(ws); 139162306a36Sopenharmony_ci} 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_cistatic int mg_lock_writes(struct dm_cache_migration *mg) 139462306a36Sopenharmony_ci{ 139562306a36Sopenharmony_ci int r; 139662306a36Sopenharmony_ci struct dm_cell_key_v2 key; 139762306a36Sopenharmony_ci struct cache *cache = mg->cache; 139862306a36Sopenharmony_ci struct dm_bio_prison_cell_v2 *prealloc; 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci prealloc = alloc_prison_cell(cache); 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci /* 140362306a36Sopenharmony_ci * Prevent writes to the block, but allow reads to continue. 140462306a36Sopenharmony_ci * Unless we're using an overwrite bio, in which case we lock 140562306a36Sopenharmony_ci * everything. 140662306a36Sopenharmony_ci */ 140762306a36Sopenharmony_ci build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key); 140862306a36Sopenharmony_ci r = dm_cell_lock_v2(cache->prison, &key, 140962306a36Sopenharmony_ci mg->overwrite_bio ? 
READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL, 141062306a36Sopenharmony_ci prealloc, &mg->cell); 141162306a36Sopenharmony_ci if (r < 0) { 141262306a36Sopenharmony_ci free_prison_cell(cache, prealloc); 141362306a36Sopenharmony_ci mg_complete(mg, false); 141462306a36Sopenharmony_ci return r; 141562306a36Sopenharmony_ci } 141662306a36Sopenharmony_ci 141762306a36Sopenharmony_ci if (mg->cell != prealloc) 141862306a36Sopenharmony_ci free_prison_cell(cache, prealloc); 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci if (r == 0) 142162306a36Sopenharmony_ci mg_copy(&mg->k.ws); 142262306a36Sopenharmony_ci else 142362306a36Sopenharmony_ci quiesce(mg, mg_copy); 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci return 0; 142662306a36Sopenharmony_ci} 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_cistatic int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio) 142962306a36Sopenharmony_ci{ 143062306a36Sopenharmony_ci struct dm_cache_migration *mg; 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci if (!background_work_begin(cache)) { 143362306a36Sopenharmony_ci policy_complete_background_work(cache->policy, op, false); 143462306a36Sopenharmony_ci return -EPERM; 143562306a36Sopenharmony_ci } 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci mg = alloc_migration(cache); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci mg->op = op; 144062306a36Sopenharmony_ci mg->overwrite_bio = bio; 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci if (!bio) 144362306a36Sopenharmony_ci inc_io_migrations(cache); 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci return mg_lock_writes(mg); 144662306a36Sopenharmony_ci} 144762306a36Sopenharmony_ci 144862306a36Sopenharmony_ci/* 144962306a36Sopenharmony_ci *-------------------------------------------------------------- 145062306a36Sopenharmony_ci * invalidation processing 145162306a36Sopenharmony_ci *-------------------------------------------------------------- 145262306a36Sopenharmony_ci */ 
145362306a36Sopenharmony_ci 145462306a36Sopenharmony_cistatic void invalidate_complete(struct dm_cache_migration *mg, bool success) 145562306a36Sopenharmony_ci{ 145662306a36Sopenharmony_ci struct bio_list bios; 145762306a36Sopenharmony_ci struct cache *cache = mg->cache; 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci bio_list_init(&bios); 146062306a36Sopenharmony_ci if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios)) 146162306a36Sopenharmony_ci free_prison_cell(cache, mg->cell); 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci if (!success && mg->overwrite_bio) 146462306a36Sopenharmony_ci bio_io_error(mg->overwrite_bio); 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci free_migration(mg); 146762306a36Sopenharmony_ci defer_bios(cache, &bios); 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_ci background_work_end(cache); 147062306a36Sopenharmony_ci} 147162306a36Sopenharmony_ci 147262306a36Sopenharmony_cistatic void invalidate_completed(struct work_struct *ws) 147362306a36Sopenharmony_ci{ 147462306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci invalidate_complete(mg, !mg->k.input); 147762306a36Sopenharmony_ci} 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_cistatic int invalidate_cblock(struct cache *cache, dm_cblock_t cblock) 148062306a36Sopenharmony_ci{ 148162306a36Sopenharmony_ci int r; 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci r = policy_invalidate_mapping(cache->policy, cblock); 148462306a36Sopenharmony_ci if (!r) { 148562306a36Sopenharmony_ci r = dm_cache_remove_mapping(cache->cmd, cblock); 148662306a36Sopenharmony_ci if (r) { 148762306a36Sopenharmony_ci DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata", 148862306a36Sopenharmony_ci cache_device_name(cache)); 148962306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_remove_mapping", r); 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci 
149262306a36Sopenharmony_ci } else if (r == -ENODATA) { 149362306a36Sopenharmony_ci /* 149462306a36Sopenharmony_ci * Harmless, already unmapped. 149562306a36Sopenharmony_ci */ 149662306a36Sopenharmony_ci r = 0; 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci } else 149962306a36Sopenharmony_ci DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache)); 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci return r; 150262306a36Sopenharmony_ci} 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_cistatic void invalidate_remove(struct work_struct *ws) 150562306a36Sopenharmony_ci{ 150662306a36Sopenharmony_ci int r; 150762306a36Sopenharmony_ci struct dm_cache_migration *mg = ws_to_mg(ws); 150862306a36Sopenharmony_ci struct cache *cache = mg->cache; 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci r = invalidate_cblock(cache, mg->invalidate_cblock); 151162306a36Sopenharmony_ci if (r) { 151262306a36Sopenharmony_ci invalidate_complete(mg, false); 151362306a36Sopenharmony_ci return; 151462306a36Sopenharmony_ci } 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci init_continuation(&mg->k, invalidate_completed); 151762306a36Sopenharmony_ci continue_after_commit(&cache->committer, &mg->k); 151862306a36Sopenharmony_ci remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock); 151962306a36Sopenharmony_ci mg->overwrite_bio = NULL; 152062306a36Sopenharmony_ci schedule_commit(&cache->committer); 152162306a36Sopenharmony_ci} 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_cistatic int invalidate_lock(struct dm_cache_migration *mg) 152462306a36Sopenharmony_ci{ 152562306a36Sopenharmony_ci int r; 152662306a36Sopenharmony_ci struct dm_cell_key_v2 key; 152762306a36Sopenharmony_ci struct cache *cache = mg->cache; 152862306a36Sopenharmony_ci struct dm_bio_prison_cell_v2 *prealloc; 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci prealloc = alloc_prison_cell(cache); 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci 
build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key); 153362306a36Sopenharmony_ci r = dm_cell_lock_v2(cache->prison, &key, 153462306a36Sopenharmony_ci READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell); 153562306a36Sopenharmony_ci if (r < 0) { 153662306a36Sopenharmony_ci free_prison_cell(cache, prealloc); 153762306a36Sopenharmony_ci invalidate_complete(mg, false); 153862306a36Sopenharmony_ci return r; 153962306a36Sopenharmony_ci } 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci if (mg->cell != prealloc) 154262306a36Sopenharmony_ci free_prison_cell(cache, prealloc); 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci if (r) 154562306a36Sopenharmony_ci quiesce(mg, invalidate_remove); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci else { 154862306a36Sopenharmony_ci /* 154962306a36Sopenharmony_ci * We can't call invalidate_remove() directly here because we 155062306a36Sopenharmony_ci * might still be in request context. 155162306a36Sopenharmony_ci */ 155262306a36Sopenharmony_ci init_continuation(&mg->k, invalidate_remove); 155362306a36Sopenharmony_ci queue_work(cache->wq, &mg->k.ws); 155462306a36Sopenharmony_ci } 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci return 0; 155762306a36Sopenharmony_ci} 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_cistatic int invalidate_start(struct cache *cache, dm_cblock_t cblock, 156062306a36Sopenharmony_ci dm_oblock_t oblock, struct bio *bio) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci struct dm_cache_migration *mg; 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci if (!background_work_begin(cache)) 156562306a36Sopenharmony_ci return -EPERM; 156662306a36Sopenharmony_ci 156762306a36Sopenharmony_ci mg = alloc_migration(cache); 156862306a36Sopenharmony_ci 156962306a36Sopenharmony_ci mg->overwrite_bio = bio; 157062306a36Sopenharmony_ci mg->invalidate_cblock = cblock; 157162306a36Sopenharmony_ci mg->invalidate_oblock = oblock; 157262306a36Sopenharmony_ci 
157362306a36Sopenharmony_ci return invalidate_lock(mg); 157462306a36Sopenharmony_ci} 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci/* 157762306a36Sopenharmony_ci *-------------------------------------------------------------- 157862306a36Sopenharmony_ci * bio processing 157962306a36Sopenharmony_ci *-------------------------------------------------------------- 158062306a36Sopenharmony_ci */ 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_cienum busy { 158362306a36Sopenharmony_ci IDLE, 158462306a36Sopenharmony_ci BUSY 158562306a36Sopenharmony_ci}; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_cistatic enum busy spare_migration_bandwidth(struct cache *cache) 158862306a36Sopenharmony_ci{ 158962306a36Sopenharmony_ci bool idle = dm_iot_idle_for(&cache->tracker, HZ); 159062306a36Sopenharmony_ci sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * 159162306a36Sopenharmony_ci cache->sectors_per_block; 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci if (idle && current_volume <= cache->migration_threshold) 159462306a36Sopenharmony_ci return IDLE; 159562306a36Sopenharmony_ci else 159662306a36Sopenharmony_ci return BUSY; 159762306a36Sopenharmony_ci} 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_cistatic void inc_hit_counter(struct cache *cache, struct bio *bio) 160062306a36Sopenharmony_ci{ 160162306a36Sopenharmony_ci atomic_inc(bio_data_dir(bio) == READ ? 160262306a36Sopenharmony_ci &cache->stats.read_hit : &cache->stats.write_hit); 160362306a36Sopenharmony_ci} 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_cistatic void inc_miss_counter(struct cache *cache, struct bio *bio) 160662306a36Sopenharmony_ci{ 160762306a36Sopenharmony_ci atomic_inc(bio_data_dir(bio) == READ ? 
160862306a36Sopenharmony_ci &cache->stats.read_miss : &cache->stats.write_miss); 160962306a36Sopenharmony_ci} 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 161262306a36Sopenharmony_ci 161362306a36Sopenharmony_cistatic int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, 161462306a36Sopenharmony_ci bool *commit_needed) 161562306a36Sopenharmony_ci{ 161662306a36Sopenharmony_ci int r, data_dir; 161762306a36Sopenharmony_ci bool rb, background_queued; 161862306a36Sopenharmony_ci dm_cblock_t cblock; 161962306a36Sopenharmony_ci 162062306a36Sopenharmony_ci *commit_needed = false; 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci rb = bio_detain_shared(cache, block, bio); 162362306a36Sopenharmony_ci if (!rb) { 162462306a36Sopenharmony_ci /* 162562306a36Sopenharmony_ci * An exclusive lock is held for this block, so we have to 162662306a36Sopenharmony_ci * wait. We set the commit_needed flag so the current 162762306a36Sopenharmony_ci * transaction will be committed asap, allowing this lock 162862306a36Sopenharmony_ci * to be dropped. 
162962306a36Sopenharmony_ci */ 163062306a36Sopenharmony_ci *commit_needed = true; 163162306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 163262306a36Sopenharmony_ci } 163362306a36Sopenharmony_ci 163462306a36Sopenharmony_ci data_dir = bio_data_dir(bio); 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci if (optimisable_bio(cache, bio, block)) { 163762306a36Sopenharmony_ci struct policy_work *op = NULL; 163862306a36Sopenharmony_ci 163962306a36Sopenharmony_ci r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op); 164062306a36Sopenharmony_ci if (unlikely(r && r != -ENOENT)) { 164162306a36Sopenharmony_ci DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d", 164262306a36Sopenharmony_ci cache_device_name(cache), r); 164362306a36Sopenharmony_ci bio_io_error(bio); 164462306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 164562306a36Sopenharmony_ci } 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_ci if (r == -ENOENT && op) { 164862306a36Sopenharmony_ci bio_drop_shared_lock(cache, bio); 164962306a36Sopenharmony_ci BUG_ON(op->op != POLICY_PROMOTE); 165062306a36Sopenharmony_ci mg_start(cache, op, bio); 165162306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 165262306a36Sopenharmony_ci } 165362306a36Sopenharmony_ci } else { 165462306a36Sopenharmony_ci r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued); 165562306a36Sopenharmony_ci if (unlikely(r && r != -ENOENT)) { 165662306a36Sopenharmony_ci DMERR_LIMIT("%s: policy_lookup() failed with r = %d", 165762306a36Sopenharmony_ci cache_device_name(cache), r); 165862306a36Sopenharmony_ci bio_io_error(bio); 165962306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 166062306a36Sopenharmony_ci } 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci if (background_queued) 166362306a36Sopenharmony_ci wake_migration_worker(cache); 166462306a36Sopenharmony_ci } 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci if (r == -ENOENT) { 166762306a36Sopenharmony_ci 
struct per_bio_data *pb = get_per_bio_data(bio); 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci /* 167062306a36Sopenharmony_ci * Miss. 167162306a36Sopenharmony_ci */ 167262306a36Sopenharmony_ci inc_miss_counter(cache, bio); 167362306a36Sopenharmony_ci if (pb->req_nr == 0) { 167462306a36Sopenharmony_ci accounted_begin(cache, bio); 167562306a36Sopenharmony_ci remap_to_origin_clear_discard(cache, bio, block); 167662306a36Sopenharmony_ci } else { 167762306a36Sopenharmony_ci /* 167862306a36Sopenharmony_ci * This is a duplicate writethrough io that is no 167962306a36Sopenharmony_ci * longer needed because the block has been demoted. 168062306a36Sopenharmony_ci */ 168162306a36Sopenharmony_ci bio_endio(bio); 168262306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 168362306a36Sopenharmony_ci } 168462306a36Sopenharmony_ci } else { 168562306a36Sopenharmony_ci /* 168662306a36Sopenharmony_ci * Hit. 168762306a36Sopenharmony_ci */ 168862306a36Sopenharmony_ci inc_hit_counter(cache, bio); 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci /* 169162306a36Sopenharmony_ci * Passthrough always maps to the origin, invalidating any 169262306a36Sopenharmony_ci * cache blocks that are written to. 
169362306a36Sopenharmony_ci */ 169462306a36Sopenharmony_ci if (passthrough_mode(cache)) { 169562306a36Sopenharmony_ci if (bio_data_dir(bio) == WRITE) { 169662306a36Sopenharmony_ci bio_drop_shared_lock(cache, bio); 169762306a36Sopenharmony_ci atomic_inc(&cache->stats.demotion); 169862306a36Sopenharmony_ci invalidate_start(cache, cblock, block, bio); 169962306a36Sopenharmony_ci } else 170062306a36Sopenharmony_ci remap_to_origin_clear_discard(cache, bio, block); 170162306a36Sopenharmony_ci } else { 170262306a36Sopenharmony_ci if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) && 170362306a36Sopenharmony_ci !is_dirty(cache, cblock)) { 170462306a36Sopenharmony_ci remap_to_origin_and_cache(cache, bio, block, cblock); 170562306a36Sopenharmony_ci accounted_begin(cache, bio); 170662306a36Sopenharmony_ci } else 170762306a36Sopenharmony_ci remap_to_cache_dirty(cache, bio, block, cblock); 170862306a36Sopenharmony_ci } 170962306a36Sopenharmony_ci } 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci /* 171262306a36Sopenharmony_ci * dm core turns FUA requests into a separate payload and FLUSH req. 171362306a36Sopenharmony_ci */ 171462306a36Sopenharmony_ci if (bio->bi_opf & REQ_FUA) { 171562306a36Sopenharmony_ci /* 171662306a36Sopenharmony_ci * issue_after_commit will call accounted_begin a second time. So 171762306a36Sopenharmony_ci * we call accounted_complete() to avoid double accounting. 
171862306a36Sopenharmony_ci */ 171962306a36Sopenharmony_ci accounted_complete(cache, bio); 172062306a36Sopenharmony_ci issue_after_commit(&cache->committer, bio); 172162306a36Sopenharmony_ci *commit_needed = true; 172262306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 172362306a36Sopenharmony_ci } 172462306a36Sopenharmony_ci 172562306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 172662306a36Sopenharmony_ci} 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_cistatic bool process_bio(struct cache *cache, struct bio *bio) 172962306a36Sopenharmony_ci{ 173062306a36Sopenharmony_ci bool commit_needed; 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED) 173362306a36Sopenharmony_ci dm_submit_bio_remap(bio, NULL); 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci return commit_needed; 173662306a36Sopenharmony_ci} 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci/* 173962306a36Sopenharmony_ci * A non-zero return indicates read_only or fail_io mode. 174062306a36Sopenharmony_ci */ 174162306a36Sopenharmony_cistatic int commit(struct cache *cache, bool clean_shutdown) 174262306a36Sopenharmony_ci{ 174362306a36Sopenharmony_ci int r; 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 174662306a36Sopenharmony_ci return -EINVAL; 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci atomic_inc(&cache->stats.commit_count); 174962306a36Sopenharmony_ci r = dm_cache_commit(cache->cmd, clean_shutdown); 175062306a36Sopenharmony_ci if (r) 175162306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_commit", r); 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci return r; 175462306a36Sopenharmony_ci} 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci/* 175762306a36Sopenharmony_ci * Used by the batcher. 
175862306a36Sopenharmony_ci */ 175962306a36Sopenharmony_cistatic blk_status_t commit_op(void *context) 176062306a36Sopenharmony_ci{ 176162306a36Sopenharmony_ci struct cache *cache = context; 176262306a36Sopenharmony_ci 176362306a36Sopenharmony_ci if (dm_cache_changed_this_transaction(cache->cmd)) 176462306a36Sopenharmony_ci return errno_to_blk_status(commit(cache, false)); 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ci return 0; 176762306a36Sopenharmony_ci} 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_cistatic bool process_flush_bio(struct cache *cache, struct bio *bio) 177262306a36Sopenharmony_ci{ 177362306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci if (!pb->req_nr) 177662306a36Sopenharmony_ci remap_to_origin(cache, bio); 177762306a36Sopenharmony_ci else 177862306a36Sopenharmony_ci remap_to_cache(cache, bio, 0); 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci issue_after_commit(&cache->committer, bio); 178162306a36Sopenharmony_ci return true; 178262306a36Sopenharmony_ci} 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_cistatic bool process_discard_bio(struct cache *cache, struct bio *bio) 178562306a36Sopenharmony_ci{ 178662306a36Sopenharmony_ci dm_dblock_t b, e; 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci /* 178962306a36Sopenharmony_ci * FIXME: do we need to lock the region? Or can we just assume the 179062306a36Sopenharmony_ci * user wont be so foolish as to issue discard concurrently with 179162306a36Sopenharmony_ci * other IO? 
179262306a36Sopenharmony_ci */ 179362306a36Sopenharmony_ci calc_discard_block_range(cache, bio, &b, &e); 179462306a36Sopenharmony_ci while (b != e) { 179562306a36Sopenharmony_ci set_discard(cache, b); 179662306a36Sopenharmony_ci b = to_dblock(from_dblock(b) + 1); 179762306a36Sopenharmony_ci } 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_ci if (cache->features.discard_passdown) { 180062306a36Sopenharmony_ci remap_to_origin(cache, bio); 180162306a36Sopenharmony_ci dm_submit_bio_remap(bio, NULL); 180262306a36Sopenharmony_ci } else 180362306a36Sopenharmony_ci bio_endio(bio); 180462306a36Sopenharmony_ci 180562306a36Sopenharmony_ci return false; 180662306a36Sopenharmony_ci} 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_cistatic void process_deferred_bios(struct work_struct *ws) 180962306a36Sopenharmony_ci{ 181062306a36Sopenharmony_ci struct cache *cache = container_of(ws, struct cache, deferred_bio_worker); 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci bool commit_needed = false; 181362306a36Sopenharmony_ci struct bio_list bios; 181462306a36Sopenharmony_ci struct bio *bio; 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci bio_list_init(&bios); 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci spin_lock_irq(&cache->lock); 181962306a36Sopenharmony_ci bio_list_merge(&bios, &cache->deferred_bios); 182062306a36Sopenharmony_ci bio_list_init(&cache->deferred_bios); 182162306a36Sopenharmony_ci spin_unlock_irq(&cache->lock); 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_ci while ((bio = bio_list_pop(&bios))) { 182462306a36Sopenharmony_ci if (bio->bi_opf & REQ_PREFLUSH) 182562306a36Sopenharmony_ci commit_needed = process_flush_bio(cache, bio) || commit_needed; 182662306a36Sopenharmony_ci 182762306a36Sopenharmony_ci else if (bio_op(bio) == REQ_OP_DISCARD) 182862306a36Sopenharmony_ci commit_needed = process_discard_bio(cache, bio) || commit_needed; 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci else 183162306a36Sopenharmony_ci 
commit_needed = process_bio(cache, bio) || commit_needed; 183262306a36Sopenharmony_ci cond_resched(); 183362306a36Sopenharmony_ci } 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci if (commit_needed) 183662306a36Sopenharmony_ci schedule_commit(&cache->committer); 183762306a36Sopenharmony_ci} 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci/* 184062306a36Sopenharmony_ci *-------------------------------------------------------------- 184162306a36Sopenharmony_ci * Main worker loop 184262306a36Sopenharmony_ci *-------------------------------------------------------------- 184362306a36Sopenharmony_ci */ 184462306a36Sopenharmony_cistatic void requeue_deferred_bios(struct cache *cache) 184562306a36Sopenharmony_ci{ 184662306a36Sopenharmony_ci struct bio *bio; 184762306a36Sopenharmony_ci struct bio_list bios; 184862306a36Sopenharmony_ci 184962306a36Sopenharmony_ci bio_list_init(&bios); 185062306a36Sopenharmony_ci bio_list_merge(&bios, &cache->deferred_bios); 185162306a36Sopenharmony_ci bio_list_init(&cache->deferred_bios); 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_ci while ((bio = bio_list_pop(&bios))) { 185462306a36Sopenharmony_ci bio->bi_status = BLK_STS_DM_REQUEUE; 185562306a36Sopenharmony_ci bio_endio(bio); 185662306a36Sopenharmony_ci cond_resched(); 185762306a36Sopenharmony_ci } 185862306a36Sopenharmony_ci} 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci/* 186162306a36Sopenharmony_ci * We want to commit periodically so that not too much 186262306a36Sopenharmony_ci * unwritten metadata builds up. 
186362306a36Sopenharmony_ci */ 186462306a36Sopenharmony_cistatic void do_waker(struct work_struct *ws) 186562306a36Sopenharmony_ci{ 186662306a36Sopenharmony_ci struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci policy_tick(cache->policy, true); 186962306a36Sopenharmony_ci wake_migration_worker(cache); 187062306a36Sopenharmony_ci schedule_commit(&cache->committer); 187162306a36Sopenharmony_ci queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 187262306a36Sopenharmony_ci} 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_cistatic void check_migrations(struct work_struct *ws) 187562306a36Sopenharmony_ci{ 187662306a36Sopenharmony_ci int r; 187762306a36Sopenharmony_ci struct policy_work *op; 187862306a36Sopenharmony_ci struct cache *cache = container_of(ws, struct cache, migration_worker); 187962306a36Sopenharmony_ci enum busy b; 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci for (;;) { 188262306a36Sopenharmony_ci b = spare_migration_bandwidth(cache); 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci r = policy_get_background_work(cache->policy, b == IDLE, &op); 188562306a36Sopenharmony_ci if (r == -ENODATA) 188662306a36Sopenharmony_ci break; 188762306a36Sopenharmony_ci 188862306a36Sopenharmony_ci if (r) { 188962306a36Sopenharmony_ci DMERR_LIMIT("%s: policy_background_work failed", 189062306a36Sopenharmony_ci cache_device_name(cache)); 189162306a36Sopenharmony_ci break; 189262306a36Sopenharmony_ci } 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci r = mg_start(cache, op, NULL); 189562306a36Sopenharmony_ci if (r) 189662306a36Sopenharmony_ci break; 189762306a36Sopenharmony_ci 189862306a36Sopenharmony_ci cond_resched(); 189962306a36Sopenharmony_ci } 190062306a36Sopenharmony_ci} 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci/* 190362306a36Sopenharmony_ci *-------------------------------------------------------------- 190462306a36Sopenharmony_ci * 
Target methods 190562306a36Sopenharmony_ci *-------------------------------------------------------------- 190662306a36Sopenharmony_ci */ 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci/* 190962306a36Sopenharmony_ci * This function gets called on the error paths of the constructor, so we 191062306a36Sopenharmony_ci * have to cope with a partially initialised struct. 191162306a36Sopenharmony_ci */ 191262306a36Sopenharmony_cistatic void destroy(struct cache *cache) 191362306a36Sopenharmony_ci{ 191462306a36Sopenharmony_ci unsigned int i; 191562306a36Sopenharmony_ci 191662306a36Sopenharmony_ci mempool_exit(&cache->migration_pool); 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci if (cache->prison) 191962306a36Sopenharmony_ci dm_bio_prison_destroy_v2(cache->prison); 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci cancel_delayed_work_sync(&cache->waker); 192262306a36Sopenharmony_ci if (cache->wq) 192362306a36Sopenharmony_ci destroy_workqueue(cache->wq); 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_ci if (cache->dirty_bitset) 192662306a36Sopenharmony_ci free_bitset(cache->dirty_bitset); 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci if (cache->discard_bitset) 192962306a36Sopenharmony_ci free_bitset(cache->discard_bitset); 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci if (cache->copier) 193262306a36Sopenharmony_ci dm_kcopyd_client_destroy(cache->copier); 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci if (cache->cmd) 193562306a36Sopenharmony_ci dm_cache_metadata_close(cache->cmd); 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci if (cache->metadata_dev) 193862306a36Sopenharmony_ci dm_put_device(cache->ti, cache->metadata_dev); 193962306a36Sopenharmony_ci 194062306a36Sopenharmony_ci if (cache->origin_dev) 194162306a36Sopenharmony_ci dm_put_device(cache->ti, cache->origin_dev); 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ci if (cache->cache_dev) 194462306a36Sopenharmony_ci dm_put_device(cache->ti, 
cache->cache_dev); 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci if (cache->policy) 194762306a36Sopenharmony_ci dm_cache_policy_destroy(cache->policy); 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci for (i = 0; i < cache->nr_ctr_args ; i++) 195062306a36Sopenharmony_ci kfree(cache->ctr_args[i]); 195162306a36Sopenharmony_ci kfree(cache->ctr_args); 195262306a36Sopenharmony_ci 195362306a36Sopenharmony_ci bioset_exit(&cache->bs); 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_ci kfree(cache); 195662306a36Sopenharmony_ci} 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_cistatic void cache_dtr(struct dm_target *ti) 195962306a36Sopenharmony_ci{ 196062306a36Sopenharmony_ci struct cache *cache = ti->private; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci destroy(cache); 196362306a36Sopenharmony_ci} 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_cistatic sector_t get_dev_size(struct dm_dev *dev) 196662306a36Sopenharmony_ci{ 196762306a36Sopenharmony_ci return bdev_nr_sectors(dev->bdev); 196862306a36Sopenharmony_ci} 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci/* 197362306a36Sopenharmony_ci * Construct a cache device mapping. 
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev	   : fast device holding cached data blocks
 * origin dev	   : slow device holding original data blocks
 * block size	   : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed (at most 3)
 * feature args    : writeback, writethrough, passthrough, metadata2,
 *		     no_discard_passdown.  (The default is writeback.)
 *		     At most one of writeback/writethrough/passthrough
 *		     may be given.
 *
 * policy	   : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *		     to key/value pairs passed to the policy
 * policy args	   : key/value pairs passed to the policy
 *		     E.g. 'sequential_threshold 1024'
 *		     See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *		     content from being different from origin block content.
 *		     Without this argument, the default behaviour is to write
 *		     back cache block contents later for performance reasons,
 *		     so they may differ from the corresponding origin blocks.
 *   passthrough   : selects the passthrough io mode.  The constructor
 *		     refuses this mode unless all cache blocks are clean,
 *		     and disables policy migrations when it is selected.
 *   metadata2	   : use version 2 of the metadata format (default is 1).
 *   no_discard_passdown : clear the discard_passdown feature, which is
 *		     enabled by default.
200062306a36Sopenharmony_ci */ 200162306a36Sopenharmony_cistruct cache_args { 200262306a36Sopenharmony_ci struct dm_target *ti; 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci struct dm_dev *metadata_dev; 200562306a36Sopenharmony_ci 200662306a36Sopenharmony_ci struct dm_dev *cache_dev; 200762306a36Sopenharmony_ci sector_t cache_sectors; 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci struct dm_dev *origin_dev; 201062306a36Sopenharmony_ci sector_t origin_sectors; 201162306a36Sopenharmony_ci 201262306a36Sopenharmony_ci uint32_t block_size; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci const char *policy_name; 201562306a36Sopenharmony_ci int policy_argc; 201662306a36Sopenharmony_ci const char **policy_argv; 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ci struct cache_features features; 201962306a36Sopenharmony_ci}; 202062306a36Sopenharmony_ci 202162306a36Sopenharmony_cistatic void destroy_cache_args(struct cache_args *ca) 202262306a36Sopenharmony_ci{ 202362306a36Sopenharmony_ci if (ca->metadata_dev) 202462306a36Sopenharmony_ci dm_put_device(ca->ti, ca->metadata_dev); 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci if (ca->cache_dev) 202762306a36Sopenharmony_ci dm_put_device(ca->ti, ca->cache_dev); 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci if (ca->origin_dev) 203062306a36Sopenharmony_ci dm_put_device(ca->ti, ca->origin_dev); 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci kfree(ca); 203362306a36Sopenharmony_ci} 203462306a36Sopenharmony_ci 203562306a36Sopenharmony_cistatic bool at_least_one_arg(struct dm_arg_set *as, char **error) 203662306a36Sopenharmony_ci{ 203762306a36Sopenharmony_ci if (!as->argc) { 203862306a36Sopenharmony_ci *error = "Insufficient args"; 203962306a36Sopenharmony_ci return false; 204062306a36Sopenharmony_ci } 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci return true; 204362306a36Sopenharmony_ci} 204462306a36Sopenharmony_ci 204562306a36Sopenharmony_cistatic int 
parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, 204662306a36Sopenharmony_ci char **error) 204762306a36Sopenharmony_ci{ 204862306a36Sopenharmony_ci int r; 204962306a36Sopenharmony_ci sector_t metadata_dev_size; 205062306a36Sopenharmony_ci 205162306a36Sopenharmony_ci if (!at_least_one_arg(as, error)) 205262306a36Sopenharmony_ci return -EINVAL; 205362306a36Sopenharmony_ci 205462306a36Sopenharmony_ci r = dm_get_device(ca->ti, dm_shift_arg(as), 205562306a36Sopenharmony_ci BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->metadata_dev); 205662306a36Sopenharmony_ci if (r) { 205762306a36Sopenharmony_ci *error = "Error opening metadata device"; 205862306a36Sopenharmony_ci return r; 205962306a36Sopenharmony_ci } 206062306a36Sopenharmony_ci 206162306a36Sopenharmony_ci metadata_dev_size = get_dev_size(ca->metadata_dev); 206262306a36Sopenharmony_ci if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) 206362306a36Sopenharmony_ci DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.", 206462306a36Sopenharmony_ci ca->metadata_dev->bdev, THIN_METADATA_MAX_SECTORS); 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_ci return 0; 206762306a36Sopenharmony_ci} 206862306a36Sopenharmony_ci 206962306a36Sopenharmony_cistatic int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, 207062306a36Sopenharmony_ci char **error) 207162306a36Sopenharmony_ci{ 207262306a36Sopenharmony_ci int r; 207362306a36Sopenharmony_ci 207462306a36Sopenharmony_ci if (!at_least_one_arg(as, error)) 207562306a36Sopenharmony_ci return -EINVAL; 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci r = dm_get_device(ca->ti, dm_shift_arg(as), 207862306a36Sopenharmony_ci BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->cache_dev); 207962306a36Sopenharmony_ci if (r) { 208062306a36Sopenharmony_ci *error = "Error opening cache device"; 208162306a36Sopenharmony_ci return r; 208262306a36Sopenharmony_ci } 208362306a36Sopenharmony_ci ca->cache_sectors = get_dev_size(ca->cache_dev); 
208462306a36Sopenharmony_ci 208562306a36Sopenharmony_ci return 0; 208662306a36Sopenharmony_ci} 208762306a36Sopenharmony_ci 208862306a36Sopenharmony_cistatic int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, 208962306a36Sopenharmony_ci char **error) 209062306a36Sopenharmony_ci{ 209162306a36Sopenharmony_ci int r; 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci if (!at_least_one_arg(as, error)) 209462306a36Sopenharmony_ci return -EINVAL; 209562306a36Sopenharmony_ci 209662306a36Sopenharmony_ci r = dm_get_device(ca->ti, dm_shift_arg(as), 209762306a36Sopenharmony_ci BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->origin_dev); 209862306a36Sopenharmony_ci if (r) { 209962306a36Sopenharmony_ci *error = "Error opening origin device"; 210062306a36Sopenharmony_ci return r; 210162306a36Sopenharmony_ci } 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci ca->origin_sectors = get_dev_size(ca->origin_dev); 210462306a36Sopenharmony_ci if (ca->ti->len > ca->origin_sectors) { 210562306a36Sopenharmony_ci *error = "Device size larger than cached device"; 210662306a36Sopenharmony_ci return -EINVAL; 210762306a36Sopenharmony_ci } 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci return 0; 211062306a36Sopenharmony_ci} 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_cistatic int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, 211362306a36Sopenharmony_ci char **error) 211462306a36Sopenharmony_ci{ 211562306a36Sopenharmony_ci unsigned long block_size; 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci if (!at_least_one_arg(as, error)) 211862306a36Sopenharmony_ci return -EINVAL; 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size || 212162306a36Sopenharmony_ci block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || 212262306a36Sopenharmony_ci block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || 212362306a36Sopenharmony_ci block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { 
212462306a36Sopenharmony_ci *error = "Invalid data block size"; 212562306a36Sopenharmony_ci return -EINVAL; 212662306a36Sopenharmony_ci } 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci if (block_size > ca->cache_sectors) { 212962306a36Sopenharmony_ci *error = "Data block size is larger than the cache device"; 213062306a36Sopenharmony_ci return -EINVAL; 213162306a36Sopenharmony_ci } 213262306a36Sopenharmony_ci 213362306a36Sopenharmony_ci ca->block_size = block_size; 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ci return 0; 213662306a36Sopenharmony_ci} 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_cistatic void init_features(struct cache_features *cf) 213962306a36Sopenharmony_ci{ 214062306a36Sopenharmony_ci cf->mode = CM_WRITE; 214162306a36Sopenharmony_ci cf->io_mode = CM_IO_WRITEBACK; 214262306a36Sopenharmony_ci cf->metadata_version = 1; 214362306a36Sopenharmony_ci cf->discard_passdown = true; 214462306a36Sopenharmony_ci} 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_cistatic int parse_features(struct cache_args *ca, struct dm_arg_set *as, 214762306a36Sopenharmony_ci char **error) 214862306a36Sopenharmony_ci{ 214962306a36Sopenharmony_ci static const struct dm_arg _args[] = { 215062306a36Sopenharmony_ci {0, 3, "Invalid number of cache feature arguments"}, 215162306a36Sopenharmony_ci }; 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci int r, mode_ctr = 0; 215462306a36Sopenharmony_ci unsigned int argc; 215562306a36Sopenharmony_ci const char *arg; 215662306a36Sopenharmony_ci struct cache_features *cf = &ca->features; 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci init_features(cf); 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci r = dm_read_arg_group(_args, as, &argc, error); 216162306a36Sopenharmony_ci if (r) 216262306a36Sopenharmony_ci return -EINVAL; 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci while (argc--) { 216562306a36Sopenharmony_ci arg = dm_shift_arg(as); 216662306a36Sopenharmony_ci 
216762306a36Sopenharmony_ci if (!strcasecmp(arg, "writeback")) { 216862306a36Sopenharmony_ci cf->io_mode = CM_IO_WRITEBACK; 216962306a36Sopenharmony_ci mode_ctr++; 217062306a36Sopenharmony_ci } 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci else if (!strcasecmp(arg, "writethrough")) { 217362306a36Sopenharmony_ci cf->io_mode = CM_IO_WRITETHROUGH; 217462306a36Sopenharmony_ci mode_ctr++; 217562306a36Sopenharmony_ci } 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci else if (!strcasecmp(arg, "passthrough")) { 217862306a36Sopenharmony_ci cf->io_mode = CM_IO_PASSTHROUGH; 217962306a36Sopenharmony_ci mode_ctr++; 218062306a36Sopenharmony_ci } 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ci else if (!strcasecmp(arg, "metadata2")) 218362306a36Sopenharmony_ci cf->metadata_version = 2; 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ci else if (!strcasecmp(arg, "no_discard_passdown")) 218662306a36Sopenharmony_ci cf->discard_passdown = false; 218762306a36Sopenharmony_ci 218862306a36Sopenharmony_ci else { 218962306a36Sopenharmony_ci *error = "Unrecognised cache feature requested"; 219062306a36Sopenharmony_ci return -EINVAL; 219162306a36Sopenharmony_ci } 219262306a36Sopenharmony_ci } 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_ci if (mode_ctr > 1) { 219562306a36Sopenharmony_ci *error = "Duplicate cache io_mode features requested"; 219662306a36Sopenharmony_ci return -EINVAL; 219762306a36Sopenharmony_ci } 219862306a36Sopenharmony_ci 219962306a36Sopenharmony_ci return 0; 220062306a36Sopenharmony_ci} 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_cistatic int parse_policy(struct cache_args *ca, struct dm_arg_set *as, 220362306a36Sopenharmony_ci char **error) 220462306a36Sopenharmony_ci{ 220562306a36Sopenharmony_ci static const struct dm_arg _args[] = { 220662306a36Sopenharmony_ci {0, 1024, "Invalid number of policy arguments"}, 220762306a36Sopenharmony_ci }; 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci int r; 
221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci if (!at_least_one_arg(as, error)) 221262306a36Sopenharmony_ci return -EINVAL; 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci ca->policy_name = dm_shift_arg(as); 221562306a36Sopenharmony_ci 221662306a36Sopenharmony_ci r = dm_read_arg_group(_args, as, &ca->policy_argc, error); 221762306a36Sopenharmony_ci if (r) 221862306a36Sopenharmony_ci return -EINVAL; 221962306a36Sopenharmony_ci 222062306a36Sopenharmony_ci ca->policy_argv = (const char **)as->argv; 222162306a36Sopenharmony_ci dm_consume_args(as, ca->policy_argc); 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_ci return 0; 222462306a36Sopenharmony_ci} 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_cistatic int parse_cache_args(struct cache_args *ca, int argc, char **argv, 222762306a36Sopenharmony_ci char **error) 222862306a36Sopenharmony_ci{ 222962306a36Sopenharmony_ci int r; 223062306a36Sopenharmony_ci struct dm_arg_set as; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci as.argc = argc; 223362306a36Sopenharmony_ci as.argv = argv; 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci r = parse_metadata_dev(ca, &as, error); 223662306a36Sopenharmony_ci if (r) 223762306a36Sopenharmony_ci return r; 223862306a36Sopenharmony_ci 223962306a36Sopenharmony_ci r = parse_cache_dev(ca, &as, error); 224062306a36Sopenharmony_ci if (r) 224162306a36Sopenharmony_ci return r; 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci r = parse_origin_dev(ca, &as, error); 224462306a36Sopenharmony_ci if (r) 224562306a36Sopenharmony_ci return r; 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci r = parse_block_size(ca, &as, error); 224862306a36Sopenharmony_ci if (r) 224962306a36Sopenharmony_ci return r; 225062306a36Sopenharmony_ci 225162306a36Sopenharmony_ci r = parse_features(ca, &as, error); 225262306a36Sopenharmony_ci if (r) 225362306a36Sopenharmony_ci return r; 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci r = parse_policy(ca, &as, 
error); 225662306a36Sopenharmony_ci if (r) 225762306a36Sopenharmony_ci return r; 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci return 0; 226062306a36Sopenharmony_ci} 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 226362306a36Sopenharmony_ci 226462306a36Sopenharmony_cistatic struct kmem_cache *migration_cache; 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci#define NOT_CORE_OPTION 1 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_cistatic int process_config_option(struct cache *cache, const char *key, const char *value) 226962306a36Sopenharmony_ci{ 227062306a36Sopenharmony_ci unsigned long tmp; 227162306a36Sopenharmony_ci 227262306a36Sopenharmony_ci if (!strcasecmp(key, "migration_threshold")) { 227362306a36Sopenharmony_ci if (kstrtoul(value, 10, &tmp)) 227462306a36Sopenharmony_ci return -EINVAL; 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_ci cache->migration_threshold = tmp; 227762306a36Sopenharmony_ci return 0; 227862306a36Sopenharmony_ci } 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci return NOT_CORE_OPTION; 228162306a36Sopenharmony_ci} 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_cistatic int set_config_value(struct cache *cache, const char *key, const char *value) 228462306a36Sopenharmony_ci{ 228562306a36Sopenharmony_ci int r = process_config_option(cache, key, value); 228662306a36Sopenharmony_ci 228762306a36Sopenharmony_ci if (r == NOT_CORE_OPTION) 228862306a36Sopenharmony_ci r = policy_set_config_value(cache->policy, key, value); 228962306a36Sopenharmony_ci 229062306a36Sopenharmony_ci if (r) 229162306a36Sopenharmony_ci DMWARN("bad config value for %s: %s", key, value); 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci return r; 229462306a36Sopenharmony_ci} 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_cistatic int set_config_values(struct cache *cache, int argc, const char **argv) 229762306a36Sopenharmony_ci{ 
229862306a36Sopenharmony_ci int r = 0; 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci if (argc & 1) { 230162306a36Sopenharmony_ci DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); 230262306a36Sopenharmony_ci return -EINVAL; 230362306a36Sopenharmony_ci } 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci while (argc) { 230662306a36Sopenharmony_ci r = set_config_value(cache, argv[0], argv[1]); 230762306a36Sopenharmony_ci if (r) 230862306a36Sopenharmony_ci break; 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_ci argc -= 2; 231162306a36Sopenharmony_ci argv += 2; 231262306a36Sopenharmony_ci } 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci return r; 231562306a36Sopenharmony_ci} 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_cistatic int create_cache_policy(struct cache *cache, struct cache_args *ca, 231862306a36Sopenharmony_ci char **error) 231962306a36Sopenharmony_ci{ 232062306a36Sopenharmony_ci struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 232162306a36Sopenharmony_ci cache->cache_size, 232262306a36Sopenharmony_ci cache->origin_sectors, 232362306a36Sopenharmony_ci cache->sectors_per_block); 232462306a36Sopenharmony_ci if (IS_ERR(p)) { 232562306a36Sopenharmony_ci *error = "Error creating cache's policy"; 232662306a36Sopenharmony_ci return PTR_ERR(p); 232762306a36Sopenharmony_ci } 232862306a36Sopenharmony_ci cache->policy = p; 232962306a36Sopenharmony_ci BUG_ON(!cache->policy); 233062306a36Sopenharmony_ci 233162306a36Sopenharmony_ci return 0; 233262306a36Sopenharmony_ci} 233362306a36Sopenharmony_ci 233462306a36Sopenharmony_ci/* 233562306a36Sopenharmony_ci * We want the discard block size to be at least the size of the cache 233662306a36Sopenharmony_ci * block size and have no more than 2^14 discard blocks across the origin. 
233762306a36Sopenharmony_ci */ 233862306a36Sopenharmony_ci#define MAX_DISCARD_BLOCKS (1 << 14) 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_cistatic bool too_many_discard_blocks(sector_t discard_block_size, 234162306a36Sopenharmony_ci sector_t origin_size) 234262306a36Sopenharmony_ci{ 234362306a36Sopenharmony_ci (void) sector_div(origin_size, discard_block_size); 234462306a36Sopenharmony_ci 234562306a36Sopenharmony_ci return origin_size > MAX_DISCARD_BLOCKS; 234662306a36Sopenharmony_ci} 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_cistatic sector_t calculate_discard_block_size(sector_t cache_block_size, 234962306a36Sopenharmony_ci sector_t origin_size) 235062306a36Sopenharmony_ci{ 235162306a36Sopenharmony_ci sector_t discard_block_size = cache_block_size; 235262306a36Sopenharmony_ci 235362306a36Sopenharmony_ci if (origin_size) 235462306a36Sopenharmony_ci while (too_many_discard_blocks(discard_block_size, origin_size)) 235562306a36Sopenharmony_ci discard_block_size *= 2; 235662306a36Sopenharmony_ci 235762306a36Sopenharmony_ci return discard_block_size; 235862306a36Sopenharmony_ci} 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_cistatic void set_cache_size(struct cache *cache, dm_cblock_t size) 236162306a36Sopenharmony_ci{ 236262306a36Sopenharmony_ci dm_block_t nr_blocks = from_cblock(size); 236362306a36Sopenharmony_ci 236462306a36Sopenharmony_ci if (nr_blocks > (1 << 20) && cache->cache_size != size) 236562306a36Sopenharmony_ci DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n" 236662306a36Sopenharmony_ci "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n" 236762306a36Sopenharmony_ci "Please consider increasing the cache block size to reduce the overall cache block count.", 236862306a36Sopenharmony_ci (unsigned long long) nr_blocks); 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci cache->cache_size = size; 237162306a36Sopenharmony_ci} 
237262306a36Sopenharmony_ci 237362306a36Sopenharmony_ci#define DEFAULT_MIGRATION_THRESHOLD 2048 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_cistatic int cache_create(struct cache_args *ca, struct cache **result) 237662306a36Sopenharmony_ci{ 237762306a36Sopenharmony_ci int r = 0; 237862306a36Sopenharmony_ci char **error = &ca->ti->error; 237962306a36Sopenharmony_ci struct cache *cache; 238062306a36Sopenharmony_ci struct dm_target *ti = ca->ti; 238162306a36Sopenharmony_ci dm_block_t origin_blocks; 238262306a36Sopenharmony_ci struct dm_cache_metadata *cmd; 238362306a36Sopenharmony_ci bool may_format = ca->features.mode == CM_WRITE; 238462306a36Sopenharmony_ci 238562306a36Sopenharmony_ci cache = kzalloc(sizeof(*cache), GFP_KERNEL); 238662306a36Sopenharmony_ci if (!cache) 238762306a36Sopenharmony_ci return -ENOMEM; 238862306a36Sopenharmony_ci 238962306a36Sopenharmony_ci cache->ti = ca->ti; 239062306a36Sopenharmony_ci ti->private = cache; 239162306a36Sopenharmony_ci ti->accounts_remapped_io = true; 239262306a36Sopenharmony_ci ti->num_flush_bios = 2; 239362306a36Sopenharmony_ci ti->flush_supported = true; 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci ti->num_discard_bios = 1; 239662306a36Sopenharmony_ci ti->discards_supported = true; 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci ti->per_io_data_size = sizeof(struct per_bio_data); 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_ci cache->features = ca->features; 240162306a36Sopenharmony_ci if (writethrough_mode(cache)) { 240262306a36Sopenharmony_ci /* Create bioset for writethrough bios issued to origin */ 240362306a36Sopenharmony_ci r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0); 240462306a36Sopenharmony_ci if (r) 240562306a36Sopenharmony_ci goto bad; 240662306a36Sopenharmony_ci } 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci cache->metadata_dev = ca->metadata_dev; 240962306a36Sopenharmony_ci cache->origin_dev = ca->origin_dev; 241062306a36Sopenharmony_ci cache->cache_dev = 
ca->cache_dev; 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_ci ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 241362306a36Sopenharmony_ci 241462306a36Sopenharmony_ci origin_blocks = cache->origin_sectors = ca->origin_sectors; 241562306a36Sopenharmony_ci origin_blocks = block_div(origin_blocks, ca->block_size); 241662306a36Sopenharmony_ci cache->origin_blocks = to_oblock(origin_blocks); 241762306a36Sopenharmony_ci 241862306a36Sopenharmony_ci cache->sectors_per_block = ca->block_size; 241962306a36Sopenharmony_ci if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 242062306a36Sopenharmony_ci r = -EINVAL; 242162306a36Sopenharmony_ci goto bad; 242262306a36Sopenharmony_ci } 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ci if (ca->block_size & (ca->block_size - 1)) { 242562306a36Sopenharmony_ci dm_block_t cache_size = ca->cache_sectors; 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_ci cache->sectors_per_block_shift = -1; 242862306a36Sopenharmony_ci cache_size = block_div(cache_size, ca->block_size); 242962306a36Sopenharmony_ci set_cache_size(cache, to_cblock(cache_size)); 243062306a36Sopenharmony_ci } else { 243162306a36Sopenharmony_ci cache->sectors_per_block_shift = __ffs(ca->block_size); 243262306a36Sopenharmony_ci set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift)); 243362306a36Sopenharmony_ci } 243462306a36Sopenharmony_ci 243562306a36Sopenharmony_ci r = create_cache_policy(cache, ca, error); 243662306a36Sopenharmony_ci if (r) 243762306a36Sopenharmony_ci goto bad; 243862306a36Sopenharmony_ci 243962306a36Sopenharmony_ci cache->policy_nr_args = ca->policy_argc; 244062306a36Sopenharmony_ci cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 244162306a36Sopenharmony_ci 244262306a36Sopenharmony_ci r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 244362306a36Sopenharmony_ci if (r) { 244462306a36Sopenharmony_ci *error = "Error setting cache policy's config values"; 
244562306a36Sopenharmony_ci goto bad; 244662306a36Sopenharmony_ci } 244762306a36Sopenharmony_ci 244862306a36Sopenharmony_ci cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 244962306a36Sopenharmony_ci ca->block_size, may_format, 245062306a36Sopenharmony_ci dm_cache_policy_get_hint_size(cache->policy), 245162306a36Sopenharmony_ci ca->features.metadata_version); 245262306a36Sopenharmony_ci if (IS_ERR(cmd)) { 245362306a36Sopenharmony_ci *error = "Error creating metadata object"; 245462306a36Sopenharmony_ci r = PTR_ERR(cmd); 245562306a36Sopenharmony_ci goto bad; 245662306a36Sopenharmony_ci } 245762306a36Sopenharmony_ci cache->cmd = cmd; 245862306a36Sopenharmony_ci set_cache_mode(cache, CM_WRITE); 245962306a36Sopenharmony_ci if (get_cache_mode(cache) != CM_WRITE) { 246062306a36Sopenharmony_ci *error = "Unable to get write access to metadata, please check/repair metadata."; 246162306a36Sopenharmony_ci r = -EINVAL; 246262306a36Sopenharmony_ci goto bad; 246362306a36Sopenharmony_ci } 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci if (passthrough_mode(cache)) { 246662306a36Sopenharmony_ci bool all_clean; 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 246962306a36Sopenharmony_ci if (r) { 247062306a36Sopenharmony_ci *error = "dm_cache_metadata_all_clean() failed"; 247162306a36Sopenharmony_ci goto bad; 247262306a36Sopenharmony_ci } 247362306a36Sopenharmony_ci 247462306a36Sopenharmony_ci if (!all_clean) { 247562306a36Sopenharmony_ci *error = "Cannot enter passthrough mode unless all blocks are clean"; 247662306a36Sopenharmony_ci r = -EINVAL; 247762306a36Sopenharmony_ci goto bad; 247862306a36Sopenharmony_ci } 247962306a36Sopenharmony_ci 248062306a36Sopenharmony_ci policy_allow_migrations(cache->policy, false); 248162306a36Sopenharmony_ci } 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_ci spin_lock_init(&cache->lock); 248462306a36Sopenharmony_ci bio_list_init(&cache->deferred_bios); 
248562306a36Sopenharmony_ci atomic_set(&cache->nr_allocated_migrations, 0); 248662306a36Sopenharmony_ci atomic_set(&cache->nr_io_migrations, 0); 248762306a36Sopenharmony_ci init_waitqueue_head(&cache->migration_wait); 248862306a36Sopenharmony_ci 248962306a36Sopenharmony_ci r = -ENOMEM; 249062306a36Sopenharmony_ci atomic_set(&cache->nr_dirty, 0); 249162306a36Sopenharmony_ci cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 249262306a36Sopenharmony_ci if (!cache->dirty_bitset) { 249362306a36Sopenharmony_ci *error = "could not allocate dirty bitset"; 249462306a36Sopenharmony_ci goto bad; 249562306a36Sopenharmony_ci } 249662306a36Sopenharmony_ci clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 249762306a36Sopenharmony_ci 249862306a36Sopenharmony_ci cache->discard_block_size = 249962306a36Sopenharmony_ci calculate_discard_block_size(cache->sectors_per_block, 250062306a36Sopenharmony_ci cache->origin_sectors); 250162306a36Sopenharmony_ci cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors, 250262306a36Sopenharmony_ci cache->discard_block_size)); 250362306a36Sopenharmony_ci cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); 250462306a36Sopenharmony_ci if (!cache->discard_bitset) { 250562306a36Sopenharmony_ci *error = "could not allocate discard bitset"; 250662306a36Sopenharmony_ci goto bad; 250762306a36Sopenharmony_ci } 250862306a36Sopenharmony_ci clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 250962306a36Sopenharmony_ci 251062306a36Sopenharmony_ci cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 251162306a36Sopenharmony_ci if (IS_ERR(cache->copier)) { 251262306a36Sopenharmony_ci *error = "could not create kcopyd client"; 251362306a36Sopenharmony_ci r = PTR_ERR(cache->copier); 251462306a36Sopenharmony_ci goto bad; 251562306a36Sopenharmony_ci } 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, 
WQ_MEM_RECLAIM, 0); 251862306a36Sopenharmony_ci if (!cache->wq) { 251962306a36Sopenharmony_ci *error = "could not create workqueue for metadata object"; 252062306a36Sopenharmony_ci goto bad; 252162306a36Sopenharmony_ci } 252262306a36Sopenharmony_ci INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios); 252362306a36Sopenharmony_ci INIT_WORK(&cache->migration_worker, check_migrations); 252462306a36Sopenharmony_ci INIT_DELAYED_WORK(&cache->waker, do_waker); 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci cache->prison = dm_bio_prison_create_v2(cache->wq); 252762306a36Sopenharmony_ci if (!cache->prison) { 252862306a36Sopenharmony_ci *error = "could not create bio prison"; 252962306a36Sopenharmony_ci goto bad; 253062306a36Sopenharmony_ci } 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE, 253362306a36Sopenharmony_ci migration_cache); 253462306a36Sopenharmony_ci if (r) { 253562306a36Sopenharmony_ci *error = "Error creating cache's migration mempool"; 253662306a36Sopenharmony_ci goto bad; 253762306a36Sopenharmony_ci } 253862306a36Sopenharmony_ci 253962306a36Sopenharmony_ci cache->need_tick_bio = true; 254062306a36Sopenharmony_ci cache->sized = false; 254162306a36Sopenharmony_ci cache->invalidate = false; 254262306a36Sopenharmony_ci cache->commit_requested = false; 254362306a36Sopenharmony_ci cache->loaded_mappings = false; 254462306a36Sopenharmony_ci cache->loaded_discards = false; 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_ci load_stats(cache); 254762306a36Sopenharmony_ci 254862306a36Sopenharmony_ci atomic_set(&cache->stats.demotion, 0); 254962306a36Sopenharmony_ci atomic_set(&cache->stats.promotion, 0); 255062306a36Sopenharmony_ci atomic_set(&cache->stats.copies_avoided, 0); 255162306a36Sopenharmony_ci atomic_set(&cache->stats.cache_cell_clash, 0); 255262306a36Sopenharmony_ci atomic_set(&cache->stats.commit_count, 0); 255362306a36Sopenharmony_ci 
atomic_set(&cache->stats.discard_count, 0); 255462306a36Sopenharmony_ci 255562306a36Sopenharmony_ci spin_lock_init(&cache->invalidation_lock); 255662306a36Sopenharmony_ci INIT_LIST_HEAD(&cache->invalidation_requests); 255762306a36Sopenharmony_ci 255862306a36Sopenharmony_ci batcher_init(&cache->committer, commit_op, cache, 255962306a36Sopenharmony_ci issue_op, cache, cache->wq); 256062306a36Sopenharmony_ci dm_iot_init(&cache->tracker); 256162306a36Sopenharmony_ci 256262306a36Sopenharmony_ci init_rwsem(&cache->background_work_lock); 256362306a36Sopenharmony_ci prevent_background_work(cache); 256462306a36Sopenharmony_ci 256562306a36Sopenharmony_ci *result = cache; 256662306a36Sopenharmony_ci return 0; 256762306a36Sopenharmony_cibad: 256862306a36Sopenharmony_ci destroy(cache); 256962306a36Sopenharmony_ci return r; 257062306a36Sopenharmony_ci} 257162306a36Sopenharmony_ci 257262306a36Sopenharmony_cistatic int copy_ctr_args(struct cache *cache, int argc, const char **argv) 257362306a36Sopenharmony_ci{ 257462306a36Sopenharmony_ci unsigned int i; 257562306a36Sopenharmony_ci const char **copy; 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_ci copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 257862306a36Sopenharmony_ci if (!copy) 257962306a36Sopenharmony_ci return -ENOMEM; 258062306a36Sopenharmony_ci for (i = 0; i < argc; i++) { 258162306a36Sopenharmony_ci copy[i] = kstrdup(argv[i], GFP_KERNEL); 258262306a36Sopenharmony_ci if (!copy[i]) { 258362306a36Sopenharmony_ci while (i--) 258462306a36Sopenharmony_ci kfree(copy[i]); 258562306a36Sopenharmony_ci kfree(copy); 258662306a36Sopenharmony_ci return -ENOMEM; 258762306a36Sopenharmony_ci } 258862306a36Sopenharmony_ci } 258962306a36Sopenharmony_ci 259062306a36Sopenharmony_ci cache->nr_ctr_args = argc; 259162306a36Sopenharmony_ci cache->ctr_args = copy; 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci return 0; 259462306a36Sopenharmony_ci} 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_cistatic int cache_ctr(struct 
dm_target *ti, unsigned int argc, char **argv) 259762306a36Sopenharmony_ci{ 259862306a36Sopenharmony_ci int r = -EINVAL; 259962306a36Sopenharmony_ci struct cache_args *ca; 260062306a36Sopenharmony_ci struct cache *cache = NULL; 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci ca = kzalloc(sizeof(*ca), GFP_KERNEL); 260362306a36Sopenharmony_ci if (!ca) { 260462306a36Sopenharmony_ci ti->error = "Error allocating memory for cache"; 260562306a36Sopenharmony_ci return -ENOMEM; 260662306a36Sopenharmony_ci } 260762306a36Sopenharmony_ci ca->ti = ti; 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci r = parse_cache_args(ca, argc, argv, &ti->error); 261062306a36Sopenharmony_ci if (r) 261162306a36Sopenharmony_ci goto out; 261262306a36Sopenharmony_ci 261362306a36Sopenharmony_ci r = cache_create(ca, &cache); 261462306a36Sopenharmony_ci if (r) 261562306a36Sopenharmony_ci goto out; 261662306a36Sopenharmony_ci 261762306a36Sopenharmony_ci r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 261862306a36Sopenharmony_ci if (r) { 261962306a36Sopenharmony_ci destroy(cache); 262062306a36Sopenharmony_ci goto out; 262162306a36Sopenharmony_ci } 262262306a36Sopenharmony_ci 262362306a36Sopenharmony_ci ti->private = cache; 262462306a36Sopenharmony_ciout: 262562306a36Sopenharmony_ci destroy_cache_args(ca); 262662306a36Sopenharmony_ci return r; 262762306a36Sopenharmony_ci} 262862306a36Sopenharmony_ci 262962306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_cistatic int cache_map(struct dm_target *ti, struct bio *bio) 263262306a36Sopenharmony_ci{ 263362306a36Sopenharmony_ci struct cache *cache = ti->private; 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci int r; 263662306a36Sopenharmony_ci bool commit_needed; 263762306a36Sopenharmony_ci dm_oblock_t block = get_bio_block(cache, bio); 263862306a36Sopenharmony_ci 263962306a36Sopenharmony_ci init_per_bio_data(bio); 
264062306a36Sopenharmony_ci if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { 264162306a36Sopenharmony_ci /* 264262306a36Sopenharmony_ci * This can only occur if the io goes to a partial block at 264362306a36Sopenharmony_ci * the end of the origin device. We don't cache these. 264462306a36Sopenharmony_ci * Just remap to the origin and carry on. 264562306a36Sopenharmony_ci */ 264662306a36Sopenharmony_ci remap_to_origin(cache, bio); 264762306a36Sopenharmony_ci accounted_begin(cache, bio); 264862306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 264962306a36Sopenharmony_ci } 265062306a36Sopenharmony_ci 265162306a36Sopenharmony_ci if (discard_or_flush(bio)) { 265262306a36Sopenharmony_ci defer_bio(cache, bio); 265362306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 265462306a36Sopenharmony_ci } 265562306a36Sopenharmony_ci 265662306a36Sopenharmony_ci r = map_bio(cache, bio, block, &commit_needed); 265762306a36Sopenharmony_ci if (commit_needed) 265862306a36Sopenharmony_ci schedule_commit(&cache->committer); 265962306a36Sopenharmony_ci 266062306a36Sopenharmony_ci return r; 266162306a36Sopenharmony_ci} 266262306a36Sopenharmony_ci 266362306a36Sopenharmony_cistatic int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) 266462306a36Sopenharmony_ci{ 266562306a36Sopenharmony_ci struct cache *cache = ti->private; 266662306a36Sopenharmony_ci unsigned long flags; 266762306a36Sopenharmony_ci struct per_bio_data *pb = get_per_bio_data(bio); 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci if (pb->tick) { 267062306a36Sopenharmony_ci policy_tick(cache->policy, false); 267162306a36Sopenharmony_ci 267262306a36Sopenharmony_ci spin_lock_irqsave(&cache->lock, flags); 267362306a36Sopenharmony_ci cache->need_tick_bio = true; 267462306a36Sopenharmony_ci spin_unlock_irqrestore(&cache->lock, flags); 267562306a36Sopenharmony_ci } 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci bio_drop_shared_lock(cache, bio); 267862306a36Sopenharmony_ci 
accounted_complete(cache, bio); 267962306a36Sopenharmony_ci 268062306a36Sopenharmony_ci return DM_ENDIO_DONE; 268162306a36Sopenharmony_ci} 268262306a36Sopenharmony_ci 268362306a36Sopenharmony_cistatic int write_dirty_bitset(struct cache *cache) 268462306a36Sopenharmony_ci{ 268562306a36Sopenharmony_ci int r; 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 268862306a36Sopenharmony_ci return -EINVAL; 268962306a36Sopenharmony_ci 269062306a36Sopenharmony_ci r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset); 269162306a36Sopenharmony_ci if (r) 269262306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r); 269362306a36Sopenharmony_ci 269462306a36Sopenharmony_ci return r; 269562306a36Sopenharmony_ci} 269662306a36Sopenharmony_ci 269762306a36Sopenharmony_cistatic int write_discard_bitset(struct cache *cache) 269862306a36Sopenharmony_ci{ 269962306a36Sopenharmony_ci unsigned int i, r; 270062306a36Sopenharmony_ci 270162306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 270262306a36Sopenharmony_ci return -EINVAL; 270362306a36Sopenharmony_ci 270462306a36Sopenharmony_ci r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, 270562306a36Sopenharmony_ci cache->discard_nr_blocks); 270662306a36Sopenharmony_ci if (r) { 270762306a36Sopenharmony_ci DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache)); 270862306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r); 270962306a36Sopenharmony_ci return r; 271062306a36Sopenharmony_ci } 271162306a36Sopenharmony_ci 271262306a36Sopenharmony_ci for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { 271362306a36Sopenharmony_ci r = dm_cache_set_discard(cache->cmd, to_dblock(i), 271462306a36Sopenharmony_ci is_discarded(cache, to_dblock(i))); 271562306a36Sopenharmony_ci if (r) { 271662306a36Sopenharmony_ci 
metadata_operation_failed(cache, "dm_cache_set_discard", r); 271762306a36Sopenharmony_ci return r; 271862306a36Sopenharmony_ci } 271962306a36Sopenharmony_ci } 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci return 0; 272262306a36Sopenharmony_ci} 272362306a36Sopenharmony_ci 272462306a36Sopenharmony_cistatic int write_hints(struct cache *cache) 272562306a36Sopenharmony_ci{ 272662306a36Sopenharmony_ci int r; 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) 272962306a36Sopenharmony_ci return -EINVAL; 273062306a36Sopenharmony_ci 273162306a36Sopenharmony_ci r = dm_cache_write_hints(cache->cmd, cache->policy); 273262306a36Sopenharmony_ci if (r) { 273362306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_write_hints", r); 273462306a36Sopenharmony_ci return r; 273562306a36Sopenharmony_ci } 273662306a36Sopenharmony_ci 273762306a36Sopenharmony_ci return 0; 273862306a36Sopenharmony_ci} 273962306a36Sopenharmony_ci 274062306a36Sopenharmony_ci/* 274162306a36Sopenharmony_ci * returns true on success 274262306a36Sopenharmony_ci */ 274362306a36Sopenharmony_cistatic bool sync_metadata(struct cache *cache) 274462306a36Sopenharmony_ci{ 274562306a36Sopenharmony_ci int r1, r2, r3, r4; 274662306a36Sopenharmony_ci 274762306a36Sopenharmony_ci r1 = write_dirty_bitset(cache); 274862306a36Sopenharmony_ci if (r1) 274962306a36Sopenharmony_ci DMERR("%s: could not write dirty bitset", cache_device_name(cache)); 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci r2 = write_discard_bitset(cache); 275262306a36Sopenharmony_ci if (r2) 275362306a36Sopenharmony_ci DMERR("%s: could not write discard bitset", cache_device_name(cache)); 275462306a36Sopenharmony_ci 275562306a36Sopenharmony_ci save_stats(cache); 275662306a36Sopenharmony_ci 275762306a36Sopenharmony_ci r3 = write_hints(cache); 275862306a36Sopenharmony_ci if (r3) 275962306a36Sopenharmony_ci DMERR("%s: could not write hints", cache_device_name(cache)); 
276062306a36Sopenharmony_ci 276162306a36Sopenharmony_ci /* 276262306a36Sopenharmony_ci * If writing the above metadata failed, we still commit, but don't 276362306a36Sopenharmony_ci * set the clean shutdown flag. This will effectively force every 276462306a36Sopenharmony_ci * dirty bit to be set on reload. 276562306a36Sopenharmony_ci */ 276662306a36Sopenharmony_ci r4 = commit(cache, !r1 && !r2 && !r3); 276762306a36Sopenharmony_ci if (r4) 276862306a36Sopenharmony_ci DMERR("%s: could not write cache metadata", cache_device_name(cache)); 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci return !r1 && !r2 && !r3 && !r4; 277162306a36Sopenharmony_ci} 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_cistatic void cache_postsuspend(struct dm_target *ti) 277462306a36Sopenharmony_ci{ 277562306a36Sopenharmony_ci struct cache *cache = ti->private; 277662306a36Sopenharmony_ci 277762306a36Sopenharmony_ci prevent_background_work(cache); 277862306a36Sopenharmony_ci BUG_ON(atomic_read(&cache->nr_io_migrations)); 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci cancel_delayed_work_sync(&cache->waker); 278162306a36Sopenharmony_ci drain_workqueue(cache->wq); 278262306a36Sopenharmony_ci WARN_ON(cache->tracker.in_flight); 278362306a36Sopenharmony_ci 278462306a36Sopenharmony_ci /* 278562306a36Sopenharmony_ci * If it's a flush suspend there won't be any deferred bios, so this 278662306a36Sopenharmony_ci * call is harmless. 
278762306a36Sopenharmony_ci */ 278862306a36Sopenharmony_ci requeue_deferred_bios(cache); 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci if (get_cache_mode(cache) == CM_WRITE) 279162306a36Sopenharmony_ci (void) sync_metadata(cache); 279262306a36Sopenharmony_ci} 279362306a36Sopenharmony_ci 279462306a36Sopenharmony_cistatic int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 279562306a36Sopenharmony_ci bool dirty, uint32_t hint, bool hint_valid) 279662306a36Sopenharmony_ci{ 279762306a36Sopenharmony_ci struct cache *cache = context; 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci if (dirty) { 280062306a36Sopenharmony_ci set_bit(from_cblock(cblock), cache->dirty_bitset); 280162306a36Sopenharmony_ci atomic_inc(&cache->nr_dirty); 280262306a36Sopenharmony_ci } else 280362306a36Sopenharmony_ci clear_bit(from_cblock(cblock), cache->dirty_bitset); 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_ci return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); 280662306a36Sopenharmony_ci} 280762306a36Sopenharmony_ci 280862306a36Sopenharmony_ci/* 280962306a36Sopenharmony_ci * The discard block size in the on disk metadata is not 281062306a36Sopenharmony_ci * necessarily the same as we're currently using. So we have to 281162306a36Sopenharmony_ci * be careful to only set the discarded attribute if we know it 281262306a36Sopenharmony_ci * covers a complete block of the new size. 281362306a36Sopenharmony_ci */ 281462306a36Sopenharmony_cistruct discard_load_info { 281562306a36Sopenharmony_ci struct cache *cache; 281662306a36Sopenharmony_ci 281762306a36Sopenharmony_ci /* 281862306a36Sopenharmony_ci * These blocks are sized using the on disk dblock size, rather 281962306a36Sopenharmony_ci * than the current one. 
282062306a36Sopenharmony_ci */ 282162306a36Sopenharmony_ci dm_block_t block_size; 282262306a36Sopenharmony_ci dm_block_t discard_begin, discard_end; 282362306a36Sopenharmony_ci}; 282462306a36Sopenharmony_ci 282562306a36Sopenharmony_cistatic void discard_load_info_init(struct cache *cache, 282662306a36Sopenharmony_ci struct discard_load_info *li) 282762306a36Sopenharmony_ci{ 282862306a36Sopenharmony_ci li->cache = cache; 282962306a36Sopenharmony_ci li->discard_begin = li->discard_end = 0; 283062306a36Sopenharmony_ci} 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_cistatic void set_discard_range(struct discard_load_info *li) 283362306a36Sopenharmony_ci{ 283462306a36Sopenharmony_ci sector_t b, e; 283562306a36Sopenharmony_ci 283662306a36Sopenharmony_ci if (li->discard_begin == li->discard_end) 283762306a36Sopenharmony_ci return; 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci /* 284062306a36Sopenharmony_ci * Convert to sectors. 284162306a36Sopenharmony_ci */ 284262306a36Sopenharmony_ci b = li->discard_begin * li->block_size; 284362306a36Sopenharmony_ci e = li->discard_end * li->block_size; 284462306a36Sopenharmony_ci 284562306a36Sopenharmony_ci /* 284662306a36Sopenharmony_ci * Then convert back to the current dblock size. 284762306a36Sopenharmony_ci */ 284862306a36Sopenharmony_ci b = dm_sector_div_up(b, li->cache->discard_block_size); 284962306a36Sopenharmony_ci sector_div(e, li->cache->discard_block_size); 285062306a36Sopenharmony_ci 285162306a36Sopenharmony_ci /* 285262306a36Sopenharmony_ci * The origin may have shrunk, so we need to check we're still in 285362306a36Sopenharmony_ci * bounds. 
285462306a36Sopenharmony_ci */ 285562306a36Sopenharmony_ci if (e > from_dblock(li->cache->discard_nr_blocks)) 285662306a36Sopenharmony_ci e = from_dblock(li->cache->discard_nr_blocks); 285762306a36Sopenharmony_ci 285862306a36Sopenharmony_ci for (; b < e; b++) 285962306a36Sopenharmony_ci set_discard(li->cache, to_dblock(b)); 286062306a36Sopenharmony_ci} 286162306a36Sopenharmony_ci 286262306a36Sopenharmony_cistatic int load_discard(void *context, sector_t discard_block_size, 286362306a36Sopenharmony_ci dm_dblock_t dblock, bool discard) 286462306a36Sopenharmony_ci{ 286562306a36Sopenharmony_ci struct discard_load_info *li = context; 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci li->block_size = discard_block_size; 286862306a36Sopenharmony_ci 286962306a36Sopenharmony_ci if (discard) { 287062306a36Sopenharmony_ci if (from_dblock(dblock) == li->discard_end) 287162306a36Sopenharmony_ci /* 287262306a36Sopenharmony_ci * We're already in a discard range, just extend it. 287362306a36Sopenharmony_ci */ 287462306a36Sopenharmony_ci li->discard_end = li->discard_end + 1ULL; 287562306a36Sopenharmony_ci 287662306a36Sopenharmony_ci else { 287762306a36Sopenharmony_ci /* 287862306a36Sopenharmony_ci * Emit the old range and start a new one. 
287962306a36Sopenharmony_ci */ 288062306a36Sopenharmony_ci set_discard_range(li); 288162306a36Sopenharmony_ci li->discard_begin = from_dblock(dblock); 288262306a36Sopenharmony_ci li->discard_end = li->discard_begin + 1ULL; 288362306a36Sopenharmony_ci } 288462306a36Sopenharmony_ci } else { 288562306a36Sopenharmony_ci set_discard_range(li); 288662306a36Sopenharmony_ci li->discard_begin = li->discard_end = 0; 288762306a36Sopenharmony_ci } 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_ci return 0; 289062306a36Sopenharmony_ci} 289162306a36Sopenharmony_ci 289262306a36Sopenharmony_cistatic dm_cblock_t get_cache_dev_size(struct cache *cache) 289362306a36Sopenharmony_ci{ 289462306a36Sopenharmony_ci sector_t size = get_dev_size(cache->cache_dev); 289562306a36Sopenharmony_ci (void) sector_div(size, cache->sectors_per_block); 289662306a36Sopenharmony_ci return to_cblock(size); 289762306a36Sopenharmony_ci} 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_cistatic bool can_resize(struct cache *cache, dm_cblock_t new_size) 290062306a36Sopenharmony_ci{ 290162306a36Sopenharmony_ci if (from_cblock(new_size) > from_cblock(cache->cache_size)) { 290262306a36Sopenharmony_ci if (cache->sized) { 290362306a36Sopenharmony_ci DMERR("%s: unable to extend cache due to missing cache table reload", 290462306a36Sopenharmony_ci cache_device_name(cache)); 290562306a36Sopenharmony_ci return false; 290662306a36Sopenharmony_ci } 290762306a36Sopenharmony_ci } 290862306a36Sopenharmony_ci 290962306a36Sopenharmony_ci /* 291062306a36Sopenharmony_ci * We can't drop a dirty block when shrinking the cache. 
291162306a36Sopenharmony_ci */ 291262306a36Sopenharmony_ci while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 291362306a36Sopenharmony_ci new_size = to_cblock(from_cblock(new_size) + 1); 291462306a36Sopenharmony_ci if (is_dirty(cache, new_size)) { 291562306a36Sopenharmony_ci DMERR("%s: unable to shrink cache; cache block %llu is dirty", 291662306a36Sopenharmony_ci cache_device_name(cache), 291762306a36Sopenharmony_ci (unsigned long long) from_cblock(new_size)); 291862306a36Sopenharmony_ci return false; 291962306a36Sopenharmony_ci } 292062306a36Sopenharmony_ci } 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_ci return true; 292362306a36Sopenharmony_ci} 292462306a36Sopenharmony_ci 292562306a36Sopenharmony_cistatic int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 292662306a36Sopenharmony_ci{ 292762306a36Sopenharmony_ci int r; 292862306a36Sopenharmony_ci 292962306a36Sopenharmony_ci r = dm_cache_resize(cache->cmd, new_size); 293062306a36Sopenharmony_ci if (r) { 293162306a36Sopenharmony_ci DMERR("%s: could not resize cache metadata", cache_device_name(cache)); 293262306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_resize", r); 293362306a36Sopenharmony_ci return r; 293462306a36Sopenharmony_ci } 293562306a36Sopenharmony_ci 293662306a36Sopenharmony_ci set_cache_size(cache, new_size); 293762306a36Sopenharmony_ci 293862306a36Sopenharmony_ci return 0; 293962306a36Sopenharmony_ci} 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_cistatic int cache_preresume(struct dm_target *ti) 294262306a36Sopenharmony_ci{ 294362306a36Sopenharmony_ci int r = 0; 294462306a36Sopenharmony_ci struct cache *cache = ti->private; 294562306a36Sopenharmony_ci dm_cblock_t csize = get_cache_dev_size(cache); 294662306a36Sopenharmony_ci 294762306a36Sopenharmony_ci /* 294862306a36Sopenharmony_ci * Check to see if the cache has resized. 
294962306a36Sopenharmony_ci */ 295062306a36Sopenharmony_ci if (!cache->sized) { 295162306a36Sopenharmony_ci r = resize_cache_dev(cache, csize); 295262306a36Sopenharmony_ci if (r) 295362306a36Sopenharmony_ci return r; 295462306a36Sopenharmony_ci 295562306a36Sopenharmony_ci cache->sized = true; 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci } else if (csize != cache->cache_size) { 295862306a36Sopenharmony_ci if (!can_resize(cache, csize)) 295962306a36Sopenharmony_ci return -EINVAL; 296062306a36Sopenharmony_ci 296162306a36Sopenharmony_ci r = resize_cache_dev(cache, csize); 296262306a36Sopenharmony_ci if (r) 296362306a36Sopenharmony_ci return r; 296462306a36Sopenharmony_ci } 296562306a36Sopenharmony_ci 296662306a36Sopenharmony_ci if (!cache->loaded_mappings) { 296762306a36Sopenharmony_ci r = dm_cache_load_mappings(cache->cmd, cache->policy, 296862306a36Sopenharmony_ci load_mapping, cache); 296962306a36Sopenharmony_ci if (r) { 297062306a36Sopenharmony_ci DMERR("%s: could not load cache mappings", cache_device_name(cache)); 297162306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_load_mappings", r); 297262306a36Sopenharmony_ci return r; 297362306a36Sopenharmony_ci } 297462306a36Sopenharmony_ci 297562306a36Sopenharmony_ci cache->loaded_mappings = true; 297662306a36Sopenharmony_ci } 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci if (!cache->loaded_discards) { 297962306a36Sopenharmony_ci struct discard_load_info li; 298062306a36Sopenharmony_ci 298162306a36Sopenharmony_ci /* 298262306a36Sopenharmony_ci * The discard bitset could have been resized, or the 298362306a36Sopenharmony_ci * discard block size changed. To be safe we start by 298462306a36Sopenharmony_ci * setting every dblock to not discarded. 
298562306a36Sopenharmony_ci */ 298662306a36Sopenharmony_ci clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 298762306a36Sopenharmony_ci 298862306a36Sopenharmony_ci discard_load_info_init(cache, &li); 298962306a36Sopenharmony_ci r = dm_cache_load_discards(cache->cmd, load_discard, &li); 299062306a36Sopenharmony_ci if (r) { 299162306a36Sopenharmony_ci DMERR("%s: could not load origin discards", cache_device_name(cache)); 299262306a36Sopenharmony_ci metadata_operation_failed(cache, "dm_cache_load_discards", r); 299362306a36Sopenharmony_ci return r; 299462306a36Sopenharmony_ci } 299562306a36Sopenharmony_ci set_discard_range(&li); 299662306a36Sopenharmony_ci 299762306a36Sopenharmony_ci cache->loaded_discards = true; 299862306a36Sopenharmony_ci } 299962306a36Sopenharmony_ci 300062306a36Sopenharmony_ci return r; 300162306a36Sopenharmony_ci} 300262306a36Sopenharmony_ci 300362306a36Sopenharmony_cistatic void cache_resume(struct dm_target *ti) 300462306a36Sopenharmony_ci{ 300562306a36Sopenharmony_ci struct cache *cache = ti->private; 300662306a36Sopenharmony_ci 300762306a36Sopenharmony_ci cache->need_tick_bio = true; 300862306a36Sopenharmony_ci allow_background_work(cache); 300962306a36Sopenharmony_ci do_waker(&cache->waker.work); 301062306a36Sopenharmony_ci} 301162306a36Sopenharmony_ci 301262306a36Sopenharmony_cistatic void emit_flags(struct cache *cache, char *result, 301362306a36Sopenharmony_ci unsigned int maxlen, ssize_t *sz_ptr) 301462306a36Sopenharmony_ci{ 301562306a36Sopenharmony_ci ssize_t sz = *sz_ptr; 301662306a36Sopenharmony_ci struct cache_features *cf = &cache->features; 301762306a36Sopenharmony_ci unsigned int count = (cf->metadata_version == 2) + !cf->discard_passdown + 1; 301862306a36Sopenharmony_ci 301962306a36Sopenharmony_ci DMEMIT("%u ", count); 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_ci if (cf->metadata_version == 2) 302262306a36Sopenharmony_ci DMEMIT("metadata2 "); 302362306a36Sopenharmony_ci 
302462306a36Sopenharmony_ci if (writethrough_mode(cache)) 302562306a36Sopenharmony_ci DMEMIT("writethrough "); 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci else if (passthrough_mode(cache)) 302862306a36Sopenharmony_ci DMEMIT("passthrough "); 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci else if (writeback_mode(cache)) 303162306a36Sopenharmony_ci DMEMIT("writeback "); 303262306a36Sopenharmony_ci 303362306a36Sopenharmony_ci else { 303462306a36Sopenharmony_ci DMEMIT("unknown "); 303562306a36Sopenharmony_ci DMERR("%s: internal error: unknown io mode: %d", 303662306a36Sopenharmony_ci cache_device_name(cache), (int) cf->io_mode); 303762306a36Sopenharmony_ci } 303862306a36Sopenharmony_ci 303962306a36Sopenharmony_ci if (!cf->discard_passdown) 304062306a36Sopenharmony_ci DMEMIT("no_discard_passdown "); 304162306a36Sopenharmony_ci 304262306a36Sopenharmony_ci *sz_ptr = sz; 304362306a36Sopenharmony_ci} 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci/* 304662306a36Sopenharmony_ci * Status format: 304762306a36Sopenharmony_ci * 304862306a36Sopenharmony_ci * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 304962306a36Sopenharmony_ci * <cache block size> <#used cache blocks>/<#total cache blocks> 305062306a36Sopenharmony_ci * <#read hits> <#read misses> <#write hits> <#write misses> 305162306a36Sopenharmony_ci * <#demotions> <#promotions> <#dirty> 305262306a36Sopenharmony_ci * <#features> <features>* 305362306a36Sopenharmony_ci * <#core args> <core args> 305462306a36Sopenharmony_ci * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check> 305562306a36Sopenharmony_ci */ 305662306a36Sopenharmony_cistatic void cache_status(struct dm_target *ti, status_type_t type, 305762306a36Sopenharmony_ci unsigned int status_flags, char *result, unsigned int maxlen) 305862306a36Sopenharmony_ci{ 305962306a36Sopenharmony_ci int r = 0; 306062306a36Sopenharmony_ci unsigned int i; 306162306a36Sopenharmony_ci ssize_t sz = 0; 
306262306a36Sopenharmony_ci dm_block_t nr_free_blocks_metadata = 0; 306362306a36Sopenharmony_ci dm_block_t nr_blocks_metadata = 0; 306462306a36Sopenharmony_ci char buf[BDEVNAME_SIZE]; 306562306a36Sopenharmony_ci struct cache *cache = ti->private; 306662306a36Sopenharmony_ci dm_cblock_t residency; 306762306a36Sopenharmony_ci bool needs_check; 306862306a36Sopenharmony_ci 306962306a36Sopenharmony_ci switch (type) { 307062306a36Sopenharmony_ci case STATUSTYPE_INFO: 307162306a36Sopenharmony_ci if (get_cache_mode(cache) == CM_FAIL) { 307262306a36Sopenharmony_ci DMEMIT("Fail"); 307362306a36Sopenharmony_ci break; 307462306a36Sopenharmony_ci } 307562306a36Sopenharmony_ci 307662306a36Sopenharmony_ci /* Commit to ensure statistics aren't out-of-date */ 307762306a36Sopenharmony_ci if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) 307862306a36Sopenharmony_ci (void) commit(cache, false); 307962306a36Sopenharmony_ci 308062306a36Sopenharmony_ci r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata); 308162306a36Sopenharmony_ci if (r) { 308262306a36Sopenharmony_ci DMERR("%s: dm_cache_get_free_metadata_block_count returned %d", 308362306a36Sopenharmony_ci cache_device_name(cache), r); 308462306a36Sopenharmony_ci goto err; 308562306a36Sopenharmony_ci } 308662306a36Sopenharmony_ci 308762306a36Sopenharmony_ci r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 308862306a36Sopenharmony_ci if (r) { 308962306a36Sopenharmony_ci DMERR("%s: dm_cache_get_metadata_dev_size returned %d", 309062306a36Sopenharmony_ci cache_device_name(cache), r); 309162306a36Sopenharmony_ci goto err; 309262306a36Sopenharmony_ci } 309362306a36Sopenharmony_ci 309462306a36Sopenharmony_ci residency = policy_residency(cache->policy); 309562306a36Sopenharmony_ci 309662306a36Sopenharmony_ci DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ", 309762306a36Sopenharmony_ci (unsigned int)DM_CACHE_METADATA_BLOCK_SIZE, 309862306a36Sopenharmony_ci (unsigned 
long long)(nr_blocks_metadata - nr_free_blocks_metadata), 309962306a36Sopenharmony_ci (unsigned long long)nr_blocks_metadata, 310062306a36Sopenharmony_ci (unsigned long long)cache->sectors_per_block, 310162306a36Sopenharmony_ci (unsigned long long) from_cblock(residency), 310262306a36Sopenharmony_ci (unsigned long long) from_cblock(cache->cache_size), 310362306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.read_hit), 310462306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.read_miss), 310562306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.write_hit), 310662306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.write_miss), 310762306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.demotion), 310862306a36Sopenharmony_ci (unsigned int) atomic_read(&cache->stats.promotion), 310962306a36Sopenharmony_ci (unsigned long) atomic_read(&cache->nr_dirty)); 311062306a36Sopenharmony_ci 311162306a36Sopenharmony_ci emit_flags(cache, result, maxlen, &sz); 311262306a36Sopenharmony_ci 311362306a36Sopenharmony_ci DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 311462306a36Sopenharmony_ci 311562306a36Sopenharmony_ci DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 311662306a36Sopenharmony_ci if (sz < maxlen) { 311762306a36Sopenharmony_ci r = policy_emit_config_values(cache->policy, result, maxlen, &sz); 311862306a36Sopenharmony_ci if (r) 311962306a36Sopenharmony_ci DMERR("%s: policy_emit_config_values returned %d", 312062306a36Sopenharmony_ci cache_device_name(cache), r); 312162306a36Sopenharmony_ci } 312262306a36Sopenharmony_ci 312362306a36Sopenharmony_ci if (get_cache_mode(cache) == CM_READ_ONLY) 312462306a36Sopenharmony_ci DMEMIT("ro "); 312562306a36Sopenharmony_ci else 312662306a36Sopenharmony_ci DMEMIT("rw "); 312762306a36Sopenharmony_ci 312862306a36Sopenharmony_ci r = dm_cache_metadata_needs_check(cache->cmd, &needs_check); 312962306a36Sopenharmony_ci 
313062306a36Sopenharmony_ci if (r || needs_check) 313162306a36Sopenharmony_ci DMEMIT("needs_check "); 313262306a36Sopenharmony_ci else 313362306a36Sopenharmony_ci DMEMIT("- "); 313462306a36Sopenharmony_ci 313562306a36Sopenharmony_ci break; 313662306a36Sopenharmony_ci 313762306a36Sopenharmony_ci case STATUSTYPE_TABLE: 313862306a36Sopenharmony_ci format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 313962306a36Sopenharmony_ci DMEMIT("%s ", buf); 314062306a36Sopenharmony_ci format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 314162306a36Sopenharmony_ci DMEMIT("%s ", buf); 314262306a36Sopenharmony_ci format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 314362306a36Sopenharmony_ci DMEMIT("%s", buf); 314462306a36Sopenharmony_ci 314562306a36Sopenharmony_ci for (i = 0; i < cache->nr_ctr_args - 1; i++) 314662306a36Sopenharmony_ci DMEMIT(" %s", cache->ctr_args[i]); 314762306a36Sopenharmony_ci if (cache->nr_ctr_args) 314862306a36Sopenharmony_ci DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 314962306a36Sopenharmony_ci break; 315062306a36Sopenharmony_ci 315162306a36Sopenharmony_ci case STATUSTYPE_IMA: 315262306a36Sopenharmony_ci DMEMIT_TARGET_NAME_VERSION(ti->type); 315362306a36Sopenharmony_ci if (get_cache_mode(cache) == CM_FAIL) 315462306a36Sopenharmony_ci DMEMIT(",metadata_mode=fail"); 315562306a36Sopenharmony_ci else if (get_cache_mode(cache) == CM_READ_ONLY) 315662306a36Sopenharmony_ci DMEMIT(",metadata_mode=ro"); 315762306a36Sopenharmony_ci else 315862306a36Sopenharmony_ci DMEMIT(",metadata_mode=rw"); 315962306a36Sopenharmony_ci 316062306a36Sopenharmony_ci format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 316162306a36Sopenharmony_ci DMEMIT(",cache_metadata_device=%s", buf); 316262306a36Sopenharmony_ci format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 316362306a36Sopenharmony_ci DMEMIT(",cache_device=%s", buf); 316462306a36Sopenharmony_ci format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 316562306a36Sopenharmony_ci DMEMIT(",cache_origin_device=%s", buf); 
316662306a36Sopenharmony_ci DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n'); 316762306a36Sopenharmony_ci DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n'); 316862306a36Sopenharmony_ci DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n'); 316962306a36Sopenharmony_ci DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n'); 317062306a36Sopenharmony_ci DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y'); 317162306a36Sopenharmony_ci DMEMIT(";"); 317262306a36Sopenharmony_ci break; 317362306a36Sopenharmony_ci } 317462306a36Sopenharmony_ci 317562306a36Sopenharmony_ci return; 317662306a36Sopenharmony_ci 317762306a36Sopenharmony_cierr: 317862306a36Sopenharmony_ci DMEMIT("Error"); 317962306a36Sopenharmony_ci} 318062306a36Sopenharmony_ci 318162306a36Sopenharmony_ci/* 318262306a36Sopenharmony_ci * Defines a range of cblocks, begin to (end - 1) are in the range. end is 318362306a36Sopenharmony_ci * the one-past-the-end value. 318462306a36Sopenharmony_ci */ 318562306a36Sopenharmony_cistruct cblock_range { 318662306a36Sopenharmony_ci dm_cblock_t begin; 318762306a36Sopenharmony_ci dm_cblock_t end; 318862306a36Sopenharmony_ci}; 318962306a36Sopenharmony_ci 319062306a36Sopenharmony_ci/* 319162306a36Sopenharmony_ci * A cache block range can take two forms: 319262306a36Sopenharmony_ci * 319362306a36Sopenharmony_ci * i) A single cblock, eg. '3456' 319462306a36Sopenharmony_ci * ii) A begin and end cblock with a dash between, eg. 123-234 319562306a36Sopenharmony_ci */ 319662306a36Sopenharmony_cistatic int parse_cblock_range(struct cache *cache, const char *str, 319762306a36Sopenharmony_ci struct cblock_range *result) 319862306a36Sopenharmony_ci{ 319962306a36Sopenharmony_ci char dummy; 320062306a36Sopenharmony_ci uint64_t b, e; 320162306a36Sopenharmony_ci int r; 320262306a36Sopenharmony_ci 320362306a36Sopenharmony_ci /* 320462306a36Sopenharmony_ci * Try and parse form (ii) first. 
320562306a36Sopenharmony_ci */ 320662306a36Sopenharmony_ci r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 320762306a36Sopenharmony_ci if (r < 0) 320862306a36Sopenharmony_ci return r; 320962306a36Sopenharmony_ci 321062306a36Sopenharmony_ci if (r == 2) { 321162306a36Sopenharmony_ci result->begin = to_cblock(b); 321262306a36Sopenharmony_ci result->end = to_cblock(e); 321362306a36Sopenharmony_ci return 0; 321462306a36Sopenharmony_ci } 321562306a36Sopenharmony_ci 321662306a36Sopenharmony_ci /* 321762306a36Sopenharmony_ci * That didn't work, try form (i). 321862306a36Sopenharmony_ci */ 321962306a36Sopenharmony_ci r = sscanf(str, "%llu%c", &b, &dummy); 322062306a36Sopenharmony_ci if (r < 0) 322162306a36Sopenharmony_ci return r; 322262306a36Sopenharmony_ci 322362306a36Sopenharmony_ci if (r == 1) { 322462306a36Sopenharmony_ci result->begin = to_cblock(b); 322562306a36Sopenharmony_ci result->end = to_cblock(from_cblock(result->begin) + 1u); 322662306a36Sopenharmony_ci return 0; 322762306a36Sopenharmony_ci } 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str); 323062306a36Sopenharmony_ci return -EINVAL; 323162306a36Sopenharmony_ci} 323262306a36Sopenharmony_ci 323362306a36Sopenharmony_cistatic int validate_cblock_range(struct cache *cache, struct cblock_range *range) 323462306a36Sopenharmony_ci{ 323562306a36Sopenharmony_ci uint64_t b = from_cblock(range->begin); 323662306a36Sopenharmony_ci uint64_t e = from_cblock(range->end); 323762306a36Sopenharmony_ci uint64_t n = from_cblock(cache->cache_size); 323862306a36Sopenharmony_ci 323962306a36Sopenharmony_ci if (b >= n) { 324062306a36Sopenharmony_ci DMERR("%s: begin cblock out of range: %llu >= %llu", 324162306a36Sopenharmony_ci cache_device_name(cache), b, n); 324262306a36Sopenharmony_ci return -EINVAL; 324362306a36Sopenharmony_ci } 324462306a36Sopenharmony_ci 324562306a36Sopenharmony_ci if (e > n) { 324662306a36Sopenharmony_ci DMERR("%s: end cblock out of 
range: %llu > %llu", 324762306a36Sopenharmony_ci cache_device_name(cache), e, n); 324862306a36Sopenharmony_ci return -EINVAL; 324962306a36Sopenharmony_ci } 325062306a36Sopenharmony_ci 325162306a36Sopenharmony_ci if (b >= e) { 325262306a36Sopenharmony_ci DMERR("%s: invalid cblock range: %llu >= %llu", 325362306a36Sopenharmony_ci cache_device_name(cache), b, e); 325462306a36Sopenharmony_ci return -EINVAL; 325562306a36Sopenharmony_ci } 325662306a36Sopenharmony_ci 325762306a36Sopenharmony_ci return 0; 325862306a36Sopenharmony_ci} 325962306a36Sopenharmony_ci 326062306a36Sopenharmony_cistatic inline dm_cblock_t cblock_succ(dm_cblock_t b) 326162306a36Sopenharmony_ci{ 326262306a36Sopenharmony_ci return to_cblock(from_cblock(b) + 1); 326362306a36Sopenharmony_ci} 326462306a36Sopenharmony_ci 326562306a36Sopenharmony_cistatic int request_invalidation(struct cache *cache, struct cblock_range *range) 326662306a36Sopenharmony_ci{ 326762306a36Sopenharmony_ci int r = 0; 326862306a36Sopenharmony_ci 326962306a36Sopenharmony_ci /* 327062306a36Sopenharmony_ci * We don't need to do any locking here because we know we're in 327162306a36Sopenharmony_ci * passthrough mode. There's is potential for a race between an 327262306a36Sopenharmony_ci * invalidation triggered by an io and an invalidation message. This 327362306a36Sopenharmony_ci * is harmless, we must not worry if the policy call fails. 
327462306a36Sopenharmony_ci */ 327562306a36Sopenharmony_ci while (range->begin != range->end) { 327662306a36Sopenharmony_ci r = invalidate_cblock(cache, range->begin); 327762306a36Sopenharmony_ci if (r) 327862306a36Sopenharmony_ci return r; 327962306a36Sopenharmony_ci 328062306a36Sopenharmony_ci range->begin = cblock_succ(range->begin); 328162306a36Sopenharmony_ci } 328262306a36Sopenharmony_ci 328362306a36Sopenharmony_ci cache->commit_requested = true; 328462306a36Sopenharmony_ci return r; 328562306a36Sopenharmony_ci} 328662306a36Sopenharmony_ci 328762306a36Sopenharmony_cistatic int process_invalidate_cblocks_message(struct cache *cache, unsigned int count, 328862306a36Sopenharmony_ci const char **cblock_ranges) 328962306a36Sopenharmony_ci{ 329062306a36Sopenharmony_ci int r = 0; 329162306a36Sopenharmony_ci unsigned int i; 329262306a36Sopenharmony_ci struct cblock_range range; 329362306a36Sopenharmony_ci 329462306a36Sopenharmony_ci if (!passthrough_mode(cache)) { 329562306a36Sopenharmony_ci DMERR("%s: cache has to be in passthrough mode for invalidation", 329662306a36Sopenharmony_ci cache_device_name(cache)); 329762306a36Sopenharmony_ci return -EPERM; 329862306a36Sopenharmony_ci } 329962306a36Sopenharmony_ci 330062306a36Sopenharmony_ci for (i = 0; i < count; i++) { 330162306a36Sopenharmony_ci r = parse_cblock_range(cache, cblock_ranges[i], &range); 330262306a36Sopenharmony_ci if (r) 330362306a36Sopenharmony_ci break; 330462306a36Sopenharmony_ci 330562306a36Sopenharmony_ci r = validate_cblock_range(cache, &range); 330662306a36Sopenharmony_ci if (r) 330762306a36Sopenharmony_ci break; 330862306a36Sopenharmony_ci 330962306a36Sopenharmony_ci /* 331062306a36Sopenharmony_ci * Pass begin and end origin blocks to the worker and wake it. 
331162306a36Sopenharmony_ci */ 331262306a36Sopenharmony_ci r = request_invalidation(cache, &range); 331362306a36Sopenharmony_ci if (r) 331462306a36Sopenharmony_ci break; 331562306a36Sopenharmony_ci } 331662306a36Sopenharmony_ci 331762306a36Sopenharmony_ci return r; 331862306a36Sopenharmony_ci} 331962306a36Sopenharmony_ci 332062306a36Sopenharmony_ci/* 332162306a36Sopenharmony_ci * Supports 332262306a36Sopenharmony_ci * "<key> <value>" 332362306a36Sopenharmony_ci * and 332462306a36Sopenharmony_ci * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]* 332562306a36Sopenharmony_ci * 332662306a36Sopenharmony_ci * The key migration_threshold is supported by the cache target core. 332762306a36Sopenharmony_ci */ 332862306a36Sopenharmony_cistatic int cache_message(struct dm_target *ti, unsigned int argc, char **argv, 332962306a36Sopenharmony_ci char *result, unsigned int maxlen) 333062306a36Sopenharmony_ci{ 333162306a36Sopenharmony_ci struct cache *cache = ti->private; 333262306a36Sopenharmony_ci 333362306a36Sopenharmony_ci if (!argc) 333462306a36Sopenharmony_ci return -EINVAL; 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci if (get_cache_mode(cache) >= CM_READ_ONLY) { 333762306a36Sopenharmony_ci DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode", 333862306a36Sopenharmony_ci cache_device_name(cache)); 333962306a36Sopenharmony_ci return -EOPNOTSUPP; 334062306a36Sopenharmony_ci } 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci if (!strcasecmp(argv[0], "invalidate_cblocks")) 334362306a36Sopenharmony_ci return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 334462306a36Sopenharmony_ci 334562306a36Sopenharmony_ci if (argc != 2) 334662306a36Sopenharmony_ci return -EINVAL; 334762306a36Sopenharmony_ci 334862306a36Sopenharmony_ci return set_config_value(cache, argv[0], argv[1]); 334962306a36Sopenharmony_ci} 335062306a36Sopenharmony_ci 335162306a36Sopenharmony_cistatic int cache_iterate_devices(struct dm_target *ti, 
335262306a36Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 335362306a36Sopenharmony_ci{ 335462306a36Sopenharmony_ci int r = 0; 335562306a36Sopenharmony_ci struct cache *cache = ti->private; 335662306a36Sopenharmony_ci 335762306a36Sopenharmony_ci r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 335862306a36Sopenharmony_ci if (!r) 335962306a36Sopenharmony_ci r = fn(ti, cache->origin_dev, 0, ti->len, data); 336062306a36Sopenharmony_ci 336162306a36Sopenharmony_ci return r; 336262306a36Sopenharmony_ci} 336362306a36Sopenharmony_ci 336462306a36Sopenharmony_ci/* 336562306a36Sopenharmony_ci * If discard_passdown was enabled verify that the origin device 336662306a36Sopenharmony_ci * supports discards. Disable discard_passdown if not. 336762306a36Sopenharmony_ci */ 336862306a36Sopenharmony_cistatic void disable_passdown_if_not_supported(struct cache *cache) 336962306a36Sopenharmony_ci{ 337062306a36Sopenharmony_ci struct block_device *origin_bdev = cache->origin_dev->bdev; 337162306a36Sopenharmony_ci struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; 337262306a36Sopenharmony_ci const char *reason = NULL; 337362306a36Sopenharmony_ci 337462306a36Sopenharmony_ci if (!cache->features.discard_passdown) 337562306a36Sopenharmony_ci return; 337662306a36Sopenharmony_ci 337762306a36Sopenharmony_ci if (!bdev_max_discard_sectors(origin_bdev)) 337862306a36Sopenharmony_ci reason = "discard unsupported"; 337962306a36Sopenharmony_ci 338062306a36Sopenharmony_ci else if (origin_limits->max_discard_sectors < cache->sectors_per_block) 338162306a36Sopenharmony_ci reason = "max discard sectors smaller than a block"; 338262306a36Sopenharmony_ci 338362306a36Sopenharmony_ci if (reason) { 338462306a36Sopenharmony_ci DMWARN("Origin device (%pg) %s: Disabling discard passdown.", 338562306a36Sopenharmony_ci origin_bdev, reason); 338662306a36Sopenharmony_ci cache->features.discard_passdown = false; 338762306a36Sopenharmony_ci } 
338862306a36Sopenharmony_ci} 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_cistatic void set_discard_limits(struct cache *cache, struct queue_limits *limits) 339162306a36Sopenharmony_ci{ 339262306a36Sopenharmony_ci struct block_device *origin_bdev = cache->origin_dev->bdev; 339362306a36Sopenharmony_ci struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits; 339462306a36Sopenharmony_ci 339562306a36Sopenharmony_ci if (!cache->features.discard_passdown) { 339662306a36Sopenharmony_ci /* No passdown is done so setting own virtual limits */ 339762306a36Sopenharmony_ci limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, 339862306a36Sopenharmony_ci cache->origin_sectors); 339962306a36Sopenharmony_ci limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; 340062306a36Sopenharmony_ci return; 340162306a36Sopenharmony_ci } 340262306a36Sopenharmony_ci 340362306a36Sopenharmony_ci /* 340462306a36Sopenharmony_ci * cache_iterate_devices() is stacking both origin and fast device limits 340562306a36Sopenharmony_ci * but discards aren't passed to fast device, so inherit origin's limits. 
340662306a36Sopenharmony_ci */ 340762306a36Sopenharmony_ci limits->max_discard_sectors = origin_limits->max_discard_sectors; 340862306a36Sopenharmony_ci limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors; 340962306a36Sopenharmony_ci limits->discard_granularity = origin_limits->discard_granularity; 341062306a36Sopenharmony_ci limits->discard_alignment = origin_limits->discard_alignment; 341162306a36Sopenharmony_ci limits->discard_misaligned = origin_limits->discard_misaligned; 341262306a36Sopenharmony_ci} 341362306a36Sopenharmony_ci 341462306a36Sopenharmony_cistatic void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 341562306a36Sopenharmony_ci{ 341662306a36Sopenharmony_ci struct cache *cache = ti->private; 341762306a36Sopenharmony_ci uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci /* 342062306a36Sopenharmony_ci * If the system-determined stacked limits are compatible with the 342162306a36Sopenharmony_ci * cache's blocksize (io_opt is a factor) do not override them. 
342262306a36Sopenharmony_ci */ 342362306a36Sopenharmony_ci if (io_opt_sectors < cache->sectors_per_block || 342462306a36Sopenharmony_ci do_div(io_opt_sectors, cache->sectors_per_block)) { 342562306a36Sopenharmony_ci blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); 342662306a36Sopenharmony_ci blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 342762306a36Sopenharmony_ci } 342862306a36Sopenharmony_ci 342962306a36Sopenharmony_ci disable_passdown_if_not_supported(cache); 343062306a36Sopenharmony_ci set_discard_limits(cache, limits); 343162306a36Sopenharmony_ci} 343262306a36Sopenharmony_ci 343362306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 343462306a36Sopenharmony_ci 343562306a36Sopenharmony_cistatic struct target_type cache_target = { 343662306a36Sopenharmony_ci .name = "cache", 343762306a36Sopenharmony_ci .version = {2, 2, 0}, 343862306a36Sopenharmony_ci .module = THIS_MODULE, 343962306a36Sopenharmony_ci .ctr = cache_ctr, 344062306a36Sopenharmony_ci .dtr = cache_dtr, 344162306a36Sopenharmony_ci .map = cache_map, 344262306a36Sopenharmony_ci .end_io = cache_end_io, 344362306a36Sopenharmony_ci .postsuspend = cache_postsuspend, 344462306a36Sopenharmony_ci .preresume = cache_preresume, 344562306a36Sopenharmony_ci .resume = cache_resume, 344662306a36Sopenharmony_ci .status = cache_status, 344762306a36Sopenharmony_ci .message = cache_message, 344862306a36Sopenharmony_ci .iterate_devices = cache_iterate_devices, 344962306a36Sopenharmony_ci .io_hints = cache_io_hints, 345062306a36Sopenharmony_ci}; 345162306a36Sopenharmony_ci 345262306a36Sopenharmony_cistatic int __init dm_cache_init(void) 345362306a36Sopenharmony_ci{ 345462306a36Sopenharmony_ci int r; 345562306a36Sopenharmony_ci 345662306a36Sopenharmony_ci migration_cache = KMEM_CACHE(dm_cache_migration, 0); 345762306a36Sopenharmony_ci if (!migration_cache) 345862306a36Sopenharmony_ci return -ENOMEM; 345962306a36Sopenharmony_ci 
346062306a36Sopenharmony_ci r = dm_register_target(&cache_target); 346162306a36Sopenharmony_ci if (r) { 346262306a36Sopenharmony_ci kmem_cache_destroy(migration_cache); 346362306a36Sopenharmony_ci return r; 346462306a36Sopenharmony_ci } 346562306a36Sopenharmony_ci 346662306a36Sopenharmony_ci return 0; 346762306a36Sopenharmony_ci} 346862306a36Sopenharmony_ci 346962306a36Sopenharmony_cistatic void __exit dm_cache_exit(void) 347062306a36Sopenharmony_ci{ 347162306a36Sopenharmony_ci dm_unregister_target(&cache_target); 347262306a36Sopenharmony_ci kmem_cache_destroy(migration_cache); 347362306a36Sopenharmony_ci} 347462306a36Sopenharmony_ci 347562306a36Sopenharmony_cimodule_init(dm_cache_init); 347662306a36Sopenharmony_cimodule_exit(dm_cache_exit); 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " cache target"); 347962306a36Sopenharmony_ciMODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 348062306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 3481