162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2003 Sistina Software Limited. 462306a36Sopenharmony_ci * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file is released under the GPL. 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include "dm-bio-record.h" 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/init.h> 1262306a36Sopenharmony_ci#include <linux/mempool.h> 1362306a36Sopenharmony_ci#include <linux/module.h> 1462306a36Sopenharmony_ci#include <linux/pagemap.h> 1562306a36Sopenharmony_ci#include <linux/slab.h> 1662306a36Sopenharmony_ci#include <linux/workqueue.h> 1762306a36Sopenharmony_ci#include <linux/device-mapper.h> 1862306a36Sopenharmony_ci#include <linux/dm-io.h> 1962306a36Sopenharmony_ci#include <linux/dm-dirty-log.h> 2062306a36Sopenharmony_ci#include <linux/dm-kcopyd.h> 2162306a36Sopenharmony_ci#include <linux/dm-region-hash.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cistatic struct workqueue_struct *dm_raid1_wq; 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define DM_MSG_PREFIX "raid1" 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define MAX_NR_MIRRORS (DM_KCOPYD_MAX_REGIONS + 1) 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define DM_RAID1_HANDLE_ERRORS 0x01 3262306a36Sopenharmony_ci#define DM_RAID1_KEEP_LOG 0x02 3362306a36Sopenharmony_ci#define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) 3462306a36Sopenharmony_ci#define keep_log(p) ((p)->features & DM_RAID1_KEEP_LOG) 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_cistatic DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci/* 3962306a36Sopenharmony_ci *--------------------------------------------------------------- 4062306a36Sopenharmony_ci * Mirror set structures. 4162306a36Sopenharmony_ci *--------------------------------------------------------------- 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_cienum dm_raid1_error { 4462306a36Sopenharmony_ci DM_RAID1_WRITE_ERROR, 4562306a36Sopenharmony_ci DM_RAID1_FLUSH_ERROR, 4662306a36Sopenharmony_ci DM_RAID1_SYNC_ERROR, 4762306a36Sopenharmony_ci DM_RAID1_READ_ERROR 4862306a36Sopenharmony_ci}; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistruct mirror { 5162306a36Sopenharmony_ci struct mirror_set *ms; 5262306a36Sopenharmony_ci atomic_t error_count; 5362306a36Sopenharmony_ci unsigned long error_type; 5462306a36Sopenharmony_ci struct dm_dev *dev; 5562306a36Sopenharmony_ci sector_t offset; 5662306a36Sopenharmony_ci}; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistruct mirror_set { 5962306a36Sopenharmony_ci struct dm_target *ti; 6062306a36Sopenharmony_ci struct list_head list; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci uint64_t features; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci spinlock_t lock; /* protects the lists */ 6562306a36Sopenharmony_ci struct bio_list reads; 6662306a36Sopenharmony_ci struct bio_list writes; 6762306a36Sopenharmony_ci struct bio_list failures; 6862306a36Sopenharmony_ci struct bio_list holds; /* bios are waiting until suspend */ 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci struct dm_region_hash *rh; 7162306a36Sopenharmony_ci struct dm_kcopyd_client *kcopyd_client; 7262306a36Sopenharmony_ci struct dm_io_client *io_client; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci /* recovery */ 7562306a36Sopenharmony_ci region_t nr_regions; 7662306a36Sopenharmony_ci int in_sync; 7762306a36Sopenharmony_ci int log_failure; 7862306a36Sopenharmony_ci int leg_failure; 7962306a36Sopenharmony_ci atomic_t suspend; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci atomic_t default_mirror; /* Default mirror */ 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci struct workqueue_struct *kmirrord_wq; 8462306a36Sopenharmony_ci struct work_struct kmirrord_work; 8562306a36Sopenharmony_ci struct timer_list timer; 8662306a36Sopenharmony_ci unsigned long timer_pending; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci struct work_struct trigger_event; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci unsigned int nr_mirrors; 9162306a36Sopenharmony_ci struct mirror mirror[]; 9262306a36Sopenharmony_ci}; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ciDECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle, 9562306a36Sopenharmony_ci "A percentage of time allocated for raid resynchronization"); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_cistatic void wakeup_mirrord(void *context) 9862306a36Sopenharmony_ci{ 9962306a36Sopenharmony_ci struct mirror_set *ms = context; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci queue_work(ms->kmirrord_wq, &ms->kmirrord_work); 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_cistatic void delayed_wake_fn(struct timer_list *t) 10562306a36Sopenharmony_ci{ 10662306a36Sopenharmony_ci struct mirror_set *ms = from_timer(ms, t, timer); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci clear_bit(0, &ms->timer_pending); 10962306a36Sopenharmony_ci wakeup_mirrord(ms); 11062306a36Sopenharmony_ci} 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_cistatic void delayed_wake(struct mirror_set *ms) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci if (test_and_set_bit(0, &ms->timer_pending)) 11562306a36Sopenharmony_ci return; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci ms->timer.expires = jiffies + HZ / 5; 11862306a36Sopenharmony_ci add_timer(&ms->timer); 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_cistatic void wakeup_all_recovery_waiters(void *context) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci wake_up_all(&_kmirrord_recovery_stopped); 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci unsigned long flags; 12962306a36Sopenharmony_ci int should_wake = 0; 13062306a36Sopenharmony_ci struct bio_list *bl; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci bl = (rw == WRITE) ? &ms->writes : &ms->reads; 13362306a36Sopenharmony_ci spin_lock_irqsave(&ms->lock, flags); 13462306a36Sopenharmony_ci should_wake = !(bl->head); 13562306a36Sopenharmony_ci bio_list_add(bl, bio); 13662306a36Sopenharmony_ci spin_unlock_irqrestore(&ms->lock, flags); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (should_wake) 13962306a36Sopenharmony_ci wakeup_mirrord(ms); 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_cistatic void dispatch_bios(void *context, struct bio_list *bio_list) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct mirror_set *ms = context; 14562306a36Sopenharmony_ci struct bio *bio; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci while ((bio = bio_list_pop(bio_list))) 14862306a36Sopenharmony_ci queue_bio(ms, bio, WRITE); 14962306a36Sopenharmony_ci} 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistruct dm_raid1_bio_record { 15262306a36Sopenharmony_ci struct mirror *m; 15362306a36Sopenharmony_ci /* if details->bi_bdev == NULL, details were not saved */ 15462306a36Sopenharmony_ci struct dm_bio_details details; 15562306a36Sopenharmony_ci region_t write_region; 15662306a36Sopenharmony_ci}; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci/* 15962306a36Sopenharmony_ci * Every mirror should look like this one. 16062306a36Sopenharmony_ci */ 16162306a36Sopenharmony_ci#define DEFAULT_MIRROR 0 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci/* 16462306a36Sopenharmony_ci * This is yucky. We squirrel the mirror struct away inside 16562306a36Sopenharmony_ci * bi_next for read/write buffers. This is safe since the bh 16662306a36Sopenharmony_ci * doesn't get submitted to the lower levels of block layer. 16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_cistatic struct mirror *bio_get_m(struct bio *bio) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci return (struct mirror *) bio->bi_next; 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_cistatic void bio_set_m(struct bio *bio, struct mirror *m) 17462306a36Sopenharmony_ci{ 17562306a36Sopenharmony_ci bio->bi_next = (struct bio *) m; 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_cistatic struct mirror *get_default_mirror(struct mirror_set *ms) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci return &ms->mirror[atomic_read(&ms->default_mirror)]; 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_cistatic void set_default_mirror(struct mirror *m) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci struct mirror_set *ms = m->ms; 18662306a36Sopenharmony_ci struct mirror *m0 = &(ms->mirror[0]); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci atomic_set(&ms->default_mirror, m - m0); 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic struct mirror *get_valid_mirror(struct mirror_set *ms) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct mirror *m; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++) 19662306a36Sopenharmony_ci if (!atomic_read(&m->error_count)) 19762306a36Sopenharmony_ci return m; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci return NULL; 20062306a36Sopenharmony_ci} 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci/* fail_mirror 20362306a36Sopenharmony_ci * @m: mirror device to fail 20462306a36Sopenharmony_ci * @error_type: one of the enum's, DM_RAID1_*_ERROR 20562306a36Sopenharmony_ci * 20662306a36Sopenharmony_ci * If errors are being handled, record the type of 20762306a36Sopenharmony_ci * error encountered for this device. If this type 20862306a36Sopenharmony_ci * of error has already been recorded, we can return; 20962306a36Sopenharmony_ci * otherwise, we must signal userspace by triggering 21062306a36Sopenharmony_ci * an event. Additionally, if the device is the 21162306a36Sopenharmony_ci * primary device, we must choose a new primary, but 21262306a36Sopenharmony_ci * only if the mirror is in-sync. 21362306a36Sopenharmony_ci * 21462306a36Sopenharmony_ci * This function must not block. 21562306a36Sopenharmony_ci */ 21662306a36Sopenharmony_cistatic void fail_mirror(struct mirror *m, enum dm_raid1_error error_type) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci struct mirror_set *ms = m->ms; 21962306a36Sopenharmony_ci struct mirror *new; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci ms->leg_failure = 1; 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci /* 22462306a36Sopenharmony_ci * error_count is used for nothing more than a 22562306a36Sopenharmony_ci * simple way to tell if a device has encountered 22662306a36Sopenharmony_ci * errors. 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ci atomic_inc(&m->error_count); 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci if (test_and_set_bit(error_type, &m->error_type)) 23162306a36Sopenharmony_ci return; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci if (!errors_handled(ms)) 23462306a36Sopenharmony_ci return; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci if (m != get_default_mirror(ms)) 23762306a36Sopenharmony_ci goto out; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (!ms->in_sync && !keep_log(ms)) { 24062306a36Sopenharmony_ci /* 24162306a36Sopenharmony_ci * Better to issue requests to same failing device 24262306a36Sopenharmony_ci * than to risk returning corrupt data. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ci DMERR("Primary mirror (%s) failed while out-of-sync: Reads may fail.", 24562306a36Sopenharmony_ci m->dev->name); 24662306a36Sopenharmony_ci goto out; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci new = get_valid_mirror(ms); 25062306a36Sopenharmony_ci if (new) 25162306a36Sopenharmony_ci set_default_mirror(new); 25262306a36Sopenharmony_ci else 25362306a36Sopenharmony_ci DMWARN("All sides of mirror have failed."); 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ciout: 25662306a36Sopenharmony_ci queue_work(dm_raid1_wq, &ms->trigger_event); 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_cistatic int mirror_flush(struct dm_target *ti) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 26262306a36Sopenharmony_ci unsigned long error_bits; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci unsigned int i; 26562306a36Sopenharmony_ci struct dm_io_region io[MAX_NR_MIRRORS]; 26662306a36Sopenharmony_ci struct mirror *m; 26762306a36Sopenharmony_ci struct dm_io_request io_req = { 26862306a36Sopenharmony_ci .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, 26962306a36Sopenharmony_ci .mem.type = DM_IO_KMEM, 27062306a36Sopenharmony_ci .mem.ptr.addr = NULL, 27162306a36Sopenharmony_ci .client = ms->io_client, 27262306a36Sopenharmony_ci }; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) { 27562306a36Sopenharmony_ci io[i].bdev = m->dev->bdev; 27662306a36Sopenharmony_ci io[i].sector = 0; 27762306a36Sopenharmony_ci io[i].count = 0; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci error_bits = -1; 28162306a36Sopenharmony_ci dm_io(&io_req, ms->nr_mirrors, io, &error_bits, IOPRIO_DEFAULT); 28262306a36Sopenharmony_ci if (unlikely(error_bits != 0)) { 28362306a36Sopenharmony_ci for (i = 0; i < ms->nr_mirrors; i++) 28462306a36Sopenharmony_ci if (test_bit(i, &error_bits)) 28562306a36Sopenharmony_ci fail_mirror(ms->mirror + i, 28662306a36Sopenharmony_ci DM_RAID1_FLUSH_ERROR); 28762306a36Sopenharmony_ci return -EIO; 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci return 0; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci/* 29462306a36Sopenharmony_ci *--------------------------------------------------------------- 29562306a36Sopenharmony_ci * Recovery. 29662306a36Sopenharmony_ci * 29762306a36Sopenharmony_ci * When a mirror is first activated we may find that some regions 29862306a36Sopenharmony_ci * are in the no-sync state. We have to recover these by 29962306a36Sopenharmony_ci * recopying from the default mirror to all the others. 30062306a36Sopenharmony_ci *--------------------------------------------------------------- 30162306a36Sopenharmony_ci */ 30262306a36Sopenharmony_cistatic void recovery_complete(int read_err, unsigned long write_err, 30362306a36Sopenharmony_ci void *context) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci struct dm_region *reg = context; 30662306a36Sopenharmony_ci struct mirror_set *ms = dm_rh_region_context(reg); 30762306a36Sopenharmony_ci int m, bit = 0; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci if (read_err) { 31062306a36Sopenharmony_ci /* Read error means the failure of default mirror. */ 31162306a36Sopenharmony_ci DMERR_LIMIT("Unable to read primary mirror during recovery"); 31262306a36Sopenharmony_ci fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR); 31362306a36Sopenharmony_ci } 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (write_err) { 31662306a36Sopenharmony_ci DMERR_LIMIT("Write error during recovery (error = 0x%lx)", 31762306a36Sopenharmony_ci write_err); 31862306a36Sopenharmony_ci /* 31962306a36Sopenharmony_ci * Bits correspond to devices (excluding default mirror). 32062306a36Sopenharmony_ci * The default mirror cannot change during recovery. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ci for (m = 0; m < ms->nr_mirrors; m++) { 32362306a36Sopenharmony_ci if (&ms->mirror[m] == get_default_mirror(ms)) 32462306a36Sopenharmony_ci continue; 32562306a36Sopenharmony_ci if (test_bit(bit, &write_err)) 32662306a36Sopenharmony_ci fail_mirror(ms->mirror + m, 32762306a36Sopenharmony_ci DM_RAID1_SYNC_ERROR); 32862306a36Sopenharmony_ci bit++; 32962306a36Sopenharmony_ci } 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci dm_rh_recovery_end(reg, !(read_err || write_err)); 33362306a36Sopenharmony_ci} 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_cistatic void recover(struct mirror_set *ms, struct dm_region *reg) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci unsigned int i; 33862306a36Sopenharmony_ci struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; 33962306a36Sopenharmony_ci struct mirror *m; 34062306a36Sopenharmony_ci unsigned long flags = 0; 34162306a36Sopenharmony_ci region_t key = dm_rh_get_region_key(reg); 34262306a36Sopenharmony_ci sector_t region_size = dm_rh_get_region_size(ms->rh); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci /* fill in the source */ 34562306a36Sopenharmony_ci m = get_default_mirror(ms); 34662306a36Sopenharmony_ci from.bdev = m->dev->bdev; 34762306a36Sopenharmony_ci from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key); 34862306a36Sopenharmony_ci if (key == (ms->nr_regions - 1)) { 34962306a36Sopenharmony_ci /* 35062306a36Sopenharmony_ci * The final region may be smaller than 35162306a36Sopenharmony_ci * region_size. 35262306a36Sopenharmony_ci */ 35362306a36Sopenharmony_ci from.count = ms->ti->len & (region_size - 1); 35462306a36Sopenharmony_ci if (!from.count) 35562306a36Sopenharmony_ci from.count = region_size; 35662306a36Sopenharmony_ci } else 35762306a36Sopenharmony_ci from.count = region_size; 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci /* fill in the destinations */ 36062306a36Sopenharmony_ci for (i = 0, dest = to; i < ms->nr_mirrors; i++) { 36162306a36Sopenharmony_ci if (&ms->mirror[i] == get_default_mirror(ms)) 36262306a36Sopenharmony_ci continue; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci m = ms->mirror + i; 36562306a36Sopenharmony_ci dest->bdev = m->dev->bdev; 36662306a36Sopenharmony_ci dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key); 36762306a36Sopenharmony_ci dest->count = from.count; 36862306a36Sopenharmony_ci dest++; 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci /* hand to kcopyd */ 37262306a36Sopenharmony_ci if (!errors_handled(ms)) 37362306a36Sopenharmony_ci flags |= BIT(DM_KCOPYD_IGNORE_ERROR); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, 37662306a36Sopenharmony_ci flags, recovery_complete, reg); 37762306a36Sopenharmony_ci} 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_cistatic void reset_ms_flags(struct mirror_set *ms) 38062306a36Sopenharmony_ci{ 38162306a36Sopenharmony_ci unsigned int m; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci ms->leg_failure = 0; 38462306a36Sopenharmony_ci for (m = 0; m < ms->nr_mirrors; m++) { 38562306a36Sopenharmony_ci atomic_set(&(ms->mirror[m].error_count), 0); 38662306a36Sopenharmony_ci ms->mirror[m].error_type = 0; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic void do_recovery(struct mirror_set *ms) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci struct dm_region *reg; 39362306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci /* 39662306a36Sopenharmony_ci * Start quiescing some regions. 39762306a36Sopenharmony_ci */ 39862306a36Sopenharmony_ci dm_rh_recovery_prepare(ms->rh); 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci /* 40162306a36Sopenharmony_ci * Copy any already quiesced regions. 40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ci while ((reg = dm_rh_recovery_start(ms->rh))) 40462306a36Sopenharmony_ci recover(ms, reg); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci /* 40762306a36Sopenharmony_ci * Update the in sync flag. 40862306a36Sopenharmony_ci */ 40962306a36Sopenharmony_ci if (!ms->in_sync && 41062306a36Sopenharmony_ci (log->type->get_sync_count(log) == ms->nr_regions)) { 41162306a36Sopenharmony_ci /* the sync is complete */ 41262306a36Sopenharmony_ci dm_table_event(ms->ti->table); 41362306a36Sopenharmony_ci ms->in_sync = 1; 41462306a36Sopenharmony_ci reset_ms_flags(ms); 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci/* 41962306a36Sopenharmony_ci *--------------------------------------------------------------- 42062306a36Sopenharmony_ci * Reads 42162306a36Sopenharmony_ci *--------------------------------------------------------------- 42262306a36Sopenharmony_ci */ 42362306a36Sopenharmony_cistatic struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci struct mirror *m = get_default_mirror(ms); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci do { 42862306a36Sopenharmony_ci if (likely(!atomic_read(&m->error_count))) 42962306a36Sopenharmony_ci return m; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (m-- == ms->mirror) 43262306a36Sopenharmony_ci m += ms->nr_mirrors; 43362306a36Sopenharmony_ci } while (m != get_default_mirror(ms)); 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci return NULL; 43662306a36Sopenharmony_ci} 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_cistatic int default_ok(struct mirror *m) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci struct mirror *default_mirror = get_default_mirror(m->ms); 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci return !atomic_read(&default_mirror->error_count); 44362306a36Sopenharmony_ci} 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_cistatic int mirror_available(struct mirror_set *ms, struct bio *bio) 44662306a36Sopenharmony_ci{ 44762306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 44862306a36Sopenharmony_ci region_t region = dm_rh_bio_to_region(ms->rh, bio); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci if (log->type->in_sync(log, region, 0)) 45162306a36Sopenharmony_ci return choose_mirror(ms, bio->bi_iter.bi_sector) ? 1 : 0; 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci return 0; 45462306a36Sopenharmony_ci} 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci/* 45762306a36Sopenharmony_ci * remap a buffer to a particular mirror. 45862306a36Sopenharmony_ci */ 45962306a36Sopenharmony_cistatic sector_t map_sector(struct mirror *m, struct bio *bio) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci if (unlikely(!bio->bi_iter.bi_size)) 46262306a36Sopenharmony_ci return 0; 46362306a36Sopenharmony_ci return m->offset + dm_target_offset(m->ms->ti, bio->bi_iter.bi_sector); 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_cistatic void map_bio(struct mirror *m, struct bio *bio) 46762306a36Sopenharmony_ci{ 46862306a36Sopenharmony_ci bio_set_dev(bio, m->dev->bdev); 46962306a36Sopenharmony_ci bio->bi_iter.bi_sector = map_sector(m, bio); 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_cistatic void map_region(struct dm_io_region *io, struct mirror *m, 47362306a36Sopenharmony_ci struct bio *bio) 47462306a36Sopenharmony_ci{ 47562306a36Sopenharmony_ci io->bdev = m->dev->bdev; 47662306a36Sopenharmony_ci io->sector = map_sector(m, bio); 47762306a36Sopenharmony_ci io->count = bio_sectors(bio); 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_cistatic void hold_bio(struct mirror_set *ms, struct bio *bio) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci /* 48362306a36Sopenharmony_ci * Lock is required to avoid race condition during suspend 48462306a36Sopenharmony_ci * process. 48562306a36Sopenharmony_ci */ 48662306a36Sopenharmony_ci spin_lock_irq(&ms->lock); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci if (atomic_read(&ms->suspend)) { 48962306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci /* 49262306a36Sopenharmony_ci * If device is suspended, complete the bio. 49362306a36Sopenharmony_ci */ 49462306a36Sopenharmony_ci if (dm_noflush_suspending(ms->ti)) 49562306a36Sopenharmony_ci bio->bi_status = BLK_STS_DM_REQUEUE; 49662306a36Sopenharmony_ci else 49762306a36Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci bio_endio(bio); 50062306a36Sopenharmony_ci return; 50162306a36Sopenharmony_ci } 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci /* 50462306a36Sopenharmony_ci * Hold bio until the suspend is complete. 50562306a36Sopenharmony_ci */ 50662306a36Sopenharmony_ci bio_list_add(&ms->holds, bio); 50762306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci/* 51162306a36Sopenharmony_ci *--------------------------------------------------------------- 51262306a36Sopenharmony_ci * Reads 51362306a36Sopenharmony_ci *--------------------------------------------------------------- 51462306a36Sopenharmony_ci */ 51562306a36Sopenharmony_cistatic void read_callback(unsigned long error, void *context) 51662306a36Sopenharmony_ci{ 51762306a36Sopenharmony_ci struct bio *bio = context; 51862306a36Sopenharmony_ci struct mirror *m; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci m = bio_get_m(bio); 52162306a36Sopenharmony_ci bio_set_m(bio, NULL); 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci if (likely(!error)) { 52462306a36Sopenharmony_ci bio_endio(bio); 52562306a36Sopenharmony_ci return; 52662306a36Sopenharmony_ci } 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci fail_mirror(m, DM_RAID1_READ_ERROR); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci if (likely(default_ok(m)) || mirror_available(m->ms, bio)) { 53162306a36Sopenharmony_ci DMWARN_LIMIT("Read failure on mirror device %s. Trying alternative device.", 53262306a36Sopenharmony_ci m->dev->name); 53362306a36Sopenharmony_ci queue_bio(m->ms, bio, bio_data_dir(bio)); 53462306a36Sopenharmony_ci return; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.", 53862306a36Sopenharmony_ci m->dev->name); 53962306a36Sopenharmony_ci bio_io_error(bio); 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci/* Asynchronous read. */ 54362306a36Sopenharmony_cistatic void read_async_bio(struct mirror *m, struct bio *bio) 54462306a36Sopenharmony_ci{ 54562306a36Sopenharmony_ci struct dm_io_region io; 54662306a36Sopenharmony_ci struct dm_io_request io_req = { 54762306a36Sopenharmony_ci .bi_opf = REQ_OP_READ, 54862306a36Sopenharmony_ci .mem.type = DM_IO_BIO, 54962306a36Sopenharmony_ci .mem.ptr.bio = bio, 55062306a36Sopenharmony_ci .notify.fn = read_callback, 55162306a36Sopenharmony_ci .notify.context = bio, 55262306a36Sopenharmony_ci .client = m->ms->io_client, 55362306a36Sopenharmony_ci }; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci map_region(&io, m, bio); 55662306a36Sopenharmony_ci bio_set_m(bio, m); 55762306a36Sopenharmony_ci BUG_ON(dm_io(&io_req, 1, &io, NULL, IOPRIO_DEFAULT)); 55862306a36Sopenharmony_ci} 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_cistatic inline int region_in_sync(struct mirror_set *ms, region_t region, 56162306a36Sopenharmony_ci int may_block) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci int state = dm_rh_get_state(ms->rh, region, may_block); 56462306a36Sopenharmony_ci return state == DM_RH_CLEAN || state == DM_RH_DIRTY; 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistatic void do_reads(struct mirror_set *ms, struct bio_list *reads) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci region_t region; 57062306a36Sopenharmony_ci struct bio *bio; 57162306a36Sopenharmony_ci struct mirror *m; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci while ((bio = bio_list_pop(reads))) { 57462306a36Sopenharmony_ci region = dm_rh_bio_to_region(ms->rh, bio); 57562306a36Sopenharmony_ci m = get_default_mirror(ms); 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* 57862306a36Sopenharmony_ci * We can only read balance if the region is in sync. 57962306a36Sopenharmony_ci */ 58062306a36Sopenharmony_ci if (likely(region_in_sync(ms, region, 1))) 58162306a36Sopenharmony_ci m = choose_mirror(ms, bio->bi_iter.bi_sector); 58262306a36Sopenharmony_ci else if (m && atomic_read(&m->error_count)) 58362306a36Sopenharmony_ci m = NULL; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci if (likely(m)) 58662306a36Sopenharmony_ci read_async_bio(m, bio); 58762306a36Sopenharmony_ci else 58862306a36Sopenharmony_ci bio_io_error(bio); 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci} 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci/* 59362306a36Sopenharmony_ci *--------------------------------------------------------------------- 59462306a36Sopenharmony_ci * Writes. 59562306a36Sopenharmony_ci * 59662306a36Sopenharmony_ci * We do different things with the write io depending on the 59762306a36Sopenharmony_ci * state of the region that it's in: 59862306a36Sopenharmony_ci * 59962306a36Sopenharmony_ci * SYNC: increment pending, use kcopyd to write to *all* mirrors 60062306a36Sopenharmony_ci * RECOVERING: delay the io until recovery completes 60162306a36Sopenharmony_ci * NOSYNC: increment pending, just write to the default mirror 60262306a36Sopenharmony_ci *--------------------------------------------------------------------- 60362306a36Sopenharmony_ci */ 60462306a36Sopenharmony_cistatic void write_callback(unsigned long error, void *context) 60562306a36Sopenharmony_ci{ 60662306a36Sopenharmony_ci unsigned int i; 60762306a36Sopenharmony_ci struct bio *bio = context; 60862306a36Sopenharmony_ci struct mirror_set *ms; 60962306a36Sopenharmony_ci int should_wake = 0; 61062306a36Sopenharmony_ci unsigned long flags; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci ms = bio_get_m(bio)->ms; 61362306a36Sopenharmony_ci bio_set_m(bio, NULL); 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci /* 61662306a36Sopenharmony_ci * NOTE: We don't decrement the pending count here, 61762306a36Sopenharmony_ci * instead it is done by the targets endio function. 61862306a36Sopenharmony_ci * This way we handle both writes to SYNC and NOSYNC 61962306a36Sopenharmony_ci * regions with the same code. 62062306a36Sopenharmony_ci */ 62162306a36Sopenharmony_ci if (likely(!error)) { 62262306a36Sopenharmony_ci bio_endio(bio); 62362306a36Sopenharmony_ci return; 62462306a36Sopenharmony_ci } 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci /* 62762306a36Sopenharmony_ci * If the bio is discard, return an error, but do not 62862306a36Sopenharmony_ci * degrade the array. 62962306a36Sopenharmony_ci */ 63062306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_DISCARD) { 63162306a36Sopenharmony_ci bio->bi_status = BLK_STS_NOTSUPP; 63262306a36Sopenharmony_ci bio_endio(bio); 63362306a36Sopenharmony_ci return; 63462306a36Sopenharmony_ci } 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci for (i = 0; i < ms->nr_mirrors; i++) 63762306a36Sopenharmony_ci if (test_bit(i, &error)) 63862306a36Sopenharmony_ci fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci /* 64162306a36Sopenharmony_ci * Need to raise event. Since raising 64262306a36Sopenharmony_ci * events can block, we need to do it in 64362306a36Sopenharmony_ci * the main thread. 64462306a36Sopenharmony_ci */ 64562306a36Sopenharmony_ci spin_lock_irqsave(&ms->lock, flags); 64662306a36Sopenharmony_ci if (!ms->failures.head) 64762306a36Sopenharmony_ci should_wake = 1; 64862306a36Sopenharmony_ci bio_list_add(&ms->failures, bio); 64962306a36Sopenharmony_ci spin_unlock_irqrestore(&ms->lock, flags); 65062306a36Sopenharmony_ci if (should_wake) 65162306a36Sopenharmony_ci wakeup_mirrord(ms); 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_cistatic void do_write(struct mirror_set *ms, struct bio *bio) 65562306a36Sopenharmony_ci{ 65662306a36Sopenharmony_ci unsigned int i; 65762306a36Sopenharmony_ci struct dm_io_region io[MAX_NR_MIRRORS], *dest = io; 65862306a36Sopenharmony_ci struct mirror *m; 65962306a36Sopenharmony_ci blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH); 66062306a36Sopenharmony_ci struct dm_io_request io_req = { 66162306a36Sopenharmony_ci .bi_opf = REQ_OP_WRITE | op_flags, 66262306a36Sopenharmony_ci .mem.type = DM_IO_BIO, 66362306a36Sopenharmony_ci .mem.ptr.bio = bio, 66462306a36Sopenharmony_ci .notify.fn = write_callback, 66562306a36Sopenharmony_ci .notify.context = bio, 66662306a36Sopenharmony_ci .client = ms->io_client, 66762306a36Sopenharmony_ci }; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_DISCARD) { 67062306a36Sopenharmony_ci io_req.bi_opf = REQ_OP_DISCARD | op_flags; 67162306a36Sopenharmony_ci io_req.mem.type = DM_IO_KMEM; 67262306a36Sopenharmony_ci io_req.mem.ptr.addr = NULL; 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) 67662306a36Sopenharmony_ci map_region(dest++, m, bio); 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci /* 67962306a36Sopenharmony_ci * Use default mirror because we only need it to retrieve the reference 68062306a36Sopenharmony_ci * to the mirror set in write_callback(). 68162306a36Sopenharmony_ci */ 68262306a36Sopenharmony_ci bio_set_m(bio, get_default_mirror(ms)); 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL, IOPRIO_DEFAULT)); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_cistatic void do_writes(struct mirror_set *ms, struct bio_list *writes) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci int state; 69062306a36Sopenharmony_ci struct bio *bio; 69162306a36Sopenharmony_ci struct bio_list sync, nosync, recover, *this_list = NULL; 69262306a36Sopenharmony_ci struct bio_list requeue; 69362306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 69462306a36Sopenharmony_ci region_t region; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci if (!writes->head) 69762306a36Sopenharmony_ci return; 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci /* 70062306a36Sopenharmony_ci * Classify each write. 70162306a36Sopenharmony_ci */ 70262306a36Sopenharmony_ci bio_list_init(&sync); 70362306a36Sopenharmony_ci bio_list_init(&nosync); 70462306a36Sopenharmony_ci bio_list_init(&recover); 70562306a36Sopenharmony_ci bio_list_init(&requeue); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci while ((bio = bio_list_pop(writes))) { 70862306a36Sopenharmony_ci if ((bio->bi_opf & REQ_PREFLUSH) || 70962306a36Sopenharmony_ci (bio_op(bio) == REQ_OP_DISCARD)) { 71062306a36Sopenharmony_ci bio_list_add(&sync, bio); 71162306a36Sopenharmony_ci continue; 71262306a36Sopenharmony_ci } 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci region = dm_rh_bio_to_region(ms->rh, bio); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci if (log->type->is_remote_recovering && 71762306a36Sopenharmony_ci log->type->is_remote_recovering(log, region)) { 71862306a36Sopenharmony_ci bio_list_add(&requeue, bio); 71962306a36Sopenharmony_ci continue; 72062306a36Sopenharmony_ci } 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci state = dm_rh_get_state(ms->rh, region, 1); 72362306a36Sopenharmony_ci switch (state) { 72462306a36Sopenharmony_ci case DM_RH_CLEAN: 72562306a36Sopenharmony_ci case DM_RH_DIRTY: 72662306a36Sopenharmony_ci this_list = &sync; 72762306a36Sopenharmony_ci break; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci case DM_RH_NOSYNC: 73062306a36Sopenharmony_ci this_list = &nosync; 73162306a36Sopenharmony_ci break; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci case DM_RH_RECOVERING: 73462306a36Sopenharmony_ci this_list = &recover; 73562306a36Sopenharmony_ci break; 73662306a36Sopenharmony_ci } 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci bio_list_add(this_list, bio); 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci /* 74262306a36Sopenharmony_ci * Add bios that are delayed due to remote recovery 74362306a36Sopenharmony_ci * back on to the write queue 74462306a36Sopenharmony_ci */ 74562306a36Sopenharmony_ci if (unlikely(requeue.head)) { 74662306a36Sopenharmony_ci spin_lock_irq(&ms->lock); 74762306a36Sopenharmony_ci bio_list_merge(&ms->writes, &requeue); 74862306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 74962306a36Sopenharmony_ci delayed_wake(ms); 75062306a36Sopenharmony_ci } 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci /* 75362306a36Sopenharmony_ci * Increment the pending counts for any regions that will 75462306a36Sopenharmony_ci * be written to (writes to recover regions are going to 75562306a36Sopenharmony_ci * be delayed). 75662306a36Sopenharmony_ci */ 75762306a36Sopenharmony_ci dm_rh_inc_pending(ms->rh, &sync); 75862306a36Sopenharmony_ci dm_rh_inc_pending(ms->rh, &nosync); 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci /* 76162306a36Sopenharmony_ci * If the flush fails on a previous call and succeeds here, 76262306a36Sopenharmony_ci * we must not reset the log_failure variable. We need 76362306a36Sopenharmony_ci * userspace interaction to do that. 76462306a36Sopenharmony_ci */ 76562306a36Sopenharmony_ci ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci /* 76862306a36Sopenharmony_ci * Dispatch io. 76962306a36Sopenharmony_ci */ 77062306a36Sopenharmony_ci if (unlikely(ms->log_failure) && errors_handled(ms)) { 77162306a36Sopenharmony_ci spin_lock_irq(&ms->lock); 77262306a36Sopenharmony_ci bio_list_merge(&ms->failures, &sync); 77362306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 77462306a36Sopenharmony_ci wakeup_mirrord(ms); 77562306a36Sopenharmony_ci } else 77662306a36Sopenharmony_ci while ((bio = bio_list_pop(&sync))) 77762306a36Sopenharmony_ci do_write(ms, bio); 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci while ((bio = bio_list_pop(&recover))) 78062306a36Sopenharmony_ci dm_rh_delay(ms->rh, bio); 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci while ((bio = bio_list_pop(&nosync))) { 78362306a36Sopenharmony_ci if (unlikely(ms->leg_failure) && errors_handled(ms) && !keep_log(ms)) { 78462306a36Sopenharmony_ci spin_lock_irq(&ms->lock); 78562306a36Sopenharmony_ci bio_list_add(&ms->failures, bio); 78662306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 78762306a36Sopenharmony_ci wakeup_mirrord(ms); 78862306a36Sopenharmony_ci } else { 78962306a36Sopenharmony_ci map_bio(get_default_mirror(ms), bio); 79062306a36Sopenharmony_ci submit_bio_noacct(bio); 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci } 79362306a36Sopenharmony_ci} 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_cistatic void do_failures(struct mirror_set *ms, struct bio_list *failures) 79662306a36Sopenharmony_ci{ 79762306a36Sopenharmony_ci struct bio *bio; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (likely(!failures->head)) 80062306a36Sopenharmony_ci return; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci /* 80362306a36Sopenharmony_ci * If the log has failed, unattempted writes are being 80462306a36Sopenharmony_ci * put on the holds list. We can't issue those writes 80562306a36Sopenharmony_ci * until a log has been marked, so we must store them. 80662306a36Sopenharmony_ci * 80762306a36Sopenharmony_ci * If a 'noflush' suspend is in progress, we can requeue 80862306a36Sopenharmony_ci * the I/O's to the core. This give userspace a chance 80962306a36Sopenharmony_ci * to reconfigure the mirror, at which point the core 81062306a36Sopenharmony_ci * will reissue the writes. If the 'noflush' flag is 81162306a36Sopenharmony_ci * not set, we have no choice but to return errors. 81262306a36Sopenharmony_ci * 81362306a36Sopenharmony_ci * Some writes on the failures list may have been 81462306a36Sopenharmony_ci * submitted before the log failure and represent a 81562306a36Sopenharmony_ci * failure to write to one of the devices. It is ok 81662306a36Sopenharmony_ci * for us to treat them the same and requeue them 81762306a36Sopenharmony_ci * as well. 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_ci while ((bio = bio_list_pop(failures))) { 82062306a36Sopenharmony_ci if (!ms->log_failure) { 82162306a36Sopenharmony_ci ms->in_sync = 0; 82262306a36Sopenharmony_ci dm_rh_mark_nosync(ms->rh, bio); 82362306a36Sopenharmony_ci } 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci /* 82662306a36Sopenharmony_ci * If all the legs are dead, fail the I/O. 82762306a36Sopenharmony_ci * If the device has failed and keep_log is enabled, 82862306a36Sopenharmony_ci * fail the I/O. 82962306a36Sopenharmony_ci * 83062306a36Sopenharmony_ci * If we have been told to handle errors, and keep_log 83162306a36Sopenharmony_ci * isn't enabled, hold the bio and wait for userspace to 83262306a36Sopenharmony_ci * deal with the problem. 83362306a36Sopenharmony_ci * 83462306a36Sopenharmony_ci * Otherwise pretend that the I/O succeeded. (This would 83562306a36Sopenharmony_ci * be wrong if the failed leg returned after reboot and 83662306a36Sopenharmony_ci * got replicated back to the good legs.) 83762306a36Sopenharmony_ci */ 83862306a36Sopenharmony_ci if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure))) 83962306a36Sopenharmony_ci bio_io_error(bio); 84062306a36Sopenharmony_ci else if (errors_handled(ms) && !keep_log(ms)) 84162306a36Sopenharmony_ci hold_bio(ms, bio); 84262306a36Sopenharmony_ci else 84362306a36Sopenharmony_ci bio_endio(bio); 84462306a36Sopenharmony_ci } 84562306a36Sopenharmony_ci} 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_cistatic void trigger_event(struct work_struct *work) 84862306a36Sopenharmony_ci{ 84962306a36Sopenharmony_ci struct mirror_set *ms = 85062306a36Sopenharmony_ci container_of(work, struct mirror_set, trigger_event); 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci dm_table_event(ms->ti->table); 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci/* 85662306a36Sopenharmony_ci *--------------------------------------------------------------- 85762306a36Sopenharmony_ci * kmirrord 85862306a36Sopenharmony_ci *--------------------------------------------------------------- 85962306a36Sopenharmony_ci */ 86062306a36Sopenharmony_cistatic void do_mirror(struct work_struct *work) 86162306a36Sopenharmony_ci{ 86262306a36Sopenharmony_ci struct mirror_set *ms = container_of(work, struct mirror_set, 86362306a36Sopenharmony_ci kmirrord_work); 86462306a36Sopenharmony_ci struct bio_list reads, writes, failures; 86562306a36Sopenharmony_ci unsigned long flags; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci spin_lock_irqsave(&ms->lock, flags); 86862306a36Sopenharmony_ci reads = ms->reads; 86962306a36Sopenharmony_ci writes = ms->writes; 87062306a36Sopenharmony_ci failures = ms->failures; 87162306a36Sopenharmony_ci bio_list_init(&ms->reads); 87262306a36Sopenharmony_ci bio_list_init(&ms->writes); 87362306a36Sopenharmony_ci bio_list_init(&ms->failures); 87462306a36Sopenharmony_ci spin_unlock_irqrestore(&ms->lock, flags); 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci dm_rh_update_states(ms->rh, errors_handled(ms)); 87762306a36Sopenharmony_ci do_recovery(ms); 87862306a36Sopenharmony_ci do_reads(ms, &reads); 87962306a36Sopenharmony_ci do_writes(ms, &writes); 88062306a36Sopenharmony_ci do_failures(ms, &failures); 88162306a36Sopenharmony_ci} 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci/* 88462306a36Sopenharmony_ci *--------------------------------------------------------------- 88562306a36Sopenharmony_ci * Target functions 88662306a36Sopenharmony_ci *--------------------------------------------------------------- 88762306a36Sopenharmony_ci */ 88862306a36Sopenharmony_cistatic struct mirror_set *alloc_context(unsigned int nr_mirrors, 88962306a36Sopenharmony_ci uint32_t region_size, 89062306a36Sopenharmony_ci struct dm_target *ti, 89162306a36Sopenharmony_ci struct dm_dirty_log *dl) 89262306a36Sopenharmony_ci{ 89362306a36Sopenharmony_ci struct mirror_set *ms = 89462306a36Sopenharmony_ci kzalloc(struct_size(ms, mirror, nr_mirrors), GFP_KERNEL); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci if (!ms) { 89762306a36Sopenharmony_ci ti->error = "Cannot allocate mirror context"; 89862306a36Sopenharmony_ci return NULL; 89962306a36Sopenharmony_ci } 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci spin_lock_init(&ms->lock); 90262306a36Sopenharmony_ci bio_list_init(&ms->reads); 90362306a36Sopenharmony_ci bio_list_init(&ms->writes); 90462306a36Sopenharmony_ci bio_list_init(&ms->failures); 90562306a36Sopenharmony_ci bio_list_init(&ms->holds); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci ms->ti = ti; 90862306a36Sopenharmony_ci ms->nr_mirrors = nr_mirrors; 90962306a36Sopenharmony_ci ms->nr_regions = dm_sector_div_up(ti->len, region_size); 91062306a36Sopenharmony_ci ms->in_sync = 0; 91162306a36Sopenharmony_ci ms->log_failure = 0; 91262306a36Sopenharmony_ci ms->leg_failure = 0; 91362306a36Sopenharmony_ci atomic_set(&ms->suspend, 0); 91462306a36Sopenharmony_ci atomic_set(&ms->default_mirror, DEFAULT_MIRROR); 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci ms->io_client = dm_io_client_create(); 91762306a36Sopenharmony_ci if (IS_ERR(ms->io_client)) { 91862306a36Sopenharmony_ci ti->error = "Error creating dm_io client"; 91962306a36Sopenharmony_ci kfree(ms); 92062306a36Sopenharmony_ci return NULL; 92162306a36Sopenharmony_ci } 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, 92462306a36Sopenharmony_ci wakeup_all_recovery_waiters, 92562306a36Sopenharmony_ci ms->ti->begin, MAX_RECOVERY, 92662306a36Sopenharmony_ci dl, region_size, ms->nr_regions); 92762306a36Sopenharmony_ci if (IS_ERR(ms->rh)) { 92862306a36Sopenharmony_ci ti->error = "Error creating dirty region hash"; 92962306a36Sopenharmony_ci dm_io_client_destroy(ms->io_client); 93062306a36Sopenharmony_ci kfree(ms); 93162306a36Sopenharmony_ci return NULL; 93262306a36Sopenharmony_ci } 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci return ms; 93562306a36Sopenharmony_ci} 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_cistatic void free_context(struct mirror_set *ms, struct dm_target *ti, 93862306a36Sopenharmony_ci unsigned int m) 93962306a36Sopenharmony_ci{ 94062306a36Sopenharmony_ci while (m--) 94162306a36Sopenharmony_ci dm_put_device(ti, ms->mirror[m].dev); 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci dm_io_client_destroy(ms->io_client); 94462306a36Sopenharmony_ci dm_region_hash_destroy(ms->rh); 94562306a36Sopenharmony_ci kfree(ms); 94662306a36Sopenharmony_ci} 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_cistatic int get_mirror(struct mirror_set *ms, struct dm_target *ti, 94962306a36Sopenharmony_ci unsigned int mirror, char **argv) 95062306a36Sopenharmony_ci{ 95162306a36Sopenharmony_ci unsigned long long offset; 95262306a36Sopenharmony_ci char dummy; 95362306a36Sopenharmony_ci int ret; 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 || 95662306a36Sopenharmony_ci offset != (sector_t)offset) { 95762306a36Sopenharmony_ci ti->error = "Invalid offset"; 95862306a36Sopenharmony_ci return -EINVAL; 95962306a36Sopenharmony_ci } 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), 96262306a36Sopenharmony_ci &ms->mirror[mirror].dev); 96362306a36Sopenharmony_ci if (ret) { 96462306a36Sopenharmony_ci ti->error = "Device lookup failure"; 96562306a36Sopenharmony_ci return ret; 96662306a36Sopenharmony_ci } 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci ms->mirror[mirror].ms = ms; 96962306a36Sopenharmony_ci atomic_set(&(ms->mirror[mirror].error_count), 0); 97062306a36Sopenharmony_ci ms->mirror[mirror].error_type = 0; 97162306a36Sopenharmony_ci ms->mirror[mirror].offset = offset; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci return 0; 97462306a36Sopenharmony_ci} 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci/* 97762306a36Sopenharmony_ci * Create dirty log: log_type #log_params <log_params> 97862306a36Sopenharmony_ci */ 97962306a36Sopenharmony_cistatic struct dm_dirty_log *create_dirty_log(struct dm_target *ti, 98062306a36Sopenharmony_ci unsigned int argc, char **argv, 98162306a36Sopenharmony_ci unsigned int *args_used) 98262306a36Sopenharmony_ci{ 98362306a36Sopenharmony_ci unsigned int param_count; 98462306a36Sopenharmony_ci struct dm_dirty_log *dl; 98562306a36Sopenharmony_ci char dummy; 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci if (argc < 2) { 98862306a36Sopenharmony_ci ti->error = "Insufficient mirror log arguments"; 98962306a36Sopenharmony_ci return NULL; 99062306a36Sopenharmony_ci } 99162306a36Sopenharmony_ci 99262306a36Sopenharmony_ci if (sscanf(argv[1], "%u%c", ¶m_count, &dummy) != 1) { 99362306a36Sopenharmony_ci ti->error = "Invalid mirror log argument count"; 99462306a36Sopenharmony_ci return NULL; 99562306a36Sopenharmony_ci } 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci *args_used = 2 + param_count; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (argc < *args_used) { 100062306a36Sopenharmony_ci ti->error = "Insufficient mirror log arguments"; 100162306a36Sopenharmony_ci return NULL; 100262306a36Sopenharmony_ci } 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count, 100562306a36Sopenharmony_ci argv + 2); 100662306a36Sopenharmony_ci if (!dl) { 100762306a36Sopenharmony_ci ti->error = "Error creating mirror dirty log"; 100862306a36Sopenharmony_ci return NULL; 100962306a36Sopenharmony_ci } 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci return dl; 101262306a36Sopenharmony_ci} 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_cistatic int parse_features(struct mirror_set *ms, unsigned int argc, char **argv, 101562306a36Sopenharmony_ci unsigned int *args_used) 101662306a36Sopenharmony_ci{ 101762306a36Sopenharmony_ci unsigned int num_features; 101862306a36Sopenharmony_ci struct dm_target *ti = ms->ti; 101962306a36Sopenharmony_ci char dummy; 102062306a36Sopenharmony_ci int i; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci *args_used = 0; 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci if (!argc) 102562306a36Sopenharmony_ci return 0; 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) { 102862306a36Sopenharmony_ci ti->error = "Invalid number of features"; 102962306a36Sopenharmony_ci return -EINVAL; 103062306a36Sopenharmony_ci } 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci argc--; 103362306a36Sopenharmony_ci argv++; 103462306a36Sopenharmony_ci (*args_used)++; 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci if (num_features > argc) { 103762306a36Sopenharmony_ci ti->error = "Not enough arguments to support feature count"; 103862306a36Sopenharmony_ci return -EINVAL; 103962306a36Sopenharmony_ci } 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci for (i = 0; i < num_features; i++) { 104262306a36Sopenharmony_ci if (!strcmp("handle_errors", argv[0])) 104362306a36Sopenharmony_ci ms->features |= DM_RAID1_HANDLE_ERRORS; 104462306a36Sopenharmony_ci else if (!strcmp("keep_log", argv[0])) 104562306a36Sopenharmony_ci ms->features |= DM_RAID1_KEEP_LOG; 104662306a36Sopenharmony_ci else { 104762306a36Sopenharmony_ci ti->error = "Unrecognised feature requested"; 104862306a36Sopenharmony_ci return -EINVAL; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci argc--; 105262306a36Sopenharmony_ci argv++; 105362306a36Sopenharmony_ci (*args_used)++; 105462306a36Sopenharmony_ci } 105562306a36Sopenharmony_ci if (!errors_handled(ms) && keep_log(ms)) { 105662306a36Sopenharmony_ci ti->error = "keep_log feature requires the handle_errors feature"; 105762306a36Sopenharmony_ci return -EINVAL; 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci return 0; 106162306a36Sopenharmony_ci} 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci/* 106462306a36Sopenharmony_ci * Construct a mirror mapping: 106562306a36Sopenharmony_ci * 106662306a36Sopenharmony_ci * log_type #log_params <log_params> 106762306a36Sopenharmony_ci * #mirrors [mirror_path offset]{2,} 106862306a36Sopenharmony_ci * [#features <features>] 106962306a36Sopenharmony_ci * 107062306a36Sopenharmony_ci * log_type is "core" or "disk" 107162306a36Sopenharmony_ci * #log_params is between 1 and 3 107262306a36Sopenharmony_ci * 107362306a36Sopenharmony_ci * If present, supported features are "handle_errors" and "keep_log". 107462306a36Sopenharmony_ci */ 107562306a36Sopenharmony_cistatic int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) 107662306a36Sopenharmony_ci{ 107762306a36Sopenharmony_ci int r; 107862306a36Sopenharmony_ci unsigned int nr_mirrors, m, args_used; 107962306a36Sopenharmony_ci struct mirror_set *ms; 108062306a36Sopenharmony_ci struct dm_dirty_log *dl; 108162306a36Sopenharmony_ci char dummy; 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci dl = create_dirty_log(ti, argc, argv, &args_used); 108462306a36Sopenharmony_ci if (!dl) 108562306a36Sopenharmony_ci return -EINVAL; 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci argv += args_used; 108862306a36Sopenharmony_ci argc -= args_used; 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 || 109162306a36Sopenharmony_ci nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) { 109262306a36Sopenharmony_ci ti->error = "Invalid number of mirrors"; 109362306a36Sopenharmony_ci dm_dirty_log_destroy(dl); 109462306a36Sopenharmony_ci return -EINVAL; 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci argv++, argc--; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci if (argc < nr_mirrors * 2) { 110062306a36Sopenharmony_ci ti->error = "Too few mirror arguments"; 110162306a36Sopenharmony_ci dm_dirty_log_destroy(dl); 110262306a36Sopenharmony_ci return -EINVAL; 110362306a36Sopenharmony_ci } 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); 110662306a36Sopenharmony_ci if (!ms) { 110762306a36Sopenharmony_ci dm_dirty_log_destroy(dl); 110862306a36Sopenharmony_ci return -ENOMEM; 110962306a36Sopenharmony_ci } 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci /* Get the mirror parameter sets */ 111262306a36Sopenharmony_ci for (m = 0; m < nr_mirrors; m++) { 111362306a36Sopenharmony_ci r = get_mirror(ms, ti, m, argv); 111462306a36Sopenharmony_ci if (r) { 111562306a36Sopenharmony_ci free_context(ms, ti, m); 111662306a36Sopenharmony_ci return r; 111762306a36Sopenharmony_ci } 111862306a36Sopenharmony_ci argv += 2; 111962306a36Sopenharmony_ci argc -= 2; 112062306a36Sopenharmony_ci } 112162306a36Sopenharmony_ci 112262306a36Sopenharmony_ci ti->private = ms; 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); 112562306a36Sopenharmony_ci if (r) 112662306a36Sopenharmony_ci goto err_free_context; 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci ti->num_flush_bios = 1; 112962306a36Sopenharmony_ci ti->num_discard_bios = 1; 113062306a36Sopenharmony_ci ti->per_io_data_size = sizeof(struct dm_raid1_bio_record); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0); 113362306a36Sopenharmony_ci if (!ms->kmirrord_wq) { 113462306a36Sopenharmony_ci DMERR("couldn't start kmirrord"); 113562306a36Sopenharmony_ci r = -ENOMEM; 113662306a36Sopenharmony_ci goto err_free_context; 113762306a36Sopenharmony_ci } 113862306a36Sopenharmony_ci INIT_WORK(&ms->kmirrord_work, do_mirror); 113962306a36Sopenharmony_ci timer_setup(&ms->timer, delayed_wake_fn, 0); 114062306a36Sopenharmony_ci ms->timer_pending = 0; 114162306a36Sopenharmony_ci INIT_WORK(&ms->trigger_event, trigger_event); 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci r = parse_features(ms, argc, argv, &args_used); 114462306a36Sopenharmony_ci if (r) 114562306a36Sopenharmony_ci goto err_destroy_wq; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci argv += args_used; 114862306a36Sopenharmony_ci argc -= args_used; 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci /* 115162306a36Sopenharmony_ci * Any read-balancing addition depends on the 115262306a36Sopenharmony_ci * DM_RAID1_HANDLE_ERRORS flag being present. 115362306a36Sopenharmony_ci * This is because the decision to balance depends 115462306a36Sopenharmony_ci * on the sync state of a region. If the above 115562306a36Sopenharmony_ci * flag is not present, we ignore errors; and 115662306a36Sopenharmony_ci * the sync state may be inaccurate. 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci if (argc) { 116062306a36Sopenharmony_ci ti->error = "Too many mirror arguments"; 116162306a36Sopenharmony_ci r = -EINVAL; 116262306a36Sopenharmony_ci goto err_destroy_wq; 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); 116662306a36Sopenharmony_ci if (IS_ERR(ms->kcopyd_client)) { 116762306a36Sopenharmony_ci r = PTR_ERR(ms->kcopyd_client); 116862306a36Sopenharmony_ci goto err_destroy_wq; 116962306a36Sopenharmony_ci } 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci wakeup_mirrord(ms); 117262306a36Sopenharmony_ci return 0; 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_cierr_destroy_wq: 117562306a36Sopenharmony_ci destroy_workqueue(ms->kmirrord_wq); 117662306a36Sopenharmony_cierr_free_context: 117762306a36Sopenharmony_ci free_context(ms, ti, ms->nr_mirrors); 117862306a36Sopenharmony_ci return r; 117962306a36Sopenharmony_ci} 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_cistatic void mirror_dtr(struct dm_target *ti) 118262306a36Sopenharmony_ci{ 118362306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci del_timer_sync(&ms->timer); 118662306a36Sopenharmony_ci flush_workqueue(ms->kmirrord_wq); 118762306a36Sopenharmony_ci flush_work(&ms->trigger_event); 118862306a36Sopenharmony_ci dm_kcopyd_client_destroy(ms->kcopyd_client); 118962306a36Sopenharmony_ci destroy_workqueue(ms->kmirrord_wq); 119062306a36Sopenharmony_ci free_context(ms, ti, ms->nr_mirrors); 119162306a36Sopenharmony_ci} 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci/* 119462306a36Sopenharmony_ci * Mirror mapping function 119562306a36Sopenharmony_ci */ 119662306a36Sopenharmony_cistatic int mirror_map(struct dm_target *ti, struct bio *bio) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci int r, rw = bio_data_dir(bio); 119962306a36Sopenharmony_ci struct mirror *m; 120062306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 120162306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 120262306a36Sopenharmony_ci struct dm_raid1_bio_record *bio_record = 120362306a36Sopenharmony_ci dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci bio_record->details.bi_bdev = NULL; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci if (rw == WRITE) { 120862306a36Sopenharmony_ci /* Save region for mirror_end_io() handler */ 120962306a36Sopenharmony_ci bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); 121062306a36Sopenharmony_ci queue_bio(ms, bio, rw); 121162306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 121262306a36Sopenharmony_ci } 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); 121562306a36Sopenharmony_ci if (r < 0 && r != -EWOULDBLOCK) 121662306a36Sopenharmony_ci return DM_MAPIO_KILL; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci /* 121962306a36Sopenharmony_ci * If region is not in-sync queue the bio. 122062306a36Sopenharmony_ci */ 122162306a36Sopenharmony_ci if (!r || (r == -EWOULDBLOCK)) { 122262306a36Sopenharmony_ci if (bio->bi_opf & REQ_RAHEAD) 122362306a36Sopenharmony_ci return DM_MAPIO_KILL; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci queue_bio(ms, bio, rw); 122662306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci /* 123062306a36Sopenharmony_ci * The region is in-sync and we can perform reads directly. 123162306a36Sopenharmony_ci * Store enough information so we can retry if it fails. 123262306a36Sopenharmony_ci */ 123362306a36Sopenharmony_ci m = choose_mirror(ms, bio->bi_iter.bi_sector); 123462306a36Sopenharmony_ci if (unlikely(!m)) 123562306a36Sopenharmony_ci return DM_MAPIO_KILL; 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_ci dm_bio_record(&bio_record->details, bio); 123862306a36Sopenharmony_ci bio_record->m = m; 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci map_bio(m, bio); 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 124362306a36Sopenharmony_ci} 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_cistatic int mirror_end_io(struct dm_target *ti, struct bio *bio, 124662306a36Sopenharmony_ci blk_status_t *error) 124762306a36Sopenharmony_ci{ 124862306a36Sopenharmony_ci int rw = bio_data_dir(bio); 124962306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 125062306a36Sopenharmony_ci struct mirror *m = NULL; 125162306a36Sopenharmony_ci struct dm_bio_details *bd = NULL; 125262306a36Sopenharmony_ci struct dm_raid1_bio_record *bio_record = 125362306a36Sopenharmony_ci dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci /* 125662306a36Sopenharmony_ci * We need to dec pending if this was a write. 125762306a36Sopenharmony_ci */ 125862306a36Sopenharmony_ci if (rw == WRITE) { 125962306a36Sopenharmony_ci if (!(bio->bi_opf & REQ_PREFLUSH) && 126062306a36Sopenharmony_ci bio_op(bio) != REQ_OP_DISCARD) 126162306a36Sopenharmony_ci dm_rh_dec(ms->rh, bio_record->write_region); 126262306a36Sopenharmony_ci return DM_ENDIO_DONE; 126362306a36Sopenharmony_ci } 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci if (*error == BLK_STS_NOTSUPP) 126662306a36Sopenharmony_ci goto out; 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci if (bio->bi_opf & REQ_RAHEAD) 126962306a36Sopenharmony_ci goto out; 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci if (unlikely(*error)) { 127262306a36Sopenharmony_ci if (!bio_record->details.bi_bdev) { 127362306a36Sopenharmony_ci /* 127462306a36Sopenharmony_ci * There wasn't enough memory to record necessary 127562306a36Sopenharmony_ci * information for a retry or there was no other 127662306a36Sopenharmony_ci * mirror in-sync. 127762306a36Sopenharmony_ci */ 127862306a36Sopenharmony_ci DMERR_LIMIT("Mirror read failed."); 127962306a36Sopenharmony_ci return DM_ENDIO_DONE; 128062306a36Sopenharmony_ci } 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci m = bio_record->m; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci DMERR("Mirror read failed from %s. Trying alternative device.", 128562306a36Sopenharmony_ci m->dev->name); 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_ci fail_mirror(m, DM_RAID1_READ_ERROR); 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci /* 129062306a36Sopenharmony_ci * A failed read is requeued for another attempt using an intact 129162306a36Sopenharmony_ci * mirror. 129262306a36Sopenharmony_ci */ 129362306a36Sopenharmony_ci if (default_ok(m) || mirror_available(ms, bio)) { 129462306a36Sopenharmony_ci bd = &bio_record->details; 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci dm_bio_restore(bd, bio); 129762306a36Sopenharmony_ci bio_record->details.bi_bdev = NULL; 129862306a36Sopenharmony_ci bio->bi_status = 0; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci queue_bio(ms, bio, rw); 130162306a36Sopenharmony_ci return DM_ENDIO_INCOMPLETE; 130262306a36Sopenharmony_ci } 130362306a36Sopenharmony_ci DMERR("All replicated volumes dead, failing I/O"); 130462306a36Sopenharmony_ci } 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ciout: 130762306a36Sopenharmony_ci bio_record->details.bi_bdev = NULL; 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci return DM_ENDIO_DONE; 131062306a36Sopenharmony_ci} 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_cistatic void mirror_presuspend(struct dm_target *ti) 131362306a36Sopenharmony_ci{ 131462306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 131562306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci struct bio_list holds; 131862306a36Sopenharmony_ci struct bio *bio; 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci atomic_set(&ms->suspend, 1); 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci /* 132362306a36Sopenharmony_ci * Process bios in the hold list to start recovery waiting 132462306a36Sopenharmony_ci * for bios in the hold list. After the process, no bio has 132562306a36Sopenharmony_ci * a chance to be added in the hold list because ms->suspend 132662306a36Sopenharmony_ci * is set. 132762306a36Sopenharmony_ci */ 132862306a36Sopenharmony_ci spin_lock_irq(&ms->lock); 132962306a36Sopenharmony_ci holds = ms->holds; 133062306a36Sopenharmony_ci bio_list_init(&ms->holds); 133162306a36Sopenharmony_ci spin_unlock_irq(&ms->lock); 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci while ((bio = bio_list_pop(&holds))) 133462306a36Sopenharmony_ci hold_bio(ms, bio); 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci /* 133762306a36Sopenharmony_ci * We must finish up all the work that we've 133862306a36Sopenharmony_ci * generated (i.e. recovery work). 133962306a36Sopenharmony_ci */ 134062306a36Sopenharmony_ci dm_rh_stop_recovery(ms->rh); 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci wait_event(_kmirrord_recovery_stopped, 134362306a36Sopenharmony_ci !dm_rh_recovery_in_flight(ms->rh)); 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci if (log->type->presuspend && log->type->presuspend(log)) 134662306a36Sopenharmony_ci /* FIXME: need better error handling */ 134762306a36Sopenharmony_ci DMWARN("log presuspend failed"); 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci /* 135062306a36Sopenharmony_ci * Now that recovery is complete/stopped and the 135162306a36Sopenharmony_ci * delayed bios are queued, we need to wait for 135262306a36Sopenharmony_ci * the worker thread to complete. This way, 135362306a36Sopenharmony_ci * we know that all of our I/O has been pushed. 135462306a36Sopenharmony_ci */ 135562306a36Sopenharmony_ci flush_workqueue(ms->kmirrord_wq); 135662306a36Sopenharmony_ci} 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_cistatic void mirror_postsuspend(struct dm_target *ti) 135962306a36Sopenharmony_ci{ 136062306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 136162306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci if (log->type->postsuspend && log->type->postsuspend(log)) 136462306a36Sopenharmony_ci /* FIXME: need better error handling */ 136562306a36Sopenharmony_ci DMWARN("log postsuspend failed"); 136662306a36Sopenharmony_ci} 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_cistatic void mirror_resume(struct dm_target *ti) 136962306a36Sopenharmony_ci{ 137062306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 137162306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci atomic_set(&ms->suspend, 0); 137462306a36Sopenharmony_ci if (log->type->resume && log->type->resume(log)) 137562306a36Sopenharmony_ci /* FIXME: need better error handling */ 137662306a36Sopenharmony_ci DMWARN("log resume failed"); 137762306a36Sopenharmony_ci dm_rh_start_recovery(ms->rh); 137862306a36Sopenharmony_ci} 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci/* 138162306a36Sopenharmony_ci * device_status_char 138262306a36Sopenharmony_ci * @m: mirror device/leg we want the status of 138362306a36Sopenharmony_ci * 138462306a36Sopenharmony_ci * We return one character representing the most severe error 138562306a36Sopenharmony_ci * we have encountered. 138662306a36Sopenharmony_ci * A => Alive - No failures 138762306a36Sopenharmony_ci * D => Dead - A write failure occurred leaving mirror out-of-sync 138862306a36Sopenharmony_ci * S => Sync - A sychronization failure occurred, mirror out-of-sync 138962306a36Sopenharmony_ci * R => Read - A read failure occurred, mirror data unaffected 139062306a36Sopenharmony_ci * 139162306a36Sopenharmony_ci * Returns: <char> 139262306a36Sopenharmony_ci */ 139362306a36Sopenharmony_cistatic char device_status_char(struct mirror *m) 139462306a36Sopenharmony_ci{ 139562306a36Sopenharmony_ci if (!atomic_read(&(m->error_count))) 139662306a36Sopenharmony_ci return 'A'; 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' : 139962306a36Sopenharmony_ci (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' : 140062306a36Sopenharmony_ci (test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' : 140162306a36Sopenharmony_ci (test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U'; 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci 140562306a36Sopenharmony_cistatic void mirror_status(struct dm_target *ti, status_type_t type, 140662306a36Sopenharmony_ci unsigned int status_flags, char *result, unsigned int maxlen) 140762306a36Sopenharmony_ci{ 140862306a36Sopenharmony_ci unsigned int m, sz = 0; 140962306a36Sopenharmony_ci int num_feature_args = 0; 141062306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 141162306a36Sopenharmony_ci struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 141262306a36Sopenharmony_ci char buffer[MAX_NR_MIRRORS + 1]; 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci switch (type) { 141562306a36Sopenharmony_ci case STATUSTYPE_INFO: 141662306a36Sopenharmony_ci DMEMIT("%d ", ms->nr_mirrors); 141762306a36Sopenharmony_ci for (m = 0; m < ms->nr_mirrors; m++) { 141862306a36Sopenharmony_ci DMEMIT("%s ", ms->mirror[m].dev->name); 141962306a36Sopenharmony_ci buffer[m] = device_status_char(&(ms->mirror[m])); 142062306a36Sopenharmony_ci } 142162306a36Sopenharmony_ci buffer[m] = '\0'; 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci DMEMIT("%llu/%llu 1 %s ", 142462306a36Sopenharmony_ci (unsigned long long)log->type->get_sync_count(log), 142562306a36Sopenharmony_ci (unsigned long long)ms->nr_regions, buffer); 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci sz += log->type->status(log, type, result+sz, maxlen-sz); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci break; 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci case STATUSTYPE_TABLE: 143262306a36Sopenharmony_ci sz = log->type->status(log, type, result, maxlen); 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci DMEMIT("%d", ms->nr_mirrors); 143562306a36Sopenharmony_ci for (m = 0; m < ms->nr_mirrors; m++) 143662306a36Sopenharmony_ci DMEMIT(" %s %llu", ms->mirror[m].dev->name, 143762306a36Sopenharmony_ci (unsigned long long)ms->mirror[m].offset); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci num_feature_args += !!errors_handled(ms); 144062306a36Sopenharmony_ci num_feature_args += !!keep_log(ms); 144162306a36Sopenharmony_ci if (num_feature_args) { 144262306a36Sopenharmony_ci DMEMIT(" %d", num_feature_args); 144362306a36Sopenharmony_ci if (errors_handled(ms)) 144462306a36Sopenharmony_ci DMEMIT(" handle_errors"); 144562306a36Sopenharmony_ci if (keep_log(ms)) 144662306a36Sopenharmony_ci DMEMIT(" keep_log"); 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci break; 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci case STATUSTYPE_IMA: 145262306a36Sopenharmony_ci DMEMIT_TARGET_NAME_VERSION(ti->type); 145362306a36Sopenharmony_ci DMEMIT(",nr_mirrors=%d", ms->nr_mirrors); 145462306a36Sopenharmony_ci for (m = 0; m < ms->nr_mirrors; m++) { 145562306a36Sopenharmony_ci DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name); 145662306a36Sopenharmony_ci DMEMIT(",mirror_device_%d_status=%c", 145762306a36Sopenharmony_ci m, device_status_char(&(ms->mirror[m]))); 145862306a36Sopenharmony_ci } 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n'); 146162306a36Sopenharmony_ci DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n'); 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci DMEMIT(",log_type_status="); 146462306a36Sopenharmony_ci sz += log->type->status(log, type, result+sz, maxlen-sz); 146562306a36Sopenharmony_ci DMEMIT(";"); 146662306a36Sopenharmony_ci break; 146762306a36Sopenharmony_ci } 146862306a36Sopenharmony_ci} 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_cistatic int mirror_iterate_devices(struct dm_target *ti, 147162306a36Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci struct mirror_set *ms = ti->private; 147462306a36Sopenharmony_ci int ret = 0; 147562306a36Sopenharmony_ci unsigned int i; 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci for (i = 0; !ret && i < ms->nr_mirrors; i++) 147862306a36Sopenharmony_ci ret = fn(ti, ms->mirror[i].dev, 147962306a36Sopenharmony_ci ms->mirror[i].offset, ti->len, data); 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_ci return ret; 148262306a36Sopenharmony_ci} 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_cistatic struct target_type mirror_target = { 148562306a36Sopenharmony_ci .name = "mirror", 148662306a36Sopenharmony_ci .version = {1, 14, 0}, 148762306a36Sopenharmony_ci .module = THIS_MODULE, 148862306a36Sopenharmony_ci .ctr = mirror_ctr, 148962306a36Sopenharmony_ci .dtr = mirror_dtr, 149062306a36Sopenharmony_ci .map = mirror_map, 149162306a36Sopenharmony_ci .end_io = mirror_end_io, 149262306a36Sopenharmony_ci .presuspend = mirror_presuspend, 149362306a36Sopenharmony_ci .postsuspend = mirror_postsuspend, 149462306a36Sopenharmony_ci .resume = mirror_resume, 149562306a36Sopenharmony_ci .status = mirror_status, 149662306a36Sopenharmony_ci .iterate_devices = mirror_iterate_devices, 149762306a36Sopenharmony_ci}; 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_cistatic int __init dm_mirror_init(void) 150062306a36Sopenharmony_ci{ 150162306a36Sopenharmony_ci int r; 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_ci dm_raid1_wq = alloc_workqueue("dm_raid1_wq", 0, 0); 150462306a36Sopenharmony_ci if (!dm_raid1_wq) { 150562306a36Sopenharmony_ci DMERR("Failed to alloc workqueue"); 150662306a36Sopenharmony_ci return -ENOMEM; 150762306a36Sopenharmony_ci } 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci r = dm_register_target(&mirror_target); 151062306a36Sopenharmony_ci if (r < 0) { 151162306a36Sopenharmony_ci destroy_workqueue(dm_raid1_wq); 151262306a36Sopenharmony_ci return r; 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci return 0; 151662306a36Sopenharmony_ci} 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_cistatic void __exit dm_mirror_exit(void) 151962306a36Sopenharmony_ci{ 152062306a36Sopenharmony_ci destroy_workqueue(dm_raid1_wq); 152162306a36Sopenharmony_ci dm_unregister_target(&mirror_target); 152262306a36Sopenharmony_ci} 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci/* Module hooks */ 152562306a36Sopenharmony_cimodule_init(dm_mirror_init); 152662306a36Sopenharmony_cimodule_exit(dm_mirror_exit); 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " mirror target"); 152962306a36Sopenharmony_ciMODULE_AUTHOR("Joe Thornber"); 153062306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 1531