18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (C) 2003 Sistina Software Limited. 38c2ecf20Sopenharmony_ci * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This file is released under the GPL. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <linux/device-mapper.h> 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include "dm-rq.h" 118c2ecf20Sopenharmony_ci#include "dm-bio-record.h" 128c2ecf20Sopenharmony_ci#include "dm-path-selector.h" 138c2ecf20Sopenharmony_ci#include "dm-uevent.h" 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 168c2ecf20Sopenharmony_ci#include <linux/ctype.h> 178c2ecf20Sopenharmony_ci#include <linux/init.h> 188c2ecf20Sopenharmony_ci#include <linux/mempool.h> 198c2ecf20Sopenharmony_ci#include <linux/module.h> 208c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 218c2ecf20Sopenharmony_ci#include <linux/slab.h> 228c2ecf20Sopenharmony_ci#include <linux/time.h> 238c2ecf20Sopenharmony_ci#include <linux/timer.h> 248c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 258c2ecf20Sopenharmony_ci#include <linux/delay.h> 268c2ecf20Sopenharmony_ci#include <scsi/scsi_dh.h> 278c2ecf20Sopenharmony_ci#include <linux/atomic.h> 288c2ecf20Sopenharmony_ci#include <linux/blk-mq.h> 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX "multipath" 318c2ecf20Sopenharmony_ci#define DM_PG_INIT_DELAY_MSECS 2000 328c2ecf20Sopenharmony_ci#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) 338c2ecf20Sopenharmony_ci#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistatic unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci/* Path properties */ 388c2ecf20Sopenharmony_cistruct pgpath { 398c2ecf20Sopenharmony_ci struct list_head list; 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci struct priority_group *pg; /* Owning PG */ 428c2ecf20Sopenharmony_ci unsigned fail_count; /* Cumulative failure count */ 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci struct dm_path path; 458c2ecf20Sopenharmony_ci struct delayed_work activate_path; 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci bool is_active:1; /* Path status */ 488c2ecf20Sopenharmony_ci}; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci/* 538c2ecf20Sopenharmony_ci * Paths are grouped into Priority Groups and numbered from 1 upwards. 548c2ecf20Sopenharmony_ci * Each has a path selector which controls which path gets used. 558c2ecf20Sopenharmony_ci */ 568c2ecf20Sopenharmony_cistruct priority_group { 578c2ecf20Sopenharmony_ci struct list_head list; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci struct multipath *m; /* Owning multipath instance */ 608c2ecf20Sopenharmony_ci struct path_selector ps; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci unsigned pg_num; /* Reference number */ 638c2ecf20Sopenharmony_ci unsigned nr_pgpaths; /* Number of paths in PG */ 648c2ecf20Sopenharmony_ci struct list_head pgpaths; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci bool bypassed:1; /* Temporarily bypass this PG? */ 678c2ecf20Sopenharmony_ci}; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci/* Multipath context */ 708c2ecf20Sopenharmony_cistruct multipath { 718c2ecf20Sopenharmony_ci unsigned long flags; /* Multipath state flags */ 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci spinlock_t lock; 748c2ecf20Sopenharmony_ci enum dm_queue_mode queue_mode; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci struct pgpath *current_pgpath; 778c2ecf20Sopenharmony_ci struct priority_group *current_pg; 788c2ecf20Sopenharmony_ci struct priority_group *next_pg; /* Switch to this PG if set */ 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci atomic_t nr_valid_paths; /* Total number of usable paths */ 818c2ecf20Sopenharmony_ci unsigned nr_priority_groups; 828c2ecf20Sopenharmony_ci struct list_head priority_groups; 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci const char *hw_handler_name; 858c2ecf20Sopenharmony_ci char *hw_handler_params; 868c2ecf20Sopenharmony_ci wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ 878c2ecf20Sopenharmony_ci unsigned pg_init_retries; /* Number of times to retry pg_init */ 888c2ecf20Sopenharmony_ci unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ 898c2ecf20Sopenharmony_ci atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ 908c2ecf20Sopenharmony_ci atomic_t pg_init_count; /* Number of times pg_init called */ 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci struct mutex work_mutex; 938c2ecf20Sopenharmony_ci struct work_struct trigger_event; 948c2ecf20Sopenharmony_ci struct dm_target *ti; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci struct work_struct process_queued_bios; 978c2ecf20Sopenharmony_ci struct bio_list queued_bios; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci struct timer_list nopath_timer; /* Timeout for queue_if_no_path */ 1008c2ecf20Sopenharmony_ci}; 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci/* 1038c2ecf20Sopenharmony_ci * Context information attached to each io we process. 1048c2ecf20Sopenharmony_ci */ 1058c2ecf20Sopenharmony_cistruct dm_mpath_io { 1068c2ecf20Sopenharmony_ci struct pgpath *pgpath; 1078c2ecf20Sopenharmony_ci size_t nr_bytes; 1088c2ecf20Sopenharmony_ci}; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_citypedef int (*action_fn) (struct pgpath *pgpath); 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_cistatic struct workqueue_struct *kmultipathd, *kmpath_handlerd; 1138c2ecf20Sopenharmony_cistatic void trigger_event(struct work_struct *work); 1148c2ecf20Sopenharmony_cistatic void activate_or_offline_path(struct pgpath *pgpath); 1158c2ecf20Sopenharmony_cistatic void activate_path_work(struct work_struct *work); 1168c2ecf20Sopenharmony_cistatic void process_queued_bios(struct work_struct *work); 1178c2ecf20Sopenharmony_cistatic void queue_if_no_path_timeout_work(struct timer_list *t); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci/*----------------------------------------------- 1208c2ecf20Sopenharmony_ci * Multipath state flags. 1218c2ecf20Sopenharmony_ci *-----------------------------------------------*/ 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci#define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */ 1248c2ecf20Sopenharmony_ci#define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */ 1258c2ecf20Sopenharmony_ci#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */ 1268c2ecf20Sopenharmony_ci#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3 /* If there's already a hw_handler present, don't change it. */ 1278c2ecf20Sopenharmony_ci#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ 1288c2ecf20Sopenharmony_ci#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ 1298c2ecf20Sopenharmony_ci#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_cistatic bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m) 1328c2ecf20Sopenharmony_ci{ 1338c2ecf20Sopenharmony_ci bool r = test_bit(MPATHF_bit, &m->flags); 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci if (r) { 1368c2ecf20Sopenharmony_ci unsigned long flags; 1378c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 1388c2ecf20Sopenharmony_ci r = test_bit(MPATHF_bit, &m->flags); 1398c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 1408c2ecf20Sopenharmony_ci } 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci return r; 1438c2ecf20Sopenharmony_ci} 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci/*----------------------------------------------- 1468c2ecf20Sopenharmony_ci * Allocation routines 1478c2ecf20Sopenharmony_ci *-----------------------------------------------*/ 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_cistatic struct pgpath *alloc_pgpath(void) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci if (!pgpath) 1548c2ecf20Sopenharmony_ci return NULL; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci pgpath->is_active = true; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci return pgpath; 1598c2ecf20Sopenharmony_ci} 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_cistatic void free_pgpath(struct pgpath *pgpath) 1628c2ecf20Sopenharmony_ci{ 1638c2ecf20Sopenharmony_ci kfree(pgpath); 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_cistatic struct priority_group *alloc_priority_group(void) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci struct priority_group *pg; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci pg = kzalloc(sizeof(*pg), GFP_KERNEL); 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci if (pg) 1738c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&pg->pgpaths); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci return pg; 1768c2ecf20Sopenharmony_ci} 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_cistatic void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) 1798c2ecf20Sopenharmony_ci{ 1808c2ecf20Sopenharmony_ci struct pgpath *pgpath, *tmp; 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { 1838c2ecf20Sopenharmony_ci list_del(&pgpath->list); 1848c2ecf20Sopenharmony_ci dm_put_device(ti, pgpath->path.dev); 1858c2ecf20Sopenharmony_ci free_pgpath(pgpath); 1868c2ecf20Sopenharmony_ci } 1878c2ecf20Sopenharmony_ci} 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_cistatic void free_priority_group(struct priority_group *pg, 1908c2ecf20Sopenharmony_ci struct dm_target *ti) 1918c2ecf20Sopenharmony_ci{ 1928c2ecf20Sopenharmony_ci struct path_selector *ps = &pg->ps; 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci if (ps->type) { 1958c2ecf20Sopenharmony_ci ps->type->destroy(ps); 1968c2ecf20Sopenharmony_ci dm_put_path_selector(ps->type); 1978c2ecf20Sopenharmony_ci } 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci free_pgpaths(&pg->pgpaths, ti); 2008c2ecf20Sopenharmony_ci kfree(pg); 2018c2ecf20Sopenharmony_ci} 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_cistatic struct multipath *alloc_multipath(struct dm_target *ti) 2048c2ecf20Sopenharmony_ci{ 2058c2ecf20Sopenharmony_ci struct multipath *m; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci m = kzalloc(sizeof(*m), GFP_KERNEL); 2088c2ecf20Sopenharmony_ci if (m) { 2098c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&m->priority_groups); 2108c2ecf20Sopenharmony_ci spin_lock_init(&m->lock); 2118c2ecf20Sopenharmony_ci atomic_set(&m->nr_valid_paths, 0); 2128c2ecf20Sopenharmony_ci INIT_WORK(&m->trigger_event, trigger_event); 2138c2ecf20Sopenharmony_ci mutex_init(&m->work_mutex); 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci m->queue_mode = DM_TYPE_NONE; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci m->ti = ti; 2188c2ecf20Sopenharmony_ci ti->private = m; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0); 2218c2ecf20Sopenharmony_ci } 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci return m; 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_NONE) { 2298c2ecf20Sopenharmony_ci m->queue_mode = DM_TYPE_REQUEST_BASED; 2308c2ecf20Sopenharmony_ci } else if (m->queue_mode == DM_TYPE_BIO_BASED) { 2318c2ecf20Sopenharmony_ci INIT_WORK(&m->process_queued_bios, process_queued_bios); 2328c2ecf20Sopenharmony_ci /* 2338c2ecf20Sopenharmony_ci * bio-based doesn't support any direct scsi_dh management; 2348c2ecf20Sopenharmony_ci * it just discovers if a scsi_dh is attached. 2358c2ecf20Sopenharmony_ci */ 2368c2ecf20Sopenharmony_ci set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); 2378c2ecf20Sopenharmony_ci } 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci dm_table_set_type(ti->table, m->queue_mode); 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci /* 2428c2ecf20Sopenharmony_ci * Init fields that are only used when a scsi_dh is attached 2438c2ecf20Sopenharmony_ci * - must do this unconditionally (really doesn't hurt non-SCSI uses) 2448c2ecf20Sopenharmony_ci */ 2458c2ecf20Sopenharmony_ci set_bit(MPATHF_QUEUE_IO, &m->flags); 2468c2ecf20Sopenharmony_ci atomic_set(&m->pg_init_in_progress, 0); 2478c2ecf20Sopenharmony_ci atomic_set(&m->pg_init_count, 0); 2488c2ecf20Sopenharmony_ci m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; 2498c2ecf20Sopenharmony_ci init_waitqueue_head(&m->pg_init_wait); 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci return 0; 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_cistatic void free_multipath(struct multipath *m) 2558c2ecf20Sopenharmony_ci{ 2568c2ecf20Sopenharmony_ci struct priority_group *pg, *tmp; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { 2598c2ecf20Sopenharmony_ci list_del(&pg->list); 2608c2ecf20Sopenharmony_ci free_priority_group(pg, m->ti); 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci kfree(m->hw_handler_name); 2648c2ecf20Sopenharmony_ci kfree(m->hw_handler_params); 2658c2ecf20Sopenharmony_ci mutex_destroy(&m->work_mutex); 2668c2ecf20Sopenharmony_ci kfree(m); 2678c2ecf20Sopenharmony_ci} 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_cistatic struct dm_mpath_io *get_mpio(union map_info *info) 2708c2ecf20Sopenharmony_ci{ 2718c2ecf20Sopenharmony_ci return info->ptr; 2728c2ecf20Sopenharmony_ci} 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_cistatic size_t multipath_per_bio_data_size(void) 2758c2ecf20Sopenharmony_ci{ 2768c2ecf20Sopenharmony_ci return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic struct dm_mpath_io *get_mpio_from_bio(struct bio *bio) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci return dm_per_bio_data(bio, multipath_per_bio_data_size()); 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_cistatic struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */ 2878c2ecf20Sopenharmony_ci void *bio_details = mpio + 1; 2888c2ecf20Sopenharmony_ci return bio_details; 2898c2ecf20Sopenharmony_ci} 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_cistatic void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p) 2928c2ecf20Sopenharmony_ci{ 2938c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio_from_bio(bio); 2948c2ecf20Sopenharmony_ci struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio); 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci mpio->nr_bytes = bio->bi_iter.bi_size; 2978c2ecf20Sopenharmony_ci mpio->pgpath = NULL; 2988c2ecf20Sopenharmony_ci *mpio_p = mpio; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci dm_bio_record(bio_details, bio); 3018c2ecf20Sopenharmony_ci} 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci/*----------------------------------------------- 3048c2ecf20Sopenharmony_ci * Path selection 3058c2ecf20Sopenharmony_ci *-----------------------------------------------*/ 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_cistatic int __pg_init_all_paths(struct multipath *m) 3088c2ecf20Sopenharmony_ci{ 3098c2ecf20Sopenharmony_ci struct pgpath *pgpath; 3108c2ecf20Sopenharmony_ci unsigned long pg_init_delay = 0; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci lockdep_assert_held(&m->lock); 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) 3158c2ecf20Sopenharmony_ci return 0; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci atomic_inc(&m->pg_init_count); 3188c2ecf20Sopenharmony_ci clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci /* Check here to reset pg_init_required */ 3218c2ecf20Sopenharmony_ci if (!m->current_pg) 3228c2ecf20Sopenharmony_ci return 0; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags)) 3258c2ecf20Sopenharmony_ci pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? 3268c2ecf20Sopenharmony_ci m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); 3278c2ecf20Sopenharmony_ci list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { 3288c2ecf20Sopenharmony_ci /* Skip failed paths */ 3298c2ecf20Sopenharmony_ci if (!pgpath->is_active) 3308c2ecf20Sopenharmony_ci continue; 3318c2ecf20Sopenharmony_ci if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path, 3328c2ecf20Sopenharmony_ci pg_init_delay)) 3338c2ecf20Sopenharmony_ci atomic_inc(&m->pg_init_in_progress); 3348c2ecf20Sopenharmony_ci } 3358c2ecf20Sopenharmony_ci return atomic_read(&m->pg_init_in_progress); 3368c2ecf20Sopenharmony_ci} 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_cistatic int pg_init_all_paths(struct multipath *m) 3398c2ecf20Sopenharmony_ci{ 3408c2ecf20Sopenharmony_ci int ret; 3418c2ecf20Sopenharmony_ci unsigned long flags; 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 3448c2ecf20Sopenharmony_ci ret = __pg_init_all_paths(m); 3458c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci return ret; 3488c2ecf20Sopenharmony_ci} 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_cistatic void __switch_pg(struct multipath *m, struct priority_group *pg) 3518c2ecf20Sopenharmony_ci{ 3528c2ecf20Sopenharmony_ci lockdep_assert_held(&m->lock); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci m->current_pg = pg; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci /* Must we initialise the PG first, and queue I/O till it's ready? */ 3578c2ecf20Sopenharmony_ci if (m->hw_handler_name) { 3588c2ecf20Sopenharmony_ci set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 3598c2ecf20Sopenharmony_ci set_bit(MPATHF_QUEUE_IO, &m->flags); 3608c2ecf20Sopenharmony_ci } else { 3618c2ecf20Sopenharmony_ci clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 3628c2ecf20Sopenharmony_ci clear_bit(MPATHF_QUEUE_IO, &m->flags); 3638c2ecf20Sopenharmony_ci } 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci atomic_set(&m->pg_init_count, 0); 3668c2ecf20Sopenharmony_ci} 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_cistatic struct pgpath *choose_path_in_pg(struct multipath *m, 3698c2ecf20Sopenharmony_ci struct priority_group *pg, 3708c2ecf20Sopenharmony_ci size_t nr_bytes) 3718c2ecf20Sopenharmony_ci{ 3728c2ecf20Sopenharmony_ci unsigned long flags; 3738c2ecf20Sopenharmony_ci struct dm_path *path; 3748c2ecf20Sopenharmony_ci struct pgpath *pgpath; 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci path = pg->ps.type->select_path(&pg->ps, nr_bytes); 3778c2ecf20Sopenharmony_ci if (!path) 3788c2ecf20Sopenharmony_ci return ERR_PTR(-ENXIO); 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci pgpath = path_to_pgpath(path); 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci if (unlikely(READ_ONCE(m->current_pg) != pg)) { 3838c2ecf20Sopenharmony_ci /* Only update current_pgpath if pg changed */ 3848c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 3858c2ecf20Sopenharmony_ci m->current_pgpath = pgpath; 3868c2ecf20Sopenharmony_ci __switch_pg(m, pg); 3878c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci return pgpath; 3918c2ecf20Sopenharmony_ci} 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_cistatic struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) 3948c2ecf20Sopenharmony_ci{ 3958c2ecf20Sopenharmony_ci unsigned long flags; 3968c2ecf20Sopenharmony_ci struct priority_group *pg; 3978c2ecf20Sopenharmony_ci struct pgpath *pgpath; 3988c2ecf20Sopenharmony_ci unsigned bypassed = 1; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci if (!atomic_read(&m->nr_valid_paths)) { 4018c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 4028c2ecf20Sopenharmony_ci clear_bit(MPATHF_QUEUE_IO, &m->flags); 4038c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4048c2ecf20Sopenharmony_ci goto failed; 4058c2ecf20Sopenharmony_ci } 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci /* Were we instructed to switch PG? */ 4088c2ecf20Sopenharmony_ci if (READ_ONCE(m->next_pg)) { 4098c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 4108c2ecf20Sopenharmony_ci pg = m->next_pg; 4118c2ecf20Sopenharmony_ci if (!pg) { 4128c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4138c2ecf20Sopenharmony_ci goto check_current_pg; 4148c2ecf20Sopenharmony_ci } 4158c2ecf20Sopenharmony_ci m->next_pg = NULL; 4168c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4178c2ecf20Sopenharmony_ci pgpath = choose_path_in_pg(m, pg, nr_bytes); 4188c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(pgpath)) 4198c2ecf20Sopenharmony_ci return pgpath; 4208c2ecf20Sopenharmony_ci } 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci /* Don't change PG until it has no remaining paths */ 4238c2ecf20Sopenharmony_cicheck_current_pg: 4248c2ecf20Sopenharmony_ci pg = READ_ONCE(m->current_pg); 4258c2ecf20Sopenharmony_ci if (pg) { 4268c2ecf20Sopenharmony_ci pgpath = choose_path_in_pg(m, pg, nr_bytes); 4278c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(pgpath)) 4288c2ecf20Sopenharmony_ci return pgpath; 4298c2ecf20Sopenharmony_ci } 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci /* 4328c2ecf20Sopenharmony_ci * Loop through priority groups until we find a valid path. 4338c2ecf20Sopenharmony_ci * First time we skip PGs marked 'bypassed'. 4348c2ecf20Sopenharmony_ci * Second time we only try the ones we skipped, but set 4358c2ecf20Sopenharmony_ci * pg_init_delay_retry so we do not hammer controllers. 4368c2ecf20Sopenharmony_ci */ 4378c2ecf20Sopenharmony_ci do { 4388c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 4398c2ecf20Sopenharmony_ci if (pg->bypassed == !!bypassed) 4408c2ecf20Sopenharmony_ci continue; 4418c2ecf20Sopenharmony_ci pgpath = choose_path_in_pg(m, pg, nr_bytes); 4428c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(pgpath)) { 4438c2ecf20Sopenharmony_ci if (!bypassed) { 4448c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 4458c2ecf20Sopenharmony_ci set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 4468c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci return pgpath; 4498c2ecf20Sopenharmony_ci } 4508c2ecf20Sopenharmony_ci } 4518c2ecf20Sopenharmony_ci } while (bypassed--); 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_cifailed: 4548c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 4558c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 4568c2ecf20Sopenharmony_ci m->current_pg = NULL; 4578c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci return NULL; 4608c2ecf20Sopenharmony_ci} 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci/* 4638c2ecf20Sopenharmony_ci * dm_report_EIO() is a macro instead of a function to make pr_debug_ratelimited() 4648c2ecf20Sopenharmony_ci * report the function name and line number of the function from which 4658c2ecf20Sopenharmony_ci * it has been invoked. 4668c2ecf20Sopenharmony_ci */ 4678c2ecf20Sopenharmony_ci#define dm_report_EIO(m) \ 4688c2ecf20Sopenharmony_cido { \ 4698c2ecf20Sopenharmony_ci DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \ 4708c2ecf20Sopenharmony_ci dm_table_device_name((m)->ti->table), \ 4718c2ecf20Sopenharmony_ci test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ 4728c2ecf20Sopenharmony_ci test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ 4738c2ecf20Sopenharmony_ci dm_noflush_suspending((m)->ti)); \ 4748c2ecf20Sopenharmony_ci} while (0) 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci/* 4778c2ecf20Sopenharmony_ci * Check whether bios must be queued in the device-mapper core rather 4788c2ecf20Sopenharmony_ci * than here in the target. 4798c2ecf20Sopenharmony_ci */ 4808c2ecf20Sopenharmony_cistatic bool __must_push_back(struct multipath *m) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci return dm_noflush_suspending(m->ti); 4838c2ecf20Sopenharmony_ci} 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_cistatic bool must_push_back_rq(struct multipath *m) 4868c2ecf20Sopenharmony_ci{ 4878c2ecf20Sopenharmony_ci unsigned long flags; 4888c2ecf20Sopenharmony_ci bool ret; 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 4918c2ecf20Sopenharmony_ci ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m)); 4928c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci return ret; 4958c2ecf20Sopenharmony_ci} 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci/* 4988c2ecf20Sopenharmony_ci * Map cloned requests (request-based multipath) 4998c2ecf20Sopenharmony_ci */ 5008c2ecf20Sopenharmony_cistatic int multipath_clone_and_map(struct dm_target *ti, struct request *rq, 5018c2ecf20Sopenharmony_ci union map_info *map_context, 5028c2ecf20Sopenharmony_ci struct request **__clone) 5038c2ecf20Sopenharmony_ci{ 5048c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 5058c2ecf20Sopenharmony_ci size_t nr_bytes = blk_rq_bytes(rq); 5068c2ecf20Sopenharmony_ci struct pgpath *pgpath; 5078c2ecf20Sopenharmony_ci struct block_device *bdev; 5088c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio(map_context); 5098c2ecf20Sopenharmony_ci struct request_queue *q; 5108c2ecf20Sopenharmony_ci struct request *clone; 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci /* Do we need to select a new pgpath? */ 5138c2ecf20Sopenharmony_ci pgpath = READ_ONCE(m->current_pgpath); 5148c2ecf20Sopenharmony_ci if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) 5158c2ecf20Sopenharmony_ci pgpath = choose_pgpath(m, nr_bytes); 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci if (!pgpath) { 5188c2ecf20Sopenharmony_ci if (must_push_back_rq(m)) 5198c2ecf20Sopenharmony_ci return DM_MAPIO_DELAY_REQUEUE; 5208c2ecf20Sopenharmony_ci dm_report_EIO(m); /* Failed */ 5218c2ecf20Sopenharmony_ci return DM_MAPIO_KILL; 5228c2ecf20Sopenharmony_ci } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || 5238c2ecf20Sopenharmony_ci mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { 5248c2ecf20Sopenharmony_ci pg_init_all_paths(m); 5258c2ecf20Sopenharmony_ci return DM_MAPIO_DELAY_REQUEUE; 5268c2ecf20Sopenharmony_ci } 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci mpio->pgpath = pgpath; 5298c2ecf20Sopenharmony_ci mpio->nr_bytes = nr_bytes; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci bdev = pgpath->path.dev->bdev; 5328c2ecf20Sopenharmony_ci q = bdev_get_queue(bdev); 5338c2ecf20Sopenharmony_ci clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, 5348c2ecf20Sopenharmony_ci BLK_MQ_REQ_NOWAIT); 5358c2ecf20Sopenharmony_ci if (IS_ERR(clone)) { 5368c2ecf20Sopenharmony_ci /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ 5378c2ecf20Sopenharmony_ci if (blk_queue_dying(q)) { 5388c2ecf20Sopenharmony_ci atomic_inc(&m->pg_init_in_progress); 5398c2ecf20Sopenharmony_ci activate_or_offline_path(pgpath); 5408c2ecf20Sopenharmony_ci return DM_MAPIO_DELAY_REQUEUE; 5418c2ecf20Sopenharmony_ci } 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ci /* 5448c2ecf20Sopenharmony_ci * blk-mq's SCHED_RESTART can cover this requeue, so we 5458c2ecf20Sopenharmony_ci * needn't deal with it by DELAY_REQUEUE. More importantly, 5468c2ecf20Sopenharmony_ci * we have to return DM_MAPIO_REQUEUE so that blk-mq can 5478c2ecf20Sopenharmony_ci * get the queue busy feedback (via BLK_STS_RESOURCE), 5488c2ecf20Sopenharmony_ci * otherwise I/O merging can suffer. 5498c2ecf20Sopenharmony_ci */ 5508c2ecf20Sopenharmony_ci return DM_MAPIO_REQUEUE; 5518c2ecf20Sopenharmony_ci } 5528c2ecf20Sopenharmony_ci clone->bio = clone->biotail = NULL; 5538c2ecf20Sopenharmony_ci clone->rq_disk = bdev->bd_disk; 5548c2ecf20Sopenharmony_ci clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; 5558c2ecf20Sopenharmony_ci *__clone = clone; 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci if (pgpath->pg->ps.type->start_io) 5588c2ecf20Sopenharmony_ci pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 5598c2ecf20Sopenharmony_ci &pgpath->path, 5608c2ecf20Sopenharmony_ci nr_bytes); 5618c2ecf20Sopenharmony_ci return DM_MAPIO_REMAPPED; 5628c2ecf20Sopenharmony_ci} 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_cistatic void multipath_release_clone(struct request *clone, 5658c2ecf20Sopenharmony_ci union map_info *map_context) 5668c2ecf20Sopenharmony_ci{ 5678c2ecf20Sopenharmony_ci if (unlikely(map_context)) { 5688c2ecf20Sopenharmony_ci /* 5698c2ecf20Sopenharmony_ci * non-NULL map_context means caller is still map 5708c2ecf20Sopenharmony_ci * method; must undo multipath_clone_and_map() 5718c2ecf20Sopenharmony_ci */ 5728c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio(map_context); 5738c2ecf20Sopenharmony_ci struct pgpath *pgpath = mpio->pgpath; 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci if (pgpath && pgpath->pg->ps.type->end_io) 5768c2ecf20Sopenharmony_ci pgpath->pg->ps.type->end_io(&pgpath->pg->ps, 5778c2ecf20Sopenharmony_ci &pgpath->path, 5788c2ecf20Sopenharmony_ci mpio->nr_bytes, 5798c2ecf20Sopenharmony_ci clone->io_start_time_ns); 5808c2ecf20Sopenharmony_ci } 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci blk_put_request(clone); 5838c2ecf20Sopenharmony_ci} 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci/* 5868c2ecf20Sopenharmony_ci * Map cloned bios (bio-based multipath) 5878c2ecf20Sopenharmony_ci */ 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_cistatic void __multipath_queue_bio(struct multipath *m, struct bio *bio) 5908c2ecf20Sopenharmony_ci{ 5918c2ecf20Sopenharmony_ci /* Queue for the daemon to resubmit */ 5928c2ecf20Sopenharmony_ci bio_list_add(&m->queued_bios, bio); 5938c2ecf20Sopenharmony_ci if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) 5948c2ecf20Sopenharmony_ci queue_work(kmultipathd, &m->process_queued_bios); 5958c2ecf20Sopenharmony_ci} 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_cistatic void multipath_queue_bio(struct multipath *m, struct bio *bio) 5988c2ecf20Sopenharmony_ci{ 5998c2ecf20Sopenharmony_ci unsigned long flags; 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 6028c2ecf20Sopenharmony_ci __multipath_queue_bio(m, bio); 6038c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 6048c2ecf20Sopenharmony_ci} 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_cistatic struct pgpath *__map_bio(struct multipath *m, struct bio *bio) 6078c2ecf20Sopenharmony_ci{ 6088c2ecf20Sopenharmony_ci struct pgpath *pgpath; 6098c2ecf20Sopenharmony_ci unsigned long flags; 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci /* Do we need to select a new pgpath? */ 6128c2ecf20Sopenharmony_ci pgpath = READ_ONCE(m->current_pgpath); 6138c2ecf20Sopenharmony_ci if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) 6148c2ecf20Sopenharmony_ci pgpath = choose_pgpath(m, bio->bi_iter.bi_size); 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci if (!pgpath) { 6178c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 6188c2ecf20Sopenharmony_ci if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 6198c2ecf20Sopenharmony_ci __multipath_queue_bio(m, bio); 6208c2ecf20Sopenharmony_ci pgpath = ERR_PTR(-EAGAIN); 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) || 6258c2ecf20Sopenharmony_ci mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) { 6268c2ecf20Sopenharmony_ci multipath_queue_bio(m, bio); 6278c2ecf20Sopenharmony_ci pg_init_all_paths(m); 6288c2ecf20Sopenharmony_ci return ERR_PTR(-EAGAIN); 6298c2ecf20Sopenharmony_ci } 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci return pgpath; 6328c2ecf20Sopenharmony_ci} 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_cistatic int __multipath_map_bio(struct multipath *m, struct bio *bio, 6358c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio) 6368c2ecf20Sopenharmony_ci{ 6378c2ecf20Sopenharmony_ci struct pgpath *pgpath = __map_bio(m, bio); 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci if (IS_ERR(pgpath)) 6408c2ecf20Sopenharmony_ci return DM_MAPIO_SUBMITTED; 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci if (!pgpath) { 6438c2ecf20Sopenharmony_ci if (__must_push_back(m)) 6448c2ecf20Sopenharmony_ci return DM_MAPIO_REQUEUE; 6458c2ecf20Sopenharmony_ci dm_report_EIO(m); 6468c2ecf20Sopenharmony_ci return DM_MAPIO_KILL; 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci mpio->pgpath = pgpath; 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci bio->bi_status = 0; 6528c2ecf20Sopenharmony_ci bio_set_dev(bio, pgpath->path.dev->bdev); 6538c2ecf20Sopenharmony_ci bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci if (pgpath->pg->ps.type->start_io) 6568c2ecf20Sopenharmony_ci pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 6578c2ecf20Sopenharmony_ci &pgpath->path, 6588c2ecf20Sopenharmony_ci mpio->nr_bytes); 6598c2ecf20Sopenharmony_ci return DM_MAPIO_REMAPPED; 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_cistatic int multipath_map_bio(struct dm_target *ti, struct bio *bio) 6638c2ecf20Sopenharmony_ci{ 6648c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 6658c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = NULL; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci multipath_init_per_bio_data(bio, &mpio); 6688c2ecf20Sopenharmony_ci return __multipath_map_bio(m, bio, mpio); 6698c2ecf20Sopenharmony_ci} 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_cistatic void process_queued_io_list(struct multipath *m) 6728c2ecf20Sopenharmony_ci{ 6738c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_REQUEST_BASED) 6748c2ecf20Sopenharmony_ci dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table)); 6758c2ecf20Sopenharmony_ci else if (m->queue_mode == DM_TYPE_BIO_BASED) 6768c2ecf20Sopenharmony_ci queue_work(kmultipathd, &m->process_queued_bios); 6778c2ecf20Sopenharmony_ci} 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_cistatic void process_queued_bios(struct work_struct *work) 6808c2ecf20Sopenharmony_ci{ 6818c2ecf20Sopenharmony_ci int r; 6828c2ecf20Sopenharmony_ci unsigned long flags; 6838c2ecf20Sopenharmony_ci struct bio *bio; 6848c2ecf20Sopenharmony_ci struct bio_list bios; 6858c2ecf20Sopenharmony_ci struct blk_plug plug; 6868c2ecf20Sopenharmony_ci struct multipath *m = 6878c2ecf20Sopenharmony_ci container_of(work, struct multipath, process_queued_bios); 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci bio_list_init(&bios); 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci if (bio_list_empty(&m->queued_bios)) { 6948c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 6958c2ecf20Sopenharmony_ci return; 6968c2ecf20Sopenharmony_ci } 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci bio_list_merge(&bios, &m->queued_bios); 6998c2ecf20Sopenharmony_ci bio_list_init(&m->queued_bios); 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci blk_start_plug(&plug); 7048c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bios))) { 7058c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio_from_bio(bio); 7068c2ecf20Sopenharmony_ci dm_bio_restore(get_bio_details_from_mpio(mpio), bio); 7078c2ecf20Sopenharmony_ci r = __multipath_map_bio(m, bio, mpio); 7088c2ecf20Sopenharmony_ci switch (r) { 7098c2ecf20Sopenharmony_ci case DM_MAPIO_KILL: 7108c2ecf20Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 7118c2ecf20Sopenharmony_ci bio_endio(bio); 7128c2ecf20Sopenharmony_ci break; 7138c2ecf20Sopenharmony_ci case DM_MAPIO_REQUEUE: 7148c2ecf20Sopenharmony_ci bio->bi_status = BLK_STS_DM_REQUEUE; 7158c2ecf20Sopenharmony_ci bio_endio(bio); 7168c2ecf20Sopenharmony_ci break; 7178c2ecf20Sopenharmony_ci case DM_MAPIO_REMAPPED: 7188c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 7198c2ecf20Sopenharmony_ci break; 7208c2ecf20Sopenharmony_ci case DM_MAPIO_SUBMITTED: 7218c2ecf20Sopenharmony_ci break; 7228c2ecf20Sopenharmony_ci default: 7238c2ecf20Sopenharmony_ci WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r); 7248c2ecf20Sopenharmony_ci } 7258c2ecf20Sopenharmony_ci } 7268c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 7278c2ecf20Sopenharmony_ci} 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci/* 7308c2ecf20Sopenharmony_ci * If we run out of usable paths, should we queue I/O or error it? 7318c2ecf20Sopenharmony_ci */ 7328c2ecf20Sopenharmony_cistatic int queue_if_no_path(struct multipath *m, bool queue_if_no_path, 7338c2ecf20Sopenharmony_ci bool save_old_value, const char *caller) 7348c2ecf20Sopenharmony_ci{ 7358c2ecf20Sopenharmony_ci unsigned long flags; 7368c2ecf20Sopenharmony_ci bool queue_if_no_path_bit, saved_queue_if_no_path_bit; 7378c2ecf20Sopenharmony_ci const char *dm_dev_name = dm_table_device_name(m->ti->table); 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d", 7408c2ecf20Sopenharmony_ci dm_dev_name, __func__, caller, queue_if_no_path, save_old_value); 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci queue_if_no_path_bit = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); 7458c2ecf20Sopenharmony_ci saved_queue_if_no_path_bit = test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci if (save_old_value) { 7488c2ecf20Sopenharmony_ci if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) { 7498c2ecf20Sopenharmony_ci DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!", 7508c2ecf20Sopenharmony_ci dm_dev_name); 7518c2ecf20Sopenharmony_ci } else 7528c2ecf20Sopenharmony_ci assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit); 7538c2ecf20Sopenharmony_ci } else if (!queue_if_no_path && saved_queue_if_no_path_bit) { 7548c2ecf20Sopenharmony_ci /* due to "fail_if_no_path" message, need to honor it. */ 7558c2ecf20Sopenharmony_ci clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 7568c2ecf20Sopenharmony_ci } 7578c2ecf20Sopenharmony_ci assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d", 7608c2ecf20Sopenharmony_ci dm_dev_name, __func__, 7618c2ecf20Sopenharmony_ci test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), 7628c2ecf20Sopenharmony_ci test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags), 7638c2ecf20Sopenharmony_ci dm_noflush_suspending(m->ti)); 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci if (!queue_if_no_path) { 7688c2ecf20Sopenharmony_ci dm_table_run_md_queue_async(m->ti->table); 7698c2ecf20Sopenharmony_ci process_queued_io_list(m); 7708c2ecf20Sopenharmony_ci } 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci return 0; 7738c2ecf20Sopenharmony_ci} 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci/* 7768c2ecf20Sopenharmony_ci * If the queue_if_no_path timeout fires, turn off queue_if_no_path and 7778c2ecf20Sopenharmony_ci * process any queued I/O. 7788c2ecf20Sopenharmony_ci */ 7798c2ecf20Sopenharmony_cistatic void queue_if_no_path_timeout_work(struct timer_list *t) 7808c2ecf20Sopenharmony_ci{ 7818c2ecf20Sopenharmony_ci struct multipath *m = from_timer(m, t, nopath_timer); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci DMWARN("queue_if_no_path timeout on %s, failing queued IO", 7848c2ecf20Sopenharmony_ci dm_table_device_name(m->ti->table)); 7858c2ecf20Sopenharmony_ci queue_if_no_path(m, false, false, __func__); 7868c2ecf20Sopenharmony_ci} 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci/* 7898c2ecf20Sopenharmony_ci * Enable the queue_if_no_path timeout if necessary. 7908c2ecf20Sopenharmony_ci * Called with m->lock held. 7918c2ecf20Sopenharmony_ci */ 7928c2ecf20Sopenharmony_cistatic void enable_nopath_timeout(struct multipath *m) 7938c2ecf20Sopenharmony_ci{ 7948c2ecf20Sopenharmony_ci unsigned long queue_if_no_path_timeout = 7958c2ecf20Sopenharmony_ci READ_ONCE(queue_if_no_path_timeout_secs) * HZ; 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci lockdep_assert_held(&m->lock); 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci if (queue_if_no_path_timeout > 0 && 8008c2ecf20Sopenharmony_ci atomic_read(&m->nr_valid_paths) == 0 && 8018c2ecf20Sopenharmony_ci test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 8028c2ecf20Sopenharmony_ci mod_timer(&m->nopath_timer, 8038c2ecf20Sopenharmony_ci jiffies + queue_if_no_path_timeout); 8048c2ecf20Sopenharmony_ci } 8058c2ecf20Sopenharmony_ci} 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_cistatic void disable_nopath_timeout(struct multipath *m) 8088c2ecf20Sopenharmony_ci{ 8098c2ecf20Sopenharmony_ci del_timer_sync(&m->nopath_timer); 8108c2ecf20Sopenharmony_ci} 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci/* 8138c2ecf20Sopenharmony_ci * An event is triggered whenever a path is taken out of use. 8148c2ecf20Sopenharmony_ci * Includes path failure and PG bypass. 8158c2ecf20Sopenharmony_ci */ 8168c2ecf20Sopenharmony_cistatic void trigger_event(struct work_struct *work) 8178c2ecf20Sopenharmony_ci{ 8188c2ecf20Sopenharmony_ci struct multipath *m = 8198c2ecf20Sopenharmony_ci container_of(work, struct multipath, trigger_event); 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci dm_table_event(m->ti->table); 8228c2ecf20Sopenharmony_ci} 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci/*----------------------------------------------------------------- 8258c2ecf20Sopenharmony_ci * Constructor/argument parsing: 8268c2ecf20Sopenharmony_ci * <#multipath feature args> [<arg>]* 8278c2ecf20Sopenharmony_ci * <#hw_handler args> [hw_handler [<arg>]*] 8288c2ecf20Sopenharmony_ci * <#priority groups> 8298c2ecf20Sopenharmony_ci * <initial priority group> 8308c2ecf20Sopenharmony_ci * [<selector> <#selector args> [<arg>]* 8318c2ecf20Sopenharmony_ci * <#paths> <#per-path selector args> 8328c2ecf20Sopenharmony_ci * [<path> [<arg>]* ]+ ]+ 8338c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/ 8348c2ecf20Sopenharmony_cistatic int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, 8358c2ecf20Sopenharmony_ci struct dm_target *ti) 8368c2ecf20Sopenharmony_ci{ 8378c2ecf20Sopenharmony_ci int r; 8388c2ecf20Sopenharmony_ci struct path_selector_type *pst; 8398c2ecf20Sopenharmony_ci unsigned ps_argc; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci static const struct dm_arg _args[] = { 8428c2ecf20Sopenharmony_ci {0, 1024, "invalid number of path selector args"}, 8438c2ecf20Sopenharmony_ci }; 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci pst = dm_get_path_selector(dm_shift_arg(as)); 8468c2ecf20Sopenharmony_ci if (!pst) { 8478c2ecf20Sopenharmony_ci ti->error = "unknown path selector type"; 8488c2ecf20Sopenharmony_ci return -EINVAL; 8498c2ecf20Sopenharmony_ci } 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); 8528c2ecf20Sopenharmony_ci if (r) { 8538c2ecf20Sopenharmony_ci dm_put_path_selector(pst); 8548c2ecf20Sopenharmony_ci return -EINVAL; 8558c2ecf20Sopenharmony_ci } 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ci r = pst->create(&pg->ps, ps_argc, as->argv); 8588c2ecf20Sopenharmony_ci if (r) { 8598c2ecf20Sopenharmony_ci dm_put_path_selector(pst); 8608c2ecf20Sopenharmony_ci ti->error = "path selector constructor failed"; 8618c2ecf20Sopenharmony_ci return r; 8628c2ecf20Sopenharmony_ci } 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci pg->ps.type = pst; 8658c2ecf20Sopenharmony_ci dm_consume_args(as, ps_argc); 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_ci return 0; 8688c2ecf20Sopenharmony_ci} 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_cistatic int setup_scsi_dh(struct block_device *bdev, struct multipath *m, 8718c2ecf20Sopenharmony_ci const char **attached_handler_name, char **error) 8728c2ecf20Sopenharmony_ci{ 8738c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(bdev); 8748c2ecf20Sopenharmony_ci int r; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) { 8778c2ecf20Sopenharmony_ciretain: 8788c2ecf20Sopenharmony_ci if (*attached_handler_name) { 8798c2ecf20Sopenharmony_ci /* 8808c2ecf20Sopenharmony_ci * Clear any hw_handler_params associated with a 8818c2ecf20Sopenharmony_ci * handler that isn't already attached. 8828c2ecf20Sopenharmony_ci */ 8838c2ecf20Sopenharmony_ci if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { 8848c2ecf20Sopenharmony_ci kfree(m->hw_handler_params); 8858c2ecf20Sopenharmony_ci m->hw_handler_params = NULL; 8868c2ecf20Sopenharmony_ci } 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci /* 8898c2ecf20Sopenharmony_ci * Reset hw_handler_name to match the attached handler 8908c2ecf20Sopenharmony_ci * 8918c2ecf20Sopenharmony_ci * NB. This modifies the table line to show the actual 8928c2ecf20Sopenharmony_ci * handler instead of the original table passed in. 8938c2ecf20Sopenharmony_ci */ 8948c2ecf20Sopenharmony_ci kfree(m->hw_handler_name); 8958c2ecf20Sopenharmony_ci m->hw_handler_name = *attached_handler_name; 8968c2ecf20Sopenharmony_ci *attached_handler_name = NULL; 8978c2ecf20Sopenharmony_ci } 8988c2ecf20Sopenharmony_ci } 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci if (m->hw_handler_name) { 9018c2ecf20Sopenharmony_ci r = scsi_dh_attach(q, m->hw_handler_name); 9028c2ecf20Sopenharmony_ci if (r == -EBUSY) { 9038c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci printk(KERN_INFO "dm-mpath: retaining handler on device %s\n", 9068c2ecf20Sopenharmony_ci bdevname(bdev, b)); 9078c2ecf20Sopenharmony_ci goto retain; 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci if (r < 0) { 9108c2ecf20Sopenharmony_ci *error = "error attaching hardware handler"; 9118c2ecf20Sopenharmony_ci return r; 9128c2ecf20Sopenharmony_ci } 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_ci if (m->hw_handler_params) { 9158c2ecf20Sopenharmony_ci r = scsi_dh_set_params(q, m->hw_handler_params); 9168c2ecf20Sopenharmony_ci if (r < 0) { 9178c2ecf20Sopenharmony_ci *error = "unable to set hardware handler parameters"; 9188c2ecf20Sopenharmony_ci return r; 9198c2ecf20Sopenharmony_ci } 9208c2ecf20Sopenharmony_ci } 9218c2ecf20Sopenharmony_ci } 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci return 0; 9248c2ecf20Sopenharmony_ci} 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_cistatic struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, 9278c2ecf20Sopenharmony_ci struct dm_target *ti) 9288c2ecf20Sopenharmony_ci{ 9298c2ecf20Sopenharmony_ci int r; 9308c2ecf20Sopenharmony_ci struct pgpath *p; 9318c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 9328c2ecf20Sopenharmony_ci struct request_queue *q; 9338c2ecf20Sopenharmony_ci const char *attached_handler_name = NULL; 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci /* we need at least a path arg */ 9368c2ecf20Sopenharmony_ci if (as->argc < 1) { 9378c2ecf20Sopenharmony_ci ti->error = "no device given"; 9388c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 9398c2ecf20Sopenharmony_ci } 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci p = alloc_pgpath(); 9428c2ecf20Sopenharmony_ci if (!p) 9438c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 9448c2ecf20Sopenharmony_ci 9458c2ecf20Sopenharmony_ci r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), 9468c2ecf20Sopenharmony_ci &p->path.dev); 9478c2ecf20Sopenharmony_ci if (r) { 9488c2ecf20Sopenharmony_ci ti->error = "error getting device"; 9498c2ecf20Sopenharmony_ci goto bad; 9508c2ecf20Sopenharmony_ci } 9518c2ecf20Sopenharmony_ci 9528c2ecf20Sopenharmony_ci q = bdev_get_queue(p->path.dev->bdev); 9538c2ecf20Sopenharmony_ci attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); 9548c2ecf20Sopenharmony_ci if (attached_handler_name || m->hw_handler_name) { 9558c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&p->activate_path, activate_path_work); 9568c2ecf20Sopenharmony_ci r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); 9578c2ecf20Sopenharmony_ci kfree(attached_handler_name); 9588c2ecf20Sopenharmony_ci if (r) { 9598c2ecf20Sopenharmony_ci dm_put_device(ti, p->path.dev); 9608c2ecf20Sopenharmony_ci goto bad; 9618c2ecf20Sopenharmony_ci } 9628c2ecf20Sopenharmony_ci } 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); 9658c2ecf20Sopenharmony_ci if (r) { 9668c2ecf20Sopenharmony_ci dm_put_device(ti, p->path.dev); 9678c2ecf20Sopenharmony_ci goto bad; 9688c2ecf20Sopenharmony_ci } 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci return p; 9718c2ecf20Sopenharmony_ci bad: 9728c2ecf20Sopenharmony_ci free_pgpath(p); 9738c2ecf20Sopenharmony_ci return ERR_PTR(r); 9748c2ecf20Sopenharmony_ci} 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_cistatic struct priority_group *parse_priority_group(struct dm_arg_set *as, 9778c2ecf20Sopenharmony_ci struct multipath *m) 9788c2ecf20Sopenharmony_ci{ 9798c2ecf20Sopenharmony_ci static const struct dm_arg _args[] = { 9808c2ecf20Sopenharmony_ci {1, 1024, "invalid number of paths"}, 9818c2ecf20Sopenharmony_ci {0, 1024, "invalid number of selector args"} 9828c2ecf20Sopenharmony_ci }; 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci int r; 9858c2ecf20Sopenharmony_ci unsigned i, nr_selector_args, nr_args; 9868c2ecf20Sopenharmony_ci struct priority_group *pg; 9878c2ecf20Sopenharmony_ci struct dm_target *ti = m->ti; 9888c2ecf20Sopenharmony_ci 9898c2ecf20Sopenharmony_ci if (as->argc < 2) { 9908c2ecf20Sopenharmony_ci as->argc = 0; 9918c2ecf20Sopenharmony_ci ti->error = "not enough priority group arguments"; 9928c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 9938c2ecf20Sopenharmony_ci } 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci pg = alloc_priority_group(); 9968c2ecf20Sopenharmony_ci if (!pg) { 9978c2ecf20Sopenharmony_ci ti->error = "couldn't allocate priority group"; 9988c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 9998c2ecf20Sopenharmony_ci } 10008c2ecf20Sopenharmony_ci pg->m = m; 10018c2ecf20Sopenharmony_ci 10028c2ecf20Sopenharmony_ci r = parse_path_selector(as, pg, ti); 10038c2ecf20Sopenharmony_ci if (r) 10048c2ecf20Sopenharmony_ci goto bad; 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci /* 10078c2ecf20Sopenharmony_ci * read the paths 10088c2ecf20Sopenharmony_ci */ 10098c2ecf20Sopenharmony_ci r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); 10108c2ecf20Sopenharmony_ci if (r) 10118c2ecf20Sopenharmony_ci goto bad; 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); 10148c2ecf20Sopenharmony_ci if (r) 10158c2ecf20Sopenharmony_ci goto bad; 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci nr_args = 1 + nr_selector_args; 10188c2ecf20Sopenharmony_ci for (i = 0; i < pg->nr_pgpaths; i++) { 10198c2ecf20Sopenharmony_ci struct pgpath *pgpath; 10208c2ecf20Sopenharmony_ci struct dm_arg_set path_args; 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci if (as->argc < nr_args) { 10238c2ecf20Sopenharmony_ci ti->error = "not enough path parameters"; 10248c2ecf20Sopenharmony_ci r = -EINVAL; 10258c2ecf20Sopenharmony_ci goto bad; 10268c2ecf20Sopenharmony_ci } 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci path_args.argc = nr_args; 10298c2ecf20Sopenharmony_ci path_args.argv = as->argv; 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci pgpath = parse_path(&path_args, &pg->ps, ti); 10328c2ecf20Sopenharmony_ci if (IS_ERR(pgpath)) { 10338c2ecf20Sopenharmony_ci r = PTR_ERR(pgpath); 10348c2ecf20Sopenharmony_ci goto bad; 10358c2ecf20Sopenharmony_ci } 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci pgpath->pg = pg; 10388c2ecf20Sopenharmony_ci list_add_tail(&pgpath->list, &pg->pgpaths); 10398c2ecf20Sopenharmony_ci dm_consume_args(as, nr_args); 10408c2ecf20Sopenharmony_ci } 10418c2ecf20Sopenharmony_ci 10428c2ecf20Sopenharmony_ci return pg; 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci bad: 10458c2ecf20Sopenharmony_ci free_priority_group(pg, ti); 10468c2ecf20Sopenharmony_ci return ERR_PTR(r); 10478c2ecf20Sopenharmony_ci} 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_cistatic int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) 10508c2ecf20Sopenharmony_ci{ 10518c2ecf20Sopenharmony_ci unsigned hw_argc; 10528c2ecf20Sopenharmony_ci int ret; 10538c2ecf20Sopenharmony_ci struct dm_target *ti = m->ti; 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci static const struct dm_arg _args[] = { 10568c2ecf20Sopenharmony_ci {0, 1024, "invalid number of hardware handler args"}, 10578c2ecf20Sopenharmony_ci }; 10588c2ecf20Sopenharmony_ci 10598c2ecf20Sopenharmony_ci if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) 10608c2ecf20Sopenharmony_ci return -EINVAL; 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci if (!hw_argc) 10638c2ecf20Sopenharmony_ci return 0; 10648c2ecf20Sopenharmony_ci 10658c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_BIO_BASED) { 10668c2ecf20Sopenharmony_ci dm_consume_args(as, hw_argc); 10678c2ecf20Sopenharmony_ci DMERR("bio-based multipath doesn't allow hardware handler args"); 10688c2ecf20Sopenharmony_ci return 0; 10698c2ecf20Sopenharmony_ci } 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ci m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); 10728c2ecf20Sopenharmony_ci if (!m->hw_handler_name) 10738c2ecf20Sopenharmony_ci return -EINVAL; 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci if (hw_argc > 1) { 10768c2ecf20Sopenharmony_ci char *p; 10778c2ecf20Sopenharmony_ci int i, j, len = 4; 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci for (i = 0; i <= hw_argc - 2; i++) 10808c2ecf20Sopenharmony_ci len += strlen(as->argv[i]) + 1; 10818c2ecf20Sopenharmony_ci p = m->hw_handler_params = kzalloc(len, GFP_KERNEL); 10828c2ecf20Sopenharmony_ci if (!p) { 10838c2ecf20Sopenharmony_ci ti->error = "memory allocation failed"; 10848c2ecf20Sopenharmony_ci ret = -ENOMEM; 10858c2ecf20Sopenharmony_ci goto fail; 10868c2ecf20Sopenharmony_ci } 10878c2ecf20Sopenharmony_ci j = sprintf(p, "%d", hw_argc - 1); 10888c2ecf20Sopenharmony_ci for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) 10898c2ecf20Sopenharmony_ci j = sprintf(p, "%s", as->argv[i]); 10908c2ecf20Sopenharmony_ci } 10918c2ecf20Sopenharmony_ci dm_consume_args(as, hw_argc - 1); 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci return 0; 10948c2ecf20Sopenharmony_cifail: 10958c2ecf20Sopenharmony_ci kfree(m->hw_handler_name); 10968c2ecf20Sopenharmony_ci m->hw_handler_name = NULL; 10978c2ecf20Sopenharmony_ci return ret; 10988c2ecf20Sopenharmony_ci} 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_cistatic int parse_features(struct dm_arg_set *as, struct multipath *m) 11018c2ecf20Sopenharmony_ci{ 11028c2ecf20Sopenharmony_ci int r; 11038c2ecf20Sopenharmony_ci unsigned argc; 11048c2ecf20Sopenharmony_ci struct dm_target *ti = m->ti; 11058c2ecf20Sopenharmony_ci const char *arg_name; 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci static const struct dm_arg _args[] = { 11088c2ecf20Sopenharmony_ci {0, 8, "invalid number of feature args"}, 11098c2ecf20Sopenharmony_ci {1, 50, "pg_init_retries must be between 1 and 50"}, 11108c2ecf20Sopenharmony_ci {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, 11118c2ecf20Sopenharmony_ci }; 11128c2ecf20Sopenharmony_ci 11138c2ecf20Sopenharmony_ci r = dm_read_arg_group(_args, as, &argc, &ti->error); 11148c2ecf20Sopenharmony_ci if (r) 11158c2ecf20Sopenharmony_ci return -EINVAL; 11168c2ecf20Sopenharmony_ci 11178c2ecf20Sopenharmony_ci if (!argc) 11188c2ecf20Sopenharmony_ci return 0; 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci do { 11218c2ecf20Sopenharmony_ci arg_name = dm_shift_arg(as); 11228c2ecf20Sopenharmony_ci argc--; 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci if (!strcasecmp(arg_name, "queue_if_no_path")) { 11258c2ecf20Sopenharmony_ci r = queue_if_no_path(m, true, false, __func__); 11268c2ecf20Sopenharmony_ci continue; 11278c2ecf20Sopenharmony_ci } 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { 11308c2ecf20Sopenharmony_ci set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); 11318c2ecf20Sopenharmony_ci continue; 11328c2ecf20Sopenharmony_ci } 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci if (!strcasecmp(arg_name, "pg_init_retries") && 11358c2ecf20Sopenharmony_ci (argc >= 1)) { 11368c2ecf20Sopenharmony_ci r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); 11378c2ecf20Sopenharmony_ci argc--; 11388c2ecf20Sopenharmony_ci continue; 11398c2ecf20Sopenharmony_ci } 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci if (!strcasecmp(arg_name, "pg_init_delay_msecs") && 11428c2ecf20Sopenharmony_ci (argc >= 1)) { 11438c2ecf20Sopenharmony_ci r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); 11448c2ecf20Sopenharmony_ci argc--; 11458c2ecf20Sopenharmony_ci continue; 11468c2ecf20Sopenharmony_ci } 11478c2ecf20Sopenharmony_ci 11488c2ecf20Sopenharmony_ci if (!strcasecmp(arg_name, "queue_mode") && 11498c2ecf20Sopenharmony_ci (argc >= 1)) { 11508c2ecf20Sopenharmony_ci const char *queue_mode_name = dm_shift_arg(as); 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci if (!strcasecmp(queue_mode_name, "bio")) 11538c2ecf20Sopenharmony_ci m->queue_mode = DM_TYPE_BIO_BASED; 11548c2ecf20Sopenharmony_ci else if (!strcasecmp(queue_mode_name, "rq") || 11558c2ecf20Sopenharmony_ci !strcasecmp(queue_mode_name, "mq")) 11568c2ecf20Sopenharmony_ci m->queue_mode = DM_TYPE_REQUEST_BASED; 11578c2ecf20Sopenharmony_ci else { 11588c2ecf20Sopenharmony_ci ti->error = "Unknown 'queue_mode' requested"; 11598c2ecf20Sopenharmony_ci r = -EINVAL; 11608c2ecf20Sopenharmony_ci } 11618c2ecf20Sopenharmony_ci argc--; 11628c2ecf20Sopenharmony_ci continue; 11638c2ecf20Sopenharmony_ci } 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci ti->error = "Unrecognised multipath feature request"; 11668c2ecf20Sopenharmony_ci r = -EINVAL; 11678c2ecf20Sopenharmony_ci } while (argc && !r); 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci return r; 11708c2ecf20Sopenharmony_ci} 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_cistatic int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) 11738c2ecf20Sopenharmony_ci{ 11748c2ecf20Sopenharmony_ci /* target arguments */ 11758c2ecf20Sopenharmony_ci static const struct dm_arg _args[] = { 11768c2ecf20Sopenharmony_ci {0, 1024, "invalid number of priority groups"}, 11778c2ecf20Sopenharmony_ci {0, 1024, "invalid initial priority group number"}, 11788c2ecf20Sopenharmony_ci }; 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_ci int r; 11818c2ecf20Sopenharmony_ci struct multipath *m; 11828c2ecf20Sopenharmony_ci struct dm_arg_set as; 11838c2ecf20Sopenharmony_ci unsigned pg_count = 0; 11848c2ecf20Sopenharmony_ci unsigned next_pg_num; 11858c2ecf20Sopenharmony_ci unsigned long flags; 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci as.argc = argc; 11888c2ecf20Sopenharmony_ci as.argv = argv; 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci m = alloc_multipath(ti); 11918c2ecf20Sopenharmony_ci if (!m) { 11928c2ecf20Sopenharmony_ci ti->error = "can't allocate multipath"; 11938c2ecf20Sopenharmony_ci return -EINVAL; 11948c2ecf20Sopenharmony_ci } 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci r = parse_features(&as, m); 11978c2ecf20Sopenharmony_ci if (r) 11988c2ecf20Sopenharmony_ci goto bad; 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci r = alloc_multipath_stage2(ti, m); 12018c2ecf20Sopenharmony_ci if (r) 12028c2ecf20Sopenharmony_ci goto bad; 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_ci r = parse_hw_handler(&as, m); 12058c2ecf20Sopenharmony_ci if (r) 12068c2ecf20Sopenharmony_ci goto bad; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); 12098c2ecf20Sopenharmony_ci if (r) 12108c2ecf20Sopenharmony_ci goto bad; 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_ci r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); 12138c2ecf20Sopenharmony_ci if (r) 12148c2ecf20Sopenharmony_ci goto bad; 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_ci if ((!m->nr_priority_groups && next_pg_num) || 12178c2ecf20Sopenharmony_ci (m->nr_priority_groups && !next_pg_num)) { 12188c2ecf20Sopenharmony_ci ti->error = "invalid initial priority group"; 12198c2ecf20Sopenharmony_ci r = -EINVAL; 12208c2ecf20Sopenharmony_ci goto bad; 12218c2ecf20Sopenharmony_ci } 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci /* parse the priority groups */ 12248c2ecf20Sopenharmony_ci while (as.argc) { 12258c2ecf20Sopenharmony_ci struct priority_group *pg; 12268c2ecf20Sopenharmony_ci unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths); 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_ci pg = parse_priority_group(&as, m); 12298c2ecf20Sopenharmony_ci if (IS_ERR(pg)) { 12308c2ecf20Sopenharmony_ci r = PTR_ERR(pg); 12318c2ecf20Sopenharmony_ci goto bad; 12328c2ecf20Sopenharmony_ci } 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci nr_valid_paths += pg->nr_pgpaths; 12358c2ecf20Sopenharmony_ci atomic_set(&m->nr_valid_paths, nr_valid_paths); 12368c2ecf20Sopenharmony_ci 12378c2ecf20Sopenharmony_ci list_add_tail(&pg->list, &m->priority_groups); 12388c2ecf20Sopenharmony_ci pg_count++; 12398c2ecf20Sopenharmony_ci pg->pg_num = pg_count; 12408c2ecf20Sopenharmony_ci if (!--next_pg_num) 12418c2ecf20Sopenharmony_ci m->next_pg = pg; 12428c2ecf20Sopenharmony_ci } 12438c2ecf20Sopenharmony_ci 12448c2ecf20Sopenharmony_ci if (pg_count != m->nr_priority_groups) { 12458c2ecf20Sopenharmony_ci ti->error = "priority group count mismatch"; 12468c2ecf20Sopenharmony_ci r = -EINVAL; 12478c2ecf20Sopenharmony_ci goto bad; 12488c2ecf20Sopenharmony_ci } 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 12518c2ecf20Sopenharmony_ci enable_nopath_timeout(m); 12528c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 12538c2ecf20Sopenharmony_ci 12548c2ecf20Sopenharmony_ci ti->num_flush_bios = 1; 12558c2ecf20Sopenharmony_ci ti->num_discard_bios = 1; 12568c2ecf20Sopenharmony_ci ti->num_write_same_bios = 1; 12578c2ecf20Sopenharmony_ci ti->num_write_zeroes_bios = 1; 12588c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_BIO_BASED) 12598c2ecf20Sopenharmony_ci ti->per_io_data_size = multipath_per_bio_data_size(); 12608c2ecf20Sopenharmony_ci else 12618c2ecf20Sopenharmony_ci ti->per_io_data_size = sizeof(struct dm_mpath_io); 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci return 0; 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ci bad: 12668c2ecf20Sopenharmony_ci free_multipath(m); 12678c2ecf20Sopenharmony_ci return r; 12688c2ecf20Sopenharmony_ci} 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_cistatic void multipath_wait_for_pg_init_completion(struct multipath *m) 12718c2ecf20Sopenharmony_ci{ 12728c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci while (1) { 12758c2ecf20Sopenharmony_ci prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE); 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci if (!atomic_read(&m->pg_init_in_progress)) 12788c2ecf20Sopenharmony_ci break; 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci io_schedule(); 12818c2ecf20Sopenharmony_ci } 12828c2ecf20Sopenharmony_ci finish_wait(&m->pg_init_wait, &wait); 12838c2ecf20Sopenharmony_ci} 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_cistatic void flush_multipath_work(struct multipath *m) 12868c2ecf20Sopenharmony_ci{ 12878c2ecf20Sopenharmony_ci if (m->hw_handler_name) { 12888c2ecf20Sopenharmony_ci unsigned long flags; 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci if (!atomic_read(&m->pg_init_in_progress)) 12918c2ecf20Sopenharmony_ci goto skip; 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 12948c2ecf20Sopenharmony_ci if (atomic_read(&m->pg_init_in_progress) && 12958c2ecf20Sopenharmony_ci !test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) { 12968c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 12978c2ecf20Sopenharmony_ci 12988c2ecf20Sopenharmony_ci flush_workqueue(kmpath_handlerd); 12998c2ecf20Sopenharmony_ci multipath_wait_for_pg_init_completion(m); 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 13028c2ecf20Sopenharmony_ci clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); 13038c2ecf20Sopenharmony_ci } 13048c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 13058c2ecf20Sopenharmony_ci } 13068c2ecf20Sopenharmony_ciskip: 13078c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_BIO_BASED) 13088c2ecf20Sopenharmony_ci flush_work(&m->process_queued_bios); 13098c2ecf20Sopenharmony_ci flush_work(&m->trigger_event); 13108c2ecf20Sopenharmony_ci} 13118c2ecf20Sopenharmony_ci 13128c2ecf20Sopenharmony_cistatic void multipath_dtr(struct dm_target *ti) 13138c2ecf20Sopenharmony_ci{ 13148c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 13158c2ecf20Sopenharmony_ci 13168c2ecf20Sopenharmony_ci disable_nopath_timeout(m); 13178c2ecf20Sopenharmony_ci flush_multipath_work(m); 13188c2ecf20Sopenharmony_ci free_multipath(m); 13198c2ecf20Sopenharmony_ci} 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_ci/* 13228c2ecf20Sopenharmony_ci * Take a path out of use. 13238c2ecf20Sopenharmony_ci */ 13248c2ecf20Sopenharmony_cistatic int fail_path(struct pgpath *pgpath) 13258c2ecf20Sopenharmony_ci{ 13268c2ecf20Sopenharmony_ci unsigned long flags; 13278c2ecf20Sopenharmony_ci struct multipath *m = pgpath->pg->m; 13288c2ecf20Sopenharmony_ci 13298c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci if (!pgpath->is_active) 13328c2ecf20Sopenharmony_ci goto out; 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci DMWARN("%s: Failing path %s.", 13358c2ecf20Sopenharmony_ci dm_table_device_name(m->ti->table), 13368c2ecf20Sopenharmony_ci pgpath->path.dev->name); 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); 13398c2ecf20Sopenharmony_ci pgpath->is_active = false; 13408c2ecf20Sopenharmony_ci pgpath->fail_count++; 13418c2ecf20Sopenharmony_ci 13428c2ecf20Sopenharmony_ci atomic_dec(&m->nr_valid_paths); 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci if (pgpath == m->current_pgpath) 13458c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 13468c2ecf20Sopenharmony_ci 13478c2ecf20Sopenharmony_ci dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, 13488c2ecf20Sopenharmony_ci pgpath->path.dev->name, atomic_read(&m->nr_valid_paths)); 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci schedule_work(&m->trigger_event); 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci enable_nopath_timeout(m); 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ciout: 13558c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci return 0; 13588c2ecf20Sopenharmony_ci} 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci/* 13618c2ecf20Sopenharmony_ci * Reinstate a previously-failed path 13628c2ecf20Sopenharmony_ci */ 13638c2ecf20Sopenharmony_cistatic int reinstate_path(struct pgpath *pgpath) 13648c2ecf20Sopenharmony_ci{ 13658c2ecf20Sopenharmony_ci int r = 0, run_queue = 0; 13668c2ecf20Sopenharmony_ci unsigned long flags; 13678c2ecf20Sopenharmony_ci struct multipath *m = pgpath->pg->m; 13688c2ecf20Sopenharmony_ci unsigned nr_valid_paths; 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci if (pgpath->is_active) 13738c2ecf20Sopenharmony_ci goto out; 13748c2ecf20Sopenharmony_ci 13758c2ecf20Sopenharmony_ci DMWARN("%s: Reinstating path %s.", 13768c2ecf20Sopenharmony_ci dm_table_device_name(m->ti->table), 13778c2ecf20Sopenharmony_ci pgpath->path.dev->name); 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_ci r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path); 13808c2ecf20Sopenharmony_ci if (r) 13818c2ecf20Sopenharmony_ci goto out; 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci pgpath->is_active = true; 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_ci nr_valid_paths = atomic_inc_return(&m->nr_valid_paths); 13868c2ecf20Sopenharmony_ci if (nr_valid_paths == 1) { 13878c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 13888c2ecf20Sopenharmony_ci run_queue = 1; 13898c2ecf20Sopenharmony_ci } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { 13908c2ecf20Sopenharmony_ci if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) 13918c2ecf20Sopenharmony_ci atomic_inc(&m->pg_init_in_progress); 13928c2ecf20Sopenharmony_ci } 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_ci dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 13958c2ecf20Sopenharmony_ci pgpath->path.dev->name, nr_valid_paths); 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci schedule_work(&m->trigger_event); 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ciout: 14008c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 14018c2ecf20Sopenharmony_ci if (run_queue) { 14028c2ecf20Sopenharmony_ci dm_table_run_md_queue_async(m->ti->table); 14038c2ecf20Sopenharmony_ci process_queued_io_list(m); 14048c2ecf20Sopenharmony_ci } 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci if (pgpath->is_active) 14078c2ecf20Sopenharmony_ci disable_nopath_timeout(m); 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci return r; 14108c2ecf20Sopenharmony_ci} 14118c2ecf20Sopenharmony_ci 14128c2ecf20Sopenharmony_ci/* 14138c2ecf20Sopenharmony_ci * Fail or reinstate all paths that match the provided struct dm_dev. 14148c2ecf20Sopenharmony_ci */ 14158c2ecf20Sopenharmony_cistatic int action_dev(struct multipath *m, struct dm_dev *dev, 14168c2ecf20Sopenharmony_ci action_fn action) 14178c2ecf20Sopenharmony_ci{ 14188c2ecf20Sopenharmony_ci int r = -EINVAL; 14198c2ecf20Sopenharmony_ci struct pgpath *pgpath; 14208c2ecf20Sopenharmony_ci struct priority_group *pg; 14218c2ecf20Sopenharmony_ci 14228c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 14238c2ecf20Sopenharmony_ci list_for_each_entry(pgpath, &pg->pgpaths, list) { 14248c2ecf20Sopenharmony_ci if (pgpath->path.dev == dev) 14258c2ecf20Sopenharmony_ci r = action(pgpath); 14268c2ecf20Sopenharmony_ci } 14278c2ecf20Sopenharmony_ci } 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci return r; 14308c2ecf20Sopenharmony_ci} 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci/* 14338c2ecf20Sopenharmony_ci * Temporarily try to avoid having to use the specified PG 14348c2ecf20Sopenharmony_ci */ 14358c2ecf20Sopenharmony_cistatic void bypass_pg(struct multipath *m, struct priority_group *pg, 14368c2ecf20Sopenharmony_ci bool bypassed) 14378c2ecf20Sopenharmony_ci{ 14388c2ecf20Sopenharmony_ci unsigned long flags; 14398c2ecf20Sopenharmony_ci 14408c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci pg->bypassed = bypassed; 14438c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 14448c2ecf20Sopenharmony_ci m->current_pg = NULL; 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 14478c2ecf20Sopenharmony_ci 14488c2ecf20Sopenharmony_ci schedule_work(&m->trigger_event); 14498c2ecf20Sopenharmony_ci} 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci/* 14528c2ecf20Sopenharmony_ci * Switch to using the specified PG from the next I/O that gets mapped 14538c2ecf20Sopenharmony_ci */ 14548c2ecf20Sopenharmony_cistatic int switch_pg_num(struct multipath *m, const char *pgstr) 14558c2ecf20Sopenharmony_ci{ 14568c2ecf20Sopenharmony_ci struct priority_group *pg; 14578c2ecf20Sopenharmony_ci unsigned pgnum; 14588c2ecf20Sopenharmony_ci unsigned long flags; 14598c2ecf20Sopenharmony_ci char dummy; 14608c2ecf20Sopenharmony_ci 14618c2ecf20Sopenharmony_ci if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 14628c2ecf20Sopenharmony_ci !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { 14638c2ecf20Sopenharmony_ci DMWARN("invalid PG number supplied to switch_pg_num"); 14648c2ecf20Sopenharmony_ci return -EINVAL; 14658c2ecf20Sopenharmony_ci } 14668c2ecf20Sopenharmony_ci 14678c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 14688c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 14698c2ecf20Sopenharmony_ci pg->bypassed = false; 14708c2ecf20Sopenharmony_ci if (--pgnum) 14718c2ecf20Sopenharmony_ci continue; 14728c2ecf20Sopenharmony_ci 14738c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 14748c2ecf20Sopenharmony_ci m->current_pg = NULL; 14758c2ecf20Sopenharmony_ci m->next_pg = pg; 14768c2ecf20Sopenharmony_ci } 14778c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 14788c2ecf20Sopenharmony_ci 14798c2ecf20Sopenharmony_ci schedule_work(&m->trigger_event); 14808c2ecf20Sopenharmony_ci return 0; 14818c2ecf20Sopenharmony_ci} 14828c2ecf20Sopenharmony_ci 14838c2ecf20Sopenharmony_ci/* 14848c2ecf20Sopenharmony_ci * Set/clear bypassed status of a PG. 14858c2ecf20Sopenharmony_ci * PGs are numbered upwards from 1 in the order they were declared. 14868c2ecf20Sopenharmony_ci */ 14878c2ecf20Sopenharmony_cistatic int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) 14888c2ecf20Sopenharmony_ci{ 14898c2ecf20Sopenharmony_ci struct priority_group *pg; 14908c2ecf20Sopenharmony_ci unsigned pgnum; 14918c2ecf20Sopenharmony_ci char dummy; 14928c2ecf20Sopenharmony_ci 14938c2ecf20Sopenharmony_ci if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 14948c2ecf20Sopenharmony_ci !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { 14958c2ecf20Sopenharmony_ci DMWARN("invalid PG number supplied to bypass_pg"); 14968c2ecf20Sopenharmony_ci return -EINVAL; 14978c2ecf20Sopenharmony_ci } 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 15008c2ecf20Sopenharmony_ci if (!--pgnum) 15018c2ecf20Sopenharmony_ci break; 15028c2ecf20Sopenharmony_ci } 15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci bypass_pg(m, pg, bypassed); 15058c2ecf20Sopenharmony_ci return 0; 15068c2ecf20Sopenharmony_ci} 15078c2ecf20Sopenharmony_ci 15088c2ecf20Sopenharmony_ci/* 15098c2ecf20Sopenharmony_ci * Should we retry pg_init immediately? 15108c2ecf20Sopenharmony_ci */ 15118c2ecf20Sopenharmony_cistatic bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) 15128c2ecf20Sopenharmony_ci{ 15138c2ecf20Sopenharmony_ci unsigned long flags; 15148c2ecf20Sopenharmony_ci bool limit_reached = false; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 15178c2ecf20Sopenharmony_ci 15188c2ecf20Sopenharmony_ci if (atomic_read(&m->pg_init_count) <= m->pg_init_retries && 15198c2ecf20Sopenharmony_ci !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) 15208c2ecf20Sopenharmony_ci set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 15218c2ecf20Sopenharmony_ci else 15228c2ecf20Sopenharmony_ci limit_reached = true; 15238c2ecf20Sopenharmony_ci 15248c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci return limit_reached; 15278c2ecf20Sopenharmony_ci} 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_cistatic void pg_init_done(void *data, int errors) 15308c2ecf20Sopenharmony_ci{ 15318c2ecf20Sopenharmony_ci struct pgpath *pgpath = data; 15328c2ecf20Sopenharmony_ci struct priority_group *pg = pgpath->pg; 15338c2ecf20Sopenharmony_ci struct multipath *m = pg->m; 15348c2ecf20Sopenharmony_ci unsigned long flags; 15358c2ecf20Sopenharmony_ci bool delay_retry = false; 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci /* device or driver problems */ 15388c2ecf20Sopenharmony_ci switch (errors) { 15398c2ecf20Sopenharmony_ci case SCSI_DH_OK: 15408c2ecf20Sopenharmony_ci break; 15418c2ecf20Sopenharmony_ci case SCSI_DH_NOSYS: 15428c2ecf20Sopenharmony_ci if (!m->hw_handler_name) { 15438c2ecf20Sopenharmony_ci errors = 0; 15448c2ecf20Sopenharmony_ci break; 15458c2ecf20Sopenharmony_ci } 15468c2ecf20Sopenharmony_ci DMERR("Could not failover the device: Handler scsi_dh_%s " 15478c2ecf20Sopenharmony_ci "Error %d.", m->hw_handler_name, errors); 15488c2ecf20Sopenharmony_ci /* 15498c2ecf20Sopenharmony_ci * Fail path for now, so we do not ping pong 15508c2ecf20Sopenharmony_ci */ 15518c2ecf20Sopenharmony_ci fail_path(pgpath); 15528c2ecf20Sopenharmony_ci break; 15538c2ecf20Sopenharmony_ci case SCSI_DH_DEV_TEMP_BUSY: 15548c2ecf20Sopenharmony_ci /* 15558c2ecf20Sopenharmony_ci * Probably doing something like FW upgrade on the 15568c2ecf20Sopenharmony_ci * controller so try the other pg. 15578c2ecf20Sopenharmony_ci */ 15588c2ecf20Sopenharmony_ci bypass_pg(m, pg, true); 15598c2ecf20Sopenharmony_ci break; 15608c2ecf20Sopenharmony_ci case SCSI_DH_RETRY: 15618c2ecf20Sopenharmony_ci /* Wait before retrying. */ 15628c2ecf20Sopenharmony_ci delay_retry = true; 15638c2ecf20Sopenharmony_ci fallthrough; 15648c2ecf20Sopenharmony_ci case SCSI_DH_IMM_RETRY: 15658c2ecf20Sopenharmony_ci case SCSI_DH_RES_TEMP_UNAVAIL: 15668c2ecf20Sopenharmony_ci if (pg_init_limit_reached(m, pgpath)) 15678c2ecf20Sopenharmony_ci fail_path(pgpath); 15688c2ecf20Sopenharmony_ci errors = 0; 15698c2ecf20Sopenharmony_ci break; 15708c2ecf20Sopenharmony_ci case SCSI_DH_DEV_OFFLINED: 15718c2ecf20Sopenharmony_ci default: 15728c2ecf20Sopenharmony_ci /* 15738c2ecf20Sopenharmony_ci * We probably do not want to fail the path for a device 15748c2ecf20Sopenharmony_ci * error, but this is what the old dm did. In future 15758c2ecf20Sopenharmony_ci * patches we can do more advanced handling. 15768c2ecf20Sopenharmony_ci */ 15778c2ecf20Sopenharmony_ci fail_path(pgpath); 15788c2ecf20Sopenharmony_ci } 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 15818c2ecf20Sopenharmony_ci if (errors) { 15828c2ecf20Sopenharmony_ci if (pgpath == m->current_pgpath) { 15838c2ecf20Sopenharmony_ci DMERR("Could not failover device. Error %d.", errors); 15848c2ecf20Sopenharmony_ci m->current_pgpath = NULL; 15858c2ecf20Sopenharmony_ci m->current_pg = NULL; 15868c2ecf20Sopenharmony_ci } 15878c2ecf20Sopenharmony_ci } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) 15888c2ecf20Sopenharmony_ci pg->bypassed = false; 15898c2ecf20Sopenharmony_ci 15908c2ecf20Sopenharmony_ci if (atomic_dec_return(&m->pg_init_in_progress) > 0) 15918c2ecf20Sopenharmony_ci /* Activations of other paths are still on going */ 15928c2ecf20Sopenharmony_ci goto out; 15938c2ecf20Sopenharmony_ci 15948c2ecf20Sopenharmony_ci if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { 15958c2ecf20Sopenharmony_ci if (delay_retry) 15968c2ecf20Sopenharmony_ci set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 15978c2ecf20Sopenharmony_ci else 15988c2ecf20Sopenharmony_ci clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 15998c2ecf20Sopenharmony_ci 16008c2ecf20Sopenharmony_ci if (__pg_init_all_paths(m)) 16018c2ecf20Sopenharmony_ci goto out; 16028c2ecf20Sopenharmony_ci } 16038c2ecf20Sopenharmony_ci clear_bit(MPATHF_QUEUE_IO, &m->flags); 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci process_queued_io_list(m); 16068c2ecf20Sopenharmony_ci 16078c2ecf20Sopenharmony_ci /* 16088c2ecf20Sopenharmony_ci * Wake up any thread waiting to suspend. 16098c2ecf20Sopenharmony_ci */ 16108c2ecf20Sopenharmony_ci wake_up(&m->pg_init_wait); 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ciout: 16138c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 16148c2ecf20Sopenharmony_ci} 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_cistatic void activate_or_offline_path(struct pgpath *pgpath) 16178c2ecf20Sopenharmony_ci{ 16188c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci if (pgpath->is_active && !blk_queue_dying(q)) 16218c2ecf20Sopenharmony_ci scsi_dh_activate(q, pg_init_done, pgpath); 16228c2ecf20Sopenharmony_ci else 16238c2ecf20Sopenharmony_ci pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); 16248c2ecf20Sopenharmony_ci} 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_cistatic void activate_path_work(struct work_struct *work) 16278c2ecf20Sopenharmony_ci{ 16288c2ecf20Sopenharmony_ci struct pgpath *pgpath = 16298c2ecf20Sopenharmony_ci container_of(work, struct pgpath, activate_path.work); 16308c2ecf20Sopenharmony_ci 16318c2ecf20Sopenharmony_ci activate_or_offline_path(pgpath); 16328c2ecf20Sopenharmony_ci} 16338c2ecf20Sopenharmony_ci 16348c2ecf20Sopenharmony_cistatic int multipath_end_io(struct dm_target *ti, struct request *clone, 16358c2ecf20Sopenharmony_ci blk_status_t error, union map_info *map_context) 16368c2ecf20Sopenharmony_ci{ 16378c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio(map_context); 16388c2ecf20Sopenharmony_ci struct pgpath *pgpath = mpio->pgpath; 16398c2ecf20Sopenharmony_ci int r = DM_ENDIO_DONE; 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci /* 16428c2ecf20Sopenharmony_ci * We don't queue any clone request inside the multipath target 16438c2ecf20Sopenharmony_ci * during end I/O handling, since those clone requests don't have 16448c2ecf20Sopenharmony_ci * bio clones. If we queue them inside the multipath target, 16458c2ecf20Sopenharmony_ci * we need to make bio clones, that requires memory allocation. 16468c2ecf20Sopenharmony_ci * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests 16478c2ecf20Sopenharmony_ci * don't have bio clones.) 16488c2ecf20Sopenharmony_ci * Instead of queueing the clone request here, we queue the original 16498c2ecf20Sopenharmony_ci * request into dm core, which will remake a clone request and 16508c2ecf20Sopenharmony_ci * clone bios for it and resubmit it later. 16518c2ecf20Sopenharmony_ci */ 16528c2ecf20Sopenharmony_ci if (error && blk_path_error(error)) { 16538c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 16548c2ecf20Sopenharmony_ci 16558c2ecf20Sopenharmony_ci if (error == BLK_STS_RESOURCE) 16568c2ecf20Sopenharmony_ci r = DM_ENDIO_DELAY_REQUEUE; 16578c2ecf20Sopenharmony_ci else 16588c2ecf20Sopenharmony_ci r = DM_ENDIO_REQUEUE; 16598c2ecf20Sopenharmony_ci 16608c2ecf20Sopenharmony_ci if (pgpath) 16618c2ecf20Sopenharmony_ci fail_path(pgpath); 16628c2ecf20Sopenharmony_ci 16638c2ecf20Sopenharmony_ci if (!atomic_read(&m->nr_valid_paths) && 16648c2ecf20Sopenharmony_ci !must_push_back_rq(m)) { 16658c2ecf20Sopenharmony_ci if (error == BLK_STS_IOERR) 16668c2ecf20Sopenharmony_ci dm_report_EIO(m); 16678c2ecf20Sopenharmony_ci /* complete with the original error */ 16688c2ecf20Sopenharmony_ci r = DM_ENDIO_DONE; 16698c2ecf20Sopenharmony_ci } 16708c2ecf20Sopenharmony_ci } 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_ci if (pgpath) { 16738c2ecf20Sopenharmony_ci struct path_selector *ps = &pgpath->pg->ps; 16748c2ecf20Sopenharmony_ci 16758c2ecf20Sopenharmony_ci if (ps->type->end_io) 16768c2ecf20Sopenharmony_ci ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, 16778c2ecf20Sopenharmony_ci clone->io_start_time_ns); 16788c2ecf20Sopenharmony_ci } 16798c2ecf20Sopenharmony_ci 16808c2ecf20Sopenharmony_ci return r; 16818c2ecf20Sopenharmony_ci} 16828c2ecf20Sopenharmony_ci 16838c2ecf20Sopenharmony_cistatic int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, 16848c2ecf20Sopenharmony_ci blk_status_t *error) 16858c2ecf20Sopenharmony_ci{ 16868c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 16878c2ecf20Sopenharmony_ci struct dm_mpath_io *mpio = get_mpio_from_bio(clone); 16888c2ecf20Sopenharmony_ci struct pgpath *pgpath = mpio->pgpath; 16898c2ecf20Sopenharmony_ci unsigned long flags; 16908c2ecf20Sopenharmony_ci int r = DM_ENDIO_DONE; 16918c2ecf20Sopenharmony_ci 16928c2ecf20Sopenharmony_ci if (!*error || !blk_path_error(*error)) 16938c2ecf20Sopenharmony_ci goto done; 16948c2ecf20Sopenharmony_ci 16958c2ecf20Sopenharmony_ci if (pgpath) 16968c2ecf20Sopenharmony_ci fail_path(pgpath); 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_ci if (!atomic_read(&m->nr_valid_paths)) { 16998c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 17008c2ecf20Sopenharmony_ci if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 17018c2ecf20Sopenharmony_ci if (__must_push_back(m)) { 17028c2ecf20Sopenharmony_ci r = DM_ENDIO_REQUEUE; 17038c2ecf20Sopenharmony_ci } else { 17048c2ecf20Sopenharmony_ci dm_report_EIO(m); 17058c2ecf20Sopenharmony_ci *error = BLK_STS_IOERR; 17068c2ecf20Sopenharmony_ci } 17078c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 17088c2ecf20Sopenharmony_ci goto done; 17098c2ecf20Sopenharmony_ci } 17108c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 17118c2ecf20Sopenharmony_ci } 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_ci multipath_queue_bio(m, clone); 17148c2ecf20Sopenharmony_ci r = DM_ENDIO_INCOMPLETE; 17158c2ecf20Sopenharmony_cidone: 17168c2ecf20Sopenharmony_ci if (pgpath) { 17178c2ecf20Sopenharmony_ci struct path_selector *ps = &pgpath->pg->ps; 17188c2ecf20Sopenharmony_ci 17198c2ecf20Sopenharmony_ci if (ps->type->end_io) 17208c2ecf20Sopenharmony_ci ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, 17218c2ecf20Sopenharmony_ci dm_start_time_ns_from_clone(clone)); 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci return r; 17258c2ecf20Sopenharmony_ci} 17268c2ecf20Sopenharmony_ci 17278c2ecf20Sopenharmony_ci/* 17288c2ecf20Sopenharmony_ci * Suspend with flush can't complete until all the I/O is processed 17298c2ecf20Sopenharmony_ci * so if the last path fails we must error any remaining I/O. 17308c2ecf20Sopenharmony_ci * - Note that if the freeze_bdev fails while suspending, the 17318c2ecf20Sopenharmony_ci * queue_if_no_path state is lost - userspace should reset it. 17328c2ecf20Sopenharmony_ci * Otherwise, during noflush suspend, queue_if_no_path will not change. 17338c2ecf20Sopenharmony_ci */ 17348c2ecf20Sopenharmony_cistatic void multipath_presuspend(struct dm_target *ti) 17358c2ecf20Sopenharmony_ci{ 17368c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 17378c2ecf20Sopenharmony_ci 17388c2ecf20Sopenharmony_ci /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ 17398c2ecf20Sopenharmony_ci if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti)) 17408c2ecf20Sopenharmony_ci queue_if_no_path(m, false, true, __func__); 17418c2ecf20Sopenharmony_ci} 17428c2ecf20Sopenharmony_ci 17438c2ecf20Sopenharmony_cistatic void multipath_postsuspend(struct dm_target *ti) 17448c2ecf20Sopenharmony_ci{ 17458c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_ci mutex_lock(&m->work_mutex); 17488c2ecf20Sopenharmony_ci flush_multipath_work(m); 17498c2ecf20Sopenharmony_ci mutex_unlock(&m->work_mutex); 17508c2ecf20Sopenharmony_ci} 17518c2ecf20Sopenharmony_ci 17528c2ecf20Sopenharmony_ci/* 17538c2ecf20Sopenharmony_ci * Restore the queue_if_no_path setting. 17548c2ecf20Sopenharmony_ci */ 17558c2ecf20Sopenharmony_cistatic void multipath_resume(struct dm_target *ti) 17568c2ecf20Sopenharmony_ci{ 17578c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 17588c2ecf20Sopenharmony_ci unsigned long flags; 17598c2ecf20Sopenharmony_ci 17608c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 17618c2ecf20Sopenharmony_ci if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) { 17628c2ecf20Sopenharmony_ci set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); 17638c2ecf20Sopenharmony_ci clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 17648c2ecf20Sopenharmony_ci } 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci DMDEBUG("%s: %s finished; QIFNP = %d; SQIFNP = %d", 17678c2ecf20Sopenharmony_ci dm_table_device_name(m->ti->table), __func__, 17688c2ecf20Sopenharmony_ci test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), 17698c2ecf20Sopenharmony_ci test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)); 17708c2ecf20Sopenharmony_ci 17718c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 17728c2ecf20Sopenharmony_ci} 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci/* 17758c2ecf20Sopenharmony_ci * Info output has the following format: 17768c2ecf20Sopenharmony_ci * num_multipath_feature_args [multipath_feature_args]* 17778c2ecf20Sopenharmony_ci * num_handler_status_args [handler_status_args]* 17788c2ecf20Sopenharmony_ci * num_groups init_group_number 17798c2ecf20Sopenharmony_ci * [A|D|E num_ps_status_args [ps_status_args]* 17808c2ecf20Sopenharmony_ci * num_paths num_selector_args 17818c2ecf20Sopenharmony_ci * [path_dev A|F fail_count [selector_args]* ]+ ]+ 17828c2ecf20Sopenharmony_ci * 17838c2ecf20Sopenharmony_ci * Table output has the following format (identical to the constructor string): 17848c2ecf20Sopenharmony_ci * num_feature_args [features_args]* 17858c2ecf20Sopenharmony_ci * num_handler_args hw_handler [hw_handler_args]* 17868c2ecf20Sopenharmony_ci * num_groups init_group_number 17878c2ecf20Sopenharmony_ci * [priority selector-name num_ps_args [ps_args]* 17888c2ecf20Sopenharmony_ci * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ 17898c2ecf20Sopenharmony_ci */ 17908c2ecf20Sopenharmony_cistatic void multipath_status(struct dm_target *ti, status_type_t type, 17918c2ecf20Sopenharmony_ci unsigned status_flags, char *result, unsigned maxlen) 17928c2ecf20Sopenharmony_ci{ 17938c2ecf20Sopenharmony_ci int sz = 0; 17948c2ecf20Sopenharmony_ci unsigned long flags; 17958c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 17968c2ecf20Sopenharmony_ci struct priority_group *pg; 17978c2ecf20Sopenharmony_ci struct pgpath *p; 17988c2ecf20Sopenharmony_ci unsigned pg_num; 17998c2ecf20Sopenharmony_ci char state; 18008c2ecf20Sopenharmony_ci 18018c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 18028c2ecf20Sopenharmony_ci 18038c2ecf20Sopenharmony_ci /* Features */ 18048c2ecf20Sopenharmony_ci if (type == STATUSTYPE_INFO) 18058c2ecf20Sopenharmony_ci DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags), 18068c2ecf20Sopenharmony_ci atomic_read(&m->pg_init_count)); 18078c2ecf20Sopenharmony_ci else { 18088c2ecf20Sopenharmony_ci DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) + 18098c2ecf20Sopenharmony_ci (m->pg_init_retries > 0) * 2 + 18108c2ecf20Sopenharmony_ci (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + 18118c2ecf20Sopenharmony_ci test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) + 18128c2ecf20Sopenharmony_ci (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2); 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 18158c2ecf20Sopenharmony_ci DMEMIT("queue_if_no_path "); 18168c2ecf20Sopenharmony_ci if (m->pg_init_retries) 18178c2ecf20Sopenharmony_ci DMEMIT("pg_init_retries %u ", m->pg_init_retries); 18188c2ecf20Sopenharmony_ci if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) 18198c2ecf20Sopenharmony_ci DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); 18208c2ecf20Sopenharmony_ci if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) 18218c2ecf20Sopenharmony_ci DMEMIT("retain_attached_hw_handler "); 18228c2ecf20Sopenharmony_ci if (m->queue_mode != DM_TYPE_REQUEST_BASED) { 18238c2ecf20Sopenharmony_ci switch(m->queue_mode) { 18248c2ecf20Sopenharmony_ci case DM_TYPE_BIO_BASED: 18258c2ecf20Sopenharmony_ci DMEMIT("queue_mode bio "); 18268c2ecf20Sopenharmony_ci break; 18278c2ecf20Sopenharmony_ci default: 18288c2ecf20Sopenharmony_ci WARN_ON_ONCE(true); 18298c2ecf20Sopenharmony_ci break; 18308c2ecf20Sopenharmony_ci } 18318c2ecf20Sopenharmony_ci } 18328c2ecf20Sopenharmony_ci } 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_ci if (!m->hw_handler_name || type == STATUSTYPE_INFO) 18358c2ecf20Sopenharmony_ci DMEMIT("0 "); 18368c2ecf20Sopenharmony_ci else 18378c2ecf20Sopenharmony_ci DMEMIT("1 %s ", m->hw_handler_name); 18388c2ecf20Sopenharmony_ci 18398c2ecf20Sopenharmony_ci DMEMIT("%u ", m->nr_priority_groups); 18408c2ecf20Sopenharmony_ci 18418c2ecf20Sopenharmony_ci if (m->next_pg) 18428c2ecf20Sopenharmony_ci pg_num = m->next_pg->pg_num; 18438c2ecf20Sopenharmony_ci else if (m->current_pg) 18448c2ecf20Sopenharmony_ci pg_num = m->current_pg->pg_num; 18458c2ecf20Sopenharmony_ci else 18468c2ecf20Sopenharmony_ci pg_num = (m->nr_priority_groups ? 1 : 0); 18478c2ecf20Sopenharmony_ci 18488c2ecf20Sopenharmony_ci DMEMIT("%u ", pg_num); 18498c2ecf20Sopenharmony_ci 18508c2ecf20Sopenharmony_ci switch (type) { 18518c2ecf20Sopenharmony_ci case STATUSTYPE_INFO: 18528c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 18538c2ecf20Sopenharmony_ci if (pg->bypassed) 18548c2ecf20Sopenharmony_ci state = 'D'; /* Disabled */ 18558c2ecf20Sopenharmony_ci else if (pg == m->current_pg) 18568c2ecf20Sopenharmony_ci state = 'A'; /* Currently Active */ 18578c2ecf20Sopenharmony_ci else 18588c2ecf20Sopenharmony_ci state = 'E'; /* Enabled */ 18598c2ecf20Sopenharmony_ci 18608c2ecf20Sopenharmony_ci DMEMIT("%c ", state); 18618c2ecf20Sopenharmony_ci 18628c2ecf20Sopenharmony_ci if (pg->ps.type->status) 18638c2ecf20Sopenharmony_ci sz += pg->ps.type->status(&pg->ps, NULL, type, 18648c2ecf20Sopenharmony_ci result + sz, 18658c2ecf20Sopenharmony_ci maxlen - sz); 18668c2ecf20Sopenharmony_ci else 18678c2ecf20Sopenharmony_ci DMEMIT("0 "); 18688c2ecf20Sopenharmony_ci 18698c2ecf20Sopenharmony_ci DMEMIT("%u %u ", pg->nr_pgpaths, 18708c2ecf20Sopenharmony_ci pg->ps.type->info_args); 18718c2ecf20Sopenharmony_ci 18728c2ecf20Sopenharmony_ci list_for_each_entry(p, &pg->pgpaths, list) { 18738c2ecf20Sopenharmony_ci DMEMIT("%s %s %u ", p->path.dev->name, 18748c2ecf20Sopenharmony_ci p->is_active ? "A" : "F", 18758c2ecf20Sopenharmony_ci p->fail_count); 18768c2ecf20Sopenharmony_ci if (pg->ps.type->status) 18778c2ecf20Sopenharmony_ci sz += pg->ps.type->status(&pg->ps, 18788c2ecf20Sopenharmony_ci &p->path, type, result + sz, 18798c2ecf20Sopenharmony_ci maxlen - sz); 18808c2ecf20Sopenharmony_ci } 18818c2ecf20Sopenharmony_ci } 18828c2ecf20Sopenharmony_ci break; 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_ci case STATUSTYPE_TABLE: 18858c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 18868c2ecf20Sopenharmony_ci DMEMIT("%s ", pg->ps.type->name); 18878c2ecf20Sopenharmony_ci 18888c2ecf20Sopenharmony_ci if (pg->ps.type->status) 18898c2ecf20Sopenharmony_ci sz += pg->ps.type->status(&pg->ps, NULL, type, 18908c2ecf20Sopenharmony_ci result + sz, 18918c2ecf20Sopenharmony_ci maxlen - sz); 18928c2ecf20Sopenharmony_ci else 18938c2ecf20Sopenharmony_ci DMEMIT("0 "); 18948c2ecf20Sopenharmony_ci 18958c2ecf20Sopenharmony_ci DMEMIT("%u %u ", pg->nr_pgpaths, 18968c2ecf20Sopenharmony_ci pg->ps.type->table_args); 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci list_for_each_entry(p, &pg->pgpaths, list) { 18998c2ecf20Sopenharmony_ci DMEMIT("%s ", p->path.dev->name); 19008c2ecf20Sopenharmony_ci if (pg->ps.type->status) 19018c2ecf20Sopenharmony_ci sz += pg->ps.type->status(&pg->ps, 19028c2ecf20Sopenharmony_ci &p->path, type, result + sz, 19038c2ecf20Sopenharmony_ci maxlen - sz); 19048c2ecf20Sopenharmony_ci } 19058c2ecf20Sopenharmony_ci } 19068c2ecf20Sopenharmony_ci break; 19078c2ecf20Sopenharmony_ci } 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 19108c2ecf20Sopenharmony_ci} 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_cistatic int multipath_message(struct dm_target *ti, unsigned argc, char **argv, 19138c2ecf20Sopenharmony_ci char *result, unsigned maxlen) 19148c2ecf20Sopenharmony_ci{ 19158c2ecf20Sopenharmony_ci int r = -EINVAL; 19168c2ecf20Sopenharmony_ci struct dm_dev *dev; 19178c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 19188c2ecf20Sopenharmony_ci action_fn action; 19198c2ecf20Sopenharmony_ci unsigned long flags; 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci mutex_lock(&m->work_mutex); 19228c2ecf20Sopenharmony_ci 19238c2ecf20Sopenharmony_ci if (dm_suspended(ti)) { 19248c2ecf20Sopenharmony_ci r = -EBUSY; 19258c2ecf20Sopenharmony_ci goto out; 19268c2ecf20Sopenharmony_ci } 19278c2ecf20Sopenharmony_ci 19288c2ecf20Sopenharmony_ci if (argc == 1) { 19298c2ecf20Sopenharmony_ci if (!strcasecmp(argv[0], "queue_if_no_path")) { 19308c2ecf20Sopenharmony_ci r = queue_if_no_path(m, true, false, __func__); 19318c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 19328c2ecf20Sopenharmony_ci enable_nopath_timeout(m); 19338c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 19348c2ecf20Sopenharmony_ci goto out; 19358c2ecf20Sopenharmony_ci } else if (!strcasecmp(argv[0], "fail_if_no_path")) { 19368c2ecf20Sopenharmony_ci r = queue_if_no_path(m, false, false, __func__); 19378c2ecf20Sopenharmony_ci disable_nopath_timeout(m); 19388c2ecf20Sopenharmony_ci goto out; 19398c2ecf20Sopenharmony_ci } 19408c2ecf20Sopenharmony_ci } 19418c2ecf20Sopenharmony_ci 19428c2ecf20Sopenharmony_ci if (argc != 2) { 19438c2ecf20Sopenharmony_ci DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc); 19448c2ecf20Sopenharmony_ci goto out; 19458c2ecf20Sopenharmony_ci } 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_ci if (!strcasecmp(argv[0], "disable_group")) { 19488c2ecf20Sopenharmony_ci r = bypass_pg_num(m, argv[1], true); 19498c2ecf20Sopenharmony_ci goto out; 19508c2ecf20Sopenharmony_ci } else if (!strcasecmp(argv[0], "enable_group")) { 19518c2ecf20Sopenharmony_ci r = bypass_pg_num(m, argv[1], false); 19528c2ecf20Sopenharmony_ci goto out; 19538c2ecf20Sopenharmony_ci } else if (!strcasecmp(argv[0], "switch_group")) { 19548c2ecf20Sopenharmony_ci r = switch_pg_num(m, argv[1]); 19558c2ecf20Sopenharmony_ci goto out; 19568c2ecf20Sopenharmony_ci } else if (!strcasecmp(argv[0], "reinstate_path")) 19578c2ecf20Sopenharmony_ci action = reinstate_path; 19588c2ecf20Sopenharmony_ci else if (!strcasecmp(argv[0], "fail_path")) 19598c2ecf20Sopenharmony_ci action = fail_path; 19608c2ecf20Sopenharmony_ci else { 19618c2ecf20Sopenharmony_ci DMWARN("Unrecognised multipath message received: %s", argv[0]); 19628c2ecf20Sopenharmony_ci goto out; 19638c2ecf20Sopenharmony_ci } 19648c2ecf20Sopenharmony_ci 19658c2ecf20Sopenharmony_ci r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); 19668c2ecf20Sopenharmony_ci if (r) { 19678c2ecf20Sopenharmony_ci DMWARN("message: error getting device %s", 19688c2ecf20Sopenharmony_ci argv[1]); 19698c2ecf20Sopenharmony_ci goto out; 19708c2ecf20Sopenharmony_ci } 19718c2ecf20Sopenharmony_ci 19728c2ecf20Sopenharmony_ci r = action_dev(m, dev, action); 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_ci dm_put_device(ti, dev); 19758c2ecf20Sopenharmony_ci 19768c2ecf20Sopenharmony_ciout: 19778c2ecf20Sopenharmony_ci mutex_unlock(&m->work_mutex); 19788c2ecf20Sopenharmony_ci return r; 19798c2ecf20Sopenharmony_ci} 19808c2ecf20Sopenharmony_ci 19818c2ecf20Sopenharmony_cistatic int multipath_prepare_ioctl(struct dm_target *ti, 19828c2ecf20Sopenharmony_ci struct block_device **bdev) 19838c2ecf20Sopenharmony_ci{ 19848c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 19858c2ecf20Sopenharmony_ci struct pgpath *pgpath; 19868c2ecf20Sopenharmony_ci unsigned long flags; 19878c2ecf20Sopenharmony_ci int r; 19888c2ecf20Sopenharmony_ci 19898c2ecf20Sopenharmony_ci pgpath = READ_ONCE(m->current_pgpath); 19908c2ecf20Sopenharmony_ci if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) 19918c2ecf20Sopenharmony_ci pgpath = choose_pgpath(m, 0); 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_ci if (pgpath) { 19948c2ecf20Sopenharmony_ci if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) { 19958c2ecf20Sopenharmony_ci *bdev = pgpath->path.dev->bdev; 19968c2ecf20Sopenharmony_ci r = 0; 19978c2ecf20Sopenharmony_ci } else { 19988c2ecf20Sopenharmony_ci /* pg_init has not started or completed */ 19998c2ecf20Sopenharmony_ci r = -ENOTCONN; 20008c2ecf20Sopenharmony_ci } 20018c2ecf20Sopenharmony_ci } else { 20028c2ecf20Sopenharmony_ci /* No path is available */ 20038c2ecf20Sopenharmony_ci r = -EIO; 20048c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 20058c2ecf20Sopenharmony_ci if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 20068c2ecf20Sopenharmony_ci r = -ENOTCONN; 20078c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 20088c2ecf20Sopenharmony_ci } 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci if (r == -ENOTCONN) { 20118c2ecf20Sopenharmony_ci if (!READ_ONCE(m->current_pg)) { 20128c2ecf20Sopenharmony_ci /* Path status changed, redo selection */ 20138c2ecf20Sopenharmony_ci (void) choose_pgpath(m, 0); 20148c2ecf20Sopenharmony_ci } 20158c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 20168c2ecf20Sopenharmony_ci if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) 20178c2ecf20Sopenharmony_ci (void) __pg_init_all_paths(m); 20188c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 20198c2ecf20Sopenharmony_ci dm_table_run_md_queue_async(m->ti->table); 20208c2ecf20Sopenharmony_ci process_queued_io_list(m); 20218c2ecf20Sopenharmony_ci } 20228c2ecf20Sopenharmony_ci 20238c2ecf20Sopenharmony_ci /* 20248c2ecf20Sopenharmony_ci * Only pass ioctls through if the device sizes match exactly. 20258c2ecf20Sopenharmony_ci */ 20268c2ecf20Sopenharmony_ci if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) 20278c2ecf20Sopenharmony_ci return 1; 20288c2ecf20Sopenharmony_ci return r; 20298c2ecf20Sopenharmony_ci} 20308c2ecf20Sopenharmony_ci 20318c2ecf20Sopenharmony_cistatic int multipath_iterate_devices(struct dm_target *ti, 20328c2ecf20Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 20338c2ecf20Sopenharmony_ci{ 20348c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 20358c2ecf20Sopenharmony_ci struct priority_group *pg; 20368c2ecf20Sopenharmony_ci struct pgpath *p; 20378c2ecf20Sopenharmony_ci int ret = 0; 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_ci list_for_each_entry(pg, &m->priority_groups, list) { 20408c2ecf20Sopenharmony_ci list_for_each_entry(p, &pg->pgpaths, list) { 20418c2ecf20Sopenharmony_ci ret = fn(ti, p->path.dev, ti->begin, ti->len, data); 20428c2ecf20Sopenharmony_ci if (ret) 20438c2ecf20Sopenharmony_ci goto out; 20448c2ecf20Sopenharmony_ci } 20458c2ecf20Sopenharmony_ci } 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ciout: 20488c2ecf20Sopenharmony_ci return ret; 20498c2ecf20Sopenharmony_ci} 20508c2ecf20Sopenharmony_ci 20518c2ecf20Sopenharmony_cistatic int pgpath_busy(struct pgpath *pgpath) 20528c2ecf20Sopenharmony_ci{ 20538c2ecf20Sopenharmony_ci struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); 20548c2ecf20Sopenharmony_ci 20558c2ecf20Sopenharmony_ci return blk_lld_busy(q); 20568c2ecf20Sopenharmony_ci} 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_ci/* 20598c2ecf20Sopenharmony_ci * We return "busy", only when we can map I/Os but underlying devices 20608c2ecf20Sopenharmony_ci * are busy (so even if we map I/Os now, the I/Os will wait on 20618c2ecf20Sopenharmony_ci * the underlying queue). 20628c2ecf20Sopenharmony_ci * In other words, if we want to kill I/Os or queue them inside us 20638c2ecf20Sopenharmony_ci * due to map unavailability, we don't return "busy". Otherwise, 20648c2ecf20Sopenharmony_ci * dm core won't give us the I/Os and we can't do what we want. 20658c2ecf20Sopenharmony_ci */ 20668c2ecf20Sopenharmony_cistatic int multipath_busy(struct dm_target *ti) 20678c2ecf20Sopenharmony_ci{ 20688c2ecf20Sopenharmony_ci bool busy = false, has_active = false; 20698c2ecf20Sopenharmony_ci struct multipath *m = ti->private; 20708c2ecf20Sopenharmony_ci struct priority_group *pg, *next_pg; 20718c2ecf20Sopenharmony_ci struct pgpath *pgpath; 20728c2ecf20Sopenharmony_ci 20738c2ecf20Sopenharmony_ci /* pg_init in progress */ 20748c2ecf20Sopenharmony_ci if (atomic_read(&m->pg_init_in_progress)) 20758c2ecf20Sopenharmony_ci return true; 20768c2ecf20Sopenharmony_ci 20778c2ecf20Sopenharmony_ci /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ 20788c2ecf20Sopenharmony_ci if (!atomic_read(&m->nr_valid_paths)) { 20798c2ecf20Sopenharmony_ci unsigned long flags; 20808c2ecf20Sopenharmony_ci spin_lock_irqsave(&m->lock, flags); 20818c2ecf20Sopenharmony_ci if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 20828c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 20838c2ecf20Sopenharmony_ci return (m->queue_mode != DM_TYPE_REQUEST_BASED); 20848c2ecf20Sopenharmony_ci } 20858c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&m->lock, flags); 20868c2ecf20Sopenharmony_ci } 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci /* Guess which priority_group will be used at next mapping time */ 20898c2ecf20Sopenharmony_ci pg = READ_ONCE(m->current_pg); 20908c2ecf20Sopenharmony_ci next_pg = READ_ONCE(m->next_pg); 20918c2ecf20Sopenharmony_ci if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) 20928c2ecf20Sopenharmony_ci pg = next_pg; 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci if (!pg) { 20958c2ecf20Sopenharmony_ci /* 20968c2ecf20Sopenharmony_ci * We don't know which pg will be used at next mapping time. 20978c2ecf20Sopenharmony_ci * We don't call choose_pgpath() here to avoid to trigger 20988c2ecf20Sopenharmony_ci * pg_init just by busy checking. 20998c2ecf20Sopenharmony_ci * So we don't know whether underlying devices we will be using 21008c2ecf20Sopenharmony_ci * at next mapping time are busy or not. Just try mapping. 21018c2ecf20Sopenharmony_ci */ 21028c2ecf20Sopenharmony_ci return busy; 21038c2ecf20Sopenharmony_ci } 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_ci /* 21068c2ecf20Sopenharmony_ci * If there is one non-busy active path at least, the path selector 21078c2ecf20Sopenharmony_ci * will be able to select it. So we consider such a pg as not busy. 21088c2ecf20Sopenharmony_ci */ 21098c2ecf20Sopenharmony_ci busy = true; 21108c2ecf20Sopenharmony_ci list_for_each_entry(pgpath, &pg->pgpaths, list) { 21118c2ecf20Sopenharmony_ci if (pgpath->is_active) { 21128c2ecf20Sopenharmony_ci has_active = true; 21138c2ecf20Sopenharmony_ci if (!pgpath_busy(pgpath)) { 21148c2ecf20Sopenharmony_ci busy = false; 21158c2ecf20Sopenharmony_ci break; 21168c2ecf20Sopenharmony_ci } 21178c2ecf20Sopenharmony_ci } 21188c2ecf20Sopenharmony_ci } 21198c2ecf20Sopenharmony_ci 21208c2ecf20Sopenharmony_ci if (!has_active) { 21218c2ecf20Sopenharmony_ci /* 21228c2ecf20Sopenharmony_ci * No active path in this pg, so this pg won't be used and 21238c2ecf20Sopenharmony_ci * the current_pg will be changed at next mapping time. 21248c2ecf20Sopenharmony_ci * We need to try mapping to determine it. 21258c2ecf20Sopenharmony_ci */ 21268c2ecf20Sopenharmony_ci busy = false; 21278c2ecf20Sopenharmony_ci } 21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci return busy; 21308c2ecf20Sopenharmony_ci} 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci/*----------------------------------------------------------------- 21338c2ecf20Sopenharmony_ci * Module setup 21348c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/ 21358c2ecf20Sopenharmony_cistatic struct target_type multipath_target = { 21368c2ecf20Sopenharmony_ci .name = "multipath", 21378c2ecf20Sopenharmony_ci .version = {1, 14, 0}, 21388c2ecf20Sopenharmony_ci .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | 21398c2ecf20Sopenharmony_ci DM_TARGET_PASSES_INTEGRITY, 21408c2ecf20Sopenharmony_ci .module = THIS_MODULE, 21418c2ecf20Sopenharmony_ci .ctr = multipath_ctr, 21428c2ecf20Sopenharmony_ci .dtr = multipath_dtr, 21438c2ecf20Sopenharmony_ci .clone_and_map_rq = multipath_clone_and_map, 21448c2ecf20Sopenharmony_ci .release_clone_rq = multipath_release_clone, 21458c2ecf20Sopenharmony_ci .rq_end_io = multipath_end_io, 21468c2ecf20Sopenharmony_ci .map = multipath_map_bio, 21478c2ecf20Sopenharmony_ci .end_io = multipath_end_io_bio, 21488c2ecf20Sopenharmony_ci .presuspend = multipath_presuspend, 21498c2ecf20Sopenharmony_ci .postsuspend = multipath_postsuspend, 21508c2ecf20Sopenharmony_ci .resume = multipath_resume, 21518c2ecf20Sopenharmony_ci .status = multipath_status, 21528c2ecf20Sopenharmony_ci .message = multipath_message, 21538c2ecf20Sopenharmony_ci .prepare_ioctl = multipath_prepare_ioctl, 21548c2ecf20Sopenharmony_ci .iterate_devices = multipath_iterate_devices, 21558c2ecf20Sopenharmony_ci .busy = multipath_busy, 21568c2ecf20Sopenharmony_ci}; 21578c2ecf20Sopenharmony_ci 21588c2ecf20Sopenharmony_cistatic int __init dm_multipath_init(void) 21598c2ecf20Sopenharmony_ci{ 21608c2ecf20Sopenharmony_ci int r; 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_ci kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); 21638c2ecf20Sopenharmony_ci if (!kmultipathd) { 21648c2ecf20Sopenharmony_ci DMERR("failed to create workqueue kmpathd"); 21658c2ecf20Sopenharmony_ci r = -ENOMEM; 21668c2ecf20Sopenharmony_ci goto bad_alloc_kmultipathd; 21678c2ecf20Sopenharmony_ci } 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci /* 21708c2ecf20Sopenharmony_ci * A separate workqueue is used to handle the device handlers 21718c2ecf20Sopenharmony_ci * to avoid overloading existing workqueue. Overloading the 21728c2ecf20Sopenharmony_ci * old workqueue would also create a bottleneck in the 21738c2ecf20Sopenharmony_ci * path of the storage hardware device activation. 21748c2ecf20Sopenharmony_ci */ 21758c2ecf20Sopenharmony_ci kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd", 21768c2ecf20Sopenharmony_ci WQ_MEM_RECLAIM); 21778c2ecf20Sopenharmony_ci if (!kmpath_handlerd) { 21788c2ecf20Sopenharmony_ci DMERR("failed to create workqueue kmpath_handlerd"); 21798c2ecf20Sopenharmony_ci r = -ENOMEM; 21808c2ecf20Sopenharmony_ci goto bad_alloc_kmpath_handlerd; 21818c2ecf20Sopenharmony_ci } 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci r = dm_register_target(&multipath_target); 21848c2ecf20Sopenharmony_ci if (r < 0) { 21858c2ecf20Sopenharmony_ci DMERR("request-based register failed %d", r); 21868c2ecf20Sopenharmony_ci r = -EINVAL; 21878c2ecf20Sopenharmony_ci goto bad_register_target; 21888c2ecf20Sopenharmony_ci } 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci return 0; 21918c2ecf20Sopenharmony_ci 21928c2ecf20Sopenharmony_cibad_register_target: 21938c2ecf20Sopenharmony_ci destroy_workqueue(kmpath_handlerd); 21948c2ecf20Sopenharmony_cibad_alloc_kmpath_handlerd: 21958c2ecf20Sopenharmony_ci destroy_workqueue(kmultipathd); 21968c2ecf20Sopenharmony_cibad_alloc_kmultipathd: 21978c2ecf20Sopenharmony_ci return r; 21988c2ecf20Sopenharmony_ci} 21998c2ecf20Sopenharmony_ci 22008c2ecf20Sopenharmony_cistatic void __exit dm_multipath_exit(void) 22018c2ecf20Sopenharmony_ci{ 22028c2ecf20Sopenharmony_ci destroy_workqueue(kmpath_handlerd); 22038c2ecf20Sopenharmony_ci destroy_workqueue(kmultipathd); 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci dm_unregister_target(&multipath_target); 22068c2ecf20Sopenharmony_ci} 22078c2ecf20Sopenharmony_ci 22088c2ecf20Sopenharmony_cimodule_init(dm_multipath_init); 22098c2ecf20Sopenharmony_cimodule_exit(dm_multipath_exit); 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_cimodule_param_named(queue_if_no_path_timeout_secs, 22128c2ecf20Sopenharmony_ci queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR); 22138c2ecf20Sopenharmony_ciMODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds"); 22148c2ecf20Sopenharmony_ci 22158c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " multipath target"); 22168c2ecf20Sopenharmony_ciMODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); 22178c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 2218