18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2003 Sistina Software Limited.
38c2ecf20Sopenharmony_ci * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This file is released under the GPL.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/device-mapper.h>
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include "dm-rq.h"
118c2ecf20Sopenharmony_ci#include "dm-bio-record.h"
128c2ecf20Sopenharmony_ci#include "dm-path-selector.h"
138c2ecf20Sopenharmony_ci#include "dm-uevent.h"
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
168c2ecf20Sopenharmony_ci#include <linux/ctype.h>
178c2ecf20Sopenharmony_ci#include <linux/init.h>
188c2ecf20Sopenharmony_ci#include <linux/mempool.h>
198c2ecf20Sopenharmony_ci#include <linux/module.h>
208c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
218c2ecf20Sopenharmony_ci#include <linux/slab.h>
228c2ecf20Sopenharmony_ci#include <linux/time.h>
238c2ecf20Sopenharmony_ci#include <linux/timer.h>
248c2ecf20Sopenharmony_ci#include <linux/workqueue.h>
258c2ecf20Sopenharmony_ci#include <linux/delay.h>
268c2ecf20Sopenharmony_ci#include <scsi/scsi_dh.h>
278c2ecf20Sopenharmony_ci#include <linux/atomic.h>
288c2ecf20Sopenharmony_ci#include <linux/blk-mq.h>
298c2ecf20Sopenharmony_ci
/* Prefix handed to the device-mapper message macros for this target. */
#define DM_MSG_PREFIX "multipath"

/* pg_init retry delay (msecs) applied when no explicit delay is configured. */
#define DM_PG_INIT_DELAY_MSECS 2000

/* Sentinel stored in pg_init_delay_msecs meaning "no explicit delay set". */
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned int) -1)

/* Initial value of queue_if_no_path_timeout_secs. */
#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0

/*
 * Timeout, in seconds, for queue_if_no_path.
 * NOTE(review): consumers of this variable are outside this part of the
 * file; presumably 0 disables the timeout - confirm.
 */
static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/* Path properties */
388c2ecf20Sopenharmony_cistruct pgpath {
398c2ecf20Sopenharmony_ci	struct list_head list;
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	struct priority_group *pg;	/* Owning PG */
428c2ecf20Sopenharmony_ci	unsigned fail_count;		/* Cumulative failure count */
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci	struct dm_path path;
458c2ecf20Sopenharmony_ci	struct delayed_work activate_path;
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	bool is_active:1;		/* Path status */
488c2ecf20Sopenharmony_ci};
498c2ecf20Sopenharmony_ci
/* Map a pointer to an embedded struct dm_path back to its enclosing pgpath. */
#define path_to_pgpath(dm_path_ptr) \
	container_of((dm_path_ptr), struct pgpath, path)
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci/*
538c2ecf20Sopenharmony_ci * Paths are grouped into Priority Groups and numbered from 1 upwards.
548c2ecf20Sopenharmony_ci * Each has a path selector which controls which path gets used.
558c2ecf20Sopenharmony_ci */
568c2ecf20Sopenharmony_cistruct priority_group {
578c2ecf20Sopenharmony_ci	struct list_head list;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	struct multipath *m;		/* Owning multipath instance */
608c2ecf20Sopenharmony_ci	struct path_selector ps;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	unsigned pg_num;		/* Reference number */
638c2ecf20Sopenharmony_ci	unsigned nr_pgpaths;		/* Number of paths in PG */
648c2ecf20Sopenharmony_ci	struct list_head pgpaths;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	bool bypassed:1;		/* Temporarily bypass this PG? */
678c2ecf20Sopenharmony_ci};
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci/* Multipath context */
708c2ecf20Sopenharmony_cistruct multipath {
718c2ecf20Sopenharmony_ci	unsigned long flags;		/* Multipath state flags */
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	spinlock_t lock;
748c2ecf20Sopenharmony_ci	enum dm_queue_mode queue_mode;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	struct pgpath *current_pgpath;
778c2ecf20Sopenharmony_ci	struct priority_group *current_pg;
788c2ecf20Sopenharmony_ci	struct priority_group *next_pg;	/* Switch to this PG if set */
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	atomic_t nr_valid_paths;	/* Total number of usable paths */
818c2ecf20Sopenharmony_ci	unsigned nr_priority_groups;
828c2ecf20Sopenharmony_ci	struct list_head priority_groups;
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	const char *hw_handler_name;
858c2ecf20Sopenharmony_ci	char *hw_handler_params;
868c2ecf20Sopenharmony_ci	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */
878c2ecf20Sopenharmony_ci	unsigned pg_init_retries;	/* Number of times to retry pg_init */
888c2ecf20Sopenharmony_ci	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */
898c2ecf20Sopenharmony_ci	atomic_t pg_init_in_progress;	/* Only one pg_init allowed at once */
908c2ecf20Sopenharmony_ci	atomic_t pg_init_count;		/* Number of times pg_init called */
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	struct mutex work_mutex;
938c2ecf20Sopenharmony_ci	struct work_struct trigger_event;
948c2ecf20Sopenharmony_ci	struct dm_target *ti;
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	struct work_struct process_queued_bios;
978c2ecf20Sopenharmony_ci	struct bio_list queued_bios;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	struct timer_list nopath_timer;	/* Timeout for queue_if_no_path */
1008c2ecf20Sopenharmony_ci};
1018c2ecf20Sopenharmony_ci
/*
 * Per-I/O context: the path chosen for the I/O and the I/O's size in bytes.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;	/* Path the I/O was mapped to, or NULL */
	size_t nr_bytes;	/* Byte count recorded when the I/O was mapped */
};
1098c2ecf20Sopenharmony_ci
/* Callback type: an action applied to a single path. */
typedef int (*action_fn) (struct pgpath *pgpath);

/* Workqueues used by this target (bio resubmission and path activation). */
static struct workqueue_struct *kmultipathd;
static struct workqueue_struct *kmpath_handlerd;

/* Forward declarations for handlers defined later in the file. */
static void trigger_event(struct work_struct *work);
static void activate_or_offline_path(struct pgpath *pgpath);
static void activate_path_work(struct work_struct *work);
static void process_queued_bios(struct work_struct *work);
static void queue_if_no_path_timeout_work(struct timer_list *t);
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci/*-----------------------------------------------
1208c2ecf20Sopenharmony_ci * Multipath state flags.
1218c2ecf20Sopenharmony_ci *-----------------------------------------------*/
1228c2ecf20Sopenharmony_ci
/* Bit numbers within multipath->flags (used with test_bit/set_bit/clear_bit). */
#define MPATHF_QUEUE_IO 0			/* All I/O must be queued */
#define MPATHF_QUEUE_IF_NO_PATH 1		/* Queue I/O when the last path fails */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2		/* State saved across suspension */
#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3	/* Leave an already attached hw_handler unchanged */
#define MPATHF_PG_INIT_DISABLED 4		/* pg_init is currently not allowed */
#define MPATHF_PG_INIT_REQUIRED 5		/* pg_init still has to be called */
#define MPATHF_PG_INIT_DELAY_RETRY 6		/* Delay the pg_init retry */
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_cistatic bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
1328c2ecf20Sopenharmony_ci{
1338c2ecf20Sopenharmony_ci	bool r = test_bit(MPATHF_bit, &m->flags);
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	if (r) {
1368c2ecf20Sopenharmony_ci		unsigned long flags;
1378c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
1388c2ecf20Sopenharmony_ci		r = test_bit(MPATHF_bit, &m->flags);
1398c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
1408c2ecf20Sopenharmony_ci	}
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	return r;
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci/*-----------------------------------------------
1468c2ecf20Sopenharmony_ci * Allocation routines
1478c2ecf20Sopenharmony_ci *-----------------------------------------------*/
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_cistatic struct pgpath *alloc_pgpath(void)
1508c2ecf20Sopenharmony_ci{
1518c2ecf20Sopenharmony_ci	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	if (!pgpath)
1548c2ecf20Sopenharmony_ci		return NULL;
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	pgpath->is_active = true;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	return pgpath;
1598c2ecf20Sopenharmony_ci}
1608c2ecf20Sopenharmony_ci
/* Release the memory of a pgpath; the embedded device is not touched here. */
static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_cistatic struct priority_group *alloc_priority_group(void)
1678c2ecf20Sopenharmony_ci{
1688c2ecf20Sopenharmony_ci	struct priority_group *pg;
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	pg = kzalloc(sizeof(*pg), GFP_KERNEL);
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	if (pg)
1738c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&pg->pgpaths);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	return pg;
1768c2ecf20Sopenharmony_ci}
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_cistatic void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
1798c2ecf20Sopenharmony_ci{
1808c2ecf20Sopenharmony_ci	struct pgpath *pgpath, *tmp;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
1838c2ecf20Sopenharmony_ci		list_del(&pgpath->list);
1848c2ecf20Sopenharmony_ci		dm_put_device(ti, pgpath->path.dev);
1858c2ecf20Sopenharmony_ci		free_pgpath(pgpath);
1868c2ecf20Sopenharmony_ci	}
1878c2ecf20Sopenharmony_ci}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic void free_priority_group(struct priority_group *pg,
1908c2ecf20Sopenharmony_ci				struct dm_target *ti)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	struct path_selector *ps = &pg->ps;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	if (ps->type) {
1958c2ecf20Sopenharmony_ci		ps->type->destroy(ps);
1968c2ecf20Sopenharmony_ci		dm_put_path_selector(ps->type);
1978c2ecf20Sopenharmony_ci	}
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	free_pgpaths(&pg->pgpaths, ti);
2008c2ecf20Sopenharmony_ci	kfree(pg);
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_cistatic struct multipath *alloc_multipath(struct dm_target *ti)
2048c2ecf20Sopenharmony_ci{
2058c2ecf20Sopenharmony_ci	struct multipath *m;
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	m = kzalloc(sizeof(*m), GFP_KERNEL);
2088c2ecf20Sopenharmony_ci	if (m) {
2098c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&m->priority_groups);
2108c2ecf20Sopenharmony_ci		spin_lock_init(&m->lock);
2118c2ecf20Sopenharmony_ci		atomic_set(&m->nr_valid_paths, 0);
2128c2ecf20Sopenharmony_ci		INIT_WORK(&m->trigger_event, trigger_event);
2138c2ecf20Sopenharmony_ci		mutex_init(&m->work_mutex);
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci		m->queue_mode = DM_TYPE_NONE;
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci		m->ti = ti;
2188c2ecf20Sopenharmony_ci		ti->private = m;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci		timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0);
2218c2ecf20Sopenharmony_ci	}
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	return m;
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_cistatic int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_NONE) {
2298c2ecf20Sopenharmony_ci		m->queue_mode = DM_TYPE_REQUEST_BASED;
2308c2ecf20Sopenharmony_ci	} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
2318c2ecf20Sopenharmony_ci		INIT_WORK(&m->process_queued_bios, process_queued_bios);
2328c2ecf20Sopenharmony_ci		/*
2338c2ecf20Sopenharmony_ci		 * bio-based doesn't support any direct scsi_dh management;
2348c2ecf20Sopenharmony_ci		 * it just discovers if a scsi_dh is attached.
2358c2ecf20Sopenharmony_ci		 */
2368c2ecf20Sopenharmony_ci		set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
2378c2ecf20Sopenharmony_ci	}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	dm_table_set_type(ti->table, m->queue_mode);
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	/*
2428c2ecf20Sopenharmony_ci	 * Init fields that are only used when a scsi_dh is attached
2438c2ecf20Sopenharmony_ci	 * - must do this unconditionally (really doesn't hurt non-SCSI uses)
2448c2ecf20Sopenharmony_ci	 */
2458c2ecf20Sopenharmony_ci	set_bit(MPATHF_QUEUE_IO, &m->flags);
2468c2ecf20Sopenharmony_ci	atomic_set(&m->pg_init_in_progress, 0);
2478c2ecf20Sopenharmony_ci	atomic_set(&m->pg_init_count, 0);
2488c2ecf20Sopenharmony_ci	m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
2498c2ecf20Sopenharmony_ci	init_waitqueue_head(&m->pg_init_wait);
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	return 0;
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic void free_multipath(struct multipath *m)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	struct priority_group *pg, *tmp;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
2598c2ecf20Sopenharmony_ci		list_del(&pg->list);
2608c2ecf20Sopenharmony_ci		free_priority_group(pg, m->ti);
2618c2ecf20Sopenharmony_ci	}
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	kfree(m->hw_handler_name);
2648c2ecf20Sopenharmony_ci	kfree(m->hw_handler_params);
2658c2ecf20Sopenharmony_ci	mutex_destroy(&m->work_mutex);
2668c2ecf20Sopenharmony_ci	kfree(m);
2678c2ecf20Sopenharmony_ci}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_cistatic struct dm_mpath_io *get_mpio(union map_info *info)
2708c2ecf20Sopenharmony_ci{
2718c2ecf20Sopenharmony_ci	return info->ptr;
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_cistatic size_t multipath_per_bio_data_size(void)
2758c2ecf20Sopenharmony_ci{
2768c2ecf20Sopenharmony_ci	return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_cistatic struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
2808c2ecf20Sopenharmony_ci{
2818c2ecf20Sopenharmony_ci	return dm_per_bio_data(bio, multipath_per_bio_data_size());
2828c2ecf20Sopenharmony_ci}
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_cistatic struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio)
2858c2ecf20Sopenharmony_ci{
2868c2ecf20Sopenharmony_ci	/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
2878c2ecf20Sopenharmony_ci	void *bio_details = mpio + 1;
2888c2ecf20Sopenharmony_ci	return bio_details;
2898c2ecf20Sopenharmony_ci}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_cistatic void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
2928c2ecf20Sopenharmony_ci{
2938c2ecf20Sopenharmony_ci	struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
2948c2ecf20Sopenharmony_ci	struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	mpio->nr_bytes = bio->bi_iter.bi_size;
2978c2ecf20Sopenharmony_ci	mpio->pgpath = NULL;
2988c2ecf20Sopenharmony_ci	*mpio_p = mpio;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	dm_bio_record(bio_details, bio);
3018c2ecf20Sopenharmony_ci}
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci/*-----------------------------------------------
3048c2ecf20Sopenharmony_ci * Path selection
3058c2ecf20Sopenharmony_ci *-----------------------------------------------*/
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_cistatic int __pg_init_all_paths(struct multipath *m)
3088c2ecf20Sopenharmony_ci{
3098c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
3108c2ecf20Sopenharmony_ci	unsigned long pg_init_delay = 0;
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	lockdep_assert_held(&m->lock);
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci	if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
3158c2ecf20Sopenharmony_ci		return 0;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	atomic_inc(&m->pg_init_count);
3188c2ecf20Sopenharmony_ci	clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci	/* Check here to reset pg_init_required */
3218c2ecf20Sopenharmony_ci	if (!m->current_pg)
3228c2ecf20Sopenharmony_ci		return 0;
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
3258c2ecf20Sopenharmony_ci		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
3268c2ecf20Sopenharmony_ci						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
3278c2ecf20Sopenharmony_ci	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
3288c2ecf20Sopenharmony_ci		/* Skip failed paths */
3298c2ecf20Sopenharmony_ci		if (!pgpath->is_active)
3308c2ecf20Sopenharmony_ci			continue;
3318c2ecf20Sopenharmony_ci		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
3328c2ecf20Sopenharmony_ci				       pg_init_delay))
3338c2ecf20Sopenharmony_ci			atomic_inc(&m->pg_init_in_progress);
3348c2ecf20Sopenharmony_ci	}
3358c2ecf20Sopenharmony_ci	return atomic_read(&m->pg_init_in_progress);
3368c2ecf20Sopenharmony_ci}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_cistatic int pg_init_all_paths(struct multipath *m)
3398c2ecf20Sopenharmony_ci{
3408c2ecf20Sopenharmony_ci	int ret;
3418c2ecf20Sopenharmony_ci	unsigned long flags;
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
3448c2ecf20Sopenharmony_ci	ret = __pg_init_all_paths(m);
3458c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci	return ret;
3488c2ecf20Sopenharmony_ci}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_cistatic void __switch_pg(struct multipath *m, struct priority_group *pg)
3518c2ecf20Sopenharmony_ci{
3528c2ecf20Sopenharmony_ci	lockdep_assert_held(&m->lock);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	m->current_pg = pg;
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	/* Must we initialise the PG first, and queue I/O till it's ready? */
3578c2ecf20Sopenharmony_ci	if (m->hw_handler_name) {
3588c2ecf20Sopenharmony_ci		set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
3598c2ecf20Sopenharmony_ci		set_bit(MPATHF_QUEUE_IO, &m->flags);
3608c2ecf20Sopenharmony_ci	} else {
3618c2ecf20Sopenharmony_ci		clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
3628c2ecf20Sopenharmony_ci		clear_bit(MPATHF_QUEUE_IO, &m->flags);
3638c2ecf20Sopenharmony_ci	}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci	atomic_set(&m->pg_init_count, 0);
3668c2ecf20Sopenharmony_ci}
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_cistatic struct pgpath *choose_path_in_pg(struct multipath *m,
3698c2ecf20Sopenharmony_ci					struct priority_group *pg,
3708c2ecf20Sopenharmony_ci					size_t nr_bytes)
3718c2ecf20Sopenharmony_ci{
3728c2ecf20Sopenharmony_ci	unsigned long flags;
3738c2ecf20Sopenharmony_ci	struct dm_path *path;
3748c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	path = pg->ps.type->select_path(&pg->ps, nr_bytes);
3778c2ecf20Sopenharmony_ci	if (!path)
3788c2ecf20Sopenharmony_ci		return ERR_PTR(-ENXIO);
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	pgpath = path_to_pgpath(path);
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	if (unlikely(READ_ONCE(m->current_pg) != pg)) {
3838c2ecf20Sopenharmony_ci		/* Only update current_pgpath if pg changed */
3848c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
3858c2ecf20Sopenharmony_ci		m->current_pgpath = pgpath;
3868c2ecf20Sopenharmony_ci		__switch_pg(m, pg);
3878c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	return pgpath;
3918c2ecf20Sopenharmony_ci}
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_cistatic struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
3948c2ecf20Sopenharmony_ci{
3958c2ecf20Sopenharmony_ci	unsigned long flags;
3968c2ecf20Sopenharmony_ci	struct priority_group *pg;
3978c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
3988c2ecf20Sopenharmony_ci	unsigned bypassed = 1;
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	if (!atomic_read(&m->nr_valid_paths)) {
4018c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
4028c2ecf20Sopenharmony_ci		clear_bit(MPATHF_QUEUE_IO, &m->flags);
4038c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
4048c2ecf20Sopenharmony_ci		goto failed;
4058c2ecf20Sopenharmony_ci	}
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	/* Were we instructed to switch PG? */
4088c2ecf20Sopenharmony_ci	if (READ_ONCE(m->next_pg)) {
4098c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
4108c2ecf20Sopenharmony_ci		pg = m->next_pg;
4118c2ecf20Sopenharmony_ci		if (!pg) {
4128c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&m->lock, flags);
4138c2ecf20Sopenharmony_ci			goto check_current_pg;
4148c2ecf20Sopenharmony_ci		}
4158c2ecf20Sopenharmony_ci		m->next_pg = NULL;
4168c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
4178c2ecf20Sopenharmony_ci		pgpath = choose_path_in_pg(m, pg, nr_bytes);
4188c2ecf20Sopenharmony_ci		if (!IS_ERR_OR_NULL(pgpath))
4198c2ecf20Sopenharmony_ci			return pgpath;
4208c2ecf20Sopenharmony_ci	}
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	/* Don't change PG until it has no remaining paths */
4238c2ecf20Sopenharmony_cicheck_current_pg:
4248c2ecf20Sopenharmony_ci	pg = READ_ONCE(m->current_pg);
4258c2ecf20Sopenharmony_ci	if (pg) {
4268c2ecf20Sopenharmony_ci		pgpath = choose_path_in_pg(m, pg, nr_bytes);
4278c2ecf20Sopenharmony_ci		if (!IS_ERR_OR_NULL(pgpath))
4288c2ecf20Sopenharmony_ci			return pgpath;
4298c2ecf20Sopenharmony_ci	}
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	/*
4328c2ecf20Sopenharmony_ci	 * Loop through priority groups until we find a valid path.
4338c2ecf20Sopenharmony_ci	 * First time we skip PGs marked 'bypassed'.
4348c2ecf20Sopenharmony_ci	 * Second time we only try the ones we skipped, but set
4358c2ecf20Sopenharmony_ci	 * pg_init_delay_retry so we do not hammer controllers.
4368c2ecf20Sopenharmony_ci	 */
4378c2ecf20Sopenharmony_ci	do {
4388c2ecf20Sopenharmony_ci		list_for_each_entry(pg, &m->priority_groups, list) {
4398c2ecf20Sopenharmony_ci			if (pg->bypassed == !!bypassed)
4408c2ecf20Sopenharmony_ci				continue;
4418c2ecf20Sopenharmony_ci			pgpath = choose_path_in_pg(m, pg, nr_bytes);
4428c2ecf20Sopenharmony_ci			if (!IS_ERR_OR_NULL(pgpath)) {
4438c2ecf20Sopenharmony_ci				if (!bypassed) {
4448c2ecf20Sopenharmony_ci					spin_lock_irqsave(&m->lock, flags);
4458c2ecf20Sopenharmony_ci					set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
4468c2ecf20Sopenharmony_ci					spin_unlock_irqrestore(&m->lock, flags);
4478c2ecf20Sopenharmony_ci				}
4488c2ecf20Sopenharmony_ci				return pgpath;
4498c2ecf20Sopenharmony_ci			}
4508c2ecf20Sopenharmony_ci		}
4518c2ecf20Sopenharmony_ci	} while (bypassed--);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_cifailed:
4548c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
4558c2ecf20Sopenharmony_ci	m->current_pgpath = NULL;
4568c2ecf20Sopenharmony_ci	m->current_pg = NULL;
4578c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	return NULL;
4608c2ecf20Sopenharmony_ci}
4618c2ecf20Sopenharmony_ci
/*
 * dm_report_EIO() is deliberately a macro and not a function: that way
 * pr_debug_ratelimited() reports the name and line number of the function
 * that invoked it rather than those of a shared helper.
 *
 * Note that the argument is evaluated more than once.
 */
#define dm_report_EIO(mp)						\
do {									\
	DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \
		      dm_table_device_name((mp)->ti->table),		\
		      test_bit(MPATHF_QUEUE_IF_NO_PATH, &(mp)->flags),	\
		      test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(mp)->flags), \
		      dm_noflush_suspending((mp)->ti));			\
} while (0)
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci/*
4778c2ecf20Sopenharmony_ci * Check whether bios must be queued in the device-mapper core rather
4788c2ecf20Sopenharmony_ci * than here in the target.
4798c2ecf20Sopenharmony_ci */
4808c2ecf20Sopenharmony_cistatic bool __must_push_back(struct multipath *m)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	return dm_noflush_suspending(m->ti);
4838c2ecf20Sopenharmony_ci}
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_cistatic bool must_push_back_rq(struct multipath *m)
4868c2ecf20Sopenharmony_ci{
4878c2ecf20Sopenharmony_ci	unsigned long flags;
4888c2ecf20Sopenharmony_ci	bool ret;
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
4918c2ecf20Sopenharmony_ci	ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m));
4928c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	return ret;
4958c2ecf20Sopenharmony_ci}
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci/*
4988c2ecf20Sopenharmony_ci * Map cloned requests (request-based multipath)
4998c2ecf20Sopenharmony_ci */
5008c2ecf20Sopenharmony_cistatic int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
5018c2ecf20Sopenharmony_ci				   union map_info *map_context,
5028c2ecf20Sopenharmony_ci				   struct request **__clone)
5038c2ecf20Sopenharmony_ci{
5048c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
5058c2ecf20Sopenharmony_ci	size_t nr_bytes = blk_rq_bytes(rq);
5068c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
5078c2ecf20Sopenharmony_ci	struct block_device *bdev;
5088c2ecf20Sopenharmony_ci	struct dm_mpath_io *mpio = get_mpio(map_context);
5098c2ecf20Sopenharmony_ci	struct request_queue *q;
5108c2ecf20Sopenharmony_ci	struct request *clone;
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	/* Do we need to select a new pgpath? */
5138c2ecf20Sopenharmony_ci	pgpath = READ_ONCE(m->current_pgpath);
5148c2ecf20Sopenharmony_ci	if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
5158c2ecf20Sopenharmony_ci		pgpath = choose_pgpath(m, nr_bytes);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	if (!pgpath) {
5188c2ecf20Sopenharmony_ci		if (must_push_back_rq(m))
5198c2ecf20Sopenharmony_ci			return DM_MAPIO_DELAY_REQUEUE;
5208c2ecf20Sopenharmony_ci		dm_report_EIO(m);	/* Failed */
5218c2ecf20Sopenharmony_ci		return DM_MAPIO_KILL;
5228c2ecf20Sopenharmony_ci	} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
5238c2ecf20Sopenharmony_ci		   mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
5248c2ecf20Sopenharmony_ci		pg_init_all_paths(m);
5258c2ecf20Sopenharmony_ci		return DM_MAPIO_DELAY_REQUEUE;
5268c2ecf20Sopenharmony_ci	}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	mpio->pgpath = pgpath;
5298c2ecf20Sopenharmony_ci	mpio->nr_bytes = nr_bytes;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	bdev = pgpath->path.dev->bdev;
5328c2ecf20Sopenharmony_ci	q = bdev_get_queue(bdev);
5338c2ecf20Sopenharmony_ci	clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
5348c2ecf20Sopenharmony_ci			BLK_MQ_REQ_NOWAIT);
5358c2ecf20Sopenharmony_ci	if (IS_ERR(clone)) {
5368c2ecf20Sopenharmony_ci		/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
5378c2ecf20Sopenharmony_ci		if (blk_queue_dying(q)) {
5388c2ecf20Sopenharmony_ci			atomic_inc(&m->pg_init_in_progress);
5398c2ecf20Sopenharmony_ci			activate_or_offline_path(pgpath);
5408c2ecf20Sopenharmony_ci			return DM_MAPIO_DELAY_REQUEUE;
5418c2ecf20Sopenharmony_ci		}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci		/*
5448c2ecf20Sopenharmony_ci		 * blk-mq's SCHED_RESTART can cover this requeue, so we
5458c2ecf20Sopenharmony_ci		 * needn't deal with it by DELAY_REQUEUE. More importantly,
5468c2ecf20Sopenharmony_ci		 * we have to return DM_MAPIO_REQUEUE so that blk-mq can
5478c2ecf20Sopenharmony_ci		 * get the queue busy feedback (via BLK_STS_RESOURCE),
5488c2ecf20Sopenharmony_ci		 * otherwise I/O merging can suffer.
5498c2ecf20Sopenharmony_ci		 */
5508c2ecf20Sopenharmony_ci		return DM_MAPIO_REQUEUE;
5518c2ecf20Sopenharmony_ci	}
5528c2ecf20Sopenharmony_ci	clone->bio = clone->biotail = NULL;
5538c2ecf20Sopenharmony_ci	clone->rq_disk = bdev->bd_disk;
5548c2ecf20Sopenharmony_ci	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
5558c2ecf20Sopenharmony_ci	*__clone = clone;
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_ci	if (pgpath->pg->ps.type->start_io)
5588c2ecf20Sopenharmony_ci		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
5598c2ecf20Sopenharmony_ci					      &pgpath->path,
5608c2ecf20Sopenharmony_ci					      nr_bytes);
5618c2ecf20Sopenharmony_ci	return DM_MAPIO_REMAPPED;
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_cistatic void multipath_release_clone(struct request *clone,
5658c2ecf20Sopenharmony_ci				    union map_info *map_context)
5668c2ecf20Sopenharmony_ci{
5678c2ecf20Sopenharmony_ci	if (unlikely(map_context)) {
5688c2ecf20Sopenharmony_ci		/*
5698c2ecf20Sopenharmony_ci		 * non-NULL map_context means caller is still map
5708c2ecf20Sopenharmony_ci		 * method; must undo multipath_clone_and_map()
5718c2ecf20Sopenharmony_ci		 */
5728c2ecf20Sopenharmony_ci		struct dm_mpath_io *mpio = get_mpio(map_context);
5738c2ecf20Sopenharmony_ci		struct pgpath *pgpath = mpio->pgpath;
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci		if (pgpath && pgpath->pg->ps.type->end_io)
5768c2ecf20Sopenharmony_ci			pgpath->pg->ps.type->end_io(&pgpath->pg->ps,
5778c2ecf20Sopenharmony_ci						    &pgpath->path,
5788c2ecf20Sopenharmony_ci						    mpio->nr_bytes,
5798c2ecf20Sopenharmony_ci						    clone->io_start_time_ns);
5808c2ecf20Sopenharmony_ci	}
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	blk_put_request(clone);
5838c2ecf20Sopenharmony_ci}
5848c2ecf20Sopenharmony_ci
5858c2ecf20Sopenharmony_ci/*
5868c2ecf20Sopenharmony_ci * Map cloned bios (bio-based multipath)
5878c2ecf20Sopenharmony_ci */
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_cistatic void __multipath_queue_bio(struct multipath *m, struct bio *bio)
5908c2ecf20Sopenharmony_ci{
5918c2ecf20Sopenharmony_ci	/* Queue for the daemon to resubmit */
5928c2ecf20Sopenharmony_ci	bio_list_add(&m->queued_bios, bio);
5938c2ecf20Sopenharmony_ci	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
5948c2ecf20Sopenharmony_ci		queue_work(kmultipathd, &m->process_queued_bios);
5958c2ecf20Sopenharmony_ci}
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_cistatic void multipath_queue_bio(struct multipath *m, struct bio *bio)
5988c2ecf20Sopenharmony_ci{
5998c2ecf20Sopenharmony_ci	unsigned long flags;
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
6028c2ecf20Sopenharmony_ci	__multipath_queue_bio(m, bio);
6038c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
6048c2ecf20Sopenharmony_ci}
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_cistatic struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
6078c2ecf20Sopenharmony_ci{
6088c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
6098c2ecf20Sopenharmony_ci	unsigned long flags;
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	/* Do we need to select a new pgpath? */
6128c2ecf20Sopenharmony_ci	pgpath = READ_ONCE(m->current_pgpath);
6138c2ecf20Sopenharmony_ci	if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
6148c2ecf20Sopenharmony_ci		pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci	if (!pgpath) {
6178c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
6188c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
6198c2ecf20Sopenharmony_ci			__multipath_queue_bio(m, bio);
6208c2ecf20Sopenharmony_ci			pgpath = ERR_PTR(-EAGAIN);
6218c2ecf20Sopenharmony_ci		}
6228c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
6258c2ecf20Sopenharmony_ci		   mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
6268c2ecf20Sopenharmony_ci		multipath_queue_bio(m, bio);
6278c2ecf20Sopenharmony_ci		pg_init_all_paths(m);
6288c2ecf20Sopenharmony_ci		return ERR_PTR(-EAGAIN);
6298c2ecf20Sopenharmony_ci	}
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	return pgpath;
6328c2ecf20Sopenharmony_ci}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_cistatic int __multipath_map_bio(struct multipath *m, struct bio *bio,
6358c2ecf20Sopenharmony_ci			       struct dm_mpath_io *mpio)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	struct pgpath *pgpath = __map_bio(m, bio);
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	if (IS_ERR(pgpath))
6408c2ecf20Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	if (!pgpath) {
6438c2ecf20Sopenharmony_ci		if (__must_push_back(m))
6448c2ecf20Sopenharmony_ci			return DM_MAPIO_REQUEUE;
6458c2ecf20Sopenharmony_ci		dm_report_EIO(m);
6468c2ecf20Sopenharmony_ci		return DM_MAPIO_KILL;
6478c2ecf20Sopenharmony_ci	}
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	mpio->pgpath = pgpath;
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	bio->bi_status = 0;
6528c2ecf20Sopenharmony_ci	bio_set_dev(bio, pgpath->path.dev->bdev);
6538c2ecf20Sopenharmony_ci	bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	if (pgpath->pg->ps.type->start_io)
6568c2ecf20Sopenharmony_ci		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
6578c2ecf20Sopenharmony_ci					      &pgpath->path,
6588c2ecf20Sopenharmony_ci					      mpio->nr_bytes);
6598c2ecf20Sopenharmony_ci	return DM_MAPIO_REMAPPED;
6608c2ecf20Sopenharmony_ci}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_cistatic int multipath_map_bio(struct dm_target *ti, struct bio *bio)
6638c2ecf20Sopenharmony_ci{
6648c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
6658c2ecf20Sopenharmony_ci	struct dm_mpath_io *mpio = NULL;
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	multipath_init_per_bio_data(bio, &mpio);
6688c2ecf20Sopenharmony_ci	return __multipath_map_bio(m, bio, mpio);
6698c2ecf20Sopenharmony_ci}
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_cistatic void process_queued_io_list(struct multipath *m)
6728c2ecf20Sopenharmony_ci{
6738c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_REQUEST_BASED)
6748c2ecf20Sopenharmony_ci		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
6758c2ecf20Sopenharmony_ci	else if (m->queue_mode == DM_TYPE_BIO_BASED)
6768c2ecf20Sopenharmony_ci		queue_work(kmultipathd, &m->process_queued_bios);
6778c2ecf20Sopenharmony_ci}
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_cistatic void process_queued_bios(struct work_struct *work)
6808c2ecf20Sopenharmony_ci{
6818c2ecf20Sopenharmony_ci	int r;
6828c2ecf20Sopenharmony_ci	unsigned long flags;
6838c2ecf20Sopenharmony_ci	struct bio *bio;
6848c2ecf20Sopenharmony_ci	struct bio_list bios;
6858c2ecf20Sopenharmony_ci	struct blk_plug plug;
6868c2ecf20Sopenharmony_ci	struct multipath *m =
6878c2ecf20Sopenharmony_ci		container_of(work, struct multipath, process_queued_bios);
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	bio_list_init(&bios);
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	if (bio_list_empty(&m->queued_bios)) {
6948c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
6958c2ecf20Sopenharmony_ci		return;
6968c2ecf20Sopenharmony_ci	}
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	bio_list_merge(&bios, &m->queued_bios);
6998c2ecf20Sopenharmony_ci	bio_list_init(&m->queued_bios);
7008c2ecf20Sopenharmony_ci
7018c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
7048c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bios))) {
7058c2ecf20Sopenharmony_ci		struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
7068c2ecf20Sopenharmony_ci		dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
7078c2ecf20Sopenharmony_ci		r = __multipath_map_bio(m, bio, mpio);
7088c2ecf20Sopenharmony_ci		switch (r) {
7098c2ecf20Sopenharmony_ci		case DM_MAPIO_KILL:
7108c2ecf20Sopenharmony_ci			bio->bi_status = BLK_STS_IOERR;
7118c2ecf20Sopenharmony_ci			bio_endio(bio);
7128c2ecf20Sopenharmony_ci			break;
7138c2ecf20Sopenharmony_ci		case DM_MAPIO_REQUEUE:
7148c2ecf20Sopenharmony_ci			bio->bi_status = BLK_STS_DM_REQUEUE;
7158c2ecf20Sopenharmony_ci			bio_endio(bio);
7168c2ecf20Sopenharmony_ci			break;
7178c2ecf20Sopenharmony_ci		case DM_MAPIO_REMAPPED:
7188c2ecf20Sopenharmony_ci			submit_bio_noacct(bio);
7198c2ecf20Sopenharmony_ci			break;
7208c2ecf20Sopenharmony_ci		case DM_MAPIO_SUBMITTED:
7218c2ecf20Sopenharmony_ci			break;
7228c2ecf20Sopenharmony_ci		default:
7238c2ecf20Sopenharmony_ci			WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r);
7248c2ecf20Sopenharmony_ci		}
7258c2ecf20Sopenharmony_ci	}
7268c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
7278c2ecf20Sopenharmony_ci}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci/*
7308c2ecf20Sopenharmony_ci * If we run out of usable paths, should we queue I/O or error it?
7318c2ecf20Sopenharmony_ci */
7328c2ecf20Sopenharmony_cistatic int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
7338c2ecf20Sopenharmony_ci			    bool save_old_value, const char *caller)
7348c2ecf20Sopenharmony_ci{
7358c2ecf20Sopenharmony_ci	unsigned long flags;
7368c2ecf20Sopenharmony_ci	bool queue_if_no_path_bit, saved_queue_if_no_path_bit;
7378c2ecf20Sopenharmony_ci	const char *dm_dev_name = dm_table_device_name(m->ti->table);
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d",
7408c2ecf20Sopenharmony_ci		dm_dev_name, __func__, caller, queue_if_no_path, save_old_value);
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	queue_if_no_path_bit = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
7458c2ecf20Sopenharmony_ci	saved_queue_if_no_path_bit = test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci	if (save_old_value) {
7488c2ecf20Sopenharmony_ci		if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) {
7498c2ecf20Sopenharmony_ci			DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!",
7508c2ecf20Sopenharmony_ci			      dm_dev_name);
7518c2ecf20Sopenharmony_ci		} else
7528c2ecf20Sopenharmony_ci			assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit);
7538c2ecf20Sopenharmony_ci	} else if (!queue_if_no_path && saved_queue_if_no_path_bit) {
7548c2ecf20Sopenharmony_ci		/* due to "fail_if_no_path" message, need to honor it. */
7558c2ecf20Sopenharmony_ci		clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
7568c2ecf20Sopenharmony_ci	}
7578c2ecf20Sopenharmony_ci	assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d",
7608c2ecf20Sopenharmony_ci		dm_dev_name, __func__,
7618c2ecf20Sopenharmony_ci		test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
7628c2ecf20Sopenharmony_ci		test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags),
7638c2ecf20Sopenharmony_ci		dm_noflush_suspending(m->ti));
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	if (!queue_if_no_path) {
7688c2ecf20Sopenharmony_ci		dm_table_run_md_queue_async(m->ti->table);
7698c2ecf20Sopenharmony_ci		process_queued_io_list(m);
7708c2ecf20Sopenharmony_ci	}
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	return 0;
7738c2ecf20Sopenharmony_ci}
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci/*
7768c2ecf20Sopenharmony_ci * If the queue_if_no_path timeout fires, turn off queue_if_no_path and
7778c2ecf20Sopenharmony_ci * process any queued I/O.
7788c2ecf20Sopenharmony_ci */
7798c2ecf20Sopenharmony_cistatic void queue_if_no_path_timeout_work(struct timer_list *t)
7808c2ecf20Sopenharmony_ci{
7818c2ecf20Sopenharmony_ci	struct multipath *m = from_timer(m, t, nopath_timer);
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	DMWARN("queue_if_no_path timeout on %s, failing queued IO",
7848c2ecf20Sopenharmony_ci	       dm_table_device_name(m->ti->table));
7858c2ecf20Sopenharmony_ci	queue_if_no_path(m, false, false, __func__);
7868c2ecf20Sopenharmony_ci}
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci/*
7898c2ecf20Sopenharmony_ci * Enable the queue_if_no_path timeout if necessary.
7908c2ecf20Sopenharmony_ci * Called with m->lock held.
7918c2ecf20Sopenharmony_ci */
7928c2ecf20Sopenharmony_cistatic void enable_nopath_timeout(struct multipath *m)
7938c2ecf20Sopenharmony_ci{
7948c2ecf20Sopenharmony_ci	unsigned long queue_if_no_path_timeout =
7958c2ecf20Sopenharmony_ci		READ_ONCE(queue_if_no_path_timeout_secs) * HZ;
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	lockdep_assert_held(&m->lock);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	if (queue_if_no_path_timeout > 0 &&
8008c2ecf20Sopenharmony_ci	    atomic_read(&m->nr_valid_paths) == 0 &&
8018c2ecf20Sopenharmony_ci	    test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
8028c2ecf20Sopenharmony_ci		mod_timer(&m->nopath_timer,
8038c2ecf20Sopenharmony_ci			  jiffies + queue_if_no_path_timeout);
8048c2ecf20Sopenharmony_ci	}
8058c2ecf20Sopenharmony_ci}
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_cistatic void disable_nopath_timeout(struct multipath *m)
8088c2ecf20Sopenharmony_ci{
8098c2ecf20Sopenharmony_ci	del_timer_sync(&m->nopath_timer);
8108c2ecf20Sopenharmony_ci}
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci/*
8138c2ecf20Sopenharmony_ci * An event is triggered whenever a path is taken out of use.
8148c2ecf20Sopenharmony_ci * Includes path failure and PG bypass.
8158c2ecf20Sopenharmony_ci */
8168c2ecf20Sopenharmony_cistatic void trigger_event(struct work_struct *work)
8178c2ecf20Sopenharmony_ci{
8188c2ecf20Sopenharmony_ci	struct multipath *m =
8198c2ecf20Sopenharmony_ci		container_of(work, struct multipath, trigger_event);
8208c2ecf20Sopenharmony_ci
8218c2ecf20Sopenharmony_ci	dm_table_event(m->ti->table);
8228c2ecf20Sopenharmony_ci}
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
8258c2ecf20Sopenharmony_ci * Constructor/argument parsing:
8268c2ecf20Sopenharmony_ci * <#multipath feature args> [<arg>]*
8278c2ecf20Sopenharmony_ci * <#hw_handler args> [hw_handler [<arg>]*]
8288c2ecf20Sopenharmony_ci * <#priority groups>
8298c2ecf20Sopenharmony_ci * <initial priority group>
8308c2ecf20Sopenharmony_ci *     [<selector> <#selector args> [<arg>]*
8318c2ecf20Sopenharmony_ci *      <#paths> <#per-path selector args>
8328c2ecf20Sopenharmony_ci *         [<path> [<arg>]* ]+ ]+
8338c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
8348c2ecf20Sopenharmony_cistatic int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
8358c2ecf20Sopenharmony_ci			       struct dm_target *ti)
8368c2ecf20Sopenharmony_ci{
8378c2ecf20Sopenharmony_ci	int r;
8388c2ecf20Sopenharmony_ci	struct path_selector_type *pst;
8398c2ecf20Sopenharmony_ci	unsigned ps_argc;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	static const struct dm_arg _args[] = {
8428c2ecf20Sopenharmony_ci		{0, 1024, "invalid number of path selector args"},
8438c2ecf20Sopenharmony_ci	};
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	pst = dm_get_path_selector(dm_shift_arg(as));
8468c2ecf20Sopenharmony_ci	if (!pst) {
8478c2ecf20Sopenharmony_ci		ti->error = "unknown path selector type";
8488c2ecf20Sopenharmony_ci		return -EINVAL;
8498c2ecf20Sopenharmony_ci	}
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
8528c2ecf20Sopenharmony_ci	if (r) {
8538c2ecf20Sopenharmony_ci		dm_put_path_selector(pst);
8548c2ecf20Sopenharmony_ci		return -EINVAL;
8558c2ecf20Sopenharmony_ci	}
8568c2ecf20Sopenharmony_ci
8578c2ecf20Sopenharmony_ci	r = pst->create(&pg->ps, ps_argc, as->argv);
8588c2ecf20Sopenharmony_ci	if (r) {
8598c2ecf20Sopenharmony_ci		dm_put_path_selector(pst);
8608c2ecf20Sopenharmony_ci		ti->error = "path selector constructor failed";
8618c2ecf20Sopenharmony_ci		return r;
8628c2ecf20Sopenharmony_ci	}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci	pg->ps.type = pst;
8658c2ecf20Sopenharmony_ci	dm_consume_args(as, ps_argc);
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_ci	return 0;
8688c2ecf20Sopenharmony_ci}
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_cistatic int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
8718c2ecf20Sopenharmony_ci			 const char **attached_handler_name, char **error)
8728c2ecf20Sopenharmony_ci{
8738c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(bdev);
8748c2ecf20Sopenharmony_ci	int r;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) {
8778c2ecf20Sopenharmony_ciretain:
8788c2ecf20Sopenharmony_ci		if (*attached_handler_name) {
8798c2ecf20Sopenharmony_ci			/*
8808c2ecf20Sopenharmony_ci			 * Clear any hw_handler_params associated with a
8818c2ecf20Sopenharmony_ci			 * handler that isn't already attached.
8828c2ecf20Sopenharmony_ci			 */
8838c2ecf20Sopenharmony_ci			if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {
8848c2ecf20Sopenharmony_ci				kfree(m->hw_handler_params);
8858c2ecf20Sopenharmony_ci				m->hw_handler_params = NULL;
8868c2ecf20Sopenharmony_ci			}
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci			/*
8898c2ecf20Sopenharmony_ci			 * Reset hw_handler_name to match the attached handler
8908c2ecf20Sopenharmony_ci			 *
8918c2ecf20Sopenharmony_ci			 * NB. This modifies the table line to show the actual
8928c2ecf20Sopenharmony_ci			 * handler instead of the original table passed in.
8938c2ecf20Sopenharmony_ci			 */
8948c2ecf20Sopenharmony_ci			kfree(m->hw_handler_name);
8958c2ecf20Sopenharmony_ci			m->hw_handler_name = *attached_handler_name;
8968c2ecf20Sopenharmony_ci			*attached_handler_name = NULL;
8978c2ecf20Sopenharmony_ci		}
8988c2ecf20Sopenharmony_ci	}
8998c2ecf20Sopenharmony_ci
9008c2ecf20Sopenharmony_ci	if (m->hw_handler_name) {
9018c2ecf20Sopenharmony_ci		r = scsi_dh_attach(q, m->hw_handler_name);
9028c2ecf20Sopenharmony_ci		if (r == -EBUSY) {
9038c2ecf20Sopenharmony_ci			char b[BDEVNAME_SIZE];
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci			printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
9068c2ecf20Sopenharmony_ci			       bdevname(bdev, b));
9078c2ecf20Sopenharmony_ci			goto retain;
9088c2ecf20Sopenharmony_ci		}
9098c2ecf20Sopenharmony_ci		if (r < 0) {
9108c2ecf20Sopenharmony_ci			*error = "error attaching hardware handler";
9118c2ecf20Sopenharmony_ci			return r;
9128c2ecf20Sopenharmony_ci		}
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci		if (m->hw_handler_params) {
9158c2ecf20Sopenharmony_ci			r = scsi_dh_set_params(q, m->hw_handler_params);
9168c2ecf20Sopenharmony_ci			if (r < 0) {
9178c2ecf20Sopenharmony_ci				*error = "unable to set hardware handler parameters";
9188c2ecf20Sopenharmony_ci				return r;
9198c2ecf20Sopenharmony_ci			}
9208c2ecf20Sopenharmony_ci		}
9218c2ecf20Sopenharmony_ci	}
9228c2ecf20Sopenharmony_ci
9238c2ecf20Sopenharmony_ci	return 0;
9248c2ecf20Sopenharmony_ci}
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_cistatic struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
9278c2ecf20Sopenharmony_ci				 struct dm_target *ti)
9288c2ecf20Sopenharmony_ci{
9298c2ecf20Sopenharmony_ci	int r;
9308c2ecf20Sopenharmony_ci	struct pgpath *p;
9318c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
9328c2ecf20Sopenharmony_ci	struct request_queue *q;
9338c2ecf20Sopenharmony_ci	const char *attached_handler_name = NULL;
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci	/* we need at least a path arg */
9368c2ecf20Sopenharmony_ci	if (as->argc < 1) {
9378c2ecf20Sopenharmony_ci		ti->error = "no device given";
9388c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
9398c2ecf20Sopenharmony_ci	}
9408c2ecf20Sopenharmony_ci
9418c2ecf20Sopenharmony_ci	p = alloc_pgpath();
9428c2ecf20Sopenharmony_ci	if (!p)
9438c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
9468c2ecf20Sopenharmony_ci			  &p->path.dev);
9478c2ecf20Sopenharmony_ci	if (r) {
9488c2ecf20Sopenharmony_ci		ti->error = "error getting device";
9498c2ecf20Sopenharmony_ci		goto bad;
9508c2ecf20Sopenharmony_ci	}
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_ci	q = bdev_get_queue(p->path.dev->bdev);
9538c2ecf20Sopenharmony_ci	attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
9548c2ecf20Sopenharmony_ci	if (attached_handler_name || m->hw_handler_name) {
9558c2ecf20Sopenharmony_ci		INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
9568c2ecf20Sopenharmony_ci		r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);
9578c2ecf20Sopenharmony_ci		kfree(attached_handler_name);
9588c2ecf20Sopenharmony_ci		if (r) {
9598c2ecf20Sopenharmony_ci			dm_put_device(ti, p->path.dev);
9608c2ecf20Sopenharmony_ci			goto bad;
9618c2ecf20Sopenharmony_ci		}
9628c2ecf20Sopenharmony_ci	}
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
9658c2ecf20Sopenharmony_ci	if (r) {
9668c2ecf20Sopenharmony_ci		dm_put_device(ti, p->path.dev);
9678c2ecf20Sopenharmony_ci		goto bad;
9688c2ecf20Sopenharmony_ci	}
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci	return p;
9718c2ecf20Sopenharmony_ci bad:
9728c2ecf20Sopenharmony_ci	free_pgpath(p);
9738c2ecf20Sopenharmony_ci	return ERR_PTR(r);
9748c2ecf20Sopenharmony_ci}
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_cistatic struct priority_group *parse_priority_group(struct dm_arg_set *as,
9778c2ecf20Sopenharmony_ci						   struct multipath *m)
9788c2ecf20Sopenharmony_ci{
9798c2ecf20Sopenharmony_ci	static const struct dm_arg _args[] = {
9808c2ecf20Sopenharmony_ci		{1, 1024, "invalid number of paths"},
9818c2ecf20Sopenharmony_ci		{0, 1024, "invalid number of selector args"}
9828c2ecf20Sopenharmony_ci	};
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	int r;
9858c2ecf20Sopenharmony_ci	unsigned i, nr_selector_args, nr_args;
9868c2ecf20Sopenharmony_ci	struct priority_group *pg;
9878c2ecf20Sopenharmony_ci	struct dm_target *ti = m->ti;
9888c2ecf20Sopenharmony_ci
9898c2ecf20Sopenharmony_ci	if (as->argc < 2) {
9908c2ecf20Sopenharmony_ci		as->argc = 0;
9918c2ecf20Sopenharmony_ci		ti->error = "not enough priority group arguments";
9928c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
9938c2ecf20Sopenharmony_ci	}
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci	pg = alloc_priority_group();
9968c2ecf20Sopenharmony_ci	if (!pg) {
9978c2ecf20Sopenharmony_ci		ti->error = "couldn't allocate priority group";
9988c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
9998c2ecf20Sopenharmony_ci	}
10008c2ecf20Sopenharmony_ci	pg->m = m;
10018c2ecf20Sopenharmony_ci
10028c2ecf20Sopenharmony_ci	r = parse_path_selector(as, pg, ti);
10038c2ecf20Sopenharmony_ci	if (r)
10048c2ecf20Sopenharmony_ci		goto bad;
10058c2ecf20Sopenharmony_ci
10068c2ecf20Sopenharmony_ci	/*
10078c2ecf20Sopenharmony_ci	 * read the paths
10088c2ecf20Sopenharmony_ci	 */
10098c2ecf20Sopenharmony_ci	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
10108c2ecf20Sopenharmony_ci	if (r)
10118c2ecf20Sopenharmony_ci		goto bad;
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
10148c2ecf20Sopenharmony_ci	if (r)
10158c2ecf20Sopenharmony_ci		goto bad;
10168c2ecf20Sopenharmony_ci
10178c2ecf20Sopenharmony_ci	nr_args = 1 + nr_selector_args;
10188c2ecf20Sopenharmony_ci	for (i = 0; i < pg->nr_pgpaths; i++) {
10198c2ecf20Sopenharmony_ci		struct pgpath *pgpath;
10208c2ecf20Sopenharmony_ci		struct dm_arg_set path_args;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci		if (as->argc < nr_args) {
10238c2ecf20Sopenharmony_ci			ti->error = "not enough path parameters";
10248c2ecf20Sopenharmony_ci			r = -EINVAL;
10258c2ecf20Sopenharmony_ci			goto bad;
10268c2ecf20Sopenharmony_ci		}
10278c2ecf20Sopenharmony_ci
10288c2ecf20Sopenharmony_ci		path_args.argc = nr_args;
10298c2ecf20Sopenharmony_ci		path_args.argv = as->argv;
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci		pgpath = parse_path(&path_args, &pg->ps, ti);
10328c2ecf20Sopenharmony_ci		if (IS_ERR(pgpath)) {
10338c2ecf20Sopenharmony_ci			r = PTR_ERR(pgpath);
10348c2ecf20Sopenharmony_ci			goto bad;
10358c2ecf20Sopenharmony_ci		}
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci		pgpath->pg = pg;
10388c2ecf20Sopenharmony_ci		list_add_tail(&pgpath->list, &pg->pgpaths);
10398c2ecf20Sopenharmony_ci		dm_consume_args(as, nr_args);
10408c2ecf20Sopenharmony_ci	}
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci	return pg;
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci bad:
10458c2ecf20Sopenharmony_ci	free_priority_group(pg, ti);
10468c2ecf20Sopenharmony_ci	return ERR_PTR(r);
10478c2ecf20Sopenharmony_ci}
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_cistatic int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
10508c2ecf20Sopenharmony_ci{
10518c2ecf20Sopenharmony_ci	unsigned hw_argc;
10528c2ecf20Sopenharmony_ci	int ret;
10538c2ecf20Sopenharmony_ci	struct dm_target *ti = m->ti;
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	static const struct dm_arg _args[] = {
10568c2ecf20Sopenharmony_ci		{0, 1024, "invalid number of hardware handler args"},
10578c2ecf20Sopenharmony_ci	};
10588c2ecf20Sopenharmony_ci
10598c2ecf20Sopenharmony_ci	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
10608c2ecf20Sopenharmony_ci		return -EINVAL;
10618c2ecf20Sopenharmony_ci
10628c2ecf20Sopenharmony_ci	if (!hw_argc)
10638c2ecf20Sopenharmony_ci		return 0;
10648c2ecf20Sopenharmony_ci
10658c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_BIO_BASED) {
10668c2ecf20Sopenharmony_ci		dm_consume_args(as, hw_argc);
10678c2ecf20Sopenharmony_ci		DMERR("bio-based multipath doesn't allow hardware handler args");
10688c2ecf20Sopenharmony_ci		return 0;
10698c2ecf20Sopenharmony_ci	}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
10728c2ecf20Sopenharmony_ci	if (!m->hw_handler_name)
10738c2ecf20Sopenharmony_ci		return -EINVAL;
10748c2ecf20Sopenharmony_ci
10758c2ecf20Sopenharmony_ci	if (hw_argc > 1) {
10768c2ecf20Sopenharmony_ci		char *p;
10778c2ecf20Sopenharmony_ci		int i, j, len = 4;
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_ci		for (i = 0; i <= hw_argc - 2; i++)
10808c2ecf20Sopenharmony_ci			len += strlen(as->argv[i]) + 1;
10818c2ecf20Sopenharmony_ci		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
10828c2ecf20Sopenharmony_ci		if (!p) {
10838c2ecf20Sopenharmony_ci			ti->error = "memory allocation failed";
10848c2ecf20Sopenharmony_ci			ret = -ENOMEM;
10858c2ecf20Sopenharmony_ci			goto fail;
10868c2ecf20Sopenharmony_ci		}
10878c2ecf20Sopenharmony_ci		j = sprintf(p, "%d", hw_argc - 1);
10888c2ecf20Sopenharmony_ci		for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
10898c2ecf20Sopenharmony_ci			j = sprintf(p, "%s", as->argv[i]);
10908c2ecf20Sopenharmony_ci	}
10918c2ecf20Sopenharmony_ci	dm_consume_args(as, hw_argc - 1);
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	return 0;
10948c2ecf20Sopenharmony_cifail:
10958c2ecf20Sopenharmony_ci	kfree(m->hw_handler_name);
10968c2ecf20Sopenharmony_ci	m->hw_handler_name = NULL;
10978c2ecf20Sopenharmony_ci	return ret;
10988c2ecf20Sopenharmony_ci}
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_cistatic int parse_features(struct dm_arg_set *as, struct multipath *m)
11018c2ecf20Sopenharmony_ci{
11028c2ecf20Sopenharmony_ci	int r;
11038c2ecf20Sopenharmony_ci	unsigned argc;
11048c2ecf20Sopenharmony_ci	struct dm_target *ti = m->ti;
11058c2ecf20Sopenharmony_ci	const char *arg_name;
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_ci	static const struct dm_arg _args[] = {
11088c2ecf20Sopenharmony_ci		{0, 8, "invalid number of feature args"},
11098c2ecf20Sopenharmony_ci		{1, 50, "pg_init_retries must be between 1 and 50"},
11108c2ecf20Sopenharmony_ci		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
11118c2ecf20Sopenharmony_ci	};
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ci	r = dm_read_arg_group(_args, as, &argc, &ti->error);
11148c2ecf20Sopenharmony_ci	if (r)
11158c2ecf20Sopenharmony_ci		return -EINVAL;
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci	if (!argc)
11188c2ecf20Sopenharmony_ci		return 0;
11198c2ecf20Sopenharmony_ci
11208c2ecf20Sopenharmony_ci	do {
11218c2ecf20Sopenharmony_ci		arg_name = dm_shift_arg(as);
11228c2ecf20Sopenharmony_ci		argc--;
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "queue_if_no_path")) {
11258c2ecf20Sopenharmony_ci			r = queue_if_no_path(m, true, false, __func__);
11268c2ecf20Sopenharmony_ci			continue;
11278c2ecf20Sopenharmony_ci		}
11288c2ecf20Sopenharmony_ci
11298c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
11308c2ecf20Sopenharmony_ci			set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
11318c2ecf20Sopenharmony_ci			continue;
11328c2ecf20Sopenharmony_ci		}
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "pg_init_retries") &&
11358c2ecf20Sopenharmony_ci		    (argc >= 1)) {
11368c2ecf20Sopenharmony_ci			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
11378c2ecf20Sopenharmony_ci			argc--;
11388c2ecf20Sopenharmony_ci			continue;
11398c2ecf20Sopenharmony_ci		}
11408c2ecf20Sopenharmony_ci
11418c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
11428c2ecf20Sopenharmony_ci		    (argc >= 1)) {
11438c2ecf20Sopenharmony_ci			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
11448c2ecf20Sopenharmony_ci			argc--;
11458c2ecf20Sopenharmony_ci			continue;
11468c2ecf20Sopenharmony_ci		}
11478c2ecf20Sopenharmony_ci
11488c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "queue_mode") &&
11498c2ecf20Sopenharmony_ci		    (argc >= 1)) {
11508c2ecf20Sopenharmony_ci			const char *queue_mode_name = dm_shift_arg(as);
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci			if (!strcasecmp(queue_mode_name, "bio"))
11538c2ecf20Sopenharmony_ci				m->queue_mode = DM_TYPE_BIO_BASED;
11548c2ecf20Sopenharmony_ci			else if (!strcasecmp(queue_mode_name, "rq") ||
11558c2ecf20Sopenharmony_ci				 !strcasecmp(queue_mode_name, "mq"))
11568c2ecf20Sopenharmony_ci				m->queue_mode = DM_TYPE_REQUEST_BASED;
11578c2ecf20Sopenharmony_ci			else {
11588c2ecf20Sopenharmony_ci				ti->error = "Unknown 'queue_mode' requested";
11598c2ecf20Sopenharmony_ci				r = -EINVAL;
11608c2ecf20Sopenharmony_ci			}
11618c2ecf20Sopenharmony_ci			argc--;
11628c2ecf20Sopenharmony_ci			continue;
11638c2ecf20Sopenharmony_ci		}
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci		ti->error = "Unrecognised multipath feature request";
11668c2ecf20Sopenharmony_ci		r = -EINVAL;
11678c2ecf20Sopenharmony_ci	} while (argc && !r);
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	return r;
11708c2ecf20Sopenharmony_ci}
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_cistatic int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
11738c2ecf20Sopenharmony_ci{
11748c2ecf20Sopenharmony_ci	/* target arguments */
11758c2ecf20Sopenharmony_ci	static const struct dm_arg _args[] = {
11768c2ecf20Sopenharmony_ci		{0, 1024, "invalid number of priority groups"},
11778c2ecf20Sopenharmony_ci		{0, 1024, "invalid initial priority group number"},
11788c2ecf20Sopenharmony_ci	};
11798c2ecf20Sopenharmony_ci
11808c2ecf20Sopenharmony_ci	int r;
11818c2ecf20Sopenharmony_ci	struct multipath *m;
11828c2ecf20Sopenharmony_ci	struct dm_arg_set as;
11838c2ecf20Sopenharmony_ci	unsigned pg_count = 0;
11848c2ecf20Sopenharmony_ci	unsigned next_pg_num;
11858c2ecf20Sopenharmony_ci	unsigned long flags;
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci	as.argc = argc;
11888c2ecf20Sopenharmony_ci	as.argv = argv;
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci	m = alloc_multipath(ti);
11918c2ecf20Sopenharmony_ci	if (!m) {
11928c2ecf20Sopenharmony_ci		ti->error = "can't allocate multipath";
11938c2ecf20Sopenharmony_ci		return -EINVAL;
11948c2ecf20Sopenharmony_ci	}
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_ci	r = parse_features(&as, m);
11978c2ecf20Sopenharmony_ci	if (r)
11988c2ecf20Sopenharmony_ci		goto bad;
11998c2ecf20Sopenharmony_ci
12008c2ecf20Sopenharmony_ci	r = alloc_multipath_stage2(ti, m);
12018c2ecf20Sopenharmony_ci	if (r)
12028c2ecf20Sopenharmony_ci		goto bad;
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_ci	r = parse_hw_handler(&as, m);
12058c2ecf20Sopenharmony_ci	if (r)
12068c2ecf20Sopenharmony_ci		goto bad;
12078c2ecf20Sopenharmony_ci
12088c2ecf20Sopenharmony_ci	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
12098c2ecf20Sopenharmony_ci	if (r)
12108c2ecf20Sopenharmony_ci		goto bad;
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
12138c2ecf20Sopenharmony_ci	if (r)
12148c2ecf20Sopenharmony_ci		goto bad;
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci	if ((!m->nr_priority_groups && next_pg_num) ||
12178c2ecf20Sopenharmony_ci	    (m->nr_priority_groups && !next_pg_num)) {
12188c2ecf20Sopenharmony_ci		ti->error = "invalid initial priority group";
12198c2ecf20Sopenharmony_ci		r = -EINVAL;
12208c2ecf20Sopenharmony_ci		goto bad;
12218c2ecf20Sopenharmony_ci	}
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ci	/* parse the priority groups */
12248c2ecf20Sopenharmony_ci	while (as.argc) {
12258c2ecf20Sopenharmony_ci		struct priority_group *pg;
12268c2ecf20Sopenharmony_ci		unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
12278c2ecf20Sopenharmony_ci
12288c2ecf20Sopenharmony_ci		pg = parse_priority_group(&as, m);
12298c2ecf20Sopenharmony_ci		if (IS_ERR(pg)) {
12308c2ecf20Sopenharmony_ci			r = PTR_ERR(pg);
12318c2ecf20Sopenharmony_ci			goto bad;
12328c2ecf20Sopenharmony_ci		}
12338c2ecf20Sopenharmony_ci
12348c2ecf20Sopenharmony_ci		nr_valid_paths += pg->nr_pgpaths;
12358c2ecf20Sopenharmony_ci		atomic_set(&m->nr_valid_paths, nr_valid_paths);
12368c2ecf20Sopenharmony_ci
12378c2ecf20Sopenharmony_ci		list_add_tail(&pg->list, &m->priority_groups);
12388c2ecf20Sopenharmony_ci		pg_count++;
12398c2ecf20Sopenharmony_ci		pg->pg_num = pg_count;
12408c2ecf20Sopenharmony_ci		if (!--next_pg_num)
12418c2ecf20Sopenharmony_ci			m->next_pg = pg;
12428c2ecf20Sopenharmony_ci	}
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci	if (pg_count != m->nr_priority_groups) {
12458c2ecf20Sopenharmony_ci		ti->error = "priority group count mismatch";
12468c2ecf20Sopenharmony_ci		r = -EINVAL;
12478c2ecf20Sopenharmony_ci		goto bad;
12488c2ecf20Sopenharmony_ci	}
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
12518c2ecf20Sopenharmony_ci	enable_nopath_timeout(m);
12528c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
12538c2ecf20Sopenharmony_ci
12548c2ecf20Sopenharmony_ci	ti->num_flush_bios = 1;
12558c2ecf20Sopenharmony_ci	ti->num_discard_bios = 1;
12568c2ecf20Sopenharmony_ci	ti->num_write_same_bios = 1;
12578c2ecf20Sopenharmony_ci	ti->num_write_zeroes_bios = 1;
12588c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_BIO_BASED)
12598c2ecf20Sopenharmony_ci		ti->per_io_data_size = multipath_per_bio_data_size();
12608c2ecf20Sopenharmony_ci	else
12618c2ecf20Sopenharmony_ci		ti->per_io_data_size = sizeof(struct dm_mpath_io);
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	return 0;
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_ci bad:
12668c2ecf20Sopenharmony_ci	free_multipath(m);
12678c2ecf20Sopenharmony_ci	return r;
12688c2ecf20Sopenharmony_ci}
12698c2ecf20Sopenharmony_ci
12708c2ecf20Sopenharmony_cistatic void multipath_wait_for_pg_init_completion(struct multipath *m)
12718c2ecf20Sopenharmony_ci{
12728c2ecf20Sopenharmony_ci	DEFINE_WAIT(wait);
12738c2ecf20Sopenharmony_ci
12748c2ecf20Sopenharmony_ci	while (1) {
12758c2ecf20Sopenharmony_ci		prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE);
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci		if (!atomic_read(&m->pg_init_in_progress))
12788c2ecf20Sopenharmony_ci			break;
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci		io_schedule();
12818c2ecf20Sopenharmony_ci	}
12828c2ecf20Sopenharmony_ci	finish_wait(&m->pg_init_wait, &wait);
12838c2ecf20Sopenharmony_ci}
12848c2ecf20Sopenharmony_ci
12858c2ecf20Sopenharmony_cistatic void flush_multipath_work(struct multipath *m)
12868c2ecf20Sopenharmony_ci{
12878c2ecf20Sopenharmony_ci	if (m->hw_handler_name) {
12888c2ecf20Sopenharmony_ci		unsigned long flags;
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_ci		if (!atomic_read(&m->pg_init_in_progress))
12918c2ecf20Sopenharmony_ci			goto skip;
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
12948c2ecf20Sopenharmony_ci		if (atomic_read(&m->pg_init_in_progress) &&
12958c2ecf20Sopenharmony_ci		    !test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
12968c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&m->lock, flags);
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_ci			flush_workqueue(kmpath_handlerd);
12998c2ecf20Sopenharmony_ci			multipath_wait_for_pg_init_completion(m);
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci			spin_lock_irqsave(&m->lock, flags);
13028c2ecf20Sopenharmony_ci			clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
13038c2ecf20Sopenharmony_ci		}
13048c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
13058c2ecf20Sopenharmony_ci	}
13068c2ecf20Sopenharmony_ciskip:
13078c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_BIO_BASED)
13088c2ecf20Sopenharmony_ci		flush_work(&m->process_queued_bios);
13098c2ecf20Sopenharmony_ci	flush_work(&m->trigger_event);
13108c2ecf20Sopenharmony_ci}
13118c2ecf20Sopenharmony_ci
13128c2ecf20Sopenharmony_cistatic void multipath_dtr(struct dm_target *ti)
13138c2ecf20Sopenharmony_ci{
13148c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci	disable_nopath_timeout(m);
13178c2ecf20Sopenharmony_ci	flush_multipath_work(m);
13188c2ecf20Sopenharmony_ci	free_multipath(m);
13198c2ecf20Sopenharmony_ci}
13208c2ecf20Sopenharmony_ci
13218c2ecf20Sopenharmony_ci/*
13228c2ecf20Sopenharmony_ci * Take a path out of use.
13238c2ecf20Sopenharmony_ci */
13248c2ecf20Sopenharmony_cistatic int fail_path(struct pgpath *pgpath)
13258c2ecf20Sopenharmony_ci{
13268c2ecf20Sopenharmony_ci	unsigned long flags;
13278c2ecf20Sopenharmony_ci	struct multipath *m = pgpath->pg->m;
13288c2ecf20Sopenharmony_ci
13298c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ci	if (!pgpath->is_active)
13328c2ecf20Sopenharmony_ci		goto out;
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	DMWARN("%s: Failing path %s.",
13358c2ecf20Sopenharmony_ci	       dm_table_device_name(m->ti->table),
13368c2ecf20Sopenharmony_ci	       pgpath->path.dev->name);
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
13398c2ecf20Sopenharmony_ci	pgpath->is_active = false;
13408c2ecf20Sopenharmony_ci	pgpath->fail_count++;
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci	atomic_dec(&m->nr_valid_paths);
13438c2ecf20Sopenharmony_ci
13448c2ecf20Sopenharmony_ci	if (pgpath == m->current_pgpath)
13458c2ecf20Sopenharmony_ci		m->current_pgpath = NULL;
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
13488c2ecf20Sopenharmony_ci		       pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	schedule_work(&m->trigger_event);
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	enable_nopath_timeout(m);
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ciout:
13558c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_ci	return 0;
13588c2ecf20Sopenharmony_ci}
13598c2ecf20Sopenharmony_ci
13608c2ecf20Sopenharmony_ci/*
13618c2ecf20Sopenharmony_ci * Reinstate a previously-failed path
13628c2ecf20Sopenharmony_ci */
13638c2ecf20Sopenharmony_cistatic int reinstate_path(struct pgpath *pgpath)
13648c2ecf20Sopenharmony_ci{
13658c2ecf20Sopenharmony_ci	int r = 0, run_queue = 0;
13668c2ecf20Sopenharmony_ci	unsigned long flags;
13678c2ecf20Sopenharmony_ci	struct multipath *m = pgpath->pg->m;
13688c2ecf20Sopenharmony_ci	unsigned nr_valid_paths;
13698c2ecf20Sopenharmony_ci
13708c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci	if (pgpath->is_active)
13738c2ecf20Sopenharmony_ci		goto out;
13748c2ecf20Sopenharmony_ci
13758c2ecf20Sopenharmony_ci	DMWARN("%s: Reinstating path %s.",
13768c2ecf20Sopenharmony_ci	       dm_table_device_name(m->ti->table),
13778c2ecf20Sopenharmony_ci	       pgpath->path.dev->name);
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_ci	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
13808c2ecf20Sopenharmony_ci	if (r)
13818c2ecf20Sopenharmony_ci		goto out;
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_ci	pgpath->is_active = true;
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci	nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
13868c2ecf20Sopenharmony_ci	if (nr_valid_paths == 1) {
13878c2ecf20Sopenharmony_ci		m->current_pgpath = NULL;
13888c2ecf20Sopenharmony_ci		run_queue = 1;
13898c2ecf20Sopenharmony_ci	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
13908c2ecf20Sopenharmony_ci		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
13918c2ecf20Sopenharmony_ci			atomic_inc(&m->pg_init_in_progress);
13928c2ecf20Sopenharmony_ci	}
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
13958c2ecf20Sopenharmony_ci		       pgpath->path.dev->name, nr_valid_paths);
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_ci	schedule_work(&m->trigger_event);
13988c2ecf20Sopenharmony_ci
13998c2ecf20Sopenharmony_ciout:
14008c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
14018c2ecf20Sopenharmony_ci	if (run_queue) {
14028c2ecf20Sopenharmony_ci		dm_table_run_md_queue_async(m->ti->table);
14038c2ecf20Sopenharmony_ci		process_queued_io_list(m);
14048c2ecf20Sopenharmony_ci	}
14058c2ecf20Sopenharmony_ci
14068c2ecf20Sopenharmony_ci	if (pgpath->is_active)
14078c2ecf20Sopenharmony_ci		disable_nopath_timeout(m);
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci	return r;
14108c2ecf20Sopenharmony_ci}
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci/*
14138c2ecf20Sopenharmony_ci * Fail or reinstate all paths that match the provided struct dm_dev.
14148c2ecf20Sopenharmony_ci */
14158c2ecf20Sopenharmony_cistatic int action_dev(struct multipath *m, struct dm_dev *dev,
14168c2ecf20Sopenharmony_ci		      action_fn action)
14178c2ecf20Sopenharmony_ci{
14188c2ecf20Sopenharmony_ci	int r = -EINVAL;
14198c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
14208c2ecf20Sopenharmony_ci	struct priority_group *pg;
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	list_for_each_entry(pg, &m->priority_groups, list) {
14238c2ecf20Sopenharmony_ci		list_for_each_entry(pgpath, &pg->pgpaths, list) {
14248c2ecf20Sopenharmony_ci			if (pgpath->path.dev == dev)
14258c2ecf20Sopenharmony_ci				r = action(pgpath);
14268c2ecf20Sopenharmony_ci		}
14278c2ecf20Sopenharmony_ci	}
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci	return r;
14308c2ecf20Sopenharmony_ci}
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_ci/*
14338c2ecf20Sopenharmony_ci * Temporarily try to avoid having to use the specified PG
14348c2ecf20Sopenharmony_ci */
14358c2ecf20Sopenharmony_cistatic void bypass_pg(struct multipath *m, struct priority_group *pg,
14368c2ecf20Sopenharmony_ci		      bool bypassed)
14378c2ecf20Sopenharmony_ci{
14388c2ecf20Sopenharmony_ci	unsigned long flags;
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci	pg->bypassed = bypassed;
14438c2ecf20Sopenharmony_ci	m->current_pgpath = NULL;
14448c2ecf20Sopenharmony_ci	m->current_pg = NULL;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
14478c2ecf20Sopenharmony_ci
14488c2ecf20Sopenharmony_ci	schedule_work(&m->trigger_event);
14498c2ecf20Sopenharmony_ci}
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_ci/*
14528c2ecf20Sopenharmony_ci * Switch to using the specified PG from the next I/O that gets mapped
14538c2ecf20Sopenharmony_ci */
14548c2ecf20Sopenharmony_cistatic int switch_pg_num(struct multipath *m, const char *pgstr)
14558c2ecf20Sopenharmony_ci{
14568c2ecf20Sopenharmony_ci	struct priority_group *pg;
14578c2ecf20Sopenharmony_ci	unsigned pgnum;
14588c2ecf20Sopenharmony_ci	unsigned long flags;
14598c2ecf20Sopenharmony_ci	char dummy;
14608c2ecf20Sopenharmony_ci
14618c2ecf20Sopenharmony_ci	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
14628c2ecf20Sopenharmony_ci	    !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
14638c2ecf20Sopenharmony_ci		DMWARN("invalid PG number supplied to switch_pg_num");
14648c2ecf20Sopenharmony_ci		return -EINVAL;
14658c2ecf20Sopenharmony_ci	}
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
14688c2ecf20Sopenharmony_ci	list_for_each_entry(pg, &m->priority_groups, list) {
14698c2ecf20Sopenharmony_ci		pg->bypassed = false;
14708c2ecf20Sopenharmony_ci		if (--pgnum)
14718c2ecf20Sopenharmony_ci			continue;
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci		m->current_pgpath = NULL;
14748c2ecf20Sopenharmony_ci		m->current_pg = NULL;
14758c2ecf20Sopenharmony_ci		m->next_pg = pg;
14768c2ecf20Sopenharmony_ci	}
14778c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_ci	schedule_work(&m->trigger_event);
14808c2ecf20Sopenharmony_ci	return 0;
14818c2ecf20Sopenharmony_ci}
14828c2ecf20Sopenharmony_ci
14838c2ecf20Sopenharmony_ci/*
14848c2ecf20Sopenharmony_ci * Set/clear bypassed status of a PG.
14858c2ecf20Sopenharmony_ci * PGs are numbered upwards from 1 in the order they were declared.
14868c2ecf20Sopenharmony_ci */
14878c2ecf20Sopenharmony_cistatic int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
14888c2ecf20Sopenharmony_ci{
14898c2ecf20Sopenharmony_ci	struct priority_group *pg;
14908c2ecf20Sopenharmony_ci	unsigned pgnum;
14918c2ecf20Sopenharmony_ci	char dummy;
14928c2ecf20Sopenharmony_ci
14938c2ecf20Sopenharmony_ci	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
14948c2ecf20Sopenharmony_ci	    !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
14958c2ecf20Sopenharmony_ci		DMWARN("invalid PG number supplied to bypass_pg");
14968c2ecf20Sopenharmony_ci		return -EINVAL;
14978c2ecf20Sopenharmony_ci	}
14988c2ecf20Sopenharmony_ci
14998c2ecf20Sopenharmony_ci	list_for_each_entry(pg, &m->priority_groups, list) {
15008c2ecf20Sopenharmony_ci		if (!--pgnum)
15018c2ecf20Sopenharmony_ci			break;
15028c2ecf20Sopenharmony_ci	}
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci	bypass_pg(m, pg, bypassed);
15058c2ecf20Sopenharmony_ci	return 0;
15068c2ecf20Sopenharmony_ci}
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci/*
15098c2ecf20Sopenharmony_ci * Should we retry pg_init immediately?
15108c2ecf20Sopenharmony_ci */
15118c2ecf20Sopenharmony_cistatic bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
15128c2ecf20Sopenharmony_ci{
15138c2ecf20Sopenharmony_ci	unsigned long flags;
15148c2ecf20Sopenharmony_ci	bool limit_reached = false;
15158c2ecf20Sopenharmony_ci
15168c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
15178c2ecf20Sopenharmony_ci
15188c2ecf20Sopenharmony_ci	if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
15198c2ecf20Sopenharmony_ci	    !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
15208c2ecf20Sopenharmony_ci		set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
15218c2ecf20Sopenharmony_ci	else
15228c2ecf20Sopenharmony_ci		limit_reached = true;
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
15258c2ecf20Sopenharmony_ci
15268c2ecf20Sopenharmony_ci	return limit_reached;
15278c2ecf20Sopenharmony_ci}
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_cistatic void pg_init_done(void *data, int errors)
15308c2ecf20Sopenharmony_ci{
15318c2ecf20Sopenharmony_ci	struct pgpath *pgpath = data;
15328c2ecf20Sopenharmony_ci	struct priority_group *pg = pgpath->pg;
15338c2ecf20Sopenharmony_ci	struct multipath *m = pg->m;
15348c2ecf20Sopenharmony_ci	unsigned long flags;
15358c2ecf20Sopenharmony_ci	bool delay_retry = false;
15368c2ecf20Sopenharmony_ci
15378c2ecf20Sopenharmony_ci	/* device or driver problems */
15388c2ecf20Sopenharmony_ci	switch (errors) {
15398c2ecf20Sopenharmony_ci	case SCSI_DH_OK:
15408c2ecf20Sopenharmony_ci		break;
15418c2ecf20Sopenharmony_ci	case SCSI_DH_NOSYS:
15428c2ecf20Sopenharmony_ci		if (!m->hw_handler_name) {
15438c2ecf20Sopenharmony_ci			errors = 0;
15448c2ecf20Sopenharmony_ci			break;
15458c2ecf20Sopenharmony_ci		}
15468c2ecf20Sopenharmony_ci		DMERR("Could not failover the device: Handler scsi_dh_%s "
15478c2ecf20Sopenharmony_ci		      "Error %d.", m->hw_handler_name, errors);
15488c2ecf20Sopenharmony_ci		/*
15498c2ecf20Sopenharmony_ci		 * Fail path for now, so we do not ping pong
15508c2ecf20Sopenharmony_ci		 */
15518c2ecf20Sopenharmony_ci		fail_path(pgpath);
15528c2ecf20Sopenharmony_ci		break;
15538c2ecf20Sopenharmony_ci	case SCSI_DH_DEV_TEMP_BUSY:
15548c2ecf20Sopenharmony_ci		/*
15558c2ecf20Sopenharmony_ci		 * Probably doing something like FW upgrade on the
15568c2ecf20Sopenharmony_ci		 * controller so try the other pg.
15578c2ecf20Sopenharmony_ci		 */
15588c2ecf20Sopenharmony_ci		bypass_pg(m, pg, true);
15598c2ecf20Sopenharmony_ci		break;
15608c2ecf20Sopenharmony_ci	case SCSI_DH_RETRY:
15618c2ecf20Sopenharmony_ci		/* Wait before retrying. */
15628c2ecf20Sopenharmony_ci		delay_retry = true;
15638c2ecf20Sopenharmony_ci		fallthrough;
15648c2ecf20Sopenharmony_ci	case SCSI_DH_IMM_RETRY:
15658c2ecf20Sopenharmony_ci	case SCSI_DH_RES_TEMP_UNAVAIL:
15668c2ecf20Sopenharmony_ci		if (pg_init_limit_reached(m, pgpath))
15678c2ecf20Sopenharmony_ci			fail_path(pgpath);
15688c2ecf20Sopenharmony_ci		errors = 0;
15698c2ecf20Sopenharmony_ci		break;
15708c2ecf20Sopenharmony_ci	case SCSI_DH_DEV_OFFLINED:
15718c2ecf20Sopenharmony_ci	default:
15728c2ecf20Sopenharmony_ci		/*
15738c2ecf20Sopenharmony_ci		 * We probably do not want to fail the path for a device
15748c2ecf20Sopenharmony_ci		 * error, but this is what the old dm did. In future
15758c2ecf20Sopenharmony_ci		 * patches we can do more advanced handling.
15768c2ecf20Sopenharmony_ci		 */
15778c2ecf20Sopenharmony_ci		fail_path(pgpath);
15788c2ecf20Sopenharmony_ci	}
15798c2ecf20Sopenharmony_ci
15808c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
15818c2ecf20Sopenharmony_ci	if (errors) {
15828c2ecf20Sopenharmony_ci		if (pgpath == m->current_pgpath) {
15838c2ecf20Sopenharmony_ci			DMERR("Could not failover device. Error %d.", errors);
15848c2ecf20Sopenharmony_ci			m->current_pgpath = NULL;
15858c2ecf20Sopenharmony_ci			m->current_pg = NULL;
15868c2ecf20Sopenharmony_ci		}
15878c2ecf20Sopenharmony_ci	} else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
15888c2ecf20Sopenharmony_ci		pg->bypassed = false;
15898c2ecf20Sopenharmony_ci
15908c2ecf20Sopenharmony_ci	if (atomic_dec_return(&m->pg_init_in_progress) > 0)
15918c2ecf20Sopenharmony_ci		/* Activations of other paths are still on going */
15928c2ecf20Sopenharmony_ci		goto out;
15938c2ecf20Sopenharmony_ci
15948c2ecf20Sopenharmony_ci	if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
15958c2ecf20Sopenharmony_ci		if (delay_retry)
15968c2ecf20Sopenharmony_ci			set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
15978c2ecf20Sopenharmony_ci		else
15988c2ecf20Sopenharmony_ci			clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
15998c2ecf20Sopenharmony_ci
16008c2ecf20Sopenharmony_ci		if (__pg_init_all_paths(m))
16018c2ecf20Sopenharmony_ci			goto out;
16028c2ecf20Sopenharmony_ci	}
16038c2ecf20Sopenharmony_ci	clear_bit(MPATHF_QUEUE_IO, &m->flags);
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci	process_queued_io_list(m);
16068c2ecf20Sopenharmony_ci
16078c2ecf20Sopenharmony_ci	/*
16088c2ecf20Sopenharmony_ci	 * Wake up any thread waiting to suspend.
16098c2ecf20Sopenharmony_ci	 */
16108c2ecf20Sopenharmony_ci	wake_up(&m->pg_init_wait);
16118c2ecf20Sopenharmony_ci
16128c2ecf20Sopenharmony_ciout:
16138c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
16148c2ecf20Sopenharmony_ci}
16158c2ecf20Sopenharmony_ci
16168c2ecf20Sopenharmony_cistatic void activate_or_offline_path(struct pgpath *pgpath)
16178c2ecf20Sopenharmony_ci{
16188c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	if (pgpath->is_active && !blk_queue_dying(q))
16218c2ecf20Sopenharmony_ci		scsi_dh_activate(q, pg_init_done, pgpath);
16228c2ecf20Sopenharmony_ci	else
16238c2ecf20Sopenharmony_ci		pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
16248c2ecf20Sopenharmony_ci}
16258c2ecf20Sopenharmony_ci
16268c2ecf20Sopenharmony_cistatic void activate_path_work(struct work_struct *work)
16278c2ecf20Sopenharmony_ci{
16288c2ecf20Sopenharmony_ci	struct pgpath *pgpath =
16298c2ecf20Sopenharmony_ci		container_of(work, struct pgpath, activate_path.work);
16308c2ecf20Sopenharmony_ci
16318c2ecf20Sopenharmony_ci	activate_or_offline_path(pgpath);
16328c2ecf20Sopenharmony_ci}
16338c2ecf20Sopenharmony_ci
16348c2ecf20Sopenharmony_cistatic int multipath_end_io(struct dm_target *ti, struct request *clone,
16358c2ecf20Sopenharmony_ci			    blk_status_t error, union map_info *map_context)
16368c2ecf20Sopenharmony_ci{
16378c2ecf20Sopenharmony_ci	struct dm_mpath_io *mpio = get_mpio(map_context);
16388c2ecf20Sopenharmony_ci	struct pgpath *pgpath = mpio->pgpath;
16398c2ecf20Sopenharmony_ci	int r = DM_ENDIO_DONE;
16408c2ecf20Sopenharmony_ci
16418c2ecf20Sopenharmony_ci	/*
16428c2ecf20Sopenharmony_ci	 * We don't queue any clone request inside the multipath target
16438c2ecf20Sopenharmony_ci	 * during end I/O handling, since those clone requests don't have
16448c2ecf20Sopenharmony_ci	 * bio clones.  If we queue them inside the multipath target,
16458c2ecf20Sopenharmony_ci	 * we need to make bio clones, that requires memory allocation.
16468c2ecf20Sopenharmony_ci	 * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
16478c2ecf20Sopenharmony_ci	 *  don't have bio clones.)
16488c2ecf20Sopenharmony_ci	 * Instead of queueing the clone request here, we queue the original
16498c2ecf20Sopenharmony_ci	 * request into dm core, which will remake a clone request and
16508c2ecf20Sopenharmony_ci	 * clone bios for it and resubmit it later.
16518c2ecf20Sopenharmony_ci	 */
16528c2ecf20Sopenharmony_ci	if (error && blk_path_error(error)) {
16538c2ecf20Sopenharmony_ci		struct multipath *m = ti->private;
16548c2ecf20Sopenharmony_ci
16558c2ecf20Sopenharmony_ci		if (error == BLK_STS_RESOURCE)
16568c2ecf20Sopenharmony_ci			r = DM_ENDIO_DELAY_REQUEUE;
16578c2ecf20Sopenharmony_ci		else
16588c2ecf20Sopenharmony_ci			r = DM_ENDIO_REQUEUE;
16598c2ecf20Sopenharmony_ci
16608c2ecf20Sopenharmony_ci		if (pgpath)
16618c2ecf20Sopenharmony_ci			fail_path(pgpath);
16628c2ecf20Sopenharmony_ci
16638c2ecf20Sopenharmony_ci		if (!atomic_read(&m->nr_valid_paths) &&
16648c2ecf20Sopenharmony_ci		    !must_push_back_rq(m)) {
16658c2ecf20Sopenharmony_ci			if (error == BLK_STS_IOERR)
16668c2ecf20Sopenharmony_ci				dm_report_EIO(m);
16678c2ecf20Sopenharmony_ci			/* complete with the original error */
16688c2ecf20Sopenharmony_ci			r = DM_ENDIO_DONE;
16698c2ecf20Sopenharmony_ci		}
16708c2ecf20Sopenharmony_ci	}
16718c2ecf20Sopenharmony_ci
16728c2ecf20Sopenharmony_ci	if (pgpath) {
16738c2ecf20Sopenharmony_ci		struct path_selector *ps = &pgpath->pg->ps;
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci		if (ps->type->end_io)
16768c2ecf20Sopenharmony_ci			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
16778c2ecf20Sopenharmony_ci					 clone->io_start_time_ns);
16788c2ecf20Sopenharmony_ci	}
16798c2ecf20Sopenharmony_ci
16808c2ecf20Sopenharmony_ci	return r;
16818c2ecf20Sopenharmony_ci}
16828c2ecf20Sopenharmony_ci
16838c2ecf20Sopenharmony_cistatic int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
16848c2ecf20Sopenharmony_ci				blk_status_t *error)
16858c2ecf20Sopenharmony_ci{
16868c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
16878c2ecf20Sopenharmony_ci	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
16888c2ecf20Sopenharmony_ci	struct pgpath *pgpath = mpio->pgpath;
16898c2ecf20Sopenharmony_ci	unsigned long flags;
16908c2ecf20Sopenharmony_ci	int r = DM_ENDIO_DONE;
16918c2ecf20Sopenharmony_ci
16928c2ecf20Sopenharmony_ci	if (!*error || !blk_path_error(*error))
16938c2ecf20Sopenharmony_ci		goto done;
16948c2ecf20Sopenharmony_ci
16958c2ecf20Sopenharmony_ci	if (pgpath)
16968c2ecf20Sopenharmony_ci		fail_path(pgpath);
16978c2ecf20Sopenharmony_ci
16988c2ecf20Sopenharmony_ci	if (!atomic_read(&m->nr_valid_paths)) {
16998c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
17008c2ecf20Sopenharmony_ci		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
17018c2ecf20Sopenharmony_ci			if (__must_push_back(m)) {
17028c2ecf20Sopenharmony_ci				r = DM_ENDIO_REQUEUE;
17038c2ecf20Sopenharmony_ci			} else {
17048c2ecf20Sopenharmony_ci				dm_report_EIO(m);
17058c2ecf20Sopenharmony_ci				*error = BLK_STS_IOERR;
17068c2ecf20Sopenharmony_ci			}
17078c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&m->lock, flags);
17088c2ecf20Sopenharmony_ci			goto done;
17098c2ecf20Sopenharmony_ci		}
17108c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
17118c2ecf20Sopenharmony_ci	}
17128c2ecf20Sopenharmony_ci
17138c2ecf20Sopenharmony_ci	multipath_queue_bio(m, clone);
17148c2ecf20Sopenharmony_ci	r = DM_ENDIO_INCOMPLETE;
17158c2ecf20Sopenharmony_cidone:
17168c2ecf20Sopenharmony_ci	if (pgpath) {
17178c2ecf20Sopenharmony_ci		struct path_selector *ps = &pgpath->pg->ps;
17188c2ecf20Sopenharmony_ci
17198c2ecf20Sopenharmony_ci		if (ps->type->end_io)
17208c2ecf20Sopenharmony_ci			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
17218c2ecf20Sopenharmony_ci					 dm_start_time_ns_from_clone(clone));
17228c2ecf20Sopenharmony_ci	}
17238c2ecf20Sopenharmony_ci
17248c2ecf20Sopenharmony_ci	return r;
17258c2ecf20Sopenharmony_ci}
17268c2ecf20Sopenharmony_ci
17278c2ecf20Sopenharmony_ci/*
17288c2ecf20Sopenharmony_ci * Suspend with flush can't complete until all the I/O is processed
17298c2ecf20Sopenharmony_ci * so if the last path fails we must error any remaining I/O.
17308c2ecf20Sopenharmony_ci * - Note that if the freeze_bdev fails while suspending, the
17318c2ecf20Sopenharmony_ci *   queue_if_no_path state is lost - userspace should reset it.
17328c2ecf20Sopenharmony_ci * Otherwise, during noflush suspend, queue_if_no_path will not change.
17338c2ecf20Sopenharmony_ci */
17348c2ecf20Sopenharmony_cistatic void multipath_presuspend(struct dm_target *ti)
17358c2ecf20Sopenharmony_ci{
17368c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
17378c2ecf20Sopenharmony_ci
17388c2ecf20Sopenharmony_ci	/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
17398c2ecf20Sopenharmony_ci	if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
17408c2ecf20Sopenharmony_ci		queue_if_no_path(m, false, true, __func__);
17418c2ecf20Sopenharmony_ci}
17428c2ecf20Sopenharmony_ci
17438c2ecf20Sopenharmony_cistatic void multipath_postsuspend(struct dm_target *ti)
17448c2ecf20Sopenharmony_ci{
17458c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
17468c2ecf20Sopenharmony_ci
17478c2ecf20Sopenharmony_ci	mutex_lock(&m->work_mutex);
17488c2ecf20Sopenharmony_ci	flush_multipath_work(m);
17498c2ecf20Sopenharmony_ci	mutex_unlock(&m->work_mutex);
17508c2ecf20Sopenharmony_ci}
17518c2ecf20Sopenharmony_ci
17528c2ecf20Sopenharmony_ci/*
17538c2ecf20Sopenharmony_ci * Restore the queue_if_no_path setting.
17548c2ecf20Sopenharmony_ci */
17558c2ecf20Sopenharmony_cistatic void multipath_resume(struct dm_target *ti)
17568c2ecf20Sopenharmony_ci{
17578c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
17588c2ecf20Sopenharmony_ci	unsigned long flags;
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
17618c2ecf20Sopenharmony_ci	if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
17628c2ecf20Sopenharmony_ci		set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
17638c2ecf20Sopenharmony_ci		clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
17648c2ecf20Sopenharmony_ci	}
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci	DMDEBUG("%s: %s finished; QIFNP = %d; SQIFNP = %d",
17678c2ecf20Sopenharmony_ci		dm_table_device_name(m->ti->table), __func__,
17688c2ecf20Sopenharmony_ci		test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
17698c2ecf20Sopenharmony_ci		test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
17708c2ecf20Sopenharmony_ci
17718c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
17728c2ecf20Sopenharmony_ci}
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci/*
17758c2ecf20Sopenharmony_ci * Info output has the following format:
17768c2ecf20Sopenharmony_ci * num_multipath_feature_args [multipath_feature_args]*
17778c2ecf20Sopenharmony_ci * num_handler_status_args [handler_status_args]*
17788c2ecf20Sopenharmony_ci * num_groups init_group_number
17798c2ecf20Sopenharmony_ci *            [A|D|E num_ps_status_args [ps_status_args]*
17808c2ecf20Sopenharmony_ci *             num_paths num_selector_args
17818c2ecf20Sopenharmony_ci *             [path_dev A|F fail_count [selector_args]* ]+ ]+
17828c2ecf20Sopenharmony_ci *
17838c2ecf20Sopenharmony_ci * Table output has the following format (identical to the constructor string):
17848c2ecf20Sopenharmony_ci * num_feature_args [features_args]*
17858c2ecf20Sopenharmony_ci * num_handler_args hw_handler [hw_handler_args]*
17868c2ecf20Sopenharmony_ci * num_groups init_group_number
17878c2ecf20Sopenharmony_ci *     [priority selector-name num_ps_args [ps_args]*
17888c2ecf20Sopenharmony_ci *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
17898c2ecf20Sopenharmony_ci */
17908c2ecf20Sopenharmony_cistatic void multipath_status(struct dm_target *ti, status_type_t type,
17918c2ecf20Sopenharmony_ci			     unsigned status_flags, char *result, unsigned maxlen)
17928c2ecf20Sopenharmony_ci{
17938c2ecf20Sopenharmony_ci	int sz = 0;
17948c2ecf20Sopenharmony_ci	unsigned long flags;
17958c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
17968c2ecf20Sopenharmony_ci	struct priority_group *pg;
17978c2ecf20Sopenharmony_ci	struct pgpath *p;
17988c2ecf20Sopenharmony_ci	unsigned pg_num;
17998c2ecf20Sopenharmony_ci	char state;
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	spin_lock_irqsave(&m->lock, flags);
18028c2ecf20Sopenharmony_ci
18038c2ecf20Sopenharmony_ci	/* Features */
18048c2ecf20Sopenharmony_ci	if (type == STATUSTYPE_INFO)
18058c2ecf20Sopenharmony_ci		DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
18068c2ecf20Sopenharmony_ci		       atomic_read(&m->pg_init_count));
18078c2ecf20Sopenharmony_ci	else {
18088c2ecf20Sopenharmony_ci		DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
18098c2ecf20Sopenharmony_ci			      (m->pg_init_retries > 0) * 2 +
18108c2ecf20Sopenharmony_ci			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
18118c2ecf20Sopenharmony_ci			      test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
18128c2ecf20Sopenharmony_ci			      (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
18138c2ecf20Sopenharmony_ci
18148c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
18158c2ecf20Sopenharmony_ci			DMEMIT("queue_if_no_path ");
18168c2ecf20Sopenharmony_ci		if (m->pg_init_retries)
18178c2ecf20Sopenharmony_ci			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
18188c2ecf20Sopenharmony_ci		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
18198c2ecf20Sopenharmony_ci			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
18208c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
18218c2ecf20Sopenharmony_ci			DMEMIT("retain_attached_hw_handler ");
18228c2ecf20Sopenharmony_ci		if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
18238c2ecf20Sopenharmony_ci			switch(m->queue_mode) {
18248c2ecf20Sopenharmony_ci			case DM_TYPE_BIO_BASED:
18258c2ecf20Sopenharmony_ci				DMEMIT("queue_mode bio ");
18268c2ecf20Sopenharmony_ci				break;
18278c2ecf20Sopenharmony_ci			default:
18288c2ecf20Sopenharmony_ci				WARN_ON_ONCE(true);
18298c2ecf20Sopenharmony_ci				break;
18308c2ecf20Sopenharmony_ci			}
18318c2ecf20Sopenharmony_ci		}
18328c2ecf20Sopenharmony_ci	}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_ci	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
18358c2ecf20Sopenharmony_ci		DMEMIT("0 ");
18368c2ecf20Sopenharmony_ci	else
18378c2ecf20Sopenharmony_ci		DMEMIT("1 %s ", m->hw_handler_name);
18388c2ecf20Sopenharmony_ci
18398c2ecf20Sopenharmony_ci	DMEMIT("%u ", m->nr_priority_groups);
18408c2ecf20Sopenharmony_ci
18418c2ecf20Sopenharmony_ci	if (m->next_pg)
18428c2ecf20Sopenharmony_ci		pg_num = m->next_pg->pg_num;
18438c2ecf20Sopenharmony_ci	else if (m->current_pg)
18448c2ecf20Sopenharmony_ci		pg_num = m->current_pg->pg_num;
18458c2ecf20Sopenharmony_ci	else
18468c2ecf20Sopenharmony_ci		pg_num = (m->nr_priority_groups ? 1 : 0);
18478c2ecf20Sopenharmony_ci
18488c2ecf20Sopenharmony_ci	DMEMIT("%u ", pg_num);
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_ci	switch (type) {
18518c2ecf20Sopenharmony_ci	case STATUSTYPE_INFO:
18528c2ecf20Sopenharmony_ci		list_for_each_entry(pg, &m->priority_groups, list) {
18538c2ecf20Sopenharmony_ci			if (pg->bypassed)
18548c2ecf20Sopenharmony_ci				state = 'D';	/* Disabled */
18558c2ecf20Sopenharmony_ci			else if (pg == m->current_pg)
18568c2ecf20Sopenharmony_ci				state = 'A';	/* Currently Active */
18578c2ecf20Sopenharmony_ci			else
18588c2ecf20Sopenharmony_ci				state = 'E';	/* Enabled */
18598c2ecf20Sopenharmony_ci
18608c2ecf20Sopenharmony_ci			DMEMIT("%c ", state);
18618c2ecf20Sopenharmony_ci
18628c2ecf20Sopenharmony_ci			if (pg->ps.type->status)
18638c2ecf20Sopenharmony_ci				sz += pg->ps.type->status(&pg->ps, NULL, type,
18648c2ecf20Sopenharmony_ci							  result + sz,
18658c2ecf20Sopenharmony_ci							  maxlen - sz);
18668c2ecf20Sopenharmony_ci			else
18678c2ecf20Sopenharmony_ci				DMEMIT("0 ");
18688c2ecf20Sopenharmony_ci
18698c2ecf20Sopenharmony_ci			DMEMIT("%u %u ", pg->nr_pgpaths,
18708c2ecf20Sopenharmony_ci			       pg->ps.type->info_args);
18718c2ecf20Sopenharmony_ci
18728c2ecf20Sopenharmony_ci			list_for_each_entry(p, &pg->pgpaths, list) {
18738c2ecf20Sopenharmony_ci				DMEMIT("%s %s %u ", p->path.dev->name,
18748c2ecf20Sopenharmony_ci				       p->is_active ? "A" : "F",
18758c2ecf20Sopenharmony_ci				       p->fail_count);
18768c2ecf20Sopenharmony_ci				if (pg->ps.type->status)
18778c2ecf20Sopenharmony_ci					sz += pg->ps.type->status(&pg->ps,
18788c2ecf20Sopenharmony_ci					      &p->path, type, result + sz,
18798c2ecf20Sopenharmony_ci					      maxlen - sz);
18808c2ecf20Sopenharmony_ci			}
18818c2ecf20Sopenharmony_ci		}
18828c2ecf20Sopenharmony_ci		break;
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_ci	case STATUSTYPE_TABLE:
18858c2ecf20Sopenharmony_ci		list_for_each_entry(pg, &m->priority_groups, list) {
18868c2ecf20Sopenharmony_ci			DMEMIT("%s ", pg->ps.type->name);
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_ci			if (pg->ps.type->status)
18898c2ecf20Sopenharmony_ci				sz += pg->ps.type->status(&pg->ps, NULL, type,
18908c2ecf20Sopenharmony_ci							  result + sz,
18918c2ecf20Sopenharmony_ci							  maxlen - sz);
18928c2ecf20Sopenharmony_ci			else
18938c2ecf20Sopenharmony_ci				DMEMIT("0 ");
18948c2ecf20Sopenharmony_ci
18958c2ecf20Sopenharmony_ci			DMEMIT("%u %u ", pg->nr_pgpaths,
18968c2ecf20Sopenharmony_ci			       pg->ps.type->table_args);
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci			list_for_each_entry(p, &pg->pgpaths, list) {
18998c2ecf20Sopenharmony_ci				DMEMIT("%s ", p->path.dev->name);
19008c2ecf20Sopenharmony_ci				if (pg->ps.type->status)
19018c2ecf20Sopenharmony_ci					sz += pg->ps.type->status(&pg->ps,
19028c2ecf20Sopenharmony_ci					      &p->path, type, result + sz,
19038c2ecf20Sopenharmony_ci					      maxlen - sz);
19048c2ecf20Sopenharmony_ci			}
19058c2ecf20Sopenharmony_ci		}
19068c2ecf20Sopenharmony_ci		break;
19078c2ecf20Sopenharmony_ci	}
19088c2ecf20Sopenharmony_ci
19098c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&m->lock, flags);
19108c2ecf20Sopenharmony_ci}
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_cistatic int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
19138c2ecf20Sopenharmony_ci			     char *result, unsigned maxlen)
19148c2ecf20Sopenharmony_ci{
19158c2ecf20Sopenharmony_ci	int r = -EINVAL;
19168c2ecf20Sopenharmony_ci	struct dm_dev *dev;
19178c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
19188c2ecf20Sopenharmony_ci	action_fn action;
19198c2ecf20Sopenharmony_ci	unsigned long flags;
19208c2ecf20Sopenharmony_ci
19218c2ecf20Sopenharmony_ci	mutex_lock(&m->work_mutex);
19228c2ecf20Sopenharmony_ci
19238c2ecf20Sopenharmony_ci	if (dm_suspended(ti)) {
19248c2ecf20Sopenharmony_ci		r = -EBUSY;
19258c2ecf20Sopenharmony_ci		goto out;
19268c2ecf20Sopenharmony_ci	}
19278c2ecf20Sopenharmony_ci
19288c2ecf20Sopenharmony_ci	if (argc == 1) {
19298c2ecf20Sopenharmony_ci		if (!strcasecmp(argv[0], "queue_if_no_path")) {
19308c2ecf20Sopenharmony_ci			r = queue_if_no_path(m, true, false, __func__);
19318c2ecf20Sopenharmony_ci			spin_lock_irqsave(&m->lock, flags);
19328c2ecf20Sopenharmony_ci			enable_nopath_timeout(m);
19338c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&m->lock, flags);
19348c2ecf20Sopenharmony_ci			goto out;
19358c2ecf20Sopenharmony_ci		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
19368c2ecf20Sopenharmony_ci			r = queue_if_no_path(m, false, false, __func__);
19378c2ecf20Sopenharmony_ci			disable_nopath_timeout(m);
19388c2ecf20Sopenharmony_ci			goto out;
19398c2ecf20Sopenharmony_ci		}
19408c2ecf20Sopenharmony_ci	}
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_ci	if (argc != 2) {
19438c2ecf20Sopenharmony_ci		DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
19448c2ecf20Sopenharmony_ci		goto out;
19458c2ecf20Sopenharmony_ci	}
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "disable_group")) {
19488c2ecf20Sopenharmony_ci		r = bypass_pg_num(m, argv[1], true);
19498c2ecf20Sopenharmony_ci		goto out;
19508c2ecf20Sopenharmony_ci	} else if (!strcasecmp(argv[0], "enable_group")) {
19518c2ecf20Sopenharmony_ci		r = bypass_pg_num(m, argv[1], false);
19528c2ecf20Sopenharmony_ci		goto out;
19538c2ecf20Sopenharmony_ci	} else if (!strcasecmp(argv[0], "switch_group")) {
19548c2ecf20Sopenharmony_ci		r = switch_pg_num(m, argv[1]);
19558c2ecf20Sopenharmony_ci		goto out;
19568c2ecf20Sopenharmony_ci	} else if (!strcasecmp(argv[0], "reinstate_path"))
19578c2ecf20Sopenharmony_ci		action = reinstate_path;
19588c2ecf20Sopenharmony_ci	else if (!strcasecmp(argv[0], "fail_path"))
19598c2ecf20Sopenharmony_ci		action = fail_path;
19608c2ecf20Sopenharmony_ci	else {
19618c2ecf20Sopenharmony_ci		DMWARN("Unrecognised multipath message received: %s", argv[0]);
19628c2ecf20Sopenharmony_ci		goto out;
19638c2ecf20Sopenharmony_ci	}
19648c2ecf20Sopenharmony_ci
19658c2ecf20Sopenharmony_ci	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
19668c2ecf20Sopenharmony_ci	if (r) {
19678c2ecf20Sopenharmony_ci		DMWARN("message: error getting device %s",
19688c2ecf20Sopenharmony_ci		       argv[1]);
19698c2ecf20Sopenharmony_ci		goto out;
19708c2ecf20Sopenharmony_ci	}
19718c2ecf20Sopenharmony_ci
19728c2ecf20Sopenharmony_ci	r = action_dev(m, dev, action);
19738c2ecf20Sopenharmony_ci
19748c2ecf20Sopenharmony_ci	dm_put_device(ti, dev);
19758c2ecf20Sopenharmony_ci
19768c2ecf20Sopenharmony_ciout:
19778c2ecf20Sopenharmony_ci	mutex_unlock(&m->work_mutex);
19788c2ecf20Sopenharmony_ci	return r;
19798c2ecf20Sopenharmony_ci}
19808c2ecf20Sopenharmony_ci
19818c2ecf20Sopenharmony_cistatic int multipath_prepare_ioctl(struct dm_target *ti,
19828c2ecf20Sopenharmony_ci				   struct block_device **bdev)
19838c2ecf20Sopenharmony_ci{
19848c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
19858c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
19868c2ecf20Sopenharmony_ci	unsigned long flags;
19878c2ecf20Sopenharmony_ci	int r;
19888c2ecf20Sopenharmony_ci
19898c2ecf20Sopenharmony_ci	pgpath = READ_ONCE(m->current_pgpath);
19908c2ecf20Sopenharmony_ci	if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
19918c2ecf20Sopenharmony_ci		pgpath = choose_pgpath(m, 0);
19928c2ecf20Sopenharmony_ci
19938c2ecf20Sopenharmony_ci	if (pgpath) {
19948c2ecf20Sopenharmony_ci		if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) {
19958c2ecf20Sopenharmony_ci			*bdev = pgpath->path.dev->bdev;
19968c2ecf20Sopenharmony_ci			r = 0;
19978c2ecf20Sopenharmony_ci		} else {
19988c2ecf20Sopenharmony_ci			/* pg_init has not started or completed */
19998c2ecf20Sopenharmony_ci			r = -ENOTCONN;
20008c2ecf20Sopenharmony_ci		}
20018c2ecf20Sopenharmony_ci	} else {
20028c2ecf20Sopenharmony_ci		/* No path is available */
20038c2ecf20Sopenharmony_ci		r = -EIO;
20048c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
20058c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
20068c2ecf20Sopenharmony_ci			r = -ENOTCONN;
20078c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
20088c2ecf20Sopenharmony_ci	}
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci	if (r == -ENOTCONN) {
20118c2ecf20Sopenharmony_ci		if (!READ_ONCE(m->current_pg)) {
20128c2ecf20Sopenharmony_ci			/* Path status changed, redo selection */
20138c2ecf20Sopenharmony_ci			(void) choose_pgpath(m, 0);
20148c2ecf20Sopenharmony_ci		}
20158c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
20168c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
20178c2ecf20Sopenharmony_ci			(void) __pg_init_all_paths(m);
20188c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
20198c2ecf20Sopenharmony_ci		dm_table_run_md_queue_async(m->ti->table);
20208c2ecf20Sopenharmony_ci		process_queued_io_list(m);
20218c2ecf20Sopenharmony_ci	}
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	/*
20248c2ecf20Sopenharmony_ci	 * Only pass ioctls through if the device sizes match exactly.
20258c2ecf20Sopenharmony_ci	 */
20268c2ecf20Sopenharmony_ci	if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
20278c2ecf20Sopenharmony_ci		return 1;
20288c2ecf20Sopenharmony_ci	return r;
20298c2ecf20Sopenharmony_ci}
20308c2ecf20Sopenharmony_ci
20318c2ecf20Sopenharmony_cistatic int multipath_iterate_devices(struct dm_target *ti,
20328c2ecf20Sopenharmony_ci				     iterate_devices_callout_fn fn, void *data)
20338c2ecf20Sopenharmony_ci{
20348c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
20358c2ecf20Sopenharmony_ci	struct priority_group *pg;
20368c2ecf20Sopenharmony_ci	struct pgpath *p;
20378c2ecf20Sopenharmony_ci	int ret = 0;
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	list_for_each_entry(pg, &m->priority_groups, list) {
20408c2ecf20Sopenharmony_ci		list_for_each_entry(p, &pg->pgpaths, list) {
20418c2ecf20Sopenharmony_ci			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
20428c2ecf20Sopenharmony_ci			if (ret)
20438c2ecf20Sopenharmony_ci				goto out;
20448c2ecf20Sopenharmony_ci		}
20458c2ecf20Sopenharmony_ci	}
20468c2ecf20Sopenharmony_ci
20478c2ecf20Sopenharmony_ciout:
20488c2ecf20Sopenharmony_ci	return ret;
20498c2ecf20Sopenharmony_ci}
20508c2ecf20Sopenharmony_ci
20518c2ecf20Sopenharmony_cistatic int pgpath_busy(struct pgpath *pgpath)
20528c2ecf20Sopenharmony_ci{
20538c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	return blk_lld_busy(q);
20568c2ecf20Sopenharmony_ci}
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci/*
20598c2ecf20Sopenharmony_ci * We return "busy", only when we can map I/Os but underlying devices
20608c2ecf20Sopenharmony_ci * are busy (so even if we map I/Os now, the I/Os will wait on
20618c2ecf20Sopenharmony_ci * the underlying queue).
20628c2ecf20Sopenharmony_ci * In other words, if we want to kill I/Os or queue them inside us
20638c2ecf20Sopenharmony_ci * due to map unavailability, we don't return "busy".  Otherwise,
20648c2ecf20Sopenharmony_ci * dm core won't give us the I/Os and we can't do what we want.
20658c2ecf20Sopenharmony_ci */
20668c2ecf20Sopenharmony_cistatic int multipath_busy(struct dm_target *ti)
20678c2ecf20Sopenharmony_ci{
20688c2ecf20Sopenharmony_ci	bool busy = false, has_active = false;
20698c2ecf20Sopenharmony_ci	struct multipath *m = ti->private;
20708c2ecf20Sopenharmony_ci	struct priority_group *pg, *next_pg;
20718c2ecf20Sopenharmony_ci	struct pgpath *pgpath;
20728c2ecf20Sopenharmony_ci
20738c2ecf20Sopenharmony_ci	/* pg_init in progress */
20748c2ecf20Sopenharmony_ci	if (atomic_read(&m->pg_init_in_progress))
20758c2ecf20Sopenharmony_ci		return true;
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci	/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
20788c2ecf20Sopenharmony_ci	if (!atomic_read(&m->nr_valid_paths)) {
20798c2ecf20Sopenharmony_ci		unsigned long flags;
20808c2ecf20Sopenharmony_ci		spin_lock_irqsave(&m->lock, flags);
20818c2ecf20Sopenharmony_ci		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
20828c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&m->lock, flags);
20838c2ecf20Sopenharmony_ci			return (m->queue_mode != DM_TYPE_REQUEST_BASED);
20848c2ecf20Sopenharmony_ci		}
20858c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&m->lock, flags);
20868c2ecf20Sopenharmony_ci	}
20878c2ecf20Sopenharmony_ci
20888c2ecf20Sopenharmony_ci	/* Guess which priority_group will be used at next mapping time */
20898c2ecf20Sopenharmony_ci	pg = READ_ONCE(m->current_pg);
20908c2ecf20Sopenharmony_ci	next_pg = READ_ONCE(m->next_pg);
20918c2ecf20Sopenharmony_ci	if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
20928c2ecf20Sopenharmony_ci		pg = next_pg;
20938c2ecf20Sopenharmony_ci
20948c2ecf20Sopenharmony_ci	if (!pg) {
20958c2ecf20Sopenharmony_ci		/*
20968c2ecf20Sopenharmony_ci		 * We don't know which pg will be used at next mapping time.
20978c2ecf20Sopenharmony_ci		 * We don't call choose_pgpath() here to avoid to trigger
20988c2ecf20Sopenharmony_ci		 * pg_init just by busy checking.
20998c2ecf20Sopenharmony_ci		 * So we don't know whether underlying devices we will be using
21008c2ecf20Sopenharmony_ci		 * at next mapping time are busy or not. Just try mapping.
21018c2ecf20Sopenharmony_ci		 */
21028c2ecf20Sopenharmony_ci		return busy;
21038c2ecf20Sopenharmony_ci	}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_ci	/*
21068c2ecf20Sopenharmony_ci	 * If there is one non-busy active path at least, the path selector
21078c2ecf20Sopenharmony_ci	 * will be able to select it. So we consider such a pg as not busy.
21088c2ecf20Sopenharmony_ci	 */
21098c2ecf20Sopenharmony_ci	busy = true;
21108c2ecf20Sopenharmony_ci	list_for_each_entry(pgpath, &pg->pgpaths, list) {
21118c2ecf20Sopenharmony_ci		if (pgpath->is_active) {
21128c2ecf20Sopenharmony_ci			has_active = true;
21138c2ecf20Sopenharmony_ci			if (!pgpath_busy(pgpath)) {
21148c2ecf20Sopenharmony_ci				busy = false;
21158c2ecf20Sopenharmony_ci				break;
21168c2ecf20Sopenharmony_ci			}
21178c2ecf20Sopenharmony_ci		}
21188c2ecf20Sopenharmony_ci	}
21198c2ecf20Sopenharmony_ci
21208c2ecf20Sopenharmony_ci	if (!has_active) {
21218c2ecf20Sopenharmony_ci		/*
21228c2ecf20Sopenharmony_ci		 * No active path in this pg, so this pg won't be used and
21238c2ecf20Sopenharmony_ci		 * the current_pg will be changed at next mapping time.
21248c2ecf20Sopenharmony_ci		 * We need to try mapping to determine it.
21258c2ecf20Sopenharmony_ci		 */
21268c2ecf20Sopenharmony_ci		busy = false;
21278c2ecf20Sopenharmony_ci	}
21288c2ecf20Sopenharmony_ci
21298c2ecf20Sopenharmony_ci	return busy;
21308c2ecf20Sopenharmony_ci}
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
21338c2ecf20Sopenharmony_ci * Module setup
21348c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
21358c2ecf20Sopenharmony_cistatic struct target_type multipath_target = {
21368c2ecf20Sopenharmony_ci	.name = "multipath",
21378c2ecf20Sopenharmony_ci	.version = {1, 14, 0},
21388c2ecf20Sopenharmony_ci	.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
21398c2ecf20Sopenharmony_ci		    DM_TARGET_PASSES_INTEGRITY,
21408c2ecf20Sopenharmony_ci	.module = THIS_MODULE,
21418c2ecf20Sopenharmony_ci	.ctr = multipath_ctr,
21428c2ecf20Sopenharmony_ci	.dtr = multipath_dtr,
21438c2ecf20Sopenharmony_ci	.clone_and_map_rq = multipath_clone_and_map,
21448c2ecf20Sopenharmony_ci	.release_clone_rq = multipath_release_clone,
21458c2ecf20Sopenharmony_ci	.rq_end_io = multipath_end_io,
21468c2ecf20Sopenharmony_ci	.map = multipath_map_bio,
21478c2ecf20Sopenharmony_ci	.end_io = multipath_end_io_bio,
21488c2ecf20Sopenharmony_ci	.presuspend = multipath_presuspend,
21498c2ecf20Sopenharmony_ci	.postsuspend = multipath_postsuspend,
21508c2ecf20Sopenharmony_ci	.resume = multipath_resume,
21518c2ecf20Sopenharmony_ci	.status = multipath_status,
21528c2ecf20Sopenharmony_ci	.message = multipath_message,
21538c2ecf20Sopenharmony_ci	.prepare_ioctl = multipath_prepare_ioctl,
21548c2ecf20Sopenharmony_ci	.iterate_devices = multipath_iterate_devices,
21558c2ecf20Sopenharmony_ci	.busy = multipath_busy,
21568c2ecf20Sopenharmony_ci};
21578c2ecf20Sopenharmony_ci
21588c2ecf20Sopenharmony_cistatic int __init dm_multipath_init(void)
21598c2ecf20Sopenharmony_ci{
21608c2ecf20Sopenharmony_ci	int r;
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_ci	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
21638c2ecf20Sopenharmony_ci	if (!kmultipathd) {
21648c2ecf20Sopenharmony_ci		DMERR("failed to create workqueue kmpathd");
21658c2ecf20Sopenharmony_ci		r = -ENOMEM;
21668c2ecf20Sopenharmony_ci		goto bad_alloc_kmultipathd;
21678c2ecf20Sopenharmony_ci	}
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci	/*
21708c2ecf20Sopenharmony_ci	 * A separate workqueue is used to handle the device handlers
21718c2ecf20Sopenharmony_ci	 * to avoid overloading existing workqueue. Overloading the
21728c2ecf20Sopenharmony_ci	 * old workqueue would also create a bottleneck in the
21738c2ecf20Sopenharmony_ci	 * path of the storage hardware device activation.
21748c2ecf20Sopenharmony_ci	 */
21758c2ecf20Sopenharmony_ci	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
21768c2ecf20Sopenharmony_ci						  WQ_MEM_RECLAIM);
21778c2ecf20Sopenharmony_ci	if (!kmpath_handlerd) {
21788c2ecf20Sopenharmony_ci		DMERR("failed to create workqueue kmpath_handlerd");
21798c2ecf20Sopenharmony_ci		r = -ENOMEM;
21808c2ecf20Sopenharmony_ci		goto bad_alloc_kmpath_handlerd;
21818c2ecf20Sopenharmony_ci	}
21828c2ecf20Sopenharmony_ci
21838c2ecf20Sopenharmony_ci	r = dm_register_target(&multipath_target);
21848c2ecf20Sopenharmony_ci	if (r < 0) {
21858c2ecf20Sopenharmony_ci		DMERR("request-based register failed %d", r);
21868c2ecf20Sopenharmony_ci		r = -EINVAL;
21878c2ecf20Sopenharmony_ci		goto bad_register_target;
21888c2ecf20Sopenharmony_ci	}
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci	return 0;
21918c2ecf20Sopenharmony_ci
21928c2ecf20Sopenharmony_cibad_register_target:
21938c2ecf20Sopenharmony_ci	destroy_workqueue(kmpath_handlerd);
21948c2ecf20Sopenharmony_cibad_alloc_kmpath_handlerd:
21958c2ecf20Sopenharmony_ci	destroy_workqueue(kmultipathd);
21968c2ecf20Sopenharmony_cibad_alloc_kmultipathd:
21978c2ecf20Sopenharmony_ci	return r;
21988c2ecf20Sopenharmony_ci}
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_cistatic void __exit dm_multipath_exit(void)
22018c2ecf20Sopenharmony_ci{
22028c2ecf20Sopenharmony_ci	destroy_workqueue(kmpath_handlerd);
22038c2ecf20Sopenharmony_ci	destroy_workqueue(kmultipathd);
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	dm_unregister_target(&multipath_target);
22068c2ecf20Sopenharmony_ci}
22078c2ecf20Sopenharmony_ci
22088c2ecf20Sopenharmony_cimodule_init(dm_multipath_init);
22098c2ecf20Sopenharmony_cimodule_exit(dm_multipath_exit);
22108c2ecf20Sopenharmony_ci
22118c2ecf20Sopenharmony_cimodule_param_named(queue_if_no_path_timeout_secs,
22128c2ecf20Sopenharmony_ci		   queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR);
22138c2ecf20Sopenharmony_ciMODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds");
22148c2ecf20Sopenharmony_ci
22158c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " multipath target");
22168c2ecf20Sopenharmony_ciMODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
22178c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2218