// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * multipath.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
 *
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * MULTIPATH management functions.
 *
 * derived from raid1.c.
 */
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
158c2ecf20Sopenharmony_ci#include <linux/module.h>
168c2ecf20Sopenharmony_ci#include <linux/raid/md_u.h>
178c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
188c2ecf20Sopenharmony_ci#include <linux/slab.h>
198c2ecf20Sopenharmony_ci#include "md.h"
208c2ecf20Sopenharmony_ci#include "md-multipath.h"
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#define MAX_WORK_PER_DISK 128
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci#define	NR_RESERVED_BUFS	32
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_cistatic int multipath_map (struct mpconf *conf)
278c2ecf20Sopenharmony_ci{
288c2ecf20Sopenharmony_ci	int i, disks = conf->raid_disks;
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci	/*
318c2ecf20Sopenharmony_ci	 * Later we do read balancing on the read side
328c2ecf20Sopenharmony_ci	 * now we use the first available disk.
338c2ecf20Sopenharmony_ci	 */
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci	rcu_read_lock();
368c2ecf20Sopenharmony_ci	for (i = 0; i < disks; i++) {
378c2ecf20Sopenharmony_ci		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
388c2ecf20Sopenharmony_ci		if (rdev && test_bit(In_sync, &rdev->flags) &&
398c2ecf20Sopenharmony_ci		    !test_bit(Faulty, &rdev->flags)) {
408c2ecf20Sopenharmony_ci			atomic_inc(&rdev->nr_pending);
418c2ecf20Sopenharmony_ci			rcu_read_unlock();
428c2ecf20Sopenharmony_ci			return i;
438c2ecf20Sopenharmony_ci		}
448c2ecf20Sopenharmony_ci	}
458c2ecf20Sopenharmony_ci	rcu_read_unlock();
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
488c2ecf20Sopenharmony_ci	return (-1);
498c2ecf20Sopenharmony_ci}
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cistatic void multipath_reschedule_retry (struct multipath_bh *mp_bh)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	unsigned long flags;
548c2ecf20Sopenharmony_ci	struct mddev *mddev = mp_bh->mddev;
558c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	spin_lock_irqsave(&conf->device_lock, flags);
588c2ecf20Sopenharmony_ci	list_add(&mp_bh->retry_list, &conf->retry_list);
598c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&conf->device_lock, flags);
608c2ecf20Sopenharmony_ci	md_wakeup_thread(mddev->thread);
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci/*
648c2ecf20Sopenharmony_ci * multipath_end_bh_io() is called when we have finished servicing a multipathed
658c2ecf20Sopenharmony_ci * operation and are ready to return a success/failure code to the buffer
668c2ecf20Sopenharmony_ci * cache layer.
678c2ecf20Sopenharmony_ci */
688c2ecf20Sopenharmony_cistatic void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
698c2ecf20Sopenharmony_ci{
708c2ecf20Sopenharmony_ci	struct bio *bio = mp_bh->master_bio;
718c2ecf20Sopenharmony_ci	struct mpconf *conf = mp_bh->mddev->private;
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	bio->bi_status = status;
748c2ecf20Sopenharmony_ci	bio_endio(bio);
758c2ecf20Sopenharmony_ci	mempool_free(mp_bh, &conf->pool);
768c2ecf20Sopenharmony_ci}
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_cistatic void multipath_end_request(struct bio *bio)
798c2ecf20Sopenharmony_ci{
808c2ecf20Sopenharmony_ci	struct multipath_bh *mp_bh = bio->bi_private;
818c2ecf20Sopenharmony_ci	struct mpconf *conf = mp_bh->mddev->private;
828c2ecf20Sopenharmony_ci	struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	if (!bio->bi_status)
858c2ecf20Sopenharmony_ci		multipath_end_bh_io(mp_bh, 0);
868c2ecf20Sopenharmony_ci	else if (!(bio->bi_opf & REQ_RAHEAD)) {
878c2ecf20Sopenharmony_ci		/*
888c2ecf20Sopenharmony_ci		 * oops, IO error:
898c2ecf20Sopenharmony_ci		 */
908c2ecf20Sopenharmony_ci		char b[BDEVNAME_SIZE];
918c2ecf20Sopenharmony_ci		md_error (mp_bh->mddev, rdev);
928c2ecf20Sopenharmony_ci		pr_info("multipath: %s: rescheduling sector %llu\n",
938c2ecf20Sopenharmony_ci			bdevname(rdev->bdev,b),
948c2ecf20Sopenharmony_ci			(unsigned long long)bio->bi_iter.bi_sector);
958c2ecf20Sopenharmony_ci		multipath_reschedule_retry(mp_bh);
968c2ecf20Sopenharmony_ci	} else
978c2ecf20Sopenharmony_ci		multipath_end_bh_io(mp_bh, bio->bi_status);
988c2ecf20Sopenharmony_ci	rdev_dec_pending(rdev, conf->mddev);
998c2ecf20Sopenharmony_ci}
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_cistatic bool multipath_make_request(struct mddev *mddev, struct bio * bio)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
1048c2ecf20Sopenharmony_ci	struct multipath_bh * mp_bh;
1058c2ecf20Sopenharmony_ci	struct multipath_info *multipath;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
1088c2ecf20Sopenharmony_ci	    && md_flush_request(mddev, bio))
1098c2ecf20Sopenharmony_ci		return true;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	mp_bh->master_bio = bio;
1148c2ecf20Sopenharmony_ci	mp_bh->mddev = mddev;
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	mp_bh->path = multipath_map(conf);
1178c2ecf20Sopenharmony_ci	if (mp_bh->path < 0) {
1188c2ecf20Sopenharmony_ci		bio_io_error(bio);
1198c2ecf20Sopenharmony_ci		mempool_free(mp_bh, &conf->pool);
1208c2ecf20Sopenharmony_ci		return true;
1218c2ecf20Sopenharmony_ci	}
1228c2ecf20Sopenharmony_ci	multipath = conf->multipaths + mp_bh->path;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	bio_init(&mp_bh->bio, NULL, 0);
1258c2ecf20Sopenharmony_ci	__bio_clone_fast(&mp_bh->bio, bio);
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
1288c2ecf20Sopenharmony_ci	bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
1298c2ecf20Sopenharmony_ci	mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
1308c2ecf20Sopenharmony_ci	mp_bh->bio.bi_end_io = multipath_end_request;
1318c2ecf20Sopenharmony_ci	mp_bh->bio.bi_private = mp_bh;
1328c2ecf20Sopenharmony_ci	mddev_check_writesame(mddev, &mp_bh->bio);
1338c2ecf20Sopenharmony_ci	mddev_check_write_zeroes(mddev, &mp_bh->bio);
1348c2ecf20Sopenharmony_ci	submit_bio_noacct(&mp_bh->bio);
1358c2ecf20Sopenharmony_ci	return true;
1368c2ecf20Sopenharmony_ci}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_cistatic void multipath_status(struct seq_file *seq, struct mddev *mddev)
1398c2ecf20Sopenharmony_ci{
1408c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
1418c2ecf20Sopenharmony_ci	int i;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
1448c2ecf20Sopenharmony_ci		    conf->raid_disks - mddev->degraded);
1458c2ecf20Sopenharmony_ci	rcu_read_lock();
1468c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks; i++) {
1478c2ecf20Sopenharmony_ci		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
1488c2ecf20Sopenharmony_ci		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
1498c2ecf20Sopenharmony_ci	}
1508c2ecf20Sopenharmony_ci	rcu_read_unlock();
1518c2ecf20Sopenharmony_ci	seq_putc(seq, ']');
1528c2ecf20Sopenharmony_ci}
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci/*
1558c2ecf20Sopenharmony_ci * Careful, this can execute in IRQ contexts as well!
1568c2ecf20Sopenharmony_ci */
1578c2ecf20Sopenharmony_cistatic void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
1608c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	if (conf->raid_disks - mddev->degraded <= 1) {
1638c2ecf20Sopenharmony_ci		/*
1648c2ecf20Sopenharmony_ci		 * Uh oh, we can do nothing if this is our last path, but
1658c2ecf20Sopenharmony_ci		 * first check if this is a queued request for a device
1668c2ecf20Sopenharmony_ci		 * which has just failed.
1678c2ecf20Sopenharmony_ci		 */
1688c2ecf20Sopenharmony_ci		pr_warn("multipath: only one IO path left and IO error.\n");
1698c2ecf20Sopenharmony_ci		/* leave it active... it's all we have */
1708c2ecf20Sopenharmony_ci		return;
1718c2ecf20Sopenharmony_ci	}
1728c2ecf20Sopenharmony_ci	/*
1738c2ecf20Sopenharmony_ci	 * Mark disk as unusable
1748c2ecf20Sopenharmony_ci	 */
1758c2ecf20Sopenharmony_ci	if (test_and_clear_bit(In_sync, &rdev->flags)) {
1768c2ecf20Sopenharmony_ci		unsigned long flags;
1778c2ecf20Sopenharmony_ci		spin_lock_irqsave(&conf->device_lock, flags);
1788c2ecf20Sopenharmony_ci		mddev->degraded++;
1798c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&conf->device_lock, flags);
1808c2ecf20Sopenharmony_ci	}
1818c2ecf20Sopenharmony_ci	set_bit(Faulty, &rdev->flags);
1828c2ecf20Sopenharmony_ci	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
1838c2ecf20Sopenharmony_ci	pr_err("multipath: IO failure on %s, disabling IO path.\n"
1848c2ecf20Sopenharmony_ci	       "multipath: Operation continuing on %d IO paths.\n",
1858c2ecf20Sopenharmony_ci	       bdevname(rdev->bdev, b),
1868c2ecf20Sopenharmony_ci	       conf->raid_disks - mddev->degraded);
1878c2ecf20Sopenharmony_ci}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic void print_multipath_conf (struct mpconf *conf)
1908c2ecf20Sopenharmony_ci{
1918c2ecf20Sopenharmony_ci	int i;
1928c2ecf20Sopenharmony_ci	struct multipath_info *tmp;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	pr_debug("MULTIPATH conf printout:\n");
1958c2ecf20Sopenharmony_ci	if (!conf) {
1968c2ecf20Sopenharmony_ci		pr_debug("(conf==NULL)\n");
1978c2ecf20Sopenharmony_ci		return;
1988c2ecf20Sopenharmony_ci	}
1998c2ecf20Sopenharmony_ci	pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
2008c2ecf20Sopenharmony_ci		 conf->raid_disks);
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks; i++) {
2038c2ecf20Sopenharmony_ci		char b[BDEVNAME_SIZE];
2048c2ecf20Sopenharmony_ci		tmp = conf->multipaths + i;
2058c2ecf20Sopenharmony_ci		if (tmp->rdev)
2068c2ecf20Sopenharmony_ci			pr_debug(" disk%d, o:%d, dev:%s\n",
2078c2ecf20Sopenharmony_ci				 i,!test_bit(Faulty, &tmp->rdev->flags),
2088c2ecf20Sopenharmony_ci				 bdevname(tmp->rdev->bdev,b));
2098c2ecf20Sopenharmony_ci	}
2108c2ecf20Sopenharmony_ci}
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_cistatic int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
2138c2ecf20Sopenharmony_ci{
2148c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
2158c2ecf20Sopenharmony_ci	int err = -EEXIST;
2168c2ecf20Sopenharmony_ci	int path;
2178c2ecf20Sopenharmony_ci	struct multipath_info *p;
2188c2ecf20Sopenharmony_ci	int first = 0;
2198c2ecf20Sopenharmony_ci	int last = mddev->raid_disks - 1;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	if (rdev->raid_disk >= 0)
2228c2ecf20Sopenharmony_ci		first = last = rdev->raid_disk;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	print_multipath_conf(conf);
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	for (path = first; path <= last; path++)
2278c2ecf20Sopenharmony_ci		if ((p=conf->multipaths+path)->rdev == NULL) {
2288c2ecf20Sopenharmony_ci			disk_stack_limits(mddev->gendisk, rdev->bdev,
2298c2ecf20Sopenharmony_ci					  rdev->data_offset << 9);
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci			err = md_integrity_add_rdev(rdev, mddev);
2328c2ecf20Sopenharmony_ci			if (err)
2338c2ecf20Sopenharmony_ci				break;
2348c2ecf20Sopenharmony_ci			spin_lock_irq(&conf->device_lock);
2358c2ecf20Sopenharmony_ci			mddev->degraded--;
2368c2ecf20Sopenharmony_ci			rdev->raid_disk = path;
2378c2ecf20Sopenharmony_ci			set_bit(In_sync, &rdev->flags);
2388c2ecf20Sopenharmony_ci			spin_unlock_irq(&conf->device_lock);
2398c2ecf20Sopenharmony_ci			rcu_assign_pointer(p->rdev, rdev);
2408c2ecf20Sopenharmony_ci			err = 0;
2418c2ecf20Sopenharmony_ci			break;
2428c2ecf20Sopenharmony_ci		}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	print_multipath_conf(conf);
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	return err;
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_cistatic int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
2528c2ecf20Sopenharmony_ci	int err = 0;
2538c2ecf20Sopenharmony_ci	int number = rdev->raid_disk;
2548c2ecf20Sopenharmony_ci	struct multipath_info *p = conf->multipaths + number;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	print_multipath_conf(conf);
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	if (rdev == p->rdev) {
2598c2ecf20Sopenharmony_ci		if (test_bit(In_sync, &rdev->flags) ||
2608c2ecf20Sopenharmony_ci		    atomic_read(&rdev->nr_pending)) {
2618c2ecf20Sopenharmony_ci			pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
2628c2ecf20Sopenharmony_ci			err = -EBUSY;
2638c2ecf20Sopenharmony_ci			goto abort;
2648c2ecf20Sopenharmony_ci		}
2658c2ecf20Sopenharmony_ci		p->rdev = NULL;
2668c2ecf20Sopenharmony_ci		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
2678c2ecf20Sopenharmony_ci			synchronize_rcu();
2688c2ecf20Sopenharmony_ci			if (atomic_read(&rdev->nr_pending)) {
2698c2ecf20Sopenharmony_ci				/* lost the race, try later */
2708c2ecf20Sopenharmony_ci				err = -EBUSY;
2718c2ecf20Sopenharmony_ci				p->rdev = rdev;
2728c2ecf20Sopenharmony_ci				goto abort;
2738c2ecf20Sopenharmony_ci			}
2748c2ecf20Sopenharmony_ci		}
2758c2ecf20Sopenharmony_ci		err = md_integrity_register(mddev);
2768c2ecf20Sopenharmony_ci	}
2778c2ecf20Sopenharmony_ciabort:
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	print_multipath_conf(conf);
2808c2ecf20Sopenharmony_ci	return err;
2818c2ecf20Sopenharmony_ci}
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci/*
2848c2ecf20Sopenharmony_ci * This is a kernel thread which:
2858c2ecf20Sopenharmony_ci *
2868c2ecf20Sopenharmony_ci *	1.	Retries failed read operations on working multipaths.
2878c2ecf20Sopenharmony_ci *	2.	Updates the raid superblock when problems encounter.
2888c2ecf20Sopenharmony_ci *	3.	Performs writes following reads for array syncronising.
2898c2ecf20Sopenharmony_ci */
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_cistatic void multipathd(struct md_thread *thread)
2928c2ecf20Sopenharmony_ci{
2938c2ecf20Sopenharmony_ci	struct mddev *mddev = thread->mddev;
2948c2ecf20Sopenharmony_ci	struct multipath_bh *mp_bh;
2958c2ecf20Sopenharmony_ci	struct bio *bio;
2968c2ecf20Sopenharmony_ci	unsigned long flags;
2978c2ecf20Sopenharmony_ci	struct mpconf *conf = mddev->private;
2988c2ecf20Sopenharmony_ci	struct list_head *head = &conf->retry_list;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	md_check_recovery(mddev);
3018c2ecf20Sopenharmony_ci	for (;;) {
3028c2ecf20Sopenharmony_ci		char b[BDEVNAME_SIZE];
3038c2ecf20Sopenharmony_ci		spin_lock_irqsave(&conf->device_lock, flags);
3048c2ecf20Sopenharmony_ci		if (list_empty(head))
3058c2ecf20Sopenharmony_ci			break;
3068c2ecf20Sopenharmony_ci		mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
3078c2ecf20Sopenharmony_ci		list_del(head->prev);
3088c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&conf->device_lock, flags);
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci		bio = &mp_bh->bio;
3118c2ecf20Sopenharmony_ci		bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci		if ((mp_bh->path = multipath_map (conf))<0) {
3148c2ecf20Sopenharmony_ci			pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
3158c2ecf20Sopenharmony_ci			       bio_devname(bio, b),
3168c2ecf20Sopenharmony_ci			       (unsigned long long)bio->bi_iter.bi_sector);
3178c2ecf20Sopenharmony_ci			multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
3188c2ecf20Sopenharmony_ci		} else {
3198c2ecf20Sopenharmony_ci			pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
3208c2ecf20Sopenharmony_ci			       bio_devname(bio, b),
3218c2ecf20Sopenharmony_ci			       (unsigned long long)bio->bi_iter.bi_sector);
3228c2ecf20Sopenharmony_ci			*bio = *(mp_bh->master_bio);
3238c2ecf20Sopenharmony_ci			bio->bi_iter.bi_sector +=
3248c2ecf20Sopenharmony_ci				conf->multipaths[mp_bh->path].rdev->data_offset;
3258c2ecf20Sopenharmony_ci			bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
3268c2ecf20Sopenharmony_ci			bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
3278c2ecf20Sopenharmony_ci			bio->bi_end_io = multipath_end_request;
3288c2ecf20Sopenharmony_ci			bio->bi_private = mp_bh;
3298c2ecf20Sopenharmony_ci			submit_bio_noacct(bio);
3308c2ecf20Sopenharmony_ci		}
3318c2ecf20Sopenharmony_ci	}
3328c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&conf->device_lock, flags);
3338c2ecf20Sopenharmony_ci}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_cistatic sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
3368c2ecf20Sopenharmony_ci{
3378c2ecf20Sopenharmony_ci	WARN_ONCE(sectors || raid_disks,
3388c2ecf20Sopenharmony_ci		  "%s does not support generic reshape\n", __func__);
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	return mddev->dev_sectors;
3418c2ecf20Sopenharmony_ci}
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_cistatic int multipath_run (struct mddev *mddev)
3448c2ecf20Sopenharmony_ci{
3458c2ecf20Sopenharmony_ci	struct mpconf *conf;
3468c2ecf20Sopenharmony_ci	int disk_idx;
3478c2ecf20Sopenharmony_ci	struct multipath_info *disk;
3488c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
3498c2ecf20Sopenharmony_ci	int working_disks;
3508c2ecf20Sopenharmony_ci	int ret;
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	if (md_check_no_bitmap(mddev))
3538c2ecf20Sopenharmony_ci		return -EINVAL;
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	if (mddev->level != LEVEL_MULTIPATH) {
3568c2ecf20Sopenharmony_ci		pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
3578c2ecf20Sopenharmony_ci			mdname(mddev), mddev->level);
3588c2ecf20Sopenharmony_ci		goto out;
3598c2ecf20Sopenharmony_ci	}
3608c2ecf20Sopenharmony_ci	/*
3618c2ecf20Sopenharmony_ci	 * copy the already verified devices into our private MULTIPATH
3628c2ecf20Sopenharmony_ci	 * bookkeeping area. [whatever we allocate in multipath_run(),
3638c2ecf20Sopenharmony_ci	 * should be freed in multipath_free()]
3648c2ecf20Sopenharmony_ci	 */
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
3678c2ecf20Sopenharmony_ci	mddev->private = conf;
3688c2ecf20Sopenharmony_ci	if (!conf)
3698c2ecf20Sopenharmony_ci		goto out;
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	conf->multipaths = kcalloc(mddev->raid_disks,
3728c2ecf20Sopenharmony_ci				   sizeof(struct multipath_info),
3738c2ecf20Sopenharmony_ci				   GFP_KERNEL);
3748c2ecf20Sopenharmony_ci	if (!conf->multipaths)
3758c2ecf20Sopenharmony_ci		goto out_free_conf;
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	working_disks = 0;
3788c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev) {
3798c2ecf20Sopenharmony_ci		disk_idx = rdev->raid_disk;
3808c2ecf20Sopenharmony_ci		if (disk_idx < 0 ||
3818c2ecf20Sopenharmony_ci		    disk_idx >= mddev->raid_disks)
3828c2ecf20Sopenharmony_ci			continue;
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci		disk = conf->multipaths + disk_idx;
3858c2ecf20Sopenharmony_ci		disk->rdev = rdev;
3868c2ecf20Sopenharmony_ci		disk_stack_limits(mddev->gendisk, rdev->bdev,
3878c2ecf20Sopenharmony_ci				  rdev->data_offset << 9);
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_ci		if (!test_bit(Faulty, &rdev->flags))
3908c2ecf20Sopenharmony_ci			working_disks++;
3918c2ecf20Sopenharmony_ci	}
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_ci	conf->raid_disks = mddev->raid_disks;
3948c2ecf20Sopenharmony_ci	conf->mddev = mddev;
3958c2ecf20Sopenharmony_ci	spin_lock_init(&conf->device_lock);
3968c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&conf->retry_list);
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	if (!working_disks) {
3998c2ecf20Sopenharmony_ci		pr_warn("multipath: no operational IO paths for %s\n",
4008c2ecf20Sopenharmony_ci			mdname(mddev));
4018c2ecf20Sopenharmony_ci		goto out_free_conf;
4028c2ecf20Sopenharmony_ci	}
4038c2ecf20Sopenharmony_ci	mddev->degraded = conf->raid_disks - working_disks;
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci	ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
4068c2ecf20Sopenharmony_ci					sizeof(struct multipath_bh));
4078c2ecf20Sopenharmony_ci	if (ret)
4088c2ecf20Sopenharmony_ci		goto out_free_conf;
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	mddev->thread = md_register_thread(multipathd, mddev,
4118c2ecf20Sopenharmony_ci					   "multipath");
4128c2ecf20Sopenharmony_ci	if (!mddev->thread)
4138c2ecf20Sopenharmony_ci		goto out_free_conf;
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci	pr_info("multipath: array %s active with %d out of %d IO paths\n",
4168c2ecf20Sopenharmony_ci		mdname(mddev), conf->raid_disks - mddev->degraded,
4178c2ecf20Sopenharmony_ci		mddev->raid_disks);
4188c2ecf20Sopenharmony_ci	/*
4198c2ecf20Sopenharmony_ci	 * Ok, everything is just fine now
4208c2ecf20Sopenharmony_ci	 */
4218c2ecf20Sopenharmony_ci	md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_ci	if (md_integrity_register(mddev))
4248c2ecf20Sopenharmony_ci		goto out_free_conf;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	return 0;
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ciout_free_conf:
4298c2ecf20Sopenharmony_ci	mempool_exit(&conf->pool);
4308c2ecf20Sopenharmony_ci	kfree(conf->multipaths);
4318c2ecf20Sopenharmony_ci	kfree(conf);
4328c2ecf20Sopenharmony_ci	mddev->private = NULL;
4338c2ecf20Sopenharmony_ciout:
4348c2ecf20Sopenharmony_ci	return -EIO;
4358c2ecf20Sopenharmony_ci}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_cistatic void multipath_free(struct mddev *mddev, void *priv)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	struct mpconf *conf = priv;
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	mempool_exit(&conf->pool);
4428c2ecf20Sopenharmony_ci	kfree(conf->multipaths);
4438c2ecf20Sopenharmony_ci	kfree(conf);
4448c2ecf20Sopenharmony_ci}
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_cistatic struct md_personality multipath_personality =
4478c2ecf20Sopenharmony_ci{
4488c2ecf20Sopenharmony_ci	.name		= "multipath",
4498c2ecf20Sopenharmony_ci	.level		= LEVEL_MULTIPATH,
4508c2ecf20Sopenharmony_ci	.owner		= THIS_MODULE,
4518c2ecf20Sopenharmony_ci	.make_request	= multipath_make_request,
4528c2ecf20Sopenharmony_ci	.run		= multipath_run,
4538c2ecf20Sopenharmony_ci	.free		= multipath_free,
4548c2ecf20Sopenharmony_ci	.status		= multipath_status,
4558c2ecf20Sopenharmony_ci	.error_handler	= multipath_error,
4568c2ecf20Sopenharmony_ci	.hot_add_disk	= multipath_add_disk,
4578c2ecf20Sopenharmony_ci	.hot_remove_disk= multipath_remove_disk,
4588c2ecf20Sopenharmony_ci	.size		= multipath_size,
4598c2ecf20Sopenharmony_ci};
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_cistatic int __init multipath_init (void)
4628c2ecf20Sopenharmony_ci{
4638c2ecf20Sopenharmony_ci	return register_md_personality (&multipath_personality);
4648c2ecf20Sopenharmony_ci}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_cistatic void __exit multipath_exit (void)
4678c2ecf20Sopenharmony_ci{
4688c2ecf20Sopenharmony_ci	unregister_md_personality (&multipath_personality);
4698c2ecf20Sopenharmony_ci}
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_cimodule_init(multipath_init);
4728c2ecf20Sopenharmony_cimodule_exit(multipath_exit);
4738c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
4748c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("simple multi-path personality for MD");
4758c2ecf20Sopenharmony_ciMODULE_ALIAS("md-personality-7"); /* MULTIPATH */
4768c2ecf20Sopenharmony_ciMODULE_ALIAS("md-multipath");
4778c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level--4");
478