18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * multipath.c : Multiple Devices driver for Linux 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * MULTIPATH management functions. 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * derived from raid1.c. 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 158c2ecf20Sopenharmony_ci#include <linux/module.h> 168c2ecf20Sopenharmony_ci#include <linux/raid/md_u.h> 178c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 188c2ecf20Sopenharmony_ci#include <linux/slab.h> 198c2ecf20Sopenharmony_ci#include "md.h" 208c2ecf20Sopenharmony_ci#include "md-multipath.h" 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#define MAX_WORK_PER_DISK 128 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci#define NR_RESERVED_BUFS 32 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_cistatic int multipath_map (struct mpconf *conf) 278c2ecf20Sopenharmony_ci{ 288c2ecf20Sopenharmony_ci int i, disks = conf->raid_disks; 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci /* 318c2ecf20Sopenharmony_ci * Later we do read balancing on the read side 328c2ecf20Sopenharmony_ci * now we use the first available disk. 338c2ecf20Sopenharmony_ci */ 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci rcu_read_lock(); 368c2ecf20Sopenharmony_ci for (i = 0; i < disks; i++) { 378c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); 388c2ecf20Sopenharmony_ci if (rdev && test_bit(In_sync, &rdev->flags) && 398c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) { 408c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 418c2ecf20Sopenharmony_ci rcu_read_unlock(); 428c2ecf20Sopenharmony_ci return i; 438c2ecf20Sopenharmony_ci } 448c2ecf20Sopenharmony_ci } 458c2ecf20Sopenharmony_ci rcu_read_unlock(); 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n"); 488c2ecf20Sopenharmony_ci return (-1); 498c2ecf20Sopenharmony_ci} 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic void multipath_reschedule_retry (struct multipath_bh *mp_bh) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci unsigned long flags; 548c2ecf20Sopenharmony_ci struct mddev *mddev = mp_bh->mddev; 558c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 588c2ecf20Sopenharmony_ci list_add(&mp_bh->retry_list, &conf->retry_list); 598c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 608c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 618c2ecf20Sopenharmony_ci} 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci/* 648c2ecf20Sopenharmony_ci * multipath_end_bh_io() is called when we have finished servicing a multipathed 658c2ecf20Sopenharmony_ci * operation and are ready to return a success/failure code to the buffer 668c2ecf20Sopenharmony_ci * cache layer. 678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_cistatic void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status) 698c2ecf20Sopenharmony_ci{ 708c2ecf20Sopenharmony_ci struct bio *bio = mp_bh->master_bio; 718c2ecf20Sopenharmony_ci struct mpconf *conf = mp_bh->mddev->private; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci bio->bi_status = status; 748c2ecf20Sopenharmony_ci bio_endio(bio); 758c2ecf20Sopenharmony_ci mempool_free(mp_bh, &conf->pool); 768c2ecf20Sopenharmony_ci} 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_cistatic void multipath_end_request(struct bio *bio) 798c2ecf20Sopenharmony_ci{ 808c2ecf20Sopenharmony_ci struct multipath_bh *mp_bh = bio->bi_private; 818c2ecf20Sopenharmony_ci struct mpconf *conf = mp_bh->mddev->private; 828c2ecf20Sopenharmony_ci struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci if (!bio->bi_status) 858c2ecf20Sopenharmony_ci multipath_end_bh_io(mp_bh, 0); 868c2ecf20Sopenharmony_ci else if (!(bio->bi_opf & REQ_RAHEAD)) { 878c2ecf20Sopenharmony_ci /* 888c2ecf20Sopenharmony_ci * oops, IO error: 898c2ecf20Sopenharmony_ci */ 908c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 918c2ecf20Sopenharmony_ci md_error (mp_bh->mddev, rdev); 928c2ecf20Sopenharmony_ci pr_info("multipath: %s: rescheduling sector %llu\n", 938c2ecf20Sopenharmony_ci bdevname(rdev->bdev,b), 948c2ecf20Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 958c2ecf20Sopenharmony_ci multipath_reschedule_retry(mp_bh); 968c2ecf20Sopenharmony_ci } else 978c2ecf20Sopenharmony_ci multipath_end_bh_io(mp_bh, bio->bi_status); 988c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 998c2ecf20Sopenharmony_ci} 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_cistatic bool multipath_make_request(struct mddev *mddev, struct bio * bio) 1028c2ecf20Sopenharmony_ci{ 1038c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 1048c2ecf20Sopenharmony_ci struct multipath_bh * mp_bh; 1058c2ecf20Sopenharmony_ci struct multipath_info *multipath; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci if (unlikely(bio->bi_opf & REQ_PREFLUSH) 1088c2ecf20Sopenharmony_ci && md_flush_request(mddev, bio)) 1098c2ecf20Sopenharmony_ci return true; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci mp_bh->master_bio = bio; 1148c2ecf20Sopenharmony_ci mp_bh->mddev = mddev; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci mp_bh->path = multipath_map(conf); 1178c2ecf20Sopenharmony_ci if (mp_bh->path < 0) { 1188c2ecf20Sopenharmony_ci bio_io_error(bio); 1198c2ecf20Sopenharmony_ci mempool_free(mp_bh, &conf->pool); 1208c2ecf20Sopenharmony_ci return true; 1218c2ecf20Sopenharmony_ci } 1228c2ecf20Sopenharmony_ci multipath = conf->multipaths + mp_bh->path; 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci bio_init(&mp_bh->bio, NULL, 0); 1258c2ecf20Sopenharmony_ci __bio_clone_fast(&mp_bh->bio, bio); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; 1288c2ecf20Sopenharmony_ci bio_set_dev(&mp_bh->bio, multipath->rdev->bdev); 1298c2ecf20Sopenharmony_ci mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; 1308c2ecf20Sopenharmony_ci mp_bh->bio.bi_end_io = multipath_end_request; 1318c2ecf20Sopenharmony_ci mp_bh->bio.bi_private = mp_bh; 1328c2ecf20Sopenharmony_ci mddev_check_writesame(mddev, &mp_bh->bio); 1338c2ecf20Sopenharmony_ci mddev_check_write_zeroes(mddev, &mp_bh->bio); 1348c2ecf20Sopenharmony_ci submit_bio_noacct(&mp_bh->bio); 1358c2ecf20Sopenharmony_ci return true; 1368c2ecf20Sopenharmony_ci} 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_cistatic void multipath_status(struct seq_file *seq, struct mddev *mddev) 1398c2ecf20Sopenharmony_ci{ 1408c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 1418c2ecf20Sopenharmony_ci int i; 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci seq_printf (seq, " [%d/%d] [", conf->raid_disks, 1448c2ecf20Sopenharmony_ci conf->raid_disks - mddev->degraded); 1458c2ecf20Sopenharmony_ci rcu_read_lock(); 1468c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 1478c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); 1488c2ecf20Sopenharmony_ci seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci rcu_read_unlock(); 1518c2ecf20Sopenharmony_ci seq_putc(seq, ']'); 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci/* 1558c2ecf20Sopenharmony_ci * Careful, this can execute in IRQ contexts as well! 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_cistatic void multipath_error (struct mddev *mddev, struct md_rdev *rdev) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 1608c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci if (conf->raid_disks - mddev->degraded <= 1) { 1638c2ecf20Sopenharmony_ci /* 1648c2ecf20Sopenharmony_ci * Uh oh, we can do nothing if this is our last path, but 1658c2ecf20Sopenharmony_ci * first check if this is a queued request for a device 1668c2ecf20Sopenharmony_ci * which has just failed. 1678c2ecf20Sopenharmony_ci */ 1688c2ecf20Sopenharmony_ci pr_warn("multipath: only one IO path left and IO error.\n"); 1698c2ecf20Sopenharmony_ci /* leave it active... it's all we have */ 1708c2ecf20Sopenharmony_ci return; 1718c2ecf20Sopenharmony_ci } 1728c2ecf20Sopenharmony_ci /* 1738c2ecf20Sopenharmony_ci * Mark disk as unusable 1748c2ecf20Sopenharmony_ci */ 1758c2ecf20Sopenharmony_ci if (test_and_clear_bit(In_sync, &rdev->flags)) { 1768c2ecf20Sopenharmony_ci unsigned long flags; 1778c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 1788c2ecf20Sopenharmony_ci mddev->degraded++; 1798c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 1808c2ecf20Sopenharmony_ci } 1818c2ecf20Sopenharmony_ci set_bit(Faulty, &rdev->flags); 1828c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 1838c2ecf20Sopenharmony_ci pr_err("multipath: IO failure on %s, disabling IO path.\n" 1848c2ecf20Sopenharmony_ci "multipath: Operation continuing on %d IO paths.\n", 1858c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b), 1868c2ecf20Sopenharmony_ci conf->raid_disks - mddev->degraded); 1878c2ecf20Sopenharmony_ci} 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_cistatic void print_multipath_conf (struct mpconf *conf) 1908c2ecf20Sopenharmony_ci{ 1918c2ecf20Sopenharmony_ci int i; 1928c2ecf20Sopenharmony_ci struct multipath_info *tmp; 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci pr_debug("MULTIPATH conf printout:\n"); 1958c2ecf20Sopenharmony_ci if (!conf) { 1968c2ecf20Sopenharmony_ci pr_debug("(conf==NULL)\n"); 1978c2ecf20Sopenharmony_ci return; 1988c2ecf20Sopenharmony_ci } 1998c2ecf20Sopenharmony_ci pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, 2008c2ecf20Sopenharmony_ci conf->raid_disks); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 2038c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 2048c2ecf20Sopenharmony_ci tmp = conf->multipaths + i; 2058c2ecf20Sopenharmony_ci if (tmp->rdev) 2068c2ecf20Sopenharmony_ci pr_debug(" disk%d, o:%d, dev:%s\n", 2078c2ecf20Sopenharmony_ci i,!test_bit(Faulty, &tmp->rdev->flags), 2088c2ecf20Sopenharmony_ci bdevname(tmp->rdev->bdev,b)); 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci} 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_cistatic int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) 2138c2ecf20Sopenharmony_ci{ 2148c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 2158c2ecf20Sopenharmony_ci int err = -EEXIST; 2168c2ecf20Sopenharmony_ci int path; 2178c2ecf20Sopenharmony_ci struct multipath_info *p; 2188c2ecf20Sopenharmony_ci int first = 0; 2198c2ecf20Sopenharmony_ci int last = mddev->raid_disks - 1; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0) 2228c2ecf20Sopenharmony_ci first = last = rdev->raid_disk; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci print_multipath_conf(conf); 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci for (path = first; path <= last; path++) 2278c2ecf20Sopenharmony_ci if ((p=conf->multipaths+path)->rdev == NULL) { 2288c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 2298c2ecf20Sopenharmony_ci rdev->data_offset << 9); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci err = md_integrity_add_rdev(rdev, mddev); 2328c2ecf20Sopenharmony_ci if (err) 2338c2ecf20Sopenharmony_ci break; 2348c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 2358c2ecf20Sopenharmony_ci mddev->degraded--; 2368c2ecf20Sopenharmony_ci rdev->raid_disk = path; 2378c2ecf20Sopenharmony_ci set_bit(In_sync, &rdev->flags); 2388c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 2398c2ecf20Sopenharmony_ci rcu_assign_pointer(p->rdev, rdev); 2408c2ecf20Sopenharmony_ci err = 0; 2418c2ecf20Sopenharmony_ci break; 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci print_multipath_conf(conf); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci return err; 2478c2ecf20Sopenharmony_ci} 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_cistatic int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) 2508c2ecf20Sopenharmony_ci{ 2518c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 2528c2ecf20Sopenharmony_ci int err = 0; 2538c2ecf20Sopenharmony_ci int number = rdev->raid_disk; 2548c2ecf20Sopenharmony_ci struct multipath_info *p = conf->multipaths + number; 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci print_multipath_conf(conf); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci if (rdev == p->rdev) { 2598c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags) || 2608c2ecf20Sopenharmony_ci atomic_read(&rdev->nr_pending)) { 2618c2ecf20Sopenharmony_ci pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number); 2628c2ecf20Sopenharmony_ci err = -EBUSY; 2638c2ecf20Sopenharmony_ci goto abort; 2648c2ecf20Sopenharmony_ci } 2658c2ecf20Sopenharmony_ci p->rdev = NULL; 2668c2ecf20Sopenharmony_ci if (!test_bit(RemoveSynchronized, &rdev->flags)) { 2678c2ecf20Sopenharmony_ci synchronize_rcu(); 2688c2ecf20Sopenharmony_ci if (atomic_read(&rdev->nr_pending)) { 2698c2ecf20Sopenharmony_ci /* lost the race, try later */ 2708c2ecf20Sopenharmony_ci err = -EBUSY; 2718c2ecf20Sopenharmony_ci p->rdev = rdev; 2728c2ecf20Sopenharmony_ci goto abort; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci err = md_integrity_register(mddev); 2768c2ecf20Sopenharmony_ci } 2778c2ecf20Sopenharmony_ciabort: 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci print_multipath_conf(conf); 2808c2ecf20Sopenharmony_ci return err; 2818c2ecf20Sopenharmony_ci} 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci/* 2848c2ecf20Sopenharmony_ci * This is a kernel thread which: 2858c2ecf20Sopenharmony_ci * 2868c2ecf20Sopenharmony_ci * 1. Retries failed read operations on working multipaths. 2878c2ecf20Sopenharmony_ci * 2. Updates the raid superblock when problems encounter. 2888c2ecf20Sopenharmony_ci * 3. Performs writes following reads for array syncronising. 2898c2ecf20Sopenharmony_ci */ 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_cistatic void multipathd(struct md_thread *thread) 2928c2ecf20Sopenharmony_ci{ 2938c2ecf20Sopenharmony_ci struct mddev *mddev = thread->mddev; 2948c2ecf20Sopenharmony_ci struct multipath_bh *mp_bh; 2958c2ecf20Sopenharmony_ci struct bio *bio; 2968c2ecf20Sopenharmony_ci unsigned long flags; 2978c2ecf20Sopenharmony_ci struct mpconf *conf = mddev->private; 2988c2ecf20Sopenharmony_ci struct list_head *head = &conf->retry_list; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci md_check_recovery(mddev); 3018c2ecf20Sopenharmony_ci for (;;) { 3028c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 3038c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 3048c2ecf20Sopenharmony_ci if (list_empty(head)) 3058c2ecf20Sopenharmony_ci break; 3068c2ecf20Sopenharmony_ci mp_bh = list_entry(head->prev, struct multipath_bh, retry_list); 3078c2ecf20Sopenharmony_ci list_del(head->prev); 3088c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci bio = &mp_bh->bio; 3118c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci if ((mp_bh->path = multipath_map (conf))<0) { 3148c2ecf20Sopenharmony_ci pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", 3158c2ecf20Sopenharmony_ci bio_devname(bio, b), 3168c2ecf20Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 3178c2ecf20Sopenharmony_ci multipath_end_bh_io(mp_bh, BLK_STS_IOERR); 3188c2ecf20Sopenharmony_ci } else { 3198c2ecf20Sopenharmony_ci pr_err("multipath: %s: redirecting sector %llu to another IO path\n", 3208c2ecf20Sopenharmony_ci bio_devname(bio, b), 3218c2ecf20Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector); 3228c2ecf20Sopenharmony_ci *bio = *(mp_bh->master_bio); 3238c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector += 3248c2ecf20Sopenharmony_ci conf->multipaths[mp_bh->path].rdev->data_offset; 3258c2ecf20Sopenharmony_ci bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev); 3268c2ecf20Sopenharmony_ci bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 3278c2ecf20Sopenharmony_ci bio->bi_end_io = multipath_end_request; 3288c2ecf20Sopenharmony_ci bio->bi_private = mp_bh; 3298c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 3308c2ecf20Sopenharmony_ci } 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 3338c2ecf20Sopenharmony_ci} 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_cistatic sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks) 3368c2ecf20Sopenharmony_ci{ 3378c2ecf20Sopenharmony_ci WARN_ONCE(sectors || raid_disks, 3388c2ecf20Sopenharmony_ci "%s does not support generic reshape\n", __func__); 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci return mddev->dev_sectors; 3418c2ecf20Sopenharmony_ci} 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_cistatic int multipath_run (struct mddev *mddev) 3448c2ecf20Sopenharmony_ci{ 3458c2ecf20Sopenharmony_ci struct mpconf *conf; 3468c2ecf20Sopenharmony_ci int disk_idx; 3478c2ecf20Sopenharmony_ci struct multipath_info *disk; 3488c2ecf20Sopenharmony_ci struct md_rdev *rdev; 3498c2ecf20Sopenharmony_ci int working_disks; 3508c2ecf20Sopenharmony_ci int ret; 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci if (md_check_no_bitmap(mddev)) 3538c2ecf20Sopenharmony_ci return -EINVAL; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci if (mddev->level != LEVEL_MULTIPATH) { 3568c2ecf20Sopenharmony_ci pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n", 3578c2ecf20Sopenharmony_ci mdname(mddev), mddev->level); 3588c2ecf20Sopenharmony_ci goto out; 3598c2ecf20Sopenharmony_ci } 3608c2ecf20Sopenharmony_ci /* 3618c2ecf20Sopenharmony_ci * copy the already verified devices into our private MULTIPATH 3628c2ecf20Sopenharmony_ci * bookkeeping area. [whatever we allocate in multipath_run(), 3638c2ecf20Sopenharmony_ci * should be freed in multipath_free()] 3648c2ecf20Sopenharmony_ci */ 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); 3678c2ecf20Sopenharmony_ci mddev->private = conf; 3688c2ecf20Sopenharmony_ci if (!conf) 3698c2ecf20Sopenharmony_ci goto out; 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci conf->multipaths = kcalloc(mddev->raid_disks, 3728c2ecf20Sopenharmony_ci sizeof(struct multipath_info), 3738c2ecf20Sopenharmony_ci GFP_KERNEL); 3748c2ecf20Sopenharmony_ci if (!conf->multipaths) 3758c2ecf20Sopenharmony_ci goto out_free_conf; 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci working_disks = 0; 3788c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 3798c2ecf20Sopenharmony_ci disk_idx = rdev->raid_disk; 3808c2ecf20Sopenharmony_ci if (disk_idx < 0 || 3818c2ecf20Sopenharmony_ci disk_idx >= mddev->raid_disks) 3828c2ecf20Sopenharmony_ci continue; 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci disk = conf->multipaths + disk_idx; 3858c2ecf20Sopenharmony_ci disk->rdev = rdev; 3868c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 3878c2ecf20Sopenharmony_ci rdev->data_offset << 9); 3888c2ecf20Sopenharmony_ci 3898c2ecf20Sopenharmony_ci if (!test_bit(Faulty, &rdev->flags)) 3908c2ecf20Sopenharmony_ci working_disks++; 3918c2ecf20Sopenharmony_ci } 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci conf->raid_disks = mddev->raid_disks; 3948c2ecf20Sopenharmony_ci conf->mddev = mddev; 3958c2ecf20Sopenharmony_ci spin_lock_init(&conf->device_lock); 3968c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->retry_list); 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci if (!working_disks) { 3998c2ecf20Sopenharmony_ci pr_warn("multipath: no operational IO paths for %s\n", 4008c2ecf20Sopenharmony_ci mdname(mddev)); 4018c2ecf20Sopenharmony_ci goto out_free_conf; 4028c2ecf20Sopenharmony_ci } 4038c2ecf20Sopenharmony_ci mddev->degraded = conf->raid_disks - working_disks; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS, 4068c2ecf20Sopenharmony_ci sizeof(struct multipath_bh)); 4078c2ecf20Sopenharmony_ci if (ret) 4088c2ecf20Sopenharmony_ci goto out_free_conf; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci mddev->thread = md_register_thread(multipathd, mddev, 4118c2ecf20Sopenharmony_ci "multipath"); 4128c2ecf20Sopenharmony_ci if (!mddev->thread) 4138c2ecf20Sopenharmony_ci goto out_free_conf; 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci pr_info("multipath: array %s active with %d out of %d IO paths\n", 4168c2ecf20Sopenharmony_ci mdname(mddev), conf->raid_disks - mddev->degraded, 4178c2ecf20Sopenharmony_ci mddev->raid_disks); 4188c2ecf20Sopenharmony_ci /* 4198c2ecf20Sopenharmony_ci * Ok, everything is just fine now 4208c2ecf20Sopenharmony_ci */ 4218c2ecf20Sopenharmony_ci md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci if (md_integrity_register(mddev)) 4248c2ecf20Sopenharmony_ci goto out_free_conf; 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ci return 0; 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ciout_free_conf: 4298c2ecf20Sopenharmony_ci mempool_exit(&conf->pool); 4308c2ecf20Sopenharmony_ci kfree(conf->multipaths); 4318c2ecf20Sopenharmony_ci kfree(conf); 4328c2ecf20Sopenharmony_ci mddev->private = NULL; 4338c2ecf20Sopenharmony_ciout: 4348c2ecf20Sopenharmony_ci return -EIO; 4358c2ecf20Sopenharmony_ci} 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_cistatic void multipath_free(struct mddev *mddev, void *priv) 4388c2ecf20Sopenharmony_ci{ 4398c2ecf20Sopenharmony_ci struct mpconf *conf = priv; 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci mempool_exit(&conf->pool); 4428c2ecf20Sopenharmony_ci kfree(conf->multipaths); 4438c2ecf20Sopenharmony_ci kfree(conf); 4448c2ecf20Sopenharmony_ci} 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_cistatic struct md_personality multipath_personality = 4478c2ecf20Sopenharmony_ci{ 4488c2ecf20Sopenharmony_ci .name = "multipath", 4498c2ecf20Sopenharmony_ci .level = LEVEL_MULTIPATH, 4508c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 4518c2ecf20Sopenharmony_ci .make_request = multipath_make_request, 4528c2ecf20Sopenharmony_ci .run = multipath_run, 4538c2ecf20Sopenharmony_ci .free = multipath_free, 4548c2ecf20Sopenharmony_ci .status = multipath_status, 4558c2ecf20Sopenharmony_ci .error_handler = multipath_error, 4568c2ecf20Sopenharmony_ci .hot_add_disk = multipath_add_disk, 4578c2ecf20Sopenharmony_ci .hot_remove_disk= multipath_remove_disk, 4588c2ecf20Sopenharmony_ci .size = multipath_size, 4598c2ecf20Sopenharmony_ci}; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_cistatic int __init multipath_init (void) 4628c2ecf20Sopenharmony_ci{ 4638c2ecf20Sopenharmony_ci return register_md_personality (&multipath_personality); 4648c2ecf20Sopenharmony_ci} 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_cistatic void __exit multipath_exit (void) 4678c2ecf20Sopenharmony_ci{ 4688c2ecf20Sopenharmony_ci unregister_md_personality (&multipath_personality); 4698c2ecf20Sopenharmony_ci} 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_cimodule_init(multipath_init); 4728c2ecf20Sopenharmony_cimodule_exit(multipath_exit); 4738c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 4748c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("simple multi-path personality for MD"); 4758c2ecf20Sopenharmony_ciMODULE_ALIAS("md-personality-7"); /* MULTIPATH */ 4768c2ecf20Sopenharmony_ciMODULE_ALIAS("md-multipath"); 4778c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level--4"); 478