18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * raid10.c : Multiple Devices driver for Linux 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2000-2004 Neil Brown 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * RAID-10 support for md. 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Base on code in raid1.c. See raid1.c for further copyright information. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/slab.h> 138c2ecf20Sopenharmony_ci#include <linux/delay.h> 148c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 158c2ecf20Sopenharmony_ci#include <linux/module.h> 168c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 178c2ecf20Sopenharmony_ci#include <linux/ratelimit.h> 188c2ecf20Sopenharmony_ci#include <linux/kthread.h> 198c2ecf20Sopenharmony_ci#include <linux/raid/md_p.h> 208c2ecf20Sopenharmony_ci#include <trace/events/block.h> 218c2ecf20Sopenharmony_ci#include "md.h" 228c2ecf20Sopenharmony_ci#include "raid10.h" 238c2ecf20Sopenharmony_ci#include "raid0.h" 248c2ecf20Sopenharmony_ci#include "md-bitmap.h" 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/* 278c2ecf20Sopenharmony_ci * RAID10 provides a combination of RAID0 and RAID1 functionality. 288c2ecf20Sopenharmony_ci * The layout of data is defined by 298c2ecf20Sopenharmony_ci * chunk_size 308c2ecf20Sopenharmony_ci * raid_disks 318c2ecf20Sopenharmony_ci * near_copies (stored in low byte of layout) 328c2ecf20Sopenharmony_ci * far_copies (stored in second byte of layout) 338c2ecf20Sopenharmony_ci * far_offset (stored in bit 16 of layout ) 348c2ecf20Sopenharmony_ci * use_far_sets (stored in bit 17 of layout ) 358c2ecf20Sopenharmony_ci * use_far_sets_bugfixed (stored in bit 18 of layout ) 368c2ecf20Sopenharmony_ci * 378c2ecf20Sopenharmony_ci * The data to be stored is divided into chunks using chunksize. 
Each device 388c2ecf20Sopenharmony_ci * is divided into far_copies sections. In each section, chunks are laid out 398c2ecf20Sopenharmony_ci * in a style similar to raid0, but near_copies copies of each chunk is stored 408c2ecf20Sopenharmony_ci * (each on a different drive). The starting device for each section is offset 418c2ecf20Sopenharmony_ci * near_copies from the starting device of the previous section. Thus there 428c2ecf20Sopenharmony_ci * are (near_copies * far_copies) of each chunk, and each is on a different 438c2ecf20Sopenharmony_ci * drive. near_copies and far_copies must be at least one, and their product 448c2ecf20Sopenharmony_ci * is at most raid_disks. 458c2ecf20Sopenharmony_ci * 468c2ecf20Sopenharmony_ci * If far_offset is true, then the far_copies are handled a bit differently. 478c2ecf20Sopenharmony_ci * The copies are still in different stripes, but instead of being very far 488c2ecf20Sopenharmony_ci * apart on disk, there are adjacent stripes. 498c2ecf20Sopenharmony_ci * 508c2ecf20Sopenharmony_ci * The far and offset algorithms are handled slightly differently if 518c2ecf20Sopenharmony_ci * 'use_far_sets' is true. In this case, the array's devices are grouped into 528c2ecf20Sopenharmony_ci * sets that are (near_copies * far_copies) in size. The far copied stripes 538c2ecf20Sopenharmony_ci * are still shifted by 'near_copies' devices, but this shifting stays confined 548c2ecf20Sopenharmony_ci * to the set rather than the entire array. This is done to improve the number 558c2ecf20Sopenharmony_ci * of device combinations that can fail without causing the array to fail. 568c2ecf20Sopenharmony_ci * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk 578c2ecf20Sopenharmony_ci * on a device): 588c2ecf20Sopenharmony_ci * A B C D A B C D E 598c2ecf20Sopenharmony_ci * ... ... 
608c2ecf20Sopenharmony_ci * D A B C E A B C D 618c2ecf20Sopenharmony_ci * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s): 628c2ecf20Sopenharmony_ci * [A B] [C D] [A B] [C D E] 638c2ecf20Sopenharmony_ci * |...| |...| |...| | ... | 648c2ecf20Sopenharmony_ci * [B A] [D C] [B A] [E C D] 658c2ecf20Sopenharmony_ci */ 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_cistatic void allow_barrier(struct r10conf *conf); 688c2ecf20Sopenharmony_cistatic void lower_barrier(struct r10conf *conf); 698c2ecf20Sopenharmony_cistatic int _enough(struct r10conf *conf, int previous, int ignore); 708c2ecf20Sopenharmony_cistatic int enough(struct r10conf *conf, int ignore); 718c2ecf20Sopenharmony_cistatic sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, 728c2ecf20Sopenharmony_ci int *skipped); 738c2ecf20Sopenharmony_cistatic void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); 748c2ecf20Sopenharmony_cistatic void end_reshape_write(struct bio *bio); 758c2ecf20Sopenharmony_cistatic void end_reshape(struct r10conf *conf); 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci#define raid10_log(md, fmt, args...) \ 788c2ecf20Sopenharmony_ci do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci#include "raid1-10.c" 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci/* 838c2ecf20Sopenharmony_ci * for resync bio, r10bio pointer can be retrieved from the per-bio 848c2ecf20Sopenharmony_ci * 'struct resync_pages'. 
858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_cistatic inline struct r10bio *get_resync_r10bio(struct bio *bio) 878c2ecf20Sopenharmony_ci{ 888c2ecf20Sopenharmony_ci return get_resync_pages(bio)->raid_bio; 898c2ecf20Sopenharmony_ci} 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_cistatic void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) 928c2ecf20Sopenharmony_ci{ 938c2ecf20Sopenharmony_ci struct r10conf *conf = data; 948c2ecf20Sopenharmony_ci int size = offsetof(struct r10bio, devs[conf->copies]); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci /* allocate a r10bio with room for raid_disks entries in the 978c2ecf20Sopenharmony_ci * bios array */ 988c2ecf20Sopenharmony_ci return kzalloc(size, gfp_flags); 998c2ecf20Sopenharmony_ci} 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) 1028c2ecf20Sopenharmony_ci/* amount of memory to reserve for resync requests */ 1038c2ecf20Sopenharmony_ci#define RESYNC_WINDOW (1024*1024) 1048c2ecf20Sopenharmony_ci/* maximum number of concurrent requests, memory permitting */ 1058c2ecf20Sopenharmony_ci#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE) 1068c2ecf20Sopenharmony_ci#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW) 1078c2ecf20Sopenharmony_ci#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9) 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci/* 1108c2ecf20Sopenharmony_ci * When performing a resync, we need to read and compare, so 1118c2ecf20Sopenharmony_ci * we need as many pages are there are copies. 
1128c2ecf20Sopenharmony_ci * When performing a recovery, we need 2 bios, one for read, 1138c2ecf20Sopenharmony_ci * one for write (we recover only one drive per r10buf) 1148c2ecf20Sopenharmony_ci * 1158c2ecf20Sopenharmony_ci */ 1168c2ecf20Sopenharmony_cistatic void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci struct r10conf *conf = data; 1198c2ecf20Sopenharmony_ci struct r10bio *r10_bio; 1208c2ecf20Sopenharmony_ci struct bio *bio; 1218c2ecf20Sopenharmony_ci int j; 1228c2ecf20Sopenharmony_ci int nalloc, nalloc_rp; 1238c2ecf20Sopenharmony_ci struct resync_pages *rps; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci r10_bio = r10bio_pool_alloc(gfp_flags, conf); 1268c2ecf20Sopenharmony_ci if (!r10_bio) 1278c2ecf20Sopenharmony_ci return NULL; 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) || 1308c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery)) 1318c2ecf20Sopenharmony_ci nalloc = conf->copies; /* resync */ 1328c2ecf20Sopenharmony_ci else 1338c2ecf20Sopenharmony_ci nalloc = 2; /* recovery */ 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci /* allocate once for all bios */ 1368c2ecf20Sopenharmony_ci if (!conf->have_replacement) 1378c2ecf20Sopenharmony_ci nalloc_rp = nalloc; 1388c2ecf20Sopenharmony_ci else 1398c2ecf20Sopenharmony_ci nalloc_rp = nalloc * 2; 1408c2ecf20Sopenharmony_ci rps = kmalloc_array(nalloc_rp, sizeof(struct resync_pages), gfp_flags); 1418c2ecf20Sopenharmony_ci if (!rps) 1428c2ecf20Sopenharmony_ci goto out_free_r10bio; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci /* 1458c2ecf20Sopenharmony_ci * Allocate bios. 
1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ci for (j = nalloc ; j-- ; ) { 1488c2ecf20Sopenharmony_ci bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); 1498c2ecf20Sopenharmony_ci if (!bio) 1508c2ecf20Sopenharmony_ci goto out_free_bio; 1518c2ecf20Sopenharmony_ci r10_bio->devs[j].bio = bio; 1528c2ecf20Sopenharmony_ci if (!conf->have_replacement) 1538c2ecf20Sopenharmony_ci continue; 1548c2ecf20Sopenharmony_ci bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); 1558c2ecf20Sopenharmony_ci if (!bio) 1568c2ecf20Sopenharmony_ci goto out_free_bio; 1578c2ecf20Sopenharmony_ci r10_bio->devs[j].repl_bio = bio; 1588c2ecf20Sopenharmony_ci } 1598c2ecf20Sopenharmony_ci /* 1608c2ecf20Sopenharmony_ci * Allocate RESYNC_PAGES data pages and attach them 1618c2ecf20Sopenharmony_ci * where needed. 1628c2ecf20Sopenharmony_ci */ 1638c2ecf20Sopenharmony_ci for (j = 0; j < nalloc; j++) { 1648c2ecf20Sopenharmony_ci struct bio *rbio = r10_bio->devs[j].repl_bio; 1658c2ecf20Sopenharmony_ci struct resync_pages *rp, *rp_repl; 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci rp = &rps[j]; 1688c2ecf20Sopenharmony_ci if (rbio) 1698c2ecf20Sopenharmony_ci rp_repl = &rps[nalloc + j]; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci bio = r10_bio->devs[j].bio; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci if (!j || test_bit(MD_RECOVERY_SYNC, 1748c2ecf20Sopenharmony_ci &conf->mddev->recovery)) { 1758c2ecf20Sopenharmony_ci if (resync_alloc_pages(rp, gfp_flags)) 1768c2ecf20Sopenharmony_ci goto out_free_pages; 1778c2ecf20Sopenharmony_ci } else { 1788c2ecf20Sopenharmony_ci memcpy(rp, &rps[0], sizeof(*rp)); 1798c2ecf20Sopenharmony_ci resync_get_all_pages(rp); 1808c2ecf20Sopenharmony_ci } 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci rp->raid_bio = r10_bio; 1838c2ecf20Sopenharmony_ci bio->bi_private = rp; 1848c2ecf20Sopenharmony_ci if (rbio) { 1858c2ecf20Sopenharmony_ci memcpy(rp_repl, rp, sizeof(*rp)); 1868c2ecf20Sopenharmony_ci rbio->bi_private = rp_repl; 1878c2ecf20Sopenharmony_ci } 
1888c2ecf20Sopenharmony_ci } 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci return r10_bio; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ciout_free_pages: 1938c2ecf20Sopenharmony_ci while (--j >= 0) 1948c2ecf20Sopenharmony_ci resync_free_pages(&rps[j]); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci j = 0; 1978c2ecf20Sopenharmony_ciout_free_bio: 1988c2ecf20Sopenharmony_ci for ( ; j < nalloc; j++) { 1998c2ecf20Sopenharmony_ci if (r10_bio->devs[j].bio) 2008c2ecf20Sopenharmony_ci bio_put(r10_bio->devs[j].bio); 2018c2ecf20Sopenharmony_ci if (r10_bio->devs[j].repl_bio) 2028c2ecf20Sopenharmony_ci bio_put(r10_bio->devs[j].repl_bio); 2038c2ecf20Sopenharmony_ci } 2048c2ecf20Sopenharmony_ci kfree(rps); 2058c2ecf20Sopenharmony_ciout_free_r10bio: 2068c2ecf20Sopenharmony_ci rbio_pool_free(r10_bio, conf); 2078c2ecf20Sopenharmony_ci return NULL; 2088c2ecf20Sopenharmony_ci} 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistatic void r10buf_pool_free(void *__r10_bio, void *data) 2118c2ecf20Sopenharmony_ci{ 2128c2ecf20Sopenharmony_ci struct r10conf *conf = data; 2138c2ecf20Sopenharmony_ci struct r10bio *r10bio = __r10_bio; 2148c2ecf20Sopenharmony_ci int j; 2158c2ecf20Sopenharmony_ci struct resync_pages *rp = NULL; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci for (j = conf->copies; j--; ) { 2188c2ecf20Sopenharmony_ci struct bio *bio = r10bio->devs[j].bio; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci if (bio) { 2218c2ecf20Sopenharmony_ci rp = get_resync_pages(bio); 2228c2ecf20Sopenharmony_ci resync_free_pages(rp); 2238c2ecf20Sopenharmony_ci bio_put(bio); 2248c2ecf20Sopenharmony_ci } 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci bio = r10bio->devs[j].repl_bio; 2278c2ecf20Sopenharmony_ci if (bio) 2288c2ecf20Sopenharmony_ci bio_put(bio); 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci /* resync pages array stored in the 1st bio's .bi_private */ 2328c2ecf20Sopenharmony_ci kfree(rp); 
2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci rbio_pool_free(r10bio, conf); 2358c2ecf20Sopenharmony_ci} 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_cistatic void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) 2388c2ecf20Sopenharmony_ci{ 2398c2ecf20Sopenharmony_ci int i; 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci for (i = 0; i < conf->copies; i++) { 2428c2ecf20Sopenharmony_ci struct bio **bio = & r10_bio->devs[i].bio; 2438c2ecf20Sopenharmony_ci if (!BIO_SPECIAL(*bio)) 2448c2ecf20Sopenharmony_ci bio_put(*bio); 2458c2ecf20Sopenharmony_ci *bio = NULL; 2468c2ecf20Sopenharmony_ci bio = &r10_bio->devs[i].repl_bio; 2478c2ecf20Sopenharmony_ci if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio)) 2488c2ecf20Sopenharmony_ci bio_put(*bio); 2498c2ecf20Sopenharmony_ci *bio = NULL; 2508c2ecf20Sopenharmony_ci } 2518c2ecf20Sopenharmony_ci} 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_cistatic void free_r10bio(struct r10bio *r10_bio) 2548c2ecf20Sopenharmony_ci{ 2558c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci put_all_bios(conf, r10_bio); 2588c2ecf20Sopenharmony_ci mempool_free(r10_bio, &conf->r10bio_pool); 2598c2ecf20Sopenharmony_ci} 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_cistatic void put_buf(struct r10bio *r10_bio) 2628c2ecf20Sopenharmony_ci{ 2638c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci mempool_free(r10_bio, &conf->r10buf_pool); 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci lower_barrier(conf); 2688c2ecf20Sopenharmony_ci} 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_cistatic void reschedule_retry(struct r10bio *r10_bio) 2718c2ecf20Sopenharmony_ci{ 2728c2ecf20Sopenharmony_ci unsigned long flags; 2738c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 2748c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 2758c2ecf20Sopenharmony_ci 
2768c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 2778c2ecf20Sopenharmony_ci list_add(&r10_bio->retry_list, &conf->retry_list); 2788c2ecf20Sopenharmony_ci conf->nr_queued ++; 2798c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci /* wake up frozen array... */ 2828c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci/* 2888c2ecf20Sopenharmony_ci * raid_end_bio_io() is called when we have finished servicing a mirrored 2898c2ecf20Sopenharmony_ci * operation and are ready to return a success/failure code to the buffer 2908c2ecf20Sopenharmony_ci * cache layer. 2918c2ecf20Sopenharmony_ci */ 2928c2ecf20Sopenharmony_cistatic void raid_end_bio_io(struct r10bio *r10_bio) 2938c2ecf20Sopenharmony_ci{ 2948c2ecf20Sopenharmony_ci struct bio *bio = r10_bio->master_bio; 2958c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) 2988c2ecf20Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci bio_endio(bio); 3018c2ecf20Sopenharmony_ci /* 3028c2ecf20Sopenharmony_ci * Wake up any possible resync thread that waits for the device 3038c2ecf20Sopenharmony_ci * to go idle. 3048c2ecf20Sopenharmony_ci */ 3058c2ecf20Sopenharmony_ci allow_barrier(conf); 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci free_r10bio(r10_bio); 3088c2ecf20Sopenharmony_ci} 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci/* 3118c2ecf20Sopenharmony_ci * Update disk head position estimator based on IRQ completion info. 
3128c2ecf20Sopenharmony_ci */ 3138c2ecf20Sopenharmony_cistatic inline void update_head_pos(int slot, struct r10bio *r10_bio) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci conf->mirrors[r10_bio->devs[slot].devnum].head_position = 3188c2ecf20Sopenharmony_ci r10_bio->devs[slot].addr + (r10_bio->sectors); 3198c2ecf20Sopenharmony_ci} 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci/* 3228c2ecf20Sopenharmony_ci * Find the disk number which triggered given bio 3238c2ecf20Sopenharmony_ci */ 3248c2ecf20Sopenharmony_cistatic int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, 3258c2ecf20Sopenharmony_ci struct bio *bio, int *slotp, int *replp) 3268c2ecf20Sopenharmony_ci{ 3278c2ecf20Sopenharmony_ci int slot; 3288c2ecf20Sopenharmony_ci int repl = 0; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci for (slot = 0; slot < conf->copies; slot++) { 3318c2ecf20Sopenharmony_ci if (r10_bio->devs[slot].bio == bio) 3328c2ecf20Sopenharmony_ci break; 3338c2ecf20Sopenharmony_ci if (r10_bio->devs[slot].repl_bio == bio) { 3348c2ecf20Sopenharmony_ci repl = 1; 3358c2ecf20Sopenharmony_ci break; 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci BUG_ON(slot == conf->copies); 3408c2ecf20Sopenharmony_ci update_head_pos(slot, r10_bio); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci if (slotp) 3438c2ecf20Sopenharmony_ci *slotp = slot; 3448c2ecf20Sopenharmony_ci if (replp) 3458c2ecf20Sopenharmony_ci *replp = repl; 3468c2ecf20Sopenharmony_ci return r10_bio->devs[slot].devnum; 3478c2ecf20Sopenharmony_ci} 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_cistatic void raid10_end_read_request(struct bio *bio) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci int uptodate = !bio->bi_status; 3528c2ecf20Sopenharmony_ci struct r10bio *r10_bio = bio->bi_private; 3538c2ecf20Sopenharmony_ci int slot; 
3548c2ecf20Sopenharmony_ci struct md_rdev *rdev; 3558c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci slot = r10_bio->read_slot; 3588c2ecf20Sopenharmony_ci rdev = r10_bio->devs[slot].rdev; 3598c2ecf20Sopenharmony_ci /* 3608c2ecf20Sopenharmony_ci * this branch is our 'one mirror IO has finished' event handler: 3618c2ecf20Sopenharmony_ci */ 3628c2ecf20Sopenharmony_ci update_head_pos(slot, r10_bio); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci if (uptodate) { 3658c2ecf20Sopenharmony_ci /* 3668c2ecf20Sopenharmony_ci * Set R10BIO_Uptodate in our master bio, so that 3678c2ecf20Sopenharmony_ci * we will return a good error code to the higher 3688c2ecf20Sopenharmony_ci * levels even if IO on some other mirrored buffer fails. 3698c2ecf20Sopenharmony_ci * 3708c2ecf20Sopenharmony_ci * The 'master' represents the composite IO operation to 3718c2ecf20Sopenharmony_ci * user-side. So if something waits for IO, then it will 3728c2ecf20Sopenharmony_ci * wait for the 'master' bio. 3738c2ecf20Sopenharmony_ci */ 3748c2ecf20Sopenharmony_ci set_bit(R10BIO_Uptodate, &r10_bio->state); 3758c2ecf20Sopenharmony_ci } else { 3768c2ecf20Sopenharmony_ci /* If all other devices that store this block have 3778c2ecf20Sopenharmony_ci * failed, we want to return the error upwards rather 3788c2ecf20Sopenharmony_ci * than fail the last device. 
Here we redefine 3798c2ecf20Sopenharmony_ci * "uptodate" to mean "Don't want to retry" 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_ci if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state), 3828c2ecf20Sopenharmony_ci rdev->raid_disk)) 3838c2ecf20Sopenharmony_ci uptodate = 1; 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci if (uptodate) { 3868c2ecf20Sopenharmony_ci raid_end_bio_io(r10_bio); 3878c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 3888c2ecf20Sopenharmony_ci } else { 3898c2ecf20Sopenharmony_ci /* 3908c2ecf20Sopenharmony_ci * oops, read error - keep the refcount on the rdev 3918c2ecf20Sopenharmony_ci */ 3928c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 3938c2ecf20Sopenharmony_ci pr_err_ratelimited("md/raid10:%s: %s: rescheduling sector %llu\n", 3948c2ecf20Sopenharmony_ci mdname(conf->mddev), 3958c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b), 3968c2ecf20Sopenharmony_ci (unsigned long long)r10_bio->sector); 3978c2ecf20Sopenharmony_ci set_bit(R10BIO_ReadError, &r10_bio->state); 3988c2ecf20Sopenharmony_ci reschedule_retry(r10_bio); 3998c2ecf20Sopenharmony_ci } 4008c2ecf20Sopenharmony_ci} 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_cistatic void close_write(struct r10bio *r10_bio) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci /* clear the bitmap if all writes complete successfully */ 4058c2ecf20Sopenharmony_ci md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, 4068c2ecf20Sopenharmony_ci r10_bio->sectors, 4078c2ecf20Sopenharmony_ci !test_bit(R10BIO_Degraded, &r10_bio->state), 4088c2ecf20Sopenharmony_ci 0); 4098c2ecf20Sopenharmony_ci md_write_end(r10_bio->mddev); 4108c2ecf20Sopenharmony_ci} 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_cistatic void one_write_done(struct r10bio *r10_bio) 4138c2ecf20Sopenharmony_ci{ 4148c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&r10_bio->remaining)) { 4158c2ecf20Sopenharmony_ci if (test_bit(R10BIO_WriteError, &r10_bio->state)) 4168c2ecf20Sopenharmony_ci 
reschedule_retry(r10_bio); 4178c2ecf20Sopenharmony_ci else { 4188c2ecf20Sopenharmony_ci close_write(r10_bio); 4198c2ecf20Sopenharmony_ci if (test_bit(R10BIO_MadeGood, &r10_bio->state)) 4208c2ecf20Sopenharmony_ci reschedule_retry(r10_bio); 4218c2ecf20Sopenharmony_ci else 4228c2ecf20Sopenharmony_ci raid_end_bio_io(r10_bio); 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci } 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_cistatic void raid10_end_write_request(struct bio *bio) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci struct r10bio *r10_bio = bio->bi_private; 4308c2ecf20Sopenharmony_ci int dev; 4318c2ecf20Sopenharmony_ci int dec_rdev = 1; 4328c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 4338c2ecf20Sopenharmony_ci int slot, repl; 4348c2ecf20Sopenharmony_ci struct md_rdev *rdev = NULL; 4358c2ecf20Sopenharmony_ci struct bio *to_put = NULL; 4368c2ecf20Sopenharmony_ci bool discard_error; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_ci dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci if (repl) 4438c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].replacement; 4448c2ecf20Sopenharmony_ci if (!rdev) { 4458c2ecf20Sopenharmony_ci smp_rmb(); 4468c2ecf20Sopenharmony_ci repl = 0; 4478c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].rdev; 4488c2ecf20Sopenharmony_ci } 4498c2ecf20Sopenharmony_ci /* 4508c2ecf20Sopenharmony_ci * this branch is our 'one mirror IO has finished' event handler: 4518c2ecf20Sopenharmony_ci */ 4528c2ecf20Sopenharmony_ci if (bio->bi_status && !discard_error) { 4538c2ecf20Sopenharmony_ci if (repl) 4548c2ecf20Sopenharmony_ci /* Never record new bad blocks to replacement, 4558c2ecf20Sopenharmony_ci * just fail it. 
4568c2ecf20Sopenharmony_ci */ 4578c2ecf20Sopenharmony_ci md_error(rdev->mddev, rdev); 4588c2ecf20Sopenharmony_ci else { 4598c2ecf20Sopenharmony_ci set_bit(WriteErrorSeen, &rdev->flags); 4608c2ecf20Sopenharmony_ci if (!test_and_set_bit(WantReplacement, &rdev->flags)) 4618c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, 4628c2ecf20Sopenharmony_ci &rdev->mddev->recovery); 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci dec_rdev = 0; 4658c2ecf20Sopenharmony_ci if (test_bit(FailFast, &rdev->flags) && 4668c2ecf20Sopenharmony_ci (bio->bi_opf & MD_FAILFAST)) { 4678c2ecf20Sopenharmony_ci md_error(rdev->mddev, rdev); 4688c2ecf20Sopenharmony_ci } 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci /* 4718c2ecf20Sopenharmony_ci * When the device is faulty, it is not necessary to 4728c2ecf20Sopenharmony_ci * handle write error. 4738c2ecf20Sopenharmony_ci */ 4748c2ecf20Sopenharmony_ci if (!test_bit(Faulty, &rdev->flags)) 4758c2ecf20Sopenharmony_ci set_bit(R10BIO_WriteError, &r10_bio->state); 4768c2ecf20Sopenharmony_ci else { 4778c2ecf20Sopenharmony_ci /* Fail the request */ 4788c2ecf20Sopenharmony_ci set_bit(R10BIO_Degraded, &r10_bio->state); 4798c2ecf20Sopenharmony_ci r10_bio->devs[slot].bio = NULL; 4808c2ecf20Sopenharmony_ci to_put = bio; 4818c2ecf20Sopenharmony_ci dec_rdev = 1; 4828c2ecf20Sopenharmony_ci } 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci } else { 4858c2ecf20Sopenharmony_ci /* 4868c2ecf20Sopenharmony_ci * Set R10BIO_Uptodate in our master bio, so that 4878c2ecf20Sopenharmony_ci * we will return a good error code for to the higher 4888c2ecf20Sopenharmony_ci * levels even if IO on some other mirrored buffer fails. 4898c2ecf20Sopenharmony_ci * 4908c2ecf20Sopenharmony_ci * The 'master' represents the composite IO operation to 4918c2ecf20Sopenharmony_ci * user-side. So if something waits for IO, then it will 4928c2ecf20Sopenharmony_ci * wait for the 'master' bio. 
4938c2ecf20Sopenharmony_ci */ 4948c2ecf20Sopenharmony_ci sector_t first_bad; 4958c2ecf20Sopenharmony_ci int bad_sectors; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci /* 4988c2ecf20Sopenharmony_ci * Do not set R10BIO_Uptodate if the current device is 4998c2ecf20Sopenharmony_ci * rebuilding or Faulty. This is because we cannot use 5008c2ecf20Sopenharmony_ci * such device for properly reading the data back (we could 5018c2ecf20Sopenharmony_ci * potentially use it, if the current write would have felt 5028c2ecf20Sopenharmony_ci * before rdev->recovery_offset, but for simplicity we don't 5038c2ecf20Sopenharmony_ci * check this here. 5048c2ecf20Sopenharmony_ci */ 5058c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags) && 5068c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) 5078c2ecf20Sopenharmony_ci set_bit(R10BIO_Uptodate, &r10_bio->state); 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci /* Maybe we can clear some bad blocks. */ 5108c2ecf20Sopenharmony_ci if (is_badblock(rdev, 5118c2ecf20Sopenharmony_ci r10_bio->devs[slot].addr, 5128c2ecf20Sopenharmony_ci r10_bio->sectors, 5138c2ecf20Sopenharmony_ci &first_bad, &bad_sectors) && !discard_error) { 5148c2ecf20Sopenharmony_ci bio_put(bio); 5158c2ecf20Sopenharmony_ci if (repl) 5168c2ecf20Sopenharmony_ci r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; 5178c2ecf20Sopenharmony_ci else 5188c2ecf20Sopenharmony_ci r10_bio->devs[slot].bio = IO_MADE_GOOD; 5198c2ecf20Sopenharmony_ci dec_rdev = 0; 5208c2ecf20Sopenharmony_ci set_bit(R10BIO_MadeGood, &r10_bio->state); 5218c2ecf20Sopenharmony_ci } 5228c2ecf20Sopenharmony_ci } 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci /* 5258c2ecf20Sopenharmony_ci * 5268c2ecf20Sopenharmony_ci * Let's see if all mirrored write operations have finished 5278c2ecf20Sopenharmony_ci * already. 
5288c2ecf20Sopenharmony_ci */ 5298c2ecf20Sopenharmony_ci one_write_done(r10_bio); 5308c2ecf20Sopenharmony_ci if (dec_rdev) 5318c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 5328c2ecf20Sopenharmony_ci if (to_put) 5338c2ecf20Sopenharmony_ci bio_put(to_put); 5348c2ecf20Sopenharmony_ci} 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci/* 5378c2ecf20Sopenharmony_ci * RAID10 layout manager 5388c2ecf20Sopenharmony_ci * As well as the chunksize and raid_disks count, there are two 5398c2ecf20Sopenharmony_ci * parameters: near_copies and far_copies. 5408c2ecf20Sopenharmony_ci * near_copies * far_copies must be <= raid_disks. 5418c2ecf20Sopenharmony_ci * Normally one of these will be 1. 5428c2ecf20Sopenharmony_ci * If both are 1, we get raid0. 5438c2ecf20Sopenharmony_ci * If near_copies == raid_disks, we get raid1. 5448c2ecf20Sopenharmony_ci * 5458c2ecf20Sopenharmony_ci * Chunks are laid out in raid0 style with near_copies copies of the 5468c2ecf20Sopenharmony_ci * first chunk, followed by near_copies copies of the next chunk and 5478c2ecf20Sopenharmony_ci * so on. 5488c2ecf20Sopenharmony_ci * If far_copies > 1, then after 1/far_copies of the array has been assigned 5498c2ecf20Sopenharmony_ci * as described above, we start again with a device offset of near_copies. 5508c2ecf20Sopenharmony_ci * So we effectively have another copy of the whole array further down all 5518c2ecf20Sopenharmony_ci * the drives, but with blocks on different drives. 5528c2ecf20Sopenharmony_ci * With this layout, and block is never stored twice on the one device. 5538c2ecf20Sopenharmony_ci * 5548c2ecf20Sopenharmony_ci * raid10_find_phys finds the sector offset of a given virtual sector 5558c2ecf20Sopenharmony_ci * on each device that it is on. 
5568c2ecf20Sopenharmony_ci * 5578c2ecf20Sopenharmony_ci * raid10_find_virt does the reverse mapping, from a device and a 5588c2ecf20Sopenharmony_ci * sector offset to a virtual address 5598c2ecf20Sopenharmony_ci */ 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_cistatic void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) 5628c2ecf20Sopenharmony_ci{ 5638c2ecf20Sopenharmony_ci int n,f; 5648c2ecf20Sopenharmony_ci sector_t sector; 5658c2ecf20Sopenharmony_ci sector_t chunk; 5668c2ecf20Sopenharmony_ci sector_t stripe; 5678c2ecf20Sopenharmony_ci int dev; 5688c2ecf20Sopenharmony_ci int slot = 0; 5698c2ecf20Sopenharmony_ci int last_far_set_start, last_far_set_size; 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; 5728c2ecf20Sopenharmony_ci last_far_set_start *= geo->far_set_size; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci last_far_set_size = geo->far_set_size; 5758c2ecf20Sopenharmony_ci last_far_set_size += (geo->raid_disks % geo->far_set_size); 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci /* now calculate first sector/dev */ 5788c2ecf20Sopenharmony_ci chunk = r10bio->sector >> geo->chunk_shift; 5798c2ecf20Sopenharmony_ci sector = r10bio->sector & geo->chunk_mask; 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ci chunk *= geo->near_copies; 5828c2ecf20Sopenharmony_ci stripe = chunk; 5838c2ecf20Sopenharmony_ci dev = sector_div(stripe, geo->raid_disks); 5848c2ecf20Sopenharmony_ci if (geo->far_offset) 5858c2ecf20Sopenharmony_ci stripe *= geo->far_copies; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci sector += stripe << geo->chunk_shift; 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci /* and calculate all the others */ 5908c2ecf20Sopenharmony_ci for (n = 0; n < geo->near_copies; n++) { 5918c2ecf20Sopenharmony_ci int d = dev; 5928c2ecf20Sopenharmony_ci int set; 5938c2ecf20Sopenharmony_ci sector_t s = sector; 5948c2ecf20Sopenharmony_ci 
r10bio->devs[slot].devnum = d; 5958c2ecf20Sopenharmony_ci r10bio->devs[slot].addr = s; 5968c2ecf20Sopenharmony_ci slot++; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci for (f = 1; f < geo->far_copies; f++) { 5998c2ecf20Sopenharmony_ci set = d / geo->far_set_size; 6008c2ecf20Sopenharmony_ci d += geo->near_copies; 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci if ((geo->raid_disks % geo->far_set_size) && 6038c2ecf20Sopenharmony_ci (d > last_far_set_start)) { 6048c2ecf20Sopenharmony_ci d -= last_far_set_start; 6058c2ecf20Sopenharmony_ci d %= last_far_set_size; 6068c2ecf20Sopenharmony_ci d += last_far_set_start; 6078c2ecf20Sopenharmony_ci } else { 6088c2ecf20Sopenharmony_ci d %= geo->far_set_size; 6098c2ecf20Sopenharmony_ci d += geo->far_set_size * set; 6108c2ecf20Sopenharmony_ci } 6118c2ecf20Sopenharmony_ci s += geo->stride; 6128c2ecf20Sopenharmony_ci r10bio->devs[slot].devnum = d; 6138c2ecf20Sopenharmony_ci r10bio->devs[slot].addr = s; 6148c2ecf20Sopenharmony_ci slot++; 6158c2ecf20Sopenharmony_ci } 6168c2ecf20Sopenharmony_ci dev++; 6178c2ecf20Sopenharmony_ci if (dev >= geo->raid_disks) { 6188c2ecf20Sopenharmony_ci dev = 0; 6198c2ecf20Sopenharmony_ci sector += (geo->chunk_mask + 1); 6208c2ecf20Sopenharmony_ci } 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci} 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_cistatic void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) 6258c2ecf20Sopenharmony_ci{ 6268c2ecf20Sopenharmony_ci struct geom *geo = &conf->geo; 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci if (conf->reshape_progress != MaxSector && 6298c2ecf20Sopenharmony_ci ((r10bio->sector >= conf->reshape_progress) != 6308c2ecf20Sopenharmony_ci conf->mddev->reshape_backwards)) { 6318c2ecf20Sopenharmony_ci set_bit(R10BIO_Previous, &r10bio->state); 6328c2ecf20Sopenharmony_ci geo = &conf->prev; 6338c2ecf20Sopenharmony_ci } else 6348c2ecf20Sopenharmony_ci clear_bit(R10BIO_Previous, &r10bio->state); 6358c2ecf20Sopenharmony_ci 
6368c2ecf20Sopenharmony_ci __raid10_find_phys(geo, r10bio); 6378c2ecf20Sopenharmony_ci} 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_cistatic sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) 6408c2ecf20Sopenharmony_ci{ 6418c2ecf20Sopenharmony_ci sector_t offset, chunk, vchunk; 6428c2ecf20Sopenharmony_ci /* Never use conf->prev as this is only called during resync 6438c2ecf20Sopenharmony_ci * or recovery, so reshape isn't happening 6448c2ecf20Sopenharmony_ci */ 6458c2ecf20Sopenharmony_ci struct geom *geo = &conf->geo; 6468c2ecf20Sopenharmony_ci int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; 6478c2ecf20Sopenharmony_ci int far_set_size = geo->far_set_size; 6488c2ecf20Sopenharmony_ci int last_far_set_start; 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci if (geo->raid_disks % geo->far_set_size) { 6518c2ecf20Sopenharmony_ci last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; 6528c2ecf20Sopenharmony_ci last_far_set_start *= geo->far_set_size; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci if (dev >= last_far_set_start) { 6558c2ecf20Sopenharmony_ci far_set_size = geo->far_set_size; 6568c2ecf20Sopenharmony_ci far_set_size += (geo->raid_disks % geo->far_set_size); 6578c2ecf20Sopenharmony_ci far_set_start = last_far_set_start; 6588c2ecf20Sopenharmony_ci } 6598c2ecf20Sopenharmony_ci } 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci offset = sector & geo->chunk_mask; 6628c2ecf20Sopenharmony_ci if (geo->far_offset) { 6638c2ecf20Sopenharmony_ci int fc; 6648c2ecf20Sopenharmony_ci chunk = sector >> geo->chunk_shift; 6658c2ecf20Sopenharmony_ci fc = sector_div(chunk, geo->far_copies); 6668c2ecf20Sopenharmony_ci dev -= fc * geo->near_copies; 6678c2ecf20Sopenharmony_ci if (dev < far_set_start) 6688c2ecf20Sopenharmony_ci dev += far_set_size; 6698c2ecf20Sopenharmony_ci } else { 6708c2ecf20Sopenharmony_ci while (sector >= geo->stride) { 6718c2ecf20Sopenharmony_ci sector -= geo->stride; 
6728c2ecf20Sopenharmony_ci if (dev < (geo->near_copies + far_set_start)) 6738c2ecf20Sopenharmony_ci dev += far_set_size - geo->near_copies; 6748c2ecf20Sopenharmony_ci else 6758c2ecf20Sopenharmony_ci dev -= geo->near_copies; 6768c2ecf20Sopenharmony_ci } 6778c2ecf20Sopenharmony_ci chunk = sector >> geo->chunk_shift; 6788c2ecf20Sopenharmony_ci } 6798c2ecf20Sopenharmony_ci vchunk = chunk * geo->raid_disks + dev; 6808c2ecf20Sopenharmony_ci sector_div(vchunk, geo->near_copies); 6818c2ecf20Sopenharmony_ci return (vchunk << geo->chunk_shift) + offset; 6828c2ecf20Sopenharmony_ci} 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci/* 6858c2ecf20Sopenharmony_ci * This routine returns the disk from which the requested read should 6868c2ecf20Sopenharmony_ci * be done. There is a per-array 'next expected sequential IO' sector 6878c2ecf20Sopenharmony_ci * number - if this matches on the next IO then we use the last disk. 6888c2ecf20Sopenharmony_ci * There is also a per-disk 'last know head position' sector that is 6898c2ecf20Sopenharmony_ci * maintained from IRQ contexts, both the normal and the resync IO 6908c2ecf20Sopenharmony_ci * completion handlers update this position correctly. If there is no 6918c2ecf20Sopenharmony_ci * perfect sequential match then we pick the disk whose head is closest. 6928c2ecf20Sopenharmony_ci * 6938c2ecf20Sopenharmony_ci * If there are 2 mirrors in the same 2 devices, performance degrades 6948c2ecf20Sopenharmony_ci * because position is mirror, not device based. 6958c2ecf20Sopenharmony_ci * 6968c2ecf20Sopenharmony_ci * The rdev for the device selected will have nr_pending incremented. 6978c2ecf20Sopenharmony_ci */ 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci/* 7008c2ecf20Sopenharmony_ci * FIXME: possibly should rethink readbalancing and do it differently 7018c2ecf20Sopenharmony_ci * depending on near_copies / far_copies geometry. 
7028c2ecf20Sopenharmony_ci */ 7038c2ecf20Sopenharmony_cistatic struct md_rdev *read_balance(struct r10conf *conf, 7048c2ecf20Sopenharmony_ci struct r10bio *r10_bio, 7058c2ecf20Sopenharmony_ci int *max_sectors) 7068c2ecf20Sopenharmony_ci{ 7078c2ecf20Sopenharmony_ci const sector_t this_sector = r10_bio->sector; 7088c2ecf20Sopenharmony_ci int disk, slot; 7098c2ecf20Sopenharmony_ci int sectors = r10_bio->sectors; 7108c2ecf20Sopenharmony_ci int best_good_sectors; 7118c2ecf20Sopenharmony_ci sector_t new_distance, best_dist; 7128c2ecf20Sopenharmony_ci struct md_rdev *best_dist_rdev, *best_pending_rdev, *rdev = NULL; 7138c2ecf20Sopenharmony_ci int do_balance; 7148c2ecf20Sopenharmony_ci int best_dist_slot, best_pending_slot; 7158c2ecf20Sopenharmony_ci bool has_nonrot_disk = false; 7168c2ecf20Sopenharmony_ci unsigned int min_pending; 7178c2ecf20Sopenharmony_ci struct geom *geo = &conf->geo; 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci raid10_find_phys(conf, r10_bio); 7208c2ecf20Sopenharmony_ci rcu_read_lock(); 7218c2ecf20Sopenharmony_ci best_dist_slot = -1; 7228c2ecf20Sopenharmony_ci min_pending = UINT_MAX; 7238c2ecf20Sopenharmony_ci best_dist_rdev = NULL; 7248c2ecf20Sopenharmony_ci best_pending_rdev = NULL; 7258c2ecf20Sopenharmony_ci best_dist = MaxSector; 7268c2ecf20Sopenharmony_ci best_good_sectors = 0; 7278c2ecf20Sopenharmony_ci do_balance = 1; 7288c2ecf20Sopenharmony_ci clear_bit(R10BIO_FailFast, &r10_bio->state); 7298c2ecf20Sopenharmony_ci /* 7308c2ecf20Sopenharmony_ci * Check if we can balance. We can balance on the whole 7318c2ecf20Sopenharmony_ci * device if no resync is going on (recovery is ok), or below 7328c2ecf20Sopenharmony_ci * the resync window. We take the first readable disk when 7338c2ecf20Sopenharmony_ci * above the resync window. 
7348c2ecf20Sopenharmony_ci */ 7358c2ecf20Sopenharmony_ci if ((conf->mddev->recovery_cp < MaxSector 7368c2ecf20Sopenharmony_ci && (this_sector + sectors >= conf->next_resync)) || 7378c2ecf20Sopenharmony_ci (mddev_is_clustered(conf->mddev) && 7388c2ecf20Sopenharmony_ci md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, 7398c2ecf20Sopenharmony_ci this_sector + sectors))) 7408c2ecf20Sopenharmony_ci do_balance = 0; 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_ci for (slot = 0; slot < conf->copies ; slot++) { 7438c2ecf20Sopenharmony_ci sector_t first_bad; 7448c2ecf20Sopenharmony_ci int bad_sectors; 7458c2ecf20Sopenharmony_ci sector_t dev_sector; 7468c2ecf20Sopenharmony_ci unsigned int pending; 7478c2ecf20Sopenharmony_ci bool nonrot; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci if (r10_bio->devs[slot].bio == IO_BLOCKED) 7508c2ecf20Sopenharmony_ci continue; 7518c2ecf20Sopenharmony_ci disk = r10_bio->devs[slot].devnum; 7528c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[disk].replacement); 7538c2ecf20Sopenharmony_ci if (rdev == NULL || test_bit(Faulty, &rdev->flags) || 7548c2ecf20Sopenharmony_ci r10_bio->devs[slot].addr + sectors > 7558c2ecf20Sopenharmony_ci rdev->recovery_offset) { 7568c2ecf20Sopenharmony_ci /* 7578c2ecf20Sopenharmony_ci * Read replacement first to prevent reading both rdev 7588c2ecf20Sopenharmony_ci * and replacement as NULL during replacement replace 7598c2ecf20Sopenharmony_ci * rdev. 
7608c2ecf20Sopenharmony_ci */ 7618c2ecf20Sopenharmony_ci smp_mb(); 7628c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[disk].rdev); 7638c2ecf20Sopenharmony_ci } 7648c2ecf20Sopenharmony_ci if (rdev == NULL || 7658c2ecf20Sopenharmony_ci test_bit(Faulty, &rdev->flags)) 7668c2ecf20Sopenharmony_ci continue; 7678c2ecf20Sopenharmony_ci if (!test_bit(In_sync, &rdev->flags) && 7688c2ecf20Sopenharmony_ci r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) 7698c2ecf20Sopenharmony_ci continue; 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci dev_sector = r10_bio->devs[slot].addr; 7728c2ecf20Sopenharmony_ci if (is_badblock(rdev, dev_sector, sectors, 7738c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) { 7748c2ecf20Sopenharmony_ci if (best_dist < MaxSector) 7758c2ecf20Sopenharmony_ci /* Already have a better slot */ 7768c2ecf20Sopenharmony_ci continue; 7778c2ecf20Sopenharmony_ci if (first_bad <= dev_sector) { 7788c2ecf20Sopenharmony_ci /* Cannot read here. If this is the 7798c2ecf20Sopenharmony_ci * 'primary' device, then we must not read 7808c2ecf20Sopenharmony_ci * beyond 'bad_sectors' from another device. 
7818c2ecf20Sopenharmony_ci */ 7828c2ecf20Sopenharmony_ci bad_sectors -= (dev_sector - first_bad); 7838c2ecf20Sopenharmony_ci if (!do_balance && sectors > bad_sectors) 7848c2ecf20Sopenharmony_ci sectors = bad_sectors; 7858c2ecf20Sopenharmony_ci if (best_good_sectors > sectors) 7868c2ecf20Sopenharmony_ci best_good_sectors = sectors; 7878c2ecf20Sopenharmony_ci } else { 7888c2ecf20Sopenharmony_ci sector_t good_sectors = 7898c2ecf20Sopenharmony_ci first_bad - dev_sector; 7908c2ecf20Sopenharmony_ci if (good_sectors > best_good_sectors) { 7918c2ecf20Sopenharmony_ci best_good_sectors = good_sectors; 7928c2ecf20Sopenharmony_ci best_dist_slot = slot; 7938c2ecf20Sopenharmony_ci best_dist_rdev = rdev; 7948c2ecf20Sopenharmony_ci } 7958c2ecf20Sopenharmony_ci if (!do_balance) 7968c2ecf20Sopenharmony_ci /* Must read from here */ 7978c2ecf20Sopenharmony_ci break; 7988c2ecf20Sopenharmony_ci } 7998c2ecf20Sopenharmony_ci continue; 8008c2ecf20Sopenharmony_ci } else 8018c2ecf20Sopenharmony_ci best_good_sectors = sectors; 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci if (!do_balance) 8048c2ecf20Sopenharmony_ci break; 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ci nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); 8078c2ecf20Sopenharmony_ci has_nonrot_disk |= nonrot; 8088c2ecf20Sopenharmony_ci pending = atomic_read(&rdev->nr_pending); 8098c2ecf20Sopenharmony_ci if (min_pending > pending && nonrot) { 8108c2ecf20Sopenharmony_ci min_pending = pending; 8118c2ecf20Sopenharmony_ci best_pending_slot = slot; 8128c2ecf20Sopenharmony_ci best_pending_rdev = rdev; 8138c2ecf20Sopenharmony_ci } 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci if (best_dist_slot >= 0) 8168c2ecf20Sopenharmony_ci /* At least 2 disks to choose from so failfast is OK */ 8178c2ecf20Sopenharmony_ci set_bit(R10BIO_FailFast, &r10_bio->state); 8188c2ecf20Sopenharmony_ci /* This optimisation is debatable, and completely destroys 8198c2ecf20Sopenharmony_ci * sequential read speed for 'far copies' arrays. 
So only 8208c2ecf20Sopenharmony_ci * keep it for 'near' arrays, and review those later. 8218c2ecf20Sopenharmony_ci */ 8228c2ecf20Sopenharmony_ci if (geo->near_copies > 1 && !pending) 8238c2ecf20Sopenharmony_ci new_distance = 0; 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci /* for far > 1 always use the lowest address */ 8268c2ecf20Sopenharmony_ci else if (geo->far_copies > 1) 8278c2ecf20Sopenharmony_ci new_distance = r10_bio->devs[slot].addr; 8288c2ecf20Sopenharmony_ci else 8298c2ecf20Sopenharmony_ci new_distance = abs(r10_bio->devs[slot].addr - 8308c2ecf20Sopenharmony_ci conf->mirrors[disk].head_position); 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci if (new_distance < best_dist) { 8338c2ecf20Sopenharmony_ci best_dist = new_distance; 8348c2ecf20Sopenharmony_ci best_dist_slot = slot; 8358c2ecf20Sopenharmony_ci best_dist_rdev = rdev; 8368c2ecf20Sopenharmony_ci } 8378c2ecf20Sopenharmony_ci } 8388c2ecf20Sopenharmony_ci if (slot >= conf->copies) { 8398c2ecf20Sopenharmony_ci if (has_nonrot_disk) { 8408c2ecf20Sopenharmony_ci slot = best_pending_slot; 8418c2ecf20Sopenharmony_ci rdev = best_pending_rdev; 8428c2ecf20Sopenharmony_ci } else { 8438c2ecf20Sopenharmony_ci slot = best_dist_slot; 8448c2ecf20Sopenharmony_ci rdev = best_dist_rdev; 8458c2ecf20Sopenharmony_ci } 8468c2ecf20Sopenharmony_ci } 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (slot >= 0) { 8498c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 8508c2ecf20Sopenharmony_ci r10_bio->read_slot = slot; 8518c2ecf20Sopenharmony_ci } else 8528c2ecf20Sopenharmony_ci rdev = NULL; 8538c2ecf20Sopenharmony_ci rcu_read_unlock(); 8548c2ecf20Sopenharmony_ci *max_sectors = best_good_sectors; 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci return rdev; 8578c2ecf20Sopenharmony_ci} 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_cistatic void flush_pending_writes(struct r10conf *conf) 8608c2ecf20Sopenharmony_ci{ 8618c2ecf20Sopenharmony_ci /* Any writes that have been queued but are awaiting 
8628c2ecf20Sopenharmony_ci * bitmap updates get flushed here. 8638c2ecf20Sopenharmony_ci */ 8648c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci if (conf->pending_bio_list.head) { 8678c2ecf20Sopenharmony_ci struct blk_plug plug; 8688c2ecf20Sopenharmony_ci struct bio *bio; 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ci bio = bio_list_get(&conf->pending_bio_list); 8718c2ecf20Sopenharmony_ci conf->pending_count = 0; 8728c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci /* 8758c2ecf20Sopenharmony_ci * As this is called in a wait_event() loop (see freeze_array), 8768c2ecf20Sopenharmony_ci * current->state might be TASK_UNINTERRUPTIBLE which will 8778c2ecf20Sopenharmony_ci * cause a warning when we prepare to wait again. As it is 8788c2ecf20Sopenharmony_ci * rare that this path is taken, it is perfectly safe to force 8798c2ecf20Sopenharmony_ci * us to go around the wait_event() loop again, so the warning 8808c2ecf20Sopenharmony_ci * is a false-positive. 
Silence the warning by resetting 8818c2ecf20Sopenharmony_ci * thread state 8828c2ecf20Sopenharmony_ci */ 8838c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci blk_start_plug(&plug); 8868c2ecf20Sopenharmony_ci /* flush any pending bitmap writes to disk 8878c2ecf20Sopenharmony_ci * before proceeding w/ I/O */ 8888c2ecf20Sopenharmony_ci md_bitmap_unplug(conf->mddev->bitmap); 8898c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_ci while (bio) { /* submit pending writes */ 8928c2ecf20Sopenharmony_ci struct bio *next = bio->bi_next; 8938c2ecf20Sopenharmony_ci struct md_rdev *rdev = (void*)bio->bi_disk; 8948c2ecf20Sopenharmony_ci bio->bi_next = NULL; 8958c2ecf20Sopenharmony_ci bio_set_dev(bio, rdev->bdev); 8968c2ecf20Sopenharmony_ci if (test_bit(Faulty, &rdev->flags)) { 8978c2ecf20Sopenharmony_ci bio_io_error(bio); 8988c2ecf20Sopenharmony_ci } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 8998c2ecf20Sopenharmony_ci !blk_queue_discard(bio->bi_disk->queue))) 9008c2ecf20Sopenharmony_ci /* Just ignore it */ 9018c2ecf20Sopenharmony_ci bio_endio(bio); 9028c2ecf20Sopenharmony_ci else 9038c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 9048c2ecf20Sopenharmony_ci bio = next; 9058c2ecf20Sopenharmony_ci cond_resched(); 9068c2ecf20Sopenharmony_ci } 9078c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 9088c2ecf20Sopenharmony_ci } else 9098c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 9108c2ecf20Sopenharmony_ci} 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_ci/* Barriers.... 9138c2ecf20Sopenharmony_ci * Sometimes we need to suspend IO while we do something else, 9148c2ecf20Sopenharmony_ci * either some resync/recovery, or reconfigure the array. 9158c2ecf20Sopenharmony_ci * To do this we raise a 'barrier'. 
 * The 'barrier' is a counter that can be raised multiple times
 * to count how many activities are happening which preclude
 * normal IO.
 * We can only raise the barrier if there is no pending IO.
 * i.e. if nr_pending == 0.
 * We choose only to raise the barrier if no-one is waiting for the
 * barrier to go down.  This means that as soon as an IO request
 * is ready, no other operations which require a barrier will start
 * until the IO request has had a chance.
 *
 * So: regular IO calls 'wait_barrier'.  When that returns there
 *    is no background IO happening.  It must arrange to call
 *    allow_barrier when it has finished its IO.
 * background IO calls must call raise_barrier.  Once that returns
 *    there is no normal IO happening.  It must arrange to call
 *    lower_barrier when the particular background IO completes.
9328c2ecf20Sopenharmony_ci */ 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_cistatic void raise_barrier(struct r10conf *conf, int force) 9358c2ecf20Sopenharmony_ci{ 9368c2ecf20Sopenharmony_ci BUG_ON(force && !conf->barrier); 9378c2ecf20Sopenharmony_ci spin_lock_irq(&conf->resync_lock); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci /* Wait until no block IO is waiting (unless 'force') */ 9408c2ecf20Sopenharmony_ci wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, 9418c2ecf20Sopenharmony_ci conf->resync_lock); 9428c2ecf20Sopenharmony_ci 9438c2ecf20Sopenharmony_ci /* block any new IO from starting */ 9448c2ecf20Sopenharmony_ci conf->barrier++; 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ci /* Now wait for all pending IO to complete */ 9478c2ecf20Sopenharmony_ci wait_event_lock_irq(conf->wait_barrier, 9488c2ecf20Sopenharmony_ci !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH, 9498c2ecf20Sopenharmony_ci conf->resync_lock); 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->resync_lock); 9528c2ecf20Sopenharmony_ci} 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_cistatic void lower_barrier(struct r10conf *conf) 9558c2ecf20Sopenharmony_ci{ 9568c2ecf20Sopenharmony_ci unsigned long flags; 9578c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->resync_lock, flags); 9588c2ecf20Sopenharmony_ci conf->barrier--; 9598c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->resync_lock, flags); 9608c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 9618c2ecf20Sopenharmony_ci} 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_cistatic void wait_barrier(struct r10conf *conf) 9648c2ecf20Sopenharmony_ci{ 9658c2ecf20Sopenharmony_ci spin_lock_irq(&conf->resync_lock); 9668c2ecf20Sopenharmony_ci if (conf->barrier) { 9678c2ecf20Sopenharmony_ci struct bio_list *bio_list = current->bio_list; 9688c2ecf20Sopenharmony_ci conf->nr_waiting++; 9698c2ecf20Sopenharmony_ci /* Wait for the barrier to drop. 
9708c2ecf20Sopenharmony_ci * However if there are already pending 9718c2ecf20Sopenharmony_ci * requests (preventing the barrier from 9728c2ecf20Sopenharmony_ci * rising completely), and the 9738c2ecf20Sopenharmony_ci * pre-process bio queue isn't empty, 9748c2ecf20Sopenharmony_ci * then don't wait, as we need to empty 9758c2ecf20Sopenharmony_ci * that queue to get the nr_pending 9768c2ecf20Sopenharmony_ci * count down. 9778c2ecf20Sopenharmony_ci */ 9788c2ecf20Sopenharmony_ci raid10_log(conf->mddev, "wait barrier"); 9798c2ecf20Sopenharmony_ci wait_event_lock_irq(conf->wait_barrier, 9808c2ecf20Sopenharmony_ci !conf->barrier || 9818c2ecf20Sopenharmony_ci (atomic_read(&conf->nr_pending) && 9828c2ecf20Sopenharmony_ci bio_list && 9838c2ecf20Sopenharmony_ci (!bio_list_empty(&bio_list[0]) || 9848c2ecf20Sopenharmony_ci !bio_list_empty(&bio_list[1]))) || 9858c2ecf20Sopenharmony_ci /* move on if recovery thread is 9868c2ecf20Sopenharmony_ci * blocked by us 9878c2ecf20Sopenharmony_ci */ 9888c2ecf20Sopenharmony_ci (conf->mddev->thread->tsk == current && 9898c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_RUNNING, 9908c2ecf20Sopenharmony_ci &conf->mddev->recovery) && 9918c2ecf20Sopenharmony_ci conf->nr_queued > 0), 9928c2ecf20Sopenharmony_ci conf->resync_lock); 9938c2ecf20Sopenharmony_ci conf->nr_waiting--; 9948c2ecf20Sopenharmony_ci if (!conf->nr_waiting) 9958c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 9968c2ecf20Sopenharmony_ci } 9978c2ecf20Sopenharmony_ci atomic_inc(&conf->nr_pending); 9988c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->resync_lock); 9998c2ecf20Sopenharmony_ci} 10008c2ecf20Sopenharmony_ci 10018c2ecf20Sopenharmony_cistatic void allow_barrier(struct r10conf *conf) 10028c2ecf20Sopenharmony_ci{ 10038c2ecf20Sopenharmony_ci if ((atomic_dec_and_test(&conf->nr_pending)) || 10048c2ecf20Sopenharmony_ci (conf->array_freeze_pending)) 10058c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 10068c2ecf20Sopenharmony_ci} 10078c2ecf20Sopenharmony_ci 
10088c2ecf20Sopenharmony_cistatic void freeze_array(struct r10conf *conf, int extra) 10098c2ecf20Sopenharmony_ci{ 10108c2ecf20Sopenharmony_ci /* stop syncio and normal IO and wait for everything to 10118c2ecf20Sopenharmony_ci * go quiet. 10128c2ecf20Sopenharmony_ci * We increment barrier and nr_waiting, and then 10138c2ecf20Sopenharmony_ci * wait until nr_pending match nr_queued+extra 10148c2ecf20Sopenharmony_ci * This is called in the context of one normal IO request 10158c2ecf20Sopenharmony_ci * that has failed. Thus any sync request that might be pending 10168c2ecf20Sopenharmony_ci * will be blocked by nr_pending, and we need to wait for 10178c2ecf20Sopenharmony_ci * pending IO requests to complete or be queued for re-try. 10188c2ecf20Sopenharmony_ci * Thus the number queued (nr_queued) plus this request (extra) 10198c2ecf20Sopenharmony_ci * must match the number of pending IOs (nr_pending) before 10208c2ecf20Sopenharmony_ci * we continue. 10218c2ecf20Sopenharmony_ci */ 10228c2ecf20Sopenharmony_ci spin_lock_irq(&conf->resync_lock); 10238c2ecf20Sopenharmony_ci conf->array_freeze_pending++; 10248c2ecf20Sopenharmony_ci conf->barrier++; 10258c2ecf20Sopenharmony_ci conf->nr_waiting++; 10268c2ecf20Sopenharmony_ci wait_event_lock_irq_cmd(conf->wait_barrier, 10278c2ecf20Sopenharmony_ci atomic_read(&conf->nr_pending) == conf->nr_queued+extra, 10288c2ecf20Sopenharmony_ci conf->resync_lock, 10298c2ecf20Sopenharmony_ci flush_pending_writes(conf)); 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci conf->array_freeze_pending--; 10328c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->resync_lock); 10338c2ecf20Sopenharmony_ci} 10348c2ecf20Sopenharmony_ci 10358c2ecf20Sopenharmony_cistatic void unfreeze_array(struct r10conf *conf) 10368c2ecf20Sopenharmony_ci{ 10378c2ecf20Sopenharmony_ci /* reverse the effect of the freeze */ 10388c2ecf20Sopenharmony_ci spin_lock_irq(&conf->resync_lock); 10398c2ecf20Sopenharmony_ci conf->barrier--; 10408c2ecf20Sopenharmony_ci conf->nr_waiting--; 
10418c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 10428c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->resync_lock); 10438c2ecf20Sopenharmony_ci} 10448c2ecf20Sopenharmony_ci 10458c2ecf20Sopenharmony_cistatic sector_t choose_data_offset(struct r10bio *r10_bio, 10468c2ecf20Sopenharmony_ci struct md_rdev *rdev) 10478c2ecf20Sopenharmony_ci{ 10488c2ecf20Sopenharmony_ci if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) || 10498c2ecf20Sopenharmony_ci test_bit(R10BIO_Previous, &r10_bio->state)) 10508c2ecf20Sopenharmony_ci return rdev->data_offset; 10518c2ecf20Sopenharmony_ci else 10528c2ecf20Sopenharmony_ci return rdev->new_data_offset; 10538c2ecf20Sopenharmony_ci} 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_cistruct raid10_plug_cb { 10568c2ecf20Sopenharmony_ci struct blk_plug_cb cb; 10578c2ecf20Sopenharmony_ci struct bio_list pending; 10588c2ecf20Sopenharmony_ci int pending_cnt; 10598c2ecf20Sopenharmony_ci}; 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_cistatic void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) 10628c2ecf20Sopenharmony_ci{ 10638c2ecf20Sopenharmony_ci struct raid10_plug_cb *plug = container_of(cb, struct raid10_plug_cb, 10648c2ecf20Sopenharmony_ci cb); 10658c2ecf20Sopenharmony_ci struct mddev *mddev = plug->cb.data; 10668c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 10678c2ecf20Sopenharmony_ci struct bio *bio; 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci if (from_schedule || current->bio_list) { 10708c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 10718c2ecf20Sopenharmony_ci bio_list_merge(&conf->pending_bio_list, &plug->pending); 10728c2ecf20Sopenharmony_ci conf->pending_count += plug->pending_cnt; 10738c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 10748c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 10758c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 10768c2ecf20Sopenharmony_ci kfree(plug); 10778c2ecf20Sopenharmony_ci return; 
10788c2ecf20Sopenharmony_ci } 10798c2ecf20Sopenharmony_ci 10808c2ecf20Sopenharmony_ci /* we aren't scheduling, so we can do the write-out directly. */ 10818c2ecf20Sopenharmony_ci bio = bio_list_get(&plug->pending); 10828c2ecf20Sopenharmony_ci md_bitmap_unplug(mddev->bitmap); 10838c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci while (bio) { /* submit pending writes */ 10868c2ecf20Sopenharmony_ci struct bio *next = bio->bi_next; 10878c2ecf20Sopenharmony_ci struct md_rdev *rdev = (void*)bio->bi_disk; 10888c2ecf20Sopenharmony_ci bio->bi_next = NULL; 10898c2ecf20Sopenharmony_ci bio_set_dev(bio, rdev->bdev); 10908c2ecf20Sopenharmony_ci if (test_bit(Faulty, &rdev->flags)) { 10918c2ecf20Sopenharmony_ci bio_io_error(bio); 10928c2ecf20Sopenharmony_ci } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 10938c2ecf20Sopenharmony_ci !blk_queue_discard(bio->bi_disk->queue))) 10948c2ecf20Sopenharmony_ci /* Just ignore it */ 10958c2ecf20Sopenharmony_ci bio_endio(bio); 10968c2ecf20Sopenharmony_ci else 10978c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 10988c2ecf20Sopenharmony_ci bio = next; 10998c2ecf20Sopenharmony_ci cond_resched(); 11008c2ecf20Sopenharmony_ci } 11018c2ecf20Sopenharmony_ci kfree(plug); 11028c2ecf20Sopenharmony_ci} 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ci/* 11058c2ecf20Sopenharmony_ci * 1. Register the new request and wait if the reconstruction thread has put 11068c2ecf20Sopenharmony_ci * up a bar for new requests. Continue immediately if no resync is active 11078c2ecf20Sopenharmony_ci * currently. 11088c2ecf20Sopenharmony_ci * 2. If IO spans the reshape position. Need to wait for reshape to pass. 
11098c2ecf20Sopenharmony_ci */ 11108c2ecf20Sopenharmony_cistatic void regular_request_wait(struct mddev *mddev, struct r10conf *conf, 11118c2ecf20Sopenharmony_ci struct bio *bio, sector_t sectors) 11128c2ecf20Sopenharmony_ci{ 11138c2ecf20Sopenharmony_ci wait_barrier(conf); 11148c2ecf20Sopenharmony_ci while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 11158c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector < conf->reshape_progress && 11168c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector + sectors > conf->reshape_progress) { 11178c2ecf20Sopenharmony_ci raid10_log(conf->mddev, "wait reshape"); 11188c2ecf20Sopenharmony_ci allow_barrier(conf); 11198c2ecf20Sopenharmony_ci wait_event(conf->wait_barrier, 11208c2ecf20Sopenharmony_ci conf->reshape_progress <= bio->bi_iter.bi_sector || 11218c2ecf20Sopenharmony_ci conf->reshape_progress >= bio->bi_iter.bi_sector + 11228c2ecf20Sopenharmony_ci sectors); 11238c2ecf20Sopenharmony_ci wait_barrier(conf); 11248c2ecf20Sopenharmony_ci } 11258c2ecf20Sopenharmony_ci} 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_cistatic void raid10_read_request(struct mddev *mddev, struct bio *bio, 11288c2ecf20Sopenharmony_ci struct r10bio *r10_bio) 11298c2ecf20Sopenharmony_ci{ 11308c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 11318c2ecf20Sopenharmony_ci struct bio *read_bio; 11328c2ecf20Sopenharmony_ci const int op = bio_op(bio); 11338c2ecf20Sopenharmony_ci const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 11348c2ecf20Sopenharmony_ci int max_sectors; 11358c2ecf20Sopenharmony_ci struct md_rdev *rdev; 11368c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 11378c2ecf20Sopenharmony_ci int slot = r10_bio->read_slot; 11388c2ecf20Sopenharmony_ci struct md_rdev *err_rdev = NULL; 11398c2ecf20Sopenharmony_ci gfp_t gfp = GFP_NOIO; 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci if (slot >= 0 && r10_bio->devs[slot].rdev) { 11428c2ecf20Sopenharmony_ci /* 11438c2ecf20Sopenharmony_ci * This is an error retry, but we cannot 
11448c2ecf20Sopenharmony_ci * safely dereference the rdev in the r10_bio, 11458c2ecf20Sopenharmony_ci * we must use the one in conf. 11468c2ecf20Sopenharmony_ci * If it has already been disconnected (unlikely) 11478c2ecf20Sopenharmony_ci * we lose the device name in error messages. 11488c2ecf20Sopenharmony_ci */ 11498c2ecf20Sopenharmony_ci int disk; 11508c2ecf20Sopenharmony_ci /* 11518c2ecf20Sopenharmony_ci * As we are blocking raid10, it is a little safer to 11528c2ecf20Sopenharmony_ci * use __GFP_HIGH. 11538c2ecf20Sopenharmony_ci */ 11548c2ecf20Sopenharmony_ci gfp = GFP_NOIO | __GFP_HIGH; 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci rcu_read_lock(); 11578c2ecf20Sopenharmony_ci disk = r10_bio->devs[slot].devnum; 11588c2ecf20Sopenharmony_ci err_rdev = rcu_dereference(conf->mirrors[disk].rdev); 11598c2ecf20Sopenharmony_ci if (err_rdev) 11608c2ecf20Sopenharmony_ci bdevname(err_rdev->bdev, b); 11618c2ecf20Sopenharmony_ci else { 11628c2ecf20Sopenharmony_ci strcpy(b, "???"); 11638c2ecf20Sopenharmony_ci /* This never gets dereferenced */ 11648c2ecf20Sopenharmony_ci err_rdev = r10_bio->devs[slot].rdev; 11658c2ecf20Sopenharmony_ci } 11668c2ecf20Sopenharmony_ci rcu_read_unlock(); 11678c2ecf20Sopenharmony_ci } 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci regular_request_wait(mddev, conf, bio, r10_bio->sectors); 11708c2ecf20Sopenharmony_ci rdev = read_balance(conf, r10_bio, &max_sectors); 11718c2ecf20Sopenharmony_ci if (!rdev) { 11728c2ecf20Sopenharmony_ci if (err_rdev) { 11738c2ecf20Sopenharmony_ci pr_crit_ratelimited("md/raid10:%s: %s: unrecoverable I/O read error for block %llu\n", 11748c2ecf20Sopenharmony_ci mdname(mddev), b, 11758c2ecf20Sopenharmony_ci (unsigned long long)r10_bio->sector); 11768c2ecf20Sopenharmony_ci } 11778c2ecf20Sopenharmony_ci raid_end_bio_io(r10_bio); 11788c2ecf20Sopenharmony_ci return; 11798c2ecf20Sopenharmony_ci } 11808c2ecf20Sopenharmony_ci if (err_rdev) 11818c2ecf20Sopenharmony_ci pr_err_ratelimited("md/raid10:%s: %s: 
redirecting sector %llu to another mirror\n", 11828c2ecf20Sopenharmony_ci mdname(mddev), 11838c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b), 11848c2ecf20Sopenharmony_ci (unsigned long long)r10_bio->sector); 11858c2ecf20Sopenharmony_ci if (max_sectors < bio_sectors(bio)) { 11868c2ecf20Sopenharmony_ci struct bio *split = bio_split(bio, max_sectors, 11878c2ecf20Sopenharmony_ci gfp, &conf->bio_split); 11888c2ecf20Sopenharmony_ci bio_chain(split, bio); 11898c2ecf20Sopenharmony_ci allow_barrier(conf); 11908c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 11918c2ecf20Sopenharmony_ci wait_barrier(conf); 11928c2ecf20Sopenharmony_ci bio = split; 11938c2ecf20Sopenharmony_ci r10_bio->master_bio = bio; 11948c2ecf20Sopenharmony_ci r10_bio->sectors = max_sectors; 11958c2ecf20Sopenharmony_ci } 11968c2ecf20Sopenharmony_ci slot = r10_bio->read_slot; 11978c2ecf20Sopenharmony_ci 11988c2ecf20Sopenharmony_ci read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci r10_bio->devs[slot].bio = read_bio; 12018c2ecf20Sopenharmony_ci r10_bio->devs[slot].rdev = rdev; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + 12048c2ecf20Sopenharmony_ci choose_data_offset(r10_bio, rdev); 12058c2ecf20Sopenharmony_ci bio_set_dev(read_bio, rdev->bdev); 12068c2ecf20Sopenharmony_ci read_bio->bi_end_io = raid10_end_read_request; 12078c2ecf20Sopenharmony_ci bio_set_op_attrs(read_bio, op, do_sync); 12088c2ecf20Sopenharmony_ci if (test_bit(FailFast, &rdev->flags) && 12098c2ecf20Sopenharmony_ci test_bit(R10BIO_FailFast, &r10_bio->state)) 12108c2ecf20Sopenharmony_ci read_bio->bi_opf |= MD_FAILFAST; 12118c2ecf20Sopenharmony_ci read_bio->bi_private = r10_bio; 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_ci if (mddev->gendisk) 12148c2ecf20Sopenharmony_ci trace_block_bio_remap(read_bio->bi_disk->queue, 12158c2ecf20Sopenharmony_ci read_bio, disk_devt(mddev->gendisk), 12168c2ecf20Sopenharmony_ci 
r10_bio->sector); 12178c2ecf20Sopenharmony_ci submit_bio_noacct(read_bio); 12188c2ecf20Sopenharmony_ci return; 12198c2ecf20Sopenharmony_ci} 12208c2ecf20Sopenharmony_ci 12218c2ecf20Sopenharmony_cistatic void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, 12228c2ecf20Sopenharmony_ci struct bio *bio, bool replacement, 12238c2ecf20Sopenharmony_ci int n_copy) 12248c2ecf20Sopenharmony_ci{ 12258c2ecf20Sopenharmony_ci const int op = bio_op(bio); 12268c2ecf20Sopenharmony_ci const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); 12278c2ecf20Sopenharmony_ci const unsigned long do_fua = (bio->bi_opf & REQ_FUA); 12288c2ecf20Sopenharmony_ci unsigned long flags; 12298c2ecf20Sopenharmony_ci struct blk_plug_cb *cb; 12308c2ecf20Sopenharmony_ci struct raid10_plug_cb *plug = NULL; 12318c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 12328c2ecf20Sopenharmony_ci struct md_rdev *rdev; 12338c2ecf20Sopenharmony_ci int devnum = r10_bio->devs[n_copy].devnum; 12348c2ecf20Sopenharmony_ci struct bio *mbio; 12358c2ecf20Sopenharmony_ci 12368c2ecf20Sopenharmony_ci if (replacement) { 12378c2ecf20Sopenharmony_ci rdev = conf->mirrors[devnum].replacement; 12388c2ecf20Sopenharmony_ci if (rdev == NULL) { 12398c2ecf20Sopenharmony_ci /* Replacement just got moved to main 'rdev' */ 12408c2ecf20Sopenharmony_ci smp_mb(); 12418c2ecf20Sopenharmony_ci rdev = conf->mirrors[devnum].rdev; 12428c2ecf20Sopenharmony_ci } 12438c2ecf20Sopenharmony_ci } else 12448c2ecf20Sopenharmony_ci rdev = conf->mirrors[devnum].rdev; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); 12478c2ecf20Sopenharmony_ci if (replacement) 12488c2ecf20Sopenharmony_ci r10_bio->devs[n_copy].repl_bio = mbio; 12498c2ecf20Sopenharmony_ci else 12508c2ecf20Sopenharmony_ci r10_bio->devs[n_copy].bio = mbio; 12518c2ecf20Sopenharmony_ci 12528c2ecf20Sopenharmony_ci mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + 12538c2ecf20Sopenharmony_ci 
choose_data_offset(r10_bio, rdev)); 12548c2ecf20Sopenharmony_ci bio_set_dev(mbio, rdev->bdev); 12558c2ecf20Sopenharmony_ci mbio->bi_end_io = raid10_end_write_request; 12568c2ecf20Sopenharmony_ci bio_set_op_attrs(mbio, op, do_sync | do_fua); 12578c2ecf20Sopenharmony_ci if (!replacement && test_bit(FailFast, 12588c2ecf20Sopenharmony_ci &conf->mirrors[devnum].rdev->flags) 12598c2ecf20Sopenharmony_ci && enough(conf, devnum)) 12608c2ecf20Sopenharmony_ci mbio->bi_opf |= MD_FAILFAST; 12618c2ecf20Sopenharmony_ci mbio->bi_private = r10_bio; 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci if (conf->mddev->gendisk) 12648c2ecf20Sopenharmony_ci trace_block_bio_remap(mbio->bi_disk->queue, 12658c2ecf20Sopenharmony_ci mbio, disk_devt(conf->mddev->gendisk), 12668c2ecf20Sopenharmony_ci r10_bio->sector); 12678c2ecf20Sopenharmony_ci /* flush_pending_writes() needs access to the rdev so...*/ 12688c2ecf20Sopenharmony_ci mbio->bi_disk = (void *)rdev; 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 12718c2ecf20Sopenharmony_ci 12728c2ecf20Sopenharmony_ci cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug)); 12738c2ecf20Sopenharmony_ci if (cb) 12748c2ecf20Sopenharmony_ci plug = container_of(cb, struct raid10_plug_cb, cb); 12758c2ecf20Sopenharmony_ci else 12768c2ecf20Sopenharmony_ci plug = NULL; 12778c2ecf20Sopenharmony_ci if (plug) { 12788c2ecf20Sopenharmony_ci bio_list_add(&plug->pending, mbio); 12798c2ecf20Sopenharmony_ci plug->pending_cnt++; 12808c2ecf20Sopenharmony_ci } else { 12818c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 12828c2ecf20Sopenharmony_ci bio_list_add(&conf->pending_bio_list, mbio); 12838c2ecf20Sopenharmony_ci conf->pending_count++; 12848c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 12858c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 12868c2ecf20Sopenharmony_ci } 12878c2ecf20Sopenharmony_ci} 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_cistatic void 
raid10_write_request(struct mddev *mddev, struct bio *bio, 12908c2ecf20Sopenharmony_ci struct r10bio *r10_bio) 12918c2ecf20Sopenharmony_ci{ 12928c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 12938c2ecf20Sopenharmony_ci int i; 12948c2ecf20Sopenharmony_ci struct md_rdev *blocked_rdev; 12958c2ecf20Sopenharmony_ci sector_t sectors; 12968c2ecf20Sopenharmony_ci int max_sectors; 12978c2ecf20Sopenharmony_ci 12988c2ecf20Sopenharmony_ci if ((mddev_is_clustered(mddev) && 12998c2ecf20Sopenharmony_ci md_cluster_ops->area_resyncing(mddev, WRITE, 13008c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector, 13018c2ecf20Sopenharmony_ci bio_end_sector(bio)))) { 13028c2ecf20Sopenharmony_ci DEFINE_WAIT(w); 13038c2ecf20Sopenharmony_ci for (;;) { 13048c2ecf20Sopenharmony_ci prepare_to_wait(&conf->wait_barrier, 13058c2ecf20Sopenharmony_ci &w, TASK_IDLE); 13068c2ecf20Sopenharmony_ci if (!md_cluster_ops->area_resyncing(mddev, WRITE, 13078c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector, bio_end_sector(bio))) 13088c2ecf20Sopenharmony_ci break; 13098c2ecf20Sopenharmony_ci schedule(); 13108c2ecf20Sopenharmony_ci } 13118c2ecf20Sopenharmony_ci finish_wait(&conf->wait_barrier, &w); 13128c2ecf20Sopenharmony_ci } 13138c2ecf20Sopenharmony_ci 13148c2ecf20Sopenharmony_ci sectors = r10_bio->sectors; 13158c2ecf20Sopenharmony_ci regular_request_wait(mddev, conf, bio, sectors); 13168c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 13178c2ecf20Sopenharmony_ci (mddev->reshape_backwards 13188c2ecf20Sopenharmony_ci ? 
(bio->bi_iter.bi_sector < conf->reshape_safe && 13198c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector + sectors > conf->reshape_progress) 13208c2ecf20Sopenharmony_ci : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe && 13218c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector < conf->reshape_progress))) { 13228c2ecf20Sopenharmony_ci /* Need to update reshape_position in metadata */ 13238c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 13248c2ecf20Sopenharmony_ci set_mask_bits(&mddev->sb_flags, 0, 13258c2ecf20Sopenharmony_ci BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); 13268c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 13278c2ecf20Sopenharmony_ci raid10_log(conf->mddev, "wait reshape metadata"); 13288c2ecf20Sopenharmony_ci wait_event(mddev->sb_wait, 13298c2ecf20Sopenharmony_ci !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci conf->reshape_safe = mddev->reshape_position; 13328c2ecf20Sopenharmony_ci } 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci if (conf->pending_count >= max_queued_requests) { 13358c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 13368c2ecf20Sopenharmony_ci raid10_log(mddev, "wait queued"); 13378c2ecf20Sopenharmony_ci wait_event(conf->wait_barrier, 13388c2ecf20Sopenharmony_ci conf->pending_count < max_queued_requests); 13398c2ecf20Sopenharmony_ci } 13408c2ecf20Sopenharmony_ci /* first select target devices under rcu_lock and 13418c2ecf20Sopenharmony_ci * inc refcount on their rdev. Record them by setting 13428c2ecf20Sopenharmony_ci * bios[x] to bio 13438c2ecf20Sopenharmony_ci * If there are known/acknowledged bad blocks on any device 13448c2ecf20Sopenharmony_ci * on which we have seen a write error, we want to avoid 13458c2ecf20Sopenharmony_ci * writing to those blocks. This potentially requires several 13468c2ecf20Sopenharmony_ci * writes to write around the bad blocks. 
Each set of writes 13478c2ecf20Sopenharmony_ci * gets its own r10_bio with a set of bios attached. 13488c2ecf20Sopenharmony_ci */ 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ 13518c2ecf20Sopenharmony_ci raid10_find_phys(conf, r10_bio); 13528c2ecf20Sopenharmony_ciretry_write: 13538c2ecf20Sopenharmony_ci blocked_rdev = NULL; 13548c2ecf20Sopenharmony_ci rcu_read_lock(); 13558c2ecf20Sopenharmony_ci max_sectors = r10_bio->sectors; 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci for (i = 0; i < conf->copies; i++) { 13588c2ecf20Sopenharmony_ci int d = r10_bio->devs[i].devnum; 13598c2ecf20Sopenharmony_ci struct md_rdev *rdev, *rrdev; 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci rrdev = rcu_dereference(conf->mirrors[d].replacement); 13628c2ecf20Sopenharmony_ci /* 13638c2ecf20Sopenharmony_ci * Read replacement first to prevent reading both rdev and 13648c2ecf20Sopenharmony_ci * replacement as NULL during replacement replace rdev. 
13658c2ecf20Sopenharmony_ci */ 13668c2ecf20Sopenharmony_ci smp_mb(); 13678c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 13688c2ecf20Sopenharmony_ci if (rdev == rrdev) 13698c2ecf20Sopenharmony_ci rrdev = NULL; 13708c2ecf20Sopenharmony_ci if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { 13718c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 13728c2ecf20Sopenharmony_ci blocked_rdev = rdev; 13738c2ecf20Sopenharmony_ci break; 13748c2ecf20Sopenharmony_ci } 13758c2ecf20Sopenharmony_ci if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { 13768c2ecf20Sopenharmony_ci atomic_inc(&rrdev->nr_pending); 13778c2ecf20Sopenharmony_ci blocked_rdev = rrdev; 13788c2ecf20Sopenharmony_ci break; 13798c2ecf20Sopenharmony_ci } 13808c2ecf20Sopenharmony_ci if (rdev && (test_bit(Faulty, &rdev->flags))) 13818c2ecf20Sopenharmony_ci rdev = NULL; 13828c2ecf20Sopenharmony_ci if (rrdev && (test_bit(Faulty, &rrdev->flags))) 13838c2ecf20Sopenharmony_ci rrdev = NULL; 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_ci r10_bio->devs[i].bio = NULL; 13868c2ecf20Sopenharmony_ci r10_bio->devs[i].repl_bio = NULL; 13878c2ecf20Sopenharmony_ci 13888c2ecf20Sopenharmony_ci if (!rdev && !rrdev) { 13898c2ecf20Sopenharmony_ci set_bit(R10BIO_Degraded, &r10_bio->state); 13908c2ecf20Sopenharmony_ci continue; 13918c2ecf20Sopenharmony_ci } 13928c2ecf20Sopenharmony_ci if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { 13938c2ecf20Sopenharmony_ci sector_t first_bad; 13948c2ecf20Sopenharmony_ci sector_t dev_sector = r10_bio->devs[i].addr; 13958c2ecf20Sopenharmony_ci int bad_sectors; 13968c2ecf20Sopenharmony_ci int is_bad; 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_ci is_bad = is_badblock(rdev, dev_sector, max_sectors, 13998c2ecf20Sopenharmony_ci &first_bad, &bad_sectors); 14008c2ecf20Sopenharmony_ci if (is_bad < 0) { 14018c2ecf20Sopenharmony_ci /* Mustn't write here until the bad block 14028c2ecf20Sopenharmony_ci * is acknowledged 14038c2ecf20Sopenharmony_ci */ 
14048c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 14058c2ecf20Sopenharmony_ci set_bit(BlockedBadBlocks, &rdev->flags); 14068c2ecf20Sopenharmony_ci blocked_rdev = rdev; 14078c2ecf20Sopenharmony_ci break; 14088c2ecf20Sopenharmony_ci } 14098c2ecf20Sopenharmony_ci if (is_bad && first_bad <= dev_sector) { 14108c2ecf20Sopenharmony_ci /* Cannot write here at all */ 14118c2ecf20Sopenharmony_ci bad_sectors -= (dev_sector - first_bad); 14128c2ecf20Sopenharmony_ci if (bad_sectors < max_sectors) 14138c2ecf20Sopenharmony_ci /* Mustn't write more than bad_sectors 14148c2ecf20Sopenharmony_ci * to other devices yet 14158c2ecf20Sopenharmony_ci */ 14168c2ecf20Sopenharmony_ci max_sectors = bad_sectors; 14178c2ecf20Sopenharmony_ci /* We don't set R10BIO_Degraded as that 14188c2ecf20Sopenharmony_ci * only applies if the disk is missing, 14198c2ecf20Sopenharmony_ci * so it might be re-added, and we want to 14208c2ecf20Sopenharmony_ci * know to recover this chunk. 14218c2ecf20Sopenharmony_ci * In this case the device is here, and the 14228c2ecf20Sopenharmony_ci * fact that this chunk is not in-sync is 14238c2ecf20Sopenharmony_ci * recorded in the bad block log. 
14248c2ecf20Sopenharmony_ci */ 14258c2ecf20Sopenharmony_ci continue; 14268c2ecf20Sopenharmony_ci } 14278c2ecf20Sopenharmony_ci if (is_bad) { 14288c2ecf20Sopenharmony_ci int good_sectors = first_bad - dev_sector; 14298c2ecf20Sopenharmony_ci if (good_sectors < max_sectors) 14308c2ecf20Sopenharmony_ci max_sectors = good_sectors; 14318c2ecf20Sopenharmony_ci } 14328c2ecf20Sopenharmony_ci } 14338c2ecf20Sopenharmony_ci if (rdev) { 14348c2ecf20Sopenharmony_ci r10_bio->devs[i].bio = bio; 14358c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 14368c2ecf20Sopenharmony_ci } 14378c2ecf20Sopenharmony_ci if (rrdev) { 14388c2ecf20Sopenharmony_ci r10_bio->devs[i].repl_bio = bio; 14398c2ecf20Sopenharmony_ci atomic_inc(&rrdev->nr_pending); 14408c2ecf20Sopenharmony_ci } 14418c2ecf20Sopenharmony_ci } 14428c2ecf20Sopenharmony_ci rcu_read_unlock(); 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci if (unlikely(blocked_rdev)) { 14458c2ecf20Sopenharmony_ci /* Have to wait for this device to get unblocked, then retry */ 14468c2ecf20Sopenharmony_ci int j; 14478c2ecf20Sopenharmony_ci int d; 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci for (j = 0; j < i; j++) { 14508c2ecf20Sopenharmony_ci if (r10_bio->devs[j].bio) { 14518c2ecf20Sopenharmony_ci d = r10_bio->devs[j].devnum; 14528c2ecf20Sopenharmony_ci rdev_dec_pending(conf->mirrors[d].rdev, mddev); 14538c2ecf20Sopenharmony_ci } 14548c2ecf20Sopenharmony_ci if (r10_bio->devs[j].repl_bio) { 14558c2ecf20Sopenharmony_ci struct md_rdev *rdev; 14568c2ecf20Sopenharmony_ci d = r10_bio->devs[j].devnum; 14578c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].replacement; 14588c2ecf20Sopenharmony_ci if (!rdev) { 14598c2ecf20Sopenharmony_ci /* Race with remove_disk */ 14608c2ecf20Sopenharmony_ci smp_mb(); 14618c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].rdev; 14628c2ecf20Sopenharmony_ci } 14638c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 14648c2ecf20Sopenharmony_ci } 14658c2ecf20Sopenharmony_ci } 14668c2ecf20Sopenharmony_ci 
allow_barrier(conf); 14678c2ecf20Sopenharmony_ci raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk); 14688c2ecf20Sopenharmony_ci md_wait_for_blocked_rdev(blocked_rdev, mddev); 14698c2ecf20Sopenharmony_ci wait_barrier(conf); 14708c2ecf20Sopenharmony_ci goto retry_write; 14718c2ecf20Sopenharmony_ci } 14728c2ecf20Sopenharmony_ci 14738c2ecf20Sopenharmony_ci if (max_sectors < r10_bio->sectors) 14748c2ecf20Sopenharmony_ci r10_bio->sectors = max_sectors; 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_ci if (r10_bio->sectors < bio_sectors(bio)) { 14778c2ecf20Sopenharmony_ci struct bio *split = bio_split(bio, r10_bio->sectors, 14788c2ecf20Sopenharmony_ci GFP_NOIO, &conf->bio_split); 14798c2ecf20Sopenharmony_ci bio_chain(split, bio); 14808c2ecf20Sopenharmony_ci allow_barrier(conf); 14818c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 14828c2ecf20Sopenharmony_ci wait_barrier(conf); 14838c2ecf20Sopenharmony_ci bio = split; 14848c2ecf20Sopenharmony_ci r10_bio->master_bio = bio; 14858c2ecf20Sopenharmony_ci } 14868c2ecf20Sopenharmony_ci 14878c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 1); 14888c2ecf20Sopenharmony_ci md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); 14898c2ecf20Sopenharmony_ci 14908c2ecf20Sopenharmony_ci for (i = 0; i < conf->copies; i++) { 14918c2ecf20Sopenharmony_ci if (r10_bio->devs[i].bio) 14928c2ecf20Sopenharmony_ci raid10_write_one_disk(mddev, r10_bio, bio, false, i); 14938c2ecf20Sopenharmony_ci if (r10_bio->devs[i].repl_bio) 14948c2ecf20Sopenharmony_ci raid10_write_one_disk(mddev, r10_bio, bio, true, i); 14958c2ecf20Sopenharmony_ci } 14968c2ecf20Sopenharmony_ci one_write_done(r10_bio); 14978c2ecf20Sopenharmony_ci} 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_cistatic void __make_request(struct mddev *mddev, struct bio *bio, int sectors) 15008c2ecf20Sopenharmony_ci{ 15018c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 15028c2ecf20Sopenharmony_ci struct r10bio *r10_bio; 
15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); 15058c2ecf20Sopenharmony_ci 15068c2ecf20Sopenharmony_ci r10_bio->master_bio = bio; 15078c2ecf20Sopenharmony_ci r10_bio->sectors = sectors; 15088c2ecf20Sopenharmony_ci 15098c2ecf20Sopenharmony_ci r10_bio->mddev = mddev; 15108c2ecf20Sopenharmony_ci r10_bio->sector = bio->bi_iter.bi_sector; 15118c2ecf20Sopenharmony_ci r10_bio->state = 0; 15128c2ecf20Sopenharmony_ci r10_bio->read_slot = -1; 15138c2ecf20Sopenharmony_ci memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_ci if (bio_data_dir(bio) == READ) 15168c2ecf20Sopenharmony_ci raid10_read_request(mddev, bio, r10_bio); 15178c2ecf20Sopenharmony_ci else 15188c2ecf20Sopenharmony_ci raid10_write_request(mddev, bio, r10_bio); 15198c2ecf20Sopenharmony_ci} 15208c2ecf20Sopenharmony_ci 15218c2ecf20Sopenharmony_cistatic bool raid10_make_request(struct mddev *mddev, struct bio *bio) 15228c2ecf20Sopenharmony_ci{ 15238c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 15248c2ecf20Sopenharmony_ci sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); 15258c2ecf20Sopenharmony_ci int chunk_sects = chunk_mask + 1; 15268c2ecf20Sopenharmony_ci int sectors = bio_sectors(bio); 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci if (unlikely(bio->bi_opf & REQ_PREFLUSH) 15298c2ecf20Sopenharmony_ci && md_flush_request(mddev, bio)) 15308c2ecf20Sopenharmony_ci return true; 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_ci if (!md_write_start(mddev, bio)) 15338c2ecf20Sopenharmony_ci return false; 15348c2ecf20Sopenharmony_ci 15358c2ecf20Sopenharmony_ci /* 15368c2ecf20Sopenharmony_ci * If this request crosses a chunk boundary, we need to split 15378c2ecf20Sopenharmony_ci * it. 
15388c2ecf20Sopenharmony_ci */ 15398c2ecf20Sopenharmony_ci if (unlikely((bio->bi_iter.bi_sector & chunk_mask) + 15408c2ecf20Sopenharmony_ci sectors > chunk_sects 15418c2ecf20Sopenharmony_ci && (conf->geo.near_copies < conf->geo.raid_disks 15428c2ecf20Sopenharmony_ci || conf->prev.near_copies < 15438c2ecf20Sopenharmony_ci conf->prev.raid_disks))) 15448c2ecf20Sopenharmony_ci sectors = chunk_sects - 15458c2ecf20Sopenharmony_ci (bio->bi_iter.bi_sector & 15468c2ecf20Sopenharmony_ci (chunk_sects - 1)); 15478c2ecf20Sopenharmony_ci __make_request(mddev, bio, sectors); 15488c2ecf20Sopenharmony_ci 15498c2ecf20Sopenharmony_ci /* In case raid10d snuck in to freeze_array */ 15508c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 15518c2ecf20Sopenharmony_ci return true; 15528c2ecf20Sopenharmony_ci} 15538c2ecf20Sopenharmony_ci 15548c2ecf20Sopenharmony_cistatic void raid10_status(struct seq_file *seq, struct mddev *mddev) 15558c2ecf20Sopenharmony_ci{ 15568c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 15578c2ecf20Sopenharmony_ci int i; 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci if (conf->geo.near_copies < conf->geo.raid_disks) 15608c2ecf20Sopenharmony_ci seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); 15618c2ecf20Sopenharmony_ci if (conf->geo.near_copies > 1) 15628c2ecf20Sopenharmony_ci seq_printf(seq, " %d near-copies", conf->geo.near_copies); 15638c2ecf20Sopenharmony_ci if (conf->geo.far_copies > 1) { 15648c2ecf20Sopenharmony_ci if (conf->geo.far_offset) 15658c2ecf20Sopenharmony_ci seq_printf(seq, " %d offset-copies", conf->geo.far_copies); 15668c2ecf20Sopenharmony_ci else 15678c2ecf20Sopenharmony_ci seq_printf(seq, " %d far-copies", conf->geo.far_copies); 15688c2ecf20Sopenharmony_ci if (conf->geo.far_set_size != conf->geo.raid_disks) 15698c2ecf20Sopenharmony_ci seq_printf(seq, " %d devices per set", conf->geo.far_set_size); 15708c2ecf20Sopenharmony_ci } 15718c2ecf20Sopenharmony_ci seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, 
15728c2ecf20Sopenharmony_ci conf->geo.raid_disks - mddev->degraded); 15738c2ecf20Sopenharmony_ci rcu_read_lock(); 15748c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) { 15758c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); 15768c2ecf20Sopenharmony_ci seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); 15778c2ecf20Sopenharmony_ci } 15788c2ecf20Sopenharmony_ci rcu_read_unlock(); 15798c2ecf20Sopenharmony_ci seq_printf(seq, "]"); 15808c2ecf20Sopenharmony_ci} 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci/* check if there are enough drives for 15838c2ecf20Sopenharmony_ci * every block to appear on atleast one. 15848c2ecf20Sopenharmony_ci * Don't consider the device numbered 'ignore' 15858c2ecf20Sopenharmony_ci * as we might be about to remove it. 15868c2ecf20Sopenharmony_ci */ 15878c2ecf20Sopenharmony_cistatic int _enough(struct r10conf *conf, int previous, int ignore) 15888c2ecf20Sopenharmony_ci{ 15898c2ecf20Sopenharmony_ci int first = 0; 15908c2ecf20Sopenharmony_ci int has_enough = 0; 15918c2ecf20Sopenharmony_ci int disks, ncopies; 15928c2ecf20Sopenharmony_ci if (previous) { 15938c2ecf20Sopenharmony_ci disks = conf->prev.raid_disks; 15948c2ecf20Sopenharmony_ci ncopies = conf->prev.near_copies; 15958c2ecf20Sopenharmony_ci } else { 15968c2ecf20Sopenharmony_ci disks = conf->geo.raid_disks; 15978c2ecf20Sopenharmony_ci ncopies = conf->geo.near_copies; 15988c2ecf20Sopenharmony_ci } 15998c2ecf20Sopenharmony_ci 16008c2ecf20Sopenharmony_ci rcu_read_lock(); 16018c2ecf20Sopenharmony_ci do { 16028c2ecf20Sopenharmony_ci int n = conf->copies; 16038c2ecf20Sopenharmony_ci int cnt = 0; 16048c2ecf20Sopenharmony_ci int this = first; 16058c2ecf20Sopenharmony_ci while (n--) { 16068c2ecf20Sopenharmony_ci struct md_rdev *rdev; 16078c2ecf20Sopenharmony_ci if (this != ignore && 16088c2ecf20Sopenharmony_ci (rdev = rcu_dereference(conf->mirrors[this].rdev)) && 16098c2ecf20Sopenharmony_ci test_bit(In_sync, 
&rdev->flags)) 16108c2ecf20Sopenharmony_ci cnt++; 16118c2ecf20Sopenharmony_ci this = (this+1) % disks; 16128c2ecf20Sopenharmony_ci } 16138c2ecf20Sopenharmony_ci if (cnt == 0) 16148c2ecf20Sopenharmony_ci goto out; 16158c2ecf20Sopenharmony_ci first = (first + ncopies) % disks; 16168c2ecf20Sopenharmony_ci } while (first != 0); 16178c2ecf20Sopenharmony_ci has_enough = 1; 16188c2ecf20Sopenharmony_ciout: 16198c2ecf20Sopenharmony_ci rcu_read_unlock(); 16208c2ecf20Sopenharmony_ci return has_enough; 16218c2ecf20Sopenharmony_ci} 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_cistatic int enough(struct r10conf *conf, int ignore) 16248c2ecf20Sopenharmony_ci{ 16258c2ecf20Sopenharmony_ci /* when calling 'enough', both 'prev' and 'geo' must 16268c2ecf20Sopenharmony_ci * be stable. 16278c2ecf20Sopenharmony_ci * This is ensured if ->reconfig_mutex or ->device_lock 16288c2ecf20Sopenharmony_ci * is held. 16298c2ecf20Sopenharmony_ci */ 16308c2ecf20Sopenharmony_ci return _enough(conf, 0, ignore) && 16318c2ecf20Sopenharmony_ci _enough(conf, 1, ignore); 16328c2ecf20Sopenharmony_ci} 16338c2ecf20Sopenharmony_ci 16348c2ecf20Sopenharmony_cistatic void raid10_error(struct mddev *mddev, struct md_rdev *rdev) 16358c2ecf20Sopenharmony_ci{ 16368c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 16378c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 16388c2ecf20Sopenharmony_ci unsigned long flags; 16398c2ecf20Sopenharmony_ci 16408c2ecf20Sopenharmony_ci /* 16418c2ecf20Sopenharmony_ci * If it is not operational, then we have already marked it as dead 16428c2ecf20Sopenharmony_ci * else if it is the last working disks with "fail_last_dev == false", 16438c2ecf20Sopenharmony_ci * ignore the error, let the next level up know. 
16448c2ecf20Sopenharmony_ci * else mark the drive as failed 16458c2ecf20Sopenharmony_ci */ 16468c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 16478c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev 16488c2ecf20Sopenharmony_ci && !enough(conf, rdev->raid_disk)) { 16498c2ecf20Sopenharmony_ci /* 16508c2ecf20Sopenharmony_ci * Don't fail the drive, just return an IO error. 16518c2ecf20Sopenharmony_ci */ 16528c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 16538c2ecf20Sopenharmony_ci return; 16548c2ecf20Sopenharmony_ci } 16558c2ecf20Sopenharmony_ci if (test_and_clear_bit(In_sync, &rdev->flags)) 16568c2ecf20Sopenharmony_ci mddev->degraded++; 16578c2ecf20Sopenharmony_ci /* 16588c2ecf20Sopenharmony_ci * If recovery is running, make sure it aborts. 16598c2ecf20Sopenharmony_ci */ 16608c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, &mddev->recovery); 16618c2ecf20Sopenharmony_ci set_bit(Blocked, &rdev->flags); 16628c2ecf20Sopenharmony_ci set_bit(Faulty, &rdev->flags); 16638c2ecf20Sopenharmony_ci set_mask_bits(&mddev->sb_flags, 0, 16648c2ecf20Sopenharmony_ci BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); 16658c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 16668c2ecf20Sopenharmony_ci pr_crit("md/raid10:%s: Disk failure on %s, disabling device.\n" 16678c2ecf20Sopenharmony_ci "md/raid10:%s: Operation continuing on %d devices.\n", 16688c2ecf20Sopenharmony_ci mdname(mddev), bdevname(rdev->bdev, b), 16698c2ecf20Sopenharmony_ci mdname(mddev), conf->geo.raid_disks - mddev->degraded); 16708c2ecf20Sopenharmony_ci} 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_cistatic void print_conf(struct r10conf *conf) 16738c2ecf20Sopenharmony_ci{ 16748c2ecf20Sopenharmony_ci int i; 16758c2ecf20Sopenharmony_ci struct md_rdev *rdev; 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci pr_debug("RAID10 conf printout:\n"); 16788c2ecf20Sopenharmony_ci if (!conf) { 
16798c2ecf20Sopenharmony_ci pr_debug("(!conf)\n"); 16808c2ecf20Sopenharmony_ci return; 16818c2ecf20Sopenharmony_ci } 16828c2ecf20Sopenharmony_ci pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded, 16838c2ecf20Sopenharmony_ci conf->geo.raid_disks); 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_ci /* This is only called with ->reconfix_mutex held, so 16868c2ecf20Sopenharmony_ci * rcu protection of rdev is not needed */ 16878c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) { 16888c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 16898c2ecf20Sopenharmony_ci rdev = conf->mirrors[i].rdev; 16908c2ecf20Sopenharmony_ci if (rdev) 16918c2ecf20Sopenharmony_ci pr_debug(" disk %d, wo:%d, o:%d, dev:%s\n", 16928c2ecf20Sopenharmony_ci i, !test_bit(In_sync, &rdev->flags), 16938c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags), 16948c2ecf20Sopenharmony_ci bdevname(rdev->bdev,b)); 16958c2ecf20Sopenharmony_ci } 16968c2ecf20Sopenharmony_ci} 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_cistatic void close_sync(struct r10conf *conf) 16998c2ecf20Sopenharmony_ci{ 17008c2ecf20Sopenharmony_ci wait_barrier(conf); 17018c2ecf20Sopenharmony_ci allow_barrier(conf); 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci mempool_exit(&conf->r10buf_pool); 17048c2ecf20Sopenharmony_ci} 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_cistatic int raid10_spare_active(struct mddev *mddev) 17078c2ecf20Sopenharmony_ci{ 17088c2ecf20Sopenharmony_ci int i; 17098c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 17108c2ecf20Sopenharmony_ci struct raid10_info *tmp; 17118c2ecf20Sopenharmony_ci int count = 0; 17128c2ecf20Sopenharmony_ci unsigned long flags; 17138c2ecf20Sopenharmony_ci 17148c2ecf20Sopenharmony_ci /* 17158c2ecf20Sopenharmony_ci * Find all non-in_sync disks within the RAID10 configuration 17168c2ecf20Sopenharmony_ci * and mark them in_sync 17178c2ecf20Sopenharmony_ci */ 17188c2ecf20Sopenharmony_ci for (i = 0; i < 
conf->geo.raid_disks; i++) { 17198c2ecf20Sopenharmony_ci tmp = conf->mirrors + i; 17208c2ecf20Sopenharmony_ci if (tmp->replacement 17218c2ecf20Sopenharmony_ci && tmp->replacement->recovery_offset == MaxSector 17228c2ecf20Sopenharmony_ci && !test_bit(Faulty, &tmp->replacement->flags) 17238c2ecf20Sopenharmony_ci && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { 17248c2ecf20Sopenharmony_ci /* Replacement has just become active */ 17258c2ecf20Sopenharmony_ci if (!tmp->rdev 17268c2ecf20Sopenharmony_ci || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) 17278c2ecf20Sopenharmony_ci count++; 17288c2ecf20Sopenharmony_ci if (tmp->rdev) { 17298c2ecf20Sopenharmony_ci /* Replaced device not technically faulty, 17308c2ecf20Sopenharmony_ci * but we need to be sure it gets removed 17318c2ecf20Sopenharmony_ci * and never re-added. 17328c2ecf20Sopenharmony_ci */ 17338c2ecf20Sopenharmony_ci set_bit(Faulty, &tmp->rdev->flags); 17348c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe( 17358c2ecf20Sopenharmony_ci tmp->rdev->sysfs_state); 17368c2ecf20Sopenharmony_ci } 17378c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); 17388c2ecf20Sopenharmony_ci } else if (tmp->rdev 17398c2ecf20Sopenharmony_ci && tmp->rdev->recovery_offset == MaxSector 17408c2ecf20Sopenharmony_ci && !test_bit(Faulty, &tmp->rdev->flags) 17418c2ecf20Sopenharmony_ci && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 17428c2ecf20Sopenharmony_ci count++; 17438c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(tmp->rdev->sysfs_state); 17448c2ecf20Sopenharmony_ci } 17458c2ecf20Sopenharmony_ci } 17468c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 17478c2ecf20Sopenharmony_ci mddev->degraded -= count; 17488c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci print_conf(conf); 17518c2ecf20Sopenharmony_ci return count; 17528c2ecf20Sopenharmony_ci} 17538c2ecf20Sopenharmony_ci 
17548c2ecf20Sopenharmony_cistatic int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) 17558c2ecf20Sopenharmony_ci{ 17568c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 17578c2ecf20Sopenharmony_ci int err = -EEXIST; 17588c2ecf20Sopenharmony_ci int mirror; 17598c2ecf20Sopenharmony_ci int first = 0; 17608c2ecf20Sopenharmony_ci int last = conf->geo.raid_disks - 1; 17618c2ecf20Sopenharmony_ci 17628c2ecf20Sopenharmony_ci if (mddev->recovery_cp < MaxSector) 17638c2ecf20Sopenharmony_ci /* only hot-add to in-sync arrays, as recovery is 17648c2ecf20Sopenharmony_ci * very different from resync 17658c2ecf20Sopenharmony_ci */ 17668c2ecf20Sopenharmony_ci return -EBUSY; 17678c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1)) 17688c2ecf20Sopenharmony_ci return -EINVAL; 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci if (md_integrity_add_rdev(rdev, mddev)) 17718c2ecf20Sopenharmony_ci return -ENXIO; 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0) 17748c2ecf20Sopenharmony_ci first = last = rdev->raid_disk; 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk >= first && 17778c2ecf20Sopenharmony_ci rdev->saved_raid_disk < conf->geo.raid_disks && 17788c2ecf20Sopenharmony_ci conf->mirrors[rdev->saved_raid_disk].rdev == NULL) 17798c2ecf20Sopenharmony_ci mirror = rdev->saved_raid_disk; 17808c2ecf20Sopenharmony_ci else 17818c2ecf20Sopenharmony_ci mirror = first; 17828c2ecf20Sopenharmony_ci for ( ; mirror <= last ; mirror++) { 17838c2ecf20Sopenharmony_ci struct raid10_info *p = &conf->mirrors[mirror]; 17848c2ecf20Sopenharmony_ci if (p->recovery_disabled == mddev->recovery_disabled) 17858c2ecf20Sopenharmony_ci continue; 17868c2ecf20Sopenharmony_ci if (p->rdev) { 17878c2ecf20Sopenharmony_ci if (!test_bit(WantReplacement, &p->rdev->flags) || 17888c2ecf20Sopenharmony_ci p->replacement != NULL) 17898c2ecf20Sopenharmony_ci continue; 17908c2ecf20Sopenharmony_ci 
clear_bit(In_sync, &rdev->flags); 17918c2ecf20Sopenharmony_ci set_bit(Replacement, &rdev->flags); 17928c2ecf20Sopenharmony_ci rdev->raid_disk = mirror; 17938c2ecf20Sopenharmony_ci err = 0; 17948c2ecf20Sopenharmony_ci if (mddev->gendisk) 17958c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 17968c2ecf20Sopenharmony_ci rdev->data_offset << 9); 17978c2ecf20Sopenharmony_ci conf->fullsync = 1; 17988c2ecf20Sopenharmony_ci rcu_assign_pointer(p->replacement, rdev); 17998c2ecf20Sopenharmony_ci break; 18008c2ecf20Sopenharmony_ci } 18018c2ecf20Sopenharmony_ci 18028c2ecf20Sopenharmony_ci if (mddev->gendisk) 18038c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 18048c2ecf20Sopenharmony_ci rdev->data_offset << 9); 18058c2ecf20Sopenharmony_ci 18068c2ecf20Sopenharmony_ci p->head_position = 0; 18078c2ecf20Sopenharmony_ci p->recovery_disabled = mddev->recovery_disabled - 1; 18088c2ecf20Sopenharmony_ci rdev->raid_disk = mirror; 18098c2ecf20Sopenharmony_ci err = 0; 18108c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk != mirror) 18118c2ecf20Sopenharmony_ci conf->fullsync = 1; 18128c2ecf20Sopenharmony_ci rcu_assign_pointer(p->rdev, rdev); 18138c2ecf20Sopenharmony_ci break; 18148c2ecf20Sopenharmony_ci } 18158c2ecf20Sopenharmony_ci if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) 18168c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); 18178c2ecf20Sopenharmony_ci 18188c2ecf20Sopenharmony_ci print_conf(conf); 18198c2ecf20Sopenharmony_ci return err; 18208c2ecf20Sopenharmony_ci} 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_cistatic int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) 18238c2ecf20Sopenharmony_ci{ 18248c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 18258c2ecf20Sopenharmony_ci int err = 0; 18268c2ecf20Sopenharmony_ci int number = rdev->raid_disk; 18278c2ecf20Sopenharmony_ci struct md_rdev **rdevp; 18288c2ecf20Sopenharmony_ci struct raid10_info *p; 
18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci print_conf(conf); 18318c2ecf20Sopenharmony_ci if (unlikely(number >= mddev->raid_disks)) 18328c2ecf20Sopenharmony_ci return 0; 18338c2ecf20Sopenharmony_ci p = conf->mirrors + number; 18348c2ecf20Sopenharmony_ci if (rdev == p->rdev) 18358c2ecf20Sopenharmony_ci rdevp = &p->rdev; 18368c2ecf20Sopenharmony_ci else if (rdev == p->replacement) 18378c2ecf20Sopenharmony_ci rdevp = &p->replacement; 18388c2ecf20Sopenharmony_ci else 18398c2ecf20Sopenharmony_ci return 0; 18408c2ecf20Sopenharmony_ci 18418c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags) || 18428c2ecf20Sopenharmony_ci atomic_read(&rdev->nr_pending)) { 18438c2ecf20Sopenharmony_ci err = -EBUSY; 18448c2ecf20Sopenharmony_ci goto abort; 18458c2ecf20Sopenharmony_ci } 18468c2ecf20Sopenharmony_ci /* Only remove non-faulty devices if recovery 18478c2ecf20Sopenharmony_ci * is not possible. 18488c2ecf20Sopenharmony_ci */ 18498c2ecf20Sopenharmony_ci if (!test_bit(Faulty, &rdev->flags) && 18508c2ecf20Sopenharmony_ci mddev->recovery_disabled != p->recovery_disabled && 18518c2ecf20Sopenharmony_ci (!p->replacement || p->replacement == rdev) && 18528c2ecf20Sopenharmony_ci number < conf->geo.raid_disks && 18538c2ecf20Sopenharmony_ci enough(conf, -1)) { 18548c2ecf20Sopenharmony_ci err = -EBUSY; 18558c2ecf20Sopenharmony_ci goto abort; 18568c2ecf20Sopenharmony_ci } 18578c2ecf20Sopenharmony_ci *rdevp = NULL; 18588c2ecf20Sopenharmony_ci if (!test_bit(RemoveSynchronized, &rdev->flags)) { 18598c2ecf20Sopenharmony_ci synchronize_rcu(); 18608c2ecf20Sopenharmony_ci if (atomic_read(&rdev->nr_pending)) { 18618c2ecf20Sopenharmony_ci /* lost the race, try later */ 18628c2ecf20Sopenharmony_ci err = -EBUSY; 18638c2ecf20Sopenharmony_ci *rdevp = rdev; 18648c2ecf20Sopenharmony_ci goto abort; 18658c2ecf20Sopenharmony_ci } 18668c2ecf20Sopenharmony_ci } 18678c2ecf20Sopenharmony_ci if (p->replacement) { 18688c2ecf20Sopenharmony_ci /* We must have just cleared 'rdev' */ 
18698c2ecf20Sopenharmony_ci p->rdev = p->replacement; 18708c2ecf20Sopenharmony_ci clear_bit(Replacement, &p->replacement->flags); 18718c2ecf20Sopenharmony_ci smp_mb(); /* Make sure other CPUs may see both as identical 18728c2ecf20Sopenharmony_ci * but will never see neither -- if they are careful. 18738c2ecf20Sopenharmony_ci */ 18748c2ecf20Sopenharmony_ci p->replacement = NULL; 18758c2ecf20Sopenharmony_ci } 18768c2ecf20Sopenharmony_ci 18778c2ecf20Sopenharmony_ci clear_bit(WantReplacement, &rdev->flags); 18788c2ecf20Sopenharmony_ci err = md_integrity_register(mddev); 18798c2ecf20Sopenharmony_ci 18808c2ecf20Sopenharmony_ciabort: 18818c2ecf20Sopenharmony_ci 18828c2ecf20Sopenharmony_ci print_conf(conf); 18838c2ecf20Sopenharmony_ci return err; 18848c2ecf20Sopenharmony_ci} 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_cistatic void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d) 18878c2ecf20Sopenharmony_ci{ 18888c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 18898c2ecf20Sopenharmony_ci 18908c2ecf20Sopenharmony_ci if (!bio->bi_status) 18918c2ecf20Sopenharmony_ci set_bit(R10BIO_Uptodate, &r10_bio->state); 18928c2ecf20Sopenharmony_ci else 18938c2ecf20Sopenharmony_ci /* The write handler will notice the lack of 18948c2ecf20Sopenharmony_ci * R10BIO_Uptodate and record any errors etc 18958c2ecf20Sopenharmony_ci */ 18968c2ecf20Sopenharmony_ci atomic_add(r10_bio->sectors, 18978c2ecf20Sopenharmony_ci &conf->mirrors[d].rdev->corrected_errors); 18988c2ecf20Sopenharmony_ci 18998c2ecf20Sopenharmony_ci /* for reconstruct, we always reschedule after a read. 
19008c2ecf20Sopenharmony_ci * for resync, only after all reads 19018c2ecf20Sopenharmony_ci */ 19028c2ecf20Sopenharmony_ci rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); 19038c2ecf20Sopenharmony_ci if (test_bit(R10BIO_IsRecover, &r10_bio->state) || 19048c2ecf20Sopenharmony_ci atomic_dec_and_test(&r10_bio->remaining)) { 19058c2ecf20Sopenharmony_ci /* we have read all the blocks, 19068c2ecf20Sopenharmony_ci * do the comparison in process context in raid10d 19078c2ecf20Sopenharmony_ci */ 19088c2ecf20Sopenharmony_ci reschedule_retry(r10_bio); 19098c2ecf20Sopenharmony_ci } 19108c2ecf20Sopenharmony_ci} 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_cistatic void end_sync_read(struct bio *bio) 19138c2ecf20Sopenharmony_ci{ 19148c2ecf20Sopenharmony_ci struct r10bio *r10_bio = get_resync_r10bio(bio); 19158c2ecf20Sopenharmony_ci struct r10conf *conf = r10_bio->mddev->private; 19168c2ecf20Sopenharmony_ci int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); 19178c2ecf20Sopenharmony_ci 19188c2ecf20Sopenharmony_ci __end_sync_read(r10_bio, bio, d); 19198c2ecf20Sopenharmony_ci} 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_cistatic void end_reshape_read(struct bio *bio) 19228c2ecf20Sopenharmony_ci{ 19238c2ecf20Sopenharmony_ci /* reshape read bio isn't allocated from r10buf_pool */ 19248c2ecf20Sopenharmony_ci struct r10bio *r10_bio = bio->bi_private; 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci __end_sync_read(r10_bio, bio, r10_bio->read_slot); 19278c2ecf20Sopenharmony_ci} 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_cistatic void end_sync_request(struct r10bio *r10_bio) 19308c2ecf20Sopenharmony_ci{ 19318c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 19328c2ecf20Sopenharmony_ci 19338c2ecf20Sopenharmony_ci while (atomic_dec_and_test(&r10_bio->remaining)) { 19348c2ecf20Sopenharmony_ci if (r10_bio->master_bio == NULL) { 19358c2ecf20Sopenharmony_ci /* the primary of several recovery bios */ 19368c2ecf20Sopenharmony_ci sector_t s = 
r10_bio->sectors; 19378c2ecf20Sopenharmony_ci if (test_bit(R10BIO_MadeGood, &r10_bio->state) || 19388c2ecf20Sopenharmony_ci test_bit(R10BIO_WriteError, &r10_bio->state)) 19398c2ecf20Sopenharmony_ci reschedule_retry(r10_bio); 19408c2ecf20Sopenharmony_ci else 19418c2ecf20Sopenharmony_ci put_buf(r10_bio); 19428c2ecf20Sopenharmony_ci md_done_sync(mddev, s, 1); 19438c2ecf20Sopenharmony_ci break; 19448c2ecf20Sopenharmony_ci } else { 19458c2ecf20Sopenharmony_ci struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio; 19468c2ecf20Sopenharmony_ci if (test_bit(R10BIO_MadeGood, &r10_bio->state) || 19478c2ecf20Sopenharmony_ci test_bit(R10BIO_WriteError, &r10_bio->state)) 19488c2ecf20Sopenharmony_ci reschedule_retry(r10_bio); 19498c2ecf20Sopenharmony_ci else 19508c2ecf20Sopenharmony_ci put_buf(r10_bio); 19518c2ecf20Sopenharmony_ci r10_bio = r10_bio2; 19528c2ecf20Sopenharmony_ci } 19538c2ecf20Sopenharmony_ci } 19548c2ecf20Sopenharmony_ci} 19558c2ecf20Sopenharmony_ci 19568c2ecf20Sopenharmony_cistatic void end_sync_write(struct bio *bio) 19578c2ecf20Sopenharmony_ci{ 19588c2ecf20Sopenharmony_ci struct r10bio *r10_bio = get_resync_r10bio(bio); 19598c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 19608c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 19618c2ecf20Sopenharmony_ci int d; 19628c2ecf20Sopenharmony_ci sector_t first_bad; 19638c2ecf20Sopenharmony_ci int bad_sectors; 19648c2ecf20Sopenharmony_ci int slot; 19658c2ecf20Sopenharmony_ci int repl; 19668c2ecf20Sopenharmony_ci struct md_rdev *rdev = NULL; 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 19698c2ecf20Sopenharmony_ci if (repl) 19708c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].replacement; 19718c2ecf20Sopenharmony_ci else 19728c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].rdev; 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_ci if (bio->bi_status) { 19758c2ecf20Sopenharmony_ci if (repl) 19768c2ecf20Sopenharmony_ci 
md_error(mddev, rdev); 19778c2ecf20Sopenharmony_ci else { 19788c2ecf20Sopenharmony_ci set_bit(WriteErrorSeen, &rdev->flags); 19798c2ecf20Sopenharmony_ci if (!test_and_set_bit(WantReplacement, &rdev->flags)) 19808c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, 19818c2ecf20Sopenharmony_ci &rdev->mddev->recovery); 19828c2ecf20Sopenharmony_ci set_bit(R10BIO_WriteError, &r10_bio->state); 19838c2ecf20Sopenharmony_ci } 19848c2ecf20Sopenharmony_ci } else if (is_badblock(rdev, 19858c2ecf20Sopenharmony_ci r10_bio->devs[slot].addr, 19868c2ecf20Sopenharmony_ci r10_bio->sectors, 19878c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) 19888c2ecf20Sopenharmony_ci set_bit(R10BIO_MadeGood, &r10_bio->state); 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 19918c2ecf20Sopenharmony_ci 19928c2ecf20Sopenharmony_ci end_sync_request(r10_bio); 19938c2ecf20Sopenharmony_ci} 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_ci/* 19968c2ecf20Sopenharmony_ci * Note: sync and recover and handled very differently for raid10 19978c2ecf20Sopenharmony_ci * This code is for resync. 19988c2ecf20Sopenharmony_ci * For resync, we read through virtual addresses and read all blocks. 19998c2ecf20Sopenharmony_ci * If there is any error, we schedule a write. The lowest numbered 20008c2ecf20Sopenharmony_ci * drive is authoritative. 20018c2ecf20Sopenharmony_ci * However requests come for physical address, so we need to map. 20028c2ecf20Sopenharmony_ci * For every physical address there are raid_disks/copies virtual addresses, 20038c2ecf20Sopenharmony_ci * which is always are least one, but is not necessarly an integer. 20048c2ecf20Sopenharmony_ci * This means that a physical address can span multiple chunks, so we may 20058c2ecf20Sopenharmony_ci * have to submit multiple io requests for a single sync request. 
20068c2ecf20Sopenharmony_ci */ 20078c2ecf20Sopenharmony_ci/* 20088c2ecf20Sopenharmony_ci * We check if all blocks are in-sync and only write to blocks that 20098c2ecf20Sopenharmony_ci * aren't in sync 20108c2ecf20Sopenharmony_ci */ 20118c2ecf20Sopenharmony_cistatic void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) 20128c2ecf20Sopenharmony_ci{ 20138c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 20148c2ecf20Sopenharmony_ci int i, first; 20158c2ecf20Sopenharmony_ci struct bio *tbio, *fbio; 20168c2ecf20Sopenharmony_ci int vcnt; 20178c2ecf20Sopenharmony_ci struct page **tpages, **fpages; 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 1); 20208c2ecf20Sopenharmony_ci 20218c2ecf20Sopenharmony_ci /* find the first device with a block */ 20228c2ecf20Sopenharmony_ci for (i=0; i<conf->copies; i++) 20238c2ecf20Sopenharmony_ci if (!r10_bio->devs[i].bio->bi_status) 20248c2ecf20Sopenharmony_ci break; 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci if (i == conf->copies) 20278c2ecf20Sopenharmony_ci goto done; 20288c2ecf20Sopenharmony_ci 20298c2ecf20Sopenharmony_ci first = i; 20308c2ecf20Sopenharmony_ci fbio = r10_bio->devs[i].bio; 20318c2ecf20Sopenharmony_ci fbio->bi_iter.bi_size = r10_bio->sectors << 9; 20328c2ecf20Sopenharmony_ci fbio->bi_iter.bi_idx = 0; 20338c2ecf20Sopenharmony_ci fpages = get_resync_pages(fbio)->pages; 20348c2ecf20Sopenharmony_ci 20358c2ecf20Sopenharmony_ci vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9); 20368c2ecf20Sopenharmony_ci /* now find blocks with errors */ 20378c2ecf20Sopenharmony_ci for (i=0 ; i < conf->copies ; i++) { 20388c2ecf20Sopenharmony_ci int j, d; 20398c2ecf20Sopenharmony_ci struct md_rdev *rdev; 20408c2ecf20Sopenharmony_ci struct resync_pages *rp; 20418c2ecf20Sopenharmony_ci 20428c2ecf20Sopenharmony_ci tbio = r10_bio->devs[i].bio; 20438c2ecf20Sopenharmony_ci 20448c2ecf20Sopenharmony_ci if (tbio->bi_end_io != end_sync_read) 
20458c2ecf20Sopenharmony_ci continue; 20468c2ecf20Sopenharmony_ci if (i == first) 20478c2ecf20Sopenharmony_ci continue; 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_ci tpages = get_resync_pages(tbio)->pages; 20508c2ecf20Sopenharmony_ci d = r10_bio->devs[i].devnum; 20518c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].rdev; 20528c2ecf20Sopenharmony_ci if (!r10_bio->devs[i].bio->bi_status) { 20538c2ecf20Sopenharmony_ci /* We know that the bi_io_vec layout is the same for 20548c2ecf20Sopenharmony_ci * both 'first' and 'i', so we just compare them. 20558c2ecf20Sopenharmony_ci * All vec entries are PAGE_SIZE; 20568c2ecf20Sopenharmony_ci */ 20578c2ecf20Sopenharmony_ci int sectors = r10_bio->sectors; 20588c2ecf20Sopenharmony_ci for (j = 0; j < vcnt; j++) { 20598c2ecf20Sopenharmony_ci int len = PAGE_SIZE; 20608c2ecf20Sopenharmony_ci if (sectors < (len / 512)) 20618c2ecf20Sopenharmony_ci len = sectors * 512; 20628c2ecf20Sopenharmony_ci if (memcmp(page_address(fpages[j]), 20638c2ecf20Sopenharmony_ci page_address(tpages[j]), 20648c2ecf20Sopenharmony_ci len)) 20658c2ecf20Sopenharmony_ci break; 20668c2ecf20Sopenharmony_ci sectors -= len/512; 20678c2ecf20Sopenharmony_ci } 20688c2ecf20Sopenharmony_ci if (j == vcnt) 20698c2ecf20Sopenharmony_ci continue; 20708c2ecf20Sopenharmony_ci atomic64_add(r10_bio->sectors, &mddev->resync_mismatches); 20718c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) 20728c2ecf20Sopenharmony_ci /* Don't fix anything. */ 20738c2ecf20Sopenharmony_ci continue; 20748c2ecf20Sopenharmony_ci } else if (test_bit(FailFast, &rdev->flags)) { 20758c2ecf20Sopenharmony_ci /* Just give up on this device */ 20768c2ecf20Sopenharmony_ci md_error(rdev->mddev, rdev); 20778c2ecf20Sopenharmony_ci continue; 20788c2ecf20Sopenharmony_ci } 20798c2ecf20Sopenharmony_ci /* Ok, we need to write this bio, either to correct an 20808c2ecf20Sopenharmony_ci * inconsistency or to correct an unreadable block. 
20818c2ecf20Sopenharmony_ci * First we need to fixup bv_offset, bv_len and 20828c2ecf20Sopenharmony_ci * bi_vecs, as the read request might have corrupted these 20838c2ecf20Sopenharmony_ci */ 20848c2ecf20Sopenharmony_ci rp = get_resync_pages(tbio); 20858c2ecf20Sopenharmony_ci bio_reset(tbio); 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ci md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); 20888c2ecf20Sopenharmony_ci 20898c2ecf20Sopenharmony_ci rp->raid_bio = r10_bio; 20908c2ecf20Sopenharmony_ci tbio->bi_private = rp; 20918c2ecf20Sopenharmony_ci tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; 20928c2ecf20Sopenharmony_ci tbio->bi_end_io = end_sync_write; 20938c2ecf20Sopenharmony_ci bio_set_op_attrs(tbio, REQ_OP_WRITE, 0); 20948c2ecf20Sopenharmony_ci 20958c2ecf20Sopenharmony_ci bio_copy_data(tbio, fbio); 20968c2ecf20Sopenharmony_ci 20978c2ecf20Sopenharmony_ci atomic_inc(&conf->mirrors[d].rdev->nr_pending); 20988c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 20998c2ecf20Sopenharmony_ci md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio)); 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ci if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) 21028c2ecf20Sopenharmony_ci tbio->bi_opf |= MD_FAILFAST; 21038c2ecf20Sopenharmony_ci tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; 21048c2ecf20Sopenharmony_ci bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); 21058c2ecf20Sopenharmony_ci submit_bio_noacct(tbio); 21068c2ecf20Sopenharmony_ci } 21078c2ecf20Sopenharmony_ci 21088c2ecf20Sopenharmony_ci /* Now write out to any replacement devices 21098c2ecf20Sopenharmony_ci * that are active 21108c2ecf20Sopenharmony_ci */ 21118c2ecf20Sopenharmony_ci for (i = 0; i < conf->copies; i++) { 21128c2ecf20Sopenharmony_ci int d; 21138c2ecf20Sopenharmony_ci 21148c2ecf20Sopenharmony_ci tbio = r10_bio->devs[i].repl_bio; 21158c2ecf20Sopenharmony_ci if (!tbio || !tbio->bi_end_io) 21168c2ecf20Sopenharmony_ci continue; 21178c2ecf20Sopenharmony_ci if 
(r10_bio->devs[i].bio->bi_end_io != end_sync_write 21188c2ecf20Sopenharmony_ci && r10_bio->devs[i].bio != fbio) 21198c2ecf20Sopenharmony_ci bio_copy_data(tbio, fbio); 21208c2ecf20Sopenharmony_ci d = r10_bio->devs[i].devnum; 21218c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 21228c2ecf20Sopenharmony_ci md_sync_acct(conf->mirrors[d].replacement->bdev, 21238c2ecf20Sopenharmony_ci bio_sectors(tbio)); 21248c2ecf20Sopenharmony_ci submit_bio_noacct(tbio); 21258c2ecf20Sopenharmony_ci } 21268c2ecf20Sopenharmony_ci 21278c2ecf20Sopenharmony_cidone: 21288c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&r10_bio->remaining)) { 21298c2ecf20Sopenharmony_ci md_done_sync(mddev, r10_bio->sectors, 1); 21308c2ecf20Sopenharmony_ci put_buf(r10_bio); 21318c2ecf20Sopenharmony_ci } 21328c2ecf20Sopenharmony_ci} 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_ci/* 21358c2ecf20Sopenharmony_ci * Now for the recovery code. 21368c2ecf20Sopenharmony_ci * Recovery happens across physical sectors. 21378c2ecf20Sopenharmony_ci * We recover all non-is_sync drives by finding the virtual address of 21388c2ecf20Sopenharmony_ci * each, and then choose a working drive that also has that virt address. 21398c2ecf20Sopenharmony_ci * There is a separate r10_bio for each non-in_sync drive. 21408c2ecf20Sopenharmony_ci * Only the first two slots are in use. The first for reading, 21418c2ecf20Sopenharmony_ci * The second for writing. 21428c2ecf20Sopenharmony_ci * 21438c2ecf20Sopenharmony_ci */ 21448c2ecf20Sopenharmony_cistatic void fix_recovery_read_error(struct r10bio *r10_bio) 21458c2ecf20Sopenharmony_ci{ 21468c2ecf20Sopenharmony_ci /* We got a read error during recovery. 21478c2ecf20Sopenharmony_ci * We repeat the read in smaller page-sized sections. 21488c2ecf20Sopenharmony_ci * If a read succeeds, write it to the new device or record 21498c2ecf20Sopenharmony_ci * a bad block if we cannot. 
21508c2ecf20Sopenharmony_ci * If a read fails, record a bad block on both old and 21518c2ecf20Sopenharmony_ci * new devices. 21528c2ecf20Sopenharmony_ci */ 21538c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 21548c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 21558c2ecf20Sopenharmony_ci struct bio *bio = r10_bio->devs[0].bio; 21568c2ecf20Sopenharmony_ci sector_t sect = 0; 21578c2ecf20Sopenharmony_ci int sectors = r10_bio->sectors; 21588c2ecf20Sopenharmony_ci int idx = 0; 21598c2ecf20Sopenharmony_ci int dr = r10_bio->devs[0].devnum; 21608c2ecf20Sopenharmony_ci int dw = r10_bio->devs[1].devnum; 21618c2ecf20Sopenharmony_ci struct page **pages = get_resync_pages(bio)->pages; 21628c2ecf20Sopenharmony_ci 21638c2ecf20Sopenharmony_ci while (sectors) { 21648c2ecf20Sopenharmony_ci int s = sectors; 21658c2ecf20Sopenharmony_ci struct md_rdev *rdev; 21668c2ecf20Sopenharmony_ci sector_t addr; 21678c2ecf20Sopenharmony_ci int ok; 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci if (s > (PAGE_SIZE>>9)) 21708c2ecf20Sopenharmony_ci s = PAGE_SIZE >> 9; 21718c2ecf20Sopenharmony_ci 21728c2ecf20Sopenharmony_ci rdev = conf->mirrors[dr].rdev; 21738c2ecf20Sopenharmony_ci addr = r10_bio->devs[0].addr + sect, 21748c2ecf20Sopenharmony_ci ok = sync_page_io(rdev, 21758c2ecf20Sopenharmony_ci addr, 21768c2ecf20Sopenharmony_ci s << 9, 21778c2ecf20Sopenharmony_ci pages[idx], 21788c2ecf20Sopenharmony_ci REQ_OP_READ, 0, false); 21798c2ecf20Sopenharmony_ci if (ok) { 21808c2ecf20Sopenharmony_ci rdev = conf->mirrors[dw].rdev; 21818c2ecf20Sopenharmony_ci addr = r10_bio->devs[1].addr + sect; 21828c2ecf20Sopenharmony_ci ok = sync_page_io(rdev, 21838c2ecf20Sopenharmony_ci addr, 21848c2ecf20Sopenharmony_ci s << 9, 21858c2ecf20Sopenharmony_ci pages[idx], 21868c2ecf20Sopenharmony_ci REQ_OP_WRITE, 0, false); 21878c2ecf20Sopenharmony_ci if (!ok) { 21888c2ecf20Sopenharmony_ci set_bit(WriteErrorSeen, &rdev->flags); 21898c2ecf20Sopenharmony_ci if 
(!test_and_set_bit(WantReplacement, 21908c2ecf20Sopenharmony_ci &rdev->flags)) 21918c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, 21928c2ecf20Sopenharmony_ci &rdev->mddev->recovery); 21938c2ecf20Sopenharmony_ci } 21948c2ecf20Sopenharmony_ci } 21958c2ecf20Sopenharmony_ci if (!ok) { 21968c2ecf20Sopenharmony_ci /* We don't worry if we cannot set a bad block - 21978c2ecf20Sopenharmony_ci * it really is bad so there is no loss in not 21988c2ecf20Sopenharmony_ci * recording it yet 21998c2ecf20Sopenharmony_ci */ 22008c2ecf20Sopenharmony_ci rdev_set_badblocks(rdev, addr, s, 0); 22018c2ecf20Sopenharmony_ci 22028c2ecf20Sopenharmony_ci if (rdev != conf->mirrors[dw].rdev) { 22038c2ecf20Sopenharmony_ci /* need bad block on destination too */ 22048c2ecf20Sopenharmony_ci struct md_rdev *rdev2 = conf->mirrors[dw].rdev; 22058c2ecf20Sopenharmony_ci addr = r10_bio->devs[1].addr + sect; 22068c2ecf20Sopenharmony_ci ok = rdev_set_badblocks(rdev2, addr, s, 0); 22078c2ecf20Sopenharmony_ci if (!ok) { 22088c2ecf20Sopenharmony_ci /* just abort the recovery */ 22098c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: recovery aborted due to read error\n", 22108c2ecf20Sopenharmony_ci mdname(mddev)); 22118c2ecf20Sopenharmony_ci 22128c2ecf20Sopenharmony_ci conf->mirrors[dw].recovery_disabled 22138c2ecf20Sopenharmony_ci = mddev->recovery_disabled; 22148c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, 22158c2ecf20Sopenharmony_ci &mddev->recovery); 22168c2ecf20Sopenharmony_ci break; 22178c2ecf20Sopenharmony_ci } 22188c2ecf20Sopenharmony_ci } 22198c2ecf20Sopenharmony_ci } 22208c2ecf20Sopenharmony_ci 22218c2ecf20Sopenharmony_ci sectors -= s; 22228c2ecf20Sopenharmony_ci sect += s; 22238c2ecf20Sopenharmony_ci idx++; 22248c2ecf20Sopenharmony_ci } 22258c2ecf20Sopenharmony_ci} 22268c2ecf20Sopenharmony_ci 22278c2ecf20Sopenharmony_cistatic void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) 22288c2ecf20Sopenharmony_ci{ 22298c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 
22308c2ecf20Sopenharmony_ci int d; 22318c2ecf20Sopenharmony_ci struct bio *wbio = r10_bio->devs[1].bio; 22328c2ecf20Sopenharmony_ci struct bio *wbio2 = r10_bio->devs[1].repl_bio; 22338c2ecf20Sopenharmony_ci 22348c2ecf20Sopenharmony_ci /* Need to test wbio2->bi_end_io before we call 22358c2ecf20Sopenharmony_ci * submit_bio_noacct as if the former is NULL, 22368c2ecf20Sopenharmony_ci * the latter is free to free wbio2. 22378c2ecf20Sopenharmony_ci */ 22388c2ecf20Sopenharmony_ci if (wbio2 && !wbio2->bi_end_io) 22398c2ecf20Sopenharmony_ci wbio2 = NULL; 22408c2ecf20Sopenharmony_ci 22418c2ecf20Sopenharmony_ci if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { 22428c2ecf20Sopenharmony_ci fix_recovery_read_error(r10_bio); 22438c2ecf20Sopenharmony_ci if (wbio->bi_end_io) 22448c2ecf20Sopenharmony_ci end_sync_request(r10_bio); 22458c2ecf20Sopenharmony_ci if (wbio2) 22468c2ecf20Sopenharmony_ci end_sync_request(r10_bio); 22478c2ecf20Sopenharmony_ci return; 22488c2ecf20Sopenharmony_ci } 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci /* 22518c2ecf20Sopenharmony_ci * share the pages with the first bio 22528c2ecf20Sopenharmony_ci * and submit the write request 22538c2ecf20Sopenharmony_ci */ 22548c2ecf20Sopenharmony_ci d = r10_bio->devs[1].devnum; 22558c2ecf20Sopenharmony_ci if (wbio->bi_end_io) { 22568c2ecf20Sopenharmony_ci atomic_inc(&conf->mirrors[d].rdev->nr_pending); 22578c2ecf20Sopenharmony_ci md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); 22588c2ecf20Sopenharmony_ci submit_bio_noacct(wbio); 22598c2ecf20Sopenharmony_ci } 22608c2ecf20Sopenharmony_ci if (wbio2) { 22618c2ecf20Sopenharmony_ci atomic_inc(&conf->mirrors[d].replacement->nr_pending); 22628c2ecf20Sopenharmony_ci md_sync_acct(conf->mirrors[d].replacement->bdev, 22638c2ecf20Sopenharmony_ci bio_sectors(wbio2)); 22648c2ecf20Sopenharmony_ci submit_bio_noacct(wbio2); 22658c2ecf20Sopenharmony_ci } 22668c2ecf20Sopenharmony_ci} 22678c2ecf20Sopenharmony_ci 22688c2ecf20Sopenharmony_ci/* 
22698c2ecf20Sopenharmony_ci * Used by fix_read_error() to decay the per rdev read_errors. 22708c2ecf20Sopenharmony_ci * We halve the read error count for every hour that has elapsed 22718c2ecf20Sopenharmony_ci * since the last recorded read error. 22728c2ecf20Sopenharmony_ci * 22738c2ecf20Sopenharmony_ci */ 22748c2ecf20Sopenharmony_cistatic void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) 22758c2ecf20Sopenharmony_ci{ 22768c2ecf20Sopenharmony_ci long cur_time_mon; 22778c2ecf20Sopenharmony_ci unsigned long hours_since_last; 22788c2ecf20Sopenharmony_ci unsigned int read_errors = atomic_read(&rdev->read_errors); 22798c2ecf20Sopenharmony_ci 22808c2ecf20Sopenharmony_ci cur_time_mon = ktime_get_seconds(); 22818c2ecf20Sopenharmony_ci 22828c2ecf20Sopenharmony_ci if (rdev->last_read_error == 0) { 22838c2ecf20Sopenharmony_ci /* first time we've seen a read error */ 22848c2ecf20Sopenharmony_ci rdev->last_read_error = cur_time_mon; 22858c2ecf20Sopenharmony_ci return; 22868c2ecf20Sopenharmony_ci } 22878c2ecf20Sopenharmony_ci 22888c2ecf20Sopenharmony_ci hours_since_last = (long)(cur_time_mon - 22898c2ecf20Sopenharmony_ci rdev->last_read_error) / 3600; 22908c2ecf20Sopenharmony_ci 22918c2ecf20Sopenharmony_ci rdev->last_read_error = cur_time_mon; 22928c2ecf20Sopenharmony_ci 22938c2ecf20Sopenharmony_ci /* 22948c2ecf20Sopenharmony_ci * if hours_since_last is > the number of bits in read_errors 22958c2ecf20Sopenharmony_ci * just set read errors to 0. We do this to avoid 22968c2ecf20Sopenharmony_ci * overflowing the shift of read_errors by hours_since_last. 
22978c2ecf20Sopenharmony_ci */ 22988c2ecf20Sopenharmony_ci if (hours_since_last >= 8 * sizeof(read_errors)) 22998c2ecf20Sopenharmony_ci atomic_set(&rdev->read_errors, 0); 23008c2ecf20Sopenharmony_ci else 23018c2ecf20Sopenharmony_ci atomic_set(&rdev->read_errors, read_errors >> hours_since_last); 23028c2ecf20Sopenharmony_ci} 23038c2ecf20Sopenharmony_ci 23048c2ecf20Sopenharmony_cistatic int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, 23058c2ecf20Sopenharmony_ci int sectors, struct page *page, int rw) 23068c2ecf20Sopenharmony_ci{ 23078c2ecf20Sopenharmony_ci sector_t first_bad; 23088c2ecf20Sopenharmony_ci int bad_sectors; 23098c2ecf20Sopenharmony_ci 23108c2ecf20Sopenharmony_ci if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) 23118c2ecf20Sopenharmony_ci && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) 23128c2ecf20Sopenharmony_ci return -1; 23138c2ecf20Sopenharmony_ci if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) 23148c2ecf20Sopenharmony_ci /* success */ 23158c2ecf20Sopenharmony_ci return 1; 23168c2ecf20Sopenharmony_ci if (rw == WRITE) { 23178c2ecf20Sopenharmony_ci set_bit(WriteErrorSeen, &rdev->flags); 23188c2ecf20Sopenharmony_ci if (!test_and_set_bit(WantReplacement, &rdev->flags)) 23198c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, 23208c2ecf20Sopenharmony_ci &rdev->mddev->recovery); 23218c2ecf20Sopenharmony_ci } 23228c2ecf20Sopenharmony_ci /* need to record an error - either for the block or the device */ 23238c2ecf20Sopenharmony_ci if (!rdev_set_badblocks(rdev, sector, sectors, 0)) 23248c2ecf20Sopenharmony_ci md_error(rdev->mddev, rdev); 23258c2ecf20Sopenharmony_ci return 0; 23268c2ecf20Sopenharmony_ci} 23278c2ecf20Sopenharmony_ci 23288c2ecf20Sopenharmony_ci/* 23298c2ecf20Sopenharmony_ci * This is a kernel thread which: 23308c2ecf20Sopenharmony_ci * 23318c2ecf20Sopenharmony_ci * 1. Retries failed read operations on working mirrors. 23328c2ecf20Sopenharmony_ci * 2. 
Updates the raid superblock when problems encounter. 23338c2ecf20Sopenharmony_ci * 3. Performs writes following reads for array synchronising. 23348c2ecf20Sopenharmony_ci */ 23358c2ecf20Sopenharmony_ci 23368c2ecf20Sopenharmony_cistatic void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio) 23378c2ecf20Sopenharmony_ci{ 23388c2ecf20Sopenharmony_ci int sect = 0; /* Offset from r10_bio->sector */ 23398c2ecf20Sopenharmony_ci int sectors = r10_bio->sectors; 23408c2ecf20Sopenharmony_ci struct md_rdev *rdev; 23418c2ecf20Sopenharmony_ci int max_read_errors = atomic_read(&mddev->max_corr_read_errors); 23428c2ecf20Sopenharmony_ci int d = r10_bio->devs[r10_bio->read_slot].devnum; 23438c2ecf20Sopenharmony_ci 23448c2ecf20Sopenharmony_ci /* still own a reference to this rdev, so it cannot 23458c2ecf20Sopenharmony_ci * have been cleared recently. 23468c2ecf20Sopenharmony_ci */ 23478c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].rdev; 23488c2ecf20Sopenharmony_ci 23498c2ecf20Sopenharmony_ci if (test_bit(Faulty, &rdev->flags)) 23508c2ecf20Sopenharmony_ci /* drive has already been failed, just ignore any 23518c2ecf20Sopenharmony_ci more fix_read_error() attempts */ 23528c2ecf20Sopenharmony_ci return; 23538c2ecf20Sopenharmony_ci 23548c2ecf20Sopenharmony_ci check_decay_read_errors(mddev, rdev); 23558c2ecf20Sopenharmony_ci atomic_inc(&rdev->read_errors); 23568c2ecf20Sopenharmony_ci if (atomic_read(&rdev->read_errors) > max_read_errors) { 23578c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 23588c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b); 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: %s: Raid device exceeded read_error threshold [cur %d:max %d]\n", 23618c2ecf20Sopenharmony_ci mdname(mddev), b, 23628c2ecf20Sopenharmony_ci atomic_read(&rdev->read_errors), max_read_errors); 23638c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: %s: Failing raid device\n", 23648c2ecf20Sopenharmony_ci mdname(mddev), b); 
23658c2ecf20Sopenharmony_ci md_error(mddev, rdev); 23668c2ecf20Sopenharmony_ci r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED; 23678c2ecf20Sopenharmony_ci return; 23688c2ecf20Sopenharmony_ci } 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci while(sectors) { 23718c2ecf20Sopenharmony_ci int s = sectors; 23728c2ecf20Sopenharmony_ci int sl = r10_bio->read_slot; 23738c2ecf20Sopenharmony_ci int success = 0; 23748c2ecf20Sopenharmony_ci int start; 23758c2ecf20Sopenharmony_ci 23768c2ecf20Sopenharmony_ci if (s > (PAGE_SIZE>>9)) 23778c2ecf20Sopenharmony_ci s = PAGE_SIZE >> 9; 23788c2ecf20Sopenharmony_ci 23798c2ecf20Sopenharmony_ci rcu_read_lock(); 23808c2ecf20Sopenharmony_ci do { 23818c2ecf20Sopenharmony_ci sector_t first_bad; 23828c2ecf20Sopenharmony_ci int bad_sectors; 23838c2ecf20Sopenharmony_ci 23848c2ecf20Sopenharmony_ci d = r10_bio->devs[sl].devnum; 23858c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 23868c2ecf20Sopenharmony_ci if (rdev && 23878c2ecf20Sopenharmony_ci test_bit(In_sync, &rdev->flags) && 23888c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags) && 23898c2ecf20Sopenharmony_ci is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, 23908c2ecf20Sopenharmony_ci &first_bad, &bad_sectors) == 0) { 23918c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 23928c2ecf20Sopenharmony_ci rcu_read_unlock(); 23938c2ecf20Sopenharmony_ci success = sync_page_io(rdev, 23948c2ecf20Sopenharmony_ci r10_bio->devs[sl].addr + 23958c2ecf20Sopenharmony_ci sect, 23968c2ecf20Sopenharmony_ci s<<9, 23978c2ecf20Sopenharmony_ci conf->tmppage, 23988c2ecf20Sopenharmony_ci REQ_OP_READ, 0, false); 23998c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 24008c2ecf20Sopenharmony_ci rcu_read_lock(); 24018c2ecf20Sopenharmony_ci if (success) 24028c2ecf20Sopenharmony_ci break; 24038c2ecf20Sopenharmony_ci } 24048c2ecf20Sopenharmony_ci sl++; 24058c2ecf20Sopenharmony_ci if (sl == conf->copies) 24068c2ecf20Sopenharmony_ci sl = 0; 24078c2ecf20Sopenharmony_ci } while 
(!success && sl != r10_bio->read_slot); 24088c2ecf20Sopenharmony_ci rcu_read_unlock(); 24098c2ecf20Sopenharmony_ci 24108c2ecf20Sopenharmony_ci if (!success) { 24118c2ecf20Sopenharmony_ci /* Cannot read from anywhere, just mark the block 24128c2ecf20Sopenharmony_ci * as bad on the first device to discourage future 24138c2ecf20Sopenharmony_ci * reads. 24148c2ecf20Sopenharmony_ci */ 24158c2ecf20Sopenharmony_ci int dn = r10_bio->devs[r10_bio->read_slot].devnum; 24168c2ecf20Sopenharmony_ci rdev = conf->mirrors[dn].rdev; 24178c2ecf20Sopenharmony_ci 24188c2ecf20Sopenharmony_ci if (!rdev_set_badblocks( 24198c2ecf20Sopenharmony_ci rdev, 24208c2ecf20Sopenharmony_ci r10_bio->devs[r10_bio->read_slot].addr 24218c2ecf20Sopenharmony_ci + sect, 24228c2ecf20Sopenharmony_ci s, 0)) { 24238c2ecf20Sopenharmony_ci md_error(mddev, rdev); 24248c2ecf20Sopenharmony_ci r10_bio->devs[r10_bio->read_slot].bio 24258c2ecf20Sopenharmony_ci = IO_BLOCKED; 24268c2ecf20Sopenharmony_ci } 24278c2ecf20Sopenharmony_ci break; 24288c2ecf20Sopenharmony_ci } 24298c2ecf20Sopenharmony_ci 24308c2ecf20Sopenharmony_ci start = sl; 24318c2ecf20Sopenharmony_ci /* write it back and re-read */ 24328c2ecf20Sopenharmony_ci rcu_read_lock(); 24338c2ecf20Sopenharmony_ci while (sl != r10_bio->read_slot) { 24348c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 24358c2ecf20Sopenharmony_ci 24368c2ecf20Sopenharmony_ci if (sl==0) 24378c2ecf20Sopenharmony_ci sl = conf->copies; 24388c2ecf20Sopenharmony_ci sl--; 24398c2ecf20Sopenharmony_ci d = r10_bio->devs[sl].devnum; 24408c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 24418c2ecf20Sopenharmony_ci if (!rdev || 24428c2ecf20Sopenharmony_ci test_bit(Faulty, &rdev->flags) || 24438c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags)) 24448c2ecf20Sopenharmony_ci continue; 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 24478c2ecf20Sopenharmony_ci rcu_read_unlock(); 24488c2ecf20Sopenharmony_ci if (r10_sync_page_io(rdev, 
24498c2ecf20Sopenharmony_ci r10_bio->devs[sl].addr + 24508c2ecf20Sopenharmony_ci sect, 24518c2ecf20Sopenharmony_ci s, conf->tmppage, WRITE) 24528c2ecf20Sopenharmony_ci == 0) { 24538c2ecf20Sopenharmony_ci /* Well, this device is dead */ 24548c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: read correction write failed (%d sectors at %llu on %s)\n", 24558c2ecf20Sopenharmony_ci mdname(mddev), s, 24568c2ecf20Sopenharmony_ci (unsigned long long)( 24578c2ecf20Sopenharmony_ci sect + 24588c2ecf20Sopenharmony_ci choose_data_offset(r10_bio, 24598c2ecf20Sopenharmony_ci rdev)), 24608c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 24618c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: %s: failing drive\n", 24628c2ecf20Sopenharmony_ci mdname(mddev), 24638c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 24648c2ecf20Sopenharmony_ci } 24658c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 24668c2ecf20Sopenharmony_ci rcu_read_lock(); 24678c2ecf20Sopenharmony_ci } 24688c2ecf20Sopenharmony_ci sl = start; 24698c2ecf20Sopenharmony_ci while (sl != r10_bio->read_slot) { 24708c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 24718c2ecf20Sopenharmony_ci 24728c2ecf20Sopenharmony_ci if (sl==0) 24738c2ecf20Sopenharmony_ci sl = conf->copies; 24748c2ecf20Sopenharmony_ci sl--; 24758c2ecf20Sopenharmony_ci d = r10_bio->devs[sl].devnum; 24768c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 24778c2ecf20Sopenharmony_ci if (!rdev || 24788c2ecf20Sopenharmony_ci test_bit(Faulty, &rdev->flags) || 24798c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags)) 24808c2ecf20Sopenharmony_ci continue; 24818c2ecf20Sopenharmony_ci 24828c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 24838c2ecf20Sopenharmony_ci rcu_read_unlock(); 24848c2ecf20Sopenharmony_ci switch (r10_sync_page_io(rdev, 24858c2ecf20Sopenharmony_ci r10_bio->devs[sl].addr + 24868c2ecf20Sopenharmony_ci sect, 24878c2ecf20Sopenharmony_ci s, conf->tmppage, 24888c2ecf20Sopenharmony_ci READ)) { 24898c2ecf20Sopenharmony_ci case 0: 
24908c2ecf20Sopenharmony_ci /* Well, this device is dead */ 24918c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: unable to read back corrected sectors (%d sectors at %llu on %s)\n", 24928c2ecf20Sopenharmony_ci mdname(mddev), s, 24938c2ecf20Sopenharmony_ci (unsigned long long)( 24948c2ecf20Sopenharmony_ci sect + 24958c2ecf20Sopenharmony_ci choose_data_offset(r10_bio, rdev)), 24968c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 24978c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: %s: failing drive\n", 24988c2ecf20Sopenharmony_ci mdname(mddev), 24998c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 25008c2ecf20Sopenharmony_ci break; 25018c2ecf20Sopenharmony_ci case 1: 25028c2ecf20Sopenharmony_ci pr_info("md/raid10:%s: read error corrected (%d sectors at %llu on %s)\n", 25038c2ecf20Sopenharmony_ci mdname(mddev), s, 25048c2ecf20Sopenharmony_ci (unsigned long long)( 25058c2ecf20Sopenharmony_ci sect + 25068c2ecf20Sopenharmony_ci choose_data_offset(r10_bio, rdev)), 25078c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 25088c2ecf20Sopenharmony_ci atomic_add(s, &rdev->corrected_errors); 25098c2ecf20Sopenharmony_ci } 25108c2ecf20Sopenharmony_ci 25118c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 25128c2ecf20Sopenharmony_ci rcu_read_lock(); 25138c2ecf20Sopenharmony_ci } 25148c2ecf20Sopenharmony_ci rcu_read_unlock(); 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci sectors -= s; 25178c2ecf20Sopenharmony_ci sect += s; 25188c2ecf20Sopenharmony_ci } 25198c2ecf20Sopenharmony_ci} 25208c2ecf20Sopenharmony_ci 25218c2ecf20Sopenharmony_cistatic int narrow_write_error(struct r10bio *r10_bio, int i) 25228c2ecf20Sopenharmony_ci{ 25238c2ecf20Sopenharmony_ci struct bio *bio = r10_bio->master_bio; 25248c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 25258c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 25268c2ecf20Sopenharmony_ci struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev; 25278c2ecf20Sopenharmony_ci /* bio has the data to be 
written to slot 'i' where 25288c2ecf20Sopenharmony_ci * we just recently had a write error. 25298c2ecf20Sopenharmony_ci * We repeatedly clone the bio and trim down to one block, 25308c2ecf20Sopenharmony_ci * then try the write. Where the write fails we record 25318c2ecf20Sopenharmony_ci * a bad block. 25328c2ecf20Sopenharmony_ci * It is conceivable that the bio doesn't exactly align with 25338c2ecf20Sopenharmony_ci * blocks. We must handle this. 25348c2ecf20Sopenharmony_ci * 25358c2ecf20Sopenharmony_ci * We currently own a reference to the rdev. 25368c2ecf20Sopenharmony_ci */ 25378c2ecf20Sopenharmony_ci 25388c2ecf20Sopenharmony_ci int block_sectors; 25398c2ecf20Sopenharmony_ci sector_t sector; 25408c2ecf20Sopenharmony_ci int sectors; 25418c2ecf20Sopenharmony_ci int sect_to_write = r10_bio->sectors; 25428c2ecf20Sopenharmony_ci int ok = 1; 25438c2ecf20Sopenharmony_ci 25448c2ecf20Sopenharmony_ci if (rdev->badblocks.shift < 0) 25458c2ecf20Sopenharmony_ci return 0; 25468c2ecf20Sopenharmony_ci 25478c2ecf20Sopenharmony_ci block_sectors = roundup(1 << rdev->badblocks.shift, 25488c2ecf20Sopenharmony_ci bdev_logical_block_size(rdev->bdev) >> 9); 25498c2ecf20Sopenharmony_ci sector = r10_bio->sector; 25508c2ecf20Sopenharmony_ci sectors = ((r10_bio->sector + block_sectors) 25518c2ecf20Sopenharmony_ci & ~(sector_t)(block_sectors - 1)) 25528c2ecf20Sopenharmony_ci - sector; 25538c2ecf20Sopenharmony_ci 25548c2ecf20Sopenharmony_ci while (sect_to_write) { 25558c2ecf20Sopenharmony_ci struct bio *wbio; 25568c2ecf20Sopenharmony_ci sector_t wsector; 25578c2ecf20Sopenharmony_ci if (sectors > sect_to_write) 25588c2ecf20Sopenharmony_ci sectors = sect_to_write; 25598c2ecf20Sopenharmony_ci /* Write at 'sector' for 'sectors' */ 25608c2ecf20Sopenharmony_ci wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); 25618c2ecf20Sopenharmony_ci bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); 25628c2ecf20Sopenharmony_ci wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); 
25638c2ecf20Sopenharmony_ci wbio->bi_iter.bi_sector = wsector + 25648c2ecf20Sopenharmony_ci choose_data_offset(r10_bio, rdev); 25658c2ecf20Sopenharmony_ci bio_set_dev(wbio, rdev->bdev); 25668c2ecf20Sopenharmony_ci bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); 25678c2ecf20Sopenharmony_ci 25688c2ecf20Sopenharmony_ci if (submit_bio_wait(wbio) < 0) 25698c2ecf20Sopenharmony_ci /* Failure! */ 25708c2ecf20Sopenharmony_ci ok = rdev_set_badblocks(rdev, wsector, 25718c2ecf20Sopenharmony_ci sectors, 0) 25728c2ecf20Sopenharmony_ci && ok; 25738c2ecf20Sopenharmony_ci 25748c2ecf20Sopenharmony_ci bio_put(wbio); 25758c2ecf20Sopenharmony_ci sect_to_write -= sectors; 25768c2ecf20Sopenharmony_ci sector += sectors; 25778c2ecf20Sopenharmony_ci sectors = block_sectors; 25788c2ecf20Sopenharmony_ci } 25798c2ecf20Sopenharmony_ci return ok; 25808c2ecf20Sopenharmony_ci} 25818c2ecf20Sopenharmony_ci 25828c2ecf20Sopenharmony_cistatic void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) 25838c2ecf20Sopenharmony_ci{ 25848c2ecf20Sopenharmony_ci int slot = r10_bio->read_slot; 25858c2ecf20Sopenharmony_ci struct bio *bio; 25868c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 25878c2ecf20Sopenharmony_ci struct md_rdev *rdev = r10_bio->devs[slot].rdev; 25888c2ecf20Sopenharmony_ci 25898c2ecf20Sopenharmony_ci /* we got a read error. Maybe the drive is bad. Maybe just 25908c2ecf20Sopenharmony_ci * the block and we can fix it. 25918c2ecf20Sopenharmony_ci * We freeze all other IO, and try reading the block from 25928c2ecf20Sopenharmony_ci * other devices. When we find one, we re-write 25938c2ecf20Sopenharmony_ci * and check it that fixes the read error. 25948c2ecf20Sopenharmony_ci * This is all done synchronously while the array is 25958c2ecf20Sopenharmony_ci * frozen. 
25968c2ecf20Sopenharmony_ci */ 25978c2ecf20Sopenharmony_ci bio = r10_bio->devs[slot].bio; 25988c2ecf20Sopenharmony_ci bio_put(bio); 25998c2ecf20Sopenharmony_ci r10_bio->devs[slot].bio = NULL; 26008c2ecf20Sopenharmony_ci 26018c2ecf20Sopenharmony_ci if (mddev->ro) 26028c2ecf20Sopenharmony_ci r10_bio->devs[slot].bio = IO_BLOCKED; 26038c2ecf20Sopenharmony_ci else if (!test_bit(FailFast, &rdev->flags)) { 26048c2ecf20Sopenharmony_ci freeze_array(conf, 1); 26058c2ecf20Sopenharmony_ci fix_read_error(conf, mddev, r10_bio); 26068c2ecf20Sopenharmony_ci unfreeze_array(conf); 26078c2ecf20Sopenharmony_ci } else 26088c2ecf20Sopenharmony_ci md_error(mddev, rdev); 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 26118c2ecf20Sopenharmony_ci allow_barrier(conf); 26128c2ecf20Sopenharmony_ci r10_bio->state = 0; 26138c2ecf20Sopenharmony_ci raid10_read_request(mddev, r10_bio->master_bio, r10_bio); 26148c2ecf20Sopenharmony_ci} 26158c2ecf20Sopenharmony_ci 26168c2ecf20Sopenharmony_cistatic void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) 26178c2ecf20Sopenharmony_ci{ 26188c2ecf20Sopenharmony_ci /* Some sort of write request has finished and it 26198c2ecf20Sopenharmony_ci * succeeded in writing where we thought there was a 26208c2ecf20Sopenharmony_ci * bad block. So forget the bad block. 26218c2ecf20Sopenharmony_ci * Or possibly if failed and we need to record 26228c2ecf20Sopenharmony_ci * a bad block. 
26238c2ecf20Sopenharmony_ci */ 26248c2ecf20Sopenharmony_ci int m; 26258c2ecf20Sopenharmony_ci struct md_rdev *rdev; 26268c2ecf20Sopenharmony_ci 26278c2ecf20Sopenharmony_ci if (test_bit(R10BIO_IsSync, &r10_bio->state) || 26288c2ecf20Sopenharmony_ci test_bit(R10BIO_IsRecover, &r10_bio->state)) { 26298c2ecf20Sopenharmony_ci for (m = 0; m < conf->copies; m++) { 26308c2ecf20Sopenharmony_ci int dev = r10_bio->devs[m].devnum; 26318c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].rdev; 26328c2ecf20Sopenharmony_ci if (r10_bio->devs[m].bio == NULL || 26338c2ecf20Sopenharmony_ci r10_bio->devs[m].bio->bi_end_io == NULL) 26348c2ecf20Sopenharmony_ci continue; 26358c2ecf20Sopenharmony_ci if (!r10_bio->devs[m].bio->bi_status) { 26368c2ecf20Sopenharmony_ci rdev_clear_badblocks( 26378c2ecf20Sopenharmony_ci rdev, 26388c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26398c2ecf20Sopenharmony_ci r10_bio->sectors, 0); 26408c2ecf20Sopenharmony_ci } else { 26418c2ecf20Sopenharmony_ci if (!rdev_set_badblocks( 26428c2ecf20Sopenharmony_ci rdev, 26438c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26448c2ecf20Sopenharmony_ci r10_bio->sectors, 0)) 26458c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 26468c2ecf20Sopenharmony_ci } 26478c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].replacement; 26488c2ecf20Sopenharmony_ci if (r10_bio->devs[m].repl_bio == NULL || 26498c2ecf20Sopenharmony_ci r10_bio->devs[m].repl_bio->bi_end_io == NULL) 26508c2ecf20Sopenharmony_ci continue; 26518c2ecf20Sopenharmony_ci 26528c2ecf20Sopenharmony_ci if (!r10_bio->devs[m].repl_bio->bi_status) { 26538c2ecf20Sopenharmony_ci rdev_clear_badblocks( 26548c2ecf20Sopenharmony_ci rdev, 26558c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26568c2ecf20Sopenharmony_ci r10_bio->sectors, 0); 26578c2ecf20Sopenharmony_ci } else { 26588c2ecf20Sopenharmony_ci if (!rdev_set_badblocks( 26598c2ecf20Sopenharmony_ci rdev, 26608c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26618c2ecf20Sopenharmony_ci r10_bio->sectors, 0)) 
26628c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 26638c2ecf20Sopenharmony_ci } 26648c2ecf20Sopenharmony_ci } 26658c2ecf20Sopenharmony_ci put_buf(r10_bio); 26668c2ecf20Sopenharmony_ci } else { 26678c2ecf20Sopenharmony_ci bool fail = false; 26688c2ecf20Sopenharmony_ci for (m = 0; m < conf->copies; m++) { 26698c2ecf20Sopenharmony_ci int dev = r10_bio->devs[m].devnum; 26708c2ecf20Sopenharmony_ci struct bio *bio = r10_bio->devs[m].bio; 26718c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].rdev; 26728c2ecf20Sopenharmony_ci if (bio == IO_MADE_GOOD) { 26738c2ecf20Sopenharmony_ci rdev_clear_badblocks( 26748c2ecf20Sopenharmony_ci rdev, 26758c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26768c2ecf20Sopenharmony_ci r10_bio->sectors, 0); 26778c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 26788c2ecf20Sopenharmony_ci } else if (bio != NULL && bio->bi_status) { 26798c2ecf20Sopenharmony_ci fail = true; 26808c2ecf20Sopenharmony_ci if (!narrow_write_error(r10_bio, m)) { 26818c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 26828c2ecf20Sopenharmony_ci set_bit(R10BIO_Degraded, 26838c2ecf20Sopenharmony_ci &r10_bio->state); 26848c2ecf20Sopenharmony_ci } 26858c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 26868c2ecf20Sopenharmony_ci } 26878c2ecf20Sopenharmony_ci bio = r10_bio->devs[m].repl_bio; 26888c2ecf20Sopenharmony_ci rdev = conf->mirrors[dev].replacement; 26898c2ecf20Sopenharmony_ci if (rdev && bio == IO_MADE_GOOD) { 26908c2ecf20Sopenharmony_ci rdev_clear_badblocks( 26918c2ecf20Sopenharmony_ci rdev, 26928c2ecf20Sopenharmony_ci r10_bio->devs[m].addr, 26938c2ecf20Sopenharmony_ci r10_bio->sectors, 0); 26948c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 26958c2ecf20Sopenharmony_ci } 26968c2ecf20Sopenharmony_ci } 26978c2ecf20Sopenharmony_ci if (fail) { 26988c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 26998c2ecf20Sopenharmony_ci list_add(&r10_bio->retry_list, &conf->bio_end_io_list); 27008c2ecf20Sopenharmony_ci 
conf->nr_queued++; 27018c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 27028c2ecf20Sopenharmony_ci /* 27038c2ecf20Sopenharmony_ci * In case freeze_array() is waiting for condition 27048c2ecf20Sopenharmony_ci * nr_pending == nr_queued + extra to be true. 27058c2ecf20Sopenharmony_ci */ 27068c2ecf20Sopenharmony_ci wake_up(&conf->wait_barrier); 27078c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 27088c2ecf20Sopenharmony_ci } else { 27098c2ecf20Sopenharmony_ci if (test_bit(R10BIO_WriteError, 27108c2ecf20Sopenharmony_ci &r10_bio->state)) 27118c2ecf20Sopenharmony_ci close_write(r10_bio); 27128c2ecf20Sopenharmony_ci raid_end_bio_io(r10_bio); 27138c2ecf20Sopenharmony_ci } 27148c2ecf20Sopenharmony_ci } 27158c2ecf20Sopenharmony_ci} 27168c2ecf20Sopenharmony_ci 27178c2ecf20Sopenharmony_cistatic void raid10d(struct md_thread *thread) 27188c2ecf20Sopenharmony_ci{ 27198c2ecf20Sopenharmony_ci struct mddev *mddev = thread->mddev; 27208c2ecf20Sopenharmony_ci struct r10bio *r10_bio; 27218c2ecf20Sopenharmony_ci unsigned long flags; 27228c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 27238c2ecf20Sopenharmony_ci struct list_head *head = &conf->retry_list; 27248c2ecf20Sopenharmony_ci struct blk_plug plug; 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci md_check_recovery(mddev); 27278c2ecf20Sopenharmony_ci 27288c2ecf20Sopenharmony_ci if (!list_empty_careful(&conf->bio_end_io_list) && 27298c2ecf20Sopenharmony_ci !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { 27308c2ecf20Sopenharmony_ci LIST_HEAD(tmp); 27318c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 27328c2ecf20Sopenharmony_ci if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { 27338c2ecf20Sopenharmony_ci while (!list_empty(&conf->bio_end_io_list)) { 27348c2ecf20Sopenharmony_ci list_move(conf->bio_end_io_list.prev, &tmp); 27358c2ecf20Sopenharmony_ci conf->nr_queued--; 27368c2ecf20Sopenharmony_ci } 27378c2ecf20Sopenharmony_ci } 27388c2ecf20Sopenharmony_ci 
spin_unlock_irqrestore(&conf->device_lock, flags); 27398c2ecf20Sopenharmony_ci while (!list_empty(&tmp)) { 27408c2ecf20Sopenharmony_ci r10_bio = list_first_entry(&tmp, struct r10bio, 27418c2ecf20Sopenharmony_ci retry_list); 27428c2ecf20Sopenharmony_ci list_del(&r10_bio->retry_list); 27438c2ecf20Sopenharmony_ci if (mddev->degraded) 27448c2ecf20Sopenharmony_ci set_bit(R10BIO_Degraded, &r10_bio->state); 27458c2ecf20Sopenharmony_ci 27468c2ecf20Sopenharmony_ci if (test_bit(R10BIO_WriteError, 27478c2ecf20Sopenharmony_ci &r10_bio->state)) 27488c2ecf20Sopenharmony_ci close_write(r10_bio); 27498c2ecf20Sopenharmony_ci raid_end_bio_io(r10_bio); 27508c2ecf20Sopenharmony_ci } 27518c2ecf20Sopenharmony_ci } 27528c2ecf20Sopenharmony_ci 27538c2ecf20Sopenharmony_ci blk_start_plug(&plug); 27548c2ecf20Sopenharmony_ci for (;;) { 27558c2ecf20Sopenharmony_ci 27568c2ecf20Sopenharmony_ci flush_pending_writes(conf); 27578c2ecf20Sopenharmony_ci 27588c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 27598c2ecf20Sopenharmony_ci if (list_empty(head)) { 27608c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 27618c2ecf20Sopenharmony_ci break; 27628c2ecf20Sopenharmony_ci } 27638c2ecf20Sopenharmony_ci r10_bio = list_entry(head->prev, struct r10bio, retry_list); 27648c2ecf20Sopenharmony_ci list_del(head->prev); 27658c2ecf20Sopenharmony_ci conf->nr_queued--; 27668c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 27678c2ecf20Sopenharmony_ci 27688c2ecf20Sopenharmony_ci mddev = r10_bio->mddev; 27698c2ecf20Sopenharmony_ci conf = mddev->private; 27708c2ecf20Sopenharmony_ci if (test_bit(R10BIO_MadeGood, &r10_bio->state) || 27718c2ecf20Sopenharmony_ci test_bit(R10BIO_WriteError, &r10_bio->state)) 27728c2ecf20Sopenharmony_ci handle_write_completed(conf, r10_bio); 27738c2ecf20Sopenharmony_ci else if (test_bit(R10BIO_IsReshape, &r10_bio->state)) 27748c2ecf20Sopenharmony_ci reshape_request_write(mddev, r10_bio); 27758c2ecf20Sopenharmony_ci else 
if (test_bit(R10BIO_IsSync, &r10_bio->state)) 27768c2ecf20Sopenharmony_ci sync_request_write(mddev, r10_bio); 27778c2ecf20Sopenharmony_ci else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) 27788c2ecf20Sopenharmony_ci recovery_request_write(mddev, r10_bio); 27798c2ecf20Sopenharmony_ci else if (test_bit(R10BIO_ReadError, &r10_bio->state)) 27808c2ecf20Sopenharmony_ci handle_read_error(mddev, r10_bio); 27818c2ecf20Sopenharmony_ci else 27828c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci cond_resched(); 27858c2ecf20Sopenharmony_ci if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING)) 27868c2ecf20Sopenharmony_ci md_check_recovery(mddev); 27878c2ecf20Sopenharmony_ci } 27888c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 27898c2ecf20Sopenharmony_ci} 27908c2ecf20Sopenharmony_ci 27918c2ecf20Sopenharmony_cistatic int init_resync(struct r10conf *conf) 27928c2ecf20Sopenharmony_ci{ 27938c2ecf20Sopenharmony_ci int ret, buffs, i; 27948c2ecf20Sopenharmony_ci 27958c2ecf20Sopenharmony_ci buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; 27968c2ecf20Sopenharmony_ci BUG_ON(mempool_initialized(&conf->r10buf_pool)); 27978c2ecf20Sopenharmony_ci conf->have_replacement = 0; 27988c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) 27998c2ecf20Sopenharmony_ci if (conf->mirrors[i].replacement) 28008c2ecf20Sopenharmony_ci conf->have_replacement = 1; 28018c2ecf20Sopenharmony_ci ret = mempool_init(&conf->r10buf_pool, buffs, 28028c2ecf20Sopenharmony_ci r10buf_pool_alloc, r10buf_pool_free, conf); 28038c2ecf20Sopenharmony_ci if (ret) 28048c2ecf20Sopenharmony_ci return ret; 28058c2ecf20Sopenharmony_ci conf->next_resync = 0; 28068c2ecf20Sopenharmony_ci return 0; 28078c2ecf20Sopenharmony_ci} 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_cistatic struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf) 28108c2ecf20Sopenharmony_ci{ 28118c2ecf20Sopenharmony_ci struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO); 
28128c2ecf20Sopenharmony_ci struct rsync_pages *rp; 28138c2ecf20Sopenharmony_ci struct bio *bio; 28148c2ecf20Sopenharmony_ci int nalloc; 28158c2ecf20Sopenharmony_ci int i; 28168c2ecf20Sopenharmony_ci 28178c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) || 28188c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery)) 28198c2ecf20Sopenharmony_ci nalloc = conf->copies; /* resync */ 28208c2ecf20Sopenharmony_ci else 28218c2ecf20Sopenharmony_ci nalloc = 2; /* recovery */ 28228c2ecf20Sopenharmony_ci 28238c2ecf20Sopenharmony_ci for (i = 0; i < nalloc; i++) { 28248c2ecf20Sopenharmony_ci bio = r10bio->devs[i].bio; 28258c2ecf20Sopenharmony_ci rp = bio->bi_private; 28268c2ecf20Sopenharmony_ci bio_reset(bio); 28278c2ecf20Sopenharmony_ci bio->bi_private = rp; 28288c2ecf20Sopenharmony_ci bio = r10bio->devs[i].repl_bio; 28298c2ecf20Sopenharmony_ci if (bio) { 28308c2ecf20Sopenharmony_ci rp = bio->bi_private; 28318c2ecf20Sopenharmony_ci bio_reset(bio); 28328c2ecf20Sopenharmony_ci bio->bi_private = rp; 28338c2ecf20Sopenharmony_ci } 28348c2ecf20Sopenharmony_ci } 28358c2ecf20Sopenharmony_ci return r10bio; 28368c2ecf20Sopenharmony_ci} 28378c2ecf20Sopenharmony_ci 28388c2ecf20Sopenharmony_ci/* 28398c2ecf20Sopenharmony_ci * Set cluster_sync_high since we need other nodes to add the 28408c2ecf20Sopenharmony_ci * range [cluster_sync_low, cluster_sync_high] to suspend list. 28418c2ecf20Sopenharmony_ci */ 28428c2ecf20Sopenharmony_cistatic void raid10_set_cluster_sync_high(struct r10conf *conf) 28438c2ecf20Sopenharmony_ci{ 28448c2ecf20Sopenharmony_ci sector_t window_size; 28458c2ecf20Sopenharmony_ci int extra_chunk, chunks; 28468c2ecf20Sopenharmony_ci 28478c2ecf20Sopenharmony_ci /* 28488c2ecf20Sopenharmony_ci * First, here we define "stripe" as a unit which across 28498c2ecf20Sopenharmony_ci * all member devices one time, so we get chunks by use 28508c2ecf20Sopenharmony_ci * raid_disks / near_copies. 
Otherwise, if near_copies is 28518c2ecf20Sopenharmony_ci * close to raid_disks, then resync window could increases 28528c2ecf20Sopenharmony_ci * linearly with the increase of raid_disks, which means 28538c2ecf20Sopenharmony_ci * we will suspend a really large IO window while it is not 28548c2ecf20Sopenharmony_ci * necessary. If raid_disks is not divisible by near_copies, 28558c2ecf20Sopenharmony_ci * an extra chunk is needed to ensure the whole "stripe" is 28568c2ecf20Sopenharmony_ci * covered. 28578c2ecf20Sopenharmony_ci */ 28588c2ecf20Sopenharmony_ci 28598c2ecf20Sopenharmony_ci chunks = conf->geo.raid_disks / conf->geo.near_copies; 28608c2ecf20Sopenharmony_ci if (conf->geo.raid_disks % conf->geo.near_copies == 0) 28618c2ecf20Sopenharmony_ci extra_chunk = 0; 28628c2ecf20Sopenharmony_ci else 28638c2ecf20Sopenharmony_ci extra_chunk = 1; 28648c2ecf20Sopenharmony_ci window_size = (chunks + extra_chunk) * conf->mddev->chunk_sectors; 28658c2ecf20Sopenharmony_ci 28668c2ecf20Sopenharmony_ci /* 28678c2ecf20Sopenharmony_ci * At least use a 32M window to align with raid1's resync window 28688c2ecf20Sopenharmony_ci */ 28698c2ecf20Sopenharmony_ci window_size = (CLUSTER_RESYNC_WINDOW_SECTORS > window_size) ? 28708c2ecf20Sopenharmony_ci CLUSTER_RESYNC_WINDOW_SECTORS : window_size; 28718c2ecf20Sopenharmony_ci 28728c2ecf20Sopenharmony_ci conf->cluster_sync_high = conf->cluster_sync_low + window_size; 28738c2ecf20Sopenharmony_ci} 28748c2ecf20Sopenharmony_ci 28758c2ecf20Sopenharmony_ci/* 28768c2ecf20Sopenharmony_ci * perform a "sync" on one "block" 28778c2ecf20Sopenharmony_ci * 28788c2ecf20Sopenharmony_ci * We need to make sure that no normal I/O request - particularly write 28798c2ecf20Sopenharmony_ci * requests - conflict with active sync requests. 28808c2ecf20Sopenharmony_ci * 28818c2ecf20Sopenharmony_ci * This is achieved by tracking pending requests and a 'barrier' concept 28828c2ecf20Sopenharmony_ci * that can be installed to exclude normal IO requests. 
28838c2ecf20Sopenharmony_ci * 28848c2ecf20Sopenharmony_ci * Resync and recovery are handled very differently. 28858c2ecf20Sopenharmony_ci * We differentiate by looking at MD_RECOVERY_SYNC in mddev->recovery. 28868c2ecf20Sopenharmony_ci * 28878c2ecf20Sopenharmony_ci * For resync, we iterate over virtual addresses, read all copies, 28888c2ecf20Sopenharmony_ci * and update if there are differences. If only one copy is live, 28898c2ecf20Sopenharmony_ci * skip it. 28908c2ecf20Sopenharmony_ci * For recovery, we iterate over physical addresses, read a good 28918c2ecf20Sopenharmony_ci * value for each non-in_sync drive, and over-write. 28928c2ecf20Sopenharmony_ci * 28938c2ecf20Sopenharmony_ci * So, for recovery we may have several outstanding complex requests for a 28948c2ecf20Sopenharmony_ci * given address, one for each out-of-sync device. We model this by allocating 28958c2ecf20Sopenharmony_ci * a number of r10_bio structures, one for each out-of-sync device. 28968c2ecf20Sopenharmony_ci * As we setup these structures, we collect all bio's together into a list 28978c2ecf20Sopenharmony_ci * which we then process collectively to add pages, and then process again 28988c2ecf20Sopenharmony_ci * to pass to submit_bio_noacct. 28998c2ecf20Sopenharmony_ci * 29008c2ecf20Sopenharmony_ci * The r10_bio structures are linked using a borrowed master_bio pointer. 29018c2ecf20Sopenharmony_ci * This link is counted in ->remaining. When the r10_bio that points to NULL 29028c2ecf20Sopenharmony_ci * has its remaining count decremented to 0, the whole complex operation 29038c2ecf20Sopenharmony_ci * is complete. 
29048c2ecf20Sopenharmony_ci * 29058c2ecf20Sopenharmony_ci */ 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_cistatic sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, 29088c2ecf20Sopenharmony_ci int *skipped) 29098c2ecf20Sopenharmony_ci{ 29108c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 29118c2ecf20Sopenharmony_ci struct r10bio *r10_bio; 29128c2ecf20Sopenharmony_ci struct bio *biolist = NULL, *bio; 29138c2ecf20Sopenharmony_ci sector_t max_sector, nr_sectors; 29148c2ecf20Sopenharmony_ci int i; 29158c2ecf20Sopenharmony_ci int max_sync; 29168c2ecf20Sopenharmony_ci sector_t sync_blocks; 29178c2ecf20Sopenharmony_ci sector_t sectors_skipped = 0; 29188c2ecf20Sopenharmony_ci int chunks_skipped = 0; 29198c2ecf20Sopenharmony_ci sector_t chunk_mask = conf->geo.chunk_mask; 29208c2ecf20Sopenharmony_ci int page_idx = 0; 29218c2ecf20Sopenharmony_ci 29228c2ecf20Sopenharmony_ci /* 29238c2ecf20Sopenharmony_ci * Allow skipping a full rebuild for incremental assembly 29248c2ecf20Sopenharmony_ci * of a clean array, like RAID1 does. 
29258c2ecf20Sopenharmony_ci */ 29268c2ecf20Sopenharmony_ci if (mddev->bitmap == NULL && 29278c2ecf20Sopenharmony_ci mddev->recovery_cp == MaxSector && 29288c2ecf20Sopenharmony_ci mddev->reshape_position == MaxSector && 29298c2ecf20Sopenharmony_ci !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && 29308c2ecf20Sopenharmony_ci !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && 29318c2ecf20Sopenharmony_ci !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 29328c2ecf20Sopenharmony_ci conf->fullsync == 0) { 29338c2ecf20Sopenharmony_ci *skipped = 1; 29348c2ecf20Sopenharmony_ci return mddev->dev_sectors - sector_nr; 29358c2ecf20Sopenharmony_ci } 29368c2ecf20Sopenharmony_ci 29378c2ecf20Sopenharmony_ci if (!mempool_initialized(&conf->r10buf_pool)) 29388c2ecf20Sopenharmony_ci if (init_resync(conf)) 29398c2ecf20Sopenharmony_ci return 0; 29408c2ecf20Sopenharmony_ci 29418c2ecf20Sopenharmony_ci skipped: 29428c2ecf20Sopenharmony_ci max_sector = mddev->dev_sectors; 29438c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || 29448c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 29458c2ecf20Sopenharmony_ci max_sector = mddev->resync_max_sectors; 29468c2ecf20Sopenharmony_ci if (sector_nr >= max_sector) { 29478c2ecf20Sopenharmony_ci conf->cluster_sync_low = 0; 29488c2ecf20Sopenharmony_ci conf->cluster_sync_high = 0; 29498c2ecf20Sopenharmony_ci 29508c2ecf20Sopenharmony_ci /* If we aborted, we need to abort the 29518c2ecf20Sopenharmony_ci * sync on the 'current' bitmap chucks (there can 29528c2ecf20Sopenharmony_ci * be several when recovering multiple devices). 29538c2ecf20Sopenharmony_ci * as we may have started syncing it but not finished. 29548c2ecf20Sopenharmony_ci * We can find the current address in 29558c2ecf20Sopenharmony_ci * mddev->curr_resync, but for recovery, 29568c2ecf20Sopenharmony_ci * we need to convert that to several 29578c2ecf20Sopenharmony_ci * virtual addresses. 
29588c2ecf20Sopenharmony_ci */ 29598c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 29608c2ecf20Sopenharmony_ci end_reshape(conf); 29618c2ecf20Sopenharmony_ci close_sync(conf); 29628c2ecf20Sopenharmony_ci return 0; 29638c2ecf20Sopenharmony_ci } 29648c2ecf20Sopenharmony_ci 29658c2ecf20Sopenharmony_ci if (mddev->curr_resync < max_sector) { /* aborted */ 29668c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 29678c2ecf20Sopenharmony_ci md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, 29688c2ecf20Sopenharmony_ci &sync_blocks, 1); 29698c2ecf20Sopenharmony_ci else for (i = 0; i < conf->geo.raid_disks; i++) { 29708c2ecf20Sopenharmony_ci sector_t sect = 29718c2ecf20Sopenharmony_ci raid10_find_virt(conf, mddev->curr_resync, i); 29728c2ecf20Sopenharmony_ci md_bitmap_end_sync(mddev->bitmap, sect, 29738c2ecf20Sopenharmony_ci &sync_blocks, 1); 29748c2ecf20Sopenharmony_ci } 29758c2ecf20Sopenharmony_ci } else { 29768c2ecf20Sopenharmony_ci /* completed sync */ 29778c2ecf20Sopenharmony_ci if ((!mddev->bitmap || conf->fullsync) 29788c2ecf20Sopenharmony_ci && conf->have_replacement 29798c2ecf20Sopenharmony_ci && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 29808c2ecf20Sopenharmony_ci /* Completed a full sync so the replacements 29818c2ecf20Sopenharmony_ci * are now fully recovered. 
29828c2ecf20Sopenharmony_ci */ 29838c2ecf20Sopenharmony_ci rcu_read_lock(); 29848c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) { 29858c2ecf20Sopenharmony_ci struct md_rdev *rdev = 29868c2ecf20Sopenharmony_ci rcu_dereference(conf->mirrors[i].replacement); 29878c2ecf20Sopenharmony_ci if (rdev) 29888c2ecf20Sopenharmony_ci rdev->recovery_offset = MaxSector; 29898c2ecf20Sopenharmony_ci } 29908c2ecf20Sopenharmony_ci rcu_read_unlock(); 29918c2ecf20Sopenharmony_ci } 29928c2ecf20Sopenharmony_ci conf->fullsync = 0; 29938c2ecf20Sopenharmony_ci } 29948c2ecf20Sopenharmony_ci md_bitmap_close_sync(mddev->bitmap); 29958c2ecf20Sopenharmony_ci close_sync(conf); 29968c2ecf20Sopenharmony_ci *skipped = 1; 29978c2ecf20Sopenharmony_ci return sectors_skipped; 29988c2ecf20Sopenharmony_ci } 29998c2ecf20Sopenharmony_ci 30008c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 30018c2ecf20Sopenharmony_ci return reshape_request(mddev, sector_nr, skipped); 30028c2ecf20Sopenharmony_ci 30038c2ecf20Sopenharmony_ci if (chunks_skipped >= conf->geo.raid_disks) { 30048c2ecf20Sopenharmony_ci /* if there has been nothing to do on any drive, 30058c2ecf20Sopenharmony_ci * then there is nothing to do at all.. 
30068c2ecf20Sopenharmony_ci */ 30078c2ecf20Sopenharmony_ci *skipped = 1; 30088c2ecf20Sopenharmony_ci return (max_sector - sector_nr) + sectors_skipped; 30098c2ecf20Sopenharmony_ci } 30108c2ecf20Sopenharmony_ci 30118c2ecf20Sopenharmony_ci if (max_sector > mddev->resync_max) 30128c2ecf20Sopenharmony_ci max_sector = mddev->resync_max; /* Don't do IO beyond here */ 30138c2ecf20Sopenharmony_ci 30148c2ecf20Sopenharmony_ci /* make sure whole request will fit in a chunk - if chunks 30158c2ecf20Sopenharmony_ci * are meaningful 30168c2ecf20Sopenharmony_ci */ 30178c2ecf20Sopenharmony_ci if (conf->geo.near_copies < conf->geo.raid_disks && 30188c2ecf20Sopenharmony_ci max_sector > (sector_nr | chunk_mask)) 30198c2ecf20Sopenharmony_ci max_sector = (sector_nr | chunk_mask) + 1; 30208c2ecf20Sopenharmony_ci 30218c2ecf20Sopenharmony_ci /* 30228c2ecf20Sopenharmony_ci * If there is non-resync activity waiting for a turn, then let it 30238c2ecf20Sopenharmony_ci * though before starting on this new sync request. 30248c2ecf20Sopenharmony_ci */ 30258c2ecf20Sopenharmony_ci if (conf->nr_waiting) 30268c2ecf20Sopenharmony_ci schedule_timeout_uninterruptible(1); 30278c2ecf20Sopenharmony_ci 30288c2ecf20Sopenharmony_ci /* Again, very different code for resync and recovery. 30298c2ecf20Sopenharmony_ci * Both must result in an r10bio with a list of bios that 30308c2ecf20Sopenharmony_ci * have bi_end_io, bi_sector, bi_disk set, 30318c2ecf20Sopenharmony_ci * and bi_private set to the r10bio. 30328c2ecf20Sopenharmony_ci * For recovery, we may actually create several r10bios 30338c2ecf20Sopenharmony_ci * with 2 bios in each, that correspond to the bios in the main one. 30348c2ecf20Sopenharmony_ci * In this case, the subordinate r10bios link back through a 30358c2ecf20Sopenharmony_ci * borrowed master_bio pointer, and the counter in the master 30368c2ecf20Sopenharmony_ci * includes a ref from each subordinate. 
30378c2ecf20Sopenharmony_ci */ 30388c2ecf20Sopenharmony_ci /* First, we decide what to do and set ->bi_end_io 30398c2ecf20Sopenharmony_ci * To end_sync_read if we want to read, and 30408c2ecf20Sopenharmony_ci * end_sync_write if we will want to write. 30418c2ecf20Sopenharmony_ci */ 30428c2ecf20Sopenharmony_ci 30438c2ecf20Sopenharmony_ci max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); 30448c2ecf20Sopenharmony_ci if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 30458c2ecf20Sopenharmony_ci /* recovery... the complicated one */ 30468c2ecf20Sopenharmony_ci int j; 30478c2ecf20Sopenharmony_ci r10_bio = NULL; 30488c2ecf20Sopenharmony_ci 30498c2ecf20Sopenharmony_ci for (i = 0 ; i < conf->geo.raid_disks; i++) { 30508c2ecf20Sopenharmony_ci int still_degraded; 30518c2ecf20Sopenharmony_ci struct r10bio *rb2; 30528c2ecf20Sopenharmony_ci sector_t sect; 30538c2ecf20Sopenharmony_ci int must_sync; 30548c2ecf20Sopenharmony_ci int any_working; 30558c2ecf20Sopenharmony_ci int need_recover = 0; 30568c2ecf20Sopenharmony_ci struct raid10_info *mirror = &conf->mirrors[i]; 30578c2ecf20Sopenharmony_ci struct md_rdev *mrdev, *mreplace; 30588c2ecf20Sopenharmony_ci 30598c2ecf20Sopenharmony_ci rcu_read_lock(); 30608c2ecf20Sopenharmony_ci mrdev = rcu_dereference(mirror->rdev); 30618c2ecf20Sopenharmony_ci mreplace = rcu_dereference(mirror->replacement); 30628c2ecf20Sopenharmony_ci 30638c2ecf20Sopenharmony_ci if (mrdev != NULL && 30648c2ecf20Sopenharmony_ci !test_bit(Faulty, &mrdev->flags) && 30658c2ecf20Sopenharmony_ci !test_bit(In_sync, &mrdev->flags)) 30668c2ecf20Sopenharmony_ci need_recover = 1; 30678c2ecf20Sopenharmony_ci if (mreplace && test_bit(Faulty, &mreplace->flags)) 30688c2ecf20Sopenharmony_ci mreplace = NULL; 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_ci if (!need_recover && !mreplace) { 30718c2ecf20Sopenharmony_ci rcu_read_unlock(); 30728c2ecf20Sopenharmony_ci continue; 30738c2ecf20Sopenharmony_ci } 30748c2ecf20Sopenharmony_ci 30758c2ecf20Sopenharmony_ci still_degraded = 0; 
30768c2ecf20Sopenharmony_ci /* want to reconstruct this device */ 30778c2ecf20Sopenharmony_ci rb2 = r10_bio; 30788c2ecf20Sopenharmony_ci sect = raid10_find_virt(conf, sector_nr, i); 30798c2ecf20Sopenharmony_ci if (sect >= mddev->resync_max_sectors) { 30808c2ecf20Sopenharmony_ci /* last stripe is not complete - don't 30818c2ecf20Sopenharmony_ci * try to recover this sector. 30828c2ecf20Sopenharmony_ci */ 30838c2ecf20Sopenharmony_ci rcu_read_unlock(); 30848c2ecf20Sopenharmony_ci continue; 30858c2ecf20Sopenharmony_ci } 30868c2ecf20Sopenharmony_ci /* Unless we are doing a full sync, or a replacement 30878c2ecf20Sopenharmony_ci * we only need to recover the block if it is set in 30888c2ecf20Sopenharmony_ci * the bitmap 30898c2ecf20Sopenharmony_ci */ 30908c2ecf20Sopenharmony_ci must_sync = md_bitmap_start_sync(mddev->bitmap, sect, 30918c2ecf20Sopenharmony_ci &sync_blocks, 1); 30928c2ecf20Sopenharmony_ci if (sync_blocks < max_sync) 30938c2ecf20Sopenharmony_ci max_sync = sync_blocks; 30948c2ecf20Sopenharmony_ci if (!must_sync && 30958c2ecf20Sopenharmony_ci mreplace == NULL && 30968c2ecf20Sopenharmony_ci !conf->fullsync) { 30978c2ecf20Sopenharmony_ci /* yep, skip the sync_blocks here, but don't assume 30988c2ecf20Sopenharmony_ci * that there will never be anything to do here 30998c2ecf20Sopenharmony_ci */ 31008c2ecf20Sopenharmony_ci chunks_skipped = -1; 31018c2ecf20Sopenharmony_ci rcu_read_unlock(); 31028c2ecf20Sopenharmony_ci continue; 31038c2ecf20Sopenharmony_ci } 31048c2ecf20Sopenharmony_ci atomic_inc(&mrdev->nr_pending); 31058c2ecf20Sopenharmony_ci if (mreplace) 31068c2ecf20Sopenharmony_ci atomic_inc(&mreplace->nr_pending); 31078c2ecf20Sopenharmony_ci rcu_read_unlock(); 31088c2ecf20Sopenharmony_ci 31098c2ecf20Sopenharmony_ci r10_bio = raid10_alloc_init_r10buf(conf); 31108c2ecf20Sopenharmony_ci r10_bio->state = 0; 31118c2ecf20Sopenharmony_ci raise_barrier(conf, rb2 != NULL); 31128c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 0); 31138c2ecf20Sopenharmony_ci 
31148c2ecf20Sopenharmony_ci r10_bio->master_bio = (struct bio*)rb2; 31158c2ecf20Sopenharmony_ci if (rb2) 31168c2ecf20Sopenharmony_ci atomic_inc(&rb2->remaining); 31178c2ecf20Sopenharmony_ci r10_bio->mddev = mddev; 31188c2ecf20Sopenharmony_ci set_bit(R10BIO_IsRecover, &r10_bio->state); 31198c2ecf20Sopenharmony_ci r10_bio->sector = sect; 31208c2ecf20Sopenharmony_ci 31218c2ecf20Sopenharmony_ci raid10_find_phys(conf, r10_bio); 31228c2ecf20Sopenharmony_ci 31238c2ecf20Sopenharmony_ci /* Need to check if the array will still be 31248c2ecf20Sopenharmony_ci * degraded 31258c2ecf20Sopenharmony_ci */ 31268c2ecf20Sopenharmony_ci rcu_read_lock(); 31278c2ecf20Sopenharmony_ci for (j = 0; j < conf->geo.raid_disks; j++) { 31288c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference( 31298c2ecf20Sopenharmony_ci conf->mirrors[j].rdev); 31308c2ecf20Sopenharmony_ci if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { 31318c2ecf20Sopenharmony_ci still_degraded = 1; 31328c2ecf20Sopenharmony_ci break; 31338c2ecf20Sopenharmony_ci } 31348c2ecf20Sopenharmony_ci } 31358c2ecf20Sopenharmony_ci 31368c2ecf20Sopenharmony_ci must_sync = md_bitmap_start_sync(mddev->bitmap, sect, 31378c2ecf20Sopenharmony_ci &sync_blocks, still_degraded); 31388c2ecf20Sopenharmony_ci 31398c2ecf20Sopenharmony_ci any_working = 0; 31408c2ecf20Sopenharmony_ci for (j=0; j<conf->copies;j++) { 31418c2ecf20Sopenharmony_ci int k; 31428c2ecf20Sopenharmony_ci int d = r10_bio->devs[j].devnum; 31438c2ecf20Sopenharmony_ci sector_t from_addr, to_addr; 31448c2ecf20Sopenharmony_ci struct md_rdev *rdev = 31458c2ecf20Sopenharmony_ci rcu_dereference(conf->mirrors[d].rdev); 31468c2ecf20Sopenharmony_ci sector_t sector, first_bad; 31478c2ecf20Sopenharmony_ci int bad_sectors; 31488c2ecf20Sopenharmony_ci if (!rdev || 31498c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags)) 31508c2ecf20Sopenharmony_ci continue; 31518c2ecf20Sopenharmony_ci /* This is where we read from */ 31528c2ecf20Sopenharmony_ci any_working = 1; 
31538c2ecf20Sopenharmony_ci sector = r10_bio->devs[j].addr; 31548c2ecf20Sopenharmony_ci 31558c2ecf20Sopenharmony_ci if (is_badblock(rdev, sector, max_sync, 31568c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) { 31578c2ecf20Sopenharmony_ci if (first_bad > sector) 31588c2ecf20Sopenharmony_ci max_sync = first_bad - sector; 31598c2ecf20Sopenharmony_ci else { 31608c2ecf20Sopenharmony_ci bad_sectors -= (sector 31618c2ecf20Sopenharmony_ci - first_bad); 31628c2ecf20Sopenharmony_ci if (max_sync > bad_sectors) 31638c2ecf20Sopenharmony_ci max_sync = bad_sectors; 31648c2ecf20Sopenharmony_ci continue; 31658c2ecf20Sopenharmony_ci } 31668c2ecf20Sopenharmony_ci } 31678c2ecf20Sopenharmony_ci bio = r10_bio->devs[0].bio; 31688c2ecf20Sopenharmony_ci bio->bi_next = biolist; 31698c2ecf20Sopenharmony_ci biolist = bio; 31708c2ecf20Sopenharmony_ci bio->bi_end_io = end_sync_read; 31718c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, REQ_OP_READ, 0); 31728c2ecf20Sopenharmony_ci if (test_bit(FailFast, &rdev->flags)) 31738c2ecf20Sopenharmony_ci bio->bi_opf |= MD_FAILFAST; 31748c2ecf20Sopenharmony_ci from_addr = r10_bio->devs[j].addr; 31758c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = from_addr + 31768c2ecf20Sopenharmony_ci rdev->data_offset; 31778c2ecf20Sopenharmony_ci bio_set_dev(bio, rdev->bdev); 31788c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 31798c2ecf20Sopenharmony_ci /* and we write to 'i' (if not in_sync) */ 31808c2ecf20Sopenharmony_ci 31818c2ecf20Sopenharmony_ci for (k=0; k<conf->copies; k++) 31828c2ecf20Sopenharmony_ci if (r10_bio->devs[k].devnum == i) 31838c2ecf20Sopenharmony_ci break; 31848c2ecf20Sopenharmony_ci BUG_ON(k == conf->copies); 31858c2ecf20Sopenharmony_ci to_addr = r10_bio->devs[k].addr; 31868c2ecf20Sopenharmony_ci r10_bio->devs[0].devnum = d; 31878c2ecf20Sopenharmony_ci r10_bio->devs[0].addr = from_addr; 31888c2ecf20Sopenharmony_ci r10_bio->devs[1].devnum = i; 31898c2ecf20Sopenharmony_ci r10_bio->devs[1].addr = to_addr; 31908c2ecf20Sopenharmony_ci 
31918c2ecf20Sopenharmony_ci if (need_recover) { 31928c2ecf20Sopenharmony_ci bio = r10_bio->devs[1].bio; 31938c2ecf20Sopenharmony_ci bio->bi_next = biolist; 31948c2ecf20Sopenharmony_ci biolist = bio; 31958c2ecf20Sopenharmony_ci bio->bi_end_io = end_sync_write; 31968c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 31978c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = to_addr 31988c2ecf20Sopenharmony_ci + mrdev->data_offset; 31998c2ecf20Sopenharmony_ci bio_set_dev(bio, mrdev->bdev); 32008c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 32018c2ecf20Sopenharmony_ci } else 32028c2ecf20Sopenharmony_ci r10_bio->devs[1].bio->bi_end_io = NULL; 32038c2ecf20Sopenharmony_ci 32048c2ecf20Sopenharmony_ci /* and maybe write to replacement */ 32058c2ecf20Sopenharmony_ci bio = r10_bio->devs[1].repl_bio; 32068c2ecf20Sopenharmony_ci if (bio) 32078c2ecf20Sopenharmony_ci bio->bi_end_io = NULL; 32088c2ecf20Sopenharmony_ci /* Note: if replace is not NULL, then bio 32098c2ecf20Sopenharmony_ci * cannot be NULL as r10buf_pool_alloc will 32108c2ecf20Sopenharmony_ci * have allocated it. 
32118c2ecf20Sopenharmony_ci */ 32128c2ecf20Sopenharmony_ci if (!mreplace) 32138c2ecf20Sopenharmony_ci break; 32148c2ecf20Sopenharmony_ci bio->bi_next = biolist; 32158c2ecf20Sopenharmony_ci biolist = bio; 32168c2ecf20Sopenharmony_ci bio->bi_end_io = end_sync_write; 32178c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 32188c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = to_addr + 32198c2ecf20Sopenharmony_ci mreplace->data_offset; 32208c2ecf20Sopenharmony_ci bio_set_dev(bio, mreplace->bdev); 32218c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 32228c2ecf20Sopenharmony_ci break; 32238c2ecf20Sopenharmony_ci } 32248c2ecf20Sopenharmony_ci rcu_read_unlock(); 32258c2ecf20Sopenharmony_ci if (j == conf->copies) { 32268c2ecf20Sopenharmony_ci /* Cannot recover, so abort the recovery or 32278c2ecf20Sopenharmony_ci * record a bad block */ 32288c2ecf20Sopenharmony_ci if (any_working) { 32298c2ecf20Sopenharmony_ci /* problem is that there are bad blocks 32308c2ecf20Sopenharmony_ci * on other device(s) 32318c2ecf20Sopenharmony_ci */ 32328c2ecf20Sopenharmony_ci int k; 32338c2ecf20Sopenharmony_ci for (k = 0; k < conf->copies; k++) 32348c2ecf20Sopenharmony_ci if (r10_bio->devs[k].devnum == i) 32358c2ecf20Sopenharmony_ci break; 32368c2ecf20Sopenharmony_ci if (!test_bit(In_sync, 32378c2ecf20Sopenharmony_ci &mrdev->flags) 32388c2ecf20Sopenharmony_ci && !rdev_set_badblocks( 32398c2ecf20Sopenharmony_ci mrdev, 32408c2ecf20Sopenharmony_ci r10_bio->devs[k].addr, 32418c2ecf20Sopenharmony_ci max_sync, 0)) 32428c2ecf20Sopenharmony_ci any_working = 0; 32438c2ecf20Sopenharmony_ci if (mreplace && 32448c2ecf20Sopenharmony_ci !rdev_set_badblocks( 32458c2ecf20Sopenharmony_ci mreplace, 32468c2ecf20Sopenharmony_ci r10_bio->devs[k].addr, 32478c2ecf20Sopenharmony_ci max_sync, 0)) 32488c2ecf20Sopenharmony_ci any_working = 0; 32498c2ecf20Sopenharmony_ci } 32508c2ecf20Sopenharmony_ci if (!any_working) { 32518c2ecf20Sopenharmony_ci if (!test_and_set_bit(MD_RECOVERY_INTR, 
32528c2ecf20Sopenharmony_ci &mddev->recovery)) 32538c2ecf20Sopenharmony_ci pr_warn("md/raid10:%s: insufficient working devices for recovery.\n", 32548c2ecf20Sopenharmony_ci mdname(mddev)); 32558c2ecf20Sopenharmony_ci mirror->recovery_disabled 32568c2ecf20Sopenharmony_ci = mddev->recovery_disabled; 32578c2ecf20Sopenharmony_ci } 32588c2ecf20Sopenharmony_ci put_buf(r10_bio); 32598c2ecf20Sopenharmony_ci if (rb2) 32608c2ecf20Sopenharmony_ci atomic_dec(&rb2->remaining); 32618c2ecf20Sopenharmony_ci r10_bio = rb2; 32628c2ecf20Sopenharmony_ci rdev_dec_pending(mrdev, mddev); 32638c2ecf20Sopenharmony_ci if (mreplace) 32648c2ecf20Sopenharmony_ci rdev_dec_pending(mreplace, mddev); 32658c2ecf20Sopenharmony_ci break; 32668c2ecf20Sopenharmony_ci } 32678c2ecf20Sopenharmony_ci rdev_dec_pending(mrdev, mddev); 32688c2ecf20Sopenharmony_ci if (mreplace) 32698c2ecf20Sopenharmony_ci rdev_dec_pending(mreplace, mddev); 32708c2ecf20Sopenharmony_ci if (r10_bio->devs[0].bio->bi_opf & MD_FAILFAST) { 32718c2ecf20Sopenharmony_ci /* Only want this if there is elsewhere to 32728c2ecf20Sopenharmony_ci * read from. 'j' is currently the first 32738c2ecf20Sopenharmony_ci * readable copy. 
32748c2ecf20Sopenharmony_ci */ 32758c2ecf20Sopenharmony_ci int targets = 1; 32768c2ecf20Sopenharmony_ci for (; j < conf->copies; j++) { 32778c2ecf20Sopenharmony_ci int d = r10_bio->devs[j].devnum; 32788c2ecf20Sopenharmony_ci if (conf->mirrors[d].rdev && 32798c2ecf20Sopenharmony_ci test_bit(In_sync, 32808c2ecf20Sopenharmony_ci &conf->mirrors[d].rdev->flags)) 32818c2ecf20Sopenharmony_ci targets++; 32828c2ecf20Sopenharmony_ci } 32838c2ecf20Sopenharmony_ci if (targets == 1) 32848c2ecf20Sopenharmony_ci r10_bio->devs[0].bio->bi_opf 32858c2ecf20Sopenharmony_ci &= ~MD_FAILFAST; 32868c2ecf20Sopenharmony_ci } 32878c2ecf20Sopenharmony_ci } 32888c2ecf20Sopenharmony_ci if (biolist == NULL) { 32898c2ecf20Sopenharmony_ci while (r10_bio) { 32908c2ecf20Sopenharmony_ci struct r10bio *rb2 = r10_bio; 32918c2ecf20Sopenharmony_ci r10_bio = (struct r10bio*) rb2->master_bio; 32928c2ecf20Sopenharmony_ci rb2->master_bio = NULL; 32938c2ecf20Sopenharmony_ci put_buf(rb2); 32948c2ecf20Sopenharmony_ci } 32958c2ecf20Sopenharmony_ci goto giveup; 32968c2ecf20Sopenharmony_ci } 32978c2ecf20Sopenharmony_ci } else { 32988c2ecf20Sopenharmony_ci /* resync. Schedule a read for every block at this virt offset */ 32998c2ecf20Sopenharmony_ci int count = 0; 33008c2ecf20Sopenharmony_ci 33018c2ecf20Sopenharmony_ci /* 33028c2ecf20Sopenharmony_ci * Since curr_resync_completed could probably not update in 33038c2ecf20Sopenharmony_ci * time, and we will set cluster_sync_low based on it. 33048c2ecf20Sopenharmony_ci * Let's check against "sector_nr + 2 * RESYNC_SECTORS" for 33058c2ecf20Sopenharmony_ci * safety reason, which ensures curr_resync_completed is 33068c2ecf20Sopenharmony_ci * updated in bitmap_cond_end_sync. 
33078c2ecf20Sopenharmony_ci */ 33088c2ecf20Sopenharmony_ci md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, 33098c2ecf20Sopenharmony_ci mddev_is_clustered(mddev) && 33108c2ecf20Sopenharmony_ci (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high)); 33118c2ecf20Sopenharmony_ci 33128c2ecf20Sopenharmony_ci if (!md_bitmap_start_sync(mddev->bitmap, sector_nr, 33138c2ecf20Sopenharmony_ci &sync_blocks, mddev->degraded) && 33148c2ecf20Sopenharmony_ci !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, 33158c2ecf20Sopenharmony_ci &mddev->recovery)) { 33168c2ecf20Sopenharmony_ci /* We can skip this block */ 33178c2ecf20Sopenharmony_ci *skipped = 1; 33188c2ecf20Sopenharmony_ci return sync_blocks + sectors_skipped; 33198c2ecf20Sopenharmony_ci } 33208c2ecf20Sopenharmony_ci if (sync_blocks < max_sync) 33218c2ecf20Sopenharmony_ci max_sync = sync_blocks; 33228c2ecf20Sopenharmony_ci r10_bio = raid10_alloc_init_r10buf(conf); 33238c2ecf20Sopenharmony_ci r10_bio->state = 0; 33248c2ecf20Sopenharmony_ci 33258c2ecf20Sopenharmony_ci r10_bio->mddev = mddev; 33268c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 0); 33278c2ecf20Sopenharmony_ci raise_barrier(conf, 0); 33288c2ecf20Sopenharmony_ci conf->next_resync = sector_nr; 33298c2ecf20Sopenharmony_ci 33308c2ecf20Sopenharmony_ci r10_bio->master_bio = NULL; 33318c2ecf20Sopenharmony_ci r10_bio->sector = sector_nr; 33328c2ecf20Sopenharmony_ci set_bit(R10BIO_IsSync, &r10_bio->state); 33338c2ecf20Sopenharmony_ci raid10_find_phys(conf, r10_bio); 33348c2ecf20Sopenharmony_ci r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1; 33358c2ecf20Sopenharmony_ci 33368c2ecf20Sopenharmony_ci for (i = 0; i < conf->copies; i++) { 33378c2ecf20Sopenharmony_ci int d = r10_bio->devs[i].devnum; 33388c2ecf20Sopenharmony_ci sector_t first_bad, sector; 33398c2ecf20Sopenharmony_ci int bad_sectors; 33408c2ecf20Sopenharmony_ci struct md_rdev *rdev; 33418c2ecf20Sopenharmony_ci 33428c2ecf20Sopenharmony_ci if (r10_bio->devs[i].repl_bio) 
33438c2ecf20Sopenharmony_ci r10_bio->devs[i].repl_bio->bi_end_io = NULL; 33448c2ecf20Sopenharmony_ci 33458c2ecf20Sopenharmony_ci bio = r10_bio->devs[i].bio; 33468c2ecf20Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 33478c2ecf20Sopenharmony_ci rcu_read_lock(); 33488c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 33498c2ecf20Sopenharmony_ci if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { 33508c2ecf20Sopenharmony_ci rcu_read_unlock(); 33518c2ecf20Sopenharmony_ci continue; 33528c2ecf20Sopenharmony_ci } 33538c2ecf20Sopenharmony_ci sector = r10_bio->devs[i].addr; 33548c2ecf20Sopenharmony_ci if (is_badblock(rdev, sector, max_sync, 33558c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) { 33568c2ecf20Sopenharmony_ci if (first_bad > sector) 33578c2ecf20Sopenharmony_ci max_sync = first_bad - sector; 33588c2ecf20Sopenharmony_ci else { 33598c2ecf20Sopenharmony_ci bad_sectors -= (sector - first_bad); 33608c2ecf20Sopenharmony_ci if (max_sync > bad_sectors) 33618c2ecf20Sopenharmony_ci max_sync = bad_sectors; 33628c2ecf20Sopenharmony_ci rcu_read_unlock(); 33638c2ecf20Sopenharmony_ci continue; 33648c2ecf20Sopenharmony_ci } 33658c2ecf20Sopenharmony_ci } 33668c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 33678c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 33688c2ecf20Sopenharmony_ci bio->bi_next = biolist; 33698c2ecf20Sopenharmony_ci biolist = bio; 33708c2ecf20Sopenharmony_ci bio->bi_end_io = end_sync_read; 33718c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, REQ_OP_READ, 0); 33728c2ecf20Sopenharmony_ci if (test_bit(FailFast, &rdev->flags)) 33738c2ecf20Sopenharmony_ci bio->bi_opf |= MD_FAILFAST; 33748c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = sector + rdev->data_offset; 33758c2ecf20Sopenharmony_ci bio_set_dev(bio, rdev->bdev); 33768c2ecf20Sopenharmony_ci count++; 33778c2ecf20Sopenharmony_ci 33788c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].replacement); 33798c2ecf20Sopenharmony_ci if (rdev == NULL || test_bit(Faulty, 
&rdev->flags)) { 33808c2ecf20Sopenharmony_ci rcu_read_unlock(); 33818c2ecf20Sopenharmony_ci continue; 33828c2ecf20Sopenharmony_ci } 33838c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 33848c2ecf20Sopenharmony_ci 33858c2ecf20Sopenharmony_ci /* Need to set up for writing to the replacement */ 33868c2ecf20Sopenharmony_ci bio = r10_bio->devs[i].repl_bio; 33878c2ecf20Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 33888c2ecf20Sopenharmony_ci 33898c2ecf20Sopenharmony_ci sector = r10_bio->devs[i].addr; 33908c2ecf20Sopenharmony_ci bio->bi_next = biolist; 33918c2ecf20Sopenharmony_ci biolist = bio; 33928c2ecf20Sopenharmony_ci bio->bi_end_io = end_sync_write; 33938c2ecf20Sopenharmony_ci bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 33948c2ecf20Sopenharmony_ci if (test_bit(FailFast, &rdev->flags)) 33958c2ecf20Sopenharmony_ci bio->bi_opf |= MD_FAILFAST; 33968c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = sector + rdev->data_offset; 33978c2ecf20Sopenharmony_ci bio_set_dev(bio, rdev->bdev); 33988c2ecf20Sopenharmony_ci count++; 33998c2ecf20Sopenharmony_ci rcu_read_unlock(); 34008c2ecf20Sopenharmony_ci } 34018c2ecf20Sopenharmony_ci 34028c2ecf20Sopenharmony_ci if (count < 2) { 34038c2ecf20Sopenharmony_ci for (i=0; i<conf->copies; i++) { 34048c2ecf20Sopenharmony_ci int d = r10_bio->devs[i].devnum; 34058c2ecf20Sopenharmony_ci if (r10_bio->devs[i].bio->bi_end_io) 34068c2ecf20Sopenharmony_ci rdev_dec_pending(conf->mirrors[d].rdev, 34078c2ecf20Sopenharmony_ci mddev); 34088c2ecf20Sopenharmony_ci if (r10_bio->devs[i].repl_bio && 34098c2ecf20Sopenharmony_ci r10_bio->devs[i].repl_bio->bi_end_io) 34108c2ecf20Sopenharmony_ci rdev_dec_pending( 34118c2ecf20Sopenharmony_ci conf->mirrors[d].replacement, 34128c2ecf20Sopenharmony_ci mddev); 34138c2ecf20Sopenharmony_ci } 34148c2ecf20Sopenharmony_ci put_buf(r10_bio); 34158c2ecf20Sopenharmony_ci biolist = NULL; 34168c2ecf20Sopenharmony_ci goto giveup; 34178c2ecf20Sopenharmony_ci } 34188c2ecf20Sopenharmony_ci } 34198c2ecf20Sopenharmony_ci 
34208c2ecf20Sopenharmony_ci nr_sectors = 0; 34218c2ecf20Sopenharmony_ci if (sector_nr + max_sync < max_sector) 34228c2ecf20Sopenharmony_ci max_sector = sector_nr + max_sync; 34238c2ecf20Sopenharmony_ci do { 34248c2ecf20Sopenharmony_ci struct page *page; 34258c2ecf20Sopenharmony_ci int len = PAGE_SIZE; 34268c2ecf20Sopenharmony_ci if (sector_nr + (len>>9) > max_sector) 34278c2ecf20Sopenharmony_ci len = (max_sector - sector_nr) << 9; 34288c2ecf20Sopenharmony_ci if (len == 0) 34298c2ecf20Sopenharmony_ci break; 34308c2ecf20Sopenharmony_ci for (bio= biolist ; bio ; bio=bio->bi_next) { 34318c2ecf20Sopenharmony_ci struct resync_pages *rp = get_resync_pages(bio); 34328c2ecf20Sopenharmony_ci page = resync_fetch_page(rp, page_idx); 34338c2ecf20Sopenharmony_ci /* 34348c2ecf20Sopenharmony_ci * won't fail because the vec table is big enough 34358c2ecf20Sopenharmony_ci * to hold all these pages 34368c2ecf20Sopenharmony_ci */ 34378c2ecf20Sopenharmony_ci bio_add_page(bio, page, len, 0); 34388c2ecf20Sopenharmony_ci } 34398c2ecf20Sopenharmony_ci nr_sectors += len>>9; 34408c2ecf20Sopenharmony_ci sector_nr += len>>9; 34418c2ecf20Sopenharmony_ci } while (++page_idx < RESYNC_PAGES); 34428c2ecf20Sopenharmony_ci r10_bio->sectors = nr_sectors; 34438c2ecf20Sopenharmony_ci 34448c2ecf20Sopenharmony_ci if (mddev_is_clustered(mddev) && 34458c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 34468c2ecf20Sopenharmony_ci /* It is resync not recovery */ 34478c2ecf20Sopenharmony_ci if (conf->cluster_sync_high < sector_nr + nr_sectors) { 34488c2ecf20Sopenharmony_ci conf->cluster_sync_low = mddev->curr_resync_completed; 34498c2ecf20Sopenharmony_ci raid10_set_cluster_sync_high(conf); 34508c2ecf20Sopenharmony_ci /* Send resync message */ 34518c2ecf20Sopenharmony_ci md_cluster_ops->resync_info_update(mddev, 34528c2ecf20Sopenharmony_ci conf->cluster_sync_low, 34538c2ecf20Sopenharmony_ci conf->cluster_sync_high); 34548c2ecf20Sopenharmony_ci } 34558c2ecf20Sopenharmony_ci } else if 
(mddev_is_clustered(mddev)) { 34568c2ecf20Sopenharmony_ci /* This is recovery not resync */ 34578c2ecf20Sopenharmony_ci sector_t sect_va1, sect_va2; 34588c2ecf20Sopenharmony_ci bool broadcast_msg = false; 34598c2ecf20Sopenharmony_ci 34608c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) { 34618c2ecf20Sopenharmony_ci /* 34628c2ecf20Sopenharmony_ci * sector_nr is a device address for recovery, so we 34638c2ecf20Sopenharmony_ci * need translate it to array address before compare 34648c2ecf20Sopenharmony_ci * with cluster_sync_high. 34658c2ecf20Sopenharmony_ci */ 34668c2ecf20Sopenharmony_ci sect_va1 = raid10_find_virt(conf, sector_nr, i); 34678c2ecf20Sopenharmony_ci 34688c2ecf20Sopenharmony_ci if (conf->cluster_sync_high < sect_va1 + nr_sectors) { 34698c2ecf20Sopenharmony_ci broadcast_msg = true; 34708c2ecf20Sopenharmony_ci /* 34718c2ecf20Sopenharmony_ci * curr_resync_completed is similar as 34728c2ecf20Sopenharmony_ci * sector_nr, so make the translation too. 34738c2ecf20Sopenharmony_ci */ 34748c2ecf20Sopenharmony_ci sect_va2 = raid10_find_virt(conf, 34758c2ecf20Sopenharmony_ci mddev->curr_resync_completed, i); 34768c2ecf20Sopenharmony_ci 34778c2ecf20Sopenharmony_ci if (conf->cluster_sync_low == 0 || 34788c2ecf20Sopenharmony_ci conf->cluster_sync_low > sect_va2) 34798c2ecf20Sopenharmony_ci conf->cluster_sync_low = sect_va2; 34808c2ecf20Sopenharmony_ci } 34818c2ecf20Sopenharmony_ci } 34828c2ecf20Sopenharmony_ci if (broadcast_msg) { 34838c2ecf20Sopenharmony_ci raid10_set_cluster_sync_high(conf); 34848c2ecf20Sopenharmony_ci md_cluster_ops->resync_info_update(mddev, 34858c2ecf20Sopenharmony_ci conf->cluster_sync_low, 34868c2ecf20Sopenharmony_ci conf->cluster_sync_high); 34878c2ecf20Sopenharmony_ci } 34888c2ecf20Sopenharmony_ci } 34898c2ecf20Sopenharmony_ci 34908c2ecf20Sopenharmony_ci while (biolist) { 34918c2ecf20Sopenharmony_ci bio = biolist; 34928c2ecf20Sopenharmony_ci biolist = biolist->bi_next; 34938c2ecf20Sopenharmony_ci 34948c2ecf20Sopenharmony_ci 
bio->bi_next = NULL; 34958c2ecf20Sopenharmony_ci r10_bio = get_resync_r10bio(bio); 34968c2ecf20Sopenharmony_ci r10_bio->sectors = nr_sectors; 34978c2ecf20Sopenharmony_ci 34988c2ecf20Sopenharmony_ci if (bio->bi_end_io == end_sync_read) { 34998c2ecf20Sopenharmony_ci md_sync_acct_bio(bio, nr_sectors); 35008c2ecf20Sopenharmony_ci bio->bi_status = 0; 35018c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 35028c2ecf20Sopenharmony_ci } 35038c2ecf20Sopenharmony_ci } 35048c2ecf20Sopenharmony_ci 35058c2ecf20Sopenharmony_ci if (sectors_skipped) 35068c2ecf20Sopenharmony_ci /* pretend they weren't skipped, it makes 35078c2ecf20Sopenharmony_ci * no important difference in this case 35088c2ecf20Sopenharmony_ci */ 35098c2ecf20Sopenharmony_ci md_done_sync(mddev, sectors_skipped, 1); 35108c2ecf20Sopenharmony_ci 35118c2ecf20Sopenharmony_ci return sectors_skipped + nr_sectors; 35128c2ecf20Sopenharmony_ci giveup: 35138c2ecf20Sopenharmony_ci /* There is nowhere to write, so all non-sync 35148c2ecf20Sopenharmony_ci * drives must be failed or in resync, all drives 35158c2ecf20Sopenharmony_ci * have a bad block, so try the next chunk... 
35168c2ecf20Sopenharmony_ci */ 35178c2ecf20Sopenharmony_ci if (sector_nr + max_sync < max_sector) 35188c2ecf20Sopenharmony_ci max_sector = sector_nr + max_sync; 35198c2ecf20Sopenharmony_ci 35208c2ecf20Sopenharmony_ci sectors_skipped += (max_sector - sector_nr); 35218c2ecf20Sopenharmony_ci chunks_skipped ++; 35228c2ecf20Sopenharmony_ci sector_nr = max_sector; 35238c2ecf20Sopenharmony_ci goto skipped; 35248c2ecf20Sopenharmony_ci} 35258c2ecf20Sopenharmony_ci

/*
 * raid10_size() - size of the array, in sectors, for a given geometry.
 * @mddev:      array whose ->private points at the r10conf to consult.
 * @sectors:    sectors used on each device; 0 means conf->dev_sectors.
 * @raid_disks: number of devices; 0 means the smaller of the current
 *              (geo) and previous (prev) device counts.
 *
 * The per-device sector count is converted to whole chunks, divided by
 * far_copies, multiplied by the number of devices and divided by
 * near_copies; the resulting chunk count is converted back to sectors.
 * Partial chunks are dropped by the initial shift.
 */
static sector_t
raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	sector_t size;
	struct r10conf *conf = mddev->private;

	if (!raid_disks)
		raid_disks = min(conf->geo.raid_disks,
				 conf->prev.raid_disks);
	if (!sectors)
		sectors = conf->dev_sectors;

	size = sectors >> conf->geo.chunk_shift;
	sector_div(size, conf->geo.far_copies);
	size = size * raid_disks;
	sector_div(size, conf->geo.near_copies);

	return size << conf->geo.chunk_shift;
}

/*
 * calc_sectors() - derive conf->dev_sectors and conf->geo.stride.
 * @conf: configuration to update; conf->geo and conf->copies must
 *        already be filled in.
 * @size: sectors available on each device.
 *
 * Calculate the number of sectors-per-device that will actually be
 * used, and set conf->dev_sectors and conf->geo.stride.
 */
static void calc_sectors(struct r10conf *conf, sector_t size)
{
	size = size >> conf->geo.chunk_shift;
	sector_div(size, conf->geo.far_copies);
	size = size * conf->geo.raid_disks;
	sector_div(size, conf->geo.near_copies);
	/* 'size' is now the number of chunks in the array */
	/* calculate "used chunks per device" */
	size = size * conf->copies;

	/* We need to round up when dividing by raid_disks to
	 * get the stride size.
	 */
	size = DIV_ROUND_UP_SECTOR_T(size, conf->geo.raid_disks);

	conf->dev_sectors = size << conf->geo.chunk_shift;

	if (conf->geo.far_offset)
		/* far copies live in adjacent stripes: one chunk apart */
		conf->geo.stride = 1 << conf->geo.chunk_shift;
	else {
		/* far copies live in separate sections of the device */
		sector_div(size, conf->geo.far_copies);
		conf->geo.stride = size << conf->geo.chunk_shift;
	}
}

enum geo_type {geo_new, geo_old, geo_start};

/*
 * setup_geo() - decode an mddev layout word into a struct geom.
 * @geo:   geometry to fill in.
 * @mddev: array supplying layout, chunk size and device count.
 * @new:   which set of mddev fields to decode:
 *         geo_old   - layout / chunk_sectors, raid_disks - delta_disks
 *         geo_new   - new_layout / new_chunk_sectors, raid_disks
 *         geo_start - new_layout / new_chunk_sectors,
 *                     raid_disks + delta_disks
 *
 * Return: near_copies * far_copies on success,
 *         -1 if the layout word is not valid,
 *         -2 if the chunk size is smaller than a page or not a power of 2.
 * On a negative return @geo is left unmodified.
 */
static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
{
	int nc, fc, fo;
	int layout, chunk, disks;

	switch (new) {
	case geo_old:
		layout = mddev->layout;
		chunk = mddev->chunk_sectors;
		disks = mddev->raid_disks - mddev->delta_disks;
		break;
	case geo_new:
		layout = mddev->new_layout;
		chunk = mddev->new_chunk_sectors;
		disks = mddev->raid_disks;
		break;
	default: /* avoid 'may be unused' warnings */
	case geo_start: /* new when starting reshape - raid_disks not
			 * updated yet. */
		layout = mddev->new_layout;
		chunk = mddev->new_chunk_sectors;
		disks = mddev->raid_disks + mddev->delta_disks;
		break;
	}
	/* only bits 0..18 of the layout word are defined */
	if (layout >> 19)
		return -1;
	if (chunk < (PAGE_SIZE >> 9) ||
	    !is_power_of_2(chunk))
		return -2;
	nc = layout & 255;
	fc = (layout >> 8) & 255;
	fo = layout & (1<<16);
	/*
	 * near_copies and far_copies must both be at least one.  Reject a
	 * zero count here: the "far sets" case below divides by fc, and a
	 * product of zero is never an acceptable copy count anyway.
	 */
	if (nc == 0 || fc == 0)
		return -1;
	geo->raid_disks = disks;
	geo->near_copies = nc;
	geo->far_copies = fc;
	geo->far_offset = fo;
	switch (layout >> 17) {
	case 0:	/* original layout.  simple but not always optimal */
		geo->far_set_size = disks;
		break;
	case 1: /* "improved" layout which was buggy.  Hopefully no-one is
		 * actually using this, but leave code here just in case.*/
		geo->far_set_size = disks/fc;
		WARN(geo->far_set_size < fc,
		     "This RAID10 layout does not provide data safety - please backup and create new array\n");
		break;
	case 2: /* "improved" layout fixed to match documentation */
		geo->far_set_size = fc * nc;
		break;
	default: /* Not a valid layout */
		return -1;
	}
	geo->chunk_mask = chunk - 1;
	/* chunk is a power of 2, so its only set bit gives the shift */
	geo->chunk_shift = ffz(~chunk);
	return nc*fc;
}

/*
 * raid10_free_conf() - release an r10conf and everything it owns.
 * @conf: configuration to free; NULL is accepted and ignored.
 *
 * Safe to call on a partially constructed conf as produced by the
 * error path of setup_conf().
 */
static void raid10_free_conf(struct r10conf *conf)
{
	if (!conf)
		return;

	mempool_exit(&conf->r10bio_pool);
	kfree(conf->mirrors);
	kfree(conf->mirrors_old);
	kfree(conf->mirrors_new);
	safe_put_page(conf->tmppage);
	bioset_exit(&conf->bio_split);
	kfree(conf);
}

/*
 * setup_conf() - allocate and initialise the r10conf for an array.
 * @mddev: array being configured.
 *
 * Decodes the new geometry, allocates the mirror table, scratch page,
 * r10bio mempool and split bioset, computes the per-device size, sets up
 * the previous geometry when a reshape is in progress, and registers the
 * raid10d thread.
 *
 * Return: the new conf on success, otherwise an ERR_PTR():
 *         -EINVAL for an unsupported layout or chunk size, or when the
 *         old and new geometries disagree on the number of copies;
 *         -ENOMEM (or the error from mempool_init()/bioset_init()) when
 *         a resource cannot be set up.
 */
static struct r10conf *setup_conf(struct mddev *mddev)
{
	struct r10conf *conf = NULL;
	int err = -EINVAL;
	struct geom geo;
	int copies;

	copies = setup_geo(&geo, mddev, geo_new);

	if (copies == -2) {
		pr_warn("md/raid10:%s: chunk size must be at least PAGE_SIZE(%ld) and be a power of 2.\n",
			mdname(mddev), PAGE_SIZE);
		goto out;
	}

	if (copies < 2 || copies > mddev->raid_disks) {
		pr_warn("md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
			mdname(mddev), mddev->new_layout);
		goto out;
	}

	err = -ENOMEM;
	conf = kzalloc(sizeof(struct r10conf), GFP_KERNEL);
	if (!conf)
		goto out;

	/* FIXME calc properly */
	conf->mirrors = kcalloc(mddev->raid_disks + max(0, -mddev->delta_disks),
				sizeof(struct raid10_info),
				GFP_KERNEL);
	if (!conf->mirrors)
		goto out;

	conf->tmppage = alloc_page(GFP_KERNEL);
	if (!conf->tmppage)
		goto out;

	conf->geo = geo;
	conf->copies = copies;
	err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc,
			   rbio_pool_free, conf);
	if (err)
		goto out;

	err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
	if (err)
		goto out;

	calc_sectors(conf, mddev->dev_sectors);
	if (mddev->reshape_position == MaxSector) {
		/* no reshape in progress: old and new geometry coincide */
		conf->prev = conf->geo;
		conf->reshape_progress = MaxSector;
	} else {
		if (setup_geo(&conf->prev, mddev, geo_old) != conf->copies) {
			err = -EINVAL;
			goto out;
		}
		conf->reshape_progress = mddev->reshape_position;
		if (conf->prev.far_offset)
			conf->prev.stride = 1 << conf->prev.chunk_shift;
		else
			/* far_copies must be 1 */
			conf->prev.stride = conf->dev_sectors;
	}
	conf->reshape_safe = conf->reshape_progress;
	spin_lock_init(&conf->device_lock);
	INIT_LIST_HEAD(&conf->retry_list);
	INIT_LIST_HEAD(&conf->bio_end_io_list);

	spin_lock_init(&conf->resync_lock);
	init_waitqueue_head(&conf->wait_barrier);
	atomic_set(&conf->nr_pending, 0);

	err = -ENOMEM;
	conf->thread = md_register_thread(raid10d, mddev, "raid10");
	if (!conf->thread)
		goto out;

	conf->mddev = mddev;
	return conf;

 out:
	/* handles conf == NULL and any partially initialised state */
	raid10_free_conf(conf);
	return ERR_PTR(err);
}

37338c2ecf20Sopenharmony_cistatic void raid10_set_io_opt(struct
r10conf *conf) 37348c2ecf20Sopenharmony_ci{ 37358c2ecf20Sopenharmony_ci int raid_disks = conf->geo.raid_disks; 37368c2ecf20Sopenharmony_ci 37378c2ecf20Sopenharmony_ci if (!(conf->geo.raid_disks % conf->geo.near_copies)) 37388c2ecf20Sopenharmony_ci raid_disks /= conf->geo.near_copies; 37398c2ecf20Sopenharmony_ci blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) * 37408c2ecf20Sopenharmony_ci raid_disks); 37418c2ecf20Sopenharmony_ci} 37428c2ecf20Sopenharmony_ci 37438c2ecf20Sopenharmony_cistatic int raid10_run(struct mddev *mddev) 37448c2ecf20Sopenharmony_ci{ 37458c2ecf20Sopenharmony_ci struct r10conf *conf; 37468c2ecf20Sopenharmony_ci int i, disk_idx; 37478c2ecf20Sopenharmony_ci struct raid10_info *disk; 37488c2ecf20Sopenharmony_ci struct md_rdev *rdev; 37498c2ecf20Sopenharmony_ci sector_t size; 37508c2ecf20Sopenharmony_ci sector_t min_offset_diff = 0; 37518c2ecf20Sopenharmony_ci int first = 1; 37528c2ecf20Sopenharmony_ci bool discard_supported = false; 37538c2ecf20Sopenharmony_ci 37548c2ecf20Sopenharmony_ci if (mddev_init_writes_pending(mddev) < 0) 37558c2ecf20Sopenharmony_ci return -ENOMEM; 37568c2ecf20Sopenharmony_ci 37578c2ecf20Sopenharmony_ci if (mddev->private == NULL) { 37588c2ecf20Sopenharmony_ci conf = setup_conf(mddev); 37598c2ecf20Sopenharmony_ci if (IS_ERR(conf)) 37608c2ecf20Sopenharmony_ci return PTR_ERR(conf); 37618c2ecf20Sopenharmony_ci mddev->private = conf; 37628c2ecf20Sopenharmony_ci } 37638c2ecf20Sopenharmony_ci conf = mddev->private; 37648c2ecf20Sopenharmony_ci if (!conf) 37658c2ecf20Sopenharmony_ci goto out; 37668c2ecf20Sopenharmony_ci 37678c2ecf20Sopenharmony_ci mddev->thread = conf->thread; 37688c2ecf20Sopenharmony_ci conf->thread = NULL; 37698c2ecf20Sopenharmony_ci 37708c2ecf20Sopenharmony_ci if (mddev_is_clustered(conf->mddev)) { 37718c2ecf20Sopenharmony_ci int fc, fo; 37728c2ecf20Sopenharmony_ci 37738c2ecf20Sopenharmony_ci fc = (mddev->layout >> 8) & 255; 37748c2ecf20Sopenharmony_ci fo = mddev->layout & (1<<16); 
37758c2ecf20Sopenharmony_ci if (fc > 1 || fo > 0) { 37768c2ecf20Sopenharmony_ci pr_err("only near layout is supported by clustered" 37778c2ecf20Sopenharmony_ci " raid10\n"); 37788c2ecf20Sopenharmony_ci goto out_free_conf; 37798c2ecf20Sopenharmony_ci } 37808c2ecf20Sopenharmony_ci } 37818c2ecf20Sopenharmony_ci 37828c2ecf20Sopenharmony_ci if (mddev->queue) { 37838c2ecf20Sopenharmony_ci blk_queue_max_discard_sectors(mddev->queue, 37848c2ecf20Sopenharmony_ci mddev->chunk_sectors); 37858c2ecf20Sopenharmony_ci blk_queue_max_write_same_sectors(mddev->queue, 0); 37868c2ecf20Sopenharmony_ci blk_queue_max_write_zeroes_sectors(mddev->queue, 0); 37878c2ecf20Sopenharmony_ci blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); 37888c2ecf20Sopenharmony_ci raid10_set_io_opt(conf); 37898c2ecf20Sopenharmony_ci } 37908c2ecf20Sopenharmony_ci 37918c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 37928c2ecf20Sopenharmony_ci long long diff; 37938c2ecf20Sopenharmony_ci 37948c2ecf20Sopenharmony_ci disk_idx = rdev->raid_disk; 37958c2ecf20Sopenharmony_ci if (disk_idx < 0) 37968c2ecf20Sopenharmony_ci continue; 37978c2ecf20Sopenharmony_ci if (disk_idx >= conf->geo.raid_disks && 37988c2ecf20Sopenharmony_ci disk_idx >= conf->prev.raid_disks) 37998c2ecf20Sopenharmony_ci continue; 38008c2ecf20Sopenharmony_ci disk = conf->mirrors + disk_idx; 38018c2ecf20Sopenharmony_ci 38028c2ecf20Sopenharmony_ci if (test_bit(Replacement, &rdev->flags)) { 38038c2ecf20Sopenharmony_ci if (disk->replacement) 38048c2ecf20Sopenharmony_ci goto out_free_conf; 38058c2ecf20Sopenharmony_ci disk->replacement = rdev; 38068c2ecf20Sopenharmony_ci } else { 38078c2ecf20Sopenharmony_ci if (disk->rdev) 38088c2ecf20Sopenharmony_ci goto out_free_conf; 38098c2ecf20Sopenharmony_ci disk->rdev = rdev; 38108c2ecf20Sopenharmony_ci } 38118c2ecf20Sopenharmony_ci diff = (rdev->new_data_offset - rdev->data_offset); 38128c2ecf20Sopenharmony_ci if (!mddev->reshape_backwards) 38138c2ecf20Sopenharmony_ci diff = -diff; 
38148c2ecf20Sopenharmony_ci if (diff < 0) 38158c2ecf20Sopenharmony_ci diff = 0; 38168c2ecf20Sopenharmony_ci if (first || diff < min_offset_diff) 38178c2ecf20Sopenharmony_ci min_offset_diff = diff; 38188c2ecf20Sopenharmony_ci 38198c2ecf20Sopenharmony_ci if (mddev->gendisk) 38208c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 38218c2ecf20Sopenharmony_ci rdev->data_offset << 9); 38228c2ecf20Sopenharmony_ci 38238c2ecf20Sopenharmony_ci disk->head_position = 0; 38248c2ecf20Sopenharmony_ci 38258c2ecf20Sopenharmony_ci if (blk_queue_discard(bdev_get_queue(rdev->bdev))) 38268c2ecf20Sopenharmony_ci discard_supported = true; 38278c2ecf20Sopenharmony_ci first = 0; 38288c2ecf20Sopenharmony_ci } 38298c2ecf20Sopenharmony_ci 38308c2ecf20Sopenharmony_ci if (mddev->queue) { 38318c2ecf20Sopenharmony_ci if (discard_supported) 38328c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, 38338c2ecf20Sopenharmony_ci mddev->queue); 38348c2ecf20Sopenharmony_ci else 38358c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_DISCARD, 38368c2ecf20Sopenharmony_ci mddev->queue); 38378c2ecf20Sopenharmony_ci } 38388c2ecf20Sopenharmony_ci /* need to check that every block has at least one working mirror */ 38398c2ecf20Sopenharmony_ci if (!enough(conf, -1)) { 38408c2ecf20Sopenharmony_ci pr_err("md/raid10:%s: not enough operational mirrors.\n", 38418c2ecf20Sopenharmony_ci mdname(mddev)); 38428c2ecf20Sopenharmony_ci goto out_free_conf; 38438c2ecf20Sopenharmony_ci } 38448c2ecf20Sopenharmony_ci 38458c2ecf20Sopenharmony_ci if (conf->reshape_progress != MaxSector) { 38468c2ecf20Sopenharmony_ci /* must ensure that shape change is supported */ 38478c2ecf20Sopenharmony_ci if (conf->geo.far_copies != 1 && 38488c2ecf20Sopenharmony_ci conf->geo.far_offset == 0) 38498c2ecf20Sopenharmony_ci goto out_free_conf; 38508c2ecf20Sopenharmony_ci if (conf->prev.far_copies != 1 && 38518c2ecf20Sopenharmony_ci conf->prev.far_offset == 0) 38528c2ecf20Sopenharmony_ci goto out_free_conf; 
38538c2ecf20Sopenharmony_ci } 38548c2ecf20Sopenharmony_ci 38558c2ecf20Sopenharmony_ci mddev->degraded = 0; 38568c2ecf20Sopenharmony_ci for (i = 0; 38578c2ecf20Sopenharmony_ci i < conf->geo.raid_disks 38588c2ecf20Sopenharmony_ci || i < conf->prev.raid_disks; 38598c2ecf20Sopenharmony_ci i++) { 38608c2ecf20Sopenharmony_ci 38618c2ecf20Sopenharmony_ci disk = conf->mirrors + i; 38628c2ecf20Sopenharmony_ci 38638c2ecf20Sopenharmony_ci if (!disk->rdev && disk->replacement) { 38648c2ecf20Sopenharmony_ci /* The replacement is all we have - use it */ 38658c2ecf20Sopenharmony_ci disk->rdev = disk->replacement; 38668c2ecf20Sopenharmony_ci disk->replacement = NULL; 38678c2ecf20Sopenharmony_ci clear_bit(Replacement, &disk->rdev->flags); 38688c2ecf20Sopenharmony_ci } 38698c2ecf20Sopenharmony_ci 38708c2ecf20Sopenharmony_ci if (!disk->rdev || 38718c2ecf20Sopenharmony_ci !test_bit(In_sync, &disk->rdev->flags)) { 38728c2ecf20Sopenharmony_ci disk->head_position = 0; 38738c2ecf20Sopenharmony_ci mddev->degraded++; 38748c2ecf20Sopenharmony_ci if (disk->rdev && 38758c2ecf20Sopenharmony_ci disk->rdev->saved_raid_disk < 0) 38768c2ecf20Sopenharmony_ci conf->fullsync = 1; 38778c2ecf20Sopenharmony_ci } 38788c2ecf20Sopenharmony_ci 38798c2ecf20Sopenharmony_ci if (disk->replacement && 38808c2ecf20Sopenharmony_ci !test_bit(In_sync, &disk->replacement->flags) && 38818c2ecf20Sopenharmony_ci disk->replacement->saved_raid_disk < 0) { 38828c2ecf20Sopenharmony_ci conf->fullsync = 1; 38838c2ecf20Sopenharmony_ci } 38848c2ecf20Sopenharmony_ci 38858c2ecf20Sopenharmony_ci disk->recovery_disabled = mddev->recovery_disabled - 1; 38868c2ecf20Sopenharmony_ci } 38878c2ecf20Sopenharmony_ci 38888c2ecf20Sopenharmony_ci if (mddev->recovery_cp != MaxSector) 38898c2ecf20Sopenharmony_ci pr_notice("md/raid10:%s: not clean -- starting background reconstruction\n", 38908c2ecf20Sopenharmony_ci mdname(mddev)); 38918c2ecf20Sopenharmony_ci pr_info("md/raid10:%s: active with %d out of %d devices\n", 38928c2ecf20Sopenharmony_ci 
mdname(mddev), conf->geo.raid_disks - mddev->degraded, 38938c2ecf20Sopenharmony_ci conf->geo.raid_disks); 38948c2ecf20Sopenharmony_ci /* 38958c2ecf20Sopenharmony_ci * Ok, everything is just fine now 38968c2ecf20Sopenharmony_ci */ 38978c2ecf20Sopenharmony_ci mddev->dev_sectors = conf->dev_sectors; 38988c2ecf20Sopenharmony_ci size = raid10_size(mddev, 0, 0); 38998c2ecf20Sopenharmony_ci md_set_array_sectors(mddev, size); 39008c2ecf20Sopenharmony_ci mddev->resync_max_sectors = size; 39018c2ecf20Sopenharmony_ci set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); 39028c2ecf20Sopenharmony_ci 39038c2ecf20Sopenharmony_ci if (md_integrity_register(mddev)) 39048c2ecf20Sopenharmony_ci goto out_free_conf; 39058c2ecf20Sopenharmony_ci 39068c2ecf20Sopenharmony_ci if (conf->reshape_progress != MaxSector) { 39078c2ecf20Sopenharmony_ci unsigned long before_length, after_length; 39088c2ecf20Sopenharmony_ci 39098c2ecf20Sopenharmony_ci before_length = ((1 << conf->prev.chunk_shift) * 39108c2ecf20Sopenharmony_ci conf->prev.far_copies); 39118c2ecf20Sopenharmony_ci after_length = ((1 << conf->geo.chunk_shift) * 39128c2ecf20Sopenharmony_ci conf->geo.far_copies); 39138c2ecf20Sopenharmony_ci 39148c2ecf20Sopenharmony_ci if (max(before_length, after_length) > min_offset_diff) { 39158c2ecf20Sopenharmony_ci /* This cannot work */ 39168c2ecf20Sopenharmony_ci pr_warn("md/raid10: offset difference not enough to continue reshape\n"); 39178c2ecf20Sopenharmony_ci goto out_free_conf; 39188c2ecf20Sopenharmony_ci } 39198c2ecf20Sopenharmony_ci conf->offset_diff = min_offset_diff; 39208c2ecf20Sopenharmony_ci 39218c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 39228c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 39238c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 39248c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 39258c2ecf20Sopenharmony_ci mddev->sync_thread = md_register_thread(md_do_sync, mddev, 
39268c2ecf20Sopenharmony_ci "reshape"); 39278c2ecf20Sopenharmony_ci if (!mddev->sync_thread) 39288c2ecf20Sopenharmony_ci goto out_free_conf; 39298c2ecf20Sopenharmony_ci } 39308c2ecf20Sopenharmony_ci 39318c2ecf20Sopenharmony_ci return 0; 39328c2ecf20Sopenharmony_ci 39338c2ecf20Sopenharmony_ciout_free_conf: 39348c2ecf20Sopenharmony_ci md_unregister_thread(&mddev->thread); 39358c2ecf20Sopenharmony_ci raid10_free_conf(conf); 39368c2ecf20Sopenharmony_ci mddev->private = NULL; 39378c2ecf20Sopenharmony_ciout: 39388c2ecf20Sopenharmony_ci return -EIO; 39398c2ecf20Sopenharmony_ci} 39408c2ecf20Sopenharmony_ci 39418c2ecf20Sopenharmony_cistatic void raid10_free(struct mddev *mddev, void *priv) 39428c2ecf20Sopenharmony_ci{ 39438c2ecf20Sopenharmony_ci raid10_free_conf(priv); 39448c2ecf20Sopenharmony_ci} 39458c2ecf20Sopenharmony_ci 39468c2ecf20Sopenharmony_cistatic void raid10_quiesce(struct mddev *mddev, int quiesce) 39478c2ecf20Sopenharmony_ci{ 39488c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 39498c2ecf20Sopenharmony_ci 39508c2ecf20Sopenharmony_ci if (quiesce) 39518c2ecf20Sopenharmony_ci raise_barrier(conf, 0); 39528c2ecf20Sopenharmony_ci else 39538c2ecf20Sopenharmony_ci lower_barrier(conf); 39548c2ecf20Sopenharmony_ci} 39558c2ecf20Sopenharmony_ci 39568c2ecf20Sopenharmony_cistatic int raid10_resize(struct mddev *mddev, sector_t sectors) 39578c2ecf20Sopenharmony_ci{ 39588c2ecf20Sopenharmony_ci /* Resize of 'far' arrays is not supported. 39598c2ecf20Sopenharmony_ci * For 'near' and 'offset' arrays we can set the 39608c2ecf20Sopenharmony_ci * number of sectors used to be an appropriate multiple 39618c2ecf20Sopenharmony_ci * of the chunk size. 39628c2ecf20Sopenharmony_ci * For 'offset', this is far_copies*chunksize. 39638c2ecf20Sopenharmony_ci * For 'near' the multiplier is the LCM of 39648c2ecf20Sopenharmony_ci * near_copies and raid_disks. 39658c2ecf20Sopenharmony_ci * So if far_copies > 1 && !far_offset, fail. 
39668c2ecf20Sopenharmony_ci * Else find LCM(raid_disks, near_copy)*far_copies and 39678c2ecf20Sopenharmony_ci * multiply by chunk_size. Then round to this number. 39688c2ecf20Sopenharmony_ci * This is mostly done by raid10_size() 39698c2ecf20Sopenharmony_ci */ 39708c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 39718c2ecf20Sopenharmony_ci sector_t oldsize, size; 39728c2ecf20Sopenharmony_ci 39738c2ecf20Sopenharmony_ci if (mddev->reshape_position != MaxSector) 39748c2ecf20Sopenharmony_ci return -EBUSY; 39758c2ecf20Sopenharmony_ci 39768c2ecf20Sopenharmony_ci if (conf->geo.far_copies > 1 && !conf->geo.far_offset) 39778c2ecf20Sopenharmony_ci return -EINVAL; 39788c2ecf20Sopenharmony_ci 39798c2ecf20Sopenharmony_ci oldsize = raid10_size(mddev, 0, 0); 39808c2ecf20Sopenharmony_ci size = raid10_size(mddev, sectors, 0); 39818c2ecf20Sopenharmony_ci if (mddev->external_size && 39828c2ecf20Sopenharmony_ci mddev->array_sectors > size) 39838c2ecf20Sopenharmony_ci return -EINVAL; 39848c2ecf20Sopenharmony_ci if (mddev->bitmap) { 39858c2ecf20Sopenharmony_ci int ret = md_bitmap_resize(mddev->bitmap, size, 0, 0); 39868c2ecf20Sopenharmony_ci if (ret) 39878c2ecf20Sopenharmony_ci return ret; 39888c2ecf20Sopenharmony_ci } 39898c2ecf20Sopenharmony_ci md_set_array_sectors(mddev, size); 39908c2ecf20Sopenharmony_ci if (sectors > mddev->dev_sectors && 39918c2ecf20Sopenharmony_ci mddev->recovery_cp > oldsize) { 39928c2ecf20Sopenharmony_ci mddev->recovery_cp = oldsize; 39938c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 39948c2ecf20Sopenharmony_ci } 39958c2ecf20Sopenharmony_ci calc_sectors(conf, sectors); 39968c2ecf20Sopenharmony_ci mddev->dev_sectors = conf->dev_sectors; 39978c2ecf20Sopenharmony_ci mddev->resync_max_sectors = size; 39988c2ecf20Sopenharmony_ci return 0; 39998c2ecf20Sopenharmony_ci} 40008c2ecf20Sopenharmony_ci 40018c2ecf20Sopenharmony_cistatic void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs) 40028c2ecf20Sopenharmony_ci{ 
40038c2ecf20Sopenharmony_ci struct md_rdev *rdev; 40048c2ecf20Sopenharmony_ci struct r10conf *conf; 40058c2ecf20Sopenharmony_ci 40068c2ecf20Sopenharmony_ci if (mddev->degraded > 0) { 40078c2ecf20Sopenharmony_ci pr_warn("md/raid10:%s: Error: degraded raid0!\n", 40088c2ecf20Sopenharmony_ci mdname(mddev)); 40098c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 40108c2ecf20Sopenharmony_ci } 40118c2ecf20Sopenharmony_ci sector_div(size, devs); 40128c2ecf20Sopenharmony_ci 40138c2ecf20Sopenharmony_ci /* Set new parameters */ 40148c2ecf20Sopenharmony_ci mddev->new_level = 10; 40158c2ecf20Sopenharmony_ci /* new layout: far_copies = 1, near_copies = 2 */ 40168c2ecf20Sopenharmony_ci mddev->new_layout = (1<<8) + 2; 40178c2ecf20Sopenharmony_ci mddev->new_chunk_sectors = mddev->chunk_sectors; 40188c2ecf20Sopenharmony_ci mddev->delta_disks = mddev->raid_disks; 40198c2ecf20Sopenharmony_ci mddev->raid_disks *= 2; 40208c2ecf20Sopenharmony_ci /* make sure it will be not marked as dirty */ 40218c2ecf20Sopenharmony_ci mddev->recovery_cp = MaxSector; 40228c2ecf20Sopenharmony_ci mddev->dev_sectors = size; 40238c2ecf20Sopenharmony_ci 40248c2ecf20Sopenharmony_ci conf = setup_conf(mddev); 40258c2ecf20Sopenharmony_ci if (!IS_ERR(conf)) { 40268c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 40278c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0) { 40288c2ecf20Sopenharmony_ci rdev->new_raid_disk = rdev->raid_disk * 2; 40298c2ecf20Sopenharmony_ci rdev->sectors = size; 40308c2ecf20Sopenharmony_ci } 40318c2ecf20Sopenharmony_ci conf->barrier = 1; 40328c2ecf20Sopenharmony_ci } 40338c2ecf20Sopenharmony_ci 40348c2ecf20Sopenharmony_ci return conf; 40358c2ecf20Sopenharmony_ci} 40368c2ecf20Sopenharmony_ci 40378c2ecf20Sopenharmony_cistatic void *raid10_takeover(struct mddev *mddev) 40388c2ecf20Sopenharmony_ci{ 40398c2ecf20Sopenharmony_ci struct r0conf *raid0_conf; 40408c2ecf20Sopenharmony_ci 40418c2ecf20Sopenharmony_ci /* raid10 can take over: 40428c2ecf20Sopenharmony_ci * raid0 - providing it has only two 
drives 40438c2ecf20Sopenharmony_ci */ 40448c2ecf20Sopenharmony_ci if (mddev->level == 0) { 40458c2ecf20Sopenharmony_ci /* for raid0 takeover only one zone is supported */ 40468c2ecf20Sopenharmony_ci raid0_conf = mddev->private; 40478c2ecf20Sopenharmony_ci if (raid0_conf->nr_strip_zones > 1) { 40488c2ecf20Sopenharmony_ci pr_warn("md/raid10:%s: cannot takeover raid 0 with more than one zone.\n", 40498c2ecf20Sopenharmony_ci mdname(mddev)); 40508c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 40518c2ecf20Sopenharmony_ci } 40528c2ecf20Sopenharmony_ci return raid10_takeover_raid0(mddev, 40538c2ecf20Sopenharmony_ci raid0_conf->strip_zone->zone_end, 40548c2ecf20Sopenharmony_ci raid0_conf->strip_zone->nb_dev); 40558c2ecf20Sopenharmony_ci } 40568c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 40578c2ecf20Sopenharmony_ci} 40588c2ecf20Sopenharmony_ci 40598c2ecf20Sopenharmony_cistatic int raid10_check_reshape(struct mddev *mddev) 40608c2ecf20Sopenharmony_ci{ 40618c2ecf20Sopenharmony_ci /* Called when there is a request to change 40628c2ecf20Sopenharmony_ci * - layout (to ->new_layout) 40638c2ecf20Sopenharmony_ci * - chunk size (to ->new_chunk_sectors) 40648c2ecf20Sopenharmony_ci * - raid_disks (by delta_disks) 40658c2ecf20Sopenharmony_ci * or when trying to restart a reshape that was ongoing. 40668c2ecf20Sopenharmony_ci * 40678c2ecf20Sopenharmony_ci * We need to validate the request and possibly allocate 40688c2ecf20Sopenharmony_ci * space if that might be an issue later. 40698c2ecf20Sopenharmony_ci * 40708c2ecf20Sopenharmony_ci * Currently we reject any reshape of a 'far' mode array, 40718c2ecf20Sopenharmony_ci * allow chunk size to change if new is generally acceptable, 40728c2ecf20Sopenharmony_ci * allow raid_disks to increase, and allow 40738c2ecf20Sopenharmony_ci * a switch between 'near' mode and 'offset' mode. 
40748c2ecf20Sopenharmony_ci */ 40758c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 40768c2ecf20Sopenharmony_ci struct geom geo; 40778c2ecf20Sopenharmony_ci 40788c2ecf20Sopenharmony_ci if (conf->geo.far_copies != 1 && !conf->geo.far_offset) 40798c2ecf20Sopenharmony_ci return -EINVAL; 40808c2ecf20Sopenharmony_ci 40818c2ecf20Sopenharmony_ci if (setup_geo(&geo, mddev, geo_start) != conf->copies) 40828c2ecf20Sopenharmony_ci /* mustn't change number of copies */ 40838c2ecf20Sopenharmony_ci return -EINVAL; 40848c2ecf20Sopenharmony_ci if (geo.far_copies > 1 && !geo.far_offset) 40858c2ecf20Sopenharmony_ci /* Cannot switch to 'far' mode */ 40868c2ecf20Sopenharmony_ci return -EINVAL; 40878c2ecf20Sopenharmony_ci 40888c2ecf20Sopenharmony_ci if (mddev->array_sectors & geo.chunk_mask) 40898c2ecf20Sopenharmony_ci /* not factor of array size */ 40908c2ecf20Sopenharmony_ci return -EINVAL; 40918c2ecf20Sopenharmony_ci 40928c2ecf20Sopenharmony_ci if (!enough(conf, -1)) 40938c2ecf20Sopenharmony_ci return -EINVAL; 40948c2ecf20Sopenharmony_ci 40958c2ecf20Sopenharmony_ci kfree(conf->mirrors_new); 40968c2ecf20Sopenharmony_ci conf->mirrors_new = NULL; 40978c2ecf20Sopenharmony_ci if (mddev->delta_disks > 0) { 40988c2ecf20Sopenharmony_ci /* allocate new 'mirrors' list */ 40998c2ecf20Sopenharmony_ci conf->mirrors_new = 41008c2ecf20Sopenharmony_ci kcalloc(mddev->raid_disks + mddev->delta_disks, 41018c2ecf20Sopenharmony_ci sizeof(struct raid10_info), 41028c2ecf20Sopenharmony_ci GFP_KERNEL); 41038c2ecf20Sopenharmony_ci if (!conf->mirrors_new) 41048c2ecf20Sopenharmony_ci return -ENOMEM; 41058c2ecf20Sopenharmony_ci } 41068c2ecf20Sopenharmony_ci return 0; 41078c2ecf20Sopenharmony_ci} 41088c2ecf20Sopenharmony_ci 41098c2ecf20Sopenharmony_ci/* 41108c2ecf20Sopenharmony_ci * Need to check if array has failed when deciding whether to: 41118c2ecf20Sopenharmony_ci * - start an array 41128c2ecf20Sopenharmony_ci * - remove non-faulty devices 41138c2ecf20Sopenharmony_ci * - add a spare 
41148c2ecf20Sopenharmony_ci * - allow a reshape 41158c2ecf20Sopenharmony_ci * This determination is simple when no reshape is happening. 41168c2ecf20Sopenharmony_ci * However if there is a reshape, we need to carefully check 41178c2ecf20Sopenharmony_ci * both the before and after sections. 41188c2ecf20Sopenharmony_ci * This is because some failed devices may only affect one 41198c2ecf20Sopenharmony_ci * of the two sections, and some non-in_sync devices may 41208c2ecf20Sopenharmony_ci * be insync in the section most affected by failed devices. 41218c2ecf20Sopenharmony_ci */ 41228c2ecf20Sopenharmony_cistatic int calc_degraded(struct r10conf *conf) 41238c2ecf20Sopenharmony_ci{ 41248c2ecf20Sopenharmony_ci int degraded, degraded2; 41258c2ecf20Sopenharmony_ci int i; 41268c2ecf20Sopenharmony_ci 41278c2ecf20Sopenharmony_ci rcu_read_lock(); 41288c2ecf20Sopenharmony_ci degraded = 0; 41298c2ecf20Sopenharmony_ci /* 'prev' section first */ 41308c2ecf20Sopenharmony_ci for (i = 0; i < conf->prev.raid_disks; i++) { 41318c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); 41328c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags)) 41338c2ecf20Sopenharmony_ci degraded++; 41348c2ecf20Sopenharmony_ci else if (!test_bit(In_sync, &rdev->flags)) 41358c2ecf20Sopenharmony_ci /* When we can reduce the number of devices in 41368c2ecf20Sopenharmony_ci * an array, this might not contribute to 41378c2ecf20Sopenharmony_ci * 'degraded'. It does now. 
41388c2ecf20Sopenharmony_ci */ 41398c2ecf20Sopenharmony_ci degraded++; 41408c2ecf20Sopenharmony_ci } 41418c2ecf20Sopenharmony_ci rcu_read_unlock(); 41428c2ecf20Sopenharmony_ci if (conf->geo.raid_disks == conf->prev.raid_disks) 41438c2ecf20Sopenharmony_ci return degraded; 41448c2ecf20Sopenharmony_ci rcu_read_lock(); 41458c2ecf20Sopenharmony_ci degraded2 = 0; 41468c2ecf20Sopenharmony_ci for (i = 0; i < conf->geo.raid_disks; i++) { 41478c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); 41488c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags)) 41498c2ecf20Sopenharmony_ci degraded2++; 41508c2ecf20Sopenharmony_ci else if (!test_bit(In_sync, &rdev->flags)) { 41518c2ecf20Sopenharmony_ci /* If reshape is increasing the number of devices, 41528c2ecf20Sopenharmony_ci * this section has already been recovered, so 41538c2ecf20Sopenharmony_ci * it doesn't contribute to degraded. 41548c2ecf20Sopenharmony_ci * else it does. 41558c2ecf20Sopenharmony_ci */ 41568c2ecf20Sopenharmony_ci if (conf->geo.raid_disks <= conf->prev.raid_disks) 41578c2ecf20Sopenharmony_ci degraded2++; 41588c2ecf20Sopenharmony_ci } 41598c2ecf20Sopenharmony_ci } 41608c2ecf20Sopenharmony_ci rcu_read_unlock(); 41618c2ecf20Sopenharmony_ci if (degraded2 > degraded) 41628c2ecf20Sopenharmony_ci return degraded2; 41638c2ecf20Sopenharmony_ci return degraded; 41648c2ecf20Sopenharmony_ci} 41658c2ecf20Sopenharmony_ci 41668c2ecf20Sopenharmony_cistatic int raid10_start_reshape(struct mddev *mddev) 41678c2ecf20Sopenharmony_ci{ 41688c2ecf20Sopenharmony_ci /* A 'reshape' has been requested. This commits 41698c2ecf20Sopenharmony_ci * the various 'new' fields and sets MD_RECOVER_RESHAPE 41708c2ecf20Sopenharmony_ci * This also checks if there are enough spares and adds them 41718c2ecf20Sopenharmony_ci * to the array. 41728c2ecf20Sopenharmony_ci * We currently require enough spares to make the final 41738c2ecf20Sopenharmony_ci * array non-degraded. 
We also require that the difference 41748c2ecf20Sopenharmony_ci * between old and new data_offset - on each device - is 41758c2ecf20Sopenharmony_ci * enough that we never risk over-writing. 41768c2ecf20Sopenharmony_ci */ 41778c2ecf20Sopenharmony_ci 41788c2ecf20Sopenharmony_ci unsigned long before_length, after_length; 41798c2ecf20Sopenharmony_ci sector_t min_offset_diff = 0; 41808c2ecf20Sopenharmony_ci int first = 1; 41818c2ecf20Sopenharmony_ci struct geom new; 41828c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 41838c2ecf20Sopenharmony_ci struct md_rdev *rdev; 41848c2ecf20Sopenharmony_ci int spares = 0; 41858c2ecf20Sopenharmony_ci int ret; 41868c2ecf20Sopenharmony_ci 41878c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 41888c2ecf20Sopenharmony_ci return -EBUSY; 41898c2ecf20Sopenharmony_ci 41908c2ecf20Sopenharmony_ci if (setup_geo(&new, mddev, geo_start) != conf->copies) 41918c2ecf20Sopenharmony_ci return -EINVAL; 41928c2ecf20Sopenharmony_ci 41938c2ecf20Sopenharmony_ci before_length = ((1 << conf->prev.chunk_shift) * 41948c2ecf20Sopenharmony_ci conf->prev.far_copies); 41958c2ecf20Sopenharmony_ci after_length = ((1 << conf->geo.chunk_shift) * 41968c2ecf20Sopenharmony_ci conf->geo.far_copies); 41978c2ecf20Sopenharmony_ci 41988c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 41998c2ecf20Sopenharmony_ci if (!test_bit(In_sync, &rdev->flags) 42008c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags)) 42018c2ecf20Sopenharmony_ci spares++; 42028c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0) { 42038c2ecf20Sopenharmony_ci long long diff = (rdev->new_data_offset 42048c2ecf20Sopenharmony_ci - rdev->data_offset); 42058c2ecf20Sopenharmony_ci if (!mddev->reshape_backwards) 42068c2ecf20Sopenharmony_ci diff = -diff; 42078c2ecf20Sopenharmony_ci if (diff < 0) 42088c2ecf20Sopenharmony_ci diff = 0; 42098c2ecf20Sopenharmony_ci if (first || diff < min_offset_diff) 42108c2ecf20Sopenharmony_ci min_offset_diff = diff; 
42118c2ecf20Sopenharmony_ci first = 0; 42128c2ecf20Sopenharmony_ci } 42138c2ecf20Sopenharmony_ci } 42148c2ecf20Sopenharmony_ci 42158c2ecf20Sopenharmony_ci if (max(before_length, after_length) > min_offset_diff) 42168c2ecf20Sopenharmony_ci return -EINVAL; 42178c2ecf20Sopenharmony_ci 42188c2ecf20Sopenharmony_ci if (spares < mddev->delta_disks) 42198c2ecf20Sopenharmony_ci return -EINVAL; 42208c2ecf20Sopenharmony_ci 42218c2ecf20Sopenharmony_ci conf->offset_diff = min_offset_diff; 42228c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 42238c2ecf20Sopenharmony_ci if (conf->mirrors_new) { 42248c2ecf20Sopenharmony_ci memcpy(conf->mirrors_new, conf->mirrors, 42258c2ecf20Sopenharmony_ci sizeof(struct raid10_info)*conf->prev.raid_disks); 42268c2ecf20Sopenharmony_ci smp_mb(); 42278c2ecf20Sopenharmony_ci kfree(conf->mirrors_old); 42288c2ecf20Sopenharmony_ci conf->mirrors_old = conf->mirrors; 42298c2ecf20Sopenharmony_ci conf->mirrors = conf->mirrors_new; 42308c2ecf20Sopenharmony_ci conf->mirrors_new = NULL; 42318c2ecf20Sopenharmony_ci } 42328c2ecf20Sopenharmony_ci setup_geo(&conf->geo, mddev, geo_start); 42338c2ecf20Sopenharmony_ci smp_mb(); 42348c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) { 42358c2ecf20Sopenharmony_ci sector_t size = raid10_size(mddev, 0, 0); 42368c2ecf20Sopenharmony_ci if (size < mddev->array_sectors) { 42378c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 42388c2ecf20Sopenharmony_ci pr_warn("md/raid10:%s: array size must be reduce before number of disks\n", 42398c2ecf20Sopenharmony_ci mdname(mddev)); 42408c2ecf20Sopenharmony_ci return -EINVAL; 42418c2ecf20Sopenharmony_ci } 42428c2ecf20Sopenharmony_ci mddev->resync_max_sectors = size; 42438c2ecf20Sopenharmony_ci conf->reshape_progress = size; 42448c2ecf20Sopenharmony_ci } else 42458c2ecf20Sopenharmony_ci conf->reshape_progress = 0; 42468c2ecf20Sopenharmony_ci conf->reshape_safe = conf->reshape_progress; 42478c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 
42488c2ecf20Sopenharmony_ci 42498c2ecf20Sopenharmony_ci if (mddev->delta_disks && mddev->bitmap) { 42508c2ecf20Sopenharmony_ci struct mdp_superblock_1 *sb = NULL; 42518c2ecf20Sopenharmony_ci sector_t oldsize, newsize; 42528c2ecf20Sopenharmony_ci 42538c2ecf20Sopenharmony_ci oldsize = raid10_size(mddev, 0, 0); 42548c2ecf20Sopenharmony_ci newsize = raid10_size(mddev, 0, conf->geo.raid_disks); 42558c2ecf20Sopenharmony_ci 42568c2ecf20Sopenharmony_ci if (!mddev_is_clustered(mddev)) { 42578c2ecf20Sopenharmony_ci ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); 42588c2ecf20Sopenharmony_ci if (ret) 42598c2ecf20Sopenharmony_ci goto abort; 42608c2ecf20Sopenharmony_ci else 42618c2ecf20Sopenharmony_ci goto out; 42628c2ecf20Sopenharmony_ci } 42638c2ecf20Sopenharmony_ci 42648c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 42658c2ecf20Sopenharmony_ci if (rdev->raid_disk > -1 && 42668c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) 42678c2ecf20Sopenharmony_ci sb = page_address(rdev->sb_page); 42688c2ecf20Sopenharmony_ci } 42698c2ecf20Sopenharmony_ci 42708c2ecf20Sopenharmony_ci /* 42718c2ecf20Sopenharmony_ci * some node is already performing reshape, and no need to 42728c2ecf20Sopenharmony_ci * call md_bitmap_resize again since it should be called when 42738c2ecf20Sopenharmony_ci * receiving BITMAP_RESIZE msg 42748c2ecf20Sopenharmony_ci */ 42758c2ecf20Sopenharmony_ci if ((sb && (le32_to_cpu(sb->feature_map) & 42768c2ecf20Sopenharmony_ci MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize)) 42778c2ecf20Sopenharmony_ci goto out; 42788c2ecf20Sopenharmony_ci 42798c2ecf20Sopenharmony_ci ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); 42808c2ecf20Sopenharmony_ci if (ret) 42818c2ecf20Sopenharmony_ci goto abort; 42828c2ecf20Sopenharmony_ci 42838c2ecf20Sopenharmony_ci ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); 42848c2ecf20Sopenharmony_ci if (ret) { 42858c2ecf20Sopenharmony_ci md_bitmap_resize(mddev->bitmap, oldsize, 0, 0); 42868c2ecf20Sopenharmony_ci 
goto abort; 42878c2ecf20Sopenharmony_ci } 42888c2ecf20Sopenharmony_ci } 42898c2ecf20Sopenharmony_ciout: 42908c2ecf20Sopenharmony_ci if (mddev->delta_disks > 0) { 42918c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 42928c2ecf20Sopenharmony_ci if (rdev->raid_disk < 0 && 42938c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) { 42948c2ecf20Sopenharmony_ci if (raid10_add_disk(mddev, rdev) == 0) { 42958c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 42968c2ecf20Sopenharmony_ci conf->prev.raid_disks) 42978c2ecf20Sopenharmony_ci set_bit(In_sync, &rdev->flags); 42988c2ecf20Sopenharmony_ci else 42998c2ecf20Sopenharmony_ci rdev->recovery_offset = 0; 43008c2ecf20Sopenharmony_ci 43018c2ecf20Sopenharmony_ci /* Failure here is OK */ 43028c2ecf20Sopenharmony_ci sysfs_link_rdev(mddev, rdev); 43038c2ecf20Sopenharmony_ci } 43048c2ecf20Sopenharmony_ci } else if (rdev->raid_disk >= conf->prev.raid_disks 43058c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags)) { 43068c2ecf20Sopenharmony_ci /* This is a spare that was manually added */ 43078c2ecf20Sopenharmony_ci set_bit(In_sync, &rdev->flags); 43088c2ecf20Sopenharmony_ci } 43098c2ecf20Sopenharmony_ci } 43108c2ecf20Sopenharmony_ci /* When a reshape changes the number of devices, 43118c2ecf20Sopenharmony_ci * ->degraded is measured against the larger of the 43128c2ecf20Sopenharmony_ci * pre and post numbers. 
43138c2ecf20Sopenharmony_ci */ 43148c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 43158c2ecf20Sopenharmony_ci mddev->degraded = calc_degraded(conf); 43168c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 43178c2ecf20Sopenharmony_ci mddev->raid_disks = conf->geo.raid_disks; 43188c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 43198c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 43208c2ecf20Sopenharmony_ci 43218c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 43228c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 43238c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_DONE, &mddev->recovery); 43248c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 43258c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 43268c2ecf20Sopenharmony_ci 43278c2ecf20Sopenharmony_ci mddev->sync_thread = md_register_thread(md_do_sync, mddev, 43288c2ecf20Sopenharmony_ci "reshape"); 43298c2ecf20Sopenharmony_ci if (!mddev->sync_thread) { 43308c2ecf20Sopenharmony_ci ret = -EAGAIN; 43318c2ecf20Sopenharmony_ci goto abort; 43328c2ecf20Sopenharmony_ci } 43338c2ecf20Sopenharmony_ci conf->reshape_checkpoint = jiffies; 43348c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->sync_thread); 43358c2ecf20Sopenharmony_ci md_new_event(mddev); 43368c2ecf20Sopenharmony_ci return 0; 43378c2ecf20Sopenharmony_ci 43388c2ecf20Sopenharmony_ciabort: 43398c2ecf20Sopenharmony_ci mddev->recovery = 0; 43408c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 43418c2ecf20Sopenharmony_ci conf->geo = conf->prev; 43428c2ecf20Sopenharmony_ci mddev->raid_disks = conf->geo.raid_disks; 43438c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 43448c2ecf20Sopenharmony_ci rdev->new_data_offset = rdev->data_offset; 43458c2ecf20Sopenharmony_ci smp_wmb(); 43468c2ecf20Sopenharmony_ci conf->reshape_progress = MaxSector; 43478c2ecf20Sopenharmony_ci conf->reshape_safe = MaxSector; 
43488c2ecf20Sopenharmony_ci mddev->reshape_position = MaxSector; 43498c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 43508c2ecf20Sopenharmony_ci return ret; 43518c2ecf20Sopenharmony_ci} 43528c2ecf20Sopenharmony_ci 43538c2ecf20Sopenharmony_ci/* Calculate the last device-address that could contain 43548c2ecf20Sopenharmony_ci * any block from the chunk that includes the array-address 's' 43558c2ecf20Sopenharmony_ci * and report the next address. 43568c2ecf20Sopenharmony_ci * i.e. the address returned will be chunk-aligned and after 43578c2ecf20Sopenharmony_ci * any data that is in the chunk containing 's'. 43588c2ecf20Sopenharmony_ci */ 43598c2ecf20Sopenharmony_cistatic sector_t last_dev_address(sector_t s, struct geom *geo) 43608c2ecf20Sopenharmony_ci{ 43618c2ecf20Sopenharmony_ci s = (s | geo->chunk_mask) + 1; 43628c2ecf20Sopenharmony_ci s >>= geo->chunk_shift; 43638c2ecf20Sopenharmony_ci s *= geo->near_copies; 43648c2ecf20Sopenharmony_ci s = DIV_ROUND_UP_SECTOR_T(s, geo->raid_disks); 43658c2ecf20Sopenharmony_ci s *= geo->far_copies; 43668c2ecf20Sopenharmony_ci s <<= geo->chunk_shift; 43678c2ecf20Sopenharmony_ci return s; 43688c2ecf20Sopenharmony_ci} 43698c2ecf20Sopenharmony_ci 43708c2ecf20Sopenharmony_ci/* Calculate the first device-address that could contain 43718c2ecf20Sopenharmony_ci * any block from the chunk that includes the array-address 's'. 
43728c2ecf20Sopenharmony_ci * This too will be the start of a chunk 43738c2ecf20Sopenharmony_ci */ 43748c2ecf20Sopenharmony_cistatic sector_t first_dev_address(sector_t s, struct geom *geo) 43758c2ecf20Sopenharmony_ci{ 43768c2ecf20Sopenharmony_ci s >>= geo->chunk_shift; 43778c2ecf20Sopenharmony_ci s *= geo->near_copies; 43788c2ecf20Sopenharmony_ci sector_div(s, geo->raid_disks); 43798c2ecf20Sopenharmony_ci s *= geo->far_copies; 43808c2ecf20Sopenharmony_ci s <<= geo->chunk_shift; 43818c2ecf20Sopenharmony_ci return s; 43828c2ecf20Sopenharmony_ci} 43838c2ecf20Sopenharmony_ci 43848c2ecf20Sopenharmony_cistatic sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, 43858c2ecf20Sopenharmony_ci int *skipped) 43868c2ecf20Sopenharmony_ci{ 43878c2ecf20Sopenharmony_ci /* We simply copy at most one chunk (smallest of old and new) 43888c2ecf20Sopenharmony_ci * at a time, possibly less if that exceeds RESYNC_PAGES, 43898c2ecf20Sopenharmony_ci * or we hit a bad block or something. 43908c2ecf20Sopenharmony_ci * This might mean we pause for normal IO in the middle of 43918c2ecf20Sopenharmony_ci * a chunk, but that is not a problem as mddev->reshape_position 43928c2ecf20Sopenharmony_ci * can record any location. 43938c2ecf20Sopenharmony_ci * 43948c2ecf20Sopenharmony_ci * If we will want to write to a location that isn't 43958c2ecf20Sopenharmony_ci * yet recorded as 'safe' (i.e. in metadata on disk) then 43968c2ecf20Sopenharmony_ci * we need to flush all reshape requests and update the metadata. 43978c2ecf20Sopenharmony_ci * 43988c2ecf20Sopenharmony_ci * When reshaping forwards (e.g. to more devices), we interpret 43998c2ecf20Sopenharmony_ci * 'safe' as the earliest block which might not have been copied 44008c2ecf20Sopenharmony_ci * down yet. We divide this by previous stripe size and multiply 44018c2ecf20Sopenharmony_ci * by previous stripe length to get lowest device offset that we 44028c2ecf20Sopenharmony_ci * cannot write to yet. 
44038c2ecf20Sopenharmony_ci * We interpret 'sector_nr' as an address that we want to write to. 44048c2ecf20Sopenharmony_ci * From this we use last_device_address() to find where we might 44058c2ecf20Sopenharmony_ci * write to, and first_device_address on the 'safe' position. 44068c2ecf20Sopenharmony_ci * If this 'next' write position is after the 'safe' position, 44078c2ecf20Sopenharmony_ci * we must update the metadata to increase the 'safe' position. 44088c2ecf20Sopenharmony_ci * 44098c2ecf20Sopenharmony_ci * When reshaping backwards, we round in the opposite direction 44108c2ecf20Sopenharmony_ci * and perform the reverse test: next write position must not be 44118c2ecf20Sopenharmony_ci * less than current safe position. 44128c2ecf20Sopenharmony_ci * 44138c2ecf20Sopenharmony_ci * In all this the minimum difference in data offsets 44148c2ecf20Sopenharmony_ci * (conf->offset_diff - always positive) allows a bit of slack, 44158c2ecf20Sopenharmony_ci * so next can be after 'safe', but not by more than offset_diff 44168c2ecf20Sopenharmony_ci * 44178c2ecf20Sopenharmony_ci * We need to prepare all the bios here before we start any IO 44188c2ecf20Sopenharmony_ci * to ensure the size we choose is acceptable to all devices. 44198c2ecf20Sopenharmony_ci * The means one for each copy for write-out and an extra one for 44208c2ecf20Sopenharmony_ci * read-in. 44218c2ecf20Sopenharmony_ci * We store the read-in bio in ->master_bio and the others in 44228c2ecf20Sopenharmony_ci * ->devs[x].bio and ->devs[x].repl_bio. 
44238c2ecf20Sopenharmony_ci */ 44248c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 44258c2ecf20Sopenharmony_ci struct r10bio *r10_bio; 44268c2ecf20Sopenharmony_ci sector_t next, safe, last; 44278c2ecf20Sopenharmony_ci int max_sectors; 44288c2ecf20Sopenharmony_ci int nr_sectors; 44298c2ecf20Sopenharmony_ci int s; 44308c2ecf20Sopenharmony_ci struct md_rdev *rdev; 44318c2ecf20Sopenharmony_ci int need_flush = 0; 44328c2ecf20Sopenharmony_ci struct bio *blist; 44338c2ecf20Sopenharmony_ci struct bio *bio, *read_bio; 44348c2ecf20Sopenharmony_ci int sectors_done = 0; 44358c2ecf20Sopenharmony_ci struct page **pages; 44368c2ecf20Sopenharmony_ci 44378c2ecf20Sopenharmony_ci if (sector_nr == 0) { 44388c2ecf20Sopenharmony_ci /* If restarting in the middle, skip the initial sectors */ 44398c2ecf20Sopenharmony_ci if (mddev->reshape_backwards && 44408c2ecf20Sopenharmony_ci conf->reshape_progress < raid10_size(mddev, 0, 0)) { 44418c2ecf20Sopenharmony_ci sector_nr = (raid10_size(mddev, 0, 0) 44428c2ecf20Sopenharmony_ci - conf->reshape_progress); 44438c2ecf20Sopenharmony_ci } else if (!mddev->reshape_backwards && 44448c2ecf20Sopenharmony_ci conf->reshape_progress > 0) 44458c2ecf20Sopenharmony_ci sector_nr = conf->reshape_progress; 44468c2ecf20Sopenharmony_ci if (sector_nr) { 44478c2ecf20Sopenharmony_ci mddev->curr_resync_completed = sector_nr; 44488c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(mddev->sysfs_completed); 44498c2ecf20Sopenharmony_ci *skipped = 1; 44508c2ecf20Sopenharmony_ci return sector_nr; 44518c2ecf20Sopenharmony_ci } 44528c2ecf20Sopenharmony_ci } 44538c2ecf20Sopenharmony_ci 44548c2ecf20Sopenharmony_ci /* We don't use sector_nr to track where we are up to 44558c2ecf20Sopenharmony_ci * as that doesn't work well for ->reshape_backwards. 44568c2ecf20Sopenharmony_ci * So just use ->reshape_progress. 
44578c2ecf20Sopenharmony_ci */ 44588c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) { 44598c2ecf20Sopenharmony_ci /* 'next' is the earliest device address that we might 44608c2ecf20Sopenharmony_ci * write to for this chunk in the new layout 44618c2ecf20Sopenharmony_ci */ 44628c2ecf20Sopenharmony_ci next = first_dev_address(conf->reshape_progress - 1, 44638c2ecf20Sopenharmony_ci &conf->geo); 44648c2ecf20Sopenharmony_ci 44658c2ecf20Sopenharmony_ci /* 'safe' is the last device address that we might read from 44668c2ecf20Sopenharmony_ci * in the old layout after a restart 44678c2ecf20Sopenharmony_ci */ 44688c2ecf20Sopenharmony_ci safe = last_dev_address(conf->reshape_safe - 1, 44698c2ecf20Sopenharmony_ci &conf->prev); 44708c2ecf20Sopenharmony_ci 44718c2ecf20Sopenharmony_ci if (next + conf->offset_diff < safe) 44728c2ecf20Sopenharmony_ci need_flush = 1; 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci last = conf->reshape_progress - 1; 44758c2ecf20Sopenharmony_ci sector_nr = last & ~(sector_t)(conf->geo.chunk_mask 44768c2ecf20Sopenharmony_ci & conf->prev.chunk_mask); 44778c2ecf20Sopenharmony_ci if (sector_nr + RESYNC_SECTORS < last) 44788c2ecf20Sopenharmony_ci sector_nr = last + 1 - RESYNC_SECTORS; 44798c2ecf20Sopenharmony_ci } else { 44808c2ecf20Sopenharmony_ci /* 'next' is after the last device address that we 44818c2ecf20Sopenharmony_ci * might write to for this chunk in the new layout 44828c2ecf20Sopenharmony_ci */ 44838c2ecf20Sopenharmony_ci next = last_dev_address(conf->reshape_progress, &conf->geo); 44848c2ecf20Sopenharmony_ci 44858c2ecf20Sopenharmony_ci /* 'safe' is the earliest device address that we might 44868c2ecf20Sopenharmony_ci * read from in the old layout after a restart 44878c2ecf20Sopenharmony_ci */ 44888c2ecf20Sopenharmony_ci safe = first_dev_address(conf->reshape_safe, &conf->prev); 44898c2ecf20Sopenharmony_ci 44908c2ecf20Sopenharmony_ci /* Need to update metadata if 'next' might be beyond 'safe' 44918c2ecf20Sopenharmony_ci * as that 
would possibly corrupt data 44928c2ecf20Sopenharmony_ci */ 44938c2ecf20Sopenharmony_ci if (next > safe + conf->offset_diff) 44948c2ecf20Sopenharmony_ci need_flush = 1; 44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_ci sector_nr = conf->reshape_progress; 44978c2ecf20Sopenharmony_ci last = sector_nr | (conf->geo.chunk_mask 44988c2ecf20Sopenharmony_ci & conf->prev.chunk_mask); 44998c2ecf20Sopenharmony_ci 45008c2ecf20Sopenharmony_ci if (sector_nr + RESYNC_SECTORS <= last) 45018c2ecf20Sopenharmony_ci last = sector_nr + RESYNC_SECTORS - 1; 45028c2ecf20Sopenharmony_ci } 45038c2ecf20Sopenharmony_ci 45048c2ecf20Sopenharmony_ci if (need_flush || 45058c2ecf20Sopenharmony_ci time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { 45068c2ecf20Sopenharmony_ci /* Need to update reshape_position in metadata */ 45078c2ecf20Sopenharmony_ci wait_barrier(conf); 45088c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 45098c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) 45108c2ecf20Sopenharmony_ci mddev->curr_resync_completed = raid10_size(mddev, 0, 0) 45118c2ecf20Sopenharmony_ci - conf->reshape_progress; 45128c2ecf20Sopenharmony_ci else 45138c2ecf20Sopenharmony_ci mddev->curr_resync_completed = conf->reshape_progress; 45148c2ecf20Sopenharmony_ci conf->reshape_checkpoint = jiffies; 45158c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 45168c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 45178c2ecf20Sopenharmony_ci wait_event(mddev->sb_wait, mddev->sb_flags == 0 || 45188c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_INTR, &mddev->recovery)); 45198c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 45208c2ecf20Sopenharmony_ci allow_barrier(conf); 45218c2ecf20Sopenharmony_ci return sectors_done; 45228c2ecf20Sopenharmony_ci } 45238c2ecf20Sopenharmony_ci conf->reshape_safe = mddev->reshape_position; 45248c2ecf20Sopenharmony_ci allow_barrier(conf); 45258c2ecf20Sopenharmony_ci } 45268c2ecf20Sopenharmony_ci 
45278c2ecf20Sopenharmony_ci raise_barrier(conf, 0); 45288c2ecf20Sopenharmony_ciread_more: 45298c2ecf20Sopenharmony_ci /* Now schedule reads for blocks from sector_nr to last */ 45308c2ecf20Sopenharmony_ci r10_bio = raid10_alloc_init_r10buf(conf); 45318c2ecf20Sopenharmony_ci r10_bio->state = 0; 45328c2ecf20Sopenharmony_ci raise_barrier(conf, 1); 45338c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 0); 45348c2ecf20Sopenharmony_ci r10_bio->mddev = mddev; 45358c2ecf20Sopenharmony_ci r10_bio->sector = sector_nr; 45368c2ecf20Sopenharmony_ci set_bit(R10BIO_IsReshape, &r10_bio->state); 45378c2ecf20Sopenharmony_ci r10_bio->sectors = last - sector_nr + 1; 45388c2ecf20Sopenharmony_ci rdev = read_balance(conf, r10_bio, &max_sectors); 45398c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R10BIO_Previous, &r10_bio->state)); 45408c2ecf20Sopenharmony_ci 45418c2ecf20Sopenharmony_ci if (!rdev) { 45428c2ecf20Sopenharmony_ci /* Cannot read from here, so need to record bad blocks 45438c2ecf20Sopenharmony_ci * on all the target devices. 
45448c2ecf20Sopenharmony_ci */ 45458c2ecf20Sopenharmony_ci // FIXME 45468c2ecf20Sopenharmony_ci mempool_free(r10_bio, &conf->r10buf_pool); 45478c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, &mddev->recovery); 45488c2ecf20Sopenharmony_ci return sectors_done; 45498c2ecf20Sopenharmony_ci } 45508c2ecf20Sopenharmony_ci 45518c2ecf20Sopenharmony_ci read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev); 45528c2ecf20Sopenharmony_ci 45538c2ecf20Sopenharmony_ci bio_set_dev(read_bio, rdev->bdev); 45548c2ecf20Sopenharmony_ci read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr 45558c2ecf20Sopenharmony_ci + rdev->data_offset); 45568c2ecf20Sopenharmony_ci read_bio->bi_private = r10_bio; 45578c2ecf20Sopenharmony_ci read_bio->bi_end_io = end_reshape_read; 45588c2ecf20Sopenharmony_ci bio_set_op_attrs(read_bio, REQ_OP_READ, 0); 45598c2ecf20Sopenharmony_ci read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); 45608c2ecf20Sopenharmony_ci read_bio->bi_status = 0; 45618c2ecf20Sopenharmony_ci read_bio->bi_vcnt = 0; 45628c2ecf20Sopenharmony_ci read_bio->bi_iter.bi_size = 0; 45638c2ecf20Sopenharmony_ci r10_bio->master_bio = read_bio; 45648c2ecf20Sopenharmony_ci r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; 45658c2ecf20Sopenharmony_ci 45668c2ecf20Sopenharmony_ci /* 45678c2ecf20Sopenharmony_ci * Broadcast RESYNC message to other nodes, so all nodes would not 45688c2ecf20Sopenharmony_ci * write to the region to avoid conflict. 
45698c2ecf20Sopenharmony_ci */ 45708c2ecf20Sopenharmony_ci if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) { 45718c2ecf20Sopenharmony_ci struct mdp_superblock_1 *sb = NULL; 45728c2ecf20Sopenharmony_ci int sb_reshape_pos = 0; 45738c2ecf20Sopenharmony_ci 45748c2ecf20Sopenharmony_ci conf->cluster_sync_low = sector_nr; 45758c2ecf20Sopenharmony_ci conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS; 45768c2ecf20Sopenharmony_ci sb = page_address(rdev->sb_page); 45778c2ecf20Sopenharmony_ci if (sb) { 45788c2ecf20Sopenharmony_ci sb_reshape_pos = le64_to_cpu(sb->reshape_position); 45798c2ecf20Sopenharmony_ci /* 45808c2ecf20Sopenharmony_ci * Set cluster_sync_low again if next address for array 45818c2ecf20Sopenharmony_ci * reshape is less than cluster_sync_low. Since we can't 45828c2ecf20Sopenharmony_ci * update cluster_sync_low until it has finished reshape. 45838c2ecf20Sopenharmony_ci */ 45848c2ecf20Sopenharmony_ci if (sb_reshape_pos < conf->cluster_sync_low) 45858c2ecf20Sopenharmony_ci conf->cluster_sync_low = sb_reshape_pos; 45868c2ecf20Sopenharmony_ci } 45878c2ecf20Sopenharmony_ci 45888c2ecf20Sopenharmony_ci md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low, 45898c2ecf20Sopenharmony_ci conf->cluster_sync_high); 45908c2ecf20Sopenharmony_ci } 45918c2ecf20Sopenharmony_ci 45928c2ecf20Sopenharmony_ci /* Now find the locations in the new layout */ 45938c2ecf20Sopenharmony_ci __raid10_find_phys(&conf->geo, r10_bio); 45948c2ecf20Sopenharmony_ci 45958c2ecf20Sopenharmony_ci blist = read_bio; 45968c2ecf20Sopenharmony_ci read_bio->bi_next = NULL; 45978c2ecf20Sopenharmony_ci 45988c2ecf20Sopenharmony_ci rcu_read_lock(); 45998c2ecf20Sopenharmony_ci for (s = 0; s < conf->copies*2; s++) { 46008c2ecf20Sopenharmony_ci struct bio *b; 46018c2ecf20Sopenharmony_ci int d = r10_bio->devs[s/2].devnum; 46028c2ecf20Sopenharmony_ci struct md_rdev *rdev2; 46038c2ecf20Sopenharmony_ci if (s&1) { 46048c2ecf20Sopenharmony_ci rdev2 = 
rcu_dereference(conf->mirrors[d].replacement); 46058c2ecf20Sopenharmony_ci b = r10_bio->devs[s/2].repl_bio; 46068c2ecf20Sopenharmony_ci } else { 46078c2ecf20Sopenharmony_ci rdev2 = rcu_dereference(conf->mirrors[d].rdev); 46088c2ecf20Sopenharmony_ci b = r10_bio->devs[s/2].bio; 46098c2ecf20Sopenharmony_ci } 46108c2ecf20Sopenharmony_ci if (!rdev2 || test_bit(Faulty, &rdev2->flags)) 46118c2ecf20Sopenharmony_ci continue; 46128c2ecf20Sopenharmony_ci 46138c2ecf20Sopenharmony_ci bio_set_dev(b, rdev2->bdev); 46148c2ecf20Sopenharmony_ci b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + 46158c2ecf20Sopenharmony_ci rdev2->new_data_offset; 46168c2ecf20Sopenharmony_ci b->bi_end_io = end_reshape_write; 46178c2ecf20Sopenharmony_ci bio_set_op_attrs(b, REQ_OP_WRITE, 0); 46188c2ecf20Sopenharmony_ci b->bi_next = blist; 46198c2ecf20Sopenharmony_ci blist = b; 46208c2ecf20Sopenharmony_ci } 46218c2ecf20Sopenharmony_ci 46228c2ecf20Sopenharmony_ci /* Now add as many pages as possible to all of these bios. */ 46238c2ecf20Sopenharmony_ci 46248c2ecf20Sopenharmony_ci nr_sectors = 0; 46258c2ecf20Sopenharmony_ci pages = get_resync_pages(r10_bio->devs[0].bio)->pages; 46268c2ecf20Sopenharmony_ci for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) { 46278c2ecf20Sopenharmony_ci struct page *page = pages[s / (PAGE_SIZE >> 9)]; 46288c2ecf20Sopenharmony_ci int len = (max_sectors - s) << 9; 46298c2ecf20Sopenharmony_ci if (len > PAGE_SIZE) 46308c2ecf20Sopenharmony_ci len = PAGE_SIZE; 46318c2ecf20Sopenharmony_ci for (bio = blist; bio ; bio = bio->bi_next) { 46328c2ecf20Sopenharmony_ci /* 46338c2ecf20Sopenharmony_ci * won't fail because the vec table is big enough 46348c2ecf20Sopenharmony_ci * to hold all these pages 46358c2ecf20Sopenharmony_ci */ 46368c2ecf20Sopenharmony_ci bio_add_page(bio, page, len, 0); 46378c2ecf20Sopenharmony_ci } 46388c2ecf20Sopenharmony_ci sector_nr += len >> 9; 46398c2ecf20Sopenharmony_ci nr_sectors += len >> 9; 46408c2ecf20Sopenharmony_ci } 46418c2ecf20Sopenharmony_ci 
rcu_read_unlock(); 46428c2ecf20Sopenharmony_ci r10_bio->sectors = nr_sectors; 46438c2ecf20Sopenharmony_ci 46448c2ecf20Sopenharmony_ci /* Now submit the read */ 46458c2ecf20Sopenharmony_ci md_sync_acct_bio(read_bio, r10_bio->sectors); 46468c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 46478c2ecf20Sopenharmony_ci read_bio->bi_next = NULL; 46488c2ecf20Sopenharmony_ci submit_bio_noacct(read_bio); 46498c2ecf20Sopenharmony_ci sectors_done += nr_sectors; 46508c2ecf20Sopenharmony_ci if (sector_nr <= last) 46518c2ecf20Sopenharmony_ci goto read_more; 46528c2ecf20Sopenharmony_ci 46538c2ecf20Sopenharmony_ci lower_barrier(conf); 46548c2ecf20Sopenharmony_ci 46558c2ecf20Sopenharmony_ci /* Now that we have done the whole section we can 46568c2ecf20Sopenharmony_ci * update reshape_progress 46578c2ecf20Sopenharmony_ci */ 46588c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) 46598c2ecf20Sopenharmony_ci conf->reshape_progress -= sectors_done; 46608c2ecf20Sopenharmony_ci else 46618c2ecf20Sopenharmony_ci conf->reshape_progress += sectors_done; 46628c2ecf20Sopenharmony_ci 46638c2ecf20Sopenharmony_ci return sectors_done; 46648c2ecf20Sopenharmony_ci} 46658c2ecf20Sopenharmony_ci 46668c2ecf20Sopenharmony_cistatic void end_reshape_request(struct r10bio *r10_bio); 46678c2ecf20Sopenharmony_cistatic int handle_reshape_read_error(struct mddev *mddev, 46688c2ecf20Sopenharmony_ci struct r10bio *r10_bio); 46698c2ecf20Sopenharmony_cistatic void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) 46708c2ecf20Sopenharmony_ci{ 46718c2ecf20Sopenharmony_ci /* Reshape read completed. Hopefully we have a block 46728c2ecf20Sopenharmony_ci * to write out. 46738c2ecf20Sopenharmony_ci * If we got a read error then we do sync 1-page reads from 46748c2ecf20Sopenharmony_ci * elsewhere until we find the data - or give up. 
46758c2ecf20Sopenharmony_ci */ 46768c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 46778c2ecf20Sopenharmony_ci int s; 46788c2ecf20Sopenharmony_ci 46798c2ecf20Sopenharmony_ci if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) 46808c2ecf20Sopenharmony_ci if (handle_reshape_read_error(mddev, r10_bio) < 0) { 46818c2ecf20Sopenharmony_ci /* Reshape has been aborted */ 46828c2ecf20Sopenharmony_ci md_done_sync(mddev, r10_bio->sectors, 0); 46838c2ecf20Sopenharmony_ci return; 46848c2ecf20Sopenharmony_ci } 46858c2ecf20Sopenharmony_ci 46868c2ecf20Sopenharmony_ci /* We definitely have the data in the pages, schedule the 46878c2ecf20Sopenharmony_ci * writes. 46888c2ecf20Sopenharmony_ci */ 46898c2ecf20Sopenharmony_ci atomic_set(&r10_bio->remaining, 1); 46908c2ecf20Sopenharmony_ci for (s = 0; s < conf->copies*2; s++) { 46918c2ecf20Sopenharmony_ci struct bio *b; 46928c2ecf20Sopenharmony_ci int d = r10_bio->devs[s/2].devnum; 46938c2ecf20Sopenharmony_ci struct md_rdev *rdev; 46948c2ecf20Sopenharmony_ci rcu_read_lock(); 46958c2ecf20Sopenharmony_ci if (s&1) { 46968c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].replacement); 46978c2ecf20Sopenharmony_ci b = r10_bio->devs[s/2].repl_bio; 46988c2ecf20Sopenharmony_ci } else { 46998c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].rdev); 47008c2ecf20Sopenharmony_ci b = r10_bio->devs[s/2].bio; 47018c2ecf20Sopenharmony_ci } 47028c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags)) { 47038c2ecf20Sopenharmony_ci rcu_read_unlock(); 47048c2ecf20Sopenharmony_ci continue; 47058c2ecf20Sopenharmony_ci } 47068c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 47078c2ecf20Sopenharmony_ci rcu_read_unlock(); 47088c2ecf20Sopenharmony_ci md_sync_acct_bio(b, r10_bio->sectors); 47098c2ecf20Sopenharmony_ci atomic_inc(&r10_bio->remaining); 47108c2ecf20Sopenharmony_ci b->bi_next = NULL; 47118c2ecf20Sopenharmony_ci submit_bio_noacct(b); 47128c2ecf20Sopenharmony_ci } 47138c2ecf20Sopenharmony_ci 
end_reshape_request(r10_bio); 47148c2ecf20Sopenharmony_ci} 47158c2ecf20Sopenharmony_ci 47168c2ecf20Sopenharmony_cistatic void end_reshape(struct r10conf *conf) 47178c2ecf20Sopenharmony_ci{ 47188c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) 47198c2ecf20Sopenharmony_ci return; 47208c2ecf20Sopenharmony_ci 47218c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 47228c2ecf20Sopenharmony_ci conf->prev = conf->geo; 47238c2ecf20Sopenharmony_ci md_finish_reshape(conf->mddev); 47248c2ecf20Sopenharmony_ci smp_wmb(); 47258c2ecf20Sopenharmony_ci conf->reshape_progress = MaxSector; 47268c2ecf20Sopenharmony_ci conf->reshape_safe = MaxSector; 47278c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 47288c2ecf20Sopenharmony_ci 47298c2ecf20Sopenharmony_ci if (conf->mddev->queue) 47308c2ecf20Sopenharmony_ci raid10_set_io_opt(conf); 47318c2ecf20Sopenharmony_ci conf->fullsync = 0; 47328c2ecf20Sopenharmony_ci} 47338c2ecf20Sopenharmony_ci 47348c2ecf20Sopenharmony_cistatic void raid10_update_reshape_pos(struct mddev *mddev) 47358c2ecf20Sopenharmony_ci{ 47368c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 47378c2ecf20Sopenharmony_ci sector_t lo, hi; 47388c2ecf20Sopenharmony_ci 47398c2ecf20Sopenharmony_ci md_cluster_ops->resync_info_get(mddev, &lo, &hi); 47408c2ecf20Sopenharmony_ci if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo)) 47418c2ecf20Sopenharmony_ci || mddev->reshape_position == MaxSector) 47428c2ecf20Sopenharmony_ci conf->reshape_progress = mddev->reshape_position; 47438c2ecf20Sopenharmony_ci else 47448c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 47458c2ecf20Sopenharmony_ci} 47468c2ecf20Sopenharmony_ci 47478c2ecf20Sopenharmony_cistatic int handle_reshape_read_error(struct mddev *mddev, 47488c2ecf20Sopenharmony_ci struct r10bio *r10_bio) 47498c2ecf20Sopenharmony_ci{ 47508c2ecf20Sopenharmony_ci /* Use sync reads to get the blocks from somewhere else */ 47518c2ecf20Sopenharmony_ci int sectors = 
r10_bio->sectors; 47528c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 47538c2ecf20Sopenharmony_ci struct r10bio *r10b; 47548c2ecf20Sopenharmony_ci int slot = 0; 47558c2ecf20Sopenharmony_ci int idx = 0; 47568c2ecf20Sopenharmony_ci struct page **pages; 47578c2ecf20Sopenharmony_ci 47588c2ecf20Sopenharmony_ci r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO); 47598c2ecf20Sopenharmony_ci if (!r10b) { 47608c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, &mddev->recovery); 47618c2ecf20Sopenharmony_ci return -ENOMEM; 47628c2ecf20Sopenharmony_ci } 47638c2ecf20Sopenharmony_ci 47648c2ecf20Sopenharmony_ci /* reshape IOs share pages from .devs[0].bio */ 47658c2ecf20Sopenharmony_ci pages = get_resync_pages(r10_bio->devs[0].bio)->pages; 47668c2ecf20Sopenharmony_ci 47678c2ecf20Sopenharmony_ci r10b->sector = r10_bio->sector; 47688c2ecf20Sopenharmony_ci __raid10_find_phys(&conf->prev, r10b); 47698c2ecf20Sopenharmony_ci 47708c2ecf20Sopenharmony_ci while (sectors) { 47718c2ecf20Sopenharmony_ci int s = sectors; 47728c2ecf20Sopenharmony_ci int success = 0; 47738c2ecf20Sopenharmony_ci int first_slot = slot; 47748c2ecf20Sopenharmony_ci 47758c2ecf20Sopenharmony_ci if (s > (PAGE_SIZE >> 9)) 47768c2ecf20Sopenharmony_ci s = PAGE_SIZE >> 9; 47778c2ecf20Sopenharmony_ci 47788c2ecf20Sopenharmony_ci rcu_read_lock(); 47798c2ecf20Sopenharmony_ci while (!success) { 47808c2ecf20Sopenharmony_ci int d = r10b->devs[slot].devnum; 47818c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); 47828c2ecf20Sopenharmony_ci sector_t addr; 47838c2ecf20Sopenharmony_ci if (rdev == NULL || 47848c2ecf20Sopenharmony_ci test_bit(Faulty, &rdev->flags) || 47858c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags)) 47868c2ecf20Sopenharmony_ci goto failed; 47878c2ecf20Sopenharmony_ci 47888c2ecf20Sopenharmony_ci addr = r10b->devs[slot].addr + idx * PAGE_SIZE; 47898c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 47908c2ecf20Sopenharmony_ci rcu_read_unlock(); 
47918c2ecf20Sopenharmony_ci success = sync_page_io(rdev, 47928c2ecf20Sopenharmony_ci addr, 47938c2ecf20Sopenharmony_ci s << 9, 47948c2ecf20Sopenharmony_ci pages[idx], 47958c2ecf20Sopenharmony_ci REQ_OP_READ, 0, false); 47968c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 47978c2ecf20Sopenharmony_ci rcu_read_lock(); 47988c2ecf20Sopenharmony_ci if (success) 47998c2ecf20Sopenharmony_ci break; 48008c2ecf20Sopenharmony_ci failed: 48018c2ecf20Sopenharmony_ci slot++; 48028c2ecf20Sopenharmony_ci if (slot >= conf->copies) 48038c2ecf20Sopenharmony_ci slot = 0; 48048c2ecf20Sopenharmony_ci if (slot == first_slot) 48058c2ecf20Sopenharmony_ci break; 48068c2ecf20Sopenharmony_ci } 48078c2ecf20Sopenharmony_ci rcu_read_unlock(); 48088c2ecf20Sopenharmony_ci if (!success) { 48098c2ecf20Sopenharmony_ci /* couldn't read this block, must give up */ 48108c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, 48118c2ecf20Sopenharmony_ci &mddev->recovery); 48128c2ecf20Sopenharmony_ci kfree(r10b); 48138c2ecf20Sopenharmony_ci return -EIO; 48148c2ecf20Sopenharmony_ci } 48158c2ecf20Sopenharmony_ci sectors -= s; 48168c2ecf20Sopenharmony_ci idx++; 48178c2ecf20Sopenharmony_ci } 48188c2ecf20Sopenharmony_ci kfree(r10b); 48198c2ecf20Sopenharmony_ci return 0; 48208c2ecf20Sopenharmony_ci} 48218c2ecf20Sopenharmony_ci 48228c2ecf20Sopenharmony_cistatic void end_reshape_write(struct bio *bio) 48238c2ecf20Sopenharmony_ci{ 48248c2ecf20Sopenharmony_ci struct r10bio *r10_bio = get_resync_r10bio(bio); 48258c2ecf20Sopenharmony_ci struct mddev *mddev = r10_bio->mddev; 48268c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 48278c2ecf20Sopenharmony_ci int d; 48288c2ecf20Sopenharmony_ci int slot; 48298c2ecf20Sopenharmony_ci int repl; 48308c2ecf20Sopenharmony_ci struct md_rdev *rdev = NULL; 48318c2ecf20Sopenharmony_ci 48328c2ecf20Sopenharmony_ci d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 48338c2ecf20Sopenharmony_ci if (repl) 48348c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].replacement; 
48358c2ecf20Sopenharmony_ci if (!rdev) { 48368c2ecf20Sopenharmony_ci smp_mb(); 48378c2ecf20Sopenharmony_ci rdev = conf->mirrors[d].rdev; 48388c2ecf20Sopenharmony_ci } 48398c2ecf20Sopenharmony_ci 48408c2ecf20Sopenharmony_ci if (bio->bi_status) { 48418c2ecf20Sopenharmony_ci /* FIXME should record badblock */ 48428c2ecf20Sopenharmony_ci md_error(mddev, rdev); 48438c2ecf20Sopenharmony_ci } 48448c2ecf20Sopenharmony_ci 48458c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 48468c2ecf20Sopenharmony_ci end_reshape_request(r10_bio); 48478c2ecf20Sopenharmony_ci} 48488c2ecf20Sopenharmony_ci 48498c2ecf20Sopenharmony_cistatic void end_reshape_request(struct r10bio *r10_bio) 48508c2ecf20Sopenharmony_ci{ 48518c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&r10_bio->remaining)) 48528c2ecf20Sopenharmony_ci return; 48538c2ecf20Sopenharmony_ci md_done_sync(r10_bio->mddev, r10_bio->sectors, 1); 48548c2ecf20Sopenharmony_ci bio_put(r10_bio->master_bio); 48558c2ecf20Sopenharmony_ci put_buf(r10_bio); 48568c2ecf20Sopenharmony_ci} 48578c2ecf20Sopenharmony_ci 48588c2ecf20Sopenharmony_cistatic void raid10_finish_reshape(struct mddev *mddev) 48598c2ecf20Sopenharmony_ci{ 48608c2ecf20Sopenharmony_ci struct r10conf *conf = mddev->private; 48618c2ecf20Sopenharmony_ci 48628c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 48638c2ecf20Sopenharmony_ci return; 48648c2ecf20Sopenharmony_ci 48658c2ecf20Sopenharmony_ci if (mddev->delta_disks > 0) { 48668c2ecf20Sopenharmony_ci if (mddev->recovery_cp > mddev->resync_max_sectors) { 48678c2ecf20Sopenharmony_ci mddev->recovery_cp = mddev->resync_max_sectors; 48688c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 48698c2ecf20Sopenharmony_ci } 48708c2ecf20Sopenharmony_ci mddev->resync_max_sectors = mddev->array_sectors; 48718c2ecf20Sopenharmony_ci } else { 48728c2ecf20Sopenharmony_ci int d; 48738c2ecf20Sopenharmony_ci rcu_read_lock(); 48748c2ecf20Sopenharmony_ci for (d = conf->geo.raid_disks ; 
48758c2ecf20Sopenharmony_ci d < conf->geo.raid_disks - mddev->delta_disks; 48768c2ecf20Sopenharmony_ci d++) { 48778c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); 48788c2ecf20Sopenharmony_ci if (rdev) 48798c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 48808c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->mirrors[d].replacement); 48818c2ecf20Sopenharmony_ci if (rdev) 48828c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 48838c2ecf20Sopenharmony_ci } 48848c2ecf20Sopenharmony_ci rcu_read_unlock(); 48858c2ecf20Sopenharmony_ci } 48868c2ecf20Sopenharmony_ci mddev->layout = mddev->new_layout; 48878c2ecf20Sopenharmony_ci mddev->chunk_sectors = 1 << conf->geo.chunk_shift; 48888c2ecf20Sopenharmony_ci mddev->reshape_position = MaxSector; 48898c2ecf20Sopenharmony_ci mddev->delta_disks = 0; 48908c2ecf20Sopenharmony_ci mddev->reshape_backwards = 0; 48918c2ecf20Sopenharmony_ci} 48928c2ecf20Sopenharmony_ci 48938c2ecf20Sopenharmony_cistatic struct md_personality raid10_personality = 48948c2ecf20Sopenharmony_ci{ 48958c2ecf20Sopenharmony_ci .name = "raid10", 48968c2ecf20Sopenharmony_ci .level = 10, 48978c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 48988c2ecf20Sopenharmony_ci .make_request = raid10_make_request, 48998c2ecf20Sopenharmony_ci .run = raid10_run, 49008c2ecf20Sopenharmony_ci .free = raid10_free, 49018c2ecf20Sopenharmony_ci .status = raid10_status, 49028c2ecf20Sopenharmony_ci .error_handler = raid10_error, 49038c2ecf20Sopenharmony_ci .hot_add_disk = raid10_add_disk, 49048c2ecf20Sopenharmony_ci .hot_remove_disk= raid10_remove_disk, 49058c2ecf20Sopenharmony_ci .spare_active = raid10_spare_active, 49068c2ecf20Sopenharmony_ci .sync_request = raid10_sync_request, 49078c2ecf20Sopenharmony_ci .quiesce = raid10_quiesce, 49088c2ecf20Sopenharmony_ci .size = raid10_size, 49098c2ecf20Sopenharmony_ci .resize = raid10_resize, 49108c2ecf20Sopenharmony_ci .takeover = raid10_takeover, 49118c2ecf20Sopenharmony_ci .check_reshape = 
raid10_check_reshape, 49128c2ecf20Sopenharmony_ci .start_reshape = raid10_start_reshape, 49138c2ecf20Sopenharmony_ci .finish_reshape = raid10_finish_reshape, 49148c2ecf20Sopenharmony_ci .update_reshape_pos = raid10_update_reshape_pos, 49158c2ecf20Sopenharmony_ci}; 49168c2ecf20Sopenharmony_ci 49178c2ecf20Sopenharmony_cistatic int __init raid_init(void) 49188c2ecf20Sopenharmony_ci{ 49198c2ecf20Sopenharmony_ci return register_md_personality(&raid10_personality); 49208c2ecf20Sopenharmony_ci} 49218c2ecf20Sopenharmony_ci 49228c2ecf20Sopenharmony_cistatic void raid_exit(void) 49238c2ecf20Sopenharmony_ci{ 49248c2ecf20Sopenharmony_ci unregister_md_personality(&raid10_personality); 49258c2ecf20Sopenharmony_ci} 49268c2ecf20Sopenharmony_ci 49278c2ecf20Sopenharmony_cimodule_init(raid_init); 49288c2ecf20Sopenharmony_cimodule_exit(raid_exit); 49298c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 49308c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); 49318c2ecf20Sopenharmony_ciMODULE_ALIAS("md-personality-9"); /* RAID10 */ 49328c2ecf20Sopenharmony_ciMODULE_ALIAS("md-raid10"); 49338c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level-10"); 49348c2ecf20Sopenharmony_ci 49358c2ecf20Sopenharmony_cimodule_param(max_queued_requests, int, S_IRUGO|S_IWUSR); 4936