18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * raid5.c : Multiple Devices driver for Linux 48c2ecf20Sopenharmony_ci * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman 58c2ecf20Sopenharmony_ci * Copyright (C) 1999, 2000 Ingo Molnar 68c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2003 H. Peter Anvin 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * RAID-4/5/6 management functions. 98c2ecf20Sopenharmony_ci * Thanks to Penguin Computing for making the RAID-6 development possible 108c2ecf20Sopenharmony_ci * by donating a test server! 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci/* 148c2ecf20Sopenharmony_ci * BITMAP UNPLUGGING: 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * The sequencing for updating the bitmap reliably is a little 178c2ecf20Sopenharmony_ci * subtle (and I got it wrong the first time) so it deserves some 188c2ecf20Sopenharmony_ci * explanation. 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * We group bitmap updates into batches. Each batch has a number. 218c2ecf20Sopenharmony_ci * We may write out several batches at once, but that isn't very important. 228c2ecf20Sopenharmony_ci * conf->seq_write is the number of the last batch successfully written. 238c2ecf20Sopenharmony_ci * conf->seq_flush is the number of the last batch that was closed to 248c2ecf20Sopenharmony_ci * new additions. 258c2ecf20Sopenharmony_ci * When we discover that we will need to write to any block in a stripe 268c2ecf20Sopenharmony_ci * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq 278c2ecf20Sopenharmony_ci * the number of the batch it will be in. This is seq_flush+1. 288c2ecf20Sopenharmony_ci * When we are ready to do a write, if that batch hasn't been written yet, 298c2ecf20Sopenharmony_ci * we plug the array and queue the stripe for later. 308c2ecf20Sopenharmony_ci * When an unplug happens, we increment bm_flush, thus closing the current 318c2ecf20Sopenharmony_ci * batch. 328c2ecf20Sopenharmony_ci * When we notice that bm_flush > bm_write, we write out all pending updates 338c2ecf20Sopenharmony_ci * to the bitmap, and advance bm_write to where bm_flush was. 348c2ecf20Sopenharmony_ci * This may occasionally write a bit out twice, but is sure never to 358c2ecf20Sopenharmony_ci * miss any bits. 368c2ecf20Sopenharmony_ci */ 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 398c2ecf20Sopenharmony_ci#include <linux/kthread.h> 408c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 418c2ecf20Sopenharmony_ci#include <linux/async_tx.h> 428c2ecf20Sopenharmony_ci#include <linux/module.h> 438c2ecf20Sopenharmony_ci#include <linux/async.h> 448c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 458c2ecf20Sopenharmony_ci#include <linux/cpu.h> 468c2ecf20Sopenharmony_ci#include <linux/slab.h> 478c2ecf20Sopenharmony_ci#include <linux/ratelimit.h> 488c2ecf20Sopenharmony_ci#include <linux/nodemask.h> 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#include <trace/events/block.h> 518c2ecf20Sopenharmony_ci#include <linux/list_sort.h> 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci#include "md.h" 548c2ecf20Sopenharmony_ci#include "raid5.h" 558c2ecf20Sopenharmony_ci#include "raid0.h" 568c2ecf20Sopenharmony_ci#include "md-bitmap.h" 578c2ecf20Sopenharmony_ci#include "raid5-log.h" 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci#define UNSUPPORTED_MDDEV_FLAGS (1L << MD_FAILFAST_SUPPORTED) 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci#define cpu_to_group(cpu) cpu_to_node(cpu) 628c2ecf20Sopenharmony_ci#define ANY_GROUP NUMA_NO_NODE 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistatic bool devices_handle_discard_safely = false; 658c2ecf20Sopenharmony_cimodule_param(devices_handle_discard_safely, bool, 0644); 668c2ecf20Sopenharmony_ciMODULE_PARM_DESC(devices_handle_discard_safely, 678c2ecf20Sopenharmony_ci "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions"); 688c2ecf20Sopenharmony_cistatic struct workqueue_struct *raid5_wq; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) 718c2ecf20Sopenharmony_ci{ 728c2ecf20Sopenharmony_ci int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK; 738c2ecf20Sopenharmony_ci return &conf->stripe_hashtbl[hash]; 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_cistatic inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK; 798c2ecf20Sopenharmony_ci} 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_cistatic inline void lock_device_hash_lock(struct r5conf *conf, int hash) 828c2ecf20Sopenharmony_ci{ 838c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks + hash); 848c2ecf20Sopenharmony_ci spin_lock(&conf->device_lock); 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistatic inline void unlock_device_hash_lock(struct r5conf *conf, int hash) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci spin_unlock(&conf->device_lock); 908c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks + hash); 918c2ecf20Sopenharmony_ci} 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_cistatic inline void lock_all_device_hash_locks_irq(struct r5conf *conf) 948c2ecf20Sopenharmony_ci{ 958c2ecf20Sopenharmony_ci int i; 968c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks); 978c2ecf20Sopenharmony_ci for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) 988c2ecf20Sopenharmony_ci spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); 998c2ecf20Sopenharmony_ci spin_lock(&conf->device_lock); 1008c2ecf20Sopenharmony_ci} 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_cistatic inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci int i; 1058c2ecf20Sopenharmony_ci spin_unlock(&conf->device_lock); 1068c2ecf20Sopenharmony_ci for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) 1078c2ecf20Sopenharmony_ci spin_unlock(conf->hash_locks + i); 1088c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks); 1098c2ecf20Sopenharmony_ci} 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci/* Find first data disk in a raid6 stripe */ 1128c2ecf20Sopenharmony_cistatic inline int raid6_d0(struct stripe_head *sh) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci if (sh->ddf_layout) 1158c2ecf20Sopenharmony_ci /* ddf always start from first device */ 1168c2ecf20Sopenharmony_ci return 0; 1178c2ecf20Sopenharmony_ci /* md starts just after Q block */ 1188c2ecf20Sopenharmony_ci if (sh->qd_idx == sh->disks - 1) 1198c2ecf20Sopenharmony_ci return 0; 1208c2ecf20Sopenharmony_ci else 1218c2ecf20Sopenharmony_ci return sh->qd_idx + 1; 1228c2ecf20Sopenharmony_ci} 1238c2ecf20Sopenharmony_cistatic inline int raid6_next_disk(int disk, int raid_disks) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci disk++; 1268c2ecf20Sopenharmony_ci return (disk < raid_disks) ? disk : 0; 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci/* When walking through the disks in a raid5, starting at raid6_d0, 1308c2ecf20Sopenharmony_ci * We need to map each disk to a 'slot', where the data disks are slot 1318c2ecf20Sopenharmony_ci * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk 1328c2ecf20Sopenharmony_ci * is raid_disks-1. This help does that mapping. 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_cistatic int raid6_idx_to_slot(int idx, struct stripe_head *sh, 1358c2ecf20Sopenharmony_ci int *count, int syndrome_disks) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci int slot = *count; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci if (sh->ddf_layout) 1408c2ecf20Sopenharmony_ci (*count)++; 1418c2ecf20Sopenharmony_ci if (idx == sh->pd_idx) 1428c2ecf20Sopenharmony_ci return syndrome_disks; 1438c2ecf20Sopenharmony_ci if (idx == sh->qd_idx) 1448c2ecf20Sopenharmony_ci return syndrome_disks + 1; 1458c2ecf20Sopenharmony_ci if (!sh->ddf_layout) 1468c2ecf20Sopenharmony_ci (*count)++; 1478c2ecf20Sopenharmony_ci return slot; 1488c2ecf20Sopenharmony_ci} 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_cistatic void print_raid5_conf (struct r5conf *conf); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_cistatic int stripe_operations_active(struct stripe_head *sh) 1538c2ecf20Sopenharmony_ci{ 1548c2ecf20Sopenharmony_ci return sh->check_state || sh->reconstruct_state || 1558c2ecf20Sopenharmony_ci test_bit(STRIPE_BIOFILL_RUN, &sh->state) || 1568c2ecf20Sopenharmony_ci test_bit(STRIPE_COMPUTE_RUN, &sh->state); 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_cistatic bool stripe_is_lowprio(struct stripe_head *sh) 1608c2ecf20Sopenharmony_ci{ 1618c2ecf20Sopenharmony_ci return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) || 1628c2ecf20Sopenharmony_ci test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) && 1638c2ecf20Sopenharmony_ci !test_bit(STRIPE_R5C_CACHING, &sh->state); 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_cistatic void raid5_wakeup_stripe_thread(struct stripe_head *sh) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 1698c2ecf20Sopenharmony_ci struct r5worker_group *group; 1708c2ecf20Sopenharmony_ci int thread_cnt; 1718c2ecf20Sopenharmony_ci int i, cpu = sh->cpu; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci if (!cpu_online(cpu)) { 1748c2ecf20Sopenharmony_ci cpu = cpumask_any(cpu_online_mask); 1758c2ecf20Sopenharmony_ci sh->cpu = cpu; 1768c2ecf20Sopenharmony_ci } 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci if (list_empty(&sh->lru)) { 1798c2ecf20Sopenharmony_ci struct r5worker_group *group; 1808c2ecf20Sopenharmony_ci group = conf->worker_groups + cpu_to_group(cpu); 1818c2ecf20Sopenharmony_ci if (stripe_is_lowprio(sh)) 1828c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &group->loprio_list); 1838c2ecf20Sopenharmony_ci else 1848c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &group->handle_list); 1858c2ecf20Sopenharmony_ci group->stripes_cnt++; 1868c2ecf20Sopenharmony_ci sh->group = group; 1878c2ecf20Sopenharmony_ci } 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci if (conf->worker_cnt_per_group == 0) { 1908c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 1918c2ecf20Sopenharmony_ci return; 1928c2ecf20Sopenharmony_ci } 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci group = conf->worker_groups + cpu_to_group(sh->cpu); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci group->workers[0].working = true; 1978c2ecf20Sopenharmony_ci /* at least one worker should run to avoid race */ 1988c2ecf20Sopenharmony_ci queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1; 2018c2ecf20Sopenharmony_ci /* wakeup more workers */ 2028c2ecf20Sopenharmony_ci for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) { 2038c2ecf20Sopenharmony_ci if (group->workers[i].working == false) { 2048c2ecf20Sopenharmony_ci group->workers[i].working = true; 2058c2ecf20Sopenharmony_ci queue_work_on(sh->cpu, raid5_wq, 2068c2ecf20Sopenharmony_ci &group->workers[i].work); 2078c2ecf20Sopenharmony_ci thread_cnt--; 2088c2ecf20Sopenharmony_ci } 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci} 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_cistatic void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, 2138c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci int i; 2168c2ecf20Sopenharmony_ci int injournal = 0; /* number of date pages with R5_InJournal */ 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&sh->lru)); 2198c2ecf20Sopenharmony_ci BUG_ON(atomic_read(&conf->active_stripes)==0); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci if (r5c_is_writeback(conf->log)) 2228c2ecf20Sopenharmony_ci for (i = sh->disks; i--; ) 2238c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &sh->dev[i].flags)) 2248c2ecf20Sopenharmony_ci injournal++; 2258c2ecf20Sopenharmony_ci /* 2268c2ecf20Sopenharmony_ci * In the following cases, the stripe cannot be released to cached 2278c2ecf20Sopenharmony_ci * lists. Therefore, we make the stripe write out and set 2288c2ecf20Sopenharmony_ci * STRIPE_HANDLE: 2298c2ecf20Sopenharmony_ci * 1. when quiesce in r5c write back; 2308c2ecf20Sopenharmony_ci * 2. when resync is requested fot the stripe. 2318c2ecf20Sopenharmony_ci */ 2328c2ecf20Sopenharmony_ci if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || 2338c2ecf20Sopenharmony_ci (conf->quiesce && r5c_is_writeback(conf->log) && 2348c2ecf20Sopenharmony_ci !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { 2358c2ecf20Sopenharmony_ci if (test_bit(STRIPE_R5C_CACHING, &sh->state)) 2368c2ecf20Sopenharmony_ci r5c_make_stripe_write_out(sh); 2378c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 2388c2ecf20Sopenharmony_ci } 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci if (test_bit(STRIPE_HANDLE, &sh->state)) { 2418c2ecf20Sopenharmony_ci if (test_bit(STRIPE_DELAYED, &sh->state) && 2428c2ecf20Sopenharmony_ci !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 2438c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &conf->delayed_list); 2448c2ecf20Sopenharmony_ci else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 2458c2ecf20Sopenharmony_ci sh->bm_seq - conf->seq_write > 0) 2468c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &conf->bitmap_list); 2478c2ecf20Sopenharmony_ci else { 2488c2ecf20Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 2498c2ecf20Sopenharmony_ci clear_bit(STRIPE_BIT_DELAY, &sh->state); 2508c2ecf20Sopenharmony_ci if (conf->worker_cnt_per_group == 0) { 2518c2ecf20Sopenharmony_ci if (stripe_is_lowprio(sh)) 2528c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, 2538c2ecf20Sopenharmony_ci &conf->loprio_list); 2548c2ecf20Sopenharmony_ci else 2558c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, 2568c2ecf20Sopenharmony_ci &conf->handle_list); 2578c2ecf20Sopenharmony_ci } else { 2588c2ecf20Sopenharmony_ci raid5_wakeup_stripe_thread(sh); 2598c2ecf20Sopenharmony_ci return; 2608c2ecf20Sopenharmony_ci } 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 2638c2ecf20Sopenharmony_ci } else { 2648c2ecf20Sopenharmony_ci BUG_ON(stripe_operations_active(sh)); 2658c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 2668c2ecf20Sopenharmony_ci if (atomic_dec_return(&conf->preread_active_stripes) 2678c2ecf20Sopenharmony_ci < IO_THRESHOLD) 2688c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 2698c2ecf20Sopenharmony_ci atomic_dec(&conf->active_stripes); 2708c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_EXPANDING, &sh->state)) { 2718c2ecf20Sopenharmony_ci if (!r5c_is_writeback(conf->log)) 2728c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, temp_inactive_list); 2738c2ecf20Sopenharmony_ci else { 2748c2ecf20Sopenharmony_ci WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)); 2758c2ecf20Sopenharmony_ci if (injournal == 0) 2768c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, temp_inactive_list); 2778c2ecf20Sopenharmony_ci else if (injournal == conf->raid_disks - conf->max_degraded) { 2788c2ecf20Sopenharmony_ci /* full stripe */ 2798c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) 2808c2ecf20Sopenharmony_ci atomic_inc(&conf->r5c_cached_full_stripes); 2818c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) 2828c2ecf20Sopenharmony_ci atomic_dec(&conf->r5c_cached_partial_stripes); 2838c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); 2848c2ecf20Sopenharmony_ci r5c_check_cached_full_stripe(conf); 2858c2ecf20Sopenharmony_ci } else 2868c2ecf20Sopenharmony_ci /* 2878c2ecf20Sopenharmony_ci * STRIPE_R5C_PARTIAL_STRIPE is set in 2888c2ecf20Sopenharmony_ci * r5c_try_caching_write(). No need to 2898c2ecf20Sopenharmony_ci * set it again. 2908c2ecf20Sopenharmony_ci */ 2918c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list); 2928c2ecf20Sopenharmony_ci } 2938c2ecf20Sopenharmony_ci } 2948c2ecf20Sopenharmony_ci } 2958c2ecf20Sopenharmony_ci} 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_cistatic void __release_stripe(struct r5conf *conf, struct stripe_head *sh, 2988c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list) 2998c2ecf20Sopenharmony_ci{ 3008c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&sh->count)) 3018c2ecf20Sopenharmony_ci do_release_stripe(conf, sh, temp_inactive_list); 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci/* 3058c2ecf20Sopenharmony_ci * @hash could be NR_STRIPE_HASH_LOCKS, then we have a list of inactive_list 3068c2ecf20Sopenharmony_ci * 3078c2ecf20Sopenharmony_ci * Be careful: Only one task can add/delete stripes from temp_inactive_list at 3088c2ecf20Sopenharmony_ci * given time. Adding stripes only takes device lock, while deleting stripes 3098c2ecf20Sopenharmony_ci * only takes hash lock. 3108c2ecf20Sopenharmony_ci */ 3118c2ecf20Sopenharmony_cistatic void release_inactive_stripe_list(struct r5conf *conf, 3128c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list, 3138c2ecf20Sopenharmony_ci int hash) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci int size; 3168c2ecf20Sopenharmony_ci bool do_wakeup = false; 3178c2ecf20Sopenharmony_ci unsigned long flags; 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci if (hash == NR_STRIPE_HASH_LOCKS) { 3208c2ecf20Sopenharmony_ci size = NR_STRIPE_HASH_LOCKS; 3218c2ecf20Sopenharmony_ci hash = NR_STRIPE_HASH_LOCKS - 1; 3228c2ecf20Sopenharmony_ci } else 3238c2ecf20Sopenharmony_ci size = 1; 3248c2ecf20Sopenharmony_ci while (size) { 3258c2ecf20Sopenharmony_ci struct list_head *list = &temp_inactive_list[size - 1]; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci /* 3288c2ecf20Sopenharmony_ci * We don't hold any lock here yet, raid5_get_active_stripe() might 3298c2ecf20Sopenharmony_ci * remove stripes from the list 3308c2ecf20Sopenharmony_ci */ 3318c2ecf20Sopenharmony_ci if (!list_empty_careful(list)) { 3328c2ecf20Sopenharmony_ci spin_lock_irqsave(conf->hash_locks + hash, flags); 3338c2ecf20Sopenharmony_ci if (list_empty(conf->inactive_list + hash) && 3348c2ecf20Sopenharmony_ci !list_empty(list)) 3358c2ecf20Sopenharmony_ci atomic_dec(&conf->empty_inactive_list_nr); 3368c2ecf20Sopenharmony_ci list_splice_tail_init(list, conf->inactive_list + hash); 3378c2ecf20Sopenharmony_ci do_wakeup = true; 3388c2ecf20Sopenharmony_ci spin_unlock_irqrestore(conf->hash_locks + hash, flags); 3398c2ecf20Sopenharmony_ci } 3408c2ecf20Sopenharmony_ci size--; 3418c2ecf20Sopenharmony_ci hash--; 3428c2ecf20Sopenharmony_ci } 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci if (do_wakeup) { 3458c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_stripe); 3468c2ecf20Sopenharmony_ci if (atomic_read(&conf->active_stripes) == 0) 3478c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_quiescent); 3488c2ecf20Sopenharmony_ci if (conf->retry_read_aligned) 3498c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 3508c2ecf20Sopenharmony_ci } 3518c2ecf20Sopenharmony_ci} 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci/* should hold conf->device_lock already */ 3548c2ecf20Sopenharmony_cistatic int release_stripe_list(struct r5conf *conf, 3558c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list) 3568c2ecf20Sopenharmony_ci{ 3578c2ecf20Sopenharmony_ci struct stripe_head *sh, *t; 3588c2ecf20Sopenharmony_ci int count = 0; 3598c2ecf20Sopenharmony_ci struct llist_node *head; 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci head = llist_del_all(&conf->released_stripes); 3628c2ecf20Sopenharmony_ci head = llist_reverse_order(head); 3638c2ecf20Sopenharmony_ci llist_for_each_entry_safe(sh, t, head, release_list) { 3648c2ecf20Sopenharmony_ci int hash; 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */ 3678c2ecf20Sopenharmony_ci smp_mb(); 3688c2ecf20Sopenharmony_ci clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state); 3698c2ecf20Sopenharmony_ci /* 3708c2ecf20Sopenharmony_ci * Don't worry the bit is set here, because if the bit is set 3718c2ecf20Sopenharmony_ci * again, the count is always > 1. This is true for 3728c2ecf20Sopenharmony_ci * STRIPE_ON_UNPLUG_LIST bit too. 3738c2ecf20Sopenharmony_ci */ 3748c2ecf20Sopenharmony_ci hash = sh->hash_lock_index; 3758c2ecf20Sopenharmony_ci __release_stripe(conf, sh, &temp_inactive_list[hash]); 3768c2ecf20Sopenharmony_ci count++; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci return count; 3808c2ecf20Sopenharmony_ci} 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_civoid raid5_release_stripe(struct stripe_head *sh) 3838c2ecf20Sopenharmony_ci{ 3848c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 3858c2ecf20Sopenharmony_ci unsigned long flags; 3868c2ecf20Sopenharmony_ci struct list_head list; 3878c2ecf20Sopenharmony_ci int hash; 3888c2ecf20Sopenharmony_ci bool wakeup; 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci /* Avoid release_list until the last reference. 3918c2ecf20Sopenharmony_ci */ 3928c2ecf20Sopenharmony_ci if (atomic_add_unless(&sh->count, -1, 1)) 3938c2ecf20Sopenharmony_ci return; 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci if (unlikely(!conf->mddev->thread) || 3968c2ecf20Sopenharmony_ci test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) 3978c2ecf20Sopenharmony_ci goto slow_path; 3988c2ecf20Sopenharmony_ci wakeup = llist_add(&sh->release_list, &conf->released_stripes); 3998c2ecf20Sopenharmony_ci if (wakeup) 4008c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 4018c2ecf20Sopenharmony_ci return; 4028c2ecf20Sopenharmony_cislow_path: 4038c2ecf20Sopenharmony_ci /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ 4048c2ecf20Sopenharmony_ci if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { 4058c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&list); 4068c2ecf20Sopenharmony_ci hash = sh->hash_lock_index; 4078c2ecf20Sopenharmony_ci do_release_stripe(conf, sh, &list); 4088c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 4098c2ecf20Sopenharmony_ci release_inactive_stripe_list(conf, &list, hash); 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_cistatic inline void remove_hash(struct stripe_head *sh) 4148c2ecf20Sopenharmony_ci{ 4158c2ecf20Sopenharmony_ci pr_debug("remove_hash(), stripe %llu\n", 4168c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci hlist_del_init(&sh->hash); 4198c2ecf20Sopenharmony_ci} 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_cistatic inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) 4228c2ecf20Sopenharmony_ci{ 4238c2ecf20Sopenharmony_ci struct hlist_head *hp = stripe_hash(conf, sh->sector); 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci pr_debug("insert_hash(), stripe %llu\n", 4268c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci hlist_add_head(&sh->hash, hp); 4298c2ecf20Sopenharmony_ci} 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci/* find an idle stripe, make sure it is unhashed, and return it. */ 4328c2ecf20Sopenharmony_cistatic struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) 4338c2ecf20Sopenharmony_ci{ 4348c2ecf20Sopenharmony_ci struct stripe_head *sh = NULL; 4358c2ecf20Sopenharmony_ci struct list_head *first; 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci if (list_empty(conf->inactive_list + hash)) 4388c2ecf20Sopenharmony_ci goto out; 4398c2ecf20Sopenharmony_ci first = (conf->inactive_list + hash)->next; 4408c2ecf20Sopenharmony_ci sh = list_entry(first, struct stripe_head, lru); 4418c2ecf20Sopenharmony_ci list_del_init(first); 4428c2ecf20Sopenharmony_ci remove_hash(sh); 4438c2ecf20Sopenharmony_ci atomic_inc(&conf->active_stripes); 4448c2ecf20Sopenharmony_ci BUG_ON(hash != sh->hash_lock_index); 4458c2ecf20Sopenharmony_ci if (list_empty(conf->inactive_list + hash)) 4468c2ecf20Sopenharmony_ci atomic_inc(&conf->empty_inactive_list_nr); 4478c2ecf20Sopenharmony_ciout: 4488c2ecf20Sopenharmony_ci return sh; 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 4528c2ecf20Sopenharmony_cistatic void free_stripe_pages(struct stripe_head *sh) 4538c2ecf20Sopenharmony_ci{ 4548c2ecf20Sopenharmony_ci int i; 4558c2ecf20Sopenharmony_ci struct page *p; 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci /* Have not allocate page pool */ 4588c2ecf20Sopenharmony_ci if (!sh->pages) 4598c2ecf20Sopenharmony_ci return; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci for (i = 0; i < sh->nr_pages; i++) { 4628c2ecf20Sopenharmony_ci p = sh->pages[i]; 4638c2ecf20Sopenharmony_ci if (p) 4648c2ecf20Sopenharmony_ci put_page(p); 4658c2ecf20Sopenharmony_ci sh->pages[i] = NULL; 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci} 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_cistatic int alloc_stripe_pages(struct stripe_head *sh, gfp_t gfp) 4708c2ecf20Sopenharmony_ci{ 4718c2ecf20Sopenharmony_ci int i; 4728c2ecf20Sopenharmony_ci struct page *p; 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci for (i = 0; i < sh->nr_pages; i++) { 4758c2ecf20Sopenharmony_ci /* The page have allocated. */ 4768c2ecf20Sopenharmony_ci if (sh->pages[i]) 4778c2ecf20Sopenharmony_ci continue; 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci p = alloc_page(gfp); 4808c2ecf20Sopenharmony_ci if (!p) { 4818c2ecf20Sopenharmony_ci free_stripe_pages(sh); 4828c2ecf20Sopenharmony_ci return -ENOMEM; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci sh->pages[i] = p; 4858c2ecf20Sopenharmony_ci } 4868c2ecf20Sopenharmony_ci return 0; 4878c2ecf20Sopenharmony_ci} 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_cistatic int 4908c2ecf20Sopenharmony_ciinit_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) 4918c2ecf20Sopenharmony_ci{ 4928c2ecf20Sopenharmony_ci int nr_pages, cnt; 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci if (sh->pages) 4958c2ecf20Sopenharmony_ci return 0; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci /* Each of the sh->dev[i] need one conf->stripe_size */ 4988c2ecf20Sopenharmony_ci cnt = PAGE_SIZE / conf->stripe_size; 4998c2ecf20Sopenharmony_ci nr_pages = (disks + cnt - 1) / cnt; 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 5028c2ecf20Sopenharmony_ci if (!sh->pages) 5038c2ecf20Sopenharmony_ci return -ENOMEM; 5048c2ecf20Sopenharmony_ci sh->nr_pages = nr_pages; 5058c2ecf20Sopenharmony_ci sh->stripes_per_page = cnt; 5068c2ecf20Sopenharmony_ci return 0; 5078c2ecf20Sopenharmony_ci} 5088c2ecf20Sopenharmony_ci#endif 5098c2ecf20Sopenharmony_ci 5108c2ecf20Sopenharmony_cistatic void shrink_buffers(struct stripe_head *sh) 5118c2ecf20Sopenharmony_ci{ 5128c2ecf20Sopenharmony_ci int i; 5138c2ecf20Sopenharmony_ci int num = sh->raid_conf->pool_size; 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci#if PAGE_SIZE == DEFAULT_STRIPE_SIZE 5168c2ecf20Sopenharmony_ci for (i = 0; i < num ; i++) { 5178c2ecf20Sopenharmony_ci struct page *p; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); 5208c2ecf20Sopenharmony_ci p = sh->dev[i].page; 5218c2ecf20Sopenharmony_ci if (!p) 5228c2ecf20Sopenharmony_ci continue; 5238c2ecf20Sopenharmony_ci sh->dev[i].page = NULL; 5248c2ecf20Sopenharmony_ci put_page(p); 5258c2ecf20Sopenharmony_ci } 5268c2ecf20Sopenharmony_ci#else 5278c2ecf20Sopenharmony_ci for (i = 0; i < num; i++) 5288c2ecf20Sopenharmony_ci sh->dev[i].page = NULL; 5298c2ecf20Sopenharmony_ci free_stripe_pages(sh); /* Free pages */ 5308c2ecf20Sopenharmony_ci#endif 5318c2ecf20Sopenharmony_ci} 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_cistatic int grow_buffers(struct stripe_head *sh, gfp_t gfp) 5348c2ecf20Sopenharmony_ci{ 5358c2ecf20Sopenharmony_ci int i; 5368c2ecf20Sopenharmony_ci int num = sh->raid_conf->pool_size; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci#if PAGE_SIZE == DEFAULT_STRIPE_SIZE 5398c2ecf20Sopenharmony_ci for (i = 0; i < num; i++) { 5408c2ecf20Sopenharmony_ci struct page *page; 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci if (!(page = alloc_page(gfp))) { 5438c2ecf20Sopenharmony_ci return 1; 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci sh->dev[i].page = page; 5468c2ecf20Sopenharmony_ci sh->dev[i].orig_page = page; 5478c2ecf20Sopenharmony_ci sh->dev[i].offset = 0; 5488c2ecf20Sopenharmony_ci } 5498c2ecf20Sopenharmony_ci#else 5508c2ecf20Sopenharmony_ci if (alloc_stripe_pages(sh, gfp)) 5518c2ecf20Sopenharmony_ci return -ENOMEM; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci for (i = 0; i < num; i++) { 5548c2ecf20Sopenharmony_ci sh->dev[i].page = raid5_get_dev_page(sh, i); 5558c2ecf20Sopenharmony_ci sh->dev[i].orig_page = sh->dev[i].page; 5568c2ecf20Sopenharmony_ci sh->dev[i].offset = raid5_get_page_offset(sh, i); 5578c2ecf20Sopenharmony_ci } 5588c2ecf20Sopenharmony_ci#endif 5598c2ecf20Sopenharmony_ci return 0; 5608c2ecf20Sopenharmony_ci} 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_cistatic void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, 5638c2ecf20Sopenharmony_ci struct stripe_head *sh); 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_cistatic void init_stripe(struct stripe_head *sh, sector_t sector, int previous) 5668c2ecf20Sopenharmony_ci{ 5678c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 5688c2ecf20Sopenharmony_ci int i, seq; 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci BUG_ON(atomic_read(&sh->count) != 0); 5718c2ecf20Sopenharmony_ci BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 5728c2ecf20Sopenharmony_ci BUG_ON(stripe_operations_active(sh)); 5738c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci pr_debug("init_stripe called, stripe %llu\n", 5768c2ecf20Sopenharmony_ci (unsigned long long)sector); 5778c2ecf20Sopenharmony_ciretry: 5788c2ecf20Sopenharmony_ci seq = read_seqcount_begin(&conf->gen_lock); 5798c2ecf20Sopenharmony_ci sh->generation = conf->generation - previous; 5808c2ecf20Sopenharmony_ci sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; 5818c2ecf20Sopenharmony_ci sh->sector = sector; 5828c2ecf20Sopenharmony_ci stripe_set_idx(sector, conf, previous, sh); 5838c2ecf20Sopenharmony_ci sh->state = 0; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci for (i = sh->disks; i--; ) { 5868c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci if (dev->toread || dev->read || dev->towrite || dev->written || 5898c2ecf20Sopenharmony_ci test_bit(R5_LOCKED, &dev->flags)) { 5908c2ecf20Sopenharmony_ci pr_err("sector=%llx i=%d %p %p %p %p %d\n", 5918c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, i, dev->toread, 5928c2ecf20Sopenharmony_ci dev->read, dev->towrite, dev->written, 5938c2ecf20Sopenharmony_ci test_bit(R5_LOCKED, &dev->flags)); 5948c2ecf20Sopenharmony_ci WARN_ON(1); 5958c2ecf20Sopenharmony_ci } 5968c2ecf20Sopenharmony_ci dev->flags = 0; 5978c2ecf20Sopenharmony_ci dev->sector = raid5_compute_blocknr(sh, i, previous); 5988c2ecf20Sopenharmony_ci } 5998c2ecf20Sopenharmony_ci if (read_seqcount_retry(&conf->gen_lock, seq)) 6008c2ecf20Sopenharmony_ci goto retry; 6018c2ecf20Sopenharmony_ci sh->overwrite_disks = 0; 6028c2ecf20Sopenharmony_ci insert_hash(conf, sh); 6038c2ecf20Sopenharmony_ci sh->cpu = smp_processor_id(); 6048c2ecf20Sopenharmony_ci set_bit(STRIPE_BATCH_READY, &sh->state); 6058c2ecf20Sopenharmony_ci} 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_cistatic struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, 6088c2ecf20Sopenharmony_ci short generation) 6098c2ecf20Sopenharmony_ci{ 6108c2ecf20Sopenharmony_ci struct stripe_head *sh; 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); 6138c2ecf20Sopenharmony_ci hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) 6148c2ecf20Sopenharmony_ci if (sh->sector == sector && sh->generation == generation) 6158c2ecf20Sopenharmony_ci return sh; 6168c2ecf20Sopenharmony_ci pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); 6178c2ecf20Sopenharmony_ci return NULL; 6188c2ecf20Sopenharmony_ci} 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci/* 6218c2ecf20Sopenharmony_ci * Need to check if array has failed when deciding whether to: 6228c2ecf20Sopenharmony_ci * - start an array 6238c2ecf20Sopenharmony_ci * - remove non-faulty devices 6248c2ecf20Sopenharmony_ci * - add a spare 6258c2ecf20Sopenharmony_ci * - allow a reshape 6268c2ecf20Sopenharmony_ci * This determination is simple when no reshape is happening. 6278c2ecf20Sopenharmony_ci * However if there is a reshape, we need to carefully check 6288c2ecf20Sopenharmony_ci * both the before and after sections. 6298c2ecf20Sopenharmony_ci * This is because some failed devices may only affect one 6308c2ecf20Sopenharmony_ci * of the two sections, and some non-in_sync devices may 6318c2ecf20Sopenharmony_ci * be insync in the section most affected by failed devices. 6328c2ecf20Sopenharmony_ci */ 6338c2ecf20Sopenharmony_ciint raid5_calc_degraded(struct r5conf *conf) 6348c2ecf20Sopenharmony_ci{ 6358c2ecf20Sopenharmony_ci int degraded, degraded2; 6368c2ecf20Sopenharmony_ci int i; 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci rcu_read_lock(); 6398c2ecf20Sopenharmony_ci degraded = 0; 6408c2ecf20Sopenharmony_ci for (i = 0; i < conf->previous_raid_disks; i++) { 6418c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); 6428c2ecf20Sopenharmony_ci if (rdev && test_bit(Faulty, &rdev->flags)) 6438c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].replacement); 6448c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags)) 6458c2ecf20Sopenharmony_ci degraded++; 6468c2ecf20Sopenharmony_ci else if (test_bit(In_sync, &rdev->flags)) 6478c2ecf20Sopenharmony_ci ; 6488c2ecf20Sopenharmony_ci else 6498c2ecf20Sopenharmony_ci /* not in-sync or faulty. 6508c2ecf20Sopenharmony_ci * If the reshape increases the number of devices, 6518c2ecf20Sopenharmony_ci * this is being recovered by the reshape, so 6528c2ecf20Sopenharmony_ci * this 'previous' section is not in_sync. 6538c2ecf20Sopenharmony_ci * If the number of devices is being reduced however, 6548c2ecf20Sopenharmony_ci * the device can only be part of the array if 6558c2ecf20Sopenharmony_ci * we are reverting a reshape, so this section will 6568c2ecf20Sopenharmony_ci * be in-sync. 6578c2ecf20Sopenharmony_ci */ 6588c2ecf20Sopenharmony_ci if (conf->raid_disks >= conf->previous_raid_disks) 6598c2ecf20Sopenharmony_ci degraded++; 6608c2ecf20Sopenharmony_ci } 6618c2ecf20Sopenharmony_ci rcu_read_unlock(); 6628c2ecf20Sopenharmony_ci if (conf->raid_disks == conf->previous_raid_disks) 6638c2ecf20Sopenharmony_ci return degraded; 6648c2ecf20Sopenharmony_ci rcu_read_lock(); 6658c2ecf20Sopenharmony_ci degraded2 = 0; 6668c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 6678c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); 6688c2ecf20Sopenharmony_ci if (rdev && test_bit(Faulty, &rdev->flags)) 6698c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].replacement); 6708c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags)) 6718c2ecf20Sopenharmony_ci degraded2++; 6728c2ecf20Sopenharmony_ci else if (test_bit(In_sync, &rdev->flags)) 6738c2ecf20Sopenharmony_ci ; 6748c2ecf20Sopenharmony_ci else 6758c2ecf20Sopenharmony_ci /* not in-sync or faulty. 6768c2ecf20Sopenharmony_ci * If reshape increases the number of devices, this 6778c2ecf20Sopenharmony_ci * section has already been recovered, else it 6788c2ecf20Sopenharmony_ci * almost certainly hasn't. 6798c2ecf20Sopenharmony_ci */ 6808c2ecf20Sopenharmony_ci if (conf->raid_disks <= conf->previous_raid_disks) 6818c2ecf20Sopenharmony_ci degraded2++; 6828c2ecf20Sopenharmony_ci } 6838c2ecf20Sopenharmony_ci rcu_read_unlock(); 6848c2ecf20Sopenharmony_ci if (degraded2 > degraded) 6858c2ecf20Sopenharmony_ci return degraded2; 6868c2ecf20Sopenharmony_ci return degraded; 6878c2ecf20Sopenharmony_ci} 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_cistatic bool has_failed(struct r5conf *conf) 6908c2ecf20Sopenharmony_ci{ 6918c2ecf20Sopenharmony_ci int degraded = conf->mddev->degraded; 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci if (test_bit(MD_BROKEN, &conf->mddev->flags)) 6948c2ecf20Sopenharmony_ci return true; 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_ci if (conf->mddev->reshape_position != MaxSector) 6978c2ecf20Sopenharmony_ci degraded = raid5_calc_degraded(conf); 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci return degraded > conf->max_degraded; 7008c2ecf20Sopenharmony_ci} 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_cistruct stripe_head * 7038c2ecf20Sopenharmony_ciraid5_get_active_stripe(struct r5conf *conf, sector_t sector, 7048c2ecf20Sopenharmony_ci int previous, int noblock, int noquiesce) 7058c2ecf20Sopenharmony_ci{ 7068c2ecf20Sopenharmony_ci struct stripe_head *sh; 7078c2ecf20Sopenharmony_ci int hash = stripe_hash_locks_hash(conf, sector); 7088c2ecf20Sopenharmony_ci int inc_empty_inactive_list_flag; 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks + hash); 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci do { 7158c2ecf20Sopenharmony_ci wait_event_lock_irq(conf->wait_for_quiescent, 7168c2ecf20Sopenharmony_ci conf->quiesce == 0 || noquiesce, 7178c2ecf20Sopenharmony_ci *(conf->hash_locks + hash)); 7188c2ecf20Sopenharmony_ci sh = __find_stripe(conf, sector, conf->generation - previous); 7198c2ecf20Sopenharmony_ci if (!sh) { 7208c2ecf20Sopenharmony_ci if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { 7218c2ecf20Sopenharmony_ci sh = get_free_stripe(conf, hash); 7228c2ecf20Sopenharmony_ci if (!sh && !test_bit(R5_DID_ALLOC, 7238c2ecf20Sopenharmony_ci &conf->cache_state)) 7248c2ecf20Sopenharmony_ci set_bit(R5_ALLOC_MORE, 7258c2ecf20Sopenharmony_ci &conf->cache_state); 7268c2ecf20Sopenharmony_ci } 7278c2ecf20Sopenharmony_ci if (noblock && sh == NULL) 7288c2ecf20Sopenharmony_ci break; 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci r5c_check_stripe_cache_usage(conf); 7318c2ecf20Sopenharmony_ci if (!sh) { 7328c2ecf20Sopenharmony_ci set_bit(R5_INACTIVE_BLOCKED, 7338c2ecf20Sopenharmony_ci &conf->cache_state); 7348c2ecf20Sopenharmony_ci r5l_wake_reclaim(conf->log, 0); 7358c2ecf20Sopenharmony_ci wait_event_lock_irq( 7368c2ecf20Sopenharmony_ci conf->wait_for_stripe, 7378c2ecf20Sopenharmony_ci !list_empty(conf->inactive_list + hash) && 7388c2ecf20Sopenharmony_ci (atomic_read(&conf->active_stripes) 7398c2ecf20Sopenharmony_ci < (conf->max_nr_stripes * 3 / 4) 7408c2ecf20Sopenharmony_ci || !test_bit(R5_INACTIVE_BLOCKED, 7418c2ecf20Sopenharmony_ci &conf->cache_state)), 7428c2ecf20Sopenharmony_ci *(conf->hash_locks + hash)); 7438c2ecf20Sopenharmony_ci clear_bit(R5_INACTIVE_BLOCKED, 7448c2ecf20Sopenharmony_ci &conf->cache_state); 7458c2ecf20Sopenharmony_ci } else { 7468c2ecf20Sopenharmony_ci init_stripe(sh, sector, previous); 7478c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 7488c2ecf20Sopenharmony_ci } 7498c2ecf20Sopenharmony_ci } else if (!atomic_inc_not_zero(&sh->count)) { 7508c2ecf20Sopenharmony_ci spin_lock(&conf->device_lock); 7518c2ecf20Sopenharmony_ci if (!atomic_read(&sh->count)) { 7528c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_HANDLE, &sh->state)) 7538c2ecf20Sopenharmony_ci atomic_inc(&conf->active_stripes); 7548c2ecf20Sopenharmony_ci BUG_ON(list_empty(&sh->lru) && 7558c2ecf20Sopenharmony_ci !test_bit(STRIPE_EXPANDING, &sh->state)); 7568c2ecf20Sopenharmony_ci inc_empty_inactive_list_flag = 0; 7578c2ecf20Sopenharmony_ci if (!list_empty(conf->inactive_list + hash)) 7588c2ecf20Sopenharmony_ci inc_empty_inactive_list_flag = 1; 7598c2ecf20Sopenharmony_ci list_del_init(&sh->lru); 7608c2ecf20Sopenharmony_ci if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) 7618c2ecf20Sopenharmony_ci atomic_inc(&conf->empty_inactive_list_nr); 7628c2ecf20Sopenharmony_ci if (sh->group) { 7638c2ecf20Sopenharmony_ci sh->group->stripes_cnt--; 7648c2ecf20Sopenharmony_ci sh->group = NULL; 7658c2ecf20Sopenharmony_ci } 7668c2ecf20Sopenharmony_ci } 7678c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 7688c2ecf20Sopenharmony_ci spin_unlock(&conf->device_lock); 7698c2ecf20Sopenharmony_ci } 7708c2ecf20Sopenharmony_ci } while (sh == NULL); 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks + hash); 7738c2ecf20Sopenharmony_ci return sh; 7748c2ecf20Sopenharmony_ci} 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_cistatic bool is_full_stripe_write(struct stripe_head *sh) 7778c2ecf20Sopenharmony_ci{ 7788c2ecf20Sopenharmony_ci BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded)); 7798c2ecf20Sopenharmony_ci return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded); 7808c2ecf20Sopenharmony_ci} 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_cistatic void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) 7838c2ecf20Sopenharmony_ci __acquires(&sh1->stripe_lock) 7848c2ecf20Sopenharmony_ci __acquires(&sh2->stripe_lock) 7858c2ecf20Sopenharmony_ci{ 7868c2ecf20Sopenharmony_ci if (sh1 > sh2) { 7878c2ecf20Sopenharmony_ci spin_lock_irq(&sh2->stripe_lock); 7888c2ecf20Sopenharmony_ci spin_lock_nested(&sh1->stripe_lock, 1); 7898c2ecf20Sopenharmony_ci } else { 7908c2ecf20Sopenharmony_ci spin_lock_irq(&sh1->stripe_lock); 7918c2ecf20Sopenharmony_ci spin_lock_nested(&sh2->stripe_lock, 1); 7928c2ecf20Sopenharmony_ci } 7938c2ecf20Sopenharmony_ci} 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_cistatic void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) 7968c2ecf20Sopenharmony_ci __releases(&sh1->stripe_lock) 7978c2ecf20Sopenharmony_ci __releases(&sh2->stripe_lock) 7988c2ecf20Sopenharmony_ci{ 7998c2ecf20Sopenharmony_ci spin_unlock(&sh1->stripe_lock); 8008c2ecf20Sopenharmony_ci spin_unlock_irq(&sh2->stripe_lock); 8018c2ecf20Sopenharmony_ci} 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci/* Only freshly new full stripe normal write stripe can be added to a batch list */ 8048c2ecf20Sopenharmony_cistatic bool stripe_can_batch(struct stripe_head *sh) 8058c2ecf20Sopenharmony_ci{ 8068c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci if (raid5_has_log(conf) || raid5_has_ppl(conf)) 8098c2ecf20Sopenharmony_ci return false; 8108c2ecf20Sopenharmony_ci return test_bit(STRIPE_BATCH_READY, &sh->state) && 8118c2ecf20Sopenharmony_ci !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && 8128c2ecf20Sopenharmony_ci is_full_stripe_write(sh); 8138c2ecf20Sopenharmony_ci} 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci/* we only do back search */ 8168c2ecf20Sopenharmony_cistatic void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh) 8178c2ecf20Sopenharmony_ci{ 8188c2ecf20Sopenharmony_ci struct stripe_head *head; 8198c2ecf20Sopenharmony_ci sector_t head_sector, tmp_sec; 8208c2ecf20Sopenharmony_ci int hash; 8218c2ecf20Sopenharmony_ci int dd_idx; 8228c2ecf20Sopenharmony_ci int inc_empty_inactive_list_flag; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ 8258c2ecf20Sopenharmony_ci tmp_sec = sh->sector; 8268c2ecf20Sopenharmony_ci if (!sector_div(tmp_sec, conf->chunk_sectors)) 8278c2ecf20Sopenharmony_ci return; 8288c2ecf20Sopenharmony_ci head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf); 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_ci hash = stripe_hash_locks_hash(conf, head_sector); 8318c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks + hash); 8328c2ecf20Sopenharmony_ci head = __find_stripe(conf, head_sector, conf->generation); 8338c2ecf20Sopenharmony_ci if (head && !atomic_inc_not_zero(&head->count)) { 8348c2ecf20Sopenharmony_ci spin_lock(&conf->device_lock); 8358c2ecf20Sopenharmony_ci if (!atomic_read(&head->count)) { 8368c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_HANDLE, &head->state)) 8378c2ecf20Sopenharmony_ci atomic_inc(&conf->active_stripes); 8388c2ecf20Sopenharmony_ci BUG_ON(list_empty(&head->lru) && 8398c2ecf20Sopenharmony_ci !test_bit(STRIPE_EXPANDING, &head->state)); 8408c2ecf20Sopenharmony_ci inc_empty_inactive_list_flag = 0; 8418c2ecf20Sopenharmony_ci if (!list_empty(conf->inactive_list + hash)) 8428c2ecf20Sopenharmony_ci inc_empty_inactive_list_flag = 1; 8438c2ecf20Sopenharmony_ci list_del_init(&head->lru); 8448c2ecf20Sopenharmony_ci if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) 8458c2ecf20Sopenharmony_ci atomic_inc(&conf->empty_inactive_list_nr); 8468c2ecf20Sopenharmony_ci if (head->group) { 8478c2ecf20Sopenharmony_ci head->group->stripes_cnt--; 8488c2ecf20Sopenharmony_ci head->group = NULL; 8498c2ecf20Sopenharmony_ci } 8508c2ecf20Sopenharmony_ci } 8518c2ecf20Sopenharmony_ci atomic_inc(&head->count); 8528c2ecf20Sopenharmony_ci spin_unlock(&conf->device_lock); 8538c2ecf20Sopenharmony_ci } 8548c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks + hash); 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci if (!head) 8578c2ecf20Sopenharmony_ci return; 8588c2ecf20Sopenharmony_ci if (!stripe_can_batch(head)) 8598c2ecf20Sopenharmony_ci goto out; 8608c2ecf20Sopenharmony_ci 8618c2ecf20Sopenharmony_ci lock_two_stripes(head, sh); 8628c2ecf20Sopenharmony_ci /* clear_batch_ready clear the flag */ 8638c2ecf20Sopenharmony_ci if (!stripe_can_batch(head) || !stripe_can_batch(sh)) 8648c2ecf20Sopenharmony_ci goto unlock_out; 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci if (sh->batch_head) 8678c2ecf20Sopenharmony_ci goto unlock_out; 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci dd_idx = 0; 8708c2ecf20Sopenharmony_ci while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) 8718c2ecf20Sopenharmony_ci dd_idx++; 8728c2ecf20Sopenharmony_ci if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf || 8738c2ecf20Sopenharmony_ci bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) 8748c2ecf20Sopenharmony_ci goto unlock_out; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci if (head->batch_head) { 8778c2ecf20Sopenharmony_ci spin_lock(&head->batch_head->batch_lock); 8788c2ecf20Sopenharmony_ci /* This batch list is already running */ 8798c2ecf20Sopenharmony_ci if (!stripe_can_batch(head)) { 8808c2ecf20Sopenharmony_ci spin_unlock(&head->batch_head->batch_lock); 8818c2ecf20Sopenharmony_ci goto unlock_out; 8828c2ecf20Sopenharmony_ci } 8838c2ecf20Sopenharmony_ci /* 8848c2ecf20Sopenharmony_ci * We must assign batch_head of this stripe within the 8858c2ecf20Sopenharmony_ci * batch_lock, otherwise clear_batch_ready of batch head 8868c2ecf20Sopenharmony_ci * stripe could clear BATCH_READY bit of this stripe and 8878c2ecf20Sopenharmony_ci * this stripe->batch_head doesn't get assigned, which 8888c2ecf20Sopenharmony_ci * could confuse clear_batch_ready for this stripe 8898c2ecf20Sopenharmony_ci */ 8908c2ecf20Sopenharmony_ci sh->batch_head = head->batch_head; 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci /* 8938c2ecf20Sopenharmony_ci * at this point, head's BATCH_READY could be cleared, but we 8948c2ecf20Sopenharmony_ci * can still add the stripe to batch list 8958c2ecf20Sopenharmony_ci */ 8968c2ecf20Sopenharmony_ci list_add(&sh->batch_list, &head->batch_list); 8978c2ecf20Sopenharmony_ci spin_unlock(&head->batch_head->batch_lock); 8988c2ecf20Sopenharmony_ci } else { 8998c2ecf20Sopenharmony_ci head->batch_head = head; 9008c2ecf20Sopenharmony_ci sh->batch_head = head->batch_head; 9018c2ecf20Sopenharmony_ci spin_lock(&head->batch_lock); 9028c2ecf20Sopenharmony_ci list_add_tail(&sh->batch_list, &head->batch_list); 9038c2ecf20Sopenharmony_ci spin_unlock(&head->batch_lock); 9048c2ecf20Sopenharmony_ci } 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 9078c2ecf20Sopenharmony_ci if (atomic_dec_return(&conf->preread_active_stripes) 9088c2ecf20Sopenharmony_ci < IO_THRESHOLD) 9098c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 9108c2ecf20Sopenharmony_ci 9118c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { 9128c2ecf20Sopenharmony_ci int seq = sh->bm_seq; 9138c2ecf20Sopenharmony_ci if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && 9148c2ecf20Sopenharmony_ci sh->batch_head->bm_seq > seq) 9158c2ecf20Sopenharmony_ci seq = sh->batch_head->bm_seq; 9168c2ecf20Sopenharmony_ci set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); 9178c2ecf20Sopenharmony_ci sh->batch_head->bm_seq = seq; 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 9218c2ecf20Sopenharmony_ciunlock_out: 9228c2ecf20Sopenharmony_ci unlock_two_stripes(head, sh); 9238c2ecf20Sopenharmony_ciout: 9248c2ecf20Sopenharmony_ci raid5_release_stripe(head); 9258c2ecf20Sopenharmony_ci} 9268c2ecf20Sopenharmony_ci 9278c2ecf20Sopenharmony_ci/* Determine if 'data_offset' or 'new_data_offset' should be used 9288c2ecf20Sopenharmony_ci * in this stripe_head. 9298c2ecf20Sopenharmony_ci */ 9308c2ecf20Sopenharmony_cistatic int use_new_offset(struct r5conf *conf, struct stripe_head *sh) 9318c2ecf20Sopenharmony_ci{ 9328c2ecf20Sopenharmony_ci sector_t progress = conf->reshape_progress; 9338c2ecf20Sopenharmony_ci /* Need a memory barrier to make sure we see the value 9348c2ecf20Sopenharmony_ci * of conf->generation, or ->data_offset that was set before 9358c2ecf20Sopenharmony_ci * reshape_progress was updated. 9368c2ecf20Sopenharmony_ci */ 9378c2ecf20Sopenharmony_ci smp_rmb(); 9388c2ecf20Sopenharmony_ci if (progress == MaxSector) 9398c2ecf20Sopenharmony_ci return 0; 9408c2ecf20Sopenharmony_ci if (sh->generation == conf->generation - 1) 9418c2ecf20Sopenharmony_ci return 0; 9428c2ecf20Sopenharmony_ci /* We are in a reshape, and this is a new-generation stripe, 9438c2ecf20Sopenharmony_ci * so use new_data_offset. 9448c2ecf20Sopenharmony_ci */ 9458c2ecf20Sopenharmony_ci return 1; 9468c2ecf20Sopenharmony_ci} 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_cistatic void dispatch_bio_list(struct bio_list *tmp) 9498c2ecf20Sopenharmony_ci{ 9508c2ecf20Sopenharmony_ci struct bio *bio; 9518c2ecf20Sopenharmony_ci 9528c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(tmp))) 9538c2ecf20Sopenharmony_ci submit_bio_noacct(bio); 9548c2ecf20Sopenharmony_ci} 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_cistatic int cmp_stripe(void *priv, const struct list_head *a, 9578c2ecf20Sopenharmony_ci const struct list_head *b) 9588c2ecf20Sopenharmony_ci{ 9598c2ecf20Sopenharmony_ci const struct r5pending_data *da = list_entry(a, 9608c2ecf20Sopenharmony_ci struct r5pending_data, sibling); 9618c2ecf20Sopenharmony_ci const struct r5pending_data *db = list_entry(b, 9628c2ecf20Sopenharmony_ci struct r5pending_data, sibling); 9638c2ecf20Sopenharmony_ci if (da->sector > db->sector) 9648c2ecf20Sopenharmony_ci return 1; 9658c2ecf20Sopenharmony_ci if (da->sector < db->sector) 9668c2ecf20Sopenharmony_ci return -1; 9678c2ecf20Sopenharmony_ci return 0; 9688c2ecf20Sopenharmony_ci} 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_cistatic void dispatch_defer_bios(struct r5conf *conf, int target, 9718c2ecf20Sopenharmony_ci struct bio_list *list) 9728c2ecf20Sopenharmony_ci{ 9738c2ecf20Sopenharmony_ci struct r5pending_data *data; 9748c2ecf20Sopenharmony_ci struct list_head *first, *next = NULL; 9758c2ecf20Sopenharmony_ci int cnt = 0; 9768c2ecf20Sopenharmony_ci 9778c2ecf20Sopenharmony_ci if (conf->pending_data_cnt == 0) 9788c2ecf20Sopenharmony_ci return; 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci list_sort(NULL, &conf->pending_list, cmp_stripe); 9818c2ecf20Sopenharmony_ci 9828c2ecf20Sopenharmony_ci first = conf->pending_list.next; 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci /* temporarily move the head */ 9858c2ecf20Sopenharmony_ci if (conf->next_pending_data) 9868c2ecf20Sopenharmony_ci list_move_tail(&conf->pending_list, 9878c2ecf20Sopenharmony_ci &conf->next_pending_data->sibling); 9888c2ecf20Sopenharmony_ci 9898c2ecf20Sopenharmony_ci while (!list_empty(&conf->pending_list)) { 9908c2ecf20Sopenharmony_ci data = list_first_entry(&conf->pending_list, 9918c2ecf20Sopenharmony_ci struct r5pending_data, sibling); 9928c2ecf20Sopenharmony_ci if (&data->sibling == first) 9938c2ecf20Sopenharmony_ci first = data->sibling.next; 9948c2ecf20Sopenharmony_ci next = data->sibling.next; 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci bio_list_merge(list, &data->bios); 9978c2ecf20Sopenharmony_ci list_move(&data->sibling, &conf->free_list); 9988c2ecf20Sopenharmony_ci cnt++; 9998c2ecf20Sopenharmony_ci if (cnt >= target) 10008c2ecf20Sopenharmony_ci break; 10018c2ecf20Sopenharmony_ci } 10028c2ecf20Sopenharmony_ci conf->pending_data_cnt -= cnt; 10038c2ecf20Sopenharmony_ci BUG_ON(conf->pending_data_cnt < 0 || cnt < target); 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ci if (next != &conf->pending_list) 10068c2ecf20Sopenharmony_ci conf->next_pending_data = list_entry(next, 10078c2ecf20Sopenharmony_ci struct r5pending_data, sibling); 10088c2ecf20Sopenharmony_ci else 10098c2ecf20Sopenharmony_ci conf->next_pending_data = NULL; 10108c2ecf20Sopenharmony_ci /* list isn't empty */ 10118c2ecf20Sopenharmony_ci if (first != &conf->pending_list) 10128c2ecf20Sopenharmony_ci list_move_tail(&conf->pending_list, first); 10138c2ecf20Sopenharmony_ci} 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_cistatic void flush_deferred_bios(struct r5conf *conf) 10168c2ecf20Sopenharmony_ci{ 10178c2ecf20Sopenharmony_ci struct bio_list tmp = BIO_EMPTY_LIST; 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_ci if (conf->pending_data_cnt == 0) 10208c2ecf20Sopenharmony_ci return; 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci spin_lock(&conf->pending_bios_lock); 10238c2ecf20Sopenharmony_ci dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp); 10248c2ecf20Sopenharmony_ci BUG_ON(conf->pending_data_cnt != 0); 10258c2ecf20Sopenharmony_ci spin_unlock(&conf->pending_bios_lock); 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_ci dispatch_bio_list(&tmp); 10288c2ecf20Sopenharmony_ci} 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_cistatic void defer_issue_bios(struct r5conf *conf, sector_t sector, 10318c2ecf20Sopenharmony_ci struct bio_list *bios) 10328c2ecf20Sopenharmony_ci{ 10338c2ecf20Sopenharmony_ci struct bio_list tmp = BIO_EMPTY_LIST; 10348c2ecf20Sopenharmony_ci struct r5pending_data *ent; 10358c2ecf20Sopenharmony_ci 10368c2ecf20Sopenharmony_ci spin_lock(&conf->pending_bios_lock); 10378c2ecf20Sopenharmony_ci ent = list_first_entry(&conf->free_list, struct r5pending_data, 10388c2ecf20Sopenharmony_ci sibling); 10398c2ecf20Sopenharmony_ci list_move_tail(&ent->sibling, &conf->pending_list); 10408c2ecf20Sopenharmony_ci ent->sector = sector; 10418c2ecf20Sopenharmony_ci bio_list_init(&ent->bios); 10428c2ecf20Sopenharmony_ci bio_list_merge(&ent->bios, bios); 10438c2ecf20Sopenharmony_ci conf->pending_data_cnt++; 10448c2ecf20Sopenharmony_ci if (conf->pending_data_cnt >= PENDING_IO_MAX) 10458c2ecf20Sopenharmony_ci dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp); 10468c2ecf20Sopenharmony_ci 10478c2ecf20Sopenharmony_ci spin_unlock(&conf->pending_bios_lock); 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_ci dispatch_bio_list(&tmp); 10508c2ecf20Sopenharmony_ci} 10518c2ecf20Sopenharmony_ci 10528c2ecf20Sopenharmony_cistatic void 10538c2ecf20Sopenharmony_ciraid5_end_read_request(struct bio *bi); 10548c2ecf20Sopenharmony_cistatic void 10558c2ecf20Sopenharmony_ciraid5_end_write_request(struct bio *bi); 10568c2ecf20Sopenharmony_ci 10578c2ecf20Sopenharmony_cistatic void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) 10588c2ecf20Sopenharmony_ci{ 10598c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 10608c2ecf20Sopenharmony_ci int i, disks = sh->disks; 10618c2ecf20Sopenharmony_ci struct stripe_head *head_sh = sh; 10628c2ecf20Sopenharmony_ci struct bio_list pending_bios = BIO_EMPTY_LIST; 10638c2ecf20Sopenharmony_ci bool should_defer; 10648c2ecf20Sopenharmony_ci 10658c2ecf20Sopenharmony_ci might_sleep(); 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci if (log_stripe(sh, s) == 0) 10688c2ecf20Sopenharmony_ci return; 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_ci should_defer = conf->batch_bio_dispatch && conf->group_cnt; 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 10738c2ecf20Sopenharmony_ci int op, op_flags = 0; 10748c2ecf20Sopenharmony_ci int replace_only = 0; 10758c2ecf20Sopenharmony_ci struct bio *bi, *rbi; 10768c2ecf20Sopenharmony_ci struct md_rdev *rdev, *rrdev = NULL; 10778c2ecf20Sopenharmony_ci 10788c2ecf20Sopenharmony_ci sh = head_sh; 10798c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { 10808c2ecf20Sopenharmony_ci op = REQ_OP_WRITE; 10818c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) 10828c2ecf20Sopenharmony_ci op_flags = REQ_FUA; 10838c2ecf20Sopenharmony_ci if (test_bit(R5_Discard, &sh->dev[i].flags)) 10848c2ecf20Sopenharmony_ci op = REQ_OP_DISCARD; 10858c2ecf20Sopenharmony_ci } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) 10868c2ecf20Sopenharmony_ci op = REQ_OP_READ; 10878c2ecf20Sopenharmony_ci else if (test_and_clear_bit(R5_WantReplace, 10888c2ecf20Sopenharmony_ci &sh->dev[i].flags)) { 10898c2ecf20Sopenharmony_ci op = REQ_OP_WRITE; 10908c2ecf20Sopenharmony_ci replace_only = 1; 10918c2ecf20Sopenharmony_ci } else 10928c2ecf20Sopenharmony_ci continue; 10938c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags)) 10948c2ecf20Sopenharmony_ci op_flags |= REQ_SYNC; 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ciagain: 10978c2ecf20Sopenharmony_ci bi = &sh->dev[i].req; 10988c2ecf20Sopenharmony_ci rbi = &sh->dev[i].rreq; /* For writing to replacement */ 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci rcu_read_lock(); 11018c2ecf20Sopenharmony_ci rrdev = rcu_dereference(conf->disks[i].replacement); 11028c2ecf20Sopenharmony_ci smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ 11038c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].rdev); 11048c2ecf20Sopenharmony_ci if (!rdev) { 11058c2ecf20Sopenharmony_ci rdev = rrdev; 11068c2ecf20Sopenharmony_ci rrdev = NULL; 11078c2ecf20Sopenharmony_ci } 11088c2ecf20Sopenharmony_ci if (op_is_write(op)) { 11098c2ecf20Sopenharmony_ci if (replace_only) 11108c2ecf20Sopenharmony_ci rdev = NULL; 11118c2ecf20Sopenharmony_ci if (rdev == rrdev) 11128c2ecf20Sopenharmony_ci /* We raced and saw duplicates */ 11138c2ecf20Sopenharmony_ci rrdev = NULL; 11148c2ecf20Sopenharmony_ci } else { 11158c2ecf20Sopenharmony_ci if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev) 11168c2ecf20Sopenharmony_ci rdev = rrdev; 11178c2ecf20Sopenharmony_ci rrdev = NULL; 11188c2ecf20Sopenharmony_ci } 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci if (rdev && test_bit(Faulty, &rdev->flags)) 11218c2ecf20Sopenharmony_ci rdev = NULL; 11228c2ecf20Sopenharmony_ci if (rdev) 11238c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 11248c2ecf20Sopenharmony_ci if (rrdev && test_bit(Faulty, &rrdev->flags)) 11258c2ecf20Sopenharmony_ci rrdev = NULL; 11268c2ecf20Sopenharmony_ci if (rrdev) 11278c2ecf20Sopenharmony_ci atomic_inc(&rrdev->nr_pending); 11288c2ecf20Sopenharmony_ci rcu_read_unlock(); 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci /* We have already checked bad blocks for reads. Now 11318c2ecf20Sopenharmony_ci * need to check for writes. We never accept write errors 11328c2ecf20Sopenharmony_ci * on the replacement, so we don't to check rrdev. 11338c2ecf20Sopenharmony_ci */ 11348c2ecf20Sopenharmony_ci while (op_is_write(op) && rdev && 11358c2ecf20Sopenharmony_ci test_bit(WriteErrorSeen, &rdev->flags)) { 11368c2ecf20Sopenharmony_ci sector_t first_bad; 11378c2ecf20Sopenharmony_ci int bad_sectors; 11388c2ecf20Sopenharmony_ci int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 11398c2ecf20Sopenharmony_ci &first_bad, &bad_sectors); 11408c2ecf20Sopenharmony_ci if (!bad) 11418c2ecf20Sopenharmony_ci break; 11428c2ecf20Sopenharmony_ci 11438c2ecf20Sopenharmony_ci if (bad < 0) { 11448c2ecf20Sopenharmony_ci set_bit(BlockedBadBlocks, &rdev->flags); 11458c2ecf20Sopenharmony_ci if (!conf->mddev->external && 11468c2ecf20Sopenharmony_ci conf->mddev->sb_flags) { 11478c2ecf20Sopenharmony_ci /* It is very unlikely, but we might 11488c2ecf20Sopenharmony_ci * still need to write out the 11498c2ecf20Sopenharmony_ci * bad block log - better give it 11508c2ecf20Sopenharmony_ci * a chance*/ 11518c2ecf20Sopenharmony_ci md_check_recovery(conf->mddev); 11528c2ecf20Sopenharmony_ci } 11538c2ecf20Sopenharmony_ci /* 11548c2ecf20Sopenharmony_ci * Because md_wait_for_blocked_rdev 11558c2ecf20Sopenharmony_ci * will dec nr_pending, we must 11568c2ecf20Sopenharmony_ci * increment it first. 11578c2ecf20Sopenharmony_ci */ 11588c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 11598c2ecf20Sopenharmony_ci md_wait_for_blocked_rdev(rdev, conf->mddev); 11608c2ecf20Sopenharmony_ci } else { 11618c2ecf20Sopenharmony_ci /* Acknowledged bad block - skip the write */ 11628c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 11638c2ecf20Sopenharmony_ci rdev = NULL; 11648c2ecf20Sopenharmony_ci } 11658c2ecf20Sopenharmony_ci } 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci if (rdev) { 11688c2ecf20Sopenharmony_ci if (s->syncing || s->expanding || s->expanded 11698c2ecf20Sopenharmony_ci || s->replacing) 11708c2ecf20Sopenharmony_ci md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci set_bit(STRIPE_IO_STARTED, &sh->state); 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_ci bio_set_dev(bi, rdev->bdev); 11758c2ecf20Sopenharmony_ci bio_set_op_attrs(bi, op, op_flags); 11768c2ecf20Sopenharmony_ci bi->bi_end_io = op_is_write(op) 11778c2ecf20Sopenharmony_ci ? raid5_end_write_request 11788c2ecf20Sopenharmony_ci : raid5_end_read_request; 11798c2ecf20Sopenharmony_ci bi->bi_private = sh; 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_ci pr_debug("%s: for %llu schedule op %d on disc %d\n", 11828c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, 11838c2ecf20Sopenharmony_ci bi->bi_opf, i); 11848c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 11858c2ecf20Sopenharmony_ci if (sh != head_sh) 11868c2ecf20Sopenharmony_ci atomic_inc(&head_sh->count); 11878c2ecf20Sopenharmony_ci if (use_new_offset(conf, sh)) 11888c2ecf20Sopenharmony_ci bi->bi_iter.bi_sector = (sh->sector 11898c2ecf20Sopenharmony_ci + rdev->new_data_offset); 11908c2ecf20Sopenharmony_ci else 11918c2ecf20Sopenharmony_ci bi->bi_iter.bi_sector = (sh->sector 11928c2ecf20Sopenharmony_ci + rdev->data_offset); 11938c2ecf20Sopenharmony_ci if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags)) 11948c2ecf20Sopenharmony_ci bi->bi_opf |= REQ_NOMERGE; 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) 11978c2ecf20Sopenharmony_ci WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci if (!op_is_write(op) && 12008c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &sh->dev[i].flags)) 12018c2ecf20Sopenharmony_ci /* 12028c2ecf20Sopenharmony_ci * issuing read for a page in journal, this 12038c2ecf20Sopenharmony_ci * must be preparing for prexor in rmw; read 12048c2ecf20Sopenharmony_ci * the data into orig_page 12058c2ecf20Sopenharmony_ci */ 12068c2ecf20Sopenharmony_ci sh->dev[i].vec.bv_page = sh->dev[i].orig_page; 12078c2ecf20Sopenharmony_ci else 12088c2ecf20Sopenharmony_ci sh->dev[i].vec.bv_page = sh->dev[i].page; 12098c2ecf20Sopenharmony_ci bi->bi_vcnt = 1; 12108c2ecf20Sopenharmony_ci bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); 12118c2ecf20Sopenharmony_ci bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; 12128c2ecf20Sopenharmony_ci bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); 12138c2ecf20Sopenharmony_ci bi->bi_write_hint = sh->dev[i].write_hint; 12148c2ecf20Sopenharmony_ci if (!rrdev) 12158c2ecf20Sopenharmony_ci sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; 12168c2ecf20Sopenharmony_ci /* 12178c2ecf20Sopenharmony_ci * If this is discard request, set bi_vcnt 0. We don't 12188c2ecf20Sopenharmony_ci * want to confuse SCSI because SCSI will replace payload 12198c2ecf20Sopenharmony_ci */ 12208c2ecf20Sopenharmony_ci if (op == REQ_OP_DISCARD) 12218c2ecf20Sopenharmony_ci bi->bi_vcnt = 0; 12228c2ecf20Sopenharmony_ci if (rrdev) 12238c2ecf20Sopenharmony_ci set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 12248c2ecf20Sopenharmony_ci 12258c2ecf20Sopenharmony_ci if (conf->mddev->gendisk) 12268c2ecf20Sopenharmony_ci trace_block_bio_remap(bi->bi_disk->queue, 12278c2ecf20Sopenharmony_ci bi, disk_devt(conf->mddev->gendisk), 12288c2ecf20Sopenharmony_ci sh->dev[i].sector); 12298c2ecf20Sopenharmony_ci if (should_defer && op_is_write(op)) 12308c2ecf20Sopenharmony_ci bio_list_add(&pending_bios, bi); 12318c2ecf20Sopenharmony_ci else 12328c2ecf20Sopenharmony_ci submit_bio_noacct(bi); 12338c2ecf20Sopenharmony_ci } 12348c2ecf20Sopenharmony_ci if (rrdev) { 12358c2ecf20Sopenharmony_ci if (s->syncing || s->expanding || s->expanded 12368c2ecf20Sopenharmony_ci || s->replacing) 12378c2ecf20Sopenharmony_ci md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci set_bit(STRIPE_IO_STARTED, &sh->state); 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci bio_set_dev(rbi, rrdev->bdev); 12428c2ecf20Sopenharmony_ci bio_set_op_attrs(rbi, op, op_flags); 12438c2ecf20Sopenharmony_ci BUG_ON(!op_is_write(op)); 12448c2ecf20Sopenharmony_ci rbi->bi_end_io = raid5_end_write_request; 12458c2ecf20Sopenharmony_ci rbi->bi_private = sh; 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci pr_debug("%s: for %llu schedule op %d on " 12488c2ecf20Sopenharmony_ci "replacement disc %d\n", 12498c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, 12508c2ecf20Sopenharmony_ci rbi->bi_opf, i); 12518c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 12528c2ecf20Sopenharmony_ci if (sh != head_sh) 12538c2ecf20Sopenharmony_ci atomic_inc(&head_sh->count); 12548c2ecf20Sopenharmony_ci if (use_new_offset(conf, sh)) 12558c2ecf20Sopenharmony_ci rbi->bi_iter.bi_sector = (sh->sector 12568c2ecf20Sopenharmony_ci + rrdev->new_data_offset); 12578c2ecf20Sopenharmony_ci else 12588c2ecf20Sopenharmony_ci rbi->bi_iter.bi_sector = (sh->sector 12598c2ecf20Sopenharmony_ci + rrdev->data_offset); 12608c2ecf20Sopenharmony_ci if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) 12618c2ecf20Sopenharmony_ci WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); 12628c2ecf20Sopenharmony_ci sh->dev[i].rvec.bv_page = sh->dev[i].page; 12638c2ecf20Sopenharmony_ci rbi->bi_vcnt = 1; 12648c2ecf20Sopenharmony_ci rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); 12658c2ecf20Sopenharmony_ci rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; 12668c2ecf20Sopenharmony_ci rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); 12678c2ecf20Sopenharmony_ci rbi->bi_write_hint = sh->dev[i].write_hint; 12688c2ecf20Sopenharmony_ci sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; 12698c2ecf20Sopenharmony_ci /* 12708c2ecf20Sopenharmony_ci * If this is discard request, set bi_vcnt 0. We don't 12718c2ecf20Sopenharmony_ci * want to confuse SCSI because SCSI will replace payload 12728c2ecf20Sopenharmony_ci */ 12738c2ecf20Sopenharmony_ci if (op == REQ_OP_DISCARD) 12748c2ecf20Sopenharmony_ci rbi->bi_vcnt = 0; 12758c2ecf20Sopenharmony_ci if (conf->mddev->gendisk) 12768c2ecf20Sopenharmony_ci trace_block_bio_remap(rbi->bi_disk->queue, 12778c2ecf20Sopenharmony_ci rbi, disk_devt(conf->mddev->gendisk), 12788c2ecf20Sopenharmony_ci sh->dev[i].sector); 12798c2ecf20Sopenharmony_ci if (should_defer && op_is_write(op)) 12808c2ecf20Sopenharmony_ci bio_list_add(&pending_bios, rbi); 12818c2ecf20Sopenharmony_ci else 12828c2ecf20Sopenharmony_ci submit_bio_noacct(rbi); 12838c2ecf20Sopenharmony_ci } 12848c2ecf20Sopenharmony_ci if (!rdev && !rrdev) { 12858c2ecf20Sopenharmony_ci if (op_is_write(op)) 12868c2ecf20Sopenharmony_ci set_bit(STRIPE_DEGRADED, &sh->state); 12878c2ecf20Sopenharmony_ci pr_debug("skip op %d on disc %d for sector %llu\n", 12888c2ecf20Sopenharmony_ci bi->bi_opf, i, (unsigned long long)sh->sector); 12898c2ecf20Sopenharmony_ci clear_bit(R5_LOCKED, &sh->dev[i].flags); 12908c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 12918c2ecf20Sopenharmony_ci } 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci if (!head_sh->batch_head) 12948c2ecf20Sopenharmony_ci continue; 12958c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, struct stripe_head, 12968c2ecf20Sopenharmony_ci batch_list); 12978c2ecf20Sopenharmony_ci if (sh != head_sh) 12988c2ecf20Sopenharmony_ci goto again; 12998c2ecf20Sopenharmony_ci } 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci if (should_defer && !bio_list_empty(&pending_bios)) 13028c2ecf20Sopenharmony_ci defer_issue_bios(conf, head_sh->sector, &pending_bios); 13038c2ecf20Sopenharmony_ci} 13048c2ecf20Sopenharmony_ci 13058c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 13068c2ecf20Sopenharmony_ciasync_copy_data(int frombio, struct bio *bio, struct page **page, 13078c2ecf20Sopenharmony_ci unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, 13088c2ecf20Sopenharmony_ci struct stripe_head *sh, int no_skipcopy) 13098c2ecf20Sopenharmony_ci{ 13108c2ecf20Sopenharmony_ci struct bio_vec bvl; 13118c2ecf20Sopenharmony_ci struct bvec_iter iter; 13128c2ecf20Sopenharmony_ci struct page *bio_page; 13138c2ecf20Sopenharmony_ci int page_offset; 13148c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 13158c2ecf20Sopenharmony_ci enum async_tx_flags flags = 0; 13168c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci if (bio->bi_iter.bi_sector >= sector) 13198c2ecf20Sopenharmony_ci page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512; 13208c2ecf20Sopenharmony_ci else 13218c2ecf20Sopenharmony_ci page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512; 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci if (frombio) 13248c2ecf20Sopenharmony_ci flags |= ASYNC_TX_FENCE; 13258c2ecf20Sopenharmony_ci init_async_submit(&submit, flags, tx, NULL, NULL, NULL); 13268c2ecf20Sopenharmony_ci 13278c2ecf20Sopenharmony_ci bio_for_each_segment(bvl, bio, iter) { 13288c2ecf20Sopenharmony_ci int len = bvl.bv_len; 13298c2ecf20Sopenharmony_ci int clen; 13308c2ecf20Sopenharmony_ci int b_offset = 0; 13318c2ecf20Sopenharmony_ci 13328c2ecf20Sopenharmony_ci if (page_offset < 0) { 13338c2ecf20Sopenharmony_ci b_offset = -page_offset; 13348c2ecf20Sopenharmony_ci page_offset += b_offset; 13358c2ecf20Sopenharmony_ci len -= b_offset; 13368c2ecf20Sopenharmony_ci } 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf)) 13398c2ecf20Sopenharmony_ci clen = RAID5_STRIPE_SIZE(conf) - page_offset; 13408c2ecf20Sopenharmony_ci else 13418c2ecf20Sopenharmony_ci clen = len; 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci if (clen > 0) { 13448c2ecf20Sopenharmony_ci b_offset += bvl.bv_offset; 13458c2ecf20Sopenharmony_ci bio_page = bvl.bv_page; 13468c2ecf20Sopenharmony_ci if (frombio) { 13478c2ecf20Sopenharmony_ci if (conf->skip_copy && 13488c2ecf20Sopenharmony_ci b_offset == 0 && page_offset == 0 && 13498c2ecf20Sopenharmony_ci clen == RAID5_STRIPE_SIZE(conf) && 13508c2ecf20Sopenharmony_ci !no_skipcopy) 13518c2ecf20Sopenharmony_ci *page = bio_page; 13528c2ecf20Sopenharmony_ci else 13538c2ecf20Sopenharmony_ci tx = async_memcpy(*page, bio_page, page_offset + poff, 13548c2ecf20Sopenharmony_ci b_offset, clen, &submit); 13558c2ecf20Sopenharmony_ci } else 13568c2ecf20Sopenharmony_ci tx = async_memcpy(bio_page, *page, b_offset, 13578c2ecf20Sopenharmony_ci page_offset + poff, clen, &submit); 13588c2ecf20Sopenharmony_ci } 13598c2ecf20Sopenharmony_ci /* chain the operations */ 13608c2ecf20Sopenharmony_ci submit.depend_tx = tx; 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci if (clen < len) /* hit end of page */ 13638c2ecf20Sopenharmony_ci break; 13648c2ecf20Sopenharmony_ci page_offset += len; 13658c2ecf20Sopenharmony_ci } 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci return tx; 13688c2ecf20Sopenharmony_ci} 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_cistatic void ops_complete_biofill(void *stripe_head_ref) 13718c2ecf20Sopenharmony_ci{ 13728c2ecf20Sopenharmony_ci struct stripe_head *sh = stripe_head_ref; 13738c2ecf20Sopenharmony_ci int i; 13748c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 13758c2ecf20Sopenharmony_ci 13768c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 13778c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_ci /* clear completed biofills */ 13808c2ecf20Sopenharmony_ci for (i = sh->disks; i--; ) { 13818c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci /* acknowledge completion of a biofill operation */ 13848c2ecf20Sopenharmony_ci /* and check if we need to reply to a read request, 13858c2ecf20Sopenharmony_ci * new R5_Wantfill requests are held off until 13868c2ecf20Sopenharmony_ci * !STRIPE_BIOFILL_RUN 13878c2ecf20Sopenharmony_ci */ 13888c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { 13898c2ecf20Sopenharmony_ci struct bio *rbi, *rbi2; 13908c2ecf20Sopenharmony_ci 13918c2ecf20Sopenharmony_ci BUG_ON(!dev->read); 13928c2ecf20Sopenharmony_ci rbi = dev->read; 13938c2ecf20Sopenharmony_ci dev->read = NULL; 13948c2ecf20Sopenharmony_ci while (rbi && rbi->bi_iter.bi_sector < 13958c2ecf20Sopenharmony_ci dev->sector + RAID5_STRIPE_SECTORS(conf)) { 13968c2ecf20Sopenharmony_ci rbi2 = r5_next_bio(conf, rbi, dev->sector); 13978c2ecf20Sopenharmony_ci bio_endio(rbi); 13988c2ecf20Sopenharmony_ci rbi = rbi2; 13998c2ecf20Sopenharmony_ci } 14008c2ecf20Sopenharmony_ci } 14018c2ecf20Sopenharmony_ci } 14028c2ecf20Sopenharmony_ci clear_bit(STRIPE_BIOFILL_RUN, &sh->state); 14038c2ecf20Sopenharmony_ci 14048c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 14058c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 14068c2ecf20Sopenharmony_ci} 14078c2ecf20Sopenharmony_ci 14088c2ecf20Sopenharmony_cistatic void ops_run_biofill(struct stripe_head *sh) 14098c2ecf20Sopenharmony_ci{ 14108c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx = NULL; 14118c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 14128c2ecf20Sopenharmony_ci int i; 14138c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 14168c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 14178c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci for (i = sh->disks; i--; ) { 14208c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 14218c2ecf20Sopenharmony_ci if (test_bit(R5_Wantfill, &dev->flags)) { 14228c2ecf20Sopenharmony_ci struct bio *rbi; 14238c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 14248c2ecf20Sopenharmony_ci dev->read = rbi = dev->toread; 14258c2ecf20Sopenharmony_ci dev->toread = NULL; 14268c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 14278c2ecf20Sopenharmony_ci while (rbi && rbi->bi_iter.bi_sector < 14288c2ecf20Sopenharmony_ci dev->sector + RAID5_STRIPE_SECTORS(conf)) { 14298c2ecf20Sopenharmony_ci tx = async_copy_data(0, rbi, &dev->page, 14308c2ecf20Sopenharmony_ci dev->offset, 14318c2ecf20Sopenharmony_ci dev->sector, tx, sh, 0); 14328c2ecf20Sopenharmony_ci rbi = r5_next_bio(conf, rbi, dev->sector); 14338c2ecf20Sopenharmony_ci } 14348c2ecf20Sopenharmony_ci } 14358c2ecf20Sopenharmony_ci } 14368c2ecf20Sopenharmony_ci 14378c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 14388c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); 14398c2ecf20Sopenharmony_ci async_trigger_callback(&submit); 14408c2ecf20Sopenharmony_ci} 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_cistatic void mark_target_uptodate(struct stripe_head *sh, int target) 14438c2ecf20Sopenharmony_ci{ 14448c2ecf20Sopenharmony_ci struct r5dev *tgt; 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci if (target < 0) 14478c2ecf20Sopenharmony_ci return; 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci tgt = &sh->dev[target]; 14508c2ecf20Sopenharmony_ci set_bit(R5_UPTODATE, &tgt->flags); 14518c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 14528c2ecf20Sopenharmony_ci clear_bit(R5_Wantcompute, &tgt->flags); 14538c2ecf20Sopenharmony_ci} 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_cistatic void ops_complete_compute(void *stripe_head_ref) 14568c2ecf20Sopenharmony_ci{ 14578c2ecf20Sopenharmony_ci struct stripe_head *sh = stripe_head_ref; 14588c2ecf20Sopenharmony_ci 14598c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 14608c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 14618c2ecf20Sopenharmony_ci 14628c2ecf20Sopenharmony_ci /* mark the computed target(s) as uptodate */ 14638c2ecf20Sopenharmony_ci mark_target_uptodate(sh, sh->ops.target); 14648c2ecf20Sopenharmony_ci mark_target_uptodate(sh, sh->ops.target2); 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci clear_bit(STRIPE_COMPUTE_RUN, &sh->state); 14678c2ecf20Sopenharmony_ci if (sh->check_state == check_state_compute_run) 14688c2ecf20Sopenharmony_ci sh->check_state = check_state_compute_result; 14698c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 14708c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 14718c2ecf20Sopenharmony_ci} 14728c2ecf20Sopenharmony_ci 14738c2ecf20Sopenharmony_ci/* return a pointer to the address conversion region of the scribble buffer */ 14748c2ecf20Sopenharmony_cistatic struct page **to_addr_page(struct raid5_percpu *percpu, int i) 14758c2ecf20Sopenharmony_ci{ 14768c2ecf20Sopenharmony_ci return percpu->scribble + i * percpu->scribble_obj_size; 14778c2ecf20Sopenharmony_ci} 14788c2ecf20Sopenharmony_ci 14798c2ecf20Sopenharmony_ci/* return a pointer to the address conversion region of the scribble buffer */ 14808c2ecf20Sopenharmony_cistatic addr_conv_t *to_addr_conv(struct stripe_head *sh, 14818c2ecf20Sopenharmony_ci struct raid5_percpu *percpu, int i) 14828c2ecf20Sopenharmony_ci{ 14838c2ecf20Sopenharmony_ci return (void *) (to_addr_page(percpu, i) + sh->disks + 2); 14848c2ecf20Sopenharmony_ci} 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_ci/* 14878c2ecf20Sopenharmony_ci * Return a pointer to record offset address. 14888c2ecf20Sopenharmony_ci */ 14898c2ecf20Sopenharmony_cistatic unsigned int * 14908c2ecf20Sopenharmony_cito_addr_offs(struct stripe_head *sh, struct raid5_percpu *percpu) 14918c2ecf20Sopenharmony_ci{ 14928c2ecf20Sopenharmony_ci return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); 14938c2ecf20Sopenharmony_ci} 14948c2ecf20Sopenharmony_ci 14958c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 14968c2ecf20Sopenharmony_ciops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) 14978c2ecf20Sopenharmony_ci{ 14988c2ecf20Sopenharmony_ci int disks = sh->disks; 14998c2ecf20Sopenharmony_ci struct page **xor_srcs = to_addr_page(percpu, 0); 15008c2ecf20Sopenharmony_ci unsigned int *off_srcs = to_addr_offs(sh, percpu); 15018c2ecf20Sopenharmony_ci int target = sh->ops.target; 15028c2ecf20Sopenharmony_ci struct r5dev *tgt = &sh->dev[target]; 15038c2ecf20Sopenharmony_ci struct page *xor_dest = tgt->page; 15048c2ecf20Sopenharmony_ci unsigned int off_dest = tgt->offset; 15058c2ecf20Sopenharmony_ci int count = 0; 15068c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx; 15078c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 15088c2ecf20Sopenharmony_ci int i; 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 15118c2ecf20Sopenharmony_ci 15128c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu block: %d\n", 15138c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, target); 15148c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 15178c2ecf20Sopenharmony_ci if (i != target) { 15188c2ecf20Sopenharmony_ci off_srcs[count] = sh->dev[i].offset; 15198c2ecf20Sopenharmony_ci xor_srcs[count++] = sh->dev[i].page; 15208c2ecf20Sopenharmony_ci } 15218c2ecf20Sopenharmony_ci } 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, 15268c2ecf20Sopenharmony_ci ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); 15278c2ecf20Sopenharmony_ci if (unlikely(count == 1)) 15288c2ecf20Sopenharmony_ci tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], 15298c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 15308c2ecf20Sopenharmony_ci else 15318c2ecf20Sopenharmony_ci tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, 15328c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 15338c2ecf20Sopenharmony_ci 15348c2ecf20Sopenharmony_ci return tx; 15358c2ecf20Sopenharmony_ci} 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_ci/* set_syndrome_sources - populate source buffers for gen_syndrome 15388c2ecf20Sopenharmony_ci * @srcs - (struct page *) array of size sh->disks 15398c2ecf20Sopenharmony_ci * @offs - (unsigned int) array of offset for each page 15408c2ecf20Sopenharmony_ci * @sh - stripe_head to parse 15418c2ecf20Sopenharmony_ci * 15428c2ecf20Sopenharmony_ci * Populates srcs in proper layout order for the stripe and returns the 15438c2ecf20Sopenharmony_ci * 'count' of sources to be used in a call to async_gen_syndrome. The P 15448c2ecf20Sopenharmony_ci * destination buffer is recorded in srcs[count] and the Q destination 15458c2ecf20Sopenharmony_ci * is recorded in srcs[count+1]]. 15468c2ecf20Sopenharmony_ci */ 15478c2ecf20Sopenharmony_cistatic int set_syndrome_sources(struct page **srcs, 15488c2ecf20Sopenharmony_ci unsigned int *offs, 15498c2ecf20Sopenharmony_ci struct stripe_head *sh, 15508c2ecf20Sopenharmony_ci int srctype) 15518c2ecf20Sopenharmony_ci{ 15528c2ecf20Sopenharmony_ci int disks = sh->disks; 15538c2ecf20Sopenharmony_ci int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); 15548c2ecf20Sopenharmony_ci int d0_idx = raid6_d0(sh); 15558c2ecf20Sopenharmony_ci int count; 15568c2ecf20Sopenharmony_ci int i; 15578c2ecf20Sopenharmony_ci 15588c2ecf20Sopenharmony_ci for (i = 0; i < disks; i++) 15598c2ecf20Sopenharmony_ci srcs[i] = NULL; 15608c2ecf20Sopenharmony_ci 15618c2ecf20Sopenharmony_ci count = 0; 15628c2ecf20Sopenharmony_ci i = d0_idx; 15638c2ecf20Sopenharmony_ci do { 15648c2ecf20Sopenharmony_ci int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); 15658c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_ci if (i == sh->qd_idx || i == sh->pd_idx || 15688c2ecf20Sopenharmony_ci (srctype == SYNDROME_SRC_ALL) || 15698c2ecf20Sopenharmony_ci (srctype == SYNDROME_SRC_WANT_DRAIN && 15708c2ecf20Sopenharmony_ci (test_bit(R5_Wantdrain, &dev->flags) || 15718c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &dev->flags))) || 15728c2ecf20Sopenharmony_ci (srctype == SYNDROME_SRC_WRITTEN && 15738c2ecf20Sopenharmony_ci (dev->written || 15748c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &dev->flags)))) { 15758c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &dev->flags)) 15768c2ecf20Sopenharmony_ci srcs[slot] = sh->dev[i].orig_page; 15778c2ecf20Sopenharmony_ci else 15788c2ecf20Sopenharmony_ci srcs[slot] = sh->dev[i].page; 15798c2ecf20Sopenharmony_ci /* 15808c2ecf20Sopenharmony_ci * For R5_InJournal, PAGE_SIZE must be 4KB and will 15818c2ecf20Sopenharmony_ci * not shared page. In that case, dev[i].offset 15828c2ecf20Sopenharmony_ci * is 0. 15838c2ecf20Sopenharmony_ci */ 15848c2ecf20Sopenharmony_ci offs[slot] = sh->dev[i].offset; 15858c2ecf20Sopenharmony_ci } 15868c2ecf20Sopenharmony_ci i = raid6_next_disk(i, disks); 15878c2ecf20Sopenharmony_ci } while (i != d0_idx); 15888c2ecf20Sopenharmony_ci 15898c2ecf20Sopenharmony_ci return syndrome_disks; 15908c2ecf20Sopenharmony_ci} 15918c2ecf20Sopenharmony_ci 15928c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 15938c2ecf20Sopenharmony_ciops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) 15948c2ecf20Sopenharmony_ci{ 15958c2ecf20Sopenharmony_ci int disks = sh->disks; 15968c2ecf20Sopenharmony_ci struct page **blocks = to_addr_page(percpu, 0); 15978c2ecf20Sopenharmony_ci unsigned int *offs = to_addr_offs(sh, percpu); 15988c2ecf20Sopenharmony_ci int target; 15998c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 16008c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx; 16018c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 16028c2ecf20Sopenharmony_ci struct r5dev *tgt; 16038c2ecf20Sopenharmony_ci struct page *dest; 16048c2ecf20Sopenharmony_ci unsigned int dest_off; 16058c2ecf20Sopenharmony_ci int i; 16068c2ecf20Sopenharmony_ci int count; 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 16098c2ecf20Sopenharmony_ci if (sh->ops.target < 0) 16108c2ecf20Sopenharmony_ci target = sh->ops.target2; 16118c2ecf20Sopenharmony_ci else if (sh->ops.target2 < 0) 16128c2ecf20Sopenharmony_ci target = sh->ops.target; 16138c2ecf20Sopenharmony_ci else 16148c2ecf20Sopenharmony_ci /* we should only have one valid target */ 16158c2ecf20Sopenharmony_ci BUG(); 16168c2ecf20Sopenharmony_ci BUG_ON(target < 0); 16178c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu block: %d\n", 16188c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, target); 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci tgt = &sh->dev[target]; 16218c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 16228c2ecf20Sopenharmony_ci dest = tgt->page; 16238c2ecf20Sopenharmony_ci dest_off = tgt->offset; 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 16268c2ecf20Sopenharmony_ci 16278c2ecf20Sopenharmony_ci if (target == qd_idx) { 16288c2ecf20Sopenharmony_ci count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); 16298c2ecf20Sopenharmony_ci blocks[count] = NULL; /* regenerating p is not necessary */ 16308c2ecf20Sopenharmony_ci BUG_ON(blocks[count+1] != dest); /* q should already be set */ 16318c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE, NULL, 16328c2ecf20Sopenharmony_ci ops_complete_compute, sh, 16338c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 16348c2ecf20Sopenharmony_ci tx = async_gen_syndrome(blocks, offs, count+2, 16358c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 16368c2ecf20Sopenharmony_ci } else { 16378c2ecf20Sopenharmony_ci /* Compute any data- or p-drive using XOR */ 16388c2ecf20Sopenharmony_ci count = 0; 16398c2ecf20Sopenharmony_ci for (i = disks; i-- ; ) { 16408c2ecf20Sopenharmony_ci if (i == target || i == qd_idx) 16418c2ecf20Sopenharmony_ci continue; 16428c2ecf20Sopenharmony_ci offs[count] = sh->dev[i].offset; 16438c2ecf20Sopenharmony_ci blocks[count++] = sh->dev[i].page; 16448c2ecf20Sopenharmony_ci } 16458c2ecf20Sopenharmony_ci 16468c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, 16478c2ecf20Sopenharmony_ci NULL, ops_complete_compute, sh, 16488c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 16498c2ecf20Sopenharmony_ci tx = async_xor_offs(dest, dest_off, blocks, offs, count, 16508c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 16518c2ecf20Sopenharmony_ci } 16528c2ecf20Sopenharmony_ci 16538c2ecf20Sopenharmony_ci return tx; 16548c2ecf20Sopenharmony_ci} 16558c2ecf20Sopenharmony_ci 16568c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 16578c2ecf20Sopenharmony_ciops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) 16588c2ecf20Sopenharmony_ci{ 16598c2ecf20Sopenharmony_ci int i, count, disks = sh->disks; 16608c2ecf20Sopenharmony_ci int syndrome_disks = sh->ddf_layout ? disks : disks-2; 16618c2ecf20Sopenharmony_ci int d0_idx = raid6_d0(sh); 16628c2ecf20Sopenharmony_ci int faila = -1, failb = -1; 16638c2ecf20Sopenharmony_ci int target = sh->ops.target; 16648c2ecf20Sopenharmony_ci int target2 = sh->ops.target2; 16658c2ecf20Sopenharmony_ci struct r5dev *tgt = &sh->dev[target]; 16668c2ecf20Sopenharmony_ci struct r5dev *tgt2 = &sh->dev[target2]; 16678c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx; 16688c2ecf20Sopenharmony_ci struct page **blocks = to_addr_page(percpu, 0); 16698c2ecf20Sopenharmony_ci unsigned int *offs = to_addr_offs(sh, percpu); 16708c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 16738c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu block1: %d block2: %d\n", 16748c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, target, target2); 16758c2ecf20Sopenharmony_ci BUG_ON(target < 0 || target2 < 0); 16768c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 16778c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); 16788c2ecf20Sopenharmony_ci 16798c2ecf20Sopenharmony_ci /* we need to open-code set_syndrome_sources to handle the 16808c2ecf20Sopenharmony_ci * slot number conversion for 'faila' and 'failb' 16818c2ecf20Sopenharmony_ci */ 16828c2ecf20Sopenharmony_ci for (i = 0; i < disks ; i++) { 16838c2ecf20Sopenharmony_ci offs[i] = 0; 16848c2ecf20Sopenharmony_ci blocks[i] = NULL; 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci count = 0; 16878c2ecf20Sopenharmony_ci i = d0_idx; 16888c2ecf20Sopenharmony_ci do { 16898c2ecf20Sopenharmony_ci int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); 16908c2ecf20Sopenharmony_ci 16918c2ecf20Sopenharmony_ci offs[slot] = sh->dev[i].offset; 16928c2ecf20Sopenharmony_ci blocks[slot] = sh->dev[i].page; 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci if (i == target) 16958c2ecf20Sopenharmony_ci faila = slot; 16968c2ecf20Sopenharmony_ci if (i == target2) 16978c2ecf20Sopenharmony_ci failb = slot; 16988c2ecf20Sopenharmony_ci i = raid6_next_disk(i, disks); 16998c2ecf20Sopenharmony_ci } while (i != d0_idx); 17008c2ecf20Sopenharmony_ci 17018c2ecf20Sopenharmony_ci BUG_ON(faila == failb); 17028c2ecf20Sopenharmony_ci if (failb < faila) 17038c2ecf20Sopenharmony_ci swap(faila, failb); 17048c2ecf20Sopenharmony_ci pr_debug("%s: stripe: %llu faila: %d failb: %d\n", 17058c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, faila, failb); 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci if (failb == syndrome_disks+1) { 17108c2ecf20Sopenharmony_ci /* Q disk is one of the missing disks */ 17118c2ecf20Sopenharmony_ci if (faila == syndrome_disks) { 17128c2ecf20Sopenharmony_ci /* Missing P+Q, just recompute */ 17138c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE, NULL, 17148c2ecf20Sopenharmony_ci ops_complete_compute, sh, 17158c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 17168c2ecf20Sopenharmony_ci return async_gen_syndrome(blocks, offs, syndrome_disks+2, 17178c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 17188c2ecf20Sopenharmony_ci &submit); 17198c2ecf20Sopenharmony_ci } else { 17208c2ecf20Sopenharmony_ci struct page *dest; 17218c2ecf20Sopenharmony_ci unsigned int dest_off; 17228c2ecf20Sopenharmony_ci int data_target; 17238c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 17248c2ecf20Sopenharmony_ci 17258c2ecf20Sopenharmony_ci /* Missing D+Q: recompute D from P, then recompute Q */ 17268c2ecf20Sopenharmony_ci if (target == qd_idx) 17278c2ecf20Sopenharmony_ci data_target = target2; 17288c2ecf20Sopenharmony_ci else 17298c2ecf20Sopenharmony_ci data_target = target; 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci count = 0; 17328c2ecf20Sopenharmony_ci for (i = disks; i-- ; ) { 17338c2ecf20Sopenharmony_ci if (i == data_target || i == qd_idx) 17348c2ecf20Sopenharmony_ci continue; 17358c2ecf20Sopenharmony_ci offs[count] = sh->dev[i].offset; 17368c2ecf20Sopenharmony_ci blocks[count++] = sh->dev[i].page; 17378c2ecf20Sopenharmony_ci } 17388c2ecf20Sopenharmony_ci dest = sh->dev[data_target].page; 17398c2ecf20Sopenharmony_ci dest_off = sh->dev[data_target].offset; 17408c2ecf20Sopenharmony_ci init_async_submit(&submit, 17418c2ecf20Sopenharmony_ci ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, 17428c2ecf20Sopenharmony_ci NULL, NULL, NULL, 17438c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 17448c2ecf20Sopenharmony_ci tx = async_xor_offs(dest, dest_off, blocks, offs, count, 17458c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 17468c2ecf20Sopenharmony_ci &submit); 17478c2ecf20Sopenharmony_ci 17488c2ecf20Sopenharmony_ci count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); 17498c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE, tx, 17508c2ecf20Sopenharmony_ci ops_complete_compute, sh, 17518c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 17528c2ecf20Sopenharmony_ci return async_gen_syndrome(blocks, offs, count+2, 17538c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 17548c2ecf20Sopenharmony_ci &submit); 17558c2ecf20Sopenharmony_ci } 17568c2ecf20Sopenharmony_ci } else { 17578c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE, NULL, 17588c2ecf20Sopenharmony_ci ops_complete_compute, sh, 17598c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 17608c2ecf20Sopenharmony_ci if (failb == syndrome_disks) { 17618c2ecf20Sopenharmony_ci /* We're missing D+P. */ 17628c2ecf20Sopenharmony_ci return async_raid6_datap_recov(syndrome_disks+2, 17638c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 17648c2ecf20Sopenharmony_ci faila, 17658c2ecf20Sopenharmony_ci blocks, offs, &submit); 17668c2ecf20Sopenharmony_ci } else { 17678c2ecf20Sopenharmony_ci /* We're missing D+D. */ 17688c2ecf20Sopenharmony_ci return async_raid6_2data_recov(syndrome_disks+2, 17698c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 17708c2ecf20Sopenharmony_ci faila, failb, 17718c2ecf20Sopenharmony_ci blocks, offs, &submit); 17728c2ecf20Sopenharmony_ci } 17738c2ecf20Sopenharmony_ci } 17748c2ecf20Sopenharmony_ci} 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_cistatic void ops_complete_prexor(void *stripe_head_ref) 17778c2ecf20Sopenharmony_ci{ 17788c2ecf20Sopenharmony_ci struct stripe_head *sh = stripe_head_ref; 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 17818c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 17828c2ecf20Sopenharmony_ci 17838c2ecf20Sopenharmony_ci if (r5c_is_writeback(sh->raid_conf->log)) 17848c2ecf20Sopenharmony_ci /* 17858c2ecf20Sopenharmony_ci * raid5-cache write back uses orig_page during prexor. 17868c2ecf20Sopenharmony_ci * After prexor, it is time to free orig_page 17878c2ecf20Sopenharmony_ci */ 17888c2ecf20Sopenharmony_ci r5c_release_extra_page(sh); 17898c2ecf20Sopenharmony_ci} 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 17928c2ecf20Sopenharmony_ciops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, 17938c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx) 17948c2ecf20Sopenharmony_ci{ 17958c2ecf20Sopenharmony_ci int disks = sh->disks; 17968c2ecf20Sopenharmony_ci struct page **xor_srcs = to_addr_page(percpu, 0); 17978c2ecf20Sopenharmony_ci unsigned int *off_srcs = to_addr_offs(sh, percpu); 17988c2ecf20Sopenharmony_ci int count = 0, pd_idx = sh->pd_idx, i; 17998c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 18008c2ecf20Sopenharmony_ci 18018c2ecf20Sopenharmony_ci /* existing parity data subtracted */ 18028c2ecf20Sopenharmony_ci unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; 18038c2ecf20Sopenharmony_ci struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 18048c2ecf20Sopenharmony_ci 18058c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 18068c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 18078c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 18108c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 18118c2ecf20Sopenharmony_ci /* Only process blocks that are known to be uptodate */ 18128c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &dev->flags)) { 18138c2ecf20Sopenharmony_ci /* 18148c2ecf20Sopenharmony_ci * For this case, PAGE_SIZE must be equal to 4KB and 18158c2ecf20Sopenharmony_ci * page offset is zero. 18168c2ecf20Sopenharmony_ci */ 18178c2ecf20Sopenharmony_ci off_srcs[count] = dev->offset; 18188c2ecf20Sopenharmony_ci xor_srcs[count++] = dev->orig_page; 18198c2ecf20Sopenharmony_ci } else if (test_bit(R5_Wantdrain, &dev->flags)) { 18208c2ecf20Sopenharmony_ci off_srcs[count] = dev->offset; 18218c2ecf20Sopenharmony_ci xor_srcs[count++] = dev->page; 18228c2ecf20Sopenharmony_ci } 18238c2ecf20Sopenharmony_ci } 18248c2ecf20Sopenharmony_ci 18258c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, 18268c2ecf20Sopenharmony_ci ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); 18278c2ecf20Sopenharmony_ci tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, 18288c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci return tx; 18318c2ecf20Sopenharmony_ci} 18328c2ecf20Sopenharmony_ci 18338c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 18348c2ecf20Sopenharmony_ciops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu, 18358c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx) 18368c2ecf20Sopenharmony_ci{ 18378c2ecf20Sopenharmony_ci struct page **blocks = to_addr_page(percpu, 0); 18388c2ecf20Sopenharmony_ci unsigned int *offs = to_addr_offs(sh, percpu); 18398c2ecf20Sopenharmony_ci int count; 18408c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 18438c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 18448c2ecf20Sopenharmony_ci 18458c2ecf20Sopenharmony_ci count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN); 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, 18488c2ecf20Sopenharmony_ci ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); 18498c2ecf20Sopenharmony_ci tx = async_gen_syndrome(blocks, offs, count+2, 18508c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 18518c2ecf20Sopenharmony_ci 18528c2ecf20Sopenharmony_ci return tx; 18538c2ecf20Sopenharmony_ci} 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_cistatic struct dma_async_tx_descriptor * 18568c2ecf20Sopenharmony_ciops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 18578c2ecf20Sopenharmony_ci{ 18588c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 18598c2ecf20Sopenharmony_ci int disks = sh->disks; 18608c2ecf20Sopenharmony_ci int i; 18618c2ecf20Sopenharmony_ci struct stripe_head *head_sh = sh; 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 18648c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 18658c2ecf20Sopenharmony_ci 18668c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 18678c2ecf20Sopenharmony_ci struct r5dev *dev; 18688c2ecf20Sopenharmony_ci struct bio *chosen; 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_ci sh = head_sh; 18718c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) { 18728c2ecf20Sopenharmony_ci struct bio *wbi; 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ciagain: 18758c2ecf20Sopenharmony_ci dev = &sh->dev[i]; 18768c2ecf20Sopenharmony_ci /* 18778c2ecf20Sopenharmony_ci * clear R5_InJournal, so when rewriting a page in 18788c2ecf20Sopenharmony_ci * journal, it is not skipped by r5l_log_stripe() 18798c2ecf20Sopenharmony_ci */ 18808c2ecf20Sopenharmony_ci clear_bit(R5_InJournal, &dev->flags); 18818c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 18828c2ecf20Sopenharmony_ci chosen = dev->towrite; 18838c2ecf20Sopenharmony_ci dev->towrite = NULL; 18848c2ecf20Sopenharmony_ci sh->overwrite_disks = 0; 18858c2ecf20Sopenharmony_ci BUG_ON(dev->written); 18868c2ecf20Sopenharmony_ci wbi = dev->written = chosen; 18878c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 18888c2ecf20Sopenharmony_ci WARN_ON(dev->page != dev->orig_page); 18898c2ecf20Sopenharmony_ci 18908c2ecf20Sopenharmony_ci while (wbi && wbi->bi_iter.bi_sector < 18918c2ecf20Sopenharmony_ci dev->sector + RAID5_STRIPE_SECTORS(conf)) { 18928c2ecf20Sopenharmony_ci if (wbi->bi_opf & REQ_FUA) 18938c2ecf20Sopenharmony_ci set_bit(R5_WantFUA, &dev->flags); 18948c2ecf20Sopenharmony_ci if (wbi->bi_opf & REQ_SYNC) 18958c2ecf20Sopenharmony_ci set_bit(R5_SyncIO, &dev->flags); 18968c2ecf20Sopenharmony_ci if (bio_op(wbi) == REQ_OP_DISCARD) 18978c2ecf20Sopenharmony_ci set_bit(R5_Discard, &dev->flags); 18988c2ecf20Sopenharmony_ci else { 18998c2ecf20Sopenharmony_ci tx = async_copy_data(1, wbi, &dev->page, 19008c2ecf20Sopenharmony_ci dev->offset, 19018c2ecf20Sopenharmony_ci dev->sector, tx, sh, 19028c2ecf20Sopenharmony_ci r5c_is_writeback(conf->log)); 19038c2ecf20Sopenharmony_ci if (dev->page != dev->orig_page && 19048c2ecf20Sopenharmony_ci !r5c_is_writeback(conf->log)) { 19058c2ecf20Sopenharmony_ci set_bit(R5_SkipCopy, &dev->flags); 19068c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &dev->flags); 19078c2ecf20Sopenharmony_ci clear_bit(R5_OVERWRITE, &dev->flags); 19088c2ecf20Sopenharmony_ci } 19098c2ecf20Sopenharmony_ci } 19108c2ecf20Sopenharmony_ci wbi = r5_next_bio(conf, wbi, dev->sector); 19118c2ecf20Sopenharmony_ci } 19128c2ecf20Sopenharmony_ci 19138c2ecf20Sopenharmony_ci if (head_sh->batch_head) { 19148c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, 19158c2ecf20Sopenharmony_ci struct stripe_head, 19168c2ecf20Sopenharmony_ci batch_list); 19178c2ecf20Sopenharmony_ci if (sh == head_sh) 19188c2ecf20Sopenharmony_ci continue; 19198c2ecf20Sopenharmony_ci goto again; 19208c2ecf20Sopenharmony_ci } 19218c2ecf20Sopenharmony_ci } 19228c2ecf20Sopenharmony_ci } 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci return tx; 19258c2ecf20Sopenharmony_ci} 19268c2ecf20Sopenharmony_ci 19278c2ecf20Sopenharmony_cistatic void ops_complete_reconstruct(void *stripe_head_ref) 19288c2ecf20Sopenharmony_ci{ 19298c2ecf20Sopenharmony_ci struct stripe_head *sh = stripe_head_ref; 19308c2ecf20Sopenharmony_ci int disks = sh->disks; 19318c2ecf20Sopenharmony_ci int pd_idx = sh->pd_idx; 19328c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 19338c2ecf20Sopenharmony_ci int i; 19348c2ecf20Sopenharmony_ci bool fua = false, sync = false, discard = false; 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 19378c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 19388c2ecf20Sopenharmony_ci 19398c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 19408c2ecf20Sopenharmony_ci fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); 19418c2ecf20Sopenharmony_ci sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); 19428c2ecf20Sopenharmony_ci discard |= test_bit(R5_Discard, &sh->dev[i].flags); 19438c2ecf20Sopenharmony_ci } 19448c2ecf20Sopenharmony_ci 19458c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 19468c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 19478c2ecf20Sopenharmony_ci 19488c2ecf20Sopenharmony_ci if (dev->written || i == pd_idx || i == qd_idx) { 19498c2ecf20Sopenharmony_ci if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { 19508c2ecf20Sopenharmony_ci set_bit(R5_UPTODATE, &dev->flags); 19518c2ecf20Sopenharmony_ci if (test_bit(STRIPE_EXPAND_READY, &sh->state)) 19528c2ecf20Sopenharmony_ci set_bit(R5_Expanded, &dev->flags); 19538c2ecf20Sopenharmony_ci } 19548c2ecf20Sopenharmony_ci if (fua) 19558c2ecf20Sopenharmony_ci set_bit(R5_WantFUA, &dev->flags); 19568c2ecf20Sopenharmony_ci if (sync) 19578c2ecf20Sopenharmony_ci set_bit(R5_SyncIO, &dev->flags); 19588c2ecf20Sopenharmony_ci } 19598c2ecf20Sopenharmony_ci } 19608c2ecf20Sopenharmony_ci 19618c2ecf20Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_drain_run) 19628c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_drain_result; 19638c2ecf20Sopenharmony_ci else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) 19648c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_prexor_drain_result; 19658c2ecf20Sopenharmony_ci else { 19668c2ecf20Sopenharmony_ci BUG_ON(sh->reconstruct_state != reconstruct_state_run); 19678c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_result; 19688c2ecf20Sopenharmony_ci } 19698c2ecf20Sopenharmony_ci 19708c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 19718c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 19728c2ecf20Sopenharmony_ci} 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_cistatic void 19758c2ecf20Sopenharmony_ciops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, 19768c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx) 19778c2ecf20Sopenharmony_ci{ 19788c2ecf20Sopenharmony_ci int disks = sh->disks; 19798c2ecf20Sopenharmony_ci struct page **xor_srcs; 19808c2ecf20Sopenharmony_ci unsigned int *off_srcs; 19818c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 19828c2ecf20Sopenharmony_ci int count, pd_idx = sh->pd_idx, i; 19838c2ecf20Sopenharmony_ci struct page *xor_dest; 19848c2ecf20Sopenharmony_ci unsigned int off_dest; 19858c2ecf20Sopenharmony_ci int prexor = 0; 19868c2ecf20Sopenharmony_ci unsigned long flags; 19878c2ecf20Sopenharmony_ci int j = 0; 19888c2ecf20Sopenharmony_ci struct stripe_head *head_sh = sh; 19898c2ecf20Sopenharmony_ci int last_stripe; 19908c2ecf20Sopenharmony_ci 19918c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 19928c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 19958c2ecf20Sopenharmony_ci if (pd_idx == i) 19968c2ecf20Sopenharmony_ci continue; 19978c2ecf20Sopenharmony_ci if (!test_bit(R5_Discard, &sh->dev[i].flags)) 19988c2ecf20Sopenharmony_ci break; 19998c2ecf20Sopenharmony_ci } 20008c2ecf20Sopenharmony_ci if (i >= sh->disks) { 20018c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 20028c2ecf20Sopenharmony_ci set_bit(R5_Discard, &sh->dev[pd_idx].flags); 20038c2ecf20Sopenharmony_ci ops_complete_reconstruct(sh); 20048c2ecf20Sopenharmony_ci return; 20058c2ecf20Sopenharmony_ci } 20068c2ecf20Sopenharmony_ciagain: 20078c2ecf20Sopenharmony_ci count = 0; 20088c2ecf20Sopenharmony_ci xor_srcs = to_addr_page(percpu, j); 20098c2ecf20Sopenharmony_ci off_srcs = to_addr_offs(sh, percpu); 20108c2ecf20Sopenharmony_ci /* check if prexor is active which means only process blocks 20118c2ecf20Sopenharmony_ci * that are part of a read-modify-write (written) 20128c2ecf20Sopenharmony_ci */ 20138c2ecf20Sopenharmony_ci if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { 20148c2ecf20Sopenharmony_ci prexor = 1; 20158c2ecf20Sopenharmony_ci off_dest = off_srcs[count] = sh->dev[pd_idx].offset; 20168c2ecf20Sopenharmony_ci xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 20178c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 20188c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 20198c2ecf20Sopenharmony_ci if (head_sh->dev[i].written || 20208c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &head_sh->dev[i].flags)) { 20218c2ecf20Sopenharmony_ci off_srcs[count] = dev->offset; 20228c2ecf20Sopenharmony_ci xor_srcs[count++] = dev->page; 20238c2ecf20Sopenharmony_ci } 20248c2ecf20Sopenharmony_ci } 20258c2ecf20Sopenharmony_ci } else { 20268c2ecf20Sopenharmony_ci xor_dest = sh->dev[pd_idx].page; 20278c2ecf20Sopenharmony_ci off_dest = sh->dev[pd_idx].offset; 20288c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 20298c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 20308c2ecf20Sopenharmony_ci if (i != pd_idx) { 20318c2ecf20Sopenharmony_ci off_srcs[count] = dev->offset; 20328c2ecf20Sopenharmony_ci xor_srcs[count++] = dev->page; 20338c2ecf20Sopenharmony_ci } 20348c2ecf20Sopenharmony_ci } 20358c2ecf20Sopenharmony_ci } 20368c2ecf20Sopenharmony_ci 20378c2ecf20Sopenharmony_ci /* 1/ if we prexor'd then the dest is reused as a source 20388c2ecf20Sopenharmony_ci * 2/ if we did not prexor then we are redoing the parity 20398c2ecf20Sopenharmony_ci * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 20408c2ecf20Sopenharmony_ci * for the synchronous xor case 20418c2ecf20Sopenharmony_ci */ 20428c2ecf20Sopenharmony_ci last_stripe = !head_sh->batch_head || 20438c2ecf20Sopenharmony_ci list_first_entry(&sh->batch_list, 20448c2ecf20Sopenharmony_ci struct stripe_head, batch_list) == head_sh; 20458c2ecf20Sopenharmony_ci if (last_stripe) { 20468c2ecf20Sopenharmony_ci flags = ASYNC_TX_ACK | 20478c2ecf20Sopenharmony_ci (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_ci atomic_inc(&head_sh->count); 20508c2ecf20Sopenharmony_ci init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh, 20518c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, j)); 20528c2ecf20Sopenharmony_ci } else { 20538c2ecf20Sopenharmony_ci flags = prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST; 20548c2ecf20Sopenharmony_ci init_async_submit(&submit, flags, tx, NULL, NULL, 20558c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, j)); 20568c2ecf20Sopenharmony_ci } 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_ci if (unlikely(count == 1)) 20598c2ecf20Sopenharmony_ci tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], 20608c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 20618c2ecf20Sopenharmony_ci else 20628c2ecf20Sopenharmony_ci tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, 20638c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 20648c2ecf20Sopenharmony_ci if (!last_stripe) { 20658c2ecf20Sopenharmony_ci j++; 20668c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, struct stripe_head, 20678c2ecf20Sopenharmony_ci batch_list); 20688c2ecf20Sopenharmony_ci goto again; 20698c2ecf20Sopenharmony_ci } 20708c2ecf20Sopenharmony_ci} 20718c2ecf20Sopenharmony_ci 20728c2ecf20Sopenharmony_cistatic void 20738c2ecf20Sopenharmony_ciops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, 20748c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx) 20758c2ecf20Sopenharmony_ci{ 20768c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 20778c2ecf20Sopenharmony_ci struct page **blocks; 20788c2ecf20Sopenharmony_ci unsigned int *offs; 20798c2ecf20Sopenharmony_ci int count, i, j = 0; 20808c2ecf20Sopenharmony_ci struct stripe_head *head_sh = sh; 20818c2ecf20Sopenharmony_ci int last_stripe; 20828c2ecf20Sopenharmony_ci int synflags; 20838c2ecf20Sopenharmony_ci unsigned long txflags; 20848c2ecf20Sopenharmony_ci 20858c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 20888c2ecf20Sopenharmony_ci if (sh->pd_idx == i || sh->qd_idx == i) 20898c2ecf20Sopenharmony_ci continue; 20908c2ecf20Sopenharmony_ci if (!test_bit(R5_Discard, &sh->dev[i].flags)) 20918c2ecf20Sopenharmony_ci break; 20928c2ecf20Sopenharmony_ci } 20938c2ecf20Sopenharmony_ci if (i >= sh->disks) { 20948c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 20958c2ecf20Sopenharmony_ci set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); 20968c2ecf20Sopenharmony_ci set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); 20978c2ecf20Sopenharmony_ci ops_complete_reconstruct(sh); 20988c2ecf20Sopenharmony_ci return; 20998c2ecf20Sopenharmony_ci } 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ciagain: 21028c2ecf20Sopenharmony_ci blocks = to_addr_page(percpu, j); 21038c2ecf20Sopenharmony_ci offs = to_addr_offs(sh, percpu); 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { 21068c2ecf20Sopenharmony_ci synflags = SYNDROME_SRC_WRITTEN; 21078c2ecf20Sopenharmony_ci txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST; 21088c2ecf20Sopenharmony_ci } else { 21098c2ecf20Sopenharmony_ci synflags = SYNDROME_SRC_ALL; 21108c2ecf20Sopenharmony_ci txflags = ASYNC_TX_ACK; 21118c2ecf20Sopenharmony_ci } 21128c2ecf20Sopenharmony_ci 21138c2ecf20Sopenharmony_ci count = set_syndrome_sources(blocks, offs, sh, synflags); 21148c2ecf20Sopenharmony_ci last_stripe = !head_sh->batch_head || 21158c2ecf20Sopenharmony_ci list_first_entry(&sh->batch_list, 21168c2ecf20Sopenharmony_ci struct stripe_head, batch_list) == head_sh; 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci if (last_stripe) { 21198c2ecf20Sopenharmony_ci atomic_inc(&head_sh->count); 21208c2ecf20Sopenharmony_ci init_async_submit(&submit, txflags, tx, ops_complete_reconstruct, 21218c2ecf20Sopenharmony_ci head_sh, to_addr_conv(sh, percpu, j)); 21228c2ecf20Sopenharmony_ci } else 21238c2ecf20Sopenharmony_ci init_async_submit(&submit, 0, tx, NULL, NULL, 21248c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, j)); 21258c2ecf20Sopenharmony_ci tx = async_gen_syndrome(blocks, offs, count+2, 21268c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 21278c2ecf20Sopenharmony_ci if (!last_stripe) { 21288c2ecf20Sopenharmony_ci j++; 21298c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, struct stripe_head, 21308c2ecf20Sopenharmony_ci batch_list); 21318c2ecf20Sopenharmony_ci goto again; 21328c2ecf20Sopenharmony_ci } 21338c2ecf20Sopenharmony_ci} 21348c2ecf20Sopenharmony_ci 21358c2ecf20Sopenharmony_cistatic void ops_complete_check(void *stripe_head_ref) 21368c2ecf20Sopenharmony_ci{ 21378c2ecf20Sopenharmony_ci struct stripe_head *sh = stripe_head_ref; 21388c2ecf20Sopenharmony_ci 21398c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 21408c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 21418c2ecf20Sopenharmony_ci 21428c2ecf20Sopenharmony_ci sh->check_state = check_state_check_result; 21438c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 21448c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 21458c2ecf20Sopenharmony_ci} 21468c2ecf20Sopenharmony_ci 21478c2ecf20Sopenharmony_cistatic void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) 21488c2ecf20Sopenharmony_ci{ 21498c2ecf20Sopenharmony_ci int disks = sh->disks; 21508c2ecf20Sopenharmony_ci int pd_idx = sh->pd_idx; 21518c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 21528c2ecf20Sopenharmony_ci struct page *xor_dest; 21538c2ecf20Sopenharmony_ci unsigned int off_dest; 21548c2ecf20Sopenharmony_ci struct page **xor_srcs = to_addr_page(percpu, 0); 21558c2ecf20Sopenharmony_ci unsigned int *off_srcs = to_addr_offs(sh, percpu); 21568c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx; 21578c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 21588c2ecf20Sopenharmony_ci int count; 21598c2ecf20Sopenharmony_ci int i; 21608c2ecf20Sopenharmony_ci 21618c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu\n", __func__, 21628c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 21658c2ecf20Sopenharmony_ci count = 0; 21668c2ecf20Sopenharmony_ci xor_dest = sh->dev[pd_idx].page; 21678c2ecf20Sopenharmony_ci off_dest = sh->dev[pd_idx].offset; 21688c2ecf20Sopenharmony_ci off_srcs[count] = off_dest; 21698c2ecf20Sopenharmony_ci xor_srcs[count++] = xor_dest; 21708c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 21718c2ecf20Sopenharmony_ci if (i == pd_idx || i == qd_idx) 21728c2ecf20Sopenharmony_ci continue; 21738c2ecf20Sopenharmony_ci off_srcs[count] = sh->dev[i].offset; 21748c2ecf20Sopenharmony_ci xor_srcs[count++] = sh->dev[i].page; 21758c2ecf20Sopenharmony_ci } 21768c2ecf20Sopenharmony_ci 21778c2ecf20Sopenharmony_ci init_async_submit(&submit, 0, NULL, NULL, NULL, 21788c2ecf20Sopenharmony_ci to_addr_conv(sh, percpu, 0)); 21798c2ecf20Sopenharmony_ci tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, 21808c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 21818c2ecf20Sopenharmony_ci &sh->ops.zero_sum_result, &submit); 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 21848c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); 21858c2ecf20Sopenharmony_ci tx = async_trigger_callback(&submit); 21868c2ecf20Sopenharmony_ci} 21878c2ecf20Sopenharmony_ci 21888c2ecf20Sopenharmony_cistatic void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) 21898c2ecf20Sopenharmony_ci{ 21908c2ecf20Sopenharmony_ci struct page **srcs = to_addr_page(percpu, 0); 21918c2ecf20Sopenharmony_ci unsigned int *offs = to_addr_offs(sh, percpu); 21928c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 21938c2ecf20Sopenharmony_ci int count; 21948c2ecf20Sopenharmony_ci 21958c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu checkp: %d\n", __func__, 21968c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, checkp); 21978c2ecf20Sopenharmony_ci 21988c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 21998c2ecf20Sopenharmony_ci count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL); 22008c2ecf20Sopenharmony_ci if (!checkp) 22018c2ecf20Sopenharmony_ci srcs[count] = NULL; 22028c2ecf20Sopenharmony_ci 22038c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 22048c2ecf20Sopenharmony_ci init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, 22058c2ecf20Sopenharmony_ci sh, to_addr_conv(sh, percpu, 0)); 22068c2ecf20Sopenharmony_ci async_syndrome_val(srcs, offs, count+2, 22078c2ecf20Sopenharmony_ci RAID5_STRIPE_SIZE(sh->raid_conf), 22088c2ecf20Sopenharmony_ci &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); 22098c2ecf20Sopenharmony_ci} 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_cistatic void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) 22128c2ecf20Sopenharmony_ci{ 22138c2ecf20Sopenharmony_ci int overlap_clear = 0, i, disks = sh->disks; 22148c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx = NULL; 22158c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 22168c2ecf20Sopenharmony_ci int level = conf->level; 22178c2ecf20Sopenharmony_ci struct raid5_percpu *percpu; 22188c2ecf20Sopenharmony_ci unsigned long cpu; 22198c2ecf20Sopenharmony_ci 22208c2ecf20Sopenharmony_ci cpu = get_cpu(); 22218c2ecf20Sopenharmony_ci percpu = per_cpu_ptr(conf->percpu, cpu); 22228c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { 22238c2ecf20Sopenharmony_ci ops_run_biofill(sh); 22248c2ecf20Sopenharmony_ci overlap_clear++; 22258c2ecf20Sopenharmony_ci } 22268c2ecf20Sopenharmony_ci 22278c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { 22288c2ecf20Sopenharmony_ci if (level < 6) 22298c2ecf20Sopenharmony_ci tx = ops_run_compute5(sh, percpu); 22308c2ecf20Sopenharmony_ci else { 22318c2ecf20Sopenharmony_ci if (sh->ops.target2 < 0 || sh->ops.target < 0) 22328c2ecf20Sopenharmony_ci tx = ops_run_compute6_1(sh, percpu); 22338c2ecf20Sopenharmony_ci else 22348c2ecf20Sopenharmony_ci tx = ops_run_compute6_2(sh, percpu); 22358c2ecf20Sopenharmony_ci } 22368c2ecf20Sopenharmony_ci /* terminate the chain if reconstruct is not set to be run */ 22378c2ecf20Sopenharmony_ci if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) 22388c2ecf20Sopenharmony_ci async_tx_ack(tx); 22398c2ecf20Sopenharmony_ci } 22408c2ecf20Sopenharmony_ci 22418c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_PREXOR, &ops_request)) { 22428c2ecf20Sopenharmony_ci if (level < 6) 22438c2ecf20Sopenharmony_ci tx = ops_run_prexor5(sh, percpu, tx); 22448c2ecf20Sopenharmony_ci else 22458c2ecf20Sopenharmony_ci tx = ops_run_prexor6(sh, percpu, tx); 22468c2ecf20Sopenharmony_ci } 22478c2ecf20Sopenharmony_ci 22488c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request)) 22498c2ecf20Sopenharmony_ci tx = ops_run_partial_parity(sh, percpu, tx); 22508c2ecf20Sopenharmony_ci 22518c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { 22528c2ecf20Sopenharmony_ci tx = ops_run_biodrain(sh, tx); 22538c2ecf20Sopenharmony_ci overlap_clear++; 22548c2ecf20Sopenharmony_ci } 22558c2ecf20Sopenharmony_ci 22568c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { 22578c2ecf20Sopenharmony_ci if (level < 6) 22588c2ecf20Sopenharmony_ci ops_run_reconstruct5(sh, percpu, tx); 22598c2ecf20Sopenharmony_ci else 22608c2ecf20Sopenharmony_ci ops_run_reconstruct6(sh, percpu, tx); 22618c2ecf20Sopenharmony_ci } 22628c2ecf20Sopenharmony_ci 22638c2ecf20Sopenharmony_ci if (test_bit(STRIPE_OP_CHECK, &ops_request)) { 22648c2ecf20Sopenharmony_ci if (sh->check_state == check_state_run) 22658c2ecf20Sopenharmony_ci ops_run_check_p(sh, percpu); 22668c2ecf20Sopenharmony_ci else if (sh->check_state == check_state_run_q) 22678c2ecf20Sopenharmony_ci ops_run_check_pq(sh, percpu, 0); 22688c2ecf20Sopenharmony_ci else if (sh->check_state == check_state_run_pq) 22698c2ecf20Sopenharmony_ci ops_run_check_pq(sh, percpu, 1); 22708c2ecf20Sopenharmony_ci else 22718c2ecf20Sopenharmony_ci BUG(); 22728c2ecf20Sopenharmony_ci } 22738c2ecf20Sopenharmony_ci 22748c2ecf20Sopenharmony_ci if (overlap_clear && !sh->batch_head) 22758c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 22768c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 22778c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &dev->flags)) 22788c2ecf20Sopenharmony_ci wake_up(&sh->raid_conf->wait_for_overlap); 22798c2ecf20Sopenharmony_ci } 22808c2ecf20Sopenharmony_ci put_cpu(); 22818c2ecf20Sopenharmony_ci} 22828c2ecf20Sopenharmony_ci 22838c2ecf20Sopenharmony_cistatic void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) 22848c2ecf20Sopenharmony_ci{ 22858c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 22868c2ecf20Sopenharmony_ci kfree(sh->pages); 22878c2ecf20Sopenharmony_ci#endif 22888c2ecf20Sopenharmony_ci if (sh->ppl_page) 22898c2ecf20Sopenharmony_ci __free_page(sh->ppl_page); 22908c2ecf20Sopenharmony_ci kmem_cache_free(sc, sh); 22918c2ecf20Sopenharmony_ci} 22928c2ecf20Sopenharmony_ci 22938c2ecf20Sopenharmony_cistatic struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, 22948c2ecf20Sopenharmony_ci int disks, struct r5conf *conf) 22958c2ecf20Sopenharmony_ci{ 22968c2ecf20Sopenharmony_ci struct stripe_head *sh; 22978c2ecf20Sopenharmony_ci int i; 22988c2ecf20Sopenharmony_ci 22998c2ecf20Sopenharmony_ci sh = kmem_cache_zalloc(sc, gfp); 23008c2ecf20Sopenharmony_ci if (sh) { 23018c2ecf20Sopenharmony_ci spin_lock_init(&sh->stripe_lock); 23028c2ecf20Sopenharmony_ci spin_lock_init(&sh->batch_lock); 23038c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sh->batch_list); 23048c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sh->lru); 23058c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sh->r5c); 23068c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sh->log_list); 23078c2ecf20Sopenharmony_ci atomic_set(&sh->count, 1); 23088c2ecf20Sopenharmony_ci sh->raid_conf = conf; 23098c2ecf20Sopenharmony_ci sh->log_start = MaxSector; 23108c2ecf20Sopenharmony_ci for (i = 0; i < disks; i++) { 23118c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 23128c2ecf20Sopenharmony_ci 23138c2ecf20Sopenharmony_ci bio_init(&dev->req, &dev->vec, 1); 23148c2ecf20Sopenharmony_ci bio_init(&dev->rreq, &dev->rvec, 1); 23158c2ecf20Sopenharmony_ci } 23168c2ecf20Sopenharmony_ci 23178c2ecf20Sopenharmony_ci if (raid5_has_ppl(conf)) { 23188c2ecf20Sopenharmony_ci sh->ppl_page = alloc_page(gfp); 23198c2ecf20Sopenharmony_ci if (!sh->ppl_page) { 23208c2ecf20Sopenharmony_ci free_stripe(sc, sh); 23218c2ecf20Sopenharmony_ci return NULL; 23228c2ecf20Sopenharmony_ci } 23238c2ecf20Sopenharmony_ci } 23248c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 23258c2ecf20Sopenharmony_ci if (init_stripe_shared_pages(sh, conf, disks)) { 23268c2ecf20Sopenharmony_ci free_stripe(sc, sh); 23278c2ecf20Sopenharmony_ci return NULL; 23288c2ecf20Sopenharmony_ci } 23298c2ecf20Sopenharmony_ci#endif 23308c2ecf20Sopenharmony_ci } 23318c2ecf20Sopenharmony_ci return sh; 23328c2ecf20Sopenharmony_ci} 23338c2ecf20Sopenharmony_cistatic int grow_one_stripe(struct r5conf *conf, gfp_t gfp) 23348c2ecf20Sopenharmony_ci{ 23358c2ecf20Sopenharmony_ci struct stripe_head *sh; 23368c2ecf20Sopenharmony_ci 23378c2ecf20Sopenharmony_ci sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf); 23388c2ecf20Sopenharmony_ci if (!sh) 23398c2ecf20Sopenharmony_ci return 0; 23408c2ecf20Sopenharmony_ci 23418c2ecf20Sopenharmony_ci if (grow_buffers(sh, gfp)) { 23428c2ecf20Sopenharmony_ci shrink_buffers(sh); 23438c2ecf20Sopenharmony_ci free_stripe(conf->slab_cache, sh); 23448c2ecf20Sopenharmony_ci return 0; 23458c2ecf20Sopenharmony_ci } 23468c2ecf20Sopenharmony_ci sh->hash_lock_index = 23478c2ecf20Sopenharmony_ci conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; 23488c2ecf20Sopenharmony_ci /* we just created an active stripe so... */ 23498c2ecf20Sopenharmony_ci atomic_inc(&conf->active_stripes); 23508c2ecf20Sopenharmony_ci 23518c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 23528c2ecf20Sopenharmony_ci conf->max_nr_stripes++; 23538c2ecf20Sopenharmony_ci return 1; 23548c2ecf20Sopenharmony_ci} 23558c2ecf20Sopenharmony_ci 23568c2ecf20Sopenharmony_cistatic int grow_stripes(struct r5conf *conf, int num) 23578c2ecf20Sopenharmony_ci{ 23588c2ecf20Sopenharmony_ci struct kmem_cache *sc; 23598c2ecf20Sopenharmony_ci size_t namelen = sizeof(conf->cache_name[0]); 23608c2ecf20Sopenharmony_ci int devs = max(conf->raid_disks, conf->previous_raid_disks); 23618c2ecf20Sopenharmony_ci 23628c2ecf20Sopenharmony_ci if (conf->mddev->gendisk) 23638c2ecf20Sopenharmony_ci snprintf(conf->cache_name[0], namelen, 23648c2ecf20Sopenharmony_ci "raid%d-%s", conf->level, mdname(conf->mddev)); 23658c2ecf20Sopenharmony_ci else 23668c2ecf20Sopenharmony_ci snprintf(conf->cache_name[0], namelen, 23678c2ecf20Sopenharmony_ci "raid%d-%p", conf->level, conf->mddev); 23688c2ecf20Sopenharmony_ci snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci conf->active_name = 0; 23718c2ecf20Sopenharmony_ci sc = kmem_cache_create(conf->cache_name[conf->active_name], 23728c2ecf20Sopenharmony_ci sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), 23738c2ecf20Sopenharmony_ci 0, 0, NULL); 23748c2ecf20Sopenharmony_ci if (!sc) 23758c2ecf20Sopenharmony_ci return 1; 23768c2ecf20Sopenharmony_ci conf->slab_cache = sc; 23778c2ecf20Sopenharmony_ci conf->pool_size = devs; 23788c2ecf20Sopenharmony_ci while (num--) 23798c2ecf20Sopenharmony_ci if (!grow_one_stripe(conf, GFP_KERNEL)) 23808c2ecf20Sopenharmony_ci return 1; 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci return 0; 23838c2ecf20Sopenharmony_ci} 23848c2ecf20Sopenharmony_ci 23858c2ecf20Sopenharmony_ci/** 23868c2ecf20Sopenharmony_ci * scribble_alloc - allocate percpu scribble buffer for required size 23878c2ecf20Sopenharmony_ci * of the scribble region 23888c2ecf20Sopenharmony_ci * @percpu: from for_each_present_cpu() of the caller 23898c2ecf20Sopenharmony_ci * @num: total number of disks in the array 23908c2ecf20Sopenharmony_ci * @cnt: scribble objs count for required size of the scribble region 23918c2ecf20Sopenharmony_ci * 23928c2ecf20Sopenharmony_ci * The scribble buffer size must be enough to contain: 23938c2ecf20Sopenharmony_ci * 1/ a struct page pointer for each device in the array +2 23948c2ecf20Sopenharmony_ci * 2/ room to convert each entry in (1) to its corresponding dma 23958c2ecf20Sopenharmony_ci * (dma_map_page()) or page (page_address()) address. 23968c2ecf20Sopenharmony_ci * 23978c2ecf20Sopenharmony_ci * Note: the +2 is for the destination buffers of the ddf/raid6 case where we 23988c2ecf20Sopenharmony_ci * calculate over all devices (not just the data blocks), using zeros in place 23998c2ecf20Sopenharmony_ci * of the P and Q blocks. 24008c2ecf20Sopenharmony_ci */ 24018c2ecf20Sopenharmony_cistatic int scribble_alloc(struct raid5_percpu *percpu, 24028c2ecf20Sopenharmony_ci int num, int cnt) 24038c2ecf20Sopenharmony_ci{ 24048c2ecf20Sopenharmony_ci size_t obj_size = 24058c2ecf20Sopenharmony_ci sizeof(struct page *) * (num + 2) + 24068c2ecf20Sopenharmony_ci sizeof(addr_conv_t) * (num + 2) + 24078c2ecf20Sopenharmony_ci sizeof(unsigned int) * (num + 2); 24088c2ecf20Sopenharmony_ci void *scribble; 24098c2ecf20Sopenharmony_ci 24108c2ecf20Sopenharmony_ci /* 24118c2ecf20Sopenharmony_ci * If here is in raid array suspend context, it is in memalloc noio 24128c2ecf20Sopenharmony_ci * context as well, there is no potential recursive memory reclaim 24138c2ecf20Sopenharmony_ci * I/Os with the GFP_KERNEL flag. 24148c2ecf20Sopenharmony_ci */ 24158c2ecf20Sopenharmony_ci scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL); 24168c2ecf20Sopenharmony_ci if (!scribble) 24178c2ecf20Sopenharmony_ci return -ENOMEM; 24188c2ecf20Sopenharmony_ci 24198c2ecf20Sopenharmony_ci kvfree(percpu->scribble); 24208c2ecf20Sopenharmony_ci 24218c2ecf20Sopenharmony_ci percpu->scribble = scribble; 24228c2ecf20Sopenharmony_ci percpu->scribble_obj_size = obj_size; 24238c2ecf20Sopenharmony_ci return 0; 24248c2ecf20Sopenharmony_ci} 24258c2ecf20Sopenharmony_ci 24268c2ecf20Sopenharmony_cistatic int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) 24278c2ecf20Sopenharmony_ci{ 24288c2ecf20Sopenharmony_ci unsigned long cpu; 24298c2ecf20Sopenharmony_ci int err = 0; 24308c2ecf20Sopenharmony_ci 24318c2ecf20Sopenharmony_ci /* 24328c2ecf20Sopenharmony_ci * Never shrink. And mddev_suspend() could deadlock if this is called 24338c2ecf20Sopenharmony_ci * from raid5d. In that case, scribble_disks and scribble_sectors 24348c2ecf20Sopenharmony_ci * should equal to new_disks and new_sectors 24358c2ecf20Sopenharmony_ci */ 24368c2ecf20Sopenharmony_ci if (conf->scribble_disks >= new_disks && 24378c2ecf20Sopenharmony_ci conf->scribble_sectors >= new_sectors) 24388c2ecf20Sopenharmony_ci return 0; 24398c2ecf20Sopenharmony_ci mddev_suspend(conf->mddev); 24408c2ecf20Sopenharmony_ci get_online_cpus(); 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci for_each_present_cpu(cpu) { 24438c2ecf20Sopenharmony_ci struct raid5_percpu *percpu; 24448c2ecf20Sopenharmony_ci 24458c2ecf20Sopenharmony_ci percpu = per_cpu_ptr(conf->percpu, cpu); 24468c2ecf20Sopenharmony_ci err = scribble_alloc(percpu, new_disks, 24478c2ecf20Sopenharmony_ci new_sectors / RAID5_STRIPE_SECTORS(conf)); 24488c2ecf20Sopenharmony_ci if (err) 24498c2ecf20Sopenharmony_ci break; 24508c2ecf20Sopenharmony_ci } 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci put_online_cpus(); 24538c2ecf20Sopenharmony_ci mddev_resume(conf->mddev); 24548c2ecf20Sopenharmony_ci if (!err) { 24558c2ecf20Sopenharmony_ci conf->scribble_disks = new_disks; 24568c2ecf20Sopenharmony_ci conf->scribble_sectors = new_sectors; 24578c2ecf20Sopenharmony_ci } 24588c2ecf20Sopenharmony_ci return err; 24598c2ecf20Sopenharmony_ci} 24608c2ecf20Sopenharmony_ci 24618c2ecf20Sopenharmony_cistatic int resize_stripes(struct r5conf *conf, int newsize) 24628c2ecf20Sopenharmony_ci{ 24638c2ecf20Sopenharmony_ci /* Make all the stripes able to hold 'newsize' devices. 24648c2ecf20Sopenharmony_ci * New slots in each stripe get 'page' set to a new page. 24658c2ecf20Sopenharmony_ci * 24668c2ecf20Sopenharmony_ci * This happens in stages: 24678c2ecf20Sopenharmony_ci * 1/ create a new kmem_cache and allocate the required number of 24688c2ecf20Sopenharmony_ci * stripe_heads. 24698c2ecf20Sopenharmony_ci * 2/ gather all the old stripe_heads and transfer the pages across 24708c2ecf20Sopenharmony_ci * to the new stripe_heads. This will have the side effect of 24718c2ecf20Sopenharmony_ci * freezing the array as once all stripe_heads have been collected, 24728c2ecf20Sopenharmony_ci * no IO will be possible. Old stripe heads are freed once their 24738c2ecf20Sopenharmony_ci * pages have been transferred over, and the old kmem_cache is 24748c2ecf20Sopenharmony_ci * freed when all stripes are done. 24758c2ecf20Sopenharmony_ci * 3/ reallocate conf->disks to be suitable bigger. If this fails, 24768c2ecf20Sopenharmony_ci * we simple return a failure status - no need to clean anything up. 24778c2ecf20Sopenharmony_ci * 4/ allocate new pages for the new slots in the new stripe_heads. 24788c2ecf20Sopenharmony_ci * If this fails, we don't bother trying the shrink the 24798c2ecf20Sopenharmony_ci * stripe_heads down again, we just leave them as they are. 24808c2ecf20Sopenharmony_ci * As each stripe_head is processed the new one is released into 24818c2ecf20Sopenharmony_ci * active service. 24828c2ecf20Sopenharmony_ci * 24838c2ecf20Sopenharmony_ci * Once step2 is started, we cannot afford to wait for a write, 24848c2ecf20Sopenharmony_ci * so we use GFP_NOIO allocations. 24858c2ecf20Sopenharmony_ci */ 24868c2ecf20Sopenharmony_ci struct stripe_head *osh, *nsh; 24878c2ecf20Sopenharmony_ci LIST_HEAD(newstripes); 24888c2ecf20Sopenharmony_ci struct disk_info *ndisks; 24898c2ecf20Sopenharmony_ci int err = 0; 24908c2ecf20Sopenharmony_ci struct kmem_cache *sc; 24918c2ecf20Sopenharmony_ci int i; 24928c2ecf20Sopenharmony_ci int hash, cnt; 24938c2ecf20Sopenharmony_ci 24948c2ecf20Sopenharmony_ci md_allow_write(conf->mddev); 24958c2ecf20Sopenharmony_ci 24968c2ecf20Sopenharmony_ci /* Step 1 */ 24978c2ecf20Sopenharmony_ci sc = kmem_cache_create(conf->cache_name[1-conf->active_name], 24988c2ecf20Sopenharmony_ci sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), 24998c2ecf20Sopenharmony_ci 0, 0, NULL); 25008c2ecf20Sopenharmony_ci if (!sc) 25018c2ecf20Sopenharmony_ci return -ENOMEM; 25028c2ecf20Sopenharmony_ci 25038c2ecf20Sopenharmony_ci /* Need to ensure auto-resizing doesn't interfere */ 25048c2ecf20Sopenharmony_ci mutex_lock(&conf->cache_size_mutex); 25058c2ecf20Sopenharmony_ci 25068c2ecf20Sopenharmony_ci for (i = conf->max_nr_stripes; i; i--) { 25078c2ecf20Sopenharmony_ci nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); 25088c2ecf20Sopenharmony_ci if (!nsh) 25098c2ecf20Sopenharmony_ci break; 25108c2ecf20Sopenharmony_ci 25118c2ecf20Sopenharmony_ci list_add(&nsh->lru, &newstripes); 25128c2ecf20Sopenharmony_ci } 25138c2ecf20Sopenharmony_ci if (i) { 25148c2ecf20Sopenharmony_ci /* didn't get enough, give up */ 25158c2ecf20Sopenharmony_ci while (!list_empty(&newstripes)) { 25168c2ecf20Sopenharmony_ci nsh = list_entry(newstripes.next, struct stripe_head, lru); 25178c2ecf20Sopenharmony_ci list_del(&nsh->lru); 25188c2ecf20Sopenharmony_ci free_stripe(sc, nsh); 25198c2ecf20Sopenharmony_ci } 25208c2ecf20Sopenharmony_ci kmem_cache_destroy(sc); 25218c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 25228c2ecf20Sopenharmony_ci return -ENOMEM; 25238c2ecf20Sopenharmony_ci } 25248c2ecf20Sopenharmony_ci /* Step 2 - Must use GFP_NOIO now. 25258c2ecf20Sopenharmony_ci * OK, we have enough stripes, start collecting inactive 25268c2ecf20Sopenharmony_ci * stripes and copying them over 25278c2ecf20Sopenharmony_ci */ 25288c2ecf20Sopenharmony_ci hash = 0; 25298c2ecf20Sopenharmony_ci cnt = 0; 25308c2ecf20Sopenharmony_ci list_for_each_entry(nsh, &newstripes, lru) { 25318c2ecf20Sopenharmony_ci lock_device_hash_lock(conf, hash); 25328c2ecf20Sopenharmony_ci wait_event_cmd(conf->wait_for_stripe, 25338c2ecf20Sopenharmony_ci !list_empty(conf->inactive_list + hash), 25348c2ecf20Sopenharmony_ci unlock_device_hash_lock(conf, hash), 25358c2ecf20Sopenharmony_ci lock_device_hash_lock(conf, hash)); 25368c2ecf20Sopenharmony_ci osh = get_free_stripe(conf, hash); 25378c2ecf20Sopenharmony_ci unlock_device_hash_lock(conf, hash); 25388c2ecf20Sopenharmony_ci 25398c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 25408c2ecf20Sopenharmony_ci for (i = 0; i < osh->nr_pages; i++) { 25418c2ecf20Sopenharmony_ci nsh->pages[i] = osh->pages[i]; 25428c2ecf20Sopenharmony_ci osh->pages[i] = NULL; 25438c2ecf20Sopenharmony_ci } 25448c2ecf20Sopenharmony_ci#endif 25458c2ecf20Sopenharmony_ci for(i=0; i<conf->pool_size; i++) { 25468c2ecf20Sopenharmony_ci nsh->dev[i].page = osh->dev[i].page; 25478c2ecf20Sopenharmony_ci nsh->dev[i].orig_page = osh->dev[i].page; 25488c2ecf20Sopenharmony_ci nsh->dev[i].offset = osh->dev[i].offset; 25498c2ecf20Sopenharmony_ci } 25508c2ecf20Sopenharmony_ci nsh->hash_lock_index = hash; 25518c2ecf20Sopenharmony_ci free_stripe(conf->slab_cache, osh); 25528c2ecf20Sopenharmony_ci cnt++; 25538c2ecf20Sopenharmony_ci if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + 25548c2ecf20Sopenharmony_ci !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { 25558c2ecf20Sopenharmony_ci hash++; 25568c2ecf20Sopenharmony_ci cnt = 0; 25578c2ecf20Sopenharmony_ci } 25588c2ecf20Sopenharmony_ci } 25598c2ecf20Sopenharmony_ci kmem_cache_destroy(conf->slab_cache); 25608c2ecf20Sopenharmony_ci 25618c2ecf20Sopenharmony_ci /* Step 3. 25628c2ecf20Sopenharmony_ci * At this point, we are holding all the stripes so the array 25638c2ecf20Sopenharmony_ci * is completely stalled, so now is a good time to resize 25648c2ecf20Sopenharmony_ci * conf->disks and the scribble region 25658c2ecf20Sopenharmony_ci */ 25668c2ecf20Sopenharmony_ci ndisks = kcalloc(newsize, sizeof(struct disk_info), GFP_NOIO); 25678c2ecf20Sopenharmony_ci if (ndisks) { 25688c2ecf20Sopenharmony_ci for (i = 0; i < conf->pool_size; i++) 25698c2ecf20Sopenharmony_ci ndisks[i] = conf->disks[i]; 25708c2ecf20Sopenharmony_ci 25718c2ecf20Sopenharmony_ci for (i = conf->pool_size; i < newsize; i++) { 25728c2ecf20Sopenharmony_ci ndisks[i].extra_page = alloc_page(GFP_NOIO); 25738c2ecf20Sopenharmony_ci if (!ndisks[i].extra_page) 25748c2ecf20Sopenharmony_ci err = -ENOMEM; 25758c2ecf20Sopenharmony_ci } 25768c2ecf20Sopenharmony_ci 25778c2ecf20Sopenharmony_ci if (err) { 25788c2ecf20Sopenharmony_ci for (i = conf->pool_size; i < newsize; i++) 25798c2ecf20Sopenharmony_ci if (ndisks[i].extra_page) 25808c2ecf20Sopenharmony_ci put_page(ndisks[i].extra_page); 25818c2ecf20Sopenharmony_ci kfree(ndisks); 25828c2ecf20Sopenharmony_ci } else { 25838c2ecf20Sopenharmony_ci kfree(conf->disks); 25848c2ecf20Sopenharmony_ci conf->disks = ndisks; 25858c2ecf20Sopenharmony_ci } 25868c2ecf20Sopenharmony_ci } else 25878c2ecf20Sopenharmony_ci err = -ENOMEM; 25888c2ecf20Sopenharmony_ci 25898c2ecf20Sopenharmony_ci conf->slab_cache = sc; 25908c2ecf20Sopenharmony_ci conf->active_name = 1-conf->active_name; 25918c2ecf20Sopenharmony_ci 25928c2ecf20Sopenharmony_ci /* Step 4, return new stripes to service */ 25938c2ecf20Sopenharmony_ci while(!list_empty(&newstripes)) { 25948c2ecf20Sopenharmony_ci nsh = list_entry(newstripes.next, struct stripe_head, lru); 25958c2ecf20Sopenharmony_ci list_del_init(&nsh->lru); 25968c2ecf20Sopenharmony_ci 25978c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 25988c2ecf20Sopenharmony_ci for (i = 0; i < nsh->nr_pages; i++) { 25998c2ecf20Sopenharmony_ci if (nsh->pages[i]) 26008c2ecf20Sopenharmony_ci continue; 26018c2ecf20Sopenharmony_ci nsh->pages[i] = alloc_page(GFP_NOIO); 26028c2ecf20Sopenharmony_ci if (!nsh->pages[i]) 26038c2ecf20Sopenharmony_ci err = -ENOMEM; 26048c2ecf20Sopenharmony_ci } 26058c2ecf20Sopenharmony_ci 26068c2ecf20Sopenharmony_ci for (i = conf->raid_disks; i < newsize; i++) { 26078c2ecf20Sopenharmony_ci if (nsh->dev[i].page) 26088c2ecf20Sopenharmony_ci continue; 26098c2ecf20Sopenharmony_ci nsh->dev[i].page = raid5_get_dev_page(nsh, i); 26108c2ecf20Sopenharmony_ci nsh->dev[i].orig_page = nsh->dev[i].page; 26118c2ecf20Sopenharmony_ci nsh->dev[i].offset = raid5_get_page_offset(nsh, i); 26128c2ecf20Sopenharmony_ci } 26138c2ecf20Sopenharmony_ci#else 26148c2ecf20Sopenharmony_ci for (i=conf->raid_disks; i < newsize; i++) 26158c2ecf20Sopenharmony_ci if (nsh->dev[i].page == NULL) { 26168c2ecf20Sopenharmony_ci struct page *p = alloc_page(GFP_NOIO); 26178c2ecf20Sopenharmony_ci nsh->dev[i].page = p; 26188c2ecf20Sopenharmony_ci nsh->dev[i].orig_page = p; 26198c2ecf20Sopenharmony_ci nsh->dev[i].offset = 0; 26208c2ecf20Sopenharmony_ci if (!p) 26218c2ecf20Sopenharmony_ci err = -ENOMEM; 26228c2ecf20Sopenharmony_ci } 26238c2ecf20Sopenharmony_ci#endif 26248c2ecf20Sopenharmony_ci raid5_release_stripe(nsh); 26258c2ecf20Sopenharmony_ci } 26268c2ecf20Sopenharmony_ci /* critical section pass, GFP_NOIO no longer needed */ 26278c2ecf20Sopenharmony_ci 26288c2ecf20Sopenharmony_ci if (!err) 26298c2ecf20Sopenharmony_ci conf->pool_size = newsize; 26308c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 26318c2ecf20Sopenharmony_ci 26328c2ecf20Sopenharmony_ci return err; 26338c2ecf20Sopenharmony_ci} 26348c2ecf20Sopenharmony_ci 26358c2ecf20Sopenharmony_cistatic int drop_one_stripe(struct r5conf *conf) 26368c2ecf20Sopenharmony_ci{ 26378c2ecf20Sopenharmony_ci struct stripe_head *sh; 26388c2ecf20Sopenharmony_ci int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks + hash); 26418c2ecf20Sopenharmony_ci sh = get_free_stripe(conf, hash); 26428c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks + hash); 26438c2ecf20Sopenharmony_ci if (!sh) 26448c2ecf20Sopenharmony_ci return 0; 26458c2ecf20Sopenharmony_ci BUG_ON(atomic_read(&sh->count)); 26468c2ecf20Sopenharmony_ci shrink_buffers(sh); 26478c2ecf20Sopenharmony_ci free_stripe(conf->slab_cache, sh); 26488c2ecf20Sopenharmony_ci atomic_dec(&conf->active_stripes); 26498c2ecf20Sopenharmony_ci conf->max_nr_stripes--; 26508c2ecf20Sopenharmony_ci return 1; 26518c2ecf20Sopenharmony_ci} 26528c2ecf20Sopenharmony_ci 26538c2ecf20Sopenharmony_cistatic void shrink_stripes(struct r5conf *conf) 26548c2ecf20Sopenharmony_ci{ 26558c2ecf20Sopenharmony_ci while (conf->max_nr_stripes && 26568c2ecf20Sopenharmony_ci drop_one_stripe(conf)) 26578c2ecf20Sopenharmony_ci ; 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci kmem_cache_destroy(conf->slab_cache); 26608c2ecf20Sopenharmony_ci conf->slab_cache = NULL; 26618c2ecf20Sopenharmony_ci} 26628c2ecf20Sopenharmony_ci 26638c2ecf20Sopenharmony_cistatic void raid5_end_read_request(struct bio * bi) 26648c2ecf20Sopenharmony_ci{ 26658c2ecf20Sopenharmony_ci struct stripe_head *sh = bi->bi_private; 26668c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 26678c2ecf20Sopenharmony_ci int disks = sh->disks, i; 26688c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 26698c2ecf20Sopenharmony_ci struct md_rdev *rdev = NULL; 26708c2ecf20Sopenharmony_ci sector_t s; 26718c2ecf20Sopenharmony_ci 26728c2ecf20Sopenharmony_ci for (i=0 ; i<disks; i++) 26738c2ecf20Sopenharmony_ci if (bi == &sh->dev[i].req) 26748c2ecf20Sopenharmony_ci break; 26758c2ecf20Sopenharmony_ci 26768c2ecf20Sopenharmony_ci pr_debug("end_read_request %llu/%d, count: %d, error %d.\n", 26778c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, i, atomic_read(&sh->count), 26788c2ecf20Sopenharmony_ci bi->bi_status); 26798c2ecf20Sopenharmony_ci if (i == disks) { 26808c2ecf20Sopenharmony_ci bio_reset(bi); 26818c2ecf20Sopenharmony_ci BUG(); 26828c2ecf20Sopenharmony_ci return; 26838c2ecf20Sopenharmony_ci } 26848c2ecf20Sopenharmony_ci if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) 26858c2ecf20Sopenharmony_ci /* If replacement finished while this request was outstanding, 26868c2ecf20Sopenharmony_ci * 'replacement' might be NULL already. 26878c2ecf20Sopenharmony_ci * In that case it moved down to 'rdev'. 26888c2ecf20Sopenharmony_ci * rdev is not removed until all requests are finished. 26898c2ecf20Sopenharmony_ci */ 26908c2ecf20Sopenharmony_ci rdev = conf->disks[i].replacement; 26918c2ecf20Sopenharmony_ci if (!rdev) 26928c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 26938c2ecf20Sopenharmony_ci 26948c2ecf20Sopenharmony_ci if (use_new_offset(conf, sh)) 26958c2ecf20Sopenharmony_ci s = sh->sector + rdev->new_data_offset; 26968c2ecf20Sopenharmony_ci else 26978c2ecf20Sopenharmony_ci s = sh->sector + rdev->data_offset; 26988c2ecf20Sopenharmony_ci if (!bi->bi_status) { 26998c2ecf20Sopenharmony_ci set_bit(R5_UPTODATE, &sh->dev[i].flags); 27008c2ecf20Sopenharmony_ci if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 27018c2ecf20Sopenharmony_ci /* Note that this cannot happen on a 27028c2ecf20Sopenharmony_ci * replacement device. We just fail those on 27038c2ecf20Sopenharmony_ci * any error 27048c2ecf20Sopenharmony_ci */ 27058c2ecf20Sopenharmony_ci pr_info_ratelimited( 27068c2ecf20Sopenharmony_ci "md/raid:%s: read error corrected (%lu sectors at %llu on %s)\n", 27078c2ecf20Sopenharmony_ci mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf), 27088c2ecf20Sopenharmony_ci (unsigned long long)s, 27098c2ecf20Sopenharmony_ci bdevname(rdev->bdev, b)); 27108c2ecf20Sopenharmony_ci atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors); 27118c2ecf20Sopenharmony_ci clear_bit(R5_ReadError, &sh->dev[i].flags); 27128c2ecf20Sopenharmony_ci clear_bit(R5_ReWrite, &sh->dev[i].flags); 27138c2ecf20Sopenharmony_ci } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) 27148c2ecf20Sopenharmony_ci clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); 27158c2ecf20Sopenharmony_ci 27168c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &sh->dev[i].flags)) 27178c2ecf20Sopenharmony_ci /* 27188c2ecf20Sopenharmony_ci * end read for a page in journal, this 27198c2ecf20Sopenharmony_ci * must be preparing for prexor in rmw 27208c2ecf20Sopenharmony_ci */ 27218c2ecf20Sopenharmony_ci set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); 27228c2ecf20Sopenharmony_ci 27238c2ecf20Sopenharmony_ci if (atomic_read(&rdev->read_errors)) 27248c2ecf20Sopenharmony_ci atomic_set(&rdev->read_errors, 0); 27258c2ecf20Sopenharmony_ci } else { 27268c2ecf20Sopenharmony_ci const char *bdn = bdevname(rdev->bdev, b); 27278c2ecf20Sopenharmony_ci int retry = 0; 27288c2ecf20Sopenharmony_ci int set_bad = 0; 27298c2ecf20Sopenharmony_ci 27308c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[i].flags); 27318c2ecf20Sopenharmony_ci if (!(bi->bi_status == BLK_STS_PROTECTION)) 27328c2ecf20Sopenharmony_ci atomic_inc(&rdev->read_errors); 27338c2ecf20Sopenharmony_ci if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) 27348c2ecf20Sopenharmony_ci pr_warn_ratelimited( 27358c2ecf20Sopenharmony_ci "md/raid:%s: read error on replacement device (sector %llu on %s).\n", 27368c2ecf20Sopenharmony_ci mdname(conf->mddev), 27378c2ecf20Sopenharmony_ci (unsigned long long)s, 27388c2ecf20Sopenharmony_ci bdn); 27398c2ecf20Sopenharmony_ci else if (conf->mddev->degraded >= conf->max_degraded) { 27408c2ecf20Sopenharmony_ci set_bad = 1; 27418c2ecf20Sopenharmony_ci pr_warn_ratelimited( 27428c2ecf20Sopenharmony_ci "md/raid:%s: read error not correctable (sector %llu on %s).\n", 27438c2ecf20Sopenharmony_ci mdname(conf->mddev), 27448c2ecf20Sopenharmony_ci (unsigned long long)s, 27458c2ecf20Sopenharmony_ci bdn); 27468c2ecf20Sopenharmony_ci } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { 27478c2ecf20Sopenharmony_ci /* Oh, no!!! */ 27488c2ecf20Sopenharmony_ci set_bad = 1; 27498c2ecf20Sopenharmony_ci pr_warn_ratelimited( 27508c2ecf20Sopenharmony_ci "md/raid:%s: read error NOT corrected!! (sector %llu on %s).\n", 27518c2ecf20Sopenharmony_ci mdname(conf->mddev), 27528c2ecf20Sopenharmony_ci (unsigned long long)s, 27538c2ecf20Sopenharmony_ci bdn); 27548c2ecf20Sopenharmony_ci } else if (atomic_read(&rdev->read_errors) 27558c2ecf20Sopenharmony_ci > conf->max_nr_stripes) { 27568c2ecf20Sopenharmony_ci if (!test_bit(Faulty, &rdev->flags)) { 27578c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: %d read_errors > %d stripes\n", 27588c2ecf20Sopenharmony_ci mdname(conf->mddev), 27598c2ecf20Sopenharmony_ci atomic_read(&rdev->read_errors), 27608c2ecf20Sopenharmony_ci conf->max_nr_stripes); 27618c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: Too many read errors, failing device %s.\n", 27628c2ecf20Sopenharmony_ci mdname(conf->mddev), bdn); 27638c2ecf20Sopenharmony_ci } 27648c2ecf20Sopenharmony_ci } else 27658c2ecf20Sopenharmony_ci retry = 1; 27668c2ecf20Sopenharmony_ci if (set_bad && test_bit(In_sync, &rdev->flags) 27678c2ecf20Sopenharmony_ci && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) 27688c2ecf20Sopenharmony_ci retry = 1; 27698c2ecf20Sopenharmony_ci if (retry) 27708c2ecf20Sopenharmony_ci if (sh->qd_idx >= 0 && sh->pd_idx == i) 27718c2ecf20Sopenharmony_ci set_bit(R5_ReadError, &sh->dev[i].flags); 27728c2ecf20Sopenharmony_ci else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { 27738c2ecf20Sopenharmony_ci set_bit(R5_ReadError, &sh->dev[i].flags); 27748c2ecf20Sopenharmony_ci clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); 27758c2ecf20Sopenharmony_ci } else 27768c2ecf20Sopenharmony_ci set_bit(R5_ReadNoMerge, &sh->dev[i].flags); 27778c2ecf20Sopenharmony_ci else { 27788c2ecf20Sopenharmony_ci clear_bit(R5_ReadError, &sh->dev[i].flags); 27798c2ecf20Sopenharmony_ci clear_bit(R5_ReWrite, &sh->dev[i].flags); 27808c2ecf20Sopenharmony_ci if (!(set_bad 27818c2ecf20Sopenharmony_ci && test_bit(In_sync, &rdev->flags) 27828c2ecf20Sopenharmony_ci && rdev_set_badblocks( 27838c2ecf20Sopenharmony_ci rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0))) 27848c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 27858c2ecf20Sopenharmony_ci } 27868c2ecf20Sopenharmony_ci } 27878c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 27888c2ecf20Sopenharmony_ci bio_reset(bi); 27898c2ecf20Sopenharmony_ci clear_bit(R5_LOCKED, &sh->dev[i].flags); 27908c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 27918c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 27928c2ecf20Sopenharmony_ci} 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_cistatic void raid5_end_write_request(struct bio *bi) 27958c2ecf20Sopenharmony_ci{ 27968c2ecf20Sopenharmony_ci struct stripe_head *sh = bi->bi_private; 27978c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 27988c2ecf20Sopenharmony_ci int disks = sh->disks, i; 27998c2ecf20Sopenharmony_ci struct md_rdev *rdev; 28008c2ecf20Sopenharmony_ci sector_t first_bad; 28018c2ecf20Sopenharmony_ci int bad_sectors; 28028c2ecf20Sopenharmony_ci int replacement = 0; 28038c2ecf20Sopenharmony_ci 28048c2ecf20Sopenharmony_ci for (i = 0 ; i < disks; i++) { 28058c2ecf20Sopenharmony_ci if (bi == &sh->dev[i].req) { 28068c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 28078c2ecf20Sopenharmony_ci break; 28088c2ecf20Sopenharmony_ci } 28098c2ecf20Sopenharmony_ci if (bi == &sh->dev[i].rreq) { 28108c2ecf20Sopenharmony_ci rdev = conf->disks[i].replacement; 28118c2ecf20Sopenharmony_ci if (rdev) 28128c2ecf20Sopenharmony_ci replacement = 1; 28138c2ecf20Sopenharmony_ci else 28148c2ecf20Sopenharmony_ci /* rdev was removed and 'replacement' 28158c2ecf20Sopenharmony_ci * replaced it. rdev is not removed 28168c2ecf20Sopenharmony_ci * until all requests are finished. 28178c2ecf20Sopenharmony_ci */ 28188c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 28198c2ecf20Sopenharmony_ci break; 28208c2ecf20Sopenharmony_ci } 28218c2ecf20Sopenharmony_ci } 28228c2ecf20Sopenharmony_ci pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", 28238c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, i, atomic_read(&sh->count), 28248c2ecf20Sopenharmony_ci bi->bi_status); 28258c2ecf20Sopenharmony_ci if (i == disks) { 28268c2ecf20Sopenharmony_ci bio_reset(bi); 28278c2ecf20Sopenharmony_ci BUG(); 28288c2ecf20Sopenharmony_ci return; 28298c2ecf20Sopenharmony_ci } 28308c2ecf20Sopenharmony_ci 28318c2ecf20Sopenharmony_ci if (replacement) { 28328c2ecf20Sopenharmony_ci if (bi->bi_status) 28338c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 28348c2ecf20Sopenharmony_ci else if (is_badblock(rdev, sh->sector, 28358c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 28368c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) 28378c2ecf20Sopenharmony_ci set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); 28388c2ecf20Sopenharmony_ci } else { 28398c2ecf20Sopenharmony_ci if (bi->bi_status) { 28408c2ecf20Sopenharmony_ci set_bit(STRIPE_DEGRADED, &sh->state); 28418c2ecf20Sopenharmony_ci set_bit(WriteErrorSeen, &rdev->flags); 28428c2ecf20Sopenharmony_ci set_bit(R5_WriteError, &sh->dev[i].flags); 28438c2ecf20Sopenharmony_ci if (!test_and_set_bit(WantReplacement, &rdev->flags)) 28448c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, 28458c2ecf20Sopenharmony_ci &rdev->mddev->recovery); 28468c2ecf20Sopenharmony_ci } else if (is_badblock(rdev, sh->sector, 28478c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 28488c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) { 28498c2ecf20Sopenharmony_ci set_bit(R5_MadeGood, &sh->dev[i].flags); 28508c2ecf20Sopenharmony_ci if (test_bit(R5_ReadError, &sh->dev[i].flags)) 28518c2ecf20Sopenharmony_ci /* That was a successful write so make 28528c2ecf20Sopenharmony_ci * sure it looks like we already did 28538c2ecf20Sopenharmony_ci * a re-write. 28548c2ecf20Sopenharmony_ci */ 28558c2ecf20Sopenharmony_ci set_bit(R5_ReWrite, &sh->dev[i].flags); 28568c2ecf20Sopenharmony_ci } 28578c2ecf20Sopenharmony_ci } 28588c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 28598c2ecf20Sopenharmony_ci 28608c2ecf20Sopenharmony_ci if (sh->batch_head && bi->bi_status && !replacement) 28618c2ecf20Sopenharmony_ci set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); 28628c2ecf20Sopenharmony_ci 28638c2ecf20Sopenharmony_ci bio_reset(bi); 28648c2ecf20Sopenharmony_ci if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) 28658c2ecf20Sopenharmony_ci clear_bit(R5_LOCKED, &sh->dev[i].flags); 28668c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 28678c2ecf20Sopenharmony_ci 28688c2ecf20Sopenharmony_ci if (sh->batch_head && sh != sh->batch_head) 28698c2ecf20Sopenharmony_ci raid5_release_stripe(sh->batch_head); 28708c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 28718c2ecf20Sopenharmony_ci} 28728c2ecf20Sopenharmony_ci 28738c2ecf20Sopenharmony_cistatic void raid5_error(struct mddev *mddev, struct md_rdev *rdev) 28748c2ecf20Sopenharmony_ci{ 28758c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 28768c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 28778c2ecf20Sopenharmony_ci unsigned long flags; 28788c2ecf20Sopenharmony_ci pr_debug("raid456: error called\n"); 28798c2ecf20Sopenharmony_ci 28808c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n", 28818c2ecf20Sopenharmony_ci mdname(mddev), bdevname(rdev->bdev, b)); 28828c2ecf20Sopenharmony_ci 28838c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 28848c2ecf20Sopenharmony_ci set_bit(Faulty, &rdev->flags); 28858c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 28868c2ecf20Sopenharmony_ci mddev->degraded = raid5_calc_degraded(conf); 28878c2ecf20Sopenharmony_ci 28888c2ecf20Sopenharmony_ci if (has_failed(conf)) { 28898c2ecf20Sopenharmony_ci set_bit(MD_BROKEN, &conf->mddev->flags); 28908c2ecf20Sopenharmony_ci conf->recovery_disabled = mddev->recovery_disabled; 28918c2ecf20Sopenharmony_ci 28928c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n", 28938c2ecf20Sopenharmony_ci mdname(mddev), mddev->degraded, conf->raid_disks); 28948c2ecf20Sopenharmony_ci } else { 28958c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: Operation continuing on %d devices.\n", 28968c2ecf20Sopenharmony_ci mdname(mddev), conf->raid_disks - mddev->degraded); 28978c2ecf20Sopenharmony_ci } 28988c2ecf20Sopenharmony_ci 28998c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 29008c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_INTR, &mddev->recovery); 29018c2ecf20Sopenharmony_ci 29028c2ecf20Sopenharmony_ci set_bit(Blocked, &rdev->flags); 29038c2ecf20Sopenharmony_ci set_mask_bits(&mddev->sb_flags, 0, 29048c2ecf20Sopenharmony_ci BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); 29058c2ecf20Sopenharmony_ci r5c_update_on_rdev_error(mddev, rdev); 29068c2ecf20Sopenharmony_ci} 29078c2ecf20Sopenharmony_ci 29088c2ecf20Sopenharmony_ci/* 29098c2ecf20Sopenharmony_ci * Input: a 'big' sector number, 29108c2ecf20Sopenharmony_ci * Output: index of the data and parity disk, and the sector # in them. 29118c2ecf20Sopenharmony_ci */ 29128c2ecf20Sopenharmony_cisector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, 29138c2ecf20Sopenharmony_ci int previous, int *dd_idx, 29148c2ecf20Sopenharmony_ci struct stripe_head *sh) 29158c2ecf20Sopenharmony_ci{ 29168c2ecf20Sopenharmony_ci sector_t stripe, stripe2; 29178c2ecf20Sopenharmony_ci sector_t chunk_number; 29188c2ecf20Sopenharmony_ci unsigned int chunk_offset; 29198c2ecf20Sopenharmony_ci int pd_idx, qd_idx; 29208c2ecf20Sopenharmony_ci int ddf_layout = 0; 29218c2ecf20Sopenharmony_ci sector_t new_sector; 29228c2ecf20Sopenharmony_ci int algorithm = previous ? conf->prev_algo 29238c2ecf20Sopenharmony_ci : conf->algorithm; 29248c2ecf20Sopenharmony_ci int sectors_per_chunk = previous ? conf->prev_chunk_sectors 29258c2ecf20Sopenharmony_ci : conf->chunk_sectors; 29268c2ecf20Sopenharmony_ci int raid_disks = previous ? conf->previous_raid_disks 29278c2ecf20Sopenharmony_ci : conf->raid_disks; 29288c2ecf20Sopenharmony_ci int data_disks = raid_disks - conf->max_degraded; 29298c2ecf20Sopenharmony_ci 29308c2ecf20Sopenharmony_ci /* First compute the information on this sector */ 29318c2ecf20Sopenharmony_ci 29328c2ecf20Sopenharmony_ci /* 29338c2ecf20Sopenharmony_ci * Compute the chunk number and the sector offset inside the chunk 29348c2ecf20Sopenharmony_ci */ 29358c2ecf20Sopenharmony_ci chunk_offset = sector_div(r_sector, sectors_per_chunk); 29368c2ecf20Sopenharmony_ci chunk_number = r_sector; 29378c2ecf20Sopenharmony_ci 29388c2ecf20Sopenharmony_ci /* 29398c2ecf20Sopenharmony_ci * Compute the stripe number 29408c2ecf20Sopenharmony_ci */ 29418c2ecf20Sopenharmony_ci stripe = chunk_number; 29428c2ecf20Sopenharmony_ci *dd_idx = sector_div(stripe, data_disks); 29438c2ecf20Sopenharmony_ci stripe2 = stripe; 29448c2ecf20Sopenharmony_ci /* 29458c2ecf20Sopenharmony_ci * Select the parity disk based on the user selected algorithm. 29468c2ecf20Sopenharmony_ci */ 29478c2ecf20Sopenharmony_ci pd_idx = qd_idx = -1; 29488c2ecf20Sopenharmony_ci switch(conf->level) { 29498c2ecf20Sopenharmony_ci case 4: 29508c2ecf20Sopenharmony_ci pd_idx = data_disks; 29518c2ecf20Sopenharmony_ci break; 29528c2ecf20Sopenharmony_ci case 5: 29538c2ecf20Sopenharmony_ci switch (algorithm) { 29548c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC: 29558c2ecf20Sopenharmony_ci pd_idx = data_disks - sector_div(stripe2, raid_disks); 29568c2ecf20Sopenharmony_ci if (*dd_idx >= pd_idx) 29578c2ecf20Sopenharmony_ci (*dd_idx)++; 29588c2ecf20Sopenharmony_ci break; 29598c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC: 29608c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks); 29618c2ecf20Sopenharmony_ci if (*dd_idx >= pd_idx) 29628c2ecf20Sopenharmony_ci (*dd_idx)++; 29638c2ecf20Sopenharmony_ci break; 29648c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC: 29658c2ecf20Sopenharmony_ci pd_idx = data_disks - sector_div(stripe2, raid_disks); 29668c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; 29678c2ecf20Sopenharmony_ci break; 29688c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC: 29698c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks); 29708c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; 29718c2ecf20Sopenharmony_ci break; 29728c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 29738c2ecf20Sopenharmony_ci pd_idx = 0; 29748c2ecf20Sopenharmony_ci (*dd_idx)++; 29758c2ecf20Sopenharmony_ci break; 29768c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 29778c2ecf20Sopenharmony_ci pd_idx = data_disks; 29788c2ecf20Sopenharmony_ci break; 29798c2ecf20Sopenharmony_ci default: 29808c2ecf20Sopenharmony_ci BUG(); 29818c2ecf20Sopenharmony_ci } 29828c2ecf20Sopenharmony_ci break; 29838c2ecf20Sopenharmony_ci case 6: 29848c2ecf20Sopenharmony_ci 29858c2ecf20Sopenharmony_ci switch (algorithm) { 29868c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC: 29878c2ecf20Sopenharmony_ci pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); 29888c2ecf20Sopenharmony_ci qd_idx = pd_idx + 1; 29898c2ecf20Sopenharmony_ci if (pd_idx == raid_disks-1) { 29908c2ecf20Sopenharmony_ci (*dd_idx)++; /* Q D D D P */ 29918c2ecf20Sopenharmony_ci qd_idx = 0; 29928c2ecf20Sopenharmony_ci } else if (*dd_idx >= pd_idx) 29938c2ecf20Sopenharmony_ci (*dd_idx) += 2; /* D D P Q D */ 29948c2ecf20Sopenharmony_ci break; 29958c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC: 29968c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks); 29978c2ecf20Sopenharmony_ci qd_idx = pd_idx + 1; 29988c2ecf20Sopenharmony_ci if (pd_idx == raid_disks-1) { 29998c2ecf20Sopenharmony_ci (*dd_idx)++; /* Q D D D P */ 30008c2ecf20Sopenharmony_ci qd_idx = 0; 30018c2ecf20Sopenharmony_ci } else if (*dd_idx >= pd_idx) 30028c2ecf20Sopenharmony_ci (*dd_idx) += 2; /* D D P Q D */ 30038c2ecf20Sopenharmony_ci break; 30048c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC: 30058c2ecf20Sopenharmony_ci pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); 30068c2ecf20Sopenharmony_ci qd_idx = (pd_idx + 1) % raid_disks; 30078c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; 30088c2ecf20Sopenharmony_ci break; 30098c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC: 30108c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks); 30118c2ecf20Sopenharmony_ci qd_idx = (pd_idx + 1) % raid_disks; 30128c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; 30138c2ecf20Sopenharmony_ci break; 30148c2ecf20Sopenharmony_ci 30158c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 30168c2ecf20Sopenharmony_ci pd_idx = 0; 30178c2ecf20Sopenharmony_ci qd_idx = 1; 30188c2ecf20Sopenharmony_ci (*dd_idx) += 2; 30198c2ecf20Sopenharmony_ci break; 30208c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 30218c2ecf20Sopenharmony_ci pd_idx = data_disks; 30228c2ecf20Sopenharmony_ci qd_idx = data_disks + 1; 30238c2ecf20Sopenharmony_ci break; 30248c2ecf20Sopenharmony_ci 30258c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_ZERO_RESTART: 30268c2ecf20Sopenharmony_ci /* Exactly the same as RIGHT_ASYMMETRIC, but or 30278c2ecf20Sopenharmony_ci * of blocks for computing Q is different. 30288c2ecf20Sopenharmony_ci */ 30298c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks); 30308c2ecf20Sopenharmony_ci qd_idx = pd_idx + 1; 30318c2ecf20Sopenharmony_ci if (pd_idx == raid_disks-1) { 30328c2ecf20Sopenharmony_ci (*dd_idx)++; /* Q D D D P */ 30338c2ecf20Sopenharmony_ci qd_idx = 0; 30348c2ecf20Sopenharmony_ci } else if (*dd_idx >= pd_idx) 30358c2ecf20Sopenharmony_ci (*dd_idx) += 2; /* D D P Q D */ 30368c2ecf20Sopenharmony_ci ddf_layout = 1; 30378c2ecf20Sopenharmony_ci break; 30388c2ecf20Sopenharmony_ci 30398c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_N_RESTART: 30408c2ecf20Sopenharmony_ci /* Same a left_asymmetric, by first stripe is 30418c2ecf20Sopenharmony_ci * D D D P Q rather than 30428c2ecf20Sopenharmony_ci * Q D D D P 30438c2ecf20Sopenharmony_ci */ 30448c2ecf20Sopenharmony_ci stripe2 += 1; 30458c2ecf20Sopenharmony_ci pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); 30468c2ecf20Sopenharmony_ci qd_idx = pd_idx + 1; 30478c2ecf20Sopenharmony_ci if (pd_idx == raid_disks-1) { 30488c2ecf20Sopenharmony_ci (*dd_idx)++; /* Q D D D P */ 30498c2ecf20Sopenharmony_ci qd_idx = 0; 30508c2ecf20Sopenharmony_ci } else if (*dd_idx >= pd_idx) 30518c2ecf20Sopenharmony_ci (*dd_idx) += 2; /* D D P Q D */ 30528c2ecf20Sopenharmony_ci ddf_layout = 1; 30538c2ecf20Sopenharmony_ci break; 30548c2ecf20Sopenharmony_ci 30558c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_N_CONTINUE: 30568c2ecf20Sopenharmony_ci /* Same as left_symmetric but Q is before P */ 30578c2ecf20Sopenharmony_ci pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); 30588c2ecf20Sopenharmony_ci qd_idx = (pd_idx + raid_disks - 1) % raid_disks; 30598c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; 30608c2ecf20Sopenharmony_ci ddf_layout = 1; 30618c2ecf20Sopenharmony_ci break; 30628c2ecf20Sopenharmony_ci 30638c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC_6: 30648c2ecf20Sopenharmony_ci /* RAID5 left_asymmetric, with Q on last device */ 30658c2ecf20Sopenharmony_ci pd_idx = data_disks - sector_div(stripe2, raid_disks-1); 30668c2ecf20Sopenharmony_ci if (*dd_idx >= pd_idx) 30678c2ecf20Sopenharmony_ci (*dd_idx)++; 30688c2ecf20Sopenharmony_ci qd_idx = raid_disks - 1; 30698c2ecf20Sopenharmony_ci break; 30708c2ecf20Sopenharmony_ci 30718c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC_6: 30728c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks-1); 30738c2ecf20Sopenharmony_ci if (*dd_idx >= pd_idx) 30748c2ecf20Sopenharmony_ci (*dd_idx)++; 30758c2ecf20Sopenharmony_ci qd_idx = raid_disks - 1; 30768c2ecf20Sopenharmony_ci break; 30778c2ecf20Sopenharmony_ci 30788c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC_6: 30798c2ecf20Sopenharmony_ci pd_idx = data_disks - sector_div(stripe2, raid_disks-1); 30808c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); 30818c2ecf20Sopenharmony_ci qd_idx = raid_disks - 1; 30828c2ecf20Sopenharmony_ci break; 30838c2ecf20Sopenharmony_ci 30848c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC_6: 30858c2ecf20Sopenharmony_ci pd_idx = sector_div(stripe2, raid_disks-1); 30868c2ecf20Sopenharmony_ci *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); 30878c2ecf20Sopenharmony_ci qd_idx = raid_disks - 1; 30888c2ecf20Sopenharmony_ci break; 30898c2ecf20Sopenharmony_ci 30908c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0_6: 30918c2ecf20Sopenharmony_ci pd_idx = 0; 30928c2ecf20Sopenharmony_ci (*dd_idx)++; 30938c2ecf20Sopenharmony_ci qd_idx = raid_disks - 1; 30948c2ecf20Sopenharmony_ci break; 30958c2ecf20Sopenharmony_ci 30968c2ecf20Sopenharmony_ci default: 30978c2ecf20Sopenharmony_ci BUG(); 30988c2ecf20Sopenharmony_ci } 30998c2ecf20Sopenharmony_ci break; 31008c2ecf20Sopenharmony_ci } 31018c2ecf20Sopenharmony_ci 31028c2ecf20Sopenharmony_ci if (sh) { 31038c2ecf20Sopenharmony_ci sh->pd_idx = pd_idx; 31048c2ecf20Sopenharmony_ci sh->qd_idx = qd_idx; 31058c2ecf20Sopenharmony_ci sh->ddf_layout = ddf_layout; 31068c2ecf20Sopenharmony_ci } 31078c2ecf20Sopenharmony_ci /* 31088c2ecf20Sopenharmony_ci * Finally, compute the new sector number 31098c2ecf20Sopenharmony_ci */ 31108c2ecf20Sopenharmony_ci new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset; 31118c2ecf20Sopenharmony_ci return new_sector; 31128c2ecf20Sopenharmony_ci} 31138c2ecf20Sopenharmony_ci 31148c2ecf20Sopenharmony_cisector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) 31158c2ecf20Sopenharmony_ci{ 31168c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 31178c2ecf20Sopenharmony_ci int raid_disks = sh->disks; 31188c2ecf20Sopenharmony_ci int data_disks = raid_disks - conf->max_degraded; 31198c2ecf20Sopenharmony_ci sector_t new_sector = sh->sector, check; 31208c2ecf20Sopenharmony_ci int sectors_per_chunk = previous ? conf->prev_chunk_sectors 31218c2ecf20Sopenharmony_ci : conf->chunk_sectors; 31228c2ecf20Sopenharmony_ci int algorithm = previous ? conf->prev_algo 31238c2ecf20Sopenharmony_ci : conf->algorithm; 31248c2ecf20Sopenharmony_ci sector_t stripe; 31258c2ecf20Sopenharmony_ci int chunk_offset; 31268c2ecf20Sopenharmony_ci sector_t chunk_number; 31278c2ecf20Sopenharmony_ci int dummy1, dd_idx = i; 31288c2ecf20Sopenharmony_ci sector_t r_sector; 31298c2ecf20Sopenharmony_ci struct stripe_head sh2; 31308c2ecf20Sopenharmony_ci 31318c2ecf20Sopenharmony_ci chunk_offset = sector_div(new_sector, sectors_per_chunk); 31328c2ecf20Sopenharmony_ci stripe = new_sector; 31338c2ecf20Sopenharmony_ci 31348c2ecf20Sopenharmony_ci if (i == sh->pd_idx) 31358c2ecf20Sopenharmony_ci return 0; 31368c2ecf20Sopenharmony_ci switch(conf->level) { 31378c2ecf20Sopenharmony_ci case 4: break; 31388c2ecf20Sopenharmony_ci case 5: 31398c2ecf20Sopenharmony_ci switch (algorithm) { 31408c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC: 31418c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC: 31428c2ecf20Sopenharmony_ci if (i > sh->pd_idx) 31438c2ecf20Sopenharmony_ci i--; 31448c2ecf20Sopenharmony_ci break; 31458c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC: 31468c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC: 31478c2ecf20Sopenharmony_ci if (i < sh->pd_idx) 31488c2ecf20Sopenharmony_ci i += raid_disks; 31498c2ecf20Sopenharmony_ci i -= (sh->pd_idx + 1); 31508c2ecf20Sopenharmony_ci break; 31518c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 31528c2ecf20Sopenharmony_ci i -= 1; 31538c2ecf20Sopenharmony_ci break; 31548c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 31558c2ecf20Sopenharmony_ci break; 31568c2ecf20Sopenharmony_ci default: 31578c2ecf20Sopenharmony_ci BUG(); 31588c2ecf20Sopenharmony_ci } 31598c2ecf20Sopenharmony_ci break; 31608c2ecf20Sopenharmony_ci case 6: 31618c2ecf20Sopenharmony_ci if (i == sh->qd_idx) 31628c2ecf20Sopenharmony_ci return 0; /* It is the Q disk */ 31638c2ecf20Sopenharmony_ci switch (algorithm) { 31648c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC: 31658c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC: 31668c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_ZERO_RESTART: 31678c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_N_RESTART: 31688c2ecf20Sopenharmony_ci if (sh->pd_idx == raid_disks-1) 31698c2ecf20Sopenharmony_ci i--; /* Q D D D P */ 31708c2ecf20Sopenharmony_ci else if (i > sh->pd_idx) 31718c2ecf20Sopenharmony_ci i -= 2; /* D D P Q D */ 31728c2ecf20Sopenharmony_ci break; 31738c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC: 31748c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC: 31758c2ecf20Sopenharmony_ci if (sh->pd_idx == raid_disks-1) 31768c2ecf20Sopenharmony_ci i--; /* Q D D D P */ 31778c2ecf20Sopenharmony_ci else { 31788c2ecf20Sopenharmony_ci /* D D P Q D */ 31798c2ecf20Sopenharmony_ci if (i < sh->pd_idx) 31808c2ecf20Sopenharmony_ci i += raid_disks; 31818c2ecf20Sopenharmony_ci i -= (sh->pd_idx + 2); 31828c2ecf20Sopenharmony_ci } 31838c2ecf20Sopenharmony_ci break; 31848c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 31858c2ecf20Sopenharmony_ci i -= 2; 31868c2ecf20Sopenharmony_ci break; 31878c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 31888c2ecf20Sopenharmony_ci break; 31898c2ecf20Sopenharmony_ci case ALGORITHM_ROTATING_N_CONTINUE: 31908c2ecf20Sopenharmony_ci /* Like left_symmetric, but P is before Q */ 31918c2ecf20Sopenharmony_ci if (sh->pd_idx == 0) 31928c2ecf20Sopenharmony_ci i--; /* P D D D Q */ 31938c2ecf20Sopenharmony_ci else { 31948c2ecf20Sopenharmony_ci /* D D Q P D */ 31958c2ecf20Sopenharmony_ci if (i < sh->pd_idx) 31968c2ecf20Sopenharmony_ci i += raid_disks; 31978c2ecf20Sopenharmony_ci i -= (sh->pd_idx + 1); 31988c2ecf20Sopenharmony_ci } 31998c2ecf20Sopenharmony_ci break; 32008c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC_6: 32018c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC_6: 32028c2ecf20Sopenharmony_ci if (i > sh->pd_idx) 32038c2ecf20Sopenharmony_ci i--; 32048c2ecf20Sopenharmony_ci break; 32058c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC_6: 32068c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC_6: 32078c2ecf20Sopenharmony_ci if (i < sh->pd_idx) 32088c2ecf20Sopenharmony_ci i += data_disks + 1; 32098c2ecf20Sopenharmony_ci i -= (sh->pd_idx + 1); 32108c2ecf20Sopenharmony_ci break; 32118c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0_6: 32128c2ecf20Sopenharmony_ci i -= 1; 32138c2ecf20Sopenharmony_ci break; 32148c2ecf20Sopenharmony_ci default: 32158c2ecf20Sopenharmony_ci BUG(); 32168c2ecf20Sopenharmony_ci } 32178c2ecf20Sopenharmony_ci break; 32188c2ecf20Sopenharmony_ci } 32198c2ecf20Sopenharmony_ci 32208c2ecf20Sopenharmony_ci chunk_number = stripe * data_disks + i; 32218c2ecf20Sopenharmony_ci r_sector = chunk_number * sectors_per_chunk + chunk_offset; 32228c2ecf20Sopenharmony_ci 32238c2ecf20Sopenharmony_ci check = raid5_compute_sector(conf, r_sector, 32248c2ecf20Sopenharmony_ci previous, &dummy1, &sh2); 32258c2ecf20Sopenharmony_ci if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx 32268c2ecf20Sopenharmony_ci || sh2.qd_idx != sh->qd_idx) { 32278c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: compute_blocknr: map not correct\n", 32288c2ecf20Sopenharmony_ci mdname(conf->mddev)); 32298c2ecf20Sopenharmony_ci return 0; 32308c2ecf20Sopenharmony_ci } 32318c2ecf20Sopenharmony_ci return r_sector; 32328c2ecf20Sopenharmony_ci} 32338c2ecf20Sopenharmony_ci 32348c2ecf20Sopenharmony_ci/* 32358c2ecf20Sopenharmony_ci * There are cases where we want handle_stripe_dirtying() and 32368c2ecf20Sopenharmony_ci * schedule_reconstruction() to delay towrite to some dev of a stripe. 32378c2ecf20Sopenharmony_ci * 32388c2ecf20Sopenharmony_ci * This function checks whether we want to delay the towrite. Specifically, 32398c2ecf20Sopenharmony_ci * we delay the towrite when: 32408c2ecf20Sopenharmony_ci * 32418c2ecf20Sopenharmony_ci * 1. degraded stripe has a non-overwrite to the missing dev, AND this 32428c2ecf20Sopenharmony_ci * stripe has data in journal (for other devices). 32438c2ecf20Sopenharmony_ci * 32448c2ecf20Sopenharmony_ci * In this case, when reading data for the non-overwrite dev, it is 32458c2ecf20Sopenharmony_ci * necessary to handle complex rmw of write back cache (prexor with 32468c2ecf20Sopenharmony_ci * orig_page, and xor with page). To keep read path simple, we would 32478c2ecf20Sopenharmony_ci * like to flush data in journal to RAID disks first, so complex rmw 32488c2ecf20Sopenharmony_ci * is handled in the write patch (handle_stripe_dirtying). 32498c2ecf20Sopenharmony_ci * 32508c2ecf20Sopenharmony_ci * 2. when journal space is critical (R5C_LOG_CRITICAL=1) 32518c2ecf20Sopenharmony_ci * 32528c2ecf20Sopenharmony_ci * It is important to be able to flush all stripes in raid5-cache. 32538c2ecf20Sopenharmony_ci * Therefore, we need reserve some space on the journal device for 32548c2ecf20Sopenharmony_ci * these flushes. If flush operation includes pending writes to the 32558c2ecf20Sopenharmony_ci * stripe, we need to reserve (conf->raid_disk + 1) pages per stripe 32568c2ecf20Sopenharmony_ci * for the flush out. If we exclude these pending writes from flush 32578c2ecf20Sopenharmony_ci * operation, we only need (conf->max_degraded + 1) pages per stripe. 32588c2ecf20Sopenharmony_ci * Therefore, excluding pending writes in these cases enables more 32598c2ecf20Sopenharmony_ci * efficient use of the journal device. 32608c2ecf20Sopenharmony_ci * 32618c2ecf20Sopenharmony_ci * Note: To make sure the stripe makes progress, we only delay 32628c2ecf20Sopenharmony_ci * towrite for stripes with data already in journal (injournal > 0). 32638c2ecf20Sopenharmony_ci * When LOG_CRITICAL, stripes with injournal == 0 will be sent to 32648c2ecf20Sopenharmony_ci * no_space_stripes list. 32658c2ecf20Sopenharmony_ci * 32668c2ecf20Sopenharmony_ci * 3. during journal failure 32678c2ecf20Sopenharmony_ci * In journal failure, we try to flush all cached data to raid disks 32688c2ecf20Sopenharmony_ci * based on data in stripe cache. The array is read-only to upper 32698c2ecf20Sopenharmony_ci * layers, so we would skip all pending writes. 32708c2ecf20Sopenharmony_ci * 32718c2ecf20Sopenharmony_ci */ 32728c2ecf20Sopenharmony_cistatic inline bool delay_towrite(struct r5conf *conf, 32738c2ecf20Sopenharmony_ci struct r5dev *dev, 32748c2ecf20Sopenharmony_ci struct stripe_head_state *s) 32758c2ecf20Sopenharmony_ci{ 32768c2ecf20Sopenharmony_ci /* case 1 above */ 32778c2ecf20Sopenharmony_ci if (!test_bit(R5_OVERWRITE, &dev->flags) && 32788c2ecf20Sopenharmony_ci !test_bit(R5_Insync, &dev->flags) && s->injournal) 32798c2ecf20Sopenharmony_ci return true; 32808c2ecf20Sopenharmony_ci /* case 2 above */ 32818c2ecf20Sopenharmony_ci if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && 32828c2ecf20Sopenharmony_ci s->injournal > 0) 32838c2ecf20Sopenharmony_ci return true; 32848c2ecf20Sopenharmony_ci /* case 3 above */ 32858c2ecf20Sopenharmony_ci if (s->log_failed && s->injournal) 32868c2ecf20Sopenharmony_ci return true; 32878c2ecf20Sopenharmony_ci return false; 32888c2ecf20Sopenharmony_ci} 32898c2ecf20Sopenharmony_ci 32908c2ecf20Sopenharmony_cistatic void 32918c2ecf20Sopenharmony_cischedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, 32928c2ecf20Sopenharmony_ci int rcw, int expand) 32938c2ecf20Sopenharmony_ci{ 32948c2ecf20Sopenharmony_ci int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks; 32958c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 32968c2ecf20Sopenharmony_ci int level = conf->level; 32978c2ecf20Sopenharmony_ci 32988c2ecf20Sopenharmony_ci if (rcw) { 32998c2ecf20Sopenharmony_ci /* 33008c2ecf20Sopenharmony_ci * In some cases, handle_stripe_dirtying initially decided to 33018c2ecf20Sopenharmony_ci * run rmw and allocates extra page for prexor. However, rcw is 33028c2ecf20Sopenharmony_ci * cheaper later on. We need to free the extra page now, 33038c2ecf20Sopenharmony_ci * because we won't be able to do that in ops_complete_prexor(). 33048c2ecf20Sopenharmony_ci */ 33058c2ecf20Sopenharmony_ci r5c_release_extra_page(sh); 33068c2ecf20Sopenharmony_ci 33078c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 33088c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 33098c2ecf20Sopenharmony_ci 33108c2ecf20Sopenharmony_ci if (dev->towrite && !delay_towrite(conf, dev, s)) { 33118c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 33128c2ecf20Sopenharmony_ci set_bit(R5_Wantdrain, &dev->flags); 33138c2ecf20Sopenharmony_ci if (!expand) 33148c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &dev->flags); 33158c2ecf20Sopenharmony_ci s->locked++; 33168c2ecf20Sopenharmony_ci } else if (test_bit(R5_InJournal, &dev->flags)) { 33178c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 33188c2ecf20Sopenharmony_ci s->locked++; 33198c2ecf20Sopenharmony_ci } 33208c2ecf20Sopenharmony_ci } 33218c2ecf20Sopenharmony_ci /* if we are not expanding this is a proper write request, and 33228c2ecf20Sopenharmony_ci * there will be bios with new data to be drained into the 33238c2ecf20Sopenharmony_ci * stripe cache 33248c2ecf20Sopenharmony_ci */ 33258c2ecf20Sopenharmony_ci if (!expand) { 33268c2ecf20Sopenharmony_ci if (!s->locked) 33278c2ecf20Sopenharmony_ci /* False alarm, nothing to do */ 33288c2ecf20Sopenharmony_ci return; 33298c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_drain_run; 33308c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); 33318c2ecf20Sopenharmony_ci } else 33328c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_run; 33338c2ecf20Sopenharmony_ci 33348c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); 33358c2ecf20Sopenharmony_ci 33368c2ecf20Sopenharmony_ci if (s->locked + conf->max_degraded == disks) 33378c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 33388c2ecf20Sopenharmony_ci atomic_inc(&conf->pending_full_writes); 33398c2ecf20Sopenharmony_ci } else { 33408c2ecf20Sopenharmony_ci BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 33418c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 33428c2ecf20Sopenharmony_ci BUG_ON(level == 6 && 33438c2ecf20Sopenharmony_ci (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) || 33448c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags)))); 33458c2ecf20Sopenharmony_ci 33468c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 33478c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 33488c2ecf20Sopenharmony_ci if (i == pd_idx || i == qd_idx) 33498c2ecf20Sopenharmony_ci continue; 33508c2ecf20Sopenharmony_ci 33518c2ecf20Sopenharmony_ci if (dev->towrite && 33528c2ecf20Sopenharmony_ci (test_bit(R5_UPTODATE, &dev->flags) || 33538c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &dev->flags))) { 33548c2ecf20Sopenharmony_ci set_bit(R5_Wantdrain, &dev->flags); 33558c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 33568c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &dev->flags); 33578c2ecf20Sopenharmony_ci s->locked++; 33588c2ecf20Sopenharmony_ci } else if (test_bit(R5_InJournal, &dev->flags)) { 33598c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 33608c2ecf20Sopenharmony_ci s->locked++; 33618c2ecf20Sopenharmony_ci } 33628c2ecf20Sopenharmony_ci } 33638c2ecf20Sopenharmony_ci if (!s->locked) 33648c2ecf20Sopenharmony_ci /* False alarm - nothing to do */ 33658c2ecf20Sopenharmony_ci return; 33668c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_prexor_drain_run; 33678c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_PREXOR, &s->ops_request); 33688c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); 33698c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); 33708c2ecf20Sopenharmony_ci } 33718c2ecf20Sopenharmony_ci 33728c2ecf20Sopenharmony_ci /* keep the parity disk(s) locked while asynchronous operations 33738c2ecf20Sopenharmony_ci * are in flight 33748c2ecf20Sopenharmony_ci */ 33758c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 33768c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 33778c2ecf20Sopenharmony_ci s->locked++; 33788c2ecf20Sopenharmony_ci 33798c2ecf20Sopenharmony_ci if (level == 6) { 33808c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 33818c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[qd_idx]; 33828c2ecf20Sopenharmony_ci 33838c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 33848c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &dev->flags); 33858c2ecf20Sopenharmony_ci s->locked++; 33868c2ecf20Sopenharmony_ci } 33878c2ecf20Sopenharmony_ci 33888c2ecf20Sopenharmony_ci if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && 33898c2ecf20Sopenharmony_ci test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && 33908c2ecf20Sopenharmony_ci !test_bit(STRIPE_FULL_WRITE, &sh->state) && 33918c2ecf20Sopenharmony_ci test_bit(R5_Insync, &sh->dev[pd_idx].flags)) 33928c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request); 33938c2ecf20Sopenharmony_ci 33948c2ecf20Sopenharmony_ci pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", 33958c2ecf20Sopenharmony_ci __func__, (unsigned long long)sh->sector, 33968c2ecf20Sopenharmony_ci s->locked, s->ops_request); 33978c2ecf20Sopenharmony_ci} 33988c2ecf20Sopenharmony_ci 33998c2ecf20Sopenharmony_ci/* 34008c2ecf20Sopenharmony_ci * Each stripe/dev can have one or more bion attached. 34018c2ecf20Sopenharmony_ci * toread/towrite point to the first in a chain. 34028c2ecf20Sopenharmony_ci * The bi_next chain must be in order. 34038c2ecf20Sopenharmony_ci */ 34048c2ecf20Sopenharmony_cistatic int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, 34058c2ecf20Sopenharmony_ci int forwrite, int previous) 34068c2ecf20Sopenharmony_ci{ 34078c2ecf20Sopenharmony_ci struct bio **bip; 34088c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 34098c2ecf20Sopenharmony_ci int firstwrite=0; 34108c2ecf20Sopenharmony_ci 34118c2ecf20Sopenharmony_ci pr_debug("adding bi b#%llu to stripe s#%llu\n", 34128c2ecf20Sopenharmony_ci (unsigned long long)bi->bi_iter.bi_sector, 34138c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 34148c2ecf20Sopenharmony_ci 34158c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 34168c2ecf20Sopenharmony_ci sh->dev[dd_idx].write_hint = bi->bi_write_hint; 34178c2ecf20Sopenharmony_ci /* Don't allow new IO added to stripes in batch list */ 34188c2ecf20Sopenharmony_ci if (sh->batch_head) 34198c2ecf20Sopenharmony_ci goto overlap; 34208c2ecf20Sopenharmony_ci if (forwrite) { 34218c2ecf20Sopenharmony_ci bip = &sh->dev[dd_idx].towrite; 34228c2ecf20Sopenharmony_ci if (*bip == NULL) 34238c2ecf20Sopenharmony_ci firstwrite = 1; 34248c2ecf20Sopenharmony_ci } else 34258c2ecf20Sopenharmony_ci bip = &sh->dev[dd_idx].toread; 34268c2ecf20Sopenharmony_ci while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) { 34278c2ecf20Sopenharmony_ci if (bio_end_sector(*bip) > bi->bi_iter.bi_sector) 34288c2ecf20Sopenharmony_ci goto overlap; 34298c2ecf20Sopenharmony_ci bip = & (*bip)->bi_next; 34308c2ecf20Sopenharmony_ci } 34318c2ecf20Sopenharmony_ci if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) 34328c2ecf20Sopenharmony_ci goto overlap; 34338c2ecf20Sopenharmony_ci 34348c2ecf20Sopenharmony_ci if (forwrite && raid5_has_ppl(conf)) { 34358c2ecf20Sopenharmony_ci /* 34368c2ecf20Sopenharmony_ci * With PPL only writes to consecutive data chunks within a 34378c2ecf20Sopenharmony_ci * stripe are allowed because for a single stripe_head we can 34388c2ecf20Sopenharmony_ci * only have one PPL entry at a time, which describes one data 34398c2ecf20Sopenharmony_ci * range. Not really an overlap, but wait_for_overlap can be 34408c2ecf20Sopenharmony_ci * used to handle this. 34418c2ecf20Sopenharmony_ci */ 34428c2ecf20Sopenharmony_ci sector_t sector; 34438c2ecf20Sopenharmony_ci sector_t first = 0; 34448c2ecf20Sopenharmony_ci sector_t last = 0; 34458c2ecf20Sopenharmony_ci int count = 0; 34468c2ecf20Sopenharmony_ci int i; 34478c2ecf20Sopenharmony_ci 34488c2ecf20Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 34498c2ecf20Sopenharmony_ci if (i != sh->pd_idx && 34508c2ecf20Sopenharmony_ci (i == dd_idx || sh->dev[i].towrite)) { 34518c2ecf20Sopenharmony_ci sector = sh->dev[i].sector; 34528c2ecf20Sopenharmony_ci if (count == 0 || sector < first) 34538c2ecf20Sopenharmony_ci first = sector; 34548c2ecf20Sopenharmony_ci if (sector > last) 34558c2ecf20Sopenharmony_ci last = sector; 34568c2ecf20Sopenharmony_ci count++; 34578c2ecf20Sopenharmony_ci } 34588c2ecf20Sopenharmony_ci } 34598c2ecf20Sopenharmony_ci 34608c2ecf20Sopenharmony_ci if (first + conf->chunk_sectors * (count - 1) != last) 34618c2ecf20Sopenharmony_ci goto overlap; 34628c2ecf20Sopenharmony_ci } 34638c2ecf20Sopenharmony_ci 34648c2ecf20Sopenharmony_ci if (!forwrite || previous) 34658c2ecf20Sopenharmony_ci clear_bit(STRIPE_BATCH_READY, &sh->state); 34668c2ecf20Sopenharmony_ci 34678c2ecf20Sopenharmony_ci BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); 34688c2ecf20Sopenharmony_ci if (*bip) 34698c2ecf20Sopenharmony_ci bi->bi_next = *bip; 34708c2ecf20Sopenharmony_ci *bip = bi; 34718c2ecf20Sopenharmony_ci bio_inc_remaining(bi); 34728c2ecf20Sopenharmony_ci md_write_inc(conf->mddev, bi); 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci if (forwrite) { 34758c2ecf20Sopenharmony_ci /* check if page is covered */ 34768c2ecf20Sopenharmony_ci sector_t sector = sh->dev[dd_idx].sector; 34778c2ecf20Sopenharmony_ci for (bi=sh->dev[dd_idx].towrite; 34788c2ecf20Sopenharmony_ci sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) && 34798c2ecf20Sopenharmony_ci bi && bi->bi_iter.bi_sector <= sector; 34808c2ecf20Sopenharmony_ci bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) { 34818c2ecf20Sopenharmony_ci if (bio_end_sector(bi) >= sector) 34828c2ecf20Sopenharmony_ci sector = bio_end_sector(bi); 34838c2ecf20Sopenharmony_ci } 34848c2ecf20Sopenharmony_ci if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf)) 34858c2ecf20Sopenharmony_ci if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags)) 34868c2ecf20Sopenharmony_ci sh->overwrite_disks++; 34878c2ecf20Sopenharmony_ci } 34888c2ecf20Sopenharmony_ci 34898c2ecf20Sopenharmony_ci pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", 34908c2ecf20Sopenharmony_ci (unsigned long long)(*bip)->bi_iter.bi_sector, 34918c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, dd_idx); 34928c2ecf20Sopenharmony_ci 34938c2ecf20Sopenharmony_ci if (conf->mddev->bitmap && firstwrite) { 34948c2ecf20Sopenharmony_ci /* Cannot hold spinlock over bitmap_startwrite, 34958c2ecf20Sopenharmony_ci * but must ensure this isn't added to a batch until 34968c2ecf20Sopenharmony_ci * we have added to the bitmap and set bm_seq. 34978c2ecf20Sopenharmony_ci * So set STRIPE_BITMAP_PENDING to prevent 34988c2ecf20Sopenharmony_ci * batching. 34998c2ecf20Sopenharmony_ci * If multiple add_stripe_bio() calls race here they 35008c2ecf20Sopenharmony_ci * much all set STRIPE_BITMAP_PENDING. So only the first one 35018c2ecf20Sopenharmony_ci * to complete "bitmap_startwrite" gets to set 35028c2ecf20Sopenharmony_ci * STRIPE_BIT_DELAY. This is important as once a stripe 35038c2ecf20Sopenharmony_ci * is added to a batch, STRIPE_BIT_DELAY cannot be changed 35048c2ecf20Sopenharmony_ci * any more. 35058c2ecf20Sopenharmony_ci */ 35068c2ecf20Sopenharmony_ci set_bit(STRIPE_BITMAP_PENDING, &sh->state); 35078c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 35088c2ecf20Sopenharmony_ci md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, 35098c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0); 35108c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 35118c2ecf20Sopenharmony_ci clear_bit(STRIPE_BITMAP_PENDING, &sh->state); 35128c2ecf20Sopenharmony_ci if (!sh->batch_head) { 35138c2ecf20Sopenharmony_ci sh->bm_seq = conf->seq_flush+1; 35148c2ecf20Sopenharmony_ci set_bit(STRIPE_BIT_DELAY, &sh->state); 35158c2ecf20Sopenharmony_ci } 35168c2ecf20Sopenharmony_ci } 35178c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 35188c2ecf20Sopenharmony_ci 35198c2ecf20Sopenharmony_ci if (stripe_can_batch(sh)) 35208c2ecf20Sopenharmony_ci stripe_add_to_batch_list(conf, sh); 35218c2ecf20Sopenharmony_ci return 1; 35228c2ecf20Sopenharmony_ci 35238c2ecf20Sopenharmony_ci overlap: 35248c2ecf20Sopenharmony_ci set_bit(R5_Overlap, &sh->dev[dd_idx].flags); 35258c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 35268c2ecf20Sopenharmony_ci return 0; 35278c2ecf20Sopenharmony_ci} 35288c2ecf20Sopenharmony_ci 35298c2ecf20Sopenharmony_cistatic void end_reshape(struct r5conf *conf); 35308c2ecf20Sopenharmony_ci 35318c2ecf20Sopenharmony_cistatic void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, 35328c2ecf20Sopenharmony_ci struct stripe_head *sh) 35338c2ecf20Sopenharmony_ci{ 35348c2ecf20Sopenharmony_ci int sectors_per_chunk = 35358c2ecf20Sopenharmony_ci previous ? conf->prev_chunk_sectors : conf->chunk_sectors; 35368c2ecf20Sopenharmony_ci int dd_idx; 35378c2ecf20Sopenharmony_ci int chunk_offset = sector_div(stripe, sectors_per_chunk); 35388c2ecf20Sopenharmony_ci int disks = previous ? conf->previous_raid_disks : conf->raid_disks; 35398c2ecf20Sopenharmony_ci 35408c2ecf20Sopenharmony_ci raid5_compute_sector(conf, 35418c2ecf20Sopenharmony_ci stripe * (disks - conf->max_degraded) 35428c2ecf20Sopenharmony_ci *sectors_per_chunk + chunk_offset, 35438c2ecf20Sopenharmony_ci previous, 35448c2ecf20Sopenharmony_ci &dd_idx, sh); 35458c2ecf20Sopenharmony_ci} 35468c2ecf20Sopenharmony_ci 35478c2ecf20Sopenharmony_cistatic void 35488c2ecf20Sopenharmony_cihandle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, 35498c2ecf20Sopenharmony_ci struct stripe_head_state *s, int disks) 35508c2ecf20Sopenharmony_ci{ 35518c2ecf20Sopenharmony_ci int i; 35528c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 35538c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 35548c2ecf20Sopenharmony_ci struct bio *bi; 35558c2ecf20Sopenharmony_ci int bitmap_end = 0; 35568c2ecf20Sopenharmony_ci 35578c2ecf20Sopenharmony_ci if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 35588c2ecf20Sopenharmony_ci struct md_rdev *rdev; 35598c2ecf20Sopenharmony_ci rcu_read_lock(); 35608c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].rdev); 35618c2ecf20Sopenharmony_ci if (rdev && test_bit(In_sync, &rdev->flags) && 35628c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) 35638c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 35648c2ecf20Sopenharmony_ci else 35658c2ecf20Sopenharmony_ci rdev = NULL; 35668c2ecf20Sopenharmony_ci rcu_read_unlock(); 35678c2ecf20Sopenharmony_ci if (rdev) { 35688c2ecf20Sopenharmony_ci if (!rdev_set_badblocks( 35698c2ecf20Sopenharmony_ci rdev, 35708c2ecf20Sopenharmony_ci sh->sector, 35718c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0)) 35728c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 35738c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 35748c2ecf20Sopenharmony_ci } 35758c2ecf20Sopenharmony_ci } 35768c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 35778c2ecf20Sopenharmony_ci /* fail all writes first */ 35788c2ecf20Sopenharmony_ci bi = sh->dev[i].towrite; 35798c2ecf20Sopenharmony_ci sh->dev[i].towrite = NULL; 35808c2ecf20Sopenharmony_ci sh->overwrite_disks = 0; 35818c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 35828c2ecf20Sopenharmony_ci if (bi) 35838c2ecf20Sopenharmony_ci bitmap_end = 1; 35848c2ecf20Sopenharmony_ci 35858c2ecf20Sopenharmony_ci log_stripe_write_finished(sh); 35868c2ecf20Sopenharmony_ci 35878c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 35888c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 35898c2ecf20Sopenharmony_ci 35908c2ecf20Sopenharmony_ci while (bi && bi->bi_iter.bi_sector < 35918c2ecf20Sopenharmony_ci sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { 35928c2ecf20Sopenharmony_ci struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector); 35938c2ecf20Sopenharmony_ci 35948c2ecf20Sopenharmony_ci md_write_end(conf->mddev); 35958c2ecf20Sopenharmony_ci bio_io_error(bi); 35968c2ecf20Sopenharmony_ci bi = nextbi; 35978c2ecf20Sopenharmony_ci } 35988c2ecf20Sopenharmony_ci if (bitmap_end) 35998c2ecf20Sopenharmony_ci md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, 36008c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0, 0); 36018c2ecf20Sopenharmony_ci bitmap_end = 0; 36028c2ecf20Sopenharmony_ci /* and fail all 'written' */ 36038c2ecf20Sopenharmony_ci bi = sh->dev[i].written; 36048c2ecf20Sopenharmony_ci sh->dev[i].written = NULL; 36058c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { 36068c2ecf20Sopenharmony_ci WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); 36078c2ecf20Sopenharmony_ci sh->dev[i].page = sh->dev[i].orig_page; 36088c2ecf20Sopenharmony_ci } 36098c2ecf20Sopenharmony_ci 36108c2ecf20Sopenharmony_ci if (bi) bitmap_end = 1; 36118c2ecf20Sopenharmony_ci while (bi && bi->bi_iter.bi_sector < 36128c2ecf20Sopenharmony_ci sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { 36138c2ecf20Sopenharmony_ci struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector); 36148c2ecf20Sopenharmony_ci 36158c2ecf20Sopenharmony_ci md_write_end(conf->mddev); 36168c2ecf20Sopenharmony_ci bio_io_error(bi); 36178c2ecf20Sopenharmony_ci bi = bi2; 36188c2ecf20Sopenharmony_ci } 36198c2ecf20Sopenharmony_ci 36208c2ecf20Sopenharmony_ci /* fail any reads if this device is non-operational and 36218c2ecf20Sopenharmony_ci * the data has not reached the cache yet. 36228c2ecf20Sopenharmony_ci */ 36238c2ecf20Sopenharmony_ci if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && 36248c2ecf20Sopenharmony_ci s->failed > conf->max_degraded && 36258c2ecf20Sopenharmony_ci (!test_bit(R5_Insync, &sh->dev[i].flags) || 36268c2ecf20Sopenharmony_ci test_bit(R5_ReadError, &sh->dev[i].flags))) { 36278c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 36288c2ecf20Sopenharmony_ci bi = sh->dev[i].toread; 36298c2ecf20Sopenharmony_ci sh->dev[i].toread = NULL; 36308c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 36318c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 36328c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 36338c2ecf20Sopenharmony_ci if (bi) 36348c2ecf20Sopenharmony_ci s->to_read--; 36358c2ecf20Sopenharmony_ci while (bi && bi->bi_iter.bi_sector < 36368c2ecf20Sopenharmony_ci sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { 36378c2ecf20Sopenharmony_ci struct bio *nextbi = 36388c2ecf20Sopenharmony_ci r5_next_bio(conf, bi, sh->dev[i].sector); 36398c2ecf20Sopenharmony_ci 36408c2ecf20Sopenharmony_ci bio_io_error(bi); 36418c2ecf20Sopenharmony_ci bi = nextbi; 36428c2ecf20Sopenharmony_ci } 36438c2ecf20Sopenharmony_ci } 36448c2ecf20Sopenharmony_ci if (bitmap_end) 36458c2ecf20Sopenharmony_ci md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, 36468c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0, 0); 36478c2ecf20Sopenharmony_ci /* If we were in the middle of a write the parity block might 36488c2ecf20Sopenharmony_ci * still be locked - so just clear all R5_LOCKED flags 36498c2ecf20Sopenharmony_ci */ 36508c2ecf20Sopenharmony_ci clear_bit(R5_LOCKED, &sh->dev[i].flags); 36518c2ecf20Sopenharmony_ci } 36528c2ecf20Sopenharmony_ci s->to_write = 0; 36538c2ecf20Sopenharmony_ci s->written = 0; 36548c2ecf20Sopenharmony_ci 36558c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) 36568c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&conf->pending_full_writes)) 36578c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 36588c2ecf20Sopenharmony_ci} 36598c2ecf20Sopenharmony_ci 36608c2ecf20Sopenharmony_cistatic void 36618c2ecf20Sopenharmony_cihandle_failed_sync(struct r5conf *conf, struct stripe_head *sh, 36628c2ecf20Sopenharmony_ci struct stripe_head_state *s) 36638c2ecf20Sopenharmony_ci{ 36648c2ecf20Sopenharmony_ci int abort = 0; 36658c2ecf20Sopenharmony_ci int i; 36668c2ecf20Sopenharmony_ci 36678c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 36688c2ecf20Sopenharmony_ci clear_bit(STRIPE_SYNCING, &sh->state); 36698c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) 36708c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 36718c2ecf20Sopenharmony_ci s->syncing = 0; 36728c2ecf20Sopenharmony_ci s->replacing = 0; 36738c2ecf20Sopenharmony_ci /* There is nothing more to do for sync/check/repair. 36748c2ecf20Sopenharmony_ci * Don't even need to abort as that is handled elsewhere 36758c2ecf20Sopenharmony_ci * if needed, and not always wanted e.g. if there is a known 36768c2ecf20Sopenharmony_ci * bad block here. 36778c2ecf20Sopenharmony_ci * For recover/replace we need to record a bad block on all 36788c2ecf20Sopenharmony_ci * non-sync devices, or abort the recovery 36798c2ecf20Sopenharmony_ci */ 36808c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { 36818c2ecf20Sopenharmony_ci /* During recovery devices cannot be removed, so 36828c2ecf20Sopenharmony_ci * locking and refcounting of rdevs is not needed 36838c2ecf20Sopenharmony_ci */ 36848c2ecf20Sopenharmony_ci rcu_read_lock(); 36858c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 36868c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); 36878c2ecf20Sopenharmony_ci if (rdev 36888c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags) 36898c2ecf20Sopenharmony_ci && !test_bit(In_sync, &rdev->flags) 36908c2ecf20Sopenharmony_ci && !rdev_set_badblocks(rdev, sh->sector, 36918c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0)) 36928c2ecf20Sopenharmony_ci abort = 1; 36938c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].replacement); 36948c2ecf20Sopenharmony_ci if (rdev 36958c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags) 36968c2ecf20Sopenharmony_ci && !test_bit(In_sync, &rdev->flags) 36978c2ecf20Sopenharmony_ci && !rdev_set_badblocks(rdev, sh->sector, 36988c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0)) 36998c2ecf20Sopenharmony_ci abort = 1; 37008c2ecf20Sopenharmony_ci } 37018c2ecf20Sopenharmony_ci rcu_read_unlock(); 37028c2ecf20Sopenharmony_ci if (abort) 37038c2ecf20Sopenharmony_ci conf->recovery_disabled = 37048c2ecf20Sopenharmony_ci conf->mddev->recovery_disabled; 37058c2ecf20Sopenharmony_ci } 37068c2ecf20Sopenharmony_ci md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort); 37078c2ecf20Sopenharmony_ci} 37088c2ecf20Sopenharmony_ci 37098c2ecf20Sopenharmony_cistatic int want_replace(struct stripe_head *sh, int disk_idx) 37108c2ecf20Sopenharmony_ci{ 37118c2ecf20Sopenharmony_ci struct md_rdev *rdev; 37128c2ecf20Sopenharmony_ci int rv = 0; 37138c2ecf20Sopenharmony_ci 37148c2ecf20Sopenharmony_ci rcu_read_lock(); 37158c2ecf20Sopenharmony_ci rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement); 37168c2ecf20Sopenharmony_ci if (rdev 37178c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags) 37188c2ecf20Sopenharmony_ci && !test_bit(In_sync, &rdev->flags) 37198c2ecf20Sopenharmony_ci && (rdev->recovery_offset <= sh->sector 37208c2ecf20Sopenharmony_ci || rdev->mddev->recovery_cp <= sh->sector)) 37218c2ecf20Sopenharmony_ci rv = 1; 37228c2ecf20Sopenharmony_ci rcu_read_unlock(); 37238c2ecf20Sopenharmony_ci return rv; 37248c2ecf20Sopenharmony_ci} 37258c2ecf20Sopenharmony_ci 37268c2ecf20Sopenharmony_cistatic int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, 37278c2ecf20Sopenharmony_ci int disk_idx, int disks) 37288c2ecf20Sopenharmony_ci{ 37298c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[disk_idx]; 37308c2ecf20Sopenharmony_ci struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], 37318c2ecf20Sopenharmony_ci &sh->dev[s->failed_num[1]] }; 37328c2ecf20Sopenharmony_ci int i; 37338c2ecf20Sopenharmony_ci bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW); 37348c2ecf20Sopenharmony_ci 37358c2ecf20Sopenharmony_ci 37368c2ecf20Sopenharmony_ci if (test_bit(R5_LOCKED, &dev->flags) || 37378c2ecf20Sopenharmony_ci test_bit(R5_UPTODATE, &dev->flags)) 37388c2ecf20Sopenharmony_ci /* No point reading this as we already have it or have 37398c2ecf20Sopenharmony_ci * decided to get it. 37408c2ecf20Sopenharmony_ci */ 37418c2ecf20Sopenharmony_ci return 0; 37428c2ecf20Sopenharmony_ci 37438c2ecf20Sopenharmony_ci if (dev->toread || 37448c2ecf20Sopenharmony_ci (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))) 37458c2ecf20Sopenharmony_ci /* We need this block to directly satisfy a request */ 37468c2ecf20Sopenharmony_ci return 1; 37478c2ecf20Sopenharmony_ci 37488c2ecf20Sopenharmony_ci if (s->syncing || s->expanding || 37498c2ecf20Sopenharmony_ci (s->replacing && want_replace(sh, disk_idx))) 37508c2ecf20Sopenharmony_ci /* When syncing, or expanding we read everything. 37518c2ecf20Sopenharmony_ci * When replacing, we need the replaced block. 37528c2ecf20Sopenharmony_ci */ 37538c2ecf20Sopenharmony_ci return 1; 37548c2ecf20Sopenharmony_ci 37558c2ecf20Sopenharmony_ci if ((s->failed >= 1 && fdev[0]->toread) || 37568c2ecf20Sopenharmony_ci (s->failed >= 2 && fdev[1]->toread)) 37578c2ecf20Sopenharmony_ci /* If we want to read from a failed device, then 37588c2ecf20Sopenharmony_ci * we need to actually read every other device. 37598c2ecf20Sopenharmony_ci */ 37608c2ecf20Sopenharmony_ci return 1; 37618c2ecf20Sopenharmony_ci 37628c2ecf20Sopenharmony_ci /* Sometimes neither read-modify-write nor reconstruct-write 37638c2ecf20Sopenharmony_ci * cycles can work. In those cases we read every block we 37648c2ecf20Sopenharmony_ci * can. Then the parity-update is certain to have enough to 37658c2ecf20Sopenharmony_ci * work with. 37668c2ecf20Sopenharmony_ci * This can only be a problem when we need to write something, 37678c2ecf20Sopenharmony_ci * and some device has failed. If either of those tests 37688c2ecf20Sopenharmony_ci * fail we need look no further. 37698c2ecf20Sopenharmony_ci */ 37708c2ecf20Sopenharmony_ci if (!s->failed || !s->to_write) 37718c2ecf20Sopenharmony_ci return 0; 37728c2ecf20Sopenharmony_ci 37738c2ecf20Sopenharmony_ci if (test_bit(R5_Insync, &dev->flags) && 37748c2ecf20Sopenharmony_ci !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 37758c2ecf20Sopenharmony_ci /* Pre-reads at not permitted until after short delay 37768c2ecf20Sopenharmony_ci * to gather multiple requests. However if this 37778c2ecf20Sopenharmony_ci * device is no Insync, the block could only be computed 37788c2ecf20Sopenharmony_ci * and there is no need to delay that. 37798c2ecf20Sopenharmony_ci */ 37808c2ecf20Sopenharmony_ci return 0; 37818c2ecf20Sopenharmony_ci 37828c2ecf20Sopenharmony_ci for (i = 0; i < s->failed && i < 2; i++) { 37838c2ecf20Sopenharmony_ci if (fdev[i]->towrite && 37848c2ecf20Sopenharmony_ci !test_bit(R5_UPTODATE, &fdev[i]->flags) && 37858c2ecf20Sopenharmony_ci !test_bit(R5_OVERWRITE, &fdev[i]->flags)) 37868c2ecf20Sopenharmony_ci /* If we have a partial write to a failed 37878c2ecf20Sopenharmony_ci * device, then we will need to reconstruct 37888c2ecf20Sopenharmony_ci * the content of that device, so all other 37898c2ecf20Sopenharmony_ci * devices must be read. 37908c2ecf20Sopenharmony_ci */ 37918c2ecf20Sopenharmony_ci return 1; 37928c2ecf20Sopenharmony_ci 37938c2ecf20Sopenharmony_ci if (s->failed >= 2 && 37948c2ecf20Sopenharmony_ci (fdev[i]->towrite || 37958c2ecf20Sopenharmony_ci s->failed_num[i] == sh->pd_idx || 37968c2ecf20Sopenharmony_ci s->failed_num[i] == sh->qd_idx) && 37978c2ecf20Sopenharmony_ci !test_bit(R5_UPTODATE, &fdev[i]->flags)) 37988c2ecf20Sopenharmony_ci /* In max degraded raid6, If the failed disk is P, Q, 37998c2ecf20Sopenharmony_ci * or we want to read the failed disk, we need to do 38008c2ecf20Sopenharmony_ci * reconstruct-write. 38018c2ecf20Sopenharmony_ci */ 38028c2ecf20Sopenharmony_ci force_rcw = true; 38038c2ecf20Sopenharmony_ci } 38048c2ecf20Sopenharmony_ci 38058c2ecf20Sopenharmony_ci /* If we are forced to do a reconstruct-write, because parity 38068c2ecf20Sopenharmony_ci * cannot be trusted and we are currently recovering it, there 38078c2ecf20Sopenharmony_ci * is extra need to be careful. 38088c2ecf20Sopenharmony_ci * If one of the devices that we would need to read, because 38098c2ecf20Sopenharmony_ci * it is not being overwritten (and maybe not written at all) 38108c2ecf20Sopenharmony_ci * is missing/faulty, then we need to read everything we can. 38118c2ecf20Sopenharmony_ci */ 38128c2ecf20Sopenharmony_ci if (!force_rcw && 38138c2ecf20Sopenharmony_ci sh->sector < sh->raid_conf->mddev->recovery_cp) 38148c2ecf20Sopenharmony_ci /* reconstruct-write isn't being forced */ 38158c2ecf20Sopenharmony_ci return 0; 38168c2ecf20Sopenharmony_ci for (i = 0; i < s->failed && i < 2; i++) { 38178c2ecf20Sopenharmony_ci if (s->failed_num[i] != sh->pd_idx && 38188c2ecf20Sopenharmony_ci s->failed_num[i] != sh->qd_idx && 38198c2ecf20Sopenharmony_ci !test_bit(R5_UPTODATE, &fdev[i]->flags) && 38208c2ecf20Sopenharmony_ci !test_bit(R5_OVERWRITE, &fdev[i]->flags)) 38218c2ecf20Sopenharmony_ci return 1; 38228c2ecf20Sopenharmony_ci } 38238c2ecf20Sopenharmony_ci 38248c2ecf20Sopenharmony_ci return 0; 38258c2ecf20Sopenharmony_ci} 38268c2ecf20Sopenharmony_ci 38278c2ecf20Sopenharmony_ci/* fetch_block - checks the given member device to see if its data needs 38288c2ecf20Sopenharmony_ci * to be read or computed to satisfy a request. 38298c2ecf20Sopenharmony_ci * 38308c2ecf20Sopenharmony_ci * Returns 1 when no more member devices need to be checked, otherwise returns 38318c2ecf20Sopenharmony_ci * 0 to tell the loop in handle_stripe_fill to continue 38328c2ecf20Sopenharmony_ci */ 38338c2ecf20Sopenharmony_cistatic int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, 38348c2ecf20Sopenharmony_ci int disk_idx, int disks) 38358c2ecf20Sopenharmony_ci{ 38368c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[disk_idx]; 38378c2ecf20Sopenharmony_ci 38388c2ecf20Sopenharmony_ci /* is the data in this block needed, and can we get it? */ 38398c2ecf20Sopenharmony_ci if (need_this_block(sh, s, disk_idx, disks)) { 38408c2ecf20Sopenharmony_ci /* we would like to get this block, possibly by computing it, 38418c2ecf20Sopenharmony_ci * otherwise read it if the backing disk is insync 38428c2ecf20Sopenharmony_ci */ 38438c2ecf20Sopenharmony_ci BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); 38448c2ecf20Sopenharmony_ci BUG_ON(test_bit(R5_Wantread, &dev->flags)); 38458c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 38468c2ecf20Sopenharmony_ci 38478c2ecf20Sopenharmony_ci /* 38488c2ecf20Sopenharmony_ci * In the raid6 case if the only non-uptodate disk is P 38498c2ecf20Sopenharmony_ci * then we already trusted P to compute the other failed 38508c2ecf20Sopenharmony_ci * drives. It is safe to compute rather than re-read P. 38518c2ecf20Sopenharmony_ci * In other cases we only compute blocks from failed 38528c2ecf20Sopenharmony_ci * devices, otherwise check/repair might fail to detect 38538c2ecf20Sopenharmony_ci * a real inconsistency. 38548c2ecf20Sopenharmony_ci */ 38558c2ecf20Sopenharmony_ci 38568c2ecf20Sopenharmony_ci if ((s->uptodate == disks - 1) && 38578c2ecf20Sopenharmony_ci ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) || 38588c2ecf20Sopenharmony_ci (s->failed && (disk_idx == s->failed_num[0] || 38598c2ecf20Sopenharmony_ci disk_idx == s->failed_num[1])))) { 38608c2ecf20Sopenharmony_ci /* have disk failed, and we're requested to fetch it; 38618c2ecf20Sopenharmony_ci * do compute it 38628c2ecf20Sopenharmony_ci */ 38638c2ecf20Sopenharmony_ci pr_debug("Computing stripe %llu block %d\n", 38648c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, disk_idx); 38658c2ecf20Sopenharmony_ci set_bit(STRIPE_COMPUTE_RUN, &sh->state); 38668c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 38678c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, &dev->flags); 38688c2ecf20Sopenharmony_ci sh->ops.target = disk_idx; 38698c2ecf20Sopenharmony_ci sh->ops.target2 = -1; /* no 2nd target */ 38708c2ecf20Sopenharmony_ci s->req_compute = 1; 38718c2ecf20Sopenharmony_ci /* Careful: from this point on 'uptodate' is in the eye 38728c2ecf20Sopenharmony_ci * of raid_run_ops which services 'compute' operations 38738c2ecf20Sopenharmony_ci * before writes. R5_Wantcompute flags a block that will 38748c2ecf20Sopenharmony_ci * be R5_UPTODATE by the time it is needed for a 38758c2ecf20Sopenharmony_ci * subsequent operation. 38768c2ecf20Sopenharmony_ci */ 38778c2ecf20Sopenharmony_ci s->uptodate++; 38788c2ecf20Sopenharmony_ci return 1; 38798c2ecf20Sopenharmony_ci } else if (s->uptodate == disks-2 && s->failed >= 2) { 38808c2ecf20Sopenharmony_ci /* Computing 2-failure is *very* expensive; only 38818c2ecf20Sopenharmony_ci * do it if failed >= 2 38828c2ecf20Sopenharmony_ci */ 38838c2ecf20Sopenharmony_ci int other; 38848c2ecf20Sopenharmony_ci for (other = disks; other--; ) { 38858c2ecf20Sopenharmony_ci if (other == disk_idx) 38868c2ecf20Sopenharmony_ci continue; 38878c2ecf20Sopenharmony_ci if (!test_bit(R5_UPTODATE, 38888c2ecf20Sopenharmony_ci &sh->dev[other].flags)) 38898c2ecf20Sopenharmony_ci break; 38908c2ecf20Sopenharmony_ci } 38918c2ecf20Sopenharmony_ci BUG_ON(other < 0); 38928c2ecf20Sopenharmony_ci pr_debug("Computing stripe %llu blocks %d,%d\n", 38938c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, 38948c2ecf20Sopenharmony_ci disk_idx, other); 38958c2ecf20Sopenharmony_ci set_bit(STRIPE_COMPUTE_RUN, &sh->state); 38968c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 38978c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); 38988c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, &sh->dev[other].flags); 38998c2ecf20Sopenharmony_ci sh->ops.target = disk_idx; 39008c2ecf20Sopenharmony_ci sh->ops.target2 = other; 39018c2ecf20Sopenharmony_ci s->uptodate += 2; 39028c2ecf20Sopenharmony_ci s->req_compute = 1; 39038c2ecf20Sopenharmony_ci return 1; 39048c2ecf20Sopenharmony_ci } else if (test_bit(R5_Insync, &dev->flags)) { 39058c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 39068c2ecf20Sopenharmony_ci set_bit(R5_Wantread, &dev->flags); 39078c2ecf20Sopenharmony_ci s->locked++; 39088c2ecf20Sopenharmony_ci pr_debug("Reading block %d (sync=%d)\n", 39098c2ecf20Sopenharmony_ci disk_idx, s->syncing); 39108c2ecf20Sopenharmony_ci } 39118c2ecf20Sopenharmony_ci } 39128c2ecf20Sopenharmony_ci 39138c2ecf20Sopenharmony_ci return 0; 39148c2ecf20Sopenharmony_ci} 39158c2ecf20Sopenharmony_ci 39168c2ecf20Sopenharmony_ci/* 39178c2ecf20Sopenharmony_ci * handle_stripe_fill - read or compute data to satisfy pending requests. 39188c2ecf20Sopenharmony_ci */ 39198c2ecf20Sopenharmony_cistatic void handle_stripe_fill(struct stripe_head *sh, 39208c2ecf20Sopenharmony_ci struct stripe_head_state *s, 39218c2ecf20Sopenharmony_ci int disks) 39228c2ecf20Sopenharmony_ci{ 39238c2ecf20Sopenharmony_ci int i; 39248c2ecf20Sopenharmony_ci 39258c2ecf20Sopenharmony_ci /* look for blocks to read/compute, skip this if a compute 39268c2ecf20Sopenharmony_ci * is already in flight, or if the stripe contents are in the 39278c2ecf20Sopenharmony_ci * midst of changing due to a write 39288c2ecf20Sopenharmony_ci */ 39298c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && 39308c2ecf20Sopenharmony_ci !sh->reconstruct_state) { 39318c2ecf20Sopenharmony_ci 39328c2ecf20Sopenharmony_ci /* 39338c2ecf20Sopenharmony_ci * For degraded stripe with data in journal, do not handle 39348c2ecf20Sopenharmony_ci * read requests yet, instead, flush the stripe to raid 39358c2ecf20Sopenharmony_ci * disks first, this avoids handling complex rmw of write 39368c2ecf20Sopenharmony_ci * back cache (prexor with orig_page, and then xor with 39378c2ecf20Sopenharmony_ci * page) in the read path 39388c2ecf20Sopenharmony_ci */ 39398c2ecf20Sopenharmony_ci if (s->to_read && s->injournal && s->failed) { 39408c2ecf20Sopenharmony_ci if (test_bit(STRIPE_R5C_CACHING, &sh->state)) 39418c2ecf20Sopenharmony_ci r5c_make_stripe_write_out(sh); 39428c2ecf20Sopenharmony_ci goto out; 39438c2ecf20Sopenharmony_ci } 39448c2ecf20Sopenharmony_ci 39458c2ecf20Sopenharmony_ci for (i = disks; i--; ) 39468c2ecf20Sopenharmony_ci if (fetch_block(sh, s, i, disks)) 39478c2ecf20Sopenharmony_ci break; 39488c2ecf20Sopenharmony_ci } 39498c2ecf20Sopenharmony_ciout: 39508c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 39518c2ecf20Sopenharmony_ci} 39528c2ecf20Sopenharmony_ci 39538c2ecf20Sopenharmony_cistatic void break_stripe_batch_list(struct stripe_head *head_sh, 39548c2ecf20Sopenharmony_ci unsigned long handle_flags); 39558c2ecf20Sopenharmony_ci/* handle_stripe_clean_event 39568c2ecf20Sopenharmony_ci * any written block on an uptodate or failed drive can be returned. 39578c2ecf20Sopenharmony_ci * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 39588c2ecf20Sopenharmony_ci * never LOCKED, so we don't need to test 'failed' directly. 39598c2ecf20Sopenharmony_ci */ 39608c2ecf20Sopenharmony_cistatic void handle_stripe_clean_event(struct r5conf *conf, 39618c2ecf20Sopenharmony_ci struct stripe_head *sh, int disks) 39628c2ecf20Sopenharmony_ci{ 39638c2ecf20Sopenharmony_ci int i; 39648c2ecf20Sopenharmony_ci struct r5dev *dev; 39658c2ecf20Sopenharmony_ci int discard_pending = 0; 39668c2ecf20Sopenharmony_ci struct stripe_head *head_sh = sh; 39678c2ecf20Sopenharmony_ci bool do_endio = false; 39688c2ecf20Sopenharmony_ci 39698c2ecf20Sopenharmony_ci for (i = disks; i--; ) 39708c2ecf20Sopenharmony_ci if (sh->dev[i].written) { 39718c2ecf20Sopenharmony_ci dev = &sh->dev[i]; 39728c2ecf20Sopenharmony_ci if (!test_bit(R5_LOCKED, &dev->flags) && 39738c2ecf20Sopenharmony_ci (test_bit(R5_UPTODATE, &dev->flags) || 39748c2ecf20Sopenharmony_ci test_bit(R5_Discard, &dev->flags) || 39758c2ecf20Sopenharmony_ci test_bit(R5_SkipCopy, &dev->flags))) { 39768c2ecf20Sopenharmony_ci /* We can return any write requests */ 39778c2ecf20Sopenharmony_ci struct bio *wbi, *wbi2; 39788c2ecf20Sopenharmony_ci pr_debug("Return write for disc %d\n", i); 39798c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Discard, &dev->flags)) 39808c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &dev->flags); 39818c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { 39828c2ecf20Sopenharmony_ci WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); 39838c2ecf20Sopenharmony_ci } 39848c2ecf20Sopenharmony_ci do_endio = true; 39858c2ecf20Sopenharmony_ci 39868c2ecf20Sopenharmony_cireturnbi: 39878c2ecf20Sopenharmony_ci dev->page = dev->orig_page; 39888c2ecf20Sopenharmony_ci wbi = dev->written; 39898c2ecf20Sopenharmony_ci dev->written = NULL; 39908c2ecf20Sopenharmony_ci while (wbi && wbi->bi_iter.bi_sector < 39918c2ecf20Sopenharmony_ci dev->sector + RAID5_STRIPE_SECTORS(conf)) { 39928c2ecf20Sopenharmony_ci wbi2 = r5_next_bio(conf, wbi, dev->sector); 39938c2ecf20Sopenharmony_ci md_write_end(conf->mddev); 39948c2ecf20Sopenharmony_ci bio_endio(wbi); 39958c2ecf20Sopenharmony_ci wbi = wbi2; 39968c2ecf20Sopenharmony_ci } 39978c2ecf20Sopenharmony_ci md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, 39988c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 39998c2ecf20Sopenharmony_ci !test_bit(STRIPE_DEGRADED, &sh->state), 40008c2ecf20Sopenharmony_ci 0); 40018c2ecf20Sopenharmony_ci if (head_sh->batch_head) { 40028c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, 40038c2ecf20Sopenharmony_ci struct stripe_head, 40048c2ecf20Sopenharmony_ci batch_list); 40058c2ecf20Sopenharmony_ci if (sh != head_sh) { 40068c2ecf20Sopenharmony_ci dev = &sh->dev[i]; 40078c2ecf20Sopenharmony_ci goto returnbi; 40088c2ecf20Sopenharmony_ci } 40098c2ecf20Sopenharmony_ci } 40108c2ecf20Sopenharmony_ci sh = head_sh; 40118c2ecf20Sopenharmony_ci dev = &sh->dev[i]; 40128c2ecf20Sopenharmony_ci } else if (test_bit(R5_Discard, &dev->flags)) 40138c2ecf20Sopenharmony_ci discard_pending = 1; 40148c2ecf20Sopenharmony_ci } 40158c2ecf20Sopenharmony_ci 40168c2ecf20Sopenharmony_ci log_stripe_write_finished(sh); 40178c2ecf20Sopenharmony_ci 40188c2ecf20Sopenharmony_ci if (!discard_pending && 40198c2ecf20Sopenharmony_ci test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { 40208c2ecf20Sopenharmony_ci int hash; 40218c2ecf20Sopenharmony_ci clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); 40228c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); 40238c2ecf20Sopenharmony_ci if (sh->qd_idx >= 0) { 40248c2ecf20Sopenharmony_ci clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); 40258c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); 40268c2ecf20Sopenharmony_ci } 40278c2ecf20Sopenharmony_ci /* now that discard is done we can proceed with any sync */ 40288c2ecf20Sopenharmony_ci clear_bit(STRIPE_DISCARD, &sh->state); 40298c2ecf20Sopenharmony_ci /* 40308c2ecf20Sopenharmony_ci * SCSI discard will change some bio fields and the stripe has 40318c2ecf20Sopenharmony_ci * no updated data, so remove it from hash list and the stripe 40328c2ecf20Sopenharmony_ci * will be reinitialized 40338c2ecf20Sopenharmony_ci */ 40348c2ecf20Sopenharmony_ciunhash: 40358c2ecf20Sopenharmony_ci hash = sh->hash_lock_index; 40368c2ecf20Sopenharmony_ci spin_lock_irq(conf->hash_locks + hash); 40378c2ecf20Sopenharmony_ci remove_hash(sh); 40388c2ecf20Sopenharmony_ci spin_unlock_irq(conf->hash_locks + hash); 40398c2ecf20Sopenharmony_ci if (head_sh->batch_head) { 40408c2ecf20Sopenharmony_ci sh = list_first_entry(&sh->batch_list, 40418c2ecf20Sopenharmony_ci struct stripe_head, batch_list); 40428c2ecf20Sopenharmony_ci if (sh != head_sh) 40438c2ecf20Sopenharmony_ci goto unhash; 40448c2ecf20Sopenharmony_ci } 40458c2ecf20Sopenharmony_ci sh = head_sh; 40468c2ecf20Sopenharmony_ci 40478c2ecf20Sopenharmony_ci if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) 40488c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 40498c2ecf20Sopenharmony_ci 40508c2ecf20Sopenharmony_ci } 40518c2ecf20Sopenharmony_ci 40528c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) 40538c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&conf->pending_full_writes)) 40548c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 40558c2ecf20Sopenharmony_ci 40568c2ecf20Sopenharmony_ci if (head_sh->batch_head && do_endio) 40578c2ecf20Sopenharmony_ci break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); 40588c2ecf20Sopenharmony_ci} 40598c2ecf20Sopenharmony_ci 40608c2ecf20Sopenharmony_ci/* 40618c2ecf20Sopenharmony_ci * For RMW in write back cache, we need extra page in prexor to store the 40628c2ecf20Sopenharmony_ci * old data. This page is stored in dev->orig_page. 40638c2ecf20Sopenharmony_ci * 40648c2ecf20Sopenharmony_ci * This function checks whether we have data for prexor. The exact logic 40658c2ecf20Sopenharmony_ci * is: 40668c2ecf20Sopenharmony_ci * R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE) 40678c2ecf20Sopenharmony_ci */ 40688c2ecf20Sopenharmony_cistatic inline bool uptodate_for_rmw(struct r5dev *dev) 40698c2ecf20Sopenharmony_ci{ 40708c2ecf20Sopenharmony_ci return (test_bit(R5_UPTODATE, &dev->flags)) && 40718c2ecf20Sopenharmony_ci (!test_bit(R5_InJournal, &dev->flags) || 40728c2ecf20Sopenharmony_ci test_bit(R5_OrigPageUPTDODATE, &dev->flags)); 40738c2ecf20Sopenharmony_ci} 40748c2ecf20Sopenharmony_ci 40758c2ecf20Sopenharmony_cistatic int handle_stripe_dirtying(struct r5conf *conf, 40768c2ecf20Sopenharmony_ci struct stripe_head *sh, 40778c2ecf20Sopenharmony_ci struct stripe_head_state *s, 40788c2ecf20Sopenharmony_ci int disks) 40798c2ecf20Sopenharmony_ci{ 40808c2ecf20Sopenharmony_ci int rmw = 0, rcw = 0, i; 40818c2ecf20Sopenharmony_ci sector_t recovery_cp = conf->mddev->recovery_cp; 40828c2ecf20Sopenharmony_ci 40838c2ecf20Sopenharmony_ci /* Check whether resync is now happening or should start. 40848c2ecf20Sopenharmony_ci * If yes, then the array is dirty (after unclean shutdown or 40858c2ecf20Sopenharmony_ci * initial creation), so parity in some stripes might be inconsistent. 40868c2ecf20Sopenharmony_ci * In this case, we need to always do reconstruct-write, to ensure 40878c2ecf20Sopenharmony_ci * that in case of drive failure or read-error correction, we 40888c2ecf20Sopenharmony_ci * generate correct data from the parity. 40898c2ecf20Sopenharmony_ci */ 40908c2ecf20Sopenharmony_ci if (conf->rmw_level == PARITY_DISABLE_RMW || 40918c2ecf20Sopenharmony_ci (recovery_cp < MaxSector && sh->sector >= recovery_cp && 40928c2ecf20Sopenharmony_ci s->failed == 0)) { 40938c2ecf20Sopenharmony_ci /* Calculate the real rcw later - for now make it 40948c2ecf20Sopenharmony_ci * look like rcw is cheaper 40958c2ecf20Sopenharmony_ci */ 40968c2ecf20Sopenharmony_ci rcw = 1; rmw = 2; 40978c2ecf20Sopenharmony_ci pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", 40988c2ecf20Sopenharmony_ci conf->rmw_level, (unsigned long long)recovery_cp, 40998c2ecf20Sopenharmony_ci (unsigned long long)sh->sector); 41008c2ecf20Sopenharmony_ci } else for (i = disks; i--; ) { 41018c2ecf20Sopenharmony_ci /* would I have to read this buffer for read_modify_write */ 41028c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 41038c2ecf20Sopenharmony_ci if (((dev->towrite && !delay_towrite(conf, dev, s)) || 41048c2ecf20Sopenharmony_ci i == sh->pd_idx || i == sh->qd_idx || 41058c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &dev->flags)) && 41068c2ecf20Sopenharmony_ci !test_bit(R5_LOCKED, &dev->flags) && 41078c2ecf20Sopenharmony_ci !(uptodate_for_rmw(dev) || 41088c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &dev->flags))) { 41098c2ecf20Sopenharmony_ci if (test_bit(R5_Insync, &dev->flags)) 41108c2ecf20Sopenharmony_ci rmw++; 41118c2ecf20Sopenharmony_ci else 41128c2ecf20Sopenharmony_ci rmw += 2*disks; /* cannot read it */ 41138c2ecf20Sopenharmony_ci } 41148c2ecf20Sopenharmony_ci /* Would I have to read this buffer for reconstruct_write */ 41158c2ecf20Sopenharmony_ci if (!test_bit(R5_OVERWRITE, &dev->flags) && 41168c2ecf20Sopenharmony_ci i != sh->pd_idx && i != sh->qd_idx && 41178c2ecf20Sopenharmony_ci !test_bit(R5_LOCKED, &dev->flags) && 41188c2ecf20Sopenharmony_ci !(test_bit(R5_UPTODATE, &dev->flags) || 41198c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &dev->flags))) { 41208c2ecf20Sopenharmony_ci if (test_bit(R5_Insync, &dev->flags)) 41218c2ecf20Sopenharmony_ci rcw++; 41228c2ecf20Sopenharmony_ci else 41238c2ecf20Sopenharmony_ci rcw += 2*disks; 41248c2ecf20Sopenharmony_ci } 41258c2ecf20Sopenharmony_ci } 41268c2ecf20Sopenharmony_ci 41278c2ecf20Sopenharmony_ci pr_debug("for sector %llu state 0x%lx, rmw=%d rcw=%d\n", 41288c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, sh->state, rmw, rcw); 41298c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 41308c2ecf20Sopenharmony_ci if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { 41318c2ecf20Sopenharmony_ci /* prefer read-modify-write, but need to get some data */ 41328c2ecf20Sopenharmony_ci if (conf->mddev->queue) 41338c2ecf20Sopenharmony_ci blk_add_trace_msg(conf->mddev->queue, 41348c2ecf20Sopenharmony_ci "raid5 rmw %llu %d", 41358c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, rmw); 41368c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 41378c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 41388c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &dev->flags) && 41398c2ecf20Sopenharmony_ci dev->page == dev->orig_page && 41408c2ecf20Sopenharmony_ci !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { 41418c2ecf20Sopenharmony_ci /* alloc page for prexor */ 41428c2ecf20Sopenharmony_ci struct page *p = alloc_page(GFP_NOIO); 41438c2ecf20Sopenharmony_ci 41448c2ecf20Sopenharmony_ci if (p) { 41458c2ecf20Sopenharmony_ci dev->orig_page = p; 41468c2ecf20Sopenharmony_ci continue; 41478c2ecf20Sopenharmony_ci } 41488c2ecf20Sopenharmony_ci 41498c2ecf20Sopenharmony_ci /* 41508c2ecf20Sopenharmony_ci * alloc_page() failed, try use 41518c2ecf20Sopenharmony_ci * disk_info->extra_page 41528c2ecf20Sopenharmony_ci */ 41538c2ecf20Sopenharmony_ci if (!test_and_set_bit(R5C_EXTRA_PAGE_IN_USE, 41548c2ecf20Sopenharmony_ci &conf->cache_state)) { 41558c2ecf20Sopenharmony_ci r5c_use_extra_page(sh); 41568c2ecf20Sopenharmony_ci break; 41578c2ecf20Sopenharmony_ci } 41588c2ecf20Sopenharmony_ci 41598c2ecf20Sopenharmony_ci /* extra_page in use, add to delayed_list */ 41608c2ecf20Sopenharmony_ci set_bit(STRIPE_DELAYED, &sh->state); 41618c2ecf20Sopenharmony_ci s->waiting_extra_page = 1; 41628c2ecf20Sopenharmony_ci return -EAGAIN; 41638c2ecf20Sopenharmony_ci } 41648c2ecf20Sopenharmony_ci } 41658c2ecf20Sopenharmony_ci 41668c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 41678c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 41688c2ecf20Sopenharmony_ci if (((dev->towrite && !delay_towrite(conf, dev, s)) || 41698c2ecf20Sopenharmony_ci i == sh->pd_idx || i == sh->qd_idx || 41708c2ecf20Sopenharmony_ci test_bit(R5_InJournal, &dev->flags)) && 41718c2ecf20Sopenharmony_ci !test_bit(R5_LOCKED, &dev->flags) && 41728c2ecf20Sopenharmony_ci !(uptodate_for_rmw(dev) || 41738c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &dev->flags)) && 41748c2ecf20Sopenharmony_ci test_bit(R5_Insync, &dev->flags)) { 41758c2ecf20Sopenharmony_ci if (test_bit(STRIPE_PREREAD_ACTIVE, 41768c2ecf20Sopenharmony_ci &sh->state)) { 41778c2ecf20Sopenharmony_ci pr_debug("Read_old block %d for r-m-w\n", 41788c2ecf20Sopenharmony_ci i); 41798c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 41808c2ecf20Sopenharmony_ci set_bit(R5_Wantread, &dev->flags); 41818c2ecf20Sopenharmony_ci s->locked++; 41828c2ecf20Sopenharmony_ci } else 41838c2ecf20Sopenharmony_ci set_bit(STRIPE_DELAYED, &sh->state); 41848c2ecf20Sopenharmony_ci } 41858c2ecf20Sopenharmony_ci } 41868c2ecf20Sopenharmony_ci } 41878c2ecf20Sopenharmony_ci if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) { 41888c2ecf20Sopenharmony_ci /* want reconstruct write, but need to get some data */ 41898c2ecf20Sopenharmony_ci int qread =0; 41908c2ecf20Sopenharmony_ci rcw = 0; 41918c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 41928c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 41938c2ecf20Sopenharmony_ci if (!test_bit(R5_OVERWRITE, &dev->flags) && 41948c2ecf20Sopenharmony_ci i != sh->pd_idx && i != sh->qd_idx && 41958c2ecf20Sopenharmony_ci !test_bit(R5_LOCKED, &dev->flags) && 41968c2ecf20Sopenharmony_ci !(test_bit(R5_UPTODATE, &dev->flags) || 41978c2ecf20Sopenharmony_ci test_bit(R5_Wantcompute, &dev->flags))) { 41988c2ecf20Sopenharmony_ci rcw++; 41998c2ecf20Sopenharmony_ci if (test_bit(R5_Insync, &dev->flags) && 42008c2ecf20Sopenharmony_ci test_bit(STRIPE_PREREAD_ACTIVE, 42018c2ecf20Sopenharmony_ci &sh->state)) { 42028c2ecf20Sopenharmony_ci pr_debug("Read_old block " 42038c2ecf20Sopenharmony_ci "%d for Reconstruct\n", i); 42048c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 42058c2ecf20Sopenharmony_ci set_bit(R5_Wantread, &dev->flags); 42068c2ecf20Sopenharmony_ci s->locked++; 42078c2ecf20Sopenharmony_ci qread++; 42088c2ecf20Sopenharmony_ci } else 42098c2ecf20Sopenharmony_ci set_bit(STRIPE_DELAYED, &sh->state); 42108c2ecf20Sopenharmony_ci } 42118c2ecf20Sopenharmony_ci } 42128c2ecf20Sopenharmony_ci if (rcw && conf->mddev->queue) 42138c2ecf20Sopenharmony_ci blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", 42148c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, 42158c2ecf20Sopenharmony_ci rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); 42168c2ecf20Sopenharmony_ci } 42178c2ecf20Sopenharmony_ci 42188c2ecf20Sopenharmony_ci if (rcw > disks && rmw > disks && 42198c2ecf20Sopenharmony_ci !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 42208c2ecf20Sopenharmony_ci set_bit(STRIPE_DELAYED, &sh->state); 42218c2ecf20Sopenharmony_ci 42228c2ecf20Sopenharmony_ci /* now if nothing is locked, and if we have enough data, 42238c2ecf20Sopenharmony_ci * we can start a write request 42248c2ecf20Sopenharmony_ci */ 42258c2ecf20Sopenharmony_ci /* since handle_stripe can be called at any time we need to handle the 42268c2ecf20Sopenharmony_ci * case where a compute block operation has been submitted and then a 42278c2ecf20Sopenharmony_ci * subsequent call wants to start a write request. raid_run_ops only 42288c2ecf20Sopenharmony_ci * handles the case where compute block and reconstruct are requested 42298c2ecf20Sopenharmony_ci * simultaneously. If this is not the case then new writes need to be 42308c2ecf20Sopenharmony_ci * held off until the compute completes. 42318c2ecf20Sopenharmony_ci */ 42328c2ecf20Sopenharmony_ci if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && 42338c2ecf20Sopenharmony_ci (s->locked == 0 && (rcw == 0 || rmw == 0) && 42348c2ecf20Sopenharmony_ci !test_bit(STRIPE_BIT_DELAY, &sh->state))) 42358c2ecf20Sopenharmony_ci schedule_reconstruction(sh, s, rcw == 0, 0); 42368c2ecf20Sopenharmony_ci return 0; 42378c2ecf20Sopenharmony_ci} 42388c2ecf20Sopenharmony_ci 42398c2ecf20Sopenharmony_cistatic void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, 42408c2ecf20Sopenharmony_ci struct stripe_head_state *s, int disks) 42418c2ecf20Sopenharmony_ci{ 42428c2ecf20Sopenharmony_ci struct r5dev *dev = NULL; 42438c2ecf20Sopenharmony_ci 42448c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 42458c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 42468c2ecf20Sopenharmony_ci 42478c2ecf20Sopenharmony_ci switch (sh->check_state) { 42488c2ecf20Sopenharmony_ci case check_state_idle: 42498c2ecf20Sopenharmony_ci /* start a new check operation if there are no failures */ 42508c2ecf20Sopenharmony_ci if (s->failed == 0) { 42518c2ecf20Sopenharmony_ci BUG_ON(s->uptodate != disks); 42528c2ecf20Sopenharmony_ci sh->check_state = check_state_run; 42538c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_CHECK, &s->ops_request); 42548c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); 42558c2ecf20Sopenharmony_ci s->uptodate--; 42568c2ecf20Sopenharmony_ci break; 42578c2ecf20Sopenharmony_ci } 42588c2ecf20Sopenharmony_ci dev = &sh->dev[s->failed_num[0]]; 42598c2ecf20Sopenharmony_ci fallthrough; 42608c2ecf20Sopenharmony_ci case check_state_compute_result: 42618c2ecf20Sopenharmony_ci sh->check_state = check_state_idle; 42628c2ecf20Sopenharmony_ci if (!dev) 42638c2ecf20Sopenharmony_ci dev = &sh->dev[sh->pd_idx]; 42648c2ecf20Sopenharmony_ci 42658c2ecf20Sopenharmony_ci /* check that a write has not made the stripe insync */ 42668c2ecf20Sopenharmony_ci if (test_bit(STRIPE_INSYNC, &sh->state)) 42678c2ecf20Sopenharmony_ci break; 42688c2ecf20Sopenharmony_ci 42698c2ecf20Sopenharmony_ci /* either failed parity check, or recovery is happening */ 42708c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); 42718c2ecf20Sopenharmony_ci BUG_ON(s->uptodate != disks); 42728c2ecf20Sopenharmony_ci 42738c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 42748c2ecf20Sopenharmony_ci s->locked++; 42758c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 42768c2ecf20Sopenharmony_ci 42778c2ecf20Sopenharmony_ci clear_bit(STRIPE_DEGRADED, &sh->state); 42788c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 42798c2ecf20Sopenharmony_ci break; 42808c2ecf20Sopenharmony_ci case check_state_run: 42818c2ecf20Sopenharmony_ci break; /* we will be called again upon completion */ 42828c2ecf20Sopenharmony_ci case check_state_check_result: 42838c2ecf20Sopenharmony_ci sh->check_state = check_state_idle; 42848c2ecf20Sopenharmony_ci 42858c2ecf20Sopenharmony_ci /* if a failure occurred during the check operation, leave 42868c2ecf20Sopenharmony_ci * STRIPE_INSYNC not set and let the stripe be handled again 42878c2ecf20Sopenharmony_ci */ 42888c2ecf20Sopenharmony_ci if (s->failed) 42898c2ecf20Sopenharmony_ci break; 42908c2ecf20Sopenharmony_ci 42918c2ecf20Sopenharmony_ci /* handle a successful check operation, if parity is correct 42928c2ecf20Sopenharmony_ci * we are done. Otherwise update the mismatch count and repair 42938c2ecf20Sopenharmony_ci * parity if !MD_RECOVERY_CHECK 42948c2ecf20Sopenharmony_ci */ 42958c2ecf20Sopenharmony_ci if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) 42968c2ecf20Sopenharmony_ci /* parity is correct (on disc, 42978c2ecf20Sopenharmony_ci * not in buffer any more) 42988c2ecf20Sopenharmony_ci */ 42998c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 43008c2ecf20Sopenharmony_ci else { 43018c2ecf20Sopenharmony_ci atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); 43028c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { 43038c2ecf20Sopenharmony_ci /* don't try to repair!! */ 43048c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 43058c2ecf20Sopenharmony_ci pr_warn_ratelimited("%s: mismatch sector in range " 43068c2ecf20Sopenharmony_ci "%llu-%llu\n", mdname(conf->mddev), 43078c2ecf20Sopenharmony_ci (unsigned long long) sh->sector, 43088c2ecf20Sopenharmony_ci (unsigned long long) sh->sector + 43098c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf)); 43108c2ecf20Sopenharmony_ci } else { 43118c2ecf20Sopenharmony_ci sh->check_state = check_state_compute_run; 43128c2ecf20Sopenharmony_ci set_bit(STRIPE_COMPUTE_RUN, &sh->state); 43138c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 43148c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, 43158c2ecf20Sopenharmony_ci &sh->dev[sh->pd_idx].flags); 43168c2ecf20Sopenharmony_ci sh->ops.target = sh->pd_idx; 43178c2ecf20Sopenharmony_ci sh->ops.target2 = -1; 43188c2ecf20Sopenharmony_ci s->uptodate++; 43198c2ecf20Sopenharmony_ci } 43208c2ecf20Sopenharmony_ci } 43218c2ecf20Sopenharmony_ci break; 43228c2ecf20Sopenharmony_ci case check_state_compute_run: 43238c2ecf20Sopenharmony_ci break; 43248c2ecf20Sopenharmony_ci default: 43258c2ecf20Sopenharmony_ci pr_err("%s: unknown check_state: %d sector: %llu\n", 43268c2ecf20Sopenharmony_ci __func__, sh->check_state, 43278c2ecf20Sopenharmony_ci (unsigned long long) sh->sector); 43288c2ecf20Sopenharmony_ci BUG(); 43298c2ecf20Sopenharmony_ci } 43308c2ecf20Sopenharmony_ci} 43318c2ecf20Sopenharmony_ci 43328c2ecf20Sopenharmony_cistatic void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, 43338c2ecf20Sopenharmony_ci struct stripe_head_state *s, 43348c2ecf20Sopenharmony_ci int disks) 43358c2ecf20Sopenharmony_ci{ 43368c2ecf20Sopenharmony_ci int pd_idx = sh->pd_idx; 43378c2ecf20Sopenharmony_ci int qd_idx = sh->qd_idx; 43388c2ecf20Sopenharmony_ci struct r5dev *dev; 43398c2ecf20Sopenharmony_ci 43408c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 43418c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 43428c2ecf20Sopenharmony_ci 43438c2ecf20Sopenharmony_ci BUG_ON(s->failed > 2); 43448c2ecf20Sopenharmony_ci 43458c2ecf20Sopenharmony_ci /* Want to check and possibly repair P and Q. 43468c2ecf20Sopenharmony_ci * However there could be one 'failed' device, in which 43478c2ecf20Sopenharmony_ci * case we can only check one of them, possibly using the 43488c2ecf20Sopenharmony_ci * other to generate missing data 43498c2ecf20Sopenharmony_ci */ 43508c2ecf20Sopenharmony_ci 43518c2ecf20Sopenharmony_ci switch (sh->check_state) { 43528c2ecf20Sopenharmony_ci case check_state_idle: 43538c2ecf20Sopenharmony_ci /* start a new check operation if there are < 2 failures */ 43548c2ecf20Sopenharmony_ci if (s->failed == s->q_failed) { 43558c2ecf20Sopenharmony_ci /* The only possible failed device holds Q, so it 43568c2ecf20Sopenharmony_ci * makes sense to check P (If anything else were failed, 43578c2ecf20Sopenharmony_ci * we would have used P to recreate it). 43588c2ecf20Sopenharmony_ci */ 43598c2ecf20Sopenharmony_ci sh->check_state = check_state_run; 43608c2ecf20Sopenharmony_ci } 43618c2ecf20Sopenharmony_ci if (!s->q_failed && s->failed < 2) { 43628c2ecf20Sopenharmony_ci /* Q is not failed, and we didn't use it to generate 43638c2ecf20Sopenharmony_ci * anything, so it makes sense to check it 43648c2ecf20Sopenharmony_ci */ 43658c2ecf20Sopenharmony_ci if (sh->check_state == check_state_run) 43668c2ecf20Sopenharmony_ci sh->check_state = check_state_run_pq; 43678c2ecf20Sopenharmony_ci else 43688c2ecf20Sopenharmony_ci sh->check_state = check_state_run_q; 43698c2ecf20Sopenharmony_ci } 43708c2ecf20Sopenharmony_ci 43718c2ecf20Sopenharmony_ci /* discard potentially stale zero_sum_result */ 43728c2ecf20Sopenharmony_ci sh->ops.zero_sum_result = 0; 43738c2ecf20Sopenharmony_ci 43748c2ecf20Sopenharmony_ci if (sh->check_state == check_state_run) { 43758c2ecf20Sopenharmony_ci /* async_xor_zero_sum destroys the contents of P */ 43768c2ecf20Sopenharmony_ci clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 43778c2ecf20Sopenharmony_ci s->uptodate--; 43788c2ecf20Sopenharmony_ci } 43798c2ecf20Sopenharmony_ci if (sh->check_state >= check_state_run && 43808c2ecf20Sopenharmony_ci sh->check_state <= check_state_run_pq) { 43818c2ecf20Sopenharmony_ci /* async_syndrome_zero_sum preserves P and Q, so 43828c2ecf20Sopenharmony_ci * no need to mark them !uptodate here 43838c2ecf20Sopenharmony_ci */ 43848c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_CHECK, &s->ops_request); 43858c2ecf20Sopenharmony_ci break; 43868c2ecf20Sopenharmony_ci } 43878c2ecf20Sopenharmony_ci 43888c2ecf20Sopenharmony_ci /* we have 2-disk failure */ 43898c2ecf20Sopenharmony_ci BUG_ON(s->failed != 2); 43908c2ecf20Sopenharmony_ci fallthrough; 43918c2ecf20Sopenharmony_ci case check_state_compute_result: 43928c2ecf20Sopenharmony_ci sh->check_state = check_state_idle; 43938c2ecf20Sopenharmony_ci 43948c2ecf20Sopenharmony_ci /* check that a write has not made the stripe insync */ 43958c2ecf20Sopenharmony_ci if (test_bit(STRIPE_INSYNC, &sh->state)) 43968c2ecf20Sopenharmony_ci break; 43978c2ecf20Sopenharmony_ci 43988c2ecf20Sopenharmony_ci /* now write out any block on a failed drive, 43998c2ecf20Sopenharmony_ci * or P or Q if they were recomputed 44008c2ecf20Sopenharmony_ci */ 44018c2ecf20Sopenharmony_ci dev = NULL; 44028c2ecf20Sopenharmony_ci if (s->failed == 2) { 44038c2ecf20Sopenharmony_ci dev = &sh->dev[s->failed_num[1]]; 44048c2ecf20Sopenharmony_ci s->locked++; 44058c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 44068c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 44078c2ecf20Sopenharmony_ci } 44088c2ecf20Sopenharmony_ci if (s->failed >= 1) { 44098c2ecf20Sopenharmony_ci dev = &sh->dev[s->failed_num[0]]; 44108c2ecf20Sopenharmony_ci s->locked++; 44118c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 44128c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 44138c2ecf20Sopenharmony_ci } 44148c2ecf20Sopenharmony_ci if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { 44158c2ecf20Sopenharmony_ci dev = &sh->dev[pd_idx]; 44168c2ecf20Sopenharmony_ci s->locked++; 44178c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 44188c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 44198c2ecf20Sopenharmony_ci } 44208c2ecf20Sopenharmony_ci if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { 44218c2ecf20Sopenharmony_ci dev = &sh->dev[qd_idx]; 44228c2ecf20Sopenharmony_ci s->locked++; 44238c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 44248c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 44258c2ecf20Sopenharmony_ci } 44268c2ecf20Sopenharmony_ci if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), 44278c2ecf20Sopenharmony_ci "%s: disk%td not up to date\n", 44288c2ecf20Sopenharmony_ci mdname(conf->mddev), 44298c2ecf20Sopenharmony_ci dev - (struct r5dev *) &sh->dev)) { 44308c2ecf20Sopenharmony_ci clear_bit(R5_LOCKED, &dev->flags); 44318c2ecf20Sopenharmony_ci clear_bit(R5_Wantwrite, &dev->flags); 44328c2ecf20Sopenharmony_ci s->locked--; 44338c2ecf20Sopenharmony_ci } 44348c2ecf20Sopenharmony_ci clear_bit(STRIPE_DEGRADED, &sh->state); 44358c2ecf20Sopenharmony_ci 44368c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 44378c2ecf20Sopenharmony_ci break; 44388c2ecf20Sopenharmony_ci case check_state_run: 44398c2ecf20Sopenharmony_ci case check_state_run_q: 44408c2ecf20Sopenharmony_ci case check_state_run_pq: 44418c2ecf20Sopenharmony_ci break; /* we will be called again upon completion */ 44428c2ecf20Sopenharmony_ci case check_state_check_result: 44438c2ecf20Sopenharmony_ci sh->check_state = check_state_idle; 44448c2ecf20Sopenharmony_ci 44458c2ecf20Sopenharmony_ci /* handle a successful check operation, if parity is correct 44468c2ecf20Sopenharmony_ci * we are done. Otherwise update the mismatch count and repair 44478c2ecf20Sopenharmony_ci * parity if !MD_RECOVERY_CHECK 44488c2ecf20Sopenharmony_ci */ 44498c2ecf20Sopenharmony_ci if (sh->ops.zero_sum_result == 0) { 44508c2ecf20Sopenharmony_ci /* both parities are correct */ 44518c2ecf20Sopenharmony_ci if (!s->failed) 44528c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 44538c2ecf20Sopenharmony_ci else { 44548c2ecf20Sopenharmony_ci /* in contrast to the raid5 case we can validate 44558c2ecf20Sopenharmony_ci * parity, but still have a failure to write 44568c2ecf20Sopenharmony_ci * back 44578c2ecf20Sopenharmony_ci */ 44588c2ecf20Sopenharmony_ci sh->check_state = check_state_compute_result; 44598c2ecf20Sopenharmony_ci /* Returning at this point means that we may go 44608c2ecf20Sopenharmony_ci * off and bring p and/or q uptodate again so 44618c2ecf20Sopenharmony_ci * we make sure to check zero_sum_result again 44628c2ecf20Sopenharmony_ci * to verify if p or q need writeback 44638c2ecf20Sopenharmony_ci */ 44648c2ecf20Sopenharmony_ci } 44658c2ecf20Sopenharmony_ci } else { 44668c2ecf20Sopenharmony_ci atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); 44678c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { 44688c2ecf20Sopenharmony_ci /* don't try to repair!! */ 44698c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 44708c2ecf20Sopenharmony_ci pr_warn_ratelimited("%s: mismatch sector in range " 44718c2ecf20Sopenharmony_ci "%llu-%llu\n", mdname(conf->mddev), 44728c2ecf20Sopenharmony_ci (unsigned long long) sh->sector, 44738c2ecf20Sopenharmony_ci (unsigned long long) sh->sector + 44748c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf)); 44758c2ecf20Sopenharmony_ci } else { 44768c2ecf20Sopenharmony_ci int *target = &sh->ops.target; 44778c2ecf20Sopenharmony_ci 44788c2ecf20Sopenharmony_ci sh->ops.target = -1; 44798c2ecf20Sopenharmony_ci sh->ops.target2 = -1; 44808c2ecf20Sopenharmony_ci sh->check_state = check_state_compute_run; 44818c2ecf20Sopenharmony_ci set_bit(STRIPE_COMPUTE_RUN, &sh->state); 44828c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); 44838c2ecf20Sopenharmony_ci if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { 44848c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, 44858c2ecf20Sopenharmony_ci &sh->dev[pd_idx].flags); 44868c2ecf20Sopenharmony_ci *target = pd_idx; 44878c2ecf20Sopenharmony_ci target = &sh->ops.target2; 44888c2ecf20Sopenharmony_ci s->uptodate++; 44898c2ecf20Sopenharmony_ci } 44908c2ecf20Sopenharmony_ci if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { 44918c2ecf20Sopenharmony_ci set_bit(R5_Wantcompute, 44928c2ecf20Sopenharmony_ci &sh->dev[qd_idx].flags); 44938c2ecf20Sopenharmony_ci *target = qd_idx; 44948c2ecf20Sopenharmony_ci s->uptodate++; 44958c2ecf20Sopenharmony_ci } 44968c2ecf20Sopenharmony_ci } 44978c2ecf20Sopenharmony_ci } 44988c2ecf20Sopenharmony_ci break; 44998c2ecf20Sopenharmony_ci case check_state_compute_run: 45008c2ecf20Sopenharmony_ci break; 45018c2ecf20Sopenharmony_ci default: 45028c2ecf20Sopenharmony_ci pr_warn("%s: unknown check_state: %d sector: %llu\n", 45038c2ecf20Sopenharmony_ci __func__, sh->check_state, 45048c2ecf20Sopenharmony_ci (unsigned long long) sh->sector); 45058c2ecf20Sopenharmony_ci BUG(); 45068c2ecf20Sopenharmony_ci } 45078c2ecf20Sopenharmony_ci} 45088c2ecf20Sopenharmony_ci 45098c2ecf20Sopenharmony_cistatic void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) 45108c2ecf20Sopenharmony_ci{ 45118c2ecf20Sopenharmony_ci int i; 45128c2ecf20Sopenharmony_ci 45138c2ecf20Sopenharmony_ci /* We have read all the blocks in this stripe and now we need to 45148c2ecf20Sopenharmony_ci * copy some of them into a target stripe for expand. 45158c2ecf20Sopenharmony_ci */ 45168c2ecf20Sopenharmony_ci struct dma_async_tx_descriptor *tx = NULL; 45178c2ecf20Sopenharmony_ci BUG_ON(sh->batch_head); 45188c2ecf20Sopenharmony_ci clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 45198c2ecf20Sopenharmony_ci for (i = 0; i < sh->disks; i++) 45208c2ecf20Sopenharmony_ci if (i != sh->pd_idx && i != sh->qd_idx) { 45218c2ecf20Sopenharmony_ci int dd_idx, j; 45228c2ecf20Sopenharmony_ci struct stripe_head *sh2; 45238c2ecf20Sopenharmony_ci struct async_submit_ctl submit; 45248c2ecf20Sopenharmony_ci 45258c2ecf20Sopenharmony_ci sector_t bn = raid5_compute_blocknr(sh, i, 1); 45268c2ecf20Sopenharmony_ci sector_t s = raid5_compute_sector(conf, bn, 0, 45278c2ecf20Sopenharmony_ci &dd_idx, NULL); 45288c2ecf20Sopenharmony_ci sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1); 45298c2ecf20Sopenharmony_ci if (sh2 == NULL) 45308c2ecf20Sopenharmony_ci /* so far only the early blocks of this stripe 45318c2ecf20Sopenharmony_ci * have been requested. When later blocks 45328c2ecf20Sopenharmony_ci * get requested, we will try again 45338c2ecf20Sopenharmony_ci */ 45348c2ecf20Sopenharmony_ci continue; 45358c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_EXPANDING, &sh2->state) || 45368c2ecf20Sopenharmony_ci test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { 45378c2ecf20Sopenharmony_ci /* must have already done this block */ 45388c2ecf20Sopenharmony_ci raid5_release_stripe(sh2); 45398c2ecf20Sopenharmony_ci continue; 45408c2ecf20Sopenharmony_ci } 45418c2ecf20Sopenharmony_ci 45428c2ecf20Sopenharmony_ci /* place all the copies on one channel */ 45438c2ecf20Sopenharmony_ci init_async_submit(&submit, 0, tx, NULL, NULL, NULL); 45448c2ecf20Sopenharmony_ci tx = async_memcpy(sh2->dev[dd_idx].page, 45458c2ecf20Sopenharmony_ci sh->dev[i].page, sh2->dev[dd_idx].offset, 45468c2ecf20Sopenharmony_ci sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), 45478c2ecf20Sopenharmony_ci &submit); 45488c2ecf20Sopenharmony_ci 45498c2ecf20Sopenharmony_ci set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 45508c2ecf20Sopenharmony_ci set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 45518c2ecf20Sopenharmony_ci for (j = 0; j < conf->raid_disks; j++) 45528c2ecf20Sopenharmony_ci if (j != sh2->pd_idx && 45538c2ecf20Sopenharmony_ci j != sh2->qd_idx && 45548c2ecf20Sopenharmony_ci !test_bit(R5_Expanded, &sh2->dev[j].flags)) 45558c2ecf20Sopenharmony_ci break; 45568c2ecf20Sopenharmony_ci if (j == conf->raid_disks) { 45578c2ecf20Sopenharmony_ci set_bit(STRIPE_EXPAND_READY, &sh2->state); 45588c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh2->state); 45598c2ecf20Sopenharmony_ci } 45608c2ecf20Sopenharmony_ci raid5_release_stripe(sh2); 45618c2ecf20Sopenharmony_ci 45628c2ecf20Sopenharmony_ci } 45638c2ecf20Sopenharmony_ci /* done submitting copies, wait for them to complete */ 45648c2ecf20Sopenharmony_ci async_tx_quiesce(&tx); 45658c2ecf20Sopenharmony_ci} 45668c2ecf20Sopenharmony_ci 45678c2ecf20Sopenharmony_ci/* 45688c2ecf20Sopenharmony_ci * handle_stripe - do things to a stripe. 45698c2ecf20Sopenharmony_ci * 45708c2ecf20Sopenharmony_ci * We lock the stripe by setting STRIPE_ACTIVE and then examine the 45718c2ecf20Sopenharmony_ci * state of various bits to see what needs to be done. 45728c2ecf20Sopenharmony_ci * Possible results: 45738c2ecf20Sopenharmony_ci * return some read requests which now have data 45748c2ecf20Sopenharmony_ci * return some write requests which are safely on storage 45758c2ecf20Sopenharmony_ci * schedule a read on some buffers 45768c2ecf20Sopenharmony_ci * schedule a write of some buffers 45778c2ecf20Sopenharmony_ci * return confirmation of parity correctness 45788c2ecf20Sopenharmony_ci * 45798c2ecf20Sopenharmony_ci */ 45808c2ecf20Sopenharmony_ci 45818c2ecf20Sopenharmony_cistatic void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) 45828c2ecf20Sopenharmony_ci{ 45838c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 45848c2ecf20Sopenharmony_ci int disks = sh->disks; 45858c2ecf20Sopenharmony_ci struct r5dev *dev; 45868c2ecf20Sopenharmony_ci int i; 45878c2ecf20Sopenharmony_ci int do_recovery = 0; 45888c2ecf20Sopenharmony_ci 45898c2ecf20Sopenharmony_ci memset(s, 0, sizeof(*s)); 45908c2ecf20Sopenharmony_ci 45918c2ecf20Sopenharmony_ci s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head; 45928c2ecf20Sopenharmony_ci s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head; 45938c2ecf20Sopenharmony_ci s->failed_num[0] = -1; 45948c2ecf20Sopenharmony_ci s->failed_num[1] = -1; 45958c2ecf20Sopenharmony_ci s->log_failed = r5l_log_disk_error(conf); 45968c2ecf20Sopenharmony_ci 45978c2ecf20Sopenharmony_ci /* Now to look around and see what can be done */ 45988c2ecf20Sopenharmony_ci rcu_read_lock(); 45998c2ecf20Sopenharmony_ci for (i=disks; i--; ) { 46008c2ecf20Sopenharmony_ci struct md_rdev *rdev; 46018c2ecf20Sopenharmony_ci sector_t first_bad; 46028c2ecf20Sopenharmony_ci int bad_sectors; 46038c2ecf20Sopenharmony_ci int is_bad = 0; 46048c2ecf20Sopenharmony_ci 46058c2ecf20Sopenharmony_ci dev = &sh->dev[i]; 46068c2ecf20Sopenharmony_ci 46078c2ecf20Sopenharmony_ci pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 46088c2ecf20Sopenharmony_ci i, dev->flags, 46098c2ecf20Sopenharmony_ci dev->toread, dev->towrite, dev->written); 46108c2ecf20Sopenharmony_ci /* maybe we can reply to a read 46118c2ecf20Sopenharmony_ci * 46128c2ecf20Sopenharmony_ci * new wantfill requests are only permitted while 46138c2ecf20Sopenharmony_ci * ops_complete_biofill is guaranteed to be inactive 46148c2ecf20Sopenharmony_ci */ 46158c2ecf20Sopenharmony_ci if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && 46168c2ecf20Sopenharmony_ci !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) 46178c2ecf20Sopenharmony_ci set_bit(R5_Wantfill, &dev->flags); 46188c2ecf20Sopenharmony_ci 46198c2ecf20Sopenharmony_ci /* now count some things */ 46208c2ecf20Sopenharmony_ci if (test_bit(R5_LOCKED, &dev->flags)) 46218c2ecf20Sopenharmony_ci s->locked++; 46228c2ecf20Sopenharmony_ci if (test_bit(R5_UPTODATE, &dev->flags)) 46238c2ecf20Sopenharmony_ci s->uptodate++; 46248c2ecf20Sopenharmony_ci if (test_bit(R5_Wantcompute, &dev->flags)) { 46258c2ecf20Sopenharmony_ci s->compute++; 46268c2ecf20Sopenharmony_ci BUG_ON(s->compute > 2); 46278c2ecf20Sopenharmony_ci } 46288c2ecf20Sopenharmony_ci 46298c2ecf20Sopenharmony_ci if (test_bit(R5_Wantfill, &dev->flags)) 46308c2ecf20Sopenharmony_ci s->to_fill++; 46318c2ecf20Sopenharmony_ci else if (dev->toread) 46328c2ecf20Sopenharmony_ci s->to_read++; 46338c2ecf20Sopenharmony_ci if (dev->towrite) { 46348c2ecf20Sopenharmony_ci s->to_write++; 46358c2ecf20Sopenharmony_ci if (!test_bit(R5_OVERWRITE, &dev->flags)) 46368c2ecf20Sopenharmony_ci s->non_overwrite++; 46378c2ecf20Sopenharmony_ci } 46388c2ecf20Sopenharmony_ci if (dev->written) 46398c2ecf20Sopenharmony_ci s->written++; 46408c2ecf20Sopenharmony_ci /* Prefer to use the replacement for reads, but only 46418c2ecf20Sopenharmony_ci * if it is recovered enough and has no bad blocks. 46428c2ecf20Sopenharmony_ci */ 46438c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].replacement); 46448c2ecf20Sopenharmony_ci if (rdev && !test_bit(Faulty, &rdev->flags) && 46458c2ecf20Sopenharmony_ci rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && 46468c2ecf20Sopenharmony_ci !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 46478c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) 46488c2ecf20Sopenharmony_ci set_bit(R5_ReadRepl, &dev->flags); 46498c2ecf20Sopenharmony_ci else { 46508c2ecf20Sopenharmony_ci if (rdev && !test_bit(Faulty, &rdev->flags)) 46518c2ecf20Sopenharmony_ci set_bit(R5_NeedReplace, &dev->flags); 46528c2ecf20Sopenharmony_ci else 46538c2ecf20Sopenharmony_ci clear_bit(R5_NeedReplace, &dev->flags); 46548c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[i].rdev); 46558c2ecf20Sopenharmony_ci clear_bit(R5_ReadRepl, &dev->flags); 46568c2ecf20Sopenharmony_ci } 46578c2ecf20Sopenharmony_ci if (rdev && test_bit(Faulty, &rdev->flags)) 46588c2ecf20Sopenharmony_ci rdev = NULL; 46598c2ecf20Sopenharmony_ci if (rdev) { 46608c2ecf20Sopenharmony_ci is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 46618c2ecf20Sopenharmony_ci &first_bad, &bad_sectors); 46628c2ecf20Sopenharmony_ci if (s->blocked_rdev == NULL 46638c2ecf20Sopenharmony_ci && (test_bit(Blocked, &rdev->flags) 46648c2ecf20Sopenharmony_ci || is_bad < 0)) { 46658c2ecf20Sopenharmony_ci if (is_bad < 0) 46668c2ecf20Sopenharmony_ci set_bit(BlockedBadBlocks, 46678c2ecf20Sopenharmony_ci &rdev->flags); 46688c2ecf20Sopenharmony_ci s->blocked_rdev = rdev; 46698c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 46708c2ecf20Sopenharmony_ci } 46718c2ecf20Sopenharmony_ci } 46728c2ecf20Sopenharmony_ci clear_bit(R5_Insync, &dev->flags); 46738c2ecf20Sopenharmony_ci if (!rdev) 46748c2ecf20Sopenharmony_ci /* Not in-sync */; 46758c2ecf20Sopenharmony_ci else if (is_bad) { 46768c2ecf20Sopenharmony_ci /* also not in-sync */ 46778c2ecf20Sopenharmony_ci if (!test_bit(WriteErrorSeen, &rdev->flags) && 46788c2ecf20Sopenharmony_ci test_bit(R5_UPTODATE, &dev->flags)) { 46798c2ecf20Sopenharmony_ci /* treat as in-sync, but with a read error 46808c2ecf20Sopenharmony_ci * which we can now try to correct 46818c2ecf20Sopenharmony_ci */ 46828c2ecf20Sopenharmony_ci set_bit(R5_Insync, &dev->flags); 46838c2ecf20Sopenharmony_ci set_bit(R5_ReadError, &dev->flags); 46848c2ecf20Sopenharmony_ci } 46858c2ecf20Sopenharmony_ci } else if (test_bit(In_sync, &rdev->flags)) 46868c2ecf20Sopenharmony_ci set_bit(R5_Insync, &dev->flags); 46878c2ecf20Sopenharmony_ci else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) 46888c2ecf20Sopenharmony_ci /* in sync if before recovery_offset */ 46898c2ecf20Sopenharmony_ci set_bit(R5_Insync, &dev->flags); 46908c2ecf20Sopenharmony_ci else if (test_bit(R5_UPTODATE, &dev->flags) && 46918c2ecf20Sopenharmony_ci test_bit(R5_Expanded, &dev->flags)) 46928c2ecf20Sopenharmony_ci /* If we've reshaped into here, we assume it is Insync. 46938c2ecf20Sopenharmony_ci * We will shortly update recovery_offset to make 46948c2ecf20Sopenharmony_ci * it official. 46958c2ecf20Sopenharmony_ci */ 46968c2ecf20Sopenharmony_ci set_bit(R5_Insync, &dev->flags); 46978c2ecf20Sopenharmony_ci 46988c2ecf20Sopenharmony_ci if (test_bit(R5_WriteError, &dev->flags)) { 46998c2ecf20Sopenharmony_ci /* This flag does not apply to '.replacement' 47008c2ecf20Sopenharmony_ci * only to .rdev, so make sure to check that*/ 47018c2ecf20Sopenharmony_ci struct md_rdev *rdev2 = rcu_dereference( 47028c2ecf20Sopenharmony_ci conf->disks[i].rdev); 47038c2ecf20Sopenharmony_ci if (rdev2 == rdev) 47048c2ecf20Sopenharmony_ci clear_bit(R5_Insync, &dev->flags); 47058c2ecf20Sopenharmony_ci if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { 47068c2ecf20Sopenharmony_ci s->handle_bad_blocks = 1; 47078c2ecf20Sopenharmony_ci atomic_inc(&rdev2->nr_pending); 47088c2ecf20Sopenharmony_ci } else 47098c2ecf20Sopenharmony_ci clear_bit(R5_WriteError, &dev->flags); 47108c2ecf20Sopenharmony_ci } 47118c2ecf20Sopenharmony_ci if (test_bit(R5_MadeGood, &dev->flags)) { 47128c2ecf20Sopenharmony_ci /* This flag does not apply to '.replacement' 47138c2ecf20Sopenharmony_ci * only to .rdev, so make sure to check that*/ 47148c2ecf20Sopenharmony_ci struct md_rdev *rdev2 = rcu_dereference( 47158c2ecf20Sopenharmony_ci conf->disks[i].rdev); 47168c2ecf20Sopenharmony_ci if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { 47178c2ecf20Sopenharmony_ci s->handle_bad_blocks = 1; 47188c2ecf20Sopenharmony_ci atomic_inc(&rdev2->nr_pending); 47198c2ecf20Sopenharmony_ci } else 47208c2ecf20Sopenharmony_ci clear_bit(R5_MadeGood, &dev->flags); 47218c2ecf20Sopenharmony_ci } 47228c2ecf20Sopenharmony_ci if (test_bit(R5_MadeGoodRepl, &dev->flags)) { 47238c2ecf20Sopenharmony_ci struct md_rdev *rdev2 = rcu_dereference( 47248c2ecf20Sopenharmony_ci conf->disks[i].replacement); 47258c2ecf20Sopenharmony_ci if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { 47268c2ecf20Sopenharmony_ci s->handle_bad_blocks = 1; 47278c2ecf20Sopenharmony_ci atomic_inc(&rdev2->nr_pending); 47288c2ecf20Sopenharmony_ci } else 47298c2ecf20Sopenharmony_ci clear_bit(R5_MadeGoodRepl, &dev->flags); 47308c2ecf20Sopenharmony_ci } 47318c2ecf20Sopenharmony_ci if (!test_bit(R5_Insync, &dev->flags)) { 47328c2ecf20Sopenharmony_ci /* The ReadError flag will just be confusing now */ 47338c2ecf20Sopenharmony_ci clear_bit(R5_ReadError, &dev->flags); 47348c2ecf20Sopenharmony_ci clear_bit(R5_ReWrite, &dev->flags); 47358c2ecf20Sopenharmony_ci } 47368c2ecf20Sopenharmony_ci if (test_bit(R5_ReadError, &dev->flags)) 47378c2ecf20Sopenharmony_ci clear_bit(R5_Insync, &dev->flags); 47388c2ecf20Sopenharmony_ci if (!test_bit(R5_Insync, &dev->flags)) { 47398c2ecf20Sopenharmony_ci if (s->failed < 2) 47408c2ecf20Sopenharmony_ci s->failed_num[s->failed] = i; 47418c2ecf20Sopenharmony_ci s->failed++; 47428c2ecf20Sopenharmony_ci if (rdev && !test_bit(Faulty, &rdev->flags)) 47438c2ecf20Sopenharmony_ci do_recovery = 1; 47448c2ecf20Sopenharmony_ci else if (!rdev) { 47458c2ecf20Sopenharmony_ci rdev = rcu_dereference( 47468c2ecf20Sopenharmony_ci conf->disks[i].replacement); 47478c2ecf20Sopenharmony_ci if (rdev && !test_bit(Faulty, &rdev->flags)) 47488c2ecf20Sopenharmony_ci do_recovery = 1; 47498c2ecf20Sopenharmony_ci } 47508c2ecf20Sopenharmony_ci } 47518c2ecf20Sopenharmony_ci 47528c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &dev->flags)) 47538c2ecf20Sopenharmony_ci s->injournal++; 47548c2ecf20Sopenharmony_ci if (test_bit(R5_InJournal, &dev->flags) && dev->written) 47558c2ecf20Sopenharmony_ci s->just_cached++; 47568c2ecf20Sopenharmony_ci } 47578c2ecf20Sopenharmony_ci if (test_bit(STRIPE_SYNCING, &sh->state)) { 47588c2ecf20Sopenharmony_ci /* If there is a failed device being replaced, 47598c2ecf20Sopenharmony_ci * we must be recovering. 47608c2ecf20Sopenharmony_ci * else if we are after recovery_cp, we must be syncing 47618c2ecf20Sopenharmony_ci * else if MD_RECOVERY_REQUESTED is set, we also are syncing. 47628c2ecf20Sopenharmony_ci * else we can only be replacing 47638c2ecf20Sopenharmony_ci * sync and recovery both need to read all devices, and so 47648c2ecf20Sopenharmony_ci * use the same flag. 47658c2ecf20Sopenharmony_ci */ 47668c2ecf20Sopenharmony_ci if (do_recovery || 47678c2ecf20Sopenharmony_ci sh->sector >= conf->mddev->recovery_cp || 47688c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) 47698c2ecf20Sopenharmony_ci s->syncing = 1; 47708c2ecf20Sopenharmony_ci else 47718c2ecf20Sopenharmony_ci s->replacing = 1; 47728c2ecf20Sopenharmony_ci } 47738c2ecf20Sopenharmony_ci rcu_read_unlock(); 47748c2ecf20Sopenharmony_ci} 47758c2ecf20Sopenharmony_ci 47768c2ecf20Sopenharmony_ci/* 47778c2ecf20Sopenharmony_ci * Return '1' if this is a member of batch, or '0' if it is a lone stripe or 47788c2ecf20Sopenharmony_ci * a head which can now be handled. 47798c2ecf20Sopenharmony_ci */ 47808c2ecf20Sopenharmony_cistatic int clear_batch_ready(struct stripe_head *sh) 47818c2ecf20Sopenharmony_ci{ 47828c2ecf20Sopenharmony_ci struct stripe_head *tmp; 47838c2ecf20Sopenharmony_ci if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) 47848c2ecf20Sopenharmony_ci return (sh->batch_head && sh->batch_head != sh); 47858c2ecf20Sopenharmony_ci spin_lock(&sh->stripe_lock); 47868c2ecf20Sopenharmony_ci if (!sh->batch_head) { 47878c2ecf20Sopenharmony_ci spin_unlock(&sh->stripe_lock); 47888c2ecf20Sopenharmony_ci return 0; 47898c2ecf20Sopenharmony_ci } 47908c2ecf20Sopenharmony_ci 47918c2ecf20Sopenharmony_ci /* 47928c2ecf20Sopenharmony_ci * this stripe could be added to a batch list before we check 47938c2ecf20Sopenharmony_ci * BATCH_READY, skips it 47948c2ecf20Sopenharmony_ci */ 47958c2ecf20Sopenharmony_ci if (sh->batch_head != sh) { 47968c2ecf20Sopenharmony_ci spin_unlock(&sh->stripe_lock); 47978c2ecf20Sopenharmony_ci return 1; 47988c2ecf20Sopenharmony_ci } 47998c2ecf20Sopenharmony_ci spin_lock(&sh->batch_lock); 48008c2ecf20Sopenharmony_ci list_for_each_entry(tmp, &sh->batch_list, batch_list) 48018c2ecf20Sopenharmony_ci clear_bit(STRIPE_BATCH_READY, &tmp->state); 48028c2ecf20Sopenharmony_ci spin_unlock(&sh->batch_lock); 48038c2ecf20Sopenharmony_ci spin_unlock(&sh->stripe_lock); 48048c2ecf20Sopenharmony_ci 48058c2ecf20Sopenharmony_ci /* 48068c2ecf20Sopenharmony_ci * BATCH_READY is cleared, no new stripes can be added. 48078c2ecf20Sopenharmony_ci * batch_list can be accessed without lock 48088c2ecf20Sopenharmony_ci */ 48098c2ecf20Sopenharmony_ci return 0; 48108c2ecf20Sopenharmony_ci} 48118c2ecf20Sopenharmony_ci 48128c2ecf20Sopenharmony_cistatic void break_stripe_batch_list(struct stripe_head *head_sh, 48138c2ecf20Sopenharmony_ci unsigned long handle_flags) 48148c2ecf20Sopenharmony_ci{ 48158c2ecf20Sopenharmony_ci struct stripe_head *sh, *next; 48168c2ecf20Sopenharmony_ci int i; 48178c2ecf20Sopenharmony_ci int do_wakeup = 0; 48188c2ecf20Sopenharmony_ci 48198c2ecf20Sopenharmony_ci list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { 48208c2ecf20Sopenharmony_ci 48218c2ecf20Sopenharmony_ci list_del_init(&sh->batch_list); 48228c2ecf20Sopenharmony_ci 48238c2ecf20Sopenharmony_ci WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | 48248c2ecf20Sopenharmony_ci (1 << STRIPE_SYNCING) | 48258c2ecf20Sopenharmony_ci (1 << STRIPE_REPLACED) | 48268c2ecf20Sopenharmony_ci (1 << STRIPE_DELAYED) | 48278c2ecf20Sopenharmony_ci (1 << STRIPE_BIT_DELAY) | 48288c2ecf20Sopenharmony_ci (1 << STRIPE_FULL_WRITE) | 48298c2ecf20Sopenharmony_ci (1 << STRIPE_BIOFILL_RUN) | 48308c2ecf20Sopenharmony_ci (1 << STRIPE_COMPUTE_RUN) | 48318c2ecf20Sopenharmony_ci (1 << STRIPE_DISCARD) | 48328c2ecf20Sopenharmony_ci (1 << STRIPE_BATCH_READY) | 48338c2ecf20Sopenharmony_ci (1 << STRIPE_BATCH_ERR) | 48348c2ecf20Sopenharmony_ci (1 << STRIPE_BITMAP_PENDING)), 48358c2ecf20Sopenharmony_ci "stripe state: %lx\n", sh->state); 48368c2ecf20Sopenharmony_ci WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | 48378c2ecf20Sopenharmony_ci (1 << STRIPE_REPLACED)), 48388c2ecf20Sopenharmony_ci "head stripe state: %lx\n", head_sh->state); 48398c2ecf20Sopenharmony_ci 48408c2ecf20Sopenharmony_ci set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | 48418c2ecf20Sopenharmony_ci (1 << STRIPE_PREREAD_ACTIVE) | 48428c2ecf20Sopenharmony_ci (1 << STRIPE_DEGRADED) | 48438c2ecf20Sopenharmony_ci (1 << STRIPE_ON_UNPLUG_LIST)), 48448c2ecf20Sopenharmony_ci head_sh->state & (1 << STRIPE_INSYNC)); 48458c2ecf20Sopenharmony_ci 48468c2ecf20Sopenharmony_ci sh->check_state = head_sh->check_state; 48478c2ecf20Sopenharmony_ci sh->reconstruct_state = head_sh->reconstruct_state; 48488c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 48498c2ecf20Sopenharmony_ci sh->batch_head = NULL; 48508c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 48518c2ecf20Sopenharmony_ci for (i = 0; i < sh->disks; i++) { 48528c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 48538c2ecf20Sopenharmony_ci do_wakeup = 1; 48548c2ecf20Sopenharmony_ci sh->dev[i].flags = head_sh->dev[i].flags & 48558c2ecf20Sopenharmony_ci (~((1 << R5_WriteError) | (1 << R5_Overlap))); 48568c2ecf20Sopenharmony_ci } 48578c2ecf20Sopenharmony_ci if (handle_flags == 0 || 48588c2ecf20Sopenharmony_ci sh->state & handle_flags) 48598c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 48608c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 48618c2ecf20Sopenharmony_ci } 48628c2ecf20Sopenharmony_ci spin_lock_irq(&head_sh->stripe_lock); 48638c2ecf20Sopenharmony_ci head_sh->batch_head = NULL; 48648c2ecf20Sopenharmony_ci spin_unlock_irq(&head_sh->stripe_lock); 48658c2ecf20Sopenharmony_ci for (i = 0; i < head_sh->disks; i++) 48668c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) 48678c2ecf20Sopenharmony_ci do_wakeup = 1; 48688c2ecf20Sopenharmony_ci if (head_sh->state & handle_flags) 48698c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &head_sh->state); 48708c2ecf20Sopenharmony_ci 48718c2ecf20Sopenharmony_ci if (do_wakeup) 48728c2ecf20Sopenharmony_ci wake_up(&head_sh->raid_conf->wait_for_overlap); 48738c2ecf20Sopenharmony_ci} 48748c2ecf20Sopenharmony_ci 48758c2ecf20Sopenharmony_cistatic void handle_stripe(struct stripe_head *sh) 48768c2ecf20Sopenharmony_ci{ 48778c2ecf20Sopenharmony_ci struct stripe_head_state s; 48788c2ecf20Sopenharmony_ci struct r5conf *conf = sh->raid_conf; 48798c2ecf20Sopenharmony_ci int i; 48808c2ecf20Sopenharmony_ci int prexor; 48818c2ecf20Sopenharmony_ci int disks = sh->disks; 48828c2ecf20Sopenharmony_ci struct r5dev *pdev, *qdev; 48838c2ecf20Sopenharmony_ci 48848c2ecf20Sopenharmony_ci clear_bit(STRIPE_HANDLE, &sh->state); 48858c2ecf20Sopenharmony_ci 48868c2ecf20Sopenharmony_ci /* 48878c2ecf20Sopenharmony_ci * handle_stripe should not continue handle the batched stripe, only 48888c2ecf20Sopenharmony_ci * the head of batch list or lone stripe can continue. Otherwise we 48898c2ecf20Sopenharmony_ci * could see break_stripe_batch_list warns about the STRIPE_ACTIVE 48908c2ecf20Sopenharmony_ci * is set for the batched stripe. 48918c2ecf20Sopenharmony_ci */ 48928c2ecf20Sopenharmony_ci if (clear_batch_ready(sh)) 48938c2ecf20Sopenharmony_ci return; 48948c2ecf20Sopenharmony_ci 48958c2ecf20Sopenharmony_ci if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { 48968c2ecf20Sopenharmony_ci /* already being handled, ensure it gets handled 48978c2ecf20Sopenharmony_ci * again when current action finishes */ 48988c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 48998c2ecf20Sopenharmony_ci return; 49008c2ecf20Sopenharmony_ci } 49018c2ecf20Sopenharmony_ci 49028c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) 49038c2ecf20Sopenharmony_ci break_stripe_batch_list(sh, 0); 49048c2ecf20Sopenharmony_ci 49058c2ecf20Sopenharmony_ci if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { 49068c2ecf20Sopenharmony_ci spin_lock(&sh->stripe_lock); 49078c2ecf20Sopenharmony_ci /* 49088c2ecf20Sopenharmony_ci * Cannot process 'sync' concurrently with 'discard'. 49098c2ecf20Sopenharmony_ci * Flush data in r5cache before 'sync'. 49108c2ecf20Sopenharmony_ci */ 49118c2ecf20Sopenharmony_ci if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && 49128c2ecf20Sopenharmony_ci !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && 49138c2ecf20Sopenharmony_ci !test_bit(STRIPE_DISCARD, &sh->state) && 49148c2ecf20Sopenharmony_ci test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 49158c2ecf20Sopenharmony_ci set_bit(STRIPE_SYNCING, &sh->state); 49168c2ecf20Sopenharmony_ci clear_bit(STRIPE_INSYNC, &sh->state); 49178c2ecf20Sopenharmony_ci clear_bit(STRIPE_REPLACED, &sh->state); 49188c2ecf20Sopenharmony_ci } 49198c2ecf20Sopenharmony_ci spin_unlock(&sh->stripe_lock); 49208c2ecf20Sopenharmony_ci } 49218c2ecf20Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 49228c2ecf20Sopenharmony_ci 49238c2ecf20Sopenharmony_ci pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 49248c2ecf20Sopenharmony_ci "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", 49258c2ecf20Sopenharmony_ci (unsigned long long)sh->sector, sh->state, 49268c2ecf20Sopenharmony_ci atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, 49278c2ecf20Sopenharmony_ci sh->check_state, sh->reconstruct_state); 49288c2ecf20Sopenharmony_ci 49298c2ecf20Sopenharmony_ci analyse_stripe(sh, &s); 49308c2ecf20Sopenharmony_ci 49318c2ecf20Sopenharmony_ci if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) 49328c2ecf20Sopenharmony_ci goto finish; 49338c2ecf20Sopenharmony_ci 49348c2ecf20Sopenharmony_ci if (s.handle_bad_blocks || 49358c2ecf20Sopenharmony_ci test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { 49368c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 49378c2ecf20Sopenharmony_ci goto finish; 49388c2ecf20Sopenharmony_ci } 49398c2ecf20Sopenharmony_ci 49408c2ecf20Sopenharmony_ci if (unlikely(s.blocked_rdev)) { 49418c2ecf20Sopenharmony_ci if (s.syncing || s.expanding || s.expanded || 49428c2ecf20Sopenharmony_ci s.replacing || s.to_write || s.written) { 49438c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 49448c2ecf20Sopenharmony_ci goto finish; 49458c2ecf20Sopenharmony_ci } 49468c2ecf20Sopenharmony_ci /* There is nothing for the blocked_rdev to block */ 49478c2ecf20Sopenharmony_ci rdev_dec_pending(s.blocked_rdev, conf->mddev); 49488c2ecf20Sopenharmony_ci s.blocked_rdev = NULL; 49498c2ecf20Sopenharmony_ci } 49508c2ecf20Sopenharmony_ci 49518c2ecf20Sopenharmony_ci if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { 49528c2ecf20Sopenharmony_ci set_bit(STRIPE_OP_BIOFILL, &s.ops_request); 49538c2ecf20Sopenharmony_ci set_bit(STRIPE_BIOFILL_RUN, &sh->state); 49548c2ecf20Sopenharmony_ci } 49558c2ecf20Sopenharmony_ci 49568c2ecf20Sopenharmony_ci pr_debug("locked=%d uptodate=%d to_read=%d" 49578c2ecf20Sopenharmony_ci " to_write=%d failed=%d failed_num=%d,%d\n", 49588c2ecf20Sopenharmony_ci s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 49598c2ecf20Sopenharmony_ci s.failed_num[0], s.failed_num[1]); 49608c2ecf20Sopenharmony_ci /* 49618c2ecf20Sopenharmony_ci * check if the array has lost more than max_degraded devices and, 49628c2ecf20Sopenharmony_ci * if so, some requests might need to be failed. 49638c2ecf20Sopenharmony_ci * 49648c2ecf20Sopenharmony_ci * When journal device failed (log_failed), we will only process 49658c2ecf20Sopenharmony_ci * the stripe if there is data need write to raid disks 49668c2ecf20Sopenharmony_ci */ 49678c2ecf20Sopenharmony_ci if (s.failed > conf->max_degraded || 49688c2ecf20Sopenharmony_ci (s.log_failed && s.injournal == 0)) { 49698c2ecf20Sopenharmony_ci sh->check_state = 0; 49708c2ecf20Sopenharmony_ci sh->reconstruct_state = 0; 49718c2ecf20Sopenharmony_ci break_stripe_batch_list(sh, 0); 49728c2ecf20Sopenharmony_ci if (s.to_read+s.to_write+s.written) 49738c2ecf20Sopenharmony_ci handle_failed_stripe(conf, sh, &s, disks); 49748c2ecf20Sopenharmony_ci if (s.syncing + s.replacing) 49758c2ecf20Sopenharmony_ci handle_failed_sync(conf, sh, &s); 49768c2ecf20Sopenharmony_ci } 49778c2ecf20Sopenharmony_ci 49788c2ecf20Sopenharmony_ci /* Now we check to see if any write operations have recently 49798c2ecf20Sopenharmony_ci * completed 49808c2ecf20Sopenharmony_ci */ 49818c2ecf20Sopenharmony_ci prexor = 0; 49828c2ecf20Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) 49838c2ecf20Sopenharmony_ci prexor = 1; 49848c2ecf20Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_drain_result || 49858c2ecf20Sopenharmony_ci sh->reconstruct_state == reconstruct_state_prexor_drain_result) { 49868c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_idle; 49878c2ecf20Sopenharmony_ci 49888c2ecf20Sopenharmony_ci /* All the 'written' buffers and the parity block are ready to 49898c2ecf20Sopenharmony_ci * be written back to disk 49908c2ecf20Sopenharmony_ci */ 49918c2ecf20Sopenharmony_ci BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) && 49928c2ecf20Sopenharmony_ci !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)); 49938c2ecf20Sopenharmony_ci BUG_ON(sh->qd_idx >= 0 && 49948c2ecf20Sopenharmony_ci !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) && 49958c2ecf20Sopenharmony_ci !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags)); 49968c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 49978c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 49988c2ecf20Sopenharmony_ci if (test_bit(R5_LOCKED, &dev->flags) && 49998c2ecf20Sopenharmony_ci (i == sh->pd_idx || i == sh->qd_idx || 50008c2ecf20Sopenharmony_ci dev->written || test_bit(R5_InJournal, 50018c2ecf20Sopenharmony_ci &dev->flags))) { 50028c2ecf20Sopenharmony_ci pr_debug("Writing block %d\n", i); 50038c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 50048c2ecf20Sopenharmony_ci if (prexor) 50058c2ecf20Sopenharmony_ci continue; 50068c2ecf20Sopenharmony_ci if (s.failed > 1) 50078c2ecf20Sopenharmony_ci continue; 50088c2ecf20Sopenharmony_ci if (!test_bit(R5_Insync, &dev->flags) || 50098c2ecf20Sopenharmony_ci ((i == sh->pd_idx || i == sh->qd_idx) && 50108c2ecf20Sopenharmony_ci s.failed == 0)) 50118c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 50128c2ecf20Sopenharmony_ci } 50138c2ecf20Sopenharmony_ci } 50148c2ecf20Sopenharmony_ci if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 50158c2ecf20Sopenharmony_ci s.dec_preread_active = 1; 50168c2ecf20Sopenharmony_ci } 50178c2ecf20Sopenharmony_ci 50188c2ecf20Sopenharmony_ci /* 50198c2ecf20Sopenharmony_ci * might be able to return some write requests if the parity blocks 50208c2ecf20Sopenharmony_ci * are safe, or on a failed drive 50218c2ecf20Sopenharmony_ci */ 50228c2ecf20Sopenharmony_ci pdev = &sh->dev[sh->pd_idx]; 50238c2ecf20Sopenharmony_ci s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) 50248c2ecf20Sopenharmony_ci || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); 50258c2ecf20Sopenharmony_ci qdev = &sh->dev[sh->qd_idx]; 50268c2ecf20Sopenharmony_ci s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) 50278c2ecf20Sopenharmony_ci || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) 50288c2ecf20Sopenharmony_ci || conf->level < 6; 50298c2ecf20Sopenharmony_ci 50308c2ecf20Sopenharmony_ci if (s.written && 50318c2ecf20Sopenharmony_ci (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) 50328c2ecf20Sopenharmony_ci && !test_bit(R5_LOCKED, &pdev->flags) 50338c2ecf20Sopenharmony_ci && (test_bit(R5_UPTODATE, &pdev->flags) || 50348c2ecf20Sopenharmony_ci test_bit(R5_Discard, &pdev->flags))))) && 50358c2ecf20Sopenharmony_ci (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) 50368c2ecf20Sopenharmony_ci && !test_bit(R5_LOCKED, &qdev->flags) 50378c2ecf20Sopenharmony_ci && (test_bit(R5_UPTODATE, &qdev->flags) || 50388c2ecf20Sopenharmony_ci test_bit(R5_Discard, &qdev->flags)))))) 50398c2ecf20Sopenharmony_ci handle_stripe_clean_event(conf, sh, disks); 50408c2ecf20Sopenharmony_ci 50418c2ecf20Sopenharmony_ci if (s.just_cached) 50428c2ecf20Sopenharmony_ci r5c_handle_cached_data_endio(conf, sh, disks); 50438c2ecf20Sopenharmony_ci log_stripe_write_finished(sh); 50448c2ecf20Sopenharmony_ci 50458c2ecf20Sopenharmony_ci /* Now we might consider reading some blocks, either to check/generate 50468c2ecf20Sopenharmony_ci * parity, or to satisfy requests 50478c2ecf20Sopenharmony_ci * or to load a block that is being partially written. 50488c2ecf20Sopenharmony_ci */ 50498c2ecf20Sopenharmony_ci if (s.to_read || s.non_overwrite 50508c2ecf20Sopenharmony_ci || (s.to_write && s.failed) 50518c2ecf20Sopenharmony_ci || (s.syncing && (s.uptodate + s.compute < disks)) 50528c2ecf20Sopenharmony_ci || s.replacing 50538c2ecf20Sopenharmony_ci || s.expanding) 50548c2ecf20Sopenharmony_ci handle_stripe_fill(sh, &s, disks); 50558c2ecf20Sopenharmony_ci 50568c2ecf20Sopenharmony_ci /* 50578c2ecf20Sopenharmony_ci * When the stripe finishes full journal write cycle (write to journal 50588c2ecf20Sopenharmony_ci * and raid disk), this is the clean up procedure so it is ready for 50598c2ecf20Sopenharmony_ci * next operation. 50608c2ecf20Sopenharmony_ci */ 50618c2ecf20Sopenharmony_ci r5c_finish_stripe_write_out(conf, sh, &s); 50628c2ecf20Sopenharmony_ci 50638c2ecf20Sopenharmony_ci /* 50648c2ecf20Sopenharmony_ci * Now to consider new write requests, cache write back and what else, 50658c2ecf20Sopenharmony_ci * if anything should be read. We do not handle new writes when: 50668c2ecf20Sopenharmony_ci * 1/ A 'write' operation (copy+xor) is already in flight. 50678c2ecf20Sopenharmony_ci * 2/ A 'check' operation is in flight, as it may clobber the parity 50688c2ecf20Sopenharmony_ci * block. 50698c2ecf20Sopenharmony_ci * 3/ A r5c cache log write is in flight. 50708c2ecf20Sopenharmony_ci */ 50718c2ecf20Sopenharmony_ci 50728c2ecf20Sopenharmony_ci if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) { 50738c2ecf20Sopenharmony_ci if (!r5c_is_writeback(conf->log)) { 50748c2ecf20Sopenharmony_ci if (s.to_write) 50758c2ecf20Sopenharmony_ci handle_stripe_dirtying(conf, sh, &s, disks); 50768c2ecf20Sopenharmony_ci } else { /* write back cache */ 50778c2ecf20Sopenharmony_ci int ret = 0; 50788c2ecf20Sopenharmony_ci 50798c2ecf20Sopenharmony_ci /* First, try handle writes in caching phase */ 50808c2ecf20Sopenharmony_ci if (s.to_write) 50818c2ecf20Sopenharmony_ci ret = r5c_try_caching_write(conf, sh, &s, 50828c2ecf20Sopenharmony_ci disks); 50838c2ecf20Sopenharmony_ci /* 50848c2ecf20Sopenharmony_ci * If caching phase failed: ret == -EAGAIN 50858c2ecf20Sopenharmony_ci * OR 50868c2ecf20Sopenharmony_ci * stripe under reclaim: !caching && injournal 50878c2ecf20Sopenharmony_ci * 50888c2ecf20Sopenharmony_ci * fall back to handle_stripe_dirtying() 50898c2ecf20Sopenharmony_ci */ 50908c2ecf20Sopenharmony_ci if (ret == -EAGAIN || 50918c2ecf20Sopenharmony_ci /* stripe under reclaim: !caching && injournal */ 50928c2ecf20Sopenharmony_ci (!test_bit(STRIPE_R5C_CACHING, &sh->state) && 50938c2ecf20Sopenharmony_ci s.injournal > 0)) { 50948c2ecf20Sopenharmony_ci ret = handle_stripe_dirtying(conf, sh, &s, 50958c2ecf20Sopenharmony_ci disks); 50968c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 50978c2ecf20Sopenharmony_ci goto finish; 50988c2ecf20Sopenharmony_ci } 50998c2ecf20Sopenharmony_ci } 51008c2ecf20Sopenharmony_ci } 51018c2ecf20Sopenharmony_ci 51028c2ecf20Sopenharmony_ci /* maybe we need to check and possibly fix the parity for this stripe 51038c2ecf20Sopenharmony_ci * Any reads will already have been scheduled, so we just see if enough 51048c2ecf20Sopenharmony_ci * data is available. The parity check is held off while parity 51058c2ecf20Sopenharmony_ci * dependent operations are in flight. 51068c2ecf20Sopenharmony_ci */ 51078c2ecf20Sopenharmony_ci if (sh->check_state || 51088c2ecf20Sopenharmony_ci (s.syncing && s.locked == 0 && 51098c2ecf20Sopenharmony_ci !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && 51108c2ecf20Sopenharmony_ci !test_bit(STRIPE_INSYNC, &sh->state))) { 51118c2ecf20Sopenharmony_ci if (conf->level == 6) 51128c2ecf20Sopenharmony_ci handle_parity_checks6(conf, sh, &s, disks); 51138c2ecf20Sopenharmony_ci else 51148c2ecf20Sopenharmony_ci handle_parity_checks5(conf, sh, &s, disks); 51158c2ecf20Sopenharmony_ci } 51168c2ecf20Sopenharmony_ci 51178c2ecf20Sopenharmony_ci if ((s.replacing || s.syncing) && s.locked == 0 51188c2ecf20Sopenharmony_ci && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) 51198c2ecf20Sopenharmony_ci && !test_bit(STRIPE_REPLACED, &sh->state)) { 51208c2ecf20Sopenharmony_ci /* Write out to replacement devices where possible */ 51218c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) 51228c2ecf20Sopenharmony_ci if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { 51238c2ecf20Sopenharmony_ci WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); 51248c2ecf20Sopenharmony_ci set_bit(R5_WantReplace, &sh->dev[i].flags); 51258c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &sh->dev[i].flags); 51268c2ecf20Sopenharmony_ci s.locked++; 51278c2ecf20Sopenharmony_ci } 51288c2ecf20Sopenharmony_ci if (s.replacing) 51298c2ecf20Sopenharmony_ci set_bit(STRIPE_INSYNC, &sh->state); 51308c2ecf20Sopenharmony_ci set_bit(STRIPE_REPLACED, &sh->state); 51318c2ecf20Sopenharmony_ci } 51328c2ecf20Sopenharmony_ci if ((s.syncing || s.replacing) && s.locked == 0 && 51338c2ecf20Sopenharmony_ci !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && 51348c2ecf20Sopenharmony_ci test_bit(STRIPE_INSYNC, &sh->state)) { 51358c2ecf20Sopenharmony_ci md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); 51368c2ecf20Sopenharmony_ci clear_bit(STRIPE_SYNCING, &sh->state); 51378c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) 51388c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 51398c2ecf20Sopenharmony_ci } 51408c2ecf20Sopenharmony_ci 51418c2ecf20Sopenharmony_ci /* If the failed drives are just a ReadError, then we might need 51428c2ecf20Sopenharmony_ci * to progress the repair/check process 51438c2ecf20Sopenharmony_ci */ 51448c2ecf20Sopenharmony_ci if (s.failed <= conf->max_degraded && !conf->mddev->ro) 51458c2ecf20Sopenharmony_ci for (i = 0; i < s.failed; i++) { 51468c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[s.failed_num[i]]; 51478c2ecf20Sopenharmony_ci if (test_bit(R5_ReadError, &dev->flags) 51488c2ecf20Sopenharmony_ci && !test_bit(R5_LOCKED, &dev->flags) 51498c2ecf20Sopenharmony_ci && test_bit(R5_UPTODATE, &dev->flags) 51508c2ecf20Sopenharmony_ci ) { 51518c2ecf20Sopenharmony_ci if (!test_bit(R5_ReWrite, &dev->flags)) { 51528c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &dev->flags); 51538c2ecf20Sopenharmony_ci set_bit(R5_ReWrite, &dev->flags); 51548c2ecf20Sopenharmony_ci } else 51558c2ecf20Sopenharmony_ci /* let's read it back */ 51568c2ecf20Sopenharmony_ci set_bit(R5_Wantread, &dev->flags); 51578c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &dev->flags); 51588c2ecf20Sopenharmony_ci s.locked++; 51598c2ecf20Sopenharmony_ci } 51608c2ecf20Sopenharmony_ci } 51618c2ecf20Sopenharmony_ci 51628c2ecf20Sopenharmony_ci /* Finish reconstruct operations initiated by the expansion process */ 51638c2ecf20Sopenharmony_ci if (sh->reconstruct_state == reconstruct_state_result) { 51648c2ecf20Sopenharmony_ci struct stripe_head *sh_src 51658c2ecf20Sopenharmony_ci = raid5_get_active_stripe(conf, sh->sector, 1, 1, 1); 51668c2ecf20Sopenharmony_ci if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { 51678c2ecf20Sopenharmony_ci /* sh cannot be written until sh_src has been read. 51688c2ecf20Sopenharmony_ci * so arrange for sh to be delayed a little 51698c2ecf20Sopenharmony_ci */ 51708c2ecf20Sopenharmony_ci set_bit(STRIPE_DELAYED, &sh->state); 51718c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 51728c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, 51738c2ecf20Sopenharmony_ci &sh_src->state)) 51748c2ecf20Sopenharmony_ci atomic_inc(&conf->preread_active_stripes); 51758c2ecf20Sopenharmony_ci raid5_release_stripe(sh_src); 51768c2ecf20Sopenharmony_ci goto finish; 51778c2ecf20Sopenharmony_ci } 51788c2ecf20Sopenharmony_ci if (sh_src) 51798c2ecf20Sopenharmony_ci raid5_release_stripe(sh_src); 51808c2ecf20Sopenharmony_ci 51818c2ecf20Sopenharmony_ci sh->reconstruct_state = reconstruct_state_idle; 51828c2ecf20Sopenharmony_ci clear_bit(STRIPE_EXPANDING, &sh->state); 51838c2ecf20Sopenharmony_ci for (i = conf->raid_disks; i--; ) { 51848c2ecf20Sopenharmony_ci set_bit(R5_Wantwrite, &sh->dev[i].flags); 51858c2ecf20Sopenharmony_ci set_bit(R5_LOCKED, &sh->dev[i].flags); 51868c2ecf20Sopenharmony_ci s.locked++; 51878c2ecf20Sopenharmony_ci } 51888c2ecf20Sopenharmony_ci } 51898c2ecf20Sopenharmony_ci 51908c2ecf20Sopenharmony_ci if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && 51918c2ecf20Sopenharmony_ci !sh->reconstruct_state) { 51928c2ecf20Sopenharmony_ci /* Need to write out all blocks after computing parity */ 51938c2ecf20Sopenharmony_ci sh->disks = conf->raid_disks; 51948c2ecf20Sopenharmony_ci stripe_set_idx(sh->sector, conf, 0, sh); 51958c2ecf20Sopenharmony_ci schedule_reconstruction(sh, &s, 1, 1); 51968c2ecf20Sopenharmony_ci } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 51978c2ecf20Sopenharmony_ci clear_bit(STRIPE_EXPAND_READY, &sh->state); 51988c2ecf20Sopenharmony_ci atomic_dec(&conf->reshape_stripes); 51998c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 52008c2ecf20Sopenharmony_ci md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); 52018c2ecf20Sopenharmony_ci } 52028c2ecf20Sopenharmony_ci 52038c2ecf20Sopenharmony_ci if (s.expanding && s.locked == 0 && 52048c2ecf20Sopenharmony_ci !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) 52058c2ecf20Sopenharmony_ci handle_stripe_expansion(conf, sh); 52068c2ecf20Sopenharmony_ci 52078c2ecf20Sopenharmony_cifinish: 52088c2ecf20Sopenharmony_ci /* wait for this device to become unblocked */ 52098c2ecf20Sopenharmony_ci if (unlikely(s.blocked_rdev)) { 52108c2ecf20Sopenharmony_ci if (conf->mddev->external) 52118c2ecf20Sopenharmony_ci md_wait_for_blocked_rdev(s.blocked_rdev, 52128c2ecf20Sopenharmony_ci conf->mddev); 52138c2ecf20Sopenharmony_ci else 52148c2ecf20Sopenharmony_ci /* Internal metadata will immediately 52158c2ecf20Sopenharmony_ci * be written by raid5d, so we don't 52168c2ecf20Sopenharmony_ci * need to wait here. 52178c2ecf20Sopenharmony_ci */ 52188c2ecf20Sopenharmony_ci rdev_dec_pending(s.blocked_rdev, 52198c2ecf20Sopenharmony_ci conf->mddev); 52208c2ecf20Sopenharmony_ci } 52218c2ecf20Sopenharmony_ci 52228c2ecf20Sopenharmony_ci if (s.handle_bad_blocks) 52238c2ecf20Sopenharmony_ci for (i = disks; i--; ) { 52248c2ecf20Sopenharmony_ci struct md_rdev *rdev; 52258c2ecf20Sopenharmony_ci struct r5dev *dev = &sh->dev[i]; 52268c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_WriteError, &dev->flags)) { 52278c2ecf20Sopenharmony_ci /* We own a safe reference to the rdev */ 52288c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 52298c2ecf20Sopenharmony_ci if (!rdev_set_badblocks(rdev, sh->sector, 52308c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0)) 52318c2ecf20Sopenharmony_ci md_error(conf->mddev, rdev); 52328c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 52338c2ecf20Sopenharmony_ci } 52348c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { 52358c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 52368c2ecf20Sopenharmony_ci rdev_clear_badblocks(rdev, sh->sector, 52378c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0); 52388c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 52398c2ecf20Sopenharmony_ci } 52408c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { 52418c2ecf20Sopenharmony_ci rdev = conf->disks[i].replacement; 52428c2ecf20Sopenharmony_ci if (!rdev) 52438c2ecf20Sopenharmony_ci /* rdev have been moved down */ 52448c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 52458c2ecf20Sopenharmony_ci rdev_clear_badblocks(rdev, sh->sector, 52468c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 0); 52478c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 52488c2ecf20Sopenharmony_ci } 52498c2ecf20Sopenharmony_ci } 52508c2ecf20Sopenharmony_ci 52518c2ecf20Sopenharmony_ci if (s.ops_request) 52528c2ecf20Sopenharmony_ci raid_run_ops(sh, s.ops_request); 52538c2ecf20Sopenharmony_ci 52548c2ecf20Sopenharmony_ci ops_run_io(sh, &s); 52558c2ecf20Sopenharmony_ci 52568c2ecf20Sopenharmony_ci if (s.dec_preread_active) { 52578c2ecf20Sopenharmony_ci /* We delay this until after ops_run_io so that if make_request 52588c2ecf20Sopenharmony_ci * is waiting on a flush, it won't continue until the writes 52598c2ecf20Sopenharmony_ci * have actually been submitted. 52608c2ecf20Sopenharmony_ci */ 52618c2ecf20Sopenharmony_ci atomic_dec(&conf->preread_active_stripes); 52628c2ecf20Sopenharmony_ci if (atomic_read(&conf->preread_active_stripes) < 52638c2ecf20Sopenharmony_ci IO_THRESHOLD) 52648c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 52658c2ecf20Sopenharmony_ci } 52668c2ecf20Sopenharmony_ci 52678c2ecf20Sopenharmony_ci clear_bit_unlock(STRIPE_ACTIVE, &sh->state); 52688c2ecf20Sopenharmony_ci} 52698c2ecf20Sopenharmony_ci 52708c2ecf20Sopenharmony_cistatic void raid5_activate_delayed(struct r5conf *conf) 52718c2ecf20Sopenharmony_ci{ 52728c2ecf20Sopenharmony_ci if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { 52738c2ecf20Sopenharmony_ci while (!list_empty(&conf->delayed_list)) { 52748c2ecf20Sopenharmony_ci struct list_head *l = conf->delayed_list.next; 52758c2ecf20Sopenharmony_ci struct stripe_head *sh; 52768c2ecf20Sopenharmony_ci sh = list_entry(l, struct stripe_head, lru); 52778c2ecf20Sopenharmony_ci list_del_init(l); 52788c2ecf20Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 52798c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 52808c2ecf20Sopenharmony_ci atomic_inc(&conf->preread_active_stripes); 52818c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &conf->hold_list); 52828c2ecf20Sopenharmony_ci raid5_wakeup_stripe_thread(sh); 52838c2ecf20Sopenharmony_ci } 52848c2ecf20Sopenharmony_ci } 52858c2ecf20Sopenharmony_ci} 52868c2ecf20Sopenharmony_ci 52878c2ecf20Sopenharmony_cistatic void activate_bit_delay(struct r5conf *conf, 52888c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list) 52898c2ecf20Sopenharmony_ci{ 52908c2ecf20Sopenharmony_ci /* device_lock is held */ 52918c2ecf20Sopenharmony_ci struct list_head head; 52928c2ecf20Sopenharmony_ci list_add(&head, &conf->bitmap_list); 52938c2ecf20Sopenharmony_ci list_del_init(&conf->bitmap_list); 52948c2ecf20Sopenharmony_ci while (!list_empty(&head)) { 52958c2ecf20Sopenharmony_ci struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); 52968c2ecf20Sopenharmony_ci int hash; 52978c2ecf20Sopenharmony_ci list_del_init(&sh->lru); 52988c2ecf20Sopenharmony_ci atomic_inc(&sh->count); 52998c2ecf20Sopenharmony_ci hash = sh->hash_lock_index; 53008c2ecf20Sopenharmony_ci __release_stripe(conf, sh, &temp_inactive_list[hash]); 53018c2ecf20Sopenharmony_ci } 53028c2ecf20Sopenharmony_ci} 53038c2ecf20Sopenharmony_ci 53048c2ecf20Sopenharmony_cistatic int in_chunk_boundary(struct mddev *mddev, struct bio *bio) 53058c2ecf20Sopenharmony_ci{ 53068c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 53078c2ecf20Sopenharmony_ci sector_t sector = bio->bi_iter.bi_sector; 53088c2ecf20Sopenharmony_ci unsigned int chunk_sectors; 53098c2ecf20Sopenharmony_ci unsigned int bio_sectors = bio_sectors(bio); 53108c2ecf20Sopenharmony_ci 53118c2ecf20Sopenharmony_ci WARN_ON_ONCE(bio->bi_partno); 53128c2ecf20Sopenharmony_ci 53138c2ecf20Sopenharmony_ci chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); 53148c2ecf20Sopenharmony_ci return chunk_sectors >= 53158c2ecf20Sopenharmony_ci ((sector & (chunk_sectors - 1)) + bio_sectors); 53168c2ecf20Sopenharmony_ci} 53178c2ecf20Sopenharmony_ci 53188c2ecf20Sopenharmony_ci/* 53198c2ecf20Sopenharmony_ci * add bio to the retry LIFO ( in O(1) ... we are in interrupt ) 53208c2ecf20Sopenharmony_ci * later sampled by raid5d. 53218c2ecf20Sopenharmony_ci */ 53228c2ecf20Sopenharmony_cistatic void add_bio_to_retry(struct bio *bi,struct r5conf *conf) 53238c2ecf20Sopenharmony_ci{ 53248c2ecf20Sopenharmony_ci unsigned long flags; 53258c2ecf20Sopenharmony_ci 53268c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 53278c2ecf20Sopenharmony_ci 53288c2ecf20Sopenharmony_ci bi->bi_next = conf->retry_read_aligned_list; 53298c2ecf20Sopenharmony_ci conf->retry_read_aligned_list = bi; 53308c2ecf20Sopenharmony_ci 53318c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 53328c2ecf20Sopenharmony_ci md_wakeup_thread(conf->mddev->thread); 53338c2ecf20Sopenharmony_ci} 53348c2ecf20Sopenharmony_ci 53358c2ecf20Sopenharmony_cistatic struct bio *remove_bio_from_retry(struct r5conf *conf, 53368c2ecf20Sopenharmony_ci unsigned int *offset) 53378c2ecf20Sopenharmony_ci{ 53388c2ecf20Sopenharmony_ci struct bio *bi; 53398c2ecf20Sopenharmony_ci 53408c2ecf20Sopenharmony_ci bi = conf->retry_read_aligned; 53418c2ecf20Sopenharmony_ci if (bi) { 53428c2ecf20Sopenharmony_ci *offset = conf->retry_read_offset; 53438c2ecf20Sopenharmony_ci conf->retry_read_aligned = NULL; 53448c2ecf20Sopenharmony_ci return bi; 53458c2ecf20Sopenharmony_ci } 53468c2ecf20Sopenharmony_ci bi = conf->retry_read_aligned_list; 53478c2ecf20Sopenharmony_ci if(bi) { 53488c2ecf20Sopenharmony_ci conf->retry_read_aligned_list = bi->bi_next; 53498c2ecf20Sopenharmony_ci bi->bi_next = NULL; 53508c2ecf20Sopenharmony_ci *offset = 0; 53518c2ecf20Sopenharmony_ci } 53528c2ecf20Sopenharmony_ci 53538c2ecf20Sopenharmony_ci return bi; 53548c2ecf20Sopenharmony_ci} 53558c2ecf20Sopenharmony_ci 53568c2ecf20Sopenharmony_ci/* 53578c2ecf20Sopenharmony_ci * The "raid5_align_endio" should check if the read succeeded and if it 53588c2ecf20Sopenharmony_ci * did, call bio_endio on the original bio (having bio_put the new bio 53598c2ecf20Sopenharmony_ci * first). 53608c2ecf20Sopenharmony_ci * If the read failed.. 53618c2ecf20Sopenharmony_ci */ 53628c2ecf20Sopenharmony_cistatic void raid5_align_endio(struct bio *bi) 53638c2ecf20Sopenharmony_ci{ 53648c2ecf20Sopenharmony_ci struct bio* raid_bi = bi->bi_private; 53658c2ecf20Sopenharmony_ci struct mddev *mddev; 53668c2ecf20Sopenharmony_ci struct r5conf *conf; 53678c2ecf20Sopenharmony_ci struct md_rdev *rdev; 53688c2ecf20Sopenharmony_ci blk_status_t error = bi->bi_status; 53698c2ecf20Sopenharmony_ci 53708c2ecf20Sopenharmony_ci bio_put(bi); 53718c2ecf20Sopenharmony_ci 53728c2ecf20Sopenharmony_ci rdev = (void*)raid_bi->bi_next; 53738c2ecf20Sopenharmony_ci raid_bi->bi_next = NULL; 53748c2ecf20Sopenharmony_ci mddev = rdev->mddev; 53758c2ecf20Sopenharmony_ci conf = mddev->private; 53768c2ecf20Sopenharmony_ci 53778c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, conf->mddev); 53788c2ecf20Sopenharmony_ci 53798c2ecf20Sopenharmony_ci if (!error) { 53808c2ecf20Sopenharmony_ci bio_endio(raid_bi); 53818c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&conf->active_aligned_reads)) 53828c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_quiescent); 53838c2ecf20Sopenharmony_ci return; 53848c2ecf20Sopenharmony_ci } 53858c2ecf20Sopenharmony_ci 53868c2ecf20Sopenharmony_ci pr_debug("raid5_align_endio : io error...handing IO for a retry\n"); 53878c2ecf20Sopenharmony_ci 53888c2ecf20Sopenharmony_ci add_bio_to_retry(raid_bi, conf); 53898c2ecf20Sopenharmony_ci} 53908c2ecf20Sopenharmony_ci 53918c2ecf20Sopenharmony_cistatic int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) 53928c2ecf20Sopenharmony_ci{ 53938c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 53948c2ecf20Sopenharmony_ci int dd_idx; 53958c2ecf20Sopenharmony_ci struct bio* align_bi; 53968c2ecf20Sopenharmony_ci struct md_rdev *rdev; 53978c2ecf20Sopenharmony_ci sector_t end_sector; 53988c2ecf20Sopenharmony_ci 53998c2ecf20Sopenharmony_ci if (!in_chunk_boundary(mddev, raid_bio)) { 54008c2ecf20Sopenharmony_ci pr_debug("%s: non aligned\n", __func__); 54018c2ecf20Sopenharmony_ci return 0; 54028c2ecf20Sopenharmony_ci } 54038c2ecf20Sopenharmony_ci /* 54048c2ecf20Sopenharmony_ci * use bio_clone_fast to make a copy of the bio 54058c2ecf20Sopenharmony_ci */ 54068c2ecf20Sopenharmony_ci align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set); 54078c2ecf20Sopenharmony_ci if (!align_bi) 54088c2ecf20Sopenharmony_ci return 0; 54098c2ecf20Sopenharmony_ci /* 54108c2ecf20Sopenharmony_ci * set bi_end_io to a new function, and set bi_private to the 54118c2ecf20Sopenharmony_ci * original bio. 54128c2ecf20Sopenharmony_ci */ 54138c2ecf20Sopenharmony_ci align_bi->bi_end_io = raid5_align_endio; 54148c2ecf20Sopenharmony_ci align_bi->bi_private = raid_bio; 54158c2ecf20Sopenharmony_ci /* 54168c2ecf20Sopenharmony_ci * compute position 54178c2ecf20Sopenharmony_ci */ 54188c2ecf20Sopenharmony_ci align_bi->bi_iter.bi_sector = 54198c2ecf20Sopenharmony_ci raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 54208c2ecf20Sopenharmony_ci 0, &dd_idx, NULL); 54218c2ecf20Sopenharmony_ci 54228c2ecf20Sopenharmony_ci end_sector = bio_end_sector(align_bi); 54238c2ecf20Sopenharmony_ci rcu_read_lock(); 54248c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[dd_idx].replacement); 54258c2ecf20Sopenharmony_ci if (!rdev || test_bit(Faulty, &rdev->flags) || 54268c2ecf20Sopenharmony_ci rdev->recovery_offset < end_sector) { 54278c2ecf20Sopenharmony_ci rdev = rcu_dereference(conf->disks[dd_idx].rdev); 54288c2ecf20Sopenharmony_ci if (rdev && 54298c2ecf20Sopenharmony_ci (test_bit(Faulty, &rdev->flags) || 54308c2ecf20Sopenharmony_ci !(test_bit(In_sync, &rdev->flags) || 54318c2ecf20Sopenharmony_ci rdev->recovery_offset >= end_sector))) 54328c2ecf20Sopenharmony_ci rdev = NULL; 54338c2ecf20Sopenharmony_ci } 54348c2ecf20Sopenharmony_ci 54358c2ecf20Sopenharmony_ci if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) { 54368c2ecf20Sopenharmony_ci rcu_read_unlock(); 54378c2ecf20Sopenharmony_ci bio_put(align_bi); 54388c2ecf20Sopenharmony_ci return 0; 54398c2ecf20Sopenharmony_ci } 54408c2ecf20Sopenharmony_ci 54418c2ecf20Sopenharmony_ci if (rdev) { 54428c2ecf20Sopenharmony_ci sector_t first_bad; 54438c2ecf20Sopenharmony_ci int bad_sectors; 54448c2ecf20Sopenharmony_ci 54458c2ecf20Sopenharmony_ci atomic_inc(&rdev->nr_pending); 54468c2ecf20Sopenharmony_ci rcu_read_unlock(); 54478c2ecf20Sopenharmony_ci raid_bio->bi_next = (void*)rdev; 54488c2ecf20Sopenharmony_ci bio_set_dev(align_bi, rdev->bdev); 54498c2ecf20Sopenharmony_ci 54508c2ecf20Sopenharmony_ci if (is_badblock(rdev, align_bi->bi_iter.bi_sector, 54518c2ecf20Sopenharmony_ci bio_sectors(align_bi), 54528c2ecf20Sopenharmony_ci &first_bad, &bad_sectors)) { 54538c2ecf20Sopenharmony_ci bio_put(align_bi); 54548c2ecf20Sopenharmony_ci rdev_dec_pending(rdev, mddev); 54558c2ecf20Sopenharmony_ci return 0; 54568c2ecf20Sopenharmony_ci } 54578c2ecf20Sopenharmony_ci 54588c2ecf20Sopenharmony_ci /* No reshape active, so we can trust rdev->data_offset */ 54598c2ecf20Sopenharmony_ci align_bi->bi_iter.bi_sector += rdev->data_offset; 54608c2ecf20Sopenharmony_ci 54618c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 54628c2ecf20Sopenharmony_ci wait_event_lock_irq(conf->wait_for_quiescent, 54638c2ecf20Sopenharmony_ci conf->quiesce == 0, 54648c2ecf20Sopenharmony_ci conf->device_lock); 54658c2ecf20Sopenharmony_ci atomic_inc(&conf->active_aligned_reads); 54668c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 54678c2ecf20Sopenharmony_ci 54688c2ecf20Sopenharmony_ci if (mddev->gendisk) 54698c2ecf20Sopenharmony_ci trace_block_bio_remap(align_bi->bi_disk->queue, 54708c2ecf20Sopenharmony_ci align_bi, disk_devt(mddev->gendisk), 54718c2ecf20Sopenharmony_ci raid_bio->bi_iter.bi_sector); 54728c2ecf20Sopenharmony_ci submit_bio_noacct(align_bi); 54738c2ecf20Sopenharmony_ci return 1; 54748c2ecf20Sopenharmony_ci } else { 54758c2ecf20Sopenharmony_ci rcu_read_unlock(); 54768c2ecf20Sopenharmony_ci bio_put(align_bi); 54778c2ecf20Sopenharmony_ci return 0; 54788c2ecf20Sopenharmony_ci } 54798c2ecf20Sopenharmony_ci} 54808c2ecf20Sopenharmony_ci 54818c2ecf20Sopenharmony_cistatic struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio) 54828c2ecf20Sopenharmony_ci{ 54838c2ecf20Sopenharmony_ci struct bio *split; 54848c2ecf20Sopenharmony_ci sector_t sector = raid_bio->bi_iter.bi_sector; 54858c2ecf20Sopenharmony_ci unsigned chunk_sects = mddev->chunk_sectors; 54868c2ecf20Sopenharmony_ci unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); 54878c2ecf20Sopenharmony_ci 54888c2ecf20Sopenharmony_ci if (sectors < bio_sectors(raid_bio)) { 54898c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 54908c2ecf20Sopenharmony_ci split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split); 54918c2ecf20Sopenharmony_ci bio_chain(split, raid_bio); 54928c2ecf20Sopenharmony_ci submit_bio_noacct(raid_bio); 54938c2ecf20Sopenharmony_ci raid_bio = split; 54948c2ecf20Sopenharmony_ci } 54958c2ecf20Sopenharmony_ci 54968c2ecf20Sopenharmony_ci if (!raid5_read_one_chunk(mddev, raid_bio)) 54978c2ecf20Sopenharmony_ci return raid_bio; 54988c2ecf20Sopenharmony_ci 54998c2ecf20Sopenharmony_ci return NULL; 55008c2ecf20Sopenharmony_ci} 55018c2ecf20Sopenharmony_ci 55028c2ecf20Sopenharmony_ci/* __get_priority_stripe - get the next stripe to process 55038c2ecf20Sopenharmony_ci * 55048c2ecf20Sopenharmony_ci * Full stripe writes are allowed to pass preread active stripes up until 55058c2ecf20Sopenharmony_ci * the bypass_threshold is exceeded. In general the bypass_count 55068c2ecf20Sopenharmony_ci * increments when the handle_list is handled before the hold_list; however, it 55078c2ecf20Sopenharmony_ci * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a 55088c2ecf20Sopenharmony_ci * stripe with in flight i/o. The bypass_count will be reset when the 55098c2ecf20Sopenharmony_ci * head of the hold_list has changed, i.e. the head was promoted to the 55108c2ecf20Sopenharmony_ci * handle_list. 55118c2ecf20Sopenharmony_ci */ 55128c2ecf20Sopenharmony_cistatic struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) 55138c2ecf20Sopenharmony_ci{ 55148c2ecf20Sopenharmony_ci struct stripe_head *sh, *tmp; 55158c2ecf20Sopenharmony_ci struct list_head *handle_list = NULL; 55168c2ecf20Sopenharmony_ci struct r5worker_group *wg; 55178c2ecf20Sopenharmony_ci bool second_try = !r5c_is_writeback(conf->log) && 55188c2ecf20Sopenharmony_ci !r5l_log_disk_error(conf); 55198c2ecf20Sopenharmony_ci bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || 55208c2ecf20Sopenharmony_ci r5l_log_disk_error(conf); 55218c2ecf20Sopenharmony_ci 55228c2ecf20Sopenharmony_ciagain: 55238c2ecf20Sopenharmony_ci wg = NULL; 55248c2ecf20Sopenharmony_ci sh = NULL; 55258c2ecf20Sopenharmony_ci if (conf->worker_cnt_per_group == 0) { 55268c2ecf20Sopenharmony_ci handle_list = try_loprio ? &conf->loprio_list : 55278c2ecf20Sopenharmony_ci &conf->handle_list; 55288c2ecf20Sopenharmony_ci } else if (group != ANY_GROUP) { 55298c2ecf20Sopenharmony_ci handle_list = try_loprio ? &conf->worker_groups[group].loprio_list : 55308c2ecf20Sopenharmony_ci &conf->worker_groups[group].handle_list; 55318c2ecf20Sopenharmony_ci wg = &conf->worker_groups[group]; 55328c2ecf20Sopenharmony_ci } else { 55338c2ecf20Sopenharmony_ci int i; 55348c2ecf20Sopenharmony_ci for (i = 0; i < conf->group_cnt; i++) { 55358c2ecf20Sopenharmony_ci handle_list = try_loprio ? &conf->worker_groups[i].loprio_list : 55368c2ecf20Sopenharmony_ci &conf->worker_groups[i].handle_list; 55378c2ecf20Sopenharmony_ci wg = &conf->worker_groups[i]; 55388c2ecf20Sopenharmony_ci if (!list_empty(handle_list)) 55398c2ecf20Sopenharmony_ci break; 55408c2ecf20Sopenharmony_ci } 55418c2ecf20Sopenharmony_ci } 55428c2ecf20Sopenharmony_ci 55438c2ecf20Sopenharmony_ci pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n", 55448c2ecf20Sopenharmony_ci __func__, 55458c2ecf20Sopenharmony_ci list_empty(handle_list) ? "empty" : "busy", 55468c2ecf20Sopenharmony_ci list_empty(&conf->hold_list) ? "empty" : "busy", 55478c2ecf20Sopenharmony_ci atomic_read(&conf->pending_full_writes), conf->bypass_count); 55488c2ecf20Sopenharmony_ci 55498c2ecf20Sopenharmony_ci if (!list_empty(handle_list)) { 55508c2ecf20Sopenharmony_ci sh = list_entry(handle_list->next, typeof(*sh), lru); 55518c2ecf20Sopenharmony_ci 55528c2ecf20Sopenharmony_ci if (list_empty(&conf->hold_list)) 55538c2ecf20Sopenharmony_ci conf->bypass_count = 0; 55548c2ecf20Sopenharmony_ci else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { 55558c2ecf20Sopenharmony_ci if (conf->hold_list.next == conf->last_hold) 55568c2ecf20Sopenharmony_ci conf->bypass_count++; 55578c2ecf20Sopenharmony_ci else { 55588c2ecf20Sopenharmony_ci conf->last_hold = conf->hold_list.next; 55598c2ecf20Sopenharmony_ci conf->bypass_count -= conf->bypass_threshold; 55608c2ecf20Sopenharmony_ci if (conf->bypass_count < 0) 55618c2ecf20Sopenharmony_ci conf->bypass_count = 0; 55628c2ecf20Sopenharmony_ci } 55638c2ecf20Sopenharmony_ci } 55648c2ecf20Sopenharmony_ci } else if (!list_empty(&conf->hold_list) && 55658c2ecf20Sopenharmony_ci ((conf->bypass_threshold && 55668c2ecf20Sopenharmony_ci conf->bypass_count > conf->bypass_threshold) || 55678c2ecf20Sopenharmony_ci atomic_read(&conf->pending_full_writes) == 0)) { 55688c2ecf20Sopenharmony_ci 55698c2ecf20Sopenharmony_ci list_for_each_entry(tmp, &conf->hold_list, lru) { 55708c2ecf20Sopenharmony_ci if (conf->worker_cnt_per_group == 0 || 55718c2ecf20Sopenharmony_ci group == ANY_GROUP || 55728c2ecf20Sopenharmony_ci !cpu_online(tmp->cpu) || 55738c2ecf20Sopenharmony_ci cpu_to_group(tmp->cpu) == group) { 55748c2ecf20Sopenharmony_ci sh = tmp; 55758c2ecf20Sopenharmony_ci break; 55768c2ecf20Sopenharmony_ci } 55778c2ecf20Sopenharmony_ci } 55788c2ecf20Sopenharmony_ci 55798c2ecf20Sopenharmony_ci if (sh) { 55808c2ecf20Sopenharmony_ci conf->bypass_count -= conf->bypass_threshold; 55818c2ecf20Sopenharmony_ci if (conf->bypass_count < 0) 55828c2ecf20Sopenharmony_ci conf->bypass_count = 0; 55838c2ecf20Sopenharmony_ci } 55848c2ecf20Sopenharmony_ci wg = NULL; 55858c2ecf20Sopenharmony_ci } 55868c2ecf20Sopenharmony_ci 55878c2ecf20Sopenharmony_ci if (!sh) { 55888c2ecf20Sopenharmony_ci if (second_try) 55898c2ecf20Sopenharmony_ci return NULL; 55908c2ecf20Sopenharmony_ci second_try = true; 55918c2ecf20Sopenharmony_ci try_loprio = !try_loprio; 55928c2ecf20Sopenharmony_ci goto again; 55938c2ecf20Sopenharmony_ci } 55948c2ecf20Sopenharmony_ci 55958c2ecf20Sopenharmony_ci if (wg) { 55968c2ecf20Sopenharmony_ci wg->stripes_cnt--; 55978c2ecf20Sopenharmony_ci sh->group = NULL; 55988c2ecf20Sopenharmony_ci } 55998c2ecf20Sopenharmony_ci list_del_init(&sh->lru); 56008c2ecf20Sopenharmony_ci BUG_ON(atomic_inc_return(&sh->count) != 1); 56018c2ecf20Sopenharmony_ci return sh; 56028c2ecf20Sopenharmony_ci} 56038c2ecf20Sopenharmony_ci 56048c2ecf20Sopenharmony_cistruct raid5_plug_cb { 56058c2ecf20Sopenharmony_ci struct blk_plug_cb cb; 56068c2ecf20Sopenharmony_ci struct list_head list; 56078c2ecf20Sopenharmony_ci struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; 56088c2ecf20Sopenharmony_ci}; 56098c2ecf20Sopenharmony_ci 56108c2ecf20Sopenharmony_cistatic void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) 56118c2ecf20Sopenharmony_ci{ 56128c2ecf20Sopenharmony_ci struct raid5_plug_cb *cb = container_of( 56138c2ecf20Sopenharmony_ci blk_cb, struct raid5_plug_cb, cb); 56148c2ecf20Sopenharmony_ci struct stripe_head *sh; 56158c2ecf20Sopenharmony_ci struct mddev *mddev = cb->cb.data; 56168c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 56178c2ecf20Sopenharmony_ci int cnt = 0; 56188c2ecf20Sopenharmony_ci int hash; 56198c2ecf20Sopenharmony_ci 56208c2ecf20Sopenharmony_ci if (cb->list.next && !list_empty(&cb->list)) { 56218c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 56228c2ecf20Sopenharmony_ci while (!list_empty(&cb->list)) { 56238c2ecf20Sopenharmony_ci sh = list_first_entry(&cb->list, struct stripe_head, lru); 56248c2ecf20Sopenharmony_ci list_del_init(&sh->lru); 56258c2ecf20Sopenharmony_ci /* 56268c2ecf20Sopenharmony_ci * avoid race release_stripe_plug() sees 56278c2ecf20Sopenharmony_ci * STRIPE_ON_UNPLUG_LIST clear but the stripe 56288c2ecf20Sopenharmony_ci * is still in our list 56298c2ecf20Sopenharmony_ci */ 56308c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 56318c2ecf20Sopenharmony_ci clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); 56328c2ecf20Sopenharmony_ci /* 56338c2ecf20Sopenharmony_ci * STRIPE_ON_RELEASE_LIST could be set here. In that 56348c2ecf20Sopenharmony_ci * case, the count is always > 1 here 56358c2ecf20Sopenharmony_ci */ 56368c2ecf20Sopenharmony_ci hash = sh->hash_lock_index; 56378c2ecf20Sopenharmony_ci __release_stripe(conf, sh, &cb->temp_inactive_list[hash]); 56388c2ecf20Sopenharmony_ci cnt++; 56398c2ecf20Sopenharmony_ci } 56408c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 56418c2ecf20Sopenharmony_ci } 56428c2ecf20Sopenharmony_ci release_inactive_stripe_list(conf, cb->temp_inactive_list, 56438c2ecf20Sopenharmony_ci NR_STRIPE_HASH_LOCKS); 56448c2ecf20Sopenharmony_ci if (mddev->queue) 56458c2ecf20Sopenharmony_ci trace_block_unplug(mddev->queue, cnt, !from_schedule); 56468c2ecf20Sopenharmony_ci kfree(cb); 56478c2ecf20Sopenharmony_ci} 56488c2ecf20Sopenharmony_ci 56498c2ecf20Sopenharmony_cistatic void release_stripe_plug(struct mddev *mddev, 56508c2ecf20Sopenharmony_ci struct stripe_head *sh) 56518c2ecf20Sopenharmony_ci{ 56528c2ecf20Sopenharmony_ci struct blk_plug_cb *blk_cb = blk_check_plugged( 56538c2ecf20Sopenharmony_ci raid5_unplug, mddev, 56548c2ecf20Sopenharmony_ci sizeof(struct raid5_plug_cb)); 56558c2ecf20Sopenharmony_ci struct raid5_plug_cb *cb; 56568c2ecf20Sopenharmony_ci 56578c2ecf20Sopenharmony_ci if (!blk_cb) { 56588c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 56598c2ecf20Sopenharmony_ci return; 56608c2ecf20Sopenharmony_ci } 56618c2ecf20Sopenharmony_ci 56628c2ecf20Sopenharmony_ci cb = container_of(blk_cb, struct raid5_plug_cb, cb); 56638c2ecf20Sopenharmony_ci 56648c2ecf20Sopenharmony_ci if (cb->list.next == NULL) { 56658c2ecf20Sopenharmony_ci int i; 56668c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cb->list); 56678c2ecf20Sopenharmony_ci for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) 56688c2ecf20Sopenharmony_ci INIT_LIST_HEAD(cb->temp_inactive_list + i); 56698c2ecf20Sopenharmony_ci } 56708c2ecf20Sopenharmony_ci 56718c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) 56728c2ecf20Sopenharmony_ci list_add_tail(&sh->lru, &cb->list); 56738c2ecf20Sopenharmony_ci else 56748c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 56758c2ecf20Sopenharmony_ci} 56768c2ecf20Sopenharmony_ci 56778c2ecf20Sopenharmony_cistatic void make_discard_request(struct mddev *mddev, struct bio *bi) 56788c2ecf20Sopenharmony_ci{ 56798c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 56808c2ecf20Sopenharmony_ci sector_t logical_sector, last_sector; 56818c2ecf20Sopenharmony_ci struct stripe_head *sh; 56828c2ecf20Sopenharmony_ci int stripe_sectors; 56838c2ecf20Sopenharmony_ci 56848c2ecf20Sopenharmony_ci if (mddev->reshape_position != MaxSector) 56858c2ecf20Sopenharmony_ci /* Skip discard while reshape is happening */ 56868c2ecf20Sopenharmony_ci return; 56878c2ecf20Sopenharmony_ci 56888c2ecf20Sopenharmony_ci logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); 56898c2ecf20Sopenharmony_ci last_sector = bio_end_sector(bi); 56908c2ecf20Sopenharmony_ci 56918c2ecf20Sopenharmony_ci bi->bi_next = NULL; 56928c2ecf20Sopenharmony_ci 56938c2ecf20Sopenharmony_ci stripe_sectors = conf->chunk_sectors * 56948c2ecf20Sopenharmony_ci (conf->raid_disks - conf->max_degraded); 56958c2ecf20Sopenharmony_ci logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector, 56968c2ecf20Sopenharmony_ci stripe_sectors); 56978c2ecf20Sopenharmony_ci sector_div(last_sector, stripe_sectors); 56988c2ecf20Sopenharmony_ci 56998c2ecf20Sopenharmony_ci logical_sector *= conf->chunk_sectors; 57008c2ecf20Sopenharmony_ci last_sector *= conf->chunk_sectors; 57018c2ecf20Sopenharmony_ci 57028c2ecf20Sopenharmony_ci for (; logical_sector < last_sector; 57038c2ecf20Sopenharmony_ci logical_sector += RAID5_STRIPE_SECTORS(conf)) { 57048c2ecf20Sopenharmony_ci DEFINE_WAIT(w); 57058c2ecf20Sopenharmony_ci int d; 57068c2ecf20Sopenharmony_ci again: 57078c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0); 57088c2ecf20Sopenharmony_ci prepare_to_wait(&conf->wait_for_overlap, &w, 57098c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 57108c2ecf20Sopenharmony_ci set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); 57118c2ecf20Sopenharmony_ci if (test_bit(STRIPE_SYNCING, &sh->state)) { 57128c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 57138c2ecf20Sopenharmony_ci schedule(); 57148c2ecf20Sopenharmony_ci goto again; 57158c2ecf20Sopenharmony_ci } 57168c2ecf20Sopenharmony_ci clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); 57178c2ecf20Sopenharmony_ci spin_lock_irq(&sh->stripe_lock); 57188c2ecf20Sopenharmony_ci for (d = 0; d < conf->raid_disks; d++) { 57198c2ecf20Sopenharmony_ci if (d == sh->pd_idx || d == sh->qd_idx) 57208c2ecf20Sopenharmony_ci continue; 57218c2ecf20Sopenharmony_ci if (sh->dev[d].towrite || sh->dev[d].toread) { 57228c2ecf20Sopenharmony_ci set_bit(R5_Overlap, &sh->dev[d].flags); 57238c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 57248c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 57258c2ecf20Sopenharmony_ci schedule(); 57268c2ecf20Sopenharmony_ci goto again; 57278c2ecf20Sopenharmony_ci } 57288c2ecf20Sopenharmony_ci } 57298c2ecf20Sopenharmony_ci set_bit(STRIPE_DISCARD, &sh->state); 57308c2ecf20Sopenharmony_ci finish_wait(&conf->wait_for_overlap, &w); 57318c2ecf20Sopenharmony_ci sh->overwrite_disks = 0; 57328c2ecf20Sopenharmony_ci for (d = 0; d < conf->raid_disks; d++) { 57338c2ecf20Sopenharmony_ci if (d == sh->pd_idx || d == sh->qd_idx) 57348c2ecf20Sopenharmony_ci continue; 57358c2ecf20Sopenharmony_ci sh->dev[d].towrite = bi; 57368c2ecf20Sopenharmony_ci set_bit(R5_OVERWRITE, &sh->dev[d].flags); 57378c2ecf20Sopenharmony_ci bio_inc_remaining(bi); 57388c2ecf20Sopenharmony_ci md_write_inc(mddev, bi); 57398c2ecf20Sopenharmony_ci sh->overwrite_disks++; 57408c2ecf20Sopenharmony_ci } 57418c2ecf20Sopenharmony_ci spin_unlock_irq(&sh->stripe_lock); 57428c2ecf20Sopenharmony_ci if (conf->mddev->bitmap) { 57438c2ecf20Sopenharmony_ci for (d = 0; 57448c2ecf20Sopenharmony_ci d < conf->raid_disks - conf->max_degraded; 57458c2ecf20Sopenharmony_ci d++) 57468c2ecf20Sopenharmony_ci md_bitmap_startwrite(mddev->bitmap, 57478c2ecf20Sopenharmony_ci sh->sector, 57488c2ecf20Sopenharmony_ci RAID5_STRIPE_SECTORS(conf), 57498c2ecf20Sopenharmony_ci 0); 57508c2ecf20Sopenharmony_ci sh->bm_seq = conf->seq_flush + 1; 57518c2ecf20Sopenharmony_ci set_bit(STRIPE_BIT_DELAY, &sh->state); 57528c2ecf20Sopenharmony_ci } 57538c2ecf20Sopenharmony_ci 57548c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 57558c2ecf20Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 57568c2ecf20Sopenharmony_ci if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 57578c2ecf20Sopenharmony_ci atomic_inc(&conf->preread_active_stripes); 57588c2ecf20Sopenharmony_ci release_stripe_plug(mddev, sh); 57598c2ecf20Sopenharmony_ci } 57608c2ecf20Sopenharmony_ci 57618c2ecf20Sopenharmony_ci bio_endio(bi); 57628c2ecf20Sopenharmony_ci} 57638c2ecf20Sopenharmony_ci 57648c2ecf20Sopenharmony_cistatic bool raid5_make_request(struct mddev *mddev, struct bio * bi) 57658c2ecf20Sopenharmony_ci{ 57668c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 57678c2ecf20Sopenharmony_ci int dd_idx; 57688c2ecf20Sopenharmony_ci sector_t new_sector; 57698c2ecf20Sopenharmony_ci sector_t logical_sector, last_sector; 57708c2ecf20Sopenharmony_ci struct stripe_head *sh; 57718c2ecf20Sopenharmony_ci const int rw = bio_data_dir(bi); 57728c2ecf20Sopenharmony_ci DEFINE_WAIT(w); 57738c2ecf20Sopenharmony_ci bool do_prepare; 57748c2ecf20Sopenharmony_ci bool do_flush = false; 57758c2ecf20Sopenharmony_ci 57768c2ecf20Sopenharmony_ci if (unlikely(bi->bi_opf & REQ_PREFLUSH)) { 57778c2ecf20Sopenharmony_ci int ret = log_handle_flush_request(conf, bi); 57788c2ecf20Sopenharmony_ci 57798c2ecf20Sopenharmony_ci if (ret == 0) 57808c2ecf20Sopenharmony_ci return true; 57818c2ecf20Sopenharmony_ci if (ret == -ENODEV) { 57828c2ecf20Sopenharmony_ci if (md_flush_request(mddev, bi)) 57838c2ecf20Sopenharmony_ci return true; 57848c2ecf20Sopenharmony_ci } 57858c2ecf20Sopenharmony_ci /* ret == -EAGAIN, fallback */ 57868c2ecf20Sopenharmony_ci /* 57878c2ecf20Sopenharmony_ci * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH, 57888c2ecf20Sopenharmony_ci * we need to flush journal device 57898c2ecf20Sopenharmony_ci */ 57908c2ecf20Sopenharmony_ci do_flush = bi->bi_opf & REQ_PREFLUSH; 57918c2ecf20Sopenharmony_ci } 57928c2ecf20Sopenharmony_ci 57938c2ecf20Sopenharmony_ci if (!md_write_start(mddev, bi)) 57948c2ecf20Sopenharmony_ci return false; 57958c2ecf20Sopenharmony_ci /* 57968c2ecf20Sopenharmony_ci * If array is degraded, better not do chunk aligned read because 57978c2ecf20Sopenharmony_ci * later we might have to read it again in order to reconstruct 57988c2ecf20Sopenharmony_ci * data on failed drives. 57998c2ecf20Sopenharmony_ci */ 58008c2ecf20Sopenharmony_ci if (rw == READ && mddev->degraded == 0 && 58018c2ecf20Sopenharmony_ci mddev->reshape_position == MaxSector) { 58028c2ecf20Sopenharmony_ci bi = chunk_aligned_read(mddev, bi); 58038c2ecf20Sopenharmony_ci if (!bi) 58048c2ecf20Sopenharmony_ci return true; 58058c2ecf20Sopenharmony_ci } 58068c2ecf20Sopenharmony_ci 58078c2ecf20Sopenharmony_ci if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) { 58088c2ecf20Sopenharmony_ci make_discard_request(mddev, bi); 58098c2ecf20Sopenharmony_ci md_write_end(mddev); 58108c2ecf20Sopenharmony_ci return true; 58118c2ecf20Sopenharmony_ci } 58128c2ecf20Sopenharmony_ci 58138c2ecf20Sopenharmony_ci logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); 58148c2ecf20Sopenharmony_ci last_sector = bio_end_sector(bi); 58158c2ecf20Sopenharmony_ci bi->bi_next = NULL; 58168c2ecf20Sopenharmony_ci 58178c2ecf20Sopenharmony_ci prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); 58188c2ecf20Sopenharmony_ci for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { 58198c2ecf20Sopenharmony_ci int previous; 58208c2ecf20Sopenharmony_ci int seq; 58218c2ecf20Sopenharmony_ci 58228c2ecf20Sopenharmony_ci do_prepare = false; 58238c2ecf20Sopenharmony_ci retry: 58248c2ecf20Sopenharmony_ci seq = read_seqcount_begin(&conf->gen_lock); 58258c2ecf20Sopenharmony_ci previous = 0; 58268c2ecf20Sopenharmony_ci if (do_prepare) 58278c2ecf20Sopenharmony_ci prepare_to_wait(&conf->wait_for_overlap, &w, 58288c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 58298c2ecf20Sopenharmony_ci if (unlikely(conf->reshape_progress != MaxSector)) { 58308c2ecf20Sopenharmony_ci /* spinlock is needed as reshape_progress may be 58318c2ecf20Sopenharmony_ci * 64bit on a 32bit platform, and so it might be 58328c2ecf20Sopenharmony_ci * possible to see a half-updated value 58338c2ecf20Sopenharmony_ci * Of course reshape_progress could change after 58348c2ecf20Sopenharmony_ci * the lock is dropped, so once we get a reference 58358c2ecf20Sopenharmony_ci * to the stripe that we think it is, we will have 58368c2ecf20Sopenharmony_ci * to check again. 58378c2ecf20Sopenharmony_ci */ 58388c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 58398c2ecf20Sopenharmony_ci if (mddev->reshape_backwards 58408c2ecf20Sopenharmony_ci ? logical_sector < conf->reshape_progress 58418c2ecf20Sopenharmony_ci : logical_sector >= conf->reshape_progress) { 58428c2ecf20Sopenharmony_ci previous = 1; 58438c2ecf20Sopenharmony_ci } else { 58448c2ecf20Sopenharmony_ci if (mddev->reshape_backwards 58458c2ecf20Sopenharmony_ci ? logical_sector < conf->reshape_safe 58468c2ecf20Sopenharmony_ci : logical_sector >= conf->reshape_safe) { 58478c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 58488c2ecf20Sopenharmony_ci schedule(); 58498c2ecf20Sopenharmony_ci do_prepare = true; 58508c2ecf20Sopenharmony_ci goto retry; 58518c2ecf20Sopenharmony_ci } 58528c2ecf20Sopenharmony_ci } 58538c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 58548c2ecf20Sopenharmony_ci } 58558c2ecf20Sopenharmony_ci 58568c2ecf20Sopenharmony_ci new_sector = raid5_compute_sector(conf, logical_sector, 58578c2ecf20Sopenharmony_ci previous, 58588c2ecf20Sopenharmony_ci &dd_idx, NULL); 58598c2ecf20Sopenharmony_ci pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n", 58608c2ecf20Sopenharmony_ci (unsigned long long)new_sector, 58618c2ecf20Sopenharmony_ci (unsigned long long)logical_sector); 58628c2ecf20Sopenharmony_ci 58638c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, new_sector, previous, 58648c2ecf20Sopenharmony_ci (bi->bi_opf & REQ_RAHEAD), 0); 58658c2ecf20Sopenharmony_ci if (sh) { 58668c2ecf20Sopenharmony_ci if (unlikely(previous)) { 58678c2ecf20Sopenharmony_ci /* expansion might have moved on while waiting for a 58688c2ecf20Sopenharmony_ci * stripe, so we must do the range check again. 58698c2ecf20Sopenharmony_ci * Expansion could still move past after this 58708c2ecf20Sopenharmony_ci * test, but as we are holding a reference to 58718c2ecf20Sopenharmony_ci * 'sh', we know that if that happens, 58728c2ecf20Sopenharmony_ci * STRIPE_EXPANDING will get set and the expansion 58738c2ecf20Sopenharmony_ci * won't proceed until we finish with the stripe. 58748c2ecf20Sopenharmony_ci */ 58758c2ecf20Sopenharmony_ci int must_retry = 0; 58768c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 58778c2ecf20Sopenharmony_ci if (mddev->reshape_backwards 58788c2ecf20Sopenharmony_ci ? logical_sector >= conf->reshape_progress 58798c2ecf20Sopenharmony_ci : logical_sector < conf->reshape_progress) 58808c2ecf20Sopenharmony_ci /* mismatch, need to try again */ 58818c2ecf20Sopenharmony_ci must_retry = 1; 58828c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 58838c2ecf20Sopenharmony_ci if (must_retry) { 58848c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 58858c2ecf20Sopenharmony_ci schedule(); 58868c2ecf20Sopenharmony_ci do_prepare = true; 58878c2ecf20Sopenharmony_ci goto retry; 58888c2ecf20Sopenharmony_ci } 58898c2ecf20Sopenharmony_ci } 58908c2ecf20Sopenharmony_ci if (read_seqcount_retry(&conf->gen_lock, seq)) { 58918c2ecf20Sopenharmony_ci /* Might have got the wrong stripe_head 58928c2ecf20Sopenharmony_ci * by accident 58938c2ecf20Sopenharmony_ci */ 58948c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 58958c2ecf20Sopenharmony_ci goto retry; 58968c2ecf20Sopenharmony_ci } 58978c2ecf20Sopenharmony_ci 58988c2ecf20Sopenharmony_ci if (test_bit(STRIPE_EXPANDING, &sh->state) || 58998c2ecf20Sopenharmony_ci !add_stripe_bio(sh, bi, dd_idx, rw, previous)) { 59008c2ecf20Sopenharmony_ci /* Stripe is busy expanding or 59018c2ecf20Sopenharmony_ci * add failed due to overlap. Flush everything 59028c2ecf20Sopenharmony_ci * and wait a while 59038c2ecf20Sopenharmony_ci */ 59048c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 59058c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 59068c2ecf20Sopenharmony_ci schedule(); 59078c2ecf20Sopenharmony_ci do_prepare = true; 59088c2ecf20Sopenharmony_ci goto retry; 59098c2ecf20Sopenharmony_ci } 59108c2ecf20Sopenharmony_ci if (do_flush) { 59118c2ecf20Sopenharmony_ci set_bit(STRIPE_R5C_PREFLUSH, &sh->state); 59128c2ecf20Sopenharmony_ci /* we only need flush for one stripe */ 59138c2ecf20Sopenharmony_ci do_flush = false; 59148c2ecf20Sopenharmony_ci } 59158c2ecf20Sopenharmony_ci 59168c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 59178c2ecf20Sopenharmony_ci clear_bit(STRIPE_DELAYED, &sh->state); 59188c2ecf20Sopenharmony_ci if ((!sh->batch_head || sh == sh->batch_head) && 59198c2ecf20Sopenharmony_ci (bi->bi_opf & REQ_SYNC) && 59208c2ecf20Sopenharmony_ci !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 59218c2ecf20Sopenharmony_ci atomic_inc(&conf->preread_active_stripes); 59228c2ecf20Sopenharmony_ci release_stripe_plug(mddev, sh); 59238c2ecf20Sopenharmony_ci } else { 59248c2ecf20Sopenharmony_ci /* cannot get stripe for read-ahead, just give-up */ 59258c2ecf20Sopenharmony_ci bi->bi_status = BLK_STS_IOERR; 59268c2ecf20Sopenharmony_ci break; 59278c2ecf20Sopenharmony_ci } 59288c2ecf20Sopenharmony_ci } 59298c2ecf20Sopenharmony_ci finish_wait(&conf->wait_for_overlap, &w); 59308c2ecf20Sopenharmony_ci 59318c2ecf20Sopenharmony_ci if (rw == WRITE) 59328c2ecf20Sopenharmony_ci md_write_end(mddev); 59338c2ecf20Sopenharmony_ci bio_endio(bi); 59348c2ecf20Sopenharmony_ci return true; 59358c2ecf20Sopenharmony_ci} 59368c2ecf20Sopenharmony_ci 59378c2ecf20Sopenharmony_cistatic sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks); 59388c2ecf20Sopenharmony_ci 59398c2ecf20Sopenharmony_cistatic sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped) 59408c2ecf20Sopenharmony_ci{ 59418c2ecf20Sopenharmony_ci /* reshaping is quite different to recovery/resync so it is 59428c2ecf20Sopenharmony_ci * handled quite separately ... here. 59438c2ecf20Sopenharmony_ci * 59448c2ecf20Sopenharmony_ci * On each call to sync_request, we gather one chunk worth of 59458c2ecf20Sopenharmony_ci * destination stripes and flag them as expanding. 59468c2ecf20Sopenharmony_ci * Then we find all the source stripes and request reads. 59478c2ecf20Sopenharmony_ci * As the reads complete, handle_stripe will copy the data 59488c2ecf20Sopenharmony_ci * into the destination stripe and release that stripe. 59498c2ecf20Sopenharmony_ci */ 59508c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 59518c2ecf20Sopenharmony_ci struct stripe_head *sh; 59528c2ecf20Sopenharmony_ci struct md_rdev *rdev; 59538c2ecf20Sopenharmony_ci sector_t first_sector, last_sector; 59548c2ecf20Sopenharmony_ci int raid_disks = conf->previous_raid_disks; 59558c2ecf20Sopenharmony_ci int data_disks = raid_disks - conf->max_degraded; 59568c2ecf20Sopenharmony_ci int new_data_disks = conf->raid_disks - conf->max_degraded; 59578c2ecf20Sopenharmony_ci int i; 59588c2ecf20Sopenharmony_ci int dd_idx; 59598c2ecf20Sopenharmony_ci sector_t writepos, readpos, safepos; 59608c2ecf20Sopenharmony_ci sector_t stripe_addr; 59618c2ecf20Sopenharmony_ci int reshape_sectors; 59628c2ecf20Sopenharmony_ci struct list_head stripes; 59638c2ecf20Sopenharmony_ci sector_t retn; 59648c2ecf20Sopenharmony_ci 59658c2ecf20Sopenharmony_ci if (sector_nr == 0) { 59668c2ecf20Sopenharmony_ci /* If restarting in the middle, skip the initial sectors */ 59678c2ecf20Sopenharmony_ci if (mddev->reshape_backwards && 59688c2ecf20Sopenharmony_ci conf->reshape_progress < raid5_size(mddev, 0, 0)) { 59698c2ecf20Sopenharmony_ci sector_nr = raid5_size(mddev, 0, 0) 59708c2ecf20Sopenharmony_ci - conf->reshape_progress; 59718c2ecf20Sopenharmony_ci } else if (mddev->reshape_backwards && 59728c2ecf20Sopenharmony_ci conf->reshape_progress == MaxSector) { 59738c2ecf20Sopenharmony_ci /* shouldn't happen, but just in case, finish up.*/ 59748c2ecf20Sopenharmony_ci sector_nr = MaxSector; 59758c2ecf20Sopenharmony_ci } else if (!mddev->reshape_backwards && 59768c2ecf20Sopenharmony_ci conf->reshape_progress > 0) 59778c2ecf20Sopenharmony_ci sector_nr = conf->reshape_progress; 59788c2ecf20Sopenharmony_ci sector_div(sector_nr, new_data_disks); 59798c2ecf20Sopenharmony_ci if (sector_nr) { 59808c2ecf20Sopenharmony_ci mddev->curr_resync_completed = sector_nr; 59818c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(mddev->sysfs_completed); 59828c2ecf20Sopenharmony_ci *skipped = 1; 59838c2ecf20Sopenharmony_ci retn = sector_nr; 59848c2ecf20Sopenharmony_ci goto finish; 59858c2ecf20Sopenharmony_ci } 59868c2ecf20Sopenharmony_ci } 59878c2ecf20Sopenharmony_ci 59888c2ecf20Sopenharmony_ci /* We need to process a full chunk at a time. 59898c2ecf20Sopenharmony_ci * If old and new chunk sizes differ, we need to process the 59908c2ecf20Sopenharmony_ci * largest of these 59918c2ecf20Sopenharmony_ci */ 59928c2ecf20Sopenharmony_ci 59938c2ecf20Sopenharmony_ci reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors); 59948c2ecf20Sopenharmony_ci 59958c2ecf20Sopenharmony_ci /* We update the metadata at least every 10 seconds, or when 59968c2ecf20Sopenharmony_ci * the data about to be copied would over-write the source of 59978c2ecf20Sopenharmony_ci * the data at the front of the range. i.e. one new_stripe 59988c2ecf20Sopenharmony_ci * along from reshape_progress new_maps to after where 59998c2ecf20Sopenharmony_ci * reshape_safe old_maps to 60008c2ecf20Sopenharmony_ci */ 60018c2ecf20Sopenharmony_ci writepos = conf->reshape_progress; 60028c2ecf20Sopenharmony_ci sector_div(writepos, new_data_disks); 60038c2ecf20Sopenharmony_ci readpos = conf->reshape_progress; 60048c2ecf20Sopenharmony_ci sector_div(readpos, data_disks); 60058c2ecf20Sopenharmony_ci safepos = conf->reshape_safe; 60068c2ecf20Sopenharmony_ci sector_div(safepos, data_disks); 60078c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) { 60088c2ecf20Sopenharmony_ci BUG_ON(writepos < reshape_sectors); 60098c2ecf20Sopenharmony_ci writepos -= reshape_sectors; 60108c2ecf20Sopenharmony_ci readpos += reshape_sectors; 60118c2ecf20Sopenharmony_ci safepos += reshape_sectors; 60128c2ecf20Sopenharmony_ci } else { 60138c2ecf20Sopenharmony_ci writepos += reshape_sectors; 60148c2ecf20Sopenharmony_ci /* readpos and safepos are worst-case calculations. 60158c2ecf20Sopenharmony_ci * A negative number is overly pessimistic, and causes 60168c2ecf20Sopenharmony_ci * obvious problems for unsigned storage. So clip to 0. 60178c2ecf20Sopenharmony_ci */ 60188c2ecf20Sopenharmony_ci readpos -= min_t(sector_t, reshape_sectors, readpos); 60198c2ecf20Sopenharmony_ci safepos -= min_t(sector_t, reshape_sectors, safepos); 60208c2ecf20Sopenharmony_ci } 60218c2ecf20Sopenharmony_ci 60228c2ecf20Sopenharmony_ci /* Having calculated the 'writepos' possibly use it 60238c2ecf20Sopenharmony_ci * to set 'stripe_addr' which is where we will write to. 60248c2ecf20Sopenharmony_ci */ 60258c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) { 60268c2ecf20Sopenharmony_ci BUG_ON(conf->reshape_progress == 0); 60278c2ecf20Sopenharmony_ci stripe_addr = writepos; 60288c2ecf20Sopenharmony_ci BUG_ON((mddev->dev_sectors & 60298c2ecf20Sopenharmony_ci ~((sector_t)reshape_sectors - 1)) 60308c2ecf20Sopenharmony_ci - reshape_sectors - stripe_addr 60318c2ecf20Sopenharmony_ci != sector_nr); 60328c2ecf20Sopenharmony_ci } else { 60338c2ecf20Sopenharmony_ci BUG_ON(writepos != sector_nr + reshape_sectors); 60348c2ecf20Sopenharmony_ci stripe_addr = sector_nr; 60358c2ecf20Sopenharmony_ci } 60368c2ecf20Sopenharmony_ci 60378c2ecf20Sopenharmony_ci /* 'writepos' is the most advanced device address we might write. 60388c2ecf20Sopenharmony_ci * 'readpos' is the least advanced device address we might read. 60398c2ecf20Sopenharmony_ci * 'safepos' is the least address recorded in the metadata as having 60408c2ecf20Sopenharmony_ci * been reshaped. 60418c2ecf20Sopenharmony_ci * If there is a min_offset_diff, these are adjusted either by 60428c2ecf20Sopenharmony_ci * increasing the safepos/readpos if diff is negative, or 60438c2ecf20Sopenharmony_ci * increasing writepos if diff is positive. 60448c2ecf20Sopenharmony_ci * If 'readpos' is then behind 'writepos', there is no way that we can 60458c2ecf20Sopenharmony_ci * ensure safety in the face of a crash - that must be done by userspace 60468c2ecf20Sopenharmony_ci * making a backup of the data. So in that case there is no particular 60478c2ecf20Sopenharmony_ci * rush to update metadata. 60488c2ecf20Sopenharmony_ci * Otherwise if 'safepos' is behind 'writepos', then we really need to 60498c2ecf20Sopenharmony_ci * update the metadata to advance 'safepos' to match 'readpos' so that 60508c2ecf20Sopenharmony_ci * we can be safe in the event of a crash. 60518c2ecf20Sopenharmony_ci * So we insist on updating metadata if safepos is behind writepos and 60528c2ecf20Sopenharmony_ci * readpos is beyond writepos. 60538c2ecf20Sopenharmony_ci * In any case, update the metadata every 10 seconds. 60548c2ecf20Sopenharmony_ci * Maybe that number should be configurable, but I'm not sure it is 60558c2ecf20Sopenharmony_ci * worth it.... maybe it could be a multiple of safemode_delay??? 60568c2ecf20Sopenharmony_ci */ 60578c2ecf20Sopenharmony_ci if (conf->min_offset_diff < 0) { 60588c2ecf20Sopenharmony_ci safepos += -conf->min_offset_diff; 60598c2ecf20Sopenharmony_ci readpos += -conf->min_offset_diff; 60608c2ecf20Sopenharmony_ci } else 60618c2ecf20Sopenharmony_ci writepos += conf->min_offset_diff; 60628c2ecf20Sopenharmony_ci 60638c2ecf20Sopenharmony_ci if ((mddev->reshape_backwards 60648c2ecf20Sopenharmony_ci ? (safepos > writepos && readpos < writepos) 60658c2ecf20Sopenharmony_ci : (safepos < writepos && readpos > writepos)) || 60668c2ecf20Sopenharmony_ci time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { 60678c2ecf20Sopenharmony_ci /* Cannot proceed until we've updated the superblock... */ 60688c2ecf20Sopenharmony_ci wait_event(conf->wait_for_overlap, 60698c2ecf20Sopenharmony_ci atomic_read(&conf->reshape_stripes)==0 60708c2ecf20Sopenharmony_ci || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); 60718c2ecf20Sopenharmony_ci if (atomic_read(&conf->reshape_stripes) != 0) 60728c2ecf20Sopenharmony_ci return 0; 60738c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 60748c2ecf20Sopenharmony_ci mddev->curr_resync_completed = sector_nr; 60758c2ecf20Sopenharmony_ci if (!mddev->reshape_backwards) 60768c2ecf20Sopenharmony_ci /* Can update recovery_offset */ 60778c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 60788c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0 && 60798c2ecf20Sopenharmony_ci !test_bit(Journal, &rdev->flags) && 60808c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags) && 60818c2ecf20Sopenharmony_ci rdev->recovery_offset < sector_nr) 60828c2ecf20Sopenharmony_ci rdev->recovery_offset = sector_nr; 60838c2ecf20Sopenharmony_ci 60848c2ecf20Sopenharmony_ci conf->reshape_checkpoint = jiffies; 60858c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 60868c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 60878c2ecf20Sopenharmony_ci wait_event(mddev->sb_wait, mddev->sb_flags == 0 || 60888c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_INTR, &mddev->recovery)); 60898c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 60908c2ecf20Sopenharmony_ci return 0; 60918c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 60928c2ecf20Sopenharmony_ci conf->reshape_safe = mddev->reshape_position; 60938c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 60948c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 60958c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(mddev->sysfs_completed); 60968c2ecf20Sopenharmony_ci } 60978c2ecf20Sopenharmony_ci 60988c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&stripes); 60998c2ecf20Sopenharmony_ci for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) { 61008c2ecf20Sopenharmony_ci int j; 61018c2ecf20Sopenharmony_ci int skipped_disk = 0; 61028c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1); 61038c2ecf20Sopenharmony_ci set_bit(STRIPE_EXPANDING, &sh->state); 61048c2ecf20Sopenharmony_ci atomic_inc(&conf->reshape_stripes); 61058c2ecf20Sopenharmony_ci /* If any of this stripe is beyond the end of the old 61068c2ecf20Sopenharmony_ci * array, then we need to zero those blocks 61078c2ecf20Sopenharmony_ci */ 61088c2ecf20Sopenharmony_ci for (j=sh->disks; j--;) { 61098c2ecf20Sopenharmony_ci sector_t s; 61108c2ecf20Sopenharmony_ci if (j == sh->pd_idx) 61118c2ecf20Sopenharmony_ci continue; 61128c2ecf20Sopenharmony_ci if (conf->level == 6 && 61138c2ecf20Sopenharmony_ci j == sh->qd_idx) 61148c2ecf20Sopenharmony_ci continue; 61158c2ecf20Sopenharmony_ci s = raid5_compute_blocknr(sh, j, 0); 61168c2ecf20Sopenharmony_ci if (s < raid5_size(mddev, 0, 0)) { 61178c2ecf20Sopenharmony_ci skipped_disk = 1; 61188c2ecf20Sopenharmony_ci continue; 61198c2ecf20Sopenharmony_ci } 61208c2ecf20Sopenharmony_ci memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf)); 61218c2ecf20Sopenharmony_ci set_bit(R5_Expanded, &sh->dev[j].flags); 61228c2ecf20Sopenharmony_ci set_bit(R5_UPTODATE, &sh->dev[j].flags); 61238c2ecf20Sopenharmony_ci } 61248c2ecf20Sopenharmony_ci if (!skipped_disk) { 61258c2ecf20Sopenharmony_ci set_bit(STRIPE_EXPAND_READY, &sh->state); 61268c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 61278c2ecf20Sopenharmony_ci } 61288c2ecf20Sopenharmony_ci list_add(&sh->lru, &stripes); 61298c2ecf20Sopenharmony_ci } 61308c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 61318c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) 61328c2ecf20Sopenharmony_ci conf->reshape_progress -= reshape_sectors * new_data_disks; 61338c2ecf20Sopenharmony_ci else 61348c2ecf20Sopenharmony_ci conf->reshape_progress += reshape_sectors * new_data_disks; 61358c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 61368c2ecf20Sopenharmony_ci /* Ok, those stripe are ready. We can start scheduling 61378c2ecf20Sopenharmony_ci * reads on the source stripes. 61388c2ecf20Sopenharmony_ci * The source stripes are determined by mapping the first and last 61398c2ecf20Sopenharmony_ci * block on the destination stripes. 61408c2ecf20Sopenharmony_ci */ 61418c2ecf20Sopenharmony_ci first_sector = 61428c2ecf20Sopenharmony_ci raid5_compute_sector(conf, stripe_addr*(new_data_disks), 61438c2ecf20Sopenharmony_ci 1, &dd_idx, NULL); 61448c2ecf20Sopenharmony_ci last_sector = 61458c2ecf20Sopenharmony_ci raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) 61468c2ecf20Sopenharmony_ci * new_data_disks - 1), 61478c2ecf20Sopenharmony_ci 1, &dd_idx, NULL); 61488c2ecf20Sopenharmony_ci if (last_sector >= mddev->dev_sectors) 61498c2ecf20Sopenharmony_ci last_sector = mddev->dev_sectors - 1; 61508c2ecf20Sopenharmony_ci while (first_sector <= last_sector) { 61518c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1); 61528c2ecf20Sopenharmony_ci set_bit(STRIPE_EXPAND_SOURCE, &sh->state); 61538c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 61548c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 61558c2ecf20Sopenharmony_ci first_sector += RAID5_STRIPE_SECTORS(conf); 61568c2ecf20Sopenharmony_ci } 61578c2ecf20Sopenharmony_ci /* Now that the sources are clearly marked, we can release 61588c2ecf20Sopenharmony_ci * the destination stripes 61598c2ecf20Sopenharmony_ci */ 61608c2ecf20Sopenharmony_ci while (!list_empty(&stripes)) { 61618c2ecf20Sopenharmony_ci sh = list_entry(stripes.next, struct stripe_head, lru); 61628c2ecf20Sopenharmony_ci list_del_init(&sh->lru); 61638c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 61648c2ecf20Sopenharmony_ci } 61658c2ecf20Sopenharmony_ci /* If this takes us to the resync_max point where we have to pause, 61668c2ecf20Sopenharmony_ci * then we need to write out the superblock. 61678c2ecf20Sopenharmony_ci */ 61688c2ecf20Sopenharmony_ci sector_nr += reshape_sectors; 61698c2ecf20Sopenharmony_ci retn = reshape_sectors; 61708c2ecf20Sopenharmony_cifinish: 61718c2ecf20Sopenharmony_ci if (mddev->curr_resync_completed > mddev->resync_max || 61728c2ecf20Sopenharmony_ci (sector_nr - mddev->curr_resync_completed) * 2 61738c2ecf20Sopenharmony_ci >= mddev->resync_max - mddev->curr_resync_completed) { 61748c2ecf20Sopenharmony_ci /* Cannot proceed until we've updated the superblock... */ 61758c2ecf20Sopenharmony_ci wait_event(conf->wait_for_overlap, 61768c2ecf20Sopenharmony_ci atomic_read(&conf->reshape_stripes) == 0 61778c2ecf20Sopenharmony_ci || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); 61788c2ecf20Sopenharmony_ci if (atomic_read(&conf->reshape_stripes) != 0) 61798c2ecf20Sopenharmony_ci goto ret; 61808c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 61818c2ecf20Sopenharmony_ci mddev->curr_resync_completed = sector_nr; 61828c2ecf20Sopenharmony_ci if (!mddev->reshape_backwards) 61838c2ecf20Sopenharmony_ci /* Can update recovery_offset */ 61848c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 61858c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0 && 61868c2ecf20Sopenharmony_ci !test_bit(Journal, &rdev->flags) && 61878c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags) && 61888c2ecf20Sopenharmony_ci rdev->recovery_offset < sector_nr) 61898c2ecf20Sopenharmony_ci rdev->recovery_offset = sector_nr; 61908c2ecf20Sopenharmony_ci conf->reshape_checkpoint = jiffies; 61918c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 61928c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 61938c2ecf20Sopenharmony_ci wait_event(mddev->sb_wait, 61948c2ecf20Sopenharmony_ci !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) 61958c2ecf20Sopenharmony_ci || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); 61968c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 61978c2ecf20Sopenharmony_ci goto ret; 61988c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 61998c2ecf20Sopenharmony_ci conf->reshape_safe = mddev->reshape_position; 62008c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 62018c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 62028c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(mddev->sysfs_completed); 62038c2ecf20Sopenharmony_ci } 62048c2ecf20Sopenharmony_ciret: 62058c2ecf20Sopenharmony_ci return retn; 62068c2ecf20Sopenharmony_ci} 62078c2ecf20Sopenharmony_ci 62088c2ecf20Sopenharmony_cistatic inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr, 62098c2ecf20Sopenharmony_ci int *skipped) 62108c2ecf20Sopenharmony_ci{ 62118c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 62128c2ecf20Sopenharmony_ci struct stripe_head *sh; 62138c2ecf20Sopenharmony_ci sector_t max_sector = mddev->dev_sectors; 62148c2ecf20Sopenharmony_ci sector_t sync_blocks; 62158c2ecf20Sopenharmony_ci int still_degraded = 0; 62168c2ecf20Sopenharmony_ci int i; 62178c2ecf20Sopenharmony_ci 62188c2ecf20Sopenharmony_ci if (sector_nr >= max_sector) { 62198c2ecf20Sopenharmony_ci /* just being told to finish up .. nothing much to do */ 62208c2ecf20Sopenharmony_ci 62218c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 62228c2ecf20Sopenharmony_ci end_reshape(conf); 62238c2ecf20Sopenharmony_ci return 0; 62248c2ecf20Sopenharmony_ci } 62258c2ecf20Sopenharmony_ci 62268c2ecf20Sopenharmony_ci if (mddev->curr_resync < max_sector) /* aborted */ 62278c2ecf20Sopenharmony_ci md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, 62288c2ecf20Sopenharmony_ci &sync_blocks, 1); 62298c2ecf20Sopenharmony_ci else /* completed sync */ 62308c2ecf20Sopenharmony_ci conf->fullsync = 0; 62318c2ecf20Sopenharmony_ci md_bitmap_close_sync(mddev->bitmap); 62328c2ecf20Sopenharmony_ci 62338c2ecf20Sopenharmony_ci return 0; 62348c2ecf20Sopenharmony_ci } 62358c2ecf20Sopenharmony_ci 62368c2ecf20Sopenharmony_ci /* Allow raid5_quiesce to complete */ 62378c2ecf20Sopenharmony_ci wait_event(conf->wait_for_overlap, conf->quiesce != 2); 62388c2ecf20Sopenharmony_ci 62398c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 62408c2ecf20Sopenharmony_ci return reshape_request(mddev, sector_nr, skipped); 62418c2ecf20Sopenharmony_ci 62428c2ecf20Sopenharmony_ci /* No need to check resync_max as we never do more than one 62438c2ecf20Sopenharmony_ci * stripe, and as resync_max will always be on a chunk boundary, 62448c2ecf20Sopenharmony_ci * if the check in md_do_sync didn't fire, there is no chance 62458c2ecf20Sopenharmony_ci * of overstepping resync_max here 62468c2ecf20Sopenharmony_ci */ 62478c2ecf20Sopenharmony_ci 62488c2ecf20Sopenharmony_ci /* if there is too many failed drives and we are trying 62498c2ecf20Sopenharmony_ci * to resync, then assert that we are finished, because there is 62508c2ecf20Sopenharmony_ci * nothing we can do. 62518c2ecf20Sopenharmony_ci */ 62528c2ecf20Sopenharmony_ci if (mddev->degraded >= conf->max_degraded && 62538c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 62548c2ecf20Sopenharmony_ci sector_t rv = mddev->dev_sectors - sector_nr; 62558c2ecf20Sopenharmony_ci *skipped = 1; 62568c2ecf20Sopenharmony_ci return rv; 62578c2ecf20Sopenharmony_ci } 62588c2ecf20Sopenharmony_ci if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && 62598c2ecf20Sopenharmony_ci !conf->fullsync && 62608c2ecf20Sopenharmony_ci !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && 62618c2ecf20Sopenharmony_ci sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { 62628c2ecf20Sopenharmony_ci /* we can skip this block, and probably more */ 62638c2ecf20Sopenharmony_ci do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf)); 62648c2ecf20Sopenharmony_ci *skipped = 1; 62658c2ecf20Sopenharmony_ci /* keep things rounded to whole stripes */ 62668c2ecf20Sopenharmony_ci return sync_blocks * RAID5_STRIPE_SECTORS(conf); 62678c2ecf20Sopenharmony_ci } 62688c2ecf20Sopenharmony_ci 62698c2ecf20Sopenharmony_ci md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); 62708c2ecf20Sopenharmony_ci 62718c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0); 62728c2ecf20Sopenharmony_ci if (sh == NULL) { 62738c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0); 62748c2ecf20Sopenharmony_ci /* make sure we don't swamp the stripe cache if someone else 62758c2ecf20Sopenharmony_ci * is trying to get access 62768c2ecf20Sopenharmony_ci */ 62778c2ecf20Sopenharmony_ci schedule_timeout_uninterruptible(1); 62788c2ecf20Sopenharmony_ci } 62798c2ecf20Sopenharmony_ci /* Need to check if array will still be degraded after recovery/resync 62808c2ecf20Sopenharmony_ci * Note in case of > 1 drive failures it's possible we're rebuilding 62818c2ecf20Sopenharmony_ci * one drive while leaving another faulty drive in array. 62828c2ecf20Sopenharmony_ci */ 62838c2ecf20Sopenharmony_ci rcu_read_lock(); 62848c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 62858c2ecf20Sopenharmony_ci struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev); 62868c2ecf20Sopenharmony_ci 62878c2ecf20Sopenharmony_ci if (rdev == NULL || test_bit(Faulty, &rdev->flags)) 62888c2ecf20Sopenharmony_ci still_degraded = 1; 62898c2ecf20Sopenharmony_ci } 62908c2ecf20Sopenharmony_ci rcu_read_unlock(); 62918c2ecf20Sopenharmony_ci 62928c2ecf20Sopenharmony_ci md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); 62938c2ecf20Sopenharmony_ci 62948c2ecf20Sopenharmony_ci set_bit(STRIPE_SYNC_REQUESTED, &sh->state); 62958c2ecf20Sopenharmony_ci set_bit(STRIPE_HANDLE, &sh->state); 62968c2ecf20Sopenharmony_ci 62978c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 62988c2ecf20Sopenharmony_ci 62998c2ecf20Sopenharmony_ci return RAID5_STRIPE_SECTORS(conf); 63008c2ecf20Sopenharmony_ci} 63018c2ecf20Sopenharmony_ci 63028c2ecf20Sopenharmony_cistatic int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, 63038c2ecf20Sopenharmony_ci unsigned int offset) 63048c2ecf20Sopenharmony_ci{ 63058c2ecf20Sopenharmony_ci /* We may not be able to submit a whole bio at once as there 63068c2ecf20Sopenharmony_ci * may not be enough stripe_heads available. 63078c2ecf20Sopenharmony_ci * We cannot pre-allocate enough stripe_heads as we may need 63088c2ecf20Sopenharmony_ci * more than exist in the cache (if we allow ever large chunks). 63098c2ecf20Sopenharmony_ci * So we do one stripe head at a time and record in 63108c2ecf20Sopenharmony_ci * ->bi_hw_segments how many have been done. 63118c2ecf20Sopenharmony_ci * 63128c2ecf20Sopenharmony_ci * We *know* that this entire raid_bio is in one chunk, so 63138c2ecf20Sopenharmony_ci * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. 63148c2ecf20Sopenharmony_ci */ 63158c2ecf20Sopenharmony_ci struct stripe_head *sh; 63168c2ecf20Sopenharmony_ci int dd_idx; 63178c2ecf20Sopenharmony_ci sector_t sector, logical_sector, last_sector; 63188c2ecf20Sopenharmony_ci int scnt = 0; 63198c2ecf20Sopenharmony_ci int handled = 0; 63208c2ecf20Sopenharmony_ci 63218c2ecf20Sopenharmony_ci logical_sector = raid_bio->bi_iter.bi_sector & 63228c2ecf20Sopenharmony_ci ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); 63238c2ecf20Sopenharmony_ci sector = raid5_compute_sector(conf, logical_sector, 63248c2ecf20Sopenharmony_ci 0, &dd_idx, NULL); 63258c2ecf20Sopenharmony_ci last_sector = bio_end_sector(raid_bio); 63268c2ecf20Sopenharmony_ci 63278c2ecf20Sopenharmony_ci for (; logical_sector < last_sector; 63288c2ecf20Sopenharmony_ci logical_sector += RAID5_STRIPE_SECTORS(conf), 63298c2ecf20Sopenharmony_ci sector += RAID5_STRIPE_SECTORS(conf), 63308c2ecf20Sopenharmony_ci scnt++) { 63318c2ecf20Sopenharmony_ci 63328c2ecf20Sopenharmony_ci if (scnt < offset) 63338c2ecf20Sopenharmony_ci /* already done this stripe */ 63348c2ecf20Sopenharmony_ci continue; 63358c2ecf20Sopenharmony_ci 63368c2ecf20Sopenharmony_ci sh = raid5_get_active_stripe(conf, sector, 0, 1, 1); 63378c2ecf20Sopenharmony_ci 63388c2ecf20Sopenharmony_ci if (!sh) { 63398c2ecf20Sopenharmony_ci /* failed to get a stripe - must wait */ 63408c2ecf20Sopenharmony_ci conf->retry_read_aligned = raid_bio; 63418c2ecf20Sopenharmony_ci conf->retry_read_offset = scnt; 63428c2ecf20Sopenharmony_ci return handled; 63438c2ecf20Sopenharmony_ci } 63448c2ecf20Sopenharmony_ci 63458c2ecf20Sopenharmony_ci if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) { 63468c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 63478c2ecf20Sopenharmony_ci conf->retry_read_aligned = raid_bio; 63488c2ecf20Sopenharmony_ci conf->retry_read_offset = scnt; 63498c2ecf20Sopenharmony_ci return handled; 63508c2ecf20Sopenharmony_ci } 63518c2ecf20Sopenharmony_ci 63528c2ecf20Sopenharmony_ci set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); 63538c2ecf20Sopenharmony_ci handle_stripe(sh); 63548c2ecf20Sopenharmony_ci raid5_release_stripe(sh); 63558c2ecf20Sopenharmony_ci handled++; 63568c2ecf20Sopenharmony_ci } 63578c2ecf20Sopenharmony_ci 63588c2ecf20Sopenharmony_ci bio_endio(raid_bio); 63598c2ecf20Sopenharmony_ci 63608c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&conf->active_aligned_reads)) 63618c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_quiescent); 63628c2ecf20Sopenharmony_ci return handled; 63638c2ecf20Sopenharmony_ci} 63648c2ecf20Sopenharmony_ci 63658c2ecf20Sopenharmony_cistatic int handle_active_stripes(struct r5conf *conf, int group, 63668c2ecf20Sopenharmony_ci struct r5worker *worker, 63678c2ecf20Sopenharmony_ci struct list_head *temp_inactive_list) 63688c2ecf20Sopenharmony_ci __releases(&conf->device_lock) 63698c2ecf20Sopenharmony_ci __acquires(&conf->device_lock) 63708c2ecf20Sopenharmony_ci{ 63718c2ecf20Sopenharmony_ci struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; 63728c2ecf20Sopenharmony_ci int i, batch_size = 0, hash; 63738c2ecf20Sopenharmony_ci bool release_inactive = false; 63748c2ecf20Sopenharmony_ci 63758c2ecf20Sopenharmony_ci while (batch_size < MAX_STRIPE_BATCH && 63768c2ecf20Sopenharmony_ci (sh = __get_priority_stripe(conf, group)) != NULL) 63778c2ecf20Sopenharmony_ci batch[batch_size++] = sh; 63788c2ecf20Sopenharmony_ci 63798c2ecf20Sopenharmony_ci if (batch_size == 0) { 63808c2ecf20Sopenharmony_ci for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) 63818c2ecf20Sopenharmony_ci if (!list_empty(temp_inactive_list + i)) 63828c2ecf20Sopenharmony_ci break; 63838c2ecf20Sopenharmony_ci if (i == NR_STRIPE_HASH_LOCKS) { 63848c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 63858c2ecf20Sopenharmony_ci log_flush_stripe_to_raid(conf); 63868c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 63878c2ecf20Sopenharmony_ci return batch_size; 63888c2ecf20Sopenharmony_ci } 63898c2ecf20Sopenharmony_ci release_inactive = true; 63908c2ecf20Sopenharmony_ci } 63918c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 63928c2ecf20Sopenharmony_ci 63938c2ecf20Sopenharmony_ci release_inactive_stripe_list(conf, temp_inactive_list, 63948c2ecf20Sopenharmony_ci NR_STRIPE_HASH_LOCKS); 63958c2ecf20Sopenharmony_ci 63968c2ecf20Sopenharmony_ci r5l_flush_stripe_to_raid(conf->log); 63978c2ecf20Sopenharmony_ci if (release_inactive) { 63988c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 63998c2ecf20Sopenharmony_ci return 0; 64008c2ecf20Sopenharmony_ci } 64018c2ecf20Sopenharmony_ci 64028c2ecf20Sopenharmony_ci for (i = 0; i < batch_size; i++) 64038c2ecf20Sopenharmony_ci handle_stripe(batch[i]); 64048c2ecf20Sopenharmony_ci log_write_stripe_run(conf); 64058c2ecf20Sopenharmony_ci 64068c2ecf20Sopenharmony_ci cond_resched(); 64078c2ecf20Sopenharmony_ci 64088c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 64098c2ecf20Sopenharmony_ci for (i = 0; i < batch_size; i++) { 64108c2ecf20Sopenharmony_ci hash = batch[i]->hash_lock_index; 64118c2ecf20Sopenharmony_ci __release_stripe(conf, batch[i], &temp_inactive_list[hash]); 64128c2ecf20Sopenharmony_ci } 64138c2ecf20Sopenharmony_ci return batch_size; 64148c2ecf20Sopenharmony_ci} 64158c2ecf20Sopenharmony_ci 64168c2ecf20Sopenharmony_cistatic void raid5_do_work(struct work_struct *work) 64178c2ecf20Sopenharmony_ci{ 64188c2ecf20Sopenharmony_ci struct r5worker *worker = container_of(work, struct r5worker, work); 64198c2ecf20Sopenharmony_ci struct r5worker_group *group = worker->group; 64208c2ecf20Sopenharmony_ci struct r5conf *conf = group->conf; 64218c2ecf20Sopenharmony_ci struct mddev *mddev = conf->mddev; 64228c2ecf20Sopenharmony_ci int group_id = group - conf->worker_groups; 64238c2ecf20Sopenharmony_ci int handled; 64248c2ecf20Sopenharmony_ci struct blk_plug plug; 64258c2ecf20Sopenharmony_ci 64268c2ecf20Sopenharmony_ci pr_debug("+++ raid5worker active\n"); 64278c2ecf20Sopenharmony_ci 64288c2ecf20Sopenharmony_ci blk_start_plug(&plug); 64298c2ecf20Sopenharmony_ci handled = 0; 64308c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 64318c2ecf20Sopenharmony_ci while (1) { 64328c2ecf20Sopenharmony_ci int batch_size, released; 64338c2ecf20Sopenharmony_ci 64348c2ecf20Sopenharmony_ci released = release_stripe_list(conf, worker->temp_inactive_list); 64358c2ecf20Sopenharmony_ci 64368c2ecf20Sopenharmony_ci batch_size = handle_active_stripes(conf, group_id, worker, 64378c2ecf20Sopenharmony_ci worker->temp_inactive_list); 64388c2ecf20Sopenharmony_ci worker->working = false; 64398c2ecf20Sopenharmony_ci if (!batch_size && !released) 64408c2ecf20Sopenharmony_ci break; 64418c2ecf20Sopenharmony_ci handled += batch_size; 64428c2ecf20Sopenharmony_ci wait_event_lock_irq(mddev->sb_wait, 64438c2ecf20Sopenharmony_ci !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), 64448c2ecf20Sopenharmony_ci conf->device_lock); 64458c2ecf20Sopenharmony_ci } 64468c2ecf20Sopenharmony_ci pr_debug("%d stripes handled\n", handled); 64478c2ecf20Sopenharmony_ci 64488c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 64498c2ecf20Sopenharmony_ci 64508c2ecf20Sopenharmony_ci flush_deferred_bios(conf); 64518c2ecf20Sopenharmony_ci 64528c2ecf20Sopenharmony_ci r5l_flush_stripe_to_raid(conf->log); 64538c2ecf20Sopenharmony_ci 64548c2ecf20Sopenharmony_ci async_tx_issue_pending_all(); 64558c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 64568c2ecf20Sopenharmony_ci 64578c2ecf20Sopenharmony_ci pr_debug("--- raid5worker inactive\n"); 64588c2ecf20Sopenharmony_ci} 64598c2ecf20Sopenharmony_ci 64608c2ecf20Sopenharmony_ci/* 64618c2ecf20Sopenharmony_ci * This is our raid5 kernel thread. 64628c2ecf20Sopenharmony_ci * 64638c2ecf20Sopenharmony_ci * We scan the hash table for stripes which can be handled now. 64648c2ecf20Sopenharmony_ci * During the scan, completed stripes are saved for us by the interrupt 64658c2ecf20Sopenharmony_ci * handler, so that they will not have to wait for our next wakeup. 64668c2ecf20Sopenharmony_ci */ 64678c2ecf20Sopenharmony_cistatic void raid5d(struct md_thread *thread) 64688c2ecf20Sopenharmony_ci{ 64698c2ecf20Sopenharmony_ci struct mddev *mddev = thread->mddev; 64708c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 64718c2ecf20Sopenharmony_ci int handled; 64728c2ecf20Sopenharmony_ci struct blk_plug plug; 64738c2ecf20Sopenharmony_ci 64748c2ecf20Sopenharmony_ci pr_debug("+++ raid5d active\n"); 64758c2ecf20Sopenharmony_ci 64768c2ecf20Sopenharmony_ci md_check_recovery(mddev); 64778c2ecf20Sopenharmony_ci 64788c2ecf20Sopenharmony_ci blk_start_plug(&plug); 64798c2ecf20Sopenharmony_ci handled = 0; 64808c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 64818c2ecf20Sopenharmony_ci while (1) { 64828c2ecf20Sopenharmony_ci struct bio *bio; 64838c2ecf20Sopenharmony_ci int batch_size, released; 64848c2ecf20Sopenharmony_ci unsigned int offset; 64858c2ecf20Sopenharmony_ci 64868c2ecf20Sopenharmony_ci released = release_stripe_list(conf, conf->temp_inactive_list); 64878c2ecf20Sopenharmony_ci if (released) 64888c2ecf20Sopenharmony_ci clear_bit(R5_DID_ALLOC, &conf->cache_state); 64898c2ecf20Sopenharmony_ci 64908c2ecf20Sopenharmony_ci if ( 64918c2ecf20Sopenharmony_ci !list_empty(&conf->bitmap_list)) { 64928c2ecf20Sopenharmony_ci /* Now is a good time to flush some bitmap updates */ 64938c2ecf20Sopenharmony_ci conf->seq_flush++; 64948c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 64958c2ecf20Sopenharmony_ci md_bitmap_unplug(mddev->bitmap); 64968c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 64978c2ecf20Sopenharmony_ci conf->seq_write = conf->seq_flush; 64988c2ecf20Sopenharmony_ci activate_bit_delay(conf, conf->temp_inactive_list); 64998c2ecf20Sopenharmony_ci } 65008c2ecf20Sopenharmony_ci raid5_activate_delayed(conf); 65018c2ecf20Sopenharmony_ci 65028c2ecf20Sopenharmony_ci while ((bio = remove_bio_from_retry(conf, &offset))) { 65038c2ecf20Sopenharmony_ci int ok; 65048c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 65058c2ecf20Sopenharmony_ci ok = retry_aligned_read(conf, bio, offset); 65068c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 65078c2ecf20Sopenharmony_ci if (!ok) 65088c2ecf20Sopenharmony_ci break; 65098c2ecf20Sopenharmony_ci handled++; 65108c2ecf20Sopenharmony_ci } 65118c2ecf20Sopenharmony_ci 65128c2ecf20Sopenharmony_ci batch_size = handle_active_stripes(conf, ANY_GROUP, NULL, 65138c2ecf20Sopenharmony_ci conf->temp_inactive_list); 65148c2ecf20Sopenharmony_ci if (!batch_size && !released) 65158c2ecf20Sopenharmony_ci break; 65168c2ecf20Sopenharmony_ci handled += batch_size; 65178c2ecf20Sopenharmony_ci 65188c2ecf20Sopenharmony_ci if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) { 65198c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 65208c2ecf20Sopenharmony_ci md_check_recovery(mddev); 65218c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 65228c2ecf20Sopenharmony_ci } 65238c2ecf20Sopenharmony_ci } 65248c2ecf20Sopenharmony_ci pr_debug("%d stripes handled\n", handled); 65258c2ecf20Sopenharmony_ci 65268c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 65278c2ecf20Sopenharmony_ci if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && 65288c2ecf20Sopenharmony_ci mutex_trylock(&conf->cache_size_mutex)) { 65298c2ecf20Sopenharmony_ci grow_one_stripe(conf, __GFP_NOWARN); 65308c2ecf20Sopenharmony_ci /* Set flag even if allocation failed. This helps 65318c2ecf20Sopenharmony_ci * slow down allocation requests when mem is short 65328c2ecf20Sopenharmony_ci */ 65338c2ecf20Sopenharmony_ci set_bit(R5_DID_ALLOC, &conf->cache_state); 65348c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 65358c2ecf20Sopenharmony_ci } 65368c2ecf20Sopenharmony_ci 65378c2ecf20Sopenharmony_ci flush_deferred_bios(conf); 65388c2ecf20Sopenharmony_ci 65398c2ecf20Sopenharmony_ci r5l_flush_stripe_to_raid(conf->log); 65408c2ecf20Sopenharmony_ci 65418c2ecf20Sopenharmony_ci async_tx_issue_pending_all(); 65428c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 65438c2ecf20Sopenharmony_ci 65448c2ecf20Sopenharmony_ci pr_debug("--- raid5d inactive\n"); 65458c2ecf20Sopenharmony_ci} 65468c2ecf20Sopenharmony_ci 65478c2ecf20Sopenharmony_cistatic ssize_t 65488c2ecf20Sopenharmony_ciraid5_show_stripe_cache_size(struct mddev *mddev, char *page) 65498c2ecf20Sopenharmony_ci{ 65508c2ecf20Sopenharmony_ci struct r5conf *conf; 65518c2ecf20Sopenharmony_ci int ret = 0; 65528c2ecf20Sopenharmony_ci spin_lock(&mddev->lock); 65538c2ecf20Sopenharmony_ci conf = mddev->private; 65548c2ecf20Sopenharmony_ci if (conf) 65558c2ecf20Sopenharmony_ci ret = sprintf(page, "%d\n", conf->min_nr_stripes); 65568c2ecf20Sopenharmony_ci spin_unlock(&mddev->lock); 65578c2ecf20Sopenharmony_ci return ret; 65588c2ecf20Sopenharmony_ci} 65598c2ecf20Sopenharmony_ci 65608c2ecf20Sopenharmony_ciint 65618c2ecf20Sopenharmony_ciraid5_set_cache_size(struct mddev *mddev, int size) 65628c2ecf20Sopenharmony_ci{ 65638c2ecf20Sopenharmony_ci int result = 0; 65648c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 65658c2ecf20Sopenharmony_ci 65668c2ecf20Sopenharmony_ci if (size <= 16 || size > 32768) 65678c2ecf20Sopenharmony_ci return -EINVAL; 65688c2ecf20Sopenharmony_ci 65698c2ecf20Sopenharmony_ci conf->min_nr_stripes = size; 65708c2ecf20Sopenharmony_ci mutex_lock(&conf->cache_size_mutex); 65718c2ecf20Sopenharmony_ci while (size < conf->max_nr_stripes && 65728c2ecf20Sopenharmony_ci drop_one_stripe(conf)) 65738c2ecf20Sopenharmony_ci ; 65748c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 65758c2ecf20Sopenharmony_ci 65768c2ecf20Sopenharmony_ci md_allow_write(mddev); 65778c2ecf20Sopenharmony_ci 65788c2ecf20Sopenharmony_ci mutex_lock(&conf->cache_size_mutex); 65798c2ecf20Sopenharmony_ci while (size > conf->max_nr_stripes) 65808c2ecf20Sopenharmony_ci if (!grow_one_stripe(conf, GFP_KERNEL)) { 65818c2ecf20Sopenharmony_ci conf->min_nr_stripes = conf->max_nr_stripes; 65828c2ecf20Sopenharmony_ci result = -ENOMEM; 65838c2ecf20Sopenharmony_ci break; 65848c2ecf20Sopenharmony_ci } 65858c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 65868c2ecf20Sopenharmony_ci 65878c2ecf20Sopenharmony_ci return result; 65888c2ecf20Sopenharmony_ci} 65898c2ecf20Sopenharmony_ciEXPORT_SYMBOL(raid5_set_cache_size); 65908c2ecf20Sopenharmony_ci 65918c2ecf20Sopenharmony_cistatic ssize_t 65928c2ecf20Sopenharmony_ciraid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len) 65938c2ecf20Sopenharmony_ci{ 65948c2ecf20Sopenharmony_ci struct r5conf *conf; 65958c2ecf20Sopenharmony_ci unsigned long new; 65968c2ecf20Sopenharmony_ci int err; 65978c2ecf20Sopenharmony_ci 65988c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 65998c2ecf20Sopenharmony_ci return -EINVAL; 66008c2ecf20Sopenharmony_ci if (kstrtoul(page, 10, &new)) 66018c2ecf20Sopenharmony_ci return -EINVAL; 66028c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 66038c2ecf20Sopenharmony_ci if (err) 66048c2ecf20Sopenharmony_ci return err; 66058c2ecf20Sopenharmony_ci conf = mddev->private; 66068c2ecf20Sopenharmony_ci if (!conf) 66078c2ecf20Sopenharmony_ci err = -ENODEV; 66088c2ecf20Sopenharmony_ci else 66098c2ecf20Sopenharmony_ci err = raid5_set_cache_size(mddev, new); 66108c2ecf20Sopenharmony_ci mddev_unlock(mddev); 66118c2ecf20Sopenharmony_ci 66128c2ecf20Sopenharmony_ci return err ?: len; 66138c2ecf20Sopenharmony_ci} 66148c2ecf20Sopenharmony_ci 66158c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 66168c2ecf20Sopenharmony_ciraid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, 66178c2ecf20Sopenharmony_ci raid5_show_stripe_cache_size, 66188c2ecf20Sopenharmony_ci raid5_store_stripe_cache_size); 66198c2ecf20Sopenharmony_ci 66208c2ecf20Sopenharmony_cistatic ssize_t 66218c2ecf20Sopenharmony_ciraid5_show_rmw_level(struct mddev *mddev, char *page) 66228c2ecf20Sopenharmony_ci{ 66238c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 66248c2ecf20Sopenharmony_ci if (conf) 66258c2ecf20Sopenharmony_ci return sprintf(page, "%d\n", conf->rmw_level); 66268c2ecf20Sopenharmony_ci else 66278c2ecf20Sopenharmony_ci return 0; 66288c2ecf20Sopenharmony_ci} 66298c2ecf20Sopenharmony_ci 66308c2ecf20Sopenharmony_cistatic ssize_t 66318c2ecf20Sopenharmony_ciraid5_store_rmw_level(struct mddev *mddev, const char *page, size_t len) 66328c2ecf20Sopenharmony_ci{ 66338c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 66348c2ecf20Sopenharmony_ci unsigned long new; 66358c2ecf20Sopenharmony_ci 66368c2ecf20Sopenharmony_ci if (!conf) 66378c2ecf20Sopenharmony_ci return -ENODEV; 66388c2ecf20Sopenharmony_ci 66398c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 66408c2ecf20Sopenharmony_ci return -EINVAL; 66418c2ecf20Sopenharmony_ci 66428c2ecf20Sopenharmony_ci if (kstrtoul(page, 10, &new)) 66438c2ecf20Sopenharmony_ci return -EINVAL; 66448c2ecf20Sopenharmony_ci 66458c2ecf20Sopenharmony_ci if (new != PARITY_DISABLE_RMW && !raid6_call.xor_syndrome) 66468c2ecf20Sopenharmony_ci return -EINVAL; 66478c2ecf20Sopenharmony_ci 66488c2ecf20Sopenharmony_ci if (new != PARITY_DISABLE_RMW && 66498c2ecf20Sopenharmony_ci new != PARITY_ENABLE_RMW && 66508c2ecf20Sopenharmony_ci new != PARITY_PREFER_RMW) 66518c2ecf20Sopenharmony_ci return -EINVAL; 66528c2ecf20Sopenharmony_ci 66538c2ecf20Sopenharmony_ci conf->rmw_level = new; 66548c2ecf20Sopenharmony_ci return len; 66558c2ecf20Sopenharmony_ci} 66568c2ecf20Sopenharmony_ci 66578c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 66588c2ecf20Sopenharmony_ciraid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR, 66598c2ecf20Sopenharmony_ci raid5_show_rmw_level, 66608c2ecf20Sopenharmony_ci raid5_store_rmw_level); 66618c2ecf20Sopenharmony_ci 66628c2ecf20Sopenharmony_cistatic ssize_t 66638c2ecf20Sopenharmony_ciraid5_show_stripe_size(struct mddev *mddev, char *page) 66648c2ecf20Sopenharmony_ci{ 66658c2ecf20Sopenharmony_ci struct r5conf *conf; 66668c2ecf20Sopenharmony_ci int ret = 0; 66678c2ecf20Sopenharmony_ci 66688c2ecf20Sopenharmony_ci spin_lock(&mddev->lock); 66698c2ecf20Sopenharmony_ci conf = mddev->private; 66708c2ecf20Sopenharmony_ci if (conf) 66718c2ecf20Sopenharmony_ci ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf)); 66728c2ecf20Sopenharmony_ci spin_unlock(&mddev->lock); 66738c2ecf20Sopenharmony_ci return ret; 66748c2ecf20Sopenharmony_ci} 66758c2ecf20Sopenharmony_ci 66768c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 66778c2ecf20Sopenharmony_cistatic ssize_t 66788c2ecf20Sopenharmony_ciraid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) 66798c2ecf20Sopenharmony_ci{ 66808c2ecf20Sopenharmony_ci struct r5conf *conf; 66818c2ecf20Sopenharmony_ci unsigned long new; 66828c2ecf20Sopenharmony_ci int err; 66838c2ecf20Sopenharmony_ci int size; 66848c2ecf20Sopenharmony_ci 66858c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 66868c2ecf20Sopenharmony_ci return -EINVAL; 66878c2ecf20Sopenharmony_ci if (kstrtoul(page, 10, &new)) 66888c2ecf20Sopenharmony_ci return -EINVAL; 66898c2ecf20Sopenharmony_ci 66908c2ecf20Sopenharmony_ci /* 66918c2ecf20Sopenharmony_ci * The value should not be bigger than PAGE_SIZE. It requires to 66928c2ecf20Sopenharmony_ci * be multiple of DEFAULT_STRIPE_SIZE and the value should be power 66938c2ecf20Sopenharmony_ci * of two. 66948c2ecf20Sopenharmony_ci */ 66958c2ecf20Sopenharmony_ci if (new % DEFAULT_STRIPE_SIZE != 0 || 66968c2ecf20Sopenharmony_ci new > PAGE_SIZE || new == 0 || 66978c2ecf20Sopenharmony_ci new != roundup_pow_of_two(new)) 66988c2ecf20Sopenharmony_ci return -EINVAL; 66998c2ecf20Sopenharmony_ci 67008c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 67018c2ecf20Sopenharmony_ci if (err) 67028c2ecf20Sopenharmony_ci return err; 67038c2ecf20Sopenharmony_ci 67048c2ecf20Sopenharmony_ci conf = mddev->private; 67058c2ecf20Sopenharmony_ci if (!conf) { 67068c2ecf20Sopenharmony_ci err = -ENODEV; 67078c2ecf20Sopenharmony_ci goto out_unlock; 67088c2ecf20Sopenharmony_ci } 67098c2ecf20Sopenharmony_ci 67108c2ecf20Sopenharmony_ci if (new == conf->stripe_size) 67118c2ecf20Sopenharmony_ci goto out_unlock; 67128c2ecf20Sopenharmony_ci 67138c2ecf20Sopenharmony_ci pr_debug("md/raid: change stripe_size from %lu to %lu\n", 67148c2ecf20Sopenharmony_ci conf->stripe_size, new); 67158c2ecf20Sopenharmony_ci 67168c2ecf20Sopenharmony_ci if (mddev->sync_thread || 67178c2ecf20Sopenharmony_ci test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 67188c2ecf20Sopenharmony_ci mddev->reshape_position != MaxSector || 67198c2ecf20Sopenharmony_ci mddev->sysfs_active) { 67208c2ecf20Sopenharmony_ci err = -EBUSY; 67218c2ecf20Sopenharmony_ci goto out_unlock; 67228c2ecf20Sopenharmony_ci } 67238c2ecf20Sopenharmony_ci 67248c2ecf20Sopenharmony_ci mddev_suspend(mddev); 67258c2ecf20Sopenharmony_ci mutex_lock(&conf->cache_size_mutex); 67268c2ecf20Sopenharmony_ci size = conf->max_nr_stripes; 67278c2ecf20Sopenharmony_ci 67288c2ecf20Sopenharmony_ci shrink_stripes(conf); 67298c2ecf20Sopenharmony_ci 67308c2ecf20Sopenharmony_ci conf->stripe_size = new; 67318c2ecf20Sopenharmony_ci conf->stripe_shift = ilog2(new) - 9; 67328c2ecf20Sopenharmony_ci conf->stripe_sectors = new >> 9; 67338c2ecf20Sopenharmony_ci if (grow_stripes(conf, size)) { 67348c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: couldn't allocate buffers\n", 67358c2ecf20Sopenharmony_ci mdname(mddev)); 67368c2ecf20Sopenharmony_ci err = -ENOMEM; 67378c2ecf20Sopenharmony_ci } 67388c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 67398c2ecf20Sopenharmony_ci mddev_resume(mddev); 67408c2ecf20Sopenharmony_ci 67418c2ecf20Sopenharmony_ciout_unlock: 67428c2ecf20Sopenharmony_ci mddev_unlock(mddev); 67438c2ecf20Sopenharmony_ci return err ?: len; 67448c2ecf20Sopenharmony_ci} 67458c2ecf20Sopenharmony_ci 67468c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 67478c2ecf20Sopenharmony_ciraid5_stripe_size = __ATTR(stripe_size, 0644, 67488c2ecf20Sopenharmony_ci raid5_show_stripe_size, 67498c2ecf20Sopenharmony_ci raid5_store_stripe_size); 67508c2ecf20Sopenharmony_ci#else 67518c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 67528c2ecf20Sopenharmony_ciraid5_stripe_size = __ATTR(stripe_size, 0444, 67538c2ecf20Sopenharmony_ci raid5_show_stripe_size, 67548c2ecf20Sopenharmony_ci NULL); 67558c2ecf20Sopenharmony_ci#endif 67568c2ecf20Sopenharmony_ci 67578c2ecf20Sopenharmony_cistatic ssize_t 67588c2ecf20Sopenharmony_ciraid5_show_preread_threshold(struct mddev *mddev, char *page) 67598c2ecf20Sopenharmony_ci{ 67608c2ecf20Sopenharmony_ci struct r5conf *conf; 67618c2ecf20Sopenharmony_ci int ret = 0; 67628c2ecf20Sopenharmony_ci spin_lock(&mddev->lock); 67638c2ecf20Sopenharmony_ci conf = mddev->private; 67648c2ecf20Sopenharmony_ci if (conf) 67658c2ecf20Sopenharmony_ci ret = sprintf(page, "%d\n", conf->bypass_threshold); 67668c2ecf20Sopenharmony_ci spin_unlock(&mddev->lock); 67678c2ecf20Sopenharmony_ci return ret; 67688c2ecf20Sopenharmony_ci} 67698c2ecf20Sopenharmony_ci 67708c2ecf20Sopenharmony_cistatic ssize_t 67718c2ecf20Sopenharmony_ciraid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len) 67728c2ecf20Sopenharmony_ci{ 67738c2ecf20Sopenharmony_ci struct r5conf *conf; 67748c2ecf20Sopenharmony_ci unsigned long new; 67758c2ecf20Sopenharmony_ci int err; 67768c2ecf20Sopenharmony_ci 67778c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 67788c2ecf20Sopenharmony_ci return -EINVAL; 67798c2ecf20Sopenharmony_ci if (kstrtoul(page, 10, &new)) 67808c2ecf20Sopenharmony_ci return -EINVAL; 67818c2ecf20Sopenharmony_ci 67828c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 67838c2ecf20Sopenharmony_ci if (err) 67848c2ecf20Sopenharmony_ci return err; 67858c2ecf20Sopenharmony_ci conf = mddev->private; 67868c2ecf20Sopenharmony_ci if (!conf) 67878c2ecf20Sopenharmony_ci err = -ENODEV; 67888c2ecf20Sopenharmony_ci else if (new > conf->min_nr_stripes) 67898c2ecf20Sopenharmony_ci err = -EINVAL; 67908c2ecf20Sopenharmony_ci else 67918c2ecf20Sopenharmony_ci conf->bypass_threshold = new; 67928c2ecf20Sopenharmony_ci mddev_unlock(mddev); 67938c2ecf20Sopenharmony_ci return err ?: len; 67948c2ecf20Sopenharmony_ci} 67958c2ecf20Sopenharmony_ci 67968c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 67978c2ecf20Sopenharmony_ciraid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, 67988c2ecf20Sopenharmony_ci S_IRUGO | S_IWUSR, 67998c2ecf20Sopenharmony_ci raid5_show_preread_threshold, 68008c2ecf20Sopenharmony_ci raid5_store_preread_threshold); 68018c2ecf20Sopenharmony_ci 68028c2ecf20Sopenharmony_cistatic ssize_t 68038c2ecf20Sopenharmony_ciraid5_show_skip_copy(struct mddev *mddev, char *page) 68048c2ecf20Sopenharmony_ci{ 68058c2ecf20Sopenharmony_ci struct r5conf *conf; 68068c2ecf20Sopenharmony_ci int ret = 0; 68078c2ecf20Sopenharmony_ci spin_lock(&mddev->lock); 68088c2ecf20Sopenharmony_ci conf = mddev->private; 68098c2ecf20Sopenharmony_ci if (conf) 68108c2ecf20Sopenharmony_ci ret = sprintf(page, "%d\n", conf->skip_copy); 68118c2ecf20Sopenharmony_ci spin_unlock(&mddev->lock); 68128c2ecf20Sopenharmony_ci return ret; 68138c2ecf20Sopenharmony_ci} 68148c2ecf20Sopenharmony_ci 68158c2ecf20Sopenharmony_cistatic ssize_t 68168c2ecf20Sopenharmony_ciraid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) 68178c2ecf20Sopenharmony_ci{ 68188c2ecf20Sopenharmony_ci struct r5conf *conf; 68198c2ecf20Sopenharmony_ci unsigned long new; 68208c2ecf20Sopenharmony_ci int err; 68218c2ecf20Sopenharmony_ci 68228c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 68238c2ecf20Sopenharmony_ci return -EINVAL; 68248c2ecf20Sopenharmony_ci if (kstrtoul(page, 10, &new)) 68258c2ecf20Sopenharmony_ci return -EINVAL; 68268c2ecf20Sopenharmony_ci new = !!new; 68278c2ecf20Sopenharmony_ci 68288c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 68298c2ecf20Sopenharmony_ci if (err) 68308c2ecf20Sopenharmony_ci return err; 68318c2ecf20Sopenharmony_ci conf = mddev->private; 68328c2ecf20Sopenharmony_ci if (!conf) 68338c2ecf20Sopenharmony_ci err = -ENODEV; 68348c2ecf20Sopenharmony_ci else if (new != conf->skip_copy) { 68358c2ecf20Sopenharmony_ci struct request_queue *q = mddev->queue; 68368c2ecf20Sopenharmony_ci 68378c2ecf20Sopenharmony_ci mddev_suspend(mddev); 68388c2ecf20Sopenharmony_ci conf->skip_copy = new; 68398c2ecf20Sopenharmony_ci if (new) 68408c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); 68418c2ecf20Sopenharmony_ci else 68428c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); 68438c2ecf20Sopenharmony_ci mddev_resume(mddev); 68448c2ecf20Sopenharmony_ci } 68458c2ecf20Sopenharmony_ci mddev_unlock(mddev); 68468c2ecf20Sopenharmony_ci return err ?: len; 68478c2ecf20Sopenharmony_ci} 68488c2ecf20Sopenharmony_ci 68498c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 68508c2ecf20Sopenharmony_ciraid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR, 68518c2ecf20Sopenharmony_ci raid5_show_skip_copy, 68528c2ecf20Sopenharmony_ci raid5_store_skip_copy); 68538c2ecf20Sopenharmony_ci 68548c2ecf20Sopenharmony_cistatic ssize_t 68558c2ecf20Sopenharmony_cistripe_cache_active_show(struct mddev *mddev, char *page) 68568c2ecf20Sopenharmony_ci{ 68578c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 68588c2ecf20Sopenharmony_ci if (conf) 68598c2ecf20Sopenharmony_ci return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); 68608c2ecf20Sopenharmony_ci else 68618c2ecf20Sopenharmony_ci return 0; 68628c2ecf20Sopenharmony_ci} 68638c2ecf20Sopenharmony_ci 68648c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 68658c2ecf20Sopenharmony_ciraid5_stripecache_active = __ATTR_RO(stripe_cache_active); 68668c2ecf20Sopenharmony_ci 68678c2ecf20Sopenharmony_cistatic ssize_t 68688c2ecf20Sopenharmony_ciraid5_show_group_thread_cnt(struct mddev *mddev, char *page) 68698c2ecf20Sopenharmony_ci{ 68708c2ecf20Sopenharmony_ci struct r5conf *conf; 68718c2ecf20Sopenharmony_ci int ret = 0; 68728c2ecf20Sopenharmony_ci spin_lock(&mddev->lock); 68738c2ecf20Sopenharmony_ci conf = mddev->private; 68748c2ecf20Sopenharmony_ci if (conf) 68758c2ecf20Sopenharmony_ci ret = sprintf(page, "%d\n", conf->worker_cnt_per_group); 68768c2ecf20Sopenharmony_ci spin_unlock(&mddev->lock); 68778c2ecf20Sopenharmony_ci return ret; 68788c2ecf20Sopenharmony_ci} 68798c2ecf20Sopenharmony_ci 68808c2ecf20Sopenharmony_cistatic int alloc_thread_groups(struct r5conf *conf, int cnt, 68818c2ecf20Sopenharmony_ci int *group_cnt, 68828c2ecf20Sopenharmony_ci struct r5worker_group **worker_groups); 68838c2ecf20Sopenharmony_cistatic ssize_t 68848c2ecf20Sopenharmony_ciraid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) 68858c2ecf20Sopenharmony_ci{ 68868c2ecf20Sopenharmony_ci struct r5conf *conf; 68878c2ecf20Sopenharmony_ci unsigned int new; 68888c2ecf20Sopenharmony_ci int err; 68898c2ecf20Sopenharmony_ci struct r5worker_group *new_groups, *old_groups; 68908c2ecf20Sopenharmony_ci int group_cnt; 68918c2ecf20Sopenharmony_ci 68928c2ecf20Sopenharmony_ci if (len >= PAGE_SIZE) 68938c2ecf20Sopenharmony_ci return -EINVAL; 68948c2ecf20Sopenharmony_ci if (kstrtouint(page, 10, &new)) 68958c2ecf20Sopenharmony_ci return -EINVAL; 68968c2ecf20Sopenharmony_ci /* 8192 should be big enough */ 68978c2ecf20Sopenharmony_ci if (new > 8192) 68988c2ecf20Sopenharmony_ci return -EINVAL; 68998c2ecf20Sopenharmony_ci 69008c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 69018c2ecf20Sopenharmony_ci if (err) 69028c2ecf20Sopenharmony_ci return err; 69038c2ecf20Sopenharmony_ci conf = mddev->private; 69048c2ecf20Sopenharmony_ci if (!conf) 69058c2ecf20Sopenharmony_ci err = -ENODEV; 69068c2ecf20Sopenharmony_ci else if (new != conf->worker_cnt_per_group) { 69078c2ecf20Sopenharmony_ci mddev_suspend(mddev); 69088c2ecf20Sopenharmony_ci 69098c2ecf20Sopenharmony_ci old_groups = conf->worker_groups; 69108c2ecf20Sopenharmony_ci if (old_groups) 69118c2ecf20Sopenharmony_ci flush_workqueue(raid5_wq); 69128c2ecf20Sopenharmony_ci 69138c2ecf20Sopenharmony_ci err = alloc_thread_groups(conf, new, &group_cnt, &new_groups); 69148c2ecf20Sopenharmony_ci if (!err) { 69158c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 69168c2ecf20Sopenharmony_ci conf->group_cnt = group_cnt; 69178c2ecf20Sopenharmony_ci conf->worker_cnt_per_group = new; 69188c2ecf20Sopenharmony_ci conf->worker_groups = new_groups; 69198c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 69208c2ecf20Sopenharmony_ci 69218c2ecf20Sopenharmony_ci if (old_groups) 69228c2ecf20Sopenharmony_ci kfree(old_groups[0].workers); 69238c2ecf20Sopenharmony_ci kfree(old_groups); 69248c2ecf20Sopenharmony_ci } 69258c2ecf20Sopenharmony_ci mddev_resume(mddev); 69268c2ecf20Sopenharmony_ci } 69278c2ecf20Sopenharmony_ci mddev_unlock(mddev); 69288c2ecf20Sopenharmony_ci 69298c2ecf20Sopenharmony_ci return err ?: len; 69308c2ecf20Sopenharmony_ci} 69318c2ecf20Sopenharmony_ci 69328c2ecf20Sopenharmony_cistatic struct md_sysfs_entry 69338c2ecf20Sopenharmony_ciraid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR, 69348c2ecf20Sopenharmony_ci raid5_show_group_thread_cnt, 69358c2ecf20Sopenharmony_ci raid5_store_group_thread_cnt); 69368c2ecf20Sopenharmony_ci 69378c2ecf20Sopenharmony_cistatic struct attribute *raid5_attrs[] = { 69388c2ecf20Sopenharmony_ci &raid5_stripecache_size.attr, 69398c2ecf20Sopenharmony_ci &raid5_stripecache_active.attr, 69408c2ecf20Sopenharmony_ci &raid5_preread_bypass_threshold.attr, 69418c2ecf20Sopenharmony_ci &raid5_group_thread_cnt.attr, 69428c2ecf20Sopenharmony_ci &raid5_skip_copy.attr, 69438c2ecf20Sopenharmony_ci &raid5_rmw_level.attr, 69448c2ecf20Sopenharmony_ci &raid5_stripe_size.attr, 69458c2ecf20Sopenharmony_ci &r5c_journal_mode.attr, 69468c2ecf20Sopenharmony_ci &ppl_write_hint.attr, 69478c2ecf20Sopenharmony_ci NULL, 69488c2ecf20Sopenharmony_ci}; 69498c2ecf20Sopenharmony_cistatic struct attribute_group raid5_attrs_group = { 69508c2ecf20Sopenharmony_ci .name = NULL, 69518c2ecf20Sopenharmony_ci .attrs = raid5_attrs, 69528c2ecf20Sopenharmony_ci}; 69538c2ecf20Sopenharmony_ci 69548c2ecf20Sopenharmony_cistatic int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt, 69558c2ecf20Sopenharmony_ci struct r5worker_group **worker_groups) 69568c2ecf20Sopenharmony_ci{ 69578c2ecf20Sopenharmony_ci int i, j, k; 69588c2ecf20Sopenharmony_ci ssize_t size; 69598c2ecf20Sopenharmony_ci struct r5worker *workers; 69608c2ecf20Sopenharmony_ci 69618c2ecf20Sopenharmony_ci if (cnt == 0) { 69628c2ecf20Sopenharmony_ci *group_cnt = 0; 69638c2ecf20Sopenharmony_ci *worker_groups = NULL; 69648c2ecf20Sopenharmony_ci return 0; 69658c2ecf20Sopenharmony_ci } 69668c2ecf20Sopenharmony_ci *group_cnt = num_possible_nodes(); 69678c2ecf20Sopenharmony_ci size = sizeof(struct r5worker) * cnt; 69688c2ecf20Sopenharmony_ci workers = kcalloc(size, *group_cnt, GFP_NOIO); 69698c2ecf20Sopenharmony_ci *worker_groups = kcalloc(*group_cnt, sizeof(struct r5worker_group), 69708c2ecf20Sopenharmony_ci GFP_NOIO); 69718c2ecf20Sopenharmony_ci if (!*worker_groups || !workers) { 69728c2ecf20Sopenharmony_ci kfree(workers); 69738c2ecf20Sopenharmony_ci kfree(*worker_groups); 69748c2ecf20Sopenharmony_ci return -ENOMEM; 69758c2ecf20Sopenharmony_ci } 69768c2ecf20Sopenharmony_ci 69778c2ecf20Sopenharmony_ci for (i = 0; i < *group_cnt; i++) { 69788c2ecf20Sopenharmony_ci struct r5worker_group *group; 69798c2ecf20Sopenharmony_ci 69808c2ecf20Sopenharmony_ci group = &(*worker_groups)[i]; 69818c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&group->handle_list); 69828c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&group->loprio_list); 69838c2ecf20Sopenharmony_ci group->conf = conf; 69848c2ecf20Sopenharmony_ci group->workers = workers + i * cnt; 69858c2ecf20Sopenharmony_ci 69868c2ecf20Sopenharmony_ci for (j = 0; j < cnt; j++) { 69878c2ecf20Sopenharmony_ci struct r5worker *worker = group->workers + j; 69888c2ecf20Sopenharmony_ci worker->group = group; 69898c2ecf20Sopenharmony_ci INIT_WORK(&worker->work, raid5_do_work); 69908c2ecf20Sopenharmony_ci 69918c2ecf20Sopenharmony_ci for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++) 69928c2ecf20Sopenharmony_ci INIT_LIST_HEAD(worker->temp_inactive_list + k); 69938c2ecf20Sopenharmony_ci } 69948c2ecf20Sopenharmony_ci } 69958c2ecf20Sopenharmony_ci 69968c2ecf20Sopenharmony_ci return 0; 69978c2ecf20Sopenharmony_ci} 69988c2ecf20Sopenharmony_ci 69998c2ecf20Sopenharmony_cistatic void free_thread_groups(struct r5conf *conf) 70008c2ecf20Sopenharmony_ci{ 70018c2ecf20Sopenharmony_ci if (conf->worker_groups) 70028c2ecf20Sopenharmony_ci kfree(conf->worker_groups[0].workers); 70038c2ecf20Sopenharmony_ci kfree(conf->worker_groups); 70048c2ecf20Sopenharmony_ci conf->worker_groups = NULL; 70058c2ecf20Sopenharmony_ci} 70068c2ecf20Sopenharmony_ci 70078c2ecf20Sopenharmony_cistatic sector_t 70088c2ecf20Sopenharmony_ciraid5_size(struct mddev *mddev, sector_t sectors, int raid_disks) 70098c2ecf20Sopenharmony_ci{ 70108c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 70118c2ecf20Sopenharmony_ci 70128c2ecf20Sopenharmony_ci if (!sectors) 70138c2ecf20Sopenharmony_ci sectors = mddev->dev_sectors; 70148c2ecf20Sopenharmony_ci if (!raid_disks) 70158c2ecf20Sopenharmony_ci /* size is defined by the smallest of previous and new size */ 70168c2ecf20Sopenharmony_ci raid_disks = min(conf->raid_disks, conf->previous_raid_disks); 70178c2ecf20Sopenharmony_ci 70188c2ecf20Sopenharmony_ci sectors &= ~((sector_t)conf->chunk_sectors - 1); 70198c2ecf20Sopenharmony_ci sectors &= ~((sector_t)conf->prev_chunk_sectors - 1); 70208c2ecf20Sopenharmony_ci return sectors * (raid_disks - conf->max_degraded); 70218c2ecf20Sopenharmony_ci} 70228c2ecf20Sopenharmony_ci 70238c2ecf20Sopenharmony_cistatic void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) 70248c2ecf20Sopenharmony_ci{ 70258c2ecf20Sopenharmony_ci safe_put_page(percpu->spare_page); 70268c2ecf20Sopenharmony_ci percpu->spare_page = NULL; 70278c2ecf20Sopenharmony_ci kvfree(percpu->scribble); 70288c2ecf20Sopenharmony_ci percpu->scribble = NULL; 70298c2ecf20Sopenharmony_ci} 70308c2ecf20Sopenharmony_ci 70318c2ecf20Sopenharmony_cistatic int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) 70328c2ecf20Sopenharmony_ci{ 70338c2ecf20Sopenharmony_ci if (conf->level == 6 && !percpu->spare_page) { 70348c2ecf20Sopenharmony_ci percpu->spare_page = alloc_page(GFP_KERNEL); 70358c2ecf20Sopenharmony_ci if (!percpu->spare_page) 70368c2ecf20Sopenharmony_ci return -ENOMEM; 70378c2ecf20Sopenharmony_ci } 70388c2ecf20Sopenharmony_ci 70398c2ecf20Sopenharmony_ci if (scribble_alloc(percpu, 70408c2ecf20Sopenharmony_ci max(conf->raid_disks, 70418c2ecf20Sopenharmony_ci conf->previous_raid_disks), 70428c2ecf20Sopenharmony_ci max(conf->chunk_sectors, 70438c2ecf20Sopenharmony_ci conf->prev_chunk_sectors) 70448c2ecf20Sopenharmony_ci / RAID5_STRIPE_SECTORS(conf))) { 70458c2ecf20Sopenharmony_ci free_scratch_buffer(conf, percpu); 70468c2ecf20Sopenharmony_ci return -ENOMEM; 70478c2ecf20Sopenharmony_ci } 70488c2ecf20Sopenharmony_ci 70498c2ecf20Sopenharmony_ci return 0; 70508c2ecf20Sopenharmony_ci} 70518c2ecf20Sopenharmony_ci 70528c2ecf20Sopenharmony_cistatic int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node) 70538c2ecf20Sopenharmony_ci{ 70548c2ecf20Sopenharmony_ci struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); 70558c2ecf20Sopenharmony_ci 70568c2ecf20Sopenharmony_ci free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); 70578c2ecf20Sopenharmony_ci return 0; 70588c2ecf20Sopenharmony_ci} 70598c2ecf20Sopenharmony_ci 70608c2ecf20Sopenharmony_cistatic void raid5_free_percpu(struct r5conf *conf) 70618c2ecf20Sopenharmony_ci{ 70628c2ecf20Sopenharmony_ci if (!conf->percpu) 70638c2ecf20Sopenharmony_ci return; 70648c2ecf20Sopenharmony_ci 70658c2ecf20Sopenharmony_ci cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); 70668c2ecf20Sopenharmony_ci free_percpu(conf->percpu); 70678c2ecf20Sopenharmony_ci} 70688c2ecf20Sopenharmony_ci 70698c2ecf20Sopenharmony_cistatic void free_conf(struct r5conf *conf) 70708c2ecf20Sopenharmony_ci{ 70718c2ecf20Sopenharmony_ci int i; 70728c2ecf20Sopenharmony_ci 70738c2ecf20Sopenharmony_ci log_exit(conf); 70748c2ecf20Sopenharmony_ci 70758c2ecf20Sopenharmony_ci unregister_shrinker(&conf->shrinker); 70768c2ecf20Sopenharmony_ci free_thread_groups(conf); 70778c2ecf20Sopenharmony_ci shrink_stripes(conf); 70788c2ecf20Sopenharmony_ci raid5_free_percpu(conf); 70798c2ecf20Sopenharmony_ci for (i = 0; i < conf->pool_size; i++) 70808c2ecf20Sopenharmony_ci if (conf->disks[i].extra_page) 70818c2ecf20Sopenharmony_ci put_page(conf->disks[i].extra_page); 70828c2ecf20Sopenharmony_ci kfree(conf->disks); 70838c2ecf20Sopenharmony_ci bioset_exit(&conf->bio_split); 70848c2ecf20Sopenharmony_ci kfree(conf->stripe_hashtbl); 70858c2ecf20Sopenharmony_ci kfree(conf->pending_data); 70868c2ecf20Sopenharmony_ci kfree(conf); 70878c2ecf20Sopenharmony_ci} 70888c2ecf20Sopenharmony_ci 70898c2ecf20Sopenharmony_cistatic int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) 70908c2ecf20Sopenharmony_ci{ 70918c2ecf20Sopenharmony_ci struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); 70928c2ecf20Sopenharmony_ci struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); 70938c2ecf20Sopenharmony_ci 70948c2ecf20Sopenharmony_ci if (alloc_scratch_buffer(conf, percpu)) { 70958c2ecf20Sopenharmony_ci pr_warn("%s: failed memory allocation for cpu%u\n", 70968c2ecf20Sopenharmony_ci __func__, cpu); 70978c2ecf20Sopenharmony_ci return -ENOMEM; 70988c2ecf20Sopenharmony_ci } 70998c2ecf20Sopenharmony_ci return 0; 71008c2ecf20Sopenharmony_ci} 71018c2ecf20Sopenharmony_ci 71028c2ecf20Sopenharmony_cistatic int raid5_alloc_percpu(struct r5conf *conf) 71038c2ecf20Sopenharmony_ci{ 71048c2ecf20Sopenharmony_ci int err = 0; 71058c2ecf20Sopenharmony_ci 71068c2ecf20Sopenharmony_ci conf->percpu = alloc_percpu(struct raid5_percpu); 71078c2ecf20Sopenharmony_ci if (!conf->percpu) 71088c2ecf20Sopenharmony_ci return -ENOMEM; 71098c2ecf20Sopenharmony_ci 71108c2ecf20Sopenharmony_ci err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); 71118c2ecf20Sopenharmony_ci if (!err) { 71128c2ecf20Sopenharmony_ci conf->scribble_disks = max(conf->raid_disks, 71138c2ecf20Sopenharmony_ci conf->previous_raid_disks); 71148c2ecf20Sopenharmony_ci conf->scribble_sectors = max(conf->chunk_sectors, 71158c2ecf20Sopenharmony_ci conf->prev_chunk_sectors); 71168c2ecf20Sopenharmony_ci } 71178c2ecf20Sopenharmony_ci return err; 71188c2ecf20Sopenharmony_ci} 71198c2ecf20Sopenharmony_ci 71208c2ecf20Sopenharmony_cistatic unsigned long raid5_cache_scan(struct shrinker *shrink, 71218c2ecf20Sopenharmony_ci struct shrink_control *sc) 71228c2ecf20Sopenharmony_ci{ 71238c2ecf20Sopenharmony_ci struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); 71248c2ecf20Sopenharmony_ci unsigned long ret = SHRINK_STOP; 71258c2ecf20Sopenharmony_ci 71268c2ecf20Sopenharmony_ci if (mutex_trylock(&conf->cache_size_mutex)) { 71278c2ecf20Sopenharmony_ci ret= 0; 71288c2ecf20Sopenharmony_ci while (ret < sc->nr_to_scan && 71298c2ecf20Sopenharmony_ci conf->max_nr_stripes > conf->min_nr_stripes) { 71308c2ecf20Sopenharmony_ci if (drop_one_stripe(conf) == 0) { 71318c2ecf20Sopenharmony_ci ret = SHRINK_STOP; 71328c2ecf20Sopenharmony_ci break; 71338c2ecf20Sopenharmony_ci } 71348c2ecf20Sopenharmony_ci ret++; 71358c2ecf20Sopenharmony_ci } 71368c2ecf20Sopenharmony_ci mutex_unlock(&conf->cache_size_mutex); 71378c2ecf20Sopenharmony_ci } 71388c2ecf20Sopenharmony_ci return ret; 71398c2ecf20Sopenharmony_ci} 71408c2ecf20Sopenharmony_ci 71418c2ecf20Sopenharmony_cistatic unsigned long raid5_cache_count(struct shrinker *shrink, 71428c2ecf20Sopenharmony_ci struct shrink_control *sc) 71438c2ecf20Sopenharmony_ci{ 71448c2ecf20Sopenharmony_ci struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); 71458c2ecf20Sopenharmony_ci 71468c2ecf20Sopenharmony_ci if (conf->max_nr_stripes < conf->min_nr_stripes) 71478c2ecf20Sopenharmony_ci /* unlikely, but not impossible */ 71488c2ecf20Sopenharmony_ci return 0; 71498c2ecf20Sopenharmony_ci return conf->max_nr_stripes - conf->min_nr_stripes; 71508c2ecf20Sopenharmony_ci} 71518c2ecf20Sopenharmony_ci 71528c2ecf20Sopenharmony_cistatic struct r5conf *setup_conf(struct mddev *mddev) 71538c2ecf20Sopenharmony_ci{ 71548c2ecf20Sopenharmony_ci struct r5conf *conf; 71558c2ecf20Sopenharmony_ci int raid_disk, memory, max_disks; 71568c2ecf20Sopenharmony_ci struct md_rdev *rdev; 71578c2ecf20Sopenharmony_ci struct disk_info *disk; 71588c2ecf20Sopenharmony_ci char pers_name[6]; 71598c2ecf20Sopenharmony_ci int i; 71608c2ecf20Sopenharmony_ci int group_cnt; 71618c2ecf20Sopenharmony_ci struct r5worker_group *new_group; 71628c2ecf20Sopenharmony_ci int ret; 71638c2ecf20Sopenharmony_ci 71648c2ecf20Sopenharmony_ci if (mddev->new_level != 5 71658c2ecf20Sopenharmony_ci && mddev->new_level != 4 71668c2ecf20Sopenharmony_ci && mddev->new_level != 6) { 71678c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: raid level not set to 4/5/6 (%d)\n", 71688c2ecf20Sopenharmony_ci mdname(mddev), mddev->new_level); 71698c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 71708c2ecf20Sopenharmony_ci } 71718c2ecf20Sopenharmony_ci if ((mddev->new_level == 5 71728c2ecf20Sopenharmony_ci && !algorithm_valid_raid5(mddev->new_layout)) || 71738c2ecf20Sopenharmony_ci (mddev->new_level == 6 71748c2ecf20Sopenharmony_ci && !algorithm_valid_raid6(mddev->new_layout))) { 71758c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: layout %d not supported\n", 71768c2ecf20Sopenharmony_ci mdname(mddev), mddev->new_layout); 71778c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 71788c2ecf20Sopenharmony_ci } 71798c2ecf20Sopenharmony_ci if (mddev->new_level == 6 && mddev->raid_disks < 4) { 71808c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: not enough configured devices (%d, minimum 4)\n", 71818c2ecf20Sopenharmony_ci mdname(mddev), mddev->raid_disks); 71828c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 71838c2ecf20Sopenharmony_ci } 71848c2ecf20Sopenharmony_ci 71858c2ecf20Sopenharmony_ci if (!mddev->new_chunk_sectors || 71868c2ecf20Sopenharmony_ci (mddev->new_chunk_sectors << 9) % PAGE_SIZE || 71878c2ecf20Sopenharmony_ci !is_power_of_2(mddev->new_chunk_sectors)) { 71888c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: invalid chunk size %d\n", 71898c2ecf20Sopenharmony_ci mdname(mddev), mddev->new_chunk_sectors << 9); 71908c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 71918c2ecf20Sopenharmony_ci } 71928c2ecf20Sopenharmony_ci 71938c2ecf20Sopenharmony_ci conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); 71948c2ecf20Sopenharmony_ci if (conf == NULL) 71958c2ecf20Sopenharmony_ci goto abort; 71968c2ecf20Sopenharmony_ci 71978c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE 71988c2ecf20Sopenharmony_ci conf->stripe_size = DEFAULT_STRIPE_SIZE; 71998c2ecf20Sopenharmony_ci conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9; 72008c2ecf20Sopenharmony_ci conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9; 72018c2ecf20Sopenharmony_ci#endif 72028c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->free_list); 72038c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->pending_list); 72048c2ecf20Sopenharmony_ci conf->pending_data = kcalloc(PENDING_IO_MAX, 72058c2ecf20Sopenharmony_ci sizeof(struct r5pending_data), 72068c2ecf20Sopenharmony_ci GFP_KERNEL); 72078c2ecf20Sopenharmony_ci if (!conf->pending_data) 72088c2ecf20Sopenharmony_ci goto abort; 72098c2ecf20Sopenharmony_ci for (i = 0; i < PENDING_IO_MAX; i++) 72108c2ecf20Sopenharmony_ci list_add(&conf->pending_data[i].sibling, &conf->free_list); 72118c2ecf20Sopenharmony_ci /* Don't enable multi-threading by default*/ 72128c2ecf20Sopenharmony_ci if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) { 72138c2ecf20Sopenharmony_ci conf->group_cnt = group_cnt; 72148c2ecf20Sopenharmony_ci conf->worker_cnt_per_group = 0; 72158c2ecf20Sopenharmony_ci conf->worker_groups = new_group; 72168c2ecf20Sopenharmony_ci } else 72178c2ecf20Sopenharmony_ci goto abort; 72188c2ecf20Sopenharmony_ci spin_lock_init(&conf->device_lock); 72198c2ecf20Sopenharmony_ci seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock); 72208c2ecf20Sopenharmony_ci mutex_init(&conf->cache_size_mutex); 72218c2ecf20Sopenharmony_ci init_waitqueue_head(&conf->wait_for_quiescent); 72228c2ecf20Sopenharmony_ci init_waitqueue_head(&conf->wait_for_stripe); 72238c2ecf20Sopenharmony_ci init_waitqueue_head(&conf->wait_for_overlap); 72248c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->handle_list); 72258c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->loprio_list); 72268c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->hold_list); 72278c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->delayed_list); 72288c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->bitmap_list); 72298c2ecf20Sopenharmony_ci init_llist_head(&conf->released_stripes); 72308c2ecf20Sopenharmony_ci atomic_set(&conf->active_stripes, 0); 72318c2ecf20Sopenharmony_ci atomic_set(&conf->preread_active_stripes, 0); 72328c2ecf20Sopenharmony_ci atomic_set(&conf->active_aligned_reads, 0); 72338c2ecf20Sopenharmony_ci spin_lock_init(&conf->pending_bios_lock); 72348c2ecf20Sopenharmony_ci conf->batch_bio_dispatch = true; 72358c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 72368c2ecf20Sopenharmony_ci if (test_bit(Journal, &rdev->flags)) 72378c2ecf20Sopenharmony_ci continue; 72388c2ecf20Sopenharmony_ci if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { 72398c2ecf20Sopenharmony_ci conf->batch_bio_dispatch = false; 72408c2ecf20Sopenharmony_ci break; 72418c2ecf20Sopenharmony_ci } 72428c2ecf20Sopenharmony_ci } 72438c2ecf20Sopenharmony_ci 72448c2ecf20Sopenharmony_ci conf->bypass_threshold = BYPASS_THRESHOLD; 72458c2ecf20Sopenharmony_ci conf->recovery_disabled = mddev->recovery_disabled - 1; 72468c2ecf20Sopenharmony_ci 72478c2ecf20Sopenharmony_ci conf->raid_disks = mddev->raid_disks; 72488c2ecf20Sopenharmony_ci if (mddev->reshape_position == MaxSector) 72498c2ecf20Sopenharmony_ci conf->previous_raid_disks = mddev->raid_disks; 72508c2ecf20Sopenharmony_ci else 72518c2ecf20Sopenharmony_ci conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; 72528c2ecf20Sopenharmony_ci max_disks = max(conf->raid_disks, conf->previous_raid_disks); 72538c2ecf20Sopenharmony_ci 72548c2ecf20Sopenharmony_ci conf->disks = kcalloc(max_disks, sizeof(struct disk_info), 72558c2ecf20Sopenharmony_ci GFP_KERNEL); 72568c2ecf20Sopenharmony_ci 72578c2ecf20Sopenharmony_ci if (!conf->disks) 72588c2ecf20Sopenharmony_ci goto abort; 72598c2ecf20Sopenharmony_ci 72608c2ecf20Sopenharmony_ci for (i = 0; i < max_disks; i++) { 72618c2ecf20Sopenharmony_ci conf->disks[i].extra_page = alloc_page(GFP_KERNEL); 72628c2ecf20Sopenharmony_ci if (!conf->disks[i].extra_page) 72638c2ecf20Sopenharmony_ci goto abort; 72648c2ecf20Sopenharmony_ci } 72658c2ecf20Sopenharmony_ci 72668c2ecf20Sopenharmony_ci ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); 72678c2ecf20Sopenharmony_ci if (ret) 72688c2ecf20Sopenharmony_ci goto abort; 72698c2ecf20Sopenharmony_ci conf->mddev = mddev; 72708c2ecf20Sopenharmony_ci 72718c2ecf20Sopenharmony_ci if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 72728c2ecf20Sopenharmony_ci goto abort; 72738c2ecf20Sopenharmony_ci 72748c2ecf20Sopenharmony_ci /* We init hash_locks[0] separately to that it can be used 72758c2ecf20Sopenharmony_ci * as the reference lock in the spin_lock_nest_lock() call 72768c2ecf20Sopenharmony_ci * in lock_all_device_hash_locks_irq in order to convince 72778c2ecf20Sopenharmony_ci * lockdep that we know what we are doing. 72788c2ecf20Sopenharmony_ci */ 72798c2ecf20Sopenharmony_ci spin_lock_init(conf->hash_locks); 72808c2ecf20Sopenharmony_ci for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) 72818c2ecf20Sopenharmony_ci spin_lock_init(conf->hash_locks + i); 72828c2ecf20Sopenharmony_ci 72838c2ecf20Sopenharmony_ci for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) 72848c2ecf20Sopenharmony_ci INIT_LIST_HEAD(conf->inactive_list + i); 72858c2ecf20Sopenharmony_ci 72868c2ecf20Sopenharmony_ci for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) 72878c2ecf20Sopenharmony_ci INIT_LIST_HEAD(conf->temp_inactive_list + i); 72888c2ecf20Sopenharmony_ci 72898c2ecf20Sopenharmony_ci atomic_set(&conf->r5c_cached_full_stripes, 0); 72908c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->r5c_full_stripe_list); 72918c2ecf20Sopenharmony_ci atomic_set(&conf->r5c_cached_partial_stripes, 0); 72928c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&conf->r5c_partial_stripe_list); 72938c2ecf20Sopenharmony_ci atomic_set(&conf->r5c_flushing_full_stripes, 0); 72948c2ecf20Sopenharmony_ci atomic_set(&conf->r5c_flushing_partial_stripes, 0); 72958c2ecf20Sopenharmony_ci 72968c2ecf20Sopenharmony_ci conf->level = mddev->new_level; 72978c2ecf20Sopenharmony_ci conf->chunk_sectors = mddev->new_chunk_sectors; 72988c2ecf20Sopenharmony_ci if (raid5_alloc_percpu(conf) != 0) 72998c2ecf20Sopenharmony_ci goto abort; 73008c2ecf20Sopenharmony_ci 73018c2ecf20Sopenharmony_ci pr_debug("raid456: run(%s) called.\n", mdname(mddev)); 73028c2ecf20Sopenharmony_ci 73038c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 73048c2ecf20Sopenharmony_ci raid_disk = rdev->raid_disk; 73058c2ecf20Sopenharmony_ci if (raid_disk >= max_disks 73068c2ecf20Sopenharmony_ci || raid_disk < 0 || test_bit(Journal, &rdev->flags)) 73078c2ecf20Sopenharmony_ci continue; 73088c2ecf20Sopenharmony_ci disk = conf->disks + raid_disk; 73098c2ecf20Sopenharmony_ci 73108c2ecf20Sopenharmony_ci if (test_bit(Replacement, &rdev->flags)) { 73118c2ecf20Sopenharmony_ci if (disk->replacement) 73128c2ecf20Sopenharmony_ci goto abort; 73138c2ecf20Sopenharmony_ci disk->replacement = rdev; 73148c2ecf20Sopenharmony_ci } else { 73158c2ecf20Sopenharmony_ci if (disk->rdev) 73168c2ecf20Sopenharmony_ci goto abort; 73178c2ecf20Sopenharmony_ci disk->rdev = rdev; 73188c2ecf20Sopenharmony_ci } 73198c2ecf20Sopenharmony_ci 73208c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags)) { 73218c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 73228c2ecf20Sopenharmony_ci pr_info("md/raid:%s: device %s operational as raid disk %d\n", 73238c2ecf20Sopenharmony_ci mdname(mddev), bdevname(rdev->bdev, b), raid_disk); 73248c2ecf20Sopenharmony_ci } else if (rdev->saved_raid_disk != raid_disk) 73258c2ecf20Sopenharmony_ci /* Cannot rely on bitmap to complete recovery */ 73268c2ecf20Sopenharmony_ci conf->fullsync = 1; 73278c2ecf20Sopenharmony_ci } 73288c2ecf20Sopenharmony_ci 73298c2ecf20Sopenharmony_ci conf->level = mddev->new_level; 73308c2ecf20Sopenharmony_ci if (conf->level == 6) { 73318c2ecf20Sopenharmony_ci conf->max_degraded = 2; 73328c2ecf20Sopenharmony_ci if (raid6_call.xor_syndrome) 73338c2ecf20Sopenharmony_ci conf->rmw_level = PARITY_ENABLE_RMW; 73348c2ecf20Sopenharmony_ci else 73358c2ecf20Sopenharmony_ci conf->rmw_level = PARITY_DISABLE_RMW; 73368c2ecf20Sopenharmony_ci } else { 73378c2ecf20Sopenharmony_ci conf->max_degraded = 1; 73388c2ecf20Sopenharmony_ci conf->rmw_level = PARITY_ENABLE_RMW; 73398c2ecf20Sopenharmony_ci } 73408c2ecf20Sopenharmony_ci conf->algorithm = mddev->new_layout; 73418c2ecf20Sopenharmony_ci conf->reshape_progress = mddev->reshape_position; 73428c2ecf20Sopenharmony_ci if (conf->reshape_progress != MaxSector) { 73438c2ecf20Sopenharmony_ci conf->prev_chunk_sectors = mddev->chunk_sectors; 73448c2ecf20Sopenharmony_ci conf->prev_algo = mddev->layout; 73458c2ecf20Sopenharmony_ci } else { 73468c2ecf20Sopenharmony_ci conf->prev_chunk_sectors = conf->chunk_sectors; 73478c2ecf20Sopenharmony_ci conf->prev_algo = conf->algorithm; 73488c2ecf20Sopenharmony_ci } 73498c2ecf20Sopenharmony_ci 73508c2ecf20Sopenharmony_ci conf->min_nr_stripes = NR_STRIPES; 73518c2ecf20Sopenharmony_ci if (mddev->reshape_position != MaxSector) { 73528c2ecf20Sopenharmony_ci int stripes = max_t(int, 73538c2ecf20Sopenharmony_ci ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4, 73548c2ecf20Sopenharmony_ci ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4); 73558c2ecf20Sopenharmony_ci conf->min_nr_stripes = max(NR_STRIPES, stripes); 73568c2ecf20Sopenharmony_ci if (conf->min_nr_stripes != NR_STRIPES) 73578c2ecf20Sopenharmony_ci pr_info("md/raid:%s: force stripe size %d for reshape\n", 73588c2ecf20Sopenharmony_ci mdname(mddev), conf->min_nr_stripes); 73598c2ecf20Sopenharmony_ci } 73608c2ecf20Sopenharmony_ci memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + 73618c2ecf20Sopenharmony_ci max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; 73628c2ecf20Sopenharmony_ci atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); 73638c2ecf20Sopenharmony_ci if (grow_stripes(conf, conf->min_nr_stripes)) { 73648c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n", 73658c2ecf20Sopenharmony_ci mdname(mddev), memory); 73668c2ecf20Sopenharmony_ci goto abort; 73678c2ecf20Sopenharmony_ci } else 73688c2ecf20Sopenharmony_ci pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory); 73698c2ecf20Sopenharmony_ci /* 73708c2ecf20Sopenharmony_ci * Losing a stripe head costs more than the time to refill it, 73718c2ecf20Sopenharmony_ci * it reduces the queue depth and so can hurt throughput. 73728c2ecf20Sopenharmony_ci * So set it rather large, scaled by number of devices. 73738c2ecf20Sopenharmony_ci */ 73748c2ecf20Sopenharmony_ci conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; 73758c2ecf20Sopenharmony_ci conf->shrinker.scan_objects = raid5_cache_scan; 73768c2ecf20Sopenharmony_ci conf->shrinker.count_objects = raid5_cache_count; 73778c2ecf20Sopenharmony_ci conf->shrinker.batch = 128; 73788c2ecf20Sopenharmony_ci conf->shrinker.flags = 0; 73798c2ecf20Sopenharmony_ci if (register_shrinker(&conf->shrinker)) { 73808c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: couldn't register shrinker.\n", 73818c2ecf20Sopenharmony_ci mdname(mddev)); 73828c2ecf20Sopenharmony_ci goto abort; 73838c2ecf20Sopenharmony_ci } 73848c2ecf20Sopenharmony_ci 73858c2ecf20Sopenharmony_ci sprintf(pers_name, "raid%d", mddev->new_level); 73868c2ecf20Sopenharmony_ci conf->thread = md_register_thread(raid5d, mddev, pers_name); 73878c2ecf20Sopenharmony_ci if (!conf->thread) { 73888c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: couldn't allocate thread.\n", 73898c2ecf20Sopenharmony_ci mdname(mddev)); 73908c2ecf20Sopenharmony_ci goto abort; 73918c2ecf20Sopenharmony_ci } 73928c2ecf20Sopenharmony_ci 73938c2ecf20Sopenharmony_ci return conf; 73948c2ecf20Sopenharmony_ci 73958c2ecf20Sopenharmony_ci abort: 73968c2ecf20Sopenharmony_ci if (conf) { 73978c2ecf20Sopenharmony_ci free_conf(conf); 73988c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 73998c2ecf20Sopenharmony_ci } else 74008c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 74018c2ecf20Sopenharmony_ci} 74028c2ecf20Sopenharmony_ci 74038c2ecf20Sopenharmony_cistatic int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) 74048c2ecf20Sopenharmony_ci{ 74058c2ecf20Sopenharmony_ci switch (algo) { 74068c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 74078c2ecf20Sopenharmony_ci if (raid_disk < max_degraded) 74088c2ecf20Sopenharmony_ci return 1; 74098c2ecf20Sopenharmony_ci break; 74108c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 74118c2ecf20Sopenharmony_ci if (raid_disk >= raid_disks - max_degraded) 74128c2ecf20Sopenharmony_ci return 1; 74138c2ecf20Sopenharmony_ci break; 74148c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0_6: 74158c2ecf20Sopenharmony_ci if (raid_disk == 0 || 74168c2ecf20Sopenharmony_ci raid_disk == raid_disks - 1) 74178c2ecf20Sopenharmony_ci return 1; 74188c2ecf20Sopenharmony_ci break; 74198c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC_6: 74208c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC_6: 74218c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC_6: 74228c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC_6: 74238c2ecf20Sopenharmony_ci if (raid_disk == raid_disks - 1) 74248c2ecf20Sopenharmony_ci return 1; 74258c2ecf20Sopenharmony_ci } 74268c2ecf20Sopenharmony_ci return 0; 74278c2ecf20Sopenharmony_ci} 74288c2ecf20Sopenharmony_ci 74298c2ecf20Sopenharmony_cistatic void raid5_set_io_opt(struct r5conf *conf) 74308c2ecf20Sopenharmony_ci{ 74318c2ecf20Sopenharmony_ci blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * 74328c2ecf20Sopenharmony_ci (conf->raid_disks - conf->max_degraded)); 74338c2ecf20Sopenharmony_ci} 74348c2ecf20Sopenharmony_ci 74358c2ecf20Sopenharmony_cistatic int raid5_run(struct mddev *mddev) 74368c2ecf20Sopenharmony_ci{ 74378c2ecf20Sopenharmony_ci struct r5conf *conf; 74388c2ecf20Sopenharmony_ci int working_disks = 0; 74398c2ecf20Sopenharmony_ci int dirty_parity_disks = 0; 74408c2ecf20Sopenharmony_ci struct md_rdev *rdev; 74418c2ecf20Sopenharmony_ci struct md_rdev *journal_dev = NULL; 74428c2ecf20Sopenharmony_ci sector_t reshape_offset = 0; 74438c2ecf20Sopenharmony_ci int i; 74448c2ecf20Sopenharmony_ci long long min_offset_diff = 0; 74458c2ecf20Sopenharmony_ci int first = 1; 74468c2ecf20Sopenharmony_ci 74478c2ecf20Sopenharmony_ci if (mddev_init_writes_pending(mddev) < 0) 74488c2ecf20Sopenharmony_ci return -ENOMEM; 74498c2ecf20Sopenharmony_ci 74508c2ecf20Sopenharmony_ci if (mddev->recovery_cp != MaxSector) 74518c2ecf20Sopenharmony_ci pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", 74528c2ecf20Sopenharmony_ci mdname(mddev)); 74538c2ecf20Sopenharmony_ci 74548c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 74558c2ecf20Sopenharmony_ci long long diff; 74568c2ecf20Sopenharmony_ci 74578c2ecf20Sopenharmony_ci if (test_bit(Journal, &rdev->flags)) { 74588c2ecf20Sopenharmony_ci journal_dev = rdev; 74598c2ecf20Sopenharmony_ci continue; 74608c2ecf20Sopenharmony_ci } 74618c2ecf20Sopenharmony_ci if (rdev->raid_disk < 0) 74628c2ecf20Sopenharmony_ci continue; 74638c2ecf20Sopenharmony_ci diff = (rdev->new_data_offset - rdev->data_offset); 74648c2ecf20Sopenharmony_ci if (first) { 74658c2ecf20Sopenharmony_ci min_offset_diff = diff; 74668c2ecf20Sopenharmony_ci first = 0; 74678c2ecf20Sopenharmony_ci } else if (mddev->reshape_backwards && 74688c2ecf20Sopenharmony_ci diff < min_offset_diff) 74698c2ecf20Sopenharmony_ci min_offset_diff = diff; 74708c2ecf20Sopenharmony_ci else if (!mddev->reshape_backwards && 74718c2ecf20Sopenharmony_ci diff > min_offset_diff) 74728c2ecf20Sopenharmony_ci min_offset_diff = diff; 74738c2ecf20Sopenharmony_ci } 74748c2ecf20Sopenharmony_ci 74758c2ecf20Sopenharmony_ci if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) && 74768c2ecf20Sopenharmony_ci (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { 74778c2ecf20Sopenharmony_ci pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", 74788c2ecf20Sopenharmony_ci mdname(mddev)); 74798c2ecf20Sopenharmony_ci return -EINVAL; 74808c2ecf20Sopenharmony_ci } 74818c2ecf20Sopenharmony_ci 74828c2ecf20Sopenharmony_ci if (mddev->reshape_position != MaxSector) { 74838c2ecf20Sopenharmony_ci /* Check that we can continue the reshape. 74848c2ecf20Sopenharmony_ci * Difficulties arise if the stripe we would write to 74858c2ecf20Sopenharmony_ci * next is at or after the stripe we would read from next. 74868c2ecf20Sopenharmony_ci * For a reshape that changes the number of devices, this 74878c2ecf20Sopenharmony_ci * is only possible for a very short time, and mdadm makes 74888c2ecf20Sopenharmony_ci * sure that time appears to have past before assembling 74898c2ecf20Sopenharmony_ci * the array. So we fail if that time hasn't passed. 74908c2ecf20Sopenharmony_ci * For a reshape that keeps the number of devices the same 74918c2ecf20Sopenharmony_ci * mdadm must be monitoring the reshape can keeping the 74928c2ecf20Sopenharmony_ci * critical areas read-only and backed up. It will start 74938c2ecf20Sopenharmony_ci * the array in read-only mode, so we check for that. 74948c2ecf20Sopenharmony_ci */ 74958c2ecf20Sopenharmony_ci sector_t here_new, here_old; 74968c2ecf20Sopenharmony_ci int old_disks; 74978c2ecf20Sopenharmony_ci int max_degraded = (mddev->level == 6 ? 2 : 1); 74988c2ecf20Sopenharmony_ci int chunk_sectors; 74998c2ecf20Sopenharmony_ci int new_data_disks; 75008c2ecf20Sopenharmony_ci 75018c2ecf20Sopenharmony_ci if (journal_dev) { 75028c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", 75038c2ecf20Sopenharmony_ci mdname(mddev)); 75048c2ecf20Sopenharmony_ci return -EINVAL; 75058c2ecf20Sopenharmony_ci } 75068c2ecf20Sopenharmony_ci 75078c2ecf20Sopenharmony_ci if (mddev->new_level != mddev->level) { 75088c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", 75098c2ecf20Sopenharmony_ci mdname(mddev)); 75108c2ecf20Sopenharmony_ci return -EINVAL; 75118c2ecf20Sopenharmony_ci } 75128c2ecf20Sopenharmony_ci old_disks = mddev->raid_disks - mddev->delta_disks; 75138c2ecf20Sopenharmony_ci /* reshape_position must be on a new-stripe boundary, and one 75148c2ecf20Sopenharmony_ci * further up in new geometry must map after here in old 75158c2ecf20Sopenharmony_ci * geometry. 75168c2ecf20Sopenharmony_ci * If the chunk sizes are different, then as we perform reshape 75178c2ecf20Sopenharmony_ci * in units of the largest of the two, reshape_position needs 75188c2ecf20Sopenharmony_ci * be a multiple of the largest chunk size times new data disks. 75198c2ecf20Sopenharmony_ci */ 75208c2ecf20Sopenharmony_ci here_new = mddev->reshape_position; 75218c2ecf20Sopenharmony_ci chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors); 75228c2ecf20Sopenharmony_ci new_data_disks = mddev->raid_disks - max_degraded; 75238c2ecf20Sopenharmony_ci if (sector_div(here_new, chunk_sectors * new_data_disks)) { 75248c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n", 75258c2ecf20Sopenharmony_ci mdname(mddev)); 75268c2ecf20Sopenharmony_ci return -EINVAL; 75278c2ecf20Sopenharmony_ci } 75288c2ecf20Sopenharmony_ci reshape_offset = here_new * chunk_sectors; 75298c2ecf20Sopenharmony_ci /* here_new is the stripe we will write to */ 75308c2ecf20Sopenharmony_ci here_old = mddev->reshape_position; 75318c2ecf20Sopenharmony_ci sector_div(here_old, chunk_sectors * (old_disks-max_degraded)); 75328c2ecf20Sopenharmony_ci /* here_old is the first stripe that we might need to read 75338c2ecf20Sopenharmony_ci * from */ 75348c2ecf20Sopenharmony_ci if (mddev->delta_disks == 0) { 75358c2ecf20Sopenharmony_ci /* We cannot be sure it is safe to start an in-place 75368c2ecf20Sopenharmony_ci * reshape. It is only safe if user-space is monitoring 75378c2ecf20Sopenharmony_ci * and taking constant backups. 75388c2ecf20Sopenharmony_ci * mdadm always starts a situation like this in 75398c2ecf20Sopenharmony_ci * readonly mode so it can take control before 75408c2ecf20Sopenharmony_ci * allowing any writes. So just check for that. 75418c2ecf20Sopenharmony_ci */ 75428c2ecf20Sopenharmony_ci if (abs(min_offset_diff) >= mddev->chunk_sectors && 75438c2ecf20Sopenharmony_ci abs(min_offset_diff) >= mddev->new_chunk_sectors) 75448c2ecf20Sopenharmony_ci /* not really in-place - so OK */; 75458c2ecf20Sopenharmony_ci else if (mddev->ro == 0) { 75468c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", 75478c2ecf20Sopenharmony_ci mdname(mddev)); 75488c2ecf20Sopenharmony_ci return -EINVAL; 75498c2ecf20Sopenharmony_ci } 75508c2ecf20Sopenharmony_ci } else if (mddev->reshape_backwards 75518c2ecf20Sopenharmony_ci ? (here_new * chunk_sectors + min_offset_diff <= 75528c2ecf20Sopenharmony_ci here_old * chunk_sectors) 75538c2ecf20Sopenharmony_ci : (here_new * chunk_sectors >= 75548c2ecf20Sopenharmony_ci here_old * chunk_sectors + (-min_offset_diff))) { 75558c2ecf20Sopenharmony_ci /* Reading from the same stripe as writing to - bad */ 75568c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", 75578c2ecf20Sopenharmony_ci mdname(mddev)); 75588c2ecf20Sopenharmony_ci return -EINVAL; 75598c2ecf20Sopenharmony_ci } 75608c2ecf20Sopenharmony_ci pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev)); 75618c2ecf20Sopenharmony_ci /* OK, we should be able to continue; */ 75628c2ecf20Sopenharmony_ci } else { 75638c2ecf20Sopenharmony_ci BUG_ON(mddev->level != mddev->new_level); 75648c2ecf20Sopenharmony_ci BUG_ON(mddev->layout != mddev->new_layout); 75658c2ecf20Sopenharmony_ci BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); 75668c2ecf20Sopenharmony_ci BUG_ON(mddev->delta_disks != 0); 75678c2ecf20Sopenharmony_ci } 75688c2ecf20Sopenharmony_ci 75698c2ecf20Sopenharmony_ci if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && 75708c2ecf20Sopenharmony_ci test_bit(MD_HAS_PPL, &mddev->flags)) { 75718c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", 75728c2ecf20Sopenharmony_ci mdname(mddev)); 75738c2ecf20Sopenharmony_ci clear_bit(MD_HAS_PPL, &mddev->flags); 75748c2ecf20Sopenharmony_ci clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); 75758c2ecf20Sopenharmony_ci } 75768c2ecf20Sopenharmony_ci 75778c2ecf20Sopenharmony_ci if (mddev->private == NULL) 75788c2ecf20Sopenharmony_ci conf = setup_conf(mddev); 75798c2ecf20Sopenharmony_ci else 75808c2ecf20Sopenharmony_ci conf = mddev->private; 75818c2ecf20Sopenharmony_ci 75828c2ecf20Sopenharmony_ci if (IS_ERR(conf)) 75838c2ecf20Sopenharmony_ci return PTR_ERR(conf); 75848c2ecf20Sopenharmony_ci 75858c2ecf20Sopenharmony_ci if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 75868c2ecf20Sopenharmony_ci if (!journal_dev) { 75878c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: journal disk is missing, force array readonly\n", 75888c2ecf20Sopenharmony_ci mdname(mddev)); 75898c2ecf20Sopenharmony_ci mddev->ro = 1; 75908c2ecf20Sopenharmony_ci set_disk_ro(mddev->gendisk, 1); 75918c2ecf20Sopenharmony_ci } else if (mddev->recovery_cp == MaxSector) 75928c2ecf20Sopenharmony_ci set_bit(MD_JOURNAL_CLEAN, &mddev->flags); 75938c2ecf20Sopenharmony_ci } 75948c2ecf20Sopenharmony_ci 75958c2ecf20Sopenharmony_ci conf->min_offset_diff = min_offset_diff; 75968c2ecf20Sopenharmony_ci mddev->thread = conf->thread; 75978c2ecf20Sopenharmony_ci conf->thread = NULL; 75988c2ecf20Sopenharmony_ci mddev->private = conf; 75998c2ecf20Sopenharmony_ci 76008c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; 76018c2ecf20Sopenharmony_ci i++) { 76028c2ecf20Sopenharmony_ci rdev = conf->disks[i].rdev; 76038c2ecf20Sopenharmony_ci if (!rdev && conf->disks[i].replacement) { 76048c2ecf20Sopenharmony_ci /* The replacement is all we have yet */ 76058c2ecf20Sopenharmony_ci rdev = conf->disks[i].replacement; 76068c2ecf20Sopenharmony_ci conf->disks[i].replacement = NULL; 76078c2ecf20Sopenharmony_ci clear_bit(Replacement, &rdev->flags); 76088c2ecf20Sopenharmony_ci conf->disks[i].rdev = rdev; 76098c2ecf20Sopenharmony_ci } 76108c2ecf20Sopenharmony_ci if (!rdev) 76118c2ecf20Sopenharmony_ci continue; 76128c2ecf20Sopenharmony_ci if (conf->disks[i].replacement && 76138c2ecf20Sopenharmony_ci conf->reshape_progress != MaxSector) { 76148c2ecf20Sopenharmony_ci /* replacements and reshape simply do not mix. */ 76158c2ecf20Sopenharmony_ci pr_warn("md: cannot handle concurrent replacement and reshape.\n"); 76168c2ecf20Sopenharmony_ci goto abort; 76178c2ecf20Sopenharmony_ci } 76188c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags)) { 76198c2ecf20Sopenharmony_ci working_disks++; 76208c2ecf20Sopenharmony_ci continue; 76218c2ecf20Sopenharmony_ci } 76228c2ecf20Sopenharmony_ci /* This disc is not fully in-sync. However if it 76238c2ecf20Sopenharmony_ci * just stored parity (beyond the recovery_offset), 76248c2ecf20Sopenharmony_ci * when we don't need to be concerned about the 76258c2ecf20Sopenharmony_ci * array being dirty. 76268c2ecf20Sopenharmony_ci * When reshape goes 'backwards', we never have 76278c2ecf20Sopenharmony_ci * partially completed devices, so we only need 76288c2ecf20Sopenharmony_ci * to worry about reshape going forwards. 76298c2ecf20Sopenharmony_ci */ 76308c2ecf20Sopenharmony_ci /* Hack because v0.91 doesn't store recovery_offset properly. */ 76318c2ecf20Sopenharmony_ci if (mddev->major_version == 0 && 76328c2ecf20Sopenharmony_ci mddev->minor_version > 90) 76338c2ecf20Sopenharmony_ci rdev->recovery_offset = reshape_offset; 76348c2ecf20Sopenharmony_ci 76358c2ecf20Sopenharmony_ci if (rdev->recovery_offset < reshape_offset) { 76368c2ecf20Sopenharmony_ci /* We need to check old and new layout */ 76378c2ecf20Sopenharmony_ci if (!only_parity(rdev->raid_disk, 76388c2ecf20Sopenharmony_ci conf->algorithm, 76398c2ecf20Sopenharmony_ci conf->raid_disks, 76408c2ecf20Sopenharmony_ci conf->max_degraded)) 76418c2ecf20Sopenharmony_ci continue; 76428c2ecf20Sopenharmony_ci } 76438c2ecf20Sopenharmony_ci if (!only_parity(rdev->raid_disk, 76448c2ecf20Sopenharmony_ci conf->prev_algo, 76458c2ecf20Sopenharmony_ci conf->previous_raid_disks, 76468c2ecf20Sopenharmony_ci conf->max_degraded)) 76478c2ecf20Sopenharmony_ci continue; 76488c2ecf20Sopenharmony_ci dirty_parity_disks++; 76498c2ecf20Sopenharmony_ci } 76508c2ecf20Sopenharmony_ci 76518c2ecf20Sopenharmony_ci /* 76528c2ecf20Sopenharmony_ci * 0 for a fully functional array, 1 or 2 for a degraded array. 76538c2ecf20Sopenharmony_ci */ 76548c2ecf20Sopenharmony_ci mddev->degraded = raid5_calc_degraded(conf); 76558c2ecf20Sopenharmony_ci 76568c2ecf20Sopenharmony_ci if (has_failed(conf)) { 76578c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n", 76588c2ecf20Sopenharmony_ci mdname(mddev), mddev->degraded, conf->raid_disks); 76598c2ecf20Sopenharmony_ci goto abort; 76608c2ecf20Sopenharmony_ci } 76618c2ecf20Sopenharmony_ci 76628c2ecf20Sopenharmony_ci /* device size must be a multiple of chunk size */ 76638c2ecf20Sopenharmony_ci mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); 76648c2ecf20Sopenharmony_ci mddev->resync_max_sectors = mddev->dev_sectors; 76658c2ecf20Sopenharmony_ci 76668c2ecf20Sopenharmony_ci if (mddev->degraded > dirty_parity_disks && 76678c2ecf20Sopenharmony_ci mddev->recovery_cp != MaxSector) { 76688c2ecf20Sopenharmony_ci if (test_bit(MD_HAS_PPL, &mddev->flags)) 76698c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: starting dirty degraded array with PPL.\n", 76708c2ecf20Sopenharmony_ci mdname(mddev)); 76718c2ecf20Sopenharmony_ci else if (mddev->ok_start_degraded) 76728c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n", 76738c2ecf20Sopenharmony_ci mdname(mddev)); 76748c2ecf20Sopenharmony_ci else { 76758c2ecf20Sopenharmony_ci pr_crit("md/raid:%s: cannot start dirty degraded array.\n", 76768c2ecf20Sopenharmony_ci mdname(mddev)); 76778c2ecf20Sopenharmony_ci goto abort; 76788c2ecf20Sopenharmony_ci } 76798c2ecf20Sopenharmony_ci } 76808c2ecf20Sopenharmony_ci 76818c2ecf20Sopenharmony_ci pr_info("md/raid:%s: raid level %d active with %d out of %d devices, algorithm %d\n", 76828c2ecf20Sopenharmony_ci mdname(mddev), conf->level, 76838c2ecf20Sopenharmony_ci mddev->raid_disks-mddev->degraded, mddev->raid_disks, 76848c2ecf20Sopenharmony_ci mddev->new_layout); 76858c2ecf20Sopenharmony_ci 76868c2ecf20Sopenharmony_ci print_raid5_conf(conf); 76878c2ecf20Sopenharmony_ci 76888c2ecf20Sopenharmony_ci if (conf->reshape_progress != MaxSector) { 76898c2ecf20Sopenharmony_ci conf->reshape_safe = conf->reshape_progress; 76908c2ecf20Sopenharmony_ci atomic_set(&conf->reshape_stripes, 0); 76918c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 76928c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 76938c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 76948c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 76958c2ecf20Sopenharmony_ci mddev->sync_thread = md_register_thread(md_do_sync, mddev, 76968c2ecf20Sopenharmony_ci "reshape"); 76978c2ecf20Sopenharmony_ci if (!mddev->sync_thread) 76988c2ecf20Sopenharmony_ci goto abort; 76998c2ecf20Sopenharmony_ci } 77008c2ecf20Sopenharmony_ci 77018c2ecf20Sopenharmony_ci /* Ok, everything is just fine now */ 77028c2ecf20Sopenharmony_ci if (mddev->to_remove == &raid5_attrs_group) 77038c2ecf20Sopenharmony_ci mddev->to_remove = NULL; 77048c2ecf20Sopenharmony_ci else if (mddev->kobj.sd && 77058c2ecf20Sopenharmony_ci sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) 77068c2ecf20Sopenharmony_ci pr_warn("raid5: failed to create sysfs attributes for %s\n", 77078c2ecf20Sopenharmony_ci mdname(mddev)); 77088c2ecf20Sopenharmony_ci md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 77098c2ecf20Sopenharmony_ci 77108c2ecf20Sopenharmony_ci if (mddev->queue) { 77118c2ecf20Sopenharmony_ci int chunk_size; 77128c2ecf20Sopenharmony_ci /* read-ahead size must cover two whole stripes, which 77138c2ecf20Sopenharmony_ci * is 2 * (datadisks) * chunksize where 'n' is the 77148c2ecf20Sopenharmony_ci * number of raid devices 77158c2ecf20Sopenharmony_ci */ 77168c2ecf20Sopenharmony_ci int data_disks = conf->previous_raid_disks - conf->max_degraded; 77178c2ecf20Sopenharmony_ci int stripe = data_disks * 77188c2ecf20Sopenharmony_ci ((mddev->chunk_sectors << 9) / PAGE_SIZE); 77198c2ecf20Sopenharmony_ci 77208c2ecf20Sopenharmony_ci chunk_size = mddev->chunk_sectors << 9; 77218c2ecf20Sopenharmony_ci blk_queue_io_min(mddev->queue, chunk_size); 77228c2ecf20Sopenharmony_ci raid5_set_io_opt(conf); 77238c2ecf20Sopenharmony_ci mddev->queue->limits.raid_partial_stripes_expensive = 1; 77248c2ecf20Sopenharmony_ci /* 77258c2ecf20Sopenharmony_ci * We can only discard a whole stripe. It doesn't make sense to 77268c2ecf20Sopenharmony_ci * discard data disk but write parity disk 77278c2ecf20Sopenharmony_ci */ 77288c2ecf20Sopenharmony_ci stripe = stripe * PAGE_SIZE; 77298c2ecf20Sopenharmony_ci /* Round up to power of 2, as discard handling 77308c2ecf20Sopenharmony_ci * currently assumes that */ 77318c2ecf20Sopenharmony_ci while ((stripe-1) & stripe) 77328c2ecf20Sopenharmony_ci stripe = (stripe | (stripe-1)) + 1; 77338c2ecf20Sopenharmony_ci mddev->queue->limits.discard_alignment = stripe; 77348c2ecf20Sopenharmony_ci mddev->queue->limits.discard_granularity = stripe; 77358c2ecf20Sopenharmony_ci 77368c2ecf20Sopenharmony_ci blk_queue_max_write_same_sectors(mddev->queue, 0); 77378c2ecf20Sopenharmony_ci blk_queue_max_write_zeroes_sectors(mddev->queue, 0); 77388c2ecf20Sopenharmony_ci 77398c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 77408c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 77418c2ecf20Sopenharmony_ci rdev->data_offset << 9); 77428c2ecf20Sopenharmony_ci disk_stack_limits(mddev->gendisk, rdev->bdev, 77438c2ecf20Sopenharmony_ci rdev->new_data_offset << 9); 77448c2ecf20Sopenharmony_ci } 77458c2ecf20Sopenharmony_ci 77468c2ecf20Sopenharmony_ci /* 77478c2ecf20Sopenharmony_ci * zeroing is required, otherwise data 77488c2ecf20Sopenharmony_ci * could be lost. Consider a scenario: discard a stripe 77498c2ecf20Sopenharmony_ci * (the stripe could be inconsistent if 77508c2ecf20Sopenharmony_ci * discard_zeroes_data is 0); write one disk of the 77518c2ecf20Sopenharmony_ci * stripe (the stripe could be inconsistent again 77528c2ecf20Sopenharmony_ci * depending on which disks are used to calculate 77538c2ecf20Sopenharmony_ci * parity); the disk is broken; The stripe data of this 77548c2ecf20Sopenharmony_ci * disk is lost. 77558c2ecf20Sopenharmony_ci * 77568c2ecf20Sopenharmony_ci * We only allow DISCARD if the sysadmin has confirmed that 77578c2ecf20Sopenharmony_ci * only safe devices are in use by setting a module parameter. 77588c2ecf20Sopenharmony_ci * A better idea might be to turn DISCARD into WRITE_ZEROES 77598c2ecf20Sopenharmony_ci * requests, as that is required to be safe. 77608c2ecf20Sopenharmony_ci */ 77618c2ecf20Sopenharmony_ci if (devices_handle_discard_safely && 77628c2ecf20Sopenharmony_ci mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && 77638c2ecf20Sopenharmony_ci mddev->queue->limits.discard_granularity >= stripe) 77648c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, 77658c2ecf20Sopenharmony_ci mddev->queue); 77668c2ecf20Sopenharmony_ci else 77678c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_DISCARD, 77688c2ecf20Sopenharmony_ci mddev->queue); 77698c2ecf20Sopenharmony_ci 77708c2ecf20Sopenharmony_ci blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); 77718c2ecf20Sopenharmony_ci } 77728c2ecf20Sopenharmony_ci 77738c2ecf20Sopenharmony_ci if (log_init(conf, journal_dev, raid5_has_ppl(conf))) 77748c2ecf20Sopenharmony_ci goto abort; 77758c2ecf20Sopenharmony_ci 77768c2ecf20Sopenharmony_ci return 0; 77778c2ecf20Sopenharmony_ciabort: 77788c2ecf20Sopenharmony_ci md_unregister_thread(&mddev->thread); 77798c2ecf20Sopenharmony_ci print_raid5_conf(conf); 77808c2ecf20Sopenharmony_ci free_conf(conf); 77818c2ecf20Sopenharmony_ci mddev->private = NULL; 77828c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); 77838c2ecf20Sopenharmony_ci return -EIO; 77848c2ecf20Sopenharmony_ci} 77858c2ecf20Sopenharmony_ci 77868c2ecf20Sopenharmony_cistatic void raid5_free(struct mddev *mddev, void *priv) 77878c2ecf20Sopenharmony_ci{ 77888c2ecf20Sopenharmony_ci struct r5conf *conf = priv; 77898c2ecf20Sopenharmony_ci 77908c2ecf20Sopenharmony_ci free_conf(conf); 77918c2ecf20Sopenharmony_ci mddev->to_remove = &raid5_attrs_group; 77928c2ecf20Sopenharmony_ci} 77938c2ecf20Sopenharmony_ci 77948c2ecf20Sopenharmony_cistatic void raid5_status(struct seq_file *seq, struct mddev *mddev) 77958c2ecf20Sopenharmony_ci{ 77968c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 77978c2ecf20Sopenharmony_ci int i; 77988c2ecf20Sopenharmony_ci 77998c2ecf20Sopenharmony_ci seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, 78008c2ecf20Sopenharmony_ci conf->chunk_sectors / 2, mddev->layout); 78018c2ecf20Sopenharmony_ci seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); 78028c2ecf20Sopenharmony_ci rcu_read_lock(); 78038c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 78048c2ecf20Sopenharmony_ci struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); 78058c2ecf20Sopenharmony_ci seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); 78068c2ecf20Sopenharmony_ci } 78078c2ecf20Sopenharmony_ci rcu_read_unlock(); 78088c2ecf20Sopenharmony_ci seq_printf (seq, "]"); 78098c2ecf20Sopenharmony_ci} 78108c2ecf20Sopenharmony_ci 78118c2ecf20Sopenharmony_cistatic void print_raid5_conf (struct r5conf *conf) 78128c2ecf20Sopenharmony_ci{ 78138c2ecf20Sopenharmony_ci int i; 78148c2ecf20Sopenharmony_ci struct disk_info *tmp; 78158c2ecf20Sopenharmony_ci 78168c2ecf20Sopenharmony_ci pr_debug("RAID conf printout:\n"); 78178c2ecf20Sopenharmony_ci if (!conf) { 78188c2ecf20Sopenharmony_ci pr_debug("(conf==NULL)\n"); 78198c2ecf20Sopenharmony_ci return; 78208c2ecf20Sopenharmony_ci } 78218c2ecf20Sopenharmony_ci pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level, 78228c2ecf20Sopenharmony_ci conf->raid_disks, 78238c2ecf20Sopenharmony_ci conf->raid_disks - conf->mddev->degraded); 78248c2ecf20Sopenharmony_ci 78258c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 78268c2ecf20Sopenharmony_ci char b[BDEVNAME_SIZE]; 78278c2ecf20Sopenharmony_ci tmp = conf->disks + i; 78288c2ecf20Sopenharmony_ci if (tmp->rdev) 78298c2ecf20Sopenharmony_ci pr_debug(" disk %d, o:%d, dev:%s\n", 78308c2ecf20Sopenharmony_ci i, !test_bit(Faulty, &tmp->rdev->flags), 78318c2ecf20Sopenharmony_ci bdevname(tmp->rdev->bdev, b)); 78328c2ecf20Sopenharmony_ci } 78338c2ecf20Sopenharmony_ci} 78348c2ecf20Sopenharmony_ci 78358c2ecf20Sopenharmony_cistatic int raid5_spare_active(struct mddev *mddev) 78368c2ecf20Sopenharmony_ci{ 78378c2ecf20Sopenharmony_ci int i; 78388c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 78398c2ecf20Sopenharmony_ci struct disk_info *tmp; 78408c2ecf20Sopenharmony_ci int count = 0; 78418c2ecf20Sopenharmony_ci unsigned long flags; 78428c2ecf20Sopenharmony_ci 78438c2ecf20Sopenharmony_ci for (i = 0; i < conf->raid_disks; i++) { 78448c2ecf20Sopenharmony_ci tmp = conf->disks + i; 78458c2ecf20Sopenharmony_ci if (tmp->replacement 78468c2ecf20Sopenharmony_ci && tmp->replacement->recovery_offset == MaxSector 78478c2ecf20Sopenharmony_ci && !test_bit(Faulty, &tmp->replacement->flags) 78488c2ecf20Sopenharmony_ci && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { 78498c2ecf20Sopenharmony_ci /* Replacement has just become active. */ 78508c2ecf20Sopenharmony_ci if (!tmp->rdev 78518c2ecf20Sopenharmony_ci || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) 78528c2ecf20Sopenharmony_ci count++; 78538c2ecf20Sopenharmony_ci if (tmp->rdev) { 78548c2ecf20Sopenharmony_ci /* Replaced device not technically faulty, 78558c2ecf20Sopenharmony_ci * but we need to be sure it gets removed 78568c2ecf20Sopenharmony_ci * and never re-added. 78578c2ecf20Sopenharmony_ci */ 78588c2ecf20Sopenharmony_ci set_bit(Faulty, &tmp->rdev->flags); 78598c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe( 78608c2ecf20Sopenharmony_ci tmp->rdev->sysfs_state); 78618c2ecf20Sopenharmony_ci } 78628c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); 78638c2ecf20Sopenharmony_ci } else if (tmp->rdev 78648c2ecf20Sopenharmony_ci && tmp->rdev->recovery_offset == MaxSector 78658c2ecf20Sopenharmony_ci && !test_bit(Faulty, &tmp->rdev->flags) 78668c2ecf20Sopenharmony_ci && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 78678c2ecf20Sopenharmony_ci count++; 78688c2ecf20Sopenharmony_ci sysfs_notify_dirent_safe(tmp->rdev->sysfs_state); 78698c2ecf20Sopenharmony_ci } 78708c2ecf20Sopenharmony_ci } 78718c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 78728c2ecf20Sopenharmony_ci mddev->degraded = raid5_calc_degraded(conf); 78738c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 78748c2ecf20Sopenharmony_ci print_raid5_conf(conf); 78758c2ecf20Sopenharmony_ci return count; 78768c2ecf20Sopenharmony_ci} 78778c2ecf20Sopenharmony_ci 78788c2ecf20Sopenharmony_cistatic int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) 78798c2ecf20Sopenharmony_ci{ 78808c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 78818c2ecf20Sopenharmony_ci int err = 0; 78828c2ecf20Sopenharmony_ci int number = rdev->raid_disk; 78838c2ecf20Sopenharmony_ci struct md_rdev **rdevp; 78848c2ecf20Sopenharmony_ci struct disk_info *p = conf->disks + number; 78858c2ecf20Sopenharmony_ci 78868c2ecf20Sopenharmony_ci print_raid5_conf(conf); 78878c2ecf20Sopenharmony_ci if (test_bit(Journal, &rdev->flags) && conf->log) { 78888c2ecf20Sopenharmony_ci /* 78898c2ecf20Sopenharmony_ci * we can't wait pending write here, as this is called in 78908c2ecf20Sopenharmony_ci * raid5d, wait will deadlock. 78918c2ecf20Sopenharmony_ci * neilb: there is no locking about new writes here, 78928c2ecf20Sopenharmony_ci * so this cannot be safe. 78938c2ecf20Sopenharmony_ci */ 78948c2ecf20Sopenharmony_ci if (atomic_read(&conf->active_stripes) || 78958c2ecf20Sopenharmony_ci atomic_read(&conf->r5c_cached_full_stripes) || 78968c2ecf20Sopenharmony_ci atomic_read(&conf->r5c_cached_partial_stripes)) { 78978c2ecf20Sopenharmony_ci return -EBUSY; 78988c2ecf20Sopenharmony_ci } 78998c2ecf20Sopenharmony_ci log_exit(conf); 79008c2ecf20Sopenharmony_ci return 0; 79018c2ecf20Sopenharmony_ci } 79028c2ecf20Sopenharmony_ci if (rdev == p->rdev) 79038c2ecf20Sopenharmony_ci rdevp = &p->rdev; 79048c2ecf20Sopenharmony_ci else if (rdev == p->replacement) 79058c2ecf20Sopenharmony_ci rdevp = &p->replacement; 79068c2ecf20Sopenharmony_ci else 79078c2ecf20Sopenharmony_ci return 0; 79088c2ecf20Sopenharmony_ci 79098c2ecf20Sopenharmony_ci if (number >= conf->raid_disks && 79108c2ecf20Sopenharmony_ci conf->reshape_progress == MaxSector) 79118c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 79128c2ecf20Sopenharmony_ci 79138c2ecf20Sopenharmony_ci if (test_bit(In_sync, &rdev->flags) || 79148c2ecf20Sopenharmony_ci atomic_read(&rdev->nr_pending)) { 79158c2ecf20Sopenharmony_ci err = -EBUSY; 79168c2ecf20Sopenharmony_ci goto abort; 79178c2ecf20Sopenharmony_ci } 79188c2ecf20Sopenharmony_ci /* Only remove non-faulty devices if recovery 79198c2ecf20Sopenharmony_ci * isn't possible. 79208c2ecf20Sopenharmony_ci */ 79218c2ecf20Sopenharmony_ci if (!test_bit(Faulty, &rdev->flags) && 79228c2ecf20Sopenharmony_ci mddev->recovery_disabled != conf->recovery_disabled && 79238c2ecf20Sopenharmony_ci !has_failed(conf) && 79248c2ecf20Sopenharmony_ci (!p->replacement || p->replacement == rdev) && 79258c2ecf20Sopenharmony_ci number < conf->raid_disks) { 79268c2ecf20Sopenharmony_ci err = -EBUSY; 79278c2ecf20Sopenharmony_ci goto abort; 79288c2ecf20Sopenharmony_ci } 79298c2ecf20Sopenharmony_ci *rdevp = NULL; 79308c2ecf20Sopenharmony_ci if (!test_bit(RemoveSynchronized, &rdev->flags)) { 79318c2ecf20Sopenharmony_ci synchronize_rcu(); 79328c2ecf20Sopenharmony_ci if (atomic_read(&rdev->nr_pending)) { 79338c2ecf20Sopenharmony_ci /* lost the race, try later */ 79348c2ecf20Sopenharmony_ci err = -EBUSY; 79358c2ecf20Sopenharmony_ci *rdevp = rdev; 79368c2ecf20Sopenharmony_ci } 79378c2ecf20Sopenharmony_ci } 79388c2ecf20Sopenharmony_ci if (!err) { 79398c2ecf20Sopenharmony_ci err = log_modify(conf, rdev, false); 79408c2ecf20Sopenharmony_ci if (err) 79418c2ecf20Sopenharmony_ci goto abort; 79428c2ecf20Sopenharmony_ci } 79438c2ecf20Sopenharmony_ci if (p->replacement) { 79448c2ecf20Sopenharmony_ci /* We must have just cleared 'rdev' */ 79458c2ecf20Sopenharmony_ci p->rdev = p->replacement; 79468c2ecf20Sopenharmony_ci clear_bit(Replacement, &p->replacement->flags); 79478c2ecf20Sopenharmony_ci smp_mb(); /* Make sure other CPUs may see both as identical 79488c2ecf20Sopenharmony_ci * but will never see neither - if they are careful 79498c2ecf20Sopenharmony_ci */ 79508c2ecf20Sopenharmony_ci p->replacement = NULL; 79518c2ecf20Sopenharmony_ci 79528c2ecf20Sopenharmony_ci if (!err) 79538c2ecf20Sopenharmony_ci err = log_modify(conf, p->rdev, true); 79548c2ecf20Sopenharmony_ci } 79558c2ecf20Sopenharmony_ci 79568c2ecf20Sopenharmony_ci clear_bit(WantReplacement, &rdev->flags); 79578c2ecf20Sopenharmony_ciabort: 79588c2ecf20Sopenharmony_ci 79598c2ecf20Sopenharmony_ci print_raid5_conf(conf); 79608c2ecf20Sopenharmony_ci return err; 79618c2ecf20Sopenharmony_ci} 79628c2ecf20Sopenharmony_ci 79638c2ecf20Sopenharmony_cistatic int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) 79648c2ecf20Sopenharmony_ci{ 79658c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 79668c2ecf20Sopenharmony_ci int ret, err = -EEXIST; 79678c2ecf20Sopenharmony_ci int disk; 79688c2ecf20Sopenharmony_ci struct disk_info *p; 79698c2ecf20Sopenharmony_ci int first = 0; 79708c2ecf20Sopenharmony_ci int last = conf->raid_disks - 1; 79718c2ecf20Sopenharmony_ci 79728c2ecf20Sopenharmony_ci if (test_bit(Journal, &rdev->flags)) { 79738c2ecf20Sopenharmony_ci if (conf->log) 79748c2ecf20Sopenharmony_ci return -EBUSY; 79758c2ecf20Sopenharmony_ci 79768c2ecf20Sopenharmony_ci rdev->raid_disk = 0; 79778c2ecf20Sopenharmony_ci /* 79788c2ecf20Sopenharmony_ci * The array is in readonly mode if journal is missing, so no 79798c2ecf20Sopenharmony_ci * write requests running. We should be safe 79808c2ecf20Sopenharmony_ci */ 79818c2ecf20Sopenharmony_ci ret = log_init(conf, rdev, false); 79828c2ecf20Sopenharmony_ci if (ret) 79838c2ecf20Sopenharmony_ci return ret; 79848c2ecf20Sopenharmony_ci 79858c2ecf20Sopenharmony_ci ret = r5l_start(conf->log); 79868c2ecf20Sopenharmony_ci if (ret) 79878c2ecf20Sopenharmony_ci return ret; 79888c2ecf20Sopenharmony_ci 79898c2ecf20Sopenharmony_ci return 0; 79908c2ecf20Sopenharmony_ci } 79918c2ecf20Sopenharmony_ci if (mddev->recovery_disabled == conf->recovery_disabled) 79928c2ecf20Sopenharmony_ci return -EBUSY; 79938c2ecf20Sopenharmony_ci 79948c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk < 0 && has_failed(conf)) 79958c2ecf20Sopenharmony_ci /* no point adding a device */ 79968c2ecf20Sopenharmony_ci return -EINVAL; 79978c2ecf20Sopenharmony_ci 79988c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0) 79998c2ecf20Sopenharmony_ci first = last = rdev->raid_disk; 80008c2ecf20Sopenharmony_ci 80018c2ecf20Sopenharmony_ci /* 80028c2ecf20Sopenharmony_ci * find the disk ... but prefer rdev->saved_raid_disk 80038c2ecf20Sopenharmony_ci * if possible. 80048c2ecf20Sopenharmony_ci */ 80058c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk >= 0 && 80068c2ecf20Sopenharmony_ci rdev->saved_raid_disk >= first && 80078c2ecf20Sopenharmony_ci rdev->saved_raid_disk <= last && 80088c2ecf20Sopenharmony_ci conf->disks[rdev->saved_raid_disk].rdev == NULL) 80098c2ecf20Sopenharmony_ci first = rdev->saved_raid_disk; 80108c2ecf20Sopenharmony_ci 80118c2ecf20Sopenharmony_ci for (disk = first; disk <= last; disk++) { 80128c2ecf20Sopenharmony_ci p = conf->disks + disk; 80138c2ecf20Sopenharmony_ci if (p->rdev == NULL) { 80148c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 80158c2ecf20Sopenharmony_ci rdev->raid_disk = disk; 80168c2ecf20Sopenharmony_ci if (rdev->saved_raid_disk != disk) 80178c2ecf20Sopenharmony_ci conf->fullsync = 1; 80188c2ecf20Sopenharmony_ci rcu_assign_pointer(p->rdev, rdev); 80198c2ecf20Sopenharmony_ci 80208c2ecf20Sopenharmony_ci err = log_modify(conf, rdev, true); 80218c2ecf20Sopenharmony_ci 80228c2ecf20Sopenharmony_ci goto out; 80238c2ecf20Sopenharmony_ci } 80248c2ecf20Sopenharmony_ci } 80258c2ecf20Sopenharmony_ci for (disk = first; disk <= last; disk++) { 80268c2ecf20Sopenharmony_ci p = conf->disks + disk; 80278c2ecf20Sopenharmony_ci if (test_bit(WantReplacement, &p->rdev->flags) && 80288c2ecf20Sopenharmony_ci p->replacement == NULL) { 80298c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 80308c2ecf20Sopenharmony_ci set_bit(Replacement, &rdev->flags); 80318c2ecf20Sopenharmony_ci rdev->raid_disk = disk; 80328c2ecf20Sopenharmony_ci err = 0; 80338c2ecf20Sopenharmony_ci conf->fullsync = 1; 80348c2ecf20Sopenharmony_ci rcu_assign_pointer(p->replacement, rdev); 80358c2ecf20Sopenharmony_ci break; 80368c2ecf20Sopenharmony_ci } 80378c2ecf20Sopenharmony_ci } 80388c2ecf20Sopenharmony_ciout: 80398c2ecf20Sopenharmony_ci print_raid5_conf(conf); 80408c2ecf20Sopenharmony_ci return err; 80418c2ecf20Sopenharmony_ci} 80428c2ecf20Sopenharmony_ci 80438c2ecf20Sopenharmony_cistatic int raid5_resize(struct mddev *mddev, sector_t sectors) 80448c2ecf20Sopenharmony_ci{ 80458c2ecf20Sopenharmony_ci /* no resync is happening, and there is enough space 80468c2ecf20Sopenharmony_ci * on all devices, so we can resize. 80478c2ecf20Sopenharmony_ci * We need to make sure resync covers any new space. 80488c2ecf20Sopenharmony_ci * If the array is shrinking we should possibly wait until 80498c2ecf20Sopenharmony_ci * any io in the removed space completes, but it hardly seems 80508c2ecf20Sopenharmony_ci * worth it. 80518c2ecf20Sopenharmony_ci */ 80528c2ecf20Sopenharmony_ci sector_t newsize; 80538c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 80548c2ecf20Sopenharmony_ci 80558c2ecf20Sopenharmony_ci if (raid5_has_log(conf) || raid5_has_ppl(conf)) 80568c2ecf20Sopenharmony_ci return -EINVAL; 80578c2ecf20Sopenharmony_ci sectors &= ~((sector_t)conf->chunk_sectors - 1); 80588c2ecf20Sopenharmony_ci newsize = raid5_size(mddev, sectors, mddev->raid_disks); 80598c2ecf20Sopenharmony_ci if (mddev->external_size && 80608c2ecf20Sopenharmony_ci mddev->array_sectors > newsize) 80618c2ecf20Sopenharmony_ci return -EINVAL; 80628c2ecf20Sopenharmony_ci if (mddev->bitmap) { 80638c2ecf20Sopenharmony_ci int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); 80648c2ecf20Sopenharmony_ci if (ret) 80658c2ecf20Sopenharmony_ci return ret; 80668c2ecf20Sopenharmony_ci } 80678c2ecf20Sopenharmony_ci md_set_array_sectors(mddev, newsize); 80688c2ecf20Sopenharmony_ci if (sectors > mddev->dev_sectors && 80698c2ecf20Sopenharmony_ci mddev->recovery_cp > mddev->dev_sectors) { 80708c2ecf20Sopenharmony_ci mddev->recovery_cp = mddev->dev_sectors; 80718c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 80728c2ecf20Sopenharmony_ci } 80738c2ecf20Sopenharmony_ci mddev->dev_sectors = sectors; 80748c2ecf20Sopenharmony_ci mddev->resync_max_sectors = sectors; 80758c2ecf20Sopenharmony_ci return 0; 80768c2ecf20Sopenharmony_ci} 80778c2ecf20Sopenharmony_ci 80788c2ecf20Sopenharmony_cistatic int check_stripe_cache(struct mddev *mddev) 80798c2ecf20Sopenharmony_ci{ 80808c2ecf20Sopenharmony_ci /* Can only proceed if there are plenty of stripe_heads. 80818c2ecf20Sopenharmony_ci * We need a minimum of one full stripe,, and for sensible progress 80828c2ecf20Sopenharmony_ci * it is best to have about 4 times that. 80838c2ecf20Sopenharmony_ci * If we require 4 times, then the default 256 4K stripe_heads will 80848c2ecf20Sopenharmony_ci * allow for chunk sizes up to 256K, which is probably OK. 80858c2ecf20Sopenharmony_ci * If the chunk size is greater, user-space should request more 80868c2ecf20Sopenharmony_ci * stripe_heads first. 80878c2ecf20Sopenharmony_ci */ 80888c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 80898c2ecf20Sopenharmony_ci if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 80908c2ecf20Sopenharmony_ci > conf->min_nr_stripes || 80918c2ecf20Sopenharmony_ci ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 80928c2ecf20Sopenharmony_ci > conf->min_nr_stripes) { 80938c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: reshape: not enough stripes. Needed %lu\n", 80948c2ecf20Sopenharmony_ci mdname(mddev), 80958c2ecf20Sopenharmony_ci ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) 80968c2ecf20Sopenharmony_ci / RAID5_STRIPE_SIZE(conf))*4); 80978c2ecf20Sopenharmony_ci return 0; 80988c2ecf20Sopenharmony_ci } 80998c2ecf20Sopenharmony_ci return 1; 81008c2ecf20Sopenharmony_ci} 81018c2ecf20Sopenharmony_ci 81028c2ecf20Sopenharmony_cistatic int check_reshape(struct mddev *mddev) 81038c2ecf20Sopenharmony_ci{ 81048c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 81058c2ecf20Sopenharmony_ci 81068c2ecf20Sopenharmony_ci if (raid5_has_log(conf) || raid5_has_ppl(conf)) 81078c2ecf20Sopenharmony_ci return -EINVAL; 81088c2ecf20Sopenharmony_ci if (mddev->delta_disks == 0 && 81098c2ecf20Sopenharmony_ci mddev->new_layout == mddev->layout && 81108c2ecf20Sopenharmony_ci mddev->new_chunk_sectors == mddev->chunk_sectors) 81118c2ecf20Sopenharmony_ci return 0; /* nothing to do */ 81128c2ecf20Sopenharmony_ci if (has_failed(conf)) 81138c2ecf20Sopenharmony_ci return -EINVAL; 81148c2ecf20Sopenharmony_ci if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) { 81158c2ecf20Sopenharmony_ci /* We might be able to shrink, but the devices must 81168c2ecf20Sopenharmony_ci * be made bigger first. 81178c2ecf20Sopenharmony_ci * For raid6, 4 is the minimum size. 81188c2ecf20Sopenharmony_ci * Otherwise 2 is the minimum 81198c2ecf20Sopenharmony_ci */ 81208c2ecf20Sopenharmony_ci int min = 2; 81218c2ecf20Sopenharmony_ci if (mddev->level == 6) 81228c2ecf20Sopenharmony_ci min = 4; 81238c2ecf20Sopenharmony_ci if (mddev->raid_disks + mddev->delta_disks < min) 81248c2ecf20Sopenharmony_ci return -EINVAL; 81258c2ecf20Sopenharmony_ci } 81268c2ecf20Sopenharmony_ci 81278c2ecf20Sopenharmony_ci if (!check_stripe_cache(mddev)) 81288c2ecf20Sopenharmony_ci return -ENOSPC; 81298c2ecf20Sopenharmony_ci 81308c2ecf20Sopenharmony_ci if (mddev->new_chunk_sectors > mddev->chunk_sectors || 81318c2ecf20Sopenharmony_ci mddev->delta_disks > 0) 81328c2ecf20Sopenharmony_ci if (resize_chunks(conf, 81338c2ecf20Sopenharmony_ci conf->previous_raid_disks 81348c2ecf20Sopenharmony_ci + max(0, mddev->delta_disks), 81358c2ecf20Sopenharmony_ci max(mddev->new_chunk_sectors, 81368c2ecf20Sopenharmony_ci mddev->chunk_sectors) 81378c2ecf20Sopenharmony_ci ) < 0) 81388c2ecf20Sopenharmony_ci return -ENOMEM; 81398c2ecf20Sopenharmony_ci 81408c2ecf20Sopenharmony_ci if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size) 81418c2ecf20Sopenharmony_ci return 0; /* never bother to shrink */ 81428c2ecf20Sopenharmony_ci return resize_stripes(conf, (conf->previous_raid_disks 81438c2ecf20Sopenharmony_ci + mddev->delta_disks)); 81448c2ecf20Sopenharmony_ci} 81458c2ecf20Sopenharmony_ci 81468c2ecf20Sopenharmony_cistatic int raid5_start_reshape(struct mddev *mddev) 81478c2ecf20Sopenharmony_ci{ 81488c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 81498c2ecf20Sopenharmony_ci struct md_rdev *rdev; 81508c2ecf20Sopenharmony_ci int spares = 0; 81518c2ecf20Sopenharmony_ci unsigned long flags; 81528c2ecf20Sopenharmony_ci 81538c2ecf20Sopenharmony_ci if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 81548c2ecf20Sopenharmony_ci return -EBUSY; 81558c2ecf20Sopenharmony_ci 81568c2ecf20Sopenharmony_ci if (!check_stripe_cache(mddev)) 81578c2ecf20Sopenharmony_ci return -ENOSPC; 81588c2ecf20Sopenharmony_ci 81598c2ecf20Sopenharmony_ci if (has_failed(conf)) 81608c2ecf20Sopenharmony_ci return -EINVAL; 81618c2ecf20Sopenharmony_ci 81628c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) { 81638c2ecf20Sopenharmony_ci if (!test_bit(In_sync, &rdev->flags) 81648c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags)) 81658c2ecf20Sopenharmony_ci spares++; 81668c2ecf20Sopenharmony_ci } 81678c2ecf20Sopenharmony_ci 81688c2ecf20Sopenharmony_ci if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) 81698c2ecf20Sopenharmony_ci /* Not enough devices even to make a degraded array 81708c2ecf20Sopenharmony_ci * of that size 81718c2ecf20Sopenharmony_ci */ 81728c2ecf20Sopenharmony_ci return -EINVAL; 81738c2ecf20Sopenharmony_ci 81748c2ecf20Sopenharmony_ci /* Refuse to reduce size of the array. Any reductions in 81758c2ecf20Sopenharmony_ci * array size must be through explicit setting of array_size 81768c2ecf20Sopenharmony_ci * attribute. 81778c2ecf20Sopenharmony_ci */ 81788c2ecf20Sopenharmony_ci if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) 81798c2ecf20Sopenharmony_ci < mddev->array_sectors) { 81808c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: array size must be reduced before number of disks\n", 81818c2ecf20Sopenharmony_ci mdname(mddev)); 81828c2ecf20Sopenharmony_ci return -EINVAL; 81838c2ecf20Sopenharmony_ci } 81848c2ecf20Sopenharmony_ci 81858c2ecf20Sopenharmony_ci atomic_set(&conf->reshape_stripes, 0); 81868c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 81878c2ecf20Sopenharmony_ci write_seqcount_begin(&conf->gen_lock); 81888c2ecf20Sopenharmony_ci conf->previous_raid_disks = conf->raid_disks; 81898c2ecf20Sopenharmony_ci conf->raid_disks += mddev->delta_disks; 81908c2ecf20Sopenharmony_ci conf->prev_chunk_sectors = conf->chunk_sectors; 81918c2ecf20Sopenharmony_ci conf->chunk_sectors = mddev->new_chunk_sectors; 81928c2ecf20Sopenharmony_ci conf->prev_algo = conf->algorithm; 81938c2ecf20Sopenharmony_ci conf->algorithm = mddev->new_layout; 81948c2ecf20Sopenharmony_ci conf->generation++; 81958c2ecf20Sopenharmony_ci /* Code that selects data_offset needs to see the generation update 81968c2ecf20Sopenharmony_ci * if reshape_progress has been set - so a memory barrier needed. 81978c2ecf20Sopenharmony_ci */ 81988c2ecf20Sopenharmony_ci smp_mb(); 81998c2ecf20Sopenharmony_ci if (mddev->reshape_backwards) 82008c2ecf20Sopenharmony_ci conf->reshape_progress = raid5_size(mddev, 0, 0); 82018c2ecf20Sopenharmony_ci else 82028c2ecf20Sopenharmony_ci conf->reshape_progress = 0; 82038c2ecf20Sopenharmony_ci conf->reshape_safe = conf->reshape_progress; 82048c2ecf20Sopenharmony_ci write_seqcount_end(&conf->gen_lock); 82058c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 82068c2ecf20Sopenharmony_ci 82078c2ecf20Sopenharmony_ci /* Now make sure any requests that proceeded on the assumption 82088c2ecf20Sopenharmony_ci * the reshape wasn't running - like Discard or Read - have 82098c2ecf20Sopenharmony_ci * completed. 82108c2ecf20Sopenharmony_ci */ 82118c2ecf20Sopenharmony_ci mddev_suspend(mddev); 82128c2ecf20Sopenharmony_ci mddev_resume(mddev); 82138c2ecf20Sopenharmony_ci 82148c2ecf20Sopenharmony_ci /* Add some new drives, as many as will fit. 82158c2ecf20Sopenharmony_ci * We know there are enough to make the newly sized array work. 82168c2ecf20Sopenharmony_ci * Don't add devices if we are reducing the number of 82178c2ecf20Sopenharmony_ci * devices in the array. This is because it is not possible 82188c2ecf20Sopenharmony_ci * to correctly record the "partially reconstructed" state of 82198c2ecf20Sopenharmony_ci * such devices during the reshape and confusion could result. 82208c2ecf20Sopenharmony_ci */ 82218c2ecf20Sopenharmony_ci if (mddev->delta_disks >= 0) { 82228c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 82238c2ecf20Sopenharmony_ci if (rdev->raid_disk < 0 && 82248c2ecf20Sopenharmony_ci !test_bit(Faulty, &rdev->flags)) { 82258c2ecf20Sopenharmony_ci if (raid5_add_disk(mddev, rdev) == 0) { 82268c2ecf20Sopenharmony_ci if (rdev->raid_disk 82278c2ecf20Sopenharmony_ci >= conf->previous_raid_disks) 82288c2ecf20Sopenharmony_ci set_bit(In_sync, &rdev->flags); 82298c2ecf20Sopenharmony_ci else 82308c2ecf20Sopenharmony_ci rdev->recovery_offset = 0; 82318c2ecf20Sopenharmony_ci 82328c2ecf20Sopenharmony_ci /* Failure here is OK */ 82338c2ecf20Sopenharmony_ci sysfs_link_rdev(mddev, rdev); 82348c2ecf20Sopenharmony_ci } 82358c2ecf20Sopenharmony_ci } else if (rdev->raid_disk >= conf->previous_raid_disks 82368c2ecf20Sopenharmony_ci && !test_bit(Faulty, &rdev->flags)) { 82378c2ecf20Sopenharmony_ci /* This is a spare that was manually added */ 82388c2ecf20Sopenharmony_ci set_bit(In_sync, &rdev->flags); 82398c2ecf20Sopenharmony_ci } 82408c2ecf20Sopenharmony_ci 82418c2ecf20Sopenharmony_ci /* When a reshape changes the number of devices, 82428c2ecf20Sopenharmony_ci * ->degraded is measured against the larger of the 82438c2ecf20Sopenharmony_ci * pre and post number of devices. 82448c2ecf20Sopenharmony_ci */ 82458c2ecf20Sopenharmony_ci spin_lock_irqsave(&conf->device_lock, flags); 82468c2ecf20Sopenharmony_ci mddev->degraded = raid5_calc_degraded(conf); 82478c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&conf->device_lock, flags); 82488c2ecf20Sopenharmony_ci } 82498c2ecf20Sopenharmony_ci mddev->raid_disks = conf->raid_disks; 82508c2ecf20Sopenharmony_ci mddev->reshape_position = conf->reshape_progress; 82518c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 82528c2ecf20Sopenharmony_ci 82538c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 82548c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 82558c2ecf20Sopenharmony_ci clear_bit(MD_RECOVERY_DONE, &mddev->recovery); 82568c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 82578c2ecf20Sopenharmony_ci set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 82588c2ecf20Sopenharmony_ci mddev->sync_thread = md_register_thread(md_do_sync, mddev, 82598c2ecf20Sopenharmony_ci "reshape"); 82608c2ecf20Sopenharmony_ci if (!mddev->sync_thread) { 82618c2ecf20Sopenharmony_ci mddev->recovery = 0; 82628c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 82638c2ecf20Sopenharmony_ci write_seqcount_begin(&conf->gen_lock); 82648c2ecf20Sopenharmony_ci mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; 82658c2ecf20Sopenharmony_ci mddev->new_chunk_sectors = 82668c2ecf20Sopenharmony_ci conf->chunk_sectors = conf->prev_chunk_sectors; 82678c2ecf20Sopenharmony_ci mddev->new_layout = conf->algorithm = conf->prev_algo; 82688c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 82698c2ecf20Sopenharmony_ci rdev->new_data_offset = rdev->data_offset; 82708c2ecf20Sopenharmony_ci smp_wmb(); 82718c2ecf20Sopenharmony_ci conf->generation --; 82728c2ecf20Sopenharmony_ci conf->reshape_progress = MaxSector; 82738c2ecf20Sopenharmony_ci mddev->reshape_position = MaxSector; 82748c2ecf20Sopenharmony_ci write_seqcount_end(&conf->gen_lock); 82758c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 82768c2ecf20Sopenharmony_ci return -EAGAIN; 82778c2ecf20Sopenharmony_ci } 82788c2ecf20Sopenharmony_ci conf->reshape_checkpoint = jiffies; 82798c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->sync_thread); 82808c2ecf20Sopenharmony_ci md_new_event(mddev); 82818c2ecf20Sopenharmony_ci return 0; 82828c2ecf20Sopenharmony_ci} 82838c2ecf20Sopenharmony_ci 82848c2ecf20Sopenharmony_ci/* This is called from the reshape thread and should make any 82858c2ecf20Sopenharmony_ci * changes needed in 'conf' 82868c2ecf20Sopenharmony_ci */ 82878c2ecf20Sopenharmony_cistatic void end_reshape(struct r5conf *conf) 82888c2ecf20Sopenharmony_ci{ 82898c2ecf20Sopenharmony_ci 82908c2ecf20Sopenharmony_ci if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { 82918c2ecf20Sopenharmony_ci struct md_rdev *rdev; 82928c2ecf20Sopenharmony_ci 82938c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 82948c2ecf20Sopenharmony_ci conf->previous_raid_disks = conf->raid_disks; 82958c2ecf20Sopenharmony_ci md_finish_reshape(conf->mddev); 82968c2ecf20Sopenharmony_ci smp_wmb(); 82978c2ecf20Sopenharmony_ci conf->reshape_progress = MaxSector; 82988c2ecf20Sopenharmony_ci conf->mddev->reshape_position = MaxSector; 82998c2ecf20Sopenharmony_ci rdev_for_each(rdev, conf->mddev) 83008c2ecf20Sopenharmony_ci if (rdev->raid_disk >= 0 && 83018c2ecf20Sopenharmony_ci !test_bit(Journal, &rdev->flags) && 83028c2ecf20Sopenharmony_ci !test_bit(In_sync, &rdev->flags)) 83038c2ecf20Sopenharmony_ci rdev->recovery_offset = MaxSector; 83048c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 83058c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 83068c2ecf20Sopenharmony_ci 83078c2ecf20Sopenharmony_ci if (conf->mddev->queue) 83088c2ecf20Sopenharmony_ci raid5_set_io_opt(conf); 83098c2ecf20Sopenharmony_ci } 83108c2ecf20Sopenharmony_ci} 83118c2ecf20Sopenharmony_ci 83128c2ecf20Sopenharmony_ci/* This is called from the raid5d thread with mddev_lock held. 83138c2ecf20Sopenharmony_ci * It makes config changes to the device. 83148c2ecf20Sopenharmony_ci */ 83158c2ecf20Sopenharmony_cistatic void raid5_finish_reshape(struct mddev *mddev) 83168c2ecf20Sopenharmony_ci{ 83178c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 83188c2ecf20Sopenharmony_ci 83198c2ecf20Sopenharmony_ci if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 83208c2ecf20Sopenharmony_ci 83218c2ecf20Sopenharmony_ci if (mddev->delta_disks <= 0) { 83228c2ecf20Sopenharmony_ci int d; 83238c2ecf20Sopenharmony_ci spin_lock_irq(&conf->device_lock); 83248c2ecf20Sopenharmony_ci mddev->degraded = raid5_calc_degraded(conf); 83258c2ecf20Sopenharmony_ci spin_unlock_irq(&conf->device_lock); 83268c2ecf20Sopenharmony_ci for (d = conf->raid_disks ; 83278c2ecf20Sopenharmony_ci d < conf->raid_disks - mddev->delta_disks; 83288c2ecf20Sopenharmony_ci d++) { 83298c2ecf20Sopenharmony_ci struct md_rdev *rdev = conf->disks[d].rdev; 83308c2ecf20Sopenharmony_ci if (rdev) 83318c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 83328c2ecf20Sopenharmony_ci rdev = conf->disks[d].replacement; 83338c2ecf20Sopenharmony_ci if (rdev) 83348c2ecf20Sopenharmony_ci clear_bit(In_sync, &rdev->flags); 83358c2ecf20Sopenharmony_ci } 83368c2ecf20Sopenharmony_ci } 83378c2ecf20Sopenharmony_ci mddev->layout = conf->algorithm; 83388c2ecf20Sopenharmony_ci mddev->chunk_sectors = conf->chunk_sectors; 83398c2ecf20Sopenharmony_ci mddev->reshape_position = MaxSector; 83408c2ecf20Sopenharmony_ci mddev->delta_disks = 0; 83418c2ecf20Sopenharmony_ci mddev->reshape_backwards = 0; 83428c2ecf20Sopenharmony_ci } 83438c2ecf20Sopenharmony_ci} 83448c2ecf20Sopenharmony_ci 83458c2ecf20Sopenharmony_cistatic void raid5_quiesce(struct mddev *mddev, int quiesce) 83468c2ecf20Sopenharmony_ci{ 83478c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 83488c2ecf20Sopenharmony_ci 83498c2ecf20Sopenharmony_ci if (quiesce) { 83508c2ecf20Sopenharmony_ci /* stop all writes */ 83518c2ecf20Sopenharmony_ci lock_all_device_hash_locks_irq(conf); 83528c2ecf20Sopenharmony_ci /* '2' tells resync/reshape to pause so that all 83538c2ecf20Sopenharmony_ci * active stripes can drain 83548c2ecf20Sopenharmony_ci */ 83558c2ecf20Sopenharmony_ci r5c_flush_cache(conf, INT_MAX); 83568c2ecf20Sopenharmony_ci conf->quiesce = 2; 83578c2ecf20Sopenharmony_ci wait_event_cmd(conf->wait_for_quiescent, 83588c2ecf20Sopenharmony_ci atomic_read(&conf->active_stripes) == 0 && 83598c2ecf20Sopenharmony_ci atomic_read(&conf->active_aligned_reads) == 0, 83608c2ecf20Sopenharmony_ci unlock_all_device_hash_locks_irq(conf), 83618c2ecf20Sopenharmony_ci lock_all_device_hash_locks_irq(conf)); 83628c2ecf20Sopenharmony_ci conf->quiesce = 1; 83638c2ecf20Sopenharmony_ci unlock_all_device_hash_locks_irq(conf); 83648c2ecf20Sopenharmony_ci /* allow reshape to continue */ 83658c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 83668c2ecf20Sopenharmony_ci } else { 83678c2ecf20Sopenharmony_ci /* re-enable writes */ 83688c2ecf20Sopenharmony_ci lock_all_device_hash_locks_irq(conf); 83698c2ecf20Sopenharmony_ci conf->quiesce = 0; 83708c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_quiescent); 83718c2ecf20Sopenharmony_ci wake_up(&conf->wait_for_overlap); 83728c2ecf20Sopenharmony_ci unlock_all_device_hash_locks_irq(conf); 83738c2ecf20Sopenharmony_ci } 83748c2ecf20Sopenharmony_ci log_quiesce(conf, quiesce); 83758c2ecf20Sopenharmony_ci} 83768c2ecf20Sopenharmony_ci 83778c2ecf20Sopenharmony_cistatic void *raid45_takeover_raid0(struct mddev *mddev, int level) 83788c2ecf20Sopenharmony_ci{ 83798c2ecf20Sopenharmony_ci struct r0conf *raid0_conf = mddev->private; 83808c2ecf20Sopenharmony_ci sector_t sectors; 83818c2ecf20Sopenharmony_ci 83828c2ecf20Sopenharmony_ci /* for raid0 takeover only one zone is supported */ 83838c2ecf20Sopenharmony_ci if (raid0_conf->nr_strip_zones > 1) { 83848c2ecf20Sopenharmony_ci pr_warn("md/raid:%s: cannot takeover raid0 with more than one zone.\n", 83858c2ecf20Sopenharmony_ci mdname(mddev)); 83868c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 83878c2ecf20Sopenharmony_ci } 83888c2ecf20Sopenharmony_ci 83898c2ecf20Sopenharmony_ci sectors = raid0_conf->strip_zone[0].zone_end; 83908c2ecf20Sopenharmony_ci sector_div(sectors, raid0_conf->strip_zone[0].nb_dev); 83918c2ecf20Sopenharmony_ci mddev->dev_sectors = sectors; 83928c2ecf20Sopenharmony_ci mddev->new_level = level; 83938c2ecf20Sopenharmony_ci mddev->new_layout = ALGORITHM_PARITY_N; 83948c2ecf20Sopenharmony_ci mddev->new_chunk_sectors = mddev->chunk_sectors; 83958c2ecf20Sopenharmony_ci mddev->raid_disks += 1; 83968c2ecf20Sopenharmony_ci mddev->delta_disks = 1; 83978c2ecf20Sopenharmony_ci /* make sure it will be not marked as dirty */ 83988c2ecf20Sopenharmony_ci mddev->recovery_cp = MaxSector; 83998c2ecf20Sopenharmony_ci 84008c2ecf20Sopenharmony_ci return setup_conf(mddev); 84018c2ecf20Sopenharmony_ci} 84028c2ecf20Sopenharmony_ci 84038c2ecf20Sopenharmony_cistatic void *raid5_takeover_raid1(struct mddev *mddev) 84048c2ecf20Sopenharmony_ci{ 84058c2ecf20Sopenharmony_ci int chunksect; 84068c2ecf20Sopenharmony_ci void *ret; 84078c2ecf20Sopenharmony_ci 84088c2ecf20Sopenharmony_ci if (mddev->raid_disks != 2 || 84098c2ecf20Sopenharmony_ci mddev->degraded > 1) 84108c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 84118c2ecf20Sopenharmony_ci 84128c2ecf20Sopenharmony_ci /* Should check if there are write-behind devices? */ 84138c2ecf20Sopenharmony_ci 84148c2ecf20Sopenharmony_ci chunksect = 64*2; /* 64K by default */ 84158c2ecf20Sopenharmony_ci 84168c2ecf20Sopenharmony_ci /* The array must be an exact multiple of chunksize */ 84178c2ecf20Sopenharmony_ci while (chunksect && (mddev->array_sectors & (chunksect-1))) 84188c2ecf20Sopenharmony_ci chunksect >>= 1; 84198c2ecf20Sopenharmony_ci 84208c2ecf20Sopenharmony_ci if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private)) 84218c2ecf20Sopenharmony_ci /* array size does not allow a suitable chunk size */ 84228c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 84238c2ecf20Sopenharmony_ci 84248c2ecf20Sopenharmony_ci mddev->new_level = 5; 84258c2ecf20Sopenharmony_ci mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; 84268c2ecf20Sopenharmony_ci mddev->new_chunk_sectors = chunksect; 84278c2ecf20Sopenharmony_ci 84288c2ecf20Sopenharmony_ci ret = setup_conf(mddev); 84298c2ecf20Sopenharmony_ci if (!IS_ERR(ret)) 84308c2ecf20Sopenharmony_ci mddev_clear_unsupported_flags(mddev, 84318c2ecf20Sopenharmony_ci UNSUPPORTED_MDDEV_FLAGS); 84328c2ecf20Sopenharmony_ci return ret; 84338c2ecf20Sopenharmony_ci} 84348c2ecf20Sopenharmony_ci 84358c2ecf20Sopenharmony_cistatic void *raid5_takeover_raid6(struct mddev *mddev) 84368c2ecf20Sopenharmony_ci{ 84378c2ecf20Sopenharmony_ci int new_layout; 84388c2ecf20Sopenharmony_ci 84398c2ecf20Sopenharmony_ci switch (mddev->layout) { 84408c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC_6: 84418c2ecf20Sopenharmony_ci new_layout = ALGORITHM_LEFT_ASYMMETRIC; 84428c2ecf20Sopenharmony_ci break; 84438c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC_6: 84448c2ecf20Sopenharmony_ci new_layout = ALGORITHM_RIGHT_ASYMMETRIC; 84458c2ecf20Sopenharmony_ci break; 84468c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC_6: 84478c2ecf20Sopenharmony_ci new_layout = ALGORITHM_LEFT_SYMMETRIC; 84488c2ecf20Sopenharmony_ci break; 84498c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC_6: 84508c2ecf20Sopenharmony_ci new_layout = ALGORITHM_RIGHT_SYMMETRIC; 84518c2ecf20Sopenharmony_ci break; 84528c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0_6: 84538c2ecf20Sopenharmony_ci new_layout = ALGORITHM_PARITY_0; 84548c2ecf20Sopenharmony_ci break; 84558c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 84568c2ecf20Sopenharmony_ci new_layout = ALGORITHM_PARITY_N; 84578c2ecf20Sopenharmony_ci break; 84588c2ecf20Sopenharmony_ci default: 84598c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 84608c2ecf20Sopenharmony_ci } 84618c2ecf20Sopenharmony_ci mddev->new_level = 5; 84628c2ecf20Sopenharmony_ci mddev->new_layout = new_layout; 84638c2ecf20Sopenharmony_ci mddev->delta_disks = -1; 84648c2ecf20Sopenharmony_ci mddev->raid_disks -= 1; 84658c2ecf20Sopenharmony_ci return setup_conf(mddev); 84668c2ecf20Sopenharmony_ci} 84678c2ecf20Sopenharmony_ci 84688c2ecf20Sopenharmony_cistatic int raid5_check_reshape(struct mddev *mddev) 84698c2ecf20Sopenharmony_ci{ 84708c2ecf20Sopenharmony_ci /* For a 2-drive array, the layout and chunk size can be changed 84718c2ecf20Sopenharmony_ci * immediately as not restriping is needed. 84728c2ecf20Sopenharmony_ci * For larger arrays we record the new value - after validation 84738c2ecf20Sopenharmony_ci * to be used by a reshape pass. 84748c2ecf20Sopenharmony_ci */ 84758c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 84768c2ecf20Sopenharmony_ci int new_chunk = mddev->new_chunk_sectors; 84778c2ecf20Sopenharmony_ci 84788c2ecf20Sopenharmony_ci if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) 84798c2ecf20Sopenharmony_ci return -EINVAL; 84808c2ecf20Sopenharmony_ci if (new_chunk > 0) { 84818c2ecf20Sopenharmony_ci if (!is_power_of_2(new_chunk)) 84828c2ecf20Sopenharmony_ci return -EINVAL; 84838c2ecf20Sopenharmony_ci if (new_chunk < (PAGE_SIZE>>9)) 84848c2ecf20Sopenharmony_ci return -EINVAL; 84858c2ecf20Sopenharmony_ci if (mddev->array_sectors & (new_chunk-1)) 84868c2ecf20Sopenharmony_ci /* not factor of array size */ 84878c2ecf20Sopenharmony_ci return -EINVAL; 84888c2ecf20Sopenharmony_ci } 84898c2ecf20Sopenharmony_ci 84908c2ecf20Sopenharmony_ci /* They look valid */ 84918c2ecf20Sopenharmony_ci 84928c2ecf20Sopenharmony_ci if (mddev->raid_disks == 2) { 84938c2ecf20Sopenharmony_ci /* can make the change immediately */ 84948c2ecf20Sopenharmony_ci if (mddev->new_layout >= 0) { 84958c2ecf20Sopenharmony_ci conf->algorithm = mddev->new_layout; 84968c2ecf20Sopenharmony_ci mddev->layout = mddev->new_layout; 84978c2ecf20Sopenharmony_ci } 84988c2ecf20Sopenharmony_ci if (new_chunk > 0) { 84998c2ecf20Sopenharmony_ci conf->chunk_sectors = new_chunk ; 85008c2ecf20Sopenharmony_ci mddev->chunk_sectors = new_chunk; 85018c2ecf20Sopenharmony_ci } 85028c2ecf20Sopenharmony_ci set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 85038c2ecf20Sopenharmony_ci md_wakeup_thread(mddev->thread); 85048c2ecf20Sopenharmony_ci } 85058c2ecf20Sopenharmony_ci return check_reshape(mddev); 85068c2ecf20Sopenharmony_ci} 85078c2ecf20Sopenharmony_ci 85088c2ecf20Sopenharmony_cistatic int raid6_check_reshape(struct mddev *mddev) 85098c2ecf20Sopenharmony_ci{ 85108c2ecf20Sopenharmony_ci int new_chunk = mddev->new_chunk_sectors; 85118c2ecf20Sopenharmony_ci 85128c2ecf20Sopenharmony_ci if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) 85138c2ecf20Sopenharmony_ci return -EINVAL; 85148c2ecf20Sopenharmony_ci if (new_chunk > 0) { 85158c2ecf20Sopenharmony_ci if (!is_power_of_2(new_chunk)) 85168c2ecf20Sopenharmony_ci return -EINVAL; 85178c2ecf20Sopenharmony_ci if (new_chunk < (PAGE_SIZE >> 9)) 85188c2ecf20Sopenharmony_ci return -EINVAL; 85198c2ecf20Sopenharmony_ci if (mddev->array_sectors & (new_chunk-1)) 85208c2ecf20Sopenharmony_ci /* not factor of array size */ 85218c2ecf20Sopenharmony_ci return -EINVAL; 85228c2ecf20Sopenharmony_ci } 85238c2ecf20Sopenharmony_ci 85248c2ecf20Sopenharmony_ci /* They look valid */ 85258c2ecf20Sopenharmony_ci return check_reshape(mddev); 85268c2ecf20Sopenharmony_ci} 85278c2ecf20Sopenharmony_ci 85288c2ecf20Sopenharmony_cistatic void *raid5_takeover(struct mddev *mddev) 85298c2ecf20Sopenharmony_ci{ 85308c2ecf20Sopenharmony_ci /* raid5 can take over: 85318c2ecf20Sopenharmony_ci * raid0 - if there is only one strip zone - make it a raid4 layout 85328c2ecf20Sopenharmony_ci * raid1 - if there are two drives. We need to know the chunk size 85338c2ecf20Sopenharmony_ci * raid4 - trivial - just use a raid4 layout. 85348c2ecf20Sopenharmony_ci * raid6 - Providing it is a *_6 layout 85358c2ecf20Sopenharmony_ci */ 85368c2ecf20Sopenharmony_ci if (mddev->level == 0) 85378c2ecf20Sopenharmony_ci return raid45_takeover_raid0(mddev, 5); 85388c2ecf20Sopenharmony_ci if (mddev->level == 1) 85398c2ecf20Sopenharmony_ci return raid5_takeover_raid1(mddev); 85408c2ecf20Sopenharmony_ci if (mddev->level == 4) { 85418c2ecf20Sopenharmony_ci mddev->new_layout = ALGORITHM_PARITY_N; 85428c2ecf20Sopenharmony_ci mddev->new_level = 5; 85438c2ecf20Sopenharmony_ci return setup_conf(mddev); 85448c2ecf20Sopenharmony_ci } 85458c2ecf20Sopenharmony_ci if (mddev->level == 6) 85468c2ecf20Sopenharmony_ci return raid5_takeover_raid6(mddev); 85478c2ecf20Sopenharmony_ci 85488c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85498c2ecf20Sopenharmony_ci} 85508c2ecf20Sopenharmony_ci 85518c2ecf20Sopenharmony_cistatic void *raid4_takeover(struct mddev *mddev) 85528c2ecf20Sopenharmony_ci{ 85538c2ecf20Sopenharmony_ci /* raid4 can take over: 85548c2ecf20Sopenharmony_ci * raid0 - if there is only one strip zone 85558c2ecf20Sopenharmony_ci * raid5 - if layout is right 85568c2ecf20Sopenharmony_ci */ 85578c2ecf20Sopenharmony_ci if (mddev->level == 0) 85588c2ecf20Sopenharmony_ci return raid45_takeover_raid0(mddev, 4); 85598c2ecf20Sopenharmony_ci if (mddev->level == 5 && 85608c2ecf20Sopenharmony_ci mddev->layout == ALGORITHM_PARITY_N) { 85618c2ecf20Sopenharmony_ci mddev->new_layout = 0; 85628c2ecf20Sopenharmony_ci mddev->new_level = 4; 85638c2ecf20Sopenharmony_ci return setup_conf(mddev); 85648c2ecf20Sopenharmony_ci } 85658c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85668c2ecf20Sopenharmony_ci} 85678c2ecf20Sopenharmony_ci 85688c2ecf20Sopenharmony_cistatic struct md_personality raid5_personality; 85698c2ecf20Sopenharmony_ci 85708c2ecf20Sopenharmony_cistatic void *raid6_takeover(struct mddev *mddev) 85718c2ecf20Sopenharmony_ci{ 85728c2ecf20Sopenharmony_ci /* Currently can only take over a raid5. We map the 85738c2ecf20Sopenharmony_ci * personality to an equivalent raid6 personality 85748c2ecf20Sopenharmony_ci * with the Q block at the end. 85758c2ecf20Sopenharmony_ci */ 85768c2ecf20Sopenharmony_ci int new_layout; 85778c2ecf20Sopenharmony_ci 85788c2ecf20Sopenharmony_ci if (mddev->pers != &raid5_personality) 85798c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85808c2ecf20Sopenharmony_ci if (mddev->degraded > 1) 85818c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85828c2ecf20Sopenharmony_ci if (mddev->raid_disks > 253) 85838c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85848c2ecf20Sopenharmony_ci if (mddev->raid_disks < 3) 85858c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 85868c2ecf20Sopenharmony_ci 85878c2ecf20Sopenharmony_ci switch (mddev->layout) { 85888c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_ASYMMETRIC: 85898c2ecf20Sopenharmony_ci new_layout = ALGORITHM_LEFT_ASYMMETRIC_6; 85908c2ecf20Sopenharmony_ci break; 85918c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_ASYMMETRIC: 85928c2ecf20Sopenharmony_ci new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6; 85938c2ecf20Sopenharmony_ci break; 85948c2ecf20Sopenharmony_ci case ALGORITHM_LEFT_SYMMETRIC: 85958c2ecf20Sopenharmony_ci new_layout = ALGORITHM_LEFT_SYMMETRIC_6; 85968c2ecf20Sopenharmony_ci break; 85978c2ecf20Sopenharmony_ci case ALGORITHM_RIGHT_SYMMETRIC: 85988c2ecf20Sopenharmony_ci new_layout = ALGORITHM_RIGHT_SYMMETRIC_6; 85998c2ecf20Sopenharmony_ci break; 86008c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_0: 86018c2ecf20Sopenharmony_ci new_layout = ALGORITHM_PARITY_0_6; 86028c2ecf20Sopenharmony_ci break; 86038c2ecf20Sopenharmony_ci case ALGORITHM_PARITY_N: 86048c2ecf20Sopenharmony_ci new_layout = ALGORITHM_PARITY_N; 86058c2ecf20Sopenharmony_ci break; 86068c2ecf20Sopenharmony_ci default: 86078c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 86088c2ecf20Sopenharmony_ci } 86098c2ecf20Sopenharmony_ci mddev->new_level = 6; 86108c2ecf20Sopenharmony_ci mddev->new_layout = new_layout; 86118c2ecf20Sopenharmony_ci mddev->delta_disks = 1; 86128c2ecf20Sopenharmony_ci mddev->raid_disks += 1; 86138c2ecf20Sopenharmony_ci return setup_conf(mddev); 86148c2ecf20Sopenharmony_ci} 86158c2ecf20Sopenharmony_ci 86168c2ecf20Sopenharmony_cistatic int raid5_change_consistency_policy(struct mddev *mddev, const char *buf) 86178c2ecf20Sopenharmony_ci{ 86188c2ecf20Sopenharmony_ci struct r5conf *conf; 86198c2ecf20Sopenharmony_ci int err; 86208c2ecf20Sopenharmony_ci 86218c2ecf20Sopenharmony_ci err = mddev_lock(mddev); 86228c2ecf20Sopenharmony_ci if (err) 86238c2ecf20Sopenharmony_ci return err; 86248c2ecf20Sopenharmony_ci conf = mddev->private; 86258c2ecf20Sopenharmony_ci if (!conf) { 86268c2ecf20Sopenharmony_ci mddev_unlock(mddev); 86278c2ecf20Sopenharmony_ci return -ENODEV; 86288c2ecf20Sopenharmony_ci } 86298c2ecf20Sopenharmony_ci 86308c2ecf20Sopenharmony_ci if (strncmp(buf, "ppl", 3) == 0) { 86318c2ecf20Sopenharmony_ci /* ppl only works with RAID 5 */ 86328c2ecf20Sopenharmony_ci if (!raid5_has_ppl(conf) && conf->level == 5) { 86338c2ecf20Sopenharmony_ci err = log_init(conf, NULL, true); 86348c2ecf20Sopenharmony_ci if (!err) { 86358c2ecf20Sopenharmony_ci err = resize_stripes(conf, conf->pool_size); 86368c2ecf20Sopenharmony_ci if (err) 86378c2ecf20Sopenharmony_ci log_exit(conf); 86388c2ecf20Sopenharmony_ci } 86398c2ecf20Sopenharmony_ci } else 86408c2ecf20Sopenharmony_ci err = -EINVAL; 86418c2ecf20Sopenharmony_ci } else if (strncmp(buf, "resync", 6) == 0) { 86428c2ecf20Sopenharmony_ci if (raid5_has_ppl(conf)) { 86438c2ecf20Sopenharmony_ci mddev_suspend(mddev); 86448c2ecf20Sopenharmony_ci log_exit(conf); 86458c2ecf20Sopenharmony_ci mddev_resume(mddev); 86468c2ecf20Sopenharmony_ci err = resize_stripes(conf, conf->pool_size); 86478c2ecf20Sopenharmony_ci } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && 86488c2ecf20Sopenharmony_ci r5l_log_disk_error(conf)) { 86498c2ecf20Sopenharmony_ci bool journal_dev_exists = false; 86508c2ecf20Sopenharmony_ci struct md_rdev *rdev; 86518c2ecf20Sopenharmony_ci 86528c2ecf20Sopenharmony_ci rdev_for_each(rdev, mddev) 86538c2ecf20Sopenharmony_ci if (test_bit(Journal, &rdev->flags)) { 86548c2ecf20Sopenharmony_ci journal_dev_exists = true; 86558c2ecf20Sopenharmony_ci break; 86568c2ecf20Sopenharmony_ci } 86578c2ecf20Sopenharmony_ci 86588c2ecf20Sopenharmony_ci if (!journal_dev_exists) { 86598c2ecf20Sopenharmony_ci mddev_suspend(mddev); 86608c2ecf20Sopenharmony_ci clear_bit(MD_HAS_JOURNAL, &mddev->flags); 86618c2ecf20Sopenharmony_ci mddev_resume(mddev); 86628c2ecf20Sopenharmony_ci } else /* need remove journal device first */ 86638c2ecf20Sopenharmony_ci err = -EBUSY; 86648c2ecf20Sopenharmony_ci } else 86658c2ecf20Sopenharmony_ci err = -EINVAL; 86668c2ecf20Sopenharmony_ci } else { 86678c2ecf20Sopenharmony_ci err = -EINVAL; 86688c2ecf20Sopenharmony_ci } 86698c2ecf20Sopenharmony_ci 86708c2ecf20Sopenharmony_ci if (!err) 86718c2ecf20Sopenharmony_ci md_update_sb(mddev, 1); 86728c2ecf20Sopenharmony_ci 86738c2ecf20Sopenharmony_ci mddev_unlock(mddev); 86748c2ecf20Sopenharmony_ci 86758c2ecf20Sopenharmony_ci return err; 86768c2ecf20Sopenharmony_ci} 86778c2ecf20Sopenharmony_ci 86788c2ecf20Sopenharmony_cistatic int raid5_start(struct mddev *mddev) 86798c2ecf20Sopenharmony_ci{ 86808c2ecf20Sopenharmony_ci struct r5conf *conf = mddev->private; 86818c2ecf20Sopenharmony_ci 86828c2ecf20Sopenharmony_ci return r5l_start(conf->log); 86838c2ecf20Sopenharmony_ci} 86848c2ecf20Sopenharmony_ci 86858c2ecf20Sopenharmony_cistatic struct md_personality raid6_personality = 86868c2ecf20Sopenharmony_ci{ 86878c2ecf20Sopenharmony_ci .name = "raid6", 86888c2ecf20Sopenharmony_ci .level = 6, 86898c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 86908c2ecf20Sopenharmony_ci .make_request = raid5_make_request, 86918c2ecf20Sopenharmony_ci .run = raid5_run, 86928c2ecf20Sopenharmony_ci .start = raid5_start, 86938c2ecf20Sopenharmony_ci .free = raid5_free, 86948c2ecf20Sopenharmony_ci .status = raid5_status, 86958c2ecf20Sopenharmony_ci .error_handler = raid5_error, 86968c2ecf20Sopenharmony_ci .hot_add_disk = raid5_add_disk, 86978c2ecf20Sopenharmony_ci .hot_remove_disk= raid5_remove_disk, 86988c2ecf20Sopenharmony_ci .spare_active = raid5_spare_active, 86998c2ecf20Sopenharmony_ci .sync_request = raid5_sync_request, 87008c2ecf20Sopenharmony_ci .resize = raid5_resize, 87018c2ecf20Sopenharmony_ci .size = raid5_size, 87028c2ecf20Sopenharmony_ci .check_reshape = raid6_check_reshape, 87038c2ecf20Sopenharmony_ci .start_reshape = raid5_start_reshape, 87048c2ecf20Sopenharmony_ci .finish_reshape = raid5_finish_reshape, 87058c2ecf20Sopenharmony_ci .quiesce = raid5_quiesce, 87068c2ecf20Sopenharmony_ci .takeover = raid6_takeover, 87078c2ecf20Sopenharmony_ci .change_consistency_policy = raid5_change_consistency_policy, 87088c2ecf20Sopenharmony_ci}; 87098c2ecf20Sopenharmony_cistatic struct md_personality raid5_personality = 87108c2ecf20Sopenharmony_ci{ 87118c2ecf20Sopenharmony_ci .name = "raid5", 87128c2ecf20Sopenharmony_ci .level = 5, 87138c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 87148c2ecf20Sopenharmony_ci .make_request = raid5_make_request, 87158c2ecf20Sopenharmony_ci .run = raid5_run, 87168c2ecf20Sopenharmony_ci .start = raid5_start, 87178c2ecf20Sopenharmony_ci .free = raid5_free, 87188c2ecf20Sopenharmony_ci .status = raid5_status, 87198c2ecf20Sopenharmony_ci .error_handler = raid5_error, 87208c2ecf20Sopenharmony_ci .hot_add_disk = raid5_add_disk, 87218c2ecf20Sopenharmony_ci .hot_remove_disk= raid5_remove_disk, 87228c2ecf20Sopenharmony_ci .spare_active = raid5_spare_active, 87238c2ecf20Sopenharmony_ci .sync_request = raid5_sync_request, 87248c2ecf20Sopenharmony_ci .resize = raid5_resize, 87258c2ecf20Sopenharmony_ci .size = raid5_size, 87268c2ecf20Sopenharmony_ci .check_reshape = raid5_check_reshape, 87278c2ecf20Sopenharmony_ci .start_reshape = raid5_start_reshape, 87288c2ecf20Sopenharmony_ci .finish_reshape = raid5_finish_reshape, 87298c2ecf20Sopenharmony_ci .quiesce = raid5_quiesce, 87308c2ecf20Sopenharmony_ci .takeover = raid5_takeover, 87318c2ecf20Sopenharmony_ci .change_consistency_policy = raid5_change_consistency_policy, 87328c2ecf20Sopenharmony_ci}; 87338c2ecf20Sopenharmony_ci 87348c2ecf20Sopenharmony_cistatic struct md_personality raid4_personality = 87358c2ecf20Sopenharmony_ci{ 87368c2ecf20Sopenharmony_ci .name = "raid4", 87378c2ecf20Sopenharmony_ci .level = 4, 87388c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 87398c2ecf20Sopenharmony_ci .make_request = raid5_make_request, 87408c2ecf20Sopenharmony_ci .run = raid5_run, 87418c2ecf20Sopenharmony_ci .start = raid5_start, 87428c2ecf20Sopenharmony_ci .free = raid5_free, 87438c2ecf20Sopenharmony_ci .status = raid5_status, 87448c2ecf20Sopenharmony_ci .error_handler = raid5_error, 87458c2ecf20Sopenharmony_ci .hot_add_disk = raid5_add_disk, 87468c2ecf20Sopenharmony_ci .hot_remove_disk= raid5_remove_disk, 87478c2ecf20Sopenharmony_ci .spare_active = raid5_spare_active, 87488c2ecf20Sopenharmony_ci .sync_request = raid5_sync_request, 87498c2ecf20Sopenharmony_ci .resize = raid5_resize, 87508c2ecf20Sopenharmony_ci .size = raid5_size, 87518c2ecf20Sopenharmony_ci .check_reshape = raid5_check_reshape, 87528c2ecf20Sopenharmony_ci .start_reshape = raid5_start_reshape, 87538c2ecf20Sopenharmony_ci .finish_reshape = raid5_finish_reshape, 87548c2ecf20Sopenharmony_ci .quiesce = raid5_quiesce, 87558c2ecf20Sopenharmony_ci .takeover = raid4_takeover, 87568c2ecf20Sopenharmony_ci .change_consistency_policy = raid5_change_consistency_policy, 87578c2ecf20Sopenharmony_ci}; 87588c2ecf20Sopenharmony_ci 87598c2ecf20Sopenharmony_cistatic int __init raid5_init(void) 87608c2ecf20Sopenharmony_ci{ 87618c2ecf20Sopenharmony_ci int ret; 87628c2ecf20Sopenharmony_ci 87638c2ecf20Sopenharmony_ci raid5_wq = alloc_workqueue("raid5wq", 87648c2ecf20Sopenharmony_ci WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0); 87658c2ecf20Sopenharmony_ci if (!raid5_wq) 87668c2ecf20Sopenharmony_ci return -ENOMEM; 87678c2ecf20Sopenharmony_ci 87688c2ecf20Sopenharmony_ci ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE, 87698c2ecf20Sopenharmony_ci "md/raid5:prepare", 87708c2ecf20Sopenharmony_ci raid456_cpu_up_prepare, 87718c2ecf20Sopenharmony_ci raid456_cpu_dead); 87728c2ecf20Sopenharmony_ci if (ret) { 87738c2ecf20Sopenharmony_ci destroy_workqueue(raid5_wq); 87748c2ecf20Sopenharmony_ci return ret; 87758c2ecf20Sopenharmony_ci } 87768c2ecf20Sopenharmony_ci register_md_personality(&raid6_personality); 87778c2ecf20Sopenharmony_ci register_md_personality(&raid5_personality); 87788c2ecf20Sopenharmony_ci register_md_personality(&raid4_personality); 87798c2ecf20Sopenharmony_ci return 0; 87808c2ecf20Sopenharmony_ci} 87818c2ecf20Sopenharmony_ci 87828c2ecf20Sopenharmony_cistatic void raid5_exit(void) 87838c2ecf20Sopenharmony_ci{ 87848c2ecf20Sopenharmony_ci unregister_md_personality(&raid6_personality); 87858c2ecf20Sopenharmony_ci unregister_md_personality(&raid5_personality); 87868c2ecf20Sopenharmony_ci unregister_md_personality(&raid4_personality); 87878c2ecf20Sopenharmony_ci cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); 87888c2ecf20Sopenharmony_ci destroy_workqueue(raid5_wq); 87898c2ecf20Sopenharmony_ci} 87908c2ecf20Sopenharmony_ci 87918c2ecf20Sopenharmony_cimodule_init(raid5_init); 87928c2ecf20Sopenharmony_cimodule_exit(raid5_exit); 87938c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 87948c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD"); 87958c2ecf20Sopenharmony_ciMODULE_ALIAS("md-personality-4"); /* RAID5 */ 87968c2ecf20Sopenharmony_ciMODULE_ALIAS("md-raid5"); 87978c2ecf20Sopenharmony_ciMODULE_ALIAS("md-raid4"); 87988c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level-5"); 87998c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level-4"); 88008c2ecf20Sopenharmony_ciMODULE_ALIAS("md-personality-8"); /* RAID6 */ 88018c2ecf20Sopenharmony_ciMODULE_ALIAS("md-raid6"); 88028c2ecf20Sopenharmony_ciMODULE_ALIAS("md-level-6"); 88038c2ecf20Sopenharmony_ci 88048c2ecf20Sopenharmony_ci/* This used to be two separate modules, they were: */ 88058c2ecf20Sopenharmony_ciMODULE_ALIAS("raid5"); 88068c2ecf20Sopenharmony_ciMODULE_ALIAS("raid6"); 8807