18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * raid5.c : Multiple Devices driver for Linux
48c2ecf20Sopenharmony_ci *	   Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
58c2ecf20Sopenharmony_ci *	   Copyright (C) 1999, 2000 Ingo Molnar
68c2ecf20Sopenharmony_ci *	   Copyright (C) 2002, 2003 H. Peter Anvin
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * RAID-4/5/6 management functions.
98c2ecf20Sopenharmony_ci * Thanks to Penguin Computing for making the RAID-6 development possible
108c2ecf20Sopenharmony_ci * by donating a test server!
118c2ecf20Sopenharmony_ci */
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci/*
148c2ecf20Sopenharmony_ci * BITMAP UNPLUGGING:
158c2ecf20Sopenharmony_ci *
168c2ecf20Sopenharmony_ci * The sequencing for updating the bitmap reliably is a little
178c2ecf20Sopenharmony_ci * subtle (and I got it wrong the first time) so it deserves some
188c2ecf20Sopenharmony_ci * explanation.
198c2ecf20Sopenharmony_ci *
208c2ecf20Sopenharmony_ci * We group bitmap updates into batches.  Each batch has a number.
218c2ecf20Sopenharmony_ci * We may write out several batches at once, but that isn't very important.
228c2ecf20Sopenharmony_ci * conf->seq_write is the number of the last batch successfully written.
238c2ecf20Sopenharmony_ci * conf->seq_flush is the number of the last batch that was closed to
248c2ecf20Sopenharmony_ci *    new additions.
258c2ecf20Sopenharmony_ci * When we discover that we will need to write to any block in a stripe
268c2ecf20Sopenharmony_ci * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
278c2ecf20Sopenharmony_ci * the number of the batch it will be in. This is seq_flush+1.
288c2ecf20Sopenharmony_ci * When we are ready to do a write, if that batch hasn't been written yet,
298c2ecf20Sopenharmony_ci *   we plug the array and queue the stripe for later.
 * When an unplug happens, we increment seq_flush, thus closing the current
 *   batch.
 * When we notice that seq_flush > seq_write, we write out all pending updates
 * to the bitmap, and advance seq_write to where seq_flush was.
348c2ecf20Sopenharmony_ci * This may occasionally write a bit out twice, but is sure never to
358c2ecf20Sopenharmony_ci * miss any bits.
368c2ecf20Sopenharmony_ci */
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
398c2ecf20Sopenharmony_ci#include <linux/kthread.h>
408c2ecf20Sopenharmony_ci#include <linux/raid/pq.h>
418c2ecf20Sopenharmony_ci#include <linux/async_tx.h>
428c2ecf20Sopenharmony_ci#include <linux/module.h>
438c2ecf20Sopenharmony_ci#include <linux/async.h>
448c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
458c2ecf20Sopenharmony_ci#include <linux/cpu.h>
468c2ecf20Sopenharmony_ci#include <linux/slab.h>
478c2ecf20Sopenharmony_ci#include <linux/ratelimit.h>
488c2ecf20Sopenharmony_ci#include <linux/nodemask.h>
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci#include <trace/events/block.h>
518c2ecf20Sopenharmony_ci#include <linux/list_sort.h>
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci#include "md.h"
548c2ecf20Sopenharmony_ci#include "raid5.h"
558c2ecf20Sopenharmony_ci#include "raid0.h"
568c2ecf20Sopenharmony_ci#include "md-bitmap.h"
578c2ecf20Sopenharmony_ci#include "raid5-log.h"
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci#define UNSUPPORTED_MDDEV_FLAGS	(1L << MD_FAILFAST_SUPPORTED)
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci#define cpu_to_group(cpu) cpu_to_node(cpu)
628c2ecf20Sopenharmony_ci#define ANY_GROUP NUMA_NO_NODE
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_cistatic bool devices_handle_discard_safely = false;
658c2ecf20Sopenharmony_cimodule_param(devices_handle_discard_safely, bool, 0644);
668c2ecf20Sopenharmony_ciMODULE_PARM_DESC(devices_handle_discard_safely,
678c2ecf20Sopenharmony_ci		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
688c2ecf20Sopenharmony_cistatic struct workqueue_struct *raid5_wq;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
718c2ecf20Sopenharmony_ci{
728c2ecf20Sopenharmony_ci	int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK;
738c2ecf20Sopenharmony_ci	return &conf->stripe_hashtbl[hash];
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK;
798c2ecf20Sopenharmony_ci}
808c2ecf20Sopenharmony_ci
/*
 * Take one stripe hash lock plus the global device_lock.
 * Lock order is hash lock first, then device_lock; the matching unlock
 * helper below releases in the reverse order.  IRQs are disabled for the
 * whole critical section.
 */
static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
{
	spin_lock_irq(conf->hash_locks + hash);
	spin_lock(&conf->device_lock);
}
868c2ecf20Sopenharmony_ci
/*
 * Release device_lock and the given stripe hash lock, in the reverse of
 * the order taken by lock_device_hash_lock(), re-enabling IRQs last.
 */
static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
{
	spin_unlock(&conf->device_lock);
	spin_unlock_irq(conf->hash_locks + hash);
}
928c2ecf20Sopenharmony_ci
/*
 * Take every stripe hash lock (ascending index) and then device_lock.
 * spin_lock_nest_lock() tells lockdep that locks 1..N-1 nest under lock 0,
 * so acquiring many same-class locks does not trigger a false warning.
 */
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
{
	int i;
	spin_lock_irq(conf->hash_locks);
	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
	spin_lock(&conf->device_lock);
}
1018c2ecf20Sopenharmony_ci
/*
 * Release device_lock and then all stripe hash locks, in the exact reverse
 * of the order taken by lock_all_device_hash_locks_irq(); IRQs come back on
 * when the first (index 0) hash lock is dropped.
 */
static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
{
	int i;
	spin_unlock(&conf->device_lock);
	for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--)
		spin_unlock(conf->hash_locks + i);
	spin_unlock_irq(conf->hash_locks);
}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci/* Find first data disk in a raid6 stripe */
1128c2ecf20Sopenharmony_cistatic inline int raid6_d0(struct stripe_head *sh)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	if (sh->ddf_layout)
1158c2ecf20Sopenharmony_ci		/* ddf always start from first device */
1168c2ecf20Sopenharmony_ci		return 0;
1178c2ecf20Sopenharmony_ci	/* md starts just after Q block */
1188c2ecf20Sopenharmony_ci	if (sh->qd_idx == sh->disks - 1)
1198c2ecf20Sopenharmony_ci		return 0;
1208c2ecf20Sopenharmony_ci	else
1218c2ecf20Sopenharmony_ci		return sh->qd_idx + 1;
1228c2ecf20Sopenharmony_ci}
/* Advance a disk index by one, wrapping back to 0 past the last disk. */
static inline int raid6_next_disk(int disk, int raid_disks)
{
	int next = disk + 1;

	return next < raid_disks ? next : 0;
}
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci/* When walking through the disks in a raid5, starting at raid6_d0,
1308c2ecf20Sopenharmony_ci * We need to map each disk to a 'slot', where the data disks are slot
1318c2ecf20Sopenharmony_ci * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
1328c2ecf20Sopenharmony_ci * is raid_disks-1.  This help does that mapping.
1338c2ecf20Sopenharmony_ci */
1348c2ecf20Sopenharmony_cistatic int raid6_idx_to_slot(int idx, struct stripe_head *sh,
1358c2ecf20Sopenharmony_ci			     int *count, int syndrome_disks)
1368c2ecf20Sopenharmony_ci{
1378c2ecf20Sopenharmony_ci	int slot = *count;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	if (sh->ddf_layout)
1408c2ecf20Sopenharmony_ci		(*count)++;
1418c2ecf20Sopenharmony_ci	if (idx == sh->pd_idx)
1428c2ecf20Sopenharmony_ci		return syndrome_disks;
1438c2ecf20Sopenharmony_ci	if (idx == sh->qd_idx)
1448c2ecf20Sopenharmony_ci		return syndrome_disks + 1;
1458c2ecf20Sopenharmony_ci	if (!sh->ddf_layout)
1468c2ecf20Sopenharmony_ci		(*count)++;
1478c2ecf20Sopenharmony_ci	return slot;
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_cistatic void print_raid5_conf (struct r5conf *conf);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cistatic int stripe_operations_active(struct stripe_head *sh)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	return sh->check_state || sh->reconstruct_state ||
1558c2ecf20Sopenharmony_ci	       test_bit(STRIPE_BIOFILL_RUN, &sh->state) ||
1568c2ecf20Sopenharmony_ci	       test_bit(STRIPE_COMPUTE_RUN, &sh->state);
1578c2ecf20Sopenharmony_ci}
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_cistatic bool stripe_is_lowprio(struct stripe_head *sh)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) ||
1628c2ecf20Sopenharmony_ci		test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) &&
1638c2ecf20Sopenharmony_ci	       !test_bit(STRIPE_R5C_CACHING, &sh->state);
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_cistatic void raid5_wakeup_stripe_thread(struct stripe_head *sh)
1678c2ecf20Sopenharmony_ci{
1688c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
1698c2ecf20Sopenharmony_ci	struct r5worker_group *group;
1708c2ecf20Sopenharmony_ci	int thread_cnt;
1718c2ecf20Sopenharmony_ci	int i, cpu = sh->cpu;
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci	if (!cpu_online(cpu)) {
1748c2ecf20Sopenharmony_ci		cpu = cpumask_any(cpu_online_mask);
1758c2ecf20Sopenharmony_ci		sh->cpu = cpu;
1768c2ecf20Sopenharmony_ci	}
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	if (list_empty(&sh->lru)) {
1798c2ecf20Sopenharmony_ci		struct r5worker_group *group;
1808c2ecf20Sopenharmony_ci		group = conf->worker_groups + cpu_to_group(cpu);
1818c2ecf20Sopenharmony_ci		if (stripe_is_lowprio(sh))
1828c2ecf20Sopenharmony_ci			list_add_tail(&sh->lru, &group->loprio_list);
1838c2ecf20Sopenharmony_ci		else
1848c2ecf20Sopenharmony_ci			list_add_tail(&sh->lru, &group->handle_list);
1858c2ecf20Sopenharmony_ci		group->stripes_cnt++;
1868c2ecf20Sopenharmony_ci		sh->group = group;
1878c2ecf20Sopenharmony_ci	}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	if (conf->worker_cnt_per_group == 0) {
1908c2ecf20Sopenharmony_ci		md_wakeup_thread(conf->mddev->thread);
1918c2ecf20Sopenharmony_ci		return;
1928c2ecf20Sopenharmony_ci	}
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	group = conf->worker_groups + cpu_to_group(sh->cpu);
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	group->workers[0].working = true;
1978c2ecf20Sopenharmony_ci	/* at least one worker should run to avoid race */
1988c2ecf20Sopenharmony_ci	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
2018c2ecf20Sopenharmony_ci	/* wakeup more workers */
2028c2ecf20Sopenharmony_ci	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
2038c2ecf20Sopenharmony_ci		if (group->workers[i].working == false) {
2048c2ecf20Sopenharmony_ci			group->workers[i].working = true;
2058c2ecf20Sopenharmony_ci			queue_work_on(sh->cpu, raid5_wq,
2068c2ecf20Sopenharmony_ci				      &group->workers[i].work);
2078c2ecf20Sopenharmony_ci			thread_cnt--;
2088c2ecf20Sopenharmony_ci		}
2098c2ecf20Sopenharmony_ci	}
2108c2ecf20Sopenharmony_ci}
2118c2ecf20Sopenharmony_ci
/*
 * Route a stripe whose last reference was just dropped to the right list.
 *
 * Must be called with conf->device_lock held.  A stripe that still needs
 * work (STRIPE_HANDLE) goes to a delayed/bitmap/handle list or is handed
 * to a worker group; otherwise it is returned to @temp_inactive_list or,
 * under r5c write-back, to a cached full/partial stripe list.
 */
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
			      struct list_head *temp_inactive_list)
{
	int i;
	int injournal = 0;	/* number of data pages with R5_InJournal */

	BUG_ON(!list_empty(&sh->lru));
	BUG_ON(atomic_read(&conf->active_stripes)==0);

	/* Count data pages already captured in the write-back journal. */
	if (r5c_is_writeback(conf->log))
		for (i = sh->disks; i--; )
			if (test_bit(R5_InJournal, &sh->dev[i].flags))
				injournal++;
	/*
	 * In the following cases, the stripe cannot be released to cached
	 * lists. Therefore, we make the stripe write out and set
	 * STRIPE_HANDLE:
	 *   1. when quiesce in r5c write back;
	 *   2. when resync is requested for the stripe.
	 */
	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
	    (conf->quiesce && r5c_is_writeback(conf->log) &&
	     !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
		if (test_bit(STRIPE_R5C_CACHING, &sh->state))
			r5c_make_stripe_write_out(sh);
		set_bit(STRIPE_HANDLE, &sh->state);
	}

	if (test_bit(STRIPE_HANDLE, &sh->state)) {
		/* Stripe still needs processing: pick the pending list. */
		if (test_bit(STRIPE_DELAYED, &sh->state) &&
		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
			list_add_tail(&sh->lru, &conf->delayed_list);
		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
			   sh->bm_seq - conf->seq_write > 0)
			/* Its bitmap batch is not on disk yet: wait. */
			list_add_tail(&sh->lru, &conf->bitmap_list);
		else {
			clear_bit(STRIPE_DELAYED, &sh->state);
			clear_bit(STRIPE_BIT_DELAY, &sh->state);
			if (conf->worker_cnt_per_group == 0) {
				if (stripe_is_lowprio(sh))
					list_add_tail(&sh->lru,
							&conf->loprio_list);
				else
					list_add_tail(&sh->lru,
							&conf->handle_list);
			} else {
				/* Hand off to a raid5 worker group. */
				raid5_wakeup_stripe_thread(sh);
				return;
			}
		}
		md_wakeup_thread(conf->mddev->thread);
	} else {
		BUG_ON(stripe_operations_active(sh));
		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
			if (atomic_dec_return(&conf->preread_active_stripes)
			    < IO_THRESHOLD)
				md_wakeup_thread(conf->mddev->thread);
		atomic_dec(&conf->active_stripes);
		if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
			if (!r5c_is_writeback(conf->log))
				list_add_tail(&sh->lru, temp_inactive_list);
			else {
				WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags));
				if (injournal == 0)
					list_add_tail(&sh->lru, temp_inactive_list);
				else if (injournal == conf->raid_disks - conf->max_degraded) {
					/* full stripe */
					if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state))
						atomic_inc(&conf->r5c_cached_full_stripes);
					if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state))
						atomic_dec(&conf->r5c_cached_partial_stripes);
					list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
					r5c_check_cached_full_stripe(conf);
				} else
					/*
					 * STRIPE_R5C_PARTIAL_STRIPE is set in
					 * r5c_try_caching_write(). No need to
					 * set it again.
					 */
					list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
			}
		}
	}
}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_cistatic void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
2988c2ecf20Sopenharmony_ci			     struct list_head *temp_inactive_list)
2998c2ecf20Sopenharmony_ci{
3008c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&sh->count))
3018c2ecf20Sopenharmony_ci		do_release_stripe(conf, sh, temp_inactive_list);
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
/*
 * Splice stripes collected on temp_inactive_list back onto the per-bucket
 * conf->inactive_list, then wake any waiters.
 *
 * @hash could be NR_STRIPE_HASH_LOCKS, then we have a list of inactive_list
 * (one per bucket); otherwise temp_inactive_list is a single list for the
 * given bucket.
 *
 * Be careful: Only one task can add/delete stripes from temp_inactive_list at
 * given time. Adding stripes only takes device lock, while deleting stripes
 * only takes hash lock.
 */
static void release_inactive_stripe_list(struct r5conf *conf,
					 struct list_head *temp_inactive_list,
					 int hash)
{
	int size;
	bool do_wakeup = false;
	unsigned long flags;

	if (hash == NR_STRIPE_HASH_LOCKS) {
		size = NR_STRIPE_HASH_LOCKS;
		hash = NR_STRIPE_HASH_LOCKS - 1;
	} else
		size = 1;
	while (size) {
		struct list_head *list = &temp_inactive_list[size - 1];

		/*
		 * We don't hold any lock here yet, raid5_get_active_stripe() might
		 * remove stripes from the list
		 */
		if (!list_empty_careful(list)) {
			spin_lock_irqsave(conf->hash_locks + hash, flags);
			/* Bucket goes from empty to non-empty: fix counter. */
			if (list_empty(conf->inactive_list + hash) &&
			    !list_empty(list))
				atomic_dec(&conf->empty_inactive_list_nr);
			list_splice_tail_init(list, conf->inactive_list + hash);
			do_wakeup = true;
			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
		}
		size--;
		hash--;
	}

	/* Wake tasks waiting for free stripes or for full quiescence. */
	if (do_wakeup) {
		wake_up(&conf->wait_for_stripe);
		if (atomic_read(&conf->active_stripes) == 0)
			wake_up(&conf->wait_for_quiescent);
		if (conf->retry_read_aligned)
			md_wakeup_thread(conf->mddev->thread);
	}
}
3528c2ecf20Sopenharmony_ci
/*
 * Drain the lockless released_stripes llist, releasing each stripe into
 * temp_inactive_list[its hash bucket].  Returns the number of stripes
 * processed.  Should hold conf->device_lock already.
 */
static int release_stripe_list(struct r5conf *conf,
			       struct list_head *temp_inactive_list)
{
	struct stripe_head *sh, *t;
	int count = 0;
	struct llist_node *head;

	/* Atomically take the whole list; reverse to oldest-first order. */
	head = llist_del_all(&conf->released_stripes);
	head = llist_reverse_order(head);
	llist_for_each_entry_safe(sh, t, head, release_list) {
		int hash;

		/* sh could be readded after STRIPE_ON_RELEASE_LIST is cleared */
		smp_mb();
		clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
		/*
		 * Don't worry the bit is set here, because if the bit is set
		 * again, the count is always > 1. This is true for
		 * STRIPE_ON_UNPLUG_LIST bit too.
		 */
		hash = sh->hash_lock_index;
		__release_stripe(conf, sh, &temp_inactive_list[hash]);
		count++;
	}

	return count;
}
3818c2ecf20Sopenharmony_ci
/*
 * Drop a reference to @sh.
 *
 * Fast path: when this is not the last reference, just decrement and
 * return.  For the last reference, push the stripe onto the lockless
 * released_stripes llist and let the md thread do the real release work.
 * The slow path (no md thread, or the stripe already sits on the release
 * list) takes conf->device_lock and releases directly.
 */
void raid5_release_stripe(struct stripe_head *sh)
{
	struct r5conf *conf = sh->raid_conf;
	unsigned long flags;
	struct list_head list;
	int hash;
	bool wakeup;

	/* Avoid release_list until the last reference.
	 */
	if (atomic_add_unless(&sh->count, -1, 1))
		return;

	if (unlikely(!conf->mddev->thread) ||
		test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
		goto slow_path;
	/* First entry on an empty llist means the md thread needs a kick. */
	wakeup = llist_add(&sh->release_list, &conf->released_stripes);
	if (wakeup)
		md_wakeup_thread(conf->mddev->thread);
	return;
slow_path:
	/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
	if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
		INIT_LIST_HEAD(&list);
		hash = sh->hash_lock_index;
		do_release_stripe(conf, sh, &list);
		spin_unlock_irqrestore(&conf->device_lock, flags);
		release_inactive_stripe_list(conf, &list, hash);
	}
}
4128c2ecf20Sopenharmony_ci
/* Unlink @sh from the stripe hash table (safe to call if already unhashed). */
static inline void remove_hash(struct stripe_head *sh)
{
	pr_debug("remove_hash(), stripe %llu\n",
		(unsigned long long)sh->sector);

	hlist_del_init(&sh->hash);
}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_cistatic inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
4228c2ecf20Sopenharmony_ci{
4238c2ecf20Sopenharmony_ci	struct hlist_head *hp = stripe_hash(conf, sh->sector);
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	pr_debug("insert_hash(), stripe %llu\n",
4268c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector);
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	hlist_add_head(&sh->hash, hp);
4298c2ecf20Sopenharmony_ci}
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci/* find an idle stripe, make sure it is unhashed, and return it. */
4328c2ecf20Sopenharmony_cistatic struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
4338c2ecf20Sopenharmony_ci{
4348c2ecf20Sopenharmony_ci	struct stripe_head *sh = NULL;
4358c2ecf20Sopenharmony_ci	struct list_head *first;
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	if (list_empty(conf->inactive_list + hash))
4388c2ecf20Sopenharmony_ci		goto out;
4398c2ecf20Sopenharmony_ci	first = (conf->inactive_list + hash)->next;
4408c2ecf20Sopenharmony_ci	sh = list_entry(first, struct stripe_head, lru);
4418c2ecf20Sopenharmony_ci	list_del_init(first);
4428c2ecf20Sopenharmony_ci	remove_hash(sh);
4438c2ecf20Sopenharmony_ci	atomic_inc(&conf->active_stripes);
4448c2ecf20Sopenharmony_ci	BUG_ON(hash != sh->hash_lock_index);
4458c2ecf20Sopenharmony_ci	if (list_empty(conf->inactive_list + hash))
4468c2ecf20Sopenharmony_ci		atomic_inc(&conf->empty_inactive_list_nr);
4478c2ecf20Sopenharmony_ciout:
4488c2ecf20Sopenharmony_ci	return sh;
4498c2ecf20Sopenharmony_ci}
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
4528c2ecf20Sopenharmony_cistatic void free_stripe_pages(struct stripe_head *sh)
4538c2ecf20Sopenharmony_ci{
4548c2ecf20Sopenharmony_ci	int i;
4558c2ecf20Sopenharmony_ci	struct page *p;
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	/* Have not allocate page pool */
4588c2ecf20Sopenharmony_ci	if (!sh->pages)
4598c2ecf20Sopenharmony_ci		return;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	for (i = 0; i < sh->nr_pages; i++) {
4628c2ecf20Sopenharmony_ci		p = sh->pages[i];
4638c2ecf20Sopenharmony_ci		if (p)
4648c2ecf20Sopenharmony_ci			put_page(p);
4658c2ecf20Sopenharmony_ci		sh->pages[i] = NULL;
4668c2ecf20Sopenharmony_ci	}
4678c2ecf20Sopenharmony_ci}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_cistatic int alloc_stripe_pages(struct stripe_head *sh, gfp_t gfp)
4708c2ecf20Sopenharmony_ci{
4718c2ecf20Sopenharmony_ci	int i;
4728c2ecf20Sopenharmony_ci	struct page *p;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	for (i = 0; i < sh->nr_pages; i++) {
4758c2ecf20Sopenharmony_ci		/* The page have allocated. */
4768c2ecf20Sopenharmony_ci		if (sh->pages[i])
4778c2ecf20Sopenharmony_ci			continue;
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci		p = alloc_page(gfp);
4808c2ecf20Sopenharmony_ci		if (!p) {
4818c2ecf20Sopenharmony_ci			free_stripe_pages(sh);
4828c2ecf20Sopenharmony_ci			return -ENOMEM;
4838c2ecf20Sopenharmony_ci		}
4848c2ecf20Sopenharmony_ci		sh->pages[i] = p;
4858c2ecf20Sopenharmony_ci	}
4868c2ecf20Sopenharmony_ci	return 0;
4878c2ecf20Sopenharmony_ci}
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_cistatic int
4908c2ecf20Sopenharmony_ciinit_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks)
4918c2ecf20Sopenharmony_ci{
4928c2ecf20Sopenharmony_ci	int nr_pages, cnt;
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	if (sh->pages)
4958c2ecf20Sopenharmony_ci		return 0;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	/* Each of the sh->dev[i] need one conf->stripe_size */
4988c2ecf20Sopenharmony_ci	cnt = PAGE_SIZE / conf->stripe_size;
4998c2ecf20Sopenharmony_ci	nr_pages = (disks + cnt - 1) / cnt;
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci	sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
5028c2ecf20Sopenharmony_ci	if (!sh->pages)
5038c2ecf20Sopenharmony_ci		return -ENOMEM;
5048c2ecf20Sopenharmony_ci	sh->nr_pages = nr_pages;
5058c2ecf20Sopenharmony_ci	sh->stripes_per_page = cnt;
5068c2ecf20Sopenharmony_ci	return 0;
5078c2ecf20Sopenharmony_ci}
5088c2ecf20Sopenharmony_ci#endif
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_cistatic void shrink_buffers(struct stripe_head *sh)
5118c2ecf20Sopenharmony_ci{
5128c2ecf20Sopenharmony_ci	int i;
5138c2ecf20Sopenharmony_ci	int num = sh->raid_conf->pool_size;
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
5168c2ecf20Sopenharmony_ci	for (i = 0; i < num ; i++) {
5178c2ecf20Sopenharmony_ci		struct page *p;
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
5208c2ecf20Sopenharmony_ci		p = sh->dev[i].page;
5218c2ecf20Sopenharmony_ci		if (!p)
5228c2ecf20Sopenharmony_ci			continue;
5238c2ecf20Sopenharmony_ci		sh->dev[i].page = NULL;
5248c2ecf20Sopenharmony_ci		put_page(p);
5258c2ecf20Sopenharmony_ci	}
5268c2ecf20Sopenharmony_ci#else
5278c2ecf20Sopenharmony_ci	for (i = 0; i < num; i++)
5288c2ecf20Sopenharmony_ci		sh->dev[i].page = NULL;
5298c2ecf20Sopenharmony_ci	free_stripe_pages(sh); /* Free pages */
5308c2ecf20Sopenharmony_ci#endif
5318c2ecf20Sopenharmony_ci}
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_cistatic int grow_buffers(struct stripe_head *sh, gfp_t gfp)
5348c2ecf20Sopenharmony_ci{
5358c2ecf20Sopenharmony_ci	int i;
5368c2ecf20Sopenharmony_ci	int num = sh->raid_conf->pool_size;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
5398c2ecf20Sopenharmony_ci	for (i = 0; i < num; i++) {
5408c2ecf20Sopenharmony_ci		struct page *page;
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci		if (!(page = alloc_page(gfp))) {
5438c2ecf20Sopenharmony_ci			return 1;
5448c2ecf20Sopenharmony_ci		}
5458c2ecf20Sopenharmony_ci		sh->dev[i].page = page;
5468c2ecf20Sopenharmony_ci		sh->dev[i].orig_page = page;
5478c2ecf20Sopenharmony_ci		sh->dev[i].offset = 0;
5488c2ecf20Sopenharmony_ci	}
5498c2ecf20Sopenharmony_ci#else
5508c2ecf20Sopenharmony_ci	if (alloc_stripe_pages(sh, gfp))
5518c2ecf20Sopenharmony_ci		return -ENOMEM;
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	for (i = 0; i < num; i++) {
5548c2ecf20Sopenharmony_ci		sh->dev[i].page = raid5_get_dev_page(sh, i);
5558c2ecf20Sopenharmony_ci		sh->dev[i].orig_page = sh->dev[i].page;
5568c2ecf20Sopenharmony_ci		sh->dev[i].offset = raid5_get_page_offset(sh, i);
5578c2ecf20Sopenharmony_ci	}
5588c2ecf20Sopenharmony_ci#endif
5598c2ecf20Sopenharmony_ci	return 0;
5608c2ecf20Sopenharmony_ci}
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_cistatic void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
5638c2ecf20Sopenharmony_ci			    struct stripe_head *sh);
5648c2ecf20Sopenharmony_ci
/*
 * Initialise a freshly obtained (idle, unhashed) stripe_head for @sector.
 *
 * Computes the per-device block numbers and geometry, then inserts the
 * stripe into the hash table.  The seqcount retry loop guards against a
 * concurrent reshape changing conf->generation/geometry mid-computation.
 * @previous selects the pre-reshape geometry.
 */
static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
{
	struct r5conf *conf = sh->raid_conf;
	int i, seq;

	/* The stripe must be idle and unreferenced when it gets here. */
	BUG_ON(atomic_read(&sh->count) != 0);
	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
	BUG_ON(stripe_operations_active(sh));
	BUG_ON(sh->batch_head);

	pr_debug("init_stripe called, stripe %llu\n",
		(unsigned long long)sector);
retry:
	seq = read_seqcount_begin(&conf->gen_lock);
	sh->generation = conf->generation - previous;
	sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
	sh->sector = sector;
	stripe_set_idx(sector, conf, previous, sh);
	sh->state = 0;

	for (i = sh->disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];

		/* A recycled stripe must not carry stale I/O state. */
		if (dev->toread || dev->read || dev->towrite || dev->written ||
		    test_bit(R5_LOCKED, &dev->flags)) {
			pr_err("sector=%llx i=%d %p %p %p %p %d\n",
			       (unsigned long long)sh->sector, i, dev->toread,
			       dev->read, dev->towrite, dev->written,
			       test_bit(R5_LOCKED, &dev->flags));
			WARN_ON(1);
		}
		dev->flags = 0;
		dev->sector = raid5_compute_blocknr(sh, i, previous);
	}
	/* Geometry changed under us: recompute with the new generation. */
	if (read_seqcount_retry(&conf->gen_lock, seq))
		goto retry;
	sh->overwrite_disks = 0;
	insert_hash(conf, sh);
	sh->cpu = smp_processor_id();
	set_bit(STRIPE_BATCH_READY, &sh->state);
}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_cistatic struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
6088c2ecf20Sopenharmony_ci					 short generation)
6098c2ecf20Sopenharmony_ci{
6108c2ecf20Sopenharmony_ci	struct stripe_head *sh;
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_ci	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
6138c2ecf20Sopenharmony_ci	hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
6148c2ecf20Sopenharmony_ci		if (sh->sector == sector && sh->generation == generation)
6158c2ecf20Sopenharmony_ci			return sh;
6168c2ecf20Sopenharmony_ci	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
6178c2ecf20Sopenharmony_ci	return NULL;
6188c2ecf20Sopenharmony_ci}
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci/*
6218c2ecf20Sopenharmony_ci * Need to check if array has failed when deciding whether to:
6228c2ecf20Sopenharmony_ci *  - start an array
6238c2ecf20Sopenharmony_ci *  - remove non-faulty devices
6248c2ecf20Sopenharmony_ci *  - add a spare
6258c2ecf20Sopenharmony_ci *  - allow a reshape
6268c2ecf20Sopenharmony_ci * This determination is simple when no reshape is happening.
6278c2ecf20Sopenharmony_ci * However if there is a reshape, we need to carefully check
6288c2ecf20Sopenharmony_ci * both the before and after sections.
6298c2ecf20Sopenharmony_ci * This is because some failed devices may only affect one
6308c2ecf20Sopenharmony_ci * of the two sections, and some non-in_sync devices may
6318c2ecf20Sopenharmony_ci * be insync in the section most affected by failed devices.
6328c2ecf20Sopenharmony_ci */
6338c2ecf20Sopenharmony_ciint raid5_calc_degraded(struct r5conf *conf)
6348c2ecf20Sopenharmony_ci{
6358c2ecf20Sopenharmony_ci	int degraded, degraded2;
6368c2ecf20Sopenharmony_ci	int i;
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	rcu_read_lock();
6398c2ecf20Sopenharmony_ci	degraded = 0;
6408c2ecf20Sopenharmony_ci	for (i = 0; i < conf->previous_raid_disks; i++) {
6418c2ecf20Sopenharmony_ci		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
6428c2ecf20Sopenharmony_ci		if (rdev && test_bit(Faulty, &rdev->flags))
6438c2ecf20Sopenharmony_ci			rdev = rcu_dereference(conf->disks[i].replacement);
6448c2ecf20Sopenharmony_ci		if (!rdev || test_bit(Faulty, &rdev->flags))
6458c2ecf20Sopenharmony_ci			degraded++;
6468c2ecf20Sopenharmony_ci		else if (test_bit(In_sync, &rdev->flags))
6478c2ecf20Sopenharmony_ci			;
6488c2ecf20Sopenharmony_ci		else
6498c2ecf20Sopenharmony_ci			/* not in-sync or faulty.
6508c2ecf20Sopenharmony_ci			 * If the reshape increases the number of devices,
6518c2ecf20Sopenharmony_ci			 * this is being recovered by the reshape, so
6528c2ecf20Sopenharmony_ci			 * this 'previous' section is not in_sync.
6538c2ecf20Sopenharmony_ci			 * If the number of devices is being reduced however,
6548c2ecf20Sopenharmony_ci			 * the device can only be part of the array if
6558c2ecf20Sopenharmony_ci			 * we are reverting a reshape, so this section will
6568c2ecf20Sopenharmony_ci			 * be in-sync.
6578c2ecf20Sopenharmony_ci			 */
6588c2ecf20Sopenharmony_ci			if (conf->raid_disks >= conf->previous_raid_disks)
6598c2ecf20Sopenharmony_ci				degraded++;
6608c2ecf20Sopenharmony_ci	}
6618c2ecf20Sopenharmony_ci	rcu_read_unlock();
6628c2ecf20Sopenharmony_ci	if (conf->raid_disks == conf->previous_raid_disks)
6638c2ecf20Sopenharmony_ci		return degraded;
6648c2ecf20Sopenharmony_ci	rcu_read_lock();
6658c2ecf20Sopenharmony_ci	degraded2 = 0;
6668c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks; i++) {
6678c2ecf20Sopenharmony_ci		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
6688c2ecf20Sopenharmony_ci		if (rdev && test_bit(Faulty, &rdev->flags))
6698c2ecf20Sopenharmony_ci			rdev = rcu_dereference(conf->disks[i].replacement);
6708c2ecf20Sopenharmony_ci		if (!rdev || test_bit(Faulty, &rdev->flags))
6718c2ecf20Sopenharmony_ci			degraded2++;
6728c2ecf20Sopenharmony_ci		else if (test_bit(In_sync, &rdev->flags))
6738c2ecf20Sopenharmony_ci			;
6748c2ecf20Sopenharmony_ci		else
6758c2ecf20Sopenharmony_ci			/* not in-sync or faulty.
6768c2ecf20Sopenharmony_ci			 * If reshape increases the number of devices, this
6778c2ecf20Sopenharmony_ci			 * section has already been recovered, else it
6788c2ecf20Sopenharmony_ci			 * almost certainly hasn't.
6798c2ecf20Sopenharmony_ci			 */
6808c2ecf20Sopenharmony_ci			if (conf->raid_disks <= conf->previous_raid_disks)
6818c2ecf20Sopenharmony_ci				degraded2++;
6828c2ecf20Sopenharmony_ci	}
6838c2ecf20Sopenharmony_ci	rcu_read_unlock();
6848c2ecf20Sopenharmony_ci	if (degraded2 > degraded)
6858c2ecf20Sopenharmony_ci		return degraded2;
6868c2ecf20Sopenharmony_ci	return degraded;
6878c2ecf20Sopenharmony_ci}
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_cistatic bool has_failed(struct r5conf *conf)
6908c2ecf20Sopenharmony_ci{
6918c2ecf20Sopenharmony_ci	int degraded = conf->mddev->degraded;
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	if (test_bit(MD_BROKEN, &conf->mddev->flags))
6948c2ecf20Sopenharmony_ci		return true;
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_ci	if (conf->mddev->reshape_position != MaxSector)
6978c2ecf20Sopenharmony_ci		degraded = raid5_calc_degraded(conf);
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	return degraded > conf->max_degraded;
7008c2ecf20Sopenharmony_ci}
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_cistruct stripe_head *
7038c2ecf20Sopenharmony_ciraid5_get_active_stripe(struct r5conf *conf, sector_t sector,
7048c2ecf20Sopenharmony_ci			int previous, int noblock, int noquiesce)
7058c2ecf20Sopenharmony_ci{
7068c2ecf20Sopenharmony_ci	struct stripe_head *sh;
7078c2ecf20Sopenharmony_ci	int hash = stripe_hash_locks_hash(conf, sector);
7088c2ecf20Sopenharmony_ci	int inc_empty_inactive_list_flag;
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	spin_lock_irq(conf->hash_locks + hash);
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	do {
7158c2ecf20Sopenharmony_ci		wait_event_lock_irq(conf->wait_for_quiescent,
7168c2ecf20Sopenharmony_ci				    conf->quiesce == 0 || noquiesce,
7178c2ecf20Sopenharmony_ci				    *(conf->hash_locks + hash));
7188c2ecf20Sopenharmony_ci		sh = __find_stripe(conf, sector, conf->generation - previous);
7198c2ecf20Sopenharmony_ci		if (!sh) {
7208c2ecf20Sopenharmony_ci			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
7218c2ecf20Sopenharmony_ci				sh = get_free_stripe(conf, hash);
7228c2ecf20Sopenharmony_ci				if (!sh && !test_bit(R5_DID_ALLOC,
7238c2ecf20Sopenharmony_ci						     &conf->cache_state))
7248c2ecf20Sopenharmony_ci					set_bit(R5_ALLOC_MORE,
7258c2ecf20Sopenharmony_ci						&conf->cache_state);
7268c2ecf20Sopenharmony_ci			}
7278c2ecf20Sopenharmony_ci			if (noblock && sh == NULL)
7288c2ecf20Sopenharmony_ci				break;
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci			r5c_check_stripe_cache_usage(conf);
7318c2ecf20Sopenharmony_ci			if (!sh) {
7328c2ecf20Sopenharmony_ci				set_bit(R5_INACTIVE_BLOCKED,
7338c2ecf20Sopenharmony_ci					&conf->cache_state);
7348c2ecf20Sopenharmony_ci				r5l_wake_reclaim(conf->log, 0);
7358c2ecf20Sopenharmony_ci				wait_event_lock_irq(
7368c2ecf20Sopenharmony_ci					conf->wait_for_stripe,
7378c2ecf20Sopenharmony_ci					!list_empty(conf->inactive_list + hash) &&
7388c2ecf20Sopenharmony_ci					(atomic_read(&conf->active_stripes)
7398c2ecf20Sopenharmony_ci					 < (conf->max_nr_stripes * 3 / 4)
7408c2ecf20Sopenharmony_ci					 || !test_bit(R5_INACTIVE_BLOCKED,
7418c2ecf20Sopenharmony_ci						      &conf->cache_state)),
7428c2ecf20Sopenharmony_ci					*(conf->hash_locks + hash));
7438c2ecf20Sopenharmony_ci				clear_bit(R5_INACTIVE_BLOCKED,
7448c2ecf20Sopenharmony_ci					  &conf->cache_state);
7458c2ecf20Sopenharmony_ci			} else {
7468c2ecf20Sopenharmony_ci				init_stripe(sh, sector, previous);
7478c2ecf20Sopenharmony_ci				atomic_inc(&sh->count);
7488c2ecf20Sopenharmony_ci			}
7498c2ecf20Sopenharmony_ci		} else if (!atomic_inc_not_zero(&sh->count)) {
7508c2ecf20Sopenharmony_ci			spin_lock(&conf->device_lock);
7518c2ecf20Sopenharmony_ci			if (!atomic_read(&sh->count)) {
7528c2ecf20Sopenharmony_ci				if (!test_bit(STRIPE_HANDLE, &sh->state))
7538c2ecf20Sopenharmony_ci					atomic_inc(&conf->active_stripes);
7548c2ecf20Sopenharmony_ci				BUG_ON(list_empty(&sh->lru) &&
7558c2ecf20Sopenharmony_ci				       !test_bit(STRIPE_EXPANDING, &sh->state));
7568c2ecf20Sopenharmony_ci				inc_empty_inactive_list_flag = 0;
7578c2ecf20Sopenharmony_ci				if (!list_empty(conf->inactive_list + hash))
7588c2ecf20Sopenharmony_ci					inc_empty_inactive_list_flag = 1;
7598c2ecf20Sopenharmony_ci				list_del_init(&sh->lru);
7608c2ecf20Sopenharmony_ci				if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
7618c2ecf20Sopenharmony_ci					atomic_inc(&conf->empty_inactive_list_nr);
7628c2ecf20Sopenharmony_ci				if (sh->group) {
7638c2ecf20Sopenharmony_ci					sh->group->stripes_cnt--;
7648c2ecf20Sopenharmony_ci					sh->group = NULL;
7658c2ecf20Sopenharmony_ci				}
7668c2ecf20Sopenharmony_ci			}
7678c2ecf20Sopenharmony_ci			atomic_inc(&sh->count);
7688c2ecf20Sopenharmony_ci			spin_unlock(&conf->device_lock);
7698c2ecf20Sopenharmony_ci		}
7708c2ecf20Sopenharmony_ci	} while (sh == NULL);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	spin_unlock_irq(conf->hash_locks + hash);
7738c2ecf20Sopenharmony_ci	return sh;
7748c2ecf20Sopenharmony_ci}
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_cistatic bool is_full_stripe_write(struct stripe_head *sh)
7778c2ecf20Sopenharmony_ci{
7788c2ecf20Sopenharmony_ci	BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
7798c2ecf20Sopenharmony_ci	return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded);
7808c2ecf20Sopenharmony_ci}
7818c2ecf20Sopenharmony_ci
7828c2ecf20Sopenharmony_cistatic void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
7838c2ecf20Sopenharmony_ci		__acquires(&sh1->stripe_lock)
7848c2ecf20Sopenharmony_ci		__acquires(&sh2->stripe_lock)
7858c2ecf20Sopenharmony_ci{
7868c2ecf20Sopenharmony_ci	if (sh1 > sh2) {
7878c2ecf20Sopenharmony_ci		spin_lock_irq(&sh2->stripe_lock);
7888c2ecf20Sopenharmony_ci		spin_lock_nested(&sh1->stripe_lock, 1);
7898c2ecf20Sopenharmony_ci	} else {
7908c2ecf20Sopenharmony_ci		spin_lock_irq(&sh1->stripe_lock);
7918c2ecf20Sopenharmony_ci		spin_lock_nested(&sh2->stripe_lock, 1);
7928c2ecf20Sopenharmony_ci	}
7938c2ecf20Sopenharmony_ci}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_cistatic void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
7968c2ecf20Sopenharmony_ci		__releases(&sh1->stripe_lock)
7978c2ecf20Sopenharmony_ci		__releases(&sh2->stripe_lock)
7988c2ecf20Sopenharmony_ci{
7998c2ecf20Sopenharmony_ci	spin_unlock(&sh1->stripe_lock);
8008c2ecf20Sopenharmony_ci	spin_unlock_irq(&sh2->stripe_lock);
8018c2ecf20Sopenharmony_ci}
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci/* Only freshly new full stripe normal write stripe can be added to a batch list */
8048c2ecf20Sopenharmony_cistatic bool stripe_can_batch(struct stripe_head *sh)
8058c2ecf20Sopenharmony_ci{
8068c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	if (raid5_has_log(conf) || raid5_has_ppl(conf))
8098c2ecf20Sopenharmony_ci		return false;
8108c2ecf20Sopenharmony_ci	return test_bit(STRIPE_BATCH_READY, &sh->state) &&
8118c2ecf20Sopenharmony_ci		!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
8128c2ecf20Sopenharmony_ci		is_full_stripe_write(sh);
8138c2ecf20Sopenharmony_ci}
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci/* we only do back search */
8168c2ecf20Sopenharmony_cistatic void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
8178c2ecf20Sopenharmony_ci{
8188c2ecf20Sopenharmony_ci	struct stripe_head *head;
8198c2ecf20Sopenharmony_ci	sector_t head_sector, tmp_sec;
8208c2ecf20Sopenharmony_ci	int hash;
8218c2ecf20Sopenharmony_ci	int dd_idx;
8228c2ecf20Sopenharmony_ci	int inc_empty_inactive_list_flag;
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
8258c2ecf20Sopenharmony_ci	tmp_sec = sh->sector;
8268c2ecf20Sopenharmony_ci	if (!sector_div(tmp_sec, conf->chunk_sectors))
8278c2ecf20Sopenharmony_ci		return;
8288c2ecf20Sopenharmony_ci	head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci	hash = stripe_hash_locks_hash(conf, head_sector);
8318c2ecf20Sopenharmony_ci	spin_lock_irq(conf->hash_locks + hash);
8328c2ecf20Sopenharmony_ci	head = __find_stripe(conf, head_sector, conf->generation);
8338c2ecf20Sopenharmony_ci	if (head && !atomic_inc_not_zero(&head->count)) {
8348c2ecf20Sopenharmony_ci		spin_lock(&conf->device_lock);
8358c2ecf20Sopenharmony_ci		if (!atomic_read(&head->count)) {
8368c2ecf20Sopenharmony_ci			if (!test_bit(STRIPE_HANDLE, &head->state))
8378c2ecf20Sopenharmony_ci				atomic_inc(&conf->active_stripes);
8388c2ecf20Sopenharmony_ci			BUG_ON(list_empty(&head->lru) &&
8398c2ecf20Sopenharmony_ci			       !test_bit(STRIPE_EXPANDING, &head->state));
8408c2ecf20Sopenharmony_ci			inc_empty_inactive_list_flag = 0;
8418c2ecf20Sopenharmony_ci			if (!list_empty(conf->inactive_list + hash))
8428c2ecf20Sopenharmony_ci				inc_empty_inactive_list_flag = 1;
8438c2ecf20Sopenharmony_ci			list_del_init(&head->lru);
8448c2ecf20Sopenharmony_ci			if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
8458c2ecf20Sopenharmony_ci				atomic_inc(&conf->empty_inactive_list_nr);
8468c2ecf20Sopenharmony_ci			if (head->group) {
8478c2ecf20Sopenharmony_ci				head->group->stripes_cnt--;
8488c2ecf20Sopenharmony_ci				head->group = NULL;
8498c2ecf20Sopenharmony_ci			}
8508c2ecf20Sopenharmony_ci		}
8518c2ecf20Sopenharmony_ci		atomic_inc(&head->count);
8528c2ecf20Sopenharmony_ci		spin_unlock(&conf->device_lock);
8538c2ecf20Sopenharmony_ci	}
8548c2ecf20Sopenharmony_ci	spin_unlock_irq(conf->hash_locks + hash);
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci	if (!head)
8578c2ecf20Sopenharmony_ci		return;
8588c2ecf20Sopenharmony_ci	if (!stripe_can_batch(head))
8598c2ecf20Sopenharmony_ci		goto out;
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	lock_two_stripes(head, sh);
8628c2ecf20Sopenharmony_ci	/* clear_batch_ready clear the flag */
8638c2ecf20Sopenharmony_ci	if (!stripe_can_batch(head) || !stripe_can_batch(sh))
8648c2ecf20Sopenharmony_ci		goto unlock_out;
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci	if (sh->batch_head)
8678c2ecf20Sopenharmony_ci		goto unlock_out;
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	dd_idx = 0;
8708c2ecf20Sopenharmony_ci	while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
8718c2ecf20Sopenharmony_ci		dd_idx++;
8728c2ecf20Sopenharmony_ci	if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf ||
8738c2ecf20Sopenharmony_ci	    bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite))
8748c2ecf20Sopenharmony_ci		goto unlock_out;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	if (head->batch_head) {
8778c2ecf20Sopenharmony_ci		spin_lock(&head->batch_head->batch_lock);
8788c2ecf20Sopenharmony_ci		/* This batch list is already running */
8798c2ecf20Sopenharmony_ci		if (!stripe_can_batch(head)) {
8808c2ecf20Sopenharmony_ci			spin_unlock(&head->batch_head->batch_lock);
8818c2ecf20Sopenharmony_ci			goto unlock_out;
8828c2ecf20Sopenharmony_ci		}
8838c2ecf20Sopenharmony_ci		/*
8848c2ecf20Sopenharmony_ci		 * We must assign batch_head of this stripe within the
8858c2ecf20Sopenharmony_ci		 * batch_lock, otherwise clear_batch_ready of batch head
8868c2ecf20Sopenharmony_ci		 * stripe could clear BATCH_READY bit of this stripe and
8878c2ecf20Sopenharmony_ci		 * this stripe->batch_head doesn't get assigned, which
8888c2ecf20Sopenharmony_ci		 * could confuse clear_batch_ready for this stripe
8898c2ecf20Sopenharmony_ci		 */
8908c2ecf20Sopenharmony_ci		sh->batch_head = head->batch_head;
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci		/*
8938c2ecf20Sopenharmony_ci		 * at this point, head's BATCH_READY could be cleared, but we
8948c2ecf20Sopenharmony_ci		 * can still add the stripe to batch list
8958c2ecf20Sopenharmony_ci		 */
8968c2ecf20Sopenharmony_ci		list_add(&sh->batch_list, &head->batch_list);
8978c2ecf20Sopenharmony_ci		spin_unlock(&head->batch_head->batch_lock);
8988c2ecf20Sopenharmony_ci	} else {
8998c2ecf20Sopenharmony_ci		head->batch_head = head;
9008c2ecf20Sopenharmony_ci		sh->batch_head = head->batch_head;
9018c2ecf20Sopenharmony_ci		spin_lock(&head->batch_lock);
9028c2ecf20Sopenharmony_ci		list_add_tail(&sh->batch_list, &head->batch_list);
9038c2ecf20Sopenharmony_ci		spin_unlock(&head->batch_lock);
9048c2ecf20Sopenharmony_ci	}
9058c2ecf20Sopenharmony_ci
9068c2ecf20Sopenharmony_ci	if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
9078c2ecf20Sopenharmony_ci		if (atomic_dec_return(&conf->preread_active_stripes)
9088c2ecf20Sopenharmony_ci		    < IO_THRESHOLD)
9098c2ecf20Sopenharmony_ci			md_wakeup_thread(conf->mddev->thread);
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
9128c2ecf20Sopenharmony_ci		int seq = sh->bm_seq;
9138c2ecf20Sopenharmony_ci		if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
9148c2ecf20Sopenharmony_ci		    sh->batch_head->bm_seq > seq)
9158c2ecf20Sopenharmony_ci			seq = sh->batch_head->bm_seq;
9168c2ecf20Sopenharmony_ci		set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
9178c2ecf20Sopenharmony_ci		sh->batch_head->bm_seq = seq;
9188c2ecf20Sopenharmony_ci	}
9198c2ecf20Sopenharmony_ci
9208c2ecf20Sopenharmony_ci	atomic_inc(&sh->count);
9218c2ecf20Sopenharmony_ciunlock_out:
9228c2ecf20Sopenharmony_ci	unlock_two_stripes(head, sh);
9238c2ecf20Sopenharmony_ciout:
9248c2ecf20Sopenharmony_ci	raid5_release_stripe(head);
9258c2ecf20Sopenharmony_ci}
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci/* Determine if 'data_offset' or 'new_data_offset' should be used
9288c2ecf20Sopenharmony_ci * in this stripe_head.
9298c2ecf20Sopenharmony_ci */
9308c2ecf20Sopenharmony_cistatic int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
9318c2ecf20Sopenharmony_ci{
9328c2ecf20Sopenharmony_ci	sector_t progress = conf->reshape_progress;
9338c2ecf20Sopenharmony_ci	/* Need a memory barrier to make sure we see the value
9348c2ecf20Sopenharmony_ci	 * of conf->generation, or ->data_offset that was set before
9358c2ecf20Sopenharmony_ci	 * reshape_progress was updated.
9368c2ecf20Sopenharmony_ci	 */
9378c2ecf20Sopenharmony_ci	smp_rmb();
9388c2ecf20Sopenharmony_ci	if (progress == MaxSector)
9398c2ecf20Sopenharmony_ci		return 0;
9408c2ecf20Sopenharmony_ci	if (sh->generation == conf->generation - 1)
9418c2ecf20Sopenharmony_ci		return 0;
9428c2ecf20Sopenharmony_ci	/* We are in a reshape, and this is a new-generation stripe,
9438c2ecf20Sopenharmony_ci	 * so use new_data_offset.
9448c2ecf20Sopenharmony_ci	 */
9458c2ecf20Sopenharmony_ci	return 1;
9468c2ecf20Sopenharmony_ci}
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_cistatic void dispatch_bio_list(struct bio_list *tmp)
9498c2ecf20Sopenharmony_ci{
9508c2ecf20Sopenharmony_ci	struct bio *bio;
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(tmp)))
9538c2ecf20Sopenharmony_ci		submit_bio_noacct(bio);
9548c2ecf20Sopenharmony_ci}
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_cistatic int cmp_stripe(void *priv, const struct list_head *a,
9578c2ecf20Sopenharmony_ci		      const struct list_head *b)
9588c2ecf20Sopenharmony_ci{
9598c2ecf20Sopenharmony_ci	const struct r5pending_data *da = list_entry(a,
9608c2ecf20Sopenharmony_ci				struct r5pending_data, sibling);
9618c2ecf20Sopenharmony_ci	const struct r5pending_data *db = list_entry(b,
9628c2ecf20Sopenharmony_ci				struct r5pending_data, sibling);
9638c2ecf20Sopenharmony_ci	if (da->sector > db->sector)
9648c2ecf20Sopenharmony_ci		return 1;
9658c2ecf20Sopenharmony_ci	if (da->sector < db->sector)
9668c2ecf20Sopenharmony_ci		return -1;
9678c2ecf20Sopenharmony_ci	return 0;
9688c2ecf20Sopenharmony_ci}
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_cistatic void dispatch_defer_bios(struct r5conf *conf, int target,
9718c2ecf20Sopenharmony_ci				struct bio_list *list)
9728c2ecf20Sopenharmony_ci{
9738c2ecf20Sopenharmony_ci	struct r5pending_data *data;
9748c2ecf20Sopenharmony_ci	struct list_head *first, *next = NULL;
9758c2ecf20Sopenharmony_ci	int cnt = 0;
9768c2ecf20Sopenharmony_ci
9778c2ecf20Sopenharmony_ci	if (conf->pending_data_cnt == 0)
9788c2ecf20Sopenharmony_ci		return;
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	list_sort(NULL, &conf->pending_list, cmp_stripe);
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ci	first = conf->pending_list.next;
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	/* temporarily move the head */
9858c2ecf20Sopenharmony_ci	if (conf->next_pending_data)
9868c2ecf20Sopenharmony_ci		list_move_tail(&conf->pending_list,
9878c2ecf20Sopenharmony_ci				&conf->next_pending_data->sibling);
9888c2ecf20Sopenharmony_ci
9898c2ecf20Sopenharmony_ci	while (!list_empty(&conf->pending_list)) {
9908c2ecf20Sopenharmony_ci		data = list_first_entry(&conf->pending_list,
9918c2ecf20Sopenharmony_ci			struct r5pending_data, sibling);
9928c2ecf20Sopenharmony_ci		if (&data->sibling == first)
9938c2ecf20Sopenharmony_ci			first = data->sibling.next;
9948c2ecf20Sopenharmony_ci		next = data->sibling.next;
9958c2ecf20Sopenharmony_ci
9968c2ecf20Sopenharmony_ci		bio_list_merge(list, &data->bios);
9978c2ecf20Sopenharmony_ci		list_move(&data->sibling, &conf->free_list);
9988c2ecf20Sopenharmony_ci		cnt++;
9998c2ecf20Sopenharmony_ci		if (cnt >= target)
10008c2ecf20Sopenharmony_ci			break;
10018c2ecf20Sopenharmony_ci	}
10028c2ecf20Sopenharmony_ci	conf->pending_data_cnt -= cnt;
10038c2ecf20Sopenharmony_ci	BUG_ON(conf->pending_data_cnt < 0 || cnt < target);
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	if (next != &conf->pending_list)
10068c2ecf20Sopenharmony_ci		conf->next_pending_data = list_entry(next,
10078c2ecf20Sopenharmony_ci				struct r5pending_data, sibling);
10088c2ecf20Sopenharmony_ci	else
10098c2ecf20Sopenharmony_ci		conf->next_pending_data = NULL;
10108c2ecf20Sopenharmony_ci	/* list isn't empty */
10118c2ecf20Sopenharmony_ci	if (first != &conf->pending_list)
10128c2ecf20Sopenharmony_ci		list_move_tail(&conf->pending_list, first);
10138c2ecf20Sopenharmony_ci}
10148c2ecf20Sopenharmony_ci
10158c2ecf20Sopenharmony_cistatic void flush_deferred_bios(struct r5conf *conf)
10168c2ecf20Sopenharmony_ci{
10178c2ecf20Sopenharmony_ci	struct bio_list tmp = BIO_EMPTY_LIST;
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci	if (conf->pending_data_cnt == 0)
10208c2ecf20Sopenharmony_ci		return;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	spin_lock(&conf->pending_bios_lock);
10238c2ecf20Sopenharmony_ci	dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp);
10248c2ecf20Sopenharmony_ci	BUG_ON(conf->pending_data_cnt != 0);
10258c2ecf20Sopenharmony_ci	spin_unlock(&conf->pending_bios_lock);
10268c2ecf20Sopenharmony_ci
10278c2ecf20Sopenharmony_ci	dispatch_bio_list(&tmp);
10288c2ecf20Sopenharmony_ci}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_cistatic void defer_issue_bios(struct r5conf *conf, sector_t sector,
10318c2ecf20Sopenharmony_ci				struct bio_list *bios)
10328c2ecf20Sopenharmony_ci{
10338c2ecf20Sopenharmony_ci	struct bio_list tmp = BIO_EMPTY_LIST;
10348c2ecf20Sopenharmony_ci	struct r5pending_data *ent;
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_ci	spin_lock(&conf->pending_bios_lock);
10378c2ecf20Sopenharmony_ci	ent = list_first_entry(&conf->free_list, struct r5pending_data,
10388c2ecf20Sopenharmony_ci							sibling);
10398c2ecf20Sopenharmony_ci	list_move_tail(&ent->sibling, &conf->pending_list);
10408c2ecf20Sopenharmony_ci	ent->sector = sector;
10418c2ecf20Sopenharmony_ci	bio_list_init(&ent->bios);
10428c2ecf20Sopenharmony_ci	bio_list_merge(&ent->bios, bios);
10438c2ecf20Sopenharmony_ci	conf->pending_data_cnt++;
10448c2ecf20Sopenharmony_ci	if (conf->pending_data_cnt >= PENDING_IO_MAX)
10458c2ecf20Sopenharmony_ci		dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp);
10468c2ecf20Sopenharmony_ci
10478c2ecf20Sopenharmony_ci	spin_unlock(&conf->pending_bios_lock);
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	dispatch_bio_list(&tmp);
10508c2ecf20Sopenharmony_ci}
10518c2ecf20Sopenharmony_ci
10528c2ecf20Sopenharmony_cistatic void
10538c2ecf20Sopenharmony_ciraid5_end_read_request(struct bio *bi);
10548c2ecf20Sopenharmony_cistatic void
10558c2ecf20Sopenharmony_ciraid5_end_write_request(struct bio *bi);
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_cistatic void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
10588c2ecf20Sopenharmony_ci{
10598c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
10608c2ecf20Sopenharmony_ci	int i, disks = sh->disks;
10618c2ecf20Sopenharmony_ci	struct stripe_head *head_sh = sh;
10628c2ecf20Sopenharmony_ci	struct bio_list pending_bios = BIO_EMPTY_LIST;
10638c2ecf20Sopenharmony_ci	bool should_defer;
10648c2ecf20Sopenharmony_ci
10658c2ecf20Sopenharmony_ci	might_sleep();
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_ci	if (log_stripe(sh, s) == 0)
10688c2ecf20Sopenharmony_ci		return;
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ci	should_defer = conf->batch_bio_dispatch && conf->group_cnt;
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	for (i = disks; i--; ) {
10738c2ecf20Sopenharmony_ci		int op, op_flags = 0;
10748c2ecf20Sopenharmony_ci		int replace_only = 0;
10758c2ecf20Sopenharmony_ci		struct bio *bi, *rbi;
10768c2ecf20Sopenharmony_ci		struct md_rdev *rdev, *rrdev = NULL;
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_ci		sh = head_sh;
10798c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
10808c2ecf20Sopenharmony_ci			op = REQ_OP_WRITE;
10818c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
10828c2ecf20Sopenharmony_ci				op_flags = REQ_FUA;
10838c2ecf20Sopenharmony_ci			if (test_bit(R5_Discard, &sh->dev[i].flags))
10848c2ecf20Sopenharmony_ci				op = REQ_OP_DISCARD;
10858c2ecf20Sopenharmony_ci		} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
10868c2ecf20Sopenharmony_ci			op = REQ_OP_READ;
10878c2ecf20Sopenharmony_ci		else if (test_and_clear_bit(R5_WantReplace,
10888c2ecf20Sopenharmony_ci					    &sh->dev[i].flags)) {
10898c2ecf20Sopenharmony_ci			op = REQ_OP_WRITE;
10908c2ecf20Sopenharmony_ci			replace_only = 1;
10918c2ecf20Sopenharmony_ci		} else
10928c2ecf20Sopenharmony_ci			continue;
10938c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
10948c2ecf20Sopenharmony_ci			op_flags |= REQ_SYNC;
10958c2ecf20Sopenharmony_ci
10968c2ecf20Sopenharmony_ciagain:
10978c2ecf20Sopenharmony_ci		bi = &sh->dev[i].req;
10988c2ecf20Sopenharmony_ci		rbi = &sh->dev[i].rreq; /* For writing to replacement */
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_ci		rcu_read_lock();
11018c2ecf20Sopenharmony_ci		rrdev = rcu_dereference(conf->disks[i].replacement);
11028c2ecf20Sopenharmony_ci		smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
11038c2ecf20Sopenharmony_ci		rdev = rcu_dereference(conf->disks[i].rdev);
11048c2ecf20Sopenharmony_ci		if (!rdev) {
11058c2ecf20Sopenharmony_ci			rdev = rrdev;
11068c2ecf20Sopenharmony_ci			rrdev = NULL;
11078c2ecf20Sopenharmony_ci		}
11088c2ecf20Sopenharmony_ci		if (op_is_write(op)) {
11098c2ecf20Sopenharmony_ci			if (replace_only)
11108c2ecf20Sopenharmony_ci				rdev = NULL;
11118c2ecf20Sopenharmony_ci			if (rdev == rrdev)
11128c2ecf20Sopenharmony_ci				/* We raced and saw duplicates */
11138c2ecf20Sopenharmony_ci				rrdev = NULL;
11148c2ecf20Sopenharmony_ci		} else {
11158c2ecf20Sopenharmony_ci			if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev)
11168c2ecf20Sopenharmony_ci				rdev = rrdev;
11178c2ecf20Sopenharmony_ci			rrdev = NULL;
11188c2ecf20Sopenharmony_ci		}
11198c2ecf20Sopenharmony_ci
11208c2ecf20Sopenharmony_ci		if (rdev && test_bit(Faulty, &rdev->flags))
11218c2ecf20Sopenharmony_ci			rdev = NULL;
11228c2ecf20Sopenharmony_ci		if (rdev)
11238c2ecf20Sopenharmony_ci			atomic_inc(&rdev->nr_pending);
11248c2ecf20Sopenharmony_ci		if (rrdev && test_bit(Faulty, &rrdev->flags))
11258c2ecf20Sopenharmony_ci			rrdev = NULL;
11268c2ecf20Sopenharmony_ci		if (rrdev)
11278c2ecf20Sopenharmony_ci			atomic_inc(&rrdev->nr_pending);
11288c2ecf20Sopenharmony_ci		rcu_read_unlock();
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci		/* We have already checked bad blocks for reads.  Now
11318c2ecf20Sopenharmony_ci		 * need to check for writes.  We never accept write errors
11328c2ecf20Sopenharmony_ci		 * on the replacement, so we don't to check rrdev.
11338c2ecf20Sopenharmony_ci		 */
11348c2ecf20Sopenharmony_ci		while (op_is_write(op) && rdev &&
11358c2ecf20Sopenharmony_ci		       test_bit(WriteErrorSeen, &rdev->flags)) {
11368c2ecf20Sopenharmony_ci			sector_t first_bad;
11378c2ecf20Sopenharmony_ci			int bad_sectors;
11388c2ecf20Sopenharmony_ci			int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
11398c2ecf20Sopenharmony_ci					      &first_bad, &bad_sectors);
11408c2ecf20Sopenharmony_ci			if (!bad)
11418c2ecf20Sopenharmony_ci				break;
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci			if (bad < 0) {
11448c2ecf20Sopenharmony_ci				set_bit(BlockedBadBlocks, &rdev->flags);
11458c2ecf20Sopenharmony_ci				if (!conf->mddev->external &&
11468c2ecf20Sopenharmony_ci				    conf->mddev->sb_flags) {
11478c2ecf20Sopenharmony_ci					/* It is very unlikely, but we might
11488c2ecf20Sopenharmony_ci					 * still need to write out the
11498c2ecf20Sopenharmony_ci					 * bad block log - better give it
11508c2ecf20Sopenharmony_ci					 * a chance*/
11518c2ecf20Sopenharmony_ci					md_check_recovery(conf->mddev);
11528c2ecf20Sopenharmony_ci				}
11538c2ecf20Sopenharmony_ci				/*
11548c2ecf20Sopenharmony_ci				 * Because md_wait_for_blocked_rdev
11558c2ecf20Sopenharmony_ci				 * will dec nr_pending, we must
11568c2ecf20Sopenharmony_ci				 * increment it first.
11578c2ecf20Sopenharmony_ci				 */
11588c2ecf20Sopenharmony_ci				atomic_inc(&rdev->nr_pending);
11598c2ecf20Sopenharmony_ci				md_wait_for_blocked_rdev(rdev, conf->mddev);
11608c2ecf20Sopenharmony_ci			} else {
11618c2ecf20Sopenharmony_ci				/* Acknowledged bad block - skip the write */
11628c2ecf20Sopenharmony_ci				rdev_dec_pending(rdev, conf->mddev);
11638c2ecf20Sopenharmony_ci				rdev = NULL;
11648c2ecf20Sopenharmony_ci			}
11658c2ecf20Sopenharmony_ci		}
11668c2ecf20Sopenharmony_ci
11678c2ecf20Sopenharmony_ci		if (rdev) {
11688c2ecf20Sopenharmony_ci			if (s->syncing || s->expanding || s->expanded
11698c2ecf20Sopenharmony_ci			    || s->replacing)
11708c2ecf20Sopenharmony_ci				md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci			set_bit(STRIPE_IO_STARTED, &sh->state);
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci			bio_set_dev(bi, rdev->bdev);
11758c2ecf20Sopenharmony_ci			bio_set_op_attrs(bi, op, op_flags);
11768c2ecf20Sopenharmony_ci			bi->bi_end_io = op_is_write(op)
11778c2ecf20Sopenharmony_ci				? raid5_end_write_request
11788c2ecf20Sopenharmony_ci				: raid5_end_read_request;
11798c2ecf20Sopenharmony_ci			bi->bi_private = sh;
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci			pr_debug("%s: for %llu schedule op %d on disc %d\n",
11828c2ecf20Sopenharmony_ci				__func__, (unsigned long long)sh->sector,
11838c2ecf20Sopenharmony_ci				bi->bi_opf, i);
11848c2ecf20Sopenharmony_ci			atomic_inc(&sh->count);
11858c2ecf20Sopenharmony_ci			if (sh != head_sh)
11868c2ecf20Sopenharmony_ci				atomic_inc(&head_sh->count);
11878c2ecf20Sopenharmony_ci			if (use_new_offset(conf, sh))
11888c2ecf20Sopenharmony_ci				bi->bi_iter.bi_sector = (sh->sector
11898c2ecf20Sopenharmony_ci						 + rdev->new_data_offset);
11908c2ecf20Sopenharmony_ci			else
11918c2ecf20Sopenharmony_ci				bi->bi_iter.bi_sector = (sh->sector
11928c2ecf20Sopenharmony_ci						 + rdev->data_offset);
11938c2ecf20Sopenharmony_ci			if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags))
11948c2ecf20Sopenharmony_ci				bi->bi_opf |= REQ_NOMERGE;
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_ci			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
11978c2ecf20Sopenharmony_ci				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
11988c2ecf20Sopenharmony_ci
11998c2ecf20Sopenharmony_ci			if (!op_is_write(op) &&
12008c2ecf20Sopenharmony_ci			    test_bit(R5_InJournal, &sh->dev[i].flags))
12018c2ecf20Sopenharmony_ci				/*
12028c2ecf20Sopenharmony_ci				 * issuing read for a page in journal, this
12038c2ecf20Sopenharmony_ci				 * must be preparing for prexor in rmw; read
12048c2ecf20Sopenharmony_ci				 * the data into orig_page
12058c2ecf20Sopenharmony_ci				 */
12068c2ecf20Sopenharmony_ci				sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
12078c2ecf20Sopenharmony_ci			else
12088c2ecf20Sopenharmony_ci				sh->dev[i].vec.bv_page = sh->dev[i].page;
12098c2ecf20Sopenharmony_ci			bi->bi_vcnt = 1;
12108c2ecf20Sopenharmony_ci			bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
12118c2ecf20Sopenharmony_ci			bi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
12128c2ecf20Sopenharmony_ci			bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
12138c2ecf20Sopenharmony_ci			bi->bi_write_hint = sh->dev[i].write_hint;
12148c2ecf20Sopenharmony_ci			if (!rrdev)
12158c2ecf20Sopenharmony_ci				sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
12168c2ecf20Sopenharmony_ci			/*
12178c2ecf20Sopenharmony_ci			 * If this is discard request, set bi_vcnt 0. We don't
12188c2ecf20Sopenharmony_ci			 * want to confuse SCSI because SCSI will replace payload
12198c2ecf20Sopenharmony_ci			 */
12208c2ecf20Sopenharmony_ci			if (op == REQ_OP_DISCARD)
12218c2ecf20Sopenharmony_ci				bi->bi_vcnt = 0;
12228c2ecf20Sopenharmony_ci			if (rrdev)
12238c2ecf20Sopenharmony_ci				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci			if (conf->mddev->gendisk)
12268c2ecf20Sopenharmony_ci				trace_block_bio_remap(bi->bi_disk->queue,
12278c2ecf20Sopenharmony_ci						      bi, disk_devt(conf->mddev->gendisk),
12288c2ecf20Sopenharmony_ci						      sh->dev[i].sector);
12298c2ecf20Sopenharmony_ci			if (should_defer && op_is_write(op))
12308c2ecf20Sopenharmony_ci				bio_list_add(&pending_bios, bi);
12318c2ecf20Sopenharmony_ci			else
12328c2ecf20Sopenharmony_ci				submit_bio_noacct(bi);
12338c2ecf20Sopenharmony_ci		}
12348c2ecf20Sopenharmony_ci		if (rrdev) {
12358c2ecf20Sopenharmony_ci			if (s->syncing || s->expanding || s->expanded
12368c2ecf20Sopenharmony_ci			    || s->replacing)
12378c2ecf20Sopenharmony_ci				md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
12388c2ecf20Sopenharmony_ci
12398c2ecf20Sopenharmony_ci			set_bit(STRIPE_IO_STARTED, &sh->state);
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_ci			bio_set_dev(rbi, rrdev->bdev);
12428c2ecf20Sopenharmony_ci			bio_set_op_attrs(rbi, op, op_flags);
12438c2ecf20Sopenharmony_ci			BUG_ON(!op_is_write(op));
12448c2ecf20Sopenharmony_ci			rbi->bi_end_io = raid5_end_write_request;
12458c2ecf20Sopenharmony_ci			rbi->bi_private = sh;
12468c2ecf20Sopenharmony_ci
12478c2ecf20Sopenharmony_ci			pr_debug("%s: for %llu schedule op %d on "
12488c2ecf20Sopenharmony_ci				 "replacement disc %d\n",
12498c2ecf20Sopenharmony_ci				__func__, (unsigned long long)sh->sector,
12508c2ecf20Sopenharmony_ci				rbi->bi_opf, i);
12518c2ecf20Sopenharmony_ci			atomic_inc(&sh->count);
12528c2ecf20Sopenharmony_ci			if (sh != head_sh)
12538c2ecf20Sopenharmony_ci				atomic_inc(&head_sh->count);
12548c2ecf20Sopenharmony_ci			if (use_new_offset(conf, sh))
12558c2ecf20Sopenharmony_ci				rbi->bi_iter.bi_sector = (sh->sector
12568c2ecf20Sopenharmony_ci						  + rrdev->new_data_offset);
12578c2ecf20Sopenharmony_ci			else
12588c2ecf20Sopenharmony_ci				rbi->bi_iter.bi_sector = (sh->sector
12598c2ecf20Sopenharmony_ci						  + rrdev->data_offset);
12608c2ecf20Sopenharmony_ci			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
12618c2ecf20Sopenharmony_ci				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
12628c2ecf20Sopenharmony_ci			sh->dev[i].rvec.bv_page = sh->dev[i].page;
12638c2ecf20Sopenharmony_ci			rbi->bi_vcnt = 1;
12648c2ecf20Sopenharmony_ci			rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
12658c2ecf20Sopenharmony_ci			rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
12668c2ecf20Sopenharmony_ci			rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
12678c2ecf20Sopenharmony_ci			rbi->bi_write_hint = sh->dev[i].write_hint;
12688c2ecf20Sopenharmony_ci			sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
12698c2ecf20Sopenharmony_ci			/*
12708c2ecf20Sopenharmony_ci			 * If this is discard request, set bi_vcnt 0. We don't
12718c2ecf20Sopenharmony_ci			 * want to confuse SCSI because SCSI will replace payload
12728c2ecf20Sopenharmony_ci			 */
12738c2ecf20Sopenharmony_ci			if (op == REQ_OP_DISCARD)
12748c2ecf20Sopenharmony_ci				rbi->bi_vcnt = 0;
12758c2ecf20Sopenharmony_ci			if (conf->mddev->gendisk)
12768c2ecf20Sopenharmony_ci				trace_block_bio_remap(rbi->bi_disk->queue,
12778c2ecf20Sopenharmony_ci						      rbi, disk_devt(conf->mddev->gendisk),
12788c2ecf20Sopenharmony_ci						      sh->dev[i].sector);
12798c2ecf20Sopenharmony_ci			if (should_defer && op_is_write(op))
12808c2ecf20Sopenharmony_ci				bio_list_add(&pending_bios, rbi);
12818c2ecf20Sopenharmony_ci			else
12828c2ecf20Sopenharmony_ci				submit_bio_noacct(rbi);
12838c2ecf20Sopenharmony_ci		}
12848c2ecf20Sopenharmony_ci		if (!rdev && !rrdev) {
12858c2ecf20Sopenharmony_ci			if (op_is_write(op))
12868c2ecf20Sopenharmony_ci				set_bit(STRIPE_DEGRADED, &sh->state);
12878c2ecf20Sopenharmony_ci			pr_debug("skip op %d on disc %d for sector %llu\n",
12888c2ecf20Sopenharmony_ci				bi->bi_opf, i, (unsigned long long)sh->sector);
12898c2ecf20Sopenharmony_ci			clear_bit(R5_LOCKED, &sh->dev[i].flags);
12908c2ecf20Sopenharmony_ci			set_bit(STRIPE_HANDLE, &sh->state);
12918c2ecf20Sopenharmony_ci		}
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci		if (!head_sh->batch_head)
12948c2ecf20Sopenharmony_ci			continue;
12958c2ecf20Sopenharmony_ci		sh = list_first_entry(&sh->batch_list, struct stripe_head,
12968c2ecf20Sopenharmony_ci				      batch_list);
12978c2ecf20Sopenharmony_ci		if (sh != head_sh)
12988c2ecf20Sopenharmony_ci			goto again;
12998c2ecf20Sopenharmony_ci	}
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci	if (should_defer && !bio_list_empty(&pending_bios))
13028c2ecf20Sopenharmony_ci		defer_issue_bios(conf, head_sh->sector, &pending_bios);
13038c2ecf20Sopenharmony_ci}
13048c2ecf20Sopenharmony_ci
/*
 * async_copy_data - copy the overlap between a bio and a stripe page
 * @frombio:	non-zero to copy bio -> page (write path), zero for
 *		page -> bio (read path)
 * @bio:	bio supplying or receiving the data
 * @page:	in/out pointer to the stripe page; on the write path it may
 *		be redirected to the bio's own page when skip_copy applies
 * @poff:	byte offset of this device's data inside *page
 * @sector:	first sector covered by the stripe page
 * @tx:		descriptor to chain the copies behind (may be NULL)
 * @sh:		stripe the page belongs to
 * @no_skipcopy: forbid the zero-copy (page aliasing) optimization
 *
 * Walks the bio segment by segment and issues async_memcpy operations for
 * the byte range that overlaps [sector, sector + stripe size).  Returns
 * the last descriptor issued so further work can be chained behind it.
 */
static struct dma_async_tx_descriptor *
async_copy_data(int frombio, struct bio *bio, struct page **page,
	unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx,
	struct stripe_head *sh, int no_skipcopy)
{
	struct bio_vec bvl;
	struct bvec_iter iter;
	struct page *bio_page;
	int page_offset;
	struct async_submit_ctl submit;
	enum async_tx_flags flags = 0;
	struct r5conf *conf = sh->raid_conf;

	/*
	 * Byte offset of the bio's start relative to the stripe page;
	 * negative when the bio begins before this page.
	 */
	if (bio->bi_iter.bi_sector >= sector)
		page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
	else
		page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;

	if (frombio)
		flags |= ASYNC_TX_FENCE;
	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);

	bio_for_each_segment(bvl, bio, iter) {
		int len = bvl.bv_len;
		int clen;
		int b_offset = 0;

		/* skip the part of this segment that precedes the page */
		if (page_offset < 0) {
			b_offset = -page_offset;
			page_offset += b_offset;
			len -= b_offset;
		}

		/* clip the copy length at the end of the stripe page */
		if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf))
			clen = RAID5_STRIPE_SIZE(conf) - page_offset;
		else
			clen = len;

		if (clen > 0) {
			b_offset += bvl.bv_offset;
			bio_page = bvl.bv_page;
			if (frombio) {
				/*
				 * A fully aligned, full-size write can alias
				 * the bio's page instead of copying.
				 */
				if (conf->skip_copy &&
				    b_offset == 0 && page_offset == 0 &&
				    clen == RAID5_STRIPE_SIZE(conf) &&
				    !no_skipcopy)
					*page = bio_page;
				else
					tx = async_memcpy(*page, bio_page, page_offset + poff,
						  b_offset, clen, &submit);
			} else
				tx = async_memcpy(bio_page, *page, b_offset,
						  page_offset + poff, clen, &submit);
		}
		/* chain the operations */
		submit.depend_tx = tx;

		if (clen < len) /* hit end of page */
			break;
		page_offset +=  len;
	}

	return tx;
}
13698c2ecf20Sopenharmony_ci
/*
 * ops_complete_biofill - async callback run once all biofill copies finish
 * @stripe_head_ref: the stripe whose reads were serviced
 *
 * Ends every read bio whose data has been copied out of the stripe pages,
 * clears STRIPE_BIOFILL_RUN so new R5_Wantfill work may start, and
 * re-queues the stripe for handling.
 */
static void ops_complete_biofill(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	int i;
	struct r5conf *conf = sh->raid_conf;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	/* clear completed biofills */
	for (i = sh->disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];

		/* acknowledge completion of a biofill operation */
		/* and check if we need to reply to a read request,
		 * new R5_Wantfill requests are held off until
		 * !STRIPE_BIOFILL_RUN
		 */
		if (test_and_clear_bit(R5_Wantfill, &dev->flags)) {
			struct bio *rbi, *rbi2;

			/* ops_run_biofill set dev->read for every Wantfill dev */
			BUG_ON(!dev->read);
			rbi = dev->read;
			dev->read = NULL;
			/* complete each bio overlapping this device's range */
			while (rbi && rbi->bi_iter.bi_sector <
				dev->sector + RAID5_STRIPE_SECTORS(conf)) {
				rbi2 = r5_next_bio(conf, rbi, dev->sector);
				bio_endio(rbi);
				rbi = rbi2;
			}
		}
	}
	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);

	set_bit(STRIPE_HANDLE, &sh->state);
	/* drop the reference taken by ops_run_biofill */
	raid5_release_stripe(sh);
}
14078c2ecf20Sopenharmony_ci
/*
 * ops_run_biofill - copy stripe data out to pending read bios
 * @sh: stripe to service
 *
 * For every device flagged R5_Wantfill, moves the queued bios from
 * dev->toread to dev->read under stripe_lock and chains async copies of
 * the stripe page into each bio.  Completion (ending the bios) happens
 * in ops_complete_biofill.
 */
static void ops_run_biofill(struct stripe_head *sh)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct async_submit_ctl submit;
	int i;
	struct r5conf *conf = sh->raid_conf;

	/* biofill is never run on a batched stripe */
	BUG_ON(sh->batch_head);
	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	for (i = sh->disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if (test_bit(R5_Wantfill, &dev->flags)) {
			struct bio *rbi;
			/* take ownership of the toread list under the lock */
			spin_lock_irq(&sh->stripe_lock);
			dev->read = rbi = dev->toread;
			dev->toread = NULL;
			spin_unlock_irq(&sh->stripe_lock);
			while (rbi && rbi->bi_iter.bi_sector <
				dev->sector + RAID5_STRIPE_SECTORS(conf)) {
				tx = async_copy_data(0, rbi, &dev->page,
						     dev->offset,
						     dev->sector, tx, sh, 0);
				rbi = r5_next_bio(conf, rbi, dev->sector);
			}
		}
	}

	/* hold the stripe until ops_complete_biofill runs */
	atomic_inc(&sh->count);
	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
	async_trigger_callback(&submit);
}
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_cistatic void mark_target_uptodate(struct stripe_head *sh, int target)
14438c2ecf20Sopenharmony_ci{
14448c2ecf20Sopenharmony_ci	struct r5dev *tgt;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	if (target < 0)
14478c2ecf20Sopenharmony_ci		return;
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_ci	tgt = &sh->dev[target];
14508c2ecf20Sopenharmony_ci	set_bit(R5_UPTODATE, &tgt->flags);
14518c2ecf20Sopenharmony_ci	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
14528c2ecf20Sopenharmony_ci	clear_bit(R5_Wantcompute, &tgt->flags);
14538c2ecf20Sopenharmony_ci}
14548c2ecf20Sopenharmony_ci
/*
 * ops_complete_compute - async callback run when a compute operation ends
 * @stripe_head_ref: the stripe whose block(s) were recomputed
 *
 * Marks the computed target device(s) up to date, advances the check
 * state machine when this compute was part of a parity check, and
 * re-queues the stripe.
 */
static void ops_complete_compute(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	/* mark the computed target(s) as uptodate */
	mark_target_uptodate(sh, sh->ops.target);
	mark_target_uptodate(sh, sh->ops.target2);

	clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
	if (sh->check_state == check_state_compute_run)
		sh->check_state = check_state_compute_result;
	set_bit(STRIPE_HANDLE, &sh->state);
	/* drop the reference taken when the compute was scheduled */
	raid5_release_stripe(sh);
}
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci/* return a pointer to the address conversion region of the scribble buffer */
14748c2ecf20Sopenharmony_cistatic struct page **to_addr_page(struct raid5_percpu *percpu, int i)
14758c2ecf20Sopenharmony_ci{
14768c2ecf20Sopenharmony_ci	return percpu->scribble + i * percpu->scribble_obj_size;
14778c2ecf20Sopenharmony_ci}
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_ci/* return a pointer to the address conversion region of the scribble buffer */
14808c2ecf20Sopenharmony_cistatic addr_conv_t *to_addr_conv(struct stripe_head *sh,
14818c2ecf20Sopenharmony_ci				 struct raid5_percpu *percpu, int i)
14828c2ecf20Sopenharmony_ci{
14838c2ecf20Sopenharmony_ci	return (void *) (to_addr_page(percpu, i) + sh->disks + 2);
14848c2ecf20Sopenharmony_ci}
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci/*
14878c2ecf20Sopenharmony_ci * Return a pointer to record offset address.
14888c2ecf20Sopenharmony_ci */
14898c2ecf20Sopenharmony_cistatic unsigned int *
14908c2ecf20Sopenharmony_cito_addr_offs(struct stripe_head *sh, struct raid5_percpu *percpu)
14918c2ecf20Sopenharmony_ci{
14928c2ecf20Sopenharmony_ci	return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2);
14938c2ecf20Sopenharmony_ci}
14948c2ecf20Sopenharmony_ci
/*
 * ops_run_compute5 - recompute one missing block of a raid4/5 stripe
 * @sh:	    stripe with exactly one compute target (sh->ops.target)
 * @percpu: per-cpu scribble buffers for the async engine
 *
 * XORs all other devices' pages into the target page.  With a single
 * source the XOR degenerates to a plain copy.  ops_complete_compute runs
 * when the operation finishes; the returned descriptor lets further ops
 * be chained behind the compute.
 */
static struct dma_async_tx_descriptor *
ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
{
	int disks = sh->disks;
	struct page **xor_srcs = to_addr_page(percpu, 0);
	unsigned int *off_srcs = to_addr_offs(sh, percpu);
	int target = sh->ops.target;
	struct r5dev *tgt = &sh->dev[target];
	struct page *xor_dest = tgt->page;
	unsigned int off_dest = tgt->offset;
	int count = 0;
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;
	int i;

	BUG_ON(sh->batch_head);

	pr_debug("%s: stripe %llu block: %d\n",
		__func__, (unsigned long long)sh->sector, target);
	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));

	/* every device except the target contributes to the XOR */
	for (i = disks; i--; ) {
		if (i != target) {
			off_srcs[count] = sh->dev[i].offset;
			xor_srcs[count++] = sh->dev[i].page;
		}
	}

	/* hold the stripe until ops_complete_compute runs */
	atomic_inc(&sh->count);

	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
			  ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
	if (unlikely(count == 1))
		/* 2-disk array: "XOR" of one source is just a copy */
		tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0],
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
	else
		tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count,
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);

	return tx;
}
15368c2ecf20Sopenharmony_ci
/* set_syndrome_sources - populate source buffers for gen_syndrome
 * @srcs - (struct page *) array of size sh->disks
 * @offs - (unsigned int) array of offset for each page
 * @sh - stripe_head to parse
 * @srctype - SYNDROME_SRC_* selector for which data devices to include
 *
 * Populates srcs in proper layout order for the stripe and returns the
 * 'count' of sources to be used in a call to async_gen_syndrome.  The P
 * destination buffer is recorded in srcs[count] and the Q destination
 * is recorded in srcs[count+1].  Slots not selected remain NULL.
 */
static int set_syndrome_sources(struct page **srcs,
				unsigned int *offs,
				struct stripe_head *sh,
				int srctype)
{
	int disks = sh->disks;
	/* ddf layout keeps P/Q inside the rotation; native layout appends them */
	int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
	int d0_idx = raid6_d0(sh);
	int count;
	int i;

	for (i = 0; i < disks; i++)
		srcs[i] = NULL;

	count = 0;
	i = d0_idx;
	/* walk devices in syndrome order starting from data disk 0 */
	do {
		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
		struct r5dev *dev = &sh->dev[i];

		if (i == sh->qd_idx || i == sh->pd_idx ||
		    (srctype == SYNDROME_SRC_ALL) ||
		    (srctype == SYNDROME_SRC_WANT_DRAIN &&
		     (test_bit(R5_Wantdrain, &dev->flags) ||
		      test_bit(R5_InJournal, &dev->flags))) ||
		    (srctype == SYNDROME_SRC_WRITTEN &&
		     (dev->written ||
		      test_bit(R5_InJournal, &dev->flags)))) {
			if (test_bit(R5_InJournal, &dev->flags))
				srcs[slot] = sh->dev[i].orig_page;
			else
				srcs[slot] = sh->dev[i].page;
			/*
			 * For R5_InJournal, PAGE_SIZE must be 4KB and will
			 * not shared page. In that case, dev[i].offset
			 * is 0.
			 */
			offs[slot] = sh->dev[i].offset;
		}
		i = raid6_next_disk(i, disks);
	} while (i != d0_idx);

	return syndrome_disks;
}
15918c2ecf20Sopenharmony_ci
/*
 * ops_run_compute6_1 - recompute a single missing block of a raid6 stripe
 * @sh:	    stripe with exactly one valid compute target
 * @percpu: per-cpu scribble buffers for the async engine
 *
 * If the missing block is Q, regenerate it with the syndrome engine;
 * otherwise (a data block or P) a plain XOR of all other non-Q devices
 * suffices.  ops_complete_compute runs on completion.
 */
static struct dma_async_tx_descriptor *
ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
{
	int disks = sh->disks;
	struct page **blocks = to_addr_page(percpu, 0);
	unsigned int *offs = to_addr_offs(sh, percpu);
	int target;
	int qd_idx = sh->qd_idx;
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;
	struct r5dev *tgt;
	struct page *dest;
	unsigned int dest_off;
	int i;
	int count;

	BUG_ON(sh->batch_head);
	/* exactly one of target/target2 must be set */
	if (sh->ops.target < 0)
		target = sh->ops.target2;
	else if (sh->ops.target2 < 0)
		target = sh->ops.target;
	else
		/* we should only have one valid target */
		BUG();
	BUG_ON(target < 0);
	pr_debug("%s: stripe %llu block: %d\n",
		__func__, (unsigned long long)sh->sector, target);

	tgt = &sh->dev[target];
	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
	dest = tgt->page;
	dest_off = tgt->offset;

	/* hold the stripe until ops_complete_compute runs */
	atomic_inc(&sh->count);

	if (target == qd_idx) {
		count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL);
		blocks[count] = NULL; /* regenerating p is not necessary */
		BUG_ON(blocks[count+1] != dest); /* q should already be set */
		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
				  ops_complete_compute, sh,
				  to_addr_conv(sh, percpu, 0));
		tx = async_gen_syndrome(blocks, offs, count+2,
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
	} else {
		/* Compute any data- or p-drive using XOR */
		count = 0;
		for (i = disks; i-- ; ) {
			if (i == target || i == qd_idx)
				continue;
			offs[count] = sh->dev[i].offset;
			blocks[count++] = sh->dev[i].page;
		}

		init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
				  NULL, ops_complete_compute, sh,
				  to_addr_conv(sh, percpu, 0));
		tx = async_xor_offs(dest, dest_off, blocks, offs, count,
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
	}

	return tx;
}
16558c2ecf20Sopenharmony_ci
/*
 * ops_run_compute6_2 - recompute two missing blocks of a raid6 stripe
 * @sh:	    stripe with both ops.target and ops.target2 set
 * @percpu: per-cpu scribble buffers for the async engine
 *
 * Distinguishes the three possible failure pairs after mapping the
 * targets to syndrome slots: P+Q (plain syndrome regeneration), D+Q
 * (rebuild D by XOR from P, then regenerate Q), and D+P or D+D (the
 * dedicated raid6 recovery routines).  ops_complete_compute runs when
 * the chain finishes.
 */
static struct dma_async_tx_descriptor *
ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
{
	int i, count, disks = sh->disks;
	int syndrome_disks = sh->ddf_layout ? disks : disks-2;
	int d0_idx = raid6_d0(sh);
	int faila = -1, failb = -1;
	int target = sh->ops.target;
	int target2 = sh->ops.target2;
	struct r5dev *tgt = &sh->dev[target];
	struct r5dev *tgt2 = &sh->dev[target2];
	struct dma_async_tx_descriptor *tx;
	struct page **blocks = to_addr_page(percpu, 0);
	unsigned int *offs = to_addr_offs(sh, percpu);
	struct async_submit_ctl submit;

	BUG_ON(sh->batch_head);
	pr_debug("%s: stripe %llu block1: %d block2: %d\n",
		 __func__, (unsigned long long)sh->sector, target, target2);
	BUG_ON(target < 0 || target2 < 0);
	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
	BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));

	/* we need to open-code set_syndrome_sources to handle the
	 * slot number conversion for 'faila' and 'failb'
	 */
	for (i = 0; i < disks ; i++) {
		offs[i] = 0;
		blocks[i] = NULL;
	}
	count = 0;
	i = d0_idx;
	do {
		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);

		offs[slot] = sh->dev[i].offset;
		blocks[slot] = sh->dev[i].page;

		if (i == target)
			faila = slot;
		if (i == target2)
			failb = slot;
		i = raid6_next_disk(i, disks);
	} while (i != d0_idx);

	BUG_ON(faila == failb);
	/* order the failures so failb is the higher slot (P/Q last) */
	if (failb < faila)
		swap(faila, failb);
	pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
		 __func__, (unsigned long long)sh->sector, faila, failb);

	/* hold the stripe until ops_complete_compute runs */
	atomic_inc(&sh->count);

	if (failb == syndrome_disks+1) {
		/* Q disk is one of the missing disks */
		if (faila == syndrome_disks) {
			/* Missing P+Q, just recompute */
			init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
					  ops_complete_compute, sh,
					  to_addr_conv(sh, percpu, 0));
			return async_gen_syndrome(blocks, offs, syndrome_disks+2,
						  RAID5_STRIPE_SIZE(sh->raid_conf),
						  &submit);
		} else {
			struct page *dest;
			unsigned int dest_off;
			int data_target;
			int qd_idx = sh->qd_idx;

			/* Missing D+Q: recompute D from P, then recompute Q */
			if (target == qd_idx)
				data_target = target2;
			else
				data_target = target;

			count = 0;
			for (i = disks; i-- ; ) {
				if (i == data_target || i == qd_idx)
					continue;
				offs[count] = sh->dev[i].offset;
				blocks[count++] = sh->dev[i].page;
			}
			dest = sh->dev[data_target].page;
			dest_off = sh->dev[data_target].offset;
			/* step 1: XOR-rebuild the data block (no callback yet) */
			init_async_submit(&submit,
					  ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
					  NULL, NULL, NULL,
					  to_addr_conv(sh, percpu, 0));
			tx = async_xor_offs(dest, dest_off, blocks, offs, count,
				       RAID5_STRIPE_SIZE(sh->raid_conf),
				       &submit);

			/* step 2: regenerate Q, chained behind the XOR */
			count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL);
			init_async_submit(&submit, ASYNC_TX_FENCE, tx,
					  ops_complete_compute, sh,
					  to_addr_conv(sh, percpu, 0));
			return async_gen_syndrome(blocks, offs, count+2,
						  RAID5_STRIPE_SIZE(sh->raid_conf),
						  &submit);
		}
	} else {
		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
				  ops_complete_compute, sh,
				  to_addr_conv(sh, percpu, 0));
		if (failb == syndrome_disks) {
			/* We're missing D+P. */
			return async_raid6_datap_recov(syndrome_disks+2,
						RAID5_STRIPE_SIZE(sh->raid_conf),
						faila,
						blocks, offs, &submit);
		} else {
			/* We're missing D+D. */
			return async_raid6_2data_recov(syndrome_disks+2,
						RAID5_STRIPE_SIZE(sh->raid_conf),
						faila, failb,
						blocks, offs, &submit);
		}
	}
}
17758c2ecf20Sopenharmony_ci
17768c2ecf20Sopenharmony_cistatic void ops_complete_prexor(void *stripe_head_ref)
17778c2ecf20Sopenharmony_ci{
17788c2ecf20Sopenharmony_ci	struct stripe_head *sh = stripe_head_ref;
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_ci	pr_debug("%s: stripe %llu\n", __func__,
17818c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector);
17828c2ecf20Sopenharmony_ci
17838c2ecf20Sopenharmony_ci	if (r5c_is_writeback(sh->raid_conf->log))
17848c2ecf20Sopenharmony_ci		/*
17858c2ecf20Sopenharmony_ci		 * raid5-cache write back uses orig_page during prexor.
17868c2ecf20Sopenharmony_ci		 * After prexor, it is time to free orig_page
17878c2ecf20Sopenharmony_ci		 */
17888c2ecf20Sopenharmony_ci		r5c_release_extra_page(sh);
17898c2ecf20Sopenharmony_ci}
17908c2ecf20Sopenharmony_ci
/*
 * ops_run_prexor5 - subtract old data from the existing RAID5 parity block
 * @sh:     stripe to operate on (must not be a batched stripe)
 * @percpu: per-cpu scratch buffers (source page/offset arrays, addr space)
 * @tx:     descriptor to chain this operation after (may be NULL)
 *
 * First half of a read-modify-write: XOR out of the parity page every
 * block that is about to be rewritten (R5_Wantdrain) or that lives in the
 * journal (R5_InJournal, which contributes orig_page).  The parity page
 * serves as both destination and implicit first source of the XOR via
 * ASYNC_TX_XOR_DROP_DST.  ops_complete_prexor() runs on completion.
 *
 * Returns the descriptor for the submitted XOR so further operations can
 * be chained after it.
 */
static struct dma_async_tx_descriptor *
ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
		struct dma_async_tx_descriptor *tx)
{
	int disks = sh->disks;
	struct page **xor_srcs = to_addr_page(percpu, 0);
	unsigned int *off_srcs = to_addr_offs(sh, percpu);
	int count = 0, pd_idx = sh->pd_idx, i;
	struct async_submit_ctl submit;

	/* existing parity data subtracted */
	unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset;
	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;

	/* prexor is never run on a member of a stripe batch */
	BUG_ON(sh->batch_head);
	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		/* Only process blocks that are known to be uptodate */
		if (test_bit(R5_InJournal, &dev->flags)) {
			/*
			 * For this case, PAGE_SIZE must be equal to 4KB and
			 * page offset is zero.
			 */
			off_srcs[count] = dev->offset;
			xor_srcs[count++] = dev->orig_page;
		} else if (test_bit(R5_Wantdrain, &dev->flags)) {
			off_srcs[count] = dev->offset;
			xor_srcs[count++] = dev->page;
		}
	}

	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
	tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count,
			RAID5_STRIPE_SIZE(sh->raid_conf), &submit);

	return tx;
}
18328c2ecf20Sopenharmony_ci
/*
 * ops_run_prexor6 - subtract old data from the existing RAID6 P/Q syndrome
 * @sh:     stripe to operate on
 * @percpu: per-cpu scratch buffers
 * @tx:     descriptor to chain this operation after (may be NULL)
 *
 * RAID6 counterpart of ops_run_prexor5(): generate a syndrome over only
 * the blocks about to be drained (SYNDROME_SRC_WANT_DRAIN) and XOR it
 * into the existing P/Q pages (ASYNC_TX_PQ_XOR_DST) instead of
 * overwriting them.  ops_complete_prexor() runs on completion.
 *
 * Returns the descriptor for the submitted syndrome operation.
 */
static struct dma_async_tx_descriptor *
ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
		struct dma_async_tx_descriptor *tx)
{
	struct page **blocks = to_addr_page(percpu, 0);
	unsigned int *offs = to_addr_offs(sh, percpu);
	int count;
	struct async_submit_ctl submit;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN);

	init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
			  ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
	/* count+2 accounts for the P and Q slots appended by the helper */
	tx = async_gen_syndrome(blocks, offs, count+2,
			RAID5_STRIPE_SIZE(sh->raid_conf), &submit);

	return tx;
}
18548c2ecf20Sopenharmony_ci
/*
 * ops_run_biodrain - copy queued write bios into the stripe cache pages
 * @sh: stripe (batch head) whose R5_Wantdrain devices should be drained
 * @tx: descriptor to chain the copies after (may be NULL)
 *
 * For each device flagged R5_Wantdrain, atomically detach the pending
 * ->towrite bio chain (under stripe_lock) and async-copy each bio's
 * payload into the device page, propagating REQ_FUA/REQ_SYNC/discard
 * into the corresponding r5dev flags.  When the stripe heads a batch,
 * the same device index is drained on every batch member via the
 * again: loop.
 *
 * Returns the last descriptor of the copy chain.
 */
static struct dma_async_tx_descriptor *
ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
	struct r5conf *conf = sh->raid_conf;
	int disks = sh->disks;
	int i;
	struct stripe_head *head_sh = sh;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	for (i = disks; i--; ) {
		struct r5dev *dev;
		struct bio *chosen;

		/* restart from the batch head for each device index */
		sh = head_sh;
		if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) {
			struct bio *wbi;

again:
			dev = &sh->dev[i];
			/*
			 * clear R5_InJournal, so when rewriting a page in
			 * journal, it is not skipped by r5l_log_stripe()
			 */
			clear_bit(R5_InJournal, &dev->flags);
			/* detach the towrite list atomically w.r.t. add_stripe_bio() */
			spin_lock_irq(&sh->stripe_lock);
			chosen = dev->towrite;
			dev->towrite = NULL;
			sh->overwrite_disks = 0;
			BUG_ON(dev->written);
			wbi = dev->written = chosen;
			spin_unlock_irq(&sh->stripe_lock);
			WARN_ON(dev->page != dev->orig_page);

			/* walk all bios that overlap this device's sector range */
			while (wbi && wbi->bi_iter.bi_sector <
				dev->sector + RAID5_STRIPE_SECTORS(conf)) {
				if (wbi->bi_opf & REQ_FUA)
					set_bit(R5_WantFUA, &dev->flags);
				if (wbi->bi_opf & REQ_SYNC)
					set_bit(R5_SyncIO, &dev->flags);
				if (bio_op(wbi) == REQ_OP_DISCARD)
					set_bit(R5_Discard, &dev->flags);
				else {
					tx = async_copy_data(1, wbi, &dev->page,
							     dev->offset,
							     dev->sector, tx, sh,
							     r5c_is_writeback(conf->log));
					if (dev->page != dev->orig_page &&
					    !r5c_is_writeback(conf->log)) {
						/*
						 * The copy was bypassed in favour of
						 * writing straight from the bio page;
						 * the cached page is no longer valid.
						 */
						set_bit(R5_SkipCopy, &dev->flags);
						clear_bit(R5_UPTODATE, &dev->flags);
						clear_bit(R5_OVERWRITE, &dev->flags);
					}
				}
				wbi = r5_next_bio(conf, wbi, dev->sector);
			}

			/* drain the same device on every member of the batch */
			if (head_sh->batch_head) {
				sh = list_first_entry(&sh->batch_list,
						      struct stripe_head,
						      batch_list);
				if (sh == head_sh)
					continue;
				goto again;
			}
		}
	}

	return tx;
}
19268c2ecf20Sopenharmony_ci
/*
 * ops_complete_reconstruct - async callback when parity (re)computation ends
 * @stripe_head_ref: the stripe whose reconstruct chain just completed
 *
 * Aggregates FUA/SyncIO/Discard across all devices, marks written data
 * blocks and the parity block(s) up to date (unless the stripe carried a
 * discard or the copy was skipped), advances ->reconstruct_state to the
 * matching *_result state, and queues the stripe for handling.
 */
static void ops_complete_reconstruct(void *stripe_head_ref)
{
	struct stripe_head *sh = stripe_head_ref;
	int disks = sh->disks;
	int pd_idx = sh->pd_idx;
	int qd_idx = sh->qd_idx;
	int i;
	bool fua = false, sync = false, discard = false;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	/* collect stripe-wide write semantics from all member devices */
	for (i = disks; i--; ) {
		fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
		sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
		discard |= test_bit(R5_Discard, &sh->dev[i].flags);
	}

	for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];

		/* written data blocks and the parity block(s) are now valid */
		if (dev->written || i == pd_idx || i == qd_idx) {
			if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) {
				set_bit(R5_UPTODATE, &dev->flags);
				if (test_bit(STRIPE_EXPAND_READY, &sh->state))
					set_bit(R5_Expanded, &dev->flags);
			}
			/* FUA/sync must also apply to the parity writes */
			if (fua)
				set_bit(R5_WantFUA, &dev->flags);
			if (sync)
				set_bit(R5_SyncIO, &dev->flags);
		}
	}

	if (sh->reconstruct_state == reconstruct_state_drain_run)
		sh->reconstruct_state = reconstruct_state_drain_result;
	else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run)
		sh->reconstruct_state = reconstruct_state_prexor_drain_result;
	else {
		BUG_ON(sh->reconstruct_state != reconstruct_state_run);
		sh->reconstruct_state = reconstruct_state_result;
	}

	set_bit(STRIPE_HANDLE, &sh->state);
	raid5_release_stripe(sh);
}
19738c2ecf20Sopenharmony_ci
/*
 * ops_run_reconstruct5 - compute the new RAID5 parity block for a write
 * @sh:     stripe (batch head) to reconstruct
 * @percpu: per-cpu scratch buffers; slot @j is used per batch member
 * @tx:     descriptor to chain after (typically the biodrain)
 *
 * Two modes:
 *  - read-modify-write (after a prexor): XOR only the written/journaled
 *    blocks back into the parity page (ASYNC_TX_XOR_DROP_DST);
 *  - reconstruct-write: recompute parity from all data blocks
 *    (ASYNC_TX_XOR_ZERO_DST).
 * A batch is walked via the again: loop; only the last member carries the
 * ops_complete_reconstruct() callback.  A stripe where every data block
 * is being discarded skips the computation entirely.
 */
static void
ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
		     struct dma_async_tx_descriptor *tx)
{
	int disks = sh->disks;
	struct page **xor_srcs;
	unsigned int *off_srcs;
	struct async_submit_ctl submit;
	int count, pd_idx = sh->pd_idx, i;
	struct page *xor_dest;
	unsigned int off_dest;
	int prexor = 0;
	unsigned long flags;
	int j = 0;
	struct stripe_head *head_sh = sh;
	int last_stripe;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	/* is every data block in this stripe a discard? */
	for (i = 0; i < sh->disks; i++) {
		if (pd_idx == i)
			continue;
		if (!test_bit(R5_Discard, &sh->dev[i].flags))
			break;
	}
	if (i >= sh->disks) {
		/* full-stripe discard: no parity to compute, complete directly */
		atomic_inc(&sh->count);
		set_bit(R5_Discard, &sh->dev[pd_idx].flags);
		ops_complete_reconstruct(sh);
		return;
	}
again:
	count = 0;
	xor_srcs = to_addr_page(percpu, j);
	off_srcs = to_addr_offs(sh, percpu);
	/* check if prexor is active which means only process blocks
	 * that are part of a read-modify-write (written)
	 */
	if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
		prexor = 1;
		off_dest = off_srcs[count] = sh->dev[pd_idx].offset;
		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (head_sh->dev[i].written ||
			    test_bit(R5_InJournal, &head_sh->dev[i].flags)) {
				off_srcs[count] = dev->offset;
				xor_srcs[count++] = dev->page;
			}
		}
	} else {
		xor_dest = sh->dev[pd_idx].page;
		off_dest = sh->dev[pd_idx].offset;
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (i != pd_idx) {
				off_srcs[count] = dev->offset;
				xor_srcs[count++] = dev->page;
			}
		}
	}

	/* 1/ if we prexor'd then the dest is reused as a source
	 * 2/ if we did not prexor then we are redoing the parity
	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
	 * for the synchronous xor case
	 */
	last_stripe = !head_sh->batch_head ||
		list_first_entry(&sh->batch_list,
				 struct stripe_head, batch_list) == head_sh;
	if (last_stripe) {
		/* only the final submission acks the chain and completes */
		flags = ASYNC_TX_ACK |
			(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);

		atomic_inc(&head_sh->count);
		init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh,
				  to_addr_conv(sh, percpu, j));
	} else {
		flags = prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST;
		init_async_submit(&submit, flags, tx, NULL, NULL,
				  to_addr_conv(sh, percpu, j));
	}

	/* a single source degenerates to a plain copy */
	if (unlikely(count == 1))
		tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0],
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
	else
		tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count,
				RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
	if (!last_stripe) {
		j++;
		sh = list_first_entry(&sh->batch_list, struct stripe_head,
				      batch_list);
		goto again;
	}
}
20718c2ecf20Sopenharmony_ci
/*
 * ops_run_reconstruct6 - compute the new RAID6 P/Q syndrome for a write
 * @sh:     stripe (batch head) to reconstruct
 * @percpu: per-cpu scratch buffers; slot @j is used per batch member
 * @tx:     descriptor to chain after (typically the biodrain)
 *
 * RAID6 counterpart of ops_run_reconstruct5(): after a prexor only the
 * written blocks are folded into the existing P/Q (SYNDROME_SRC_WRITTEN
 * with ASYNC_TX_PQ_XOR_DST); otherwise the syndrome is regenerated from
 * all blocks (SYNDROME_SRC_ALL).  Batches are walked via the again:
 * loop, with only the last member carrying the completion callback.
 * A full-stripe discard short-circuits straight to completion.
 */
static void
ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
		     struct dma_async_tx_descriptor *tx)
{
	struct async_submit_ctl submit;
	struct page **blocks;
	unsigned int *offs;
	int count, i, j = 0;
	struct stripe_head *head_sh = sh;
	int last_stripe;
	int synflags;
	unsigned long txflags;

	pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);

	/* is every data block in this stripe a discard? */
	for (i = 0; i < sh->disks; i++) {
		if (sh->pd_idx == i || sh->qd_idx == i)
			continue;
		if (!test_bit(R5_Discard, &sh->dev[i].flags))
			break;
	}
	if (i >= sh->disks) {
		/* full-stripe discard: nothing to compute, complete directly */
		atomic_inc(&sh->count);
		set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
		set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
		ops_complete_reconstruct(sh);
		return;
	}

again:
	blocks = to_addr_page(percpu, j);
	offs = to_addr_offs(sh, percpu);

	if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
		/* rmw: fold written blocks into the prexor'd P/Q */
		synflags = SYNDROME_SRC_WRITTEN;
		txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
	} else {
		/* full regeneration of P/Q from all sources */
		synflags = SYNDROME_SRC_ALL;
		txflags = ASYNC_TX_ACK;
	}

	count = set_syndrome_sources(blocks, offs, sh, synflags);
	last_stripe = !head_sh->batch_head ||
		list_first_entry(&sh->batch_list,
				 struct stripe_head, batch_list) == head_sh;

	if (last_stripe) {
		atomic_inc(&head_sh->count);
		init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
				  head_sh, to_addr_conv(sh, percpu, j));
	} else
		init_async_submit(&submit, 0, tx, NULL, NULL,
				  to_addr_conv(sh, percpu, j));
	/* count+2 accounts for the P and Q slots appended by the helper */
	tx = async_gen_syndrome(blocks, offs, count+2,
			RAID5_STRIPE_SIZE(sh->raid_conf),  &submit);
	if (!last_stripe) {
		j++;
		sh = list_first_entry(&sh->batch_list, struct stripe_head,
				      batch_list);
		goto again;
	}
}
21348c2ecf20Sopenharmony_ci
21358c2ecf20Sopenharmony_cistatic void ops_complete_check(void *stripe_head_ref)
21368c2ecf20Sopenharmony_ci{
21378c2ecf20Sopenharmony_ci	struct stripe_head *sh = stripe_head_ref;
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_ci	pr_debug("%s: stripe %llu\n", __func__,
21408c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector);
21418c2ecf20Sopenharmony_ci
21428c2ecf20Sopenharmony_ci	sh->check_state = check_state_check_result;
21438c2ecf20Sopenharmony_ci	set_bit(STRIPE_HANDLE, &sh->state);
21448c2ecf20Sopenharmony_ci	raid5_release_stripe(sh);
21458c2ecf20Sopenharmony_ci}
21468c2ecf20Sopenharmony_ci
/*
 * ops_run_check_p - verify the RAID4/5 parity (P) block
 * @sh:     stripe to check (must not be a batched stripe)
 * @percpu: per-cpu scratch buffers
 *
 * XOR-validates parity by listing P itself as the first source followed
 * by all data blocks: the XOR sum of all sources is zero iff parity is
 * consistent.  The outcome lands in sh->ops.zero_sum_result and
 * ops_complete_check() is triggered once the validation finishes.
 */
static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
{
	int disks = sh->disks;
	int pd_idx = sh->pd_idx;
	int qd_idx = sh->qd_idx;
	struct page *xor_dest;
	unsigned int off_dest;
	struct page **xor_srcs = to_addr_page(percpu, 0);
	unsigned int *off_srcs = to_addr_offs(sh, percpu);
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;
	int count;
	int i;

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	BUG_ON(sh->batch_head);
	count = 0;
	/* P is both the "destination" and the first source of the zero-sum */
	xor_dest = sh->dev[pd_idx].page;
	off_dest = sh->dev[pd_idx].offset;
	off_srcs[count] = off_dest;
	xor_srcs[count++] = xor_dest;
	for (i = disks; i--; ) {
		/* qd_idx is skipped so this also works for a RAID6 P-only check */
		if (i == pd_idx || i == qd_idx)
			continue;
		off_srcs[count] = sh->dev[i].offset;
		xor_srcs[count++] = sh->dev[i].page;
	}

	init_async_submit(&submit, 0, NULL, NULL, NULL,
			  to_addr_conv(sh, percpu, 0));
	tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count,
			   RAID5_STRIPE_SIZE(sh->raid_conf),
			   &sh->ops.zero_sum_result, &submit);

	/* reference dropped by ops_complete_check() */
	atomic_inc(&sh->count);
	init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
	tx = async_trigger_callback(&submit);
}
21878c2ecf20Sopenharmony_ci
/*
 * ops_run_check_pq - verify the RAID6 syndrome
 * @sh:     stripe to check (must not be a batched stripe)
 * @percpu: per-cpu scratch buffers
 * @checkp: non-zero to validate both P and Q, zero to validate Q only
 *
 * Runs async_syndrome_val() over all data blocks plus P/Q.  When @checkp
 * is zero the P slot is replaced by NULL so only Q is validated.  The
 * outcome is stored in sh->ops.zero_sum_result and ops_complete_check()
 * runs on completion.
 */
static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
{
	struct page **srcs = to_addr_page(percpu, 0);
	unsigned int *offs = to_addr_offs(sh, percpu);
	struct async_submit_ctl submit;
	int count;

	pr_debug("%s: stripe %llu checkp: %d\n", __func__,
		(unsigned long long)sh->sector, checkp);

	BUG_ON(sh->batch_head);
	count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL);
	if (!checkp)
		srcs[count] = NULL;

	/* reference dropped by ops_complete_check() */
	atomic_inc(&sh->count);
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
			  sh, to_addr_conv(sh, percpu, 0));
	async_syndrome_val(srcs, offs, count+2,
			   RAID5_STRIPE_SIZE(sh->raid_conf),
			   &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit);
}
22108c2ecf20Sopenharmony_ci
/*
 * raid_run_ops - dispatch the asynchronous operations requested for a stripe
 * @sh:          stripe to process
 * @ops_request: bitmask of STRIPE_OP_* operations to run
 *
 * Chains the requested operations (biofill, compute, prexor, partial
 * parity, biodrain, reconstruct, check) into one dma descriptor chain,
 * picking RAID5 vs RAID6 variants based on conf->level.  The per-cpu
 * scratch buffers are claimed by disabling preemption (get_cpu/put_cpu).
 * Operations that consumed bios (biofill/biodrain) clear R5_Overlap on
 * all devices afterwards and wake any waiters, except for batched
 * stripes where the batch head is responsible.
 */
static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
	int overlap_clear = 0, i, disks = sh->disks;
	struct dma_async_tx_descriptor *tx = NULL;
	struct r5conf *conf = sh->raid_conf;
	int level = conf->level;
	struct raid5_percpu *percpu;
	unsigned long cpu;

	cpu = get_cpu();
	percpu = per_cpu_ptr(conf->percpu, cpu);
	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
		ops_run_biofill(sh);
		overlap_clear++;
	}

	if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
		if (level < 6)
			tx = ops_run_compute5(sh, percpu);
		else {
			/* one vs two missing blocks to recover */
			if (sh->ops.target2 < 0 || sh->ops.target < 0)
				tx = ops_run_compute6_1(sh, percpu);
			else
				tx = ops_run_compute6_2(sh, percpu);
		}
		/* terminate the chain if reconstruct is not set to be run */
		if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
			async_tx_ack(tx);
	}

	if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
		if (level < 6)
			tx = ops_run_prexor5(sh, percpu, tx);
		else
			tx = ops_run_prexor6(sh, percpu, tx);
	}

	if (test_bit(STRIPE_OP_PARTIAL_PARITY, &ops_request))
		tx = ops_run_partial_parity(sh, percpu, tx);

	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
		tx = ops_run_biodrain(sh, tx);
		overlap_clear++;
	}

	if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
		if (level < 6)
			ops_run_reconstruct5(sh, percpu, tx);
		else
			ops_run_reconstruct6(sh, percpu, tx);
	}

	if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
		if (sh->check_state == check_state_run)
			ops_run_check_p(sh, percpu);
		else if (sh->check_state == check_state_run_q)
			ops_run_check_pq(sh, percpu, 0);
		else if (sh->check_state == check_state_run_pq)
			ops_run_check_pq(sh, percpu, 1);
		else
			BUG();
	}

	/* batched stripes are handled by the batch head, not here */
	if (overlap_clear && !sh->batch_head)
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (test_and_clear_bit(R5_Overlap, &dev->flags))
				wake_up(&sh->raid_conf->wait_for_overlap);
		}
	put_cpu();
}
22828c2ecf20Sopenharmony_ci
/*
 * free_stripe - release a stripe_head allocated by alloc_stripe()
 * @sc: slab cache the stripe was allocated from
 * @sh: stripe to free
 *
 * Frees the shared page array (only present when the configured stripe
 * size differs from PAGE_SIZE), the optional PPL page, and finally the
 * slab object itself.
 */
static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
{
#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
	kfree(sh->pages);
#endif
	/* ppl_page is only allocated when PPL is enabled; NULL otherwise */
	if (sh->ppl_page)
		__free_page(sh->ppl_page);
	kmem_cache_free(sc, sh);
}
22928c2ecf20Sopenharmony_ci
/*
 * alloc_stripe - allocate and initialise one stripe_head
 * @sc:    slab cache to allocate from
 * @gfp:   allocation flags
 * @disks: number of r5dev slots to initialise
 * @conf:  owning raid5 configuration
 *
 * Zero-allocates the slab object, initialises its locks and lists, takes
 * the initial reference, pre-initialises the per-device read/write bios,
 * and optionally attaches a PPL page and (when the stripe size differs
 * from PAGE_SIZE) the shared page array.
 *
 * Returns the new stripe, or NULL on any allocation failure (partially
 * built stripes are torn down via free_stripe()).
 */
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
	int disks, struct r5conf *conf)
{
	struct stripe_head *sh;
	int i;

	sh = kmem_cache_zalloc(sc, gfp);
	if (sh) {
		spin_lock_init(&sh->stripe_lock);
		spin_lock_init(&sh->batch_lock);
		INIT_LIST_HEAD(&sh->batch_list);
		INIT_LIST_HEAD(&sh->lru);
		INIT_LIST_HEAD(&sh->r5c);
		INIT_LIST_HEAD(&sh->log_list);
		atomic_set(&sh->count, 1);
		sh->raid_conf = conf;
		/* MaxSector means "not in the journal yet" */
		sh->log_start = MaxSector;
		for (i = 0; i < disks; i++) {
			struct r5dev *dev = &sh->dev[i];

			/* embedded single-segment bios for normal and replacement I/O */
			bio_init(&dev->req, &dev->vec, 1);
			bio_init(&dev->rreq, &dev->rvec, 1);
		}

		if (raid5_has_ppl(conf)) {
			sh->ppl_page = alloc_page(gfp);
			if (!sh->ppl_page) {
				free_stripe(sc, sh);
				return NULL;
			}
		}
#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
		if (init_stripe_shared_pages(sh, conf, disks)) {
			free_stripe(sc, sh);
			return NULL;
		}
#endif
	}
	return sh;
}
/*
 * grow_one_stripe - add one stripe (with data buffers) to the stripe cache
 * @conf: raid5 configuration to grow
 * @gfp:  allocation flags
 *
 * Allocates a stripe_head plus its per-device pages, assigns it a hash
 * lock index, and releases it into the cache via raid5_release_stripe().
 *
 * Returns 1 on success, 0 on allocation failure (nothing is leaked).
 */
static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
	struct stripe_head *sh;

	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
	if (!sh)
		return 0;

	if (grow_buffers(sh, gfp)) {
		shrink_buffers(sh);
		free_stripe(conf->slab_cache, sh);
		return 0;
	}
	/* spread stripes across the hash locks round-robin */
	sh->hash_lock_index =
		conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
	/* we just created an active stripe so... */
	atomic_inc(&conf->active_stripes);

	raid5_release_stripe(sh);
	conf->max_nr_stripes++;
	return 1;
}
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_cistatic int grow_stripes(struct r5conf *conf, int num)
23578c2ecf20Sopenharmony_ci{
23588c2ecf20Sopenharmony_ci	struct kmem_cache *sc;
23598c2ecf20Sopenharmony_ci	size_t namelen = sizeof(conf->cache_name[0]);
23608c2ecf20Sopenharmony_ci	int devs = max(conf->raid_disks, conf->previous_raid_disks);
23618c2ecf20Sopenharmony_ci
23628c2ecf20Sopenharmony_ci	if (conf->mddev->gendisk)
23638c2ecf20Sopenharmony_ci		snprintf(conf->cache_name[0], namelen,
23648c2ecf20Sopenharmony_ci			"raid%d-%s", conf->level, mdname(conf->mddev));
23658c2ecf20Sopenharmony_ci	else
23668c2ecf20Sopenharmony_ci		snprintf(conf->cache_name[0], namelen,
23678c2ecf20Sopenharmony_ci			"raid%d-%p", conf->level, conf->mddev);
23688c2ecf20Sopenharmony_ci	snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci	conf->active_name = 0;
23718c2ecf20Sopenharmony_ci	sc = kmem_cache_create(conf->cache_name[conf->active_name],
23728c2ecf20Sopenharmony_ci			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
23738c2ecf20Sopenharmony_ci			       0, 0, NULL);
23748c2ecf20Sopenharmony_ci	if (!sc)
23758c2ecf20Sopenharmony_ci		return 1;
23768c2ecf20Sopenharmony_ci	conf->slab_cache = sc;
23778c2ecf20Sopenharmony_ci	conf->pool_size = devs;
23788c2ecf20Sopenharmony_ci	while (num--)
23798c2ecf20Sopenharmony_ci		if (!grow_one_stripe(conf, GFP_KERNEL))
23808c2ecf20Sopenharmony_ci			return 1;
23818c2ecf20Sopenharmony_ci
23828c2ecf20Sopenharmony_ci	return 0;
23838c2ecf20Sopenharmony_ci}
23848c2ecf20Sopenharmony_ci
23858c2ecf20Sopenharmony_ci/**
23868c2ecf20Sopenharmony_ci * scribble_alloc - allocate percpu scribble buffer for required size
23878c2ecf20Sopenharmony_ci *		    of the scribble region
23888c2ecf20Sopenharmony_ci * @percpu: from for_each_present_cpu() of the caller
23898c2ecf20Sopenharmony_ci * @num: total number of disks in the array
23908c2ecf20Sopenharmony_ci * @cnt: scribble objs count for required size of the scribble region
23918c2ecf20Sopenharmony_ci *
23928c2ecf20Sopenharmony_ci * The scribble buffer size must be enough to contain:
23938c2ecf20Sopenharmony_ci * 1/ a struct page pointer for each device in the array +2
23948c2ecf20Sopenharmony_ci * 2/ room to convert each entry in (1) to its corresponding dma
23958c2ecf20Sopenharmony_ci *    (dma_map_page()) or page (page_address()) address.
23968c2ecf20Sopenharmony_ci *
23978c2ecf20Sopenharmony_ci * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
23988c2ecf20Sopenharmony_ci * calculate over all devices (not just the data blocks), using zeros in place
23998c2ecf20Sopenharmony_ci * of the P and Q blocks.
24008c2ecf20Sopenharmony_ci */
24018c2ecf20Sopenharmony_cistatic int scribble_alloc(struct raid5_percpu *percpu,
24028c2ecf20Sopenharmony_ci			  int num, int cnt)
24038c2ecf20Sopenharmony_ci{
24048c2ecf20Sopenharmony_ci	size_t obj_size =
24058c2ecf20Sopenharmony_ci		sizeof(struct page *) * (num + 2) +
24068c2ecf20Sopenharmony_ci		sizeof(addr_conv_t) * (num + 2) +
24078c2ecf20Sopenharmony_ci		sizeof(unsigned int) * (num + 2);
24088c2ecf20Sopenharmony_ci	void *scribble;
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci	/*
24118c2ecf20Sopenharmony_ci	 * If here is in raid array suspend context, it is in memalloc noio
24128c2ecf20Sopenharmony_ci	 * context as well, there is no potential recursive memory reclaim
24138c2ecf20Sopenharmony_ci	 * I/Os with the GFP_KERNEL flag.
24148c2ecf20Sopenharmony_ci	 */
24158c2ecf20Sopenharmony_ci	scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
24168c2ecf20Sopenharmony_ci	if (!scribble)
24178c2ecf20Sopenharmony_ci		return -ENOMEM;
24188c2ecf20Sopenharmony_ci
24198c2ecf20Sopenharmony_ci	kvfree(percpu->scribble);
24208c2ecf20Sopenharmony_ci
24218c2ecf20Sopenharmony_ci	percpu->scribble = scribble;
24228c2ecf20Sopenharmony_ci	percpu->scribble_obj_size = obj_size;
24238c2ecf20Sopenharmony_ci	return 0;
24248c2ecf20Sopenharmony_ci}
24258c2ecf20Sopenharmony_ci
24268c2ecf20Sopenharmony_cistatic int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
24278c2ecf20Sopenharmony_ci{
24288c2ecf20Sopenharmony_ci	unsigned long cpu;
24298c2ecf20Sopenharmony_ci	int err = 0;
24308c2ecf20Sopenharmony_ci
24318c2ecf20Sopenharmony_ci	/*
24328c2ecf20Sopenharmony_ci	 * Never shrink. And mddev_suspend() could deadlock if this is called
24338c2ecf20Sopenharmony_ci	 * from raid5d. In that case, scribble_disks and scribble_sectors
24348c2ecf20Sopenharmony_ci	 * should equal to new_disks and new_sectors
24358c2ecf20Sopenharmony_ci	 */
24368c2ecf20Sopenharmony_ci	if (conf->scribble_disks >= new_disks &&
24378c2ecf20Sopenharmony_ci	    conf->scribble_sectors >= new_sectors)
24388c2ecf20Sopenharmony_ci		return 0;
24398c2ecf20Sopenharmony_ci	mddev_suspend(conf->mddev);
24408c2ecf20Sopenharmony_ci	get_online_cpus();
24418c2ecf20Sopenharmony_ci
24428c2ecf20Sopenharmony_ci	for_each_present_cpu(cpu) {
24438c2ecf20Sopenharmony_ci		struct raid5_percpu *percpu;
24448c2ecf20Sopenharmony_ci
24458c2ecf20Sopenharmony_ci		percpu = per_cpu_ptr(conf->percpu, cpu);
24468c2ecf20Sopenharmony_ci		err = scribble_alloc(percpu, new_disks,
24478c2ecf20Sopenharmony_ci				     new_sectors / RAID5_STRIPE_SECTORS(conf));
24488c2ecf20Sopenharmony_ci		if (err)
24498c2ecf20Sopenharmony_ci			break;
24508c2ecf20Sopenharmony_ci	}
24518c2ecf20Sopenharmony_ci
24528c2ecf20Sopenharmony_ci	put_online_cpus();
24538c2ecf20Sopenharmony_ci	mddev_resume(conf->mddev);
24548c2ecf20Sopenharmony_ci	if (!err) {
24558c2ecf20Sopenharmony_ci		conf->scribble_disks = new_disks;
24568c2ecf20Sopenharmony_ci		conf->scribble_sectors = new_sectors;
24578c2ecf20Sopenharmony_ci	}
24588c2ecf20Sopenharmony_ci	return err;
24598c2ecf20Sopenharmony_ci}
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_cistatic int resize_stripes(struct r5conf *conf, int newsize)
24628c2ecf20Sopenharmony_ci{
24638c2ecf20Sopenharmony_ci	/* Make all the stripes able to hold 'newsize' devices.
24648c2ecf20Sopenharmony_ci	 * New slots in each stripe get 'page' set to a new page.
24658c2ecf20Sopenharmony_ci	 *
24668c2ecf20Sopenharmony_ci	 * This happens in stages:
24678c2ecf20Sopenharmony_ci	 * 1/ create a new kmem_cache and allocate the required number of
24688c2ecf20Sopenharmony_ci	 *    stripe_heads.
24698c2ecf20Sopenharmony_ci	 * 2/ gather all the old stripe_heads and transfer the pages across
24708c2ecf20Sopenharmony_ci	 *    to the new stripe_heads.  This will have the side effect of
24718c2ecf20Sopenharmony_ci	 *    freezing the array as once all stripe_heads have been collected,
24728c2ecf20Sopenharmony_ci	 *    no IO will be possible.  Old stripe heads are freed once their
24738c2ecf20Sopenharmony_ci	 *    pages have been transferred over, and the old kmem_cache is
24748c2ecf20Sopenharmony_ci	 *    freed when all stripes are done.
24758c2ecf20Sopenharmony_ci	 * 3/ reallocate conf->disks to be suitable bigger.  If this fails,
24768c2ecf20Sopenharmony_ci	 *    we simple return a failure status - no need to clean anything up.
24778c2ecf20Sopenharmony_ci	 * 4/ allocate new pages for the new slots in the new stripe_heads.
24788c2ecf20Sopenharmony_ci	 *    If this fails, we don't bother trying the shrink the
24798c2ecf20Sopenharmony_ci	 *    stripe_heads down again, we just leave them as they are.
24808c2ecf20Sopenharmony_ci	 *    As each stripe_head is processed the new one is released into
24818c2ecf20Sopenharmony_ci	 *    active service.
24828c2ecf20Sopenharmony_ci	 *
24838c2ecf20Sopenharmony_ci	 * Once step2 is started, we cannot afford to wait for a write,
24848c2ecf20Sopenharmony_ci	 * so we use GFP_NOIO allocations.
24858c2ecf20Sopenharmony_ci	 */
24868c2ecf20Sopenharmony_ci	struct stripe_head *osh, *nsh;
24878c2ecf20Sopenharmony_ci	LIST_HEAD(newstripes);
24888c2ecf20Sopenharmony_ci	struct disk_info *ndisks;
24898c2ecf20Sopenharmony_ci	int err = 0;
24908c2ecf20Sopenharmony_ci	struct kmem_cache *sc;
24918c2ecf20Sopenharmony_ci	int i;
24928c2ecf20Sopenharmony_ci	int hash, cnt;
24938c2ecf20Sopenharmony_ci
24948c2ecf20Sopenharmony_ci	md_allow_write(conf->mddev);
24958c2ecf20Sopenharmony_ci
24968c2ecf20Sopenharmony_ci	/* Step 1 */
24978c2ecf20Sopenharmony_ci	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
24988c2ecf20Sopenharmony_ci			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
24998c2ecf20Sopenharmony_ci			       0, 0, NULL);
25008c2ecf20Sopenharmony_ci	if (!sc)
25018c2ecf20Sopenharmony_ci		return -ENOMEM;
25028c2ecf20Sopenharmony_ci
25038c2ecf20Sopenharmony_ci	/* Need to ensure auto-resizing doesn't interfere */
25048c2ecf20Sopenharmony_ci	mutex_lock(&conf->cache_size_mutex);
25058c2ecf20Sopenharmony_ci
25068c2ecf20Sopenharmony_ci	for (i = conf->max_nr_stripes; i; i--) {
25078c2ecf20Sopenharmony_ci		nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
25088c2ecf20Sopenharmony_ci		if (!nsh)
25098c2ecf20Sopenharmony_ci			break;
25108c2ecf20Sopenharmony_ci
25118c2ecf20Sopenharmony_ci		list_add(&nsh->lru, &newstripes);
25128c2ecf20Sopenharmony_ci	}
25138c2ecf20Sopenharmony_ci	if (i) {
25148c2ecf20Sopenharmony_ci		/* didn't get enough, give up */
25158c2ecf20Sopenharmony_ci		while (!list_empty(&newstripes)) {
25168c2ecf20Sopenharmony_ci			nsh = list_entry(newstripes.next, struct stripe_head, lru);
25178c2ecf20Sopenharmony_ci			list_del(&nsh->lru);
25188c2ecf20Sopenharmony_ci			free_stripe(sc, nsh);
25198c2ecf20Sopenharmony_ci		}
25208c2ecf20Sopenharmony_ci		kmem_cache_destroy(sc);
25218c2ecf20Sopenharmony_ci		mutex_unlock(&conf->cache_size_mutex);
25228c2ecf20Sopenharmony_ci		return -ENOMEM;
25238c2ecf20Sopenharmony_ci	}
25248c2ecf20Sopenharmony_ci	/* Step 2 - Must use GFP_NOIO now.
25258c2ecf20Sopenharmony_ci	 * OK, we have enough stripes, start collecting inactive
25268c2ecf20Sopenharmony_ci	 * stripes and copying them over
25278c2ecf20Sopenharmony_ci	 */
25288c2ecf20Sopenharmony_ci	hash = 0;
25298c2ecf20Sopenharmony_ci	cnt = 0;
25308c2ecf20Sopenharmony_ci	list_for_each_entry(nsh, &newstripes, lru) {
25318c2ecf20Sopenharmony_ci		lock_device_hash_lock(conf, hash);
25328c2ecf20Sopenharmony_ci		wait_event_cmd(conf->wait_for_stripe,
25338c2ecf20Sopenharmony_ci				    !list_empty(conf->inactive_list + hash),
25348c2ecf20Sopenharmony_ci				    unlock_device_hash_lock(conf, hash),
25358c2ecf20Sopenharmony_ci				    lock_device_hash_lock(conf, hash));
25368c2ecf20Sopenharmony_ci		osh = get_free_stripe(conf, hash);
25378c2ecf20Sopenharmony_ci		unlock_device_hash_lock(conf, hash);
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
25408c2ecf20Sopenharmony_ci	for (i = 0; i < osh->nr_pages; i++) {
25418c2ecf20Sopenharmony_ci		nsh->pages[i] = osh->pages[i];
25428c2ecf20Sopenharmony_ci		osh->pages[i] = NULL;
25438c2ecf20Sopenharmony_ci	}
25448c2ecf20Sopenharmony_ci#endif
25458c2ecf20Sopenharmony_ci		for(i=0; i<conf->pool_size; i++) {
25468c2ecf20Sopenharmony_ci			nsh->dev[i].page = osh->dev[i].page;
25478c2ecf20Sopenharmony_ci			nsh->dev[i].orig_page = osh->dev[i].page;
25488c2ecf20Sopenharmony_ci			nsh->dev[i].offset = osh->dev[i].offset;
25498c2ecf20Sopenharmony_ci		}
25508c2ecf20Sopenharmony_ci		nsh->hash_lock_index = hash;
25518c2ecf20Sopenharmony_ci		free_stripe(conf->slab_cache, osh);
25528c2ecf20Sopenharmony_ci		cnt++;
25538c2ecf20Sopenharmony_ci		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
25548c2ecf20Sopenharmony_ci		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
25558c2ecf20Sopenharmony_ci			hash++;
25568c2ecf20Sopenharmony_ci			cnt = 0;
25578c2ecf20Sopenharmony_ci		}
25588c2ecf20Sopenharmony_ci	}
25598c2ecf20Sopenharmony_ci	kmem_cache_destroy(conf->slab_cache);
25608c2ecf20Sopenharmony_ci
25618c2ecf20Sopenharmony_ci	/* Step 3.
25628c2ecf20Sopenharmony_ci	 * At this point, we are holding all the stripes so the array
25638c2ecf20Sopenharmony_ci	 * is completely stalled, so now is a good time to resize
25648c2ecf20Sopenharmony_ci	 * conf->disks and the scribble region
25658c2ecf20Sopenharmony_ci	 */
25668c2ecf20Sopenharmony_ci	ndisks = kcalloc(newsize, sizeof(struct disk_info), GFP_NOIO);
25678c2ecf20Sopenharmony_ci	if (ndisks) {
25688c2ecf20Sopenharmony_ci		for (i = 0; i < conf->pool_size; i++)
25698c2ecf20Sopenharmony_ci			ndisks[i] = conf->disks[i];
25708c2ecf20Sopenharmony_ci
25718c2ecf20Sopenharmony_ci		for (i = conf->pool_size; i < newsize; i++) {
25728c2ecf20Sopenharmony_ci			ndisks[i].extra_page = alloc_page(GFP_NOIO);
25738c2ecf20Sopenharmony_ci			if (!ndisks[i].extra_page)
25748c2ecf20Sopenharmony_ci				err = -ENOMEM;
25758c2ecf20Sopenharmony_ci		}
25768c2ecf20Sopenharmony_ci
25778c2ecf20Sopenharmony_ci		if (err) {
25788c2ecf20Sopenharmony_ci			for (i = conf->pool_size; i < newsize; i++)
25798c2ecf20Sopenharmony_ci				if (ndisks[i].extra_page)
25808c2ecf20Sopenharmony_ci					put_page(ndisks[i].extra_page);
25818c2ecf20Sopenharmony_ci			kfree(ndisks);
25828c2ecf20Sopenharmony_ci		} else {
25838c2ecf20Sopenharmony_ci			kfree(conf->disks);
25848c2ecf20Sopenharmony_ci			conf->disks = ndisks;
25858c2ecf20Sopenharmony_ci		}
25868c2ecf20Sopenharmony_ci	} else
25878c2ecf20Sopenharmony_ci		err = -ENOMEM;
25888c2ecf20Sopenharmony_ci
25898c2ecf20Sopenharmony_ci	conf->slab_cache = sc;
25908c2ecf20Sopenharmony_ci	conf->active_name = 1-conf->active_name;
25918c2ecf20Sopenharmony_ci
25928c2ecf20Sopenharmony_ci	/* Step 4, return new stripes to service */
25938c2ecf20Sopenharmony_ci	while(!list_empty(&newstripes)) {
25948c2ecf20Sopenharmony_ci		nsh = list_entry(newstripes.next, struct stripe_head, lru);
25958c2ecf20Sopenharmony_ci		list_del_init(&nsh->lru);
25968c2ecf20Sopenharmony_ci
25978c2ecf20Sopenharmony_ci#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
25988c2ecf20Sopenharmony_ci		for (i = 0; i < nsh->nr_pages; i++) {
25998c2ecf20Sopenharmony_ci			if (nsh->pages[i])
26008c2ecf20Sopenharmony_ci				continue;
26018c2ecf20Sopenharmony_ci			nsh->pages[i] = alloc_page(GFP_NOIO);
26028c2ecf20Sopenharmony_ci			if (!nsh->pages[i])
26038c2ecf20Sopenharmony_ci				err = -ENOMEM;
26048c2ecf20Sopenharmony_ci		}
26058c2ecf20Sopenharmony_ci
26068c2ecf20Sopenharmony_ci		for (i = conf->raid_disks; i < newsize; i++) {
26078c2ecf20Sopenharmony_ci			if (nsh->dev[i].page)
26088c2ecf20Sopenharmony_ci				continue;
26098c2ecf20Sopenharmony_ci			nsh->dev[i].page = raid5_get_dev_page(nsh, i);
26108c2ecf20Sopenharmony_ci			nsh->dev[i].orig_page = nsh->dev[i].page;
26118c2ecf20Sopenharmony_ci			nsh->dev[i].offset = raid5_get_page_offset(nsh, i);
26128c2ecf20Sopenharmony_ci		}
26138c2ecf20Sopenharmony_ci#else
26148c2ecf20Sopenharmony_ci		for (i=conf->raid_disks; i < newsize; i++)
26158c2ecf20Sopenharmony_ci			if (nsh->dev[i].page == NULL) {
26168c2ecf20Sopenharmony_ci				struct page *p = alloc_page(GFP_NOIO);
26178c2ecf20Sopenharmony_ci				nsh->dev[i].page = p;
26188c2ecf20Sopenharmony_ci				nsh->dev[i].orig_page = p;
26198c2ecf20Sopenharmony_ci				nsh->dev[i].offset = 0;
26208c2ecf20Sopenharmony_ci				if (!p)
26218c2ecf20Sopenharmony_ci					err = -ENOMEM;
26228c2ecf20Sopenharmony_ci			}
26238c2ecf20Sopenharmony_ci#endif
26248c2ecf20Sopenharmony_ci		raid5_release_stripe(nsh);
26258c2ecf20Sopenharmony_ci	}
26268c2ecf20Sopenharmony_ci	/* critical section pass, GFP_NOIO no longer needed */
26278c2ecf20Sopenharmony_ci
26288c2ecf20Sopenharmony_ci	if (!err)
26298c2ecf20Sopenharmony_ci		conf->pool_size = newsize;
26308c2ecf20Sopenharmony_ci	mutex_unlock(&conf->cache_size_mutex);
26318c2ecf20Sopenharmony_ci
26328c2ecf20Sopenharmony_ci	return err;
26338c2ecf20Sopenharmony_ci}
26348c2ecf20Sopenharmony_ci
26358c2ecf20Sopenharmony_cistatic int drop_one_stripe(struct r5conf *conf)
26368c2ecf20Sopenharmony_ci{
26378c2ecf20Sopenharmony_ci	struct stripe_head *sh;
26388c2ecf20Sopenharmony_ci	int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
26398c2ecf20Sopenharmony_ci
26408c2ecf20Sopenharmony_ci	spin_lock_irq(conf->hash_locks + hash);
26418c2ecf20Sopenharmony_ci	sh = get_free_stripe(conf, hash);
26428c2ecf20Sopenharmony_ci	spin_unlock_irq(conf->hash_locks + hash);
26438c2ecf20Sopenharmony_ci	if (!sh)
26448c2ecf20Sopenharmony_ci		return 0;
26458c2ecf20Sopenharmony_ci	BUG_ON(atomic_read(&sh->count));
26468c2ecf20Sopenharmony_ci	shrink_buffers(sh);
26478c2ecf20Sopenharmony_ci	free_stripe(conf->slab_cache, sh);
26488c2ecf20Sopenharmony_ci	atomic_dec(&conf->active_stripes);
26498c2ecf20Sopenharmony_ci	conf->max_nr_stripes--;
26508c2ecf20Sopenharmony_ci	return 1;
26518c2ecf20Sopenharmony_ci}
26528c2ecf20Sopenharmony_ci
26538c2ecf20Sopenharmony_cistatic void shrink_stripes(struct r5conf *conf)
26548c2ecf20Sopenharmony_ci{
26558c2ecf20Sopenharmony_ci	while (conf->max_nr_stripes &&
26568c2ecf20Sopenharmony_ci	       drop_one_stripe(conf))
26578c2ecf20Sopenharmony_ci		;
26588c2ecf20Sopenharmony_ci
26598c2ecf20Sopenharmony_ci	kmem_cache_destroy(conf->slab_cache);
26608c2ecf20Sopenharmony_ci	conf->slab_cache = NULL;
26618c2ecf20Sopenharmony_ci}
26628c2ecf20Sopenharmony_ci
26638c2ecf20Sopenharmony_cistatic void raid5_end_read_request(struct bio * bi)
26648c2ecf20Sopenharmony_ci{
26658c2ecf20Sopenharmony_ci	struct stripe_head *sh = bi->bi_private;
26668c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
26678c2ecf20Sopenharmony_ci	int disks = sh->disks, i;
26688c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
26698c2ecf20Sopenharmony_ci	struct md_rdev *rdev = NULL;
26708c2ecf20Sopenharmony_ci	sector_t s;
26718c2ecf20Sopenharmony_ci
26728c2ecf20Sopenharmony_ci	for (i=0 ; i<disks; i++)
26738c2ecf20Sopenharmony_ci		if (bi == &sh->dev[i].req)
26748c2ecf20Sopenharmony_ci			break;
26758c2ecf20Sopenharmony_ci
26768c2ecf20Sopenharmony_ci	pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
26778c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
26788c2ecf20Sopenharmony_ci		bi->bi_status);
26798c2ecf20Sopenharmony_ci	if (i == disks) {
26808c2ecf20Sopenharmony_ci		bio_reset(bi);
26818c2ecf20Sopenharmony_ci		BUG();
26828c2ecf20Sopenharmony_ci		return;
26838c2ecf20Sopenharmony_ci	}
26848c2ecf20Sopenharmony_ci	if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
26858c2ecf20Sopenharmony_ci		/* If replacement finished while this request was outstanding,
26868c2ecf20Sopenharmony_ci		 * 'replacement' might be NULL already.
26878c2ecf20Sopenharmony_ci		 * In that case it moved down to 'rdev'.
26888c2ecf20Sopenharmony_ci		 * rdev is not removed until all requests are finished.
26898c2ecf20Sopenharmony_ci		 */
26908c2ecf20Sopenharmony_ci		rdev = conf->disks[i].replacement;
26918c2ecf20Sopenharmony_ci	if (!rdev)
26928c2ecf20Sopenharmony_ci		rdev = conf->disks[i].rdev;
26938c2ecf20Sopenharmony_ci
26948c2ecf20Sopenharmony_ci	if (use_new_offset(conf, sh))
26958c2ecf20Sopenharmony_ci		s = sh->sector + rdev->new_data_offset;
26968c2ecf20Sopenharmony_ci	else
26978c2ecf20Sopenharmony_ci		s = sh->sector + rdev->data_offset;
26988c2ecf20Sopenharmony_ci	if (!bi->bi_status) {
26998c2ecf20Sopenharmony_ci		set_bit(R5_UPTODATE, &sh->dev[i].flags);
27008c2ecf20Sopenharmony_ci		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
27018c2ecf20Sopenharmony_ci			/* Note that this cannot happen on a
27028c2ecf20Sopenharmony_ci			 * replacement device.  We just fail those on
27038c2ecf20Sopenharmony_ci			 * any error
27048c2ecf20Sopenharmony_ci			 */
27058c2ecf20Sopenharmony_ci			pr_info_ratelimited(
27068c2ecf20Sopenharmony_ci				"md/raid:%s: read error corrected (%lu sectors at %llu on %s)\n",
27078c2ecf20Sopenharmony_ci				mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf),
27088c2ecf20Sopenharmony_ci				(unsigned long long)s,
27098c2ecf20Sopenharmony_ci				bdevname(rdev->bdev, b));
27108c2ecf20Sopenharmony_ci			atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors);
27118c2ecf20Sopenharmony_ci			clear_bit(R5_ReadError, &sh->dev[i].flags);
27128c2ecf20Sopenharmony_ci			clear_bit(R5_ReWrite, &sh->dev[i].flags);
27138c2ecf20Sopenharmony_ci		} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
27148c2ecf20Sopenharmony_ci			clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
27158c2ecf20Sopenharmony_ci
27168c2ecf20Sopenharmony_ci		if (test_bit(R5_InJournal, &sh->dev[i].flags))
27178c2ecf20Sopenharmony_ci			/*
27188c2ecf20Sopenharmony_ci			 * end read for a page in journal, this
27198c2ecf20Sopenharmony_ci			 * must be preparing for prexor in rmw
27208c2ecf20Sopenharmony_ci			 */
27218c2ecf20Sopenharmony_ci			set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
27228c2ecf20Sopenharmony_ci
27238c2ecf20Sopenharmony_ci		if (atomic_read(&rdev->read_errors))
27248c2ecf20Sopenharmony_ci			atomic_set(&rdev->read_errors, 0);
27258c2ecf20Sopenharmony_ci	} else {
27268c2ecf20Sopenharmony_ci		const char *bdn = bdevname(rdev->bdev, b);
27278c2ecf20Sopenharmony_ci		int retry = 0;
27288c2ecf20Sopenharmony_ci		int set_bad = 0;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
27318c2ecf20Sopenharmony_ci		if (!(bi->bi_status == BLK_STS_PROTECTION))
27328c2ecf20Sopenharmony_ci			atomic_inc(&rdev->read_errors);
27338c2ecf20Sopenharmony_ci		if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
27348c2ecf20Sopenharmony_ci			pr_warn_ratelimited(
27358c2ecf20Sopenharmony_ci				"md/raid:%s: read error on replacement device (sector %llu on %s).\n",
27368c2ecf20Sopenharmony_ci				mdname(conf->mddev),
27378c2ecf20Sopenharmony_ci				(unsigned long long)s,
27388c2ecf20Sopenharmony_ci				bdn);
27398c2ecf20Sopenharmony_ci		else if (conf->mddev->degraded >= conf->max_degraded) {
27408c2ecf20Sopenharmony_ci			set_bad = 1;
27418c2ecf20Sopenharmony_ci			pr_warn_ratelimited(
27428c2ecf20Sopenharmony_ci				"md/raid:%s: read error not correctable (sector %llu on %s).\n",
27438c2ecf20Sopenharmony_ci				mdname(conf->mddev),
27448c2ecf20Sopenharmony_ci				(unsigned long long)s,
27458c2ecf20Sopenharmony_ci				bdn);
27468c2ecf20Sopenharmony_ci		} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
27478c2ecf20Sopenharmony_ci			/* Oh, no!!! */
27488c2ecf20Sopenharmony_ci			set_bad = 1;
27498c2ecf20Sopenharmony_ci			pr_warn_ratelimited(
27508c2ecf20Sopenharmony_ci				"md/raid:%s: read error NOT corrected!! (sector %llu on %s).\n",
27518c2ecf20Sopenharmony_ci				mdname(conf->mddev),
27528c2ecf20Sopenharmony_ci				(unsigned long long)s,
27538c2ecf20Sopenharmony_ci				bdn);
27548c2ecf20Sopenharmony_ci		} else if (atomic_read(&rdev->read_errors)
27558c2ecf20Sopenharmony_ci			 > conf->max_nr_stripes) {
27568c2ecf20Sopenharmony_ci			if (!test_bit(Faulty, &rdev->flags)) {
27578c2ecf20Sopenharmony_ci				pr_warn("md/raid:%s: %d read_errors > %d stripes\n",
27588c2ecf20Sopenharmony_ci				    mdname(conf->mddev),
27598c2ecf20Sopenharmony_ci				    atomic_read(&rdev->read_errors),
27608c2ecf20Sopenharmony_ci				    conf->max_nr_stripes);
27618c2ecf20Sopenharmony_ci				pr_warn("md/raid:%s: Too many read errors, failing device %s.\n",
27628c2ecf20Sopenharmony_ci				    mdname(conf->mddev), bdn);
27638c2ecf20Sopenharmony_ci			}
27648c2ecf20Sopenharmony_ci		} else
27658c2ecf20Sopenharmony_ci			retry = 1;
27668c2ecf20Sopenharmony_ci		if (set_bad && test_bit(In_sync, &rdev->flags)
27678c2ecf20Sopenharmony_ci		    && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
27688c2ecf20Sopenharmony_ci			retry = 1;
27698c2ecf20Sopenharmony_ci		if (retry)
27708c2ecf20Sopenharmony_ci			if (sh->qd_idx >= 0 && sh->pd_idx == i)
27718c2ecf20Sopenharmony_ci				set_bit(R5_ReadError, &sh->dev[i].flags);
27728c2ecf20Sopenharmony_ci			else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
27738c2ecf20Sopenharmony_ci				set_bit(R5_ReadError, &sh->dev[i].flags);
27748c2ecf20Sopenharmony_ci				clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
27758c2ecf20Sopenharmony_ci			} else
27768c2ecf20Sopenharmony_ci				set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
27778c2ecf20Sopenharmony_ci		else {
27788c2ecf20Sopenharmony_ci			clear_bit(R5_ReadError, &sh->dev[i].flags);
27798c2ecf20Sopenharmony_ci			clear_bit(R5_ReWrite, &sh->dev[i].flags);
27808c2ecf20Sopenharmony_ci			if (!(set_bad
27818c2ecf20Sopenharmony_ci			      && test_bit(In_sync, &rdev->flags)
27828c2ecf20Sopenharmony_ci			      && rdev_set_badblocks(
27838c2ecf20Sopenharmony_ci				      rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
27848c2ecf20Sopenharmony_ci				md_error(conf->mddev, rdev);
27858c2ecf20Sopenharmony_ci		}
27868c2ecf20Sopenharmony_ci	}
27878c2ecf20Sopenharmony_ci	rdev_dec_pending(rdev, conf->mddev);
27888c2ecf20Sopenharmony_ci	bio_reset(bi);
27898c2ecf20Sopenharmony_ci	clear_bit(R5_LOCKED, &sh->dev[i].flags);
27908c2ecf20Sopenharmony_ci	set_bit(STRIPE_HANDLE, &sh->state);
27918c2ecf20Sopenharmony_ci	raid5_release_stripe(sh);
27928c2ecf20Sopenharmony_ci}
27938c2ecf20Sopenharmony_ci
27948c2ecf20Sopenharmony_cistatic void raid5_end_write_request(struct bio *bi)
27958c2ecf20Sopenharmony_ci{
27968c2ecf20Sopenharmony_ci	struct stripe_head *sh = bi->bi_private;
27978c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
27988c2ecf20Sopenharmony_ci	int disks = sh->disks, i;
27998c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
28008c2ecf20Sopenharmony_ci	sector_t first_bad;
28018c2ecf20Sopenharmony_ci	int bad_sectors;
28028c2ecf20Sopenharmony_ci	int replacement = 0;
28038c2ecf20Sopenharmony_ci
28048c2ecf20Sopenharmony_ci	for (i = 0 ; i < disks; i++) {
28058c2ecf20Sopenharmony_ci		if (bi == &sh->dev[i].req) {
28068c2ecf20Sopenharmony_ci			rdev = conf->disks[i].rdev;
28078c2ecf20Sopenharmony_ci			break;
28088c2ecf20Sopenharmony_ci		}
28098c2ecf20Sopenharmony_ci		if (bi == &sh->dev[i].rreq) {
28108c2ecf20Sopenharmony_ci			rdev = conf->disks[i].replacement;
28118c2ecf20Sopenharmony_ci			if (rdev)
28128c2ecf20Sopenharmony_ci				replacement = 1;
28138c2ecf20Sopenharmony_ci			else
28148c2ecf20Sopenharmony_ci				/* rdev was removed and 'replacement'
28158c2ecf20Sopenharmony_ci				 * replaced it.  rdev is not removed
28168c2ecf20Sopenharmony_ci				 * until all requests are finished.
28178c2ecf20Sopenharmony_ci				 */
28188c2ecf20Sopenharmony_ci				rdev = conf->disks[i].rdev;
28198c2ecf20Sopenharmony_ci			break;
28208c2ecf20Sopenharmony_ci		}
28218c2ecf20Sopenharmony_ci	}
28228c2ecf20Sopenharmony_ci	pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
28238c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
28248c2ecf20Sopenharmony_ci		bi->bi_status);
28258c2ecf20Sopenharmony_ci	if (i == disks) {
28268c2ecf20Sopenharmony_ci		bio_reset(bi);
28278c2ecf20Sopenharmony_ci		BUG();
28288c2ecf20Sopenharmony_ci		return;
28298c2ecf20Sopenharmony_ci	}
28308c2ecf20Sopenharmony_ci
28318c2ecf20Sopenharmony_ci	if (replacement) {
28328c2ecf20Sopenharmony_ci		if (bi->bi_status)
28338c2ecf20Sopenharmony_ci			md_error(conf->mddev, rdev);
28348c2ecf20Sopenharmony_ci		else if (is_badblock(rdev, sh->sector,
28358c2ecf20Sopenharmony_ci				     RAID5_STRIPE_SECTORS(conf),
28368c2ecf20Sopenharmony_ci				     &first_bad, &bad_sectors))
28378c2ecf20Sopenharmony_ci			set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
28388c2ecf20Sopenharmony_ci	} else {
28398c2ecf20Sopenharmony_ci		if (bi->bi_status) {
28408c2ecf20Sopenharmony_ci			set_bit(STRIPE_DEGRADED, &sh->state);
28418c2ecf20Sopenharmony_ci			set_bit(WriteErrorSeen, &rdev->flags);
28428c2ecf20Sopenharmony_ci			set_bit(R5_WriteError, &sh->dev[i].flags);
28438c2ecf20Sopenharmony_ci			if (!test_and_set_bit(WantReplacement, &rdev->flags))
28448c2ecf20Sopenharmony_ci				set_bit(MD_RECOVERY_NEEDED,
28458c2ecf20Sopenharmony_ci					&rdev->mddev->recovery);
28468c2ecf20Sopenharmony_ci		} else if (is_badblock(rdev, sh->sector,
28478c2ecf20Sopenharmony_ci				       RAID5_STRIPE_SECTORS(conf),
28488c2ecf20Sopenharmony_ci				       &first_bad, &bad_sectors)) {
28498c2ecf20Sopenharmony_ci			set_bit(R5_MadeGood, &sh->dev[i].flags);
28508c2ecf20Sopenharmony_ci			if (test_bit(R5_ReadError, &sh->dev[i].flags))
28518c2ecf20Sopenharmony_ci				/* That was a successful write so make
28528c2ecf20Sopenharmony_ci				 * sure it looks like we already did
28538c2ecf20Sopenharmony_ci				 * a re-write.
28548c2ecf20Sopenharmony_ci				 */
28558c2ecf20Sopenharmony_ci				set_bit(R5_ReWrite, &sh->dev[i].flags);
28568c2ecf20Sopenharmony_ci		}
28578c2ecf20Sopenharmony_ci	}
28588c2ecf20Sopenharmony_ci	rdev_dec_pending(rdev, conf->mddev);
28598c2ecf20Sopenharmony_ci
28608c2ecf20Sopenharmony_ci	if (sh->batch_head && bi->bi_status && !replacement)
28618c2ecf20Sopenharmony_ci		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
28628c2ecf20Sopenharmony_ci
28638c2ecf20Sopenharmony_ci	bio_reset(bi);
28648c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
28658c2ecf20Sopenharmony_ci		clear_bit(R5_LOCKED, &sh->dev[i].flags);
28668c2ecf20Sopenharmony_ci	set_bit(STRIPE_HANDLE, &sh->state);
28678c2ecf20Sopenharmony_ci
28688c2ecf20Sopenharmony_ci	if (sh->batch_head && sh != sh->batch_head)
28698c2ecf20Sopenharmony_ci		raid5_release_stripe(sh->batch_head);
28708c2ecf20Sopenharmony_ci	raid5_release_stripe(sh);
28718c2ecf20Sopenharmony_ci}
28728c2ecf20Sopenharmony_ci
28738c2ecf20Sopenharmony_cistatic void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
28748c2ecf20Sopenharmony_ci{
28758c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
28768c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
28778c2ecf20Sopenharmony_ci	unsigned long flags;
28788c2ecf20Sopenharmony_ci	pr_debug("raid456: error called\n");
28798c2ecf20Sopenharmony_ci
28808c2ecf20Sopenharmony_ci	pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
28818c2ecf20Sopenharmony_ci		mdname(mddev), bdevname(rdev->bdev, b));
28828c2ecf20Sopenharmony_ci
28838c2ecf20Sopenharmony_ci	spin_lock_irqsave(&conf->device_lock, flags);
28848c2ecf20Sopenharmony_ci	set_bit(Faulty, &rdev->flags);
28858c2ecf20Sopenharmony_ci	clear_bit(In_sync, &rdev->flags);
28868c2ecf20Sopenharmony_ci	mddev->degraded = raid5_calc_degraded(conf);
28878c2ecf20Sopenharmony_ci
28888c2ecf20Sopenharmony_ci	if (has_failed(conf)) {
28898c2ecf20Sopenharmony_ci		set_bit(MD_BROKEN, &conf->mddev->flags);
28908c2ecf20Sopenharmony_ci		conf->recovery_disabled = mddev->recovery_disabled;
28918c2ecf20Sopenharmony_ci
28928c2ecf20Sopenharmony_ci		pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
28938c2ecf20Sopenharmony_ci			mdname(mddev), mddev->degraded, conf->raid_disks);
28948c2ecf20Sopenharmony_ci	} else {
28958c2ecf20Sopenharmony_ci		pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
28968c2ecf20Sopenharmony_ci			mdname(mddev), conf->raid_disks - mddev->degraded);
28978c2ecf20Sopenharmony_ci	}
28988c2ecf20Sopenharmony_ci
28998c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&conf->device_lock, flags);
29008c2ecf20Sopenharmony_ci	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
29018c2ecf20Sopenharmony_ci
29028c2ecf20Sopenharmony_ci	set_bit(Blocked, &rdev->flags);
29038c2ecf20Sopenharmony_ci	set_mask_bits(&mddev->sb_flags, 0,
29048c2ecf20Sopenharmony_ci		      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
29058c2ecf20Sopenharmony_ci	r5c_update_on_rdev_error(mddev, rdev);
29068c2ecf20Sopenharmony_ci}
29078c2ecf20Sopenharmony_ci
29088c2ecf20Sopenharmony_ci/*
29098c2ecf20Sopenharmony_ci * Input: a 'big' sector number,
29108c2ecf20Sopenharmony_ci * Output: index of the data and parity disk, and the sector # in them.
29118c2ecf20Sopenharmony_ci */
29128c2ecf20Sopenharmony_cisector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
29138c2ecf20Sopenharmony_ci			      int previous, int *dd_idx,
29148c2ecf20Sopenharmony_ci			      struct stripe_head *sh)
29158c2ecf20Sopenharmony_ci{
29168c2ecf20Sopenharmony_ci	sector_t stripe, stripe2;
29178c2ecf20Sopenharmony_ci	sector_t chunk_number;
29188c2ecf20Sopenharmony_ci	unsigned int chunk_offset;
29198c2ecf20Sopenharmony_ci	int pd_idx, qd_idx;
29208c2ecf20Sopenharmony_ci	int ddf_layout = 0;
29218c2ecf20Sopenharmony_ci	sector_t new_sector;
29228c2ecf20Sopenharmony_ci	int algorithm = previous ? conf->prev_algo
29238c2ecf20Sopenharmony_ci				 : conf->algorithm;
29248c2ecf20Sopenharmony_ci	int sectors_per_chunk = previous ? conf->prev_chunk_sectors
29258c2ecf20Sopenharmony_ci					 : conf->chunk_sectors;
29268c2ecf20Sopenharmony_ci	int raid_disks = previous ? conf->previous_raid_disks
29278c2ecf20Sopenharmony_ci				  : conf->raid_disks;
29288c2ecf20Sopenharmony_ci	int data_disks = raid_disks - conf->max_degraded;
29298c2ecf20Sopenharmony_ci
29308c2ecf20Sopenharmony_ci	/* First compute the information on this sector */
29318c2ecf20Sopenharmony_ci
29328c2ecf20Sopenharmony_ci	/*
29338c2ecf20Sopenharmony_ci	 * Compute the chunk number and the sector offset inside the chunk
29348c2ecf20Sopenharmony_ci	 */
29358c2ecf20Sopenharmony_ci	chunk_offset = sector_div(r_sector, sectors_per_chunk);
29368c2ecf20Sopenharmony_ci	chunk_number = r_sector;
29378c2ecf20Sopenharmony_ci
29388c2ecf20Sopenharmony_ci	/*
29398c2ecf20Sopenharmony_ci	 * Compute the stripe number
29408c2ecf20Sopenharmony_ci	 */
29418c2ecf20Sopenharmony_ci	stripe = chunk_number;
29428c2ecf20Sopenharmony_ci	*dd_idx = sector_div(stripe, data_disks);
29438c2ecf20Sopenharmony_ci	stripe2 = stripe;
29448c2ecf20Sopenharmony_ci	/*
29458c2ecf20Sopenharmony_ci	 * Select the parity disk based on the user selected algorithm.
29468c2ecf20Sopenharmony_ci	 */
29478c2ecf20Sopenharmony_ci	pd_idx = qd_idx = -1;
29488c2ecf20Sopenharmony_ci	switch(conf->level) {
29498c2ecf20Sopenharmony_ci	case 4:
29508c2ecf20Sopenharmony_ci		pd_idx = data_disks;
29518c2ecf20Sopenharmony_ci		break;
29528c2ecf20Sopenharmony_ci	case 5:
29538c2ecf20Sopenharmony_ci		switch (algorithm) {
29548c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC:
29558c2ecf20Sopenharmony_ci			pd_idx = data_disks - sector_div(stripe2, raid_disks);
29568c2ecf20Sopenharmony_ci			if (*dd_idx >= pd_idx)
29578c2ecf20Sopenharmony_ci				(*dd_idx)++;
29588c2ecf20Sopenharmony_ci			break;
29598c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC:
29608c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks);
29618c2ecf20Sopenharmony_ci			if (*dd_idx >= pd_idx)
29628c2ecf20Sopenharmony_ci				(*dd_idx)++;
29638c2ecf20Sopenharmony_ci			break;
29648c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC:
29658c2ecf20Sopenharmony_ci			pd_idx = data_disks - sector_div(stripe2, raid_disks);
29668c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
29678c2ecf20Sopenharmony_ci			break;
29688c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC:
29698c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks);
29708c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
29718c2ecf20Sopenharmony_ci			break;
29728c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0:
29738c2ecf20Sopenharmony_ci			pd_idx = 0;
29748c2ecf20Sopenharmony_ci			(*dd_idx)++;
29758c2ecf20Sopenharmony_ci			break;
29768c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_N:
29778c2ecf20Sopenharmony_ci			pd_idx = data_disks;
29788c2ecf20Sopenharmony_ci			break;
29798c2ecf20Sopenharmony_ci		default:
29808c2ecf20Sopenharmony_ci			BUG();
29818c2ecf20Sopenharmony_ci		}
29828c2ecf20Sopenharmony_ci		break;
29838c2ecf20Sopenharmony_ci	case 6:
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_ci		switch (algorithm) {
29868c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC:
29878c2ecf20Sopenharmony_ci			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
29888c2ecf20Sopenharmony_ci			qd_idx = pd_idx + 1;
29898c2ecf20Sopenharmony_ci			if (pd_idx == raid_disks-1) {
29908c2ecf20Sopenharmony_ci				(*dd_idx)++;	/* Q D D D P */
29918c2ecf20Sopenharmony_ci				qd_idx = 0;
29928c2ecf20Sopenharmony_ci			} else if (*dd_idx >= pd_idx)
29938c2ecf20Sopenharmony_ci				(*dd_idx) += 2; /* D D P Q D */
29948c2ecf20Sopenharmony_ci			break;
29958c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC:
29968c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks);
29978c2ecf20Sopenharmony_ci			qd_idx = pd_idx + 1;
29988c2ecf20Sopenharmony_ci			if (pd_idx == raid_disks-1) {
29998c2ecf20Sopenharmony_ci				(*dd_idx)++;	/* Q D D D P */
30008c2ecf20Sopenharmony_ci				qd_idx = 0;
30018c2ecf20Sopenharmony_ci			} else if (*dd_idx >= pd_idx)
30028c2ecf20Sopenharmony_ci				(*dd_idx) += 2; /* D D P Q D */
30038c2ecf20Sopenharmony_ci			break;
30048c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC:
30058c2ecf20Sopenharmony_ci			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
30068c2ecf20Sopenharmony_ci			qd_idx = (pd_idx + 1) % raid_disks;
30078c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
30088c2ecf20Sopenharmony_ci			break;
30098c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC:
30108c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks);
30118c2ecf20Sopenharmony_ci			qd_idx = (pd_idx + 1) % raid_disks;
30128c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
30138c2ecf20Sopenharmony_ci			break;
30148c2ecf20Sopenharmony_ci
30158c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0:
30168c2ecf20Sopenharmony_ci			pd_idx = 0;
30178c2ecf20Sopenharmony_ci			qd_idx = 1;
30188c2ecf20Sopenharmony_ci			(*dd_idx) += 2;
30198c2ecf20Sopenharmony_ci			break;
30208c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_N:
30218c2ecf20Sopenharmony_ci			pd_idx = data_disks;
30228c2ecf20Sopenharmony_ci			qd_idx = data_disks + 1;
30238c2ecf20Sopenharmony_ci			break;
30248c2ecf20Sopenharmony_ci
30258c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_ZERO_RESTART:
30268c2ecf20Sopenharmony_ci			/* Exactly the same as RIGHT_ASYMMETRIC, but or
30278c2ecf20Sopenharmony_ci			 * of blocks for computing Q is different.
30288c2ecf20Sopenharmony_ci			 */
30298c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks);
30308c2ecf20Sopenharmony_ci			qd_idx = pd_idx + 1;
30318c2ecf20Sopenharmony_ci			if (pd_idx == raid_disks-1) {
30328c2ecf20Sopenharmony_ci				(*dd_idx)++;	/* Q D D D P */
30338c2ecf20Sopenharmony_ci				qd_idx = 0;
30348c2ecf20Sopenharmony_ci			} else if (*dd_idx >= pd_idx)
30358c2ecf20Sopenharmony_ci				(*dd_idx) += 2; /* D D P Q D */
30368c2ecf20Sopenharmony_ci			ddf_layout = 1;
30378c2ecf20Sopenharmony_ci			break;
30388c2ecf20Sopenharmony_ci
30398c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_N_RESTART:
30408c2ecf20Sopenharmony_ci			/* Same a left_asymmetric, by first stripe is
30418c2ecf20Sopenharmony_ci			 * D D D P Q  rather than
30428c2ecf20Sopenharmony_ci			 * Q D D D P
30438c2ecf20Sopenharmony_ci			 */
30448c2ecf20Sopenharmony_ci			stripe2 += 1;
30458c2ecf20Sopenharmony_ci			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
30468c2ecf20Sopenharmony_ci			qd_idx = pd_idx + 1;
30478c2ecf20Sopenharmony_ci			if (pd_idx == raid_disks-1) {
30488c2ecf20Sopenharmony_ci				(*dd_idx)++;	/* Q D D D P */
30498c2ecf20Sopenharmony_ci				qd_idx = 0;
30508c2ecf20Sopenharmony_ci			} else if (*dd_idx >= pd_idx)
30518c2ecf20Sopenharmony_ci				(*dd_idx) += 2; /* D D P Q D */
30528c2ecf20Sopenharmony_ci			ddf_layout = 1;
30538c2ecf20Sopenharmony_ci			break;
30548c2ecf20Sopenharmony_ci
30558c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_N_CONTINUE:
30568c2ecf20Sopenharmony_ci			/* Same as left_symmetric but Q is before P */
30578c2ecf20Sopenharmony_ci			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
30588c2ecf20Sopenharmony_ci			qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
30598c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
30608c2ecf20Sopenharmony_ci			ddf_layout = 1;
30618c2ecf20Sopenharmony_ci			break;
30628c2ecf20Sopenharmony_ci
30638c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC_6:
30648c2ecf20Sopenharmony_ci			/* RAID5 left_asymmetric, with Q on last device */
30658c2ecf20Sopenharmony_ci			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
30668c2ecf20Sopenharmony_ci			if (*dd_idx >= pd_idx)
30678c2ecf20Sopenharmony_ci				(*dd_idx)++;
30688c2ecf20Sopenharmony_ci			qd_idx = raid_disks - 1;
30698c2ecf20Sopenharmony_ci			break;
30708c2ecf20Sopenharmony_ci
30718c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC_6:
30728c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks-1);
30738c2ecf20Sopenharmony_ci			if (*dd_idx >= pd_idx)
30748c2ecf20Sopenharmony_ci				(*dd_idx)++;
30758c2ecf20Sopenharmony_ci			qd_idx = raid_disks - 1;
30768c2ecf20Sopenharmony_ci			break;
30778c2ecf20Sopenharmony_ci
30788c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC_6:
30798c2ecf20Sopenharmony_ci			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
30808c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
30818c2ecf20Sopenharmony_ci			qd_idx = raid_disks - 1;
30828c2ecf20Sopenharmony_ci			break;
30838c2ecf20Sopenharmony_ci
30848c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC_6:
30858c2ecf20Sopenharmony_ci			pd_idx = sector_div(stripe2, raid_disks-1);
30868c2ecf20Sopenharmony_ci			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
30878c2ecf20Sopenharmony_ci			qd_idx = raid_disks - 1;
30888c2ecf20Sopenharmony_ci			break;
30898c2ecf20Sopenharmony_ci
30908c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0_6:
30918c2ecf20Sopenharmony_ci			pd_idx = 0;
30928c2ecf20Sopenharmony_ci			(*dd_idx)++;
30938c2ecf20Sopenharmony_ci			qd_idx = raid_disks - 1;
30948c2ecf20Sopenharmony_ci			break;
30958c2ecf20Sopenharmony_ci
30968c2ecf20Sopenharmony_ci		default:
30978c2ecf20Sopenharmony_ci			BUG();
30988c2ecf20Sopenharmony_ci		}
30998c2ecf20Sopenharmony_ci		break;
31008c2ecf20Sopenharmony_ci	}
31018c2ecf20Sopenharmony_ci
31028c2ecf20Sopenharmony_ci	if (sh) {
31038c2ecf20Sopenharmony_ci		sh->pd_idx = pd_idx;
31048c2ecf20Sopenharmony_ci		sh->qd_idx = qd_idx;
31058c2ecf20Sopenharmony_ci		sh->ddf_layout = ddf_layout;
31068c2ecf20Sopenharmony_ci	}
31078c2ecf20Sopenharmony_ci	/*
31088c2ecf20Sopenharmony_ci	 * Finally, compute the new sector number
31098c2ecf20Sopenharmony_ci	 */
31108c2ecf20Sopenharmony_ci	new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset;
31118c2ecf20Sopenharmony_ci	return new_sector;
31128c2ecf20Sopenharmony_ci}
31138c2ecf20Sopenharmony_ci
31148c2ecf20Sopenharmony_cisector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
31158c2ecf20Sopenharmony_ci{
31168c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
31178c2ecf20Sopenharmony_ci	int raid_disks = sh->disks;
31188c2ecf20Sopenharmony_ci	int data_disks = raid_disks - conf->max_degraded;
31198c2ecf20Sopenharmony_ci	sector_t new_sector = sh->sector, check;
31208c2ecf20Sopenharmony_ci	int sectors_per_chunk = previous ? conf->prev_chunk_sectors
31218c2ecf20Sopenharmony_ci					 : conf->chunk_sectors;
31228c2ecf20Sopenharmony_ci	int algorithm = previous ? conf->prev_algo
31238c2ecf20Sopenharmony_ci				 : conf->algorithm;
31248c2ecf20Sopenharmony_ci	sector_t stripe;
31258c2ecf20Sopenharmony_ci	int chunk_offset;
31268c2ecf20Sopenharmony_ci	sector_t chunk_number;
31278c2ecf20Sopenharmony_ci	int dummy1, dd_idx = i;
31288c2ecf20Sopenharmony_ci	sector_t r_sector;
31298c2ecf20Sopenharmony_ci	struct stripe_head sh2;
31308c2ecf20Sopenharmony_ci
31318c2ecf20Sopenharmony_ci	chunk_offset = sector_div(new_sector, sectors_per_chunk);
31328c2ecf20Sopenharmony_ci	stripe = new_sector;
31338c2ecf20Sopenharmony_ci
31348c2ecf20Sopenharmony_ci	if (i == sh->pd_idx)
31358c2ecf20Sopenharmony_ci		return 0;
31368c2ecf20Sopenharmony_ci	switch(conf->level) {
31378c2ecf20Sopenharmony_ci	case 4: break;
31388c2ecf20Sopenharmony_ci	case 5:
31398c2ecf20Sopenharmony_ci		switch (algorithm) {
31408c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC:
31418c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC:
31428c2ecf20Sopenharmony_ci			if (i > sh->pd_idx)
31438c2ecf20Sopenharmony_ci				i--;
31448c2ecf20Sopenharmony_ci			break;
31458c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC:
31468c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC:
31478c2ecf20Sopenharmony_ci			if (i < sh->pd_idx)
31488c2ecf20Sopenharmony_ci				i += raid_disks;
31498c2ecf20Sopenharmony_ci			i -= (sh->pd_idx + 1);
31508c2ecf20Sopenharmony_ci			break;
31518c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0:
31528c2ecf20Sopenharmony_ci			i -= 1;
31538c2ecf20Sopenharmony_ci			break;
31548c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_N:
31558c2ecf20Sopenharmony_ci			break;
31568c2ecf20Sopenharmony_ci		default:
31578c2ecf20Sopenharmony_ci			BUG();
31588c2ecf20Sopenharmony_ci		}
31598c2ecf20Sopenharmony_ci		break;
31608c2ecf20Sopenharmony_ci	case 6:
31618c2ecf20Sopenharmony_ci		if (i == sh->qd_idx)
31628c2ecf20Sopenharmony_ci			return 0; /* It is the Q disk */
31638c2ecf20Sopenharmony_ci		switch (algorithm) {
31648c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC:
31658c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC:
31668c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_ZERO_RESTART:
31678c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_N_RESTART:
31688c2ecf20Sopenharmony_ci			if (sh->pd_idx == raid_disks-1)
31698c2ecf20Sopenharmony_ci				i--;	/* Q D D D P */
31708c2ecf20Sopenharmony_ci			else if (i > sh->pd_idx)
31718c2ecf20Sopenharmony_ci				i -= 2; /* D D P Q D */
31728c2ecf20Sopenharmony_ci			break;
31738c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC:
31748c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC:
31758c2ecf20Sopenharmony_ci			if (sh->pd_idx == raid_disks-1)
31768c2ecf20Sopenharmony_ci				i--; /* Q D D D P */
31778c2ecf20Sopenharmony_ci			else {
31788c2ecf20Sopenharmony_ci				/* D D P Q D */
31798c2ecf20Sopenharmony_ci				if (i < sh->pd_idx)
31808c2ecf20Sopenharmony_ci					i += raid_disks;
31818c2ecf20Sopenharmony_ci				i -= (sh->pd_idx + 2);
31828c2ecf20Sopenharmony_ci			}
31838c2ecf20Sopenharmony_ci			break;
31848c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0:
31858c2ecf20Sopenharmony_ci			i -= 2;
31868c2ecf20Sopenharmony_ci			break;
31878c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_N:
31888c2ecf20Sopenharmony_ci			break;
31898c2ecf20Sopenharmony_ci		case ALGORITHM_ROTATING_N_CONTINUE:
31908c2ecf20Sopenharmony_ci			/* Like left_symmetric, but P is before Q */
31918c2ecf20Sopenharmony_ci			if (sh->pd_idx == 0)
31928c2ecf20Sopenharmony_ci				i--;	/* P D D D Q */
31938c2ecf20Sopenharmony_ci			else {
31948c2ecf20Sopenharmony_ci				/* D D Q P D */
31958c2ecf20Sopenharmony_ci				if (i < sh->pd_idx)
31968c2ecf20Sopenharmony_ci					i += raid_disks;
31978c2ecf20Sopenharmony_ci				i -= (sh->pd_idx + 1);
31988c2ecf20Sopenharmony_ci			}
31998c2ecf20Sopenharmony_ci			break;
32008c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_ASYMMETRIC_6:
32018c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_ASYMMETRIC_6:
32028c2ecf20Sopenharmony_ci			if (i > sh->pd_idx)
32038c2ecf20Sopenharmony_ci				i--;
32048c2ecf20Sopenharmony_ci			break;
32058c2ecf20Sopenharmony_ci		case ALGORITHM_LEFT_SYMMETRIC_6:
32068c2ecf20Sopenharmony_ci		case ALGORITHM_RIGHT_SYMMETRIC_6:
32078c2ecf20Sopenharmony_ci			if (i < sh->pd_idx)
32088c2ecf20Sopenharmony_ci				i += data_disks + 1;
32098c2ecf20Sopenharmony_ci			i -= (sh->pd_idx + 1);
32108c2ecf20Sopenharmony_ci			break;
32118c2ecf20Sopenharmony_ci		case ALGORITHM_PARITY_0_6:
32128c2ecf20Sopenharmony_ci			i -= 1;
32138c2ecf20Sopenharmony_ci			break;
32148c2ecf20Sopenharmony_ci		default:
32158c2ecf20Sopenharmony_ci			BUG();
32168c2ecf20Sopenharmony_ci		}
32178c2ecf20Sopenharmony_ci		break;
32188c2ecf20Sopenharmony_ci	}
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci	chunk_number = stripe * data_disks + i;
32218c2ecf20Sopenharmony_ci	r_sector = chunk_number * sectors_per_chunk + chunk_offset;
32228c2ecf20Sopenharmony_ci
32238c2ecf20Sopenharmony_ci	check = raid5_compute_sector(conf, r_sector,
32248c2ecf20Sopenharmony_ci				     previous, &dummy1, &sh2);
32258c2ecf20Sopenharmony_ci	if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
32268c2ecf20Sopenharmony_ci		|| sh2.qd_idx != sh->qd_idx) {
32278c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: compute_blocknr: map not correct\n",
32288c2ecf20Sopenharmony_ci			mdname(conf->mddev));
32298c2ecf20Sopenharmony_ci		return 0;
32308c2ecf20Sopenharmony_ci	}
32318c2ecf20Sopenharmony_ci	return r_sector;
32328c2ecf20Sopenharmony_ci}
32338c2ecf20Sopenharmony_ci
32348c2ecf20Sopenharmony_ci/*
32358c2ecf20Sopenharmony_ci * There are cases where we want handle_stripe_dirtying() and
32368c2ecf20Sopenharmony_ci * schedule_reconstruction() to delay towrite to some dev of a stripe.
32378c2ecf20Sopenharmony_ci *
32388c2ecf20Sopenharmony_ci * This function checks whether we want to delay the towrite. Specifically,
32398c2ecf20Sopenharmony_ci * we delay the towrite when:
32408c2ecf20Sopenharmony_ci *
32418c2ecf20Sopenharmony_ci *   1. degraded stripe has a non-overwrite to the missing dev, AND this
32428c2ecf20Sopenharmony_ci *      stripe has data in journal (for other devices).
32438c2ecf20Sopenharmony_ci *
32448c2ecf20Sopenharmony_ci *      In this case, when reading data for the non-overwrite dev, it is
32458c2ecf20Sopenharmony_ci *      necessary to handle complex rmw of write back cache (prexor with
32468c2ecf20Sopenharmony_ci *      orig_page, and xor with page). To keep read path simple, we would
32478c2ecf20Sopenharmony_ci *      like to flush data in journal to RAID disks first, so complex rmw
32488c2ecf20Sopenharmony_ci *      is handled in the write patch (handle_stripe_dirtying).
32498c2ecf20Sopenharmony_ci *
32508c2ecf20Sopenharmony_ci *   2. when journal space is critical (R5C_LOG_CRITICAL=1)
32518c2ecf20Sopenharmony_ci *
32528c2ecf20Sopenharmony_ci *      It is important to be able to flush all stripes in raid5-cache.
32538c2ecf20Sopenharmony_ci *      Therefore, we need reserve some space on the journal device for
32548c2ecf20Sopenharmony_ci *      these flushes. If flush operation includes pending writes to the
32558c2ecf20Sopenharmony_ci *      stripe, we need to reserve (conf->raid_disk + 1) pages per stripe
32568c2ecf20Sopenharmony_ci *      for the flush out. If we exclude these pending writes from flush
32578c2ecf20Sopenharmony_ci *      operation, we only need (conf->max_degraded + 1) pages per stripe.
32588c2ecf20Sopenharmony_ci *      Therefore, excluding pending writes in these cases enables more
32598c2ecf20Sopenharmony_ci *      efficient use of the journal device.
32608c2ecf20Sopenharmony_ci *
32618c2ecf20Sopenharmony_ci *      Note: To make sure the stripe makes progress, we only delay
32628c2ecf20Sopenharmony_ci *      towrite for stripes with data already in journal (injournal > 0).
32638c2ecf20Sopenharmony_ci *      When LOG_CRITICAL, stripes with injournal == 0 will be sent to
32648c2ecf20Sopenharmony_ci *      no_space_stripes list.
32658c2ecf20Sopenharmony_ci *
32668c2ecf20Sopenharmony_ci *   3. during journal failure
32678c2ecf20Sopenharmony_ci *      In journal failure, we try to flush all cached data to raid disks
32688c2ecf20Sopenharmony_ci *      based on data in stripe cache. The array is read-only to upper
32698c2ecf20Sopenharmony_ci *      layers, so we would skip all pending writes.
32708c2ecf20Sopenharmony_ci *
32718c2ecf20Sopenharmony_ci */
32728c2ecf20Sopenharmony_cistatic inline bool delay_towrite(struct r5conf *conf,
32738c2ecf20Sopenharmony_ci				 struct r5dev *dev,
32748c2ecf20Sopenharmony_ci				 struct stripe_head_state *s)
32758c2ecf20Sopenharmony_ci{
32768c2ecf20Sopenharmony_ci	/* case 1 above */
32778c2ecf20Sopenharmony_ci	if (!test_bit(R5_OVERWRITE, &dev->flags) &&
32788c2ecf20Sopenharmony_ci	    !test_bit(R5_Insync, &dev->flags) && s->injournal)
32798c2ecf20Sopenharmony_ci		return true;
32808c2ecf20Sopenharmony_ci	/* case 2 above */
32818c2ecf20Sopenharmony_ci	if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
32828c2ecf20Sopenharmony_ci	    s->injournal > 0)
32838c2ecf20Sopenharmony_ci		return true;
32848c2ecf20Sopenharmony_ci	/* case 3 above */
32858c2ecf20Sopenharmony_ci	if (s->log_failed && s->injournal)
32868c2ecf20Sopenharmony_ci		return true;
32878c2ecf20Sopenharmony_ci	return false;
32888c2ecf20Sopenharmony_ci}
32898c2ecf20Sopenharmony_ci
32908c2ecf20Sopenharmony_cistatic void
32918c2ecf20Sopenharmony_cischedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
32928c2ecf20Sopenharmony_ci			 int rcw, int expand)
32938c2ecf20Sopenharmony_ci{
32948c2ecf20Sopenharmony_ci	int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
32958c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
32968c2ecf20Sopenharmony_ci	int level = conf->level;
32978c2ecf20Sopenharmony_ci
32988c2ecf20Sopenharmony_ci	if (rcw) {
32998c2ecf20Sopenharmony_ci		/*
33008c2ecf20Sopenharmony_ci		 * In some cases, handle_stripe_dirtying initially decided to
33018c2ecf20Sopenharmony_ci		 * run rmw and allocates extra page for prexor. However, rcw is
33028c2ecf20Sopenharmony_ci		 * cheaper later on. We need to free the extra page now,
33038c2ecf20Sopenharmony_ci		 * because we won't be able to do that in ops_complete_prexor().
33048c2ecf20Sopenharmony_ci		 */
33058c2ecf20Sopenharmony_ci		r5c_release_extra_page(sh);
33068c2ecf20Sopenharmony_ci
33078c2ecf20Sopenharmony_ci		for (i = disks; i--; ) {
33088c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
33098c2ecf20Sopenharmony_ci
33108c2ecf20Sopenharmony_ci			if (dev->towrite && !delay_towrite(conf, dev, s)) {
33118c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &dev->flags);
33128c2ecf20Sopenharmony_ci				set_bit(R5_Wantdrain, &dev->flags);
33138c2ecf20Sopenharmony_ci				if (!expand)
33148c2ecf20Sopenharmony_ci					clear_bit(R5_UPTODATE, &dev->flags);
33158c2ecf20Sopenharmony_ci				s->locked++;
33168c2ecf20Sopenharmony_ci			} else if (test_bit(R5_InJournal, &dev->flags)) {
33178c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &dev->flags);
33188c2ecf20Sopenharmony_ci				s->locked++;
33198c2ecf20Sopenharmony_ci			}
33208c2ecf20Sopenharmony_ci		}
33218c2ecf20Sopenharmony_ci		/* if we are not expanding this is a proper write request, and
33228c2ecf20Sopenharmony_ci		 * there will be bios with new data to be drained into the
33238c2ecf20Sopenharmony_ci		 * stripe cache
33248c2ecf20Sopenharmony_ci		 */
33258c2ecf20Sopenharmony_ci		if (!expand) {
33268c2ecf20Sopenharmony_ci			if (!s->locked)
33278c2ecf20Sopenharmony_ci				/* False alarm, nothing to do */
33288c2ecf20Sopenharmony_ci				return;
33298c2ecf20Sopenharmony_ci			sh->reconstruct_state = reconstruct_state_drain_run;
33308c2ecf20Sopenharmony_ci			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
33318c2ecf20Sopenharmony_ci		} else
33328c2ecf20Sopenharmony_ci			sh->reconstruct_state = reconstruct_state_run;
33338c2ecf20Sopenharmony_ci
33348c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
33358c2ecf20Sopenharmony_ci
33368c2ecf20Sopenharmony_ci		if (s->locked + conf->max_degraded == disks)
33378c2ecf20Sopenharmony_ci			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
33388c2ecf20Sopenharmony_ci				atomic_inc(&conf->pending_full_writes);
33398c2ecf20Sopenharmony_ci	} else {
33408c2ecf20Sopenharmony_ci		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
33418c2ecf20Sopenharmony_ci			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
33428c2ecf20Sopenharmony_ci		BUG_ON(level == 6 &&
33438c2ecf20Sopenharmony_ci			(!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
33448c2ecf20Sopenharmony_ci			   test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
33458c2ecf20Sopenharmony_ci
33468c2ecf20Sopenharmony_ci		for (i = disks; i--; ) {
33478c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
33488c2ecf20Sopenharmony_ci			if (i == pd_idx || i == qd_idx)
33498c2ecf20Sopenharmony_ci				continue;
33508c2ecf20Sopenharmony_ci
33518c2ecf20Sopenharmony_ci			if (dev->towrite &&
33528c2ecf20Sopenharmony_ci			    (test_bit(R5_UPTODATE, &dev->flags) ||
33538c2ecf20Sopenharmony_ci			     test_bit(R5_Wantcompute, &dev->flags))) {
33548c2ecf20Sopenharmony_ci				set_bit(R5_Wantdrain, &dev->flags);
33558c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &dev->flags);
33568c2ecf20Sopenharmony_ci				clear_bit(R5_UPTODATE, &dev->flags);
33578c2ecf20Sopenharmony_ci				s->locked++;
33588c2ecf20Sopenharmony_ci			} else if (test_bit(R5_InJournal, &dev->flags)) {
33598c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &dev->flags);
33608c2ecf20Sopenharmony_ci				s->locked++;
33618c2ecf20Sopenharmony_ci			}
33628c2ecf20Sopenharmony_ci		}
33638c2ecf20Sopenharmony_ci		if (!s->locked)
33648c2ecf20Sopenharmony_ci			/* False alarm - nothing to do */
33658c2ecf20Sopenharmony_ci			return;
33668c2ecf20Sopenharmony_ci		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
33678c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
33688c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
33698c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
33708c2ecf20Sopenharmony_ci	}
33718c2ecf20Sopenharmony_ci
33728c2ecf20Sopenharmony_ci	/* keep the parity disk(s) locked while asynchronous operations
33738c2ecf20Sopenharmony_ci	 * are in flight
33748c2ecf20Sopenharmony_ci	 */
33758c2ecf20Sopenharmony_ci	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
33768c2ecf20Sopenharmony_ci	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
33778c2ecf20Sopenharmony_ci	s->locked++;
33788c2ecf20Sopenharmony_ci
33798c2ecf20Sopenharmony_ci	if (level == 6) {
33808c2ecf20Sopenharmony_ci		int qd_idx = sh->qd_idx;
33818c2ecf20Sopenharmony_ci		struct r5dev *dev = &sh->dev[qd_idx];
33828c2ecf20Sopenharmony_ci
33838c2ecf20Sopenharmony_ci		set_bit(R5_LOCKED, &dev->flags);
33848c2ecf20Sopenharmony_ci		clear_bit(R5_UPTODATE, &dev->flags);
33858c2ecf20Sopenharmony_ci		s->locked++;
33868c2ecf20Sopenharmony_ci	}
33878c2ecf20Sopenharmony_ci
33888c2ecf20Sopenharmony_ci	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
33898c2ecf20Sopenharmony_ci	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
33908c2ecf20Sopenharmony_ci	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
33918c2ecf20Sopenharmony_ci	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
33928c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request);
33938c2ecf20Sopenharmony_ci
33948c2ecf20Sopenharmony_ci	pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
33958c2ecf20Sopenharmony_ci		__func__, (unsigned long long)sh->sector,
33968c2ecf20Sopenharmony_ci		s->locked, s->ops_request);
33978c2ecf20Sopenharmony_ci}
33988c2ecf20Sopenharmony_ci
33998c2ecf20Sopenharmony_ci/*
34008c2ecf20Sopenharmony_ci * Each stripe/dev can have one or more bion attached.
34018c2ecf20Sopenharmony_ci * toread/towrite point to the first in a chain.
34028c2ecf20Sopenharmony_ci * The bi_next chain must be in order.
34038c2ecf20Sopenharmony_ci */
34048c2ecf20Sopenharmony_cistatic int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
34058c2ecf20Sopenharmony_ci			  int forwrite, int previous)
34068c2ecf20Sopenharmony_ci{
34078c2ecf20Sopenharmony_ci	struct bio **bip;
34088c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
34098c2ecf20Sopenharmony_ci	int firstwrite=0;
34108c2ecf20Sopenharmony_ci
34118c2ecf20Sopenharmony_ci	pr_debug("adding bi b#%llu to stripe s#%llu\n",
34128c2ecf20Sopenharmony_ci		(unsigned long long)bi->bi_iter.bi_sector,
34138c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector);
34148c2ecf20Sopenharmony_ci
34158c2ecf20Sopenharmony_ci	spin_lock_irq(&sh->stripe_lock);
34168c2ecf20Sopenharmony_ci	sh->dev[dd_idx].write_hint = bi->bi_write_hint;
34178c2ecf20Sopenharmony_ci	/* Don't allow new IO added to stripes in batch list */
34188c2ecf20Sopenharmony_ci	if (sh->batch_head)
34198c2ecf20Sopenharmony_ci		goto overlap;
34208c2ecf20Sopenharmony_ci	if (forwrite) {
34218c2ecf20Sopenharmony_ci		bip = &sh->dev[dd_idx].towrite;
34228c2ecf20Sopenharmony_ci		if (*bip == NULL)
34238c2ecf20Sopenharmony_ci			firstwrite = 1;
34248c2ecf20Sopenharmony_ci	} else
34258c2ecf20Sopenharmony_ci		bip = &sh->dev[dd_idx].toread;
34268c2ecf20Sopenharmony_ci	while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
34278c2ecf20Sopenharmony_ci		if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
34288c2ecf20Sopenharmony_ci			goto overlap;
34298c2ecf20Sopenharmony_ci		bip = & (*bip)->bi_next;
34308c2ecf20Sopenharmony_ci	}
34318c2ecf20Sopenharmony_ci	if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
34328c2ecf20Sopenharmony_ci		goto overlap;
34338c2ecf20Sopenharmony_ci
34348c2ecf20Sopenharmony_ci	if (forwrite && raid5_has_ppl(conf)) {
34358c2ecf20Sopenharmony_ci		/*
34368c2ecf20Sopenharmony_ci		 * With PPL only writes to consecutive data chunks within a
34378c2ecf20Sopenharmony_ci		 * stripe are allowed because for a single stripe_head we can
34388c2ecf20Sopenharmony_ci		 * only have one PPL entry at a time, which describes one data
34398c2ecf20Sopenharmony_ci		 * range. Not really an overlap, but wait_for_overlap can be
34408c2ecf20Sopenharmony_ci		 * used to handle this.
34418c2ecf20Sopenharmony_ci		 */
34428c2ecf20Sopenharmony_ci		sector_t sector;
34438c2ecf20Sopenharmony_ci		sector_t first = 0;
34448c2ecf20Sopenharmony_ci		sector_t last = 0;
34458c2ecf20Sopenharmony_ci		int count = 0;
34468c2ecf20Sopenharmony_ci		int i;
34478c2ecf20Sopenharmony_ci
34488c2ecf20Sopenharmony_ci		for (i = 0; i < sh->disks; i++) {
34498c2ecf20Sopenharmony_ci			if (i != sh->pd_idx &&
34508c2ecf20Sopenharmony_ci			    (i == dd_idx || sh->dev[i].towrite)) {
34518c2ecf20Sopenharmony_ci				sector = sh->dev[i].sector;
34528c2ecf20Sopenharmony_ci				if (count == 0 || sector < first)
34538c2ecf20Sopenharmony_ci					first = sector;
34548c2ecf20Sopenharmony_ci				if (sector > last)
34558c2ecf20Sopenharmony_ci					last = sector;
34568c2ecf20Sopenharmony_ci				count++;
34578c2ecf20Sopenharmony_ci			}
34588c2ecf20Sopenharmony_ci		}
34598c2ecf20Sopenharmony_ci
34608c2ecf20Sopenharmony_ci		if (first + conf->chunk_sectors * (count - 1) != last)
34618c2ecf20Sopenharmony_ci			goto overlap;
34628c2ecf20Sopenharmony_ci	}
34638c2ecf20Sopenharmony_ci
34648c2ecf20Sopenharmony_ci	if (!forwrite || previous)
34658c2ecf20Sopenharmony_ci		clear_bit(STRIPE_BATCH_READY, &sh->state);
34668c2ecf20Sopenharmony_ci
34678c2ecf20Sopenharmony_ci	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
34688c2ecf20Sopenharmony_ci	if (*bip)
34698c2ecf20Sopenharmony_ci		bi->bi_next = *bip;
34708c2ecf20Sopenharmony_ci	*bip = bi;
34718c2ecf20Sopenharmony_ci	bio_inc_remaining(bi);
34728c2ecf20Sopenharmony_ci	md_write_inc(conf->mddev, bi);
34738c2ecf20Sopenharmony_ci
34748c2ecf20Sopenharmony_ci	if (forwrite) {
34758c2ecf20Sopenharmony_ci		/* check if page is covered */
34768c2ecf20Sopenharmony_ci		sector_t sector = sh->dev[dd_idx].sector;
34778c2ecf20Sopenharmony_ci		for (bi=sh->dev[dd_idx].towrite;
34788c2ecf20Sopenharmony_ci		     sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) &&
34798c2ecf20Sopenharmony_ci			     bi && bi->bi_iter.bi_sector <= sector;
34808c2ecf20Sopenharmony_ci		     bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) {
34818c2ecf20Sopenharmony_ci			if (bio_end_sector(bi) >= sector)
34828c2ecf20Sopenharmony_ci				sector = bio_end_sector(bi);
34838c2ecf20Sopenharmony_ci		}
34848c2ecf20Sopenharmony_ci		if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf))
34858c2ecf20Sopenharmony_ci			if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags))
34868c2ecf20Sopenharmony_ci				sh->overwrite_disks++;
34878c2ecf20Sopenharmony_ci	}
34888c2ecf20Sopenharmony_ci
34898c2ecf20Sopenharmony_ci	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
34908c2ecf20Sopenharmony_ci		(unsigned long long)(*bip)->bi_iter.bi_sector,
34918c2ecf20Sopenharmony_ci		(unsigned long long)sh->sector, dd_idx);
34928c2ecf20Sopenharmony_ci
34938c2ecf20Sopenharmony_ci	if (conf->mddev->bitmap && firstwrite) {
34948c2ecf20Sopenharmony_ci		/* Cannot hold spinlock over bitmap_startwrite,
34958c2ecf20Sopenharmony_ci		 * but must ensure this isn't added to a batch until
34968c2ecf20Sopenharmony_ci		 * we have added to the bitmap and set bm_seq.
34978c2ecf20Sopenharmony_ci		 * So set STRIPE_BITMAP_PENDING to prevent
34988c2ecf20Sopenharmony_ci		 * batching.
34998c2ecf20Sopenharmony_ci		 * If multiple add_stripe_bio() calls race here they
35008c2ecf20Sopenharmony_ci		 * much all set STRIPE_BITMAP_PENDING.  So only the first one
35018c2ecf20Sopenharmony_ci		 * to complete "bitmap_startwrite" gets to set
35028c2ecf20Sopenharmony_ci		 * STRIPE_BIT_DELAY.  This is important as once a stripe
35038c2ecf20Sopenharmony_ci		 * is added to a batch, STRIPE_BIT_DELAY cannot be changed
35048c2ecf20Sopenharmony_ci		 * any more.
35058c2ecf20Sopenharmony_ci		 */
35068c2ecf20Sopenharmony_ci		set_bit(STRIPE_BITMAP_PENDING, &sh->state);
35078c2ecf20Sopenharmony_ci		spin_unlock_irq(&sh->stripe_lock);
35088c2ecf20Sopenharmony_ci		md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
35098c2ecf20Sopenharmony_ci				     RAID5_STRIPE_SECTORS(conf), 0);
35108c2ecf20Sopenharmony_ci		spin_lock_irq(&sh->stripe_lock);
35118c2ecf20Sopenharmony_ci		clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
35128c2ecf20Sopenharmony_ci		if (!sh->batch_head) {
35138c2ecf20Sopenharmony_ci			sh->bm_seq = conf->seq_flush+1;
35148c2ecf20Sopenharmony_ci			set_bit(STRIPE_BIT_DELAY, &sh->state);
35158c2ecf20Sopenharmony_ci		}
35168c2ecf20Sopenharmony_ci	}
35178c2ecf20Sopenharmony_ci	spin_unlock_irq(&sh->stripe_lock);
35188c2ecf20Sopenharmony_ci
35198c2ecf20Sopenharmony_ci	if (stripe_can_batch(sh))
35208c2ecf20Sopenharmony_ci		stripe_add_to_batch_list(conf, sh);
35218c2ecf20Sopenharmony_ci	return 1;
35228c2ecf20Sopenharmony_ci
35238c2ecf20Sopenharmony_ci overlap:
35248c2ecf20Sopenharmony_ci	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
35258c2ecf20Sopenharmony_ci	spin_unlock_irq(&sh->stripe_lock);
35268c2ecf20Sopenharmony_ci	return 0;
35278c2ecf20Sopenharmony_ci}
35288c2ecf20Sopenharmony_ci
35298c2ecf20Sopenharmony_cistatic void end_reshape(struct r5conf *conf);
35308c2ecf20Sopenharmony_ci
35318c2ecf20Sopenharmony_cistatic void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
35328c2ecf20Sopenharmony_ci			    struct stripe_head *sh)
35338c2ecf20Sopenharmony_ci{
35348c2ecf20Sopenharmony_ci	int sectors_per_chunk =
35358c2ecf20Sopenharmony_ci		previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
35368c2ecf20Sopenharmony_ci	int dd_idx;
35378c2ecf20Sopenharmony_ci	int chunk_offset = sector_div(stripe, sectors_per_chunk);
35388c2ecf20Sopenharmony_ci	int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
35398c2ecf20Sopenharmony_ci
35408c2ecf20Sopenharmony_ci	raid5_compute_sector(conf,
35418c2ecf20Sopenharmony_ci			     stripe * (disks - conf->max_degraded)
35428c2ecf20Sopenharmony_ci			     *sectors_per_chunk + chunk_offset,
35438c2ecf20Sopenharmony_ci			     previous,
35448c2ecf20Sopenharmony_ci			     &dd_idx, sh);
35458c2ecf20Sopenharmony_ci}
35468c2ecf20Sopenharmony_ci
35478c2ecf20Sopenharmony_cistatic void
35488c2ecf20Sopenharmony_cihandle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
35498c2ecf20Sopenharmony_ci		     struct stripe_head_state *s, int disks)
35508c2ecf20Sopenharmony_ci{
35518c2ecf20Sopenharmony_ci	int i;
35528c2ecf20Sopenharmony_ci	BUG_ON(sh->batch_head);
35538c2ecf20Sopenharmony_ci	for (i = disks; i--; ) {
35548c2ecf20Sopenharmony_ci		struct bio *bi;
35558c2ecf20Sopenharmony_ci		int bitmap_end = 0;
35568c2ecf20Sopenharmony_ci
35578c2ecf20Sopenharmony_ci		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
35588c2ecf20Sopenharmony_ci			struct md_rdev *rdev;
35598c2ecf20Sopenharmony_ci			rcu_read_lock();
35608c2ecf20Sopenharmony_ci			rdev = rcu_dereference(conf->disks[i].rdev);
35618c2ecf20Sopenharmony_ci			if (rdev && test_bit(In_sync, &rdev->flags) &&
35628c2ecf20Sopenharmony_ci			    !test_bit(Faulty, &rdev->flags))
35638c2ecf20Sopenharmony_ci				atomic_inc(&rdev->nr_pending);
35648c2ecf20Sopenharmony_ci			else
35658c2ecf20Sopenharmony_ci				rdev = NULL;
35668c2ecf20Sopenharmony_ci			rcu_read_unlock();
35678c2ecf20Sopenharmony_ci			if (rdev) {
35688c2ecf20Sopenharmony_ci				if (!rdev_set_badblocks(
35698c2ecf20Sopenharmony_ci					    rdev,
35708c2ecf20Sopenharmony_ci					    sh->sector,
35718c2ecf20Sopenharmony_ci					    RAID5_STRIPE_SECTORS(conf), 0))
35728c2ecf20Sopenharmony_ci					md_error(conf->mddev, rdev);
35738c2ecf20Sopenharmony_ci				rdev_dec_pending(rdev, conf->mddev);
35748c2ecf20Sopenharmony_ci			}
35758c2ecf20Sopenharmony_ci		}
35768c2ecf20Sopenharmony_ci		spin_lock_irq(&sh->stripe_lock);
35778c2ecf20Sopenharmony_ci		/* fail all writes first */
35788c2ecf20Sopenharmony_ci		bi = sh->dev[i].towrite;
35798c2ecf20Sopenharmony_ci		sh->dev[i].towrite = NULL;
35808c2ecf20Sopenharmony_ci		sh->overwrite_disks = 0;
35818c2ecf20Sopenharmony_ci		spin_unlock_irq(&sh->stripe_lock);
35828c2ecf20Sopenharmony_ci		if (bi)
35838c2ecf20Sopenharmony_ci			bitmap_end = 1;
35848c2ecf20Sopenharmony_ci
35858c2ecf20Sopenharmony_ci		log_stripe_write_finished(sh);
35868c2ecf20Sopenharmony_ci
35878c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
35888c2ecf20Sopenharmony_ci			wake_up(&conf->wait_for_overlap);
35898c2ecf20Sopenharmony_ci
35908c2ecf20Sopenharmony_ci		while (bi && bi->bi_iter.bi_sector <
35918c2ecf20Sopenharmony_ci			sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
35928c2ecf20Sopenharmony_ci			struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector);
35938c2ecf20Sopenharmony_ci
35948c2ecf20Sopenharmony_ci			md_write_end(conf->mddev);
35958c2ecf20Sopenharmony_ci			bio_io_error(bi);
35968c2ecf20Sopenharmony_ci			bi = nextbi;
35978c2ecf20Sopenharmony_ci		}
35988c2ecf20Sopenharmony_ci		if (bitmap_end)
35998c2ecf20Sopenharmony_ci			md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
36008c2ecf20Sopenharmony_ci					   RAID5_STRIPE_SECTORS(conf), 0, 0);
36018c2ecf20Sopenharmony_ci		bitmap_end = 0;
36028c2ecf20Sopenharmony_ci		/* and fail all 'written' */
36038c2ecf20Sopenharmony_ci		bi = sh->dev[i].written;
36048c2ecf20Sopenharmony_ci		sh->dev[i].written = NULL;
36058c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
36068c2ecf20Sopenharmony_ci			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
36078c2ecf20Sopenharmony_ci			sh->dev[i].page = sh->dev[i].orig_page;
36088c2ecf20Sopenharmony_ci		}
36098c2ecf20Sopenharmony_ci
36108c2ecf20Sopenharmony_ci		if (bi) bitmap_end = 1;
36118c2ecf20Sopenharmony_ci		while (bi && bi->bi_iter.bi_sector <
36128c2ecf20Sopenharmony_ci		       sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
36138c2ecf20Sopenharmony_ci			struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
36148c2ecf20Sopenharmony_ci
36158c2ecf20Sopenharmony_ci			md_write_end(conf->mddev);
36168c2ecf20Sopenharmony_ci			bio_io_error(bi);
36178c2ecf20Sopenharmony_ci			bi = bi2;
36188c2ecf20Sopenharmony_ci		}
36198c2ecf20Sopenharmony_ci
36208c2ecf20Sopenharmony_ci		/* fail any reads if this device is non-operational and
36218c2ecf20Sopenharmony_ci		 * the data has not reached the cache yet.
36228c2ecf20Sopenharmony_ci		 */
36238c2ecf20Sopenharmony_ci		if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
36248c2ecf20Sopenharmony_ci		    s->failed > conf->max_degraded &&
36258c2ecf20Sopenharmony_ci		    (!test_bit(R5_Insync, &sh->dev[i].flags) ||
36268c2ecf20Sopenharmony_ci		      test_bit(R5_ReadError, &sh->dev[i].flags))) {
36278c2ecf20Sopenharmony_ci			spin_lock_irq(&sh->stripe_lock);
36288c2ecf20Sopenharmony_ci			bi = sh->dev[i].toread;
36298c2ecf20Sopenharmony_ci			sh->dev[i].toread = NULL;
36308c2ecf20Sopenharmony_ci			spin_unlock_irq(&sh->stripe_lock);
36318c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
36328c2ecf20Sopenharmony_ci				wake_up(&conf->wait_for_overlap);
36338c2ecf20Sopenharmony_ci			if (bi)
36348c2ecf20Sopenharmony_ci				s->to_read--;
36358c2ecf20Sopenharmony_ci			while (bi && bi->bi_iter.bi_sector <
36368c2ecf20Sopenharmony_ci			       sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
36378c2ecf20Sopenharmony_ci				struct bio *nextbi =
36388c2ecf20Sopenharmony_ci					r5_next_bio(conf, bi, sh->dev[i].sector);
36398c2ecf20Sopenharmony_ci
36408c2ecf20Sopenharmony_ci				bio_io_error(bi);
36418c2ecf20Sopenharmony_ci				bi = nextbi;
36428c2ecf20Sopenharmony_ci			}
36438c2ecf20Sopenharmony_ci		}
36448c2ecf20Sopenharmony_ci		if (bitmap_end)
36458c2ecf20Sopenharmony_ci			md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
36468c2ecf20Sopenharmony_ci					   RAID5_STRIPE_SECTORS(conf), 0, 0);
36478c2ecf20Sopenharmony_ci		/* If we were in the middle of a write the parity block might
36488c2ecf20Sopenharmony_ci		 * still be locked - so just clear all R5_LOCKED flags
36498c2ecf20Sopenharmony_ci		 */
36508c2ecf20Sopenharmony_ci		clear_bit(R5_LOCKED, &sh->dev[i].flags);
36518c2ecf20Sopenharmony_ci	}
36528c2ecf20Sopenharmony_ci	s->to_write = 0;
36538c2ecf20Sopenharmony_ci	s->written = 0;
36548c2ecf20Sopenharmony_ci
36558c2ecf20Sopenharmony_ci	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
36568c2ecf20Sopenharmony_ci		if (atomic_dec_and_test(&conf->pending_full_writes))
36578c2ecf20Sopenharmony_ci			md_wakeup_thread(conf->mddev->thread);
36588c2ecf20Sopenharmony_ci}
36598c2ecf20Sopenharmony_ci
36608c2ecf20Sopenharmony_cistatic void
36618c2ecf20Sopenharmony_cihandle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
36628c2ecf20Sopenharmony_ci		   struct stripe_head_state *s)
36638c2ecf20Sopenharmony_ci{
36648c2ecf20Sopenharmony_ci	int abort = 0;
36658c2ecf20Sopenharmony_ci	int i;
36668c2ecf20Sopenharmony_ci
36678c2ecf20Sopenharmony_ci	BUG_ON(sh->batch_head);
36688c2ecf20Sopenharmony_ci	clear_bit(STRIPE_SYNCING, &sh->state);
36698c2ecf20Sopenharmony_ci	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
36708c2ecf20Sopenharmony_ci		wake_up(&conf->wait_for_overlap);
36718c2ecf20Sopenharmony_ci	s->syncing = 0;
36728c2ecf20Sopenharmony_ci	s->replacing = 0;
36738c2ecf20Sopenharmony_ci	/* There is nothing more to do for sync/check/repair.
36748c2ecf20Sopenharmony_ci	 * Don't even need to abort as that is handled elsewhere
36758c2ecf20Sopenharmony_ci	 * if needed, and not always wanted e.g. if there is a known
36768c2ecf20Sopenharmony_ci	 * bad block here.
36778c2ecf20Sopenharmony_ci	 * For recover/replace we need to record a bad block on all
36788c2ecf20Sopenharmony_ci	 * non-sync devices, or abort the recovery
36798c2ecf20Sopenharmony_ci	 */
36808c2ecf20Sopenharmony_ci	if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
36818c2ecf20Sopenharmony_ci		/* During recovery devices cannot be removed, so
36828c2ecf20Sopenharmony_ci		 * locking and refcounting of rdevs is not needed
36838c2ecf20Sopenharmony_ci		 */
36848c2ecf20Sopenharmony_ci		rcu_read_lock();
36858c2ecf20Sopenharmony_ci		for (i = 0; i < conf->raid_disks; i++) {
36868c2ecf20Sopenharmony_ci			struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
36878c2ecf20Sopenharmony_ci			if (rdev
36888c2ecf20Sopenharmony_ci			    && !test_bit(Faulty, &rdev->flags)
36898c2ecf20Sopenharmony_ci			    && !test_bit(In_sync, &rdev->flags)
36908c2ecf20Sopenharmony_ci			    && !rdev_set_badblocks(rdev, sh->sector,
36918c2ecf20Sopenharmony_ci						   RAID5_STRIPE_SECTORS(conf), 0))
36928c2ecf20Sopenharmony_ci				abort = 1;
36938c2ecf20Sopenharmony_ci			rdev = rcu_dereference(conf->disks[i].replacement);
36948c2ecf20Sopenharmony_ci			if (rdev
36958c2ecf20Sopenharmony_ci			    && !test_bit(Faulty, &rdev->flags)
36968c2ecf20Sopenharmony_ci			    && !test_bit(In_sync, &rdev->flags)
36978c2ecf20Sopenharmony_ci			    && !rdev_set_badblocks(rdev, sh->sector,
36988c2ecf20Sopenharmony_ci						   RAID5_STRIPE_SECTORS(conf), 0))
36998c2ecf20Sopenharmony_ci				abort = 1;
37008c2ecf20Sopenharmony_ci		}
37018c2ecf20Sopenharmony_ci		rcu_read_unlock();
37028c2ecf20Sopenharmony_ci		if (abort)
37038c2ecf20Sopenharmony_ci			conf->recovery_disabled =
37048c2ecf20Sopenharmony_ci				conf->mddev->recovery_disabled;
37058c2ecf20Sopenharmony_ci	}
37068c2ecf20Sopenharmony_ci	md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort);
37078c2ecf20Sopenharmony_ci}
37088c2ecf20Sopenharmony_ci
37098c2ecf20Sopenharmony_cistatic int want_replace(struct stripe_head *sh, int disk_idx)
37108c2ecf20Sopenharmony_ci{
37118c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
37128c2ecf20Sopenharmony_ci	int rv = 0;
37138c2ecf20Sopenharmony_ci
37148c2ecf20Sopenharmony_ci	rcu_read_lock();
37158c2ecf20Sopenharmony_ci	rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
37168c2ecf20Sopenharmony_ci	if (rdev
37178c2ecf20Sopenharmony_ci	    && !test_bit(Faulty, &rdev->flags)
37188c2ecf20Sopenharmony_ci	    && !test_bit(In_sync, &rdev->flags)
37198c2ecf20Sopenharmony_ci	    && (rdev->recovery_offset <= sh->sector
37208c2ecf20Sopenharmony_ci		|| rdev->mddev->recovery_cp <= sh->sector))
37218c2ecf20Sopenharmony_ci		rv = 1;
37228c2ecf20Sopenharmony_ci	rcu_read_unlock();
37238c2ecf20Sopenharmony_ci	return rv;
37248c2ecf20Sopenharmony_ci}
37258c2ecf20Sopenharmony_ci
37268c2ecf20Sopenharmony_cistatic int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
37278c2ecf20Sopenharmony_ci			   int disk_idx, int disks)
37288c2ecf20Sopenharmony_ci{
37298c2ecf20Sopenharmony_ci	struct r5dev *dev = &sh->dev[disk_idx];
37308c2ecf20Sopenharmony_ci	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
37318c2ecf20Sopenharmony_ci				  &sh->dev[s->failed_num[1]] };
37328c2ecf20Sopenharmony_ci	int i;
37338c2ecf20Sopenharmony_ci	bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);
37348c2ecf20Sopenharmony_ci
37358c2ecf20Sopenharmony_ci
37368c2ecf20Sopenharmony_ci	if (test_bit(R5_LOCKED, &dev->flags) ||
37378c2ecf20Sopenharmony_ci	    test_bit(R5_UPTODATE, &dev->flags))
37388c2ecf20Sopenharmony_ci		/* No point reading this as we already have it or have
37398c2ecf20Sopenharmony_ci		 * decided to get it.
37408c2ecf20Sopenharmony_ci		 */
37418c2ecf20Sopenharmony_ci		return 0;
37428c2ecf20Sopenharmony_ci
37438c2ecf20Sopenharmony_ci	if (dev->toread ||
37448c2ecf20Sopenharmony_ci	    (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)))
37458c2ecf20Sopenharmony_ci		/* We need this block to directly satisfy a request */
37468c2ecf20Sopenharmony_ci		return 1;
37478c2ecf20Sopenharmony_ci
37488c2ecf20Sopenharmony_ci	if (s->syncing || s->expanding ||
37498c2ecf20Sopenharmony_ci	    (s->replacing && want_replace(sh, disk_idx)))
37508c2ecf20Sopenharmony_ci		/* When syncing, or expanding we read everything.
37518c2ecf20Sopenharmony_ci		 * When replacing, we need the replaced block.
37528c2ecf20Sopenharmony_ci		 */
37538c2ecf20Sopenharmony_ci		return 1;
37548c2ecf20Sopenharmony_ci
37558c2ecf20Sopenharmony_ci	if ((s->failed >= 1 && fdev[0]->toread) ||
37568c2ecf20Sopenharmony_ci	    (s->failed >= 2 && fdev[1]->toread))
37578c2ecf20Sopenharmony_ci		/* If we want to read from a failed device, then
37588c2ecf20Sopenharmony_ci		 * we need to actually read every other device.
37598c2ecf20Sopenharmony_ci		 */
37608c2ecf20Sopenharmony_ci		return 1;
37618c2ecf20Sopenharmony_ci
37628c2ecf20Sopenharmony_ci	/* Sometimes neither read-modify-write nor reconstruct-write
37638c2ecf20Sopenharmony_ci	 * cycles can work.  In those cases we read every block we
37648c2ecf20Sopenharmony_ci	 * can.  Then the parity-update is certain to have enough to
37658c2ecf20Sopenharmony_ci	 * work with.
37668c2ecf20Sopenharmony_ci	 * This can only be a problem when we need to write something,
37678c2ecf20Sopenharmony_ci	 * and some device has failed.  If either of those tests
37688c2ecf20Sopenharmony_ci	 * fail we need look no further.
37698c2ecf20Sopenharmony_ci	 */
37708c2ecf20Sopenharmony_ci	if (!s->failed || !s->to_write)
37718c2ecf20Sopenharmony_ci		return 0;
37728c2ecf20Sopenharmony_ci
37738c2ecf20Sopenharmony_ci	if (test_bit(R5_Insync, &dev->flags) &&
37748c2ecf20Sopenharmony_ci	    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
37758c2ecf20Sopenharmony_ci		/* Pre-reads at not permitted until after short delay
37768c2ecf20Sopenharmony_ci		 * to gather multiple requests.  However if this
37778c2ecf20Sopenharmony_ci		 * device is no Insync, the block could only be computed
37788c2ecf20Sopenharmony_ci		 * and there is no need to delay that.
37798c2ecf20Sopenharmony_ci		 */
37808c2ecf20Sopenharmony_ci		return 0;
37818c2ecf20Sopenharmony_ci
37828c2ecf20Sopenharmony_ci	for (i = 0; i < s->failed && i < 2; i++) {
37838c2ecf20Sopenharmony_ci		if (fdev[i]->towrite &&
37848c2ecf20Sopenharmony_ci		    !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
37858c2ecf20Sopenharmony_ci		    !test_bit(R5_OVERWRITE, &fdev[i]->flags))
37868c2ecf20Sopenharmony_ci			/* If we have a partial write to a failed
37878c2ecf20Sopenharmony_ci			 * device, then we will need to reconstruct
37888c2ecf20Sopenharmony_ci			 * the content of that device, so all other
37898c2ecf20Sopenharmony_ci			 * devices must be read.
37908c2ecf20Sopenharmony_ci			 */
37918c2ecf20Sopenharmony_ci			return 1;
37928c2ecf20Sopenharmony_ci
37938c2ecf20Sopenharmony_ci		if (s->failed >= 2 &&
37948c2ecf20Sopenharmony_ci		    (fdev[i]->towrite ||
37958c2ecf20Sopenharmony_ci		     s->failed_num[i] == sh->pd_idx ||
37968c2ecf20Sopenharmony_ci		     s->failed_num[i] == sh->qd_idx) &&
37978c2ecf20Sopenharmony_ci		    !test_bit(R5_UPTODATE, &fdev[i]->flags))
37988c2ecf20Sopenharmony_ci			/* In max degraded raid6, If the failed disk is P, Q,
37998c2ecf20Sopenharmony_ci			 * or we want to read the failed disk, we need to do
38008c2ecf20Sopenharmony_ci			 * reconstruct-write.
38018c2ecf20Sopenharmony_ci			 */
38028c2ecf20Sopenharmony_ci			force_rcw = true;
38038c2ecf20Sopenharmony_ci	}
38048c2ecf20Sopenharmony_ci
38058c2ecf20Sopenharmony_ci	/* If we are forced to do a reconstruct-write, because parity
38068c2ecf20Sopenharmony_ci	 * cannot be trusted and we are currently recovering it, there
38078c2ecf20Sopenharmony_ci	 * is extra need to be careful.
38088c2ecf20Sopenharmony_ci	 * If one of the devices that we would need to read, because
38098c2ecf20Sopenharmony_ci	 * it is not being overwritten (and maybe not written at all)
38108c2ecf20Sopenharmony_ci	 * is missing/faulty, then we need to read everything we can.
38118c2ecf20Sopenharmony_ci	 */
38128c2ecf20Sopenharmony_ci	if (!force_rcw &&
38138c2ecf20Sopenharmony_ci	    sh->sector < sh->raid_conf->mddev->recovery_cp)
38148c2ecf20Sopenharmony_ci		/* reconstruct-write isn't being forced */
38158c2ecf20Sopenharmony_ci		return 0;
38168c2ecf20Sopenharmony_ci	for (i = 0; i < s->failed && i < 2; i++) {
38178c2ecf20Sopenharmony_ci		if (s->failed_num[i] != sh->pd_idx &&
38188c2ecf20Sopenharmony_ci		    s->failed_num[i] != sh->qd_idx &&
38198c2ecf20Sopenharmony_ci		    !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
38208c2ecf20Sopenharmony_ci		    !test_bit(R5_OVERWRITE, &fdev[i]->flags))
38218c2ecf20Sopenharmony_ci			return 1;
38228c2ecf20Sopenharmony_ci	}
38238c2ecf20Sopenharmony_ci
38248c2ecf20Sopenharmony_ci	return 0;
38258c2ecf20Sopenharmony_ci}
38268c2ecf20Sopenharmony_ci
38278c2ecf20Sopenharmony_ci/* fetch_block - checks the given member device to see if its data needs
38288c2ecf20Sopenharmony_ci * to be read or computed to satisfy a request.
38298c2ecf20Sopenharmony_ci *
38308c2ecf20Sopenharmony_ci * Returns 1 when no more member devices need to be checked, otherwise returns
38318c2ecf20Sopenharmony_ci * 0 to tell the loop in handle_stripe_fill to continue
38328c2ecf20Sopenharmony_ci */
38338c2ecf20Sopenharmony_cistatic int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
38348c2ecf20Sopenharmony_ci		       int disk_idx, int disks)
38358c2ecf20Sopenharmony_ci{
38368c2ecf20Sopenharmony_ci	struct r5dev *dev = &sh->dev[disk_idx];
38378c2ecf20Sopenharmony_ci
38388c2ecf20Sopenharmony_ci	/* is the data in this block needed, and can we get it? */
38398c2ecf20Sopenharmony_ci	if (need_this_block(sh, s, disk_idx, disks)) {
38408c2ecf20Sopenharmony_ci		/* we would like to get this block, possibly by computing it,
38418c2ecf20Sopenharmony_ci		 * otherwise read it if the backing disk is insync
38428c2ecf20Sopenharmony_ci		 */
38438c2ecf20Sopenharmony_ci		BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
38448c2ecf20Sopenharmony_ci		BUG_ON(test_bit(R5_Wantread, &dev->flags));
38458c2ecf20Sopenharmony_ci		BUG_ON(sh->batch_head);
38468c2ecf20Sopenharmony_ci
38478c2ecf20Sopenharmony_ci		/*
38488c2ecf20Sopenharmony_ci		 * In the raid6 case if the only non-uptodate disk is P
38498c2ecf20Sopenharmony_ci		 * then we already trusted P to compute the other failed
38508c2ecf20Sopenharmony_ci		 * drives. It is safe to compute rather than re-read P.
38518c2ecf20Sopenharmony_ci		 * In other cases we only compute blocks from failed
38528c2ecf20Sopenharmony_ci		 * devices, otherwise check/repair might fail to detect
38538c2ecf20Sopenharmony_ci		 * a real inconsistency.
38548c2ecf20Sopenharmony_ci		 */
38558c2ecf20Sopenharmony_ci
38568c2ecf20Sopenharmony_ci		if ((s->uptodate == disks - 1) &&
38578c2ecf20Sopenharmony_ci		    ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) ||
38588c2ecf20Sopenharmony_ci		    (s->failed && (disk_idx == s->failed_num[0] ||
38598c2ecf20Sopenharmony_ci				   disk_idx == s->failed_num[1])))) {
38608c2ecf20Sopenharmony_ci			/* have disk failed, and we're requested to fetch it;
38618c2ecf20Sopenharmony_ci			 * do compute it
38628c2ecf20Sopenharmony_ci			 */
38638c2ecf20Sopenharmony_ci			pr_debug("Computing stripe %llu block %d\n",
38648c2ecf20Sopenharmony_ci			       (unsigned long long)sh->sector, disk_idx);
38658c2ecf20Sopenharmony_ci			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
38668c2ecf20Sopenharmony_ci			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
38678c2ecf20Sopenharmony_ci			set_bit(R5_Wantcompute, &dev->flags);
38688c2ecf20Sopenharmony_ci			sh->ops.target = disk_idx;
38698c2ecf20Sopenharmony_ci			sh->ops.target2 = -1; /* no 2nd target */
38708c2ecf20Sopenharmony_ci			s->req_compute = 1;
38718c2ecf20Sopenharmony_ci			/* Careful: from this point on 'uptodate' is in the eye
38728c2ecf20Sopenharmony_ci			 * of raid_run_ops which services 'compute' operations
38738c2ecf20Sopenharmony_ci			 * before writes. R5_Wantcompute flags a block that will
38748c2ecf20Sopenharmony_ci			 * be R5_UPTODATE by the time it is needed for a
38758c2ecf20Sopenharmony_ci			 * subsequent operation.
38768c2ecf20Sopenharmony_ci			 */
38778c2ecf20Sopenharmony_ci			s->uptodate++;
38788c2ecf20Sopenharmony_ci			return 1;
38798c2ecf20Sopenharmony_ci		} else if (s->uptodate == disks-2 && s->failed >= 2) {
38808c2ecf20Sopenharmony_ci			/* Computing 2-failure is *very* expensive; only
38818c2ecf20Sopenharmony_ci			 * do it if failed >= 2
38828c2ecf20Sopenharmony_ci			 */
38838c2ecf20Sopenharmony_ci			int other;
38848c2ecf20Sopenharmony_ci			for (other = disks; other--; ) {
38858c2ecf20Sopenharmony_ci				if (other == disk_idx)
38868c2ecf20Sopenharmony_ci					continue;
38878c2ecf20Sopenharmony_ci				if (!test_bit(R5_UPTODATE,
38888c2ecf20Sopenharmony_ci				      &sh->dev[other].flags))
38898c2ecf20Sopenharmony_ci					break;
38908c2ecf20Sopenharmony_ci			}
38918c2ecf20Sopenharmony_ci			BUG_ON(other < 0);
38928c2ecf20Sopenharmony_ci			pr_debug("Computing stripe %llu blocks %d,%d\n",
38938c2ecf20Sopenharmony_ci			       (unsigned long long)sh->sector,
38948c2ecf20Sopenharmony_ci			       disk_idx, other);
38958c2ecf20Sopenharmony_ci			set_bit(STRIPE_COMPUTE_RUN, &sh->state);
38968c2ecf20Sopenharmony_ci			set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
38978c2ecf20Sopenharmony_ci			set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
38988c2ecf20Sopenharmony_ci			set_bit(R5_Wantcompute, &sh->dev[other].flags);
38998c2ecf20Sopenharmony_ci			sh->ops.target = disk_idx;
39008c2ecf20Sopenharmony_ci			sh->ops.target2 = other;
39018c2ecf20Sopenharmony_ci			s->uptodate += 2;
39028c2ecf20Sopenharmony_ci			s->req_compute = 1;
39038c2ecf20Sopenharmony_ci			return 1;
39048c2ecf20Sopenharmony_ci		} else if (test_bit(R5_Insync, &dev->flags)) {
39058c2ecf20Sopenharmony_ci			set_bit(R5_LOCKED, &dev->flags);
39068c2ecf20Sopenharmony_ci			set_bit(R5_Wantread, &dev->flags);
39078c2ecf20Sopenharmony_ci			s->locked++;
39088c2ecf20Sopenharmony_ci			pr_debug("Reading block %d (sync=%d)\n",
39098c2ecf20Sopenharmony_ci				disk_idx, s->syncing);
39108c2ecf20Sopenharmony_ci		}
39118c2ecf20Sopenharmony_ci	}
39128c2ecf20Sopenharmony_ci
39138c2ecf20Sopenharmony_ci	return 0;
39148c2ecf20Sopenharmony_ci}
39158c2ecf20Sopenharmony_ci
39168c2ecf20Sopenharmony_ci/*
39178c2ecf20Sopenharmony_ci * handle_stripe_fill - read or compute data to satisfy pending requests.
39188c2ecf20Sopenharmony_ci */
39198c2ecf20Sopenharmony_cistatic void handle_stripe_fill(struct stripe_head *sh,
39208c2ecf20Sopenharmony_ci			       struct stripe_head_state *s,
39218c2ecf20Sopenharmony_ci			       int disks)
39228c2ecf20Sopenharmony_ci{
39238c2ecf20Sopenharmony_ci	int i;
39248c2ecf20Sopenharmony_ci
39258c2ecf20Sopenharmony_ci	/* look for blocks to read/compute, skip this if a compute
39268c2ecf20Sopenharmony_ci	 * is already in flight, or if the stripe contents are in the
39278c2ecf20Sopenharmony_ci	 * midst of changing due to a write
39288c2ecf20Sopenharmony_ci	 */
39298c2ecf20Sopenharmony_ci	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
39308c2ecf20Sopenharmony_ci	    !sh->reconstruct_state) {
39318c2ecf20Sopenharmony_ci
39328c2ecf20Sopenharmony_ci		/*
39338c2ecf20Sopenharmony_ci		 * For degraded stripe with data in journal, do not handle
39348c2ecf20Sopenharmony_ci		 * read requests yet, instead, flush the stripe to raid
39358c2ecf20Sopenharmony_ci		 * disks first, this avoids handling complex rmw of write
39368c2ecf20Sopenharmony_ci		 * back cache (prexor with orig_page, and then xor with
39378c2ecf20Sopenharmony_ci		 * page) in the read path
39388c2ecf20Sopenharmony_ci		 */
39398c2ecf20Sopenharmony_ci		if (s->to_read && s->injournal && s->failed) {
39408c2ecf20Sopenharmony_ci			if (test_bit(STRIPE_R5C_CACHING, &sh->state))
39418c2ecf20Sopenharmony_ci				r5c_make_stripe_write_out(sh);
39428c2ecf20Sopenharmony_ci			goto out;
39438c2ecf20Sopenharmony_ci		}
39448c2ecf20Sopenharmony_ci
39458c2ecf20Sopenharmony_ci		for (i = disks; i--; )
39468c2ecf20Sopenharmony_ci			if (fetch_block(sh, s, i, disks))
39478c2ecf20Sopenharmony_ci				break;
39488c2ecf20Sopenharmony_ci	}
39498c2ecf20Sopenharmony_ciout:
39508c2ecf20Sopenharmony_ci	set_bit(STRIPE_HANDLE, &sh->state);
39518c2ecf20Sopenharmony_ci}
39528c2ecf20Sopenharmony_ci
39538c2ecf20Sopenharmony_cistatic void break_stripe_batch_list(struct stripe_head *head_sh,
39548c2ecf20Sopenharmony_ci				    unsigned long handle_flags);
39558c2ecf20Sopenharmony_ci/* handle_stripe_clean_event
39568c2ecf20Sopenharmony_ci * any written block on an uptodate or failed drive can be returned.
39578c2ecf20Sopenharmony_ci * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
39588c2ecf20Sopenharmony_ci * never LOCKED, so we don't need to test 'failed' directly.
39598c2ecf20Sopenharmony_ci */
39608c2ecf20Sopenharmony_cistatic void handle_stripe_clean_event(struct r5conf *conf,
39618c2ecf20Sopenharmony_ci	struct stripe_head *sh, int disks)
39628c2ecf20Sopenharmony_ci{
39638c2ecf20Sopenharmony_ci	int i;
39648c2ecf20Sopenharmony_ci	struct r5dev *dev;
39658c2ecf20Sopenharmony_ci	int discard_pending = 0;
39668c2ecf20Sopenharmony_ci	struct stripe_head *head_sh = sh;
39678c2ecf20Sopenharmony_ci	bool do_endio = false;
39688c2ecf20Sopenharmony_ci
39698c2ecf20Sopenharmony_ci	for (i = disks; i--; )
39708c2ecf20Sopenharmony_ci		if (sh->dev[i].written) {
39718c2ecf20Sopenharmony_ci			dev = &sh->dev[i];
39728c2ecf20Sopenharmony_ci			if (!test_bit(R5_LOCKED, &dev->flags) &&
39738c2ecf20Sopenharmony_ci			    (test_bit(R5_UPTODATE, &dev->flags) ||
39748c2ecf20Sopenharmony_ci			     test_bit(R5_Discard, &dev->flags) ||
39758c2ecf20Sopenharmony_ci			     test_bit(R5_SkipCopy, &dev->flags))) {
39768c2ecf20Sopenharmony_ci				/* We can return any write requests */
39778c2ecf20Sopenharmony_ci				struct bio *wbi, *wbi2;
39788c2ecf20Sopenharmony_ci				pr_debug("Return write for disc %d\n", i);
39798c2ecf20Sopenharmony_ci				if (test_and_clear_bit(R5_Discard, &dev->flags))
39808c2ecf20Sopenharmony_ci					clear_bit(R5_UPTODATE, &dev->flags);
39818c2ecf20Sopenharmony_ci				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
39828c2ecf20Sopenharmony_ci					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
39838c2ecf20Sopenharmony_ci				}
39848c2ecf20Sopenharmony_ci				do_endio = true;
39858c2ecf20Sopenharmony_ci
39868c2ecf20Sopenharmony_cireturnbi:
39878c2ecf20Sopenharmony_ci				dev->page = dev->orig_page;
39888c2ecf20Sopenharmony_ci				wbi = dev->written;
39898c2ecf20Sopenharmony_ci				dev->written = NULL;
39908c2ecf20Sopenharmony_ci				while (wbi && wbi->bi_iter.bi_sector <
39918c2ecf20Sopenharmony_ci					dev->sector + RAID5_STRIPE_SECTORS(conf)) {
39928c2ecf20Sopenharmony_ci					wbi2 = r5_next_bio(conf, wbi, dev->sector);
39938c2ecf20Sopenharmony_ci					md_write_end(conf->mddev);
39948c2ecf20Sopenharmony_ci					bio_endio(wbi);
39958c2ecf20Sopenharmony_ci					wbi = wbi2;
39968c2ecf20Sopenharmony_ci				}
39978c2ecf20Sopenharmony_ci				md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
39988c2ecf20Sopenharmony_ci						   RAID5_STRIPE_SECTORS(conf),
39998c2ecf20Sopenharmony_ci						   !test_bit(STRIPE_DEGRADED, &sh->state),
40008c2ecf20Sopenharmony_ci						   0);
40018c2ecf20Sopenharmony_ci				if (head_sh->batch_head) {
40028c2ecf20Sopenharmony_ci					sh = list_first_entry(&sh->batch_list,
40038c2ecf20Sopenharmony_ci							      struct stripe_head,
40048c2ecf20Sopenharmony_ci							      batch_list);
40058c2ecf20Sopenharmony_ci					if (sh != head_sh) {
40068c2ecf20Sopenharmony_ci						dev = &sh->dev[i];
40078c2ecf20Sopenharmony_ci						goto returnbi;
40088c2ecf20Sopenharmony_ci					}
40098c2ecf20Sopenharmony_ci				}
40108c2ecf20Sopenharmony_ci				sh = head_sh;
40118c2ecf20Sopenharmony_ci				dev = &sh->dev[i];
40128c2ecf20Sopenharmony_ci			} else if (test_bit(R5_Discard, &dev->flags))
40138c2ecf20Sopenharmony_ci				discard_pending = 1;
40148c2ecf20Sopenharmony_ci		}
40158c2ecf20Sopenharmony_ci
40168c2ecf20Sopenharmony_ci	log_stripe_write_finished(sh);
40178c2ecf20Sopenharmony_ci
40188c2ecf20Sopenharmony_ci	if (!discard_pending &&
40198c2ecf20Sopenharmony_ci	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
40208c2ecf20Sopenharmony_ci		int hash;
40218c2ecf20Sopenharmony_ci		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
40228c2ecf20Sopenharmony_ci		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
40238c2ecf20Sopenharmony_ci		if (sh->qd_idx >= 0) {
40248c2ecf20Sopenharmony_ci			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
40258c2ecf20Sopenharmony_ci			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
40268c2ecf20Sopenharmony_ci		}
40278c2ecf20Sopenharmony_ci		/* now that discard is done we can proceed with any sync */
40288c2ecf20Sopenharmony_ci		clear_bit(STRIPE_DISCARD, &sh->state);
40298c2ecf20Sopenharmony_ci		/*
40308c2ecf20Sopenharmony_ci		 * SCSI discard will change some bio fields and the stripe has
40318c2ecf20Sopenharmony_ci		 * no updated data, so remove it from hash list and the stripe
40328c2ecf20Sopenharmony_ci		 * will be reinitialized
40338c2ecf20Sopenharmony_ci		 */
40348c2ecf20Sopenharmony_ciunhash:
40358c2ecf20Sopenharmony_ci		hash = sh->hash_lock_index;
40368c2ecf20Sopenharmony_ci		spin_lock_irq(conf->hash_locks + hash);
40378c2ecf20Sopenharmony_ci		remove_hash(sh);
40388c2ecf20Sopenharmony_ci		spin_unlock_irq(conf->hash_locks + hash);
40398c2ecf20Sopenharmony_ci		if (head_sh->batch_head) {
40408c2ecf20Sopenharmony_ci			sh = list_first_entry(&sh->batch_list,
40418c2ecf20Sopenharmony_ci					      struct stripe_head, batch_list);
40428c2ecf20Sopenharmony_ci			if (sh != head_sh)
40438c2ecf20Sopenharmony_ci					goto unhash;
40448c2ecf20Sopenharmony_ci		}
40458c2ecf20Sopenharmony_ci		sh = head_sh;
40468c2ecf20Sopenharmony_ci
40478c2ecf20Sopenharmony_ci		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
40488c2ecf20Sopenharmony_ci			set_bit(STRIPE_HANDLE, &sh->state);
40498c2ecf20Sopenharmony_ci
40508c2ecf20Sopenharmony_ci	}
40518c2ecf20Sopenharmony_ci
40528c2ecf20Sopenharmony_ci	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
40538c2ecf20Sopenharmony_ci		if (atomic_dec_and_test(&conf->pending_full_writes))
40548c2ecf20Sopenharmony_ci			md_wakeup_thread(conf->mddev->thread);
40558c2ecf20Sopenharmony_ci
40568c2ecf20Sopenharmony_ci	if (head_sh->batch_head && do_endio)
40578c2ecf20Sopenharmony_ci		break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
40588c2ecf20Sopenharmony_ci}
40598c2ecf20Sopenharmony_ci
40608c2ecf20Sopenharmony_ci/*
40618c2ecf20Sopenharmony_ci * For RMW in write back cache, we need extra page in prexor to store the
40628c2ecf20Sopenharmony_ci * old data. This page is stored in dev->orig_page.
40638c2ecf20Sopenharmony_ci *
40648c2ecf20Sopenharmony_ci * This function checks whether we have data for prexor. The exact logic
40658c2ecf20Sopenharmony_ci * is:
40668c2ecf20Sopenharmony_ci *       R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE)
40678c2ecf20Sopenharmony_ci */
40688c2ecf20Sopenharmony_cistatic inline bool uptodate_for_rmw(struct r5dev *dev)
40698c2ecf20Sopenharmony_ci{
40708c2ecf20Sopenharmony_ci	return (test_bit(R5_UPTODATE, &dev->flags)) &&
40718c2ecf20Sopenharmony_ci		(!test_bit(R5_InJournal, &dev->flags) ||
40728c2ecf20Sopenharmony_ci		 test_bit(R5_OrigPageUPTDODATE, &dev->flags));
40738c2ecf20Sopenharmony_ci}
40748c2ecf20Sopenharmony_ci
/*
 * Decide how to satisfy the pending writes on @sh: read-modify-write
 * (rmw: read old data + parity, xor in the new data) or reconstruct-write
 * (rcw: read the blocks not being overwritten and recompute parity from
 * scratch).  Each strategy is costed by the number of reads it would
 * require; the cheaper one is scheduled, with conf->rmw_level able to
 * force RCW or break ties.
 *
 * Returns 0 normally, or -EAGAIN when an rmw needs a prexor page that
 * could not be obtained yet (the stripe is marked STRIPE_DELAYED and
 * s->waiting_extra_page is set so it is retried later).
 */
static int handle_stripe_dirtying(struct r5conf *conf,
				  struct stripe_head *sh,
				  struct stripe_head_state *s,
				  int disks)
{
	int rmw = 0, rcw = 0, i;
	sector_t recovery_cp = conf->mddev->recovery_cp;

	/* Check whether resync is now happening or should start.
	 * If yes, then the array is dirty (after unclean shutdown or
	 * initial creation), so parity in some stripes might be inconsistent.
	 * In this case, we need to always do reconstruct-write, to ensure
	 * that in case of drive failure or read-error correction, we
	 * generate correct data from the parity.
	 */
	if (conf->rmw_level == PARITY_DISABLE_RMW ||
	    (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
	     s->failed == 0)) {
		/* Calculate the real rcw later - for now make it
		 * look like rcw is cheaper
		 */
		rcw = 1; rmw = 2;
		pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
			 conf->rmw_level, (unsigned long long)recovery_cp,
			 (unsigned long long)sh->sector);
	} else for (i = disks; i--; ) {
		/* would I have to read this buffer for read_modify_write */
		struct r5dev *dev = &sh->dev[i];
		if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
		     i == sh->pd_idx || i == sh->qd_idx ||
		     test_bit(R5_InJournal, &dev->flags)) &&
		    !test_bit(R5_LOCKED, &dev->flags) &&
		    !(uptodate_for_rmw(dev) ||
		      test_bit(R5_Wantcompute, &dev->flags))) {
			if (test_bit(R5_Insync, &dev->flags))
				rmw++;
			else
				rmw += 2*disks;  /* cannot read it */
		}
		/* Would I have to read this buffer for reconstruct_write */
		if (!test_bit(R5_OVERWRITE, &dev->flags) &&
		    i != sh->pd_idx && i != sh->qd_idx &&
		    !test_bit(R5_LOCKED, &dev->flags) &&
		    !(test_bit(R5_UPTODATE, &dev->flags) ||
		      test_bit(R5_Wantcompute, &dev->flags))) {
			if (test_bit(R5_Insync, &dev->flags))
				rcw++;
			else
				rcw += 2*disks;
		}
	}

	pr_debug("for sector %llu state 0x%lx, rmw=%d rcw=%d\n",
		 (unsigned long long)sh->sector, sh->state, rmw, rcw);
	set_bit(STRIPE_HANDLE, &sh->state);
	if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
		/* prefer read-modify-write, but need to get some data */
		if (conf->mddev->queue)
			blk_add_trace_msg(conf->mddev->queue,
					  "raid5 rmw %llu %d",
					  (unsigned long long)sh->sector, rmw);
		/*
		 * In write-back cache mode an in-journal device needs a
		 * separate page (dev->orig_page) to hold the old data for
		 * prexor; secure those pages before scheduling any reads.
		 */
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (test_bit(R5_InJournal, &dev->flags) &&
			    dev->page == dev->orig_page &&
			    !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
				/* alloc page for prexor */
				struct page *p = alloc_page(GFP_NOIO);

				if (p) {
					dev->orig_page = p;
					continue;
				}

				/*
				 * alloc_page() failed, try use
				 * disk_info->extra_page
				 */
				if (!test_and_set_bit(R5C_EXTRA_PAGE_IN_USE,
						      &conf->cache_state)) {
					r5c_use_extra_page(sh);
					break;
				}

				/* extra_page in use, add to delayed_list */
				set_bit(STRIPE_DELAYED, &sh->state);
				s->waiting_extra_page = 1;
				return -EAGAIN;
			}
		}

		/* Schedule the old-data/parity reads needed for rmw;
		 * same device predicate as the costing loop above, now
		 * restricted to devices we can actually read (R5_Insync).
		 */
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
			     i == sh->pd_idx || i == sh->qd_idx ||
			     test_bit(R5_InJournal, &dev->flags)) &&
			    !test_bit(R5_LOCKED, &dev->flags) &&
			    !(uptodate_for_rmw(dev) ||
			      test_bit(R5_Wantcompute, &dev->flags)) &&
			    test_bit(R5_Insync, &dev->flags)) {
				if (test_bit(STRIPE_PREREAD_ACTIVE,
					     &sh->state)) {
					pr_debug("Read_old block %d for r-m-w\n",
						 i);
					set_bit(R5_LOCKED, &dev->flags);
					set_bit(R5_Wantread, &dev->flags);
					s->locked++;
				} else
					set_bit(STRIPE_DELAYED, &sh->state);
			}
		}
	}
	if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) {
		/* want reconstruct write, but need to get some data */
		int qread =0;
		/* re-count rcw as the number of reads actually outstanding */
		rcw = 0;
		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
			if (!test_bit(R5_OVERWRITE, &dev->flags) &&
			    i != sh->pd_idx && i != sh->qd_idx &&
			    !test_bit(R5_LOCKED, &dev->flags) &&
			    !(test_bit(R5_UPTODATE, &dev->flags) ||
			      test_bit(R5_Wantcompute, &dev->flags))) {
				rcw++;
				if (test_bit(R5_Insync, &dev->flags) &&
				    test_bit(STRIPE_PREREAD_ACTIVE,
					     &sh->state)) {
					pr_debug("Read_old block "
						"%d for Reconstruct\n", i);
					set_bit(R5_LOCKED, &dev->flags);
					set_bit(R5_Wantread, &dev->flags);
					s->locked++;
					qread++;
				} else
					set_bit(STRIPE_DELAYED, &sh->state);
			}
		}
		if (rcw && conf->mddev->queue)
			blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
					  (unsigned long long)sh->sector,
					  rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
	}

	/* A cost above 'disks' is only reachable via the 2*disks
	 * cannot-read penalty, i.e. both strategies would need a device
	 * that is not in-sync; delay the stripe unless prereads are
	 * already active.
	 */
	if (rcw > disks && rmw > disks &&
	    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
		set_bit(STRIPE_DELAYED, &sh->state);

	/* now if nothing is locked, and if we have enough data,
	 * we can start a write request
	 */
	/* since handle_stripe can be called at any time we need to handle the
	 * case where a compute block operation has been submitted and then a
	 * subsequent call wants to start a write request.  raid_run_ops only
	 * handles the case where compute block and reconstruct are requested
	 * simultaneously.  If this is not the case then new writes need to be
	 * held off until the compute completes.
	 */
	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
	    (s->locked == 0 && (rcw == 0 || rmw == 0) &&
	     !test_bit(STRIPE_BIT_DELAY, &sh->state)))
		schedule_reconstruction(sh, s, rcw == 0, 0);
	return 0;
}
42388c2ecf20Sopenharmony_ci
/*
 * Drive the parity-check state machine for a RAID4/5 (single parity)
 * stripe during resync/check.  sh->check_state advances:
 *   idle -> run -> check_result -> (INSYNC, or compute_run to repair)
 *   compute_run -> compute_result -> write repaired parity.
 * With one failed device the check is skipped and the recomputed block
 * is written back directly (idle falls through to compute_result).
 */
static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
				struct stripe_head_state *s, int disks)
{
	struct r5dev *dev = NULL;

	/* batched stripes are never checked - must be broken up first */
	BUG_ON(sh->batch_head);
	set_bit(STRIPE_HANDLE, &sh->state);

	switch (sh->check_state) {
	case check_state_idle:
		/* start a new check operation if there are no failures */
		if (s->failed == 0) {
			BUG_ON(s->uptodate != disks);
			sh->check_state = check_state_run;
			set_bit(STRIPE_OP_CHECK, &s->ops_request);
			/* the xor-based zero-sum check destroys P's buffer */
			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
			s->uptodate--;
			break;
		}
		/* one failure: the failed block was recomputed; write it out */
		dev = &sh->dev[s->failed_num[0]];
		fallthrough;
	case check_state_compute_result:
		sh->check_state = check_state_idle;
		/* when entered directly, the recomputed block is parity */
		if (!dev)
			dev = &sh->dev[sh->pd_idx];

		/* check that a write has not made the stripe insync */
		if (test_bit(STRIPE_INSYNC, &sh->state))
			break;

		/* either failed parity check, or recovery is happening */
		BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
		BUG_ON(s->uptodate != disks);

		set_bit(R5_LOCKED, &dev->flags);
		s->locked++;
		set_bit(R5_Wantwrite, &dev->flags);

		clear_bit(STRIPE_DEGRADED, &sh->state);
		set_bit(STRIPE_INSYNC, &sh->state);
		break;
	case check_state_run:
		break; /* we will be called again upon completion */
	case check_state_check_result:
		sh->check_state = check_state_idle;

		/* if a failure occurred during the check operation, leave
		 * STRIPE_INSYNC not set and let the stripe be handled again
		 */
		if (s->failed)
			break;

		/* handle a successful check operation, if parity is correct
		 * we are done.  Otherwise update the mismatch count and repair
		 * parity if !MD_RECOVERY_CHECK
		 */
		if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
			/* parity is correct (on disc,
			 * not in buffer any more)
			 */
			set_bit(STRIPE_INSYNC, &sh->state);
		else {
			atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
				/* don't try to repair!! */
				set_bit(STRIPE_INSYNC, &sh->state);
				pr_warn_ratelimited("%s: mismatch sector in range "
						    "%llu-%llu\n", mdname(conf->mddev),
						    (unsigned long long) sh->sector,
						    (unsigned long long) sh->sector +
						    RAID5_STRIPE_SECTORS(conf));
			} else {
				/* repair: recompute parity from data blocks */
				sh->check_state = check_state_compute_run;
				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
				set_bit(R5_Wantcompute,
					&sh->dev[sh->pd_idx].flags);
				sh->ops.target = sh->pd_idx;
				sh->ops.target2 = -1;
				s->uptodate++;
			}
		}
		break;
	case check_state_compute_run:
		break;
	default:
		pr_err("%s: unknown check_state: %d sector: %llu\n",
		       __func__, sh->check_state,
		       (unsigned long long) sh->sector);
		BUG();
	}
}
43318c2ecf20Sopenharmony_ci
/*
 * Drive the parity-check state machine for a RAID6 stripe, where both P
 * and Q parity may need checking and repair, and up to two devices may
 * have failed.  Which of P/Q can be checked depends on whether they were
 * consumed to regenerate failed data (check_state_run checks P only,
 * check_state_run_q checks Q only, check_state_run_pq checks both).
 */
static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
				  struct stripe_head_state *s,
				  int disks)
{
	int pd_idx = sh->pd_idx;
	int qd_idx = sh->qd_idx;
	struct r5dev *dev;

	/* batched stripes are never checked - must be broken up first */
	BUG_ON(sh->batch_head);
	set_bit(STRIPE_HANDLE, &sh->state);

	BUG_ON(s->failed > 2);

	/* Want to check and possibly repair P and Q.
	 * However there could be one 'failed' device, in which
	 * case we can only check one of them, possibly using the
	 * other to generate missing data
	 */

	switch (sh->check_state) {
	case check_state_idle:
		/* start a new check operation if there are < 2 failures */
		if (s->failed == s->q_failed) {
			/* The only possible failed device holds Q, so it
			 * makes sense to check P (If anything else were failed,
			 * we would have used P to recreate it).
			 */
			sh->check_state = check_state_run;
		}
		if (!s->q_failed && s->failed < 2) {
			/* Q is not failed, and we didn't use it to generate
			 * anything, so it makes sense to check it
			 */
			if (sh->check_state == check_state_run)
				sh->check_state = check_state_run_pq;
			else
				sh->check_state = check_state_run_q;
		}

		/* discard potentially stale zero_sum_result */
		sh->ops.zero_sum_result = 0;

		if (sh->check_state == check_state_run) {
			/* async_xor_zero_sum destroys the contents of P */
			clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
			s->uptodate--;
		}
		if (sh->check_state >= check_state_run &&
		    sh->check_state <= check_state_run_pq) {
			/* async_syndrome_zero_sum preserves P and Q, so
			 * no need to mark them !uptodate here
			 */
			set_bit(STRIPE_OP_CHECK, &s->ops_request);
			break;
		}

		/* we have 2-disk failure */
		BUG_ON(s->failed != 2);
		fallthrough;
	case check_state_compute_result:
		sh->check_state = check_state_idle;

		/* check that a write has not made the stripe insync */
		if (test_bit(STRIPE_INSYNC, &sh->state))
			break;

		/* now write out any block on a failed drive,
		 * or P or Q if they were recomputed
		 */
		dev = NULL;
		if (s->failed == 2) {
			dev = &sh->dev[s->failed_num[1]];
			s->locked++;
			set_bit(R5_LOCKED, &dev->flags);
			set_bit(R5_Wantwrite, &dev->flags);
		}
		if (s->failed >= 1) {
			dev = &sh->dev[s->failed_num[0]];
			s->locked++;
			set_bit(R5_LOCKED, &dev->flags);
			set_bit(R5_Wantwrite, &dev->flags);
		}
		if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
			dev = &sh->dev[pd_idx];
			s->locked++;
			set_bit(R5_LOCKED, &dev->flags);
			set_bit(R5_Wantwrite, &dev->flags);
		}
		if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
			dev = &sh->dev[qd_idx];
			s->locked++;
			set_bit(R5_LOCKED, &dev->flags);
			set_bit(R5_Wantwrite, &dev->flags);
		}
		/* sanity check: back out the write if the last block
		 * scheduled above was never brought up to date
		 */
		if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags),
			      "%s: disk%td not up to date\n",
			      mdname(conf->mddev),
			      dev - (struct r5dev *) &sh->dev)) {
			clear_bit(R5_LOCKED, &dev->flags);
			clear_bit(R5_Wantwrite, &dev->flags);
			s->locked--;
		}
		clear_bit(STRIPE_DEGRADED, &sh->state);

		set_bit(STRIPE_INSYNC, &sh->state);
		break;
	case check_state_run:
	case check_state_run_q:
	case check_state_run_pq:
		break; /* we will be called again upon completion */
	case check_state_check_result:
		sh->check_state = check_state_idle;

		/* handle a successful check operation, if parity is correct
		 * we are done.  Otherwise update the mismatch count and repair
		 * parity if !MD_RECOVERY_CHECK
		 */
		if (sh->ops.zero_sum_result == 0) {
			/* both parities are correct */
			if (!s->failed)
				set_bit(STRIPE_INSYNC, &sh->state);
			else {
				/* in contrast to the raid5 case we can validate
				 * parity, but still have a failure to write
				 * back
				 */
				sh->check_state = check_state_compute_result;
				/* Returning at this point means that we may go
				 * off and bring p and/or q uptodate again so
				 * we make sure to check zero_sum_result again
				 * to verify if p or q need writeback
				 */
			}
		} else {
			atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
				/* don't try to repair!! */
				set_bit(STRIPE_INSYNC, &sh->state);
				pr_warn_ratelimited("%s: mismatch sector in range "
						    "%llu-%llu\n", mdname(conf->mddev),
						    (unsigned long long) sh->sector,
						    (unsigned long long) sh->sector +
						    RAID5_STRIPE_SECTORS(conf));
			} else {
				/* schedule recompute of whichever of P/Q
				 * mismatched; target holds the first,
				 * target2 the second (or -1)
				 */
				int *target = &sh->ops.target;

				sh->ops.target = -1;
				sh->ops.target2 = -1;
				sh->check_state = check_state_compute_run;
				set_bit(STRIPE_COMPUTE_RUN, &sh->state);
				set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
				if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
					set_bit(R5_Wantcompute,
						&sh->dev[pd_idx].flags);
					*target = pd_idx;
					target = &sh->ops.target2;
					s->uptodate++;
				}
				if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
					set_bit(R5_Wantcompute,
						&sh->dev[qd_idx].flags);
					*target = qd_idx;
					s->uptodate++;
				}
			}
		}
		break;
	case check_state_compute_run:
		break;
	default:
		pr_warn("%s: unknown check_state: %d sector: %llu\n",
			__func__, sh->check_state,
			(unsigned long long) sh->sector);
		BUG();
	}
}
45088c2ecf20Sopenharmony_ci
/*
 * During reshape, copy the data blocks of a fully-read source stripe into
 * the stripes of the new layout.  For each non-parity block, the block's
 * array address is mapped to its (stripe, device) position in the new
 * geometry, the page is copied there with an async memcpy, and once a
 * target stripe has every data block it is marked STRIPE_EXPAND_READY.
 */
static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
{
	int i;

	/* We have read all the blocks in this stripe and now we need to
	 * copy some of them into a target stripe for expand.
	 */
	struct dma_async_tx_descriptor *tx = NULL;
	BUG_ON(sh->batch_head);
	clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
	for (i = 0; i < sh->disks; i++)
		if (i != sh->pd_idx && i != sh->qd_idx) {
			int dd_idx, j;
			struct stripe_head *sh2;
			struct async_submit_ctl submit;

			/* map this block to its sector/device in the
			 * post-reshape (previous==1 -> new) geometry
			 */
			sector_t bn = raid5_compute_blocknr(sh, i, 1);
			sector_t s = raid5_compute_sector(conf, bn, 0,
							  &dd_idx, NULL);
			sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);
			if (sh2 == NULL)
				/* so far only the early blocks of this stripe
				 * have been requested.  When later blocks
				 * get requested, we will try again
				 */
				continue;
			if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
			   test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
				/* must have already done this block */
				raid5_release_stripe(sh2);
				continue;
			}

			/* place all the copies on one channel */
			init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
			tx = async_memcpy(sh2->dev[dd_idx].page,
					  sh->dev[i].page, sh2->dev[dd_idx].offset,
					  sh->dev[i].offset, RAID5_STRIPE_SIZE(conf),
					  &submit);

			set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
			set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
			/* if every data block of sh2 has now been copied
			 * in, the target stripe is ready to be written
			 */
			for (j = 0; j < conf->raid_disks; j++)
				if (j != sh2->pd_idx &&
				    j != sh2->qd_idx &&
				    !test_bit(R5_Expanded, &sh2->dev[j].flags))
					break;
			if (j == conf->raid_disks) {
				set_bit(STRIPE_EXPAND_READY, &sh2->state);
				set_bit(STRIPE_HANDLE, &sh2->state);
			}
			raid5_release_stripe(sh2);

		}
	/* done submitting copies, wait for them to complete */
	async_tx_quiesce(&tx);
}
45668c2ecf20Sopenharmony_ci
45678c2ecf20Sopenharmony_ci/*
45688c2ecf20Sopenharmony_ci * handle_stripe - do things to a stripe.
45698c2ecf20Sopenharmony_ci *
45708c2ecf20Sopenharmony_ci * We lock the stripe by setting STRIPE_ACTIVE and then examine the
45718c2ecf20Sopenharmony_ci * state of various bits to see what needs to be done.
45728c2ecf20Sopenharmony_ci * Possible results:
45738c2ecf20Sopenharmony_ci *    return some read requests which now have data
45748c2ecf20Sopenharmony_ci *    return some write requests which are safely on storage
45758c2ecf20Sopenharmony_ci *    schedule a read on some buffers
45768c2ecf20Sopenharmony_ci *    schedule a write of some buffers
45778c2ecf20Sopenharmony_ci *    return confirmation of parity correctness
45788c2ecf20Sopenharmony_ci *
45798c2ecf20Sopenharmony_ci */
45808c2ecf20Sopenharmony_ci
45818c2ecf20Sopenharmony_ci/*
45818c2ecf20Sopenharmony_ci * analyse_stripe - summarise the current state of one stripe into @s.
45818c2ecf20Sopenharmony_ci *
45818c2ecf20Sopenharmony_ci * Walks every device of @sh under rcu_read_lock() and fills the freshly
45818c2ecf20Sopenharmony_ci * zeroed @s with per-stripe counters (locked/uptodate/to_read/to_write/
45818c2ecf20Sopenharmony_ci * written/compute/...), the indices of up to two failed devices, and the
45818c2ecf20Sopenharmony_ci * syncing-vs-replacing decision that handle_stripe() acts on.
45818c2ecf20Sopenharmony_ci *
45818c2ecf20Sopenharmony_ci * May take nr_pending references on rdevs it records (s->blocked_rdev,
45818c2ecf20Sopenharmony_ci * and the rdevs behind s->handle_bad_blocks); the caller is responsible
45818c2ecf20Sopenharmony_ci * for dropping them later (see rdev_dec_pending() in handle_stripe()
45818c2ecf20Sopenharmony_ci * for the blocked_rdev case).
45818c2ecf20Sopenharmony_ci */
45818c2ecf20Sopenharmony_cistatic void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
45828c2ecf20Sopenharmony_ci{
45838c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
45848c2ecf20Sopenharmony_ci	int disks = sh->disks;
45858c2ecf20Sopenharmony_ci	struct r5dev *dev;
45868c2ecf20Sopenharmony_ci	int i;
45878c2ecf20Sopenharmony_ci	int do_recovery = 0;
45888c2ecf20Sopenharmony_ci
45898c2ecf20Sopenharmony_ci	memset(s, 0, sizeof(*s));
45908c2ecf20Sopenharmony_ci
45918c2ecf20Sopenharmony_ci	/* Expansion state is only meaningful for a lone/head stripe,
45918c2ecf20Sopenharmony_ci	 * never for a member of a batch. */
45918c2ecf20Sopenharmony_ci	s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head;
45928c2ecf20Sopenharmony_ci	s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
45938c2ecf20Sopenharmony_ci	s->failed_num[0] = -1;
45948c2ecf20Sopenharmony_ci	s->failed_num[1] = -1;
45958c2ecf20Sopenharmony_ci	s->log_failed = r5l_log_disk_error(conf);
45968c2ecf20Sopenharmony_ci
45978c2ecf20Sopenharmony_ci	/* Now to look around and see what can be done */
45988c2ecf20Sopenharmony_ci	rcu_read_lock();
45998c2ecf20Sopenharmony_ci	for (i=disks; i--; ) {
46008c2ecf20Sopenharmony_ci		struct md_rdev *rdev;
46018c2ecf20Sopenharmony_ci		sector_t first_bad;
46028c2ecf20Sopenharmony_ci		int bad_sectors;
46038c2ecf20Sopenharmony_ci		int is_bad = 0;
46048c2ecf20Sopenharmony_ci
46058c2ecf20Sopenharmony_ci		dev = &sh->dev[i];
46068c2ecf20Sopenharmony_ci
46078c2ecf20Sopenharmony_ci		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
46088c2ecf20Sopenharmony_ci			 i, dev->flags,
46098c2ecf20Sopenharmony_ci			 dev->toread, dev->towrite, dev->written);
46108c2ecf20Sopenharmony_ci		/* maybe we can reply to a read
46118c2ecf20Sopenharmony_ci		 *
46128c2ecf20Sopenharmony_ci		 * new wantfill requests are only permitted while
46138c2ecf20Sopenharmony_ci		 * ops_complete_biofill is guaranteed to be inactive
46148c2ecf20Sopenharmony_ci		 */
46158c2ecf20Sopenharmony_ci		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
46168c2ecf20Sopenharmony_ci		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
46178c2ecf20Sopenharmony_ci			set_bit(R5_Wantfill, &dev->flags);
46188c2ecf20Sopenharmony_ci
46198c2ecf20Sopenharmony_ci		/* now count some things */
46208c2ecf20Sopenharmony_ci		if (test_bit(R5_LOCKED, &dev->flags))
46218c2ecf20Sopenharmony_ci			s->locked++;
46228c2ecf20Sopenharmony_ci		if (test_bit(R5_UPTODATE, &dev->flags))
46238c2ecf20Sopenharmony_ci			s->uptodate++;
46248c2ecf20Sopenharmony_ci		if (test_bit(R5_Wantcompute, &dev->flags)) {
46258c2ecf20Sopenharmony_ci			s->compute++;
46268c2ecf20Sopenharmony_ci			/* at most two blocks can be computed per stripe
46268c2ecf20Sopenharmony_ci			 * (P and Q, or two data blocks via RAID-6 recovery) */
46268c2ecf20Sopenharmony_ci			BUG_ON(s->compute > 2);
46278c2ecf20Sopenharmony_ci		}
46288c2ecf20Sopenharmony_ci
46298c2ecf20Sopenharmony_ci		if (test_bit(R5_Wantfill, &dev->flags))
46308c2ecf20Sopenharmony_ci			s->to_fill++;
46318c2ecf20Sopenharmony_ci		else if (dev->toread)
46328c2ecf20Sopenharmony_ci			s->to_read++;
46338c2ecf20Sopenharmony_ci		if (dev->towrite) {
46348c2ecf20Sopenharmony_ci			s->to_write++;
46358c2ecf20Sopenharmony_ci			if (!test_bit(R5_OVERWRITE, &dev->flags))
46368c2ecf20Sopenharmony_ci				s->non_overwrite++;
46378c2ecf20Sopenharmony_ci		}
46388c2ecf20Sopenharmony_ci		if (dev->written)
46398c2ecf20Sopenharmony_ci			s->written++;
46408c2ecf20Sopenharmony_ci		/* Prefer to use the replacement for reads, but only
46418c2ecf20Sopenharmony_ci		 * if it is recovered enough and has no bad blocks.
46428c2ecf20Sopenharmony_ci		 */
46438c2ecf20Sopenharmony_ci		rdev = rcu_dereference(conf->disks[i].replacement);
46448c2ecf20Sopenharmony_ci		if (rdev && !test_bit(Faulty, &rdev->flags) &&
46458c2ecf20Sopenharmony_ci		    rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
46468c2ecf20Sopenharmony_ci		    !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
46478c2ecf20Sopenharmony_ci				 &first_bad, &bad_sectors))
46488c2ecf20Sopenharmony_ci			set_bit(R5_ReadRepl, &dev->flags);
46498c2ecf20Sopenharmony_ci		else {
46508c2ecf20Sopenharmony_ci			/* Replacement exists but isn't fully recovered yet:
46508c2ecf20Sopenharmony_ci			 * remember that it still needs to be written to. */
46508c2ecf20Sopenharmony_ci			if (rdev && !test_bit(Faulty, &rdev->flags))
46518c2ecf20Sopenharmony_ci				set_bit(R5_NeedReplace, &dev->flags);
46528c2ecf20Sopenharmony_ci			else
46538c2ecf20Sopenharmony_ci				clear_bit(R5_NeedReplace, &dev->flags);
46548c2ecf20Sopenharmony_ci			rdev = rcu_dereference(conf->disks[i].rdev);
46558c2ecf20Sopenharmony_ci			clear_bit(R5_ReadRepl, &dev->flags);
46568c2ecf20Sopenharmony_ci		}
46578c2ecf20Sopenharmony_ci		/* From here on, a Faulty device is treated as absent */
46578c2ecf20Sopenharmony_ci		if (rdev && test_bit(Faulty, &rdev->flags))
46588c2ecf20Sopenharmony_ci			rdev = NULL;
46598c2ecf20Sopenharmony_ci		if (rdev) {
46608c2ecf20Sopenharmony_ci			is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
46618c2ecf20Sopenharmony_ci					     &first_bad, &bad_sectors);
46628c2ecf20Sopenharmony_ci			/* Record (at most one) rdev we must wait for before
46628c2ecf20Sopenharmony_ci			 * any I/O may proceed; pin it with nr_pending. */
46628c2ecf20Sopenharmony_ci			if (s->blocked_rdev == NULL
46638c2ecf20Sopenharmony_ci			    && (test_bit(Blocked, &rdev->flags)
46648c2ecf20Sopenharmony_ci				|| is_bad < 0)) {
46658c2ecf20Sopenharmony_ci				if (is_bad < 0)
46668c2ecf20Sopenharmony_ci					set_bit(BlockedBadBlocks,
46678c2ecf20Sopenharmony_ci						&rdev->flags);
46688c2ecf20Sopenharmony_ci				s->blocked_rdev = rdev;
46698c2ecf20Sopenharmony_ci				atomic_inc(&rdev->nr_pending);
46708c2ecf20Sopenharmony_ci			}
46718c2ecf20Sopenharmony_ci		}
46728c2ecf20Sopenharmony_ci		/* Now decide whether this device counts as in-sync */
46728c2ecf20Sopenharmony_ci		clear_bit(R5_Insync, &dev->flags);
46738c2ecf20Sopenharmony_ci		if (!rdev)
46748c2ecf20Sopenharmony_ci			/* Not in-sync */;
46758c2ecf20Sopenharmony_ci		else if (is_bad) {
46768c2ecf20Sopenharmony_ci			/* also not in-sync */
46778c2ecf20Sopenharmony_ci			if (!test_bit(WriteErrorSeen, &rdev->flags) &&
46788c2ecf20Sopenharmony_ci			    test_bit(R5_UPTODATE, &dev->flags)) {
46798c2ecf20Sopenharmony_ci				/* treat as in-sync, but with a read error
46808c2ecf20Sopenharmony_ci				 * which we can now try to correct
46818c2ecf20Sopenharmony_ci				 */
46828c2ecf20Sopenharmony_ci				set_bit(R5_Insync, &dev->flags);
46838c2ecf20Sopenharmony_ci				set_bit(R5_ReadError, &dev->flags);
46848c2ecf20Sopenharmony_ci			}
46858c2ecf20Sopenharmony_ci		} else if (test_bit(In_sync, &rdev->flags))
46868c2ecf20Sopenharmony_ci			set_bit(R5_Insync, &dev->flags);
46878c2ecf20Sopenharmony_ci		else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
46888c2ecf20Sopenharmony_ci			/* in sync if before recovery_offset */
46898c2ecf20Sopenharmony_ci			set_bit(R5_Insync, &dev->flags);
46908c2ecf20Sopenharmony_ci		else if (test_bit(R5_UPTODATE, &dev->flags) &&
46918c2ecf20Sopenharmony_ci			 test_bit(R5_Expanded, &dev->flags))
46928c2ecf20Sopenharmony_ci			/* If we've reshaped into here, we assume it is Insync.
46938c2ecf20Sopenharmony_ci			 * We will shortly update recovery_offset to make
46948c2ecf20Sopenharmony_ci			 * it official.
46958c2ecf20Sopenharmony_ci			 */
46968c2ecf20Sopenharmony_ci			set_bit(R5_Insync, &dev->flags);
46978c2ecf20Sopenharmony_ci
46988c2ecf20Sopenharmony_ci		if (test_bit(R5_WriteError, &dev->flags)) {
46998c2ecf20Sopenharmony_ci			/* This flag does not apply to '.replacement'
47008c2ecf20Sopenharmony_ci			 * only to .rdev, so make sure to check that*/
47018c2ecf20Sopenharmony_ci			struct md_rdev *rdev2 = rcu_dereference(
47028c2ecf20Sopenharmony_ci				conf->disks[i].rdev);
47038c2ecf20Sopenharmony_ci			if (rdev2 == rdev)
47048c2ecf20Sopenharmony_ci				clear_bit(R5_Insync, &dev->flags);
47058c2ecf20Sopenharmony_ci			if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
47068c2ecf20Sopenharmony_ci				s->handle_bad_blocks = 1;
47078c2ecf20Sopenharmony_ci				atomic_inc(&rdev2->nr_pending);
47088c2ecf20Sopenharmony_ci			} else
47098c2ecf20Sopenharmony_ci				clear_bit(R5_WriteError, &dev->flags);
47108c2ecf20Sopenharmony_ci		}
47118c2ecf20Sopenharmony_ci		if (test_bit(R5_MadeGood, &dev->flags)) {
47128c2ecf20Sopenharmony_ci			/* This flag does not apply to '.replacement'
47138c2ecf20Sopenharmony_ci			 * only to .rdev, so make sure to check that*/
47148c2ecf20Sopenharmony_ci			struct md_rdev *rdev2 = rcu_dereference(
47158c2ecf20Sopenharmony_ci				conf->disks[i].rdev);
47168c2ecf20Sopenharmony_ci			if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
47178c2ecf20Sopenharmony_ci				s->handle_bad_blocks = 1;
47188c2ecf20Sopenharmony_ci				atomic_inc(&rdev2->nr_pending);
47198c2ecf20Sopenharmony_ci			} else
47208c2ecf20Sopenharmony_ci				clear_bit(R5_MadeGood, &dev->flags);
47218c2ecf20Sopenharmony_ci		}
47228c2ecf20Sopenharmony_ci		if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
47238c2ecf20Sopenharmony_ci			struct md_rdev *rdev2 = rcu_dereference(
47248c2ecf20Sopenharmony_ci				conf->disks[i].replacement);
47258c2ecf20Sopenharmony_ci			if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
47268c2ecf20Sopenharmony_ci				s->handle_bad_blocks = 1;
47278c2ecf20Sopenharmony_ci				atomic_inc(&rdev2->nr_pending);
47288c2ecf20Sopenharmony_ci			} else
47298c2ecf20Sopenharmony_ci				clear_bit(R5_MadeGoodRepl, &dev->flags);
47308c2ecf20Sopenharmony_ci		}
47318c2ecf20Sopenharmony_ci		if (!test_bit(R5_Insync, &dev->flags)) {
47328c2ecf20Sopenharmony_ci			/* The ReadError flag will just be confusing now */
47338c2ecf20Sopenharmony_ci			clear_bit(R5_ReadError, &dev->flags);
47348c2ecf20Sopenharmony_ci			clear_bit(R5_ReWrite, &dev->flags);
47358c2ecf20Sopenharmony_ci		}
47368c2ecf20Sopenharmony_ci		if (test_bit(R5_ReadError, &dev->flags))
47378c2ecf20Sopenharmony_ci			clear_bit(R5_Insync, &dev->flags);
47388c2ecf20Sopenharmony_ci		/* Not in-sync: record as failed.  Only the first two
47388c2ecf20Sopenharmony_ci		 * failed indices are kept (max_degraded is at most 2). */
47388c2ecf20Sopenharmony_ci		if (!test_bit(R5_Insync, &dev->flags)) {
47398c2ecf20Sopenharmony_ci			if (s->failed < 2)
47408c2ecf20Sopenharmony_ci				s->failed_num[s->failed] = i;
47418c2ecf20Sopenharmony_ci			s->failed++;
47428c2ecf20Sopenharmony_ci			if (rdev && !test_bit(Faulty, &rdev->flags))
47438c2ecf20Sopenharmony_ci				do_recovery = 1;
47448c2ecf20Sopenharmony_ci			else if (!rdev) {
47458c2ecf20Sopenharmony_ci				rdev = rcu_dereference(
47468c2ecf20Sopenharmony_ci				    conf->disks[i].replacement);
47478c2ecf20Sopenharmony_ci				if (rdev && !test_bit(Faulty, &rdev->flags))
47488c2ecf20Sopenharmony_ci					do_recovery = 1;
47498c2ecf20Sopenharmony_ci			}
47508c2ecf20Sopenharmony_ci		}
47518c2ecf20Sopenharmony_ci
47528c2ecf20Sopenharmony_ci		if (test_bit(R5_InJournal, &dev->flags))
47538c2ecf20Sopenharmony_ci			s->injournal++;
47548c2ecf20Sopenharmony_ci		if (test_bit(R5_InJournal, &dev->flags) && dev->written)
47558c2ecf20Sopenharmony_ci			s->just_cached++;
47568c2ecf20Sopenharmony_ci	}
47578c2ecf20Sopenharmony_ci	if (test_bit(STRIPE_SYNCING, &sh->state)) {
47588c2ecf20Sopenharmony_ci		/* If there is a failed device being replaced,
47598c2ecf20Sopenharmony_ci		 *     we must be recovering.
47608c2ecf20Sopenharmony_ci		 * else if we are after recovery_cp, we must be syncing
47618c2ecf20Sopenharmony_ci		 * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
47628c2ecf20Sopenharmony_ci		 * else we can only be replacing
47638c2ecf20Sopenharmony_ci		 * sync and recovery both need to read all devices, and so
47648c2ecf20Sopenharmony_ci		 * use the same flag.
47658c2ecf20Sopenharmony_ci		 */
47668c2ecf20Sopenharmony_ci		if (do_recovery ||
47678c2ecf20Sopenharmony_ci		    sh->sector >= conf->mddev->recovery_cp ||
47688c2ecf20Sopenharmony_ci		    test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
47698c2ecf20Sopenharmony_ci			s->syncing = 1;
47708c2ecf20Sopenharmony_ci		else
47718c2ecf20Sopenharmony_ci			s->replacing = 1;
47728c2ecf20Sopenharmony_ci	}
47738c2ecf20Sopenharmony_ci	rcu_read_unlock();
47748c2ecf20Sopenharmony_ci}
47758c2ecf20Sopenharmony_ci
47768c2ecf20Sopenharmony_ci/*
47778c2ecf20Sopenharmony_ci * Return '1' if this is a member of batch, or '0' if it is a lone stripe or
47788c2ecf20Sopenharmony_ci * a head which can now be handled.
47798c2ecf20Sopenharmony_ci */
47808c2ecf20Sopenharmony_cistatic int clear_batch_ready(struct stripe_head *sh)
47818c2ecf20Sopenharmony_ci{
47828c2ecf20Sopenharmony_ci	struct stripe_head *tmp;
47838c2ecf20Sopenharmony_ci	/*
47838c2ecf20Sopenharmony_ci	 * BATCH_READY already clear: @sh is either a lone stripe or the
47838c2ecf20Sopenharmony_ci	 * head of its batch (handle it, return 0), or still a member of
47838c2ecf20Sopenharmony_ci	 * some other head's batch (skip it, return 1).
47838c2ecf20Sopenharmony_ci	 */
47838c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
47848c2ecf20Sopenharmony_ci		return (sh->batch_head && sh->batch_head != sh);
47858c2ecf20Sopenharmony_ci	/* stripe_lock stabilises sh->batch_head while we inspect it */
47858c2ecf20Sopenharmony_ci	spin_lock(&sh->stripe_lock);
47868c2ecf20Sopenharmony_ci	if (!sh->batch_head) {
47878c2ecf20Sopenharmony_ci		spin_unlock(&sh->stripe_lock);
47888c2ecf20Sopenharmony_ci		return 0;
47898c2ecf20Sopenharmony_ci	}
47908c2ecf20Sopenharmony_ci
47918c2ecf20Sopenharmony_ci	/*
47928c2ecf20Sopenharmony_ci	 * this stripe could be added to a batch list before we check
47938c2ecf20Sopenharmony_ci	 * BATCH_READY, skips it
47948c2ecf20Sopenharmony_ci	 */
47958c2ecf20Sopenharmony_ci	if (sh->batch_head != sh) {
47968c2ecf20Sopenharmony_ci		spin_unlock(&sh->stripe_lock);
47978c2ecf20Sopenharmony_ci		return 1;
47988c2ecf20Sopenharmony_ci	}
47998c2ecf20Sopenharmony_ci	/* @sh is a batch head: clear BATCH_READY on every member so the
47998c2ecf20Sopenharmony_ci	 * early-out above routes them all to this head. */
47998c2ecf20Sopenharmony_ci	spin_lock(&sh->batch_lock);
48008c2ecf20Sopenharmony_ci	list_for_each_entry(tmp, &sh->batch_list, batch_list)
48018c2ecf20Sopenharmony_ci		clear_bit(STRIPE_BATCH_READY, &tmp->state);
48028c2ecf20Sopenharmony_ci	spin_unlock(&sh->batch_lock);
48038c2ecf20Sopenharmony_ci	spin_unlock(&sh->stripe_lock);
48048c2ecf20Sopenharmony_ci
48058c2ecf20Sopenharmony_ci	/*
48068c2ecf20Sopenharmony_ci	 * BATCH_READY is cleared, no new stripes can be added.
48078c2ecf20Sopenharmony_ci	 * batch_list can be accessed without lock
48088c2ecf20Sopenharmony_ci	 */
48098c2ecf20Sopenharmony_ci	return 0;
48108c2ecf20Sopenharmony_ci}
48118c2ecf20Sopenharmony_ci
48128c2ecf20Sopenharmony_ci/*
48128c2ecf20Sopenharmony_ci * break_stripe_batch_list - detach every stripe from @head_sh's batch list.
48128c2ecf20Sopenharmony_ci *
48128c2ecf20Sopenharmony_ci * Each member is removed from the list, inherits selected state from the
48128c2ecf20Sopenharmony_ci * head (check_state, reconstruct_state, INSYNC bit, per-device flags minus
48128c2ecf20Sopenharmony_ci * WriteError/Overlap), has its batch_head cleared, and is released.  A
48128c2ecf20Sopenharmony_ci * stripe gets STRIPE_HANDLE set when @handle_flags is 0 or intersects its
48128c2ecf20Sopenharmony_ci * state.  Waiters on cleared R5_Overlap bits are woken once at the end.
48128c2ecf20Sopenharmony_ci */
48128c2ecf20Sopenharmony_cistatic void break_stripe_batch_list(struct stripe_head *head_sh,
48138c2ecf20Sopenharmony_ci				    unsigned long handle_flags)
48148c2ecf20Sopenharmony_ci{
48158c2ecf20Sopenharmony_ci	struct stripe_head *sh, *next;
48168c2ecf20Sopenharmony_ci	int i;
48178c2ecf20Sopenharmony_ci	int do_wakeup = 0;
48188c2ecf20Sopenharmony_ci
48198c2ecf20Sopenharmony_ci	list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
48208c2ecf20Sopenharmony_ci
48218c2ecf20Sopenharmony_ci		list_del_init(&sh->batch_list);
48228c2ecf20Sopenharmony_ci
48238c2ecf20Sopenharmony_ci		/* None of these bits should ever be set on a batch member */
48238c2ecf20Sopenharmony_ci		WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
48248c2ecf20Sopenharmony_ci					  (1 << STRIPE_SYNCING) |
48258c2ecf20Sopenharmony_ci					  (1 << STRIPE_REPLACED) |
48268c2ecf20Sopenharmony_ci					  (1 << STRIPE_DELAYED) |
48278c2ecf20Sopenharmony_ci					  (1 << STRIPE_BIT_DELAY) |
48288c2ecf20Sopenharmony_ci					  (1 << STRIPE_FULL_WRITE) |
48298c2ecf20Sopenharmony_ci					  (1 << STRIPE_BIOFILL_RUN) |
48308c2ecf20Sopenharmony_ci					  (1 << STRIPE_COMPUTE_RUN)  |
48318c2ecf20Sopenharmony_ci					  (1 << STRIPE_DISCARD) |
48328c2ecf20Sopenharmony_ci					  (1 << STRIPE_BATCH_READY) |
48338c2ecf20Sopenharmony_ci					  (1 << STRIPE_BATCH_ERR) |
48348c2ecf20Sopenharmony_ci					  (1 << STRIPE_BITMAP_PENDING)),
48358c2ecf20Sopenharmony_ci			"stripe state: %lx\n", sh->state);
48368c2ecf20Sopenharmony_ci		WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
48378c2ecf20Sopenharmony_ci					      (1 << STRIPE_REPLACED)),
48388c2ecf20Sopenharmony_ci			"head stripe state: %lx\n", head_sh->state);
48398c2ecf20Sopenharmony_ci
48408c2ecf20Sopenharmony_ci		/* Clear stale per-stripe bits; copy only INSYNC from head */
48408c2ecf20Sopenharmony_ci		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
48418c2ecf20Sopenharmony_ci					    (1 << STRIPE_PREREAD_ACTIVE) |
48428c2ecf20Sopenharmony_ci					    (1 << STRIPE_DEGRADED) |
48438c2ecf20Sopenharmony_ci					    (1 << STRIPE_ON_UNPLUG_LIST)),
48448c2ecf20Sopenharmony_ci			      head_sh->state & (1 << STRIPE_INSYNC));
48458c2ecf20Sopenharmony_ci
48468c2ecf20Sopenharmony_ci		sh->check_state = head_sh->check_state;
48478c2ecf20Sopenharmony_ci		sh->reconstruct_state = head_sh->reconstruct_state;
48488c2ecf20Sopenharmony_ci		spin_lock_irq(&sh->stripe_lock);
48498c2ecf20Sopenharmony_ci		sh->batch_head = NULL;
48508c2ecf20Sopenharmony_ci		spin_unlock_irq(&sh->stripe_lock);
48518c2ecf20Sopenharmony_ci		for (i = 0; i < sh->disks; i++) {
48528c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
48538c2ecf20Sopenharmony_ci				do_wakeup = 1;
48548c2ecf20Sopenharmony_ci			sh->dev[i].flags = head_sh->dev[i].flags &
48558c2ecf20Sopenharmony_ci				(~((1 << R5_WriteError) | (1 << R5_Overlap)));
48568c2ecf20Sopenharmony_ci		}
48578c2ecf20Sopenharmony_ci		if (handle_flags == 0 ||
48588c2ecf20Sopenharmony_ci		    sh->state & handle_flags)
48598c2ecf20Sopenharmony_ci			set_bit(STRIPE_HANDLE, &sh->state);
48608c2ecf20Sopenharmony_ci		raid5_release_stripe(sh);
48618c2ecf20Sopenharmony_ci	}
48628c2ecf20Sopenharmony_ci	/* Finally detach the head itself from batch mode */
48628c2ecf20Sopenharmony_ci	spin_lock_irq(&head_sh->stripe_lock);
48638c2ecf20Sopenharmony_ci	head_sh->batch_head = NULL;
48648c2ecf20Sopenharmony_ci	spin_unlock_irq(&head_sh->stripe_lock);
48658c2ecf20Sopenharmony_ci	for (i = 0; i < head_sh->disks; i++)
48668c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
48678c2ecf20Sopenharmony_ci			do_wakeup = 1;
48688c2ecf20Sopenharmony_ci	if (head_sh->state & handle_flags)
48698c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &head_sh->state);
48708c2ecf20Sopenharmony_ci
48718c2ecf20Sopenharmony_ci	if (do_wakeup)
48728c2ecf20Sopenharmony_ci		wake_up(&head_sh->raid_conf->wait_for_overlap);
48738c2ecf20Sopenharmony_ci}
48748c2ecf20Sopenharmony_ci
48758c2ecf20Sopenharmony_cistatic void handle_stripe(struct stripe_head *sh)
48768c2ecf20Sopenharmony_ci{
48778c2ecf20Sopenharmony_ci	struct stripe_head_state s;
48788c2ecf20Sopenharmony_ci	struct r5conf *conf = sh->raid_conf;
48798c2ecf20Sopenharmony_ci	int i;
48808c2ecf20Sopenharmony_ci	int prexor;
48818c2ecf20Sopenharmony_ci	int disks = sh->disks;
48828c2ecf20Sopenharmony_ci	struct r5dev *pdev, *qdev;
48838c2ecf20Sopenharmony_ci
48848c2ecf20Sopenharmony_ci	clear_bit(STRIPE_HANDLE, &sh->state);
48858c2ecf20Sopenharmony_ci
48868c2ecf20Sopenharmony_ci	/*
48878c2ecf20Sopenharmony_ci	 * handle_stripe should not continue handle the batched stripe, only
48888c2ecf20Sopenharmony_ci	 * the head of batch list or lone stripe can continue. Otherwise we
48898c2ecf20Sopenharmony_ci	 * could see break_stripe_batch_list warns about the STRIPE_ACTIVE
48908c2ecf20Sopenharmony_ci	 * is set for the batched stripe.
48918c2ecf20Sopenharmony_ci	 */
48928c2ecf20Sopenharmony_ci	if (clear_batch_ready(sh))
48938c2ecf20Sopenharmony_ci		return;
48948c2ecf20Sopenharmony_ci
48958c2ecf20Sopenharmony_ci	if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) {
48968c2ecf20Sopenharmony_ci		/* already being handled, ensure it gets handled
48978c2ecf20Sopenharmony_ci		 * again when current action finishes */
48988c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
48998c2ecf20Sopenharmony_ci		return;
49008c2ecf20Sopenharmony_ci	}
49018c2ecf20Sopenharmony_ci
49028c2ecf20Sopenharmony_ci	if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
49038c2ecf20Sopenharmony_ci		break_stripe_batch_list(sh, 0);
49048c2ecf20Sopenharmony_ci
49058c2ecf20Sopenharmony_ci	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
49068c2ecf20Sopenharmony_ci		spin_lock(&sh->stripe_lock);
49078c2ecf20Sopenharmony_ci		/*
49088c2ecf20Sopenharmony_ci		 * Cannot process 'sync' concurrently with 'discard'.
49098c2ecf20Sopenharmony_ci		 * Flush data in r5cache before 'sync'.
49108c2ecf20Sopenharmony_ci		 */
49118c2ecf20Sopenharmony_ci		if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
49128c2ecf20Sopenharmony_ci		    !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
49138c2ecf20Sopenharmony_ci		    !test_bit(STRIPE_DISCARD, &sh->state) &&
49148c2ecf20Sopenharmony_ci		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
49158c2ecf20Sopenharmony_ci			set_bit(STRIPE_SYNCING, &sh->state);
49168c2ecf20Sopenharmony_ci			clear_bit(STRIPE_INSYNC, &sh->state);
49178c2ecf20Sopenharmony_ci			clear_bit(STRIPE_REPLACED, &sh->state);
49188c2ecf20Sopenharmony_ci		}
49198c2ecf20Sopenharmony_ci		spin_unlock(&sh->stripe_lock);
49208c2ecf20Sopenharmony_ci	}
49218c2ecf20Sopenharmony_ci	clear_bit(STRIPE_DELAYED, &sh->state);
49228c2ecf20Sopenharmony_ci
49238c2ecf20Sopenharmony_ci	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
49248c2ecf20Sopenharmony_ci		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
49258c2ecf20Sopenharmony_ci	       (unsigned long long)sh->sector, sh->state,
49268c2ecf20Sopenharmony_ci	       atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
49278c2ecf20Sopenharmony_ci	       sh->check_state, sh->reconstruct_state);
49288c2ecf20Sopenharmony_ci
49298c2ecf20Sopenharmony_ci	analyse_stripe(sh, &s);
49308c2ecf20Sopenharmony_ci
49318c2ecf20Sopenharmony_ci	if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
49328c2ecf20Sopenharmony_ci		goto finish;
49338c2ecf20Sopenharmony_ci
49348c2ecf20Sopenharmony_ci	if (s.handle_bad_blocks ||
49358c2ecf20Sopenharmony_ci	    test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) {
49368c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
49378c2ecf20Sopenharmony_ci		goto finish;
49388c2ecf20Sopenharmony_ci	}
49398c2ecf20Sopenharmony_ci
49408c2ecf20Sopenharmony_ci	if (unlikely(s.blocked_rdev)) {
49418c2ecf20Sopenharmony_ci		if (s.syncing || s.expanding || s.expanded ||
49428c2ecf20Sopenharmony_ci		    s.replacing || s.to_write || s.written) {
49438c2ecf20Sopenharmony_ci			set_bit(STRIPE_HANDLE, &sh->state);
49448c2ecf20Sopenharmony_ci			goto finish;
49458c2ecf20Sopenharmony_ci		}
49468c2ecf20Sopenharmony_ci		/* There is nothing for the blocked_rdev to block */
49478c2ecf20Sopenharmony_ci		rdev_dec_pending(s.blocked_rdev, conf->mddev);
49488c2ecf20Sopenharmony_ci		s.blocked_rdev = NULL;
49498c2ecf20Sopenharmony_ci	}
49508c2ecf20Sopenharmony_ci
49518c2ecf20Sopenharmony_ci	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
49528c2ecf20Sopenharmony_ci		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
49538c2ecf20Sopenharmony_ci		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
49548c2ecf20Sopenharmony_ci	}
49558c2ecf20Sopenharmony_ci
49568c2ecf20Sopenharmony_ci	pr_debug("locked=%d uptodate=%d to_read=%d"
49578c2ecf20Sopenharmony_ci	       " to_write=%d failed=%d failed_num=%d,%d\n",
49588c2ecf20Sopenharmony_ci	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
49598c2ecf20Sopenharmony_ci	       s.failed_num[0], s.failed_num[1]);
49608c2ecf20Sopenharmony_ci	/*
49618c2ecf20Sopenharmony_ci	 * check if the array has lost more than max_degraded devices and,
49628c2ecf20Sopenharmony_ci	 * if so, some requests might need to be failed.
49638c2ecf20Sopenharmony_ci	 *
49648c2ecf20Sopenharmony_ci	 * When journal device failed (log_failed), we will only process
49658c2ecf20Sopenharmony_ci	 * the stripe if there is data need write to raid disks
49668c2ecf20Sopenharmony_ci	 */
49678c2ecf20Sopenharmony_ci	if (s.failed > conf->max_degraded ||
49688c2ecf20Sopenharmony_ci	    (s.log_failed && s.injournal == 0)) {
49698c2ecf20Sopenharmony_ci		sh->check_state = 0;
49708c2ecf20Sopenharmony_ci		sh->reconstruct_state = 0;
49718c2ecf20Sopenharmony_ci		break_stripe_batch_list(sh, 0);
49728c2ecf20Sopenharmony_ci		if (s.to_read+s.to_write+s.written)
49738c2ecf20Sopenharmony_ci			handle_failed_stripe(conf, sh, &s, disks);
49748c2ecf20Sopenharmony_ci		if (s.syncing + s.replacing)
49758c2ecf20Sopenharmony_ci			handle_failed_sync(conf, sh, &s);
49768c2ecf20Sopenharmony_ci	}
49778c2ecf20Sopenharmony_ci
49788c2ecf20Sopenharmony_ci	/* Now we check to see if any write operations have recently
49798c2ecf20Sopenharmony_ci	 * completed
49808c2ecf20Sopenharmony_ci	 */
49818c2ecf20Sopenharmony_ci	prexor = 0;
49828c2ecf20Sopenharmony_ci	if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
49838c2ecf20Sopenharmony_ci		prexor = 1;
49848c2ecf20Sopenharmony_ci	if (sh->reconstruct_state == reconstruct_state_drain_result ||
49858c2ecf20Sopenharmony_ci	    sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
49868c2ecf20Sopenharmony_ci		sh->reconstruct_state = reconstruct_state_idle;
49878c2ecf20Sopenharmony_ci
49888c2ecf20Sopenharmony_ci		/* All the 'written' buffers and the parity block are ready to
49898c2ecf20Sopenharmony_ci		 * be written back to disk
49908c2ecf20Sopenharmony_ci		 */
49918c2ecf20Sopenharmony_ci		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) &&
49928c2ecf20Sopenharmony_ci		       !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
49938c2ecf20Sopenharmony_ci		BUG_ON(sh->qd_idx >= 0 &&
49948c2ecf20Sopenharmony_ci		       !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
49958c2ecf20Sopenharmony_ci		       !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
49968c2ecf20Sopenharmony_ci		for (i = disks; i--; ) {
49978c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
49988c2ecf20Sopenharmony_ci			if (test_bit(R5_LOCKED, &dev->flags) &&
49998c2ecf20Sopenharmony_ci				(i == sh->pd_idx || i == sh->qd_idx ||
50008c2ecf20Sopenharmony_ci				 dev->written || test_bit(R5_InJournal,
50018c2ecf20Sopenharmony_ci							  &dev->flags))) {
50028c2ecf20Sopenharmony_ci				pr_debug("Writing block %d\n", i);
50038c2ecf20Sopenharmony_ci				set_bit(R5_Wantwrite, &dev->flags);
50048c2ecf20Sopenharmony_ci				if (prexor)
50058c2ecf20Sopenharmony_ci					continue;
50068c2ecf20Sopenharmony_ci				if (s.failed > 1)
50078c2ecf20Sopenharmony_ci					continue;
50088c2ecf20Sopenharmony_ci				if (!test_bit(R5_Insync, &dev->flags) ||
50098c2ecf20Sopenharmony_ci				    ((i == sh->pd_idx || i == sh->qd_idx)  &&
50108c2ecf20Sopenharmony_ci				     s.failed == 0))
50118c2ecf20Sopenharmony_ci					set_bit(STRIPE_INSYNC, &sh->state);
50128c2ecf20Sopenharmony_ci			}
50138c2ecf20Sopenharmony_ci		}
50148c2ecf20Sopenharmony_ci		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
50158c2ecf20Sopenharmony_ci			s.dec_preread_active = 1;
50168c2ecf20Sopenharmony_ci	}
50178c2ecf20Sopenharmony_ci
50188c2ecf20Sopenharmony_ci	/*
50198c2ecf20Sopenharmony_ci	 * might be able to return some write requests if the parity blocks
50208c2ecf20Sopenharmony_ci	 * are safe, or on a failed drive
50218c2ecf20Sopenharmony_ci	 */
50228c2ecf20Sopenharmony_ci	pdev = &sh->dev[sh->pd_idx];
50238c2ecf20Sopenharmony_ci	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
50248c2ecf20Sopenharmony_ci		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
50258c2ecf20Sopenharmony_ci	qdev = &sh->dev[sh->qd_idx];
50268c2ecf20Sopenharmony_ci	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
50278c2ecf20Sopenharmony_ci		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
50288c2ecf20Sopenharmony_ci		|| conf->level < 6;
50298c2ecf20Sopenharmony_ci
50308c2ecf20Sopenharmony_ci	if (s.written &&
50318c2ecf20Sopenharmony_ci	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
50328c2ecf20Sopenharmony_ci			     && !test_bit(R5_LOCKED, &pdev->flags)
50338c2ecf20Sopenharmony_ci			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
50348c2ecf20Sopenharmony_ci				 test_bit(R5_Discard, &pdev->flags))))) &&
50358c2ecf20Sopenharmony_ci	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
50368c2ecf20Sopenharmony_ci			     && !test_bit(R5_LOCKED, &qdev->flags)
50378c2ecf20Sopenharmony_ci			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
50388c2ecf20Sopenharmony_ci				 test_bit(R5_Discard, &qdev->flags))))))
50398c2ecf20Sopenharmony_ci		handle_stripe_clean_event(conf, sh, disks);
50408c2ecf20Sopenharmony_ci
50418c2ecf20Sopenharmony_ci	if (s.just_cached)
50428c2ecf20Sopenharmony_ci		r5c_handle_cached_data_endio(conf, sh, disks);
50438c2ecf20Sopenharmony_ci	log_stripe_write_finished(sh);
50448c2ecf20Sopenharmony_ci
50458c2ecf20Sopenharmony_ci	/* Now we might consider reading some blocks, either to check/generate
50468c2ecf20Sopenharmony_ci	 * parity, or to satisfy requests
50478c2ecf20Sopenharmony_ci	 * or to load a block that is being partially written.
50488c2ecf20Sopenharmony_ci	 */
50498c2ecf20Sopenharmony_ci	if (s.to_read || s.non_overwrite
50508c2ecf20Sopenharmony_ci	    || (s.to_write && s.failed)
50518c2ecf20Sopenharmony_ci	    || (s.syncing && (s.uptodate + s.compute < disks))
50528c2ecf20Sopenharmony_ci	    || s.replacing
50538c2ecf20Sopenharmony_ci	    || s.expanding)
50548c2ecf20Sopenharmony_ci		handle_stripe_fill(sh, &s, disks);
50558c2ecf20Sopenharmony_ci
50568c2ecf20Sopenharmony_ci	/*
50578c2ecf20Sopenharmony_ci	 * When the stripe finishes full journal write cycle (write to journal
50588c2ecf20Sopenharmony_ci	 * and raid disk), this is the clean up procedure so it is ready for
50598c2ecf20Sopenharmony_ci	 * next operation.
50608c2ecf20Sopenharmony_ci	 */
50618c2ecf20Sopenharmony_ci	r5c_finish_stripe_write_out(conf, sh, &s);
50628c2ecf20Sopenharmony_ci
50638c2ecf20Sopenharmony_ci	/*
50648c2ecf20Sopenharmony_ci	 * Now to consider new write requests, cache write back and what else,
50658c2ecf20Sopenharmony_ci	 * if anything should be read.  We do not handle new writes when:
50668c2ecf20Sopenharmony_ci	 * 1/ A 'write' operation (copy+xor) is already in flight.
50678c2ecf20Sopenharmony_ci	 * 2/ A 'check' operation is in flight, as it may clobber the parity
50688c2ecf20Sopenharmony_ci	 *    block.
50698c2ecf20Sopenharmony_ci	 * 3/ A r5c cache log write is in flight.
50708c2ecf20Sopenharmony_ci	 */
50718c2ecf20Sopenharmony_ci
50728c2ecf20Sopenharmony_ci	if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) {
50738c2ecf20Sopenharmony_ci		if (!r5c_is_writeback(conf->log)) {
50748c2ecf20Sopenharmony_ci			if (s.to_write)
50758c2ecf20Sopenharmony_ci				handle_stripe_dirtying(conf, sh, &s, disks);
50768c2ecf20Sopenharmony_ci		} else { /* write back cache */
50778c2ecf20Sopenharmony_ci			int ret = 0;
50788c2ecf20Sopenharmony_ci
50798c2ecf20Sopenharmony_ci			/* First, try handle writes in caching phase */
50808c2ecf20Sopenharmony_ci			if (s.to_write)
50818c2ecf20Sopenharmony_ci				ret = r5c_try_caching_write(conf, sh, &s,
50828c2ecf20Sopenharmony_ci							    disks);
50838c2ecf20Sopenharmony_ci			/*
50848c2ecf20Sopenharmony_ci			 * If caching phase failed: ret == -EAGAIN
50858c2ecf20Sopenharmony_ci			 *    OR
50868c2ecf20Sopenharmony_ci			 * stripe under reclaim: !caching && injournal
50878c2ecf20Sopenharmony_ci			 *
50888c2ecf20Sopenharmony_ci			 * fall back to handle_stripe_dirtying()
50898c2ecf20Sopenharmony_ci			 */
50908c2ecf20Sopenharmony_ci			if (ret == -EAGAIN ||
50918c2ecf20Sopenharmony_ci			    /* stripe under reclaim: !caching && injournal */
50928c2ecf20Sopenharmony_ci			    (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
50938c2ecf20Sopenharmony_ci			     s.injournal > 0)) {
50948c2ecf20Sopenharmony_ci				ret = handle_stripe_dirtying(conf, sh, &s,
50958c2ecf20Sopenharmony_ci							     disks);
50968c2ecf20Sopenharmony_ci				if (ret == -EAGAIN)
50978c2ecf20Sopenharmony_ci					goto finish;
50988c2ecf20Sopenharmony_ci			}
50998c2ecf20Sopenharmony_ci		}
51008c2ecf20Sopenharmony_ci	}
51018c2ecf20Sopenharmony_ci
51028c2ecf20Sopenharmony_ci	/* maybe we need to check and possibly fix the parity for this stripe
51038c2ecf20Sopenharmony_ci	 * Any reads will already have been scheduled, so we just see if enough
51048c2ecf20Sopenharmony_ci	 * data is available.  The parity check is held off while parity
51058c2ecf20Sopenharmony_ci	 * dependent operations are in flight.
51068c2ecf20Sopenharmony_ci	 */
51078c2ecf20Sopenharmony_ci	if (sh->check_state ||
51088c2ecf20Sopenharmony_ci	    (s.syncing && s.locked == 0 &&
51098c2ecf20Sopenharmony_ci	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
51108c2ecf20Sopenharmony_ci	     !test_bit(STRIPE_INSYNC, &sh->state))) {
51118c2ecf20Sopenharmony_ci		if (conf->level == 6)
51128c2ecf20Sopenharmony_ci			handle_parity_checks6(conf, sh, &s, disks);
51138c2ecf20Sopenharmony_ci		else
51148c2ecf20Sopenharmony_ci			handle_parity_checks5(conf, sh, &s, disks);
51158c2ecf20Sopenharmony_ci	}
51168c2ecf20Sopenharmony_ci
51178c2ecf20Sopenharmony_ci	if ((s.replacing || s.syncing) && s.locked == 0
51188c2ecf20Sopenharmony_ci	    && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
51198c2ecf20Sopenharmony_ci	    && !test_bit(STRIPE_REPLACED, &sh->state)) {
51208c2ecf20Sopenharmony_ci		/* Write out to replacement devices where possible */
51218c2ecf20Sopenharmony_ci		for (i = 0; i < conf->raid_disks; i++)
51228c2ecf20Sopenharmony_ci			if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
51238c2ecf20Sopenharmony_ci				WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
51248c2ecf20Sopenharmony_ci				set_bit(R5_WantReplace, &sh->dev[i].flags);
51258c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &sh->dev[i].flags);
51268c2ecf20Sopenharmony_ci				s.locked++;
51278c2ecf20Sopenharmony_ci			}
51288c2ecf20Sopenharmony_ci		if (s.replacing)
51298c2ecf20Sopenharmony_ci			set_bit(STRIPE_INSYNC, &sh->state);
51308c2ecf20Sopenharmony_ci		set_bit(STRIPE_REPLACED, &sh->state);
51318c2ecf20Sopenharmony_ci	}
51328c2ecf20Sopenharmony_ci	if ((s.syncing || s.replacing) && s.locked == 0 &&
51338c2ecf20Sopenharmony_ci	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
51348c2ecf20Sopenharmony_ci	    test_bit(STRIPE_INSYNC, &sh->state)) {
51358c2ecf20Sopenharmony_ci		md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
51368c2ecf20Sopenharmony_ci		clear_bit(STRIPE_SYNCING, &sh->state);
51378c2ecf20Sopenharmony_ci		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
51388c2ecf20Sopenharmony_ci			wake_up(&conf->wait_for_overlap);
51398c2ecf20Sopenharmony_ci	}
51408c2ecf20Sopenharmony_ci
51418c2ecf20Sopenharmony_ci	/* If the failed drives are just a ReadError, then we might need
51428c2ecf20Sopenharmony_ci	 * to progress the repair/check process
51438c2ecf20Sopenharmony_ci	 */
51448c2ecf20Sopenharmony_ci	if (s.failed <= conf->max_degraded && !conf->mddev->ro)
51458c2ecf20Sopenharmony_ci		for (i = 0; i < s.failed; i++) {
51468c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[s.failed_num[i]];
51478c2ecf20Sopenharmony_ci			if (test_bit(R5_ReadError, &dev->flags)
51488c2ecf20Sopenharmony_ci			    && !test_bit(R5_LOCKED, &dev->flags)
51498c2ecf20Sopenharmony_ci			    && test_bit(R5_UPTODATE, &dev->flags)
51508c2ecf20Sopenharmony_ci				) {
51518c2ecf20Sopenharmony_ci				if (!test_bit(R5_ReWrite, &dev->flags)) {
51528c2ecf20Sopenharmony_ci					set_bit(R5_Wantwrite, &dev->flags);
51538c2ecf20Sopenharmony_ci					set_bit(R5_ReWrite, &dev->flags);
51548c2ecf20Sopenharmony_ci				} else
51558c2ecf20Sopenharmony_ci					/* let's read it back */
51568c2ecf20Sopenharmony_ci					set_bit(R5_Wantread, &dev->flags);
51578c2ecf20Sopenharmony_ci				set_bit(R5_LOCKED, &dev->flags);
51588c2ecf20Sopenharmony_ci				s.locked++;
51598c2ecf20Sopenharmony_ci			}
51608c2ecf20Sopenharmony_ci		}
51618c2ecf20Sopenharmony_ci
51628c2ecf20Sopenharmony_ci	/* Finish reconstruct operations initiated by the expansion process */
51638c2ecf20Sopenharmony_ci	if (sh->reconstruct_state == reconstruct_state_result) {
51648c2ecf20Sopenharmony_ci		struct stripe_head *sh_src
51658c2ecf20Sopenharmony_ci			= raid5_get_active_stripe(conf, sh->sector, 1, 1, 1);
51668c2ecf20Sopenharmony_ci		if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
51678c2ecf20Sopenharmony_ci			/* sh cannot be written until sh_src has been read.
51688c2ecf20Sopenharmony_ci			 * so arrange for sh to be delayed a little
51698c2ecf20Sopenharmony_ci			 */
51708c2ecf20Sopenharmony_ci			set_bit(STRIPE_DELAYED, &sh->state);
51718c2ecf20Sopenharmony_ci			set_bit(STRIPE_HANDLE, &sh->state);
51728c2ecf20Sopenharmony_ci			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
51738c2ecf20Sopenharmony_ci					      &sh_src->state))
51748c2ecf20Sopenharmony_ci				atomic_inc(&conf->preread_active_stripes);
51758c2ecf20Sopenharmony_ci			raid5_release_stripe(sh_src);
51768c2ecf20Sopenharmony_ci			goto finish;
51778c2ecf20Sopenharmony_ci		}
51788c2ecf20Sopenharmony_ci		if (sh_src)
51798c2ecf20Sopenharmony_ci			raid5_release_stripe(sh_src);
51808c2ecf20Sopenharmony_ci
51818c2ecf20Sopenharmony_ci		sh->reconstruct_state = reconstruct_state_idle;
51828c2ecf20Sopenharmony_ci		clear_bit(STRIPE_EXPANDING, &sh->state);
51838c2ecf20Sopenharmony_ci		for (i = conf->raid_disks; i--; ) {
51848c2ecf20Sopenharmony_ci			set_bit(R5_Wantwrite, &sh->dev[i].flags);
51858c2ecf20Sopenharmony_ci			set_bit(R5_LOCKED, &sh->dev[i].flags);
51868c2ecf20Sopenharmony_ci			s.locked++;
51878c2ecf20Sopenharmony_ci		}
51888c2ecf20Sopenharmony_ci	}
51898c2ecf20Sopenharmony_ci
51908c2ecf20Sopenharmony_ci	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
51918c2ecf20Sopenharmony_ci	    !sh->reconstruct_state) {
51928c2ecf20Sopenharmony_ci		/* Need to write out all blocks after computing parity */
51938c2ecf20Sopenharmony_ci		sh->disks = conf->raid_disks;
51948c2ecf20Sopenharmony_ci		stripe_set_idx(sh->sector, conf, 0, sh);
51958c2ecf20Sopenharmony_ci		schedule_reconstruction(sh, &s, 1, 1);
51968c2ecf20Sopenharmony_ci	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
51978c2ecf20Sopenharmony_ci		clear_bit(STRIPE_EXPAND_READY, &sh->state);
51988c2ecf20Sopenharmony_ci		atomic_dec(&conf->reshape_stripes);
51998c2ecf20Sopenharmony_ci		wake_up(&conf->wait_for_overlap);
52008c2ecf20Sopenharmony_ci		md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
52018c2ecf20Sopenharmony_ci	}
52028c2ecf20Sopenharmony_ci
52038c2ecf20Sopenharmony_ci	if (s.expanding && s.locked == 0 &&
52048c2ecf20Sopenharmony_ci	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
52058c2ecf20Sopenharmony_ci		handle_stripe_expansion(conf, sh);
52068c2ecf20Sopenharmony_ci
52078c2ecf20Sopenharmony_cifinish:
52088c2ecf20Sopenharmony_ci	/* wait for this device to become unblocked */
52098c2ecf20Sopenharmony_ci	if (unlikely(s.blocked_rdev)) {
52108c2ecf20Sopenharmony_ci		if (conf->mddev->external)
52118c2ecf20Sopenharmony_ci			md_wait_for_blocked_rdev(s.blocked_rdev,
52128c2ecf20Sopenharmony_ci						 conf->mddev);
52138c2ecf20Sopenharmony_ci		else
52148c2ecf20Sopenharmony_ci			/* Internal metadata will immediately
52158c2ecf20Sopenharmony_ci			 * be written by raid5d, so we don't
52168c2ecf20Sopenharmony_ci			 * need to wait here.
52178c2ecf20Sopenharmony_ci			 */
52188c2ecf20Sopenharmony_ci			rdev_dec_pending(s.blocked_rdev,
52198c2ecf20Sopenharmony_ci					 conf->mddev);
52208c2ecf20Sopenharmony_ci	}
52218c2ecf20Sopenharmony_ci
52228c2ecf20Sopenharmony_ci	if (s.handle_bad_blocks)
52238c2ecf20Sopenharmony_ci		for (i = disks; i--; ) {
52248c2ecf20Sopenharmony_ci			struct md_rdev *rdev;
52258c2ecf20Sopenharmony_ci			struct r5dev *dev = &sh->dev[i];
52268c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
52278c2ecf20Sopenharmony_ci				/* We own a safe reference to the rdev */
52288c2ecf20Sopenharmony_ci				rdev = conf->disks[i].rdev;
52298c2ecf20Sopenharmony_ci				if (!rdev_set_badblocks(rdev, sh->sector,
52308c2ecf20Sopenharmony_ci							RAID5_STRIPE_SECTORS(conf), 0))
52318c2ecf20Sopenharmony_ci					md_error(conf->mddev, rdev);
52328c2ecf20Sopenharmony_ci				rdev_dec_pending(rdev, conf->mddev);
52338c2ecf20Sopenharmony_ci			}
52348c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
52358c2ecf20Sopenharmony_ci				rdev = conf->disks[i].rdev;
52368c2ecf20Sopenharmony_ci				rdev_clear_badblocks(rdev, sh->sector,
52378c2ecf20Sopenharmony_ci						     RAID5_STRIPE_SECTORS(conf), 0);
52388c2ecf20Sopenharmony_ci				rdev_dec_pending(rdev, conf->mddev);
52398c2ecf20Sopenharmony_ci			}
52408c2ecf20Sopenharmony_ci			if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
52418c2ecf20Sopenharmony_ci				rdev = conf->disks[i].replacement;
52428c2ecf20Sopenharmony_ci				if (!rdev)
52438c2ecf20Sopenharmony_ci					/* rdev have been moved down */
52448c2ecf20Sopenharmony_ci					rdev = conf->disks[i].rdev;
52458c2ecf20Sopenharmony_ci				rdev_clear_badblocks(rdev, sh->sector,
52468c2ecf20Sopenharmony_ci						     RAID5_STRIPE_SECTORS(conf), 0);
52478c2ecf20Sopenharmony_ci				rdev_dec_pending(rdev, conf->mddev);
52488c2ecf20Sopenharmony_ci			}
52498c2ecf20Sopenharmony_ci		}
52508c2ecf20Sopenharmony_ci
52518c2ecf20Sopenharmony_ci	if (s.ops_request)
52528c2ecf20Sopenharmony_ci		raid_run_ops(sh, s.ops_request);
52538c2ecf20Sopenharmony_ci
52548c2ecf20Sopenharmony_ci	ops_run_io(sh, &s);
52558c2ecf20Sopenharmony_ci
52568c2ecf20Sopenharmony_ci	if (s.dec_preread_active) {
52578c2ecf20Sopenharmony_ci		/* We delay this until after ops_run_io so that if make_request
52588c2ecf20Sopenharmony_ci		 * is waiting on a flush, it won't continue until the writes
52598c2ecf20Sopenharmony_ci		 * have actually been submitted.
52608c2ecf20Sopenharmony_ci		 */
52618c2ecf20Sopenharmony_ci		atomic_dec(&conf->preread_active_stripes);
52628c2ecf20Sopenharmony_ci		if (atomic_read(&conf->preread_active_stripes) <
52638c2ecf20Sopenharmony_ci		    IO_THRESHOLD)
52648c2ecf20Sopenharmony_ci			md_wakeup_thread(conf->mddev->thread);
52658c2ecf20Sopenharmony_ci	}
52668c2ecf20Sopenharmony_ci
52678c2ecf20Sopenharmony_ci	clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
52688c2ecf20Sopenharmony_ci}
52698c2ecf20Sopenharmony_ci
52708c2ecf20Sopenharmony_cistatic void raid5_activate_delayed(struct r5conf *conf)
52718c2ecf20Sopenharmony_ci{
52728c2ecf20Sopenharmony_ci	if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
52738c2ecf20Sopenharmony_ci		while (!list_empty(&conf->delayed_list)) {
52748c2ecf20Sopenharmony_ci			struct list_head *l = conf->delayed_list.next;
52758c2ecf20Sopenharmony_ci			struct stripe_head *sh;
52768c2ecf20Sopenharmony_ci			sh = list_entry(l, struct stripe_head, lru);
52778c2ecf20Sopenharmony_ci			list_del_init(l);
52788c2ecf20Sopenharmony_ci			clear_bit(STRIPE_DELAYED, &sh->state);
52798c2ecf20Sopenharmony_ci			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
52808c2ecf20Sopenharmony_ci				atomic_inc(&conf->preread_active_stripes);
52818c2ecf20Sopenharmony_ci			list_add_tail(&sh->lru, &conf->hold_list);
52828c2ecf20Sopenharmony_ci			raid5_wakeup_stripe_thread(sh);
52838c2ecf20Sopenharmony_ci		}
52848c2ecf20Sopenharmony_ci	}
52858c2ecf20Sopenharmony_ci}
52868c2ecf20Sopenharmony_ci
52878c2ecf20Sopenharmony_cistatic void activate_bit_delay(struct r5conf *conf,
52888c2ecf20Sopenharmony_ci	struct list_head *temp_inactive_list)
52898c2ecf20Sopenharmony_ci{
52908c2ecf20Sopenharmony_ci	/* device_lock is held */
52918c2ecf20Sopenharmony_ci	struct list_head head;
52928c2ecf20Sopenharmony_ci	list_add(&head, &conf->bitmap_list);
52938c2ecf20Sopenharmony_ci	list_del_init(&conf->bitmap_list);
52948c2ecf20Sopenharmony_ci	while (!list_empty(&head)) {
52958c2ecf20Sopenharmony_ci		struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
52968c2ecf20Sopenharmony_ci		int hash;
52978c2ecf20Sopenharmony_ci		list_del_init(&sh->lru);
52988c2ecf20Sopenharmony_ci		atomic_inc(&sh->count);
52998c2ecf20Sopenharmony_ci		hash = sh->hash_lock_index;
53008c2ecf20Sopenharmony_ci		__release_stripe(conf, sh, &temp_inactive_list[hash]);
53018c2ecf20Sopenharmony_ci	}
53028c2ecf20Sopenharmony_ci}
53038c2ecf20Sopenharmony_ci
53048c2ecf20Sopenharmony_cistatic int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
53058c2ecf20Sopenharmony_ci{
53068c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
53078c2ecf20Sopenharmony_ci	sector_t sector = bio->bi_iter.bi_sector;
53088c2ecf20Sopenharmony_ci	unsigned int chunk_sectors;
53098c2ecf20Sopenharmony_ci	unsigned int bio_sectors = bio_sectors(bio);
53108c2ecf20Sopenharmony_ci
53118c2ecf20Sopenharmony_ci	WARN_ON_ONCE(bio->bi_partno);
53128c2ecf20Sopenharmony_ci
53138c2ecf20Sopenharmony_ci	chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
53148c2ecf20Sopenharmony_ci	return  chunk_sectors >=
53158c2ecf20Sopenharmony_ci		((sector & (chunk_sectors - 1)) + bio_sectors);
53168c2ecf20Sopenharmony_ci}
53178c2ecf20Sopenharmony_ci
53188c2ecf20Sopenharmony_ci/*
53198c2ecf20Sopenharmony_ci *  add bio to the retry LIFO  ( in O(1) ... we are in interrupt )
53208c2ecf20Sopenharmony_ci *  later sampled by raid5d.
53218c2ecf20Sopenharmony_ci */
53228c2ecf20Sopenharmony_cistatic void add_bio_to_retry(struct bio *bi,struct r5conf *conf)
53238c2ecf20Sopenharmony_ci{
53248c2ecf20Sopenharmony_ci	unsigned long flags;
53258c2ecf20Sopenharmony_ci
53268c2ecf20Sopenharmony_ci	spin_lock_irqsave(&conf->device_lock, flags);
53278c2ecf20Sopenharmony_ci
53288c2ecf20Sopenharmony_ci	bi->bi_next = conf->retry_read_aligned_list;
53298c2ecf20Sopenharmony_ci	conf->retry_read_aligned_list = bi;
53308c2ecf20Sopenharmony_ci
53318c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&conf->device_lock, flags);
53328c2ecf20Sopenharmony_ci	md_wakeup_thread(conf->mddev->thread);
53338c2ecf20Sopenharmony_ci}
53348c2ecf20Sopenharmony_ci
53358c2ecf20Sopenharmony_cistatic struct bio *remove_bio_from_retry(struct r5conf *conf,
53368c2ecf20Sopenharmony_ci					 unsigned int *offset)
53378c2ecf20Sopenharmony_ci{
53388c2ecf20Sopenharmony_ci	struct bio *bi;
53398c2ecf20Sopenharmony_ci
53408c2ecf20Sopenharmony_ci	bi = conf->retry_read_aligned;
53418c2ecf20Sopenharmony_ci	if (bi) {
53428c2ecf20Sopenharmony_ci		*offset = conf->retry_read_offset;
53438c2ecf20Sopenharmony_ci		conf->retry_read_aligned = NULL;
53448c2ecf20Sopenharmony_ci		return bi;
53458c2ecf20Sopenharmony_ci	}
53468c2ecf20Sopenharmony_ci	bi = conf->retry_read_aligned_list;
53478c2ecf20Sopenharmony_ci	if(bi) {
53488c2ecf20Sopenharmony_ci		conf->retry_read_aligned_list = bi->bi_next;
53498c2ecf20Sopenharmony_ci		bi->bi_next = NULL;
53508c2ecf20Sopenharmony_ci		*offset = 0;
53518c2ecf20Sopenharmony_ci	}
53528c2ecf20Sopenharmony_ci
53538c2ecf20Sopenharmony_ci	return bi;
53548c2ecf20Sopenharmony_ci}
53558c2ecf20Sopenharmony_ci
53568c2ecf20Sopenharmony_ci/*
53578c2ecf20Sopenharmony_ci *  The "raid5_align_endio" should check if the read succeeded and if it
53588c2ecf20Sopenharmony_ci *  did, call bio_endio on the original bio (having bio_put the new bio
53598c2ecf20Sopenharmony_ci *  first).
53608c2ecf20Sopenharmony_ci *  If the read failed..
53618c2ecf20Sopenharmony_ci */
53628c2ecf20Sopenharmony_cistatic void raid5_align_endio(struct bio *bi)
53638c2ecf20Sopenharmony_ci{
53648c2ecf20Sopenharmony_ci	struct bio* raid_bi  = bi->bi_private;
53658c2ecf20Sopenharmony_ci	struct mddev *mddev;
53668c2ecf20Sopenharmony_ci	struct r5conf *conf;
53678c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
53688c2ecf20Sopenharmony_ci	blk_status_t error = bi->bi_status;
53698c2ecf20Sopenharmony_ci
53708c2ecf20Sopenharmony_ci	bio_put(bi);
53718c2ecf20Sopenharmony_ci
53728c2ecf20Sopenharmony_ci	rdev = (void*)raid_bi->bi_next;
53738c2ecf20Sopenharmony_ci	raid_bi->bi_next = NULL;
53748c2ecf20Sopenharmony_ci	mddev = rdev->mddev;
53758c2ecf20Sopenharmony_ci	conf = mddev->private;
53768c2ecf20Sopenharmony_ci
53778c2ecf20Sopenharmony_ci	rdev_dec_pending(rdev, conf->mddev);
53788c2ecf20Sopenharmony_ci
53798c2ecf20Sopenharmony_ci	if (!error) {
53808c2ecf20Sopenharmony_ci		bio_endio(raid_bi);
53818c2ecf20Sopenharmony_ci		if (atomic_dec_and_test(&conf->active_aligned_reads))
53828c2ecf20Sopenharmony_ci			wake_up(&conf->wait_for_quiescent);
53838c2ecf20Sopenharmony_ci		return;
53848c2ecf20Sopenharmony_ci	}
53858c2ecf20Sopenharmony_ci
53868c2ecf20Sopenharmony_ci	pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
53878c2ecf20Sopenharmony_ci
53888c2ecf20Sopenharmony_ci	add_bio_to_retry(raid_bi, conf);
53898c2ecf20Sopenharmony_ci}
53908c2ecf20Sopenharmony_ci
53918c2ecf20Sopenharmony_cistatic int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
53928c2ecf20Sopenharmony_ci{
53938c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
53948c2ecf20Sopenharmony_ci	int dd_idx;
53958c2ecf20Sopenharmony_ci	struct bio* align_bi;
53968c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
53978c2ecf20Sopenharmony_ci	sector_t end_sector;
53988c2ecf20Sopenharmony_ci
53998c2ecf20Sopenharmony_ci	if (!in_chunk_boundary(mddev, raid_bio)) {
54008c2ecf20Sopenharmony_ci		pr_debug("%s: non aligned\n", __func__);
54018c2ecf20Sopenharmony_ci		return 0;
54028c2ecf20Sopenharmony_ci	}
54038c2ecf20Sopenharmony_ci	/*
54048c2ecf20Sopenharmony_ci	 * use bio_clone_fast to make a copy of the bio
54058c2ecf20Sopenharmony_ci	 */
54068c2ecf20Sopenharmony_ci	align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
54078c2ecf20Sopenharmony_ci	if (!align_bi)
54088c2ecf20Sopenharmony_ci		return 0;
54098c2ecf20Sopenharmony_ci	/*
54108c2ecf20Sopenharmony_ci	 *   set bi_end_io to a new function, and set bi_private to the
54118c2ecf20Sopenharmony_ci	 *     original bio.
54128c2ecf20Sopenharmony_ci	 */
54138c2ecf20Sopenharmony_ci	align_bi->bi_end_io  = raid5_align_endio;
54148c2ecf20Sopenharmony_ci	align_bi->bi_private = raid_bio;
54158c2ecf20Sopenharmony_ci	/*
54168c2ecf20Sopenharmony_ci	 *	compute position
54178c2ecf20Sopenharmony_ci	 */
54188c2ecf20Sopenharmony_ci	align_bi->bi_iter.bi_sector =
54198c2ecf20Sopenharmony_ci		raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
54208c2ecf20Sopenharmony_ci				     0, &dd_idx, NULL);
54218c2ecf20Sopenharmony_ci
54228c2ecf20Sopenharmony_ci	end_sector = bio_end_sector(align_bi);
54238c2ecf20Sopenharmony_ci	rcu_read_lock();
54248c2ecf20Sopenharmony_ci	rdev = rcu_dereference(conf->disks[dd_idx].replacement);
54258c2ecf20Sopenharmony_ci	if (!rdev || test_bit(Faulty, &rdev->flags) ||
54268c2ecf20Sopenharmony_ci	    rdev->recovery_offset < end_sector) {
54278c2ecf20Sopenharmony_ci		rdev = rcu_dereference(conf->disks[dd_idx].rdev);
54288c2ecf20Sopenharmony_ci		if (rdev &&
54298c2ecf20Sopenharmony_ci		    (test_bit(Faulty, &rdev->flags) ||
54308c2ecf20Sopenharmony_ci		    !(test_bit(In_sync, &rdev->flags) ||
54318c2ecf20Sopenharmony_ci		      rdev->recovery_offset >= end_sector)))
54328c2ecf20Sopenharmony_ci			rdev = NULL;
54338c2ecf20Sopenharmony_ci	}
54348c2ecf20Sopenharmony_ci
54358c2ecf20Sopenharmony_ci	if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
54368c2ecf20Sopenharmony_ci		rcu_read_unlock();
54378c2ecf20Sopenharmony_ci		bio_put(align_bi);
54388c2ecf20Sopenharmony_ci		return 0;
54398c2ecf20Sopenharmony_ci	}
54408c2ecf20Sopenharmony_ci
54418c2ecf20Sopenharmony_ci	if (rdev) {
54428c2ecf20Sopenharmony_ci		sector_t first_bad;
54438c2ecf20Sopenharmony_ci		int bad_sectors;
54448c2ecf20Sopenharmony_ci
54458c2ecf20Sopenharmony_ci		atomic_inc(&rdev->nr_pending);
54468c2ecf20Sopenharmony_ci		rcu_read_unlock();
54478c2ecf20Sopenharmony_ci		raid_bio->bi_next = (void*)rdev;
54488c2ecf20Sopenharmony_ci		bio_set_dev(align_bi, rdev->bdev);
54498c2ecf20Sopenharmony_ci
54508c2ecf20Sopenharmony_ci		if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
54518c2ecf20Sopenharmony_ci				bio_sectors(align_bi),
54528c2ecf20Sopenharmony_ci				&first_bad, &bad_sectors)) {
54538c2ecf20Sopenharmony_ci			bio_put(align_bi);
54548c2ecf20Sopenharmony_ci			rdev_dec_pending(rdev, mddev);
54558c2ecf20Sopenharmony_ci			return 0;
54568c2ecf20Sopenharmony_ci		}
54578c2ecf20Sopenharmony_ci
54588c2ecf20Sopenharmony_ci		/* No reshape active, so we can trust rdev->data_offset */
54598c2ecf20Sopenharmony_ci		align_bi->bi_iter.bi_sector += rdev->data_offset;
54608c2ecf20Sopenharmony_ci
54618c2ecf20Sopenharmony_ci		spin_lock_irq(&conf->device_lock);
54628c2ecf20Sopenharmony_ci		wait_event_lock_irq(conf->wait_for_quiescent,
54638c2ecf20Sopenharmony_ci				    conf->quiesce == 0,
54648c2ecf20Sopenharmony_ci				    conf->device_lock);
54658c2ecf20Sopenharmony_ci		atomic_inc(&conf->active_aligned_reads);
54668c2ecf20Sopenharmony_ci		spin_unlock_irq(&conf->device_lock);
54678c2ecf20Sopenharmony_ci
54688c2ecf20Sopenharmony_ci		if (mddev->gendisk)
54698c2ecf20Sopenharmony_ci			trace_block_bio_remap(align_bi->bi_disk->queue,
54708c2ecf20Sopenharmony_ci					      align_bi, disk_devt(mddev->gendisk),
54718c2ecf20Sopenharmony_ci					      raid_bio->bi_iter.bi_sector);
54728c2ecf20Sopenharmony_ci		submit_bio_noacct(align_bi);
54738c2ecf20Sopenharmony_ci		return 1;
54748c2ecf20Sopenharmony_ci	} else {
54758c2ecf20Sopenharmony_ci		rcu_read_unlock();
54768c2ecf20Sopenharmony_ci		bio_put(align_bi);
54778c2ecf20Sopenharmony_ci		return 0;
54788c2ecf20Sopenharmony_ci	}
54798c2ecf20Sopenharmony_ci}
54808c2ecf20Sopenharmony_ci
54818c2ecf20Sopenharmony_cistatic struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
54828c2ecf20Sopenharmony_ci{
54838c2ecf20Sopenharmony_ci	struct bio *split;
54848c2ecf20Sopenharmony_ci	sector_t sector = raid_bio->bi_iter.bi_sector;
54858c2ecf20Sopenharmony_ci	unsigned chunk_sects = mddev->chunk_sectors;
54868c2ecf20Sopenharmony_ci	unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
54878c2ecf20Sopenharmony_ci
54888c2ecf20Sopenharmony_ci	if (sectors < bio_sectors(raid_bio)) {
54898c2ecf20Sopenharmony_ci		struct r5conf *conf = mddev->private;
54908c2ecf20Sopenharmony_ci		split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
54918c2ecf20Sopenharmony_ci		bio_chain(split, raid_bio);
54928c2ecf20Sopenharmony_ci		submit_bio_noacct(raid_bio);
54938c2ecf20Sopenharmony_ci		raid_bio = split;
54948c2ecf20Sopenharmony_ci	}
54958c2ecf20Sopenharmony_ci
54968c2ecf20Sopenharmony_ci	if (!raid5_read_one_chunk(mddev, raid_bio))
54978c2ecf20Sopenharmony_ci		return raid_bio;
54988c2ecf20Sopenharmony_ci
54998c2ecf20Sopenharmony_ci	return NULL;
55008c2ecf20Sopenharmony_ci}
55018c2ecf20Sopenharmony_ci
55028c2ecf20Sopenharmony_ci/* __get_priority_stripe - get the next stripe to process
55038c2ecf20Sopenharmony_ci *
55048c2ecf20Sopenharmony_ci * Full stripe writes are allowed to pass preread active stripes up until
55058c2ecf20Sopenharmony_ci * the bypass_threshold is exceeded.  In general the bypass_count
55068c2ecf20Sopenharmony_ci * increments when the handle_list is handled before the hold_list; however, it
55078c2ecf20Sopenharmony_ci * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a
55088c2ecf20Sopenharmony_ci * stripe with in flight i/o.  The bypass_count will be reset when the
55098c2ecf20Sopenharmony_ci * head of the hold_list has changed, i.e. the head was promoted to the
55108c2ecf20Sopenharmony_ci * handle_list.
55118c2ecf20Sopenharmony_ci */
55128c2ecf20Sopenharmony_cistatic struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
55138c2ecf20Sopenharmony_ci{
55148c2ecf20Sopenharmony_ci	struct stripe_head *sh, *tmp;
55158c2ecf20Sopenharmony_ci	struct list_head *handle_list = NULL;
55168c2ecf20Sopenharmony_ci	struct r5worker_group *wg;
55178c2ecf20Sopenharmony_ci	bool second_try = !r5c_is_writeback(conf->log) &&
55188c2ecf20Sopenharmony_ci		!r5l_log_disk_error(conf);
55198c2ecf20Sopenharmony_ci	bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
55208c2ecf20Sopenharmony_ci		r5l_log_disk_error(conf);
55218c2ecf20Sopenharmony_ci
55228c2ecf20Sopenharmony_ciagain:
55238c2ecf20Sopenharmony_ci	wg = NULL;
55248c2ecf20Sopenharmony_ci	sh = NULL;
55258c2ecf20Sopenharmony_ci	if (conf->worker_cnt_per_group == 0) {
55268c2ecf20Sopenharmony_ci		handle_list = try_loprio ? &conf->loprio_list :
55278c2ecf20Sopenharmony_ci					&conf->handle_list;
55288c2ecf20Sopenharmony_ci	} else if (group != ANY_GROUP) {
55298c2ecf20Sopenharmony_ci		handle_list = try_loprio ? &conf->worker_groups[group].loprio_list :
55308c2ecf20Sopenharmony_ci				&conf->worker_groups[group].handle_list;
55318c2ecf20Sopenharmony_ci		wg = &conf->worker_groups[group];
55328c2ecf20Sopenharmony_ci	} else {
55338c2ecf20Sopenharmony_ci		int i;
55348c2ecf20Sopenharmony_ci		for (i = 0; i < conf->group_cnt; i++) {
55358c2ecf20Sopenharmony_ci			handle_list = try_loprio ? &conf->worker_groups[i].loprio_list :
55368c2ecf20Sopenharmony_ci				&conf->worker_groups[i].handle_list;
55378c2ecf20Sopenharmony_ci			wg = &conf->worker_groups[i];
55388c2ecf20Sopenharmony_ci			if (!list_empty(handle_list))
55398c2ecf20Sopenharmony_ci				break;
55408c2ecf20Sopenharmony_ci		}
55418c2ecf20Sopenharmony_ci	}
55428c2ecf20Sopenharmony_ci
55438c2ecf20Sopenharmony_ci	pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
55448c2ecf20Sopenharmony_ci		  __func__,
55458c2ecf20Sopenharmony_ci		  list_empty(handle_list) ? "empty" : "busy",
55468c2ecf20Sopenharmony_ci		  list_empty(&conf->hold_list) ? "empty" : "busy",
55478c2ecf20Sopenharmony_ci		  atomic_read(&conf->pending_full_writes), conf->bypass_count);
55488c2ecf20Sopenharmony_ci
55498c2ecf20Sopenharmony_ci	if (!list_empty(handle_list)) {
55508c2ecf20Sopenharmony_ci		sh = list_entry(handle_list->next, typeof(*sh), lru);
55518c2ecf20Sopenharmony_ci
55528c2ecf20Sopenharmony_ci		if (list_empty(&conf->hold_list))
55538c2ecf20Sopenharmony_ci			conf->bypass_count = 0;
55548c2ecf20Sopenharmony_ci		else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
55558c2ecf20Sopenharmony_ci			if (conf->hold_list.next == conf->last_hold)
55568c2ecf20Sopenharmony_ci				conf->bypass_count++;
55578c2ecf20Sopenharmony_ci			else {
55588c2ecf20Sopenharmony_ci				conf->last_hold = conf->hold_list.next;
55598c2ecf20Sopenharmony_ci				conf->bypass_count -= conf->bypass_threshold;
55608c2ecf20Sopenharmony_ci				if (conf->bypass_count < 0)
55618c2ecf20Sopenharmony_ci					conf->bypass_count = 0;
55628c2ecf20Sopenharmony_ci			}
55638c2ecf20Sopenharmony_ci		}
55648c2ecf20Sopenharmony_ci	} else if (!list_empty(&conf->hold_list) &&
55658c2ecf20Sopenharmony_ci		   ((conf->bypass_threshold &&
55668c2ecf20Sopenharmony_ci		     conf->bypass_count > conf->bypass_threshold) ||
55678c2ecf20Sopenharmony_ci		    atomic_read(&conf->pending_full_writes) == 0)) {
55688c2ecf20Sopenharmony_ci
55698c2ecf20Sopenharmony_ci		list_for_each_entry(tmp, &conf->hold_list,  lru) {
55708c2ecf20Sopenharmony_ci			if (conf->worker_cnt_per_group == 0 ||
55718c2ecf20Sopenharmony_ci			    group == ANY_GROUP ||
55728c2ecf20Sopenharmony_ci			    !cpu_online(tmp->cpu) ||
55738c2ecf20Sopenharmony_ci			    cpu_to_group(tmp->cpu) == group) {
55748c2ecf20Sopenharmony_ci				sh = tmp;
55758c2ecf20Sopenharmony_ci				break;
55768c2ecf20Sopenharmony_ci			}
55778c2ecf20Sopenharmony_ci		}
55788c2ecf20Sopenharmony_ci
55798c2ecf20Sopenharmony_ci		if (sh) {
55808c2ecf20Sopenharmony_ci			conf->bypass_count -= conf->bypass_threshold;
55818c2ecf20Sopenharmony_ci			if (conf->bypass_count < 0)
55828c2ecf20Sopenharmony_ci				conf->bypass_count = 0;
55838c2ecf20Sopenharmony_ci		}
55848c2ecf20Sopenharmony_ci		wg = NULL;
55858c2ecf20Sopenharmony_ci	}
55868c2ecf20Sopenharmony_ci
55878c2ecf20Sopenharmony_ci	if (!sh) {
55888c2ecf20Sopenharmony_ci		if (second_try)
55898c2ecf20Sopenharmony_ci			return NULL;
55908c2ecf20Sopenharmony_ci		second_try = true;
55918c2ecf20Sopenharmony_ci		try_loprio = !try_loprio;
55928c2ecf20Sopenharmony_ci		goto again;
55938c2ecf20Sopenharmony_ci	}
55948c2ecf20Sopenharmony_ci
55958c2ecf20Sopenharmony_ci	if (wg) {
55968c2ecf20Sopenharmony_ci		wg->stripes_cnt--;
55978c2ecf20Sopenharmony_ci		sh->group = NULL;
55988c2ecf20Sopenharmony_ci	}
55998c2ecf20Sopenharmony_ci	list_del_init(&sh->lru);
56008c2ecf20Sopenharmony_ci	BUG_ON(atomic_inc_return(&sh->count) != 1);
56018c2ecf20Sopenharmony_ci	return sh;
56028c2ecf20Sopenharmony_ci}
56038c2ecf20Sopenharmony_ci
56048c2ecf20Sopenharmony_cistruct raid5_plug_cb {
56058c2ecf20Sopenharmony_ci	struct blk_plug_cb	cb;
56068c2ecf20Sopenharmony_ci	struct list_head	list;
56078c2ecf20Sopenharmony_ci	struct list_head	temp_inactive_list[NR_STRIPE_HASH_LOCKS];
56088c2ecf20Sopenharmony_ci};
56098c2ecf20Sopenharmony_ci
56108c2ecf20Sopenharmony_cistatic void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
56118c2ecf20Sopenharmony_ci{
56128c2ecf20Sopenharmony_ci	struct raid5_plug_cb *cb = container_of(
56138c2ecf20Sopenharmony_ci		blk_cb, struct raid5_plug_cb, cb);
56148c2ecf20Sopenharmony_ci	struct stripe_head *sh;
56158c2ecf20Sopenharmony_ci	struct mddev *mddev = cb->cb.data;
56168c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
56178c2ecf20Sopenharmony_ci	int cnt = 0;
56188c2ecf20Sopenharmony_ci	int hash;
56198c2ecf20Sopenharmony_ci
56208c2ecf20Sopenharmony_ci	if (cb->list.next && !list_empty(&cb->list)) {
56218c2ecf20Sopenharmony_ci		spin_lock_irq(&conf->device_lock);
56228c2ecf20Sopenharmony_ci		while (!list_empty(&cb->list)) {
56238c2ecf20Sopenharmony_ci			sh = list_first_entry(&cb->list, struct stripe_head, lru);
56248c2ecf20Sopenharmony_ci			list_del_init(&sh->lru);
56258c2ecf20Sopenharmony_ci			/*
56268c2ecf20Sopenharmony_ci			 * avoid race release_stripe_plug() sees
56278c2ecf20Sopenharmony_ci			 * STRIPE_ON_UNPLUG_LIST clear but the stripe
56288c2ecf20Sopenharmony_ci			 * is still in our list
56298c2ecf20Sopenharmony_ci			 */
56308c2ecf20Sopenharmony_ci			smp_mb__before_atomic();
56318c2ecf20Sopenharmony_ci			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
56328c2ecf20Sopenharmony_ci			/*
56338c2ecf20Sopenharmony_ci			 * STRIPE_ON_RELEASE_LIST could be set here. In that
56348c2ecf20Sopenharmony_ci			 * case, the count is always > 1 here
56358c2ecf20Sopenharmony_ci			 */
56368c2ecf20Sopenharmony_ci			hash = sh->hash_lock_index;
56378c2ecf20Sopenharmony_ci			__release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
56388c2ecf20Sopenharmony_ci			cnt++;
56398c2ecf20Sopenharmony_ci		}
56408c2ecf20Sopenharmony_ci		spin_unlock_irq(&conf->device_lock);
56418c2ecf20Sopenharmony_ci	}
56428c2ecf20Sopenharmony_ci	release_inactive_stripe_list(conf, cb->temp_inactive_list,
56438c2ecf20Sopenharmony_ci				     NR_STRIPE_HASH_LOCKS);
56448c2ecf20Sopenharmony_ci	if (mddev->queue)
56458c2ecf20Sopenharmony_ci		trace_block_unplug(mddev->queue, cnt, !from_schedule);
56468c2ecf20Sopenharmony_ci	kfree(cb);
56478c2ecf20Sopenharmony_ci}
56488c2ecf20Sopenharmony_ci
56498c2ecf20Sopenharmony_cistatic void release_stripe_plug(struct mddev *mddev,
56508c2ecf20Sopenharmony_ci				struct stripe_head *sh)
56518c2ecf20Sopenharmony_ci{
56528c2ecf20Sopenharmony_ci	struct blk_plug_cb *blk_cb = blk_check_plugged(
56538c2ecf20Sopenharmony_ci		raid5_unplug, mddev,
56548c2ecf20Sopenharmony_ci		sizeof(struct raid5_plug_cb));
56558c2ecf20Sopenharmony_ci	struct raid5_plug_cb *cb;
56568c2ecf20Sopenharmony_ci
56578c2ecf20Sopenharmony_ci	if (!blk_cb) {
56588c2ecf20Sopenharmony_ci		raid5_release_stripe(sh);
56598c2ecf20Sopenharmony_ci		return;
56608c2ecf20Sopenharmony_ci	}
56618c2ecf20Sopenharmony_ci
56628c2ecf20Sopenharmony_ci	cb = container_of(blk_cb, struct raid5_plug_cb, cb);
56638c2ecf20Sopenharmony_ci
56648c2ecf20Sopenharmony_ci	if (cb->list.next == NULL) {
56658c2ecf20Sopenharmony_ci		int i;
56668c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&cb->list);
56678c2ecf20Sopenharmony_ci		for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
56688c2ecf20Sopenharmony_ci			INIT_LIST_HEAD(cb->temp_inactive_list + i);
56698c2ecf20Sopenharmony_ci	}
56708c2ecf20Sopenharmony_ci
56718c2ecf20Sopenharmony_ci	if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
56728c2ecf20Sopenharmony_ci		list_add_tail(&sh->lru, &cb->list);
56738c2ecf20Sopenharmony_ci	else
56748c2ecf20Sopenharmony_ci		raid5_release_stripe(sh);
56758c2ecf20Sopenharmony_ci}
56768c2ecf20Sopenharmony_ci
56778c2ecf20Sopenharmony_cistatic void make_discard_request(struct mddev *mddev, struct bio *bi)
56788c2ecf20Sopenharmony_ci{
56798c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
56808c2ecf20Sopenharmony_ci	sector_t logical_sector, last_sector;
56818c2ecf20Sopenharmony_ci	struct stripe_head *sh;
56828c2ecf20Sopenharmony_ci	int stripe_sectors;
56838c2ecf20Sopenharmony_ci
56848c2ecf20Sopenharmony_ci	if (mddev->reshape_position != MaxSector)
56858c2ecf20Sopenharmony_ci		/* Skip discard while reshape is happening */
56868c2ecf20Sopenharmony_ci		return;
56878c2ecf20Sopenharmony_ci
56888c2ecf20Sopenharmony_ci	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
56898c2ecf20Sopenharmony_ci	last_sector = bio_end_sector(bi);
56908c2ecf20Sopenharmony_ci
56918c2ecf20Sopenharmony_ci	bi->bi_next = NULL;
56928c2ecf20Sopenharmony_ci
56938c2ecf20Sopenharmony_ci	stripe_sectors = conf->chunk_sectors *
56948c2ecf20Sopenharmony_ci		(conf->raid_disks - conf->max_degraded);
56958c2ecf20Sopenharmony_ci	logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector,
56968c2ecf20Sopenharmony_ci					       stripe_sectors);
56978c2ecf20Sopenharmony_ci	sector_div(last_sector, stripe_sectors);
56988c2ecf20Sopenharmony_ci
56998c2ecf20Sopenharmony_ci	logical_sector *= conf->chunk_sectors;
57008c2ecf20Sopenharmony_ci	last_sector *= conf->chunk_sectors;
57018c2ecf20Sopenharmony_ci
57028c2ecf20Sopenharmony_ci	for (; logical_sector < last_sector;
57038c2ecf20Sopenharmony_ci	     logical_sector += RAID5_STRIPE_SECTORS(conf)) {
57048c2ecf20Sopenharmony_ci		DEFINE_WAIT(w);
57058c2ecf20Sopenharmony_ci		int d;
57068c2ecf20Sopenharmony_ci	again:
57078c2ecf20Sopenharmony_ci		sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0);
57088c2ecf20Sopenharmony_ci		prepare_to_wait(&conf->wait_for_overlap, &w,
57098c2ecf20Sopenharmony_ci				TASK_UNINTERRUPTIBLE);
57108c2ecf20Sopenharmony_ci		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
57118c2ecf20Sopenharmony_ci		if (test_bit(STRIPE_SYNCING, &sh->state)) {
57128c2ecf20Sopenharmony_ci			raid5_release_stripe(sh);
57138c2ecf20Sopenharmony_ci			schedule();
57148c2ecf20Sopenharmony_ci			goto again;
57158c2ecf20Sopenharmony_ci		}
57168c2ecf20Sopenharmony_ci		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
57178c2ecf20Sopenharmony_ci		spin_lock_irq(&sh->stripe_lock);
57188c2ecf20Sopenharmony_ci		for (d = 0; d < conf->raid_disks; d++) {
57198c2ecf20Sopenharmony_ci			if (d == sh->pd_idx || d == sh->qd_idx)
57208c2ecf20Sopenharmony_ci				continue;
57218c2ecf20Sopenharmony_ci			if (sh->dev[d].towrite || sh->dev[d].toread) {
57228c2ecf20Sopenharmony_ci				set_bit(R5_Overlap, &sh->dev[d].flags);
57238c2ecf20Sopenharmony_ci				spin_unlock_irq(&sh->stripe_lock);
57248c2ecf20Sopenharmony_ci				raid5_release_stripe(sh);
57258c2ecf20Sopenharmony_ci				schedule();
57268c2ecf20Sopenharmony_ci				goto again;
57278c2ecf20Sopenharmony_ci			}
57288c2ecf20Sopenharmony_ci		}
57298c2ecf20Sopenharmony_ci		set_bit(STRIPE_DISCARD, &sh->state);
57308c2ecf20Sopenharmony_ci		finish_wait(&conf->wait_for_overlap, &w);
57318c2ecf20Sopenharmony_ci		sh->overwrite_disks = 0;
57328c2ecf20Sopenharmony_ci		for (d = 0; d < conf->raid_disks; d++) {
57338c2ecf20Sopenharmony_ci			if (d == sh->pd_idx || d == sh->qd_idx)
57348c2ecf20Sopenharmony_ci				continue;
57358c2ecf20Sopenharmony_ci			sh->dev[d].towrite = bi;
57368c2ecf20Sopenharmony_ci			set_bit(R5_OVERWRITE, &sh->dev[d].flags);
57378c2ecf20Sopenharmony_ci			bio_inc_remaining(bi);
57388c2ecf20Sopenharmony_ci			md_write_inc(mddev, bi);
57398c2ecf20Sopenharmony_ci			sh->overwrite_disks++;
57408c2ecf20Sopenharmony_ci		}
57418c2ecf20Sopenharmony_ci		spin_unlock_irq(&sh->stripe_lock);
57428c2ecf20Sopenharmony_ci		if (conf->mddev->bitmap) {
57438c2ecf20Sopenharmony_ci			for (d = 0;
57448c2ecf20Sopenharmony_ci			     d < conf->raid_disks - conf->max_degraded;
57458c2ecf20Sopenharmony_ci			     d++)
57468c2ecf20Sopenharmony_ci				md_bitmap_startwrite(mddev->bitmap,
57478c2ecf20Sopenharmony_ci						     sh->sector,
57488c2ecf20Sopenharmony_ci						     RAID5_STRIPE_SECTORS(conf),
57498c2ecf20Sopenharmony_ci						     0);
57508c2ecf20Sopenharmony_ci			sh->bm_seq = conf->seq_flush + 1;
57518c2ecf20Sopenharmony_ci			set_bit(STRIPE_BIT_DELAY, &sh->state);
57528c2ecf20Sopenharmony_ci		}
57538c2ecf20Sopenharmony_ci
57548c2ecf20Sopenharmony_ci		set_bit(STRIPE_HANDLE, &sh->state);
57558c2ecf20Sopenharmony_ci		clear_bit(STRIPE_DELAYED, &sh->state);
57568c2ecf20Sopenharmony_ci		if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
57578c2ecf20Sopenharmony_ci			atomic_inc(&conf->preread_active_stripes);
57588c2ecf20Sopenharmony_ci		release_stripe_plug(mddev, sh);
57598c2ecf20Sopenharmony_ci	}
57608c2ecf20Sopenharmony_ci
57618c2ecf20Sopenharmony_ci	bio_endio(bi);
57628c2ecf20Sopenharmony_ci}
57638c2ecf20Sopenharmony_ci
/*
 * raid5_make_request - main entry point for bios submitted to the array.
 *
 * Splits the bio into RAID5_STRIPE_SECTORS-sized pieces and attaches each
 * piece to the stripe_head covering it; the stripe state machine performs
 * the actual device I/O later.
 *
 * Returns true when the bio has been taken care of (including error
 * completion), false when md_write_start() refuses it and the caller
 * should retry the bio later.
 */
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
	struct r5conf *conf = mddev->private;
	int dd_idx;
	sector_t new_sector;
	sector_t logical_sector, last_sector;
	struct stripe_head *sh;
	const int rw = bio_data_dir(bi);
	DEFINE_WAIT(w);
	bool do_prepare;
	bool do_flush = false;

	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
		/* Give the journal/log a first shot at handling the flush. */
		int ret = log_handle_flush_request(conf, bi);

		if (ret == 0)
			return true;
		if (ret == -ENODEV) {
			/* No log device: fall back to generic md flush. */
			if (md_flush_request(mddev, bi))
				return true;
		}
		/* ret == -EAGAIN, fallback */
		/*
		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
		 * we need to flush journal device
		 */
		do_flush = bi->bi_opf & REQ_PREFLUSH;
	}

	/* Could not mark array as writing (e.g. suspended); caller retries. */
	if (!md_write_start(mddev, bi))
		return false;
	/*
	 * If array is degraded, better not do chunk aligned read because
	 * later we might have to read it again in order to reconstruct
	 * data on failed drives.
	 */
	if (rw == READ && mddev->degraded == 0 &&
	    mddev->reshape_position == MaxSector) {
		bi = chunk_aligned_read(mddev, bi);
		if (!bi)
			return true;	/* fully handled by the aligned-read path */
	}

	if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
		make_discard_request(mddev, bi);
		md_write_end(mddev);
		return true;
	}

	/* Round the start down to a stripe boundary; walk chunk by chunk. */
	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
	last_sector = bio_end_sector(bi);
	bi->bi_next = NULL;

	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
		int previous;
		int seq;

		do_prepare = false;
	retry:
		/*
		 * Sample the geometry generation; we re-validate it after
		 * obtaining a stripe so a concurrent reshape step cannot
		 * leave us holding a stripe computed from stale geometry.
		 */
		seq = read_seqcount_begin(&conf->gen_lock);
		previous = 0;
		if (do_prepare)
			prepare_to_wait(&conf->wait_for_overlap, &w,
				TASK_UNINTERRUPTIBLE);
		if (unlikely(conf->reshape_progress != MaxSector)) {
			/* spinlock is needed as reshape_progress may be
			 * 64bit on a 32bit platform, and so it might be
			 * possible to see a half-updated value
			 * Of course reshape_progress could change after
			 * the lock is dropped, so once we get a reference
			 * to the stripe that we think it is, we will have
			 * to check again.
			 */
			spin_lock_irq(&conf->device_lock);
			if (mddev->reshape_backwards
			    ? logical_sector < conf->reshape_progress
			    : logical_sector >= conf->reshape_progress) {
				/* Still in the old layout. */
				previous = 1;
			} else {
				if (mddev->reshape_backwards
				    ? logical_sector < conf->reshape_safe
				    : logical_sector >= conf->reshape_safe) {
					/*
					 * Between reshape_safe and
					 * reshape_progress: actively being
					 * reshaped, so wait and retry.
					 */
					spin_unlock_irq(&conf->device_lock);
					schedule();
					do_prepare = true;
					goto retry;
				}
			}
			spin_unlock_irq(&conf->device_lock);
		}

		new_sector = raid5_compute_sector(conf, logical_sector,
						  previous,
						  &dd_idx, NULL);
		pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
			(unsigned long long)new_sector,
			(unsigned long long)logical_sector);

		sh = raid5_get_active_stripe(conf, new_sector, previous,
				       (bi->bi_opf & REQ_RAHEAD), 0);
		if (sh) {
			if (unlikely(previous)) {
				/* expansion might have moved on while waiting for a
				 * stripe, so we must do the range check again.
				 * Expansion could still move past after this
				 * test, but as we are holding a reference to
				 * 'sh', we know that if that happens,
				 *  STRIPE_EXPANDING will get set and the expansion
				 * won't proceed until we finish with the stripe.
				 */
				int must_retry = 0;
				spin_lock_irq(&conf->device_lock);
				if (mddev->reshape_backwards
				    ? logical_sector >= conf->reshape_progress
				    : logical_sector < conf->reshape_progress)
					/* mismatch, need to try again */
					must_retry = 1;
				spin_unlock_irq(&conf->device_lock);
				if (must_retry) {
					raid5_release_stripe(sh);
					schedule();
					do_prepare = true;
					goto retry;
				}
			}
			if (read_seqcount_retry(&conf->gen_lock, seq)) {
				/* Might have got the wrong stripe_head
				 * by accident
				 */
				raid5_release_stripe(sh);
				goto retry;
			}

			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
				/* Stripe is busy expanding or
				 * add failed due to overlap.  Flush everything
				 * and wait a while
				 */
				md_wakeup_thread(mddev->thread);
				raid5_release_stripe(sh);
				schedule();
				do_prepare = true;
				goto retry;
			}
			if (do_flush) {
				set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
				/* we only need flush for one stripe */
				do_flush = false;
			}

			set_bit(STRIPE_HANDLE, &sh->state);
			clear_bit(STRIPE_DELAYED, &sh->state);
			/*
			 * For REQ_SYNC bios, push the stripe to active
			 * preread so it is handled promptly rather than
			 * waiting for more writes to coalesce.
			 */
			if ((!sh->batch_head || sh == sh->batch_head) &&
			    (bi->bi_opf & REQ_SYNC) &&
			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
				atomic_inc(&conf->preread_active_stripes);
			release_stripe_plug(mddev, sh);
		} else {
			/* cannot get stripe for read-ahead, just give-up */
			bi->bi_status = BLK_STS_IOERR;
			break;
		}
	}
	finish_wait(&conf->wait_for_overlap, &w);

	if (rw == WRITE)
		md_write_end(mddev);	/* drop the md_write_start() reference */
	/*
	 * bio_endio() completes the bio now unless stripes still hold
	 * references to it (bio remaining count).
	 */
	bio_endio(bi);
	return true;
}
59368c2ecf20Sopenharmony_ci
59378c2ecf20Sopenharmony_cistatic sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);
59388c2ecf20Sopenharmony_ci
/*
 * reshape_request - advance an array reshape by (up to) one chunk.
 * @mddev:     the array being reshaped
 * @sector_nr: progress point to continue from (0 means "work out where
 *             to restart")
 * @skipped:   set to 1 when progress is advanced without doing any I/O
 *
 * Returns the number of device sectors advanced, or 0 when interrupted
 * before making progress.
 */
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
{
	/* reshaping is quite different to recovery/resync so it is
	 * handled quite separately ... here.
	 *
	 * On each call to sync_request, we gather one chunk worth of
	 * destination stripes and flag them as expanding.
	 * Then we find all the source stripes and request reads.
	 * As the reads complete, handle_stripe will copy the data
	 * into the destination stripe and release that stripe.
	 */
	struct r5conf *conf = mddev->private;
	struct stripe_head *sh;
	struct md_rdev *rdev;
	sector_t first_sector, last_sector;
	int raid_disks = conf->previous_raid_disks;
	int data_disks = raid_disks - conf->max_degraded;
	int new_data_disks = conf->raid_disks - conf->max_degraded;
	int i;
	int dd_idx;
	sector_t writepos, readpos, safepos;
	sector_t stripe_addr;
	int reshape_sectors;
	struct list_head stripes;
	sector_t retn;

	if (sector_nr == 0) {
		/* If restarting in the middle, skip the initial sectors */
		if (mddev->reshape_backwards &&
		    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
			sector_nr = raid5_size(mddev, 0, 0)
				- conf->reshape_progress;
		} else if (mddev->reshape_backwards &&
			   conf->reshape_progress == MaxSector) {
			/* shouldn't happen, but just in case, finish up.*/
			sector_nr = MaxSector;
		} else if (!mddev->reshape_backwards &&
			   conf->reshape_progress > 0)
			sector_nr = conf->reshape_progress;
		sector_div(sector_nr, new_data_disks);
		if (sector_nr) {
			/* Report the skipped-to position and return it. */
			mddev->curr_resync_completed = sector_nr;
			sysfs_notify_dirent_safe(mddev->sysfs_completed);
			*skipped = 1;
			retn = sector_nr;
			goto finish;
		}
	}

	/* We need to process a full chunk at a time.
	 * If old and new chunk sizes differ, we need to process the
	 * largest of these
	 */

	reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);

	/* We update the metadata at least every 10 seconds, or when
	 * the data about to be copied would over-write the source of
	 * the data at the front of the range.  i.e. one new_stripe
	 * along from reshape_progress new_maps to after where
	 * reshape_safe old_maps to
	 */
	writepos = conf->reshape_progress;
	sector_div(writepos, new_data_disks);
	readpos = conf->reshape_progress;
	sector_div(readpos, data_disks);
	safepos = conf->reshape_safe;
	sector_div(safepos, data_disks);
	if (mddev->reshape_backwards) {
		BUG_ON(writepos < reshape_sectors);
		writepos -= reshape_sectors;
		readpos += reshape_sectors;
		safepos += reshape_sectors;
	} else {
		writepos += reshape_sectors;
		/* readpos and safepos are worst-case calculations.
		 * A negative number is overly pessimistic, and causes
		 * obvious problems for unsigned storage.  So clip to 0.
		 */
		readpos -= min_t(sector_t, reshape_sectors, readpos);
		safepos -= min_t(sector_t, reshape_sectors, safepos);
	}

	/* Having calculated the 'writepos' possibly use it
	 * to set 'stripe_addr' which is where we will write to.
	 */
	if (mddev->reshape_backwards) {
		BUG_ON(conf->reshape_progress == 0);
		stripe_addr = writepos;
		BUG_ON((mddev->dev_sectors &
			~((sector_t)reshape_sectors - 1))
		       - reshape_sectors - stripe_addr
		       != sector_nr);
	} else {
		BUG_ON(writepos != sector_nr + reshape_sectors);
		stripe_addr = sector_nr;
	}

	/* 'writepos' is the most advanced device address we might write.
	 * 'readpos' is the least advanced device address we might read.
	 * 'safepos' is the least address recorded in the metadata as having
	 *     been reshaped.
	 * If there is a min_offset_diff, these are adjusted either by
	 * increasing the safepos/readpos if diff is negative, or
	 * increasing writepos if diff is positive.
	 * If 'readpos' is then behind 'writepos', there is no way that we can
	 * ensure safety in the face of a crash - that must be done by userspace
	 * making a backup of the data.  So in that case there is no particular
	 * rush to update metadata.
	 * Otherwise if 'safepos' is behind 'writepos', then we really need to
	 * update the metadata to advance 'safepos' to match 'readpos' so that
	 * we can be safe in the event of a crash.
	 * So we insist on updating metadata if safepos is behind writepos and
	 * readpos is beyond writepos.
	 * In any case, update the metadata every 10 seconds.
	 * Maybe that number should be configurable, but I'm not sure it is
	 * worth it.... maybe it could be a multiple of safemode_delay???
	 */
	if (conf->min_offset_diff < 0) {
		safepos += -conf->min_offset_diff;
		readpos += -conf->min_offset_diff;
	} else
		writepos += conf->min_offset_diff;

	if ((mddev->reshape_backwards
	     ? (safepos > writepos && readpos < writepos)
	     : (safepos < writepos && readpos > writepos)) ||
	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
		/* Cannot proceed until we've updated the superblock... */
		wait_event(conf->wait_for_overlap,
			   atomic_read(&conf->reshape_stripes)==0
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (atomic_read(&conf->reshape_stripes) != 0)
			return 0;	/* interrupted with stripes in flight */
		mddev->reshape_position = conf->reshape_progress;
		mddev->curr_resync_completed = sector_nr;
		if (!mddev->reshape_backwards)
			/* Can update recovery_offset */
			rdev_for_each(rdev, mddev)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Journal, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < sector_nr)
					rdev->recovery_offset = sector_nr;

		conf->reshape_checkpoint = jiffies;
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		md_wakeup_thread(mddev->thread);
		/* Wait for the superblock write (or an interruption). */
		wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			return 0;
		spin_lock_irq(&conf->device_lock);
		conf->reshape_safe = mddev->reshape_position;
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_for_overlap);
		sysfs_notify_dirent_safe(mddev->sysfs_completed);
	}

	/*
	 * Set up the destination stripes for this chunk and mark them
	 * expanding; zero any part that lies beyond the end of the old
	 * array so nothing stale appears in the grown space.
	 */
	INIT_LIST_HEAD(&stripes);
	for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) {
		int j;
		int skipped_disk = 0;
		sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
		set_bit(STRIPE_EXPANDING, &sh->state);
		atomic_inc(&conf->reshape_stripes);
		/* If any of this stripe is beyond the end of the old
		 * array, then we need to zero those blocks
		 */
		for (j=sh->disks; j--;) {
			sector_t s;
			if (j == sh->pd_idx)
				continue;
			if (conf->level == 6 &&
			    j == sh->qd_idx)
				continue;
			s = raid5_compute_blocknr(sh, j, 0);
			if (s < raid5_size(mddev, 0, 0)) {
				skipped_disk = 1;
				continue;
			}
			memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf));
			set_bit(R5_Expanded, &sh->dev[j].flags);
			set_bit(R5_UPTODATE, &sh->dev[j].flags);
		}
		if (!skipped_disk) {
			set_bit(STRIPE_EXPAND_READY, &sh->state);
			set_bit(STRIPE_HANDLE, &sh->state);
		}
		list_add(&sh->lru, &stripes);
	}
	spin_lock_irq(&conf->device_lock);
	if (mddev->reshape_backwards)
		conf->reshape_progress -= reshape_sectors * new_data_disks;
	else
		conf->reshape_progress += reshape_sectors * new_data_disks;
	spin_unlock_irq(&conf->device_lock);
	/* Ok, those stripe are ready. We can start scheduling
	 * reads on the source stripes.
	 * The source stripes are determined by mapping the first and last
	 * block on the destination stripes.
	 */
	first_sector =
		raid5_compute_sector(conf, stripe_addr*(new_data_disks),
				     1, &dd_idx, NULL);
	last_sector =
		raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
					    * new_data_disks - 1),
				     1, &dd_idx, NULL);
	if (last_sector >= mddev->dev_sectors)
		last_sector = mddev->dev_sectors - 1;
	while (first_sector <= last_sector) {
		sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1);
		set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
		set_bit(STRIPE_HANDLE, &sh->state);
		raid5_release_stripe(sh);
		first_sector += RAID5_STRIPE_SECTORS(conf);
	}
	/* Now that the sources are clearly marked, we can release
	 * the destination stripes
	 */
	while (!list_empty(&stripes)) {
		sh = list_entry(stripes.next, struct stripe_head, lru);
		list_del_init(&sh->lru);
		raid5_release_stripe(sh);
	}
	/* If this takes us to the resync_max point where we have to pause,
	 * then we need to write out the superblock.
	 */
	sector_nr += reshape_sectors;
	retn = reshape_sectors;
finish:
	if (mddev->curr_resync_completed > mddev->resync_max ||
	    (sector_nr - mddev->curr_resync_completed) * 2
	    >= mddev->resync_max - mddev->curr_resync_completed) {
		/* Cannot proceed until we've updated the superblock... */
		wait_event(conf->wait_for_overlap,
			   atomic_read(&conf->reshape_stripes) == 0
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (atomic_read(&conf->reshape_stripes) != 0)
			goto ret;
		mddev->reshape_position = conf->reshape_progress;
		mddev->curr_resync_completed = sector_nr;
		if (!mddev->reshape_backwards)
			/* Can update recovery_offset */
			rdev_for_each(rdev, mddev)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Journal, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < sector_nr)
					rdev->recovery_offset = sector_nr;
		conf->reshape_checkpoint = jiffies;
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			goto ret;
		spin_lock_irq(&conf->device_lock);
		conf->reshape_safe = mddev->reshape_position;
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_for_overlap);
		sysfs_notify_dirent_safe(mddev->sysfs_completed);
	}
ret:
	return retn;
}
62078c2ecf20Sopenharmony_ci
/*
 * raid5_sync_request - resync/recovery/reshape step, driven by md's sync
 * thread.
 * @mddev:     the array
 * @sector_nr: device sector to sync from
 * @skipped:   set to 1 when a range was skipped without doing I/O
 *
 * Delegates to reshape_request() when a reshape is in progress; otherwise
 * handles one stripe (RAID5_STRIPE_SECTORS) per call.  Returns the number
 * of sectors dealt with; 0 means finished or aborted.
 */
static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
					  int *skipped)
{
	struct r5conf *conf = mddev->private;
	struct stripe_head *sh;
	sector_t max_sector = mddev->dev_sectors;
	sector_t sync_blocks;
	int still_degraded = 0;
	int i;

	if (sector_nr >= max_sector) {
		/* just being told to finish up .. nothing much to do */

		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
			end_reshape(conf);
			return 0;
		}

		if (mddev->curr_resync < max_sector) /* aborted */
			md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
					   &sync_blocks, 1);
		else /* completed sync */
			conf->fullsync = 0;
		md_bitmap_close_sync(mddev->bitmap);

		return 0;
	}

	/* Allow raid5_quiesce to complete */
	wait_event(conf->wait_for_overlap, conf->quiesce != 2);

	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		return reshape_request(mddev, sector_nr, skipped);

	/* No need to check resync_max as we never do more than one
	 * stripe, and as resync_max will always be on a chunk boundary,
	 * if the check in md_do_sync didn't fire, there is no chance
	 * of overstepping resync_max here
	 */

	/* if there is too many failed drives and we are trying
	 * to resync, then assert that we are finished, because there is
	 * nothing we can do.
	 */
	if (mddev->degraded >= conf->max_degraded &&
	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		sector_t rv = mddev->dev_sectors - sector_nr;
		*skipped = 1;
		return rv;
	}
	if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    !conf->fullsync &&
	    !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
	    sync_blocks >= RAID5_STRIPE_SECTORS(conf)) {
		/* we can skip this block, and probably more */
		do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf));
		*skipped = 1;
		/* keep things rounded to whole stripes */
		return sync_blocks * RAID5_STRIPE_SECTORS(conf);
	}

	md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);

	/* Non-blocking attempt first; on failure fall back and throttle
	 * briefly (NOTE(review): 4th argument appears to be "noblock" -
	 * confirm against raid5_get_active_stripe()).
	 */
	sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0);
	if (sh == NULL) {
		sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0);
		/* make sure we don't swamp the stripe cache if someone else
		 * is trying to get access
		 */
		schedule_timeout_uninterruptible(1);
	}
	/* Need to check if array will still be degraded after recovery/resync
	 * Note in case of > 1 drive failures it's possible we're rebuilding
	 * one drive while leaving another faulty drive in array.
	 */
	rcu_read_lock();
	for (i = 0; i < conf->raid_disks; i++) {
		struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);

		if (rdev == NULL || test_bit(Faulty, &rdev->flags))
			still_degraded = 1;
	}
	rcu_read_unlock();

	md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);

	/* Hand the stripe to the state machine to do the actual sync I/O. */
	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
	set_bit(STRIPE_HANDLE, &sh->state);

	raid5_release_stripe(sh);

	return RAID5_STRIPE_SECTORS(conf);
}
63018c2ecf20Sopenharmony_ci
/*
 * Resubmit a previously-deferred "aligned" read by pushing it through the
 * stripe cache one stripe_head at a time, starting at stripe index @offset
 * within the bio.  Returns the number of stripes handled this pass; on
 * resource shortage the bio is parked on conf->retry_read_aligned /
 * conf->retry_read_offset for a later attempt.
 */
static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
			       unsigned int offset)
{
	/* We may not be able to submit a whole bio at once as there
	 * may not be enough stripe_heads available.
	 * We cannot pre-allocate enough stripe_heads as we may need
	 * more than exist in the cache (if we allow ever large chunks).
	 * So we do one stripe head at a time and record in
	 * ->bi_hw_segments how many have been done.
	 *
	 * We *know* that this entire raid_bio is in one chunk, so
	 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
	 */
	struct stripe_head *sh;
	int dd_idx;
	sector_t sector, logical_sector, last_sector;
	int scnt = 0;		/* stripes scanned so far within this bio */
	int handled = 0;	/* stripes actually submitted this pass */

	/* Round down to the start of the stripe containing the first sector. */
	logical_sector = raid_bio->bi_iter.bi_sector &
		~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
	sector = raid5_compute_sector(conf, logical_sector,
				      0, &dd_idx, NULL);
	last_sector = bio_end_sector(raid_bio);

	for (; logical_sector < last_sector;
	     logical_sector += RAID5_STRIPE_SECTORS(conf),
		     sector += RAID5_STRIPE_SECTORS(conf),
		     scnt++) {

		if (scnt < offset)
			/* already done this stripe */
			continue;

		/*
		 * NOTE(review): the two trailing 1s appear to request a
		 * non-blocking lookup — confirm against
		 * raid5_get_active_stripe()'s parameter list.
		 */
		sh = raid5_get_active_stripe(conf, sector, 0, 1, 1);

		if (!sh) {
			/* failed to get a stripe - must wait */
			conf->retry_read_aligned = raid_bio;
			conf->retry_read_offset = scnt;
			return handled;
		}

		if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
			/* stripe is busy with a conflicting bio - retry later */
			raid5_release_stripe(sh);
			conf->retry_read_aligned = raid_bio;
			conf->retry_read_offset = scnt;
			return handled;
		}

		set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
		handle_stripe(sh);
		raid5_release_stripe(sh);
		handled++;
	}

	/* Whole bio submitted: complete it and drop the aligned-read count. */
	bio_endio(raid_bio);

	if (atomic_dec_and_test(&conf->active_aligned_reads))
		wake_up(&conf->wait_for_quiescent);
	return handled;
}
63648c2ecf20Sopenharmony_ci
/*
 * Grab up to MAX_STRIPE_BATCH stripes from the priority lists of @group and
 * run handle_stripe() on each, releasing them back afterwards.  Called (and
 * returns) with conf->device_lock held, but drops it around the actual
 * stripe work — hence the __releases/__acquires annotations.  Returns the
 * number of stripes in the batch (0 means nothing to do).
 */
static int handle_active_stripes(struct r5conf *conf, int group,
				 struct r5worker *worker,
				 struct list_head *temp_inactive_list)
		__releases(&conf->device_lock)
		__acquires(&conf->device_lock)
{
	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
	int i, batch_size = 0, hash;
	bool release_inactive = false;

	/* Collect a batch while still holding device_lock. */
	while (batch_size < MAX_STRIPE_BATCH &&
			(sh = __get_priority_stripe(conf, group)) != NULL)
		batch[batch_size++] = sh;

	if (batch_size == 0) {
		/* Nothing to handle; see if there are inactive stripes to put back. */
		for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
			if (!list_empty(temp_inactive_list + i))
				break;
		if (i == NR_STRIPE_HASH_LOCKS) {
			/* Truly idle: just flush the log and return 0. */
			spin_unlock_irq(&conf->device_lock);
			log_flush_stripe_to_raid(conf);
			spin_lock_irq(&conf->device_lock);
			return batch_size;
		}
		release_inactive = true;
	}
	spin_unlock_irq(&conf->device_lock);

	release_inactive_stripe_list(conf, temp_inactive_list,
				     NR_STRIPE_HASH_LOCKS);

	r5l_flush_stripe_to_raid(conf->log);
	if (release_inactive) {
		/* Only inactive-list maintenance was needed this round. */
		spin_lock_irq(&conf->device_lock);
		return 0;
	}

	for (i = 0; i < batch_size; i++)
		handle_stripe(batch[i]);
	log_write_stripe_run(conf);

	cond_resched();

	/* Re-take the lock and hand each stripe back to its hash bucket. */
	spin_lock_irq(&conf->device_lock);
	for (i = 0; i < batch_size; i++) {
		hash = batch[i]->hash_lock_index;
		__release_stripe(conf, batch[i], &temp_inactive_list[hash]);
	}
	return batch_size;
}
64158c2ecf20Sopenharmony_ci
/*
 * Work-queue entry point for a raid5 worker thread.  Repeatedly releases
 * queued stripes and handles active stripes for this worker's group until
 * there is nothing left, then flushes deferred bios and the log.
 */
static void raid5_do_work(struct work_struct *work)
{
	struct r5worker *worker = container_of(work, struct r5worker, work);
	struct r5worker_group *group = worker->group;
	struct r5conf *conf = group->conf;
	struct mddev *mddev = conf->mddev;
	int group_id = group - conf->worker_groups;	/* index of our group */
	int handled;
	struct blk_plug plug;

	pr_debug("+++ raid5worker active\n");

	blk_start_plug(&plug);
	handled = 0;
	spin_lock_irq(&conf->device_lock);
	while (1) {
		int batch_size, released;

		released = release_stripe_list(conf, worker->temp_inactive_list);

		batch_size = handle_active_stripes(conf, group_id, worker,
						   worker->temp_inactive_list);
		worker->working = false;
		if (!batch_size && !released)
			break;
		handled += batch_size;
		/* Hold off while a superblock update is pending (drops device_lock). */
		wait_event_lock_irq(mddev->sb_wait,
			!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
			conf->device_lock);
	}
	pr_debug("%d stripes handled\n", handled);

	spin_unlock_irq(&conf->device_lock);

	flush_deferred_bios(conf);

	r5l_flush_stripe_to_raid(conf->log);

	async_tx_issue_pending_all();
	blk_finish_plug(&plug);

	pr_debug("--- raid5worker inactive\n");
}
64598c2ecf20Sopenharmony_ci
/*
 * This is our raid5 kernel thread.
 *
 * We scan the hash table for stripes which can be handled now.
 * During the scan, completed stripes are saved for us by the interrupt
 * handler, so that they will not have to wait for our next wakeup.
 */
static void raid5d(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct r5conf *conf = mddev->private;
	int handled;
	struct blk_plug plug;

	pr_debug("+++ raid5d active\n");

	md_check_recovery(mddev);

	blk_start_plug(&plug);
	handled = 0;
	spin_lock_irq(&conf->device_lock);
	while (1) {
		struct bio *bio;
		int batch_size, released;
		unsigned int offset;

		released = release_stripe_list(conf, conf->temp_inactive_list);
		if (released)
			clear_bit(R5_DID_ALLOC, &conf->cache_state);

		if (
		    !list_empty(&conf->bitmap_list)) {
			/* Now is a good time to flush some bitmap updates */
			conf->seq_flush++;
			/* md_bitmap_unplug may sleep; drop device_lock around it. */
			spin_unlock_irq(&conf->device_lock);
			md_bitmap_unplug(mddev->bitmap);
			spin_lock_irq(&conf->device_lock);
			conf->seq_write = conf->seq_flush;
			activate_bit_delay(conf, conf->temp_inactive_list);
		}
		raid5_activate_delayed(conf);

		/* Re-drive any aligned reads that were deferred for resources. */
		while ((bio = remove_bio_from_retry(conf, &offset))) {
			int ok;
			spin_unlock_irq(&conf->device_lock);
			ok = retry_aligned_read(conf, bio, offset);
			spin_lock_irq(&conf->device_lock);
			if (!ok)
				break;
			handled++;
		}

		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
						   conf->temp_inactive_list);
		if (!batch_size && !released)
			break;
		handled += batch_size;

		/* Any sb flag other than CHANGE_PENDING needs md_check_recovery. */
		if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) {
			spin_unlock_irq(&conf->device_lock);
			md_check_recovery(mddev);
			spin_lock_irq(&conf->device_lock);
		}
	}
	pr_debug("%d stripes handled\n", handled);

	spin_unlock_irq(&conf->device_lock);
	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
	    mutex_trylock(&conf->cache_size_mutex)) {
		grow_one_stripe(conf, __GFP_NOWARN);
		/* Set flag even if allocation failed.  This helps
		 * slow down allocation requests when mem is short
		 */
		set_bit(R5_DID_ALLOC, &conf->cache_state);
		mutex_unlock(&conf->cache_size_mutex);
	}

	flush_deferred_bios(conf);

	r5l_flush_stripe_to_raid(conf->log);

	async_tx_issue_pending_all();
	blk_finish_plug(&plug);

	pr_debug("--- raid5d inactive\n");
}
65468c2ecf20Sopenharmony_ci
65478c2ecf20Sopenharmony_cistatic ssize_t
65488c2ecf20Sopenharmony_ciraid5_show_stripe_cache_size(struct mddev *mddev, char *page)
65498c2ecf20Sopenharmony_ci{
65508c2ecf20Sopenharmony_ci	struct r5conf *conf;
65518c2ecf20Sopenharmony_ci	int ret = 0;
65528c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
65538c2ecf20Sopenharmony_ci	conf = mddev->private;
65548c2ecf20Sopenharmony_ci	if (conf)
65558c2ecf20Sopenharmony_ci		ret = sprintf(page, "%d\n", conf->min_nr_stripes);
65568c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
65578c2ecf20Sopenharmony_ci	return ret;
65588c2ecf20Sopenharmony_ci}
65598c2ecf20Sopenharmony_ci
65608c2ecf20Sopenharmony_ciint
65618c2ecf20Sopenharmony_ciraid5_set_cache_size(struct mddev *mddev, int size)
65628c2ecf20Sopenharmony_ci{
65638c2ecf20Sopenharmony_ci	int result = 0;
65648c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
65658c2ecf20Sopenharmony_ci
65668c2ecf20Sopenharmony_ci	if (size <= 16 || size > 32768)
65678c2ecf20Sopenharmony_ci		return -EINVAL;
65688c2ecf20Sopenharmony_ci
65698c2ecf20Sopenharmony_ci	conf->min_nr_stripes = size;
65708c2ecf20Sopenharmony_ci	mutex_lock(&conf->cache_size_mutex);
65718c2ecf20Sopenharmony_ci	while (size < conf->max_nr_stripes &&
65728c2ecf20Sopenharmony_ci	       drop_one_stripe(conf))
65738c2ecf20Sopenharmony_ci		;
65748c2ecf20Sopenharmony_ci	mutex_unlock(&conf->cache_size_mutex);
65758c2ecf20Sopenharmony_ci
65768c2ecf20Sopenharmony_ci	md_allow_write(mddev);
65778c2ecf20Sopenharmony_ci
65788c2ecf20Sopenharmony_ci	mutex_lock(&conf->cache_size_mutex);
65798c2ecf20Sopenharmony_ci	while (size > conf->max_nr_stripes)
65808c2ecf20Sopenharmony_ci		if (!grow_one_stripe(conf, GFP_KERNEL)) {
65818c2ecf20Sopenharmony_ci			conf->min_nr_stripes = conf->max_nr_stripes;
65828c2ecf20Sopenharmony_ci			result = -ENOMEM;
65838c2ecf20Sopenharmony_ci			break;
65848c2ecf20Sopenharmony_ci		}
65858c2ecf20Sopenharmony_ci	mutex_unlock(&conf->cache_size_mutex);
65868c2ecf20Sopenharmony_ci
65878c2ecf20Sopenharmony_ci	return result;
65888c2ecf20Sopenharmony_ci}
65898c2ecf20Sopenharmony_ciEXPORT_SYMBOL(raid5_set_cache_size);
65908c2ecf20Sopenharmony_ci
65918c2ecf20Sopenharmony_cistatic ssize_t
65928c2ecf20Sopenharmony_ciraid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
65938c2ecf20Sopenharmony_ci{
65948c2ecf20Sopenharmony_ci	struct r5conf *conf;
65958c2ecf20Sopenharmony_ci	unsigned long new;
65968c2ecf20Sopenharmony_ci	int err;
65978c2ecf20Sopenharmony_ci
65988c2ecf20Sopenharmony_ci	if (len >= PAGE_SIZE)
65998c2ecf20Sopenharmony_ci		return -EINVAL;
66008c2ecf20Sopenharmony_ci	if (kstrtoul(page, 10, &new))
66018c2ecf20Sopenharmony_ci		return -EINVAL;
66028c2ecf20Sopenharmony_ci	err = mddev_lock(mddev);
66038c2ecf20Sopenharmony_ci	if (err)
66048c2ecf20Sopenharmony_ci		return err;
66058c2ecf20Sopenharmony_ci	conf = mddev->private;
66068c2ecf20Sopenharmony_ci	if (!conf)
66078c2ecf20Sopenharmony_ci		err = -ENODEV;
66088c2ecf20Sopenharmony_ci	else
66098c2ecf20Sopenharmony_ci		err = raid5_set_cache_size(mddev, new);
66108c2ecf20Sopenharmony_ci	mddev_unlock(mddev);
66118c2ecf20Sopenharmony_ci
66128c2ecf20Sopenharmony_ci	return err ?: len;
66138c2ecf20Sopenharmony_ci}
66148c2ecf20Sopenharmony_ci
/* sysfs attribute: stripe_cache_size (rw). */
static struct md_sysfs_entry
raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
				raid5_show_stripe_cache_size,
				raid5_store_stripe_cache_size);
66198c2ecf20Sopenharmony_ci
66208c2ecf20Sopenharmony_cistatic ssize_t
66218c2ecf20Sopenharmony_ciraid5_show_rmw_level(struct mddev  *mddev, char *page)
66228c2ecf20Sopenharmony_ci{
66238c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
66248c2ecf20Sopenharmony_ci	if (conf)
66258c2ecf20Sopenharmony_ci		return sprintf(page, "%d\n", conf->rmw_level);
66268c2ecf20Sopenharmony_ci	else
66278c2ecf20Sopenharmony_ci		return 0;
66288c2ecf20Sopenharmony_ci}
66298c2ecf20Sopenharmony_ci
66308c2ecf20Sopenharmony_cistatic ssize_t
66318c2ecf20Sopenharmony_ciraid5_store_rmw_level(struct mddev  *mddev, const char *page, size_t len)
66328c2ecf20Sopenharmony_ci{
66338c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
66348c2ecf20Sopenharmony_ci	unsigned long new;
66358c2ecf20Sopenharmony_ci
66368c2ecf20Sopenharmony_ci	if (!conf)
66378c2ecf20Sopenharmony_ci		return -ENODEV;
66388c2ecf20Sopenharmony_ci
66398c2ecf20Sopenharmony_ci	if (len >= PAGE_SIZE)
66408c2ecf20Sopenharmony_ci		return -EINVAL;
66418c2ecf20Sopenharmony_ci
66428c2ecf20Sopenharmony_ci	if (kstrtoul(page, 10, &new))
66438c2ecf20Sopenharmony_ci		return -EINVAL;
66448c2ecf20Sopenharmony_ci
66458c2ecf20Sopenharmony_ci	if (new != PARITY_DISABLE_RMW && !raid6_call.xor_syndrome)
66468c2ecf20Sopenharmony_ci		return -EINVAL;
66478c2ecf20Sopenharmony_ci
66488c2ecf20Sopenharmony_ci	if (new != PARITY_DISABLE_RMW &&
66498c2ecf20Sopenharmony_ci	    new != PARITY_ENABLE_RMW &&
66508c2ecf20Sopenharmony_ci	    new != PARITY_PREFER_RMW)
66518c2ecf20Sopenharmony_ci		return -EINVAL;
66528c2ecf20Sopenharmony_ci
66538c2ecf20Sopenharmony_ci	conf->rmw_level = new;
66548c2ecf20Sopenharmony_ci	return len;
66558c2ecf20Sopenharmony_ci}
66568c2ecf20Sopenharmony_ci
/* sysfs attribute: rmw_level (rw). */
static struct md_sysfs_entry
raid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR,
			 raid5_show_rmw_level,
			 raid5_store_rmw_level);
66618c2ecf20Sopenharmony_ci
66628c2ecf20Sopenharmony_cistatic ssize_t
66638c2ecf20Sopenharmony_ciraid5_show_stripe_size(struct mddev  *mddev, char *page)
66648c2ecf20Sopenharmony_ci{
66658c2ecf20Sopenharmony_ci	struct r5conf *conf;
66668c2ecf20Sopenharmony_ci	int ret = 0;
66678c2ecf20Sopenharmony_ci
66688c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
66698c2ecf20Sopenharmony_ci	conf = mddev->private;
66708c2ecf20Sopenharmony_ci	if (conf)
66718c2ecf20Sopenharmony_ci		ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf));
66728c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
66738c2ecf20Sopenharmony_ci	return ret;
66748c2ecf20Sopenharmony_ci}
66758c2ecf20Sopenharmony_ci
#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
/*
 * sysfs store: change the stripe size.  Only compiled in when PAGE_SIZE
 * differs from DEFAULT_STRIPE_SIZE; otherwise the attribute is read-only
 * (see the #else branch below).  The whole stripe cache is torn down and
 * rebuilt at the new size with the array suspended.
 */
static ssize_t
raid5_store_stripe_size(struct mddev  *mddev, const char *page, size_t len)
{
	struct r5conf *conf;
	unsigned long new;
	int err;
	int size;

	if (len >= PAGE_SIZE)
		return -EINVAL;
	if (kstrtoul(page, 10, &new))
		return -EINVAL;

	/*
	 * The value should not be bigger than PAGE_SIZE. It requires to
	 * be multiple of DEFAULT_STRIPE_SIZE and the value should be power
	 * of two.
	 */
	if (new % DEFAULT_STRIPE_SIZE != 0 ||
			new > PAGE_SIZE || new == 0 ||
			new != roundup_pow_of_two(new))
		return -EINVAL;

	err = mddev_lock(mddev);
	if (err)
		return err;

	conf = mddev->private;
	if (!conf) {
		err = -ENODEV;
		goto out_unlock;
	}

	if (new == conf->stripe_size)
		goto out_unlock;	/* nothing to do */

	pr_debug("md/raid: change stripe_size from %lu to %lu\n",
			conf->stripe_size, new);

	/* Refuse while sync/reshape is running or sysfs files are active. */
	if (mddev->sync_thread ||
		test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
		mddev->reshape_position != MaxSector ||
		mddev->sysfs_active) {
		err = -EBUSY;
		goto out_unlock;
	}

	/* Rebuild the stripe cache at the new size with I/O quiesced. */
	mddev_suspend(mddev);
	mutex_lock(&conf->cache_size_mutex);
	size = conf->max_nr_stripes;

	shrink_stripes(conf);

	conf->stripe_size = new;
	conf->stripe_shift = ilog2(new) - 9;	/* log2 of size in sectors */
	conf->stripe_sectors = new >> 9;
	if (grow_stripes(conf, size)) {
		pr_warn("md/raid:%s: couldn't allocate buffers\n",
				mdname(mddev));
		err = -ENOMEM;
	}
	mutex_unlock(&conf->cache_size_mutex);
	mddev_resume(mddev);

out_unlock:
	mddev_unlock(mddev);
	return err ?: len;
}

/* sysfs attribute: stripe_size (rw when PAGE_SIZE != DEFAULT_STRIPE_SIZE). */
static struct md_sysfs_entry
raid5_stripe_size = __ATTR(stripe_size, 0644,
			 raid5_show_stripe_size,
			 raid5_store_stripe_size);
#else
/* sysfs attribute: stripe_size (read-only in this configuration). */
static struct md_sysfs_entry
raid5_stripe_size = __ATTR(stripe_size, 0444,
			 raid5_show_stripe_size,
			 NULL);
#endif
67568c2ecf20Sopenharmony_ci
67578c2ecf20Sopenharmony_cistatic ssize_t
67588c2ecf20Sopenharmony_ciraid5_show_preread_threshold(struct mddev *mddev, char *page)
67598c2ecf20Sopenharmony_ci{
67608c2ecf20Sopenharmony_ci	struct r5conf *conf;
67618c2ecf20Sopenharmony_ci	int ret = 0;
67628c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
67638c2ecf20Sopenharmony_ci	conf = mddev->private;
67648c2ecf20Sopenharmony_ci	if (conf)
67658c2ecf20Sopenharmony_ci		ret = sprintf(page, "%d\n", conf->bypass_threshold);
67668c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
67678c2ecf20Sopenharmony_ci	return ret;
67688c2ecf20Sopenharmony_ci}
67698c2ecf20Sopenharmony_ci
67708c2ecf20Sopenharmony_cistatic ssize_t
67718c2ecf20Sopenharmony_ciraid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
67728c2ecf20Sopenharmony_ci{
67738c2ecf20Sopenharmony_ci	struct r5conf *conf;
67748c2ecf20Sopenharmony_ci	unsigned long new;
67758c2ecf20Sopenharmony_ci	int err;
67768c2ecf20Sopenharmony_ci
67778c2ecf20Sopenharmony_ci	if (len >= PAGE_SIZE)
67788c2ecf20Sopenharmony_ci		return -EINVAL;
67798c2ecf20Sopenharmony_ci	if (kstrtoul(page, 10, &new))
67808c2ecf20Sopenharmony_ci		return -EINVAL;
67818c2ecf20Sopenharmony_ci
67828c2ecf20Sopenharmony_ci	err = mddev_lock(mddev);
67838c2ecf20Sopenharmony_ci	if (err)
67848c2ecf20Sopenharmony_ci		return err;
67858c2ecf20Sopenharmony_ci	conf = mddev->private;
67868c2ecf20Sopenharmony_ci	if (!conf)
67878c2ecf20Sopenharmony_ci		err = -ENODEV;
67888c2ecf20Sopenharmony_ci	else if (new > conf->min_nr_stripes)
67898c2ecf20Sopenharmony_ci		err = -EINVAL;
67908c2ecf20Sopenharmony_ci	else
67918c2ecf20Sopenharmony_ci		conf->bypass_threshold = new;
67928c2ecf20Sopenharmony_ci	mddev_unlock(mddev);
67938c2ecf20Sopenharmony_ci	return err ?: len;
67948c2ecf20Sopenharmony_ci}
67958c2ecf20Sopenharmony_ci
/* sysfs attribute: preread_bypass_threshold (rw). */
static struct md_sysfs_entry
raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
					S_IRUGO | S_IWUSR,
					raid5_show_preread_threshold,
					raid5_store_preread_threshold);
68018c2ecf20Sopenharmony_ci
68028c2ecf20Sopenharmony_cistatic ssize_t
68038c2ecf20Sopenharmony_ciraid5_show_skip_copy(struct mddev *mddev, char *page)
68048c2ecf20Sopenharmony_ci{
68058c2ecf20Sopenharmony_ci	struct r5conf *conf;
68068c2ecf20Sopenharmony_ci	int ret = 0;
68078c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
68088c2ecf20Sopenharmony_ci	conf = mddev->private;
68098c2ecf20Sopenharmony_ci	if (conf)
68108c2ecf20Sopenharmony_ci		ret = sprintf(page, "%d\n", conf->skip_copy);
68118c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
68128c2ecf20Sopenharmony_ci	return ret;
68138c2ecf20Sopenharmony_ci}
68148c2ecf20Sopenharmony_ci
68158c2ecf20Sopenharmony_cistatic ssize_t
68168c2ecf20Sopenharmony_ciraid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
68178c2ecf20Sopenharmony_ci{
68188c2ecf20Sopenharmony_ci	struct r5conf *conf;
68198c2ecf20Sopenharmony_ci	unsigned long new;
68208c2ecf20Sopenharmony_ci	int err;
68218c2ecf20Sopenharmony_ci
68228c2ecf20Sopenharmony_ci	if (len >= PAGE_SIZE)
68238c2ecf20Sopenharmony_ci		return -EINVAL;
68248c2ecf20Sopenharmony_ci	if (kstrtoul(page, 10, &new))
68258c2ecf20Sopenharmony_ci		return -EINVAL;
68268c2ecf20Sopenharmony_ci	new = !!new;
68278c2ecf20Sopenharmony_ci
68288c2ecf20Sopenharmony_ci	err = mddev_lock(mddev);
68298c2ecf20Sopenharmony_ci	if (err)
68308c2ecf20Sopenharmony_ci		return err;
68318c2ecf20Sopenharmony_ci	conf = mddev->private;
68328c2ecf20Sopenharmony_ci	if (!conf)
68338c2ecf20Sopenharmony_ci		err = -ENODEV;
68348c2ecf20Sopenharmony_ci	else if (new != conf->skip_copy) {
68358c2ecf20Sopenharmony_ci		struct request_queue *q = mddev->queue;
68368c2ecf20Sopenharmony_ci
68378c2ecf20Sopenharmony_ci		mddev_suspend(mddev);
68388c2ecf20Sopenharmony_ci		conf->skip_copy = new;
68398c2ecf20Sopenharmony_ci		if (new)
68408c2ecf20Sopenharmony_ci			blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
68418c2ecf20Sopenharmony_ci		else
68428c2ecf20Sopenharmony_ci			blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
68438c2ecf20Sopenharmony_ci		mddev_resume(mddev);
68448c2ecf20Sopenharmony_ci	}
68458c2ecf20Sopenharmony_ci	mddev_unlock(mddev);
68468c2ecf20Sopenharmony_ci	return err ?: len;
68478c2ecf20Sopenharmony_ci}
68488c2ecf20Sopenharmony_ci
/* sysfs attribute: skip_copy (rw). */
static struct md_sysfs_entry
raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
					raid5_show_skip_copy,
					raid5_store_skip_copy);
68538c2ecf20Sopenharmony_ci
68548c2ecf20Sopenharmony_cistatic ssize_t
68558c2ecf20Sopenharmony_cistripe_cache_active_show(struct mddev *mddev, char *page)
68568c2ecf20Sopenharmony_ci{
68578c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
68588c2ecf20Sopenharmony_ci	if (conf)
68598c2ecf20Sopenharmony_ci		return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
68608c2ecf20Sopenharmony_ci	else
68618c2ecf20Sopenharmony_ci		return 0;
68628c2ecf20Sopenharmony_ci}
68638c2ecf20Sopenharmony_ci
/* sysfs attribute: stripe_cache_active (read-only). */
static struct md_sysfs_entry
raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
68668c2ecf20Sopenharmony_ci
68678c2ecf20Sopenharmony_cistatic ssize_t
68688c2ecf20Sopenharmony_ciraid5_show_group_thread_cnt(struct mddev *mddev, char *page)
68698c2ecf20Sopenharmony_ci{
68708c2ecf20Sopenharmony_ci	struct r5conf *conf;
68718c2ecf20Sopenharmony_ci	int ret = 0;
68728c2ecf20Sopenharmony_ci	spin_lock(&mddev->lock);
68738c2ecf20Sopenharmony_ci	conf = mddev->private;
68748c2ecf20Sopenharmony_ci	if (conf)
68758c2ecf20Sopenharmony_ci		ret = sprintf(page, "%d\n", conf->worker_cnt_per_group);
68768c2ecf20Sopenharmony_ci	spin_unlock(&mddev->lock);
68778c2ecf20Sopenharmony_ci	return ret;
68788c2ecf20Sopenharmony_ci}
68798c2ecf20Sopenharmony_ci
/* Forward declaration; defined later in this file. */
static int alloc_thread_groups(struct r5conf *conf, int cnt,
			       int *group_cnt,
			       struct r5worker_group **worker_groups);
/*
 * sysfs store: change the number of worker threads per group.  New groups
 * are allocated first, then swapped in under device_lock with the array
 * suspended; the old groups are freed afterwards.
 */
static ssize_t
raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
{
	struct r5conf *conf;
	unsigned int new;
	int err;
	struct r5worker_group *new_groups, *old_groups;
	int group_cnt;

	if (len >= PAGE_SIZE)
		return -EINVAL;
	if (kstrtouint(page, 10, &new))
		return -EINVAL;
	/* 8192 should be big enough */
	if (new > 8192)
		return -EINVAL;

	err = mddev_lock(mddev);
	if (err)
		return err;
	conf = mddev->private;
	if (!conf)
		err = -ENODEV;
	else if (new != conf->worker_cnt_per_group) {
		mddev_suspend(mddev);

		old_groups = conf->worker_groups;
		if (old_groups)
			/* Let in-flight work on the old groups drain first. */
			flush_workqueue(raid5_wq);

		err = alloc_thread_groups(conf, new, &group_cnt, &new_groups);
		if (!err) {
			/* Swap the group pointers atomically w.r.t. device_lock. */
			spin_lock_irq(&conf->device_lock);
			conf->group_cnt = group_cnt;
			conf->worker_cnt_per_group = new;
			conf->worker_groups = new_groups;
			spin_unlock_irq(&conf->device_lock);

			/* Workers were allocated as one array hung off group 0. */
			if (old_groups)
				kfree(old_groups[0].workers);
			kfree(old_groups);
		}
		mddev_resume(mddev);
	}
	mddev_unlock(mddev);

	return err ?: len;
}
69318c2ecf20Sopenharmony_ci
/* "group_thread_cnt": RW sysfs attribute controlling workers per group. */
static struct md_sysfs_entry
raid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR,
				raid5_show_group_thread_cnt,
				raid5_store_group_thread_cnt);
69368c2ecf20Sopenharmony_ci
/* sysfs attributes exported for raid4/5/6 personalities (NULL-terminated). */
static struct attribute *raid5_attrs[] =  {
	&raid5_stripecache_size.attr,
	&raid5_stripecache_active.attr,
	&raid5_preread_bypass_threshold.attr,
	&raid5_group_thread_cnt.attr,
	&raid5_skip_copy.attr,
	&raid5_rmw_level.attr,
	&raid5_stripe_size.attr,
	&r5c_journal_mode.attr,
	&ppl_write_hint.attr,
	NULL,
};
/* Unnamed attribute group: entries appear directly in the md sysfs dir. */
static struct attribute_group raid5_attrs_group = {
	.name = NULL,
	.attrs = raid5_attrs,
};
69538c2ecf20Sopenharmony_ci
69548c2ecf20Sopenharmony_cistatic int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
69558c2ecf20Sopenharmony_ci			       struct r5worker_group **worker_groups)
69568c2ecf20Sopenharmony_ci{
69578c2ecf20Sopenharmony_ci	int i, j, k;
69588c2ecf20Sopenharmony_ci	ssize_t size;
69598c2ecf20Sopenharmony_ci	struct r5worker *workers;
69608c2ecf20Sopenharmony_ci
69618c2ecf20Sopenharmony_ci	if (cnt == 0) {
69628c2ecf20Sopenharmony_ci		*group_cnt = 0;
69638c2ecf20Sopenharmony_ci		*worker_groups = NULL;
69648c2ecf20Sopenharmony_ci		return 0;
69658c2ecf20Sopenharmony_ci	}
69668c2ecf20Sopenharmony_ci	*group_cnt = num_possible_nodes();
69678c2ecf20Sopenharmony_ci	size = sizeof(struct r5worker) * cnt;
69688c2ecf20Sopenharmony_ci	workers = kcalloc(size, *group_cnt, GFP_NOIO);
69698c2ecf20Sopenharmony_ci	*worker_groups = kcalloc(*group_cnt, sizeof(struct r5worker_group),
69708c2ecf20Sopenharmony_ci				 GFP_NOIO);
69718c2ecf20Sopenharmony_ci	if (!*worker_groups || !workers) {
69728c2ecf20Sopenharmony_ci		kfree(workers);
69738c2ecf20Sopenharmony_ci		kfree(*worker_groups);
69748c2ecf20Sopenharmony_ci		return -ENOMEM;
69758c2ecf20Sopenharmony_ci	}
69768c2ecf20Sopenharmony_ci
69778c2ecf20Sopenharmony_ci	for (i = 0; i < *group_cnt; i++) {
69788c2ecf20Sopenharmony_ci		struct r5worker_group *group;
69798c2ecf20Sopenharmony_ci
69808c2ecf20Sopenharmony_ci		group = &(*worker_groups)[i];
69818c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&group->handle_list);
69828c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&group->loprio_list);
69838c2ecf20Sopenharmony_ci		group->conf = conf;
69848c2ecf20Sopenharmony_ci		group->workers = workers + i * cnt;
69858c2ecf20Sopenharmony_ci
69868c2ecf20Sopenharmony_ci		for (j = 0; j < cnt; j++) {
69878c2ecf20Sopenharmony_ci			struct r5worker *worker = group->workers + j;
69888c2ecf20Sopenharmony_ci			worker->group = group;
69898c2ecf20Sopenharmony_ci			INIT_WORK(&worker->work, raid5_do_work);
69908c2ecf20Sopenharmony_ci
69918c2ecf20Sopenharmony_ci			for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++)
69928c2ecf20Sopenharmony_ci				INIT_LIST_HEAD(worker->temp_inactive_list + k);
69938c2ecf20Sopenharmony_ci		}
69948c2ecf20Sopenharmony_ci	}
69958c2ecf20Sopenharmony_ci
69968c2ecf20Sopenharmony_ci	return 0;
69978c2ecf20Sopenharmony_ci}
69988c2ecf20Sopenharmony_ci
69998c2ecf20Sopenharmony_cistatic void free_thread_groups(struct r5conf *conf)
70008c2ecf20Sopenharmony_ci{
70018c2ecf20Sopenharmony_ci	if (conf->worker_groups)
70028c2ecf20Sopenharmony_ci		kfree(conf->worker_groups[0].workers);
70038c2ecf20Sopenharmony_ci	kfree(conf->worker_groups);
70048c2ecf20Sopenharmony_ci	conf->worker_groups = NULL;
70058c2ecf20Sopenharmony_ci}
70068c2ecf20Sopenharmony_ci
70078c2ecf20Sopenharmony_cistatic sector_t
70088c2ecf20Sopenharmony_ciraid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
70098c2ecf20Sopenharmony_ci{
70108c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
70118c2ecf20Sopenharmony_ci
70128c2ecf20Sopenharmony_ci	if (!sectors)
70138c2ecf20Sopenharmony_ci		sectors = mddev->dev_sectors;
70148c2ecf20Sopenharmony_ci	if (!raid_disks)
70158c2ecf20Sopenharmony_ci		/* size is defined by the smallest of previous and new size */
70168c2ecf20Sopenharmony_ci		raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
70178c2ecf20Sopenharmony_ci
70188c2ecf20Sopenharmony_ci	sectors &= ~((sector_t)conf->chunk_sectors - 1);
70198c2ecf20Sopenharmony_ci	sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
70208c2ecf20Sopenharmony_ci	return sectors * (raid_disks - conf->max_degraded);
70218c2ecf20Sopenharmony_ci}
70228c2ecf20Sopenharmony_ci
70238c2ecf20Sopenharmony_cistatic void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
70248c2ecf20Sopenharmony_ci{
70258c2ecf20Sopenharmony_ci	safe_put_page(percpu->spare_page);
70268c2ecf20Sopenharmony_ci	percpu->spare_page = NULL;
70278c2ecf20Sopenharmony_ci	kvfree(percpu->scribble);
70288c2ecf20Sopenharmony_ci	percpu->scribble = NULL;
70298c2ecf20Sopenharmony_ci}
70308c2ecf20Sopenharmony_ci
70318c2ecf20Sopenharmony_cistatic int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
70328c2ecf20Sopenharmony_ci{
70338c2ecf20Sopenharmony_ci	if (conf->level == 6 && !percpu->spare_page) {
70348c2ecf20Sopenharmony_ci		percpu->spare_page = alloc_page(GFP_KERNEL);
70358c2ecf20Sopenharmony_ci		if (!percpu->spare_page)
70368c2ecf20Sopenharmony_ci			return -ENOMEM;
70378c2ecf20Sopenharmony_ci	}
70388c2ecf20Sopenharmony_ci
70398c2ecf20Sopenharmony_ci	if (scribble_alloc(percpu,
70408c2ecf20Sopenharmony_ci			   max(conf->raid_disks,
70418c2ecf20Sopenharmony_ci			       conf->previous_raid_disks),
70428c2ecf20Sopenharmony_ci			   max(conf->chunk_sectors,
70438c2ecf20Sopenharmony_ci			       conf->prev_chunk_sectors)
70448c2ecf20Sopenharmony_ci			   / RAID5_STRIPE_SECTORS(conf))) {
70458c2ecf20Sopenharmony_ci		free_scratch_buffer(conf, percpu);
70468c2ecf20Sopenharmony_ci		return -ENOMEM;
70478c2ecf20Sopenharmony_ci	}
70488c2ecf20Sopenharmony_ci
70498c2ecf20Sopenharmony_ci	return 0;
70508c2ecf20Sopenharmony_ci}
70518c2ecf20Sopenharmony_ci
70528c2ecf20Sopenharmony_cistatic int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node)
70538c2ecf20Sopenharmony_ci{
70548c2ecf20Sopenharmony_ci	struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
70558c2ecf20Sopenharmony_ci
70568c2ecf20Sopenharmony_ci	free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
70578c2ecf20Sopenharmony_ci	return 0;
70588c2ecf20Sopenharmony_ci}
70598c2ecf20Sopenharmony_ci
70608c2ecf20Sopenharmony_cistatic void raid5_free_percpu(struct r5conf *conf)
70618c2ecf20Sopenharmony_ci{
70628c2ecf20Sopenharmony_ci	if (!conf->percpu)
70638c2ecf20Sopenharmony_ci		return;
70648c2ecf20Sopenharmony_ci
70658c2ecf20Sopenharmony_ci	cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
70668c2ecf20Sopenharmony_ci	free_percpu(conf->percpu);
70678c2ecf20Sopenharmony_ci}
70688c2ecf20Sopenharmony_ci
/*
 * Tear down an r5conf, roughly in reverse order of setup_conf().
 * NOTE(review): also used by setup_conf()'s error path on a partially
 * constructed conf — each step appears to tolerate its resource being
 * absent/NULL; confirm when changing the teardown order.
 */
static void free_conf(struct r5conf *conf)
{
	int i;

	/* Shut down the journal/PPL log before freeing stripe state. */
	log_exit(conf);

	unregister_shrinker(&conf->shrinker);
	free_thread_groups(conf);
	shrink_stripes(conf);
	raid5_free_percpu(conf);
	for (i = 0; i < conf->pool_size; i++)
		if (conf->disks[i].extra_page)
			put_page(conf->disks[i].extra_page);
	kfree(conf->disks);
	bioset_exit(&conf->bio_split);
	kfree(conf->stripe_hashtbl);
	kfree(conf->pending_data);
	kfree(conf);
}
70888c2ecf20Sopenharmony_ci
70898c2ecf20Sopenharmony_cistatic int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
70908c2ecf20Sopenharmony_ci{
70918c2ecf20Sopenharmony_ci	struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
70928c2ecf20Sopenharmony_ci	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
70938c2ecf20Sopenharmony_ci
70948c2ecf20Sopenharmony_ci	if (alloc_scratch_buffer(conf, percpu)) {
70958c2ecf20Sopenharmony_ci		pr_warn("%s: failed memory allocation for cpu%u\n",
70968c2ecf20Sopenharmony_ci			__func__, cpu);
70978c2ecf20Sopenharmony_ci		return -ENOMEM;
70988c2ecf20Sopenharmony_ci	}
70998c2ecf20Sopenharmony_ci	return 0;
71008c2ecf20Sopenharmony_ci}
71018c2ecf20Sopenharmony_ci
71028c2ecf20Sopenharmony_cistatic int raid5_alloc_percpu(struct r5conf *conf)
71038c2ecf20Sopenharmony_ci{
71048c2ecf20Sopenharmony_ci	int err = 0;
71058c2ecf20Sopenharmony_ci
71068c2ecf20Sopenharmony_ci	conf->percpu = alloc_percpu(struct raid5_percpu);
71078c2ecf20Sopenharmony_ci	if (!conf->percpu)
71088c2ecf20Sopenharmony_ci		return -ENOMEM;
71098c2ecf20Sopenharmony_ci
71108c2ecf20Sopenharmony_ci	err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
71118c2ecf20Sopenharmony_ci	if (!err) {
71128c2ecf20Sopenharmony_ci		conf->scribble_disks = max(conf->raid_disks,
71138c2ecf20Sopenharmony_ci			conf->previous_raid_disks);
71148c2ecf20Sopenharmony_ci		conf->scribble_sectors = max(conf->chunk_sectors,
71158c2ecf20Sopenharmony_ci			conf->prev_chunk_sectors);
71168c2ecf20Sopenharmony_ci	}
71178c2ecf20Sopenharmony_ci	return err;
71188c2ecf20Sopenharmony_ci}
71198c2ecf20Sopenharmony_ci
/*
 * Shrinker scan callback: drop up to sc->nr_to_scan stripe heads, never
 * shrinking below conf->min_nr_stripes.
 *
 * Returns the number of stripes freed, or SHRINK_STOP when the cache
 * mutex is contended or no stripe could be dropped (note: a partial
 * count is deliberately discarded in the latter case to tell the VM to
 * back off).
 */
static unsigned long raid5_cache_scan(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
	unsigned long ret = SHRINK_STOP;

	if (mutex_trylock(&conf->cache_size_mutex)) {
		ret= 0;
		while (ret < sc->nr_to_scan &&
		       conf->max_nr_stripes > conf->min_nr_stripes) {
			if (drop_one_stripe(conf) == 0) {
				/* Nothing freeable right now. */
				ret = SHRINK_STOP;
				break;
			}
			ret++;
		}
		mutex_unlock(&conf->cache_size_mutex);
	}
	return ret;
}
71408c2ecf20Sopenharmony_ci
71418c2ecf20Sopenharmony_cistatic unsigned long raid5_cache_count(struct shrinker *shrink,
71428c2ecf20Sopenharmony_ci				       struct shrink_control *sc)
71438c2ecf20Sopenharmony_ci{
71448c2ecf20Sopenharmony_ci	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
71458c2ecf20Sopenharmony_ci
71468c2ecf20Sopenharmony_ci	if (conf->max_nr_stripes < conf->min_nr_stripes)
71478c2ecf20Sopenharmony_ci		/* unlikely, but not impossible */
71488c2ecf20Sopenharmony_ci		return 0;
71498c2ecf20Sopenharmony_ci	return conf->max_nr_stripes - conf->min_nr_stripes;
71508c2ecf20Sopenharmony_ci}
71518c2ecf20Sopenharmony_ci
/*
 * Allocate and initialise the r5conf for an array that is about to be
 * run (or reshaped into) level 4/5/6.
 *
 * Validates the requested geometry, then builds every piece of runtime
 * state: worker groups, locks and lists, the per-disk table, per-CPU
 * scratch buffers, the stripe cache, the cache shrinker and the raid5d
 * thread.  Returns the new conf, or an ERR_PTR on failure (the partial
 * conf is released via free_conf()).
 */
static struct r5conf *setup_conf(struct mddev *mddev)
{
	struct r5conf *conf;
	int raid_disk, memory, max_disks;
	struct md_rdev *rdev;
	struct disk_info *disk;
	char pers_name[6];
	int i;
	int group_cnt;
	struct r5worker_group *new_group;
	int ret;

	/* Reject unsupported level/layout/chunk before allocating anything. */
	if (mddev->new_level != 5
	    && mddev->new_level != 4
	    && mddev->new_level != 6) {
		pr_warn("md/raid:%s: raid level not set to 4/5/6 (%d)\n",
			mdname(mddev), mddev->new_level);
		return ERR_PTR(-EIO);
	}
	if ((mddev->new_level == 5
	     && !algorithm_valid_raid5(mddev->new_layout)) ||
	    (mddev->new_level == 6
	     && !algorithm_valid_raid6(mddev->new_layout))) {
		pr_warn("md/raid:%s: layout %d not supported\n",
			mdname(mddev), mddev->new_layout);
		return ERR_PTR(-EIO);
	}
	if (mddev->new_level == 6 && mddev->raid_disks < 4) {
		pr_warn("md/raid:%s: not enough configured devices (%d, minimum 4)\n",
			mdname(mddev), mddev->raid_disks);
		return ERR_PTR(-EINVAL);
	}

	if (!mddev->new_chunk_sectors ||
	    (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
	    !is_power_of_2(mddev->new_chunk_sectors)) {
		pr_warn("md/raid:%s: invalid chunk size %d\n",
			mdname(mddev), mddev->new_chunk_sectors << 9);
		return ERR_PTR(-EINVAL);
	}

	conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
	if (conf == NULL)
		goto abort;

#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
	conf->stripe_size = DEFAULT_STRIPE_SIZE;
	conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9;
	conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9;
#endif
	/* Pre-allocate the pool of pending-I/O records. */
	INIT_LIST_HEAD(&conf->free_list);
	INIT_LIST_HEAD(&conf->pending_list);
	conf->pending_data = kcalloc(PENDING_IO_MAX,
				     sizeof(struct r5pending_data),
				     GFP_KERNEL);
	if (!conf->pending_data)
		goto abort;
	for (i = 0; i < PENDING_IO_MAX; i++)
		list_add(&conf->pending_data[i].sibling, &conf->free_list);
	/* Don't enable multi-threading by default*/
	if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) {
		conf->group_cnt = group_cnt;
		conf->worker_cnt_per_group = 0;
		conf->worker_groups = new_group;
	} else
		goto abort;
	/* Locks, wait queues and stripe lists. */
	spin_lock_init(&conf->device_lock);
	seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
	mutex_init(&conf->cache_size_mutex);
	init_waitqueue_head(&conf->wait_for_quiescent);
	init_waitqueue_head(&conf->wait_for_stripe);
	init_waitqueue_head(&conf->wait_for_overlap);
	INIT_LIST_HEAD(&conf->handle_list);
	INIT_LIST_HEAD(&conf->loprio_list);
	INIT_LIST_HEAD(&conf->hold_list);
	INIT_LIST_HEAD(&conf->delayed_list);
	INIT_LIST_HEAD(&conf->bitmap_list);
	init_llist_head(&conf->released_stripes);
	atomic_set(&conf->active_stripes, 0);
	atomic_set(&conf->preread_active_stripes, 0);
	atomic_set(&conf->active_aligned_reads, 0);
	spin_lock_init(&conf->pending_bios_lock);
	/* Batch bio dispatch only when every member is a rotational device. */
	conf->batch_bio_dispatch = true;
	rdev_for_each(rdev, mddev) {
		if (test_bit(Journal, &rdev->flags))
			continue;
		if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
			conf->batch_bio_dispatch = false;
			break;
		}
	}

	conf->bypass_threshold = BYPASS_THRESHOLD;
	conf->recovery_disabled = mddev->recovery_disabled - 1;

	conf->raid_disks = mddev->raid_disks;
	if (mddev->reshape_position == MaxSector)
		conf->previous_raid_disks = mddev->raid_disks;
	else
		conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
	max_disks = max(conf->raid_disks, conf->previous_raid_disks);

	conf->disks = kcalloc(max_disks, sizeof(struct disk_info),
			      GFP_KERNEL);

	if (!conf->disks)
		goto abort;

	for (i = 0; i < max_disks; i++) {
		conf->disks[i].extra_page = alloc_page(GFP_KERNEL);
		if (!conf->disks[i].extra_page)
			goto abort;
	}

	ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
	if (ret)
		goto abort;
	conf->mddev = mddev;

	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
		goto abort;

	/* We init hash_locks[0] separately to that it can be used
	 * as the reference lock in the spin_lock_nest_lock() call
	 * in lock_all_device_hash_locks_irq in order to convince
	 * lockdep that we know what we are doing.
	 */
	spin_lock_init(conf->hash_locks);
	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
		spin_lock_init(conf->hash_locks + i);

	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
		INIT_LIST_HEAD(conf->inactive_list + i);

	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
		INIT_LIST_HEAD(conf->temp_inactive_list + i);

	/* Write-back cache (r5c) bookkeeping. */
	atomic_set(&conf->r5c_cached_full_stripes, 0);
	INIT_LIST_HEAD(&conf->r5c_full_stripe_list);
	atomic_set(&conf->r5c_cached_partial_stripes, 0);
	INIT_LIST_HEAD(&conf->r5c_partial_stripe_list);
	atomic_set(&conf->r5c_flushing_full_stripes, 0);
	atomic_set(&conf->r5c_flushing_partial_stripes, 0);

	conf->level = mddev->new_level;
	conf->chunk_sectors = mddev->new_chunk_sectors;
	if (raid5_alloc_percpu(conf) != 0)
		goto abort;

	pr_debug("raid456: run(%s) called.\n", mdname(mddev));

	/* Populate the per-disk table from the member rdevs. */
	rdev_for_each(rdev, mddev) {
		raid_disk = rdev->raid_disk;
		if (raid_disk >= max_disks
		    || raid_disk < 0 || test_bit(Journal, &rdev->flags))
			continue;
		disk = conf->disks + raid_disk;

		if (test_bit(Replacement, &rdev->flags)) {
			if (disk->replacement)
				goto abort;
			disk->replacement = rdev;
		} else {
			if (disk->rdev)
				goto abort;
			disk->rdev = rdev;
		}

		if (test_bit(In_sync, &rdev->flags)) {
			char b[BDEVNAME_SIZE];
			pr_info("md/raid:%s: device %s operational as raid disk %d\n",
				mdname(mddev), bdevname(rdev->bdev, b), raid_disk);
		} else if (rdev->saved_raid_disk != raid_disk)
			/* Cannot rely on bitmap to complete recovery */
			conf->fullsync = 1;
	}

	conf->level = mddev->new_level;
	if (conf->level == 6) {
		conf->max_degraded = 2;
		if (raid6_call.xor_syndrome)
			conf->rmw_level = PARITY_ENABLE_RMW;
		else
			conf->rmw_level = PARITY_DISABLE_RMW;
	} else {
		conf->max_degraded = 1;
		conf->rmw_level = PARITY_ENABLE_RMW;
	}
	conf->algorithm = mddev->new_layout;
	conf->reshape_progress = mddev->reshape_position;
	if (conf->reshape_progress != MaxSector) {
		/* Mid-reshape: remember the old geometry as well. */
		conf->prev_chunk_sectors = mddev->chunk_sectors;
		conf->prev_algo = mddev->layout;
	} else {
		conf->prev_chunk_sectors = conf->chunk_sectors;
		conf->prev_algo = conf->algorithm;
	}

	conf->min_nr_stripes = NR_STRIPES;
	if (mddev->reshape_position != MaxSector) {
		int stripes = max_t(int,
			((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4,
			((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4);
		conf->min_nr_stripes = max(NR_STRIPES, stripes);
		if (conf->min_nr_stripes != NR_STRIPES)
			pr_info("md/raid:%s: force stripe size %d for reshape\n",
				mdname(mddev), conf->min_nr_stripes);
	}
	memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
	atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
	if (grow_stripes(conf, conf->min_nr_stripes)) {
		pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
			mdname(mddev), memory);
		goto abort;
	} else
		pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
	/*
	 * Losing a stripe head costs more than the time to refill it,
	 * it reduces the queue depth and so can hurt throughput.
	 * So set it rather large, scaled by number of devices.
	 */
	conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
	conf->shrinker.scan_objects = raid5_cache_scan;
	conf->shrinker.count_objects = raid5_cache_count;
	conf->shrinker.batch = 128;
	conf->shrinker.flags = 0;
	if (register_shrinker(&conf->shrinker)) {
		pr_warn("md/raid:%s: couldn't register shrinker.\n",
			mdname(mddev));
		goto abort;
	}

	/* "raid4".."raid6" plus NUL fits in pers_name[6]. */
	sprintf(pers_name, "raid%d", mddev->new_level);
	conf->thread = md_register_thread(raid5d, mddev, pers_name);
	if (!conf->thread) {
		pr_warn("md/raid:%s: couldn't allocate thread.\n",
			mdname(mddev));
		goto abort;
	}

	return conf;

 abort:
	/*
	 * NOTE(review): every failure after conf allocation reports -EIO,
	 * even plain allocation failures — confirm callers don't need the
	 * real errno here.
	 */
	if (conf) {
		free_conf(conf);
		return ERR_PTR(-EIO);
	} else
		return ERR_PTR(-ENOMEM);
}
74028c2ecf20Sopenharmony_ci
74038c2ecf20Sopenharmony_cistatic int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
74048c2ecf20Sopenharmony_ci{
74058c2ecf20Sopenharmony_ci	switch (algo) {
74068c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_0:
74078c2ecf20Sopenharmony_ci		if (raid_disk < max_degraded)
74088c2ecf20Sopenharmony_ci			return 1;
74098c2ecf20Sopenharmony_ci		break;
74108c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_N:
74118c2ecf20Sopenharmony_ci		if (raid_disk >= raid_disks - max_degraded)
74128c2ecf20Sopenharmony_ci			return 1;
74138c2ecf20Sopenharmony_ci		break;
74148c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_0_6:
74158c2ecf20Sopenharmony_ci		if (raid_disk == 0 ||
74168c2ecf20Sopenharmony_ci		    raid_disk == raid_disks - 1)
74178c2ecf20Sopenharmony_ci			return 1;
74188c2ecf20Sopenharmony_ci		break;
74198c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_ASYMMETRIC_6:
74208c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_ASYMMETRIC_6:
74218c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_SYMMETRIC_6:
74228c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_SYMMETRIC_6:
74238c2ecf20Sopenharmony_ci		if (raid_disk == raid_disks - 1)
74248c2ecf20Sopenharmony_ci			return 1;
74258c2ecf20Sopenharmony_ci	}
74268c2ecf20Sopenharmony_ci	return 0;
74278c2ecf20Sopenharmony_ci}
74288c2ecf20Sopenharmony_ci
74298c2ecf20Sopenharmony_cistatic void raid5_set_io_opt(struct r5conf *conf)
74308c2ecf20Sopenharmony_ci{
74318c2ecf20Sopenharmony_ci	blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
74328c2ecf20Sopenharmony_ci			 (conf->raid_disks - conf->max_degraded));
74338c2ecf20Sopenharmony_ci}
74348c2ecf20Sopenharmony_ci
74358c2ecf20Sopenharmony_cistatic int raid5_run(struct mddev *mddev)
74368c2ecf20Sopenharmony_ci{
74378c2ecf20Sopenharmony_ci	struct r5conf *conf;
74388c2ecf20Sopenharmony_ci	int working_disks = 0;
74398c2ecf20Sopenharmony_ci	int dirty_parity_disks = 0;
74408c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
74418c2ecf20Sopenharmony_ci	struct md_rdev *journal_dev = NULL;
74428c2ecf20Sopenharmony_ci	sector_t reshape_offset = 0;
74438c2ecf20Sopenharmony_ci	int i;
74448c2ecf20Sopenharmony_ci	long long min_offset_diff = 0;
74458c2ecf20Sopenharmony_ci	int first = 1;
74468c2ecf20Sopenharmony_ci
74478c2ecf20Sopenharmony_ci	if (mddev_init_writes_pending(mddev) < 0)
74488c2ecf20Sopenharmony_ci		return -ENOMEM;
74498c2ecf20Sopenharmony_ci
74508c2ecf20Sopenharmony_ci	if (mddev->recovery_cp != MaxSector)
74518c2ecf20Sopenharmony_ci		pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
74528c2ecf20Sopenharmony_ci			  mdname(mddev));
74538c2ecf20Sopenharmony_ci
74548c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev) {
74558c2ecf20Sopenharmony_ci		long long diff;
74568c2ecf20Sopenharmony_ci
74578c2ecf20Sopenharmony_ci		if (test_bit(Journal, &rdev->flags)) {
74588c2ecf20Sopenharmony_ci			journal_dev = rdev;
74598c2ecf20Sopenharmony_ci			continue;
74608c2ecf20Sopenharmony_ci		}
74618c2ecf20Sopenharmony_ci		if (rdev->raid_disk < 0)
74628c2ecf20Sopenharmony_ci			continue;
74638c2ecf20Sopenharmony_ci		diff = (rdev->new_data_offset - rdev->data_offset);
74648c2ecf20Sopenharmony_ci		if (first) {
74658c2ecf20Sopenharmony_ci			min_offset_diff = diff;
74668c2ecf20Sopenharmony_ci			first = 0;
74678c2ecf20Sopenharmony_ci		} else if (mddev->reshape_backwards &&
74688c2ecf20Sopenharmony_ci			 diff < min_offset_diff)
74698c2ecf20Sopenharmony_ci			min_offset_diff = diff;
74708c2ecf20Sopenharmony_ci		else if (!mddev->reshape_backwards &&
74718c2ecf20Sopenharmony_ci			 diff > min_offset_diff)
74728c2ecf20Sopenharmony_ci			min_offset_diff = diff;
74738c2ecf20Sopenharmony_ci	}
74748c2ecf20Sopenharmony_ci
74758c2ecf20Sopenharmony_ci	if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) &&
74768c2ecf20Sopenharmony_ci	    (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
74778c2ecf20Sopenharmony_ci		pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
74788c2ecf20Sopenharmony_ci			  mdname(mddev));
74798c2ecf20Sopenharmony_ci		return -EINVAL;
74808c2ecf20Sopenharmony_ci	}
74818c2ecf20Sopenharmony_ci
74828c2ecf20Sopenharmony_ci	if (mddev->reshape_position != MaxSector) {
74838c2ecf20Sopenharmony_ci		/* Check that we can continue the reshape.
74848c2ecf20Sopenharmony_ci		 * Difficulties arise if the stripe we would write to
74858c2ecf20Sopenharmony_ci		 * next is at or after the stripe we would read from next.
74868c2ecf20Sopenharmony_ci		 * For a reshape that changes the number of devices, this
74878c2ecf20Sopenharmony_ci		 * is only possible for a very short time, and mdadm makes
74888c2ecf20Sopenharmony_ci		 * sure that time appears to have passed before assembling
74898c2ecf20Sopenharmony_ci		 * the array.  So we fail if that time hasn't passed.
74908c2ecf20Sopenharmony_ci		 * For a reshape that keeps the number of devices the same
74918c2ecf20Sopenharmony_ci		 * mdadm must be monitoring the reshape and keeping the
74928c2ecf20Sopenharmony_ci		 * critical areas read-only and backed up.  It will start
74938c2ecf20Sopenharmony_ci		 * the array in read-only mode, so we check for that.
74948c2ecf20Sopenharmony_ci		 */
74958c2ecf20Sopenharmony_ci		sector_t here_new, here_old;
74968c2ecf20Sopenharmony_ci		int old_disks;
74978c2ecf20Sopenharmony_ci		int max_degraded = (mddev->level == 6 ? 2 : 1);
74988c2ecf20Sopenharmony_ci		int chunk_sectors;
74998c2ecf20Sopenharmony_ci		int new_data_disks;
75008c2ecf20Sopenharmony_ci
75018c2ecf20Sopenharmony_ci		if (journal_dev) {
75028c2ecf20Sopenharmony_ci			pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
75038c2ecf20Sopenharmony_ci				mdname(mddev));
75048c2ecf20Sopenharmony_ci			return -EINVAL;
75058c2ecf20Sopenharmony_ci		}
75068c2ecf20Sopenharmony_ci
75078c2ecf20Sopenharmony_ci		if (mddev->new_level != mddev->level) {
75088c2ecf20Sopenharmony_ci			pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
75098c2ecf20Sopenharmony_ci				mdname(mddev));
75108c2ecf20Sopenharmony_ci			return -EINVAL;
75118c2ecf20Sopenharmony_ci		}
75128c2ecf20Sopenharmony_ci		old_disks = mddev->raid_disks - mddev->delta_disks;
75138c2ecf20Sopenharmony_ci		/* reshape_position must be on a new-stripe boundary, and one
75148c2ecf20Sopenharmony_ci		 * further up in new geometry must map after here in old
75158c2ecf20Sopenharmony_ci		 * geometry.
75168c2ecf20Sopenharmony_ci		 * If the chunk sizes are different, then as we perform reshape
75178c2ecf20Sopenharmony_ci		 * in units of the largest of the two, reshape_position needs
75188c2ecf20Sopenharmony_ci		 * be a multiple of the largest chunk size times new data disks.
75198c2ecf20Sopenharmony_ci		 */
75208c2ecf20Sopenharmony_ci		here_new = mddev->reshape_position;
75218c2ecf20Sopenharmony_ci		chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
75228c2ecf20Sopenharmony_ci		new_data_disks = mddev->raid_disks - max_degraded;
75238c2ecf20Sopenharmony_ci		if (sector_div(here_new, chunk_sectors * new_data_disks)) {
75248c2ecf20Sopenharmony_ci			pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
75258c2ecf20Sopenharmony_ci				mdname(mddev));
75268c2ecf20Sopenharmony_ci			return -EINVAL;
75278c2ecf20Sopenharmony_ci		}
75288c2ecf20Sopenharmony_ci		reshape_offset = here_new * chunk_sectors;
75298c2ecf20Sopenharmony_ci		/* here_new is the stripe we will write to */
75308c2ecf20Sopenharmony_ci		here_old = mddev->reshape_position;
75318c2ecf20Sopenharmony_ci		sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
75328c2ecf20Sopenharmony_ci		/* here_old is the first stripe that we might need to read
75338c2ecf20Sopenharmony_ci		 * from */
75348c2ecf20Sopenharmony_ci		if (mddev->delta_disks == 0) {
75358c2ecf20Sopenharmony_ci			/* We cannot be sure it is safe to start an in-place
75368c2ecf20Sopenharmony_ci			 * reshape.  It is only safe if user-space is monitoring
75378c2ecf20Sopenharmony_ci			 * and taking constant backups.
75388c2ecf20Sopenharmony_ci			 * mdadm always starts a situation like this in
75398c2ecf20Sopenharmony_ci			 * readonly mode so it can take control before
75408c2ecf20Sopenharmony_ci			 * allowing any writes.  So just check for that.
75418c2ecf20Sopenharmony_ci			 */
75428c2ecf20Sopenharmony_ci			if (abs(min_offset_diff) >= mddev->chunk_sectors &&
75438c2ecf20Sopenharmony_ci			    abs(min_offset_diff) >= mddev->new_chunk_sectors)
75448c2ecf20Sopenharmony_ci				/* not really in-place - so OK */;
75458c2ecf20Sopenharmony_ci			else if (mddev->ro == 0) {
75468c2ecf20Sopenharmony_ci				pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
75478c2ecf20Sopenharmony_ci					mdname(mddev));
75488c2ecf20Sopenharmony_ci				return -EINVAL;
75498c2ecf20Sopenharmony_ci			}
75508c2ecf20Sopenharmony_ci		} else if (mddev->reshape_backwards
75518c2ecf20Sopenharmony_ci		    ? (here_new * chunk_sectors + min_offset_diff <=
75528c2ecf20Sopenharmony_ci		       here_old * chunk_sectors)
75538c2ecf20Sopenharmony_ci		    : (here_new * chunk_sectors >=
75548c2ecf20Sopenharmony_ci		       here_old * chunk_sectors + (-min_offset_diff))) {
75558c2ecf20Sopenharmony_ci			/* Reading from the same stripe as writing to - bad */
75568c2ecf20Sopenharmony_ci			pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
75578c2ecf20Sopenharmony_ci				mdname(mddev));
75588c2ecf20Sopenharmony_ci			return -EINVAL;
75598c2ecf20Sopenharmony_ci		}
75608c2ecf20Sopenharmony_ci		pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
75618c2ecf20Sopenharmony_ci		/* OK, we should be able to continue; */
75628c2ecf20Sopenharmony_ci	} else {
75638c2ecf20Sopenharmony_ci		BUG_ON(mddev->level != mddev->new_level);
75648c2ecf20Sopenharmony_ci		BUG_ON(mddev->layout != mddev->new_layout);
75658c2ecf20Sopenharmony_ci		BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
75668c2ecf20Sopenharmony_ci		BUG_ON(mddev->delta_disks != 0);
75678c2ecf20Sopenharmony_ci	}
75688c2ecf20Sopenharmony_ci
75698c2ecf20Sopenharmony_ci	if (test_bit(MD_HAS_JOURNAL, &mddev->flags) &&
75708c2ecf20Sopenharmony_ci	    test_bit(MD_HAS_PPL, &mddev->flags)) {
75718c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n",
75728c2ecf20Sopenharmony_ci			mdname(mddev));
75738c2ecf20Sopenharmony_ci		clear_bit(MD_HAS_PPL, &mddev->flags);
75748c2ecf20Sopenharmony_ci		clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags);
75758c2ecf20Sopenharmony_ci	}
75768c2ecf20Sopenharmony_ci
75778c2ecf20Sopenharmony_ci	if (mddev->private == NULL)
75788c2ecf20Sopenharmony_ci		conf = setup_conf(mddev);
75798c2ecf20Sopenharmony_ci	else
75808c2ecf20Sopenharmony_ci		conf = mddev->private;
75818c2ecf20Sopenharmony_ci
75828c2ecf20Sopenharmony_ci	if (IS_ERR(conf))
75838c2ecf20Sopenharmony_ci		return PTR_ERR(conf);
75848c2ecf20Sopenharmony_ci
75858c2ecf20Sopenharmony_ci	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
75868c2ecf20Sopenharmony_ci		if (!journal_dev) {
75878c2ecf20Sopenharmony_ci			pr_warn("md/raid:%s: journal disk is missing, force array readonly\n",
75888c2ecf20Sopenharmony_ci				mdname(mddev));
75898c2ecf20Sopenharmony_ci			mddev->ro = 1;
75908c2ecf20Sopenharmony_ci			set_disk_ro(mddev->gendisk, 1);
75918c2ecf20Sopenharmony_ci		} else if (mddev->recovery_cp == MaxSector)
75928c2ecf20Sopenharmony_ci			set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
75938c2ecf20Sopenharmony_ci	}
75948c2ecf20Sopenharmony_ci
75958c2ecf20Sopenharmony_ci	conf->min_offset_diff = min_offset_diff;
75968c2ecf20Sopenharmony_ci	mddev->thread = conf->thread;
75978c2ecf20Sopenharmony_ci	conf->thread = NULL;
75988c2ecf20Sopenharmony_ci	mddev->private = conf;
75998c2ecf20Sopenharmony_ci
76008c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
76018c2ecf20Sopenharmony_ci	     i++) {
76028c2ecf20Sopenharmony_ci		rdev = conf->disks[i].rdev;
76038c2ecf20Sopenharmony_ci		if (!rdev && conf->disks[i].replacement) {
76048c2ecf20Sopenharmony_ci			/* The replacement is all we have yet */
76058c2ecf20Sopenharmony_ci			rdev = conf->disks[i].replacement;
76068c2ecf20Sopenharmony_ci			conf->disks[i].replacement = NULL;
76078c2ecf20Sopenharmony_ci			clear_bit(Replacement, &rdev->flags);
76088c2ecf20Sopenharmony_ci			conf->disks[i].rdev = rdev;
76098c2ecf20Sopenharmony_ci		}
76108c2ecf20Sopenharmony_ci		if (!rdev)
76118c2ecf20Sopenharmony_ci			continue;
76128c2ecf20Sopenharmony_ci		if (conf->disks[i].replacement &&
76138c2ecf20Sopenharmony_ci		    conf->reshape_progress != MaxSector) {
76148c2ecf20Sopenharmony_ci			/* replacements and reshape simply do not mix. */
76158c2ecf20Sopenharmony_ci			pr_warn("md: cannot handle concurrent replacement and reshape.\n");
76168c2ecf20Sopenharmony_ci			goto abort;
76178c2ecf20Sopenharmony_ci		}
76188c2ecf20Sopenharmony_ci		if (test_bit(In_sync, &rdev->flags)) {
76198c2ecf20Sopenharmony_ci			working_disks++;
76208c2ecf20Sopenharmony_ci			continue;
76218c2ecf20Sopenharmony_ci		}
76228c2ecf20Sopenharmony_ci		/* This disc is not fully in-sync.  However if it
76238c2ecf20Sopenharmony_ci		 * just stored parity (beyond the recovery_offset),
76248c2ecf20Sopenharmony_ci		 * then we don't need to be concerned about the
76258c2ecf20Sopenharmony_ci		 * array being dirty.
76268c2ecf20Sopenharmony_ci		 * When reshape goes 'backwards', we never have
76278c2ecf20Sopenharmony_ci		 * partially completed devices, so we only need
76288c2ecf20Sopenharmony_ci		 * to worry about reshape going forwards.
76298c2ecf20Sopenharmony_ci		 */
76308c2ecf20Sopenharmony_ci		/* Hack because v0.91 doesn't store recovery_offset properly. */
76318c2ecf20Sopenharmony_ci		if (mddev->major_version == 0 &&
76328c2ecf20Sopenharmony_ci		    mddev->minor_version > 90)
76338c2ecf20Sopenharmony_ci			rdev->recovery_offset = reshape_offset;
76348c2ecf20Sopenharmony_ci
76358c2ecf20Sopenharmony_ci		if (rdev->recovery_offset < reshape_offset) {
76368c2ecf20Sopenharmony_ci			/* We need to check old and new layout */
76378c2ecf20Sopenharmony_ci			if (!only_parity(rdev->raid_disk,
76388c2ecf20Sopenharmony_ci					 conf->algorithm,
76398c2ecf20Sopenharmony_ci					 conf->raid_disks,
76408c2ecf20Sopenharmony_ci					 conf->max_degraded))
76418c2ecf20Sopenharmony_ci				continue;
76428c2ecf20Sopenharmony_ci		}
76438c2ecf20Sopenharmony_ci		if (!only_parity(rdev->raid_disk,
76448c2ecf20Sopenharmony_ci				 conf->prev_algo,
76458c2ecf20Sopenharmony_ci				 conf->previous_raid_disks,
76468c2ecf20Sopenharmony_ci				 conf->max_degraded))
76478c2ecf20Sopenharmony_ci			continue;
76488c2ecf20Sopenharmony_ci		dirty_parity_disks++;
76498c2ecf20Sopenharmony_ci	}
76508c2ecf20Sopenharmony_ci
76518c2ecf20Sopenharmony_ci	/*
76528c2ecf20Sopenharmony_ci	 * 0 for a fully functional array, 1 or 2 for a degraded array.
76538c2ecf20Sopenharmony_ci	 */
76548c2ecf20Sopenharmony_ci	mddev->degraded = raid5_calc_degraded(conf);
76558c2ecf20Sopenharmony_ci
76568c2ecf20Sopenharmony_ci	if (has_failed(conf)) {
76578c2ecf20Sopenharmony_ci		pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n",
76588c2ecf20Sopenharmony_ci			mdname(mddev), mddev->degraded, conf->raid_disks);
76598c2ecf20Sopenharmony_ci		goto abort;
76608c2ecf20Sopenharmony_ci	}
76618c2ecf20Sopenharmony_ci
76628c2ecf20Sopenharmony_ci	/* device size must be a multiple of chunk size */
76638c2ecf20Sopenharmony_ci	mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
76648c2ecf20Sopenharmony_ci	mddev->resync_max_sectors = mddev->dev_sectors;
76658c2ecf20Sopenharmony_ci
76668c2ecf20Sopenharmony_ci	if (mddev->degraded > dirty_parity_disks &&
76678c2ecf20Sopenharmony_ci	    mddev->recovery_cp != MaxSector) {
76688c2ecf20Sopenharmony_ci		if (test_bit(MD_HAS_PPL, &mddev->flags))
76698c2ecf20Sopenharmony_ci			pr_crit("md/raid:%s: starting dirty degraded array with PPL.\n",
76708c2ecf20Sopenharmony_ci				mdname(mddev));
76718c2ecf20Sopenharmony_ci		else if (mddev->ok_start_degraded)
76728c2ecf20Sopenharmony_ci			pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n",
76738c2ecf20Sopenharmony_ci				mdname(mddev));
76748c2ecf20Sopenharmony_ci		else {
76758c2ecf20Sopenharmony_ci			pr_crit("md/raid:%s: cannot start dirty degraded array.\n",
76768c2ecf20Sopenharmony_ci				mdname(mddev));
76778c2ecf20Sopenharmony_ci			goto abort;
76788c2ecf20Sopenharmony_ci		}
76798c2ecf20Sopenharmony_ci	}
76808c2ecf20Sopenharmony_ci
76818c2ecf20Sopenharmony_ci	pr_info("md/raid:%s: raid level %d active with %d out of %d devices, algorithm %d\n",
76828c2ecf20Sopenharmony_ci		mdname(mddev), conf->level,
76838c2ecf20Sopenharmony_ci		mddev->raid_disks-mddev->degraded, mddev->raid_disks,
76848c2ecf20Sopenharmony_ci		mddev->new_layout);
76858c2ecf20Sopenharmony_ci
76868c2ecf20Sopenharmony_ci	print_raid5_conf(conf);
76878c2ecf20Sopenharmony_ci
76888c2ecf20Sopenharmony_ci	if (conf->reshape_progress != MaxSector) {
76898c2ecf20Sopenharmony_ci		conf->reshape_safe = conf->reshape_progress;
76908c2ecf20Sopenharmony_ci		atomic_set(&conf->reshape_stripes, 0);
76918c2ecf20Sopenharmony_ci		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
76928c2ecf20Sopenharmony_ci		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
76938c2ecf20Sopenharmony_ci		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
76948c2ecf20Sopenharmony_ci		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
76958c2ecf20Sopenharmony_ci		mddev->sync_thread = md_register_thread(md_do_sync, mddev,
76968c2ecf20Sopenharmony_ci							"reshape");
76978c2ecf20Sopenharmony_ci		if (!mddev->sync_thread)
76988c2ecf20Sopenharmony_ci			goto abort;
76998c2ecf20Sopenharmony_ci	}
77008c2ecf20Sopenharmony_ci
77018c2ecf20Sopenharmony_ci	/* Ok, everything is just fine now */
77028c2ecf20Sopenharmony_ci	if (mddev->to_remove == &raid5_attrs_group)
77038c2ecf20Sopenharmony_ci		mddev->to_remove = NULL;
77048c2ecf20Sopenharmony_ci	else if (mddev->kobj.sd &&
77058c2ecf20Sopenharmony_ci	    sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
77068c2ecf20Sopenharmony_ci		pr_warn("raid5: failed to create sysfs attributes for %s\n",
77078c2ecf20Sopenharmony_ci			mdname(mddev));
77088c2ecf20Sopenharmony_ci	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
77098c2ecf20Sopenharmony_ci
77108c2ecf20Sopenharmony_ci	if (mddev->queue) {
77118c2ecf20Sopenharmony_ci		int chunk_size;
77128c2ecf20Sopenharmony_ci		/* read-ahead size must cover two whole stripes, which
77138c2ecf20Sopenharmony_ci		 * is 2 * (datadisks) * chunksize, where datadisks is the
77148c2ecf20Sopenharmony_ci		 * number of data-bearing raid devices
77158c2ecf20Sopenharmony_ci		 */
77168c2ecf20Sopenharmony_ci		int data_disks = conf->previous_raid_disks - conf->max_degraded;
77178c2ecf20Sopenharmony_ci		int stripe = data_disks *
77188c2ecf20Sopenharmony_ci			((mddev->chunk_sectors << 9) / PAGE_SIZE);
77198c2ecf20Sopenharmony_ci
77208c2ecf20Sopenharmony_ci		chunk_size = mddev->chunk_sectors << 9;
77218c2ecf20Sopenharmony_ci		blk_queue_io_min(mddev->queue, chunk_size);
77228c2ecf20Sopenharmony_ci		raid5_set_io_opt(conf);
77238c2ecf20Sopenharmony_ci		mddev->queue->limits.raid_partial_stripes_expensive = 1;
77248c2ecf20Sopenharmony_ci		/*
77258c2ecf20Sopenharmony_ci		 * We can only discard a whole stripe. It doesn't make sense to
77268c2ecf20Sopenharmony_ci		 * discard data disk but write parity disk
77278c2ecf20Sopenharmony_ci		 */
77288c2ecf20Sopenharmony_ci		stripe = stripe * PAGE_SIZE;
77298c2ecf20Sopenharmony_ci		/* Round up to power of 2, as discard handling
77308c2ecf20Sopenharmony_ci		 * currently assumes that */
77318c2ecf20Sopenharmony_ci		while ((stripe-1) & stripe)
77328c2ecf20Sopenharmony_ci			stripe = (stripe | (stripe-1)) + 1;
77338c2ecf20Sopenharmony_ci		mddev->queue->limits.discard_alignment = stripe;
77348c2ecf20Sopenharmony_ci		mddev->queue->limits.discard_granularity = stripe;
77358c2ecf20Sopenharmony_ci
77368c2ecf20Sopenharmony_ci		blk_queue_max_write_same_sectors(mddev->queue, 0);
77378c2ecf20Sopenharmony_ci		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
77388c2ecf20Sopenharmony_ci
77398c2ecf20Sopenharmony_ci		rdev_for_each(rdev, mddev) {
77408c2ecf20Sopenharmony_ci			disk_stack_limits(mddev->gendisk, rdev->bdev,
77418c2ecf20Sopenharmony_ci					  rdev->data_offset << 9);
77428c2ecf20Sopenharmony_ci			disk_stack_limits(mddev->gendisk, rdev->bdev,
77438c2ecf20Sopenharmony_ci					  rdev->new_data_offset << 9);
77448c2ecf20Sopenharmony_ci		}
77458c2ecf20Sopenharmony_ci
77468c2ecf20Sopenharmony_ci		/*
77478c2ecf20Sopenharmony_ci		 * zeroing is required, otherwise data
77488c2ecf20Sopenharmony_ci		 * could be lost. Consider a scenario: discard a stripe
77498c2ecf20Sopenharmony_ci		 * (the stripe could be inconsistent if
77508c2ecf20Sopenharmony_ci		 * discard_zeroes_data is 0); write one disk of the
77518c2ecf20Sopenharmony_ci		 * stripe (the stripe could be inconsistent again
77528c2ecf20Sopenharmony_ci		 * depending on which disks are used to calculate
77538c2ecf20Sopenharmony_ci		 * parity); the disk is broken; The stripe data of this
77548c2ecf20Sopenharmony_ci		 * disk is lost.
77558c2ecf20Sopenharmony_ci		 *
77568c2ecf20Sopenharmony_ci		 * We only allow DISCARD if the sysadmin has confirmed that
77578c2ecf20Sopenharmony_ci		 * only safe devices are in use by setting a module parameter.
77588c2ecf20Sopenharmony_ci		 * A better idea might be to turn DISCARD into WRITE_ZEROES
77598c2ecf20Sopenharmony_ci		 * requests, as that is required to be safe.
77608c2ecf20Sopenharmony_ci		 */
77618c2ecf20Sopenharmony_ci		if (devices_handle_discard_safely &&
77628c2ecf20Sopenharmony_ci		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
77638c2ecf20Sopenharmony_ci		    mddev->queue->limits.discard_granularity >= stripe)
77648c2ecf20Sopenharmony_ci			blk_queue_flag_set(QUEUE_FLAG_DISCARD,
77658c2ecf20Sopenharmony_ci						mddev->queue);
77668c2ecf20Sopenharmony_ci		else
77678c2ecf20Sopenharmony_ci			blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
77688c2ecf20Sopenharmony_ci						mddev->queue);
77698c2ecf20Sopenharmony_ci
77708c2ecf20Sopenharmony_ci		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
77718c2ecf20Sopenharmony_ci	}
77728c2ecf20Sopenharmony_ci
77738c2ecf20Sopenharmony_ci	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
77748c2ecf20Sopenharmony_ci		goto abort;
77758c2ecf20Sopenharmony_ci
77768c2ecf20Sopenharmony_ci	return 0;
77778c2ecf20Sopenharmony_ciabort:
77788c2ecf20Sopenharmony_ci	md_unregister_thread(&mddev->thread);
77798c2ecf20Sopenharmony_ci	print_raid5_conf(conf);
77808c2ecf20Sopenharmony_ci	free_conf(conf);
77818c2ecf20Sopenharmony_ci	mddev->private = NULL;
77828c2ecf20Sopenharmony_ci	pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
77838c2ecf20Sopenharmony_ci	return -EIO;
77848c2ecf20Sopenharmony_ci}
77858c2ecf20Sopenharmony_ci
77868c2ecf20Sopenharmony_cistatic void raid5_free(struct mddev *mddev, void *priv)
77878c2ecf20Sopenharmony_ci{
77888c2ecf20Sopenharmony_ci	struct r5conf *conf = priv;
77898c2ecf20Sopenharmony_ci
77908c2ecf20Sopenharmony_ci	free_conf(conf);
77918c2ecf20Sopenharmony_ci	mddev->to_remove = &raid5_attrs_group;
77928c2ecf20Sopenharmony_ci}
77938c2ecf20Sopenharmony_ci
77948c2ecf20Sopenharmony_cistatic void raid5_status(struct seq_file *seq, struct mddev *mddev)
77958c2ecf20Sopenharmony_ci{
77968c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
77978c2ecf20Sopenharmony_ci	int i;
77988c2ecf20Sopenharmony_ci
77998c2ecf20Sopenharmony_ci	seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
78008c2ecf20Sopenharmony_ci		conf->chunk_sectors / 2, mddev->layout);
78018c2ecf20Sopenharmony_ci	seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
78028c2ecf20Sopenharmony_ci	rcu_read_lock();
78038c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks; i++) {
78048c2ecf20Sopenharmony_ci		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
78058c2ecf20Sopenharmony_ci		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
78068c2ecf20Sopenharmony_ci	}
78078c2ecf20Sopenharmony_ci	rcu_read_unlock();
78088c2ecf20Sopenharmony_ci	seq_printf (seq, "]");
78098c2ecf20Sopenharmony_ci}
78108c2ecf20Sopenharmony_ci
78118c2ecf20Sopenharmony_cistatic void print_raid5_conf (struct r5conf *conf)
78128c2ecf20Sopenharmony_ci{
78138c2ecf20Sopenharmony_ci	int i;
78148c2ecf20Sopenharmony_ci	struct disk_info *tmp;
78158c2ecf20Sopenharmony_ci
78168c2ecf20Sopenharmony_ci	pr_debug("RAID conf printout:\n");
78178c2ecf20Sopenharmony_ci	if (!conf) {
78188c2ecf20Sopenharmony_ci		pr_debug("(conf==NULL)\n");
78198c2ecf20Sopenharmony_ci		return;
78208c2ecf20Sopenharmony_ci	}
78218c2ecf20Sopenharmony_ci	pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level,
78228c2ecf20Sopenharmony_ci	       conf->raid_disks,
78238c2ecf20Sopenharmony_ci	       conf->raid_disks - conf->mddev->degraded);
78248c2ecf20Sopenharmony_ci
78258c2ecf20Sopenharmony_ci	for (i = 0; i < conf->raid_disks; i++) {
78268c2ecf20Sopenharmony_ci		char b[BDEVNAME_SIZE];
78278c2ecf20Sopenharmony_ci		tmp = conf->disks + i;
78288c2ecf20Sopenharmony_ci		if (tmp->rdev)
78298c2ecf20Sopenharmony_ci			pr_debug(" disk %d, o:%d, dev:%s\n",
78308c2ecf20Sopenharmony_ci			       i, !test_bit(Faulty, &tmp->rdev->flags),
78318c2ecf20Sopenharmony_ci			       bdevname(tmp->rdev->bdev, b));
78328c2ecf20Sopenharmony_ci	}
78338c2ecf20Sopenharmony_ci}
78348c2ecf20Sopenharmony_ci
/*
 * Scan every slot for devices that have completed recovery
 * (recovery_offset == MaxSector) and mark them In_sync.  Handles both
 * plain members and replacement devices taking over a slot.
 * Returns the number of devices newly marked In_sync; the caller uses
 * this to decide whether the array metadata needs updating.
 */
static int raid5_spare_active(struct mddev *mddev)
{
	int i;
	struct r5conf *conf = mddev->private;
	struct disk_info *tmp;
	int count = 0;	/* devices newly activated in this pass */
	unsigned long flags;

	for (i = 0; i < conf->raid_disks; i++) {
		tmp = conf->disks + i;
		/* test_and_set_bit makes the activation idempotent: a
		 * device already In_sync is not counted again. */
		if (tmp->replacement
		    && tmp->replacement->recovery_offset == MaxSector
		    && !test_bit(Faulty, &tmp->replacement->flags)
		    && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
			/* Replacement has just become active. */
			if (!tmp->rdev
			    || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
				count++;
			if (tmp->rdev) {
				/* Replaced device not technically faulty,
				 * but we need to be sure it gets removed
				 * and never re-added.
				 */
				set_bit(Faulty, &tmp->rdev->flags);
				sysfs_notify_dirent_safe(
					tmp->rdev->sysfs_state);
			}
			sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
		} else if (tmp->rdev
		    && tmp->rdev->recovery_offset == MaxSector
		    && !test_bit(Faulty, &tmp->rdev->flags)
		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
			/* A regular member has finished recovery. */
			count++;
			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
		}
	}
	/* degraded is read under device_lock elsewhere; update it there too */
	spin_lock_irqsave(&conf->device_lock, flags);
	mddev->degraded = raid5_calc_degraded(conf);
	spin_unlock_irqrestore(&conf->device_lock, flags);
	print_raid5_conf(conf);
	return count;
}
78778c2ecf20Sopenharmony_ci
/*
 * Hot-remove @rdev from the array.  Three cases are handled: removing
 * the journal device (tears down the log), removing a regular member,
 * and removing a replacement device.
 * Returns 0 on success, -EBUSY if the device is still in use.
 */
static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
{
	struct r5conf *conf = mddev->private;
	int err = 0;
	int number = rdev->raid_disk;
	struct md_rdev **rdevp;
	struct disk_info *p = conf->disks + number;

	print_raid5_conf(conf);
	if (test_bit(Journal, &rdev->flags) && conf->log) {
		/*
		 * we can't wait pending write here, as this is called in
		 * raid5d, wait will deadlock.
		 * neilb: there is no locking about new writes here,
		 * so this cannot be safe.
		 */
		if (atomic_read(&conf->active_stripes) ||
		    atomic_read(&conf->r5c_cached_full_stripes) ||
		    atomic_read(&conf->r5c_cached_partial_stripes)) {
			return -EBUSY;
		}
		log_exit(conf);
		return 0;
	}
	/* Locate which slot pointer (main or replacement) holds @rdev. */
	if (rdev == p->rdev)
		rdevp = &p->rdev;
	else if (rdev == p->replacement)
		rdevp = &p->replacement;
	else
		return 0;

	/* Slot beyond the active range with no reshape running: the
	 * device cannot be considered in-sync any more. */
	if (number >= conf->raid_disks &&
	    conf->reshape_progress == MaxSector)
		clear_bit(In_sync, &rdev->flags);

	if (test_bit(In_sync, &rdev->flags) ||
	    atomic_read(&rdev->nr_pending)) {
		err = -EBUSY;
		goto abort;
	}
	/* Only remove non-faulty devices if recovery
	 * isn't possible.
	 */
	if (!test_bit(Faulty, &rdev->flags) &&
	    mddev->recovery_disabled != conf->recovery_disabled &&
	    !has_failed(conf) &&
	    (!p->replacement || p->replacement == rdev) &&
	    number < conf->raid_disks) {
		err = -EBUSY;
		goto abort;
	}
	*rdevp = NULL;
	if (!test_bit(RemoveSynchronized, &rdev->flags)) {
		/* Wait for concurrent readers that may have obtained a
		 * reference before the pointer was cleared above. */
		synchronize_rcu();
		if (atomic_read(&rdev->nr_pending)) {
			/* lost the race, try later */
			err = -EBUSY;
			*rdevp = rdev;
		}
	}
	if (!err) {
		err = log_modify(conf, rdev, false);
		if (err)
			goto abort;
	}
	if (p->replacement) {
		/* We must have just cleared 'rdev' */
		p->rdev = p->replacement;
		clear_bit(Replacement, &p->replacement->flags);
		smp_mb(); /* Make sure other CPUs may see both as identical
			   * but will never see neither - if they are careful
			   */
		p->replacement = NULL;

		if (!err)
			err = log_modify(conf, p->rdev, true);
	}

	clear_bit(WantReplacement, &rdev->flags);
abort:

	print_raid5_conf(conf);
	return err;
}
79628c2ecf20Sopenharmony_ci
/*
 * Hot-add @rdev to the array: a journal device (re)initialises the
 * log; otherwise the device is placed into a free slot, or offered as
 * a replacement for a member flagged WantReplacement.
 * Returns 0 on success, a negative errno on failure (-EEXIST if no
 * slot could be found).
 */
static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{
	struct r5conf *conf = mddev->private;
	int ret, err = -EEXIST;
	int disk;
	struct disk_info *p;
	int first = 0;
	int last = conf->raid_disks - 1;

	if (test_bit(Journal, &rdev->flags)) {
		/* only one journal device is allowed */
		if (conf->log)
			return -EBUSY;

		rdev->raid_disk = 0;
		/*
		 * The array is in readonly mode if journal is missing, so no
		 * write requests running. We should be safe
		 */
		ret = log_init(conf, rdev, false);
		if (ret)
			return ret;

		ret = r5l_start(conf->log);
		if (ret)
			return ret;

		return 0;
	}
	if (mddev->recovery_disabled == conf->recovery_disabled)
		return -EBUSY;

	if (rdev->saved_raid_disk < 0 && has_failed(conf))
		/* no point adding a device */
		return -EINVAL;

	/* An explicit slot request restricts the search to that slot. */
	if (rdev->raid_disk >= 0)
		first = last = rdev->raid_disk;

	/*
	 * find the disk ... but prefer rdev->saved_raid_disk
	 * if possible.
	 */
	if (rdev->saved_raid_disk >= 0 &&
	    rdev->saved_raid_disk >= first &&
	    rdev->saved_raid_disk <= last &&
	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
		first = rdev->saved_raid_disk;

	/* First pass: take the first free slot. */
	for (disk = first; disk <= last; disk++) {
		p = conf->disks + disk;
		if (p->rdev == NULL) {
			clear_bit(In_sync, &rdev->flags);
			rdev->raid_disk = disk;
			/* different slot than before: a full resync is needed */
			if (rdev->saved_raid_disk != disk)
				conf->fullsync = 1;
			/* publish only after the flags above are set */
			rcu_assign_pointer(p->rdev, rdev);

			err = log_modify(conf, rdev, true);

			goto out;
		}
	}
	/* Second pass (all slots occupied): offer as a replacement. */
	for (disk = first; disk <= last; disk++) {
		p = conf->disks + disk;
		if (test_bit(WantReplacement, &p->rdev->flags) &&
		    p->replacement == NULL) {
			clear_bit(In_sync, &rdev->flags);
			set_bit(Replacement, &rdev->flags);
			rdev->raid_disk = disk;
			err = 0;
			conf->fullsync = 1;
			rcu_assign_pointer(p->replacement, rdev);
			break;
		}
	}
out:
	print_raid5_conf(conf);
	return err;
}
80428c2ecf20Sopenharmony_ci
/*
 * raid5_resize() - change the amount of each member device used by the array.
 * @mddev:   the array being resized.
 * @sectors: new number of sectors to use on each device (0 means "all").
 *
 * Returns 0 on success or a negative errno.  Growing schedules a resync of
 * the newly exposed space; shrinking takes effect without waiting for I/O
 * in the removed region.
 */
static int raid5_resize(struct mddev *mddev, sector_t sectors)
{
	/* no resync is happening, and there is enough space
	 * on all devices, so we can resize.
	 * We need to make sure resync covers any new space.
	 * If the array is shrinking we should possibly wait until
	 * any io in the removed space completes, but it hardly seems
	 * worth it.
	 */
	sector_t newsize;
	struct r5conf *conf = mddev->private;

	/* resizing is not supported while a journal or PPL is in use */
	if (raid5_has_log(conf) || raid5_has_ppl(conf))
		return -EINVAL;
	/* round down to a whole number of chunks */
	sectors &= ~((sector_t)conf->chunk_sectors - 1);
	newsize = raid5_size(mddev, sectors, mddev->raid_disks);
	/* an externally-managed array size may not shrink implicitly */
	if (mddev->external_size &&
	    mddev->array_sectors > newsize)
		return -EINVAL;
	/* resize the bitmap first; bail out without touching sizes on failure */
	if (mddev->bitmap) {
		int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0);
		if (ret)
			return ret;
	}
	md_set_array_sectors(mddev, newsize);
	if (sectors > mddev->dev_sectors &&
	    mddev->recovery_cp > mddev->dev_sectors) {
		/* growing: pull the resync checkpoint back so the new
		 * space gets synced */
		mddev->recovery_cp = mddev->dev_sectors;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	}
	mddev->dev_sectors = sectors;
	mddev->resync_max_sectors = sectors;
	return 0;
}
80778c2ecf20Sopenharmony_ci
/*
 * check_stripe_cache() - verify the stripe cache is big enough for a reshape.
 * @mddev: the array about to be reshaped.
 *
 * Returns 1 when the cache is adequate, 0 otherwise (after logging a
 * warning telling the admin how many stripes are needed).
 */
static int check_stripe_cache(struct mddev *mddev)
{
	/* Can only proceed if there are plenty of stripe_heads.
	 * We need a minimum of one full stripe, and for sensible progress
	 * it is best to have about 4 times that.
	 * If we require 4 times, then the default 256 4K stripe_heads will
	 * allow for chunk sizes up to 256K, which is probably OK.
	 * If the chunk size is greater, user-space should request more
	 * stripe_heads first.
	 */
	struct r5conf *conf = mddev->private;
	/* both the old and the new chunk size must fit 4x into the cache */
	if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
	    > conf->min_nr_stripes ||
	    ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
	    > conf->min_nr_stripes) {
		pr_warn("md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
			mdname(mddev),
			((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
			 / RAID5_STRIPE_SIZE(conf))*4);
		return 0;
	}
	return 1;
}
81018c2ecf20Sopenharmony_ci
/*
 * check_reshape() - validate a requested reshape and pre-allocate resources.
 * @mddev: the array whose new_layout/new_chunk_sectors/delta_disks describe
 *         the requested geometry.
 *
 * Returns 0 if nothing needs doing or the reshape can proceed, or a
 * negative errno.  On success the stripe pool (and, if chunks grow,
 * the scribble buffers) have been resized to cover the new geometry.
 */
static int check_reshape(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;

	/* reshape is not supported while a journal or PPL is in use */
	if (raid5_has_log(conf) || raid5_has_ppl(conf))
		return -EINVAL;
	if (mddev->delta_disks == 0 &&
	    mddev->new_layout == mddev->layout &&
	    mddev->new_chunk_sectors == mddev->chunk_sectors)
		return 0; /* nothing to do */
	if (has_failed(conf))
		return -EINVAL;
	if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) {
		/* We might be able to shrink, but the devices must
		 * be made bigger first.
		 * For raid6, 4 is the minimum size.
		 * Otherwise 2 is the minimum
		 */
		int min = 2;
		if (mddev->level == 6)
			min = 4;
		if (mddev->raid_disks + mddev->delta_disks < min)
			return -EINVAL;
	}

	if (!check_stripe_cache(mddev))
		return -ENOSPC;

	/* growing the chunk or the disk count needs bigger scribble space */
	if (mddev->new_chunk_sectors > mddev->chunk_sectors ||
	    mddev->delta_disks > 0)
		if (resize_chunks(conf,
				  conf->previous_raid_disks
				  + max(0, mddev->delta_disks),
				  max(mddev->new_chunk_sectors,
				      mddev->chunk_sectors)
			    ) < 0)
			return -ENOMEM;

	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
		return 0; /* never bother to shrink */
	return resize_stripes(conf, (conf->previous_raid_disks
				     + mddev->delta_disks));
}
81458c2ecf20Sopenharmony_ci
/*
 * raid5_start_reshape() - commit the new geometry and kick off the
 * reshape thread.
 * @mddev: the array to reshape; new_chunk_sectors/new_layout/delta_disks
 *         hold the target geometry already validated by check_reshape().
 *
 * Returns 0 on success; -EBUSY if recovery is already running, -ENOSPC
 * if the stripe cache is too small, -EINVAL on impossible geometries,
 * and -EAGAIN (after rolling everything back) if the sync thread could
 * not be started.
 */
static int raid5_start_reshape(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;
	struct md_rdev *rdev;
	int spares = 0;
	unsigned long flags;

	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		return -EBUSY;

	if (!check_stripe_cache(mddev))
		return -ENOSPC;

	if (has_failed(conf))
		return -EINVAL;

	/* count usable spares: devices that are neither in-sync nor faulty */
	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags)
		    && !test_bit(Faulty, &rdev->flags))
			spares++;
	}

	if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
		/* Not enough devices even to make a degraded array
		 * of that size
		 */
		return -EINVAL;

	/* Refuse to reduce size of the array.  Any reductions in
	 * array size must be through explicit setting of array_size
	 * attribute.
	 */
	if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
	    < mddev->array_sectors) {
		pr_warn("md/raid:%s: array size must be reduced before number of disks\n",
			mdname(mddev));
		return -EINVAL;
	}

	atomic_set(&conf->reshape_stripes, 0);
	/* publish the new geometry atomically w.r.t. readers of the
	 * gen_lock seqcount */
	spin_lock_irq(&conf->device_lock);
	write_seqcount_begin(&conf->gen_lock);
	conf->previous_raid_disks = conf->raid_disks;
	conf->raid_disks += mddev->delta_disks;
	conf->prev_chunk_sectors = conf->chunk_sectors;
	conf->chunk_sectors = mddev->new_chunk_sectors;
	conf->prev_algo = conf->algorithm;
	conf->algorithm = mddev->new_layout;
	conf->generation++;
	/* Code that selects data_offset needs to see the generation update
	 * if reshape_progress has been set - so a memory barrier needed.
	 */
	smp_mb();
	if (mddev->reshape_backwards)
		conf->reshape_progress = raid5_size(mddev, 0, 0);
	else
		conf->reshape_progress = 0;
	conf->reshape_safe = conf->reshape_progress;
	write_seqcount_end(&conf->gen_lock);
	spin_unlock_irq(&conf->device_lock);

	/* Now make sure any requests that proceeded on the assumption
	 * the reshape wasn't running - like Discard or Read - have
	 * completed.
	 */
	mddev_suspend(mddev);
	mddev_resume(mddev);

	/* Add some new drives, as many as will fit.
	 * We know there are enough to make the newly sized array work.
	 * Don't add devices if we are reducing the number of
	 * devices in the array.  This is because it is not possible
	 * to correctly record the "partially reconstructed" state of
	 * such devices during the reshape and confusion could result.
	 */
	if (mddev->delta_disks >= 0) {
		rdev_for_each(rdev, mddev)
			if (rdev->raid_disk < 0 &&
			    !test_bit(Faulty, &rdev->flags)) {
				if (raid5_add_disk(mddev, rdev) == 0) {
					if (rdev->raid_disk
					    >= conf->previous_raid_disks)
						set_bit(In_sync, &rdev->flags);
					else
						rdev->recovery_offset = 0;

					/* Failure here is OK */
					sysfs_link_rdev(mddev, rdev);
				}
			} else if (rdev->raid_disk >= conf->previous_raid_disks
				   && !test_bit(Faulty, &rdev->flags)) {
				/* This is a spare that was manually added */
				set_bit(In_sync, &rdev->flags);
			}

		/* When a reshape changes the number of devices,
		 * ->degraded is measured against the larger of the
		 * pre and post number of devices.
		 */
		spin_lock_irqsave(&conf->device_lock, flags);
		mddev->degraded = raid5_calc_degraded(conf);
		spin_unlock_irqrestore(&conf->device_lock, flags);
	}
	mddev->raid_disks = conf->raid_disks;
	mddev->reshape_position = conf->reshape_progress;
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);

	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
						"reshape");
	if (!mddev->sync_thread) {
		/* could not start the reshape thread: roll the geometry
		 * back under the same seqcount/lock protocol as above */
		mddev->recovery = 0;
		spin_lock_irq(&conf->device_lock);
		write_seqcount_begin(&conf->gen_lock);
		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
		mddev->new_chunk_sectors =
			conf->chunk_sectors = conf->prev_chunk_sectors;
		mddev->new_layout = conf->algorithm = conf->prev_algo;
		rdev_for_each(rdev, mddev)
			rdev->new_data_offset = rdev->data_offset;
		smp_wmb();
		conf->generation --;
		conf->reshape_progress = MaxSector;
		mddev->reshape_position = MaxSector;
		write_seqcount_end(&conf->gen_lock);
		spin_unlock_irq(&conf->device_lock);
		return -EAGAIN;
	}
	conf->reshape_checkpoint = jiffies;
	md_wakeup_thread(mddev->sync_thread);
	md_new_event(mddev);
	return 0;
}
82838c2ecf20Sopenharmony_ci
/* This is called from the reshape thread and should make any
 * changes needed in 'conf'
 */
static void end_reshape(struct r5conf *conf)
{

	/* only finalise if the reshape completed (was not interrupted) */
	if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
		struct md_rdev *rdev;

		spin_lock_irq(&conf->device_lock);
		conf->previous_raid_disks = conf->raid_disks;
		md_finish_reshape(conf->mddev);
		/* make the geometry update visible before clearing
		 * reshape_progress */
		smp_wmb();
		conf->reshape_progress = MaxSector;
		conf->mddev->reshape_position = MaxSector;
		/* active non-journal devices no longer need recovery */
		rdev_for_each(rdev, conf->mddev)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(Journal, &rdev->flags) &&
			    !test_bit(In_sync, &rdev->flags))
				rdev->recovery_offset = MaxSector;
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_for_overlap);

		if (conf->mddev->queue)
			raid5_set_io_opt(conf);
	}
}
83118c2ecf20Sopenharmony_ci
/* This is called from the raid5d thread with mddev_lock held.
 * It makes config changes to the device.
 */
static void raid5_finish_reshape(struct mddev *mddev)
{
	struct r5conf *conf = mddev->private;

	/* only commit the new config if the reshape was not interrupted */
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {

		if (mddev->delta_disks <= 0) {
			/* array shrank: recount degraded and mark the
			 * now-unused slots (raid_disks ..
			 * raid_disks - delta_disks - 1) out of sync */
			int d;
			spin_lock_irq(&conf->device_lock);
			mddev->degraded = raid5_calc_degraded(conf);
			spin_unlock_irq(&conf->device_lock);
			for (d = conf->raid_disks ;
			     d < conf->raid_disks - mddev->delta_disks;
			     d++) {
				struct md_rdev *rdev = conf->disks[d].rdev;
				if (rdev)
					clear_bit(In_sync, &rdev->flags);
				rdev = conf->disks[d].replacement;
				if (rdev)
					clear_bit(In_sync, &rdev->flags);
			}
		}
		/* mirror the committed geometry back into the mddev */
		mddev->layout = conf->algorithm;
		mddev->chunk_sectors = conf->chunk_sectors;
		mddev->reshape_position = MaxSector;
		mddev->delta_disks = 0;
		mddev->reshape_backwards = 0;
	}
}
83448c2ecf20Sopenharmony_ci
/*
 * raid5_quiesce() - stop or restart normal I/O on the array.
 * @mddev:   the array.
 * @quiesce: non-zero to drain and block writes, zero to re-enable them.
 *
 * Quiescing flushes the r5c write-back cache and waits (dropping the
 * hash locks while sleeping) until no stripes or aligned reads are
 * active, leaving conf->quiesce at 1.  Un-quiescing clears the flag
 * and wakes all waiters.
 */
static void raid5_quiesce(struct mddev *mddev, int quiesce)
{
	struct r5conf *conf = mddev->private;

	if (quiesce) {
		/* stop all writes */
		lock_all_device_hash_locks_irq(conf);
		/* '2' tells resync/reshape to pause so that all
		 * active stripes can drain
		 */
		r5c_flush_cache(conf, INT_MAX);
		conf->quiesce = 2;
		/* wait_event_cmd drops and retakes the hash locks
		 * around each sleep */
		wait_event_cmd(conf->wait_for_quiescent,
				    atomic_read(&conf->active_stripes) == 0 &&
				    atomic_read(&conf->active_aligned_reads) == 0,
				    unlock_all_device_hash_locks_irq(conf),
				    lock_all_device_hash_locks_irq(conf));
		conf->quiesce = 1;
		unlock_all_device_hash_locks_irq(conf);
		/* allow reshape to continue */
		wake_up(&conf->wait_for_overlap);
	} else {
		/* re-enable writes */
		lock_all_device_hash_locks_irq(conf);
		conf->quiesce = 0;
		wake_up(&conf->wait_for_quiescent);
		wake_up(&conf->wait_for_overlap);
		unlock_all_device_hash_locks_irq(conf);
	}
	log_quiesce(conf, quiesce);
}
83768c2ecf20Sopenharmony_ci
83778c2ecf20Sopenharmony_cistatic void *raid45_takeover_raid0(struct mddev *mddev, int level)
83788c2ecf20Sopenharmony_ci{
83798c2ecf20Sopenharmony_ci	struct r0conf *raid0_conf = mddev->private;
83808c2ecf20Sopenharmony_ci	sector_t sectors;
83818c2ecf20Sopenharmony_ci
83828c2ecf20Sopenharmony_ci	/* for raid0 takeover only one zone is supported */
83838c2ecf20Sopenharmony_ci	if (raid0_conf->nr_strip_zones > 1) {
83848c2ecf20Sopenharmony_ci		pr_warn("md/raid:%s: cannot takeover raid0 with more than one zone.\n",
83858c2ecf20Sopenharmony_ci			mdname(mddev));
83868c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
83878c2ecf20Sopenharmony_ci	}
83888c2ecf20Sopenharmony_ci
83898c2ecf20Sopenharmony_ci	sectors = raid0_conf->strip_zone[0].zone_end;
83908c2ecf20Sopenharmony_ci	sector_div(sectors, raid0_conf->strip_zone[0].nb_dev);
83918c2ecf20Sopenharmony_ci	mddev->dev_sectors = sectors;
83928c2ecf20Sopenharmony_ci	mddev->new_level = level;
83938c2ecf20Sopenharmony_ci	mddev->new_layout = ALGORITHM_PARITY_N;
83948c2ecf20Sopenharmony_ci	mddev->new_chunk_sectors = mddev->chunk_sectors;
83958c2ecf20Sopenharmony_ci	mddev->raid_disks += 1;
83968c2ecf20Sopenharmony_ci	mddev->delta_disks = 1;
83978c2ecf20Sopenharmony_ci	/* make sure it will be not marked as dirty */
83988c2ecf20Sopenharmony_ci	mddev->recovery_cp = MaxSector;
83998c2ecf20Sopenharmony_ci
84008c2ecf20Sopenharmony_ci	return setup_conf(mddev);
84018c2ecf20Sopenharmony_ci}
84028c2ecf20Sopenharmony_ci
84038c2ecf20Sopenharmony_cistatic void *raid5_takeover_raid1(struct mddev *mddev)
84048c2ecf20Sopenharmony_ci{
84058c2ecf20Sopenharmony_ci	int chunksect;
84068c2ecf20Sopenharmony_ci	void *ret;
84078c2ecf20Sopenharmony_ci
84088c2ecf20Sopenharmony_ci	if (mddev->raid_disks != 2 ||
84098c2ecf20Sopenharmony_ci	    mddev->degraded > 1)
84108c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
84118c2ecf20Sopenharmony_ci
84128c2ecf20Sopenharmony_ci	/* Should check if there are write-behind devices? */
84138c2ecf20Sopenharmony_ci
84148c2ecf20Sopenharmony_ci	chunksect = 64*2; /* 64K by default */
84158c2ecf20Sopenharmony_ci
84168c2ecf20Sopenharmony_ci	/* The array must be an exact multiple of chunksize */
84178c2ecf20Sopenharmony_ci	while (chunksect && (mddev->array_sectors & (chunksect-1)))
84188c2ecf20Sopenharmony_ci		chunksect >>= 1;
84198c2ecf20Sopenharmony_ci
84208c2ecf20Sopenharmony_ci	if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private))
84218c2ecf20Sopenharmony_ci		/* array size does not allow a suitable chunk size */
84228c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
84238c2ecf20Sopenharmony_ci
84248c2ecf20Sopenharmony_ci	mddev->new_level = 5;
84258c2ecf20Sopenharmony_ci	mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
84268c2ecf20Sopenharmony_ci	mddev->new_chunk_sectors = chunksect;
84278c2ecf20Sopenharmony_ci
84288c2ecf20Sopenharmony_ci	ret = setup_conf(mddev);
84298c2ecf20Sopenharmony_ci	if (!IS_ERR(ret))
84308c2ecf20Sopenharmony_ci		mddev_clear_unsupported_flags(mddev,
84318c2ecf20Sopenharmony_ci			UNSUPPORTED_MDDEV_FLAGS);
84328c2ecf20Sopenharmony_ci	return ret;
84338c2ecf20Sopenharmony_ci}
84348c2ecf20Sopenharmony_ci
84358c2ecf20Sopenharmony_cistatic void *raid5_takeover_raid6(struct mddev *mddev)
84368c2ecf20Sopenharmony_ci{
84378c2ecf20Sopenharmony_ci	int new_layout;
84388c2ecf20Sopenharmony_ci
84398c2ecf20Sopenharmony_ci	switch (mddev->layout) {
84408c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_ASYMMETRIC_6:
84418c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_LEFT_ASYMMETRIC;
84428c2ecf20Sopenharmony_ci		break;
84438c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_ASYMMETRIC_6:
84448c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
84458c2ecf20Sopenharmony_ci		break;
84468c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_SYMMETRIC_6:
84478c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_LEFT_SYMMETRIC;
84488c2ecf20Sopenharmony_ci		break;
84498c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_SYMMETRIC_6:
84508c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_RIGHT_SYMMETRIC;
84518c2ecf20Sopenharmony_ci		break;
84528c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_0_6:
84538c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_PARITY_0;
84548c2ecf20Sopenharmony_ci		break;
84558c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_N:
84568c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_PARITY_N;
84578c2ecf20Sopenharmony_ci		break;
84588c2ecf20Sopenharmony_ci	default:
84598c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
84608c2ecf20Sopenharmony_ci	}
84618c2ecf20Sopenharmony_ci	mddev->new_level = 5;
84628c2ecf20Sopenharmony_ci	mddev->new_layout = new_layout;
84638c2ecf20Sopenharmony_ci	mddev->delta_disks = -1;
84648c2ecf20Sopenharmony_ci	mddev->raid_disks -= 1;
84658c2ecf20Sopenharmony_ci	return setup_conf(mddev);
84668c2ecf20Sopenharmony_ci}
84678c2ecf20Sopenharmony_ci
84688c2ecf20Sopenharmony_cistatic int raid5_check_reshape(struct mddev *mddev)
84698c2ecf20Sopenharmony_ci{
84708c2ecf20Sopenharmony_ci	/* For a 2-drive array, the layout and chunk size can be changed
84718c2ecf20Sopenharmony_ci	 * immediately as not restriping is needed.
84728c2ecf20Sopenharmony_ci	 * For larger arrays we record the new value - after validation
84738c2ecf20Sopenharmony_ci	 * to be used by a reshape pass.
84748c2ecf20Sopenharmony_ci	 */
84758c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
84768c2ecf20Sopenharmony_ci	int new_chunk = mddev->new_chunk_sectors;
84778c2ecf20Sopenharmony_ci
84788c2ecf20Sopenharmony_ci	if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
84798c2ecf20Sopenharmony_ci		return -EINVAL;
84808c2ecf20Sopenharmony_ci	if (new_chunk > 0) {
84818c2ecf20Sopenharmony_ci		if (!is_power_of_2(new_chunk))
84828c2ecf20Sopenharmony_ci			return -EINVAL;
84838c2ecf20Sopenharmony_ci		if (new_chunk < (PAGE_SIZE>>9))
84848c2ecf20Sopenharmony_ci			return -EINVAL;
84858c2ecf20Sopenharmony_ci		if (mddev->array_sectors & (new_chunk-1))
84868c2ecf20Sopenharmony_ci			/* not factor of array size */
84878c2ecf20Sopenharmony_ci			return -EINVAL;
84888c2ecf20Sopenharmony_ci	}
84898c2ecf20Sopenharmony_ci
84908c2ecf20Sopenharmony_ci	/* They look valid */
84918c2ecf20Sopenharmony_ci
84928c2ecf20Sopenharmony_ci	if (mddev->raid_disks == 2) {
84938c2ecf20Sopenharmony_ci		/* can make the change immediately */
84948c2ecf20Sopenharmony_ci		if (mddev->new_layout >= 0) {
84958c2ecf20Sopenharmony_ci			conf->algorithm = mddev->new_layout;
84968c2ecf20Sopenharmony_ci			mddev->layout = mddev->new_layout;
84978c2ecf20Sopenharmony_ci		}
84988c2ecf20Sopenharmony_ci		if (new_chunk > 0) {
84998c2ecf20Sopenharmony_ci			conf->chunk_sectors = new_chunk ;
85008c2ecf20Sopenharmony_ci			mddev->chunk_sectors = new_chunk;
85018c2ecf20Sopenharmony_ci		}
85028c2ecf20Sopenharmony_ci		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
85038c2ecf20Sopenharmony_ci		md_wakeup_thread(mddev->thread);
85048c2ecf20Sopenharmony_ci	}
85058c2ecf20Sopenharmony_ci	return check_reshape(mddev);
85068c2ecf20Sopenharmony_ci}
85078c2ecf20Sopenharmony_ci
85088c2ecf20Sopenharmony_cistatic int raid6_check_reshape(struct mddev *mddev)
85098c2ecf20Sopenharmony_ci{
85108c2ecf20Sopenharmony_ci	int new_chunk = mddev->new_chunk_sectors;
85118c2ecf20Sopenharmony_ci
85128c2ecf20Sopenharmony_ci	if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
85138c2ecf20Sopenharmony_ci		return -EINVAL;
85148c2ecf20Sopenharmony_ci	if (new_chunk > 0) {
85158c2ecf20Sopenharmony_ci		if (!is_power_of_2(new_chunk))
85168c2ecf20Sopenharmony_ci			return -EINVAL;
85178c2ecf20Sopenharmony_ci		if (new_chunk < (PAGE_SIZE >> 9))
85188c2ecf20Sopenharmony_ci			return -EINVAL;
85198c2ecf20Sopenharmony_ci		if (mddev->array_sectors & (new_chunk-1))
85208c2ecf20Sopenharmony_ci			/* not factor of array size */
85218c2ecf20Sopenharmony_ci			return -EINVAL;
85228c2ecf20Sopenharmony_ci	}
85238c2ecf20Sopenharmony_ci
85248c2ecf20Sopenharmony_ci	/* They look valid */
85258c2ecf20Sopenharmony_ci	return check_reshape(mddev);
85268c2ecf20Sopenharmony_ci}
85278c2ecf20Sopenharmony_ci
85288c2ecf20Sopenharmony_cistatic void *raid5_takeover(struct mddev *mddev)
85298c2ecf20Sopenharmony_ci{
85308c2ecf20Sopenharmony_ci	/* raid5 can take over:
85318c2ecf20Sopenharmony_ci	 *  raid0 - if there is only one strip zone - make it a raid4 layout
85328c2ecf20Sopenharmony_ci	 *  raid1 - if there are two drives.  We need to know the chunk size
85338c2ecf20Sopenharmony_ci	 *  raid4 - trivial - just use a raid4 layout.
85348c2ecf20Sopenharmony_ci	 *  raid6 - Providing it is a *_6 layout
85358c2ecf20Sopenharmony_ci	 */
85368c2ecf20Sopenharmony_ci	if (mddev->level == 0)
85378c2ecf20Sopenharmony_ci		return raid45_takeover_raid0(mddev, 5);
85388c2ecf20Sopenharmony_ci	if (mddev->level == 1)
85398c2ecf20Sopenharmony_ci		return raid5_takeover_raid1(mddev);
85408c2ecf20Sopenharmony_ci	if (mddev->level == 4) {
85418c2ecf20Sopenharmony_ci		mddev->new_layout = ALGORITHM_PARITY_N;
85428c2ecf20Sopenharmony_ci		mddev->new_level = 5;
85438c2ecf20Sopenharmony_ci		return setup_conf(mddev);
85448c2ecf20Sopenharmony_ci	}
85458c2ecf20Sopenharmony_ci	if (mddev->level == 6)
85468c2ecf20Sopenharmony_ci		return raid5_takeover_raid6(mddev);
85478c2ecf20Sopenharmony_ci
85488c2ecf20Sopenharmony_ci	return ERR_PTR(-EINVAL);
85498c2ecf20Sopenharmony_ci}
85508c2ecf20Sopenharmony_ci
85518c2ecf20Sopenharmony_cistatic void *raid4_takeover(struct mddev *mddev)
85528c2ecf20Sopenharmony_ci{
85538c2ecf20Sopenharmony_ci	/* raid4 can take over:
85548c2ecf20Sopenharmony_ci	 *  raid0 - if there is only one strip zone
85558c2ecf20Sopenharmony_ci	 *  raid5 - if layout is right
85568c2ecf20Sopenharmony_ci	 */
85578c2ecf20Sopenharmony_ci	if (mddev->level == 0)
85588c2ecf20Sopenharmony_ci		return raid45_takeover_raid0(mddev, 4);
85598c2ecf20Sopenharmony_ci	if (mddev->level == 5 &&
85608c2ecf20Sopenharmony_ci	    mddev->layout == ALGORITHM_PARITY_N) {
85618c2ecf20Sopenharmony_ci		mddev->new_layout = 0;
85628c2ecf20Sopenharmony_ci		mddev->new_level = 4;
85638c2ecf20Sopenharmony_ci		return setup_conf(mddev);
85648c2ecf20Sopenharmony_ci	}
85658c2ecf20Sopenharmony_ci	return ERR_PTR(-EINVAL);
85668c2ecf20Sopenharmony_ci}
85678c2ecf20Sopenharmony_ci
85688c2ecf20Sopenharmony_cistatic struct md_personality raid5_personality;
85698c2ecf20Sopenharmony_ci
85708c2ecf20Sopenharmony_cistatic void *raid6_takeover(struct mddev *mddev)
85718c2ecf20Sopenharmony_ci{
85728c2ecf20Sopenharmony_ci	/* Currently can only take over a raid5.  We map the
85738c2ecf20Sopenharmony_ci	 * personality to an equivalent raid6 personality
85748c2ecf20Sopenharmony_ci	 * with the Q block at the end.
85758c2ecf20Sopenharmony_ci	 */
85768c2ecf20Sopenharmony_ci	int new_layout;
85778c2ecf20Sopenharmony_ci
85788c2ecf20Sopenharmony_ci	if (mddev->pers != &raid5_personality)
85798c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
85808c2ecf20Sopenharmony_ci	if (mddev->degraded > 1)
85818c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
85828c2ecf20Sopenharmony_ci	if (mddev->raid_disks > 253)
85838c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
85848c2ecf20Sopenharmony_ci	if (mddev->raid_disks < 3)
85858c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
85868c2ecf20Sopenharmony_ci
85878c2ecf20Sopenharmony_ci	switch (mddev->layout) {
85888c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_ASYMMETRIC:
85898c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
85908c2ecf20Sopenharmony_ci		break;
85918c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_ASYMMETRIC:
85928c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
85938c2ecf20Sopenharmony_ci		break;
85948c2ecf20Sopenharmony_ci	case ALGORITHM_LEFT_SYMMETRIC:
85958c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
85968c2ecf20Sopenharmony_ci		break;
85978c2ecf20Sopenharmony_ci	case ALGORITHM_RIGHT_SYMMETRIC:
85988c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
85998c2ecf20Sopenharmony_ci		break;
86008c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_0:
86018c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_PARITY_0_6;
86028c2ecf20Sopenharmony_ci		break;
86038c2ecf20Sopenharmony_ci	case ALGORITHM_PARITY_N:
86048c2ecf20Sopenharmony_ci		new_layout = ALGORITHM_PARITY_N;
86058c2ecf20Sopenharmony_ci		break;
86068c2ecf20Sopenharmony_ci	default:
86078c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
86088c2ecf20Sopenharmony_ci	}
86098c2ecf20Sopenharmony_ci	mddev->new_level = 6;
86108c2ecf20Sopenharmony_ci	mddev->new_layout = new_layout;
86118c2ecf20Sopenharmony_ci	mddev->delta_disks = 1;
86128c2ecf20Sopenharmony_ci	mddev->raid_disks += 1;
86138c2ecf20Sopenharmony_ci	return setup_conf(mddev);
86148c2ecf20Sopenharmony_ci}
86158c2ecf20Sopenharmony_ci
/*
 * Sysfs handler for writes to the array's "consistency_policy" attribute.
 *
 * Two transitions are supported:
 *   "ppl"    - enable the partial parity log on a plain RAID5 array.
 *   "resync" - drop PPL (fall back to resync-based recovery), or, if the
 *              array has the journal flag set but the journal device has
 *              failed and been removed, clear the stale journal flag.
 *
 * Returns 0 on success or a negative errno.  On success the superblock is
 * rewritten so the new policy survives a restart.
 */
static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
{
	struct r5conf *conf;
	int err;

	/* Serialise against reconfiguration; may fail with -EINTR. */
	err = mddev_lock(mddev);
	if (err)
		return err;
	conf = mddev->private;
	if (!conf) {
		mddev_unlock(mddev);
		return -ENODEV;
	}

	if (strncmp(buf, "ppl", 3) == 0) {
		/* ppl only works with RAID 5 */
		if (!raid5_has_ppl(conf) && conf->level == 5) {
			err = log_init(conf, NULL, true);
			if (!err) {
				/*
				 * Re-allocate stripes so each gets PPL pages;
				 * roll the log back if that fails.
				 */
				err = resize_stripes(conf, conf->pool_size);
				if (err)
					log_exit(conf);
			}
		} else
			err = -EINVAL;
	} else if (strncmp(buf, "resync", 6) == 0) {
		if (raid5_has_ppl(conf)) {
			/* Quiesce I/O while tearing the PPL down. */
			mddev_suspend(mddev);
			log_exit(conf);
			mddev_resume(mddev);
			err = resize_stripes(conf, conf->pool_size);
		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
			   r5l_log_disk_error(conf)) {
			/*
			 * Journal flag is set but the journal disk errored:
			 * only clear the flag once no journal device remains.
			 */
			bool journal_dev_exists = false;
			struct md_rdev *rdev;

			rdev_for_each(rdev, mddev)
				if (test_bit(Journal, &rdev->flags)) {
					journal_dev_exists = true;
					break;
				}

			if (!journal_dev_exists) {
				mddev_suspend(mddev);
				clear_bit(MD_HAS_JOURNAL, &mddev->flags);
				mddev_resume(mddev);
			} else  /* need remove journal device first */
				err = -EBUSY;
		} else
			err = -EINVAL;
	} else {
		err = -EINVAL;
	}

	/* Persist the new policy in the superblock. */
	if (!err)
		md_update_sb(mddev, 1);

	mddev_unlock(mddev);

	return err;
}
86778c2ecf20Sopenharmony_ci
86788c2ecf20Sopenharmony_cistatic int raid5_start(struct mddev *mddev)
86798c2ecf20Sopenharmony_ci{
86808c2ecf20Sopenharmony_ci	struct r5conf *conf = mddev->private;
86818c2ecf20Sopenharmony_ci
86828c2ecf20Sopenharmony_ci	return r5l_start(conf->log);
86838c2ecf20Sopenharmony_ci}
86848c2ecf20Sopenharmony_ci
/*
 * Ops table registered with the md core for level-6 arrays.  RAID6 shares
 * almost every handler with RAID5; only reshape checking and takeover
 * differ (raid6_check_reshape / raid6_takeover).
 */
static struct md_personality raid6_personality =
{
	.name		= "raid6",
	.level		= 6,
	.owner		= THIS_MODULE,
	.make_request	= raid5_make_request,
	.run		= raid5_run,
	.start		= raid5_start,
	.free		= raid5_free,
	.status		= raid5_status,
	.error_handler	= raid5_error,
	.hot_add_disk	= raid5_add_disk,
	.hot_remove_disk= raid5_remove_disk,
	.spare_active	= raid5_spare_active,
	.sync_request	= raid5_sync_request,
	.resize		= raid5_resize,
	.size		= raid5_size,
	.check_reshape	= raid6_check_reshape,
	.start_reshape  = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.quiesce	= raid5_quiesce,
	.takeover	= raid6_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
};
/*
 * Ops table registered with the md core for level-5 arrays.  This is the
 * canonical set of handlers; the raid4 and raid6 tables reuse most of it.
 */
static struct md_personality raid5_personality =
{
	.name		= "raid5",
	.level		= 5,
	.owner		= THIS_MODULE,
	.make_request	= raid5_make_request,
	.run		= raid5_run,
	.start		= raid5_start,
	.free		= raid5_free,
	.status		= raid5_status,
	.error_handler	= raid5_error,
	.hot_add_disk	= raid5_add_disk,
	.hot_remove_disk= raid5_remove_disk,
	.spare_active	= raid5_spare_active,
	.sync_request	= raid5_sync_request,
	.resize		= raid5_resize,
	.size		= raid5_size,
	.check_reshape	= raid5_check_reshape,
	.start_reshape  = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.quiesce	= raid5_quiesce,
	.takeover	= raid5_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
};
87338c2ecf20Sopenharmony_ci
/*
 * Ops table registered with the md core for level-4 arrays.  Identical to
 * the raid5 table except for the takeover handler (raid4_takeover); the
 * dedicated-parity layout is handled inside the shared raid5 code paths.
 */
static struct md_personality raid4_personality =
{
	.name		= "raid4",
	.level		= 4,
	.owner		= THIS_MODULE,
	.make_request	= raid5_make_request,
	.run		= raid5_run,
	.start		= raid5_start,
	.free		= raid5_free,
	.status		= raid5_status,
	.error_handler	= raid5_error,
	.hot_add_disk	= raid5_add_disk,
	.hot_remove_disk= raid5_remove_disk,
	.spare_active	= raid5_spare_active,
	.sync_request	= raid5_sync_request,
	.resize		= raid5_resize,
	.size		= raid5_size,
	.check_reshape	= raid5_check_reshape,
	.start_reshape  = raid5_start_reshape,
	.finish_reshape = raid5_finish_reshape,
	.quiesce	= raid5_quiesce,
	.takeover	= raid4_takeover,
	.change_consistency_policy = raid5_change_consistency_policy,
};
87588c2ecf20Sopenharmony_ci
87598c2ecf20Sopenharmony_cistatic int __init raid5_init(void)
87608c2ecf20Sopenharmony_ci{
87618c2ecf20Sopenharmony_ci	int ret;
87628c2ecf20Sopenharmony_ci
87638c2ecf20Sopenharmony_ci	raid5_wq = alloc_workqueue("raid5wq",
87648c2ecf20Sopenharmony_ci		WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
87658c2ecf20Sopenharmony_ci	if (!raid5_wq)
87668c2ecf20Sopenharmony_ci		return -ENOMEM;
87678c2ecf20Sopenharmony_ci
87688c2ecf20Sopenharmony_ci	ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
87698c2ecf20Sopenharmony_ci				      "md/raid5:prepare",
87708c2ecf20Sopenharmony_ci				      raid456_cpu_up_prepare,
87718c2ecf20Sopenharmony_ci				      raid456_cpu_dead);
87728c2ecf20Sopenharmony_ci	if (ret) {
87738c2ecf20Sopenharmony_ci		destroy_workqueue(raid5_wq);
87748c2ecf20Sopenharmony_ci		return ret;
87758c2ecf20Sopenharmony_ci	}
87768c2ecf20Sopenharmony_ci	register_md_personality(&raid6_personality);
87778c2ecf20Sopenharmony_ci	register_md_personality(&raid5_personality);
87788c2ecf20Sopenharmony_ci	register_md_personality(&raid4_personality);
87798c2ecf20Sopenharmony_ci	return 0;
87808c2ecf20Sopenharmony_ci}
87818c2ecf20Sopenharmony_ci
/*
 * Module exit: tear everything down in exact reverse order of raid5_init()
 * - unregister the personalities first so no new arrays can be assembled,
 * then remove the hotplug state, and finally destroy the workqueue.
 */
static void raid5_exit(void)
{
	unregister_md_personality(&raid6_personality);
	unregister_md_personality(&raid5_personality);
	unregister_md_personality(&raid4_personality);
	cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
	destroy_workqueue(raid5_wq);
}
87908c2ecf20Sopenharmony_ci
module_init(raid5_init);
module_exit(raid5_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
/*
 * Aliases so userspace module loading finds this module under both the
 * numeric md personality names and the per-level names.
 */
MODULE_ALIAS("md-personality-4"); /* RAID5 */
MODULE_ALIAS("md-raid5");
MODULE_ALIAS("md-raid4");
MODULE_ALIAS("md-level-5");
MODULE_ALIAS("md-level-4");
MODULE_ALIAS("md-personality-8"); /* RAID6 */
MODULE_ALIAS("md-raid6");
MODULE_ALIAS("md-level-6");

/* This used to be two separate modules, they were: */
MODULE_ALIAS("raid5");
MODULE_ALIAS("raid6");
8807