/*
 * Copyright (C) 2010-2011 Neil Brown
 * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/slab.h>
#include <linux/module.h>

#include "md.h"
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "raid"
#define	MAX_RAID_DEVICES	253 /* md-raid kernel limit */

/*
 * Minimum sectors of free reshape space per raid device
 */
#define	MIN_FREE_RESHAPE_SPACE to_sector(4*4096)

/*
 * Minimum journal space 4 MiB in sectors.
 */
#define	MIN_RAID456_JOURNAL_SPACE (4*2048)
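
/*
 * Worked sizes (illustrative note, not in the original source):
 * MIN_FREE_RESHAPE_SPACE is to_sector(4 * 4096) = 16384 / 512 = 32
 * sectors, i.e. four 4 KiB pages per raid device.
 * MIN_RAID456_JOURNAL_SPACE is 4 * 2048 sectors; at 512 bytes per
 * sector, 2048 sectors are 1 MiB, hence the 4 MiB minimum stated above.
 */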

static bool devices_handle_discard_safely = false;

/*
 * The following flags are used by dm-raid.c to set up the array state.
 * They must be cleared before md_run is called.
 */
#define FirstUse 10		/* rdev flag */

struct raid_dev {
	/*
	 * Two DM devices, one to hold metadata and one to hold the
	 * actual data/parity.	The reason for this is to not confuse
	 * ti->len and give more flexibility in altering size and
	 * characteristics.
	 *
	 * While it is possible for this device to be associated
	 * with a different physical device than the data_dev, it
	 * is intended for it to be the same.
	 *    |--------- Physical Device ---------|
	 *    |- meta_dev -|------ data_dev ------|
	 */
	struct dm_dev *meta_dev;
	struct dm_dev *data_dev;
	struct md_rdev rdev;
};

/*
 * Bits for establishing rs->ctr_flags
 *
 * The "1"/"2" annotations on the definitions below mean:
 * 1 = option without an argument
 * 2 = option with one argument
 */
#define __CTR_FLAG_SYNC			0  /* 1 */ /* Not with raid0! */
#define __CTR_FLAG_NOSYNC		1  /* 1 */ /* Not with raid0! */
#define __CTR_FLAG_REBUILD		2  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_DAEMON_SLEEP		3  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MIN_RECOVERY_RATE	4  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MAX_RECOVERY_RATE	5  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MAX_WRITE_BEHIND	6  /* 2 */ /* Only with raid1! */
#define __CTR_FLAG_WRITE_MOSTLY		7  /* 2 */ /* Only with raid1! */
#define __CTR_FLAG_STRIPE_CACHE		8  /* 2 */ /* Only with raid4/5/6! */
#define __CTR_FLAG_REGION_SIZE		9  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_RAID10_COPIES	10 /* 2 */ /* Only with raid10 */
#define __CTR_FLAG_RAID10_FORMAT	11 /* 2 */ /* Only with raid10 */
/* New for v1.9.0 */
#define __CTR_FLAG_DELTA_DISKS		12 /* 2 */ /* Only with reshapable raid1/4/5/6/10! */
#define __CTR_FLAG_DATA_OFFSET		13 /* 2 */ /* Only with reshapable raid4/5/6/10! */
#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */

/* New for v1.10.0 */
#define __CTR_FLAG_JOURNAL_DEV		15 /* 2 */ /* Only with raid4/5/6 (journal device)! */

/* New for v1.11.1 */
#define __CTR_FLAG_JOURNAL_MODE		16 /* 2 */ /* Only with raid4/5/6 (journal mode)! */

/*
 * Flags for rs->ctr_flags field.
 */
#define CTR_FLAG_SYNC			(1 << __CTR_FLAG_SYNC)
#define CTR_FLAG_NOSYNC			(1 << __CTR_FLAG_NOSYNC)
#define CTR_FLAG_REBUILD		(1 << __CTR_FLAG_REBUILD)
#define CTR_FLAG_DAEMON_SLEEP		(1 << __CTR_FLAG_DAEMON_SLEEP)
#define CTR_FLAG_MIN_RECOVERY_RATE	(1 << __CTR_FLAG_MIN_RECOVERY_RATE)
#define CTR_FLAG_MAX_RECOVERY_RATE	(1 << __CTR_FLAG_MAX_RECOVERY_RATE)
#define CTR_FLAG_MAX_WRITE_BEHIND	(1 << __CTR_FLAG_MAX_WRITE_BEHIND)
#define CTR_FLAG_WRITE_MOSTLY		(1 << __CTR_FLAG_WRITE_MOSTLY)
#define CTR_FLAG_STRIPE_CACHE		(1 << __CTR_FLAG_STRIPE_CACHE)
#define CTR_FLAG_REGION_SIZE		(1 << __CTR_FLAG_REGION_SIZE)
#define CTR_FLAG_RAID10_COPIES		(1 << __CTR_FLAG_RAID10_COPIES)
#define CTR_FLAG_RAID10_FORMAT		(1 << __CTR_FLAG_RAID10_FORMAT)
#define CTR_FLAG_DELTA_DISKS		(1 << __CTR_FLAG_DELTA_DISKS)
#define CTR_FLAG_DATA_OFFSET		(1 << __CTR_FLAG_DATA_OFFSET)
#define CTR_FLAG_RAID10_USE_NEAR_SETS	(1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
#define CTR_FLAG_JOURNAL_DEV		(1 << __CTR_FLAG_JOURNAL_DEV)
#define CTR_FLAG_JOURNAL_MODE		(1 << __CTR_FLAG_JOURNAL_MODE)

/*
 * Definitions of various constructor flags to
 * be used in checks of valid / invalid flags
 * per raid level.
 */
/* Define all 'sync' related flags */
#define	CTR_FLAGS_ANY_SYNC		(CTR_FLAG_SYNC | CTR_FLAG_NOSYNC)

/* Define flags for options without argument (e.g. 'nosync') */
#define	CTR_FLAG_OPTIONS_NO_ARGS	(CTR_FLAGS_ANY_SYNC | \
					 CTR_FLAG_RAID10_USE_NEAR_SETS)

/* Define flags for options with one argument (e.g. 'delta_disks +2') */
#define CTR_FLAG_OPTIONS_ONE_ARG (CTR_FLAG_REBUILD | \
				  CTR_FLAG_WRITE_MOSTLY | \
				  CTR_FLAG_DAEMON_SLEEP | \
				  CTR_FLAG_MIN_RECOVERY_RATE | \
				  CTR_FLAG_MAX_RECOVERY_RATE | \
				  CTR_FLAG_MAX_WRITE_BEHIND | \
				  CTR_FLAG_STRIPE_CACHE | \
				  CTR_FLAG_REGION_SIZE | \
				  CTR_FLAG_RAID10_COPIES | \
				  CTR_FLAG_RAID10_FORMAT | \
				  CTR_FLAG_DELTA_DISKS | \
				  CTR_FLAG_DATA_OFFSET | \
				  CTR_FLAG_JOURNAL_DEV | \
				  CTR_FLAG_JOURNAL_MODE)

/* Valid options definitions per raid level... */

/* "raid0" only accepts a data offset */
#define RAID0_VALID_FLAGS	(CTR_FLAG_DATA_OFFSET)

/* "raid1" does not accept stripe cache or any raid10 options */
#define RAID1_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_WRITE_MOSTLY | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_MAX_WRITE_BEHIND | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET)

/* "raid10" does not accept any raid1 or stripe cache options */
#define RAID10_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_RAID10_COPIES | \
				 CTR_FLAG_RAID10_FORMAT | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_RAID10_USE_NEAR_SETS)

/*
 * "raid4/5/6" do not accept any raid1 or raid10 specific options
 *
 * "raid6" does not accept "nosync", because it is not guaranteed
 * that both parity and q-syndrome are being written properly with
 * any writes
 */
#define RAID45_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_JOURNAL_DEV | \
				 CTR_FLAG_JOURNAL_MODE)

#define RAID6_VALID_FLAGS	(CTR_FLAG_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_JOURNAL_DEV | \
				 CTR_FLAG_JOURNAL_MODE)
/* ...valid options definitions per raid level */

/*
 * Flags for rs->runtime_flags field
 * (RT_FLAG prefix meaning "runtime flag")
 *
 * These are all internal and used to define runtime state,
 * e.g. to prevent another resume from preresume processing
 * the raid set all over again.
 */
#define RT_FLAG_RS_PRERESUMED		0
#define RT_FLAG_RS_RESUMED		1
#define RT_FLAG_RS_BITMAP_LOADED	2
#define RT_FLAG_UPDATE_SBS		3
#define RT_FLAG_RESHAPE_RS		4
#define RT_FLAG_RS_SUSPENDED		5
#define RT_FLAG_RS_IN_SYNC		6
#define RT_FLAG_RS_RESYNCING		7
#define RT_FLAG_RS_GROW			8

/* Number of 64-bit array elements needed to hold rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
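
/*
 * Worked example (illustrative note, not in the original source): with
 * MAX_RAID_DEVICES = 253 and 64-bit words this is (253 + 63) / 8 / 8 = 4
 * elements by integer division, i.e. 4 * 64 = 256 bits - one bit per
 * possible raid device.
 */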

/*
 * raid set level, layout and chunk sectors backup/restore
 */
struct rs_layout {
	int new_level;
	int new_layout;
	int new_chunk_sectors;
};

struct raid_set {
	struct dm_target *ti;

	uint32_t stripe_cache_entries;
	unsigned long ctr_flags;
	unsigned long runtime_flags;

	uint64_t rebuild_disks[DISKS_ARRAY_ELEMS];

	int raid_disks;
	int delta_disks;
	int data_offset;
	int raid10_copies;
	int requested_bitmap_chunk_sectors;

	struct mddev md;
	struct raid_type *raid_type;

	sector_t array_sectors;
	sector_t dev_sectors;

	/* Optional raid4/5/6 journal device */
	struct journal_dev {
		struct dm_dev *dev;
		struct md_rdev rdev;
		int mode;
	} journal_dev;

	struct raid_dev dev[];
};

static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
{
	struct mddev *mddev = &rs->md;

	l->new_level = mddev->new_level;
	l->new_layout = mddev->new_layout;
	l->new_chunk_sectors = mddev->new_chunk_sectors;
}

static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
{
	struct mddev *mddev = &rs->md;

	mddev->new_level = l->new_level;
	mddev->new_layout = l->new_layout;
	mddev->new_chunk_sectors = l->new_chunk_sectors;
}
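
/*
 * Typical pairing (illustrative sketch, not taken from the original
 * source): stash the requested new_* geometry before an operation that
 * may overwrite it and reinstate it afterwards:
 *
 *	struct rs_layout rs_layout;
 *
 *	rs_config_backup(rs, &rs_layout);
 *	// ... something that clobbers mddev->new_level/new_layout ...
 *	rs_config_restore(rs, &rs_layout);
 */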

/* raid10 algorithms (i.e. formats) */
#define	ALGORITHM_RAID10_DEFAULT	0
#define	ALGORITHM_RAID10_NEAR		1
#define	ALGORITHM_RAID10_OFFSET		2
#define	ALGORITHM_RAID10_FAR		3

/* Supported raid types and properties. */
static struct raid_type {
	const char *name;		/* RAID algorithm. */
	const char *descr;		/* Descriptor text for logging. */
	const unsigned int parity_devs;	/* # of parity devices. */
	const unsigned int minimal_devs;/* minimal # of devices in set. */
	const unsigned int level;	/* RAID level. */
	const unsigned int algorithm;	/* RAID algorithm. */
} raid_types[] = {
	{"raid0",	  "raid0 (striping)",			    0, 2, 0,  0 /* NONE */},
	{"raid1",	  "raid1 (mirroring)",			    0, 2, 1,  0 /* NONE */},
	{"raid10_far",	  "raid10 far (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_FAR},
	{"raid10_offset", "raid10 offset (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_OFFSET},
	{"raid10_near",	  "raid10 near (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_NEAR},
	{"raid10",	  "raid10 (striped mirrors)",		    0, 2, 10, ALGORITHM_RAID10_DEFAULT},
	{"raid4",	  "raid4 (dedicated first parity disk)",    1, 2, 5,  ALGORITHM_PARITY_0}, /* raid4 layout = raid5_0 */
	{"raid5_n",	  "raid5 (dedicated last parity disk)",	    1, 2, 5,  ALGORITHM_PARITY_N},
	{"raid5_ls",	  "raid5 (left symmetric)",		    1, 2, 5,  ALGORITHM_LEFT_SYMMETRIC},
	{"raid5_rs",	  "raid5 (right symmetric)",		    1, 2, 5,  ALGORITHM_RIGHT_SYMMETRIC},
	{"raid5_la",	  "raid5 (left asymmetric)",		    1, 2, 5,  ALGORITHM_LEFT_ASYMMETRIC},
	{"raid5_ra",	  "raid5 (right asymmetric)",		    1, 2, 5,  ALGORITHM_RIGHT_ASYMMETRIC},
	{"raid6_zr",	  "raid6 (zero restart)",		    2, 4, 6,  ALGORITHM_ROTATING_ZERO_RESTART},
	{"raid6_nr",	  "raid6 (N restart)",			    2, 4, 6,  ALGORITHM_ROTATING_N_RESTART},
	{"raid6_nc",	  "raid6 (N continue)",			    2, 4, 6,  ALGORITHM_ROTATING_N_CONTINUE},
	{"raid6_n_6",	  "raid6 (dedicated parity/Q n/6)",	    2, 4, 6,  ALGORITHM_PARITY_N_6},
	{"raid6_ls_6",	  "raid6 (left symmetric dedicated Q 6)",   2, 4, 6,  ALGORITHM_LEFT_SYMMETRIC_6},
	{"raid6_rs_6",	  "raid6 (right symmetric dedicated Q 6)",  2, 4, 6,  ALGORITHM_RIGHT_SYMMETRIC_6},
	{"raid6_la_6",	  "raid6 (left asymmetric dedicated Q 6)",  2, 4, 6,  ALGORITHM_LEFT_ASYMMETRIC_6},
	{"raid6_ra_6",	  "raid6 (right asymmetric dedicated Q 6)", 2, 4, 6,  ALGORITHM_RIGHT_ASYMMETRIC_6}
};

/* True, if @v is in inclusive range [@min, @max] */
static bool __within_range(long v, long min, long max)
{
	return v >= min && v <= max;
}

/* All table line arguments are defined here */
static struct arg_name_flag {
	const unsigned long flag;
	const char *name;
} __arg_name_flags[] = {
	{ CTR_FLAG_SYNC, "sync"},
	{ CTR_FLAG_NOSYNC, "nosync"},
	{ CTR_FLAG_REBUILD, "rebuild"},
	{ CTR_FLAG_DAEMON_SLEEP, "daemon_sleep"},
	{ CTR_FLAG_MIN_RECOVERY_RATE, "min_recovery_rate"},
	{ CTR_FLAG_MAX_RECOVERY_RATE, "max_recovery_rate"},
	{ CTR_FLAG_MAX_WRITE_BEHIND, "max_write_behind"},
	{ CTR_FLAG_WRITE_MOSTLY, "write_mostly"},
	{ CTR_FLAG_STRIPE_CACHE, "stripe_cache"},
	{ CTR_FLAG_REGION_SIZE, "region_size"},
	{ CTR_FLAG_RAID10_COPIES, "raid10_copies"},
	{ CTR_FLAG_RAID10_FORMAT, "raid10_format"},
	{ CTR_FLAG_DATA_OFFSET, "data_offset"},
	{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
	{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
	{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
	{ CTR_FLAG_JOURNAL_MODE, "journal_mode" },
};

/* Return argument name string for given @flag */
static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
{
	if (hweight32(flag) == 1) {
		struct arg_name_flag *anf = __arg_name_flags + ARRAY_SIZE(__arg_name_flags);

		while (anf-- > __arg_name_flags)
			if (flag & anf->flag)
				return anf->name;

	} else
		DMERR("%s called with more than one flag!", __func__);

	return NULL;
}
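
/*
 * Usage example (illustrative note): exactly one bit must be set, so
 * dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD) returns "rebuild", while
 * dm_raid_arg_name_by_flag(CTR_FLAG_SYNC | CTR_FLAG_NOSYNC) logs an
 * error and returns NULL.
 */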

/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */
static struct {
	const int mode;
	const char *param;
} _raid456_journal_mode[] = {
	{ R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" },
	{ R5C_JOURNAL_MODE_WRITE_BACK    , "writeback" }
};

/* Return MD raid4/5/6 journal mode for the dm-raid @mode string */
static int dm_raid_journal_mode_to_md(const char *mode)
{
	int m = ARRAY_SIZE(_raid456_journal_mode);

	while (m--)
		if (!strcasecmp(mode, _raid456_journal_mode[m].param))
			return _raid456_journal_mode[m].mode;

	return -EINVAL;
}

/* Return dm-raid raid4/5/6 journal mode string for @mode */
static const char *md_journal_mode_to_dm_raid(const int mode)
{
	int m = ARRAY_SIZE(_raid456_journal_mode);

	while (m--)
		if (mode == _raid456_journal_mode[m].mode)
			return _raid456_journal_mode[m].param;

	return "unknown";
}
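
/*
 * Round-trip example (illustrative note):
 * dm_raid_journal_mode_to_md("writeback") yields
 * R5C_JOURNAL_MODE_WRITE_BACK and md_journal_mode_to_dm_raid() maps it
 * back to "writeback"; an unlisted MD mode reads as "unknown" and an
 * unrecognized string as -EINVAL.
 */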

/*
 * Bool helpers to test for various raid levels of a raid set.
 * They test the level as reported by the superblock rather than
 * the requested raid_type passed to the constructor.
 */
/* Return true, if raid set in @rs is raid0 */
static bool rs_is_raid0(struct raid_set *rs)
{
	return !rs->md.level;
}

/* Return true, if raid set in @rs is raid1 */
static bool rs_is_raid1(struct raid_set *rs)
{
	return rs->md.level == 1;
}

/* Return true, if raid set in @rs is raid10 */
static bool rs_is_raid10(struct raid_set *rs)
{
	return rs->md.level == 10;
}

/* Return true, if raid set in @rs is level 6 */
static bool rs_is_raid6(struct raid_set *rs)
{
	return rs->md.level == 6;
}

/* Return true, if raid set in @rs is level 4, 5 or 6 */
static bool rs_is_raid456(struct raid_set *rs)
{
	return __within_range(rs->md.level, 4, 6);
}

/* Return true, if raid set in @rs is reshapable */
static bool __is_raid10_far(int layout);
static bool rs_is_reshapable(struct raid_set *rs)
{
	return rs_is_raid456(rs) ||
	       (rs_is_raid10(rs) && !__is_raid10_far(rs->md.new_layout));
}

/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{
	return rs->md.recovery_cp < rs->md.dev_sectors;
}

/* Return true, if raid set in @rs is reshaping */
static bool rs_is_reshaping(struct raid_set *rs)
{
	return rs->md.reshape_position != MaxSector;
}

/*
 * bool helpers to test for various raid levels of a raid type @rt
 */

/* Return true, if raid type in @rt is raid0 */
static bool rt_is_raid0(struct raid_type *rt)
{
	return !rt->level;
}

/* Return true, if raid type in @rt is raid1 */
static bool rt_is_raid1(struct raid_type *rt)
{
	return rt->level == 1;
}

/* Return true, if raid type in @rt is raid10 */
static bool rt_is_raid10(struct raid_type *rt)
{
	return rt->level == 10;
}

/* Return true, if raid type in @rt is raid4/5 */
static bool rt_is_raid45(struct raid_type *rt)
{
	return __within_range(rt->level, 4, 5);
}

/* Return true, if raid type in @rt is raid6 */
static bool rt_is_raid6(struct raid_type *rt)
{
	return rt->level == 6;
}

/* Return true, if raid type in @rt is raid4/5/6 */
static bool rt_is_raid456(struct raid_type *rt)
{
	return __within_range(rt->level, 4, 6);
}
/* END: raid level bools */

/* Return valid ctr flags for the raid level of @rs */
static unsigned long __valid_flags(struct raid_set *rs)
{
	if (rt_is_raid0(rs->raid_type))
		return RAID0_VALID_FLAGS;
	else if (rt_is_raid1(rs->raid_type))
		return RAID1_VALID_FLAGS;
	else if (rt_is_raid10(rs->raid_type))
		return RAID10_VALID_FLAGS;
	else if (rt_is_raid45(rs->raid_type))
		return RAID45_VALID_FLAGS;
	else if (rt_is_raid6(rs->raid_type))
		return RAID6_VALID_FLAGS;

	return 0;
}

/*
 * Check for valid flags set on @rs
 *
 * Has to be called after parsing of the ctr flags!
 */
static int rs_check_for_valid_flags(struct raid_set *rs)
{
	if (rs->ctr_flags & ~__valid_flags(rs)) {
		rs->ti->error = "Invalid flags combination";
		return -EINVAL;
	}

	return 0;
}

/* MD raid10 bit definitions and helpers */
#define RAID10_OFFSET			(1 << 16) /* stripes with data copies are adjacent on devices */
#define RAID10_BROCKEN_USE_FAR_SETS	(1 << 17) /* Broken in raid10.c: use sets instead of whole stripe rotation */
#define RAID10_USE_FAR_SETS		(1 << 18) /* Use sets instead of whole stripe rotation */
#define RAID10_FAR_COPIES_SHIFT		8	  /* raid10 # far copies shift (2nd byte of layout) */
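
/*
 * Layout word summary (illustrative note): an md raid10 layout packs
 * near copies into byte 0, far copies into byte 1 and the properties
 * above into the higher bits:
 *
 *	bits 7..0   near copies
 *	bits 15..8  far copies
 *	bit  16     offset
 *	bit  18     use far sets
 *
 * See the worked values after raid10_format_to_md_layout() below.
 */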

/* Return md raid10 near copies for @layout */
static unsigned int __raid10_near_copies(int layout)
{
	return layout & 0xFF;
}

/* Return md raid10 far copies for @layout */
static unsigned int __raid10_far_copies(int layout)
{
	return __raid10_near_copies(layout >> RAID10_FAR_COPIES_SHIFT);
}

/* Return true if md raid10 offset for @layout */
static bool __is_raid10_offset(int layout)
{
	return !!(layout & RAID10_OFFSET);
}

/* Return true if md raid10 near for @layout */
static bool __is_raid10_near(int layout)
{
	return !__is_raid10_offset(layout) && __raid10_near_copies(layout) > 1;
}

/* Return true if md raid10 far for @layout */
static bool __is_raid10_far(int layout)
{
	return !__is_raid10_offset(layout) && __raid10_far_copies(layout) > 1;
}

/* Return md raid10 layout string for @layout */
static const char *raid10_md_layout_to_format(int layout)
{
	/*
	 * Bit 16 stands for "offset"
	 * (i.e. adjacent stripes hold copies)
	 *
	 * Refer to MD's raid10.c for details
	 */
	if (__is_raid10_offset(layout))
		return "offset";

	if (__raid10_near_copies(layout) > 1)
		return "near";

	if (__raid10_far_copies(layout) > 1)
		return "far";

	return "unknown";
}

/* Return md raid10 algorithm for @name */
static int raid10_name_to_format(const char *name)
{
	if (!strcasecmp(name, "near"))
		return ALGORITHM_RAID10_NEAR;
	else if (!strcasecmp(name, "offset"))
		return ALGORITHM_RAID10_OFFSET;
	else if (!strcasecmp(name, "far"))
		return ALGORITHM_RAID10_FAR;

	return -EINVAL;
}

/* Return md raid10 copies for @layout */
static unsigned int raid10_md_layout_to_copies(int layout)
{
	return max(__raid10_near_copies(layout), __raid10_far_copies(layout));
}

/* Return md raid10 layout for @algorithm and @copies */
static int raid10_format_to_md_layout(struct raid_set *rs,
				      unsigned int algorithm,
				      unsigned int copies)
{
	unsigned int n = 1, f = 1, r = 0;

	/*
	 * MD resilience flaw:
	 *
	 * enabling use_far_sets for far/offset formats causes copies
	 * to be colocated on the same devs together with their origins!
	 *
	 * -> disable it for now in the definition above
	 */
	if (algorithm == ALGORITHM_RAID10_DEFAULT ||
	    algorithm == ALGORITHM_RAID10_NEAR)
		n = copies;

	else if (algorithm == ALGORITHM_RAID10_OFFSET) {
		f = copies;
		r = RAID10_OFFSET;
		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
			r |= RAID10_USE_FAR_SETS;

	} else if (algorithm == ALGORITHM_RAID10_FAR) {
		f = copies;
		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
			r |= RAID10_USE_FAR_SETS;

	} else
		return -EINVAL;

	return r | (f << RAID10_FAR_COPIES_SHIFT) | n;
}
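
/*
 * Worked values (illustrative note), assuming raid10_use_near_sets is
 * not set in rs->ctr_flags:
 *	near,   2 copies -> (1 << 8) | 2 = 0x102
 *	far,    2 copies -> RAID10_USE_FAR_SETS | (2 << 8) | 1 = 0x40201
 *	offset, 2 copies -> RAID10_OFFSET | RAID10_USE_FAR_SETS |
 *			    (2 << 8) | 1 = 0x50201
 * raid10_md_layout_to_copies() recovers 2 from each of these.
 */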
/* END: MD raid10 bit definitions and helpers */

/* Check for any of the raid10 algorithms */
static bool __got_raid10(struct raid_type *rtp, const int layout)
{
	if (rtp->level == 10) {
		switch (rtp->algorithm) {
		case ALGORITHM_RAID10_DEFAULT:
		case ALGORITHM_RAID10_NEAR:
			return __is_raid10_near(layout);
		case ALGORITHM_RAID10_OFFSET:
			return __is_raid10_offset(layout);
		case ALGORITHM_RAID10_FAR:
			return __is_raid10_far(layout);
		default:
			break;
		}
	}

	return false;
}

/* Return raid_type for @name */
static struct raid_type *get_raid_type(const char *name)
{
	struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types);

	while (rtp-- > raid_types)
		if (!strcasecmp(rtp->name, name))
			return rtp;

	return NULL;
}

/* Return raid_type matching @level and @layout */
static struct raid_type *get_raid_type_by_ll(const int level, const int layout)
{
	struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types);

	while (rtp-- > raid_types) {
		/* RAID10 special checks based on @layout flags/properties */
		if (rtp->level == level &&
		    (__got_raid10(rtp, layout) || rtp->algorithm == layout))
			return rtp;
	}

	return NULL;
}
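
/*
 * Example (illustrative note): get_raid_type_by_ll(6,
 * ALGORITHM_ROTATING_ZERO_RESTART) matches exactly one table entry and
 * returns the "raid6_zr" raid_type; an unknown level/layout pair yields
 * NULL.
 */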

/* Adjust rdev sectors */
static void rs_set_rdev_sectors(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;
	struct md_rdev *rdev;

	/*
	 * raid10 sets rdev->sectors to the device size, which
	 * is unintended in case of out-of-place reshaping
	 */
	rdev_for_each(rdev, mddev)
		if (!test_bit(Journal, &rdev->flags))
			rdev->sectors = mddev->dev_sectors;
}

/*
 * Change bdev capacity of @rs in case of a disk add/remove reshape
 */
static void rs_set_capacity(struct raid_set *rs)
{
	struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));

	set_capacity(gendisk, rs->md.array_sectors);
	revalidate_disk_size(gendisk, true);
}

/*
 * Set the mddev properties in @rs to the current
 * ones retrieved from the freshest superblock
 */
static void rs_set_cur(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;

	mddev->new_level = mddev->level;
	mddev->new_layout = mddev->layout;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
}

/*
 * Set the mddev properties in @rs to the new
 * ones requested by the ctr
 */
static void rs_set_new(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;

	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->raid_disks = rs->raid_disks;
	mddev->delta_disks = 0;
}

static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *raid_type,
				       unsigned int raid_devs)
{
	unsigned int i;
	struct raid_set *rs;

	if (raid_devs <= raid_type->parity_devs) {
		ti->error = "Insufficient number of devices";
		return ERR_PTR(-EINVAL);
	}

	rs = kzalloc(struct_size(rs, dev, raid_devs), GFP_KERNEL);
	if (!rs) {
		ti->error = "Cannot allocate raid context";
		return ERR_PTR(-ENOMEM);
	}

	mddev_init(&rs->md);

	rs->raid_disks = raid_devs;
	rs->delta_disks = 0;

	rs->ti = ti;
	rs->raid_type = raid_type;
	rs->stripe_cache_entries = 256;
	rs->md.raid_disks = raid_devs;
	rs->md.level = raid_type->level;
	rs->md.new_level = rs->md.level;
	rs->md.layout = raid_type->algorithm;
	rs->md.new_layout = rs->md.layout;
	rs->md.delta_disks = 0;
	rs->md.recovery_cp = MaxSector;

	for (i = 0; i < raid_devs; i++)
		md_rdev_init(&rs->dev[i].rdev);

	/*
	 * Remaining items to be initialized by further RAID params:
	 *  rs->md.persistent
	 *  rs->md.external
	 *  rs->md.chunk_sectors
	 *  rs->md.new_chunk_sectors
	 *  rs->md.dev_sectors
	 */

	return rs;
}

/* Free all @rs allocations */
static void raid_set_free(struct raid_set *rs)
{
	int i;

	if (rs->journal_dev.dev) {
		md_rdev_clear(&rs->journal_dev.rdev);
		dm_put_device(rs->ti, rs->journal_dev.dev);
	}

	for (i = 0; i < rs->raid_disks; i++) {
		if (rs->dev[i].meta_dev)
			dm_put_device(rs->ti, rs->dev[i].meta_dev);
		md_rdev_clear(&rs->dev[i].rdev);
		if (rs->dev[i].data_dev)
			dm_put_device(rs->ti, rs->dev[i].data_dev);
	}

	kfree(rs);
}

/*
 * For every device we have two words
 *  <meta_dev>: meta device name or '-' if missing
 *  <data_dev>: data device name or '-' if missing
 *
 * The following are permitted:
 *    - -
 *    - <data_dev>
 *    <meta_dev> <data_dev>
 *
 * The following is not allowed:
 *    <meta_dev> -
 *
 * This code parses those words.  If there is a failure,
 * the caller must use raid_set_free() to unwind the operations.
 */
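/*
 * Illustrative table line (device names and sizes are made up, not
 * taken from the original source): a two-leg raid1 mapping with
 * explicit metadata devices could be loaded as
 *
 *	0 1953125 raid raid1 3 0 region_size 8192 \
 *		2 /dev/sda1 /dev/sda2 /dev/sdb1 /dev/sdb2
 *
 * where "2" is the number of raid devices skipped below and each
 * following pair is <meta_dev> <data_dev>, with "-" standing in for
 * a missing device.
 */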
static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
{
	int i;
	int rebuild = 0;
	int metadata_available = 0;
	int r = 0;
	const char *arg;

	/* Skip over the number of raid devices argument to get to the dev pairs */
	arg = dm_shift_arg(as);
	if (!arg)
		return -EINVAL;

	for (i = 0; i < rs->raid_disks; i++) {
		rs->dev[i].rdev.raid_disk = i;

		rs->dev[i].meta_dev = NULL;
		rs->dev[i].data_dev = NULL;

		/*
		 * There are no offsets initially.
		 * Out of place reshape will set them accordingly.
		 */
		rs->dev[i].rdev.data_offset = 0;
		rs->dev[i].rdev.new_data_offset = 0;
		rs->dev[i].rdev.mddev = &rs->md;

		arg = dm_shift_arg(as);
		if (!arg)
			return -EINVAL;

		if (strcmp(arg, "-")) {
			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
					  &rs->dev[i].meta_dev);
			if (r) {
				rs->ti->error = "RAID metadata device lookup failure";
				return r;
			}

			rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
			if (!rs->dev[i].rdev.sb_page) {
				rs->ti->error = "Failed to allocate superblock page";
				return -ENOMEM;
			}
		}

		arg = dm_shift_arg(as);
		if (!arg)
			return -EINVAL;

		if (!strcmp(arg, "-")) {
			if (!test_bit(In_sync, &rs->dev[i].rdev.flags) &&
			    (!rs->dev[i].rdev.recovery_offset)) {
				rs->ti->error = "Drive designated for rebuild not specified";
				return -EINVAL;
			}

			if (rs->dev[i].meta_dev) {
				rs->ti->error = "No data device supplied with metadata device";
				return -EINVAL;
			}

			continue;
		}

		r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
				  &rs->dev[i].data_dev);
		if (r) {
			rs->ti->error = "RAID device lookup failure";
			return r;
		}

		if (rs->dev[i].meta_dev) {
			metadata_available = 1;
			rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
		}
		rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
		list_add_tail(&rs->dev[i].rdev.same_set, &rs->md.disks);
		if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
			rebuild++;
	}

	if (rs->journal_dev.dev)
		list_add_tail(&rs->journal_dev.rdev.same_set, &rs->md.disks);

	if (metadata_available) {
		rs->md.external = 0;
		rs->md.persistent = 1;
		rs->md.major_version = 2;
	} else if (rebuild && !rs->md.recovery_cp) {
		/*
		 * Without metadata, we will not be able to tell if the array
		 * is in-sync or not - we must assume it is not.  Therefore,
		 * it is impossible to rebuild a drive.
		 *
		 * Even if there is metadata, the on-disk information may
		 * indicate that the array is not in-sync and it will then
		 * fail at that time.
		 *
		 * User could specify 'nosync' option if desperate.
		 */
		rs->ti->error = "Unable to rebuild drive while array is not in-sync";
		return -EINVAL;
	}

	return 0;
}

/*
 * validate_region_size
 * @rs
 * @region_size:  region size in sectors.  If 0, pick a size (4MiB default).
 *
 * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
 * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
 *
 * Returns: 0 on success, -EINVAL on failure.
 */
static int validate_region_size(struct raid_set *rs, unsigned long region_size)
{
	unsigned long min_region_size = rs->ti->len / (1 << 21);
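
	/*
	 * Illustrative note: the MD bitmap supports at most 2^21 regions,
	 * so ti->len >> 21 is the smallest region size that still covers
	 * the whole mapping.  E.g. for a 1 TiB target (2^31 sectors) the
	 * minimum works out to 2^10 sectors = 512 KiB.
	 */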
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs))
9438c2ecf20Sopenharmony_ci		return 0;
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci	if (!region_size) {
9468c2ecf20Sopenharmony_ci		/*
9478c2ecf20Sopenharmony_ci		 * Choose a reasonable default.	 All figures in sectors.
9488c2ecf20Sopenharmony_ci		 */
9498c2ecf20Sopenharmony_ci		if (min_region_size > (1 << 13)) {
9508c2ecf20Sopenharmony_ci			/* If not a power of 2, make it the next power of 2 */
9518c2ecf20Sopenharmony_ci			region_size = roundup_pow_of_two(min_region_size);
9528c2ecf20Sopenharmony_ci			DMINFO("Choosing default region size of %lu sectors",
9538c2ecf20Sopenharmony_ci			       region_size);
9548c2ecf20Sopenharmony_ci		} else {
9558c2ecf20Sopenharmony_ci			DMINFO("Choosing default region size of 4MiB");
9568c2ecf20Sopenharmony_ci			region_size = 1 << 13; /* sectors */
9578c2ecf20Sopenharmony_ci		}
9588c2ecf20Sopenharmony_ci	} else {
9598c2ecf20Sopenharmony_ci		/*
9608c2ecf20Sopenharmony_ci		 * Validate user-supplied value.
9618c2ecf20Sopenharmony_ci		 */
9628c2ecf20Sopenharmony_ci		if (region_size > rs->ti->len) {
9638c2ecf20Sopenharmony_ci			rs->ti->error = "Supplied region size is too large";
9648c2ecf20Sopenharmony_ci			return -EINVAL;
9658c2ecf20Sopenharmony_ci		}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci		if (region_size < min_region_size) {
9688c2ecf20Sopenharmony_ci			DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
9698c2ecf20Sopenharmony_ci			      region_size, min_region_size);
9708c2ecf20Sopenharmony_ci			rs->ti->error = "Supplied region size is too small";
9718c2ecf20Sopenharmony_ci			return -EINVAL;
9728c2ecf20Sopenharmony_ci		}
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci		if (!is_power_of_2(region_size)) {
9758c2ecf20Sopenharmony_ci			rs->ti->error = "Region size is not a power of 2";
9768c2ecf20Sopenharmony_ci			return -EINVAL;
9778c2ecf20Sopenharmony_ci		}
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_ci		if (region_size < rs->md.chunk_sectors) {
9808c2ecf20Sopenharmony_ci			rs->ti->error = "Region size is smaller than the chunk size";
9818c2ecf20Sopenharmony_ci			return -EINVAL;
9828c2ecf20Sopenharmony_ci		}
9838c2ecf20Sopenharmony_ci	}
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_ci	/*
9868c2ecf20Sopenharmony_ci	 * Convert sectors to bytes.
9878c2ecf20Sopenharmony_ci	 */
9888c2ecf20Sopenharmony_ci	rs->md.bitmap_info.chunksize = to_bytes(region_size);
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	return 0;
9918c2ecf20Sopenharmony_ci}
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci/*
9948c2ecf20Sopenharmony_ci * validate_raid_redundancy
9958c2ecf20Sopenharmony_ci * @rs
9968c2ecf20Sopenharmony_ci *
9978c2ecf20Sopenharmony_ci * Determine if there are enough devices in the array that haven't
9988c2ecf20Sopenharmony_ci * failed (or are being rebuilt) to form a usable array.
9998c2ecf20Sopenharmony_ci *
10008c2ecf20Sopenharmony_ci * Returns: 0 on success, -EINVAL on failure.
10018c2ecf20Sopenharmony_ci */
10028c2ecf20Sopenharmony_cistatic int validate_raid_redundancy(struct raid_set *rs)
10038c2ecf20Sopenharmony_ci{
10048c2ecf20Sopenharmony_ci	unsigned int i, rebuild_cnt = 0;
10058c2ecf20Sopenharmony_ci	unsigned int rebuilds_per_group = 0, copies, raid_disks;
10068c2ecf20Sopenharmony_ci	unsigned int group_size, last_group_start;
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++)
10098c2ecf20Sopenharmony_ci		if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
10108c2ecf20Sopenharmony_ci		    ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
10118c2ecf20Sopenharmony_ci		      !rs->dev[i].rdev.sb_page)))
10128c2ecf20Sopenharmony_ci			rebuild_cnt++;
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci	switch (rs->md.level) {
10158c2ecf20Sopenharmony_ci	case 0:
10168c2ecf20Sopenharmony_ci		break;
10178c2ecf20Sopenharmony_ci	case 1:
10188c2ecf20Sopenharmony_ci		if (rebuild_cnt >= rs->md.raid_disks)
10198c2ecf20Sopenharmony_ci			goto too_many;
10208c2ecf20Sopenharmony_ci		break;
10218c2ecf20Sopenharmony_ci	case 4:
10228c2ecf20Sopenharmony_ci	case 5:
10238c2ecf20Sopenharmony_ci	case 6:
10248c2ecf20Sopenharmony_ci		if (rebuild_cnt > rs->raid_type->parity_devs)
10258c2ecf20Sopenharmony_ci			goto too_many;
10268c2ecf20Sopenharmony_ci		break;
10278c2ecf20Sopenharmony_ci	case 10:
10288c2ecf20Sopenharmony_ci		copies = raid10_md_layout_to_copies(rs->md.new_layout);
10298c2ecf20Sopenharmony_ci		if (copies < 2) {
10308c2ecf20Sopenharmony_ci			DMERR("Bogus raid10 data copies < 2!");
10318c2ecf20Sopenharmony_ci			return -EINVAL;
10328c2ecf20Sopenharmony_ci		}
10338c2ecf20Sopenharmony_ci
10348c2ecf20Sopenharmony_ci		if (rebuild_cnt < copies)
10358c2ecf20Sopenharmony_ci			break;
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci		/*
10388c2ecf20Sopenharmony_ci		 * It is possible to have a higher rebuild count for RAID10,
10398c2ecf20Sopenharmony_ci		 * as long as the failed devices occur in different mirror
10408c2ecf20Sopenharmony_ci		 * groups (i.e. different stripes).
10418c2ecf20Sopenharmony_ci		 *
10428c2ecf20Sopenharmony_ci		 * When checking "near" format, make sure no adjacent devices
10438c2ecf20Sopenharmony_ci		 * have failed beyond what can be handled.  In addition to the
10448c2ecf20Sopenharmony_ci		 * simple case where the number of devices is a multiple of the
10458c2ecf20Sopenharmony_ci		 * number of copies, we must also handle cases where the number
10468c2ecf20Sopenharmony_ci		 * of devices is not a multiple of the number of copies.
10478c2ecf20Sopenharmony_ci		 * E.g.	   dev1 dev2 dev3 dev4 dev5
10488c2ecf20Sopenharmony_ci		 *	    A	 A    B	   B	C
10498c2ecf20Sopenharmony_ci		 *	    C	 D    D	   E	E
10508c2ecf20Sopenharmony_ci		 */
10518c2ecf20Sopenharmony_ci		raid_disks = min(rs->raid_disks, rs->md.raid_disks);
10528c2ecf20Sopenharmony_ci		if (__is_raid10_near(rs->md.new_layout)) {
10538c2ecf20Sopenharmony_ci			for (i = 0; i < raid_disks; i++) {
10548c2ecf20Sopenharmony_ci				if (!(i % copies))
10558c2ecf20Sopenharmony_ci					rebuilds_per_group = 0;
10568c2ecf20Sopenharmony_ci				if ((!rs->dev[i].rdev.sb_page ||
10578c2ecf20Sopenharmony_ci				    !test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
10588c2ecf20Sopenharmony_ci				    (++rebuilds_per_group >= copies))
10598c2ecf20Sopenharmony_ci					goto too_many;
10608c2ecf20Sopenharmony_ci			}
10618c2ecf20Sopenharmony_ci			break;
10628c2ecf20Sopenharmony_ci		}
10638c2ecf20Sopenharmony_ci
10648c2ecf20Sopenharmony_ci		/*
10658c2ecf20Sopenharmony_ci		 * When checking "far" and "offset" formats, we need to ensure
10668c2ecf20Sopenharmony_ci		 * that the device that holds its copy is not also dead or
10678c2ecf20Sopenharmony_ci		 * being rebuilt.  (Note that "far" and "offset" formats only
10688c2ecf20Sopenharmony_ci		 * support two copies right now.  These formats also only ever
10698c2ecf20Sopenharmony_ci		 * use the 'use_far_sets' variant.)
10708c2ecf20Sopenharmony_ci		 *
10718c2ecf20Sopenharmony_ci		 * This check is somewhat complicated by the need to account
10728c2ecf20Sopenharmony_ci		 * for arrays that are not a multiple of (far) copies.	This
10738c2ecf20Sopenharmony_ci		 * results in the need to treat the last (potentially larger)
10748c2ecf20Sopenharmony_ci		 * set differently.
10758c2ecf20Sopenharmony_ci		 */
10768c2ecf20Sopenharmony_ci		group_size = (raid_disks / copies);
10778c2ecf20Sopenharmony_ci		last_group_start = (raid_disks / group_size) - 1;
10788c2ecf20Sopenharmony_ci		last_group_start *= group_size;
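		/*
		 * Worked example for the uneven case: raid_disks = 5 and
		 * copies = 2 give group_size = 2 and last_group_start =
		 * (5 / 2 - 1) * 2 = 2, so the sets are {0,1} and the larger
		 * trailing set {2,3,4}; the loop below restarts its
		 * per-group count at indices 0 and 2 only.
		 */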
10798c2ecf20Sopenharmony_ci		for (i = 0; i < raid_disks; i++) {
10808c2ecf20Sopenharmony_ci			if (!(i % copies) && !(i > last_group_start))
10818c2ecf20Sopenharmony_ci				rebuilds_per_group = 0;
10828c2ecf20Sopenharmony_ci			if ((!rs->dev[i].rdev.sb_page ||
10838c2ecf20Sopenharmony_ci			     !test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
10848c2ecf20Sopenharmony_ci			    (++rebuilds_per_group >= copies))
10858c2ecf20Sopenharmony_ci				goto too_many;
10868c2ecf20Sopenharmony_ci		}
10878c2ecf20Sopenharmony_ci		break;
10888c2ecf20Sopenharmony_ci	default:
10898c2ecf20Sopenharmony_ci		if (rebuild_cnt)
10908c2ecf20Sopenharmony_ci			return -EINVAL;
10918c2ecf20Sopenharmony_ci	}
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	return 0;
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_citoo_many:
10968c2ecf20Sopenharmony_ci	return -EINVAL;
10978c2ecf20Sopenharmony_ci}
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci/*
11008c2ecf20Sopenharmony_ci * Possible arguments are...
11018c2ecf20Sopenharmony_ci *	<chunk_size> [optional_args]
11028c2ecf20Sopenharmony_ci *
11038c2ecf20Sopenharmony_ci * Argument definitions
11048c2ecf20Sopenharmony_ci *    <chunk_size>			The number of sectors per disk that
11058c2ecf20Sopenharmony_ci *					will form the "stripe"
11068c2ecf20Sopenharmony_ci *    [[no]sync]			Force or prevent recovery of the
11078c2ecf20Sopenharmony_ci *					entire array
11088c2ecf20Sopenharmony_ci *    [rebuild <idx>]			Rebuild the drive indicated by the index
11098c2ecf20Sopenharmony_ci *    [daemon_sleep <ms>]		Time between bitmap daemon work to
11108c2ecf20Sopenharmony_ci *					clear bits
11118c2ecf20Sopenharmony_ci *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
11128c2ecf20Sopenharmony_ci *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
11138c2ecf20Sopenharmony_ci *    [write_mostly <idx>]		Indicate a write mostly drive via index
11148c2ecf20Sopenharmony_ci *    [max_write_behind <sectors>]	See '--write-behind=' (man mdadm)
11158c2ecf20Sopenharmony_ci *    [stripe_cache <sectors>]		Stripe cache size (raid4/5/6 only)
11168c2ecf20Sopenharmony_ci *    [region_size <sectors>]		Defines granularity of bitmap
11178c2ecf20Sopenharmony_ci *    [journal_dev <dev>]		raid4/5/6 journaling device
11188c2ecf20Sopenharmony_ci *    					(i.e. write hole closing log)
11198c2ecf20Sopenharmony_ci *
11208c2ecf20Sopenharmony_ci * RAID10-only options:
11218c2ecf20Sopenharmony_ci *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
11228c2ecf20Sopenharmony_ci *    [raid10_format <near|far|offset>] Layout algorithm.  (Default: near)
11238c2ecf20Sopenharmony_ci */
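/*
 * Illustrative example only (device numbers are placeholders): a raid4 set
 * with a 2048 sector (1 MiB) chunk across 5 devices and no metadata devices
 * could be built from a table line such as
 *
 *    0 1960893648 raid raid4 1 2048 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 *
 * where each "-" stands for an omitted metadata device.
 */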
11248c2ecf20Sopenharmony_cistatic int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
11258c2ecf20Sopenharmony_ci			     unsigned int num_raid_params)
11268c2ecf20Sopenharmony_ci{
11278c2ecf20Sopenharmony_ci	int value, raid10_format = ALGORITHM_RAID10_DEFAULT;
11288c2ecf20Sopenharmony_ci	unsigned int raid10_copies = 2;
11298c2ecf20Sopenharmony_ci	unsigned int i, write_mostly = 0;
11308c2ecf20Sopenharmony_ci	unsigned int region_size = 0;
11318c2ecf20Sopenharmony_ci	sector_t max_io_len;
11328c2ecf20Sopenharmony_ci	const char *arg, *key;
11338c2ecf20Sopenharmony_ci	struct raid_dev *rd;
11348c2ecf20Sopenharmony_ci	struct raid_type *rt = rs->raid_type;
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ci	arg = dm_shift_arg(as);
11378c2ecf20Sopenharmony_ci	num_raid_params--; /* Account for chunk_size argument */
11388c2ecf20Sopenharmony_ci
11398c2ecf20Sopenharmony_ci	if (kstrtoint(arg, 10, &value) < 0) {
11408c2ecf20Sopenharmony_ci		rs->ti->error = "Bad numerical argument given for chunk_size";
11418c2ecf20Sopenharmony_ci		return -EINVAL;
11428c2ecf20Sopenharmony_ci	}
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_ci	/*
11458c2ecf20Sopenharmony_ci	 * First, parse the in-order required arguments
11468c2ecf20Sopenharmony_ci	 * "chunk_size" is the only argument of this type.
11478c2ecf20Sopenharmony_ci	 */
11488c2ecf20Sopenharmony_ci	if (rt_is_raid1(rt)) {
11498c2ecf20Sopenharmony_ci		if (value)
11508c2ecf20Sopenharmony_ci			DMERR("Ignoring chunk size parameter for RAID 1");
11518c2ecf20Sopenharmony_ci		value = 0;
11528c2ecf20Sopenharmony_ci	} else if (!is_power_of_2(value)) {
11538c2ecf20Sopenharmony_ci		rs->ti->error = "Chunk size must be a power of 2";
11548c2ecf20Sopenharmony_ci		return -EINVAL;
11558c2ecf20Sopenharmony_ci	} else if (value < 8) {
11568c2ecf20Sopenharmony_ci		rs->ti->error = "Chunk size value is too small";
11578c2ecf20Sopenharmony_ci		return -EINVAL;
11588c2ecf20Sopenharmony_ci	}
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_ci	rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci	/*
11638c2ecf20Sopenharmony_ci	 * We set each individual device as In_sync with a completed
11648c2ecf20Sopenharmony_ci	 * 'recovery_offset'.  If there has been a device failure or
11658c2ecf20Sopenharmony_ci	 * replacement then one of the following cases applies:
11668c2ecf20Sopenharmony_ci	 *
11678c2ecf20Sopenharmony_ci	 *   1) User specifies 'rebuild'.
11688c2ecf20Sopenharmony_ci	 *	- Device is reset when param is read.
11698c2ecf20Sopenharmony_ci	 *   2) A new device is supplied.
11708c2ecf20Sopenharmony_ci	 *	- No matching superblock found, resets device.
11718c2ecf20Sopenharmony_ci	 *   3) Device failure was transient and returns on reload.
11728c2ecf20Sopenharmony_ci	 *	- Failure noticed, resets device for bitmap replay.
11738c2ecf20Sopenharmony_ci	 *   4) Device hadn't completed recovery after previous failure.
11748c2ecf20Sopenharmony_ci	 *	- Superblock is read and overrides recovery_offset.
11758c2ecf20Sopenharmony_ci	 *
11768c2ecf20Sopenharmony_ci	 * What is found in the superblocks of the devices is always
11778c2ecf20Sopenharmony_ci	 * authoritative, unless 'rebuild' or '[no]sync' was specified.
11788c2ecf20Sopenharmony_ci	 */
11798c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++) {
11808c2ecf20Sopenharmony_ci		set_bit(In_sync, &rs->dev[i].rdev.flags);
11818c2ecf20Sopenharmony_ci		rs->dev[i].rdev.recovery_offset = MaxSector;
11828c2ecf20Sopenharmony_ci	}
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci	/*
11858c2ecf20Sopenharmony_ci	 * Second, parse the unordered optional arguments
11868c2ecf20Sopenharmony_ci	 */
11878c2ecf20Sopenharmony_ci	for (i = 0; i < num_raid_params; i++) {
11888c2ecf20Sopenharmony_ci		key = dm_shift_arg(as);
11898c2ecf20Sopenharmony_ci		if (!key) {
11908c2ecf20Sopenharmony_ci			rs->ti->error = "Not enough raid parameters given";
11918c2ecf20Sopenharmony_ci			return -EINVAL;
11928c2ecf20Sopenharmony_ci		}
11938c2ecf20Sopenharmony_ci
11948c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC))) {
11958c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
11968c2ecf20Sopenharmony_ci				rs->ti->error = "Only one 'nosync' argument allowed";
11978c2ecf20Sopenharmony_ci				return -EINVAL;
11988c2ecf20Sopenharmony_ci			}
11998c2ecf20Sopenharmony_ci			continue;
12008c2ecf20Sopenharmony_ci		}
12018c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_SYNC))) {
12028c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) {
12038c2ecf20Sopenharmony_ci				rs->ti->error = "Only one 'sync' argument allowed";
12048c2ecf20Sopenharmony_ci				return -EINVAL;
12058c2ecf20Sopenharmony_ci			}
12068c2ecf20Sopenharmony_ci			continue;
12078c2ecf20Sopenharmony_ci		}
12088c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_USE_NEAR_SETS))) {
12098c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) {
12108c2ecf20Sopenharmony_ci				rs->ti->error = "Only one 'raid10_use_near_sets' argument allowed";
12118c2ecf20Sopenharmony_ci				return -EINVAL;
12128c2ecf20Sopenharmony_ci			}
12138c2ecf20Sopenharmony_ci			continue;
12148c2ecf20Sopenharmony_ci		}
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci		arg = dm_shift_arg(as);
12178c2ecf20Sopenharmony_ci		i++; /* Account for the argument pairs */
12188c2ecf20Sopenharmony_ci		if (!arg) {
12198c2ecf20Sopenharmony_ci			rs->ti->error = "Wrong number of raid parameters given";
12208c2ecf20Sopenharmony_ci			return -EINVAL;
12218c2ecf20Sopenharmony_ci		}
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ci		/*
12248c2ecf20Sopenharmony_ci		 * Parameters that take a string value are checked here.
12258c2ecf20Sopenharmony_ci		 */
12268c2ecf20Sopenharmony_ci		/* "raid10_format {near|offset|far}" */
12278c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) {
12288c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) {
12298c2ecf20Sopenharmony_ci				rs->ti->error = "Only one 'raid10_format' argument pair allowed";
12308c2ecf20Sopenharmony_ci				return -EINVAL;
12318c2ecf20Sopenharmony_ci			}
12328c2ecf20Sopenharmony_ci			if (!rt_is_raid10(rt)) {
12338c2ecf20Sopenharmony_ci				rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
12348c2ecf20Sopenharmony_ci				return -EINVAL;
12358c2ecf20Sopenharmony_ci			}
12368c2ecf20Sopenharmony_ci			raid10_format = raid10_name_to_format(arg);
12378c2ecf20Sopenharmony_ci			if (raid10_format < 0) {
12388c2ecf20Sopenharmony_ci				rs->ti->error = "Invalid 'raid10_format' value given";
12398c2ecf20Sopenharmony_ci				return raid10_format;
12408c2ecf20Sopenharmony_ci			}
12418c2ecf20Sopenharmony_ci			continue;
12428c2ecf20Sopenharmony_ci		}
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci		/* "journal_dev <dev>" */
12458c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
12468c2ecf20Sopenharmony_ci			int r;
12478c2ecf20Sopenharmony_ci			struct md_rdev *jdev;
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
12508c2ecf20Sopenharmony_ci				rs->ti->error = "Only one raid4/5/6 set journaling device allowed";
12518c2ecf20Sopenharmony_ci				return -EINVAL;
12528c2ecf20Sopenharmony_ci			}
12538c2ecf20Sopenharmony_ci			if (!rt_is_raid456(rt)) {
12548c2ecf20Sopenharmony_ci				rs->ti->error = "'journal_dev' is an invalid parameter for this RAID type";
12558c2ecf20Sopenharmony_ci				return -EINVAL;
12568c2ecf20Sopenharmony_ci			}
12578c2ecf20Sopenharmony_ci			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
12588c2ecf20Sopenharmony_ci					  &rs->journal_dev.dev);
12598c2ecf20Sopenharmony_ci			if (r) {
12608c2ecf20Sopenharmony_ci				rs->ti->error = "raid4/5/6 journal device lookup failure";
12618c2ecf20Sopenharmony_ci				return r;
12628c2ecf20Sopenharmony_ci			}
12638c2ecf20Sopenharmony_ci			jdev = &rs->journal_dev.rdev;
12648c2ecf20Sopenharmony_ci			md_rdev_init(jdev);
12658c2ecf20Sopenharmony_ci			jdev->mddev = &rs->md;
12668c2ecf20Sopenharmony_ci			jdev->bdev = rs->journal_dev.dev->bdev;
12678c2ecf20Sopenharmony_ci			jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode));
12688c2ecf20Sopenharmony_ci			if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) {
12698c2ecf20Sopenharmony_ci				rs->ti->error = "No space for raid4/5/6 journal";
12708c2ecf20Sopenharmony_ci				return -ENOSPC;
12718c2ecf20Sopenharmony_ci			}
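			/*
			 * Default to write-through caching; an optional
			 * 'journal_mode' argument parsed below may override
			 * this (e.g. with write-back).
			 */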
12728c2ecf20Sopenharmony_ci			rs->journal_dev.mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
12738c2ecf20Sopenharmony_ci			set_bit(Journal, &jdev->flags);
12748c2ecf20Sopenharmony_ci			continue;
12758c2ecf20Sopenharmony_ci		}
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci		/* "journal_mode <mode>" ("journal_dev" mandatory!) */
12788c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE))) {
12798c2ecf20Sopenharmony_ci			int r;
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_ci			if (!test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
12828c2ecf20Sopenharmony_ci				rs->ti->error = "raid4/5/6 'journal_mode' is invalid without 'journal_dev'";
12838c2ecf20Sopenharmony_ci				return -EINVAL;
12848c2ecf20Sopenharmony_ci			}
12858c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
12868c2ecf20Sopenharmony_ci				rs->ti->error = "Only one raid4/5/6 'journal_mode' argument allowed";
12878c2ecf20Sopenharmony_ci				return -EINVAL;
12888c2ecf20Sopenharmony_ci			}
12898c2ecf20Sopenharmony_ci			r = dm_raid_journal_mode_to_md(arg);
12908c2ecf20Sopenharmony_ci			if (r < 0) {
12918c2ecf20Sopenharmony_ci				rs->ti->error = "Invalid 'journal_mode' argument";
12928c2ecf20Sopenharmony_ci				return r;
12938c2ecf20Sopenharmony_ci			}
12948c2ecf20Sopenharmony_ci			rs->journal_dev.mode = r;
12958c2ecf20Sopenharmony_ci			continue;
12968c2ecf20Sopenharmony_ci		}
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_ci		/*
12998c2ecf20Sopenharmony_ci		 * Parameters with number values from here on.
13008c2ecf20Sopenharmony_ci		 */
13018c2ecf20Sopenharmony_ci		if (kstrtoint(arg, 10, &value) < 0) {
13028c2ecf20Sopenharmony_ci			rs->ti->error = "Bad numerical argument given in raid params";
13038c2ecf20Sopenharmony_ci			return -EINVAL;
13048c2ecf20Sopenharmony_ci		}
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD))) {
13078c2ecf20Sopenharmony_ci			/*
13088c2ecf20Sopenharmony_ci			 * "rebuild" is being passed in by userspace to provide
13098c2ecf20Sopenharmony_ci			 * indexes of replaced devices and to set up additional
13108c2ecf20Sopenharmony_ci			 * devices on raid level takeover.
13118c2ecf20Sopenharmony_ci			 */
13128c2ecf20Sopenharmony_ci			if (!__within_range(value, 0, rs->raid_disks - 1)) {
13138c2ecf20Sopenharmony_ci				rs->ti->error = "Invalid rebuild index given";
13148c2ecf20Sopenharmony_ci				return -EINVAL;
13158c2ecf20Sopenharmony_ci			}
13168c2ecf20Sopenharmony_ci
13178c2ecf20Sopenharmony_ci			if (test_and_set_bit(value, (void *) rs->rebuild_disks)) {
13188c2ecf20Sopenharmony_ci				rs->ti->error = "rebuild for this index already given";
13198c2ecf20Sopenharmony_ci				return -EINVAL;
13208c2ecf20Sopenharmony_ci			}
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci			rd = rs->dev + value;
13238c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rd->rdev.flags);
13248c2ecf20Sopenharmony_ci			clear_bit(Faulty, &rd->rdev.flags);
13258c2ecf20Sopenharmony_ci			rd->rdev.recovery_offset = 0;
13268c2ecf20Sopenharmony_ci			set_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags);
13278c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY))) {
13288c2ecf20Sopenharmony_ci			if (!rt_is_raid1(rt)) {
13298c2ecf20Sopenharmony_ci				rs->ti->error = "write_mostly option is only valid for RAID1";
13308c2ecf20Sopenharmony_ci				return -EINVAL;
13318c2ecf20Sopenharmony_ci			}
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci			if (!__within_range(value, 0, rs->md.raid_disks - 1)) {
13348c2ecf20Sopenharmony_ci				rs->ti->error = "Invalid write_mostly index given";
13358c2ecf20Sopenharmony_ci				return -EINVAL;
13368c2ecf20Sopenharmony_ci			}
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci			write_mostly++;
13398c2ecf20Sopenharmony_ci			set_bit(WriteMostly, &rs->dev[value].rdev.flags);
13408c2ecf20Sopenharmony_ci			set_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags);
13418c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND))) {
13428c2ecf20Sopenharmony_ci			if (!rt_is_raid1(rt)) {
13438c2ecf20Sopenharmony_ci				rs->ti->error = "max_write_behind option is only valid for RAID1";
13448c2ecf20Sopenharmony_ci				return -EINVAL;
13458c2ecf20Sopenharmony_ci			}
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) {
13488c2ecf20Sopenharmony_ci				rs->ti->error = "Only one max_write_behind argument pair allowed";
13498c2ecf20Sopenharmony_ci				return -EINVAL;
13508c2ecf20Sopenharmony_ci			}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci			/*
13538c2ecf20Sopenharmony_ci			 * In device-mapper, we specify things in sectors, but
13548c2ecf20Sopenharmony_ci			 * MD records this value in kB
13558c2ecf20Sopenharmony_ci			 */
13568c2ecf20Sopenharmony_ci			if (value < 0 || value / 2 > COUNTER_MAX) {
13578c2ecf20Sopenharmony_ci				rs->ti->error = "Max write-behind limit out of range";
13588c2ecf20Sopenharmony_ci				return -EINVAL;
13598c2ecf20Sopenharmony_ci			}
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ci			rs->md.bitmap_info.max_write_behind = value / 2;
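			/*
			 * e.g. a ctr value of 8192 sectors is stored as
			 * 8192 / 2 = 4096 kB of allowed write-behind I/O.
			 */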
13628c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) {
13638c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) {
13648c2ecf20Sopenharmony_ci				rs->ti->error = "Only one daemon_sleep argument pair allowed";
13658c2ecf20Sopenharmony_ci				return -EINVAL;
13668c2ecf20Sopenharmony_ci			}
13678c2ecf20Sopenharmony_ci			if (value < 0) {
13688c2ecf20Sopenharmony_ci				rs->ti->error = "daemon sleep period out of range";
13698c2ecf20Sopenharmony_ci				return -EINVAL;
13708c2ecf20Sopenharmony_ci			}
13718c2ecf20Sopenharmony_ci			rs->md.bitmap_info.daemon_sleep = value;
13728c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) {
13738c2ecf20Sopenharmony_ci			/* Userspace passes new data_offset after having extended the data image LV */
13748c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
13758c2ecf20Sopenharmony_ci				rs->ti->error = "Only one data_offset argument pair allowed";
13768c2ecf20Sopenharmony_ci				return -EINVAL;
13778c2ecf20Sopenharmony_ci			}
13788c2ecf20Sopenharmony_ci			/* Ensure sensible data offset */
13798c2ecf20Sopenharmony_ci			if (value < 0 ||
13808c2ecf20Sopenharmony_ci			    (value && (value < MIN_FREE_RESHAPE_SPACE || value % to_sector(PAGE_SIZE)))) {
13818c2ecf20Sopenharmony_ci				rs->ti->error = "Bogus data_offset value";
13828c2ecf20Sopenharmony_ci				return -EINVAL;
13838c2ecf20Sopenharmony_ci			}
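			/*
			 * Note that 0 explicitly means "no data offset";
			 * only non-zero offsets must be page aligned and at
			 * least MIN_FREE_RESHAPE_SPACE to leave headroom for
			 * out-of-place reshaping.
			 */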
13848c2ecf20Sopenharmony_ci			rs->data_offset = value;
13858c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS))) {
13868c2ecf20Sopenharmony_ci			/* Define the +/-# of disks to add to/remove from the given raid set */
13878c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) {
13888c2ecf20Sopenharmony_ci				rs->ti->error = "Only one delta_disks argument pair allowed";
13898c2ecf20Sopenharmony_ci				return -EINVAL;
13908c2ecf20Sopenharmony_ci			}
13918c2ecf20Sopenharmony_ci			/* Ensure abs(delta_disks) stays within MAX_RAID_DEVICES less the raid type's minimal_devs! */
13928c2ecf20Sopenharmony_ci			if (!__within_range(abs(value), 1, MAX_RAID_DEVICES - rt->minimal_devs)) {
13938c2ecf20Sopenharmony_ci				rs->ti->error = "Too many delta_disks requested";
13948c2ecf20Sopenharmony_ci				return -EINVAL;
13958c2ecf20Sopenharmony_ci			}
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_ci			rs->delta_disks = value;
13988c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE))) {
13998c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) {
14008c2ecf20Sopenharmony_ci				rs->ti->error = "Only one stripe_cache argument pair allowed";
14018c2ecf20Sopenharmony_ci				return -EINVAL;
14028c2ecf20Sopenharmony_ci			}
14038c2ecf20Sopenharmony_ci
14048c2ecf20Sopenharmony_ci			if (!rt_is_raid456(rt)) {
14058c2ecf20Sopenharmony_ci				rs->ti->error = "Inappropriate argument: stripe_cache";
14068c2ecf20Sopenharmony_ci				return -EINVAL;
14078c2ecf20Sopenharmony_ci			}
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci			if (value < 0) {
14108c2ecf20Sopenharmony_ci				rs->ti->error = "Bogus stripe cache entries value";
14118c2ecf20Sopenharmony_ci				return -EINVAL;
14128c2ecf20Sopenharmony_ci			}
14138c2ecf20Sopenharmony_ci			rs->stripe_cache_entries = value;
14148c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
14158c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) {
14168c2ecf20Sopenharmony_ci				rs->ti->error = "Only one min_recovery_rate argument pair allowed";
14178c2ecf20Sopenharmony_ci				return -EINVAL;
14188c2ecf20Sopenharmony_ci			}
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci			if (value < 0) {
14218c2ecf20Sopenharmony_ci				rs->ti->error = "min_recovery_rate out of range";
14228c2ecf20Sopenharmony_ci				return -EINVAL;
14238c2ecf20Sopenharmony_ci			}
14248c2ecf20Sopenharmony_ci			rs->md.sync_speed_min = value;
14258c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
14268c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) {
14278c2ecf20Sopenharmony_ci				rs->ti->error = "Only one max_recovery_rate argument pair allowed";
14288c2ecf20Sopenharmony_ci				return -EINVAL;
14298c2ecf20Sopenharmony_ci			}
14308c2ecf20Sopenharmony_ci
14318c2ecf20Sopenharmony_ci			if (value < 0) {
14328c2ecf20Sopenharmony_ci				rs->ti->error = "max_recovery_rate out of range";
14338c2ecf20Sopenharmony_ci				return -EINVAL;
14348c2ecf20Sopenharmony_ci			}
14358c2ecf20Sopenharmony_ci			rs->md.sync_speed_max = value;
14368c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) {
14378c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) {
14388c2ecf20Sopenharmony_ci				rs->ti->error = "Only one region_size argument pair allowed";
14398c2ecf20Sopenharmony_ci				return -EINVAL;
14408c2ecf20Sopenharmony_ci			}
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci			region_size = value;
14438c2ecf20Sopenharmony_ci			rs->requested_bitmap_chunk_sectors = value;
14448c2ecf20Sopenharmony_ci		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES))) {
14458c2ecf20Sopenharmony_ci			if (test_and_set_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) {
14468c2ecf20Sopenharmony_ci				rs->ti->error = "Only one raid10_copies argument pair allowed";
14478c2ecf20Sopenharmony_ci				return -EINVAL;
14488c2ecf20Sopenharmony_ci			}
14498c2ecf20Sopenharmony_ci
14508c2ecf20Sopenharmony_ci			if (!__within_range(value, 2, rs->md.raid_disks)) {
14518c2ecf20Sopenharmony_ci				rs->ti->error = "Bad value for 'raid10_copies'";
14528c2ecf20Sopenharmony_ci				return -EINVAL;
14538c2ecf20Sopenharmony_ci			}
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_ci			raid10_copies = value;
14568c2ecf20Sopenharmony_ci		} else {
14578c2ecf20Sopenharmony_ci			DMERR("Unable to parse RAID parameter: %s", key);
14588c2ecf20Sopenharmony_ci			rs->ti->error = "Unable to parse RAID parameter";
14598c2ecf20Sopenharmony_ci			return -EINVAL;
14608c2ecf20Sopenharmony_ci		}
14618c2ecf20Sopenharmony_ci	}
14628c2ecf20Sopenharmony_ci
14638c2ecf20Sopenharmony_ci	if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) &&
14648c2ecf20Sopenharmony_ci	    test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
14658c2ecf20Sopenharmony_ci		rs->ti->error = "sync and nosync are mutually exclusive";
14668c2ecf20Sopenharmony_ci		return -EINVAL;
14678c2ecf20Sopenharmony_ci	}
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_ci	if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) &&
14708c2ecf20Sopenharmony_ci	    (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ||
14718c2ecf20Sopenharmony_ci	     test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))) {
14728c2ecf20Sopenharmony_ci		rs->ti->error = "sync/nosync and rebuild are mutually exclusive";
14738c2ecf20Sopenharmony_ci		return -EINVAL;
14748c2ecf20Sopenharmony_ci	}
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	if (write_mostly >= rs->md.raid_disks) {
14778c2ecf20Sopenharmony_ci		rs->ti->error = "Can't set all raid1 devices to write_mostly";
14788c2ecf20Sopenharmony_ci		return -EINVAL;
14798c2ecf20Sopenharmony_ci	}
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ci	if (rs->md.sync_speed_max &&
14828c2ecf20Sopenharmony_ci	    rs->md.sync_speed_min > rs->md.sync_speed_max) {
14838c2ecf20Sopenharmony_ci		rs->ti->error = "Bogus recovery rates";
14848c2ecf20Sopenharmony_ci		return -EINVAL;
14858c2ecf20Sopenharmony_ci	}
14868c2ecf20Sopenharmony_ci
14878c2ecf20Sopenharmony_ci	if (validate_region_size(rs, region_size))
14888c2ecf20Sopenharmony_ci		return -EINVAL;
14898c2ecf20Sopenharmony_ci
14908c2ecf20Sopenharmony_ci	if (rs->md.chunk_sectors)
14918c2ecf20Sopenharmony_ci		max_io_len = rs->md.chunk_sectors;
14928c2ecf20Sopenharmony_ci	else
14938c2ecf20Sopenharmony_ci		max_io_len = region_size;
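	/*
	 * raid1 has no chunk size (it is forced to 0 above), so the
	 * bitmap region size bounds the per-bio I/O size instead.
	 */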
14948c2ecf20Sopenharmony_ci
14958c2ecf20Sopenharmony_ci	if (dm_set_target_max_io_len(rs->ti, max_io_len))
14968c2ecf20Sopenharmony_ci		return -EINVAL;
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci	if (rt_is_raid10(rt)) {
14998c2ecf20Sopenharmony_ci		if (raid10_copies > rs->md.raid_disks) {
15008c2ecf20Sopenharmony_ci			rs->ti->error = "Not enough devices to satisfy specification";
15018c2ecf20Sopenharmony_ci			return -EINVAL;
15028c2ecf20Sopenharmony_ci		}
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci		rs->md.new_layout = raid10_format_to_md_layout(rs, raid10_format, raid10_copies);
15058c2ecf20Sopenharmony_ci		if (rs->md.new_layout < 0) {
15068c2ecf20Sopenharmony_ci			rs->ti->error = "Error getting raid10 format";
15078c2ecf20Sopenharmony_ci			return rs->md.new_layout;
15088c2ecf20Sopenharmony_ci		}
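		/*
		 * Rough sketch of the md raid10 layout word (see
		 * raid10_format_to_md_layout() for the authoritative
		 * encoding): near copies live in the low byte, far copies
		 * in the next byte, and higher bits select the offset and
		 * far-set variants.
		 */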
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci		rt = get_raid_type_by_ll(10, rs->md.new_layout);
15118c2ecf20Sopenharmony_ci		if (!rt) {
15128c2ecf20Sopenharmony_ci			rs->ti->error = "Failed to recognize new raid10 layout";
15138c2ecf20Sopenharmony_ci			return -EINVAL;
15148c2ecf20Sopenharmony_ci		}
15158c2ecf20Sopenharmony_ci
15168c2ecf20Sopenharmony_ci		if ((rt->algorithm == ALGORITHM_RAID10_DEFAULT ||
15178c2ecf20Sopenharmony_ci		     rt->algorithm == ALGORITHM_RAID10_NEAR) &&
15188c2ecf20Sopenharmony_ci		    test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) {
15198c2ecf20Sopenharmony_ci			rs->ti->error = "RAID10 format 'near' and 'raid10_use_near_sets' are incompatible";
15208c2ecf20Sopenharmony_ci			return -EINVAL;
15218c2ecf20Sopenharmony_ci		}
15228c2ecf20Sopenharmony_ci	}
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_ci	rs->raid10_copies = raid10_copies;
15258c2ecf20Sopenharmony_ci
15268c2ecf20Sopenharmony_ci	/* Assume there are no metadata devices until the drives are parsed */
15278c2ecf20Sopenharmony_ci	rs->md.persistent = 0;
15288c2ecf20Sopenharmony_ci	rs->md.external = 1;
15298c2ecf20Sopenharmony_ci
15308c2ecf20Sopenharmony_ci	/* Check if any invalid ctr arguments have been passed in for the raid level */
15318c2ecf20Sopenharmony_ci	return rs_check_for_valid_flags(rs);
15328c2ecf20Sopenharmony_ci}
15338c2ecf20Sopenharmony_ci
15348c2ecf20Sopenharmony_ci/* Set raid4/5/6 cache size */
15358c2ecf20Sopenharmony_cistatic int rs_set_raid456_stripe_cache(struct raid_set *rs)
15368c2ecf20Sopenharmony_ci{
15378c2ecf20Sopenharmony_ci	int r;
15388c2ecf20Sopenharmony_ci	struct r5conf *conf;
15398c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
15408c2ecf20Sopenharmony_ci	uint32_t min_stripes = max(mddev->chunk_sectors, mddev->new_chunk_sectors) / 2;
15418c2ecf20Sopenharmony_ci	uint32_t nr_stripes = rs->stripe_cache_entries;
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci	if (!rt_is_raid456(rs->raid_type)) {
15448c2ecf20Sopenharmony_ci		rs->ti->error = "Inappropriate raid level; cannot change stripe_cache size";
15458c2ecf20Sopenharmony_ci		return -EINVAL;
15468c2ecf20Sopenharmony_ci	}
15478c2ecf20Sopenharmony_ci
15488c2ecf20Sopenharmony_ci	if (nr_stripes < min_stripes) {
15498c2ecf20Sopenharmony_ci		DMINFO("Adjusting requested %u stripe cache entries to %u to suit stripe size",
15508c2ecf20Sopenharmony_ci		       nr_stripes, min_stripes);
15518c2ecf20Sopenharmony_ci		nr_stripes = min_stripes;
15528c2ecf20Sopenharmony_ci	}
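	/*
	 * e.g. with a 128 sector (64 KiB) chunk, min_stripes = 128 / 2 = 64,
	 * so a smaller ctr request would be raised to 64 entries here.
	 */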
15538c2ecf20Sopenharmony_ci
15548c2ecf20Sopenharmony_ci	conf = mddev->private;
15558c2ecf20Sopenharmony_ci	if (!conf) {
15568c2ecf20Sopenharmony_ci		rs->ti->error = "Cannot change stripe_cache size on inactive RAID set";
15578c2ecf20Sopenharmony_ci		return -EINVAL;
15588c2ecf20Sopenharmony_ci	}
15598c2ecf20Sopenharmony_ci
15608c2ecf20Sopenharmony_ci	/* Try setting number of stripes in raid456 stripe cache */
15618c2ecf20Sopenharmony_ci	if (conf->min_nr_stripes != nr_stripes) {
15628c2ecf20Sopenharmony_ci		r = raid5_set_cache_size(mddev, nr_stripes);
15638c2ecf20Sopenharmony_ci		if (r) {
15648c2ecf20Sopenharmony_ci			rs->ti->error = "Failed to set raid4/5/6 stripe cache size";
15658c2ecf20Sopenharmony_ci			return r;
15668c2ecf20Sopenharmony_ci		}
15678c2ecf20Sopenharmony_ci
15688c2ecf20Sopenharmony_ci		DMINFO("%u stripe cache entries", nr_stripes);
15698c2ecf20Sopenharmony_ci	}
15708c2ecf20Sopenharmony_ci
15718c2ecf20Sopenharmony_ci	return 0;
15728c2ecf20Sopenharmony_ci}
15738c2ecf20Sopenharmony_ci
15748c2ecf20Sopenharmony_ci/* Return # of data stripes as kept in the mddev of @rs (i.e. as read from the superblock) */
15758c2ecf20Sopenharmony_cistatic unsigned int mddev_data_stripes(struct raid_set *rs)
15768c2ecf20Sopenharmony_ci{
15778c2ecf20Sopenharmony_ci	return rs->md.raid_disks - rs->raid_type->parity_devs;
15788c2ecf20Sopenharmony_ci}
15798c2ecf20Sopenharmony_ci
15808c2ecf20Sopenharmony_ci/* Return # of data stripes of @rs (i.e. as of ctr) */
15818c2ecf20Sopenharmony_cistatic unsigned int rs_data_stripes(struct raid_set *rs)
15828c2ecf20Sopenharmony_ci{
15838c2ecf20Sopenharmony_ci	return rs->raid_disks - rs->raid_type->parity_devs;
15848c2ecf20Sopenharmony_ci}
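/*
 * e.g. an 8-device raid6 set has 8 - 2 = 6 data stripes, whereas levels
 * without dedicated parity devices (raid0/raid1/raid10) count all disks.
 */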
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci/*
15878c2ecf20Sopenharmony_ci * Retrieve rdev->sectors from any valid raid device of @rs
15888c2ecf20Sopenharmony_ci * to allow userspace to pass in arbitrary "- -" device tuples.
15898c2ecf20Sopenharmony_ci */
15908c2ecf20Sopenharmony_cistatic sector_t __rdev_sectors(struct raid_set *rs)
15918c2ecf20Sopenharmony_ci{
15928c2ecf20Sopenharmony_ci	int i;
15938c2ecf20Sopenharmony_ci
15948c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++) {
15958c2ecf20Sopenharmony_ci		struct md_rdev *rdev = &rs->dev[i].rdev;
15968c2ecf20Sopenharmony_ci
15978c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags) &&
15988c2ecf20Sopenharmony_ci		    rdev->bdev && rdev->sectors)
15998c2ecf20Sopenharmony_ci			return rdev->sectors;
16008c2ecf20Sopenharmony_ci	}
16018c2ecf20Sopenharmony_ci
16028c2ecf20Sopenharmony_ci	return 0;
16038c2ecf20Sopenharmony_ci}
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci/* Check that calculated dev_sectors fits all component devices. */
16068c2ecf20Sopenharmony_cistatic int _check_data_dev_sectors(struct raid_set *rs)
16078c2ecf20Sopenharmony_ci{
16088c2ecf20Sopenharmony_ci	sector_t ds = ~0;
16098c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
16108c2ecf20Sopenharmony_ci
16118c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md)
16128c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags) && rdev->bdev) {
16138c2ecf20Sopenharmony_ci			ds = min(ds, to_sector(i_size_read(rdev->bdev->bd_inode)));
16148c2ecf20Sopenharmony_ci			if (ds < rs->md.dev_sectors) {
16158c2ecf20Sopenharmony_ci				rs->ti->error = "Component device(s) too small";
16168c2ecf20Sopenharmony_ci				return -EINVAL;
16178c2ecf20Sopenharmony_ci			}
16188c2ecf20Sopenharmony_ci		}
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	return 0;
16218c2ecf20Sopenharmony_ci}
16228c2ecf20Sopenharmony_ci
16238c2ecf20Sopenharmony_ci/* Calculate the sectors per device and per array used for @rs */
16248c2ecf20Sopenharmony_cistatic int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
16258c2ecf20Sopenharmony_ci{
16268c2ecf20Sopenharmony_ci	int delta_disks;
16278c2ecf20Sopenharmony_ci	unsigned int data_stripes;
16288c2ecf20Sopenharmony_ci	sector_t array_sectors = sectors, dev_sectors = sectors;
16298c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
16308c2ecf20Sopenharmony_ci
16318c2ecf20Sopenharmony_ci	if (use_mddev) {
16328c2ecf20Sopenharmony_ci		delta_disks = mddev->delta_disks;
16338c2ecf20Sopenharmony_ci		data_stripes = mddev_data_stripes(rs);
16348c2ecf20Sopenharmony_ci	} else {
16358c2ecf20Sopenharmony_ci		delta_disks = rs->delta_disks;
16368c2ecf20Sopenharmony_ci		data_stripes = rs_data_stripes(rs);
16378c2ecf20Sopenharmony_ci	}
16388c2ecf20Sopenharmony_ci
16398c2ecf20Sopenharmony_ci	/* Special raid1 case w/o delta_disks support (yet) */
16408c2ecf20Sopenharmony_ci	if (rt_is_raid1(rs->raid_type))
16418c2ecf20Sopenharmony_ci		;
16428c2ecf20Sopenharmony_ci	else if (rt_is_raid10(rs->raid_type)) {
16438c2ecf20Sopenharmony_ci		if (rs->raid10_copies < 2 ||
16448c2ecf20Sopenharmony_ci		    delta_disks < 0) {
16458c2ecf20Sopenharmony_ci			rs->ti->error = "Bogus raid10 data copies or delta disks";
16468c2ecf20Sopenharmony_ci			return -EINVAL;
16478c2ecf20Sopenharmony_ci		}
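		/*
		 * Worked example: a 4-disk raid10 target (2 copies, 4 data
		 * stripes) of length L stores 2L / 4 = L/2 sectors per
		 * device and exports 4 * (L/2) / 2 = L sectors, matching
		 * the target length.
		 */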
16488c2ecf20Sopenharmony_ci
16498c2ecf20Sopenharmony_ci		dev_sectors *= rs->raid10_copies;
16508c2ecf20Sopenharmony_ci		if (sector_div(dev_sectors, data_stripes))
16518c2ecf20Sopenharmony_ci			goto bad;
16528c2ecf20Sopenharmony_ci
16538c2ecf20Sopenharmony_ci		array_sectors = (data_stripes + delta_disks) * dev_sectors;
16548c2ecf20Sopenharmony_ci		if (sector_div(array_sectors, rs->raid10_copies))
16558c2ecf20Sopenharmony_ci			goto bad;
16568c2ecf20Sopenharmony_ci
16578c2ecf20Sopenharmony_ci	} else if (sector_div(dev_sectors, data_stripes))
16588c2ecf20Sopenharmony_ci		goto bad;
16598c2ecf20Sopenharmony_ci
16608c2ecf20Sopenharmony_ci	else
16618c2ecf20Sopenharmony_ci		/* Striped layouts */
16628c2ecf20Sopenharmony_ci		array_sectors = (data_stripes + delta_disks) * dev_sectors;
16638c2ecf20Sopenharmony_ci
16648c2ecf20Sopenharmony_ci	mddev->array_sectors = array_sectors;
16658c2ecf20Sopenharmony_ci	mddev->dev_sectors = dev_sectors;
16668c2ecf20Sopenharmony_ci	rs_set_rdev_sectors(rs);
16678c2ecf20Sopenharmony_ci
16688c2ecf20Sopenharmony_ci	return _check_data_dev_sectors(rs);
16698c2ecf20Sopenharmony_cibad:
16708c2ecf20Sopenharmony_ci	rs->ti->error = "Target length not divisible by number of data devices";
16718c2ecf20Sopenharmony_ci	return -EINVAL;
16728c2ecf20Sopenharmony_ci}
16738c2ecf20Sopenharmony_ci
16748c2ecf20Sopenharmony_ci/* Set up recovery on @rs */
16758c2ecf20Sopenharmony_cistatic void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
16768c2ecf20Sopenharmony_ci{
16778c2ecf20Sopenharmony_ci	/* raid0 does not recover */
16788c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs))
16798c2ecf20Sopenharmony_ci		rs->md.recovery_cp = MaxSector;
16808c2ecf20Sopenharmony_ci	/*
16818c2ecf20Sopenharmony_ci	 * A raid6 set has to be recovered either
16828c2ecf20Sopenharmony_ci	 * completely or for the grown part to
16838c2ecf20Sopenharmony_ci	 * ensure proper parity and Q-Syndrome
16848c2ecf20Sopenharmony_ci	 */
16858c2ecf20Sopenharmony_ci	else if (rs_is_raid6(rs))
16868c2ecf20Sopenharmony_ci		rs->md.recovery_cp = dev_sectors;
16878c2ecf20Sopenharmony_ci	/*
16888c2ecf20Sopenharmony_ci	 * Other raid set types may skip recovery
16898c2ecf20Sopenharmony_ci	 * depending on the 'nosync' flag.
16908c2ecf20Sopenharmony_ci	 */
16918c2ecf20Sopenharmony_ci	else
16928c2ecf20Sopenharmony_ci		rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
16938c2ecf20Sopenharmony_ci				     ? MaxSector : dev_sectors;
16948c2ecf20Sopenharmony_ci}
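/*
 * recovery_cp is the sector up to which the set is known to be in sync:
 * MaxSector marks it fully synchronised, 0 forces a full resync and an
 * intermediate value (e.g. the pre-grow size) resyncs only the tail.
 */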
16958c2ecf20Sopenharmony_ci
16968c2ecf20Sopenharmony_cistatic void do_table_event(struct work_struct *ws)
16978c2ecf20Sopenharmony_ci{
16988c2ecf20Sopenharmony_ci	struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_ci	smp_rmb(); /* Make sure we see the most recent mddev properties */
17018c2ecf20Sopenharmony_ci	if (!rs_is_reshaping(rs)) {
17028c2ecf20Sopenharmony_ci		if (rs_is_raid10(rs))
17038c2ecf20Sopenharmony_ci			rs_set_rdev_sectors(rs);
17048c2ecf20Sopenharmony_ci		rs_set_capacity(rs);
17058c2ecf20Sopenharmony_ci	}
17068c2ecf20Sopenharmony_ci	dm_table_event(rs->ti->table);
17078c2ecf20Sopenharmony_ci}
17088c2ecf20Sopenharmony_ci
17098c2ecf20Sopenharmony_ci/*
17108c2ecf20Sopenharmony_ci * Make sure a valid takeover (level switch) is being requested on @rs
17118c2ecf20Sopenharmony_ci *
17128c2ecf20Sopenharmony_ci * Conversions of raid sets from one MD personality to another
17138c2ecf20Sopenharmony_ci * have to conform to restrictions which are enforced here.
17148c2ecf20Sopenharmony_ci */
17158c2ecf20Sopenharmony_cistatic int rs_check_takeover(struct raid_set *rs)
17168c2ecf20Sopenharmony_ci{
17178c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
17188c2ecf20Sopenharmony_ci	unsigned int near_copies;
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci	if (rs->md.degraded) {
17218c2ecf20Sopenharmony_ci		rs->ti->error = "Can't takeover degraded raid set";
17228c2ecf20Sopenharmony_ci		return -EPERM;
17238c2ecf20Sopenharmony_ci	}
17248c2ecf20Sopenharmony_ci
17258c2ecf20Sopenharmony_ci	if (rs_is_reshaping(rs)) {
17268c2ecf20Sopenharmony_ci		rs->ti->error = "Can't takeover reshaping raid set";
17278c2ecf20Sopenharmony_ci		return -EPERM;
17288c2ecf20Sopenharmony_ci	}
17298c2ecf20Sopenharmony_ci
17308c2ecf20Sopenharmony_ci	switch (mddev->level) {
17318c2ecf20Sopenharmony_ci	case 0:
17328c2ecf20Sopenharmony_ci		/* raid0 -> raid1/5 with one disk */
17338c2ecf20Sopenharmony_ci		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
17348c2ecf20Sopenharmony_ci		    mddev->raid_disks == 1)
17358c2ecf20Sopenharmony_ci			return 0;
17368c2ecf20Sopenharmony_ci
17378c2ecf20Sopenharmony_ci		/* raid0 -> raid10 */
17388c2ecf20Sopenharmony_ci		if (mddev->new_level == 10 &&
17398c2ecf20Sopenharmony_ci		    !(rs->raid_disks % mddev->raid_disks))
17408c2ecf20Sopenharmony_ci			return 0;
17418c2ecf20Sopenharmony_ci
17428c2ecf20Sopenharmony_ci		/* raid0 with multiple disks -> raid4/5/6 */
17438c2ecf20Sopenharmony_ci		if (__within_range(mddev->new_level, 4, 6) &&
17448c2ecf20Sopenharmony_ci		    mddev->new_layout == ALGORITHM_PARITY_N &&
17458c2ecf20Sopenharmony_ci		    mddev->raid_disks > 1)
17468c2ecf20Sopenharmony_ci			return 0;
17478c2ecf20Sopenharmony_ci
17488c2ecf20Sopenharmony_ci		break;
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci	case 10:
17518c2ecf20Sopenharmony_ci		/* Can't takeover raid10_offset! */
17528c2ecf20Sopenharmony_ci		if (__is_raid10_offset(mddev->layout))
17538c2ecf20Sopenharmony_ci			break;
17548c2ecf20Sopenharmony_ci
17558c2ecf20Sopenharmony_ci		near_copies = __raid10_near_copies(mddev->layout);
17568c2ecf20Sopenharmony_ci
17578c2ecf20Sopenharmony_ci		/* raid10* -> raid0 */
17588c2ecf20Sopenharmony_ci		if (mddev->new_level == 0) {
17598c2ecf20Sopenharmony_ci			/* Can takeover raid10_near with raid disks divisible by data copies! */
17608c2ecf20Sopenharmony_ci			if (near_copies > 1 &&
17618c2ecf20Sopenharmony_ci			    !(mddev->raid_disks % near_copies)) {
17628c2ecf20Sopenharmony_ci				mddev->raid_disks /= near_copies;
17638c2ecf20Sopenharmony_ci				mddev->delta_disks = mddev->raid_disks;
17648c2ecf20Sopenharmony_ci				return 0;
17658c2ecf20Sopenharmony_ci			}
17668c2ecf20Sopenharmony_ci
17678c2ecf20Sopenharmony_ci			/* Can takeover raid10_far */
17688c2ecf20Sopenharmony_ci			if (near_copies == 1 &&
17698c2ecf20Sopenharmony_ci			    __raid10_far_copies(mddev->layout) > 1)
17708c2ecf20Sopenharmony_ci				return 0;
17718c2ecf20Sopenharmony_ci
17728c2ecf20Sopenharmony_ci			break;
17738c2ecf20Sopenharmony_ci		}
17748c2ecf20Sopenharmony_ci
17758c2ecf20Sopenharmony_ci		/* raid10_{near,far} -> raid1 */
17768c2ecf20Sopenharmony_ci		if (mddev->new_level == 1 &&
17778c2ecf20Sopenharmony_ci		    max(near_copies, __raid10_far_copies(mddev->layout)) == mddev->raid_disks)
17788c2ecf20Sopenharmony_ci			return 0;
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_ci		/* raid10_{near,far} with 2 disks -> raid4/5 */
17818c2ecf20Sopenharmony_ci		if (__within_range(mddev->new_level, 4, 5) &&
17828c2ecf20Sopenharmony_ci		    mddev->raid_disks == 2)
17838c2ecf20Sopenharmony_ci			return 0;
17848c2ecf20Sopenharmony_ci		break;
17858c2ecf20Sopenharmony_ci
17868c2ecf20Sopenharmony_ci	case 1:
17878c2ecf20Sopenharmony_ci		/* raid1 with 2 disks -> raid4/5 */
17888c2ecf20Sopenharmony_ci		if (__within_range(mddev->new_level, 4, 5) &&
17898c2ecf20Sopenharmony_ci		    mddev->raid_disks == 2) {
17908c2ecf20Sopenharmony_ci			mddev->degraded = 1;
17918c2ecf20Sopenharmony_ci			return 0;
17928c2ecf20Sopenharmony_ci		}
17938c2ecf20Sopenharmony_ci
17948c2ecf20Sopenharmony_ci		/* raid1 -> raid0 */
17958c2ecf20Sopenharmony_ci		if (mddev->new_level == 0 &&
17968c2ecf20Sopenharmony_ci		    mddev->raid_disks == 1)
17978c2ecf20Sopenharmony_ci			return 0;
17988c2ecf20Sopenharmony_ci
17998c2ecf20Sopenharmony_ci		/* raid1 -> raid10 */
18008c2ecf20Sopenharmony_ci		if (mddev->new_level == 10)
18018c2ecf20Sopenharmony_ci			return 0;
18028c2ecf20Sopenharmony_ci		break;
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci	case 4:
18058c2ecf20Sopenharmony_ci		/* raid4 -> raid0 */
18068c2ecf20Sopenharmony_ci		if (mddev->new_level == 0)
18078c2ecf20Sopenharmony_ci			return 0;
18088c2ecf20Sopenharmony_ci
18098c2ecf20Sopenharmony_ci		/* raid4 -> raid1/5 with 2 disks */
18108c2ecf20Sopenharmony_ci		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
18118c2ecf20Sopenharmony_ci		    mddev->raid_disks == 2)
18128c2ecf20Sopenharmony_ci			return 0;
18138c2ecf20Sopenharmony_ci
18148c2ecf20Sopenharmony_ci		/* raid4 -> raid5/6 with parity N */
18158c2ecf20Sopenharmony_ci		if (__within_range(mddev->new_level, 5, 6) &&
18168c2ecf20Sopenharmony_ci		    mddev->layout == ALGORITHM_PARITY_N)
18178c2ecf20Sopenharmony_ci			return 0;
18188c2ecf20Sopenharmony_ci		break;
18198c2ecf20Sopenharmony_ci
18208c2ecf20Sopenharmony_ci	case 5:
18218c2ecf20Sopenharmony_ci		/* raid5 with parity N -> raid0 */
18228c2ecf20Sopenharmony_ci		if (mddev->new_level == 0 &&
18238c2ecf20Sopenharmony_ci		    mddev->layout == ALGORITHM_PARITY_N)
18248c2ecf20Sopenharmony_ci			return 0;
18258c2ecf20Sopenharmony_ci
18268c2ecf20Sopenharmony_ci		/* raid5 with parity N -> raid4 */
18278c2ecf20Sopenharmony_ci		if (mddev->new_level == 4 &&
18288c2ecf20Sopenharmony_ci		    mddev->layout == ALGORITHM_PARITY_N)
18298c2ecf20Sopenharmony_ci			return 0;
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci		/* raid5 with 2 disks -> raid1/4/10 */
18328c2ecf20Sopenharmony_ci		if ((mddev->new_level == 1 || mddev->new_level == 4 || mddev->new_level == 10) &&
18338c2ecf20Sopenharmony_ci		    mddev->raid_disks == 2)
18348c2ecf20Sopenharmony_ci			return 0;
18358c2ecf20Sopenharmony_ci
18368c2ecf20Sopenharmony_ci		/* raid5_* -> raid6_*_6 with Q-Syndrome N (e.g. raid5_ra -> raid6_ra_6) */
18378c2ecf20Sopenharmony_ci		if (mddev->new_level == 6 &&
18388c2ecf20Sopenharmony_ci		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
18398c2ecf20Sopenharmony_ci		      __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC_6, ALGORITHM_RIGHT_SYMMETRIC_6)))
18408c2ecf20Sopenharmony_ci			return 0;
18418c2ecf20Sopenharmony_ci		break;
18428c2ecf20Sopenharmony_ci
18438c2ecf20Sopenharmony_ci	case 6:
18448c2ecf20Sopenharmony_ci		/* raid6 with parity N -> raid0 */
18458c2ecf20Sopenharmony_ci		if (mddev->new_level == 0 &&
18468c2ecf20Sopenharmony_ci		    mddev->layout == ALGORITHM_PARITY_N)
18478c2ecf20Sopenharmony_ci			return 0;
18488c2ecf20Sopenharmony_ci
18498c2ecf20Sopenharmony_ci		/* raid6 with parity N -> raid4 */
18508c2ecf20Sopenharmony_ci		if (mddev->new_level == 4 &&
18518c2ecf20Sopenharmony_ci		    mddev->layout == ALGORITHM_PARITY_N)
18528c2ecf20Sopenharmony_ci			return 0;
18538c2ecf20Sopenharmony_ci
18548c2ecf20Sopenharmony_ci		/* raid6_*_n with Q-Syndrome N -> raid5_* */
18558c2ecf20Sopenharmony_ci		if (mddev->new_level == 5 &&
18568c2ecf20Sopenharmony_ci		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
18578c2ecf20Sopenharmony_ci		     __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC, ALGORITHM_RIGHT_SYMMETRIC)))
18588c2ecf20Sopenharmony_ci			return 0;
		break;
18598c2ecf20Sopenharmony_ci
18608c2ecf20Sopenharmony_ci	default:
18618c2ecf20Sopenharmony_ci		break;
18628c2ecf20Sopenharmony_ci	}
18638c2ecf20Sopenharmony_ci
18648c2ecf20Sopenharmony_ci	rs->ti->error = "takeover not possible";
18658c2ecf20Sopenharmony_ci	return -EINVAL;
18668c2ecf20Sopenharmony_ci}
18678c2ecf20Sopenharmony_ci
18688c2ecf20Sopenharmony_ci/* True if @rs requested to be taken over */
18698c2ecf20Sopenharmony_cistatic bool rs_takeover_requested(struct raid_set *rs)
18708c2ecf20Sopenharmony_ci{
18718c2ecf20Sopenharmony_ci	return rs->md.new_level != rs->md.level;
18728c2ecf20Sopenharmony_ci}
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_ci/* True if layout is set to reshape. */
18758c2ecf20Sopenharmony_cistatic bool rs_is_layout_change(struct raid_set *rs, bool use_mddev)
18768c2ecf20Sopenharmony_ci{
18778c2ecf20Sopenharmony_ci	return (use_mddev ? rs->md.delta_disks : rs->delta_disks) ||
18788c2ecf20Sopenharmony_ci	       rs->md.new_layout != rs->md.layout ||
18798c2ecf20Sopenharmony_ci	       rs->md.new_chunk_sectors != rs->md.chunk_sectors;
18808c2ecf20Sopenharmony_ci}
18818c2ecf20Sopenharmony_ci
18828c2ecf20Sopenharmony_ci/* True if @rs is requested to reshape by ctr */
18838c2ecf20Sopenharmony_cistatic bool rs_reshape_requested(struct raid_set *rs)
18848c2ecf20Sopenharmony_ci{
18858c2ecf20Sopenharmony_ci	bool change;
18868c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_ci	if (rs_takeover_requested(rs))
18898c2ecf20Sopenharmony_ci		return false;
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs))
18928c2ecf20Sopenharmony_ci		return false;
18938c2ecf20Sopenharmony_ci
18948c2ecf20Sopenharmony_ci	change = rs_is_layout_change(rs, false);
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci	/* Historical case to support raid1 reshape without delta disks */
18978c2ecf20Sopenharmony_ci	if (rs_is_raid1(rs)) {
18988c2ecf20Sopenharmony_ci		if (rs->delta_disks)
18998c2ecf20Sopenharmony_ci			return !!rs->delta_disks;
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci		return !change &&
19028c2ecf20Sopenharmony_ci		       mddev->raid_disks != rs->raid_disks;
19038c2ecf20Sopenharmony_ci	}
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_ci	if (rs_is_raid10(rs))
19068c2ecf20Sopenharmony_ci		return change &&
19078c2ecf20Sopenharmony_ci		       !__is_raid10_far(mddev->new_layout) &&
19088c2ecf20Sopenharmony_ci		       rs->delta_disks >= 0;
19098c2ecf20Sopenharmony_ci
19108c2ecf20Sopenharmony_ci	return change;
19118c2ecf20Sopenharmony_ci}
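/*
 * To illustrate the distinction: raid5 -> raid6 changes the level and is a
 * takeover, whereas raid5 with a new chunk size, layout or disk count keeps
 * its level and is treated as a reshape.
 */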
19128c2ecf20Sopenharmony_ci
19138c2ecf20Sopenharmony_ci/*  Features */
19148c2ecf20Sopenharmony_ci#define	FEATURE_FLAG_SUPPORTS_V190	0x1 /* Supports extended superblock */
19158c2ecf20Sopenharmony_ci
19168c2ecf20Sopenharmony_ci/* State flags for sb->flags */
19178c2ecf20Sopenharmony_ci#define	SB_FLAG_RESHAPE_ACTIVE		0x1
19188c2ecf20Sopenharmony_ci#define	SB_FLAG_RESHAPE_BACKWARDS	0x2
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_ci/*
19218c2ecf20Sopenharmony_ci * This structure is never routinely used by userspace, unlike md superblocks.
19228c2ecf20Sopenharmony_ci * Devices with this superblock should only ever be accessed via device-mapper.
19238c2ecf20Sopenharmony_ci */
19248c2ecf20Sopenharmony_ci#define DM_RAID_MAGIC 0x64526D44
19258c2ecf20Sopenharmony_cistruct dm_raid_superblock {
19268c2ecf20Sopenharmony_ci	__le32 magic;		/* "DmRd" */
19278c2ecf20Sopenharmony_ci	__le32 compat_features;	/* Used to indicate compatible features (like 1.9.0 ondisk metadata extension) */
19288c2ecf20Sopenharmony_ci
19298c2ecf20Sopenharmony_ci	__le32 num_devices;	/* Number of devices in this raid set. (Max 64) */
19308c2ecf20Sopenharmony_ci	__le32 array_position;	/* The position of this drive in the raid set */
19318c2ecf20Sopenharmony_ci
19328c2ecf20Sopenharmony_ci	__le64 events;		/* Incremented by md when superblock updated */
19338c2ecf20Sopenharmony_ci	__le64 failed_devices;	/* Pre 1.9.0 part of bit field of devices to */
19348c2ecf20Sopenharmony_ci				/* indicate failures (see extension below) */
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci	/*
19378c2ecf20Sopenharmony_ci	 * This offset tracks the progress of the repair or replacement of
19388c2ecf20Sopenharmony_ci	 * an individual drive.
19398c2ecf20Sopenharmony_ci	 */
19408c2ecf20Sopenharmony_ci	__le64 disk_recovery_offset;
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_ci	/*
19438c2ecf20Sopenharmony_ci	 * This offset tracks the progress of the initial raid set
19448c2ecf20Sopenharmony_ci	 * synchronisation/parity calculation.
19458c2ecf20Sopenharmony_ci	 */
19468c2ecf20Sopenharmony_ci	__le64 array_resync_offset;
19478c2ecf20Sopenharmony_ci
19488c2ecf20Sopenharmony_ci	/*
19498c2ecf20Sopenharmony_ci	 * raid characteristics
19508c2ecf20Sopenharmony_ci	 */
19518c2ecf20Sopenharmony_ci	__le32 level;
19528c2ecf20Sopenharmony_ci	__le32 layout;
19538c2ecf20Sopenharmony_ci	__le32 stripe_sectors;
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_ci	/********************************************************************
19568c2ecf20Sopenharmony_ci	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
19578c2ecf20Sopenharmony_ci	 *
19588c2ecf20Sopenharmony_ci	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
19598c2ecf20Sopenharmony_ci	 */
19608c2ecf20Sopenharmony_ci
19618c2ecf20Sopenharmony_ci	__le32 flags; /* Flags defining array states for reshaping */
19628c2ecf20Sopenharmony_ci
19638c2ecf20Sopenharmony_ci	/*
19648c2ecf20Sopenharmony_ci	 * This offset tracks the progress of a raid
19658c2ecf20Sopenharmony_ci	 * set reshape in order to be able to restart it
19668c2ecf20Sopenharmony_ci	 */
19678c2ecf20Sopenharmony_ci	__le64 reshape_position;
19688c2ecf20Sopenharmony_ci
19698c2ecf20Sopenharmony_ci	/*
19708c2ecf20Sopenharmony_ci	 * These define the properties of the array in case of an interrupted reshape
19718c2ecf20Sopenharmony_ci	 */
19728c2ecf20Sopenharmony_ci	__le32 new_level;
19738c2ecf20Sopenharmony_ci	__le32 new_layout;
19748c2ecf20Sopenharmony_ci	__le32 new_stripe_sectors;
19758c2ecf20Sopenharmony_ci	__le32 delta_disks;
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	__le64 array_sectors; /* Array size in sectors */
19788c2ecf20Sopenharmony_ci
19798c2ecf20Sopenharmony_ci	/*
19808c2ecf20Sopenharmony_ci	 * Sector offsets to data on devices (reshaping).
19818c2ecf20Sopenharmony_ci	 * Needed to support out of place reshaping, thus
19828c2ecf20Sopenharmony_ci	 * not writing over any stripes whilst converting
19838c2ecf20Sopenharmony_ci	 * them from old to new layout
19848c2ecf20Sopenharmony_ci	 */
19858c2ecf20Sopenharmony_ci	__le64 data_offset;
19868c2ecf20Sopenharmony_ci	__le64 new_data_offset;
19878c2ecf20Sopenharmony_ci
19888c2ecf20Sopenharmony_ci	__le64 sectors; /* Used device size in sectors */
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	/*
19918c2ecf20Sopenharmony_ci	 * Additional bit field of devices indicating failures to support
19928c2ecf20Sopenharmony_ci	 * up to 256 devices with the 1.9.0 on-disk metadata format
19938c2ecf20Sopenharmony_ci	 */
19948c2ecf20Sopenharmony_ci	__le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1];
19958c2ecf20Sopenharmony_ci
19968c2ecf20Sopenharmony_ci	__le32 incompat_features;	/* Used to indicate any incompatible features */
19978c2ecf20Sopenharmony_ci
19988c2ecf20Sopenharmony_ci	/* Always set rest up to logical block size to 0 when writing (see get_metadata_device() below). */
19998c2ecf20Sopenharmony_ci} __packed;
20008c2ecf20Sopenharmony_ci
20018c2ecf20Sopenharmony_ci/*
20028c2ecf20Sopenharmony_ci * Check for reshape constraints on raid set @rs:
20038c2ecf20Sopenharmony_ci *
20048c2ecf20Sopenharmony_ci * - reshape function non-existent
20058c2ecf20Sopenharmony_ci * - degraded set
20068c2ecf20Sopenharmony_ci * - ongoing recovery
20078c2ecf20Sopenharmony_ci * - ongoing reshape
20088c2ecf20Sopenharmony_ci *
20098c2ecf20Sopenharmony_ci * Returns 0 if no constraint is violated, or -EPERM with an error
20108c2ecf20Sopenharmony_ci * message set in rs->ti->error
20118c2ecf20Sopenharmony_ci */
20128c2ecf20Sopenharmony_cistatic int rs_check_reshape(struct raid_set *rs)
20138c2ecf20Sopenharmony_ci{
20148c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
20158c2ecf20Sopenharmony_ci
20168c2ecf20Sopenharmony_ci	if (!mddev->pers || !mddev->pers->check_reshape)
20178c2ecf20Sopenharmony_ci		rs->ti->error = "Reshape not supported";
20188c2ecf20Sopenharmony_ci	else if (mddev->degraded)
20198c2ecf20Sopenharmony_ci		rs->ti->error = "Can't reshape degraded raid set";
20208c2ecf20Sopenharmony_ci	else if (rs_is_recovering(rs))
20218c2ecf20Sopenharmony_ci		rs->ti->error = "Convert request on recovering raid set prohibited";
20228c2ecf20Sopenharmony_ci	else if (rs_is_reshaping(rs))
20238c2ecf20Sopenharmony_ci		rs->ti->error = "raid set already reshaping!";
20248c2ecf20Sopenharmony_ci	else if (!(rs_is_raid1(rs) || rs_is_raid10(rs) || rs_is_raid456(rs)))
20258c2ecf20Sopenharmony_ci		rs->ti->error = "Reshaping only supported for raid1/4/5/6/10";
20268c2ecf20Sopenharmony_ci	else
20278c2ecf20Sopenharmony_ci		return 0;
20288c2ecf20Sopenharmony_ci
20298c2ecf20Sopenharmony_ci	return -EPERM;
20308c2ecf20Sopenharmony_ci}
20318c2ecf20Sopenharmony_ci
20328c2ecf20Sopenharmony_cistatic int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
20338c2ecf20Sopenharmony_ci{
20348c2ecf20Sopenharmony_ci	BUG_ON(!rdev->sb_page);
20358c2ecf20Sopenharmony_ci
20368c2ecf20Sopenharmony_ci	if (rdev->sb_loaded && !force_reload)
20378c2ecf20Sopenharmony_ci		return 0;
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	rdev->sb_loaded = 0;
20408c2ecf20Sopenharmony_ci
20418c2ecf20Sopenharmony_ci	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) {
20428c2ecf20Sopenharmony_ci		DMERR("Failed to read superblock of device at position %d",
20438c2ecf20Sopenharmony_ci		      rdev->raid_disk);
20448c2ecf20Sopenharmony_ci		md_error(rdev->mddev, rdev);
20458c2ecf20Sopenharmony_ci		set_bit(Faulty, &rdev->flags);
20468c2ecf20Sopenharmony_ci		return -EIO;
20478c2ecf20Sopenharmony_ci	}
20488c2ecf20Sopenharmony_ci
20498c2ecf20Sopenharmony_ci	rdev->sb_loaded = 1;
20508c2ecf20Sopenharmony_ci
20518c2ecf20Sopenharmony_ci	return 0;
20528c2ecf20Sopenharmony_ci}
20538c2ecf20Sopenharmony_ci
20548c2ecf20Sopenharmony_cistatic void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
20558c2ecf20Sopenharmony_ci{
20568c2ecf20Sopenharmony_ci	failed_devices[0] = le64_to_cpu(sb->failed_devices);
20578c2ecf20Sopenharmony_ci	memset(failed_devices + 1, 0, sizeof(sb->extended_failed_devices));
20588c2ecf20Sopenharmony_ci
20598c2ecf20Sopenharmony_ci	if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
20608c2ecf20Sopenharmony_ci		int i = ARRAY_SIZE(sb->extended_failed_devices);
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci		while (i--)
20638c2ecf20Sopenharmony_ci			failed_devices[i+1] = le64_to_cpu(sb->extended_failed_devices[i]);
20648c2ecf20Sopenharmony_ci	}
20658c2ecf20Sopenharmony_ci}
20668c2ecf20Sopenharmony_ci
20678c2ecf20Sopenharmony_cistatic void sb_update_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
20688c2ecf20Sopenharmony_ci{
20698c2ecf20Sopenharmony_ci	int i = ARRAY_SIZE(sb->extended_failed_devices);
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ci	sb->failed_devices = cpu_to_le64(failed_devices[0]);
20728c2ecf20Sopenharmony_ci	while (i--)
20738c2ecf20Sopenharmony_ci		sb->extended_failed_devices[i] = cpu_to_le64(failed_devices[i+1]);
20748c2ecf20Sopenharmony_ci}
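
/*
 * An illustrative sketch (hypothetical helper, not used by the driver):
 * how a raid device index maps into the split failed-devices bitfield.
 * Bits 0..63 live in sb->failed_devices; bits 64 and up live in
 * sb->extended_failed_devices[], which is only valid with
 * FEATURE_FLAG_SUPPORTS_V190.  The driver itself operates on the merged
 * uint64_t array via set_bit()/test_bit() instead.
 */
static bool __maybe_unused example_device_failed(const uint64_t *failed_devices,
						 unsigned int i)
{
	/* Word 0 is sb->failed_devices, words 1.. are the extension */
	return failed_devices[i / 64] & (1ULL << (i % 64));
}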
20758c2ecf20Sopenharmony_ci
20768c2ecf20Sopenharmony_ci/*
20778c2ecf20Sopenharmony_ci * Synchronize the superblock members with the raid set properties
20788c2ecf20Sopenharmony_ci *
20798c2ecf20Sopenharmony_ci * All superblock data is little endian.
20808c2ecf20Sopenharmony_ci */
20818c2ecf20Sopenharmony_cistatic void super_sync(struct mddev *mddev, struct md_rdev *rdev)
20828c2ecf20Sopenharmony_ci{
20838c2ecf20Sopenharmony_ci	bool update_failed_devices = false;
20848c2ecf20Sopenharmony_ci	unsigned int i;
20858c2ecf20Sopenharmony_ci	uint64_t failed_devices[DISKS_ARRAY_ELEMS];
20868c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
20878c2ecf20Sopenharmony_ci	struct raid_set *rs = container_of(mddev, struct raid_set, md);
20888c2ecf20Sopenharmony_ci
20898c2ecf20Sopenharmony_ci	/* No metadata device, no superblock */
20908c2ecf20Sopenharmony_ci	if (!rdev->meta_bdev)
20918c2ecf20Sopenharmony_ci		return;
20928c2ecf20Sopenharmony_ci
20938c2ecf20Sopenharmony_ci	BUG_ON(!rdev->sb_page);
20948c2ecf20Sopenharmony_ci
20958c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
20968c2ecf20Sopenharmony_ci
20978c2ecf20Sopenharmony_ci	sb_retrieve_failed_devices(sb, failed_devices);
20988c2ecf20Sopenharmony_ci
20998c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++)
21008c2ecf20Sopenharmony_ci		if (!rs->dev[i].data_dev || test_bit(Faulty, &rs->dev[i].rdev.flags)) {
21018c2ecf20Sopenharmony_ci			update_failed_devices = true;
21028c2ecf20Sopenharmony_ci			set_bit(i, (void *) failed_devices);
21038c2ecf20Sopenharmony_ci		}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_ci	if (update_failed_devices)
21068c2ecf20Sopenharmony_ci		sb_update_failed_devices(sb, failed_devices);
21078c2ecf20Sopenharmony_ci
21088c2ecf20Sopenharmony_ci	sb->magic = cpu_to_le32(DM_RAID_MAGIC);
21098c2ecf20Sopenharmony_ci	sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);
21108c2ecf20Sopenharmony_ci
21118c2ecf20Sopenharmony_ci	sb->num_devices = cpu_to_le32(mddev->raid_disks);
21128c2ecf20Sopenharmony_ci	sb->array_position = cpu_to_le32(rdev->raid_disk);
21138c2ecf20Sopenharmony_ci
21148c2ecf20Sopenharmony_ci	sb->events = cpu_to_le64(mddev->events);
21158c2ecf20Sopenharmony_ci
21168c2ecf20Sopenharmony_ci	sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
21178c2ecf20Sopenharmony_ci	sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
21188c2ecf20Sopenharmony_ci
21198c2ecf20Sopenharmony_ci	sb->level = cpu_to_le32(mddev->level);
21208c2ecf20Sopenharmony_ci	sb->layout = cpu_to_le32(mddev->layout);
21218c2ecf20Sopenharmony_ci	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
21228c2ecf20Sopenharmony_ci
21238c2ecf20Sopenharmony_ci	/********************************************************************
21248c2ecf20Sopenharmony_ci	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
21258c2ecf20Sopenharmony_ci	 *
21268c2ecf20Sopenharmony_ci	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
21278c2ecf20Sopenharmony_ci	 */
21288c2ecf20Sopenharmony_ci	sb->new_level = cpu_to_le32(mddev->new_level);
21298c2ecf20Sopenharmony_ci	sb->new_layout = cpu_to_le32(mddev->new_layout);
21308c2ecf20Sopenharmony_ci	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ci	sb->delta_disks = cpu_to_le32(mddev->delta_disks);
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_ci	smp_rmb(); /* Make sure we access most recent reshape position */
21358c2ecf20Sopenharmony_ci	sb->reshape_position = cpu_to_le64(mddev->reshape_position);
21368c2ecf20Sopenharmony_ci	if (le64_to_cpu(sb->reshape_position) != MaxSector) {
21378c2ecf20Sopenharmony_ci		/* Flag ongoing reshape */
21388c2ecf20Sopenharmony_ci		sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE);
21398c2ecf20Sopenharmony_ci
21408c2ecf20Sopenharmony_ci		if (mddev->delta_disks < 0 || mddev->reshape_backwards)
21418c2ecf20Sopenharmony_ci			sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_BACKWARDS);
21428c2ecf20Sopenharmony_ci	} else {
21438c2ecf20Sopenharmony_ci		/* Clear reshape flags */
21448c2ecf20Sopenharmony_ci		sb->flags &= ~(cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE|SB_FLAG_RESHAPE_BACKWARDS));
21458c2ecf20Sopenharmony_ci	}
21468c2ecf20Sopenharmony_ci
21478c2ecf20Sopenharmony_ci	sb->array_sectors = cpu_to_le64(mddev->array_sectors);
21488c2ecf20Sopenharmony_ci	sb->data_offset = cpu_to_le64(rdev->data_offset);
21498c2ecf20Sopenharmony_ci	sb->new_data_offset = cpu_to_le64(rdev->new_data_offset);
21508c2ecf20Sopenharmony_ci	sb->sectors = cpu_to_le64(rdev->sectors);
21518c2ecf20Sopenharmony_ci	sb->incompat_features = cpu_to_le32(0);
21528c2ecf20Sopenharmony_ci
21538c2ecf20Sopenharmony_ci	/* Zero out the rest of the payload after the size of the superblock */
21548c2ecf20Sopenharmony_ci	memset(sb + 1, 0, rdev->sb_size - sizeof(*sb));
21558c2ecf20Sopenharmony_ci}
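
/*
 * Illustrative note (not part of the driver): "sb + 1" in super_sync()
 * above relies on C pointer arithmetic -- it points one whole
 * struct dm_raid_superblock past @sb, i.e. at the first byte after the
 * defined members, so the memset() zeroes only the tail of the one
 * logical block (rdev->sb_size) holding the superblock.
 */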
21568c2ecf20Sopenharmony_ci
21578c2ecf20Sopenharmony_ci/*
21588c2ecf20Sopenharmony_ci * super_load
21598c2ecf20Sopenharmony_ci *
21608c2ecf20Sopenharmony_ci * This function creates a superblock if one is not found on the device
21618c2ecf20Sopenharmony_ci * and will decide which superblock to use if there's a choice.
21628c2ecf20Sopenharmony_ci *
21638c2ecf20Sopenharmony_ci * Return: 1 to use rdev, 0 to use refdev, -Exxx otherwise
21648c2ecf20Sopenharmony_ci */
21658c2ecf20Sopenharmony_cistatic int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
21668c2ecf20Sopenharmony_ci{
21678c2ecf20Sopenharmony_ci	int r;
21688c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
21698c2ecf20Sopenharmony_ci	struct dm_raid_superblock *refsb;
21708c2ecf20Sopenharmony_ci	uint64_t events_sb, events_refsb;
21718c2ecf20Sopenharmony_ci
21728c2ecf20Sopenharmony_ci	r = read_disk_sb(rdev, rdev->sb_size, false);
21738c2ecf20Sopenharmony_ci	if (r)
21748c2ecf20Sopenharmony_ci		return r;
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
21778c2ecf20Sopenharmony_ci
21788c2ecf20Sopenharmony_ci	/*
21798c2ecf20Sopenharmony_ci	 * Two cases in which we want to write new superblocks and rebuild:
21808c2ecf20Sopenharmony_ci	 * 1) New device (no matching magic number)
21818c2ecf20Sopenharmony_ci	 * 2) Device specified for rebuild (!In_sync w/ offset == 0)
21828c2ecf20Sopenharmony_ci	 */
21838c2ecf20Sopenharmony_ci	if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) ||
21848c2ecf20Sopenharmony_ci	    (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) {
21858c2ecf20Sopenharmony_ci		super_sync(rdev->mddev, rdev);
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci		set_bit(FirstUse, &rdev->flags);
21888c2ecf20Sopenharmony_ci		sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci		/* Force writing of superblocks to disk */
21918c2ecf20Sopenharmony_ci		set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_ci		/* Any superblock is better than none, choose that if given */
21948c2ecf20Sopenharmony_ci		return refdev ? 0 : 1;
21958c2ecf20Sopenharmony_ci	}
21968c2ecf20Sopenharmony_ci
21978c2ecf20Sopenharmony_ci	if (!refdev)
21988c2ecf20Sopenharmony_ci		return 1;
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_ci	events_sb = le64_to_cpu(sb->events);
22018c2ecf20Sopenharmony_ci
22028c2ecf20Sopenharmony_ci	refsb = page_address(refdev->sb_page);
22038c2ecf20Sopenharmony_ci	events_refsb = le64_to_cpu(refsb->events);
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	return (events_sb > events_refsb) ? 1 : 0;
22068c2ecf20Sopenharmony_ci}
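
/*
 * Illustrative note (not part of the driver): the event counter acts as a
 * generation number.  E.g. with sb->events == 42 on @rdev and
 * refsb->events == 41 on @refdev, @rdev carries fresher metadata, so
 * super_load() returns 1 and @rdev becomes the new reference.
 */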
22078c2ecf20Sopenharmony_ci
22088c2ecf20Sopenharmony_cistatic int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
22098c2ecf20Sopenharmony_ci{
22108c2ecf20Sopenharmony_ci	int role;
22118c2ecf20Sopenharmony_ci	unsigned int d;
22128c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
22138c2ecf20Sopenharmony_ci	uint64_t events_sb;
22148c2ecf20Sopenharmony_ci	uint64_t failed_devices[DISKS_ARRAY_ELEMS];
22158c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
22168c2ecf20Sopenharmony_ci	uint32_t new_devs = 0, rebuild_and_new = 0, rebuilds = 0;
22178c2ecf20Sopenharmony_ci	struct md_rdev *r;
22188c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb2;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
22218c2ecf20Sopenharmony_ci	events_sb = le64_to_cpu(sb->events);
22228c2ecf20Sopenharmony_ci
22238c2ecf20Sopenharmony_ci	/*
22248c2ecf20Sopenharmony_ci	 * Initialise to 1 if this is a new superblock.
22258c2ecf20Sopenharmony_ci	 */
22268c2ecf20Sopenharmony_ci	mddev->events = events_sb ? : 1;
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci	mddev->reshape_position = MaxSector;
22298c2ecf20Sopenharmony_ci
22308c2ecf20Sopenharmony_ci	mddev->raid_disks = le32_to_cpu(sb->num_devices);
22318c2ecf20Sopenharmony_ci	mddev->level = le32_to_cpu(sb->level);
22328c2ecf20Sopenharmony_ci	mddev->layout = le32_to_cpu(sb->layout);
22338c2ecf20Sopenharmony_ci	mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	/*
22368c2ecf20Sopenharmony_ci	 * Reshaping is supported, i.e. reshape_position is valid
22378c2ecf20Sopenharmony_ci	 * in the superblock and the superblock content is authoritative.
22388c2ecf20Sopenharmony_ci	 */
22398c2ecf20Sopenharmony_ci	if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
22408c2ecf20Sopenharmony_ci		/* Superblock is authoritative wrt given raid set layout! */
22418c2ecf20Sopenharmony_ci		mddev->new_level = le32_to_cpu(sb->new_level);
22428c2ecf20Sopenharmony_ci		mddev->new_layout = le32_to_cpu(sb->new_layout);
22438c2ecf20Sopenharmony_ci		mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors);
22448c2ecf20Sopenharmony_ci		mddev->delta_disks = le32_to_cpu(sb->delta_disks);
22458c2ecf20Sopenharmony_ci		mddev->array_sectors = le64_to_cpu(sb->array_sectors);
22468c2ecf20Sopenharmony_ci
22478c2ecf20Sopenharmony_ci		/* raid was reshaping and got interrupted */
22488c2ecf20Sopenharmony_ci		if (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_ACTIVE) {
22498c2ecf20Sopenharmony_ci			if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) {
22508c2ecf20Sopenharmony_ci				DMERR("Reshape requested but raid set is still reshaping");
22518c2ecf20Sopenharmony_ci				return -EINVAL;
22528c2ecf20Sopenharmony_ci			}
22538c2ecf20Sopenharmony_ci
22548c2ecf20Sopenharmony_ci			if (mddev->delta_disks < 0 ||
22558c2ecf20Sopenharmony_ci			    (!mddev->delta_disks && (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_BACKWARDS)))
22568c2ecf20Sopenharmony_ci				mddev->reshape_backwards = 1;
22578c2ecf20Sopenharmony_ci			else
22588c2ecf20Sopenharmony_ci				mddev->reshape_backwards = 0;
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_ci			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
22618c2ecf20Sopenharmony_ci			rs->raid_type = get_raid_type_by_ll(mddev->level, mddev->layout);
22628c2ecf20Sopenharmony_ci		}
22638c2ecf20Sopenharmony_ci
22648c2ecf20Sopenharmony_ci	} else {
22658c2ecf20Sopenharmony_ci		/*
22668c2ecf20Sopenharmony_ci		 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata
22678c2ecf20Sopenharmony_ci		 */
22688c2ecf20Sopenharmony_ci		struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout);
22698c2ecf20Sopenharmony_ci		struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
22708c2ecf20Sopenharmony_ci
22718c2ecf20Sopenharmony_ci		if (rs_takeover_requested(rs)) {
22728c2ecf20Sopenharmony_ci			if (rt_cur && rt_new)
22738c2ecf20Sopenharmony_ci				DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)",
22748c2ecf20Sopenharmony_ci				      rt_cur->name, rt_new->name);
22758c2ecf20Sopenharmony_ci			else
22768c2ecf20Sopenharmony_ci				DMERR("Takeover raid sets not yet supported by metadata. (raid level change)");
22778c2ecf20Sopenharmony_ci			return -EINVAL;
22788c2ecf20Sopenharmony_ci		} else if (rs_reshape_requested(rs)) {
22798c2ecf20Sopenharmony_ci			DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)");
22808c2ecf20Sopenharmony_ci			if (mddev->layout != mddev->new_layout) {
22818c2ecf20Sopenharmony_ci				if (rt_cur && rt_new)
22828c2ecf20Sopenharmony_ci					DMERR("	 current layout %s vs new layout %s",
22838c2ecf20Sopenharmony_ci					      rt_cur->name, rt_new->name);
22848c2ecf20Sopenharmony_ci				else
22858c2ecf20Sopenharmony_ci					DMERR("	 current layout 0x%X vs new layout 0x%X",
22868c2ecf20Sopenharmony_ci					      le32_to_cpu(sb->layout), mddev->new_layout);
22878c2ecf20Sopenharmony_ci			}
22888c2ecf20Sopenharmony_ci			if (mddev->chunk_sectors != mddev->new_chunk_sectors)
22898c2ecf20Sopenharmony_ci				DMERR("	 current stripe sectors %u vs new stripe sectors %u",
22908c2ecf20Sopenharmony_ci				      mddev->chunk_sectors, mddev->new_chunk_sectors);
22918c2ecf20Sopenharmony_ci			if (rs->delta_disks)
22928c2ecf20Sopenharmony_ci				DMERR("	 current %u disks vs new %u disks",
22938c2ecf20Sopenharmony_ci				      mddev->raid_disks, mddev->raid_disks + rs->delta_disks);
22948c2ecf20Sopenharmony_ci			if (rs_is_raid10(rs)) {
22958c2ecf20Sopenharmony_ci				DMERR("	 Old layout: %s w/ %u copies",
22968c2ecf20Sopenharmony_ci				      raid10_md_layout_to_format(mddev->layout),
22978c2ecf20Sopenharmony_ci				      raid10_md_layout_to_copies(mddev->layout));
22988c2ecf20Sopenharmony_ci				DMERR("	 New layout: %s w/ %u copies",
22998c2ecf20Sopenharmony_ci				      raid10_md_layout_to_format(mddev->new_layout),
23008c2ecf20Sopenharmony_ci				      raid10_md_layout_to_copies(mddev->new_layout));
23018c2ecf20Sopenharmony_ci			}
23028c2ecf20Sopenharmony_ci			return -EINVAL;
23038c2ecf20Sopenharmony_ci		}
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_ci		DMINFO("Discovered old metadata format; upgrading to extended metadata format");
23068c2ecf20Sopenharmony_ci	}
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
23098c2ecf20Sopenharmony_ci		mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci	/*
23128c2ecf20Sopenharmony_ci	 * During load, we set FirstUse if a new superblock was written.
23138c2ecf20Sopenharmony_ci	 * There are three reasons we might not have a superblock:
23148c2ecf20Sopenharmony_ci	 * 1) The raid set is brand new - in which case, all of the
23158c2ecf20Sopenharmony_ci	 *    devices must have their In_sync bit set.	Also,
23168c2ecf20Sopenharmony_ci	 *    recovery_cp must be 0, unless forced.
23178c2ecf20Sopenharmony_ci	 * 2) This is a new device being added to an old raid set
23188c2ecf20Sopenharmony_ci	 *    and the new device needs to be rebuilt - in which
23198c2ecf20Sopenharmony_ci	 *    case the In_sync bit will /not/ be set and
23208c2ecf20Sopenharmony_ci	 *    recovery_cp must be MaxSector.
23218c2ecf20Sopenharmony_ci	 * 3) This is/are a new device(s) being added to an old
23228c2ecf20Sopenharmony_ci	 *    raid set during takeover to a higher raid level
23238c2ecf20Sopenharmony_ci	 *    to provide capacity for redundancy or during reshape
23248c2ecf20Sopenharmony_ci	 *    to add capacity to grow the raid set.
23258c2ecf20Sopenharmony_ci	 */
23268c2ecf20Sopenharmony_ci	d = 0;
23278c2ecf20Sopenharmony_ci	rdev_for_each(r, mddev) {
23288c2ecf20Sopenharmony_ci		if (test_bit(Journal, &rdev->flags))
23298c2ecf20Sopenharmony_ci			continue;
23308c2ecf20Sopenharmony_ci
23318c2ecf20Sopenharmony_ci		if (test_bit(FirstUse, &r->flags))
23328c2ecf20Sopenharmony_ci			new_devs++;
23338c2ecf20Sopenharmony_ci
23348c2ecf20Sopenharmony_ci		if (!test_bit(In_sync, &r->flags)) {
23358c2ecf20Sopenharmony_ci			DMINFO("Device %d specified for rebuild; clearing superblock",
23368c2ecf20Sopenharmony_ci				r->raid_disk);
23378c2ecf20Sopenharmony_ci			rebuilds++;
23388c2ecf20Sopenharmony_ci
23398c2ecf20Sopenharmony_ci			if (test_bit(FirstUse, &r->flags))
23408c2ecf20Sopenharmony_ci				rebuild_and_new++;
23418c2ecf20Sopenharmony_ci		}
23428c2ecf20Sopenharmony_ci
23438c2ecf20Sopenharmony_ci		d++;
23448c2ecf20Sopenharmony_ci	}
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	if (new_devs == rs->raid_disks || !rebuilds) {
23478c2ecf20Sopenharmony_ci		/* Replace a broken device */
23488c2ecf20Sopenharmony_ci		if (new_devs == rs->raid_disks) {
23498c2ecf20Sopenharmony_ci			DMINFO("Superblocks created for new raid set");
23508c2ecf20Sopenharmony_ci			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
23518c2ecf20Sopenharmony_ci		} else if (new_devs != rebuilds &&
23528c2ecf20Sopenharmony_ci			   new_devs != rs->delta_disks) {
23538c2ecf20Sopenharmony_ci			DMERR("New device injected into existing raid set without "
23548c2ecf20Sopenharmony_ci			      "'delta_disks' or 'rebuild' parameter specified");
23558c2ecf20Sopenharmony_ci			return -EINVAL;
23568c2ecf20Sopenharmony_ci		}
23578c2ecf20Sopenharmony_ci	} else if (new_devs && new_devs != rebuilds) {
23588c2ecf20Sopenharmony_ci		DMERR("%u 'rebuild' devices cannot be injected into"
23598c2ecf20Sopenharmony_ci		      " a raid set with %u other first-time devices",
23608c2ecf20Sopenharmony_ci		      rebuilds, new_devs);
23618c2ecf20Sopenharmony_ci		return -EINVAL;
23628c2ecf20Sopenharmony_ci	} else if (rebuilds) {
23638c2ecf20Sopenharmony_ci		if (rebuild_and_new && rebuilds != rebuild_and_new) {
23648c2ecf20Sopenharmony_ci			DMERR("new device%s provided without 'rebuild'",
23658c2ecf20Sopenharmony_ci			      new_devs > 1 ? "s" : "");
23668c2ecf20Sopenharmony_ci			return -EINVAL;
23678c2ecf20Sopenharmony_ci		} else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) {
23688c2ecf20Sopenharmony_ci			DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)",
23698c2ecf20Sopenharmony_ci			      (unsigned long long) mddev->recovery_cp);
23708c2ecf20Sopenharmony_ci			return -EINVAL;
23718c2ecf20Sopenharmony_ci		} else if (rs_is_reshaping(rs)) {
23728c2ecf20Sopenharmony_ci			DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)",
23738c2ecf20Sopenharmony_ci			      (unsigned long long) mddev->reshape_position);
23748c2ecf20Sopenharmony_ci			return -EINVAL;
23758c2ecf20Sopenharmony_ci		}
23768c2ecf20Sopenharmony_ci	}
23778c2ecf20Sopenharmony_ci
23788c2ecf20Sopenharmony_ci	/*
23798c2ecf20Sopenharmony_ci	 * Now we set the Faulty bit for those devices that are
23808c2ecf20Sopenharmony_ci	 * recorded in the superblock as failed.
23818c2ecf20Sopenharmony_ci	 */
23828c2ecf20Sopenharmony_ci	sb_retrieve_failed_devices(sb, failed_devices);
23838c2ecf20Sopenharmony_ci	rdev_for_each(r, mddev) {
23848c2ecf20Sopenharmony_ci		if (test_bit(Journal, &rdev->flags) ||
23858c2ecf20Sopenharmony_ci		    !r->sb_page)
23868c2ecf20Sopenharmony_ci			continue;
23878c2ecf20Sopenharmony_ci		sb2 = page_address(r->sb_page);
23888c2ecf20Sopenharmony_ci		sb2->failed_devices = 0;
23898c2ecf20Sopenharmony_ci		memset(sb2->extended_failed_devices, 0, sizeof(sb2->extended_failed_devices));
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci		/*
23928c2ecf20Sopenharmony_ci		 * Check for any device re-ordering.
23938c2ecf20Sopenharmony_ci		 */
23948c2ecf20Sopenharmony_ci		if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
23958c2ecf20Sopenharmony_ci			role = le32_to_cpu(sb2->array_position);
23968c2ecf20Sopenharmony_ci			if (role < 0)
23978c2ecf20Sopenharmony_ci				continue;
23988c2ecf20Sopenharmony_ci
23998c2ecf20Sopenharmony_ci			if (role != r->raid_disk) {
24008c2ecf20Sopenharmony_ci				if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) {
24018c2ecf20Sopenharmony_ci					if (mddev->raid_disks % __raid10_near_copies(mddev->layout) ||
24028c2ecf20Sopenharmony_ci					    rs->raid_disks % rs->raid10_copies) {
24038c2ecf20Sopenharmony_ci						rs->ti->error =
24048c2ecf20Sopenharmony_ci							"Cannot change raid10 near set to odd # of devices!";
24058c2ecf20Sopenharmony_ci						return -EINVAL;
24068c2ecf20Sopenharmony_ci					}
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci					sb2->array_position = cpu_to_le32(r->raid_disk);
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci				} else if (!(rs_is_raid10(rs) && rt_is_raid0(rs->raid_type)) &&
24118c2ecf20Sopenharmony_ci					   !(rs_is_raid0(rs) && rt_is_raid10(rs->raid_type)) &&
24128c2ecf20Sopenharmony_ci					   !rt_is_raid1(rs->raid_type)) {
24138c2ecf20Sopenharmony_ci					rs->ti->error = "Cannot change device positions in raid set";
24148c2ecf20Sopenharmony_ci					return -EINVAL;
24158c2ecf20Sopenharmony_ci				}
24168c2ecf20Sopenharmony_ci
24178c2ecf20Sopenharmony_ci				DMINFO("raid device #%d now at position #%d", role, r->raid_disk);
24188c2ecf20Sopenharmony_ci			}
24198c2ecf20Sopenharmony_ci
24208c2ecf20Sopenharmony_ci			/*
24218c2ecf20Sopenharmony_ci			 * Partial recovery is performed on failed
24228c2ecf20Sopenharmony_ci			 * devices that come back (returning devices).
24238c2ecf20Sopenharmony_ci			 */
24248c2ecf20Sopenharmony_ci			if (test_bit(role, (void *) failed_devices))
24258c2ecf20Sopenharmony_ci				set_bit(Faulty, &r->flags);
24268c2ecf20Sopenharmony_ci		}
24278c2ecf20Sopenharmony_ci	}
24288c2ecf20Sopenharmony_ci
24298c2ecf20Sopenharmony_ci	return 0;
24308c2ecf20Sopenharmony_ci}
24318c2ecf20Sopenharmony_ci
24328c2ecf20Sopenharmony_cistatic int super_validate(struct raid_set *rs, struct md_rdev *rdev)
24338c2ecf20Sopenharmony_ci{
24348c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
24358c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
24368c2ecf20Sopenharmony_ci
24378c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs) || !rdev->sb_page || rdev->raid_disk < 0)
24388c2ecf20Sopenharmony_ci		return 0;
24398c2ecf20Sopenharmony_ci
24408c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
24418c2ecf20Sopenharmony_ci
24428c2ecf20Sopenharmony_ci	/*
24438c2ecf20Sopenharmony_ci	 * If mddev->events is not set, we know we have not yet initialized
24448c2ecf20Sopenharmony_ci	 * the array.
24458c2ecf20Sopenharmony_ci	 */
24468c2ecf20Sopenharmony_ci	if (!mddev->events && super_init_validation(rs, rdev))
24478c2ecf20Sopenharmony_ci		return -EINVAL;
24488c2ecf20Sopenharmony_ci
24498c2ecf20Sopenharmony_ci	if (le32_to_cpu(sb->compat_features) &&
24508c2ecf20Sopenharmony_ci	    le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) {
24518c2ecf20Sopenharmony_ci		rs->ti->error = "Unable to assemble array: Unknown flag(s) in compatible feature flags";
24528c2ecf20Sopenharmony_ci		return -EINVAL;
24538c2ecf20Sopenharmony_ci	}
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	if (sb->incompat_features) {
24568c2ecf20Sopenharmony_ci		rs->ti->error = "Unable to assemble array: No incompatible feature flags supported yet";
24578c2ecf20Sopenharmony_ci		return -EINVAL;
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_ci
24608c2ecf20Sopenharmony_ci	/* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
24618c2ecf20Sopenharmony_ci	mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
24628c2ecf20Sopenharmony_ci	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
24638c2ecf20Sopenharmony_ci
24648c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
24658c2ecf20Sopenharmony_ci		/*
24668c2ecf20Sopenharmony_ci		 * Retrieve rdev size stored in superblock to be prepared for shrink.
24678c2ecf20Sopenharmony_ci		 * Check extended superblock members are present otherwise the size
24688c2ecf20Sopenharmony_ci		 * will not be set!
24698c2ecf20Sopenharmony_ci		 */
24708c2ecf20Sopenharmony_ci		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
24718c2ecf20Sopenharmony_ci			rdev->sectors = le64_to_cpu(sb->sectors);
24728c2ecf20Sopenharmony_ci
24738c2ecf20Sopenharmony_ci		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
24748c2ecf20Sopenharmony_ci		if (rdev->recovery_offset == MaxSector)
24758c2ecf20Sopenharmony_ci			set_bit(In_sync, &rdev->flags);
24768c2ecf20Sopenharmony_ci		/*
24778c2ecf20Sopenharmony_ci		 * If no reshape in progress -> we're recovering single
24788c2ecf20Sopenharmony_ci		 * disk(s) and have to set the device(s) to out-of-sync
24798c2ecf20Sopenharmony_ci		 */
24808c2ecf20Sopenharmony_ci		else if (!rs_is_reshaping(rs))
24818c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags); /* Mandatory for recovery */
24828c2ecf20Sopenharmony_ci	}
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci	/*
24858c2ecf20Sopenharmony_ci	 * If a device comes back, set it as not In_sync and no longer faulty.
24868c2ecf20Sopenharmony_ci	 */
24878c2ecf20Sopenharmony_ci	if (test_and_clear_bit(Faulty, &rdev->flags)) {
24888c2ecf20Sopenharmony_ci		rdev->recovery_offset = 0;
24898c2ecf20Sopenharmony_ci		clear_bit(In_sync, &rdev->flags);
24908c2ecf20Sopenharmony_ci		rdev->saved_raid_disk = rdev->raid_disk;
24918c2ecf20Sopenharmony_ci	}
24928c2ecf20Sopenharmony_ci
24938c2ecf20Sopenharmony_ci	/* Reshape support -> restore respective data offsets */
24948c2ecf20Sopenharmony_ci	rdev->data_offset = le64_to_cpu(sb->data_offset);
24958c2ecf20Sopenharmony_ci	rdev->new_data_offset = le64_to_cpu(sb->new_data_offset);
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ci	return 0;
24988c2ecf20Sopenharmony_ci}
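
/*
 * Illustrative note (not part of the driver): to_sector(4096) in
 * super_validate() above equals 8, i.e. the write-intent bitmap starts
 * 4 KiB (8 x 512-byte sectors) into the metadata device, leaving the
 * first 4 KiB for the superblock.
 */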
24998c2ecf20Sopenharmony_ci
25008c2ecf20Sopenharmony_ci/*
25018c2ecf20Sopenharmony_ci * Analyse superblocks and select the freshest.
25028c2ecf20Sopenharmony_ci */
25038c2ecf20Sopenharmony_cistatic int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
25048c2ecf20Sopenharmony_ci{
25058c2ecf20Sopenharmony_ci	int r;
25068c2ecf20Sopenharmony_ci	struct md_rdev *rdev, *freshest;
25078c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
25088c2ecf20Sopenharmony_ci
25098c2ecf20Sopenharmony_ci	freshest = NULL;
25108c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev) {
25118c2ecf20Sopenharmony_ci		if (test_bit(Journal, &rdev->flags))
25128c2ecf20Sopenharmony_ci			continue;
25138c2ecf20Sopenharmony_ci
25148c2ecf20Sopenharmony_ci		if (!rdev->meta_bdev)
25158c2ecf20Sopenharmony_ci			continue;
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_ci		/* Set superblock offset/size for metadata device. */
25188c2ecf20Sopenharmony_ci		rdev->sb_start = 0;
25198c2ecf20Sopenharmony_ci		rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev);
25208c2ecf20Sopenharmony_ci		if (rdev->sb_size < sizeof(struct dm_raid_superblock) || rdev->sb_size > PAGE_SIZE) {
25218c2ecf20Sopenharmony_ci			DMERR("superblock size of a logical block is no longer valid");
25228c2ecf20Sopenharmony_ci			return -EINVAL;
25238c2ecf20Sopenharmony_ci		}
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_ci		/*
25268c2ecf20Sopenharmony_ci		 * Skipping super_load due to CTR_FLAG_SYNC will cause
25278c2ecf20Sopenharmony_ci		 * the array to undergo initialization again as
25288c2ecf20Sopenharmony_ci		 * though it were new.	This is the intended effect
25298c2ecf20Sopenharmony_ci		 * of the "sync" directive.
25308c2ecf20Sopenharmony_ci		 *
25318c2ecf20Sopenharmony_ci		 * With reshaping capability added, we must ensure
25328c2ecf20Sopenharmony_ci		 * that the "sync" directive is disallowed during the reshape.
25338c2ecf20Sopenharmony_ci		 */
25348c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
25358c2ecf20Sopenharmony_ci			continue;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci		r = super_load(rdev, freshest);
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci		switch (r) {
25408c2ecf20Sopenharmony_ci		case 1:
25418c2ecf20Sopenharmony_ci			freshest = rdev;
25428c2ecf20Sopenharmony_ci			break;
25438c2ecf20Sopenharmony_ci		case 0:
25448c2ecf20Sopenharmony_ci			break;
25458c2ecf20Sopenharmony_ci		default:
25468c2ecf20Sopenharmony_ci			/* This is a failure to read the superblock from the metadata device. */
25478c2ecf20Sopenharmony_ci			/*
25488c2ecf20Sopenharmony_ci			 * We have to keep any raid0 data/metadata device pairs or
25498c2ecf20Sopenharmony_ci			 * the MD raid0 personality will fail to start the array.
25508c2ecf20Sopenharmony_ci			 */
25518c2ecf20Sopenharmony_ci			if (rs_is_raid0(rs))
25528c2ecf20Sopenharmony_ci				continue;
25538c2ecf20Sopenharmony_ci
25548c2ecf20Sopenharmony_ci			/*
25558c2ecf20Sopenharmony_ci			 * We keep the dm_devs to be able to emit the device tuple
25568c2ecf20Sopenharmony_ci			 * properly on the table line in raid_status() (rather than
25578c2ecf20Sopenharmony_ci			 * mistakenly acting as if '- -' got passed into the constructor).
25588c2ecf20Sopenharmony_ci			 *
25598c2ecf20Sopenharmony_ci			 * The rdev has to stay on the same_set list to allow for
25608c2ecf20Sopenharmony_ci			 * the attempt to restore faulty devices on second resume.
25618c2ecf20Sopenharmony_ci			 */
25628c2ecf20Sopenharmony_ci			rdev->raid_disk = rdev->saved_raid_disk = -1;
25638c2ecf20Sopenharmony_ci			break;
25648c2ecf20Sopenharmony_ci		}
25658c2ecf20Sopenharmony_ci	}
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ci	if (!freshest)
25688c2ecf20Sopenharmony_ci		return 0;
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci	/*
25718c2ecf20Sopenharmony_ci	 * Validation of the freshest device provides the source of
25728c2ecf20Sopenharmony_ci	 * validation for the remaining devices.
25738c2ecf20Sopenharmony_ci	 */
25748c2ecf20Sopenharmony_ci	rs->ti->error = "Unable to assemble array: Invalid superblocks";
25758c2ecf20Sopenharmony_ci	if (super_validate(rs, freshest))
25768c2ecf20Sopenharmony_ci		return -EINVAL;
25778c2ecf20Sopenharmony_ci
25788c2ecf20Sopenharmony_ci	if (validate_raid_redundancy(rs)) {
25798c2ecf20Sopenharmony_ci		rs->ti->error = "Insufficient redundancy to activate array";
25808c2ecf20Sopenharmony_ci		return -EINVAL;
25818c2ecf20Sopenharmony_ci	}
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev)
25848c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags) &&
25858c2ecf20Sopenharmony_ci		    rdev != freshest &&
25868c2ecf20Sopenharmony_ci		    super_validate(rs, rdev))
25878c2ecf20Sopenharmony_ci			return -EINVAL;
25888c2ecf20Sopenharmony_ci	return 0;
25898c2ecf20Sopenharmony_ci}
25908c2ecf20Sopenharmony_ci
25918c2ecf20Sopenharmony_ci/*
25928c2ecf20Sopenharmony_ci * Adjust data_offset and new_data_offset on all disk members of @rs
25938c2ecf20Sopenharmony_ci * for out-of-place reshaping if requested by the constructor
25948c2ecf20Sopenharmony_ci *
25958c2ecf20Sopenharmony_ci * We need free space at the beginning of each raid disk for forward
25968c2ecf20Sopenharmony_ci * and at the end for backward reshapes which userspace has to provide
25978c2ecf20Sopenharmony_ci * via remapping/reordering of space.
25988c2ecf20Sopenharmony_ci */
25998c2ecf20Sopenharmony_cistatic int rs_adjust_data_offsets(struct raid_set *rs)
26008c2ecf20Sopenharmony_ci{
26018c2ecf20Sopenharmony_ci	sector_t data_offset = 0, new_data_offset = 0;
26028c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
26038c2ecf20Sopenharmony_ci
26048c2ecf20Sopenharmony_ci	/* Constructor did not request data offset change */
26058c2ecf20Sopenharmony_ci	if (!test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
26068c2ecf20Sopenharmony_ci		if (!rs_is_reshapable(rs))
26078c2ecf20Sopenharmony_ci			goto out;
26088c2ecf20Sopenharmony_ci
26098c2ecf20Sopenharmony_ci		return 0;
26108c2ecf20Sopenharmony_ci	}
26118c2ecf20Sopenharmony_ci
26128c2ecf20Sopenharmony_ci	/* HM FIXME: get In_Sync raid_dev? */
26138c2ecf20Sopenharmony_ci	rdev = &rs->dev[0].rdev;
26148c2ecf20Sopenharmony_ci
26158c2ecf20Sopenharmony_ci	if (rs->delta_disks < 0) {
26168c2ecf20Sopenharmony_ci		/*
26178c2ecf20Sopenharmony_ci		 * Removing disks (reshaping backwards):
26188c2ecf20Sopenharmony_ci		 *
26198c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset 0 and free space
26208c2ecf20Sopenharmony_ci		 *		     is at end of each component LV
26218c2ecf20Sopenharmony_ci		 *
26228c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset rs->data_offset != 0 on each component LV
26238c2ecf20Sopenharmony_ci		 */
26248c2ecf20Sopenharmony_ci		data_offset = 0;
26258c2ecf20Sopenharmony_ci		new_data_offset = rs->data_offset;
26268c2ecf20Sopenharmony_ci
26278c2ecf20Sopenharmony_ci	} else if (rs->delta_disks > 0) {
26288c2ecf20Sopenharmony_ci		/*
26298c2ecf20Sopenharmony_ci		 * Adding disks (reshaping forwards):
26308c2ecf20Sopenharmony_ci		 *
26318c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset rs->data_offset != 0 and
26328c2ecf20Sopenharmony_ci		 *		     free space is at begin of each component LV
26338c2ecf20Sopenharmony_ci		 *
26348c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset 0 on each component LV
26358c2ecf20Sopenharmony_ci		 */
26368c2ecf20Sopenharmony_ci		data_offset = rs->data_offset;
26378c2ecf20Sopenharmony_ci		new_data_offset = 0;
26388c2ecf20Sopenharmony_ci
26398c2ecf20Sopenharmony_ci	} else {
26408c2ecf20Sopenharmony_ci		/*
26418c2ecf20Sopenharmony_ci		 * User space passes in 0 for data offset after having removed reshape space
26428c2ecf20Sopenharmony_ci		 *
26438c2ecf20Sopenharmony_ci		 * - or - (data offset != 0)
26448c2ecf20Sopenharmony_ci		 *
26458c2ecf20Sopenharmony_ci		 * Changing RAID layout or chunk size -> toggle offsets
26468c2ecf20Sopenharmony_ci		 *
26478c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset rs->data_offset == 0 and
26488c2ecf20Sopenharmony_ci		 *		     free space is at end of each component LV
26498c2ecf20Sopenharmony_ci		 *		     -or-
26508c2ecf20Sopenharmony_ci		 *                   data is at offset rs->data_offset != 0 and
26518c2ecf20Sopenharmony_ci		 *		     free space is at begin of each component LV
26528c2ecf20Sopenharmony_ci		 *
26538c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset 0 if it was at offset != 0
26548c2ecf20Sopenharmony_ci		 *                  or at offset != 0 if it was at offset 0
26558c2ecf20Sopenharmony_ci		 *                  on each component LV
26568c2ecf20Sopenharmony_ci		 *
26578c2ecf20Sopenharmony_ci		 */
26588c2ecf20Sopenharmony_ci		data_offset = rs->data_offset ? rdev->data_offset : 0;
26598c2ecf20Sopenharmony_ci		new_data_offset = data_offset ? 0 : rs->data_offset;
26608c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
26618c2ecf20Sopenharmony_ci	}
26628c2ecf20Sopenharmony_ci
26638c2ecf20Sopenharmony_ci	/*
26648c2ecf20Sopenharmony_ci	 * Make sure we got a minimum amount of free sectors per device
26658c2ecf20Sopenharmony_ci	 */
26668c2ecf20Sopenharmony_ci	if (rs->data_offset &&
26678c2ecf20Sopenharmony_ci	    to_sector(i_size_read(rdev->bdev->bd_inode)) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) {
26688c2ecf20Sopenharmony_ci		rs->ti->error = data_offset ? "No space for forward reshape" :
26698c2ecf20Sopenharmony_ci					      "No space for backward reshape";
26708c2ecf20Sopenharmony_ci		return -ENOSPC;
26718c2ecf20Sopenharmony_ci	}
26728c2ecf20Sopenharmony_ciout:
26738c2ecf20Sopenharmony_ci	/*
26748c2ecf20Sopenharmony_ci	 * Raise recovery_cp in case data_offset != 0 to
26758c2ecf20Sopenharmony_ci	 * avoid false recovery positives in the constructor.
26768c2ecf20Sopenharmony_ci	 */
26778c2ecf20Sopenharmony_ci	if (rs->md.recovery_cp < rs->md.dev_sectors)
26788c2ecf20Sopenharmony_ci		rs->md.recovery_cp += rs->dev[0].rdev.data_offset;
26798c2ecf20Sopenharmony_ci
26808c2ecf20Sopenharmony_ci	/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
26818c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md) {
26828c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags)) {
26838c2ecf20Sopenharmony_ci			rdev->data_offset = data_offset;
26848c2ecf20Sopenharmony_ci			rdev->new_data_offset = new_data_offset;
26858c2ecf20Sopenharmony_ci		}
26868c2ecf20Sopenharmony_ci	}
26878c2ecf20Sopenharmony_ci
26888c2ecf20Sopenharmony_ci	return 0;
26898c2ecf20Sopenharmony_ci}
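
/*
 * Illustrative summary (not part of the driver) of the offset selection in
 * rs_adjust_data_offsets() above when the constructor passed
 * "data_offset <sectors>":
 *
 *   delta_disks    data_offset       new_data_offset    reshape direction
 *   -----------    ---------------   ---------------    -----------------
 *       < 0        0                 rs->data_offset    backward (shrink)
 *       > 0        rs->data_offset   0                  forward (grow)
 *      == 0        toggled           toggled            depends on offsets
 */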
26908c2ecf20Sopenharmony_ci
26918c2ecf20Sopenharmony_ci/* Userspace reordered disks -> adjust raid_disk indexes in @rs */
26928c2ecf20Sopenharmony_cistatic void __reorder_raid_disk_indexes(struct raid_set *rs)
26938c2ecf20Sopenharmony_ci{
26948c2ecf20Sopenharmony_ci	int i = 0;
26958c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
26968c2ecf20Sopenharmony_ci
26978c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md) {
26988c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags)) {
26998c2ecf20Sopenharmony_ci			rdev->raid_disk = i++;
27008c2ecf20Sopenharmony_ci			rdev->saved_raid_disk = rdev->new_raid_disk = -1;
27018c2ecf20Sopenharmony_ci		}
27028c2ecf20Sopenharmony_ci	}
27038c2ecf20Sopenharmony_ci}
27048c2ecf20Sopenharmony_ci
27058c2ecf20Sopenharmony_ci/*
27068c2ecf20Sopenharmony_ci * Setup @rs for takeover by a different raid level
27078c2ecf20Sopenharmony_ci */
27088c2ecf20Sopenharmony_cistatic int rs_setup_takeover(struct raid_set *rs)
27098c2ecf20Sopenharmony_ci{
27108c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
27118c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
27128c2ecf20Sopenharmony_ci	unsigned int d = mddev->raid_disks = rs->raid_disks;
27138c2ecf20Sopenharmony_ci	sector_t new_data_offset = rs->dev[0].rdev.data_offset ? 0 : rs->data_offset;
27148c2ecf20Sopenharmony_ci
27158c2ecf20Sopenharmony_ci	if (rt_is_raid10(rs->raid_type)) {
27168c2ecf20Sopenharmony_ci		if (rs_is_raid0(rs)) {
27178c2ecf20Sopenharmony_ci			/* Userspace reordered disks -> adjust raid_disk indexes */
27188c2ecf20Sopenharmony_ci			__reorder_raid_disk_indexes(rs);
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_ci			/* raid0 -> raid10_far layout */
27218c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_FAR,
27228c2ecf20Sopenharmony_ci								   rs->raid10_copies);
27238c2ecf20Sopenharmony_ci		} else if (rs_is_raid1(rs))
27248c2ecf20Sopenharmony_ci			/* raid1 -> raid10_near layout */
27258c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
27268c2ecf20Sopenharmony_ci								   rs->raid_disks);
27278c2ecf20Sopenharmony_ci		else
27288c2ecf20Sopenharmony_ci			return -EINVAL;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	}
27318c2ecf20Sopenharmony_ci
27328c2ecf20Sopenharmony_ci	clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
27338c2ecf20Sopenharmony_ci	mddev->recovery_cp = MaxSector;
27348c2ecf20Sopenharmony_ci
27358c2ecf20Sopenharmony_ci	while (d--) {
27368c2ecf20Sopenharmony_ci		rdev = &rs->dev[d].rdev;
27378c2ecf20Sopenharmony_ci
27388c2ecf20Sopenharmony_ci		if (test_bit(d, (void *) rs->rebuild_disks)) {
27398c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags);
27408c2ecf20Sopenharmony_ci			clear_bit(Faulty, &rdev->flags);
27418c2ecf20Sopenharmony_ci			mddev->recovery_cp = rdev->recovery_offset = 0;
27428c2ecf20Sopenharmony_ci			/* Bitmap has to be created when we do an "up" takeover */
27438c2ecf20Sopenharmony_ci			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
27448c2ecf20Sopenharmony_ci		}
27458c2ecf20Sopenharmony_ci
27468c2ecf20Sopenharmony_ci		rdev->new_data_offset = new_data_offset;
27478c2ecf20Sopenharmony_ci	}
27488c2ecf20Sopenharmony_ci
27498c2ecf20Sopenharmony_ci	return 0;
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci/* Prepare @rs for reshape */
27538c2ecf20Sopenharmony_cistatic int rs_prepare_reshape(struct raid_set *rs)
27548c2ecf20Sopenharmony_ci{
27558c2ecf20Sopenharmony_ci	bool reshape;
27568c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
27578c2ecf20Sopenharmony_ci
27588c2ecf20Sopenharmony_ci	if (rs_is_raid10(rs)) {
27598c2ecf20Sopenharmony_ci		if (rs->raid_disks != mddev->raid_disks &&
27608c2ecf20Sopenharmony_ci		    __is_raid10_near(mddev->layout) &&
27618c2ecf20Sopenharmony_ci		    rs->raid10_copies &&
27628c2ecf20Sopenharmony_ci		    rs->raid10_copies != __raid10_near_copies(mddev->layout)) {
27638c2ecf20Sopenharmony_ci			/*
27648c2ecf20Sopenharmony_ci			 * raid disks have to be a multiple of data copies to allow this conversion.
27658c2ecf20Sopenharmony_ci			 *
27668c2ecf20Sopenharmony_ci			 * This is actually not a reshape; it is a
27678c2ecf20Sopenharmony_ci			 * rebuild of any additional mirrors per group.
27688c2ecf20Sopenharmony_ci			 */
27698c2ecf20Sopenharmony_ci			if (rs->raid_disks % rs->raid10_copies) {
27708c2ecf20Sopenharmony_ci				rs->ti->error = "Can't reshape raid10 mirror groups";
27718c2ecf20Sopenharmony_ci				return -EINVAL;
27728c2ecf20Sopenharmony_ci			}
27738c2ecf20Sopenharmony_ci
27748c2ecf20Sopenharmony_ci			/* Userspace reordered disks to add/remove mirrors -> adjust raid_disk indexes */
27758c2ecf20Sopenharmony_ci			__reorder_raid_disk_indexes(rs);
27768c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
27778c2ecf20Sopenharmony_ci								   rs->raid10_copies);
27788c2ecf20Sopenharmony_ci			mddev->new_layout = mddev->layout;
27798c2ecf20Sopenharmony_ci			reshape = false;
27808c2ecf20Sopenharmony_ci		} else
27818c2ecf20Sopenharmony_ci			reshape = true;
27828c2ecf20Sopenharmony_ci
27838c2ecf20Sopenharmony_ci	} else if (rs_is_raid456(rs))
27848c2ecf20Sopenharmony_ci		reshape = true;
27858c2ecf20Sopenharmony_ci
27868c2ecf20Sopenharmony_ci	else if (rs_is_raid1(rs)) {
27878c2ecf20Sopenharmony_ci		if (rs->delta_disks) {
27888c2ecf20Sopenharmony_ci			/* Process raid1 via delta_disks */
27898c2ecf20Sopenharmony_ci			mddev->degraded = rs->delta_disks < 0 ? -rs->delta_disks : rs->delta_disks;
27908c2ecf20Sopenharmony_ci			reshape = true;
27918c2ecf20Sopenharmony_ci		} else {
27928c2ecf20Sopenharmony_ci			/* Process raid1 without delta_disks */
27938c2ecf20Sopenharmony_ci			mddev->raid_disks = rs->raid_disks;
27948c2ecf20Sopenharmony_ci			reshape = false;
27958c2ecf20Sopenharmony_ci		}
27968c2ecf20Sopenharmony_ci	} else {
27978c2ecf20Sopenharmony_ci		rs->ti->error = "Called with bogus raid type";
27988c2ecf20Sopenharmony_ci		return -EINVAL;
27998c2ecf20Sopenharmony_ci	}
28008c2ecf20Sopenharmony_ci
28018c2ecf20Sopenharmony_ci	if (reshape) {
28028c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
28038c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
28048c2ecf20Sopenharmony_ci	} else if (mddev->raid_disks < rs->raid_disks)
28058c2ecf20Sopenharmony_ci		/* Create new superblocks and bitmaps, if any new disks */
28068c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
28078c2ecf20Sopenharmony_ci
28088c2ecf20Sopenharmony_ci	return 0;
28098c2ecf20Sopenharmony_ci}
28108c2ecf20Sopenharmony_ci
28118c2ecf20Sopenharmony_ci/* Get reshape sectors from data_offsets or raid set */
28128c2ecf20Sopenharmony_cistatic sector_t _get_reshape_sectors(struct raid_set *rs)
28138c2ecf20Sopenharmony_ci{
28148c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
28158c2ecf20Sopenharmony_ci	sector_t reshape_sectors = 0;
28168c2ecf20Sopenharmony_ci
28178c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md)
28188c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags)) {
28198c2ecf20Sopenharmony_ci			reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
28208c2ecf20Sopenharmony_ci					rdev->data_offset - rdev->new_data_offset :
28218c2ecf20Sopenharmony_ci					rdev->new_data_offset - rdev->data_offset;
28228c2ecf20Sopenharmony_ci			break;
28238c2ecf20Sopenharmony_ci		}
28248c2ecf20Sopenharmony_ci
28258c2ecf20Sopenharmony_ci	return max(reshape_sectors, (sector_t) rs->data_offset);
28268c2ecf20Sopenharmony_ci}
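
/*
 * Illustrative example (not part of the driver): with data_offset == 8192
 * and new_data_offset == 0 on the first non-journal rdev,
 * _get_reshape_sectors() above yields 8192 sectors, i.e. 4 MiB of
 * out-of-place reshape space with 512-byte sectors.
 */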
28278c2ecf20Sopenharmony_ci
28288c2ecf20Sopenharmony_ci/*
28298c2ecf20Sopenharmony_ci * Reshape:
28308c2ecf20Sopenharmony_ci * - change raid layout
28318c2ecf20Sopenharmony_ci * - change chunk size
28328c2ecf20Sopenharmony_ci * - add disks
28338c2ecf20Sopenharmony_ci * - remove disks
28348c2ecf20Sopenharmony_ci */
28358c2ecf20Sopenharmony_cistatic int rs_setup_reshape(struct raid_set *rs)
28368c2ecf20Sopenharmony_ci{
28378c2ecf20Sopenharmony_ci	int r = 0;
28388c2ecf20Sopenharmony_ci	unsigned int cur_raid_devs, d;
28398c2ecf20Sopenharmony_ci	sector_t reshape_sectors = _get_reshape_sectors(rs);
28408c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
28418c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
28428c2ecf20Sopenharmony_ci
28438c2ecf20Sopenharmony_ci	mddev->delta_disks = rs->delta_disks;
28448c2ecf20Sopenharmony_ci	cur_raid_devs = mddev->raid_disks;
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_ci	/* Ignore impossible layout change whilst adding/removing disks */
28478c2ecf20Sopenharmony_ci	if (mddev->delta_disks &&
28488c2ecf20Sopenharmony_ci	    mddev->layout != mddev->new_layout) {
28498c2ecf20Sopenharmony_ci		DMINFO("Ignoring invalid layout change with delta_disks=%d", rs->delta_disks);
28508c2ecf20Sopenharmony_ci		mddev->new_layout = mddev->layout;
28518c2ecf20Sopenharmony_ci	}
28528c2ecf20Sopenharmony_ci
28538c2ecf20Sopenharmony_ci	/*
28548c2ecf20Sopenharmony_ci	 * Adjust array size:
28558c2ecf20Sopenharmony_ci	 *
28568c2ecf20Sopenharmony_ci	 * - in case of adding disk(s), array size has
28578c2ecf20Sopenharmony_ci	 *   to grow after the disk adding reshape,
28588c2ecf20Sopenharmony_ci	 *   which'll happen in the event handler;
28598c2ecf20Sopenharmony_ci	 *   reshape will happen forward, so space has to
28608c2ecf20Sopenharmony_ci	 *   be available at the beginning of each disk
28618c2ecf20Sopenharmony_ci	 *
28628c2ecf20Sopenharmony_ci	 * - in case of removing disk(s), array size
28638c2ecf20Sopenharmony_ci	 *   has to shrink before starting the reshape,
28648c2ecf20Sopenharmony_ci	 *   which'll happen here;
28658c2ecf20Sopenharmony_ci	 *   reshape will happen backward, so space has to
28668c2ecf20Sopenharmony_ci	 *   be available at the end of each disk
28678c2ecf20Sopenharmony_ci	 *
28688c2ecf20Sopenharmony_ci	 * - data_offset and new_data_offset are
28698c2ecf20Sopenharmony_ci	 *   adjusted for aforementioned out of place
28708c2ecf20Sopenharmony_ci	 *   reshaping based on userspace passing in
28718c2ecf20Sopenharmony_ci	 *   the "data_offset <sectors>" key/value
28728c2ecf20Sopenharmony_ci	 *   pair via the constructor
28738c2ecf20Sopenharmony_ci	 */
28748c2ecf20Sopenharmony_ci
28758c2ecf20Sopenharmony_ci	/* Add disk(s) */
28768c2ecf20Sopenharmony_ci	if (rs->delta_disks > 0) {
28778c2ecf20Sopenharmony_ci		/* Prepare disks for check in raid4/5/6/10 {check|start}_reshape */
28788c2ecf20Sopenharmony_ci		for (d = cur_raid_devs; d < rs->raid_disks; d++) {
28798c2ecf20Sopenharmony_ci			rdev = &rs->dev[d].rdev;
28808c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags);
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_ci			/*
28838c2ecf20Sopenharmony_ci			 * saved_raid_disk needs to be -1, or recovery_offset will be set to 0
28848c2ecf20Sopenharmony_ci			 * by md, which'll store that erroneously in the superblock on reshape
28858c2ecf20Sopenharmony_ci			 */
28868c2ecf20Sopenharmony_ci			rdev->saved_raid_disk = -1;
28878c2ecf20Sopenharmony_ci			rdev->raid_disk = d;
28888c2ecf20Sopenharmony_ci
28898c2ecf20Sopenharmony_ci			rdev->sectors = mddev->dev_sectors;
28908c2ecf20Sopenharmony_ci			rdev->recovery_offset = rs_is_raid1(rs) ? 0 : MaxSector;
28918c2ecf20Sopenharmony_ci		}
28928c2ecf20Sopenharmony_ci
28938c2ecf20Sopenharmony_ci		mddev->reshape_backwards = 0; /* adding disk(s) -> forward reshape */
28948c2ecf20Sopenharmony_ci
28958c2ecf20Sopenharmony_ci	/* Remove disk(s) */
28968c2ecf20Sopenharmony_ci	} else if (rs->delta_disks < 0) {
28978c2ecf20Sopenharmony_ci		r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
28988c2ecf20Sopenharmony_ci		mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
28998c2ecf20Sopenharmony_ci
29008c2ecf20Sopenharmony_ci	/* Change layout and/or chunk size */
29018c2ecf20Sopenharmony_ci	} else {
29028c2ecf20Sopenharmony_ci		/*
29038c2ecf20Sopenharmony_ci		 * Reshape layout (e.g. raid5_ls -> raid5_n) and/or chunk size:
29048c2ecf20Sopenharmony_ci		 *
29058c2ecf20Sopenharmony_ci		 * keeping the number of disks while changing layout ->
29068c2ecf20Sopenharmony_ci		 *
29078c2ecf20Sopenharmony_ci		 * toggle reshape_backward depending on data_offset:
29088c2ecf20Sopenharmony_ci		 *
29098c2ecf20Sopenharmony_ci		 * - free space upfront -> reshape forward
29108c2ecf20Sopenharmony_ci		 *
29118c2ecf20Sopenharmony_ci		 * - free space at the end -> reshape backward
29128c2ecf20Sopenharmony_ci		 *
29138c2ecf20Sopenharmony_ci		 *
29148c2ecf20Sopenharmony_ci		 * This utilizes free reshape space, avoiding the need
29158c2ecf20Sopenharmony_ci		 * for userspace to move (parts of) LV segments in
29168c2ecf20Sopenharmony_ci		 * case of a layout/chunk size change.  For disk
29178c2ecf20Sopenharmony_ci		 * adding/removing, reshape space has to be at
29188c2ecf20Sopenharmony_ci		 * the proper address (see above with delta_disks):
29198c2ecf20Sopenharmony_ci		 *
29208c2ecf20Sopenharmony_ci		 * add disk(s)   -> begin
29218c2ecf20Sopenharmony_ci		 * remove disk(s)-> end
29228c2ecf20Sopenharmony_ci		 */
29238c2ecf20Sopenharmony_ci		mddev->reshape_backwards = rs->dev[0].rdev.data_offset ? 0 : 1;
29248c2ecf20Sopenharmony_ci	}
29258c2ecf20Sopenharmony_ci
29268c2ecf20Sopenharmony_ci	/*
29278c2ecf20Sopenharmony_ci	 * Adjust device size for forward reshape
29288c2ecf20Sopenharmony_ci	 * because md_finish_reshape() reduces it.
29298c2ecf20Sopenharmony_ci	 */
29308c2ecf20Sopenharmony_ci	if (!mddev->reshape_backwards)
29318c2ecf20Sopenharmony_ci		rdev_for_each(rdev, &rs->md)
29328c2ecf20Sopenharmony_ci			if (!test_bit(Journal, &rdev->flags))
29338c2ecf20Sopenharmony_ci				rdev->sectors += reshape_sectors;
29348c2ecf20Sopenharmony_ci
29358c2ecf20Sopenharmony_ci	return r;
29368c2ecf20Sopenharmony_ci}
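
/*
 * Illustrative summary (not part of the driver) of the direction chosen by
 * rs_setup_reshape() above:
 *
 *   delta_disks > 0  -> forward reshape; free space needed at the start
 *   delta_disks < 0  -> backward reshape; free space needed at the end
 *   delta_disks == 0 -> direction follows the free space, i.e.
 *                       data_offset != 0 means forward, else backward
 */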
29378c2ecf20Sopenharmony_ci
29388c2ecf20Sopenharmony_ci/*
29398c2ecf20Sopenharmony_ci * If the md resync thread has updated superblock with max reshape position
29408c2ecf20Sopenharmony_ci * at the end of a reshape but not (yet) reset the layout configuration
29418c2ecf20Sopenharmony_ci * changes -> reset the latter.
29428c2ecf20Sopenharmony_ci */
29438c2ecf20Sopenharmony_cistatic void rs_reset_inconclusive_reshape(struct raid_set *rs)
29448c2ecf20Sopenharmony_ci{
29458c2ecf20Sopenharmony_ci	if (!rs_is_reshaping(rs) && rs_is_layout_change(rs, true)) {
29468c2ecf20Sopenharmony_ci		rs_set_cur(rs);
29478c2ecf20Sopenharmony_ci		rs->md.delta_disks = 0;
29488c2ecf20Sopenharmony_ci		rs->md.reshape_backwards = 0;
29498c2ecf20Sopenharmony_ci	}
29508c2ecf20Sopenharmony_ci}
29518c2ecf20Sopenharmony_ci
29528c2ecf20Sopenharmony_ci/*
29538c2ecf20Sopenharmony_ci * Enable/disable discard support on RAID set depending on
29548c2ecf20Sopenharmony_ci * RAID level and discard properties of underlying RAID members.
29558c2ecf20Sopenharmony_ci */
29568c2ecf20Sopenharmony_cistatic void configure_discard_support(struct raid_set *rs)
29578c2ecf20Sopenharmony_ci{
29588c2ecf20Sopenharmony_ci	int i;
29598c2ecf20Sopenharmony_ci	bool raid456;
29608c2ecf20Sopenharmony_ci	struct dm_target *ti = rs->ti;
29618c2ecf20Sopenharmony_ci
29628c2ecf20Sopenharmony_ci	/*
29638c2ecf20Sopenharmony_ci	 * XXX: RAID level 4,5,6 require zeroing for safety.
29648c2ecf20Sopenharmony_ci	 */
29658c2ecf20Sopenharmony_ci	raid456 = rs_is_raid456(rs);
29668c2ecf20Sopenharmony_ci
29678c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++) {
29688c2ecf20Sopenharmony_ci		struct request_queue *q;
29698c2ecf20Sopenharmony_ci
29708c2ecf20Sopenharmony_ci		if (!rs->dev[i].rdev.bdev)
29718c2ecf20Sopenharmony_ci			continue;
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci		q = bdev_get_queue(rs->dev[i].rdev.bdev);
29748c2ecf20Sopenharmony_ci		if (!q || !blk_queue_discard(q))
29758c2ecf20Sopenharmony_ci			return;
29768c2ecf20Sopenharmony_ci
29778c2ecf20Sopenharmony_ci		if (raid456) {
29788c2ecf20Sopenharmony_ci			if (!devices_handle_discard_safely) {
29798c2ecf20Sopenharmony_ci				DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
29808c2ecf20Sopenharmony_ci				DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
29818c2ecf20Sopenharmony_ci				return;
29828c2ecf20Sopenharmony_ci			}
29838c2ecf20Sopenharmony_ci		}
29848c2ecf20Sopenharmony_ci	}
29858c2ecf20Sopenharmony_ci
29868c2ecf20Sopenharmony_ci	ti->num_discard_bios = 1;
29878c2ecf20Sopenharmony_ci}
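
/*
 * Usage example (hypothetical invocation; assumes the parameter is exported
 * via module_param() as in mainline dm-raid): to allow discards on a
 * raid4/5/6 set whose members reliably return zeroes after discard, set
 *
 *   modprobe dm-raid devices_handle_discard_safely=Y
 *
 * or on the kernel command line:
 *
 *   dm-raid.devices_handle_discard_safely=Y
 */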
29888c2ecf20Sopenharmony_ci
29898c2ecf20Sopenharmony_ci/*
29908c2ecf20Sopenharmony_ci * Construct a RAID0/1/10/4/5/6 mapping:
29918c2ecf20Sopenharmony_ci * Args:
29928c2ecf20Sopenharmony_ci *	<raid_type> <#raid_params> <raid_params>{0,}	\
29938c2ecf20Sopenharmony_ci *	<#raid_devs> [<meta_dev1> <dev1>]{1,}
29948c2ecf20Sopenharmony_ci *
29958c2ecf20Sopenharmony_ci * <raid_params> varies by <raid_type>.	 See 'parse_raid_params' for
29968c2ecf20Sopenharmony_ci * details on possible <raid_params>.
29978c2ecf20Sopenharmony_ci *
29988c2ecf20Sopenharmony_ci * Userspace is free to initialize the metadata devices (hence the superblocks) to
29998c2ecf20Sopenharmony_ci * enforce recreation based on the passed-in table parameters.
30008c2ecf20Sopenharmony_ci *
30018c2ecf20Sopenharmony_ci */
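/*
 * Example table line (after Documentation/admin-guide/device-mapper/
 * dm-raid.rst; device numbers hypothetical): a raid4 set of 5 devices
 * with a 1 MiB chunk size (2048 sectors) and no metadata devices:
 *
 *   0 1960893648 raid \
 *           raid4 1 2048 \
 *           5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 */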
30028c2ecf20Sopenharmony_cistatic int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
30038c2ecf20Sopenharmony_ci{
30048c2ecf20Sopenharmony_ci	int r;
30058c2ecf20Sopenharmony_ci	bool resize = false;
30068c2ecf20Sopenharmony_ci	struct raid_type *rt;
30078c2ecf20Sopenharmony_ci	unsigned int num_raid_params, num_raid_devs;
30088c2ecf20Sopenharmony_ci	sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
30098c2ecf20Sopenharmony_ci	struct raid_set *rs = NULL;
30108c2ecf20Sopenharmony_ci	const char *arg;
30118c2ecf20Sopenharmony_ci	struct rs_layout rs_layout;
30128c2ecf20Sopenharmony_ci	struct dm_arg_set as = { argc, argv }, as_nrd;
30138c2ecf20Sopenharmony_ci	struct dm_arg _args[] = {
30148c2ecf20Sopenharmony_ci		{ 0, as.argc, "Cannot understand number of raid parameters" },
30158c2ecf20Sopenharmony_ci		{ 1, 254, "Cannot understand number of raid devices parameters" }
30168c2ecf20Sopenharmony_ci	};
30178c2ecf20Sopenharmony_ci
30188c2ecf20Sopenharmony_ci	arg = dm_shift_arg(&as);
30198c2ecf20Sopenharmony_ci	if (!arg) {
30208c2ecf20Sopenharmony_ci		ti->error = "No arguments";
30218c2ecf20Sopenharmony_ci		return -EINVAL;
30228c2ecf20Sopenharmony_ci	}
30238c2ecf20Sopenharmony_ci
30248c2ecf20Sopenharmony_ci	rt = get_raid_type(arg);
30258c2ecf20Sopenharmony_ci	if (!rt) {
30268c2ecf20Sopenharmony_ci		ti->error = "Unrecognised raid_type";
30278c2ecf20Sopenharmony_ci		return -EINVAL;
30288c2ecf20Sopenharmony_ci	}
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_ci	/* Must have <#raid_params> */
30318c2ecf20Sopenharmony_ci	if (dm_read_arg_group(_args, &as, &num_raid_params, &ti->error))
30328c2ecf20Sopenharmony_ci		return -EINVAL;
30338c2ecf20Sopenharmony_ci
30348c2ecf20Sopenharmony_ci	/* number of raid device tuples <meta_dev data_dev> */
30358c2ecf20Sopenharmony_ci	as_nrd = as;
30368c2ecf20Sopenharmony_ci	dm_consume_args(&as_nrd, num_raid_params);
30378c2ecf20Sopenharmony_ci	_args[1].max = (as_nrd.argc - 1) / 2;
30388c2ecf20Sopenharmony_ci	if (dm_read_arg(_args + 1, &as_nrd, &num_raid_devs, &ti->error))
30398c2ecf20Sopenharmony_ci		return -EINVAL;
30408c2ecf20Sopenharmony_ci
30418c2ecf20Sopenharmony_ci	if (!__within_range(num_raid_devs, 1, MAX_RAID_DEVICES)) {
30428c2ecf20Sopenharmony_ci		ti->error = "Invalid number of supplied raid devices";
30438c2ecf20Sopenharmony_ci		return -EINVAL;
30448c2ecf20Sopenharmony_ci	}
30458c2ecf20Sopenharmony_ci
30468c2ecf20Sopenharmony_ci	rs = raid_set_alloc(ti, rt, num_raid_devs);
30478c2ecf20Sopenharmony_ci	if (IS_ERR(rs))
30488c2ecf20Sopenharmony_ci		return PTR_ERR(rs);
30498c2ecf20Sopenharmony_ci
30508c2ecf20Sopenharmony_ci	r = parse_raid_params(rs, &as, num_raid_params);
30518c2ecf20Sopenharmony_ci	if (r)
30528c2ecf20Sopenharmony_ci		goto bad;
30538c2ecf20Sopenharmony_ci
30548c2ecf20Sopenharmony_ci	r = parse_dev_params(rs, &as);
30558c2ecf20Sopenharmony_ci	if (r)
30568c2ecf20Sopenharmony_ci		goto bad;
30578c2ecf20Sopenharmony_ci
30588c2ecf20Sopenharmony_ci	rs->md.sync_super = super_sync;
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_ci	/*
30618c2ecf20Sopenharmony_ci	 * Calculate ctr requested array and device sizes to allow
30628c2ecf20Sopenharmony_ci	 * for superblock analysis needing device sizes defined.
30638c2ecf20Sopenharmony_ci	 *
30648c2ecf20Sopenharmony_ci	 * Any existing superblock will overwrite the array and device sizes
30658c2ecf20Sopenharmony_ci	 */
30668c2ecf20Sopenharmony_ci	r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
30678c2ecf20Sopenharmony_ci	if (r)
30688c2ecf20Sopenharmony_ci		goto bad;
30698c2ecf20Sopenharmony_ci
30708c2ecf20Sopenharmony_ci	/* Memorize the just-calculated, potentially larger sizes to grow the raid set in preresume */
30718c2ecf20Sopenharmony_ci	rs->array_sectors = rs->md.array_sectors;
30728c2ecf20Sopenharmony_ci	rs->dev_sectors = rs->md.dev_sectors;
30738c2ecf20Sopenharmony_ci
30748c2ecf20Sopenharmony_ci	/*
30758c2ecf20Sopenharmony_ci	 * Backup any new raid set level, layout, ...
30768c2ecf20Sopenharmony_ci	 * requested to be able to compare to superblock
30778c2ecf20Sopenharmony_ci	 * members for conversion decisions.
30788c2ecf20Sopenharmony_ci	 */
30798c2ecf20Sopenharmony_ci	rs_config_backup(rs, &rs_layout);
30808c2ecf20Sopenharmony_ci
30818c2ecf20Sopenharmony_ci	r = analyse_superblocks(ti, rs);
30828c2ecf20Sopenharmony_ci	if (r)
30838c2ecf20Sopenharmony_ci		goto bad;
30848c2ecf20Sopenharmony_ci
30858c2ecf20Sopenharmony_ci	/* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
30868c2ecf20Sopenharmony_ci	sb_array_sectors = rs->md.array_sectors;
30878c2ecf20Sopenharmony_ci	rdev_sectors = __rdev_sectors(rs);
30888c2ecf20Sopenharmony_ci	if (!rdev_sectors) {
30898c2ecf20Sopenharmony_ci		ti->error = "Invalid rdev size";
30908c2ecf20Sopenharmony_ci		r = -EINVAL;
30918c2ecf20Sopenharmony_ci		goto bad;
30928c2ecf20Sopenharmony_ci	}
30938c2ecf20Sopenharmony_ci
30958c2ecf20Sopenharmony_ci	reshape_sectors = _get_reshape_sectors(rs);
30968c2ecf20Sopenharmony_ci	if (rs->dev_sectors != rdev_sectors) {
30978c2ecf20Sopenharmony_ci		resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
30988c2ecf20Sopenharmony_ci		if (rs->dev_sectors > rdev_sectors - reshape_sectors)
30998c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
31008c2ecf20Sopenharmony_ci	}
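
	/*
	 * Hypothetical worked example for the check above: the superblocks
	 * report rdev_sectors = 2097152 with reshape_sectors = 16384 of
	 * out-of-place space, while the new table requests dev_sectors =
	 * 4194304.  dev_sectors != rdev_sectors - reshape_sectors, so this
	 * counts as a resize, and because the requested size is larger,
	 * RT_FLAG_RS_GROW is set for raid_preresume() to grow the set.
	 */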
31018c2ecf20Sopenharmony_ci
31028c2ecf20Sopenharmony_ci	INIT_WORK(&rs->md.event_work, do_table_event);
31038c2ecf20Sopenharmony_ci	ti->private = rs;
31048c2ecf20Sopenharmony_ci	ti->num_flush_bios = 1;
31058c2ecf20Sopenharmony_ci
31068c2ecf20Sopenharmony_ci	/* Restore any requested new layout for conversion decision */
31078c2ecf20Sopenharmony_ci	rs_config_restore(rs, &rs_layout);
31088c2ecf20Sopenharmony_ci
31098c2ecf20Sopenharmony_ci	/*
31108c2ecf20Sopenharmony_ci	 * Now that we have any superblock metadata available,
31118c2ecf20Sopenharmony_ci	 * check for new, recovering, reshaping, to be taken over,
31128c2ecf20Sopenharmony_ci	 * to be reshaped or an existing, unchanged raid set to
31138c2ecf20Sopenharmony_ci	 * run in sequence.
31148c2ecf20Sopenharmony_ci	 */
31158c2ecf20Sopenharmony_ci	if (test_bit(MD_ARRAY_FIRST_USE, &rs->md.flags)) {
31168c2ecf20Sopenharmony_ci		/* A new raid6 set has to be recovered to ensure proper parity and Q-Syndrome */
31178c2ecf20Sopenharmony_ci		if (rs_is_raid6(rs) &&
31188c2ecf20Sopenharmony_ci		    test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
31198c2ecf20Sopenharmony_ci			ti->error = "'nosync' not allowed for new raid6 set";
31208c2ecf20Sopenharmony_ci			r = -EINVAL;
31218c2ecf20Sopenharmony_ci			goto bad;
31228c2ecf20Sopenharmony_ci		}
31238c2ecf20Sopenharmony_ci		rs_setup_recovery(rs, 0);
31248c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
31258c2ecf20Sopenharmony_ci		rs_set_new(rs);
31268c2ecf20Sopenharmony_ci	} else if (rs_is_recovering(rs)) {
31278c2ecf20Sopenharmony_ci		/* A recovering raid set may be resized */
31288c2ecf20Sopenharmony_ci		goto size_check;
31298c2ecf20Sopenharmony_ci	} else if (rs_is_reshaping(rs)) {
31308c2ecf20Sopenharmony_ci		/* Have to reject size change request during reshape */
31318c2ecf20Sopenharmony_ci		if (resize) {
31328c2ecf20Sopenharmony_ci			ti->error = "Can't resize a reshaping raid set";
31338c2ecf20Sopenharmony_ci			r = -EPERM;
31348c2ecf20Sopenharmony_ci			goto bad;
31358c2ecf20Sopenharmony_ci		}
31368c2ecf20Sopenharmony_ci		/* skip setup rs */
31378c2ecf20Sopenharmony_ci	} else if (rs_takeover_requested(rs)) {
31388c2ecf20Sopenharmony_ci		if (rs_is_reshaping(rs)) {
31398c2ecf20Sopenharmony_ci			ti->error = "Can't takeover a reshaping raid set";
31408c2ecf20Sopenharmony_ci			r = -EPERM;
31418c2ecf20Sopenharmony_ci			goto bad;
31428c2ecf20Sopenharmony_ci		}
31438c2ecf20Sopenharmony_ci
31448c2ecf20Sopenharmony_ci		/* We can't takeover a journaled raid4/5/6 */
31458c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
31468c2ecf20Sopenharmony_ci			ti->error = "Can't takeover a journaled raid4/5/6 set";
31478c2ecf20Sopenharmony_ci			r = -EPERM;
31488c2ecf20Sopenharmony_ci			goto bad;
31498c2ecf20Sopenharmony_ci		}
31508c2ecf20Sopenharmony_ci
31518c2ecf20Sopenharmony_ci		/*
31528c2ecf20Sopenharmony_ci		 * If a takeover is needed, userspace sets any additional
31538c2ecf20Sopenharmony_ci		 * devices to rebuild and we can check for a valid request here.
31548c2ecf20Sopenharmony_ci		 *
31558c2ecf20Sopenharmony_ci		 * If acceptable, set the level to the new requested
31568c2ecf20Sopenharmony_ci		 * one, prohibit requesting recovery, allow the raid
31578c2ecf20Sopenharmony_ci		 * set to run and store superblocks during resume.
31588c2ecf20Sopenharmony_ci		 */
31598c2ecf20Sopenharmony_ci		r = rs_check_takeover(rs);
31608c2ecf20Sopenharmony_ci		if (r)
31618c2ecf20Sopenharmony_ci			goto bad;
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci		r = rs_setup_takeover(rs);
31648c2ecf20Sopenharmony_ci		if (r)
31658c2ecf20Sopenharmony_ci			goto bad;
31668c2ecf20Sopenharmony_ci
31678c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
31688c2ecf20Sopenharmony_ci		/* Takeover ain't recovery, so disable recovery */
31698c2ecf20Sopenharmony_ci		rs_setup_recovery(rs, MaxSector);
31708c2ecf20Sopenharmony_ci		rs_set_new(rs);
31718c2ecf20Sopenharmony_ci	} else if (rs_reshape_requested(rs)) {
31728c2ecf20Sopenharmony_ci		/* Only request grow on raid set size extensions, not on reshapes. */
31738c2ecf20Sopenharmony_ci		clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
31748c2ecf20Sopenharmony_ci
31758c2ecf20Sopenharmony_ci		/*
31768c2ecf20Sopenharmony_ci		 * No need to check for 'ongoing' takeover here, because takeover
31778c2ecf20Sopenharmony_ci		 * is an instant operation as opposed to an ongoing reshape.
31788c2ecf20Sopenharmony_ci		 */
31798c2ecf20Sopenharmony_ci
31808c2ecf20Sopenharmony_ci		/* We can't reshape a journaled raid4/5/6 */
31818c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
31828c2ecf20Sopenharmony_ci			ti->error = "Can't reshape a journaled raid4/5/6 set";
31838c2ecf20Sopenharmony_ci			r = -EPERM;
31848c2ecf20Sopenharmony_ci			goto bad;
31858c2ecf20Sopenharmony_ci		}
31868c2ecf20Sopenharmony_ci
31878c2ecf20Sopenharmony_ci		/* Out-of-place space has to be available to allow for a reshape unless raid1! */
31888c2ecf20Sopenharmony_ci		if (reshape_sectors || rs_is_raid1(rs)) {
31898c2ecf20Sopenharmony_ci			/*
31908c2ecf20Sopenharmony_ci			 * We can only prepare for a reshape here, because the
31918c2ecf20Sopenharmony_ci			 * raid set needs to run to provide the respective reshape
31928c2ecf20Sopenharmony_ci			 * check functions via its MD personality instance.
31938c2ecf20Sopenharmony_ci			 *
31948c2ecf20Sopenharmony_ci			 * So do the reshape check after md_run() succeeded.
31958c2ecf20Sopenharmony_ci			 */
31968c2ecf20Sopenharmony_ci			r = rs_prepare_reshape(rs);
31978c2ecf20Sopenharmony_ci			if (r)
31988c2ecf20Sopenharmony_ci				goto bad;
31998c2ecf20Sopenharmony_ci
32008c2ecf20Sopenharmony_ci			/* Reshaping ain't recovery, so disable recovery */
32018c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, MaxSector);
32028c2ecf20Sopenharmony_ci		}
32038c2ecf20Sopenharmony_ci		rs_set_cur(rs);
32048c2ecf20Sopenharmony_ci	} else {
32058c2ecf20Sopenharmony_cisize_check:
32068c2ecf20Sopenharmony_ci		/* May not set recovery when a device rebuild is requested */
32078c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
32088c2ecf20Sopenharmony_ci			clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
32098c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
32108c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, MaxSector);
32118c2ecf20Sopenharmony_ci		} else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
32128c2ecf20Sopenharmony_ci			/*
32138c2ecf20Sopenharmony_ci			 * Set raid set to current size, i.e. size as of
32148c2ecf20Sopenharmony_ci			 * superblocks to grow to larger size in preresume.
32158c2ecf20Sopenharmony_ci			 */
32168c2ecf20Sopenharmony_ci			r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
32178c2ecf20Sopenharmony_ci			if (r)
32188c2ecf20Sopenharmony_ci				goto bad;
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
32218c2ecf20Sopenharmony_ci		} else {
32228c2ecf20Sopenharmony_ci			/* There is no size change or it is shrinking; update the size and record it in the superblocks */
32238c2ecf20Sopenharmony_ci			r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
32248c2ecf20Sopenharmony_ci			if (r)
32258c2ecf20Sopenharmony_ci				goto bad;
32268c2ecf20Sopenharmony_ci
32278c2ecf20Sopenharmony_ci			if (sb_array_sectors > rs->array_sectors)
32288c2ecf20Sopenharmony_ci				set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
32298c2ecf20Sopenharmony_ci		}
32308c2ecf20Sopenharmony_ci		rs_set_cur(rs);
32318c2ecf20Sopenharmony_ci	}
32328c2ecf20Sopenharmony_ci
32338c2ecf20Sopenharmony_ci	/* If constructor requested it, change data and new_data offsets */
32348c2ecf20Sopenharmony_ci	r = rs_adjust_data_offsets(rs);
32358c2ecf20Sopenharmony_ci	if (r)
32368c2ecf20Sopenharmony_ci		goto bad;
32378c2ecf20Sopenharmony_ci
32388c2ecf20Sopenharmony_ci	/* Catch any inconclusive reshape superblock content. */
32398c2ecf20Sopenharmony_ci	rs_reset_inconclusive_reshape(rs);
32408c2ecf20Sopenharmony_ci
32418c2ecf20Sopenharmony_ci	/* Start raid set read-only and assumed clean to change in raid_resume() */
32428c2ecf20Sopenharmony_ci	rs->md.ro = 1;
32438c2ecf20Sopenharmony_ci	rs->md.in_sync = 1;
32448c2ecf20Sopenharmony_ci
32458c2ecf20Sopenharmony_ci	/* Keep array frozen until resume. */
32468c2ecf20Sopenharmony_ci	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
32478c2ecf20Sopenharmony_ci
32488c2ecf20Sopenharmony_ci	/* The lock has to be held while running the array */
32498c2ecf20Sopenharmony_ci	mddev_lock_nointr(&rs->md);
32508c2ecf20Sopenharmony_ci	r = md_run(&rs->md);
32518c2ecf20Sopenharmony_ci	rs->md.in_sync = 0; /* Assume already marked dirty */
32528c2ecf20Sopenharmony_ci	if (r) {
32538c2ecf20Sopenharmony_ci		ti->error = "Failed to run raid array";
32548c2ecf20Sopenharmony_ci		mddev_unlock(&rs->md);
32558c2ecf20Sopenharmony_ci		goto bad;
32568c2ecf20Sopenharmony_ci	}
32578c2ecf20Sopenharmony_ci
32588c2ecf20Sopenharmony_ci	r = md_start(&rs->md);
32598c2ecf20Sopenharmony_ci	if (r) {
32608c2ecf20Sopenharmony_ci		ti->error = "Failed to start raid array";
32618c2ecf20Sopenharmony_ci		goto bad_unlock;
32628c2ecf20Sopenharmony_ci	}
32638c2ecf20Sopenharmony_ci
32648c2ecf20Sopenharmony_ci	/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
32658c2ecf20Sopenharmony_ci	if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
32668c2ecf20Sopenharmony_ci		r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
32678c2ecf20Sopenharmony_ci		if (r) {
32688c2ecf20Sopenharmony_ci			ti->error = "Failed to set raid4/5/6 journal mode";
32698c2ecf20Sopenharmony_ci			goto bad_unlock;
32708c2ecf20Sopenharmony_ci		}
32718c2ecf20Sopenharmony_ci	}
32728c2ecf20Sopenharmony_ci
32738c2ecf20Sopenharmony_ci	mddev_suspend(&rs->md);
32748c2ecf20Sopenharmony_ci	set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
32758c2ecf20Sopenharmony_ci
32768c2ecf20Sopenharmony_ci	/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
32778c2ecf20Sopenharmony_ci	if (rs_is_raid456(rs)) {
32788c2ecf20Sopenharmony_ci		r = rs_set_raid456_stripe_cache(rs);
32798c2ecf20Sopenharmony_ci		if (r)
32808c2ecf20Sopenharmony_ci			goto bad_unlock;
32818c2ecf20Sopenharmony_ci	}
32828c2ecf20Sopenharmony_ci
32838c2ecf20Sopenharmony_ci	/* Now do an early reshape check */
32848c2ecf20Sopenharmony_ci	if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
32858c2ecf20Sopenharmony_ci		r = rs_check_reshape(rs);
32868c2ecf20Sopenharmony_ci		if (r)
32878c2ecf20Sopenharmony_ci			goto bad_unlock;
32888c2ecf20Sopenharmony_ci
32898c2ecf20Sopenharmony_ci		/* Restore new, ctr requested layout to perform check */
32908c2ecf20Sopenharmony_ci		rs_config_restore(rs, &rs_layout);
32918c2ecf20Sopenharmony_ci
32928c2ecf20Sopenharmony_ci		if (rs->md.pers->start_reshape) {
32938c2ecf20Sopenharmony_ci			r = rs->md.pers->check_reshape(&rs->md);
32948c2ecf20Sopenharmony_ci			if (r) {
32958c2ecf20Sopenharmony_ci				ti->error = "Reshape check failed";
32968c2ecf20Sopenharmony_ci				goto bad_unlock;
32978c2ecf20Sopenharmony_ci			}
32988c2ecf20Sopenharmony_ci		}
32998c2ecf20Sopenharmony_ci	}
33008c2ecf20Sopenharmony_ci
33018c2ecf20Sopenharmony_ci	/* Disable/enable discard support on raid set. */
33028c2ecf20Sopenharmony_ci	configure_discard_support(rs);
33038c2ecf20Sopenharmony_ci
33048c2ecf20Sopenharmony_ci	mddev_unlock(&rs->md);
33058c2ecf20Sopenharmony_ci	return 0;
33068c2ecf20Sopenharmony_ci
33078c2ecf20Sopenharmony_cibad_unlock:
33088c2ecf20Sopenharmony_ci	md_stop(&rs->md);
33098c2ecf20Sopenharmony_ci	mddev_unlock(&rs->md);
33108c2ecf20Sopenharmony_cibad:
33118c2ecf20Sopenharmony_ci	raid_set_free(rs);
33128c2ecf20Sopenharmony_ci
33138c2ecf20Sopenharmony_ci	return r;
33148c2ecf20Sopenharmony_ci}
33158c2ecf20Sopenharmony_ci
33168c2ecf20Sopenharmony_cistatic void raid_dtr(struct dm_target *ti)
33178c2ecf20Sopenharmony_ci{
33188c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
33198c2ecf20Sopenharmony_ci
33208c2ecf20Sopenharmony_ci	mddev_lock_nointr(&rs->md);
33218c2ecf20Sopenharmony_ci	md_stop(&rs->md);
33228c2ecf20Sopenharmony_ci	mddev_unlock(&rs->md);
33238c2ecf20Sopenharmony_ci	raid_set_free(rs);
33248c2ecf20Sopenharmony_ci}
33258c2ecf20Sopenharmony_ci
33268c2ecf20Sopenharmony_cistatic int raid_map(struct dm_target *ti, struct bio *bio)
33278c2ecf20Sopenharmony_ci{
33288c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
33298c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
33308c2ecf20Sopenharmony_ci
33318c2ecf20Sopenharmony_ci	/*
33328c2ecf20Sopenharmony_ci	 * If we're reshaping to add disk(s), ti->len and
33338c2ecf20Sopenharmony_ci	 * mddev->array_sectors will differ during the process
33348c2ecf20Sopenharmony_ci	 * (ti->len > mddev->array_sectors), so we have to requeue
33358c2ecf20Sopenharmony_ci	 * bios with addresses > mddev->array_sectors here or
33368c2ecf20Sopenharmony_ci	 * accesses past the EOD of the component data images
33378c2ecf20Sopenharmony_ci	 * will occur, thus erroring the raid set.
33388c2ecf20Sopenharmony_ci	 */
33398c2ecf20Sopenharmony_ci	if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
33408c2ecf20Sopenharmony_ci		return DM_MAPIO_REQUEUE;
33418c2ecf20Sopenharmony_ci
33428c2ecf20Sopenharmony_ci	md_handle_request(mddev, bio);
33438c2ecf20Sopenharmony_ci
33448c2ecf20Sopenharmony_ci	return DM_MAPIO_SUBMITTED;
33458c2ecf20Sopenharmony_ci}
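
/*
 * Sketch of the requeue condition with made-up numbers: while a reshape
 * grows the set, ti->len may already be 2000000 sectors although
 * mddev->array_sectors is still 1000000.  A bio ending at sector 1500000
 * would address component sectors that do not exist yet, so it is
 * requeued until the reshape has progressed far enough.
 */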
33468c2ecf20Sopenharmony_ci
33478c2ecf20Sopenharmony_ci/* Return sync state string for @state */
33488c2ecf20Sopenharmony_cienum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
33498c2ecf20Sopenharmony_cistatic const char *sync_str(enum sync_state state)
33508c2ecf20Sopenharmony_ci{
33518c2ecf20Sopenharmony_ci	/* Has to be in above sync_state order! */
33528c2ecf20Sopenharmony_ci	static const char *sync_strs[] = {
33538c2ecf20Sopenharmony_ci		"frozen",
33548c2ecf20Sopenharmony_ci		"reshape",
33558c2ecf20Sopenharmony_ci		"resync",
33568c2ecf20Sopenharmony_ci		"check",
33578c2ecf20Sopenharmony_ci		"repair",
33588c2ecf20Sopenharmony_ci		"recover",
33598c2ecf20Sopenharmony_ci		"idle"
33608c2ecf20Sopenharmony_ci	};
33618c2ecf20Sopenharmony_ci
33628c2ecf20Sopenharmony_ci	return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
33638c2ecf20Sopenharmony_ci}
33648c2ecf20Sopenharmony_ci
33658c2ecf20Sopenharmony_ci/* Return enum sync_state for @mddev derived from @recovery flags */
33668c2ecf20Sopenharmony_cistatic enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
33678c2ecf20Sopenharmony_ci{
33688c2ecf20Sopenharmony_ci	if (test_bit(MD_RECOVERY_FROZEN, &recovery))
33698c2ecf20Sopenharmony_ci		return st_frozen;
33708c2ecf20Sopenharmony_ci
33718c2ecf20Sopenharmony_ci	/* The MD sync thread can be done with I/O or be interrupted but still be running */
33728c2ecf20Sopenharmony_ci	if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
33738c2ecf20Sopenharmony_ci	    (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
33748c2ecf20Sopenharmony_ci	     (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
33758c2ecf20Sopenharmony_ci		if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
33768c2ecf20Sopenharmony_ci			return st_reshape;
33778c2ecf20Sopenharmony_ci
33788c2ecf20Sopenharmony_ci		if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
33798c2ecf20Sopenharmony_ci			if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
33808c2ecf20Sopenharmony_ci				return st_resync;
33818c2ecf20Sopenharmony_ci			if (test_bit(MD_RECOVERY_CHECK, &recovery))
33828c2ecf20Sopenharmony_ci				return st_check;
33838c2ecf20Sopenharmony_ci			return st_repair;
33848c2ecf20Sopenharmony_ci		}
33858c2ecf20Sopenharmony_ci
33868c2ecf20Sopenharmony_ci		if (test_bit(MD_RECOVERY_RECOVER, &recovery))
33878c2ecf20Sopenharmony_ci			return st_recover;
33888c2ecf20Sopenharmony_ci
33898c2ecf20Sopenharmony_ci		if (mddev->reshape_position != MaxSector)
33908c2ecf20Sopenharmony_ci			return st_reshape;
33918c2ecf20Sopenharmony_ci	}
33928c2ecf20Sopenharmony_ci
33938c2ecf20Sopenharmony_ci	return st_idle;
33948c2ecf20Sopenharmony_ci}
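
/*
 * Illustrative mapping, assuming the usual MD flag combinations: with
 * MD_RECOVERY_RUNNING | MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
 * MD_RECOVERY_CHECK set (and neither FROZEN nor DONE), the function
 * above returns st_check; drop MD_RECOVERY_CHECK and it returns
 * st_repair; drop MD_RECOVERY_REQUESTED as well and it returns st_resync.
 */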
33958c2ecf20Sopenharmony_ci
33968c2ecf20Sopenharmony_ci/*
33978c2ecf20Sopenharmony_ci * Return status string for @rdev
33988c2ecf20Sopenharmony_ci *
33998c2ecf20Sopenharmony_ci * Status characters:
34008c2ecf20Sopenharmony_ci *
34018c2ecf20Sopenharmony_ci *  'D' = Dead/Failed raid set component or raid4/5/6 journal device
34028c2ecf20Sopenharmony_ci *  'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
34038c2ecf20Sopenharmony_ci *  'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
34048c2ecf20Sopenharmony_ci *  '-' = Non-existing device (i.e. userspace passed '- -' into the ctr)
34058c2ecf20Sopenharmony_ci */
34068c2ecf20Sopenharmony_cistatic const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
34078c2ecf20Sopenharmony_ci{
34088c2ecf20Sopenharmony_ci	if (!rdev->bdev)
34098c2ecf20Sopenharmony_ci		return "-";
34108c2ecf20Sopenharmony_ci	else if (test_bit(Faulty, &rdev->flags))
34118c2ecf20Sopenharmony_ci		return "D";
34128c2ecf20Sopenharmony_ci	else if (test_bit(Journal, &rdev->flags))
34138c2ecf20Sopenharmony_ci		return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a";
34148c2ecf20Sopenharmony_ci	else if (test_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags) ||
34158c2ecf20Sopenharmony_ci		 (!test_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags) &&
34168c2ecf20Sopenharmony_ci		  !test_bit(In_sync, &rdev->flags)))
34178c2ecf20Sopenharmony_ci		return "a";
34188c2ecf20Sopenharmony_ci	else
34198c2ecf20Sopenharmony_ci		return "A";
34208c2ecf20Sopenharmony_ci}
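
/*
 * Example reading of the resulting health string, e.g. "AAa-" for a
 * hypothetical four-slot set: legs 0 and 1 are alive and in-sync, leg 2
 * is alive but still syncing, and slot 3 was handed to the ctr as '- -'.
 */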
34218c2ecf20Sopenharmony_ci
34228c2ecf20Sopenharmony_ci/* Helper to return resync/reshape progress for @rs and to set runtime flags for a raid set in sync / resyncing */
34238c2ecf20Sopenharmony_cistatic sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
34248c2ecf20Sopenharmony_ci				enum sync_state state, sector_t resync_max_sectors)
34258c2ecf20Sopenharmony_ci{
34268c2ecf20Sopenharmony_ci	sector_t r;
34278c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
34288c2ecf20Sopenharmony_ci
34298c2ecf20Sopenharmony_ci	clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34308c2ecf20Sopenharmony_ci	clear_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
34318c2ecf20Sopenharmony_ci
34328c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs)) {
34338c2ecf20Sopenharmony_ci		r = resync_max_sectors;
34348c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34358c2ecf20Sopenharmony_ci
34368c2ecf20Sopenharmony_ci	} else {
34378c2ecf20Sopenharmony_ci		if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
34388c2ecf20Sopenharmony_ci			r = mddev->recovery_cp;
34398c2ecf20Sopenharmony_ci		else
34408c2ecf20Sopenharmony_ci			r = mddev->curr_resync_completed;
34418c2ecf20Sopenharmony_ci
34428c2ecf20Sopenharmony_ci		if (state == st_idle && r >= resync_max_sectors) {
34438c2ecf20Sopenharmony_ci			/*
34448c2ecf20Sopenharmony_ci			 * Sync complete.
34458c2ecf20Sopenharmony_ci			 */
34468c2ecf20Sopenharmony_ci			/* In case we have finished recovering, the array is in sync. */
34478c2ecf20Sopenharmony_ci			if (test_bit(MD_RECOVERY_RECOVER, &recovery))
34488c2ecf20Sopenharmony_ci				set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34498c2ecf20Sopenharmony_ci
34508c2ecf20Sopenharmony_ci		} else if (state == st_recover)
34518c2ecf20Sopenharmony_ci			/*
34528c2ecf20Sopenharmony_ci			 * In case we are recovering, the array is not in sync
34538c2ecf20Sopenharmony_ci			 * and health chars should show the recovering legs.
34548c2ecf20Sopenharmony_ci			 *
34558c2ecf20Sopenharmony_ci			 * Already retrieved recovery offset from curr_resync_completed above.
34568c2ecf20Sopenharmony_ci			 */
34578c2ecf20Sopenharmony_ci			;
34588c2ecf20Sopenharmony_ci
34598c2ecf20Sopenharmony_ci		else if (state == st_resync || state == st_reshape)
34608c2ecf20Sopenharmony_ci			/*
34618c2ecf20Sopenharmony_ci			 * If "resync/reshape" is occurring, the raid set
34628c2ecf20Sopenharmony_ci			 * is or may be out of sync hence the health
34638c2ecf20Sopenharmony_ci			 * characters shall be 'a'.
34648c2ecf20Sopenharmony_ci			 */
34658c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
34668c2ecf20Sopenharmony_ci
34678c2ecf20Sopenharmony_ci		else if (state == st_check || state == st_repair)
34688c2ecf20Sopenharmony_ci			/*
34698c2ecf20Sopenharmony_ci			 * If "check" or "repair" is occurring, the raid set has
34708c2ecf20Sopenharmony_ci			 * undergone an initial sync and the health characters
34718c2ecf20Sopenharmony_ci			 * should not be 'a' anymore.
34728c2ecf20Sopenharmony_ci			 */
34738c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34748c2ecf20Sopenharmony_ci
34758c2ecf20Sopenharmony_ci		else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
34768c2ecf20Sopenharmony_ci			/*
34778c2ecf20Sopenharmony_ci			 * We are idle and recovery is needed, prevent 'A' chars race
34788c2ecf20Sopenharmony_ci			 * caused by components still set to in-sync by constructor.
34798c2ecf20Sopenharmony_ci			 */
34808c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
34818c2ecf20Sopenharmony_ci
34828c2ecf20Sopenharmony_ci		else {
34838c2ecf20Sopenharmony_ci			/*
34848c2ecf20Sopenharmony_ci			 * We are idle and the raid set may be doing an initial
34858c2ecf20Sopenharmony_ci			 * sync, or it may be rebuilding individual components.
34868c2ecf20Sopenharmony_ci			 * If all the devices are In_sync, then it is the raid set
34878c2ecf20Sopenharmony_ci			 * that is being initialized.
34888c2ecf20Sopenharmony_ci			 */
34898c2ecf20Sopenharmony_ci			struct md_rdev *rdev;
34908c2ecf20Sopenharmony_ci
34918c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34928c2ecf20Sopenharmony_ci			rdev_for_each(rdev, mddev)
34938c2ecf20Sopenharmony_ci				if (!test_bit(Journal, &rdev->flags) &&
34948c2ecf20Sopenharmony_ci				    !test_bit(In_sync, &rdev->flags)) {
34958c2ecf20Sopenharmony_ci					clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
34968c2ecf20Sopenharmony_ci					break;
34978c2ecf20Sopenharmony_ci				}
34988c2ecf20Sopenharmony_ci		}
34998c2ecf20Sopenharmony_ci	}
35008c2ecf20Sopenharmony_ci
35018c2ecf20Sopenharmony_ci	return min(r, resync_max_sectors);
35028c2ecf20Sopenharmony_ci}
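
/*
 * Example outcome, assuming a resync at the halfway point: with
 * state == st_resync and mddev->curr_resync_completed == 500000 of
 * resync_max_sectors == 1000000, the function returns 500000 and sets
 * RT_FLAG_RS_RESYNCING, so raid_status() reports "500000/1000000" and
 * 'a' health characters.
 */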
35038c2ecf20Sopenharmony_ci
35048c2ecf20Sopenharmony_ci/* Helper to return @dev name or "-" if !@dev */
35058c2ecf20Sopenharmony_cistatic const char *__get_dev_name(struct dm_dev *dev)
35068c2ecf20Sopenharmony_ci{
35078c2ecf20Sopenharmony_ci	return dev ? dev->name : "-";
35088c2ecf20Sopenharmony_ci}
35098c2ecf20Sopenharmony_ci
35108c2ecf20Sopenharmony_cistatic void raid_status(struct dm_target *ti, status_type_t type,
35118c2ecf20Sopenharmony_ci			unsigned int status_flags, char *result, unsigned int maxlen)
35128c2ecf20Sopenharmony_ci{
35138c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
35148c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
35158c2ecf20Sopenharmony_ci	struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
35168c2ecf20Sopenharmony_ci	int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
35178c2ecf20Sopenharmony_ci	unsigned long recovery;
35188c2ecf20Sopenharmony_ci	unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
35198c2ecf20Sopenharmony_ci	unsigned int sz = 0;
35208c2ecf20Sopenharmony_ci	unsigned int rebuild_writemostly_count = 0;
35218c2ecf20Sopenharmony_ci	sector_t progress, resync_max_sectors, resync_mismatches;
35228c2ecf20Sopenharmony_ci	enum sync_state state;
35238c2ecf20Sopenharmony_ci	struct raid_type *rt;
35248c2ecf20Sopenharmony_ci
35258c2ecf20Sopenharmony_ci	switch (type) {
35268c2ecf20Sopenharmony_ci	case STATUSTYPE_INFO:
35278c2ecf20Sopenharmony_ci		/* *Should* always succeed */
35288c2ecf20Sopenharmony_ci		rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
35298c2ecf20Sopenharmony_ci		if (!rt)
35308c2ecf20Sopenharmony_ci			return;
35318c2ecf20Sopenharmony_ci
35328c2ecf20Sopenharmony_ci		DMEMIT("%s %d ", rt->name, mddev->raid_disks);
35338c2ecf20Sopenharmony_ci
35348c2ecf20Sopenharmony_ci		/* Access most recent mddev properties for status output */
35358c2ecf20Sopenharmony_ci		smp_rmb();
35368c2ecf20Sopenharmony_ci		/* Get sensible max sectors even if raid set not yet started */
35378c2ecf20Sopenharmony_ci		resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
35388c2ecf20Sopenharmony_ci				      mddev->resync_max_sectors : mddev->dev_sectors;
35398c2ecf20Sopenharmony_ci		recovery = rs->md.recovery;
35408c2ecf20Sopenharmony_ci		state = decipher_sync_action(mddev, recovery);
35418c2ecf20Sopenharmony_ci		progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
35428c2ecf20Sopenharmony_ci		resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
35438c2ecf20Sopenharmony_ci				    atomic64_read(&mddev->resync_mismatches) : 0;
35448c2ecf20Sopenharmony_ci
35458c2ecf20Sopenharmony_ci		/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
35468c2ecf20Sopenharmony_ci		for (i = 0; i < rs->raid_disks; i++)
35478c2ecf20Sopenharmony_ci			DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev));
35488c2ecf20Sopenharmony_ci
35498c2ecf20Sopenharmony_ci		/*
35508c2ecf20Sopenharmony_ci		 * In-sync/Reshape ratio:
35518c2ecf20Sopenharmony_ci		 *  The in-sync ratio shows the progress of:
35528c2ecf20Sopenharmony_ci		 *   - Initializing the raid set
35538c2ecf20Sopenharmony_ci		 *   - Rebuilding a subset of devices of the raid set
35548c2ecf20Sopenharmony_ci		 *  The user can distinguish between the two by referring
35558c2ecf20Sopenharmony_ci		 *  to the status characters.
35568c2ecf20Sopenharmony_ci		 *
35578c2ecf20Sopenharmony_ci		 *  The reshape ratio shows the progress of
35588c2ecf20Sopenharmony_ci		 *  changing the raid layout or the number of
35598c2ecf20Sopenharmony_ci		 *  disks of a raid set
35608c2ecf20Sopenharmony_ci		 */
35618c2ecf20Sopenharmony_ci		DMEMIT(" %llu/%llu", (unsigned long long) progress,
35628c2ecf20Sopenharmony_ci				     (unsigned long long) resync_max_sectors);
35638c2ecf20Sopenharmony_ci
35648c2ecf20Sopenharmony_ci		/*
35658c2ecf20Sopenharmony_ci		 * v1.5.0+:
35668c2ecf20Sopenharmony_ci		 *
35678c2ecf20Sopenharmony_ci		 * Sync action:
35688c2ecf20Sopenharmony_ci		 *   See Documentation/admin-guide/device-mapper/dm-raid.rst for
35698c2ecf20Sopenharmony_ci		 *   information on each of these states.
35708c2ecf20Sopenharmony_ci		 */
35718c2ecf20Sopenharmony_ci		DMEMIT(" %s", sync_str(state));
35728c2ecf20Sopenharmony_ci
35738c2ecf20Sopenharmony_ci		/*
35748c2ecf20Sopenharmony_ci		 * v1.5.0+:
35758c2ecf20Sopenharmony_ci		 *
35768c2ecf20Sopenharmony_ci		 * resync_mismatches/mismatch_cnt
35778c2ecf20Sopenharmony_ci		 *   This field shows the number of discrepancies found when
35788c2ecf20Sopenharmony_ci		 *   performing a "check" of the raid set.
35798c2ecf20Sopenharmony_ci		 */
35808c2ecf20Sopenharmony_ci		DMEMIT(" %llu", (unsigned long long) resync_mismatches);
35818c2ecf20Sopenharmony_ci
35828c2ecf20Sopenharmony_ci		/*
35838c2ecf20Sopenharmony_ci		 * v1.9.0+:
35848c2ecf20Sopenharmony_ci		 *
35858c2ecf20Sopenharmony_ci		 * data_offset (needed for out of space reshaping)
35868c2ecf20Sopenharmony_ci		 *   This field shows the data offset into the data
35878c2ecf20Sopenharmony_ci		 *   image LV where the first stripes data starts.
35888c2ecf20Sopenharmony_ci		 *
35898c2ecf20Sopenharmony_ci		 * We keep data_offset equal on all raid disks of the set,
35908c2ecf20Sopenharmony_ci		 * so retrieving it from the first raid disk is sufficient.
35918c2ecf20Sopenharmony_ci		 */
35928c2ecf20Sopenharmony_ci		DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset);
35938c2ecf20Sopenharmony_ci
35948c2ecf20Sopenharmony_ci		/*
35958c2ecf20Sopenharmony_ci		 * v1.10.0+:
35968c2ecf20Sopenharmony_ci		 */
35978c2ecf20Sopenharmony_ci		DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
35988c2ecf20Sopenharmony_ci			      __raid_dev_status(rs, &rs->journal_dev.rdev) : "-");
35998c2ecf20Sopenharmony_ci		break;
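
		/*
		 * A hypothetical resulting STATUSTYPE_INFO line for a healthy
		 * 3-leg raid5_ls set without a journal device would read:
		 *
		 *   raid5_ls 3 AAA 976562176/976562176 idle 0 0 -
		 *
		 * i.e. type, #devices, health chars, sync ratio, sync action,
		 * mismatch count, data offset and journal device state.
		 */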
36008c2ecf20Sopenharmony_ci
36018c2ecf20Sopenharmony_ci	case STATUSTYPE_TABLE:
36028c2ecf20Sopenharmony_ci		/* Report the table line string you would use to construct this raid set */
36038c2ecf20Sopenharmony_ci
36048c2ecf20Sopenharmony_ci		/*
36058c2ecf20Sopenharmony_ci		 * Count any rebuild or writemostly argument pairs and subtract the
36068c2ecf20Sopenharmony_ci		 * hweight count added below for any rebuild and writemostly ctr flags.
36078c2ecf20Sopenharmony_ci		 */
36088c2ecf20Sopenharmony_ci		for (i = 0; i < rs->raid_disks; i++) {
36098c2ecf20Sopenharmony_ci			rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) +
36108c2ecf20Sopenharmony_ci						     (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0);
36118c2ecf20Sopenharmony_ci		}
36128c2ecf20Sopenharmony_ci		rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) +
36138c2ecf20Sopenharmony_ci					     (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0);
36148c2ecf20Sopenharmony_ci		/* Calculate raid parameter count based on ^ rebuild/writemostly argument counts and ctr flags set. */
36158c2ecf20Sopenharmony_ci		raid_param_cnt += rebuild_writemostly_count +
36168c2ecf20Sopenharmony_ci				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
36178c2ecf20Sopenharmony_ci				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
36188c2ecf20Sopenharmony_ci		/* Emit table line */
36198c2ecf20Sopenharmony_ci		/* This has to be in the documented order for userspace! */
36208c2ecf20Sopenharmony_ci		DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
36218c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
36228c2ecf20Sopenharmony_ci			DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC));
36238c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
36248c2ecf20Sopenharmony_ci			DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC));
36258c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags))
36268c2ecf20Sopenharmony_ci			for (i = 0; i < rs->raid_disks; i++)
36278c2ecf20Sopenharmony_ci				if (test_bit(i, (void *) rs->rebuild_disks))
36288c2ecf20Sopenharmony_ci					DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i);
36298c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
36308c2ecf20Sopenharmony_ci			DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP),
36318c2ecf20Sopenharmony_ci					  mddev->bitmap_info.daemon_sleep);
36328c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
36338c2ecf20Sopenharmony_ci			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
36348c2ecf20Sopenharmony_ci					 mddev->sync_speed_min);
36358c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
36368c2ecf20Sopenharmony_ci			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
36378c2ecf20Sopenharmony_ci					 mddev->sync_speed_max);
36388c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags))
36398c2ecf20Sopenharmony_ci			for (i = 0; i < rs->raid_disks; i++)
36408c2ecf20Sopenharmony_ci				if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
36418c2ecf20Sopenharmony_ci					DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY),
36428c2ecf20Sopenharmony_ci					       rs->dev[i].rdev.raid_disk);
36438c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags))
36448c2ecf20Sopenharmony_ci			DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND),
36458c2ecf20Sopenharmony_ci					  mddev->bitmap_info.max_write_behind);
36468c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags))
36478c2ecf20Sopenharmony_ci			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE),
36488c2ecf20Sopenharmony_ci					 max_nr_stripes);
36498c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
36508c2ecf20Sopenharmony_ci			DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE),
36518c2ecf20Sopenharmony_ci					   (unsigned long long) to_sector(mddev->bitmap_info.chunksize));
36528c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
36538c2ecf20Sopenharmony_ci			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES),
36548c2ecf20Sopenharmony_ci					 raid10_md_layout_to_copies(mddev->layout));
36558c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
36568c2ecf20Sopenharmony_ci			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT),
36578c2ecf20Sopenharmony_ci					 raid10_md_layout_to_format(mddev->layout));
36588c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags))
36598c2ecf20Sopenharmony_ci			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS),
36608c2ecf20Sopenharmony_ci					 max(rs->delta_disks, mddev->delta_disks));
36618c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags))
36628c2ecf20Sopenharmony_ci			DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET),
36638c2ecf20Sopenharmony_ci					   (unsigned long long) rs->data_offset);
36648c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
36658c2ecf20Sopenharmony_ci			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
36668c2ecf20Sopenharmony_ci					__get_dev_name(rs->journal_dev.dev));
36678c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags))
36688c2ecf20Sopenharmony_ci			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE),
36698c2ecf20Sopenharmony_ci					 md_journal_mode_to_dm_raid(rs->journal_dev.mode));
36708c2ecf20Sopenharmony_ci		DMEMIT(" %d", rs->raid_disks);
36718c2ecf20Sopenharmony_ci		for (i = 0; i < rs->raid_disks; i++)
36728c2ecf20Sopenharmony_ci			DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
36738c2ecf20Sopenharmony_ci					 __get_dev_name(rs->dev[i].data_dev));
36748c2ecf20Sopenharmony_ci	}
36758c2ecf20Sopenharmony_ci}
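
/*
 * For the same hypothetical set, STATUSTYPE_TABLE emits a ctr-compatible
 * line such as:
 *
 *   raid5_ls 3 128 region_size 1024 3 - /dev/sda1 - /dev/sdb1 - /dev/sdc1
 *
 * where "3" after the type is raid_param_cnt (the chunk size plus the
 * region_size argument pair), "128" is the chunk size, followed by
 * #devices and the <meta_dev data_dev> tuples.
 */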
36768c2ecf20Sopenharmony_ci
36778c2ecf20Sopenharmony_cistatic int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
36788c2ecf20Sopenharmony_ci			char *result, unsigned maxlen)
36798c2ecf20Sopenharmony_ci{
36808c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
36818c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
36828c2ecf20Sopenharmony_ci
36838c2ecf20Sopenharmony_ci	if (!mddev->pers || !mddev->pers->sync_request)
36848c2ecf20Sopenharmony_ci		return -EINVAL;
36858c2ecf20Sopenharmony_ci
36868c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "frozen"))
36878c2ecf20Sopenharmony_ci		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
36888c2ecf20Sopenharmony_ci	else
36898c2ecf20Sopenharmony_ci		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
36908c2ecf20Sopenharmony_ci
36918c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
36928c2ecf20Sopenharmony_ci		if (mddev->sync_thread) {
36938c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
36948c2ecf20Sopenharmony_ci			md_reap_sync_thread(mddev);
36958c2ecf20Sopenharmony_ci		}
36968c2ecf20Sopenharmony_ci	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
36978c2ecf20Sopenharmony_ci		return -EBUSY;
36988c2ecf20Sopenharmony_ci	else if (!strcasecmp(argv[0], "resync"))
36998c2ecf20Sopenharmony_ci		; /* MD_RECOVERY_NEEDED set below */
37008c2ecf20Sopenharmony_ci	else if (!strcasecmp(argv[0], "recover"))
37018c2ecf20Sopenharmony_ci		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
37028c2ecf20Sopenharmony_ci	else {
37038c2ecf20Sopenharmony_ci		if (!strcasecmp(argv[0], "check")) {
37048c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
37058c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
37068c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
37078c2ecf20Sopenharmony_ci		} else if (!strcasecmp(argv[0], "repair")) {
37088c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
37098c2ecf20Sopenharmony_ci			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
37108c2ecf20Sopenharmony_ci		} else
37118c2ecf20Sopenharmony_ci			return -EINVAL;
37128c2ecf20Sopenharmony_ci	}
37138c2ecf20Sopenharmony_ci	if (mddev->ro == 2) {
37148c2ecf20Sopenharmony_ci		/* A write to sync_action is enough to justify
37158c2ecf20Sopenharmony_ci		 * canceling read-auto mode
37168c2ecf20Sopenharmony_ci		 */
37178c2ecf20Sopenharmony_ci		mddev->ro = 0;
37188c2ecf20Sopenharmony_ci		if (!mddev->suspended && mddev->sync_thread)
37198c2ecf20Sopenharmony_ci			md_wakeup_thread(mddev->sync_thread);
37208c2ecf20Sopenharmony_ci	}
37218c2ecf20Sopenharmony_ci	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
37228c2ecf20Sopenharmony_ci	if (!mddev->suspended && mddev->thread)
37238c2ecf20Sopenharmony_ci		md_wakeup_thread(mddev->thread);
37248c2ecf20Sopenharmony_ci
37258c2ecf20Sopenharmony_ci	return 0;
37268c2ecf20Sopenharmony_ci}
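
/*
 * Hypothetical usage of the message interface from userspace, assuming
 * a mapped device named "r5":
 *
 *   dmsetup message r5 0 check    # start a scrubbing pass
 *   dmsetup message r5 0 frozen   # freeze any running sync action
 *   dmsetup message r5 0 idle     # unfreeze / stop the sync action
 *
 * The remaining messages ("resync", "recover", "repair") map onto the
 * corresponding MD_RECOVERY_* bits as handled above.
 */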
37278c2ecf20Sopenharmony_ci
37288c2ecf20Sopenharmony_cistatic int raid_iterate_devices(struct dm_target *ti,
37298c2ecf20Sopenharmony_ci				iterate_devices_callout_fn fn, void *data)
37308c2ecf20Sopenharmony_ci{
37318c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
37328c2ecf20Sopenharmony_ci	unsigned int i;
37338c2ecf20Sopenharmony_ci	int r = 0;
37348c2ecf20Sopenharmony_ci
37358c2ecf20Sopenharmony_ci	for (i = 0; !r && i < rs->raid_disks; i++) {
37368c2ecf20Sopenharmony_ci		if (rs->dev[i].data_dev) {
37378c2ecf20Sopenharmony_ci			r = fn(ti, rs->dev[i].data_dev,
37388c2ecf20Sopenharmony_ci			       0, /* No offset on data devs */
37398c2ecf20Sopenharmony_ci			       rs->md.dev_sectors, data);
37408c2ecf20Sopenharmony_ci		}
37418c2ecf20Sopenharmony_ci	}
37428c2ecf20Sopenharmony_ci
37438c2ecf20Sopenharmony_ci	return r;
37448c2ecf20Sopenharmony_ci}
37458c2ecf20Sopenharmony_ci
37468c2ecf20Sopenharmony_cistatic void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
37478c2ecf20Sopenharmony_ci{
37488c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
37498c2ecf20Sopenharmony_ci	unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
37508c2ecf20Sopenharmony_ci
37518c2ecf20Sopenharmony_ci	blk_limits_io_min(limits, chunk_size_bytes);
37528c2ecf20Sopenharmony_ci	blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
37538c2ecf20Sopenharmony_ci
37548c2ecf20Sopenharmony_ci	/*
37558c2ecf20Sopenharmony_ci	 * RAID0 and RAID10 personalities require bio splitting,
37568c2ecf20Sopenharmony_ci	 * RAID1/4/5/6 don't and process large discard bios properly.
37578c2ecf20Sopenharmony_ci	 */
37588c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
37598c2ecf20Sopenharmony_ci		limits->discard_granularity = chunk_size_bytes;
37608c2ecf20Sopenharmony_ci		limits->max_discard_sectors = rs->md.chunk_sectors;
37618c2ecf20Sopenharmony_ci	}
37628c2ecf20Sopenharmony_ci}
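
/*
 * Worked example with assumed numbers: for a raid5 set with a 128-sector
 * (64 KiB) chunk and 4 data stripes, io_min becomes 64 KiB and io_opt
 * 4 * 64 KiB = 256 KiB, so upper layers can issue full-stripe writes.
 * For raid0/raid10, discards are additionally limited to chunk-sized
 * regions because those personalities require bio splitting.
 */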
37638c2ecf20Sopenharmony_ci
37648c2ecf20Sopenharmony_cistatic void raid_postsuspend(struct dm_target *ti)
37658c2ecf20Sopenharmony_ci{
37668c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
37678c2ecf20Sopenharmony_ci
37688c2ecf20Sopenharmony_ci	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
37698c2ecf20Sopenharmony_ci		/* Writes have to be stopped before suspending to avoid deadlocks. */
37708c2ecf20Sopenharmony_ci		if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
37718c2ecf20Sopenharmony_ci			md_stop_writes(&rs->md);
37728c2ecf20Sopenharmony_ci
37738c2ecf20Sopenharmony_ci		mddev_lock_nointr(&rs->md);
37748c2ecf20Sopenharmony_ci		mddev_suspend(&rs->md);
37758c2ecf20Sopenharmony_ci		mddev_unlock(&rs->md);
37768c2ecf20Sopenharmony_ci	}
37778c2ecf20Sopenharmony_ci}
37788c2ecf20Sopenharmony_ci
37798c2ecf20Sopenharmony_cistatic void attempt_restore_of_faulty_devices(struct raid_set *rs)
37808c2ecf20Sopenharmony_ci{
37818c2ecf20Sopenharmony_ci	int i;
37828c2ecf20Sopenharmony_ci	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
37838c2ecf20Sopenharmony_ci	unsigned long flags;
37848c2ecf20Sopenharmony_ci	bool cleared = false;
37858c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
37868c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
37878c2ecf20Sopenharmony_ci	struct md_rdev *r;
37888c2ecf20Sopenharmony_ci
37898c2ecf20Sopenharmony_ci	/* RAID personalities have to provide hot add/remove methods or we need to bail out. */
37908c2ecf20Sopenharmony_ci	if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
37918c2ecf20Sopenharmony_ci		return;
37928c2ecf20Sopenharmony_ci
37938c2ecf20Sopenharmony_ci	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
37948c2ecf20Sopenharmony_ci
37958c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++) {
37968c2ecf20Sopenharmony_ci		r = &rs->dev[i].rdev;
37978c2ecf20Sopenharmony_ci		/* HM FIXME: enhance journal device recovery processing */
37988c2ecf20Sopenharmony_ci		if (test_bit(Journal, &r->flags))
37998c2ecf20Sopenharmony_ci			continue;
38008c2ecf20Sopenharmony_ci
38018c2ecf20Sopenharmony_ci		if (test_bit(Faulty, &r->flags) &&
38028c2ecf20Sopenharmony_ci		    r->meta_bdev && !read_disk_sb(r, r->sb_size, true)) {
38038c2ecf20Sopenharmony_ci			DMINFO("Faulty %s device #%d has readable super block."
38048c2ecf20Sopenharmony_ci			       "  Attempting to revive it.",
38058c2ecf20Sopenharmony_ci			       rs->raid_type->name, i);
38068c2ecf20Sopenharmony_ci
38078c2ecf20Sopenharmony_ci			/*
38088c2ecf20Sopenharmony_ci			 * Faulty bit may be set, but sometimes the array can
38098c2ecf20Sopenharmony_ci			 * be suspended before the personalities can respond
38108c2ecf20Sopenharmony_ci			 * by removing the device from the array (i.e. calling
38118c2ecf20Sopenharmony_ci			 * 'hot_remove_disk').	If they haven't yet removed
38128c2ecf20Sopenharmony_ci			 * the failed device, its 'raid_disk' number will be
38138c2ecf20Sopenharmony_ci			 * '>= 0' - meaning we must call this function
38148c2ecf20Sopenharmony_ci			 * ourselves.
38158c2ecf20Sopenharmony_ci			 */
38168c2ecf20Sopenharmony_ci			flags = r->flags;
38178c2ecf20Sopenharmony_ci			clear_bit(In_sync, &r->flags); /* Mandatory for hot remove. */
38188c2ecf20Sopenharmony_ci			if (r->raid_disk >= 0) {
38198c2ecf20Sopenharmony_ci				if (mddev->pers->hot_remove_disk(mddev, r)) {
38208c2ecf20Sopenharmony_ci					/* Failed to revive this device, try next */
38218c2ecf20Sopenharmony_ci					r->flags = flags;
38228c2ecf20Sopenharmony_ci					continue;
38238c2ecf20Sopenharmony_ci				}
38248c2ecf20Sopenharmony_ci			} else
38258c2ecf20Sopenharmony_ci				r->raid_disk = r->saved_raid_disk = i;
38268c2ecf20Sopenharmony_ci
38278c2ecf20Sopenharmony_ci			clear_bit(Faulty, &r->flags);
38288c2ecf20Sopenharmony_ci			clear_bit(WriteErrorSeen, &r->flags);
38298c2ecf20Sopenharmony_ci
38308c2ecf20Sopenharmony_ci			if (mddev->pers->hot_add_disk(mddev, r)) {
38318c2ecf20Sopenharmony_ci				/* Failed to revive this device, try next */
38328c2ecf20Sopenharmony_ci				r->raid_disk = r->saved_raid_disk = -1;
38338c2ecf20Sopenharmony_ci				r->flags = flags;
38348c2ecf20Sopenharmony_ci			} else {
38358c2ecf20Sopenharmony_ci				clear_bit(In_sync, &r->flags);
38368c2ecf20Sopenharmony_ci				r->recovery_offset = 0;
38378c2ecf20Sopenharmony_ci				set_bit(i, (void *) cleared_failed_devices);
38388c2ecf20Sopenharmony_ci				cleared = true;
38398c2ecf20Sopenharmony_ci			}
38408c2ecf20Sopenharmony_ci		}
38418c2ecf20Sopenharmony_ci	}
38428c2ecf20Sopenharmony_ci
38438c2ecf20Sopenharmony_ci	/* If any failed devices could be cleared, update all sbs failed_devices bits */
38448c2ecf20Sopenharmony_ci	if (cleared) {
38458c2ecf20Sopenharmony_ci		uint64_t failed_devices[DISKS_ARRAY_ELEMS];
38468c2ecf20Sopenharmony_ci
38478c2ecf20Sopenharmony_ci		rdev_for_each(r, &rs->md) {
38488c2ecf20Sopenharmony_ci			if (test_bit(Journal, &r->flags))
38498c2ecf20Sopenharmony_ci				continue;
38508c2ecf20Sopenharmony_ci
38518c2ecf20Sopenharmony_ci			sb = page_address(r->sb_page);
38528c2ecf20Sopenharmony_ci			sb_retrieve_failed_devices(sb, failed_devices);
38538c2ecf20Sopenharmony_ci
38548c2ecf20Sopenharmony_ci			for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
38558c2ecf20Sopenharmony_ci				failed_devices[i] &= ~cleared_failed_devices[i];
38568c2ecf20Sopenharmony_ci
38578c2ecf20Sopenharmony_ci			sb_update_failed_devices(sb, failed_devices);
38588c2ecf20Sopenharmony_ci		}
38598c2ecf20Sopenharmony_ci	}
38608c2ecf20Sopenharmony_ci}
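
/*
 * Sketch of the superblock mask update above with made-up values: if
 * cleared_failed_devices[0] == 0x6 (devices 1 and 2 revived) and a
 * superblock still records failed_devices[0] == 0x7, the new value
 * becomes 0x7 & ~0x6 == 0x1, leaving only device 0 marked failed.
 */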
38618c2ecf20Sopenharmony_ci
38628c2ecf20Sopenharmony_cistatic int __load_dirty_region_bitmap(struct raid_set *rs)
38638c2ecf20Sopenharmony_ci{
38648c2ecf20Sopenharmony_ci	int r = 0;
38658c2ecf20Sopenharmony_ci
38668c2ecf20Sopenharmony_ci	/* Try loading the bitmap unless "raid0", which does not have one */
38678c2ecf20Sopenharmony_ci	if (!rs_is_raid0(rs) &&
38688c2ecf20Sopenharmony_ci	    !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
38698c2ecf20Sopenharmony_ci		r = md_bitmap_load(&rs->md);
38708c2ecf20Sopenharmony_ci		if (r)
38718c2ecf20Sopenharmony_ci			DMERR("Failed to load bitmap");
38728c2ecf20Sopenharmony_ci	}
38738c2ecf20Sopenharmony_ci
38748c2ecf20Sopenharmony_ci	return r;
38758c2ecf20Sopenharmony_ci}
38768c2ecf20Sopenharmony_ci
38778c2ecf20Sopenharmony_ci/* Enforce updating all superblocks */
38788c2ecf20Sopenharmony_cistatic void rs_update_sbs(struct raid_set *rs)
38798c2ecf20Sopenharmony_ci{
38808c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
38818c2ecf20Sopenharmony_ci	int ro = mddev->ro;
38828c2ecf20Sopenharmony_ci
38838c2ecf20Sopenharmony_ci	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
38848c2ecf20Sopenharmony_ci	mddev->ro = 0;
38858c2ecf20Sopenharmony_ci	md_update_sb(mddev, 1);
38868c2ecf20Sopenharmony_ci	mddev->ro = ro;
38878c2ecf20Sopenharmony_ci}
38888c2ecf20Sopenharmony_ci
38898c2ecf20Sopenharmony_ci/*
38908c2ecf20Sopenharmony_ci * Reshape changes the raid algorithm of @rs to a new one within the
38918c2ecf20Sopenharmony_ci * personality (e.g. raid6_zr -> raid6_nc), changes the stripe size,
38928c2ecf20Sopenharmony_ci * adds/removes disks from the raid set (thus growing/shrinking it) or resizes the set.
38938c2ecf20Sopenharmony_ci *
38948c2ecf20Sopenharmony_ci * Call mddev_lock_nointr() before!
38958c2ecf20Sopenharmony_ci */
38968c2ecf20Sopenharmony_cistatic int rs_start_reshape(struct raid_set *rs)
38978c2ecf20Sopenharmony_ci{
38988c2ecf20Sopenharmony_ci	int r;
38998c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
39008c2ecf20Sopenharmony_ci	struct md_personality *pers = mddev->pers;
39018c2ecf20Sopenharmony_ci
39028c2ecf20Sopenharmony_ci	/* Don't allow the sync thread to work until the table gets reloaded. */
39038c2ecf20Sopenharmony_ci	set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
39048c2ecf20Sopenharmony_ci
39058c2ecf20Sopenharmony_ci	r = rs_setup_reshape(rs);
39068c2ecf20Sopenharmony_ci	if (r)
39078c2ecf20Sopenharmony_ci		return r;
39088c2ecf20Sopenharmony_ci
39098c2ecf20Sopenharmony_ci	/*
39108c2ecf20Sopenharmony_ci	 * Check any reshape constraints enforced by the personality.
39118c2ecf20Sopenharmony_ci	 *
39128c2ecf20Sopenharmony_ci	 * May as well kick the reshape off here already so that pers->start_reshape() becomes optional.
39138c2ecf20Sopenharmony_ci	 */
39148c2ecf20Sopenharmony_ci	r = pers->check_reshape(mddev);
39158c2ecf20Sopenharmony_ci	if (r) {
39168c2ecf20Sopenharmony_ci		rs->ti->error = "pers->check_reshape() failed";
39178c2ecf20Sopenharmony_ci		return r;
39188c2ecf20Sopenharmony_ci	}
39198c2ecf20Sopenharmony_ci
39208c2ecf20Sopenharmony_ci	/*
39218c2ecf20Sopenharmony_ci	 * Personality may not provide start reshape method in which
39228c2ecf20Sopenharmony_ci	 * case check_reshape above has already covered everything
39238c2ecf20Sopenharmony_ci	 */
39248c2ecf20Sopenharmony_ci	if (pers->start_reshape) {
39258c2ecf20Sopenharmony_ci		r = pers->start_reshape(mddev);
39268c2ecf20Sopenharmony_ci		if (r) {
39278c2ecf20Sopenharmony_ci			rs->ti->error = "pers->start_reshape() failed";
39288c2ecf20Sopenharmony_ci			return r;
39298c2ecf20Sopenharmony_ci		}
39308c2ecf20Sopenharmony_ci	}
39318c2ecf20Sopenharmony_ci
39328c2ecf20Sopenharmony_ci	/*
39338c2ecf20Sopenharmony_ci	 * Now reshape got set up, update superblocks to
39348c2ecf20Sopenharmony_ci	 * reflect the fact so that a table reload will
39358c2ecf20Sopenharmony_ci	 * access proper superblock content in the ctr.
39368c2ecf20Sopenharmony_ci	 */
39378c2ecf20Sopenharmony_ci	rs_update_sbs(rs);
39388c2ecf20Sopenharmony_ci
39398c2ecf20Sopenharmony_ci	return 0;
39408c2ecf20Sopenharmony_ci}
39418c2ecf20Sopenharmony_ci
39428c2ecf20Sopenharmony_cistatic int raid_preresume(struct dm_target *ti)
39438c2ecf20Sopenharmony_ci{
39448c2ecf20Sopenharmony_ci	int r;
39458c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
39468c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
39478c2ecf20Sopenharmony_ci
39488c2ecf20Sopenharmony_ci	/* This is a resume after a suspend of the set -> it's already started. */
39498c2ecf20Sopenharmony_ci	if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
39508c2ecf20Sopenharmony_ci		return 0;
39518c2ecf20Sopenharmony_ci
39528c2ecf20Sopenharmony_ci	/*
39538c2ecf20Sopenharmony_ci	 * The superblocks need to be updated on disk if the
39548c2ecf20Sopenharmony_ci	 * array is new or new devices got added (thus zeroed
39558c2ecf20Sopenharmony_ci	 * out by userspace) or __load_dirty_region_bitmap
39568c2ecf20Sopenharmony_ci	 * will overwrite them in core with old data or fail.
39578c2ecf20Sopenharmony_ci	 */
39588c2ecf20Sopenharmony_ci	if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
39598c2ecf20Sopenharmony_ci		rs_update_sbs(rs);
39608c2ecf20Sopenharmony_ci
39618c2ecf20Sopenharmony_ci	/* Load the bitmap from disk unless raid0 */
39628c2ecf20Sopenharmony_ci	r = __load_dirty_region_bitmap(rs);
39638c2ecf20Sopenharmony_ci	if (r)
39648c2ecf20Sopenharmony_ci		return r;
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci	/* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
39678c2ecf20Sopenharmony_ci	if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
39688c2ecf20Sopenharmony_ci		mddev->array_sectors = rs->array_sectors;
39698c2ecf20Sopenharmony_ci		mddev->dev_sectors = rs->dev_sectors;
39708c2ecf20Sopenharmony_ci		rs_set_rdev_sectors(rs);
39718c2ecf20Sopenharmony_ci		rs_set_capacity(rs);
39728c2ecf20Sopenharmony_ci	}
39738c2ecf20Sopenharmony_ci
39748c2ecf20Sopenharmony_ci	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
39758c2ecf20Sopenharmony_ci	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
39768c2ecf20Sopenharmony_ci	    (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
39778c2ecf20Sopenharmony_ci	     (rs->requested_bitmap_chunk_sectors &&
39788c2ecf20Sopenharmony_ci	       mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
39798c2ecf20Sopenharmony_ci		int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
39808c2ecf20Sopenharmony_ci
39818c2ecf20Sopenharmony_ci		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
39828c2ecf20Sopenharmony_ci		if (r)
39838c2ecf20Sopenharmony_ci			DMERR("Failed to resize bitmap");
39848c2ecf20Sopenharmony_ci	}
39858c2ecf20Sopenharmony_ci
39868c2ecf20Sopenharmony_ci	/* Check for any resize/reshape on @rs and adjust/initiate */
39878c2ecf20Sopenharmony_ci	/* Be prepared for mddev_resume() in raid_resume() */
39888c2ecf20Sopenharmony_ci	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
39898c2ecf20Sopenharmony_ci	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
39908c2ecf20Sopenharmony_ci		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
39918c2ecf20Sopenharmony_ci		mddev->resync_min = mddev->recovery_cp;
39928c2ecf20Sopenharmony_ci		if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
39938c2ecf20Sopenharmony_ci			mddev->resync_max_sectors = mddev->dev_sectors;
39948c2ecf20Sopenharmony_ci	}
39958c2ecf20Sopenharmony_ci
39968c2ecf20Sopenharmony_ci	/* Check for any reshape request unless new raid set */
39978c2ecf20Sopenharmony_ci	if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
39988c2ecf20Sopenharmony_ci		/* Initiate a reshape. */
39998c2ecf20Sopenharmony_ci		rs_set_rdev_sectors(rs);
40008c2ecf20Sopenharmony_ci		mddev_lock_nointr(mddev);
40018c2ecf20Sopenharmony_ci		r = rs_start_reshape(rs);
40028c2ecf20Sopenharmony_ci		mddev_unlock(mddev);
40038c2ecf20Sopenharmony_ci		if (r)
40048c2ecf20Sopenharmony_ci			DMWARN("Failed to check/start reshape, continuing without change");
40058c2ecf20Sopenharmony_ci		r = 0;
40068c2ecf20Sopenharmony_ci	}
40078c2ecf20Sopenharmony_ci
40088c2ecf20Sopenharmony_ci	return r;
40098c2ecf20Sopenharmony_ci}
40108c2ecf20Sopenharmony_ci
40118c2ecf20Sopenharmony_cistatic void raid_resume(struct dm_target *ti)
40128c2ecf20Sopenharmony_ci{
40138c2ecf20Sopenharmony_ci	struct raid_set *rs = ti->private;
40148c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
40158c2ecf20Sopenharmony_ci
40168c2ecf20Sopenharmony_ci	if (test_and_set_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
40178c2ecf20Sopenharmony_ci		/*
40188c2ecf20Sopenharmony_ci		 * A secondary resume while the device is active.
40198c2ecf20Sopenharmony_ci		 * Take this opportunity to check whether any failed
40208c2ecf20Sopenharmony_ci		 * devices are reachable again.
40218c2ecf20Sopenharmony_ci		 */
40228c2ecf20Sopenharmony_ci		attempt_restore_of_faulty_devices(rs);
40238c2ecf20Sopenharmony_ci	}
40248c2ecf20Sopenharmony_ci
40258c2ecf20Sopenharmony_ci	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
40268c2ecf20Sopenharmony_ci		/* Only reduce raid set size before running a disk removing reshape. */
40278c2ecf20Sopenharmony_ci		if (mddev->delta_disks < 0)
40288c2ecf20Sopenharmony_ci			rs_set_capacity(rs);
40298c2ecf20Sopenharmony_ci
40308c2ecf20Sopenharmony_ci		mddev_lock_nointr(mddev);
40318c2ecf20Sopenharmony_ci		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
40328c2ecf20Sopenharmony_ci		mddev->ro = 0;
40338c2ecf20Sopenharmony_ci		mddev->in_sync = 0;
40348c2ecf20Sopenharmony_ci		mddev_resume(mddev);
40358c2ecf20Sopenharmony_ci		mddev_unlock(mddev);
40368c2ecf20Sopenharmony_ci	}
40378c2ecf20Sopenharmony_ci}
40388c2ecf20Sopenharmony_ci
40398c2ecf20Sopenharmony_cistatic struct target_type raid_target = {
40408c2ecf20Sopenharmony_ci	.name = "raid",
40418c2ecf20Sopenharmony_ci	.version = {1, 15, 1},
40428c2ecf20Sopenharmony_ci	.module = THIS_MODULE,
40438c2ecf20Sopenharmony_ci	.ctr = raid_ctr,
40448c2ecf20Sopenharmony_ci	.dtr = raid_dtr,
40458c2ecf20Sopenharmony_ci	.map = raid_map,
40468c2ecf20Sopenharmony_ci	.status = raid_status,
40478c2ecf20Sopenharmony_ci	.message = raid_message,
40488c2ecf20Sopenharmony_ci	.iterate_devices = raid_iterate_devices,
40498c2ecf20Sopenharmony_ci	.io_hints = raid_io_hints,
40508c2ecf20Sopenharmony_ci	.postsuspend = raid_postsuspend,
40518c2ecf20Sopenharmony_ci	.preresume = raid_preresume,
40528c2ecf20Sopenharmony_ci	.resume = raid_resume,
40538c2ecf20Sopenharmony_ci};
40548c2ecf20Sopenharmony_ci
40558c2ecf20Sopenharmony_cistatic int __init dm_raid_init(void)
40568c2ecf20Sopenharmony_ci{
40578c2ecf20Sopenharmony_ci	DMINFO("Loading target version %u.%u.%u",
40588c2ecf20Sopenharmony_ci	       raid_target.version[0],
40598c2ecf20Sopenharmony_ci	       raid_target.version[1],
40608c2ecf20Sopenharmony_ci	       raid_target.version[2]);
40618c2ecf20Sopenharmony_ci	return dm_register_target(&raid_target);
40628c2ecf20Sopenharmony_ci}
40638c2ecf20Sopenharmony_ci
40648c2ecf20Sopenharmony_cistatic void __exit dm_raid_exit(void)
40658c2ecf20Sopenharmony_ci{
40668c2ecf20Sopenharmony_ci	dm_unregister_target(&raid_target);
40678c2ecf20Sopenharmony_ci}
40688c2ecf20Sopenharmony_ci
40698c2ecf20Sopenharmony_cimodule_init(dm_raid_init);
40708c2ecf20Sopenharmony_cimodule_exit(dm_raid_exit);
40718c2ecf20Sopenharmony_ci
40728c2ecf20Sopenharmony_cimodule_param(devices_handle_discard_safely, bool, 0644);
40738c2ecf20Sopenharmony_ciMODULE_PARM_DESC(devices_handle_discard_safely,
40748c2ecf20Sopenharmony_ci		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
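
/*
 * The parameter can be set at module load time or toggled at runtime
 * via sysfs, e.g. (hypothetical shell session):
 *
 *   modprobe dm-raid devices_handle_discard_safely=Y
 *   echo Y > /sys/module/dm_raid/parameters/devices_handle_discard_safely
 */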
40758c2ecf20Sopenharmony_ci
40768c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " raid0/1/10/4/5/6 target");
40778c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid0");
40788c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid1");
40798c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid10");
40808c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid4");
40818c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid5");
40828c2ecf20Sopenharmony_ciMODULE_ALIAS("dm-raid6");
40838c2ecf20Sopenharmony_ciMODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>");
40848c2ecf20Sopenharmony_ciMODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
40858c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");