/*
 * Copyright (C) 2010-2011 Neil Brown
 * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/slab.h>
#include <linux/module.h>

#include "md.h"
#include "raid1.h"
#include "raid5.h"
#include "raid10.h"
#include "md-bitmap.h"

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "raid"
#define	MAX_RAID_DEVICES	253 /* md-raid kernel limit */

/*
 * Minimum sectors of free reshape space per raid device
 */
#define	MIN_FREE_RESHAPE_SPACE to_sector(4*4096)

/*
 * Minimum journal space 4 MiB in sectors.
 */
#define	MIN_RAID456_JOURNAL_SPACE (4*2048)

static bool devices_handle_discard_safely = false;

/*
 * The following flags are used by dm-raid.c to set up the array state.
 * They must be cleared before md_run is called.
 */
#define FirstUse 10	/* rdev flag */

struct raid_dev {
	/*
	 * Two DM devices, one to hold metadata and one to hold the
	 * actual data/parity. The reason for this is to not confuse
	 * ti->len and give more flexibility in altering size and
	 * characteristics.
	 *
	 * While it is possible for this device to be associated
	 * with a different physical device than the data_dev, it
	 * is intended for it to be the same.
	 *    |--------- Physical Device ---------|
	 *    |- meta_dev -|------ data_dev ------|
	 */
	struct dm_dev *meta_dev;
	struct dm_dev *data_dev;
	struct md_rdev rdev;
};

/*
 * Bits for establishing rs->ctr_flags
 *
 * 1 = no flag value
 * 2 = flag with value
 */
#define __CTR_FLAG_SYNC			0  /* 1 */ /* Not with raid0! */
#define __CTR_FLAG_NOSYNC		1  /* 1 */ /* Not with raid0! */
#define __CTR_FLAG_REBUILD		2  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_DAEMON_SLEEP		3  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MIN_RECOVERY_RATE	4  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MAX_RECOVERY_RATE	5  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_MAX_WRITE_BEHIND	6  /* 2 */ /* Only with raid1! */
#define __CTR_FLAG_WRITE_MOSTLY		7  /* 2 */ /* Only with raid1! */
#define __CTR_FLAG_STRIPE_CACHE		8  /* 2 */ /* Only with raid4/5/6! */
#define __CTR_FLAG_REGION_SIZE		9  /* 2 */ /* Not with raid0! */
#define __CTR_FLAG_RAID10_COPIES	10 /* 2 */ /* Only with raid10 */
#define __CTR_FLAG_RAID10_FORMAT	11 /* 2 */ /* Only with raid10 */
/* New for v1.9.0 */
#define __CTR_FLAG_DELTA_DISKS		12 /* 2 */ /* Only with reshapable raid1/4/5/6/10! */
#define __CTR_FLAG_DATA_OFFSET		13 /* 2 */ /* Only with reshapable raid4/5/6/10! */
#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */

/* New for v1.10.0 */
#define __CTR_FLAG_JOURNAL_DEV		15 /* 2 */ /* Only with raid4/5/6 (journal device)! */

/* New for v1.11.1 */
#define __CTR_FLAG_JOURNAL_MODE		16 /* 2 */ /* Only with raid4/5/6 (journal mode)! */

/*
 * Flags for rs->ctr_flags field.
 */
#define CTR_FLAG_SYNC			(1 << __CTR_FLAG_SYNC)
#define CTR_FLAG_NOSYNC			(1 << __CTR_FLAG_NOSYNC)
#define CTR_FLAG_REBUILD		(1 << __CTR_FLAG_REBUILD)
#define CTR_FLAG_DAEMON_SLEEP		(1 << __CTR_FLAG_DAEMON_SLEEP)
#define CTR_FLAG_MIN_RECOVERY_RATE	(1 << __CTR_FLAG_MIN_RECOVERY_RATE)
#define CTR_FLAG_MAX_RECOVERY_RATE	(1 << __CTR_FLAG_MAX_RECOVERY_RATE)
#define CTR_FLAG_MAX_WRITE_BEHIND	(1 << __CTR_FLAG_MAX_WRITE_BEHIND)
#define CTR_FLAG_WRITE_MOSTLY		(1 << __CTR_FLAG_WRITE_MOSTLY)
#define CTR_FLAG_STRIPE_CACHE		(1 << __CTR_FLAG_STRIPE_CACHE)
#define CTR_FLAG_REGION_SIZE		(1 << __CTR_FLAG_REGION_SIZE)
#define CTR_FLAG_RAID10_COPIES		(1 << __CTR_FLAG_RAID10_COPIES)
#define CTR_FLAG_RAID10_FORMAT		(1 << __CTR_FLAG_RAID10_FORMAT)
#define CTR_FLAG_DELTA_DISKS		(1 << __CTR_FLAG_DELTA_DISKS)
#define CTR_FLAG_DATA_OFFSET		(1 << __CTR_FLAG_DATA_OFFSET)
#define CTR_FLAG_RAID10_USE_NEAR_SETS	(1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
#define CTR_FLAG_JOURNAL_DEV		(1 << __CTR_FLAG_JOURNAL_DEV)
#define CTR_FLAG_JOURNAL_MODE		(1 << __CTR_FLAG_JOURNAL_MODE)

/*
 * Definitions of various constructor flags to
 * be used in checks of valid / invalid flags
 * per raid level.
 */
/* Define any sync flags */
#define	CTR_FLAGS_ANY_SYNC		(CTR_FLAG_SYNC | CTR_FLAG_NOSYNC)

/* Define flags for options without argument (e.g. 'nosync') */
#define CTR_FLAG_OPTIONS_NO_ARGS	(CTR_FLAGS_ANY_SYNC | \
					 CTR_FLAG_RAID10_USE_NEAR_SETS)

/* Define flags for options with one argument (e.g. 'delta_disks +2') */
#define CTR_FLAG_OPTIONS_ONE_ARG (CTR_FLAG_REBUILD | \
				  CTR_FLAG_WRITE_MOSTLY | \
				  CTR_FLAG_DAEMON_SLEEP | \
				  CTR_FLAG_MIN_RECOVERY_RATE | \
				  CTR_FLAG_MAX_RECOVERY_RATE | \
				  CTR_FLAG_MAX_WRITE_BEHIND | \
				  CTR_FLAG_STRIPE_CACHE | \
				  CTR_FLAG_REGION_SIZE | \
				  CTR_FLAG_RAID10_COPIES | \
				  CTR_FLAG_RAID10_FORMAT | \
				  CTR_FLAG_DELTA_DISKS | \
				  CTR_FLAG_DATA_OFFSET | \
				  CTR_FLAG_JOURNAL_DEV | \
				  CTR_FLAG_JOURNAL_MODE)

/* Valid options definitions per raid level... */

/* "raid0" does only accept data offset */
#define RAID0_VALID_FLAGS	(CTR_FLAG_DATA_OFFSET)

/* "raid1" does not accept stripe cache or any raid10 options */
#define RAID1_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_WRITE_MOSTLY | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_MAX_WRITE_BEHIND | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET)

/* "raid10" does not accept any raid1 or stripe cache options */
#define RAID10_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_RAID10_COPIES | \
				 CTR_FLAG_RAID10_FORMAT | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_RAID10_USE_NEAR_SETS)

/*
 * "raid4/5/6" do not accept any raid1 or raid10 specific options
 *
 * "raid6" does not accept "nosync", because it is not guaranteed
 * that both parity and q-syndrome are being written properly with
 * any writes
 */
#define RAID45_VALID_FLAGS	(CTR_FLAGS_ANY_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_JOURNAL_DEV | \
				 CTR_FLAG_JOURNAL_MODE)

#define RAID6_VALID_FLAGS	(CTR_FLAG_SYNC | \
				 CTR_FLAG_REBUILD | \
				 CTR_FLAG_DAEMON_SLEEP | \
				 CTR_FLAG_MIN_RECOVERY_RATE | \
				 CTR_FLAG_MAX_RECOVERY_RATE | \
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
				 CTR_FLAG_DATA_OFFSET | \
				 CTR_FLAG_JOURNAL_DEV | \
				 CTR_FLAG_JOURNAL_MODE)
/* ...valid options definitions per raid level */

/*
 * Flags for rs->runtime_flags field
 * (RT_FLAG prefix meaning "runtime flag")
 *
 * These are all internal and used to define runtime state,
 * e.g. to prevent another resume from preresume processing
 * the raid set all over again.
 */
#define RT_FLAG_RS_PRERESUMED		0
#define RT_FLAG_RS_RESUMED		1
#define RT_FLAG_RS_BITMAP_LOADED	2
#define RT_FLAG_UPDATE_SBS		3
#define RT_FLAG_RESHAPE_RS		4
#define RT_FLAG_RS_SUSPENDED		5
#define RT_FLAG_RS_IN_SYNC		6
#define RT_FLAG_RS_RESYNCING		7
#define RT_FLAG_RS_GROW			8

/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
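/*
 * E.g. with MAX_RAID_DEVICES = 253 and 64-bit words this evaluates
 * to (253 + 63) / 8 / 8 = 4 array elements, i.e. 256 bits - enough
 * for one bit per possible raid device.
 */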

/*
 * raid set level, layout and chunk sectors backup/restore
 */
struct rs_layout {
	int new_level;
	int new_layout;
	int new_chunk_sectors;
};

struct raid_set {
	struct dm_target *ti;

	uint32_t stripe_cache_entries;
	unsigned long ctr_flags;
	unsigned long runtime_flags;

	uint64_t rebuild_disks[DISKS_ARRAY_ELEMS];

	int raid_disks;
	int delta_disks;
	int data_offset;
	int raid10_copies;
	int requested_bitmap_chunk_sectors;

	struct mddev md;
	struct raid_type *raid_type;

	sector_t array_sectors;
	sector_t dev_sectors;

	/* Optional raid4/5/6 journal device */
	struct journal_dev {
		struct dm_dev *dev;
		struct md_rdev rdev;
		int mode;
	} journal_dev;

	struct raid_dev dev[];
};

static void rs_config_backup(struct raid_set *rs, struct rs_layout *l)
{
	struct mddev *mddev = &rs->md;

	l->new_level = mddev->new_level;
	l->new_layout = mddev->new_layout;
	l->new_chunk_sectors = mddev->new_chunk_sectors;
}

static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
{
	struct mddev *mddev = &rs->md;

	mddev->new_level = l->new_level;
	mddev->new_layout = l->new_layout;
	mddev->new_chunk_sectors = l->new_chunk_sectors;
}

/* raid10 algorithms (i.e. formats) */
#define	ALGORITHM_RAID10_DEFAULT	0
#define	ALGORITHM_RAID10_NEAR		1
#define	ALGORITHM_RAID10_OFFSET		2
#define	ALGORITHM_RAID10_FAR		3

/* Supported raid types and properties. */
static struct raid_type {
	const char *name;		/* RAID algorithm. */
	const char *descr;		/* Descriptor text for logging. */
	const unsigned int parity_devs;	/* # of parity devices. */
	const unsigned int minimal_devs;/* minimal # of devices in set. */
	const unsigned int level;	/* RAID level. */
	const unsigned int algorithm;	/* RAID algorithm. */
} raid_types[] = {
	{"raid0",	  "raid0 (striping)",			    0, 2, 0,  0 /* NONE */},
	{"raid1",	  "raid1 (mirroring)",			    0, 2, 1,  0 /* NONE */},
	{"raid10_far",	  "raid10 far (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_FAR},
	{"raid10_offset", "raid10 offset (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_OFFSET},
	{"raid10_near",	  "raid10 near (striped mirrors)",	    0, 2, 10, ALGORITHM_RAID10_NEAR},
	{"raid10",	  "raid10 (striped mirrors)",		    0, 2, 10, ALGORITHM_RAID10_DEFAULT},
	{"raid4",	  "raid4 (dedicated first parity disk)",    1, 2, 5,  ALGORITHM_PARITY_0}, /* raid4 layout = raid5_0 */
	{"raid5_n",	  "raid5 (dedicated last parity disk)",	    1, 2, 5,  ALGORITHM_PARITY_N},
	{"raid5_ls",	  "raid5 (left symmetric)",		    1, 2, 5,  ALGORITHM_LEFT_SYMMETRIC},
	{"raid5_rs",	  "raid5 (right symmetric)",		    1, 2, 5,  ALGORITHM_RIGHT_SYMMETRIC},
	{"raid5_la",	  "raid5 (left asymmetric)",		    1, 2, 5,  ALGORITHM_LEFT_ASYMMETRIC},
	{"raid5_ra",	  "raid5 (right asymmetric)",		    1, 2, 5,  ALGORITHM_RIGHT_ASYMMETRIC},
	{"raid6_zr",	  "raid6 (zero restart)",		    2, 4, 6,  ALGORITHM_ROTATING_ZERO_RESTART},
	{"raid6_nr",	  "raid6 (N restart)",			    2, 4, 6,  ALGORITHM_ROTATING_N_RESTART},
	{"raid6_nc",	  "raid6 (N continue)",			    2, 4, 6,  ALGORITHM_ROTATING_N_CONTINUE},
	{"raid6_n_6",	  "raid6 (dedicated parity/Q n/6)",	    2, 4, 6,  ALGORITHM_PARITY_N_6},
	{"raid6_ls_6",	  "raid6 (left symmetric dedicated Q 6)",   2, 4, 6,  ALGORITHM_LEFT_SYMMETRIC_6},
	{"raid6_rs_6",	  "raid6 (right symmetric dedicated Q 6)",  2, 4, 6,  ALGORITHM_RIGHT_SYMMETRIC_6},
	{"raid6_la_6",	  "raid6 (left asymmetric dedicated Q 6)",  2, 4, 6,  ALGORITHM_LEFT_ASYMMETRIC_6},
	{"raid6_ra_6",	  "raid6 (right asymmetric dedicated Q 6)", 2, 4, 6,  ALGORITHM_RIGHT_ASYMMETRIC_6}
};

/* True, if @v is in inclusive range [@min, @max] */
static bool __within_range(long v, long min, long max)
{
	return v >= min && v <= max;
}

/* All table line arguments are defined here */
static struct arg_name_flag {
	const unsigned long flag;
	const char *name;
} __arg_name_flags[] = {
	{ CTR_FLAG_SYNC, "sync"},
	{ CTR_FLAG_NOSYNC, "nosync"},
	{ CTR_FLAG_REBUILD, "rebuild"},
	{ CTR_FLAG_DAEMON_SLEEP, "daemon_sleep"},
	{ CTR_FLAG_MIN_RECOVERY_RATE, "min_recovery_rate"},
	{ CTR_FLAG_MAX_RECOVERY_RATE, "max_recovery_rate"},
	{ CTR_FLAG_MAX_WRITE_BEHIND, "max_write_behind"},
	{ CTR_FLAG_WRITE_MOSTLY, "write_mostly"},
	{ CTR_FLAG_STRIPE_CACHE, "stripe_cache"},
	{ CTR_FLAG_REGION_SIZE, "region_size"},
	{ CTR_FLAG_RAID10_COPIES, "raid10_copies"},
	{ CTR_FLAG_RAID10_FORMAT, "raid10_format"},
	{ CTR_FLAG_DATA_OFFSET, "data_offset"},
	{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
	{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
	{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
	{ CTR_FLAG_JOURNAL_MODE, "journal_mode" },
};

/* Return argument name string for given @flag */
static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
{
	if (hweight32(flag) == 1) {
		struct arg_name_flag *anf = __arg_name_flags + ARRAY_SIZE(__arg_name_flags);

		while (anf-- > __arg_name_flags)
			if (flag & anf->flag)
				return anf->name;

	} else
		DMERR("%s called with more than one flag!", __func__);

	return NULL;
}

/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */
static struct {
	const int mode;
	const char *param;
} _raid456_journal_mode[] = {
	{ R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" },
	{ R5C_JOURNAL_MODE_WRITE_BACK    , "writeback" }
};

/* Return MD raid4/5/6 journal mode for dm @journal_mode one */
static int dm_raid_journal_mode_to_md(const char *mode)
{
	int m = ARRAY_SIZE(_raid456_journal_mode);

	while (m--)
		if (!strcasecmp(mode, _raid456_journal_mode[m].param))
			return _raid456_journal_mode[m].mode;

	return -EINVAL;
}

/* Return dm-raid raid4/5/6 journal mode string for @mode */
static const char *md_journal_mode_to_dm_raid(const int mode)
{
	int m = ARRAY_SIZE(_raid456_journal_mode);

	while (m--)
		if (mode == _raid456_journal_mode[m].mode)
			return _raid456_journal_mode[m].param;

	return "unknown";
}

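/*
 * E.g. dm_raid_journal_mode_to_md("writeback") maps to
 * R5C_JOURNAL_MODE_WRITE_BACK, and md_journal_mode_to_dm_raid()
 * translates that mode back to the string "writeback".
 */
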
/*
 * Bool helpers to test for various raid levels of a raid set.
 * It's the level as reported by the superblock rather than
 * the requested raid_type passed to the constructor.
 */
/* Return true, if raid set in @rs is raid0 */
static bool rs_is_raid0(struct raid_set *rs)
{
	return !rs->md.level;
}

/* Return true, if raid set in @rs is raid1 */
static bool rs_is_raid1(struct raid_set *rs)
{
	return rs->md.level == 1;
}

/* Return true, if raid set in @rs is raid10 */
static bool rs_is_raid10(struct raid_set *rs)
{
	return rs->md.level == 10;
}

/* Return true, if raid set in @rs is level 6 */
static bool rs_is_raid6(struct raid_set *rs)
{
	return rs->md.level == 6;
}

/* Return true, if raid set in @rs is level 4, 5 or 6 */
static bool rs_is_raid456(struct raid_set *rs)
{
	return __within_range(rs->md.level, 4, 6);
}

/* Return true, if raid set in @rs is reshapable */
static bool __is_raid10_far(int layout);
static bool rs_is_reshapable(struct raid_set *rs)
{
	return rs_is_raid456(rs) ||
	       (rs_is_raid10(rs) && !__is_raid10_far(rs->md.new_layout));
}

/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{
	return rs->md.recovery_cp < rs->md.dev_sectors;
}

/* Return true, if raid set in @rs is reshaping */
static bool rs_is_reshaping(struct raid_set *rs)
{
	return rs->md.reshape_position != MaxSector;
}

/*
 * bool helpers to test for various raid levels of a raid type @rt
 */

/* Return true, if raid type in @rt is raid0 */
static bool rt_is_raid0(struct raid_type *rt)
{
	return !rt->level;
}

/* Return true, if raid type in @rt is raid1 */
static bool rt_is_raid1(struct raid_type *rt)
{
	return rt->level == 1;
}

/* Return true, if raid type in @rt is raid10 */
static bool rt_is_raid10(struct raid_type *rt)
{
	return rt->level == 10;
}

/* Return true, if raid type in @rt is raid4/5 */
static bool rt_is_raid45(struct raid_type *rt)
{
	return __within_range(rt->level, 4, 5);
}

/* Return true, if raid type in @rt is raid6 */
static bool rt_is_raid6(struct raid_type *rt)
{
	return rt->level == 6;
}

/* Return true, if raid type in @rt is raid4/5/6 */
static bool rt_is_raid456(struct raid_type *rt)
{
	return __within_range(rt->level, 4, 6);
}
/* END: raid level bools */

/* Return valid ctr flags for the raid level of @rs */
static unsigned long __valid_flags(struct raid_set *rs)
{
	if (rt_is_raid0(rs->raid_type))
		return RAID0_VALID_FLAGS;
	else if (rt_is_raid1(rs->raid_type))
		return RAID1_VALID_FLAGS;
	else if (rt_is_raid10(rs->raid_type))
		return RAID10_VALID_FLAGS;
	else if (rt_is_raid45(rs->raid_type))
		return RAID45_VALID_FLAGS;
	else if (rt_is_raid6(rs->raid_type))
		return RAID6_VALID_FLAGS;

	return 0;
}

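/*
 * E.g. passing "stripe_cache 256" on a raid1 table line is rejected
 * by rs_check_for_valid_flags() below, because CTR_FLAG_STRIPE_CACHE
 * is not part of RAID1_VALID_FLAGS.
 */
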
/*
 * Check for valid flags set on @rs
 *
 * Has to be called after parsing of the ctr flags!
 */
static int rs_check_for_valid_flags(struct raid_set *rs)
{
	if (rs->ctr_flags & ~__valid_flags(rs)) {
		rs->ti->error = "Invalid flags combination";
		return -EINVAL;
	}

	return 0;
}

/* MD raid10 bit definitions and helpers */
#define RAID10_OFFSET			(1 << 16) /* stripes with data copies are adjacent on devices */
#define RAID10_BROCKEN_USE_FAR_SETS	(1 << 17) /* Broken in raid10.c: use sets instead of whole stripe rotation */
#define RAID10_USE_FAR_SETS		(1 << 18) /* Use sets instead of whole stripe rotation */
#define RAID10_FAR_COPIES_SHIFT		8	  /* raid10 # far copies shift (2nd byte of layout) */

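/*
 * Layout word examples (derived from raid10_format_to_md_layout()
 * below): "near" with 2 copies is (1 << 8) | 2 = 0x102; "far" with
 * 2 copies and far sets is RAID10_USE_FAR_SETS | (2 << 8) | 1 =
 * 0x40201; "offset" additionally sets RAID10_OFFSET, giving 0x50201.
 */
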
/* Return md raid10 near copies for @layout */
static unsigned int __raid10_near_copies(int layout)
{
	return layout & 0xFF;
}

/* Return md raid10 far copies for @layout */
static unsigned int __raid10_far_copies(int layout)
{
	return __raid10_near_copies(layout >> RAID10_FAR_COPIES_SHIFT);
}

/* Return true if md raid10 offset for @layout */
static bool __is_raid10_offset(int layout)
{
	return !!(layout & RAID10_OFFSET);
}

/* Return true if md raid10 near for @layout */
static bool __is_raid10_near(int layout)
{
	return !__is_raid10_offset(layout) && __raid10_near_copies(layout) > 1;
}

/* Return true if md raid10 far for @layout */
static bool __is_raid10_far(int layout)
{
	return !__is_raid10_offset(layout) && __raid10_far_copies(layout) > 1;
}

/* Return md raid10 layout string for @layout */
static const char *raid10_md_layout_to_format(int layout)
{
	/*
	 * Bit 16 stands for "offset"
	 * (i.e. adjacent stripes hold copies)
	 *
	 * Refer to MD's raid10.c for details
	 */
	if (__is_raid10_offset(layout))
		return "offset";

	if (__raid10_near_copies(layout) > 1)
		return "near";

	if (__raid10_far_copies(layout) > 1)
		return "far";

	return "unknown";
}

/* Return md raid10 algorithm for @name */
static int raid10_name_to_format(const char *name)
{
	if (!strcasecmp(name, "near"))
		return ALGORITHM_RAID10_NEAR;
	else if (!strcasecmp(name, "offset"))
		return ALGORITHM_RAID10_OFFSET;
	else if (!strcasecmp(name, "far"))
		return ALGORITHM_RAID10_FAR;

	return -EINVAL;
}

/* Return md raid10 copies for @layout */
static unsigned int raid10_md_layout_to_copies(int layout)
{
	return max(__raid10_near_copies(layout), __raid10_far_copies(layout));
}

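/*
 * E.g. raid10_md_layout_to_copies(0x102) and
 * raid10_md_layout_to_copies(0x40201) both return 2, taking the
 * maximum of the near and far copy counts encoded above.
 */
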
/* Return md raid10 format id for @format string */
static int raid10_format_to_md_layout(struct raid_set *rs,
				      unsigned int algorithm,
				      unsigned int copies)
{
	unsigned int n = 1, f = 1, r = 0;

	/*
	 * MD resilience flaw:
	 *
	 * enabling use_far_sets for far/offset formats causes copies
	 * to be colocated on the same devs together with their origins!
	 *
	 * -> disable it for now in the definition above
	 */
	if (algorithm == ALGORITHM_RAID10_DEFAULT ||
	    algorithm == ALGORITHM_RAID10_NEAR)
		n = copies;

	else if (algorithm == ALGORITHM_RAID10_OFFSET) {
		f = copies;
		r = RAID10_OFFSET;
		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
			r |= RAID10_USE_FAR_SETS;

	} else if (algorithm == ALGORITHM_RAID10_FAR) {
		f = copies;
		if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
			r |= RAID10_USE_FAR_SETS;

	} else
		return -EINVAL;

	return r | (f << RAID10_FAR_COPIES_SHIFT) | n;
}
/* END: MD raid10 bit definitions and helpers */

/* Check for any of the raid10 algorithms */
static bool __got_raid10(struct raid_type *rtp, const int layout)
{
	if (rtp->level == 10) {
		switch (rtp->algorithm) {
		case ALGORITHM_RAID10_DEFAULT:
		case ALGORITHM_RAID10_NEAR:
			return __is_raid10_near(layout);
		case ALGORITHM_RAID10_OFFSET:
			return __is_raid10_offset(layout);
		case ALGORITHM_RAID10_FAR:
			return __is_raid10_far(layout);
		default:
			break;
		}
	}

	return false;
}

/* Return raid_type for @name */
static struct raid_type *get_raid_type(const char *name)
{
	struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types);

	while (rtp-- > raid_types)
		if (!strcasecmp(rtp->name, name))
			return rtp;

	return NULL;
}

/* Return raid_type derived from @level and @layout */
static struct raid_type *get_raid_type_by_ll(const int level, const int layout)
{
	struct raid_type *rtp = raid_types + ARRAY_SIZE(raid_types);

	while (rtp-- > raid_types) {
		/* RAID10 special checks based on @layout flags/properties */
		if (rtp->level == level &&
		    (__got_raid10(rtp, layout) || rtp->algorithm == layout))
			return rtp;
	}

	return NULL;
}

/* Adjust rdev sectors */
static void rs_set_rdev_sectors(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;
	struct md_rdev *rdev;

	/*
	 * raid10 sets rdev->sectors to the device size, which
	 * is unintended in case of out-of-place reshaping
	 */
	rdev_for_each(rdev, mddev)
		if (!test_bit(Journal, &rdev->flags))
			rdev->sectors = mddev->dev_sectors;
}

/*
 * Change bdev capacity of @rs in case of a disk add/remove reshape
 */
static void rs_set_capacity(struct raid_set *rs)
{
	struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));

	set_capacity(gendisk, rs->md.array_sectors);
	revalidate_disk_size(gendisk, true);
}

/*
 * Set the mddev properties in @rs to the current
 * ones retrieved from the freshest superblock
 */
static void rs_set_cur(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;

	mddev->new_level = mddev->level;
	mddev->new_layout = mddev->layout;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
}

/*
 * Set the mddev properties in @rs to the new
 * ones requested by the ctr
 */
static void rs_set_new(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;

	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->raid_disks = rs->raid_disks;
	mddev->delta_disks = 0;
}

static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *raid_type,
				       unsigned int raid_devs)
{
	unsigned int i;
	struct raid_set *rs;

	if (raid_devs <= raid_type->parity_devs) {
		ti->error = "Insufficient number of devices";
		return ERR_PTR(-EINVAL);
	}

	rs = kzalloc(struct_size(rs, dev, raid_devs), GFP_KERNEL);
	if (!rs) {
		ti->error = "Cannot allocate raid context";
		return ERR_PTR(-ENOMEM);
	}

	mddev_init(&rs->md);

	rs->raid_disks = raid_devs;
	rs->delta_disks = 0;

	rs->ti = ti;
	rs->raid_type = raid_type;
	rs->stripe_cache_entries = 256;
	rs->md.raid_disks = raid_devs;
	rs->md.level = raid_type->level;
	rs->md.new_level = rs->md.level;
	rs->md.layout = raid_type->algorithm;
	rs->md.new_layout = rs->md.layout;
	rs->md.delta_disks = 0;
	rs->md.recovery_cp = MaxSector;

	for (i = 0; i < raid_devs; i++)
		md_rdev_init(&rs->dev[i].rdev);

	/*
	 * Remaining items to be initialized by further RAID params:
	 *  rs->md.persistent
	 *  rs->md.external
	 *  rs->md.chunk_sectors
	 *  rs->md.new_chunk_sectors
	 *  rs->md.dev_sectors
	 */

	return rs;
}

/* Free all @rs allocations */
static void raid_set_free(struct raid_set *rs)
{
	int i;

	if (rs->journal_dev.dev) {
		md_rdev_clear(&rs->journal_dev.rdev);
		dm_put_device(rs->ti, rs->journal_dev.dev);
	}

	for (i = 0; i < rs->raid_disks; i++) {
		if (rs->dev[i].meta_dev)
			dm_put_device(rs->ti, rs->dev[i].meta_dev);
		md_rdev_clear(&rs->dev[i].rdev);
		if (rs->dev[i].data_dev)
			dm_put_device(rs->ti, rs->dev[i].data_dev);
	}

	kfree(rs);
}

/*
 * For every device we have two words
 *  <meta_dev>: meta device name or '-' if missing
 *  <data_dev>: data device name or '-' if missing
 *
 * The following are permitted:
 *  - -
 *  - <data_dev>
 *  <meta_dev> <data_dev>
 *
 * The following is not allowed:
 *  <meta_dev> -
 *
 * This code parses those words. If there is a failure,
 * the caller must use raid_set_free() to unwind the operations.
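 *
 * E.g. a hypothetical raid1 table fragment passing two data devices
 * without metadata devices would end with:
 *  ... 2 - /dev/sda1 - /dev/sdb1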
 */
static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
{
	int i;
	int rebuild = 0;
	int metadata_available = 0;
	int r = 0;
	const char *arg;

	/* Put off the number of raid devices argument to get to dev pairs */
	arg = dm_shift_arg(as);
	if (!arg)
		return -EINVAL;

	for (i = 0; i < rs->raid_disks; i++) {
		rs->dev[i].rdev.raid_disk = i;

		rs->dev[i].meta_dev = NULL;
		rs->dev[i].data_dev = NULL;

		/*
		 * There are no offsets initially.
		 * Out of place reshape will set them accordingly.
		 */
		rs->dev[i].rdev.data_offset = 0;
		rs->dev[i].rdev.new_data_offset = 0;
		rs->dev[i].rdev.mddev = &rs->md;

		arg = dm_shift_arg(as);
		if (!arg)
			return -EINVAL;

		if (strcmp(arg, "-")) {
			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
					  &rs->dev[i].meta_dev);
			if (r) {
				rs->ti->error = "RAID metadata device lookup failure";
				return r;
			}

			rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
			if (!rs->dev[i].rdev.sb_page) {
				rs->ti->error = "Failed to allocate superblock page";
				return -ENOMEM;
			}
		}

		arg = dm_shift_arg(as);
		if (!arg)
			return -EINVAL;

		if (!strcmp(arg, "-")) {
			if (!test_bit(In_sync, &rs->dev[i].rdev.flags) &&
			    (!rs->dev[i].rdev.recovery_offset)) {
				rs->ti->error = "Drive designated for rebuild not specified";
				return -EINVAL;
			}

			if (rs->dev[i].meta_dev) {
				rs->ti->error = "No data device supplied with metadata device";
				return -EINVAL;
			}

			continue;
		}

		r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
				  &rs->dev[i].data_dev);
		if (r) {
			rs->ti->error = "RAID device lookup failure";
			return r;
		}

		if (rs->dev[i].meta_dev) {
			metadata_available = 1;
			rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
		}
		rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
		list_add_tail(&rs->dev[i].rdev.same_set, &rs->md.disks);
		if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
			rebuild++;
	}

	if (rs->journal_dev.dev)
		list_add_tail(&rs->journal_dev.rdev.same_set, &rs->md.disks);

	if (metadata_available) {
		rs->md.external = 0;
		rs->md.persistent = 1;
		rs->md.major_version = 2;
	} else if (rebuild && !rs->md.recovery_cp) {
		/*
		 * Without metadata, we will not be able to tell if the array
		 * is in-sync or not - we must assume it is not.  Therefore,
		 * it is impossible to rebuild a drive.
		 *
		 * Even if there is metadata, the on-disk information may
		 * indicate that the array is not in-sync and it will then
		 * fail at that time.
		 *
		 * User could specify 'nosync' option if desperate.
		 */
		rs->ti->error = "Unable to rebuild drive while array is not in-sync";
		return -EINVAL;
	}

	return 0;
}

/*
 * validate_region_size
 * @rs
 * @region_size: region size in sectors.  If 0, pick a size (4MiB default).
 *
 * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
 * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
 *
 * Returns: 0 on success, -EINVAL on failure.
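 *
 * E.g. for a 1 TiB mapping, ti->len = 2^31 sectors, so the minimum
 * region size works out to 2^31 / 2^21 = 1024 sectors, and the 4 MiB
 * (8192 sector) default below easily satisfies the constraint.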
 */
static int validate_region_size(struct raid_set *rs, unsigned long region_size)
{
	unsigned long min_region_size = rs->ti->len / (1 << 21);

	if (rs_is_raid0(rs))
		return 0;

	if (!region_size) {
		/*
		 * Choose a reasonable default.  All figures in sectors.
		 */
		if (min_region_size > (1 << 13)) {
			/* If not a power of 2, make it the next power of 2 */
			region_size = roundup_pow_of_two(min_region_size);
			DMINFO("Choosing default region size of %lu sectors",
			       region_size);
		} else {
			DMINFO("Choosing default region size of 4MiB");
			region_size = 1 << 13; /* sectors */
		}
	} else {
		/*
		 * Validate user-supplied value.
		 */
		if (region_size > rs->ti->len) {
			rs->ti->error = "Supplied region size is too large";
			return -EINVAL;
		}

		if (region_size < min_region_size) {
			DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
			      region_size, min_region_size);
			rs->ti->error = "Supplied region size is too small";
			return -EINVAL;
		}

		if (!is_power_of_2(region_size)) {
			rs->ti->error = "Region size is not a power of 2";
			return -EINVAL;
		}

		if (region_size < rs->md.chunk_sectors) {
			rs->ti->error = "Region size is smaller than the chunk size";
			return -EINVAL;
		}
	}

	/*
	 * Convert sectors to bytes.
	 */
	rs->md.bitmap_info.chunksize = to_bytes(region_size);

	return 0;
}

/*
 * validate_raid_redundancy
 * @rs
 *
 * Determine if there are enough devices in the array that haven't
 * failed (or are being rebuilt) to form a usable array.
 *
 * Returns: 0 on success, -EINVAL on failure.
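 *
 * E.g. a raid6 set tolerates at most parity_devs = 2 devices that
 * are rebuilding or lack a superblock; a third pushes it past the
 * 'too_many' check below.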
 */
static int validate_raid_redundancy(struct raid_set *rs)
{
	unsigned int i, rebuild_cnt = 0;
	unsigned int rebuilds_per_group = 0, copies, raid_disks;
	unsigned int group_size, last_group_start;

	for (i = 0; i < rs->raid_disks; i++)
		if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
		    ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
		      !rs->dev[i].rdev.sb_page)))
			rebuild_cnt++;

	switch (rs->md.level) {
	case 0:
		break;
	case 1:
		if (rebuild_cnt >= rs->md.raid_disks)
			goto too_many;
		break;
	case 4:
	case 5:
	case 6:
		if (rebuild_cnt > rs->raid_type->parity_devs)
			goto too_many;
		break;
	case 10:
		copies = raid10_md_layout_to_copies(rs->md.new_layout);
		if (copies < 2) {
			DMERR("Bogus raid10 data copies < 2!");
			return -EINVAL;
		}

		if (rebuild_cnt < copies)
			break;

		/*
		 * It is possible to have a higher rebuild count for RAID10,
		 * as long as the failed devices occur in different mirror
		 * groups (i.e. different stripes).
		 *
		 * When checking "near" format, make sure no adjacent devices
		 * have failed beyond what can be handled.  In addition to the
		 * simple case where the number of devices is a multiple of the
		 * number of copies, we must also handle cases where the number
		 * of devices is not a multiple of the number of copies.
		 * E.g.    dev1 dev2 dev3 dev4 dev5
		 *          A    A    B    B    C
		 *          C    D    D    E    E
		 */
		raid_disks = min(rs->raid_disks, rs->md.raid_disks);
		if (__is_raid10_near(rs->md.new_layout)) {
			for (i = 0; i < raid_disks; i++) {
				if (!(i % copies))
					rebuilds_per_group = 0;
				if ((!rs->dev[i].rdev.sb_page ||
				    !test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
				    (++rebuilds_per_group >= copies))
					goto too_many;
			}
			break;
		}

		/*
		 * When checking "far" and "offset" formats, we need to ensure
		 * that the device that holds its copy is not also dead or
		 * being rebuilt.  (Note that "far" and "offset" formats only
		 * support two copies right now.  These formats also only ever
		 * use the 'use_far_sets' variant.)
		 *
		 * This check is somewhat complicated by the need to account
		 * for arrays that are not a multiple of (far) copies.  This
		 * results in the need to treat the last (potentially larger)
		 * set differently.
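		 *
		 * E.g. with raid_disks = 5 and copies = 2 the arithmetic
		 * below yields group_size = 2 and last_group_start = 2,
		 * so the last (larger) set spans devices 2-4.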
		 */
		group_size = (raid_disks / copies);
		last_group_start = (raid_disks / group_size) - 1;
		last_group_start *= group_size;
		for (i = 0; i < raid_disks; i++) {
			if (!(i % copies) && !(i > last_group_start))
				rebuilds_per_group = 0;
			if ((!rs->dev[i].rdev.sb_page ||
			    !test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
			    (++rebuilds_per_group >= copies))
				goto too_many;
		}
		break;
	default:
		if (rebuild_cnt)
			return -EINVAL;
	}

	return 0;

too_many:
	return -EINVAL;
}

/*
 * Possible arguments are...
 *	<chunk_size> [optional_args]
 *
 * Argument definitions
 *    <chunk_size>			The number of sectors per disk that
 *					will form the "stripe"
 *    [[no]sync]			Force or prevent recovery of the
 *					entire array
 *    [rebuild <idx>]			Rebuild the drive indicated by the index
 *    [daemon_sleep <ms>]		Time between bitmap daemon work to
 *					clear bits
 *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [write_mostly <idx>]		Indicate a write mostly drive via index
 *    [max_write_behind <sectors>]	See '--write-behind=' (man mdadm)
 *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
 *    [region_size <sectors>]		Defines granularity of bitmap
 *    [journal_dev <dev>]		raid4/5/6 journaling device
 *					(i.e. write hole closing log)
 *
 * RAID10-only options:
 *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
 *    [raid10_format <near|far|offset>]	Layout algorithm.  (Default: near)
 */

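/*
 * Editor's note: an illustrative constructor line, not from the original
 * source (device names and sizes are made up).  It maps 1 GiB (2097152
 * sectors) as raid5 with a 64-sector chunk and an explicit region size,
 * i.e. three raid parameters, followed by three metadata/data device
 * pairs:
 *
 *   dmsetup create r5 --table "0 2097152 raid raid5_ls \
 *	3 64 region_size 8192 \
 *	3 /dev/meta0 /dev/data0 /dev/meta1 /dev/data1 /dev/meta2 /dev/data2"
 */
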
static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
			     unsigned int num_raid_params)
{
	int value, raid10_format = ALGORITHM_RAID10_DEFAULT;
	unsigned int raid10_copies = 2;
	unsigned int i, write_mostly = 0;
	unsigned int region_size = 0;
	sector_t max_io_len;
	const char *arg, *key;
	struct raid_dev *rd;
	struct raid_type *rt = rs->raid_type;

	arg = dm_shift_arg(as);
	num_raid_params--; /* Account for chunk_size argument */

	if (kstrtoint(arg, 10, &value) < 0) {
		rs->ti->error = "Bad numerical argument given for chunk_size";
		return -EINVAL;
	}

	/*
	 * First, parse the in-order required arguments
	 * "chunk_size" is the only argument of this type.
	 */
	if (rt_is_raid1(rt)) {
		if (value)
			DMERR("Ignoring chunk size parameter for RAID 1");
		value = 0;
	} else if (!is_power_of_2(value)) {
		rs->ti->error = "Chunk size must be a power of 2";
		return -EINVAL;
	} else if (value < 8) {
		rs->ti->error = "Chunk size value is too small";
		return -EINVAL;
	}

	rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;

	/*
	 * We set each individual device as In_sync with a completed
	 * 'recovery_offset'.  If there has been a device failure or
	 * replacement then one of the following cases applies:
	 *
	 *   1) User specifies 'rebuild'.
	 *	- Device is reset when param is read.
	 *   2) A new device is supplied.
	 *	- No matching superblock found, resets device.
	 *   3) Device failure was transient and returns on reload.
	 *	- Failure noticed, resets device for bitmap replay.
	 *   4) Device hadn't completed recovery after previous failure.
	 *	- Superblock is read and overrides recovery_offset.
	 *
	 * What is found in the superblocks of the devices is always
	 * authoritative, unless 'rebuild' or '[no]sync' was specified.
	 */
	for (i = 0; i < rs->raid_disks; i++) {
		set_bit(In_sync, &rs->dev[i].rdev.flags);
		rs->dev[i].rdev.recovery_offset = MaxSector;
	}

	/*
	 * Second, parse the unordered optional arguments
	 */
	for (i = 0; i < num_raid_params; i++) {
		key = dm_shift_arg(as);
		if (!key) {
			rs->ti->error = "Not enough raid parameters given";
			return -EINVAL;
		}

		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC))) {
			if (test_and_set_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'nosync' argument allowed";
				return -EINVAL;
			}
			continue;
		}
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_SYNC))) {
			if (test_and_set_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'sync' argument allowed";
				return -EINVAL;
			}
			continue;
		}
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_USE_NEAR_SETS))) {
			if (test_and_set_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'raid10_use_near_sets' argument allowed";
				return -EINVAL;
			}
			continue;
		}

		arg = dm_shift_arg(as);
		i++; /* Account for the argument pairs */
		if (!arg) {
			rs->ti->error = "Wrong number of raid parameters given";
			return -EINVAL;
		}

		/*
		 * Parameters that take a string value are checked here.
		 */
		/* "raid10_format {near|offset|far}" */
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) {
			if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'raid10_format' argument pair allowed";
				return -EINVAL;
			}
			if (!rt_is_raid10(rt)) {
				rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
				return -EINVAL;
			}
			raid10_format = raid10_name_to_format(arg);
			if (raid10_format < 0) {
				rs->ti->error = "Invalid 'raid10_format' value given";
				return raid10_format;
			}
			continue;
		}

		/* "journal_dev <dev>" */
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
			int r;
			struct md_rdev *jdev;

			if (test_and_set_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
				rs->ti->error = "Only one raid4/5/6 set journaling device allowed";
				return -EINVAL;
			}
			if (!rt_is_raid456(rt)) {
				rs->ti->error = "'journal_dev' is an invalid parameter for this RAID type";
				return -EINVAL;
			}
			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
					  &rs->journal_dev.dev);
			if (r) {
				rs->ti->error = "raid4/5/6 journal device lookup failure";
				return r;
			}
			jdev = &rs->journal_dev.rdev;
			md_rdev_init(jdev);
			jdev->mddev = &rs->md;
			jdev->bdev = rs->journal_dev.dev->bdev;
			jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode));
			if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) {
				rs->ti->error = "No space for raid4/5/6 journal";
				return -ENOSPC;
			}
			rs->journal_dev.mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
			set_bit(Journal, &jdev->flags);
			continue;
		}

		/* "journal_mode <mode>" ("journal_dev" mandatory!) */
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE))) {
			int r;

			if (!test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
				rs->ti->error = "raid4/5/6 'journal_mode' is invalid without 'journal_dev'";
				return -EINVAL;
			}
			if (test_and_set_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
				rs->ti->error = "Only one raid4/5/6 'journal_mode' argument allowed";
				return -EINVAL;
			}
			r = dm_raid_journal_mode_to_md(arg);
			if (r < 0) {
				rs->ti->error = "Invalid 'journal_mode' argument";
				return r;
			}
			rs->journal_dev.mode = r;
			continue;
		}

		/*
		 * Parameters with number values from here on.
		 */
		if (kstrtoint(arg, 10, &value) < 0) {
			rs->ti->error = "Bad numerical argument given in raid params";
			return -EINVAL;
		}

		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD))) {
			/*
			 * "rebuild" is being passed in by userspace to provide
			 * indexes of replaced devices and to set up additional
			 * devices on raid level takeover.
			 */
			if (!__within_range(value, 0, rs->raid_disks - 1)) {
				rs->ti->error = "Invalid rebuild index given";
				return -EINVAL;
			}

			if (test_and_set_bit(value, (void *) rs->rebuild_disks)) {
				rs->ti->error = "rebuild for this index already given";
				return -EINVAL;
			}

			rd = rs->dev + value;
			clear_bit(In_sync, &rd->rdev.flags);
			clear_bit(Faulty, &rd->rdev.flags);
			rd->rdev.recovery_offset = 0;
			set_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags);
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY))) {
			if (!rt_is_raid1(rt)) {
				rs->ti->error = "write_mostly option is only valid for RAID1";
				return -EINVAL;
			}

			if (!__within_range(value, 0, rs->md.raid_disks - 1)) {
				rs->ti->error = "Invalid write_mostly index given";
				return -EINVAL;
			}

			write_mostly++;
			set_bit(WriteMostly, &rs->dev[value].rdev.flags);
			set_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags);
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND))) {
			if (!rt_is_raid1(rt)) {
				rs->ti->error = "max_write_behind option is only valid for RAID1";
				return -EINVAL;
			}

			if (test_and_set_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) {
				rs->ti->error = "Only one max_write_behind argument pair allowed";
				return -EINVAL;
			}

			/*
			 * In device-mapper, we specify things in sectors, but
			 * MD records this value in kB
			 */
			if (value < 0 || value / 2 > COUNTER_MAX) {
				rs->ti->error = "Max write-behind limit out of range";
				return -EINVAL;
			}

			rs->md.bitmap_info.max_write_behind = value / 2;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP))) {
			if (test_and_set_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) {
				rs->ti->error = "Only one daemon_sleep argument pair allowed";
				return -EINVAL;
			}
			if (value < 0) {
				rs->ti->error = "daemon sleep period out of range";
				return -EINVAL;
			}
			rs->md.bitmap_info.daemon_sleep = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET))) {
			/* Userspace passes new data_offset after having extended the data image LV */
			if (test_and_set_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
				rs->ti->error = "Only one data_offset argument pair allowed";
				return -EINVAL;
			}
			/* Ensure sensible data offset */
			if (value < 0 ||
			    (value && (value < MIN_FREE_RESHAPE_SPACE || value % to_sector(PAGE_SIZE)))) {
				rs->ti->error = "Bogus data_offset value";
				return -EINVAL;
			}
			rs->data_offset = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS))) {
			/* Define the +/-# of disks to add to/remove from the given raid set */
			if (test_and_set_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) {
				rs->ti->error = "Only one delta_disks argument pair allowed";
				return -EINVAL;
			}
			/* Ensure MAX_RAID_DEVICES and raid type minimal_devs! */
			if (!__within_range(abs(value), 1, MAX_RAID_DEVICES - rt->minimal_devs)) {
				rs->ti->error = "Too many delta_disks requested";
				return -EINVAL;
			}

			rs->delta_disks = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE))) {
			if (test_and_set_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) {
				rs->ti->error = "Only one stripe_cache argument pair allowed";
				return -EINVAL;
			}

			if (!rt_is_raid456(rt)) {
				rs->ti->error = "Inappropriate argument: stripe_cache";
				return -EINVAL;
			}

			if (value < 0) {
				rs->ti->error = "Bogus stripe cache entries value";
				return -EINVAL;
			}
			rs->stripe_cache_entries = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE))) {
			if (test_and_set_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) {
				rs->ti->error = "Only one min_recovery_rate argument pair allowed";
				return -EINVAL;
			}

			if (value < 0) {
				rs->ti->error = "min_recovery_rate out of range";
				return -EINVAL;
			}
			rs->md.sync_speed_min = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE))) {
			if (test_and_set_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) {
				rs->ti->error = "Only one max_recovery_rate argument pair allowed";
				return -EINVAL;
			}

			if (value < 0) {
				rs->ti->error = "max_recovery_rate out of range";
				return -EINVAL;
			}
			rs->md.sync_speed_max = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE))) {
			if (test_and_set_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) {
				rs->ti->error = "Only one region_size argument pair allowed";
				return -EINVAL;
			}

			region_size = value;
			rs->requested_bitmap_chunk_sectors = value;
		} else if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES))) {
			if (test_and_set_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) {
				rs->ti->error = "Only one raid10_copies argument pair allowed";
				return -EINVAL;
			}

			if (!__within_range(value, 2, rs->md.raid_disks)) {
				rs->ti->error = "Bad value for 'raid10_copies'";
				return -EINVAL;
			}

			raid10_copies = value;
		} else {
			DMERR("Unable to parse RAID parameter: %s", key);
			rs->ti->error = "Unable to parse RAID parameter";
			return -EINVAL;
		}
	}

	if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) &&
	    test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
		rs->ti->error = "sync and nosync are mutually exclusive";
		return -EINVAL;
	}

	if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) &&
	    (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ||
	     test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))) {
		rs->ti->error = "sync/nosync and rebuild are mutually exclusive";
		return -EINVAL;
	}

	if (write_mostly >= rs->md.raid_disks) {
		rs->ti->error = "Can't set all raid1 devices to write_mostly";
		return -EINVAL;
	}

	if (rs->md.sync_speed_max &&
	    rs->md.sync_speed_min > rs->md.sync_speed_max) {
		rs->ti->error = "Bogus recovery rates";
		return -EINVAL;
	}

	if (validate_region_size(rs, region_size))
		return -EINVAL;

	if (rs->md.chunk_sectors)
		max_io_len = rs->md.chunk_sectors;
	else
		max_io_len = region_size;

	if (dm_set_target_max_io_len(rs->ti, max_io_len))
		return -EINVAL;

	if (rt_is_raid10(rt)) {
		if (raid10_copies > rs->md.raid_disks) {
			rs->ti->error = "Not enough devices to satisfy specification";
			return -EINVAL;
		}

		rs->md.new_layout = raid10_format_to_md_layout(rs, raid10_format, raid10_copies);
		if (rs->md.new_layout < 0) {
			rs->ti->error = "Error getting raid10 format";
			return rs->md.new_layout;
		}

		rt = get_raid_type_by_ll(10, rs->md.new_layout);
		if (!rt) {
			rs->ti->error = "Failed to recognize new raid10 layout";
			return -EINVAL;
		}

		if ((rt->algorithm == ALGORITHM_RAID10_DEFAULT ||
		     rt->algorithm == ALGORITHM_RAID10_NEAR) &&
		    test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) {
			rs->ti->error = "RAID10 format 'near' and 'raid10_use_near_sets' are incompatible";
			return -EINVAL;
		}
	}

	rs->raid10_copies = raid10_copies;

	/* Assume there are no metadata devices until the drives are parsed */
	rs->md.persistent = 0;
	rs->md.external = 1;

	/* Check if any invalid ctr arguments have been passed in for the raid level */
	return rs_check_for_valid_flags(rs);
}

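/*
 * Editor's note: illustrative helper, not part of the driver.  Several
 * of the ctr arguments above arrive in 512-byte sectors but are handed
 * to MD in KiB; "max_write_behind 16384" therefore becomes 8192 KiB,
 * which is exactly the "value / 2" conversion used above.
 */
static inline int __example_sectors_to_kib(int sectors)
{
	return sectors / 2;	/* 1 KiB == 2 sectors of 512 bytes */
}
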
/* Set raid4/5/6 cache size */
static int rs_set_raid456_stripe_cache(struct raid_set *rs)
{
	int r;
	struct r5conf *conf;
	struct mddev *mddev = &rs->md;
	uint32_t min_stripes = max(mddev->chunk_sectors, mddev->new_chunk_sectors) / 2;
	uint32_t nr_stripes = rs->stripe_cache_entries;

	if (!rt_is_raid456(rs->raid_type)) {
		rs->ti->error = "Inappropriate raid level; cannot change stripe_cache size";
		return -EINVAL;
	}

	if (nr_stripes < min_stripes) {
		DMINFO("Adjusting requested %u stripe cache entries to %u to suit stripe size",
		       nr_stripes, min_stripes);
		nr_stripes = min_stripes;
	}

	conf = mddev->private;
	if (!conf) {
		rs->ti->error = "Cannot change stripe_cache size on inactive RAID set";
		return -EINVAL;
	}

	/* Try setting number of stripes in raid456 stripe cache */
	if (conf->min_nr_stripes != nr_stripes) {
		r = raid5_set_cache_size(mddev, nr_stripes);
		if (r) {
			rs->ti->error = "Failed to set raid4/5/6 stripe cache size";
			return r;
		}

		DMINFO("%u stripe cache entries", nr_stripes);
	}

	return 0;
}

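/*
 * Editor's note: worked example for the minimum enforced above
 * (illustration only).  With a 256-sector (128 KiB) chunk,
 * min_stripes = 256 / 2 = 128, so e.g. "stripe_cache 64" is raised
 * to 128 entries before raid5_set_cache_size() is called.
 */
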
/* Return # of data stripes as kept in mddev as of @rs (i.e. as of superblock) */
static unsigned int mddev_data_stripes(struct raid_set *rs)
{
	return rs->md.raid_disks - rs->raid_type->parity_devs;
}

/* Return # of data stripes of @rs (i.e. as of ctr) */
static unsigned int rs_data_stripes(struct raid_set *rs)
{
	return rs->raid_disks - rs->raid_type->parity_devs;
}

/*
 * Retrieve rdev->sectors from any valid raid device of @rs
 * to allow userspace to pass in arbitrary "- -" device tuples.
 */
static sector_t __rdev_sectors(struct raid_set *rs)
{
	int i;

	for (i = 0; i < rs->raid_disks; i++) {
		struct md_rdev *rdev = &rs->dev[i].rdev;

		if (!test_bit(Journal, &rdev->flags) &&
		    rdev->bdev && rdev->sectors)
			return rdev->sectors;
	}

	return 0;
}

/* Check that calculated dev_sectors fits all component devices. */
static int _check_data_dev_sectors(struct raid_set *rs)
{
	sector_t ds = ~0;
	struct md_rdev *rdev;

	rdev_for_each(rdev, &rs->md)
		if (!test_bit(Journal, &rdev->flags) && rdev->bdev) {
			ds = min(ds, to_sector(i_size_read(rdev->bdev->bd_inode)));
			if (ds < rs->md.dev_sectors) {
				rs->ti->error = "Component device(s) too small";
				return -EINVAL;
			}
		}

	return 0;
}

/* Calculate the sectors per device and per array used for @rs */
static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
	int delta_disks;
	unsigned int data_stripes;
	sector_t array_sectors = sectors, dev_sectors = sectors;
	struct mddev *mddev = &rs->md;

	if (use_mddev) {
		delta_disks = mddev->delta_disks;
		data_stripes = mddev_data_stripes(rs);
	} else {
		delta_disks = rs->delta_disks;
		data_stripes = rs_data_stripes(rs);
	}

	/* Special raid1 case w/o delta_disks support (yet) */
	if (rt_is_raid1(rs->raid_type))
		;
	else if (rt_is_raid10(rs->raid_type)) {
		if (rs->raid10_copies < 2 ||
		    delta_disks < 0) {
			rs->ti->error = "Bogus raid10 data copies or delta disks";
			return -EINVAL;
		}

		dev_sectors *= rs->raid10_copies;
		if (sector_div(dev_sectors, data_stripes))
			goto bad;

		array_sectors = (data_stripes + delta_disks) * dev_sectors;
		if (sector_div(array_sectors, rs->raid10_copies))
			goto bad;

	} else if (sector_div(dev_sectors, data_stripes))
		goto bad;

	else
		/* Striped layouts */
		array_sectors = (data_stripes + delta_disks) * dev_sectors;

	mddev->array_sectors = array_sectors;
	mddev->dev_sectors = dev_sectors;
	rs_set_rdev_sectors(rs);

	return _check_data_dev_sectors(rs);
bad:
	rs->ti->error = "Target length not divisible by number of data devices";
	return -EINVAL;
}

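/*
 * Editor's note: worked example for the sizing math above (illustration
 * only, numbers made up).  raid5 on 4 devices has 3 data stripes, so a
 * 6291456-sector (3 GiB) target maps to dev_sectors = 6291456 / 3 =
 * 2097152 per device and array_sectors = 3 * 2097152 = 6291456.  raid10
 * on 4 devices with 2 copies has data_stripes = 4, so the same target
 * gives dev_sectors = 6291456 * 2 / 4 = 3145728 and array_sectors =
 * 4 * 3145728 / 2 = 6291456: the target length is preserved while each
 * device stores half of it.
 */
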
/* Setup recovery on @rs */
static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
	/* raid0 does not recover */
	if (rs_is_raid0(rs))
		rs->md.recovery_cp = MaxSector;
	/*
	 * A raid6 set has to be recovered either
	 * completely or for the grown part to
	 * ensure proper parity and Q-Syndrome
	 */
	else if (rs_is_raid6(rs))
		rs->md.recovery_cp = dev_sectors;
	/*
	 * Other raid set types may skip recovery
	 * depending on the 'nosync' flag.
	 */
	else
		rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
				     ? MaxSector : dev_sectors;
}

static void do_table_event(struct work_struct *ws)
{
	struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);

	smp_rmb(); /* Make sure we access the most recent mddev properties */
	if (!rs_is_reshaping(rs)) {
		if (rs_is_raid10(rs))
			rs_set_rdev_sectors(rs);
		rs_set_capacity(rs);
	}
	dm_table_event(rs->ti->table);
}

/*
 * Make sure a valid takeover (level switch) is being requested on @rs
 *
 * Conversions of raid sets from one MD personality to another
 * have to conform to restrictions which are enforced here.
 */
static int rs_check_takeover(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;
	unsigned int near_copies;

	if (rs->md.degraded) {
		rs->ti->error = "Can't takeover degraded raid set";
		return -EPERM;
	}

	if (rs_is_reshaping(rs)) {
		rs->ti->error = "Can't takeover reshaping raid set";
		return -EPERM;
	}

	switch (mddev->level) {
	case 0:
		/* raid0 -> raid1/5 with one disk */
		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
		    mddev->raid_disks == 1)
			return 0;

		/* raid0 -> raid10 */
		if (mddev->new_level == 10 &&
		    !(rs->raid_disks % mddev->raid_disks))
			return 0;

		/* raid0 with multiple disks -> raid4/5/6 */
		if (__within_range(mddev->new_level, 4, 6) &&
		    mddev->new_layout == ALGORITHM_PARITY_N &&
		    mddev->raid_disks > 1)
			return 0;

		break;

	case 10:
		/* Can't takeover raid10_offset! */
		if (__is_raid10_offset(mddev->layout))
			break;

		near_copies = __raid10_near_copies(mddev->layout);

		/* raid10* -> raid0 */
		if (mddev->new_level == 0) {
			/* Can takeover raid10_near with raid disks divisible by data copies! */
			if (near_copies > 1 &&
			    !(mddev->raid_disks % near_copies)) {
				mddev->raid_disks /= near_copies;
				mddev->delta_disks = mddev->raid_disks;
				return 0;
			}

			/* Can takeover raid10_far */
			if (near_copies == 1 &&
			    __raid10_far_copies(mddev->layout) > 1)
				return 0;

			break;
		}

		/* raid10_{near,far} -> raid1 */
		if (mddev->new_level == 1 &&
		    max(near_copies, __raid10_far_copies(mddev->layout)) == mddev->raid_disks)
			return 0;

		/* raid10_{near,far} with 2 disks -> raid4/5 */
		if (__within_range(mddev->new_level, 4, 5) &&
		    mddev->raid_disks == 2)
			return 0;
		break;

	case 1:
		/* raid1 with 2 disks -> raid4/5 */
		if (__within_range(mddev->new_level, 4, 5) &&
		    mddev->raid_disks == 2) {
			mddev->degraded = 1;
			return 0;
		}

		/* raid1 -> raid0 */
		if (mddev->new_level == 0 &&
		    mddev->raid_disks == 1)
			return 0;

		/* raid1 -> raid10 */
		if (mddev->new_level == 10)
			return 0;
		break;

	case 4:
		/* raid4 -> raid0 */
		if (mddev->new_level == 0)
			return 0;

		/* raid4 -> raid1/5 with 2 disks */
		if ((mddev->new_level == 1 || mddev->new_level == 5) &&
		    mddev->raid_disks == 2)
			return 0;

		/* raid4 -> raid5/6 with parity N */
		if (__within_range(mddev->new_level, 5, 6) &&
		    mddev->layout == ALGORITHM_PARITY_N)
			return 0;
		break;

	case 5:
		/* raid5 with parity N -> raid0 */
		if (mddev->new_level == 0 &&
		    mddev->layout == ALGORITHM_PARITY_N)
			return 0;

		/* raid5 with parity N -> raid4 */
		if (mddev->new_level == 4 &&
		    mddev->layout == ALGORITHM_PARITY_N)
			return 0;

		/* raid5 with 2 disks -> raid1/4/10 */
		if ((mddev->new_level == 1 || mddev->new_level == 4 ||
		     mddev->new_level == 10) &&
		    mddev->raid_disks == 2)
			return 0;

		/* raid5_* -> raid6_*_6 with Q-Syndrome N (e.g. raid5_ra -> raid6_ra_6) */
		if (mddev->new_level == 6 &&
		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
		     __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC_6, ALGORITHM_RIGHT_SYMMETRIC_6)))
			return 0;
		break;

	case 6:
		/* raid6 with parity N -> raid0 */
		if (mddev->new_level == 0 &&
		    mddev->layout == ALGORITHM_PARITY_N)
			return 0;

		/* raid6 with parity N -> raid4 */
		if (mddev->new_level == 4 &&
		    mddev->layout == ALGORITHM_PARITY_N)
			return 0;

		/* raid6_*_n with Q-Syndrome N -> raid5_* */
		if (mddev->new_level == 5 &&
		    ((mddev->layout == ALGORITHM_PARITY_N && mddev->new_layout == ALGORITHM_PARITY_N) ||
		     __within_range(mddev->new_layout, ALGORITHM_LEFT_ASYMMETRIC, ALGORITHM_RIGHT_SYMMETRIC)))
			return 0;

	default:
		break;
	}

	rs->ti->error = "takeover not possible";
	return -EINVAL;
}

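/*
 * Editor's note: worked example for the raid10 -> raid0 branch above
 * (illustration only).  A raid10_near set with 4 devices and 2 near
 * copies satisfies "raid_disks % near_copies == 0", so the takeover
 * is allowed and raid_disks becomes 4 / 2 = 2, with delta_disks set
 * to that new value.
 */
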
/* True if @rs requested to be taken over */
static bool rs_takeover_requested(struct raid_set *rs)
{
	return rs->md.new_level != rs->md.level;
}

/* True if layout is set to reshape. */
static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev)
{
	return (use_mddev ? rs->md.delta_disks : rs->delta_disks) ||
	       rs->md.new_layout != rs->md.layout ||
	       rs->md.new_chunk_sectors != rs->md.chunk_sectors;
}

/* True if @rs is requested to reshape by ctr */
static bool rs_reshape_requested(struct raid_set *rs)
{
	bool change;
	struct mddev *mddev = &rs->md;

	if (rs_takeover_requested(rs))
		return false;

	if (rs_is_raid0(rs))
		return false;

	change = rs_is_layout_change(rs, false);

	/* Historical case to support raid1 reshape without delta disks */
	if (rs_is_raid1(rs)) {
		if (rs->delta_disks)
			return !!rs->delta_disks;

		return !change &&
		       mddev->raid_disks != rs->raid_disks;
	}

	if (rs_is_raid10(rs))
		return change &&
		       !__is_raid10_far(mddev->new_layout) &&
		       rs->delta_disks >= 0;

	return change;
}

/* Features */
#define	FEATURE_FLAG_SUPPORTS_V190	0x1	/* Supports extended superblock */

/* State flags for sb->flags */
#define	SB_FLAG_RESHAPE_ACTIVE		0x1
#define	SB_FLAG_RESHAPE_BACKWARDS	0x2

/*
 * This structure is never routinely used by userspace, unlike md superblocks.
 * Devices with this superblock should only ever be accessed via device-mapper.
 */
#define DM_RAID_MAGIC 0x64526D44
struct dm_raid_superblock {
	__le32 magic;		/* "DmRd" */
	__le32 compat_features;	/* Used to indicate compatible features (like 1.9.0 ondisk metadata extension) */

	__le32 num_devices;	/* Number of devices in this raid set. (Max 64) */
	__le32 array_position;	/* The position of this drive in the raid set */

	__le64 events;		/* Incremented by md when superblock updated */
	__le64 failed_devices;	/* Pre 1.9.0 part of bit field of devices to */
				/* indicate failures (see extension below) */

	/*
	 * This offset tracks the progress of the repair or replacement of
	 * an individual drive.
	 */
	__le64 disk_recovery_offset;

	/*
	 * This offset tracks the progress of the initial raid set
	 * synchronisation/parity calculation.
	 */
	__le64 array_resync_offset;

	/*
	 * raid characteristics
	 */
	__le32 level;
	__le32 layout;
	__le32 stripe_sectors;

	/********************************************************************
	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
	 *
	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
	 */

	__le32 flags; /* Flags defining array states for reshaping */

	/*
	 * This offset tracks the progress of a raid
	 * set reshape in order to be able to restart it
	 */
	__le64 reshape_position;

	/*
	 * These define the properties of the array in case of an interrupted reshape
	 */
	__le32 new_level;
	__le32 new_layout;
	__le32 new_stripe_sectors;
	__le32 delta_disks;

	__le64 array_sectors; /* Array size in sectors */

	/*
	 * Sector offsets to data on devices (reshaping).
	 * Needed to support out of place reshaping, thus
	 * not writing over any stripes whilst converting
	 * them from old to new layout
	 */
	__le64 data_offset;
	__le64 new_data_offset;

	__le64 sectors; /* Used device size in sectors */

	/*
	 * Additional bit field of devices indicating failures to support
	 * up to 256 devices with the 1.9.0 on-disk metadata format
	 */
	__le64 extended_failed_devices[DISKS_ARRAY_ELEMS - 1];

	__le32 incompat_features;	/* Used to indicate any incompatible features */

	/* Always set rest up to logical block size to 0 when writing (see get_metadata_device() below). */
} __packed;

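/*
 * Editor's note: illustrative helpers, not part of the driver.  The
 * failure state above is one bit per device, spread over failed_devices
 * (devices 0..63) and, with the v1.9.0 format, extended_failed_devices[]
 * (devices 64..255).  E.g. device 70 maps to word 1, i.e.
 * extended_failed_devices[0], bit 70 % 64 == 6.
 */
static inline unsigned int __example_failed_dev_word(unsigned int dev)
{
	return dev / 64;	/* 0 == failed_devices, n > 0 == extended[n - 1] */
}

static inline unsigned int __example_failed_dev_bit(unsigned int dev)
{
	return dev % 64;
}
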
/*
 * Check for reshape constraints on raid set @rs:
 *
 * - reshape function non-existent
 * - degraded set
 * - ongoing recovery
 * - ongoing reshape
 *
 * Returns 0 if no constraint is violated, or -EPERM with rs->ti->error
 * pointing to the offending one.
 */
static int rs_check_reshape(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;

	if (!mddev->pers || !mddev->pers->check_reshape)
		rs->ti->error = "Reshape not supported";
	else if (mddev->degraded)
		rs->ti->error = "Can't reshape degraded raid set";
	else if (rs_is_recovering(rs))
		rs->ti->error = "Convert request on recovering raid set prohibited";
	else if (rs_is_reshaping(rs))
		rs->ti->error = "raid set already reshaping!";
	else if (!(rs_is_raid1(rs) || rs_is_raid10(rs) || rs_is_raid456(rs)))
		rs->ti->error = "Reshaping only supported for raid1/4/5/6/10";
	else
		return 0;

	return -EPERM;
}

static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
{
	BUG_ON(!rdev->sb_page);

	if (rdev->sb_loaded && !force_reload)
		return 0;

	rdev->sb_loaded = 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) {
		DMERR("Failed to read superblock of device at position %d",
		      rdev->raid_disk);
		md_error(rdev->mddev, rdev);
		set_bit(Faulty, &rdev->flags);
		return -EIO;
	}

	rdev->sb_loaded = 1;

	return 0;
}

static void sb_retrieve_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{
	failed_devices[0] = le64_to_cpu(sb->failed_devices);
	memset(failed_devices + 1, 0, sizeof(sb->extended_failed_devices));

	if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
		int i = ARRAY_SIZE(sb->extended_failed_devices);

		while (i--)
			failed_devices[i+1] = le64_to_cpu(sb->extended_failed_devices[i]);
	}
}

static void sb_update_failed_devices(struct dm_raid_superblock *sb, uint64_t *failed_devices)
{
	int i = ARRAY_SIZE(sb->extended_failed_devices);

	sb->failed_devices = cpu_to_le64(failed_devices[0]);
	while (i--)
		sb->extended_failed_devices[i] = cpu_to_le64(failed_devices[i+1]);
}

/*
 * Synchronize the superblock members with the raid set properties
 *
 * All superblock data is little endian.
 */
static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	bool update_failed_devices = false;
	unsigned int i;
	uint64_t failed_devices[DISKS_ARRAY_ELEMS];
	struct dm_raid_superblock *sb;
	struct raid_set *rs = container_of(mddev, struct raid_set, md);

	/* No metadata device, no superblock */
	if (!rdev->meta_bdev)
		return;

	BUG_ON(!rdev->sb_page);

	sb = page_address(rdev->sb_page);

	sb_retrieve_failed_devices(sb, failed_devices);

	for (i = 0; i < rs->raid_disks; i++)
		if (!rs->dev[i].data_dev || test_bit(Faulty, &rs->dev[i].rdev.flags)) {
			update_failed_devices = true;
			set_bit(i, (void *) failed_devices);
		}

	if (update_failed_devices)
		sb_update_failed_devices(sb, failed_devices);

	sb->magic = cpu_to_le32(DM_RAID_MAGIC);
	sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);

	sb->num_devices = cpu_to_le32(mddev->raid_disks);
	sb->array_position = cpu_to_le32(rdev->raid_disk);

	sb->events = cpu_to_le64(mddev->events);

	sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
	sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);

	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);

	/********************************************************************
	 * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
	 *
	 * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
	 */
	sb->new_level = cpu_to_le32(mddev->new_level);
	sb->new_layout = cpu_to_le32(mddev->new_layout);
	sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);

	sb->delta_disks = cpu_to_le32(mddev->delta_disks);

	smp_rmb(); /* Make sure we access the most recent reshape position */
	sb->reshape_position = cpu_to_le64(mddev->reshape_position);
	if (le64_to_cpu(sb->reshape_position) != MaxSector) {
		/* Flag ongoing reshape */
		sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE);

		if (mddev->delta_disks < 0 || mddev->reshape_backwards)
			sb->flags |= cpu_to_le32(SB_FLAG_RESHAPE_BACKWARDS);
	} else {
		/* Clear reshape flags */
		sb->flags &= ~(cpu_to_le32(SB_FLAG_RESHAPE_ACTIVE|SB_FLAG_RESHAPE_BACKWARDS));
	}

	sb->array_sectors = cpu_to_le64(mddev->array_sectors);
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->new_data_offset = cpu_to_le64(rdev->new_data_offset);
	sb->sectors = cpu_to_le64(rdev->sectors);
	sb->incompat_features = cpu_to_le32(0);

	/* Zero out the rest of the payload after the size of the superblock */
	memset(sb + 1, 0, rdev->sb_size - sizeof(*sb));
}

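/*
 * Editor's note: worked example for the flag handling above
 * (illustration only).  A set shrinking by one disk mid-reshape has
 * reshape_position != MaxSector and delta_disks == -1, so sb->flags
 * carries SB_FLAG_RESHAPE_ACTIVE | SB_FLAG_RESHAPE_BACKWARDS == 0x3;
 * once the reshape completes, both bits are cleared again.
 */
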
21628c2ecf20Sopenharmony_ci *
21638c2ecf20Sopenharmony_ci * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
21648c2ecf20Sopenharmony_ci */
21658c2ecf20Sopenharmony_cistatic int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
21668c2ecf20Sopenharmony_ci{
21678c2ecf20Sopenharmony_ci	int r;
21688c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
21698c2ecf20Sopenharmony_ci	struct dm_raid_superblock *refsb;
21708c2ecf20Sopenharmony_ci	uint64_t events_sb, events_refsb;
21718c2ecf20Sopenharmony_ci
21728c2ecf20Sopenharmony_ci	r = read_disk_sb(rdev, rdev->sb_size, false);
21738c2ecf20Sopenharmony_ci	if (r)
21748c2ecf20Sopenharmony_ci		return r;
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
21778c2ecf20Sopenharmony_ci
21788c2ecf20Sopenharmony_ci	/*
21798c2ecf20Sopenharmony_ci	 * Two cases in which we want to write new superblocks and rebuild:
21808c2ecf20Sopenharmony_ci	 * 1) New device (no matching magic number)
21818c2ecf20Sopenharmony_ci	 * 2) Device specified for rebuild (!In_sync w/ offset == 0)
21828c2ecf20Sopenharmony_ci	 */
21838c2ecf20Sopenharmony_ci	if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) ||
21848c2ecf20Sopenharmony_ci	    (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) {
21858c2ecf20Sopenharmony_ci		super_sync(rdev->mddev, rdev);
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci		set_bit(FirstUse, &rdev->flags);
21888c2ecf20Sopenharmony_ci		sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci		/* Force writing of superblocks to disk */
21918c2ecf20Sopenharmony_ci		set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_ci		/* Any superblock is better than none, choose that if given */
21948c2ecf20Sopenharmony_ci		return refdev ? 0 : 1;
21958c2ecf20Sopenharmony_ci	}
21968c2ecf20Sopenharmony_ci
21978c2ecf20Sopenharmony_ci	if (!refdev)
21988c2ecf20Sopenharmony_ci		return 1;
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_ci	events_sb = le64_to_cpu(sb->events);
22018c2ecf20Sopenharmony_ci
22028c2ecf20Sopenharmony_ci	refsb = page_address(refdev->sb_page);
22038c2ecf20Sopenharmony_ci	events_refsb = le64_to_cpu(refsb->events);
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	return (events_sb > events_refsb) ? 1 : 0;
22068c2ecf20Sopenharmony_ci}
22078c2ecf20Sopenharmony_ci
22088c2ecf20Sopenharmony_cistatic int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
22098c2ecf20Sopenharmony_ci{
22108c2ecf20Sopenharmony_ci	int role;
22118c2ecf20Sopenharmony_ci	unsigned int d;
22128c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
22138c2ecf20Sopenharmony_ci	uint64_t events_sb;
22148c2ecf20Sopenharmony_ci	uint64_t failed_devices[DISKS_ARRAY_ELEMS];
22158c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
22168c2ecf20Sopenharmony_ci	uint32_t new_devs = 0, rebuild_and_new = 0, rebuilds = 0;
22178c2ecf20Sopenharmony_ci	struct md_rdev *r;
22188c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb2;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
22218c2ecf20Sopenharmony_ci	events_sb = le64_to_cpu(sb->events);
22228c2ecf20Sopenharmony_ci
22238c2ecf20Sopenharmony_ci	/*
22248c2ecf20Sopenharmony_ci	 * Initialise to 1 if this is a new superblock.
22258c2ecf20Sopenharmony_ci	 */
22268c2ecf20Sopenharmony_ci	mddev->events = events_sb ? : 1;
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci	mddev->reshape_position = MaxSector;
22298c2ecf20Sopenharmony_ci
22308c2ecf20Sopenharmony_ci	mddev->raid_disks = le32_to_cpu(sb->num_devices);
22318c2ecf20Sopenharmony_ci	mddev->level = le32_to_cpu(sb->level);
22328c2ecf20Sopenharmony_ci	mddev->layout = le32_to_cpu(sb->layout);
22338c2ecf20Sopenharmony_ci	mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	/*
22368c2ecf20Sopenharmony_ci	 * Reshaping is supported, i.e. reshape_position is valid
22378c2ecf20Sopenharmony_ci	 * in superblock and superblock content is authoritative.
22388c2ecf20Sopenharmony_ci	 */
22398c2ecf20Sopenharmony_ci	if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
22408c2ecf20Sopenharmony_ci		/* Superblock is authoritative wrt given raid set layout! */
22418c2ecf20Sopenharmony_ci		mddev->new_level = le32_to_cpu(sb->new_level);
22428c2ecf20Sopenharmony_ci		mddev->new_layout = le32_to_cpu(sb->new_layout);
22438c2ecf20Sopenharmony_ci		mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors);
22448c2ecf20Sopenharmony_ci		mddev->delta_disks = le32_to_cpu(sb->delta_disks);
22458c2ecf20Sopenharmony_ci		mddev->array_sectors = le64_to_cpu(sb->array_sectors);
22468c2ecf20Sopenharmony_ci
22478c2ecf20Sopenharmony_ci		/* raid was reshaping and got interrupted */
22488c2ecf20Sopenharmony_ci		if (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_ACTIVE) {
22498c2ecf20Sopenharmony_ci			if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) {
22508c2ecf20Sopenharmony_ci				DMERR("Reshape requested but raid set is still reshaping");
22518c2ecf20Sopenharmony_ci				return -EINVAL;
22528c2ecf20Sopenharmony_ci			}
22538c2ecf20Sopenharmony_ci
22548c2ecf20Sopenharmony_ci			if (mddev->delta_disks < 0 ||
22558c2ecf20Sopenharmony_ci			    (!mddev->delta_disks && (le32_to_cpu(sb->flags) & SB_FLAG_RESHAPE_BACKWARDS)))
22568c2ecf20Sopenharmony_ci				mddev->reshape_backwards = 1;
22578c2ecf20Sopenharmony_ci			else
22588c2ecf20Sopenharmony_ci				mddev->reshape_backwards = 0;
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_ci			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
22618c2ecf20Sopenharmony_ci			rs->raid_type = get_raid_type_by_ll(mddev->level, mddev->layout);
22628c2ecf20Sopenharmony_ci		}
22638c2ecf20Sopenharmony_ci
22648c2ecf20Sopenharmony_ci	} else {
22658c2ecf20Sopenharmony_ci		/*
22668c2ecf20Sopenharmony_ci		 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata
22678c2ecf20Sopenharmony_ci		 */
22688c2ecf20Sopenharmony_ci		struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout);
22698c2ecf20Sopenharmony_ci		struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
22708c2ecf20Sopenharmony_ci
22718c2ecf20Sopenharmony_ci		if (rs_takeover_requested(rs)) {
22728c2ecf20Sopenharmony_ci			if (rt_cur && rt_new)
22738c2ecf20Sopenharmony_ci				DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)",
22748c2ecf20Sopenharmony_ci				      rt_cur->name, rt_new->name);
22758c2ecf20Sopenharmony_ci			else
22768c2ecf20Sopenharmony_ci				DMERR("Takeover raid sets not yet supported by metadata. (raid level change)");
22778c2ecf20Sopenharmony_ci			return -EINVAL;
22788c2ecf20Sopenharmony_ci		} else if (rs_reshape_requested(rs)) {
22798c2ecf20Sopenharmony_ci			DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)");
22808c2ecf20Sopenharmony_ci			if (mddev->layout != mddev->new_layout) {
22818c2ecf20Sopenharmony_ci				if (rt_cur && rt_new)
22828c2ecf20Sopenharmony_ci					DMERR("	current layout %s vs new layout %s",
22838c2ecf20Sopenharmony_ci					      rt_cur->name, rt_new->name);
22848c2ecf20Sopenharmony_ci				else
22858c2ecf20Sopenharmony_ci					DMERR("	current layout 0x%X vs new layout 0x%X",
22868c2ecf20Sopenharmony_ci					      le32_to_cpu(sb->layout), mddev->new_layout);
22878c2ecf20Sopenharmony_ci			}
22888c2ecf20Sopenharmony_ci			if (mddev->chunk_sectors != mddev->new_chunk_sectors)
22898c2ecf20Sopenharmony_ci				DMERR("	current stripe sectors %u vs new stripe sectors %u",
22908c2ecf20Sopenharmony_ci				      mddev->chunk_sectors, mddev->new_chunk_sectors);
22918c2ecf20Sopenharmony_ci			if (rs->delta_disks)
22928c2ecf20Sopenharmony_ci				DMERR("	current %u disks vs new %u disks",
22938c2ecf20Sopenharmony_ci				      mddev->raid_disks, mddev->raid_disks + rs->delta_disks);
22948c2ecf20Sopenharmony_ci			if (rs_is_raid10(rs)) {
22958c2ecf20Sopenharmony_ci				DMERR("	Old layout: %s w/ %u copies",
22968c2ecf20Sopenharmony_ci				      raid10_md_layout_to_format(mddev->layout),
22978c2ecf20Sopenharmony_ci				      raid10_md_layout_to_copies(mddev->layout));
22988c2ecf20Sopenharmony_ci				DMERR("	New layout: %s w/ %u copies",
22998c2ecf20Sopenharmony_ci				      raid10_md_layout_to_format(mddev->new_layout),
23008c2ecf20Sopenharmony_ci				      raid10_md_layout_to_copies(mddev->new_layout));
23018c2ecf20Sopenharmony_ci			}
23028c2ecf20Sopenharmony_ci			return -EINVAL;
23038c2ecf20Sopenharmony_ci		}
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_ci		DMINFO("Discovered old metadata format; upgrading to extended metadata format");
23068c2ecf20Sopenharmony_ci	}
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
23098c2ecf20Sopenharmony_ci		mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci	/*
23128c2ecf20Sopenharmony_ci	 * During load, we set FirstUse if a new superblock was written.
23138c2ecf20Sopenharmony_ci	 * There are three reasons we might not have a superblock:
23148c2ecf20Sopenharmony_ci	 * 1) The raid set is brand new - in which case, all of the
23158c2ecf20Sopenharmony_ci	 *    devices must have their In_sync bit set.  Also,
23168c2ecf20Sopenharmony_ci	 *    recovery_cp must be 0, unless forced.
23178c2ecf20Sopenharmony_ci	 * 2) This is a new device being added to an old raid set
23188c2ecf20Sopenharmony_ci	 *    and the new device needs to be rebuilt - in which
23198c2ecf20Sopenharmony_ci	 *    case the In_sync bit will /not/ be set and
23208c2ecf20Sopenharmony_ci	 *    recovery_cp must be MaxSector.
23218c2ecf20Sopenharmony_ci	 * 3) One or more new devices are being added to an old
23228c2ecf20Sopenharmony_ci	 *    raid set during takeover to a higher raid level
23238c2ecf20Sopenharmony_ci	 *    to provide capacity for redundancy or during reshape
23248c2ecf20Sopenharmony_ci	 *    to add capacity to grow the raid set.
23258c2ecf20Sopenharmony_ci	 */
23268c2ecf20Sopenharmony_ci	d = 0;
23278c2ecf20Sopenharmony_ci	rdev_for_each(r, mddev) {
23288c2ecf20Sopenharmony_ci		if (test_bit(Journal, &r->flags))
23298c2ecf20Sopenharmony_ci			continue;
23308c2ecf20Sopenharmony_ci
23318c2ecf20Sopenharmony_ci		if (test_bit(FirstUse, &r->flags))
23328c2ecf20Sopenharmony_ci			new_devs++;
23338c2ecf20Sopenharmony_ci
23348c2ecf20Sopenharmony_ci		if (!test_bit(In_sync, &r->flags)) {
23358c2ecf20Sopenharmony_ci			DMINFO("Device %d specified for rebuild; clearing superblock",
23368c2ecf20Sopenharmony_ci			       r->raid_disk);
23378c2ecf20Sopenharmony_ci			rebuilds++;
23388c2ecf20Sopenharmony_ci
23398c2ecf20Sopenharmony_ci			if (test_bit(FirstUse, &r->flags))
23408c2ecf20Sopenharmony_ci				rebuild_and_new++;
23418c2ecf20Sopenharmony_ci		}
23428c2ecf20Sopenharmony_ci
23438c2ecf20Sopenharmony_ci		d++;
23448c2ecf20Sopenharmony_ci	}
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	if (new_devs == rs->raid_disks || !rebuilds) {
23478c2ecf20Sopenharmony_ci		/* Replace a broken device */
23488c2ecf20Sopenharmony_ci		if (new_devs == rs->raid_disks) {
23498c2ecf20Sopenharmony_ci			DMINFO("Superblocks created for new raid set");
23508c2ecf20Sopenharmony_ci			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
23518c2ecf20Sopenharmony_ci		} else if (new_devs != rebuilds &&
23528c2ecf20Sopenharmony_ci			   new_devs != rs->delta_disks) {
23538c2ecf20Sopenharmony_ci			DMERR("New device injected into existing raid set without "
23548c2ecf20Sopenharmony_ci			      "'delta_disks' or 'rebuild' parameter specified");
23558c2ecf20Sopenharmony_ci			return -EINVAL;
23568c2ecf20Sopenharmony_ci		}
23578c2ecf20Sopenharmony_ci	} else if (new_devs && new_devs != rebuilds) {
23588c2ecf20Sopenharmony_ci		DMERR("%u 'rebuild' devices cannot be injected into"
23598c2ecf20Sopenharmony_ci		      " a raid set with %u other first-time devices",
23608c2ecf20Sopenharmony_ci		      rebuilds, new_devs);
23618c2ecf20Sopenharmony_ci		return -EINVAL;
23628c2ecf20Sopenharmony_ci	} else if (rebuilds) {
23638c2ecf20Sopenharmony_ci		if (rebuild_and_new && rebuilds != rebuild_and_new) {
23648c2ecf20Sopenharmony_ci			DMERR("new device%s provided without 'rebuild'",
23658c2ecf20Sopenharmony_ci			      new_devs > 1 ? "s" : "");
23668c2ecf20Sopenharmony_ci			return -EINVAL;
23678c2ecf20Sopenharmony_ci		} else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) {
23688c2ecf20Sopenharmony_ci			DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)",
23698c2ecf20Sopenharmony_ci			      (unsigned long long) mddev->recovery_cp);
23708c2ecf20Sopenharmony_ci			return -EINVAL;
23718c2ecf20Sopenharmony_ci		} else if (rs_is_reshaping(rs)) {
23728c2ecf20Sopenharmony_ci			DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)",
23738c2ecf20Sopenharmony_ci			      (unsigned long long) mddev->reshape_position);
23748c2ecf20Sopenharmony_ci			return -EINVAL;
23758c2ecf20Sopenharmony_ci		}
23768c2ecf20Sopenharmony_ci	}
23778c2ecf20Sopenharmony_ci
23788c2ecf20Sopenharmony_ci	/*
23798c2ecf20Sopenharmony_ci	 * Now we set the Faulty bit for those devices that are
23808c2ecf20Sopenharmony_ci	 * recorded in the superblock as failed.
23818c2ecf20Sopenharmony_ci	 */
23828c2ecf20Sopenharmony_ci	sb_retrieve_failed_devices(sb, failed_devices);
23838c2ecf20Sopenharmony_ci	rdev_for_each(r, mddev) {
23848c2ecf20Sopenharmony_ci		if (test_bit(Journal, &r->flags) ||
23858c2ecf20Sopenharmony_ci		    !r->sb_page)
23868c2ecf20Sopenharmony_ci			continue;
23878c2ecf20Sopenharmony_ci		sb2 = page_address(r->sb_page);
23888c2ecf20Sopenharmony_ci		sb2->failed_devices = 0;
23898c2ecf20Sopenharmony_ci		memset(sb2->extended_failed_devices, 0, sizeof(sb2->extended_failed_devices));
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci		/*
23928c2ecf20Sopenharmony_ci		 * Check for any device re-ordering.
23938c2ecf20Sopenharmony_ci		 */
23948c2ecf20Sopenharmony_ci		if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
23958c2ecf20Sopenharmony_ci			role = le32_to_cpu(sb2->array_position);
23968c2ecf20Sopenharmony_ci			if (role < 0)
23978c2ecf20Sopenharmony_ci				continue;
23988c2ecf20Sopenharmony_ci
23998c2ecf20Sopenharmony_ci			if (role != r->raid_disk) {
24008c2ecf20Sopenharmony_ci				if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) {
24018c2ecf20Sopenharmony_ci					if (mddev->raid_disks % __raid10_near_copies(mddev->layout) ||
24028c2ecf20Sopenharmony_ci					    rs->raid_disks % rs->raid10_copies) {
24038c2ecf20Sopenharmony_ci						rs->ti->error =
24048c2ecf20Sopenharmony_ci							"Cannot change raid10 near set to odd # of devices!";
24058c2ecf20Sopenharmony_ci						return -EINVAL;
24068c2ecf20Sopenharmony_ci					}
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci					sb2->array_position = cpu_to_le32(r->raid_disk);
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci				} else if (!(rs_is_raid10(rs) && rt_is_raid0(rs->raid_type)) &&
24118c2ecf20Sopenharmony_ci					   !(rs_is_raid0(rs) && rt_is_raid10(rs->raid_type)) &&
24128c2ecf20Sopenharmony_ci					   !rt_is_raid1(rs->raid_type)) {
24138c2ecf20Sopenharmony_ci					rs->ti->error = "Cannot change device positions in raid set";
24148c2ecf20Sopenharmony_ci					return -EINVAL;
24158c2ecf20Sopenharmony_ci				}
24168c2ecf20Sopenharmony_ci
24178c2ecf20Sopenharmony_ci				DMINFO("raid device #%d now at position #%d", role, r->raid_disk);
24188c2ecf20Sopenharmony_ci			}
24198c2ecf20Sopenharmony_ci
24208c2ecf20Sopenharmony_ci			/*
24218c2ecf20Sopenharmony_ci			 * Partial recovery is performed on
24228c2ecf20Sopenharmony_ci			 * returning failed devices.
24238c2ecf20Sopenharmony_ci			 */
24248c2ecf20Sopenharmony_ci			if (test_bit(role, (void *) failed_devices))
24258c2ecf20Sopenharmony_ci				set_bit(Faulty, &r->flags);
24268c2ecf20Sopenharmony_ci		}
24278c2ecf20Sopenharmony_ci	}
24288c2ecf20Sopenharmony_ci
24298c2ecf20Sopenharmony_ci	return 0;
24308c2ecf20Sopenharmony_ci}
24318c2ecf20Sopenharmony_ci
24328c2ecf20Sopenharmony_cistatic int super_validate(struct raid_set *rs, struct md_rdev *rdev)
24338c2ecf20Sopenharmony_ci{
24348c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
24358c2ecf20Sopenharmony_ci	struct dm_raid_superblock *sb;
24368c2ecf20Sopenharmony_ci
24378c2ecf20Sopenharmony_ci	if (rs_is_raid0(rs) || !rdev->sb_page || rdev->raid_disk < 0)
24388c2ecf20Sopenharmony_ci		return 0;
24398c2ecf20Sopenharmony_ci
24408c2ecf20Sopenharmony_ci	sb = page_address(rdev->sb_page);
24418c2ecf20Sopenharmony_ci
24428c2ecf20Sopenharmony_ci	/*
24438c2ecf20Sopenharmony_ci	 * If mddev->events is not set, we know we have not yet initialized
24448c2ecf20Sopenharmony_ci	 * the array.
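	 * super_init_validation() below then takes over the core configuration
	 * (events, raid_disks, level, layout, chunk_sectors, ...) from the
	 * freshest superblock.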
24458c2ecf20Sopenharmony_ci	 */
24468c2ecf20Sopenharmony_ci	if (!mddev->events && super_init_validation(rs, rdev))
24478c2ecf20Sopenharmony_ci		return -EINVAL;
24488c2ecf20Sopenharmony_ci
24498c2ecf20Sopenharmony_ci	if (le32_to_cpu(sb->compat_features) &&
24508c2ecf20Sopenharmony_ci	    le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) {
24518c2ecf20Sopenharmony_ci		rs->ti->error = "Unable to assemble array: Unknown flag(s) in compatible feature flags";
24528c2ecf20Sopenharmony_ci		return -EINVAL;
24538c2ecf20Sopenharmony_ci	}
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	if (sb->incompat_features) {
24568c2ecf20Sopenharmony_ci		rs->ti->error = "Unable to assemble array: No incompatible feature flags supported yet";
24578c2ecf20Sopenharmony_ci		return -EINVAL;
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_ci
24608c2ecf20Sopenharmony_ci	/* Enable bitmap creation on @rs unless there are no metadevs, or it is a raid0 or journaled raid4/5/6 set. */
24618c2ecf20Sopenharmony_ci	mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
24628c2ecf20Sopenharmony_ci	mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
24638c2ecf20Sopenharmony_ci
24648c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
24658c2ecf20Sopenharmony_ci		/*
24668c2ecf20Sopenharmony_ci		 * Retrieve rdev size stored in superblock to be prepared for shrink.
24678c2ecf20Sopenharmony_ci		 * Check extended superblock members are present, otherwise the size
24688c2ecf20Sopenharmony_ci		 * will not be set!
24698c2ecf20Sopenharmony_ci		 */
24708c2ecf20Sopenharmony_ci		if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
24718c2ecf20Sopenharmony_ci			rdev->sectors = le64_to_cpu(sb->sectors);
24728c2ecf20Sopenharmony_ci
24738c2ecf20Sopenharmony_ci		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
24748c2ecf20Sopenharmony_ci		if (rdev->recovery_offset == MaxSector)
24758c2ecf20Sopenharmony_ci			set_bit(In_sync, &rdev->flags);
24768c2ecf20Sopenharmony_ci		/*
24778c2ecf20Sopenharmony_ci		 * If no reshape in progress -> we're recovering single
24788c2ecf20Sopenharmony_ci		 * disk(s) and have to set the device(s) to out-of-sync
24798c2ecf20Sopenharmony_ci		 */
24808c2ecf20Sopenharmony_ci		else if (!rs_is_reshaping(rs))
24818c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags); /* Mandatory for recovery */
24828c2ecf20Sopenharmony_ci	}
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci	/*
24858c2ecf20Sopenharmony_ci	 * If a device comes back, set it as not In_sync and no longer faulty.
24868c2ecf20Sopenharmony_ci	 */
24878c2ecf20Sopenharmony_ci	if (test_and_clear_bit(Faulty, &rdev->flags)) {
24888c2ecf20Sopenharmony_ci		rdev->recovery_offset = 0;
24898c2ecf20Sopenharmony_ci		clear_bit(In_sync, &rdev->flags);
24908c2ecf20Sopenharmony_ci		rdev->saved_raid_disk = rdev->raid_disk;
24918c2ecf20Sopenharmony_ci	}
24928c2ecf20Sopenharmony_ci
24938c2ecf20Sopenharmony_ci	/* Reshape support -> restore respective data offsets */
24948c2ecf20Sopenharmony_ci	rdev->data_offset = le64_to_cpu(sb->data_offset);
24958c2ecf20Sopenharmony_ci	rdev->new_data_offset = le64_to_cpu(sb->new_data_offset);
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ci	return 0;
24988c2ecf20Sopenharmony_ci}
24998c2ecf20Sopenharmony_ci
25008c2ecf20Sopenharmony_ci/*
25018c2ecf20Sopenharmony_ci * Analyse superblocks and select the freshest.
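 * The freshest one is the superblock carrying the highest event count
 * (cf. the event counter comparison in super_load()).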
25028c2ecf20Sopenharmony_ci */
25038c2ecf20Sopenharmony_cistatic int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
25048c2ecf20Sopenharmony_ci{
25058c2ecf20Sopenharmony_ci	int r;
25068c2ecf20Sopenharmony_ci	struct md_rdev *rdev, *freshest;
25078c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
25088c2ecf20Sopenharmony_ci
25098c2ecf20Sopenharmony_ci	freshest = NULL;
25108c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev) {
25118c2ecf20Sopenharmony_ci		if (test_bit(Journal, &rdev->flags))
25128c2ecf20Sopenharmony_ci			continue;
25138c2ecf20Sopenharmony_ci
25148c2ecf20Sopenharmony_ci		if (!rdev->meta_bdev)
25158c2ecf20Sopenharmony_ci			continue;
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_ci		/* Set superblock offset/size for metadata device. */
25188c2ecf20Sopenharmony_ci		rdev->sb_start = 0;
25198c2ecf20Sopenharmony_ci		rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev);
25208c2ecf20Sopenharmony_ci		if (rdev->sb_size < sizeof(struct dm_raid_superblock) || rdev->sb_size > PAGE_SIZE) {
25218c2ecf20Sopenharmony_ci			DMERR("superblock size of a logical block is no longer valid");
25228c2ecf20Sopenharmony_ci			return -EINVAL;
25238c2ecf20Sopenharmony_ci		}
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_ci		/*
25268c2ecf20Sopenharmony_ci		 * Skipping super_load due to CTR_FLAG_SYNC will cause
25278c2ecf20Sopenharmony_ci		 * the array to undergo initialization again as
25288c2ecf20Sopenharmony_ci		 * though it were new. This is the intended effect
25298c2ecf20Sopenharmony_ci		 * of the "sync" directive.
25308c2ecf20Sopenharmony_ci		 *
25318c2ecf20Sopenharmony_ci		 * With reshaping capability added, we must ensure
25328c2ecf20Sopenharmony_ci		 * that the "sync" directive is disallowed during the reshape.
25338c2ecf20Sopenharmony_ci		 */
25348c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
25358c2ecf20Sopenharmony_ci			continue;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci		r = super_load(rdev, freshest);
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci		switch (r) {
25408c2ecf20Sopenharmony_ci		case 1:
25418c2ecf20Sopenharmony_ci			freshest = rdev;
25428c2ecf20Sopenharmony_ci			break;
25438c2ecf20Sopenharmony_ci		case 0:
25448c2ecf20Sopenharmony_ci			break;
25458c2ecf20Sopenharmony_ci		default:
25468c2ecf20Sopenharmony_ci			/* This is a failure to read the superblock from the metadata device. */
25478c2ecf20Sopenharmony_ci			/*
25488c2ecf20Sopenharmony_ci			 * We have to keep any raid0 data/metadata device pairs or
25498c2ecf20Sopenharmony_ci			 * the MD raid0 personality will fail to start the array.
25508c2ecf20Sopenharmony_ci			 */
25518c2ecf20Sopenharmony_ci			if (rs_is_raid0(rs))
25528c2ecf20Sopenharmony_ci				continue;
25538c2ecf20Sopenharmony_ci
25548c2ecf20Sopenharmony_ci			/*
25558c2ecf20Sopenharmony_ci			 * We keep the dm_devs to be able to emit the device tuple
25568c2ecf20Sopenharmony_ci			 * properly on the table line in raid_status() (rather than
25578c2ecf20Sopenharmony_ci			 * mistakenly acting as if '- -' got passed into the constructor).
25588c2ecf20Sopenharmony_ci			 *
25598c2ecf20Sopenharmony_ci			 * The rdev has to stay on the same_set list to allow for
25608c2ecf20Sopenharmony_ci			 * the attempt to restore faulty devices on second resume.
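			 * (cf. attempt_restore_of_faulty_devices()).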
25618c2ecf20Sopenharmony_ci			 */
25628c2ecf20Sopenharmony_ci			rdev->raid_disk = rdev->saved_raid_disk = -1;
25638c2ecf20Sopenharmony_ci			break;
25648c2ecf20Sopenharmony_ci		}
25658c2ecf20Sopenharmony_ci	}
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ci	if (!freshest)
25688c2ecf20Sopenharmony_ci		return 0;
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci	/*
25718c2ecf20Sopenharmony_ci	 * Validation of the freshest device provides the source of
25728c2ecf20Sopenharmony_ci	 * validation for the remaining devices.
25738c2ecf20Sopenharmony_ci	 */
25748c2ecf20Sopenharmony_ci	rs->ti->error = "Unable to assemble array: Invalid superblocks";
25758c2ecf20Sopenharmony_ci	if (super_validate(rs, freshest))
25768c2ecf20Sopenharmony_ci		return -EINVAL;
25778c2ecf20Sopenharmony_ci
25788c2ecf20Sopenharmony_ci	if (validate_raid_redundancy(rs)) {
25798c2ecf20Sopenharmony_ci		rs->ti->error = "Insufficient redundancy to activate array";
25808c2ecf20Sopenharmony_ci		return -EINVAL;
25818c2ecf20Sopenharmony_ci	}
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	rdev_for_each(rdev, mddev)
25848c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags) &&
25858c2ecf20Sopenharmony_ci		    rdev != freshest &&
25868c2ecf20Sopenharmony_ci		    super_validate(rs, rdev))
25878c2ecf20Sopenharmony_ci			return -EINVAL;
25888c2ecf20Sopenharmony_ci	return 0;
25898c2ecf20Sopenharmony_ci}
25908c2ecf20Sopenharmony_ci
25918c2ecf20Sopenharmony_ci/*
25928c2ecf20Sopenharmony_ci * Adjust data_offset and new_data_offset on all disk members of @rs
25938c2ecf20Sopenharmony_ci * for out-of-place reshaping if requested by constructor
25948c2ecf20Sopenharmony_ci *
25958c2ecf20Sopenharmony_ci * We need free space at the beginning of each raid disk for forward
25968c2ecf20Sopenharmony_ci * and at the end for backward reshapes which userspace has to provide
25978c2ecf20Sopenharmony_ci * via remapping/reordering of space.
25988c2ecf20Sopenharmony_ci */
25998c2ecf20Sopenharmony_cistatic int rs_adjust_data_offsets(struct raid_set *rs)
26008c2ecf20Sopenharmony_ci{
26018c2ecf20Sopenharmony_ci	sector_t data_offset = 0, new_data_offset = 0;
26028c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
26038c2ecf20Sopenharmony_ci
26048c2ecf20Sopenharmony_ci	/* Constructor did not request data offset change */
26058c2ecf20Sopenharmony_ci	if (!test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) {
26068c2ecf20Sopenharmony_ci		if (!rs_is_reshapable(rs))
26078c2ecf20Sopenharmony_ci			goto out;
26088c2ecf20Sopenharmony_ci
26098c2ecf20Sopenharmony_ci		return 0;
26108c2ecf20Sopenharmony_ci	}
26118c2ecf20Sopenharmony_ci
26128c2ecf20Sopenharmony_ci	/* HM FIXME: get In_Sync raid_dev? */
26138c2ecf20Sopenharmony_ci	rdev = &rs->dev[0].rdev;
26148c2ecf20Sopenharmony_ci
26158c2ecf20Sopenharmony_ci	if (rs->delta_disks < 0) {
26168c2ecf20Sopenharmony_ci		/*
26178c2ecf20Sopenharmony_ci		 * Removing disks (reshaping backwards):
26188c2ecf20Sopenharmony_ci		 *
26198c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset 0 and free space
26208c2ecf20Sopenharmony_ci		 *		     is at end of each component LV
26218c2ecf20Sopenharmony_ci		 *
26228c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset rs->data_offset != 0 on each component LV
26238c2ecf20Sopenharmony_ci		 */
26248c2ecf20Sopenharmony_ci		data_offset = 0;
26258c2ecf20Sopenharmony_ci		new_data_offset = rs->data_offset;
26268c2ecf20Sopenharmony_ci
26278c2ecf20Sopenharmony_ci	} else if (rs->delta_disks > 0) {
26288c2ecf20Sopenharmony_ci		/*
26298c2ecf20Sopenharmony_ci		 * Adding disks (reshaping forwards):
26308c2ecf20Sopenharmony_ci		 *
26318c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset rs->data_offset != 0 and
26328c2ecf20Sopenharmony_ci		 *		     free space is at the beginning of each component LV
26338c2ecf20Sopenharmony_ci		 *
26348c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset 0 on each component LV
26358c2ecf20Sopenharmony_ci		 */
26368c2ecf20Sopenharmony_ci		data_offset = rs->data_offset;
26378c2ecf20Sopenharmony_ci		new_data_offset = 0;
26388c2ecf20Sopenharmony_ci
26398c2ecf20Sopenharmony_ci	} else {
26408c2ecf20Sopenharmony_ci		/*
26418c2ecf20Sopenharmony_ci		 * User space passes in 0 for data offset after having removed reshape space
26428c2ecf20Sopenharmony_ci		 *
26438c2ecf20Sopenharmony_ci		 * - or - (data offset != 0)
26448c2ecf20Sopenharmony_ci		 *
26458c2ecf20Sopenharmony_ci		 * Changing RAID layout or chunk size -> toggle offsets
26468c2ecf20Sopenharmony_ci		 *
26478c2ecf20Sopenharmony_ci		 * - before reshape: data is at offset rs->data_offset == 0 and
26488c2ecf20Sopenharmony_ci		 *		     free space is at end of each component LV
26498c2ecf20Sopenharmony_ci		 *		     -or-
26508c2ecf20Sopenharmony_ci		 *		     data is at offset rs->data_offset != 0 and
26518c2ecf20Sopenharmony_ci		 *		     free space is at the beginning of each component LV
26528c2ecf20Sopenharmony_ci		 *
26538c2ecf20Sopenharmony_ci		 * - after reshape: data is at offset 0 if it was at offset != 0
26548c2ecf20Sopenharmony_ci		 *		    or at offset != 0 if it was at offset 0
26558c2ecf20Sopenharmony_ci		 *		    on each component LV
26568c2ecf20Sopenharmony_ci		 *
26578c2ecf20Sopenharmony_ci		 */
26588c2ecf20Sopenharmony_ci		data_offset = rs->data_offset ? rdev->data_offset : 0;
26598c2ecf20Sopenharmony_ci		new_data_offset = data_offset ? 0 : rs->data_offset;
26608c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
26618c2ecf20Sopenharmony_ci	}
26628c2ecf20Sopenharmony_ci
26638c2ecf20Sopenharmony_ci	/*
26648c2ecf20Sopenharmony_ci	 * Make sure we have a minimum amount of free sectors per device
26658c2ecf20Sopenharmony_ci	 */
26668c2ecf20Sopenharmony_ci	if (rs->data_offset &&
26678c2ecf20Sopenharmony_ci	    to_sector(i_size_read(rdev->bdev->bd_inode)) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) {
26688c2ecf20Sopenharmony_ci		rs->ti->error = data_offset ? "No space for forward reshape" :
26698c2ecf20Sopenharmony_ci					      "No space for backward reshape";
26708c2ecf20Sopenharmony_ci		return -ENOSPC;
26718c2ecf20Sopenharmony_ci	}
26728c2ecf20Sopenharmony_ciout:
26738c2ecf20Sopenharmony_ci	/*
26748c2ecf20Sopenharmony_ci	 * Raise recovery_cp in case data_offset != 0 to
26758c2ecf20Sopenharmony_ci	 * avoid false recovery positives in the constructor.
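	 * (E.g. a data_offset of 8192 sectors raises a pending recovery_cp
	 * by those 8192 sectors.)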
26768c2ecf20Sopenharmony_ci	 */
26778c2ecf20Sopenharmony_ci	if (rs->md.recovery_cp < rs->md.dev_sectors)
26788c2ecf20Sopenharmony_ci		rs->md.recovery_cp += rs->dev[0].rdev.data_offset;
26798c2ecf20Sopenharmony_ci
26808c2ecf20Sopenharmony_ci	/* Adjust data offsets on all rdevs but not on any raid4/5/6 journal device */
26818c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md) {
26828c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags)) {
26838c2ecf20Sopenharmony_ci			rdev->data_offset = data_offset;
26848c2ecf20Sopenharmony_ci			rdev->new_data_offset = new_data_offset;
26858c2ecf20Sopenharmony_ci		}
26868c2ecf20Sopenharmony_ci	}
26878c2ecf20Sopenharmony_ci
26888c2ecf20Sopenharmony_ci	return 0;
26898c2ecf20Sopenharmony_ci}
26908c2ecf20Sopenharmony_ci
26918c2ecf20Sopenharmony_ci/* Userspace reordered disks -> adjust raid_disk indexes in @rs */
26928c2ecf20Sopenharmony_cistatic void __reorder_raid_disk_indexes(struct raid_set *rs)
26938c2ecf20Sopenharmony_ci{
26948c2ecf20Sopenharmony_ci	int i = 0;
26958c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
26968c2ecf20Sopenharmony_ci
26978c2ecf20Sopenharmony_ci	rdev_for_each(rdev, &rs->md) {
26988c2ecf20Sopenharmony_ci		if (!test_bit(Journal, &rdev->flags)) {
26998c2ecf20Sopenharmony_ci			rdev->raid_disk = i++;
27008c2ecf20Sopenharmony_ci			rdev->saved_raid_disk = rdev->new_raid_disk = -1;
27018c2ecf20Sopenharmony_ci		}
27028c2ecf20Sopenharmony_ci	}
27038c2ecf20Sopenharmony_ci}
27048c2ecf20Sopenharmony_ci
27058c2ecf20Sopenharmony_ci/*
27068c2ecf20Sopenharmony_ci * Setup @rs for takeover by a different raid level
27078c2ecf20Sopenharmony_ci */
27088c2ecf20Sopenharmony_cistatic int rs_setup_takeover(struct raid_set *rs)
27098c2ecf20Sopenharmony_ci{
27108c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
27118c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
27128c2ecf20Sopenharmony_ci	unsigned int d = mddev->raid_disks = rs->raid_disks;
27138c2ecf20Sopenharmony_ci	sector_t new_data_offset = rs->dev[0].rdev.data_offset ? 0 : rs->data_offset;
27148c2ecf20Sopenharmony_ci
27158c2ecf20Sopenharmony_ci	if (rt_is_raid10(rs->raid_type)) {
27168c2ecf20Sopenharmony_ci		if (rs_is_raid0(rs)) {
27178c2ecf20Sopenharmony_ci			/* Userspace reordered disks -> adjust raid_disk indexes */
27188c2ecf20Sopenharmony_ci			__reorder_raid_disk_indexes(rs);
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_ci			/* raid0 -> raid10_far layout */
27218c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_FAR,
27228c2ecf20Sopenharmony_ci								   rs->raid10_copies);
27238c2ecf20Sopenharmony_ci		} else if (rs_is_raid1(rs))
27248c2ecf20Sopenharmony_ci			/* raid1 -> raid10_near layout */
27258c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
27268c2ecf20Sopenharmony_ci								   rs->raid_disks);
27278c2ecf20Sopenharmony_ci		else
27288c2ecf20Sopenharmony_ci			return -EINVAL;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	}
27318c2ecf20Sopenharmony_ci
27328c2ecf20Sopenharmony_ci	clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
27338c2ecf20Sopenharmony_ci	mddev->recovery_cp = MaxSector;
27348c2ecf20Sopenharmony_ci
27358c2ecf20Sopenharmony_ci	while (d--) {
27368c2ecf20Sopenharmony_ci		rdev = &rs->dev[d].rdev;
27378c2ecf20Sopenharmony_ci
27388c2ecf20Sopenharmony_ci		if (test_bit(d, (void *) rs->rebuild_disks)) {
27398c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags);
27408c2ecf20Sopenharmony_ci			clear_bit(Faulty, &rdev->flags);
27418c2ecf20Sopenharmony_ci			mddev->recovery_cp = rdev->recovery_offset = 0;
27428c2ecf20Sopenharmony_ci			/* Bitmap has to be created when we do an "up" takeover */
27438c2ecf20Sopenharmony_ci			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
27448c2ecf20Sopenharmony_ci		}
27458c2ecf20Sopenharmony_ci
27468c2ecf20Sopenharmony_ci		rdev->new_data_offset = new_data_offset;
27478c2ecf20Sopenharmony_ci	}
27488c2ecf20Sopenharmony_ci
27498c2ecf20Sopenharmony_ci	return 0;
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci/* Prepare @rs for reshape */
27538c2ecf20Sopenharmony_cistatic int rs_prepare_reshape(struct raid_set *rs)
27548c2ecf20Sopenharmony_ci{
27558c2ecf20Sopenharmony_ci	bool reshape;
27568c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
27578c2ecf20Sopenharmony_ci
27588c2ecf20Sopenharmony_ci	if (rs_is_raid10(rs)) {
27598c2ecf20Sopenharmony_ci		if (rs->raid_disks != mddev->raid_disks &&
27608c2ecf20Sopenharmony_ci		    __is_raid10_near(mddev->layout) &&
27618c2ecf20Sopenharmony_ci		    rs->raid10_copies &&
27628c2ecf20Sopenharmony_ci		    rs->raid10_copies != __raid10_near_copies(mddev->layout)) {
27638c2ecf20Sopenharmony_ci			/*
27648c2ecf20Sopenharmony_ci			 * raid disks have to be a multiple of data copies to allow this conversion.
27658c2ecf20Sopenharmony_ci			 *
27668c2ecf20Sopenharmony_ci			 * This is actually not a reshape, it is a
27678c2ecf20Sopenharmony_ci			 * rebuild of any additional mirrors per group
27688c2ecf20Sopenharmony_ci			 */
27698c2ecf20Sopenharmony_ci			if (rs->raid_disks % rs->raid10_copies) {
27708c2ecf20Sopenharmony_ci				rs->ti->error = "Can't reshape raid10 mirror groups";
27718c2ecf20Sopenharmony_ci				return -EINVAL;
27728c2ecf20Sopenharmony_ci			}
27738c2ecf20Sopenharmony_ci
27748c2ecf20Sopenharmony_ci			/* Userspace reordered disks to add/remove mirrors -> adjust raid_disk indexes */
27758c2ecf20Sopenharmony_ci			__reorder_raid_disk_indexes(rs);
27768c2ecf20Sopenharmony_ci			mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
27778c2ecf20Sopenharmony_ci								   rs->raid10_copies);
27788c2ecf20Sopenharmony_ci			mddev->new_layout = mddev->layout;
27798c2ecf20Sopenharmony_ci reshape = false; 27808c2ecf20Sopenharmony_ci } else 27818c2ecf20Sopenharmony_ci reshape = true; 27828c2ecf20Sopenharmony_ci 27838c2ecf20Sopenharmony_ci } else if (rs_is_raid456(rs)) 27848c2ecf20Sopenharmony_ci reshape = true; 27858c2ecf20Sopenharmony_ci 27868c2ecf20Sopenharmony_ci else if (rs_is_raid1(rs)) { 27878c2ecf20Sopenharmony_ci if (rs->delta_disks) { 27888c2ecf20Sopenharmony_ci /* Process raid1 via delta_disks */ 27898c2ecf20Sopenharmony_ci mddev->degraded = rs->delta_disks < 0 ? -rs->delta_disks : rs->delta_disks; 27908c2ecf20Sopenharmony_ci reshape = true; 27918c2ecf20Sopenharmony_ci } else { 27928c2ecf20Sopenharmony_ci /* Process raid1 without delta_disks */ 27938c2ecf20Sopenharmony_ci mddev->raid_disks = rs->raid_disks; 27948c2ecf20Sopenharmony_ci reshape = false; 27958c2ecf20Sopenharmony_ci } 27968c2ecf20Sopenharmony_ci } else { 27978c2ecf20Sopenharmony_ci rs->ti->error = "Called with bogus raid type"; 27988c2ecf20Sopenharmony_ci return -EINVAL; 27998c2ecf20Sopenharmony_ci } 28008c2ecf20Sopenharmony_ci 28018c2ecf20Sopenharmony_ci if (reshape) { 28028c2ecf20Sopenharmony_ci set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); 28038c2ecf20Sopenharmony_ci set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 28048c2ecf20Sopenharmony_ci } else if (mddev->raid_disks < rs->raid_disks) 28058c2ecf20Sopenharmony_ci /* Create new superblocks and bitmaps, if any new disks */ 28068c2ecf20Sopenharmony_ci set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 28078c2ecf20Sopenharmony_ci 28088c2ecf20Sopenharmony_ci return 0; 28098c2ecf20Sopenharmony_ci} 28108c2ecf20Sopenharmony_ci 28118c2ecf20Sopenharmony_ci/* Get reshape sectors from data_offsets or raid set */ 28128c2ecf20Sopenharmony_cistatic sector_t _get_reshape_sectors(struct raid_set *rs) 28138c2ecf20Sopenharmony_ci{ 28148c2ecf20Sopenharmony_ci struct md_rdev *rdev; 28158c2ecf20Sopenharmony_ci sector_t reshape_sectors = 0; 28168c2ecf20Sopenharmony_ci 28178c2ecf20Sopenharmony_ci rdev_for_each(rdev, &rs->md) 28188c2ecf20Sopenharmony_ci if (!test_bit(Journal, &rdev->flags)) { 28198c2ecf20Sopenharmony_ci reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ? 
28208c2ecf20Sopenharmony_ci					rdev->data_offset - rdev->new_data_offset :
28218c2ecf20Sopenharmony_ci					rdev->new_data_offset - rdev->data_offset;
28228c2ecf20Sopenharmony_ci			break;
28238c2ecf20Sopenharmony_ci		}
28248c2ecf20Sopenharmony_ci
28258c2ecf20Sopenharmony_ci	return max(reshape_sectors, (sector_t) rs->data_offset);
28268c2ecf20Sopenharmony_ci}
28278c2ecf20Sopenharmony_ci
28288c2ecf20Sopenharmony_ci/*
28298c2ecf20Sopenharmony_ci * Reshape:
28308c2ecf20Sopenharmony_ci * - change raid layout
28318c2ecf20Sopenharmony_ci * - change chunk size
28328c2ecf20Sopenharmony_ci * - add disks
28338c2ecf20Sopenharmony_ci * - remove disks
28348c2ecf20Sopenharmony_ci */
28358c2ecf20Sopenharmony_cistatic int rs_setup_reshape(struct raid_set *rs)
28368c2ecf20Sopenharmony_ci{
28378c2ecf20Sopenharmony_ci	int r = 0;
28388c2ecf20Sopenharmony_ci	unsigned int cur_raid_devs, d;
28398c2ecf20Sopenharmony_ci	sector_t reshape_sectors = _get_reshape_sectors(rs);
28408c2ecf20Sopenharmony_ci	struct mddev *mddev = &rs->md;
28418c2ecf20Sopenharmony_ci	struct md_rdev *rdev;
28428c2ecf20Sopenharmony_ci
28438c2ecf20Sopenharmony_ci	mddev->delta_disks = rs->delta_disks;
28448c2ecf20Sopenharmony_ci	cur_raid_devs = mddev->raid_disks;
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_ci	/* Ignore impossible layout change whilst adding/removing disks */
28478c2ecf20Sopenharmony_ci	if (mddev->delta_disks &&
28488c2ecf20Sopenharmony_ci	    mddev->layout != mddev->new_layout) {
28498c2ecf20Sopenharmony_ci		DMINFO("Ignoring invalid layout change with delta_disks=%d", rs->delta_disks);
28508c2ecf20Sopenharmony_ci		mddev->new_layout = mddev->layout;
28518c2ecf20Sopenharmony_ci	}
28528c2ecf20Sopenharmony_ci
28538c2ecf20Sopenharmony_ci	/*
28548c2ecf20Sopenharmony_ci	 * Adjust array size:
28558c2ecf20Sopenharmony_ci	 *
28568c2ecf20Sopenharmony_ci	 * - in case of adding disk(s), array size has
28578c2ecf20Sopenharmony_ci	 *   to grow after the disk adding reshape,
28588c2ecf20Sopenharmony_ci	 *   which'll happen in the event handler;
28598c2ecf20Sopenharmony_ci	 *   reshape will happen forward, so space has to
28608c2ecf20Sopenharmony_ci	 *   be available at the beginning of each disk
28618c2ecf20Sopenharmony_ci	 *
28628c2ecf20Sopenharmony_ci	 * - in case of removing disk(s), array size
28638c2ecf20Sopenharmony_ci	 *   has to shrink before starting the reshape,
28648c2ecf20Sopenharmony_ci	 *   which'll happen here;
28658c2ecf20Sopenharmony_ci	 *   reshape will happen backward, so space has to
28668c2ecf20Sopenharmony_ci	 *   be available at the end of each disk
28678c2ecf20Sopenharmony_ci	 *
28688c2ecf20Sopenharmony_ci	 * - data_offset and new_data_offset are
28698c2ecf20Sopenharmony_ci	 *   adjusted for aforementioned out of place
28708c2ecf20Sopenharmony_ci	 *   reshaping based on userspace passing in
28718c2ecf20Sopenharmony_ci	 *   the "data_offset <sectors>" key/value
28728c2ecf20Sopenharmony_ci	 *   pair via the constructor
28738c2ecf20Sopenharmony_ci	 */
28748c2ecf20Sopenharmony_ci
28758c2ecf20Sopenharmony_ci	/* Add disk(s) */
28768c2ecf20Sopenharmony_ci	if (rs->delta_disks > 0) {
28778c2ecf20Sopenharmony_ci		/* Prepare disks for check in raid4/5/6/10 {check|start}_reshape */
28788c2ecf20Sopenharmony_ci		for (d = cur_raid_devs; d < rs->raid_disks; d++) {
28798c2ecf20Sopenharmony_ci			rdev = &rs->dev[d].rdev;
28808c2ecf20Sopenharmony_ci			clear_bit(In_sync, &rdev->flags);
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_ci			/*
28838c2ecf20Sopenharmony_ci			 * saved_raid_disk needs to be -1, or recovery_offset will be set to 0
28848c2ecf20Sopenharmony_ci			 * by md, which'll store that erroneously in the superblock on reshape
28858c2ecf20Sopenharmony_ci			 */
28868c2ecf20Sopenharmony_ci			rdev->saved_raid_disk = -1;
28878c2ecf20Sopenharmony_ci			rdev->raid_disk = d;
28888c2ecf20Sopenharmony_ci
28898c2ecf20Sopenharmony_ci			rdev->sectors = mddev->dev_sectors;
28908c2ecf20Sopenharmony_ci			rdev->recovery_offset = rs_is_raid1(rs) ? 0 : MaxSector;
28918c2ecf20Sopenharmony_ci		}
28928c2ecf20Sopenharmony_ci
28938c2ecf20Sopenharmony_ci		mddev->reshape_backwards = 0; /* adding disk(s) -> forward reshape */
28948c2ecf20Sopenharmony_ci
28958c2ecf20Sopenharmony_ci	/* Remove disk(s) */
28968c2ecf20Sopenharmony_ci	} else if (rs->delta_disks < 0) {
28978c2ecf20Sopenharmony_ci		r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
28988c2ecf20Sopenharmony_ci		mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
28998c2ecf20Sopenharmony_ci
29008c2ecf20Sopenharmony_ci	/* Change layout and/or chunk size */
29018c2ecf20Sopenharmony_ci	} else {
29028c2ecf20Sopenharmony_ci		/*
29038c2ecf20Sopenharmony_ci		 * Reshape layout (e.g. raid5_ls -> raid5_n) and/or chunk size:
29048c2ecf20Sopenharmony_ci		 *
29058c2ecf20Sopenharmony_ci		 * keeping the number of disks while changing layout and/or chunk size ->
29068c2ecf20Sopenharmony_ci		 *
29078c2ecf20Sopenharmony_ci		 * toggle reshape_backwards depending on data_offset:
29088c2ecf20Sopenharmony_ci		 *
29098c2ecf20Sopenharmony_ci		 * - free space upfront -> reshape forward
29108c2ecf20Sopenharmony_ci		 *
29118c2ecf20Sopenharmony_ci		 * - free space at the end -> reshape backward
29128c2ecf20Sopenharmony_ci		 *
29148c2ecf20Sopenharmony_ci		 * This utilizes free reshape space avoiding the need
29158c2ecf20Sopenharmony_ci		 * for userspace to move (parts of) LV segments in
29168c2ecf20Sopenharmony_ci		 * case of layout/chunksize change (for disk
29178c2ecf20Sopenharmony_ci		 * adding/removing, reshape space has to be at
29188c2ecf20Sopenharmony_ci		 * the proper end instead; see above with delta_disks):
29198c2ecf20Sopenharmony_ci		 *
29208c2ecf20Sopenharmony_ci		 * add disk(s)   -> begin
29218c2ecf20Sopenharmony_ci		 * remove disk(s) -> end
29228c2ecf20Sopenharmony_ci		 */
29238c2ecf20Sopenharmony_ci		mddev->reshape_backwards = rs->dev[0].rdev.data_offset ? 0 : 1;
29248c2ecf20Sopenharmony_ci	}
29258c2ecf20Sopenharmony_ci
29268c2ecf20Sopenharmony_ci	/*
29278c2ecf20Sopenharmony_ci	 * Adjust device size for forward reshape
29288c2ecf20Sopenharmony_ci	 * because md_finish_reshape() reduces it.
29298c2ecf20Sopenharmony_ci	 */
29308c2ecf20Sopenharmony_ci	if (!mddev->reshape_backwards)
29318c2ecf20Sopenharmony_ci		rdev_for_each(rdev, &rs->md)
29328c2ecf20Sopenharmony_ci			if (!test_bit(Journal, &rdev->flags))
29338c2ecf20Sopenharmony_ci				rdev->sectors += reshape_sectors;
29348c2ecf20Sopenharmony_ci
29358c2ecf20Sopenharmony_ci	return r;
29368c2ecf20Sopenharmony_ci}
29378c2ecf20Sopenharmony_ci
29388c2ecf20Sopenharmony_ci/*
29398c2ecf20Sopenharmony_ci * If the md resync thread has updated superblock with max reshape position
29408c2ecf20Sopenharmony_ci * at the end of a reshape but not (yet) reset the layout configuration
29418c2ecf20Sopenharmony_ci * changes -> reset the latter.
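 * I.e. revert the requested new level/layout/chunk size to the current
 * ones (cf. rs_set_cur()) and clear delta_disks and reshape_backwards.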
29428c2ecf20Sopenharmony_ci */
29438c2ecf20Sopenharmony_cistatic void rs_reset_inconclusive_reshape(struct raid_set *rs)
29448c2ecf20Sopenharmony_ci{
29458c2ecf20Sopenharmony_ci	if (!rs_is_reshaping(rs) && rs_is_layout_change(rs, true)) {
29468c2ecf20Sopenharmony_ci		rs_set_cur(rs);
29478c2ecf20Sopenharmony_ci		rs->md.delta_disks = 0;
29488c2ecf20Sopenharmony_ci		rs->md.reshape_backwards = 0;
29498c2ecf20Sopenharmony_ci	}
29508c2ecf20Sopenharmony_ci}
29518c2ecf20Sopenharmony_ci
29528c2ecf20Sopenharmony_ci/*
29538c2ecf20Sopenharmony_ci * Enable/disable discard support on RAID set depending on
29548c2ecf20Sopenharmony_ci * RAID level and discard properties of underlying RAID members.
29558c2ecf20Sopenharmony_ci */
29568c2ecf20Sopenharmony_cistatic void configure_discard_support(struct raid_set *rs)
29578c2ecf20Sopenharmony_ci{
29588c2ecf20Sopenharmony_ci	int i;
29598c2ecf20Sopenharmony_ci	bool raid456;
29608c2ecf20Sopenharmony_ci	struct dm_target *ti = rs->ti;
29618c2ecf20Sopenharmony_ci
29628c2ecf20Sopenharmony_ci	/*
29638c2ecf20Sopenharmony_ci	 * XXX: RAID level 4,5,6 require zeroing for safety.
29648c2ecf20Sopenharmony_ci	 */
29658c2ecf20Sopenharmony_ci	raid456 = rs_is_raid456(rs);
29668c2ecf20Sopenharmony_ci
29678c2ecf20Sopenharmony_ci	for (i = 0; i < rs->raid_disks; i++) {
29688c2ecf20Sopenharmony_ci		struct request_queue *q;
29698c2ecf20Sopenharmony_ci
29708c2ecf20Sopenharmony_ci		if (!rs->dev[i].rdev.bdev)
29718c2ecf20Sopenharmony_ci			continue;
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci		q = bdev_get_queue(rs->dev[i].rdev.bdev);
29748c2ecf20Sopenharmony_ci		if (!q || !blk_queue_discard(q))
29758c2ecf20Sopenharmony_ci			return;
29768c2ecf20Sopenharmony_ci
29778c2ecf20Sopenharmony_ci		if (raid456) {
29788c2ecf20Sopenharmony_ci			if (!devices_handle_discard_safely) {
29798c2ecf20Sopenharmony_ci				DMERR("raid456 discard support disabled due to discard_zeroes_data uncertainty.");
29808c2ecf20Sopenharmony_ci				DMERR("Set dm-raid.devices_handle_discard_safely=Y to override.");
29818c2ecf20Sopenharmony_ci				return;
29828c2ecf20Sopenharmony_ci			}
29838c2ecf20Sopenharmony_ci		}
29848c2ecf20Sopenharmony_ci	}
29858c2ecf20Sopenharmony_ci
29868c2ecf20Sopenharmony_ci	ti->num_discard_bios = 1;
29878c2ecf20Sopenharmony_ci}
29888c2ecf20Sopenharmony_ci
29898c2ecf20Sopenharmony_ci/*
29908c2ecf20Sopenharmony_ci * Construct a RAID0/1/10/4/5/6 mapping:
29918c2ecf20Sopenharmony_ci * Args:
29928c2ecf20Sopenharmony_ci *	<raid_type> <#raid_params> <raid_params>{0,} \
29938c2ecf20Sopenharmony_ci *	<#raid_devs> [<meta_dev1> <dev1>]{1,}
29948c2ecf20Sopenharmony_ci *
29958c2ecf20Sopenharmony_ci * <raid_params> varies by <raid_type>.	 See 'parse_raid_params' for
29968c2ecf20Sopenharmony_ci * details on possible <raid_params>.
29978c2ecf20Sopenharmony_ci *
29988c2ecf20Sopenharmony_ci * Userspace is free to initialize the metadata devices (and hence the superblocks) to
29998c2ecf20Sopenharmony_ci * enforce recreation based on the passed-in table parameters.
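 *
 * Example mapping table line (cf. the "RAID4 set with 5 drives and no
 * metadata devices" example in Documentation/admin-guide/device-mapper/dm-raid.rst):
 *
 *	0 1960893648 raid raid4 1 2048 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81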
30008c2ecf20Sopenharmony_ci *
30018c2ecf20Sopenharmony_ci */
30028c2ecf20Sopenharmony_cistatic int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
30038c2ecf20Sopenharmony_ci{
30048c2ecf20Sopenharmony_ci	int r;
30058c2ecf20Sopenharmony_ci	bool resize = false;
30068c2ecf20Sopenharmony_ci	struct raid_type *rt;
30078c2ecf20Sopenharmony_ci	unsigned int num_raid_params, num_raid_devs;
30088c2ecf20Sopenharmony_ci	sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
30098c2ecf20Sopenharmony_ci	struct raid_set *rs = NULL;
30108c2ecf20Sopenharmony_ci	const char *arg;
30118c2ecf20Sopenharmony_ci	struct rs_layout rs_layout;
30128c2ecf20Sopenharmony_ci	struct dm_arg_set as = { argc, argv }, as_nrd;
30138c2ecf20Sopenharmony_ci	struct dm_arg _args[] = {
30148c2ecf20Sopenharmony_ci		{ 0, as.argc, "Cannot understand number of raid parameters" },
30158c2ecf20Sopenharmony_ci		{ 1, 254, "Cannot understand number of raid devices parameters" }
30168c2ecf20Sopenharmony_ci	};
30178c2ecf20Sopenharmony_ci
30188c2ecf20Sopenharmony_ci	arg = dm_shift_arg(&as);
30198c2ecf20Sopenharmony_ci	if (!arg) {
30208c2ecf20Sopenharmony_ci		ti->error = "No arguments";
30218c2ecf20Sopenharmony_ci		return -EINVAL;
30228c2ecf20Sopenharmony_ci	}
30238c2ecf20Sopenharmony_ci
30248c2ecf20Sopenharmony_ci	rt = get_raid_type(arg);
30258c2ecf20Sopenharmony_ci	if (!rt) {
30268c2ecf20Sopenharmony_ci		ti->error = "Unrecognised raid_type";
30278c2ecf20Sopenharmony_ci		return -EINVAL;
30288c2ecf20Sopenharmony_ci	}
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_ci	/* Must have <#raid_params> */
30318c2ecf20Sopenharmony_ci	if (dm_read_arg_group(_args, &as, &num_raid_params, &ti->error))
30328c2ecf20Sopenharmony_ci		return -EINVAL;
30338c2ecf20Sopenharmony_ci
30348c2ecf20Sopenharmony_ci	/* number of raid device tuples <meta_dev data_dev> */
30358c2ecf20Sopenharmony_ci	as_nrd = as;
30368c2ecf20Sopenharmony_ci	dm_consume_args(&as_nrd, num_raid_params);
30378c2ecf20Sopenharmony_ci	_args[1].max = (as_nrd.argc - 1) / 2;
30388c2ecf20Sopenharmony_ci	if (dm_read_arg(_args + 1, &as_nrd, &num_raid_devs, &ti->error))
30398c2ecf20Sopenharmony_ci		return -EINVAL;
30408c2ecf20Sopenharmony_ci
30418c2ecf20Sopenharmony_ci	if (!__within_range(num_raid_devs, 1, MAX_RAID_DEVICES)) {
30428c2ecf20Sopenharmony_ci		ti->error = "Invalid number of supplied raid devices";
30438c2ecf20Sopenharmony_ci		return -EINVAL;
30448c2ecf20Sopenharmony_ci	}
30458c2ecf20Sopenharmony_ci
30468c2ecf20Sopenharmony_ci	rs = raid_set_alloc(ti, rt, num_raid_devs);
30478c2ecf20Sopenharmony_ci	if (IS_ERR(rs))
30488c2ecf20Sopenharmony_ci		return PTR_ERR(rs);
30498c2ecf20Sopenharmony_ci
30508c2ecf20Sopenharmony_ci	r = parse_raid_params(rs, &as, num_raid_params);
30518c2ecf20Sopenharmony_ci	if (r)
30528c2ecf20Sopenharmony_ci		goto bad;
30538c2ecf20Sopenharmony_ci
30548c2ecf20Sopenharmony_ci	r = parse_dev_params(rs, &as);
30558c2ecf20Sopenharmony_ci	if (r)
30568c2ecf20Sopenharmony_ci		goto bad;
30578c2ecf20Sopenharmony_ci
30588c2ecf20Sopenharmony_ci	rs->md.sync_super = super_sync;
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_ci	/*
30618c2ecf20Sopenharmony_ci	 * Calculate ctr requested array and device sizes to allow
30628c2ecf20Sopenharmony_ci	 * for superblock analysis needing device sizes defined.
30638c2ecf20Sopenharmony_ci * 30648c2ecf20Sopenharmony_ci * Any existing superblock will overwrite the array and device sizes 30658c2ecf20Sopenharmony_ci */ 30668c2ecf20Sopenharmony_ci r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); 30678c2ecf20Sopenharmony_ci if (r) 30688c2ecf20Sopenharmony_ci goto bad; 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_ci /* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */ 30718c2ecf20Sopenharmony_ci rs->array_sectors = rs->md.array_sectors; 30728c2ecf20Sopenharmony_ci rs->dev_sectors = rs->md.dev_sectors; 30738c2ecf20Sopenharmony_ci 30748c2ecf20Sopenharmony_ci /* 30758c2ecf20Sopenharmony_ci * Backup any new raid set level, layout, ... 30768c2ecf20Sopenharmony_ci * requested to be able to compare to superblock 30778c2ecf20Sopenharmony_ci * members for conversion decisions. 30788c2ecf20Sopenharmony_ci */ 30798c2ecf20Sopenharmony_ci rs_config_backup(rs, &rs_layout); 30808c2ecf20Sopenharmony_ci 30818c2ecf20Sopenharmony_ci r = analyse_superblocks(ti, rs); 30828c2ecf20Sopenharmony_ci if (r) 30838c2ecf20Sopenharmony_ci goto bad; 30848c2ecf20Sopenharmony_ci 30858c2ecf20Sopenharmony_ci /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */ 30868c2ecf20Sopenharmony_ci sb_array_sectors = rs->md.array_sectors; 30878c2ecf20Sopenharmony_ci rdev_sectors = __rdev_sectors(rs); 30888c2ecf20Sopenharmony_ci if (!rdev_sectors) { 30898c2ecf20Sopenharmony_ci ti->error = "Invalid rdev size"; 30908c2ecf20Sopenharmony_ci r = -EINVAL; 30918c2ecf20Sopenharmony_ci goto bad; 30928c2ecf20Sopenharmony_ci } 30938c2ecf20Sopenharmony_ci 30948c2ecf20Sopenharmony_ci 30958c2ecf20Sopenharmony_ci reshape_sectors = _get_reshape_sectors(rs); 30968c2ecf20Sopenharmony_ci if (rs->dev_sectors != rdev_sectors) { 30978c2ecf20Sopenharmony_ci resize = (rs->dev_sectors != rdev_sectors - reshape_sectors); 30988c2ecf20Sopenharmony_ci if (rs->dev_sectors > rdev_sectors - reshape_sectors) 30998c2ecf20Sopenharmony_ci set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); 31008c2ecf20Sopenharmony_ci } 31018c2ecf20Sopenharmony_ci 31028c2ecf20Sopenharmony_ci INIT_WORK(&rs->md.event_work, do_table_event); 31038c2ecf20Sopenharmony_ci ti->private = rs; 31048c2ecf20Sopenharmony_ci ti->num_flush_bios = 1; 31058c2ecf20Sopenharmony_ci 31068c2ecf20Sopenharmony_ci /* Restore any requested new layout for conversion decision */ 31078c2ecf20Sopenharmony_ci rs_config_restore(rs, &rs_layout); 31088c2ecf20Sopenharmony_ci 31098c2ecf20Sopenharmony_ci /* 31108c2ecf20Sopenharmony_ci * Now that we have any superblock metadata available, 31118c2ecf20Sopenharmony_ci * check for new, recovering, reshaping, to be taken over, 31128c2ecf20Sopenharmony_ci * to be reshaped or an existing, unchanged raid set to 31138c2ecf20Sopenharmony_ci * run in sequence. 
31148c2ecf20Sopenharmony_ci	 */
31158c2ecf20Sopenharmony_ci	if (test_bit(MD_ARRAY_FIRST_USE, &rs->md.flags)) {
31168c2ecf20Sopenharmony_ci		/* A new raid6 set has to be recovered to ensure proper parity and Q-Syndrome */
31178c2ecf20Sopenharmony_ci		if (rs_is_raid6(rs) &&
31188c2ecf20Sopenharmony_ci		    test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) {
31198c2ecf20Sopenharmony_ci			ti->error = "'nosync' not allowed for new raid6 set";
31208c2ecf20Sopenharmony_ci			r = -EINVAL;
31218c2ecf20Sopenharmony_ci			goto bad;
31228c2ecf20Sopenharmony_ci		}
31238c2ecf20Sopenharmony_ci		rs_setup_recovery(rs, 0);
31248c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
31258c2ecf20Sopenharmony_ci		rs_set_new(rs);
31268c2ecf20Sopenharmony_ci	} else if (rs_is_recovering(rs)) {
31278c2ecf20Sopenharmony_ci		/* A recovering raid set may be resized */
31288c2ecf20Sopenharmony_ci		goto size_check;
31298c2ecf20Sopenharmony_ci	} else if (rs_is_reshaping(rs)) {
31308c2ecf20Sopenharmony_ci		/* Have to reject size change request during reshape */
31318c2ecf20Sopenharmony_ci		if (resize) {
31328c2ecf20Sopenharmony_ci			ti->error = "Can't resize a reshaping raid set";
31338c2ecf20Sopenharmony_ci			r = -EPERM;
31348c2ecf20Sopenharmony_ci			goto bad;
31358c2ecf20Sopenharmony_ci		}
31368c2ecf20Sopenharmony_ci		/* skip setup rs */
31378c2ecf20Sopenharmony_ci	} else if (rs_takeover_requested(rs)) {
31388c2ecf20Sopenharmony_ci		if (rs_is_reshaping(rs)) {
31398c2ecf20Sopenharmony_ci			ti->error = "Can't takeover a reshaping raid set";
31408c2ecf20Sopenharmony_ci			r = -EPERM;
31418c2ecf20Sopenharmony_ci			goto bad;
31428c2ecf20Sopenharmony_ci		}
31438c2ecf20Sopenharmony_ci
31448c2ecf20Sopenharmony_ci		/* We can't takeover a journaled raid4/5/6 */
31458c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
31468c2ecf20Sopenharmony_ci			ti->error = "Can't takeover a journaled raid4/5/6 set";
31478c2ecf20Sopenharmony_ci			r = -EPERM;
31488c2ecf20Sopenharmony_ci			goto bad;
31498c2ecf20Sopenharmony_ci		}
31508c2ecf20Sopenharmony_ci
31518c2ecf20Sopenharmony_ci		/*
31528c2ecf20Sopenharmony_ci		 * If a takeover is needed, userspace sets any additional
31538c2ecf20Sopenharmony_ci		 * devices to rebuild and we can check for a valid request here.
31548c2ecf20Sopenharmony_ci		 *
31558c2ecf20Sopenharmony_ci		 * If acceptable, set the level to the new requested
31568c2ecf20Sopenharmony_ci		 * one, prohibit requesting recovery, allow the raid
31578c2ecf20Sopenharmony_ci		 * set to run and store superblocks during resume.
31588c2ecf20Sopenharmony_ci		 */
31598c2ecf20Sopenharmony_ci		r = rs_check_takeover(rs);
31608c2ecf20Sopenharmony_ci		if (r)
31618c2ecf20Sopenharmony_ci			goto bad;
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci		r = rs_setup_takeover(rs);
31648c2ecf20Sopenharmony_ci		if (r)
31658c2ecf20Sopenharmony_ci			goto bad;
31668c2ecf20Sopenharmony_ci
31678c2ecf20Sopenharmony_ci		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
31688c2ecf20Sopenharmony_ci		/* Takeover ain't recovery, so disable recovery */
31698c2ecf20Sopenharmony_ci		rs_setup_recovery(rs, MaxSector);
31708c2ecf20Sopenharmony_ci		rs_set_new(rs);
31718c2ecf20Sopenharmony_ci	} else if (rs_reshape_requested(rs)) {
31728c2ecf20Sopenharmony_ci		/* Only request grow on raid set size extensions, not on reshapes. */
31738c2ecf20Sopenharmony_ci		clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
31748c2ecf20Sopenharmony_ci
31758c2ecf20Sopenharmony_ci		/*
31768c2ecf20Sopenharmony_ci		 * No need to check for 'ongoing' takeover here, because takeover
31778c2ecf20Sopenharmony_ci		 * is an instant operation as opposed to an ongoing reshape.
31788c2ecf20Sopenharmony_ci		 */
31798c2ecf20Sopenharmony_ci
31808c2ecf20Sopenharmony_ci		/* We can't reshape a journaled raid4/5/6 */
31818c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
31828c2ecf20Sopenharmony_ci			ti->error = "Can't reshape a journaled raid4/5/6 set";
31838c2ecf20Sopenharmony_ci			r = -EPERM;
31848c2ecf20Sopenharmony_ci			goto bad;
31858c2ecf20Sopenharmony_ci		}
31868c2ecf20Sopenharmony_ci
31878c2ecf20Sopenharmony_ci		/* Out-of-place space has to be available to allow for a reshape unless raid1! */
31888c2ecf20Sopenharmony_ci		if (reshape_sectors || rs_is_raid1(rs)) {
31898c2ecf20Sopenharmony_ci			/*
31908c2ecf20Sopenharmony_ci			 * We can only prepare for a reshape here, because the
31918c2ecf20Sopenharmony_ci			 * raid set needs to run to provide the respective reshape
31928c2ecf20Sopenharmony_ci			 * check functions via its MD personality instance.
31938c2ecf20Sopenharmony_ci			 *
31948c2ecf20Sopenharmony_ci			 * So do the reshape check after md_run() succeeded.
31958c2ecf20Sopenharmony_ci			 */
31968c2ecf20Sopenharmony_ci			r = rs_prepare_reshape(rs);
31978c2ecf20Sopenharmony_ci			if (r)
31988c2ecf20Sopenharmony_ci				goto bad;
31998c2ecf20Sopenharmony_ci
32008c2ecf20Sopenharmony_ci			/* Reshaping ain't recovery, so disable recovery */
32018c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, MaxSector);
32028c2ecf20Sopenharmony_ci		}
32038c2ecf20Sopenharmony_ci		rs_set_cur(rs);
32048c2ecf20Sopenharmony_ci	} else {
32058c2ecf20Sopenharmony_cisize_check:
32068c2ecf20Sopenharmony_ci		/* May not set recovery when a device rebuild is requested */
32078c2ecf20Sopenharmony_ci		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
32088c2ecf20Sopenharmony_ci			clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
32098c2ecf20Sopenharmony_ci			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
32108c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, MaxSector);
32118c2ecf20Sopenharmony_ci		} else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
32128c2ecf20Sopenharmony_ci			/*
32138c2ecf20Sopenharmony_ci			 * Set raid set to current size, i.e. size as of
32148c2ecf20Sopenharmony_ci			 * superblocks to grow to larger size in preresume.
32158c2ecf20Sopenharmony_ci			 */
32168c2ecf20Sopenharmony_ci			r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
32178c2ecf20Sopenharmony_ci			if (r)
32188c2ecf20Sopenharmony_ci				goto bad;
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);

			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ?
					      rs->md.recovery_cp : rs->md.dev_sectors);
		} else {
			/* This is no size change or it is shrinking, update size and record in superblocks */
			r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
			if (r)
				goto bad;

			if (sb_array_sectors > rs->array_sectors)
				set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
		}
		rs_set_cur(rs);
	}

	/* If constructor requested it, change data and new_data offsets */
	r = rs_adjust_data_offsets(rs);
	if (r)
		goto bad;

	/* Catch any inconclusive reshape superblock content. */
	rs_reset_inconclusive_reshape(rs);

	/* Start raid set read-only and assumed clean to change in raid_resume() */
	rs->md.ro = 1;
	rs->md.in_sync = 1;

	/* Keep array frozen until resume. */
	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);

	/* Has to be held on running the array */
	mddev_lock_nointr(&rs->md);
	r = md_run(&rs->md);
	rs->md.in_sync = 0; /* Assume already marked dirty */
	if (r) {
		ti->error = "Failed to run raid array";
		mddev_unlock(&rs->md);
		goto bad;
	}

	r = md_start(&rs->md);
	if (r) {
		ti->error = "Failed to start raid array";
		goto bad_unlock;
	}

	/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
	if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
		r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
		if (r) {
			ti->error = "Failed to set raid4/5/6 journal mode";
			goto bad_unlock;
		}
	}

	mddev_suspend(&rs->md);
	set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);

	/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
	if (rs_is_raid456(rs)) {
		r = rs_set_raid456_stripe_cache(rs);
		if (r)
			goto bad_unlock;
	}

	/* Now do an early reshape check */
	if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
		r = rs_check_reshape(rs);
		if (r)
			goto bad_unlock;

		/* Restore new, ctr requested layout to perform check */
		rs_config_restore(rs, &rs_layout);

		if (rs->md.pers->start_reshape) {
			r = rs->md.pers->check_reshape(&rs->md);
			if (r) {
				ti->error = "Reshape check failed";
				goto bad_unlock;
			}
		}
	}

	/* Disable/enable discard support on raid set. */
	configure_discard_support(rs);

	mddev_unlock(&rs->md);
	return 0;

bad_unlock:
	md_stop(&rs->md);
	mddev_unlock(&rs->md);
bad:
	raid_set_free(rs);

	return r;
}

static void raid_dtr(struct dm_target *ti)
{
	struct raid_set *rs = ti->private;

	mddev_lock_nointr(&rs->md);
	md_stop(&rs->md);
	mddev_unlock(&rs->md);
	raid_set_free(rs);
}

static int raid_map(struct dm_target *ti, struct bio *bio)
{
	struct raid_set *rs = ti->private;
	struct mddev *mddev = &rs->md;

	/*
	 * If we're reshaping to add disk(s), ti->len and
	 * mddev->array_sectors will differ during the process
	 * (ti->len > mddev->array_sectors), so we have to requeue
	 * bios with addresses > mddev->array_sectors here or
	 * there will occur accesses past EOD of the component
	 * data images thus erroring the raid set.
	 */
	if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
		return DM_MAPIO_REQUEUE;

	md_handle_request(mddev, bio);

	return DM_MAPIO_SUBMITTED;
}

/* Return sync state string for @state */
enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
static const char *sync_str(enum sync_state state)
{
	/* Has to be in above sync_state order! */
	static const char *sync_strs[] = {
		"frozen",
		"reshape",
		"resync",
		"check",
		"repair",
		"recover",
		"idle"
	};

	return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
}
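
/*
 * Illustrative decoding of @recovery flag combinations by
 * decipher_sync_action() below (derived from its tests; not exhaustive):
 *
 *	MD_RECOVERY_FROZEN					-> st_frozen
 *	MD_RECOVERY_RUNNING | MD_RECOVERY_RESHAPE		-> st_reshape
 *	MD_RECOVERY_RUNNING | MD_RECOVERY_SYNC			-> st_resync
 *	... | MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED		-> st_repair
 *	... | MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
 *	      MD_RECOVERY_CHECK					-> st_check
 *	MD_RECOVERY_RUNNING | MD_RECOVERY_RECOVER		-> st_recover
 *	none of the above (or MD_RECOVERY_DONE set)		-> st_idle
 */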
/* Return enum sync_state for @mddev derived from @recovery flags */
static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
{
	if (test_bit(MD_RECOVERY_FROZEN, &recovery))
		return st_frozen;

	/* The MD sync thread can be done with io or be interrupted but still be running */
	if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
	    (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
	     (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
		if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
			return st_reshape;

		if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
			if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
				return st_resync;
			if (test_bit(MD_RECOVERY_CHECK, &recovery))
				return st_check;
			return st_repair;
		}

		if (test_bit(MD_RECOVERY_RECOVER, &recovery))
			return st_recover;

		if (mddev->reshape_position != MaxSector)
			return st_reshape;
	}

	return st_idle;
}

/*
 * Return status string for @rdev
 *
 * Status characters:
 *
 *  'D' = Dead/Failed raid set component or raid4/5/6 journal device
 *  'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
 *  'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
 *  '-' = Non-existing device (i.e. userspace passed '- -' into the ctr)
 */
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
{
	if (!rdev->bdev)
		return "-";
	else if (test_bit(Faulty, &rdev->flags))
		return "D";
	else if (test_bit(Journal, &rdev->flags))
		return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a";
	else if (test_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags) ||
		 (!test_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags) &&
		  !test_bit(In_sync, &rdev->flags)))
		return "a";
	else
		return "A";
}

/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resyncing */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
				enum sync_state state, sector_t resync_max_sectors)
{
	sector_t r;
	struct mddev *mddev = &rs->md;

	clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
	clear_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);

	if (rs_is_raid0(rs)) {
		r = resync_max_sectors;
		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);

	} else {
		if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
			r = mddev->recovery_cp;
		else
			r = mddev->curr_resync_completed;

		if (state == st_idle && r >= resync_max_sectors) {
			/*
			 * Sync complete.
			 */
			/* In case we have finished recovering, the array is in sync. */
			if (test_bit(MD_RECOVERY_RECOVER, &recovery))
				set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);

		} else if (state == st_recover)
			/*
			 * In case we are recovering, the array is not in sync
			 * and health chars should show the recovering legs.
			 *
			 * Already retrieved recovery offset from curr_resync_completed above.
			 */
			;

		else if (state == st_resync || state == st_reshape)
			/*
			 * If "resync/reshape" is occurring, the raid set
			 * is or may be out of sync hence the health
			 * characters shall be 'a'.
			 */
			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);

		else if (state == st_check || state == st_repair)
			/*
			 * If "check" or "repair" is occurring, the raid set has
			 * undergone an initial sync and the health characters
			 * should not be 'a' anymore.
			 */
			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);

		else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
			/*
			 * We are idle and recovery is needed, prevent 'A' chars race
			 * caused by components still set to in-sync by constructor.
			 */
			set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);

		else {
			/*
			 * We are idle and the raid set may be doing an initial
			 * sync, or it may be rebuilding individual components.
			 * If all the devices are In_sync, then it is the raid set
			 * that is being initialized.
			 */
			struct md_rdev *rdev;

			set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
			rdev_for_each(rdev, mddev)
				if (!test_bit(Journal, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags)) {
					clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
					break;
				}
		}
	}

	return min(r, resync_max_sectors);
}

/* Helper to return @dev name or "-" if !@dev */
static const char *__get_dev_name(struct dm_dev *dev)
{
	return dev ? dev->name : "-";
}
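
/*
 * Illustrative status lines (hypothetical, healthy 3-device raid5 set with
 * only a region_size argument; field order matches the DMEMIT calls below):
 *
 * STATUSTYPE_INFO, i.e. <raid_type> <#devs> <health_chars> <sync_ratio>
 * <sync_action> <mismatch_cnt> <data_offset> <journal_char>:
 *
 *	raid5_ls 3 AAA 976773168/976773168 idle 0 2048 -
 *
 * STATUSTYPE_TABLE, i.e. the constructor line to recreate the set
 * (raid_param_cnt = 1 for the chunk size + 2 for the region_size pair):
 *
 *	raid5_ls 3 128 region_size 8192 3 - /dev/sda1 - /dev/sdb1 - /dev/sdc1
 */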
static void raid_status(struct dm_target *ti, status_type_t type,
			unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct raid_set *rs = ti->private;
	struct mddev *mddev = &rs->md;
	struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
	int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
	unsigned long recovery;
	unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
	unsigned int sz = 0;
	unsigned int rebuild_writemostly_count = 0;
	sector_t progress, resync_max_sectors, resync_mismatches;
	enum sync_state state;
	struct raid_type *rt;

	switch (type) {
	case STATUSTYPE_INFO:
		/* *Should* always succeed */
		rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
		if (!rt)
			return;

		DMEMIT("%s %d ", rt->name, mddev->raid_disks);

		/* Access most recent mddev properties for status output */
		smp_rmb();
		/* Get sensible max sectors even if raid set not yet started */
		resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
				      mddev->resync_max_sectors : mddev->dev_sectors;
		recovery = rs->md.recovery;
		state = decipher_sync_action(mddev, recovery);
		progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
		resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
				    atomic64_read(&mddev->resync_mismatches) : 0;

		/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
		for (i = 0; i < rs->raid_disks; i++)
			DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev));

		/*
		 * In-sync/Reshape ratio:
		 *  The in-sync ratio shows the progress of:
		 *   - Initializing the raid set
		 *   - Rebuilding a subset of devices of the raid set
		 *  The user can distinguish between the two by referring
		 *  to the status characters.
		 *
		 *  The reshape ratio shows the progress of
		 *  changing the raid layout or the number of
		 *  disks of a raid set
		 */
		DMEMIT(" %llu/%llu", (unsigned long long) progress,
		       (unsigned long long) resync_max_sectors);

		/*
		 * v1.5.0+:
		 *
		 * Sync action:
		 *   See Documentation/admin-guide/device-mapper/dm-raid.rst for
		 *   information on each of these states.
		 */
		DMEMIT(" %s", sync_str(state));

		/*
		 * v1.5.0+:
		 *
		 * resync_mismatches/mismatch_cnt
		 *   This field shows the number of discrepancies found when
		 *   performing a "check" of the raid set.
		 */
		DMEMIT(" %llu", (unsigned long long) resync_mismatches);

		/*
		 * v1.9.0+:
		 *
		 * data_offset (needed for out of space reshaping)
		 *   This field shows the data offset into the data
		 *   image LV where the first stripe's data starts.
		 *
		 * We keep data_offset equal on all raid disks of the set,
		 * so retrieving it from the first raid disk is sufficient.
		 */
		DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset);

		/*
		 * v1.10.0+:
		 */
		DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
			      __raid_dev_status(rs, &rs->journal_dev.rdev) : "-");
		break;

	case STATUSTYPE_TABLE:
		/* Report the table line string you would use to construct this raid set */

		/*
		 * Count any rebuild or writemostly argument pairs and subtract the
		 * hweight count being added below of any rebuild and writemostly ctr flags.
		 */
		for (i = 0; i < rs->raid_disks; i++) {
			rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) +
						     (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0);
		}
		rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) +
					     (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0);
		/* Calculate raid parameter count based on ^ rebuild/writemostly argument counts and ctr flags set. */
		raid_param_cnt += rebuild_writemostly_count +
				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
		/* Emit table line */
		/* This has to be in the documented order for userspace! */
		DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
		if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
			DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC));
		if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
			DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC));
		if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags))
			for (i = 0; i < rs->raid_disks; i++)
				if (test_bit(i, (void *) rs->rebuild_disks))
					DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i);
		if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
			DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP),
			       mddev->bitmap_info.daemon_sleep);
		if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
			       mddev->sync_speed_min);
		if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
			       mddev->sync_speed_max);
		if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags))
			for (i = 0; i < rs->raid_disks; i++)
				if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
					DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY),
					       rs->dev[i].rdev.raid_disk);
		if (test_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags))
			DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND),
			       mddev->bitmap_info.max_write_behind);
		if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE),
			       max_nr_stripes);
		if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
			DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE),
			       (unsigned long long) to_sector(mddev->bitmap_info.chunksize));
		if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES),
			       raid10_md_layout_to_copies(mddev->layout));
		if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT),
			       raid10_md_layout_to_format(mddev->layout));
		if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS),
			       max(rs->delta_disks, mddev->delta_disks));
		if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags))
			DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET),
			       (unsigned long long) rs->data_offset);
		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
			       __get_dev_name(rs->journal_dev.dev));
		if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags))
			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE),
			       md_journal_mode_to_dm_raid(rs->journal_dev.mode));
		DMEMIT(" %d", rs->raid_disks);
		for (i = 0; i < rs->raid_disks; i++)
			DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
			       __get_dev_name(rs->dev[i].data_dev));
	}
}
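
/*
 * Message interface to change the sync action of the set, e.g. (assuming
 * a hypothetical mapped device named "r5"):
 *
 *	dmsetup message r5 0 frozen	- freeze any ongoing sync action
 *	dmsetup message r5 0 idle	- stop the current sync action
 *	dmsetup message r5 0 resync	- initiate/continue a resync
 *	dmsetup message r5 0 recover	- recover failed/replaced devices
 *	dmsetup message r5 0 check	- scrub, accounting mismatches only
 *	dmsetup message r5 0 repair	- scrub and correct mismatches
 */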
static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
			char *result, unsigned maxlen)
{
	struct raid_set *rs = ti->private;
	struct mddev *mddev = &rs->md;

	if (!mddev->pers || !mddev->pers->sync_request)
		return -EINVAL;

	if (!strcasecmp(argv[0], "frozen"))
		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	else
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
		if (mddev->sync_thread) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);
		}
	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
		return -EBUSY;
	else if (!strcasecmp(argv[0], "resync"))
		; /* MD_RECOVERY_NEEDED set below */
	else if (!strcasecmp(argv[0], "recover"))
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	else {
		if (!strcasecmp(argv[0], "check")) {
			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		} else if (!strcasecmp(argv[0], "repair")) {
			set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		} else
			return -EINVAL;
	}
	if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
		mddev->ro = 0;
		if (!mddev->suspended && mddev->sync_thread)
			md_wakeup_thread(mddev->sync_thread);
	}
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	if (!mddev->suspended && mddev->thread)
		md_wakeup_thread(mddev->thread);

	return 0;
}

static int raid_iterate_devices(struct dm_target *ti,
				iterate_devices_callout_fn fn, void *data)
{
	struct raid_set *rs = ti->private;
	unsigned int i;
	int r = 0;

	for (i = 0; !r && i < rs->raid_disks; i++) {
		if (rs->dev[i].data_dev) {
			r = fn(ti, rs->dev[i].data_dev,
			       0, /* No offset on data devs */
			       rs->md.dev_sectors, data);
		}
	}

	return r;
}
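
/*
 * Minimal sketch of the resulting queue limits, assuming a raid5 set with
 * a 64 KiB chunk and four data stripes (hypothetical values):
 *
 *	io_min = chunk size			= 64 KiB
 *	io_opt = chunk size * #data stripes	= 256 KiB (one full stripe)
 */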
static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct raid_set *rs = ti->private;
	unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);

	blk_limits_io_min(limits, chunk_size_bytes);
	blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));

	/*
	 * RAID0 and RAID10 personalities require bio splitting,
	 * RAID1/4/5/6 don't and process large discard bios properly.
	 */
	if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
		limits->discard_granularity = chunk_size_bytes;
		limits->max_discard_sectors = rs->md.chunk_sectors;
	}
}

static void raid_postsuspend(struct dm_target *ti)
{
	struct raid_set *rs = ti->private;

	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
		/* Writes have to be stopped before suspending to avoid deadlocks. */
		if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
			md_stop_writes(&rs->md);

		mddev_lock_nointr(&rs->md);
		mddev_suspend(&rs->md);
		mddev_unlock(&rs->md);
	}
}
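
/*
 * Try to revive raid set components which failed transiently (e.g. on a
 * temporarily unreachable path) but expose a readable superblock again;
 * called from raid_resume() on any secondary resume of the set.
 */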
static void attempt_restore_of_faulty_devices(struct raid_set *rs)
{
	int i;
	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
	unsigned long flags;
	bool cleared = false;
	struct dm_raid_superblock *sb;
	struct mddev *mddev = &rs->md;
	struct md_rdev *r;

	/* RAID personalities have to provide hot add/remove methods or we need to bail out. */
	if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
		return;

	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));

	for (i = 0; i < rs->raid_disks; i++) {
		r = &rs->dev[i].rdev;
		/* HM FIXME: enhance journal device recovery processing */
		if (test_bit(Journal, &r->flags))
			continue;

		if (test_bit(Faulty, &r->flags) &&
		    r->meta_bdev && !read_disk_sb(r, r->sb_size, true)) {
			DMINFO("Faulty %s device #%d has readable super block."
			       " Attempting to revive it.",
			       rs->raid_type->name, i);

			/*
			 * Faulty bit may be set, but sometimes the array can
			 * be suspended before the personalities can respond
			 * by removing the device from the array (i.e. calling
			 * 'hot_remove_disk'). If they haven't yet removed
			 * the failed device, its 'raid_disk' number will be
			 * '>= 0' - meaning we must call this function
			 * ourselves.
			 */
			flags = r->flags;
			clear_bit(In_sync, &r->flags); /* Mandatory for hot remove. */
			if (r->raid_disk >= 0) {
				if (mddev->pers->hot_remove_disk(mddev, r)) {
					/* Failed to revive this device, try next */
					r->flags = flags;
					continue;
				}
			} else
				r->raid_disk = r->saved_raid_disk = i;

			clear_bit(Faulty, &r->flags);
			clear_bit(WriteErrorSeen, &r->flags);

			if (mddev->pers->hot_add_disk(mddev, r)) {
				/* Failed to revive this device, try next */
				r->raid_disk = r->saved_raid_disk = -1;
				r->flags = flags;
			} else {
				clear_bit(In_sync, &r->flags);
				r->recovery_offset = 0;
				set_bit(i, (void *) cleared_failed_devices);
				cleared = true;
			}
		}
	}

	/* If any failed devices could be cleared, update all sbs failed_devices bits */
	if (cleared) {
		uint64_t failed_devices[DISKS_ARRAY_ELEMS];

		rdev_for_each(r, &rs->md) {
			if (test_bit(Journal, &r->flags))
				continue;

			sb = page_address(r->sb_page);
			sb_retrieve_failed_devices(sb, failed_devices);

			for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
				failed_devices[i] &= ~cleared_failed_devices[i];

			sb_update_failed_devices(sb, failed_devices);
		}
	}
}

static int __load_dirty_region_bitmap(struct raid_set *rs)
{
	int r = 0;

	/* Try loading the bitmap unless "raid0", which does not have one */
	if (!rs_is_raid0(rs) &&
	    !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
		r = md_bitmap_load(&rs->md);
		if (r)
			DMERR("Failed to load bitmap");
	}

	return r;
}

/* Enforce updating all superblocks */
static void rs_update_sbs(struct raid_set *rs)
{
	struct mddev *mddev = &rs->md;
	int ro = mddev->ro;

	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
	mddev->ro = 0;
	md_update_sb(mddev, 1);
	mddev->ro = ro;
}

/*
 * Reshape changes raid algorithm of @rs to new one within personality
 * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes
 * disks from a raid set thus growing/shrinking it or resizes the set
 *
 * Call mddev_lock_nointr() before!
 */
static int rs_start_reshape(struct raid_set *rs)
{
	int r;
	struct mddev *mddev = &rs->md;
	struct md_personality *pers = mddev->pers;

	/* Don't allow the sync thread to work until the table gets reloaded. */
	set_bit(MD_RECOVERY_WAIT, &mddev->recovery);

	r = rs_setup_reshape(rs);
	if (r)
		return r;

	/*
	 * Check any reshape constraints enforced by the personality
	 *
	 * May as well already kick the reshape off so that
	 * pers->start_reshape() becomes optional.
	 */
	r = pers->check_reshape(mddev);
	if (r) {
		rs->ti->error = "pers->check_reshape() failed";
		return r;
	}

	/*
	 * Personality may not provide start reshape method in which
	 * case check_reshape above has already covered everything
	 */
	if (pers->start_reshape) {
		r = pers->start_reshape(mddev);
		if (r) {
			rs->ti->error = "pers->start_reshape() failed";
			return r;
		}
	}

	/*
	 * Now reshape got set up, update superblocks to
	 * reflect the fact so that a table reload will
	 * access proper superblock content in the ctr.
	 */
	rs_update_sbs(rs);

	return 0;
}
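
/*
 * dm core invokes raid_preresume() below, followed by raid_resume(), when
 * the mapped device is resumed (e.g. on "dmsetup resume" after a table
 * load); raid_postsuspend() above is the counterpart run on
 * "dmsetup suspend".
 */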
static int raid_preresume(struct dm_target *ti)
{
	int r;
	struct raid_set *rs = ti->private;
	struct mddev *mddev = &rs->md;

	/* This is a resume after a suspend of the set -> it's already started. */
	if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
		return 0;

	/*
	 * The superblocks need to be updated on disk if the
	 * array is new or new devices got added (thus zeroed
	 * out by userspace) or __load_dirty_region_bitmap
	 * will overwrite them in core with old data or fail.
	 */
	if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
		rs_update_sbs(rs);

	/* Load the bitmap from disk unless raid0 */
	r = __load_dirty_region_bitmap(rs);
	if (r)
		return r;

	/* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
	if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
		mddev->array_sectors = rs->array_sectors;
		mddev->dev_sectors = rs->dev_sectors;
		rs_set_rdev_sectors(rs);
		rs_set_capacity(rs);
	}

	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
	    (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
	     (rs->requested_bitmap_chunk_sectors &&
	      mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
		int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;

		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
		if (r)
			DMERR("Failed to resize bitmap");
	}

	/* Check for any resize/reshape on @rs and adjust/initiate */
	/* Be prepared for mddev_resume() in raid_resume() */
	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		mddev->resync_min = mddev->recovery_cp;
		if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
			mddev->resync_max_sectors = mddev->dev_sectors;
	}

	/* Check for any reshape request unless new raid set */
	if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
		/* Initiate a reshape. */
		rs_set_rdev_sectors(rs);
		mddev_lock_nointr(mddev);
		r = rs_start_reshape(rs);
		mddev_unlock(mddev);
		if (r)
			DMWARN("Failed to check/start reshape, continuing without change");
		r = 0;
	}

	return r;
}

static void raid_resume(struct dm_target *ti)
{
	struct raid_set *rs = ti->private;
	struct mddev *mddev = &rs->md;

	if (test_and_set_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
		/*
		 * A secondary resume while the device is active.
		 * Take this opportunity to check whether any failed
		 * devices are reachable again.
		 */
		attempt_restore_of_faulty_devices(rs);
	}

	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
		/* Only reduce raid set size before running a disk removing reshape. */
		if (mddev->delta_disks < 0)
			rs_set_capacity(rs);

		mddev_lock_nointr(mddev);
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
		mddev->ro = 0;
		mddev->in_sync = 0;
		mddev_resume(mddev);
		mddev_unlock(mddev);
	}
}

static struct target_type raid_target = {
	.name = "raid",
	.version = {1, 15, 1},
	.module = THIS_MODULE,
	.ctr = raid_ctr,
	.dtr = raid_dtr,
	.map = raid_map,
	.status = raid_status,
	.message = raid_message,
	.iterate_devices = raid_iterate_devices,
	.io_hints = raid_io_hints,
	.postsuspend = raid_postsuspend,
	.preresume = raid_preresume,
	.resume = raid_resume,
};

static int __init dm_raid_init(void)
{
	DMINFO("Loading target version %u.%u.%u",
	       raid_target.version[0],
	       raid_target.version[1],
	       raid_target.version[2]);
	return dm_register_target(&raid_target);
}

static void __exit dm_raid_exit(void)
{
	dm_unregister_target(&raid_target);
}

module_init(dm_raid_init);
module_exit(dm_raid_exit);
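
/*
 * devices_handle_discard_safely is writable at runtime (mode 0644), e.g. via
 *
 *	echo Y > /sys/module/dm_raid/parameters/devices_handle_discard_safely
 *
 * or set at load time with "modprobe dm-raid devices_handle_discard_safely=Y".
 */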
module_param(devices_handle_discard_safely, bool, 0644);
MODULE_PARM_DESC(devices_handle_discard_safely,
		 "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");

MODULE_DESCRIPTION(DM_NAME " raid0/1/10/4/5/6 target");
MODULE_ALIAS("dm-raid0");
MODULE_ALIAS("dm-raid1");
MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6");
MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>");
MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");