162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 262306a36Sopenharmony_ci#ifndef _RAID10_H 362306a36Sopenharmony_ci#define _RAID10_H 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci/* Note: raid10_info.rdev can be set to NULL asynchronously by 662306a36Sopenharmony_ci * raid10_remove_disk. 762306a36Sopenharmony_ci * There are three safe ways to access raid10_info.rdev. 862306a36Sopenharmony_ci * 1/ when holding mddev->reconfig_mutex 962306a36Sopenharmony_ci * 2/ when resync/recovery/reshape is known to be happening - i.e. in code 1062306a36Sopenharmony_ci * that is called as part of performing resync/recovery/reshape. 1162306a36Sopenharmony_ci * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer 1262306a36Sopenharmony_ci * and if it is non-NULL, increment rdev->nr_pending before dropping the 1362306a36Sopenharmony_ci * RCU lock. 1462306a36Sopenharmony_ci * When .rdev is set to NULL, the nr_pending count checked again and if it has 1562306a36Sopenharmony_ci * been incremented, the pointer is put back in .rdev. 1662306a36Sopenharmony_ci */ 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_cistruct raid10_info { 1962306a36Sopenharmony_ci struct md_rdev *rdev, *replacement; 2062306a36Sopenharmony_ci sector_t head_position; 2162306a36Sopenharmony_ci int recovery_disabled; /* matches 2262306a36Sopenharmony_ci * mddev->recovery_disabled 2362306a36Sopenharmony_ci * when we shouldn't try 2462306a36Sopenharmony_ci * recovering this device. 2562306a36Sopenharmony_ci */ 2662306a36Sopenharmony_ci}; 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_cistruct r10conf { 2962306a36Sopenharmony_ci struct mddev *mddev; 3062306a36Sopenharmony_ci struct raid10_info *mirrors; 3162306a36Sopenharmony_ci struct raid10_info *mirrors_new, *mirrors_old; 3262306a36Sopenharmony_ci spinlock_t device_lock; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci /* geometry */ 3562306a36Sopenharmony_ci struct geom { 3662306a36Sopenharmony_ci int raid_disks; 3762306a36Sopenharmony_ci int near_copies; /* number of copies laid out 3862306a36Sopenharmony_ci * raid0 style */ 3962306a36Sopenharmony_ci int far_copies; /* number of copies laid out 4062306a36Sopenharmony_ci * at large strides across drives 4162306a36Sopenharmony_ci */ 4262306a36Sopenharmony_ci int far_offset; /* far_copies are offset by 1 4362306a36Sopenharmony_ci * stripe instead of many 4462306a36Sopenharmony_ci */ 4562306a36Sopenharmony_ci sector_t stride; /* distance between far copies. 4662306a36Sopenharmony_ci * This is size / far_copies unless 4762306a36Sopenharmony_ci * far_offset, in which case it is 4862306a36Sopenharmony_ci * 1 stripe. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_ci int far_set_size; /* The number of devices in a set, 5162306a36Sopenharmony_ci * where a 'set' are devices that 5262306a36Sopenharmony_ci * contain far/offset copies of 5362306a36Sopenharmony_ci * each other. 5462306a36Sopenharmony_ci */ 5562306a36Sopenharmony_ci int chunk_shift; /* shift from chunks to sectors */ 5662306a36Sopenharmony_ci sector_t chunk_mask; 5762306a36Sopenharmony_ci } prev, geo; 5862306a36Sopenharmony_ci int copies; /* near_copies * far_copies. 5962306a36Sopenharmony_ci * must be <= raid_disks 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci sector_t dev_sectors; /* temp copy of 6362306a36Sopenharmony_ci * mddev->dev_sectors */ 6462306a36Sopenharmony_ci sector_t reshape_progress; 6562306a36Sopenharmony_ci sector_t reshape_safe; 6662306a36Sopenharmony_ci unsigned long reshape_checkpoint; 6762306a36Sopenharmony_ci sector_t offset_diff; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci struct list_head retry_list; 7062306a36Sopenharmony_ci /* A separate list of r1bio which just need raid_end_bio_io called. 7162306a36Sopenharmony_ci * This mustn't happen for writes which had any errors if the superblock 7262306a36Sopenharmony_ci * needs to be written. 7362306a36Sopenharmony_ci */ 7462306a36Sopenharmony_ci struct list_head bio_end_io_list; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci /* queue pending writes and submit them on unplug */ 7762306a36Sopenharmony_ci struct bio_list pending_bio_list; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci seqlock_t resync_lock; 8062306a36Sopenharmony_ci atomic_t nr_pending; 8162306a36Sopenharmony_ci int nr_waiting; 8262306a36Sopenharmony_ci int nr_queued; 8362306a36Sopenharmony_ci int barrier; 8462306a36Sopenharmony_ci int array_freeze_pending; 8562306a36Sopenharmony_ci sector_t next_resync; 8662306a36Sopenharmony_ci int fullsync; /* set to 1 if a full sync is needed, 8762306a36Sopenharmony_ci * (fresh device added). 8862306a36Sopenharmony_ci * Cleared when a sync completes. 8962306a36Sopenharmony_ci */ 9062306a36Sopenharmony_ci int have_replacement; /* There is at least one 9162306a36Sopenharmony_ci * replacement device. 9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_ci wait_queue_head_t wait_barrier; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci mempool_t r10bio_pool; 9662306a36Sopenharmony_ci mempool_t r10buf_pool; 9762306a36Sopenharmony_ci struct page *tmppage; 9862306a36Sopenharmony_ci struct bio_set bio_split; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci /* When taking over an array from a different personality, we store 10162306a36Sopenharmony_ci * the new thread here until we fully activate the array. 10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_ci struct md_thread __rcu *thread; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci /* 10662306a36Sopenharmony_ci * Keep track of cluster resync window to send to other nodes. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_ci sector_t cluster_sync_low; 10962306a36Sopenharmony_ci sector_t cluster_sync_high; 11062306a36Sopenharmony_ci}; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci/* 11362306a36Sopenharmony_ci * this is our 'private' RAID10 bio. 11462306a36Sopenharmony_ci * 11562306a36Sopenharmony_ci * it contains information about what kind of IO operations were started 11662306a36Sopenharmony_ci * for this RAID10 operation, and about their status: 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_cistruct r10bio { 12062306a36Sopenharmony_ci atomic_t remaining; /* 'have we finished' count, 12162306a36Sopenharmony_ci * used from IRQ handlers 12262306a36Sopenharmony_ci */ 12362306a36Sopenharmony_ci sector_t sector; /* virtual sector number */ 12462306a36Sopenharmony_ci int sectors; 12562306a36Sopenharmony_ci unsigned long state; 12662306a36Sopenharmony_ci struct mddev *mddev; 12762306a36Sopenharmony_ci /* 12862306a36Sopenharmony_ci * original bio going to /dev/mdx 12962306a36Sopenharmony_ci */ 13062306a36Sopenharmony_ci struct bio *master_bio; 13162306a36Sopenharmony_ci /* 13262306a36Sopenharmony_ci * if the IO is in READ direction, then this is where we read 13362306a36Sopenharmony_ci */ 13462306a36Sopenharmony_ci int read_slot; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci struct list_head retry_list; 13762306a36Sopenharmony_ci /* 13862306a36Sopenharmony_ci * if the IO is in WRITE direction, then multiple bios are used, 13962306a36Sopenharmony_ci * one for each copy. 14062306a36Sopenharmony_ci * When resyncing we also use one for each copy. 14162306a36Sopenharmony_ci * When reconstructing, we use 2 bios, one for read, one for write. 14262306a36Sopenharmony_ci * We choose the number when they are allocated. 14362306a36Sopenharmony_ci * We sometimes need an extra bio to write to the replacement. 14462306a36Sopenharmony_ci */ 14562306a36Sopenharmony_ci struct r10dev { 14662306a36Sopenharmony_ci struct bio *bio; 14762306a36Sopenharmony_ci union { 14862306a36Sopenharmony_ci struct bio *repl_bio; /* used for resync and 14962306a36Sopenharmony_ci * writes */ 15062306a36Sopenharmony_ci struct md_rdev *rdev; /* used for reads 15162306a36Sopenharmony_ci * (read_slot >= 0) */ 15262306a36Sopenharmony_ci }; 15362306a36Sopenharmony_ci sector_t addr; 15462306a36Sopenharmony_ci int devnum; 15562306a36Sopenharmony_ci } devs[]; 15662306a36Sopenharmony_ci}; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci/* bits for r10bio.state */ 15962306a36Sopenharmony_cienum r10bio_state { 16062306a36Sopenharmony_ci R10BIO_Uptodate, 16162306a36Sopenharmony_ci R10BIO_IsSync, 16262306a36Sopenharmony_ci R10BIO_IsRecover, 16362306a36Sopenharmony_ci R10BIO_IsReshape, 16462306a36Sopenharmony_ci R10BIO_Degraded, 16562306a36Sopenharmony_ci/* Set ReadError on bios that experience a read error 16662306a36Sopenharmony_ci * so that raid10d knows what to do with them. 16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_ci R10BIO_ReadError, 16962306a36Sopenharmony_ci/* If a write for this request means we can clear some 17062306a36Sopenharmony_ci * known-bad-block records, we set this flag. 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_ci R10BIO_MadeGood, 17362306a36Sopenharmony_ci R10BIO_WriteError, 17462306a36Sopenharmony_ci/* During a reshape we might be performing IO on the 17562306a36Sopenharmony_ci * 'previous' part of the array, in which case this 17662306a36Sopenharmony_ci * flag is set 17762306a36Sopenharmony_ci */ 17862306a36Sopenharmony_ci R10BIO_Previous, 17962306a36Sopenharmony_ci/* failfast devices did receive failfast requests. */ 18062306a36Sopenharmony_ci R10BIO_FailFast, 18162306a36Sopenharmony_ci R10BIO_Discard, 18262306a36Sopenharmony_ci}; 18362306a36Sopenharmony_ci#endif 184