xref: /kernel/linux/linux-5.10/drivers/md/raid10.h (revision 8c2ecf20)
/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _RAID10_H
3#define _RAID10_H
4
/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */
17
18struct raid10_info {
19	struct md_rdev	*rdev, *replacement;
20	sector_t	head_position;
21	int		recovery_disabled;	/* matches
22						 * mddev->recovery_disabled
23						 * when we shouldn't try
24						 * recovering this device.
25						 */
26};
27
28struct r10conf {
29	struct mddev		*mddev;
30	struct raid10_info	*mirrors;
31	struct raid10_info	*mirrors_new, *mirrors_old;
32	spinlock_t		device_lock;
33
34	/* geometry */
35	struct geom {
36		int		raid_disks;
37		int		near_copies;  /* number of copies laid out
38					       * raid0 style */
39		int		far_copies;   /* number of copies laid out
40					       * at large strides across drives
41					       */
42		int		far_offset;   /* far_copies are offset by 1
43					       * stripe instead of many
44					       */
45		sector_t	stride;	      /* distance between far copies.
46					       * This is size / far_copies unless
47					       * far_offset, in which case it is
48					       * 1 stripe.
49					       */
50		int             far_set_size; /* The number of devices in a set,
51					       * where a 'set' are devices that
52					       * contain far/offset copies of
53					       * each other.
54					       */
55		int		chunk_shift; /* shift from chunks to sectors */
56		sector_t	chunk_mask;
57	} prev, geo;
58	int			copies;	      /* near_copies * far_copies.
59					       * must be <= raid_disks
60					       */
61
62	sector_t		dev_sectors;  /* temp copy of
63					       * mddev->dev_sectors */
64	sector_t		reshape_progress;
65	sector_t		reshape_safe;
66	unsigned long		reshape_checkpoint;
67	sector_t		offset_diff;
68
69	struct list_head	retry_list;
70	/* A separate list of r1bio which just need raid_end_bio_io called.
71	 * This mustn't happen for writes which had any errors if the superblock
72	 * needs to be written.
73	 */
74	struct list_head	bio_end_io_list;
75
76	/* queue pending writes and submit them on unplug */
77	struct bio_list		pending_bio_list;
78	int			pending_count;
79
80	spinlock_t		resync_lock;
81	atomic_t		nr_pending;
82	int			nr_waiting;
83	int			nr_queued;
84	int			barrier;
85	int			array_freeze_pending;
86	sector_t		next_resync;
87	int			fullsync;  /* set to 1 if a full sync is needed,
88					    * (fresh device added).
89					    * Cleared when a sync completes.
90					    */
91	int			have_replacement; /* There is at least one
92						   * replacement device.
93						   */
94	wait_queue_head_t	wait_barrier;
95
96	mempool_t		r10bio_pool;
97	mempool_t		r10buf_pool;
98	struct page		*tmppage;
99	struct bio_set		bio_split;
100
101	/* When taking over an array from a different personality, we store
102	 * the new thread here until we fully activate the array.
103	 */
104	struct md_thread	*thread;
105
106	/*
107	 * Keep track of cluster resync window to send to other nodes.
108	 */
109	sector_t		cluster_sync_low;
110	sector_t		cluster_sync_high;
111};
112
/*
 * This is our 'private' RAID10 bio.
 *
 * It contains information about what kind of IO operations were started
 * for this RAID10 operation, and about their status:
 */
119
120struct r10bio {
121	atomic_t		remaining; /* 'have we finished' count,
122					    * used from IRQ handlers
123					    */
124	sector_t		sector;	/* virtual sector number */
125	int			sectors;
126	unsigned long		state;
127	struct mddev		*mddev;
128	/*
129	 * original bio going to /dev/mdx
130	 */
131	struct bio		*master_bio;
132	/*
133	 * if the IO is in READ direction, then this is where we read
134	 */
135	int			read_slot;
136
137	struct list_head	retry_list;
138	/*
139	 * if the IO is in WRITE direction, then multiple bios are used,
140	 * one for each copy.
141	 * When resyncing we also use one for each copy.
142	 * When reconstructing, we use 2 bios, one for read, one for write.
143	 * We choose the number when they are allocated.
144	 * We sometimes need an extra bio to write to the replacement.
145	 */
146	struct r10dev {
147		struct bio	*bio;
148		union {
149			struct bio	*repl_bio; /* used for resync and
150						    * writes */
151			struct md_rdev	*rdev;	   /* used for reads
152						    * (read_slot >= 0) */
153		};
154		sector_t	addr;
155		int		devnum;
156	} devs[];
157};
158
/*
 * Bits for r10bio.state.
 *
 * The enumerators are implicitly numbered from 0 in declaration order;
 * append new flags at the end so existing values do not shift.
 */
enum r10bio_state {
	R10BIO_Uptodate,
	R10BIO_IsSync,
	R10BIO_IsRecover,
	R10BIO_IsReshape,
	R10BIO_Degraded,
/* Set ReadError on bios that experience a read error
 * so that raid10d knows what to do with them.
 */
	R10BIO_ReadError,
/* If a write for this request means we can clear some
 * known-bad-block records, we set this flag.
 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
/* During a reshape we might be performing IO on the
 * 'previous' part of the array, in which case this
 * flag is set
 */
	R10BIO_Previous,
/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
};
183#endif
184