/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2012 Fusion-io  All rights reserved.
 * Copyright (C) 2012 Intel Corp. All rights reserved.
 */

#ifndef BTRFS_RAID56_H
#define BTRFS_RAID56_H

#include <linux/workqueue.h>
#include "volumes.h"

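/* Types of work an rbio can carry; stored in btrfs_raid_bio::operation. */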
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
};

struct btrfs_raid_bio {
	struct btrfs_io_context *bioc;

	/*
	 * While we're doing RMW on a stripe, we put it into a hash table so
	 * we can lock the stripe and merge more rbios into it.
	 */
	struct list_head hash_list;

	/* LRU list for the stripe cache */
	struct list_head stripe_cache;

	/* For scheduling work in the helper threads */
	struct work_struct work;

	/*
	 * bio_list and bio_list_lock are used to add more bios into the stripe
	 * in hopes of avoiding the full RMW.
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/*
	 * Also protected by the bio_list_lock, the plug list is used by the
	 * plugging code to collect partial bios while plugged.  The stripe
	 * locking code also uses it to hand off the stripe lock to the next
	 * pending IO.
	 */
	struct list_head plug_list;

	/* Flags that tell us if it is safe to merge with this bio. */
	unsigned long flags;

	/*
	 * Set if we're doing a parity rebuild for a read from higher up, which
	 * is handled differently from a parity rebuild as part of RMW.
	 */
	enum btrfs_rbio_ops operation;

	/* How many pages there are for the full stripe including P/Q */
	u16 nr_pages;

	/* How many sectors there are for the full stripe including P/Q */
	u16 nr_sectors;

	/* Number of data stripes (no P/Q) */
	u8 nr_data;

	/* Number of all stripes (including P/Q) */
	u8 real_stripes;

	/* How many pages there are for each stripe */
	u8 stripe_npages;

	/* How many sectors there are for each stripe */
	u8 stripe_nsectors;

	/* Stripe number that we're scrubbing */
	u8 scrubp;

	/*
	 * Size of all the bios in the bio_list.  This helps us decide if the
	 * rbio maps to a full stripe or not.
	 */
	int bio_list_bytes;

	refcount_t refs;

	atomic_t stripes_pending;

	wait_queue_head_t io_wait;

	/* Bitmap to record which horizontal stripe has data */
	unsigned long dbitmap;

	/* Allocated with stripe_nsectors-many bits for finish_*() calls */
	unsigned long finish_pbitmap;

	/*
	 * These are two arrays of pointers.  We allocate the rbio big enough
	 * to hold them both and set up their locations when the rbio is
	 * allocated (see the layout sketch after this struct).
	 */

	/*
	 * Pointers to pages that we allocated for reading/writing stripes
	 * directly from the disk (including P/Q).
	 */
	struct page **stripe_pages;

	/* Pointers to the sectors in the bio_list, for faster lookup */
	struct sector_ptr *bio_sectors;

	/*
	 * For subpage support, we need to map each sector to the
	 * stripe_pages above.
	 */
	struct sector_ptr *stripe_sectors;

	/* Allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/*
	 * The bitmap recording where IO errors happened.
	 * Each bit corresponds to one sector in either the bio_sectors[] or
	 * the stripe_sectors[] array.
	 *
	 * The reason we don't use another bit in sector_ptr is that we have
	 * two arrays of sectors, and a lot of IO can use sectors in both
	 * arrays, which makes iterating over them much harder.
	 */
	unsigned long *error_bitmap;

	/*
	 * Checksum buffer if the rbio is for data.  The buffer should cover
	 * all data sectors (excluding P/Q sectors).
	 */
	u8 *csum_buf;

	/*
	 * Each bit represents whether a data csum was found for the
	 * corresponding sector.  Should only cover data sectors (excluding
	 * P/Q sectors).
	 */
	unsigned long *csum_bitmap;
};
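
/*
 * A minimal sketch of the single-allocation layout described above, with
 * hypothetical num_pages/num_sectors counts (not the actual allocator,
 * just the idea):
 *
 *	rbio = kzalloc(sizeof(*rbio) +
 *		       sizeof(struct page *) * num_pages +
 *		       sizeof(struct sector_ptr) * num_sectors, GFP_NOFS);
 *	rbio->stripe_pages = (void *)(rbio + 1);
 *	rbio->bio_sectors = (void *)(rbio->stripe_pages + num_pages);
 */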

/*
 * For trace event usage only.  Records useful debug info for each bio
 * submitted by RAID56 to each physical device.
 *
 * Whether the value is signed or not, (-1) always indicates that we could
 * not grab the proper stripe number.
 */
struct raid56_bio_trace_info {
	u64 devid;

	/* The offset inside the stripe. (<= STRIPE_LEN) */
	u32 offset;

	/*
	 * Stripe number.
	 * 0 is the first data stripe, nr_data is the P stripe, and
	 * nr_data + 1 is the Q stripe.
	 * A value >= real_stripes means we could not determine the stripe
	 * number.
	 */
	u8 stripe_nr;
};
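
/*
 * Data stripes are the total stripes minus the parity stripes: e.g. a RAID6
 * chunk striped across 6 devices has num_stripes == 6 and two parity
 * stripes (P and Q), so nr_data_stripes() returns 4.
 */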
static inline int nr_data_stripes(const struct map_lookup *map)
{
	return map->num_stripes - btrfs_nr_parity_stripes(map->type);
}
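
/*
 * Same as nr_data_stripes(), but for the btrfs_io_context of an IO rather
 * than the chunk map.
 */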
static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
{
	return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
}

#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) ||		\
			     ((x) == RAID6_Q_STRIPE))
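
/*
 * A minimal usage sketch, assuming a hypothetical caller that walks the
 * logical addresses recorded for each stripe of a full stripe; the P and Q
 * slots carry the sentinel values above rather than a real logical address:
 *
 *	if (is_parity_stripe(stripe_logical))
 *		continue;
 */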

struct btrfs_device;

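/* Entry points for RAID56 writes and for reads that need to rebuild data. */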
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
			   int mirror_num);
void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);

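/*
 * Scrub support: a scrub rbio verifies and repairs the parity of one full
 * stripe; dbitmap has stripe_nsectors bits marking the sectors that carry
 * data.
 */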
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
				struct btrfs_io_context *bioc,
				struct btrfs_device *scrub_dev,
				unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);

void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
				    struct page **data_pages, u64 data_logical);

int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);

#endif