xref: /kernel/linux/linux-5.10/fs/btrfs/raid56.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2012 Fusion-io  All rights reserved.
48c2ecf20Sopenharmony_ci * Copyright (C) 2012 Intel Corp. All rights reserved.
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include <linux/sched.h>
88c2ecf20Sopenharmony_ci#include <linux/bio.h>
98c2ecf20Sopenharmony_ci#include <linux/slab.h>
108c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
118c2ecf20Sopenharmony_ci#include <linux/raid/pq.h>
128c2ecf20Sopenharmony_ci#include <linux/hash.h>
138c2ecf20Sopenharmony_ci#include <linux/list_sort.h>
148c2ecf20Sopenharmony_ci#include <linux/raid/xor.h>
158c2ecf20Sopenharmony_ci#include <linux/mm.h>
168c2ecf20Sopenharmony_ci#include "ctree.h"
178c2ecf20Sopenharmony_ci#include "disk-io.h"
188c2ecf20Sopenharmony_ci#include "volumes.h"
198c2ecf20Sopenharmony_ci#include "raid56.h"
208c2ecf20Sopenharmony_ci#include "async-thread.h"
218c2ecf20Sopenharmony_ci
/*
 * Bit numbers for btrfs_raid_bio::flags, used with set_bit()/test_bit()
 * and friends.
 */

/* Set once no further rbios may be merged into this one. */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * Set while the rbio sits in the stripe hash purely as a cache of an
 * earlier RMW.
 */
#define RBIO_CACHE_BIT		2

/* Set when the contents of stripe_pages may be trusted for caching. */
#define RBIO_CACHE_READY_BIT	3

/* cache_rbio() prunes an entry once the cache holds more rbios than this. */
#define RBIO_CACHE_SIZE 1024

/* log2 of the number of buckets allocated for the stripe hash table. */
#define BTRFS_STRIPE_HASH_TABLE_BITS				11
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci/* Used by the raid56 code to lock stripes for read/modify/write */
418c2ecf20Sopenharmony_cistruct btrfs_stripe_hash {
428c2ecf20Sopenharmony_ci	struct list_head hash_list;
438c2ecf20Sopenharmony_ci	spinlock_t lock;
448c2ecf20Sopenharmony_ci};
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci/* Used by the raid56 code to lock stripes for read/modify/write */
478c2ecf20Sopenharmony_cistruct btrfs_stripe_hash_table {
488c2ecf20Sopenharmony_ci	struct list_head stripe_cache;
498c2ecf20Sopenharmony_ci	spinlock_t cache_lock;
508c2ecf20Sopenharmony_ci	int cache_size;
518c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash table[];
528c2ecf20Sopenharmony_ci};
538c2ecf20Sopenharmony_ci
/*
 * The operation an rbio was created for, stored in
 * btrfs_raid_bio::operation.  rbio_can_merge() only merges rbios whose
 * operations match, and never merges PARITY_SCRUB or REBUILD_MISSING.
 */
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_cistruct btrfs_raid_bio {
628c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info;
638c2ecf20Sopenharmony_ci	struct btrfs_bio *bbio;
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci	/* while we're doing rmw on a stripe
668c2ecf20Sopenharmony_ci	 * we put it into a hash table so we can
678c2ecf20Sopenharmony_ci	 * lock the stripe and merge more rbios
688c2ecf20Sopenharmony_ci	 * into it.
698c2ecf20Sopenharmony_ci	 */
708c2ecf20Sopenharmony_ci	struct list_head hash_list;
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci	/*
738c2ecf20Sopenharmony_ci	 * LRU list for the stripe cache
748c2ecf20Sopenharmony_ci	 */
758c2ecf20Sopenharmony_ci	struct list_head stripe_cache;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	/*
788c2ecf20Sopenharmony_ci	 * for scheduling work in the helper threads
798c2ecf20Sopenharmony_ci	 */
808c2ecf20Sopenharmony_ci	struct btrfs_work work;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	/*
838c2ecf20Sopenharmony_ci	 * bio list and bio_list_lock are used
848c2ecf20Sopenharmony_ci	 * to add more bios into the stripe
858c2ecf20Sopenharmony_ci	 * in hopes of avoiding the full rmw
868c2ecf20Sopenharmony_ci	 */
878c2ecf20Sopenharmony_ci	struct bio_list bio_list;
888c2ecf20Sopenharmony_ci	spinlock_t bio_list_lock;
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	/* also protected by the bio_list_lock, the
918c2ecf20Sopenharmony_ci	 * plug list is used by the plugging code
928c2ecf20Sopenharmony_ci	 * to collect partial bios while plugged.  The
938c2ecf20Sopenharmony_ci	 * stripe locking code also uses it to hand off
948c2ecf20Sopenharmony_ci	 * the stripe lock to the next pending IO
958c2ecf20Sopenharmony_ci	 */
968c2ecf20Sopenharmony_ci	struct list_head plug_list;
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	/*
998c2ecf20Sopenharmony_ci	 * flags that tell us if it is safe to
1008c2ecf20Sopenharmony_ci	 * merge with this bio
1018c2ecf20Sopenharmony_ci	 */
1028c2ecf20Sopenharmony_ci	unsigned long flags;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	/* size of each individual stripe on disk */
1058c2ecf20Sopenharmony_ci	int stripe_len;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	/* number of data stripes (no p/q) */
1088c2ecf20Sopenharmony_ci	int nr_data;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	int real_stripes;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	int stripe_npages;
1138c2ecf20Sopenharmony_ci	/*
1148c2ecf20Sopenharmony_ci	 * set if we're doing a parity rebuild
1158c2ecf20Sopenharmony_ci	 * for a read from higher up, which is handled
1168c2ecf20Sopenharmony_ci	 * differently from a parity rebuild as part of
1178c2ecf20Sopenharmony_ci	 * rmw
1188c2ecf20Sopenharmony_ci	 */
1198c2ecf20Sopenharmony_ci	enum btrfs_rbio_ops operation;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	/* first bad stripe */
1228c2ecf20Sopenharmony_ci	int faila;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	/* second bad stripe (for raid6 use) */
1258c2ecf20Sopenharmony_ci	int failb;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	int scrubp;
1288c2ecf20Sopenharmony_ci	/*
1298c2ecf20Sopenharmony_ci	 * number of pages needed to represent the full
1308c2ecf20Sopenharmony_ci	 * stripe
1318c2ecf20Sopenharmony_ci	 */
1328c2ecf20Sopenharmony_ci	int nr_pages;
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci	/*
1358c2ecf20Sopenharmony_ci	 * size of all the bios in the bio_list.  This
1368c2ecf20Sopenharmony_ci	 * helps us decide if the rbio maps to a full
1378c2ecf20Sopenharmony_ci	 * stripe or not
1388c2ecf20Sopenharmony_ci	 */
1398c2ecf20Sopenharmony_ci	int bio_list_bytes;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	int generic_bio_cnt;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	refcount_t refs;
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	atomic_t stripes_pending;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	atomic_t error;
1488c2ecf20Sopenharmony_ci	/*
1498c2ecf20Sopenharmony_ci	 * these are two arrays of pointers.  We allocate the
1508c2ecf20Sopenharmony_ci	 * rbio big enough to hold them both and setup their
1518c2ecf20Sopenharmony_ci	 * locations when the rbio is allocated
1528c2ecf20Sopenharmony_ci	 */
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	/* pointers to pages that we allocated for
1558c2ecf20Sopenharmony_ci	 * reading/writing stripes directly from the disk (including P/Q)
1568c2ecf20Sopenharmony_ci	 */
1578c2ecf20Sopenharmony_ci	struct page **stripe_pages;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	/*
1608c2ecf20Sopenharmony_ci	 * pointers to the pages in the bio_list.  Stored
1618c2ecf20Sopenharmony_ci	 * here for faster lookup
1628c2ecf20Sopenharmony_ci	 */
1638c2ecf20Sopenharmony_ci	struct page **bio_pages;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	/*
1668c2ecf20Sopenharmony_ci	 * bitmap to record which horizontal stripe has data
1678c2ecf20Sopenharmony_ci	 */
1688c2ecf20Sopenharmony_ci	unsigned long *dbitmap;
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	/* allocated with real_stripes-many pointers for finish_*() calls */
1718c2ecf20Sopenharmony_ci	void **finish_pointers;
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci	/* allocated with stripe_npages-many bits for finish_*() calls */
1748c2ecf20Sopenharmony_ci	unsigned long *finish_pbitmap;
1758c2ecf20Sopenharmony_ci};
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_cistatic int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
1788c2ecf20Sopenharmony_cistatic noinline void finish_rmw(struct btrfs_raid_bio *rbio);
1798c2ecf20Sopenharmony_cistatic void rmw_work(struct btrfs_work *work);
1808c2ecf20Sopenharmony_cistatic void read_rebuild_work(struct btrfs_work *work);
1818c2ecf20Sopenharmony_cistatic int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
1828c2ecf20Sopenharmony_cistatic int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
1838c2ecf20Sopenharmony_cistatic void __free_raid_bio(struct btrfs_raid_bio *rbio);
1848c2ecf20Sopenharmony_cistatic void index_rbio_pages(struct btrfs_raid_bio *rbio);
1858c2ecf20Sopenharmony_cistatic int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_cistatic noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
1888c2ecf20Sopenharmony_ci					 int need_check);
1898c2ecf20Sopenharmony_cistatic void scrub_parity_work(struct btrfs_work *work);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_cistatic void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
1928c2ecf20Sopenharmony_ci{
1938c2ecf20Sopenharmony_ci	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
1948c2ecf20Sopenharmony_ci	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci/*
1988c2ecf20Sopenharmony_ci * the stripe hash table is used for locking, and to collect
1998c2ecf20Sopenharmony_ci * bios in hopes of making a full stripe
2008c2ecf20Sopenharmony_ci */
2018c2ecf20Sopenharmony_ciint btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
2028c2ecf20Sopenharmony_ci{
2038c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *table;
2048c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *x;
2058c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash *cur;
2068c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash *h;
2078c2ecf20Sopenharmony_ci	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
2088c2ecf20Sopenharmony_ci	int i;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	if (info->stripe_hash_table)
2118c2ecf20Sopenharmony_ci		return 0;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	/*
2148c2ecf20Sopenharmony_ci	 * The table is large, starting with order 4 and can go as high as
2158c2ecf20Sopenharmony_ci	 * order 7 in case lock debugging is turned on.
2168c2ecf20Sopenharmony_ci	 *
2178c2ecf20Sopenharmony_ci	 * Try harder to allocate and fallback to vmalloc to lower the chance
2188c2ecf20Sopenharmony_ci	 * of a failing mount.
2198c2ecf20Sopenharmony_ci	 */
2208c2ecf20Sopenharmony_ci	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
2218c2ecf20Sopenharmony_ci	if (!table)
2228c2ecf20Sopenharmony_ci		return -ENOMEM;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	spin_lock_init(&table->cache_lock);
2258c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&table->stripe_cache);
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci	h = table->table;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	for (i = 0; i < num_entries; i++) {
2308c2ecf20Sopenharmony_ci		cur = h + i;
2318c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&cur->hash_list);
2328c2ecf20Sopenharmony_ci		spin_lock_init(&cur->lock);
2338c2ecf20Sopenharmony_ci	}
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	x = cmpxchg(&info->stripe_hash_table, NULL, table);
2368c2ecf20Sopenharmony_ci	if (x)
2378c2ecf20Sopenharmony_ci		kvfree(x);
2388c2ecf20Sopenharmony_ci	return 0;
2398c2ecf20Sopenharmony_ci}
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci/*
2428c2ecf20Sopenharmony_ci * caching an rbio means to copy anything from the
2438c2ecf20Sopenharmony_ci * bio_pages array into the stripe_pages array.  We
2448c2ecf20Sopenharmony_ci * use the page uptodate bit in the stripe cache array
2458c2ecf20Sopenharmony_ci * to indicate if it has valid data
2468c2ecf20Sopenharmony_ci *
2478c2ecf20Sopenharmony_ci * once the caching is done, we set the cache ready
2488c2ecf20Sopenharmony_ci * bit.
2498c2ecf20Sopenharmony_ci */
2508c2ecf20Sopenharmony_cistatic void cache_rbio_pages(struct btrfs_raid_bio *rbio)
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	int i;
2538c2ecf20Sopenharmony_ci	char *s;
2548c2ecf20Sopenharmony_ci	char *d;
2558c2ecf20Sopenharmony_ci	int ret;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	ret = alloc_rbio_pages(rbio);
2588c2ecf20Sopenharmony_ci	if (ret)
2598c2ecf20Sopenharmony_ci		return;
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	for (i = 0; i < rbio->nr_pages; i++) {
2628c2ecf20Sopenharmony_ci		if (!rbio->bio_pages[i])
2638c2ecf20Sopenharmony_ci			continue;
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci		s = kmap(rbio->bio_pages[i]);
2668c2ecf20Sopenharmony_ci		d = kmap(rbio->stripe_pages[i]);
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci		copy_page(d, s);
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci		kunmap(rbio->bio_pages[i]);
2718c2ecf20Sopenharmony_ci		kunmap(rbio->stripe_pages[i]);
2728c2ecf20Sopenharmony_ci		SetPageUptodate(rbio->stripe_pages[i]);
2738c2ecf20Sopenharmony_ci	}
2748c2ecf20Sopenharmony_ci	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2758c2ecf20Sopenharmony_ci}
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci/*
2788c2ecf20Sopenharmony_ci * we hash on the first logical address of the stripe
2798c2ecf20Sopenharmony_ci */
2808c2ecf20Sopenharmony_cistatic int rbio_bucket(struct btrfs_raid_bio *rbio)
2818c2ecf20Sopenharmony_ci{
2828c2ecf20Sopenharmony_ci	u64 num = rbio->bbio->raid_map[0];
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	/*
2858c2ecf20Sopenharmony_ci	 * we shift down quite a bit.  We're using byte
2868c2ecf20Sopenharmony_ci	 * addressing, and most of the lower bits are zeros.
2878c2ecf20Sopenharmony_ci	 * This tends to upset hash_64, and it consistently
2888c2ecf20Sopenharmony_ci	 * returns just one or two different values.
2898c2ecf20Sopenharmony_ci	 *
2908c2ecf20Sopenharmony_ci	 * shifting off the lower bits fixes things.
2918c2ecf20Sopenharmony_ci	 */
2928c2ecf20Sopenharmony_ci	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
2938c2ecf20Sopenharmony_ci}
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci/*
2968c2ecf20Sopenharmony_ci * stealing an rbio means taking all the uptodate pages from the stripe
2978c2ecf20Sopenharmony_ci * array in the source rbio and putting them into the destination rbio
2988c2ecf20Sopenharmony_ci */
2998c2ecf20Sopenharmony_cistatic void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
3008c2ecf20Sopenharmony_ci{
3018c2ecf20Sopenharmony_ci	int i;
3028c2ecf20Sopenharmony_ci	struct page *s;
3038c2ecf20Sopenharmony_ci	struct page *d;
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
3068c2ecf20Sopenharmony_ci		return;
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	for (i = 0; i < dest->nr_pages; i++) {
3098c2ecf20Sopenharmony_ci		s = src->stripe_pages[i];
3108c2ecf20Sopenharmony_ci		if (!s || !PageUptodate(s)) {
3118c2ecf20Sopenharmony_ci			continue;
3128c2ecf20Sopenharmony_ci		}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci		d = dest->stripe_pages[i];
3158c2ecf20Sopenharmony_ci		if (d)
3168c2ecf20Sopenharmony_ci			__free_page(d);
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci		dest->stripe_pages[i] = s;
3198c2ecf20Sopenharmony_ci		src->stripe_pages[i] = NULL;
3208c2ecf20Sopenharmony_ci	}
3218c2ecf20Sopenharmony_ci}
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci/*
3248c2ecf20Sopenharmony_ci * merging means we take the bio_list from the victim and
3258c2ecf20Sopenharmony_ci * splice it into the destination.  The victim should
3268c2ecf20Sopenharmony_ci * be discarded afterwards.
3278c2ecf20Sopenharmony_ci *
3288c2ecf20Sopenharmony_ci * must be called with dest->rbio_list_lock held
3298c2ecf20Sopenharmony_ci */
3308c2ecf20Sopenharmony_cistatic void merge_rbio(struct btrfs_raid_bio *dest,
3318c2ecf20Sopenharmony_ci		       struct btrfs_raid_bio *victim)
3328c2ecf20Sopenharmony_ci{
3338c2ecf20Sopenharmony_ci	bio_list_merge(&dest->bio_list, &victim->bio_list);
3348c2ecf20Sopenharmony_ci	dest->bio_list_bytes += victim->bio_list_bytes;
3358c2ecf20Sopenharmony_ci	/* Also inherit the bitmaps from @victim. */
3368c2ecf20Sopenharmony_ci	bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
3378c2ecf20Sopenharmony_ci		  dest->stripe_npages);
3388c2ecf20Sopenharmony_ci	dest->generic_bio_cnt += victim->generic_bio_cnt;
3398c2ecf20Sopenharmony_ci	bio_list_init(&victim->bio_list);
3408c2ecf20Sopenharmony_ci}
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci/*
3438c2ecf20Sopenharmony_ci * used to prune items that are in the cache.  The caller
3448c2ecf20Sopenharmony_ci * must hold the hash table lock.
3458c2ecf20Sopenharmony_ci */
3468c2ecf20Sopenharmony_cistatic void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	int bucket = rbio_bucket(rbio);
3498c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *table;
3508c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash *h;
3518c2ecf20Sopenharmony_ci	int freeit = 0;
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	/*
3548c2ecf20Sopenharmony_ci	 * check the bit again under the hash table lock.
3558c2ecf20Sopenharmony_ci	 */
3568c2ecf20Sopenharmony_ci	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
3578c2ecf20Sopenharmony_ci		return;
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci	table = rbio->fs_info->stripe_hash_table;
3608c2ecf20Sopenharmony_ci	h = table->table + bucket;
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	/* hold the lock for the bucket because we may be
3638c2ecf20Sopenharmony_ci	 * removing it from the hash table
3648c2ecf20Sopenharmony_ci	 */
3658c2ecf20Sopenharmony_ci	spin_lock(&h->lock);
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci	/*
3688c2ecf20Sopenharmony_ci	 * hold the lock for the bio list because we need
3698c2ecf20Sopenharmony_ci	 * to make sure the bio list is empty
3708c2ecf20Sopenharmony_ci	 */
3718c2ecf20Sopenharmony_ci	spin_lock(&rbio->bio_list_lock);
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
3748c2ecf20Sopenharmony_ci		list_del_init(&rbio->stripe_cache);
3758c2ecf20Sopenharmony_ci		table->cache_size -= 1;
3768c2ecf20Sopenharmony_ci		freeit = 1;
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci		/* if the bio list isn't empty, this rbio is
3798c2ecf20Sopenharmony_ci		 * still involved in an IO.  We take it out
3808c2ecf20Sopenharmony_ci		 * of the cache list, and drop the ref that
3818c2ecf20Sopenharmony_ci		 * was held for the list.
3828c2ecf20Sopenharmony_ci		 *
3838c2ecf20Sopenharmony_ci		 * If the bio_list was empty, we also remove
3848c2ecf20Sopenharmony_ci		 * the rbio from the hash_table, and drop
3858c2ecf20Sopenharmony_ci		 * the corresponding ref
3868c2ecf20Sopenharmony_ci		 */
3878c2ecf20Sopenharmony_ci		if (bio_list_empty(&rbio->bio_list)) {
3888c2ecf20Sopenharmony_ci			if (!list_empty(&rbio->hash_list)) {
3898c2ecf20Sopenharmony_ci				list_del_init(&rbio->hash_list);
3908c2ecf20Sopenharmony_ci				refcount_dec(&rbio->refs);
3918c2ecf20Sopenharmony_ci				BUG_ON(!list_empty(&rbio->plug_list));
3928c2ecf20Sopenharmony_ci			}
3938c2ecf20Sopenharmony_ci		}
3948c2ecf20Sopenharmony_ci	}
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci	spin_unlock(&rbio->bio_list_lock);
3978c2ecf20Sopenharmony_ci	spin_unlock(&h->lock);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (freeit)
4008c2ecf20Sopenharmony_ci		__free_raid_bio(rbio);
4018c2ecf20Sopenharmony_ci}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci/*
4048c2ecf20Sopenharmony_ci * prune a given rbio from the cache
4058c2ecf20Sopenharmony_ci */
4068c2ecf20Sopenharmony_cistatic void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
4078c2ecf20Sopenharmony_ci{
4088c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *table;
4098c2ecf20Sopenharmony_ci	unsigned long flags;
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
4128c2ecf20Sopenharmony_ci		return;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	table = rbio->fs_info->stripe_hash_table;
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	spin_lock_irqsave(&table->cache_lock, flags);
4178c2ecf20Sopenharmony_ci	__remove_rbio_from_cache(rbio);
4188c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&table->cache_lock, flags);
4198c2ecf20Sopenharmony_ci}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci/*
4228c2ecf20Sopenharmony_ci * remove everything in the cache
4238c2ecf20Sopenharmony_ci */
4248c2ecf20Sopenharmony_cistatic void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
4258c2ecf20Sopenharmony_ci{
4268c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *table;
4278c2ecf20Sopenharmony_ci	unsigned long flags;
4288c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	table = info->stripe_hash_table;
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	spin_lock_irqsave(&table->cache_lock, flags);
4338c2ecf20Sopenharmony_ci	while (!list_empty(&table->stripe_cache)) {
4348c2ecf20Sopenharmony_ci		rbio = list_entry(table->stripe_cache.next,
4358c2ecf20Sopenharmony_ci				  struct btrfs_raid_bio,
4368c2ecf20Sopenharmony_ci				  stripe_cache);
4378c2ecf20Sopenharmony_ci		__remove_rbio_from_cache(rbio);
4388c2ecf20Sopenharmony_ci	}
4398c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&table->cache_lock, flags);
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_ci/*
4438c2ecf20Sopenharmony_ci * remove all cached entries and free the hash table
4448c2ecf20Sopenharmony_ci * used by unmount
4458c2ecf20Sopenharmony_ci */
4468c2ecf20Sopenharmony_civoid btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
4478c2ecf20Sopenharmony_ci{
4488c2ecf20Sopenharmony_ci	if (!info->stripe_hash_table)
4498c2ecf20Sopenharmony_ci		return;
4508c2ecf20Sopenharmony_ci	btrfs_clear_rbio_cache(info);
4518c2ecf20Sopenharmony_ci	kvfree(info->stripe_hash_table);
4528c2ecf20Sopenharmony_ci	info->stripe_hash_table = NULL;
4538c2ecf20Sopenharmony_ci}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci/*
4568c2ecf20Sopenharmony_ci * insert an rbio into the stripe cache.  It
4578c2ecf20Sopenharmony_ci * must have already been prepared by calling
4588c2ecf20Sopenharmony_ci * cache_rbio_pages
4598c2ecf20Sopenharmony_ci *
4608c2ecf20Sopenharmony_ci * If this rbio was already cached, it gets
4618c2ecf20Sopenharmony_ci * moved to the front of the lru.
4628c2ecf20Sopenharmony_ci *
4638c2ecf20Sopenharmony_ci * If the size of the rbio cache is too big, we
4648c2ecf20Sopenharmony_ci * prune an item.
4658c2ecf20Sopenharmony_ci */
4668c2ecf20Sopenharmony_cistatic void cache_rbio(struct btrfs_raid_bio *rbio)
4678c2ecf20Sopenharmony_ci{
4688c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash_table *table;
4698c2ecf20Sopenharmony_ci	unsigned long flags;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
4728c2ecf20Sopenharmony_ci		return;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	table = rbio->fs_info->stripe_hash_table;
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	spin_lock_irqsave(&table->cache_lock, flags);
4778c2ecf20Sopenharmony_ci	spin_lock(&rbio->bio_list_lock);
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	/* bump our ref if we were not in the list before */
4808c2ecf20Sopenharmony_ci	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
4818c2ecf20Sopenharmony_ci		refcount_inc(&rbio->refs);
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci	if (!list_empty(&rbio->stripe_cache)){
4848c2ecf20Sopenharmony_ci		list_move(&rbio->stripe_cache, &table->stripe_cache);
4858c2ecf20Sopenharmony_ci	} else {
4868c2ecf20Sopenharmony_ci		list_add(&rbio->stripe_cache, &table->stripe_cache);
4878c2ecf20Sopenharmony_ci		table->cache_size += 1;
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	spin_unlock(&rbio->bio_list_lock);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	if (table->cache_size > RBIO_CACHE_SIZE) {
4938c2ecf20Sopenharmony_ci		struct btrfs_raid_bio *found;
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci		found = list_entry(table->stripe_cache.prev,
4968c2ecf20Sopenharmony_ci				  struct btrfs_raid_bio,
4978c2ecf20Sopenharmony_ci				  stripe_cache);
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci		if (found != rbio)
5008c2ecf20Sopenharmony_ci			__remove_rbio_from_cache(found);
5018c2ecf20Sopenharmony_ci	}
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&table->cache_lock, flags);
5048c2ecf20Sopenharmony_ci}
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci/*
5078c2ecf20Sopenharmony_ci * helper function to run the xor_blocks api.  It is only
5088c2ecf20Sopenharmony_ci * able to do MAX_XOR_BLOCKS at a time, so we need to
5098c2ecf20Sopenharmony_ci * loop through.
5108c2ecf20Sopenharmony_ci */
5118c2ecf20Sopenharmony_cistatic void run_xor(void **pages, int src_cnt, ssize_t len)
5128c2ecf20Sopenharmony_ci{
5138c2ecf20Sopenharmony_ci	int src_off = 0;
5148c2ecf20Sopenharmony_ci	int xor_src_cnt = 0;
5158c2ecf20Sopenharmony_ci	void *dest = pages[src_cnt];
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	while(src_cnt > 0) {
5188c2ecf20Sopenharmony_ci		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
5198c2ecf20Sopenharmony_ci		xor_blocks(xor_src_cnt, len, dest, pages + src_off);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci		src_cnt -= xor_src_cnt;
5228c2ecf20Sopenharmony_ci		src_off += xor_src_cnt;
5238c2ecf20Sopenharmony_ci	}
5248c2ecf20Sopenharmony_ci}
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci/*
5278c2ecf20Sopenharmony_ci * Returns true if the bio list inside this rbio covers an entire stripe (no
5288c2ecf20Sopenharmony_ci * rmw required).
5298c2ecf20Sopenharmony_ci */
5308c2ecf20Sopenharmony_cistatic int rbio_is_full(struct btrfs_raid_bio *rbio)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	unsigned long flags;
5338c2ecf20Sopenharmony_ci	unsigned long size = rbio->bio_list_bytes;
5348c2ecf20Sopenharmony_ci	int ret = 1;
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rbio->bio_list_lock, flags);
5378c2ecf20Sopenharmony_ci	if (size != rbio->nr_data * rbio->stripe_len)
5388c2ecf20Sopenharmony_ci		ret = 0;
5398c2ecf20Sopenharmony_ci	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
5408c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci	return ret;
5438c2ecf20Sopenharmony_ci}
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci/*
5468c2ecf20Sopenharmony_ci * returns 1 if it is safe to merge two rbios together.
5478c2ecf20Sopenharmony_ci * The merging is safe if the two rbios correspond to
5488c2ecf20Sopenharmony_ci * the same stripe and if they are both going in the same
5498c2ecf20Sopenharmony_ci * direction (read vs write), and if neither one is
5508c2ecf20Sopenharmony_ci * locked for final IO
5518c2ecf20Sopenharmony_ci *
5528c2ecf20Sopenharmony_ci * The caller is responsible for locking such that
5538c2ecf20Sopenharmony_ci * rmw_locked is safe to test
5548c2ecf20Sopenharmony_ci */
5558c2ecf20Sopenharmony_cistatic int rbio_can_merge(struct btrfs_raid_bio *last,
5568c2ecf20Sopenharmony_ci			  struct btrfs_raid_bio *cur)
5578c2ecf20Sopenharmony_ci{
5588c2ecf20Sopenharmony_ci	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
5598c2ecf20Sopenharmony_ci	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
5608c2ecf20Sopenharmony_ci		return 0;
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	/*
5638c2ecf20Sopenharmony_ci	 * we can't merge with cached rbios, since the
5648c2ecf20Sopenharmony_ci	 * idea is that when we merge the destination
5658c2ecf20Sopenharmony_ci	 * rbio is going to run our IO for us.  We can
5668c2ecf20Sopenharmony_ci	 * steal from cached rbios though, other functions
5678c2ecf20Sopenharmony_ci	 * handle that.
5688c2ecf20Sopenharmony_ci	 */
5698c2ecf20Sopenharmony_ci	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
5708c2ecf20Sopenharmony_ci	    test_bit(RBIO_CACHE_BIT, &cur->flags))
5718c2ecf20Sopenharmony_ci		return 0;
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	if (last->bbio->raid_map[0] !=
5748c2ecf20Sopenharmony_ci	    cur->bbio->raid_map[0])
5758c2ecf20Sopenharmony_ci		return 0;
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	/* we can't merge with different operations */
5788c2ecf20Sopenharmony_ci	if (last->operation != cur->operation)
5798c2ecf20Sopenharmony_ci		return 0;
5808c2ecf20Sopenharmony_ci	/*
5818c2ecf20Sopenharmony_ci	 * We've need read the full stripe from the drive.
5828c2ecf20Sopenharmony_ci	 * check and repair the parity and write the new results.
5838c2ecf20Sopenharmony_ci	 *
5848c2ecf20Sopenharmony_ci	 * We're not allowed to add any new bios to the
5858c2ecf20Sopenharmony_ci	 * bio list here, anyone else that wants to
5868c2ecf20Sopenharmony_ci	 * change this stripe needs to do their own rmw.
5878c2ecf20Sopenharmony_ci	 */
5888c2ecf20Sopenharmony_ci	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
5898c2ecf20Sopenharmony_ci		return 0;
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
5928c2ecf20Sopenharmony_ci		return 0;
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
5958c2ecf20Sopenharmony_ci		int fa = last->faila;
5968c2ecf20Sopenharmony_ci		int fb = last->failb;
5978c2ecf20Sopenharmony_ci		int cur_fa = cur->faila;
5988c2ecf20Sopenharmony_ci		int cur_fb = cur->failb;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci		if (last->faila >= last->failb) {
6018c2ecf20Sopenharmony_ci			fa = last->failb;
6028c2ecf20Sopenharmony_ci			fb = last->faila;
6038c2ecf20Sopenharmony_ci		}
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci		if (cur->faila >= cur->failb) {
6068c2ecf20Sopenharmony_ci			cur_fa = cur->failb;
6078c2ecf20Sopenharmony_ci			cur_fb = cur->faila;
6088c2ecf20Sopenharmony_ci		}
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci		if (fa != cur_fa || fb != cur_fb)
6118c2ecf20Sopenharmony_ci			return 0;
6128c2ecf20Sopenharmony_ci	}
6138c2ecf20Sopenharmony_ci	return 1;
6148c2ecf20Sopenharmony_ci}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_cistatic int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
6178c2ecf20Sopenharmony_ci				  int index)
6188c2ecf20Sopenharmony_ci{
6198c2ecf20Sopenharmony_ci	return stripe * rbio->stripe_npages + index;
6208c2ecf20Sopenharmony_ci}
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci/*
6238c2ecf20Sopenharmony_ci * these are just the pages from the rbio array, not from anything
6248c2ecf20Sopenharmony_ci * the FS sent down to us
6258c2ecf20Sopenharmony_ci */
6268c2ecf20Sopenharmony_cistatic struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
6278c2ecf20Sopenharmony_ci				     int index)
6288c2ecf20Sopenharmony_ci{
6298c2ecf20Sopenharmony_ci	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
6308c2ecf20Sopenharmony_ci}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci/*
6338c2ecf20Sopenharmony_ci * helper to index into the pstripe
6348c2ecf20Sopenharmony_ci */
6358c2ecf20Sopenharmony_cistatic struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	return rbio_stripe_page(rbio, rbio->nr_data, index);
6388c2ecf20Sopenharmony_ci}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci/*
6418c2ecf20Sopenharmony_ci * helper to index into the qstripe, returns null
6428c2ecf20Sopenharmony_ci * if there is no qstripe
6438c2ecf20Sopenharmony_ci */
6448c2ecf20Sopenharmony_cistatic struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
6458c2ecf20Sopenharmony_ci{
6468c2ecf20Sopenharmony_ci	if (rbio->nr_data + 1 == rbio->real_stripes)
6478c2ecf20Sopenharmony_ci		return NULL;
6488c2ecf20Sopenharmony_ci	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
6498c2ecf20Sopenharmony_ci}
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci/*
6528c2ecf20Sopenharmony_ci * The first stripe in the table for a logical address
6538c2ecf20Sopenharmony_ci * has the lock.  rbios are added in one of three ways:
6548c2ecf20Sopenharmony_ci *
6558c2ecf20Sopenharmony_ci * 1) Nobody has the stripe locked yet.  The rbio is given
6568c2ecf20Sopenharmony_ci * the lock and 0 is returned.  The caller must start the IO
6578c2ecf20Sopenharmony_ci * themselves.
6588c2ecf20Sopenharmony_ci *
6598c2ecf20Sopenharmony_ci * 2) Someone has the stripe locked, but we're able to merge
6608c2ecf20Sopenharmony_ci * with the lock owner.  The rbio is freed and the IO will
6618c2ecf20Sopenharmony_ci * start automatically along with the existing rbio.  1 is returned.
6628c2ecf20Sopenharmony_ci *
6638c2ecf20Sopenharmony_ci * 3) Someone has the stripe locked, but we're not able to merge.
6648c2ecf20Sopenharmony_ci * The rbio is added to the lock owner's plug list, or merged into
6658c2ecf20Sopenharmony_ci * an rbio already on the plug list.  When the lock owner unlocks,
6668c2ecf20Sopenharmony_ci * the next rbio on the list is run and the IO is started automatically.
6678c2ecf20Sopenharmony_ci * 1 is returned
6688c2ecf20Sopenharmony_ci *
6698c2ecf20Sopenharmony_ci * If we return 0, the caller still owns the rbio and must continue with
6708c2ecf20Sopenharmony_ci * IO submission.  If we return 1, the caller must assume the rbio has
6718c2ecf20Sopenharmony_ci * already been freed.
6728c2ecf20Sopenharmony_ci */
6738c2ecf20Sopenharmony_cistatic noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
6748c2ecf20Sopenharmony_ci{
6758c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash *h;
6768c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *cur;
6778c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *pending;
6788c2ecf20Sopenharmony_ci	unsigned long flags;
6798c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *freeit = NULL;
6808c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *cache_drop = NULL;
6818c2ecf20Sopenharmony_ci	int ret = 0;
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci	h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci	spin_lock_irqsave(&h->lock, flags);
6868c2ecf20Sopenharmony_ci	list_for_each_entry(cur, &h->hash_list, hash_list) {
6878c2ecf20Sopenharmony_ci		if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
6888c2ecf20Sopenharmony_ci			continue;
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci		spin_lock(&cur->bio_list_lock);
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci		/* Can we steal this cached rbio's pages? */
6938c2ecf20Sopenharmony_ci		if (bio_list_empty(&cur->bio_list) &&
6948c2ecf20Sopenharmony_ci		    list_empty(&cur->plug_list) &&
6958c2ecf20Sopenharmony_ci		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
6968c2ecf20Sopenharmony_ci		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
6978c2ecf20Sopenharmony_ci			list_del_init(&cur->hash_list);
6988c2ecf20Sopenharmony_ci			refcount_dec(&cur->refs);
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci			steal_rbio(cur, rbio);
7018c2ecf20Sopenharmony_ci			cache_drop = cur;
7028c2ecf20Sopenharmony_ci			spin_unlock(&cur->bio_list_lock);
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci			goto lockit;
7058c2ecf20Sopenharmony_ci		}
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci		/* Can we merge into the lock owner? */
7088c2ecf20Sopenharmony_ci		if (rbio_can_merge(cur, rbio)) {
7098c2ecf20Sopenharmony_ci			merge_rbio(cur, rbio);
7108c2ecf20Sopenharmony_ci			spin_unlock(&cur->bio_list_lock);
7118c2ecf20Sopenharmony_ci			freeit = rbio;
7128c2ecf20Sopenharmony_ci			ret = 1;
7138c2ecf20Sopenharmony_ci			goto out;
7148c2ecf20Sopenharmony_ci		}
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_ci		/*
7188c2ecf20Sopenharmony_ci		 * We couldn't merge with the running rbio, see if we can merge
7198c2ecf20Sopenharmony_ci		 * with the pending ones.  We don't have to check for rmw_locked
7208c2ecf20Sopenharmony_ci		 * because there is no way they are inside finish_rmw right now
7218c2ecf20Sopenharmony_ci		 */
7228c2ecf20Sopenharmony_ci		list_for_each_entry(pending, &cur->plug_list, plug_list) {
7238c2ecf20Sopenharmony_ci			if (rbio_can_merge(pending, rbio)) {
7248c2ecf20Sopenharmony_ci				merge_rbio(pending, rbio);
7258c2ecf20Sopenharmony_ci				spin_unlock(&cur->bio_list_lock);
7268c2ecf20Sopenharmony_ci				freeit = rbio;
7278c2ecf20Sopenharmony_ci				ret = 1;
7288c2ecf20Sopenharmony_ci				goto out;
7298c2ecf20Sopenharmony_ci			}
7308c2ecf20Sopenharmony_ci		}
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci		/*
7338c2ecf20Sopenharmony_ci		 * No merging, put us on the tail of the plug list, our rbio
7348c2ecf20Sopenharmony_ci		 * will be started with the currently running rbio unlocks
7358c2ecf20Sopenharmony_ci		 */
7368c2ecf20Sopenharmony_ci		list_add_tail(&rbio->plug_list, &cur->plug_list);
7378c2ecf20Sopenharmony_ci		spin_unlock(&cur->bio_list_lock);
7388c2ecf20Sopenharmony_ci		ret = 1;
7398c2ecf20Sopenharmony_ci		goto out;
7408c2ecf20Sopenharmony_ci	}
7418c2ecf20Sopenharmony_cilockit:
7428c2ecf20Sopenharmony_ci	refcount_inc(&rbio->refs);
7438c2ecf20Sopenharmony_ci	list_add(&rbio->hash_list, &h->hash_list);
7448c2ecf20Sopenharmony_ciout:
7458c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&h->lock, flags);
7468c2ecf20Sopenharmony_ci	if (cache_drop)
7478c2ecf20Sopenharmony_ci		remove_rbio_from_cache(cache_drop);
7488c2ecf20Sopenharmony_ci	if (freeit)
7498c2ecf20Sopenharmony_ci		__free_raid_bio(freeit);
7508c2ecf20Sopenharmony_ci	return ret;
7518c2ecf20Sopenharmony_ci}
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci/*
7548c2ecf20Sopenharmony_ci * called as rmw or parity rebuild is completed.  If the plug list has more
7558c2ecf20Sopenharmony_ci * rbios waiting for this stripe, the next one on the list will be started
7568c2ecf20Sopenharmony_ci */
7578c2ecf20Sopenharmony_cistatic noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
7588c2ecf20Sopenharmony_ci{
7598c2ecf20Sopenharmony_ci	int bucket;
7608c2ecf20Sopenharmony_ci	struct btrfs_stripe_hash *h;
7618c2ecf20Sopenharmony_ci	unsigned long flags;
7628c2ecf20Sopenharmony_ci	int keep_cache = 0;
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	bucket = rbio_bucket(rbio);
7658c2ecf20Sopenharmony_ci	h = rbio->fs_info->stripe_hash_table->table + bucket;
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	if (list_empty(&rbio->plug_list))
7688c2ecf20Sopenharmony_ci		cache_rbio(rbio);
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	spin_lock_irqsave(&h->lock, flags);
7718c2ecf20Sopenharmony_ci	spin_lock(&rbio->bio_list_lock);
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	if (!list_empty(&rbio->hash_list)) {
7748c2ecf20Sopenharmony_ci		/*
7758c2ecf20Sopenharmony_ci		 * if we're still cached and there is no other IO
7768c2ecf20Sopenharmony_ci		 * to perform, just leave this rbio here for others
7778c2ecf20Sopenharmony_ci		 * to steal from later
7788c2ecf20Sopenharmony_ci		 */
7798c2ecf20Sopenharmony_ci		if (list_empty(&rbio->plug_list) &&
7808c2ecf20Sopenharmony_ci		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
7818c2ecf20Sopenharmony_ci			keep_cache = 1;
7828c2ecf20Sopenharmony_ci			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
7838c2ecf20Sopenharmony_ci			BUG_ON(!bio_list_empty(&rbio->bio_list));
7848c2ecf20Sopenharmony_ci			goto done;
7858c2ecf20Sopenharmony_ci		}
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci		list_del_init(&rbio->hash_list);
7888c2ecf20Sopenharmony_ci		refcount_dec(&rbio->refs);
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_ci		/*
7918c2ecf20Sopenharmony_ci		 * we use the plug list to hold all the rbios
7928c2ecf20Sopenharmony_ci		 * waiting for the chance to lock this stripe.
7938c2ecf20Sopenharmony_ci		 * hand the lock over to one of them.
7948c2ecf20Sopenharmony_ci		 */
7958c2ecf20Sopenharmony_ci		if (!list_empty(&rbio->plug_list)) {
7968c2ecf20Sopenharmony_ci			struct btrfs_raid_bio *next;
7978c2ecf20Sopenharmony_ci			struct list_head *head = rbio->plug_list.next;
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci			next = list_entry(head, struct btrfs_raid_bio,
8008c2ecf20Sopenharmony_ci					  plug_list);
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci			list_del_init(&rbio->plug_list);
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci			list_add(&next->hash_list, &h->hash_list);
8058c2ecf20Sopenharmony_ci			refcount_inc(&next->refs);
8068c2ecf20Sopenharmony_ci			spin_unlock(&rbio->bio_list_lock);
8078c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&h->lock, flags);
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci			if (next->operation == BTRFS_RBIO_READ_REBUILD)
8108c2ecf20Sopenharmony_ci				start_async_work(next, read_rebuild_work);
8118c2ecf20Sopenharmony_ci			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
8128c2ecf20Sopenharmony_ci				steal_rbio(rbio, next);
8138c2ecf20Sopenharmony_ci				start_async_work(next, read_rebuild_work);
8148c2ecf20Sopenharmony_ci			} else if (next->operation == BTRFS_RBIO_WRITE) {
8158c2ecf20Sopenharmony_ci				steal_rbio(rbio, next);
8168c2ecf20Sopenharmony_ci				start_async_work(next, rmw_work);
8178c2ecf20Sopenharmony_ci			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
8188c2ecf20Sopenharmony_ci				steal_rbio(rbio, next);
8198c2ecf20Sopenharmony_ci				start_async_work(next, scrub_parity_work);
8208c2ecf20Sopenharmony_ci			}
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci			goto done_nolock;
8238c2ecf20Sopenharmony_ci		}
8248c2ecf20Sopenharmony_ci	}
8258c2ecf20Sopenharmony_cidone:
8268c2ecf20Sopenharmony_ci	spin_unlock(&rbio->bio_list_lock);
8278c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&h->lock, flags);
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_cidone_nolock:
8308c2ecf20Sopenharmony_ci	if (!keep_cache)
8318c2ecf20Sopenharmony_ci		remove_rbio_from_cache(rbio);
8328c2ecf20Sopenharmony_ci}
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_cistatic void __free_raid_bio(struct btrfs_raid_bio *rbio)
8358c2ecf20Sopenharmony_ci{
8368c2ecf20Sopenharmony_ci	int i;
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	if (!refcount_dec_and_test(&rbio->refs))
8398c2ecf20Sopenharmony_ci		return;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&rbio->stripe_cache));
8428c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&rbio->hash_list));
8438c2ecf20Sopenharmony_ci	WARN_ON(!bio_list_empty(&rbio->bio_list));
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	for (i = 0; i < rbio->nr_pages; i++) {
8468c2ecf20Sopenharmony_ci		if (rbio->stripe_pages[i]) {
8478c2ecf20Sopenharmony_ci			__free_page(rbio->stripe_pages[i]);
8488c2ecf20Sopenharmony_ci			rbio->stripe_pages[i] = NULL;
8498c2ecf20Sopenharmony_ci		}
8508c2ecf20Sopenharmony_ci	}
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci	btrfs_put_bbio(rbio->bbio);
8538c2ecf20Sopenharmony_ci	kfree(rbio);
8548c2ecf20Sopenharmony_ci}
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_cistatic void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
8578c2ecf20Sopenharmony_ci{
8588c2ecf20Sopenharmony_ci	struct bio *next;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	while (cur) {
8618c2ecf20Sopenharmony_ci		next = cur->bi_next;
8628c2ecf20Sopenharmony_ci		cur->bi_next = NULL;
8638c2ecf20Sopenharmony_ci		cur->bi_status = err;
8648c2ecf20Sopenharmony_ci		bio_endio(cur);
8658c2ecf20Sopenharmony_ci		cur = next;
8668c2ecf20Sopenharmony_ci	}
8678c2ecf20Sopenharmony_ci}
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci/*
8708c2ecf20Sopenharmony_ci * this frees the rbio and runs through all the bios in the
8718c2ecf20Sopenharmony_ci * bio_list and calls end_io on them
8728c2ecf20Sopenharmony_ci */
8738c2ecf20Sopenharmony_cistatic void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
8748c2ecf20Sopenharmony_ci{
8758c2ecf20Sopenharmony_ci	struct bio *cur = bio_list_get(&rbio->bio_list);
8768c2ecf20Sopenharmony_ci	struct bio *extra;
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	if (rbio->generic_bio_cnt)
8798c2ecf20Sopenharmony_ci		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
8808c2ecf20Sopenharmony_ci	/*
8818c2ecf20Sopenharmony_ci	 * Clear the data bitmap, as the rbio may be cached for later usage.
8828c2ecf20Sopenharmony_ci	 * do this before before unlock_stripe() so there will be no new bio
8838c2ecf20Sopenharmony_ci	 * for this bio.
8848c2ecf20Sopenharmony_ci	 */
8858c2ecf20Sopenharmony_ci	bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci	/*
8888c2ecf20Sopenharmony_ci	 * At this moment, rbio->bio_list is empty, however since rbio does not
8898c2ecf20Sopenharmony_ci	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
8908c2ecf20Sopenharmony_ci	 * hash list, rbio may be merged with others so that rbio->bio_list
8918c2ecf20Sopenharmony_ci	 * becomes non-empty.
8928c2ecf20Sopenharmony_ci	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
8938c2ecf20Sopenharmony_ci	 * more and we can call bio_endio() on all queued bios.
8948c2ecf20Sopenharmony_ci	 */
8958c2ecf20Sopenharmony_ci	unlock_stripe(rbio);
8968c2ecf20Sopenharmony_ci	extra = bio_list_get(&rbio->bio_list);
8978c2ecf20Sopenharmony_ci	__free_raid_bio(rbio);
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	rbio_endio_bio_list(cur, err);
9008c2ecf20Sopenharmony_ci	if (extra)
9018c2ecf20Sopenharmony_ci		rbio_endio_bio_list(extra, err);
9028c2ecf20Sopenharmony_ci}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci/*
9058c2ecf20Sopenharmony_ci * end io function used by finish_rmw.  When we finally
9068c2ecf20Sopenharmony_ci * get here, we've written a full stripe
9078c2ecf20Sopenharmony_ci */
9088c2ecf20Sopenharmony_cistatic void raid_write_end_io(struct bio *bio)
9098c2ecf20Sopenharmony_ci{
9108c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio = bio->bi_private;
9118c2ecf20Sopenharmony_ci	blk_status_t err = bio->bi_status;
9128c2ecf20Sopenharmony_ci	int max_errors;
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci	if (err)
9158c2ecf20Sopenharmony_ci		fail_bio_stripe(rbio, bio);
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	bio_put(bio);
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	if (!atomic_dec_and_test(&rbio->stripes_pending))
9208c2ecf20Sopenharmony_ci		return;
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci	err = BLK_STS_OK;
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	/* OK, we have read all the stripes we need to. */
9258c2ecf20Sopenharmony_ci	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
9268c2ecf20Sopenharmony_ci		     0 : rbio->bbio->max_errors;
9278c2ecf20Sopenharmony_ci	if (atomic_read(&rbio->error) > max_errors)
9288c2ecf20Sopenharmony_ci		err = BLK_STS_IOERR;
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, err);
9318c2ecf20Sopenharmony_ci}
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci/*
9348c2ecf20Sopenharmony_ci * the read/modify/write code wants to use the original bio for
9358c2ecf20Sopenharmony_ci * any pages it included, and then use the rbio for everything
9368c2ecf20Sopenharmony_ci * else.  This function decides if a given index (stripe number)
9378c2ecf20Sopenharmony_ci * and page number in that stripe fall inside the original bio
9388c2ecf20Sopenharmony_ci * or the rbio.
9398c2ecf20Sopenharmony_ci *
9408c2ecf20Sopenharmony_ci * if you set bio_list_only, you'll get a NULL back for any ranges
9418c2ecf20Sopenharmony_ci * that are outside the bio_list
9428c2ecf20Sopenharmony_ci *
9438c2ecf20Sopenharmony_ci * This doesn't take any refs on anything, you get a bare page pointer
9448c2ecf20Sopenharmony_ci * and the caller must bump refs as required.
9458c2ecf20Sopenharmony_ci *
9468c2ecf20Sopenharmony_ci * You must call index_rbio_pages once before you can trust
9478c2ecf20Sopenharmony_ci * the answers from this function.
9488c2ecf20Sopenharmony_ci */
9498c2ecf20Sopenharmony_cistatic struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
9508c2ecf20Sopenharmony_ci				 int index, int pagenr, int bio_list_only)
9518c2ecf20Sopenharmony_ci{
9528c2ecf20Sopenharmony_ci	int chunk_page;
9538c2ecf20Sopenharmony_ci	struct page *p = NULL;
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_ci	spin_lock_irq(&rbio->bio_list_lock);
9588c2ecf20Sopenharmony_ci	p = rbio->bio_pages[chunk_page];
9598c2ecf20Sopenharmony_ci	spin_unlock_irq(&rbio->bio_list_lock);
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci	if (p || bio_list_only)
9628c2ecf20Sopenharmony_ci		return p;
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci	return rbio->stripe_pages[chunk_page];
9658c2ecf20Sopenharmony_ci}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci/*
9688c2ecf20Sopenharmony_ci * number of pages we need for the entire stripe across all the
9698c2ecf20Sopenharmony_ci * drives
9708c2ecf20Sopenharmony_ci */
9718c2ecf20Sopenharmony_cistatic unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
9728c2ecf20Sopenharmony_ci{
9738c2ecf20Sopenharmony_ci	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
9748c2ecf20Sopenharmony_ci}
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci/*
9778c2ecf20Sopenharmony_ci * allocation and initial setup for the btrfs_raid_bio.  Not
9788c2ecf20Sopenharmony_ci * this does not allocate any pages for rbio->pages.
9798c2ecf20Sopenharmony_ci */
9808c2ecf20Sopenharmony_cistatic struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
9818c2ecf20Sopenharmony_ci					 struct btrfs_bio *bbio,
9828c2ecf20Sopenharmony_ci					 u64 stripe_len)
9838c2ecf20Sopenharmony_ci{
9848c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
9858c2ecf20Sopenharmony_ci	int nr_data = 0;
9868c2ecf20Sopenharmony_ci	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
9878c2ecf20Sopenharmony_ci	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
9888c2ecf20Sopenharmony_ci	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
9898c2ecf20Sopenharmony_ci	void *p;
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ci	rbio = kzalloc(sizeof(*rbio) +
9928c2ecf20Sopenharmony_ci		       sizeof(*rbio->stripe_pages) * num_pages +
9938c2ecf20Sopenharmony_ci		       sizeof(*rbio->bio_pages) * num_pages +
9948c2ecf20Sopenharmony_ci		       sizeof(*rbio->finish_pointers) * real_stripes +
9958c2ecf20Sopenharmony_ci		       sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
9968c2ecf20Sopenharmony_ci		       sizeof(*rbio->finish_pbitmap) *
9978c2ecf20Sopenharmony_ci				BITS_TO_LONGS(stripe_npages),
9988c2ecf20Sopenharmony_ci		       GFP_NOFS);
9998c2ecf20Sopenharmony_ci	if (!rbio)
10008c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
10018c2ecf20Sopenharmony_ci
10028c2ecf20Sopenharmony_ci	bio_list_init(&rbio->bio_list);
10038c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbio->plug_list);
10048c2ecf20Sopenharmony_ci	spin_lock_init(&rbio->bio_list_lock);
10058c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbio->stripe_cache);
10068c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbio->hash_list);
10078c2ecf20Sopenharmony_ci	rbio->bbio = bbio;
10088c2ecf20Sopenharmony_ci	rbio->fs_info = fs_info;
10098c2ecf20Sopenharmony_ci	rbio->stripe_len = stripe_len;
10108c2ecf20Sopenharmony_ci	rbio->nr_pages = num_pages;
10118c2ecf20Sopenharmony_ci	rbio->real_stripes = real_stripes;
10128c2ecf20Sopenharmony_ci	rbio->stripe_npages = stripe_npages;
10138c2ecf20Sopenharmony_ci	rbio->faila = -1;
10148c2ecf20Sopenharmony_ci	rbio->failb = -1;
10158c2ecf20Sopenharmony_ci	refcount_set(&rbio->refs, 1);
10168c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
10178c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, 0);
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci	/*
10208c2ecf20Sopenharmony_ci	 * the stripe_pages, bio_pages, etc arrays point to the extra
10218c2ecf20Sopenharmony_ci	 * memory we allocated past the end of the rbio
10228c2ecf20Sopenharmony_ci	 */
10238c2ecf20Sopenharmony_ci	p = rbio + 1;
10248c2ecf20Sopenharmony_ci#define CONSUME_ALLOC(ptr, count)	do {				\
10258c2ecf20Sopenharmony_ci		ptr = p;						\
10268c2ecf20Sopenharmony_ci		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
10278c2ecf20Sopenharmony_ci	} while (0)
10288c2ecf20Sopenharmony_ci	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
10298c2ecf20Sopenharmony_ci	CONSUME_ALLOC(rbio->bio_pages, num_pages);
10308c2ecf20Sopenharmony_ci	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
10318c2ecf20Sopenharmony_ci	CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
10328c2ecf20Sopenharmony_ci	CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
10338c2ecf20Sopenharmony_ci#undef  CONSUME_ALLOC
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_ci	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
10368c2ecf20Sopenharmony_ci		nr_data = real_stripes - 1;
10378c2ecf20Sopenharmony_ci	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
10388c2ecf20Sopenharmony_ci		nr_data = real_stripes - 2;
10398c2ecf20Sopenharmony_ci	else
10408c2ecf20Sopenharmony_ci		BUG();
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci	rbio->nr_data = nr_data;
10438c2ecf20Sopenharmony_ci	return rbio;
10448c2ecf20Sopenharmony_ci}
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci/* allocate pages for all the stripes in the bio, including parity */
10478c2ecf20Sopenharmony_cistatic int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
10488c2ecf20Sopenharmony_ci{
10498c2ecf20Sopenharmony_ci	int i;
10508c2ecf20Sopenharmony_ci	struct page *page;
10518c2ecf20Sopenharmony_ci
10528c2ecf20Sopenharmony_ci	for (i = 0; i < rbio->nr_pages; i++) {
10538c2ecf20Sopenharmony_ci		if (rbio->stripe_pages[i])
10548c2ecf20Sopenharmony_ci			continue;
10558c2ecf20Sopenharmony_ci		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
10568c2ecf20Sopenharmony_ci		if (!page)
10578c2ecf20Sopenharmony_ci			return -ENOMEM;
10588c2ecf20Sopenharmony_ci		rbio->stripe_pages[i] = page;
10598c2ecf20Sopenharmony_ci	}
10608c2ecf20Sopenharmony_ci	return 0;
10618c2ecf20Sopenharmony_ci}
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci/* only allocate pages for p/q stripes */
10648c2ecf20Sopenharmony_cistatic int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
10658c2ecf20Sopenharmony_ci{
10668c2ecf20Sopenharmony_ci	int i;
10678c2ecf20Sopenharmony_ci	struct page *page;
10688c2ecf20Sopenharmony_ci
10698c2ecf20Sopenharmony_ci	i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	for (; i < rbio->nr_pages; i++) {
10728c2ecf20Sopenharmony_ci		if (rbio->stripe_pages[i])
10738c2ecf20Sopenharmony_ci			continue;
10748c2ecf20Sopenharmony_ci		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
10758c2ecf20Sopenharmony_ci		if (!page)
10768c2ecf20Sopenharmony_ci			return -ENOMEM;
10778c2ecf20Sopenharmony_ci		rbio->stripe_pages[i] = page;
10788c2ecf20Sopenharmony_ci	}
10798c2ecf20Sopenharmony_ci	return 0;
10808c2ecf20Sopenharmony_ci}
10818c2ecf20Sopenharmony_ci
10828c2ecf20Sopenharmony_ci/*
10838c2ecf20Sopenharmony_ci * add a single page from a specific stripe into our list of bios for IO
10848c2ecf20Sopenharmony_ci * this will try to merge into existing bios if possible, and returns
10858c2ecf20Sopenharmony_ci * zero if all went well.
10868c2ecf20Sopenharmony_ci */
10878c2ecf20Sopenharmony_cistatic int rbio_add_io_page(struct btrfs_raid_bio *rbio,
10888c2ecf20Sopenharmony_ci			    struct bio_list *bio_list,
10898c2ecf20Sopenharmony_ci			    struct page *page,
10908c2ecf20Sopenharmony_ci			    int stripe_nr,
10918c2ecf20Sopenharmony_ci			    unsigned long page_index,
10928c2ecf20Sopenharmony_ci			    unsigned long bio_max_len)
10938c2ecf20Sopenharmony_ci{
10948c2ecf20Sopenharmony_ci	struct bio *last = bio_list->tail;
10958c2ecf20Sopenharmony_ci	int ret;
10968c2ecf20Sopenharmony_ci	struct bio *bio;
10978c2ecf20Sopenharmony_ci	struct btrfs_bio_stripe *stripe;
10988c2ecf20Sopenharmony_ci	u64 disk_start;
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_ci	stripe = &rbio->bbio->stripes[stripe_nr];
11018c2ecf20Sopenharmony_ci	disk_start = stripe->physical + (page_index << PAGE_SHIFT);
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	/* if the device is missing, just fail this stripe */
11048c2ecf20Sopenharmony_ci	if (!stripe->dev->bdev)
11058c2ecf20Sopenharmony_ci		return fail_rbio_index(rbio, stripe_nr);
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_ci	/* see if we can add this page onto our existing bio */
11088c2ecf20Sopenharmony_ci	if (last) {
11098c2ecf20Sopenharmony_ci		u64 last_end = (u64)last->bi_iter.bi_sector << 9;
11108c2ecf20Sopenharmony_ci		last_end += last->bi_iter.bi_size;
11118c2ecf20Sopenharmony_ci
11128c2ecf20Sopenharmony_ci		/*
11138c2ecf20Sopenharmony_ci		 * we can't merge these if they are from different
11148c2ecf20Sopenharmony_ci		 * devices or if they are not contiguous
11158c2ecf20Sopenharmony_ci		 */
11168c2ecf20Sopenharmony_ci		if (last_end == disk_start && !last->bi_status &&
11178c2ecf20Sopenharmony_ci		    last->bi_disk == stripe->dev->bdev->bd_disk &&
11188c2ecf20Sopenharmony_ci		    last->bi_partno == stripe->dev->bdev->bd_partno) {
11198c2ecf20Sopenharmony_ci			ret = bio_add_page(last, page, PAGE_SIZE, 0);
11208c2ecf20Sopenharmony_ci			if (ret == PAGE_SIZE)
11218c2ecf20Sopenharmony_ci				return 0;
11228c2ecf20Sopenharmony_ci		}
11238c2ecf20Sopenharmony_ci	}
11248c2ecf20Sopenharmony_ci
11258c2ecf20Sopenharmony_ci	/* put a new bio on the list */
11268c2ecf20Sopenharmony_ci	bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
11278c2ecf20Sopenharmony_ci	btrfs_io_bio(bio)->device = stripe->dev;
11288c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = 0;
11298c2ecf20Sopenharmony_ci	bio_set_dev(bio, stripe->dev->bdev);
11308c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = disk_start >> 9;
11318c2ecf20Sopenharmony_ci
11328c2ecf20Sopenharmony_ci	bio_add_page(bio, page, PAGE_SIZE, 0);
11338c2ecf20Sopenharmony_ci	bio_list_add(bio_list, bio);
11348c2ecf20Sopenharmony_ci	return 0;
11358c2ecf20Sopenharmony_ci}
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci/*
11388c2ecf20Sopenharmony_ci * while we're doing the read/modify/write cycle, we could
11398c2ecf20Sopenharmony_ci * have errors in reading pages off the disk.  This checks
11408c2ecf20Sopenharmony_ci * for errors and if we're not able to read the page it'll
11418c2ecf20Sopenharmony_ci * trigger parity reconstruction.  The rmw will be finished
11428c2ecf20Sopenharmony_ci * after we've reconstructed the failed stripes
11438c2ecf20Sopenharmony_ci */
11448c2ecf20Sopenharmony_cistatic void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
11458c2ecf20Sopenharmony_ci{
11468c2ecf20Sopenharmony_ci	if (rbio->faila >= 0 || rbio->failb >= 0) {
11478c2ecf20Sopenharmony_ci		BUG_ON(rbio->faila == rbio->real_stripes - 1);
11488c2ecf20Sopenharmony_ci		__raid56_parity_recover(rbio);
11498c2ecf20Sopenharmony_ci	} else {
11508c2ecf20Sopenharmony_ci		finish_rmw(rbio);
11518c2ecf20Sopenharmony_ci	}
11528c2ecf20Sopenharmony_ci}
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ci/*
11558c2ecf20Sopenharmony_ci * helper function to walk our bio list and populate the bio_pages array with
11568c2ecf20Sopenharmony_ci * the result.  This seems expensive, but it is faster than constantly
11578c2ecf20Sopenharmony_ci * searching through the bio list as we setup the IO in finish_rmw or stripe
11588c2ecf20Sopenharmony_ci * reconstruction.
11598c2ecf20Sopenharmony_ci *
11608c2ecf20Sopenharmony_ci * This must be called before you trust the answers from page_in_rbio
11618c2ecf20Sopenharmony_ci */
11628c2ecf20Sopenharmony_cistatic void index_rbio_pages(struct btrfs_raid_bio *rbio)
11638c2ecf20Sopenharmony_ci{
11648c2ecf20Sopenharmony_ci	struct bio *bio;
11658c2ecf20Sopenharmony_ci	u64 start;
11668c2ecf20Sopenharmony_ci	unsigned long stripe_offset;
11678c2ecf20Sopenharmony_ci	unsigned long page_index;
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	spin_lock_irq(&rbio->bio_list_lock);
11708c2ecf20Sopenharmony_ci	bio_list_for_each(bio, &rbio->bio_list) {
11718c2ecf20Sopenharmony_ci		struct bio_vec bvec;
11728c2ecf20Sopenharmony_ci		struct bvec_iter iter;
11738c2ecf20Sopenharmony_ci		int i = 0;
11748c2ecf20Sopenharmony_ci
11758c2ecf20Sopenharmony_ci		start = (u64)bio->bi_iter.bi_sector << 9;
11768c2ecf20Sopenharmony_ci		stripe_offset = start - rbio->bbio->raid_map[0];
11778c2ecf20Sopenharmony_ci		page_index = stripe_offset >> PAGE_SHIFT;
11788c2ecf20Sopenharmony_ci
11798c2ecf20Sopenharmony_ci		if (bio_flagged(bio, BIO_CLONED))
11808c2ecf20Sopenharmony_ci			bio->bi_iter = btrfs_io_bio(bio)->iter;
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_ci		bio_for_each_segment(bvec, bio, iter) {
11838c2ecf20Sopenharmony_ci			rbio->bio_pages[page_index + i] = bvec.bv_page;
11848c2ecf20Sopenharmony_ci			i++;
11858c2ecf20Sopenharmony_ci		}
11868c2ecf20Sopenharmony_ci	}
11878c2ecf20Sopenharmony_ci	spin_unlock_irq(&rbio->bio_list_lock);
11888c2ecf20Sopenharmony_ci}
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci/*
11918c2ecf20Sopenharmony_ci * this is called from one of two situations.  We either
11928c2ecf20Sopenharmony_ci * have a full stripe from the higher layers, or we've read all
11938c2ecf20Sopenharmony_ci * the missing bits off disk.
11948c2ecf20Sopenharmony_ci *
11958c2ecf20Sopenharmony_ci * This will calculate the parity and then send down any
11968c2ecf20Sopenharmony_ci * changed blocks.
11978c2ecf20Sopenharmony_ci */
11988c2ecf20Sopenharmony_cistatic noinline void finish_rmw(struct btrfs_raid_bio *rbio)
11998c2ecf20Sopenharmony_ci{
12008c2ecf20Sopenharmony_ci	struct btrfs_bio *bbio = rbio->bbio;
12018c2ecf20Sopenharmony_ci	void **pointers = rbio->finish_pointers;
12028c2ecf20Sopenharmony_ci	int nr_data = rbio->nr_data;
12038c2ecf20Sopenharmony_ci	int stripe;
12048c2ecf20Sopenharmony_ci	int pagenr;
12058c2ecf20Sopenharmony_ci	bool has_qstripe;
12068c2ecf20Sopenharmony_ci	struct bio_list bio_list;
12078c2ecf20Sopenharmony_ci	struct bio *bio;
12088c2ecf20Sopenharmony_ci	int ret;
12098c2ecf20Sopenharmony_ci
12108c2ecf20Sopenharmony_ci	bio_list_init(&bio_list);
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci	if (rbio->real_stripes - rbio->nr_data == 1)
12138c2ecf20Sopenharmony_ci		has_qstripe = false;
12148c2ecf20Sopenharmony_ci	else if (rbio->real_stripes - rbio->nr_data == 2)
12158c2ecf20Sopenharmony_ci		has_qstripe = true;
12168c2ecf20Sopenharmony_ci	else
12178c2ecf20Sopenharmony_ci		BUG();
12188c2ecf20Sopenharmony_ci
12198c2ecf20Sopenharmony_ci	/* We should have at least one data sector. */
12208c2ecf20Sopenharmony_ci	ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
12218c2ecf20Sopenharmony_ci
12228c2ecf20Sopenharmony_ci	/* at this point we either have a full stripe,
12238c2ecf20Sopenharmony_ci	 * or we've read the full stripe from the drive.
12248c2ecf20Sopenharmony_ci	 * recalculate the parity and write the new results.
12258c2ecf20Sopenharmony_ci	 *
12268c2ecf20Sopenharmony_ci	 * We're not allowed to add any new bios to the
12278c2ecf20Sopenharmony_ci	 * bio list here, anyone else that wants to
12288c2ecf20Sopenharmony_ci	 * change this stripe needs to do their own rmw.
12298c2ecf20Sopenharmony_ci	 */
12308c2ecf20Sopenharmony_ci	spin_lock_irq(&rbio->bio_list_lock);
12318c2ecf20Sopenharmony_ci	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
12328c2ecf20Sopenharmony_ci	spin_unlock_irq(&rbio->bio_list_lock);
12338c2ecf20Sopenharmony_ci
12348c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
12358c2ecf20Sopenharmony_ci
12368c2ecf20Sopenharmony_ci	/*
12378c2ecf20Sopenharmony_ci	 * now that we've set rmw_locked, run through the
12388c2ecf20Sopenharmony_ci	 * bio list one last time and map the page pointers
12398c2ecf20Sopenharmony_ci	 *
12408c2ecf20Sopenharmony_ci	 * We don't cache full rbios because we're assuming
12418c2ecf20Sopenharmony_ci	 * the higher layers are unlikely to use this area of
12428c2ecf20Sopenharmony_ci	 * the disk again soon.  If they do use it again,
12438c2ecf20Sopenharmony_ci	 * hopefully they will send another full bio.
12448c2ecf20Sopenharmony_ci	 */
12458c2ecf20Sopenharmony_ci	index_rbio_pages(rbio);
12468c2ecf20Sopenharmony_ci	if (!rbio_is_full(rbio))
12478c2ecf20Sopenharmony_ci		cache_rbio_pages(rbio);
12488c2ecf20Sopenharmony_ci	else
12498c2ecf20Sopenharmony_ci		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
12508c2ecf20Sopenharmony_ci
12518c2ecf20Sopenharmony_ci	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
12528c2ecf20Sopenharmony_ci		struct page *p;
12538c2ecf20Sopenharmony_ci		/* first collect one page from each data stripe */
12548c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < nr_data; stripe++) {
12558c2ecf20Sopenharmony_ci			p = page_in_rbio(rbio, stripe, pagenr, 0);
12568c2ecf20Sopenharmony_ci			pointers[stripe] = kmap(p);
12578c2ecf20Sopenharmony_ci		}
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_ci		/* then add the parity stripe */
12608c2ecf20Sopenharmony_ci		p = rbio_pstripe_page(rbio, pagenr);
12618c2ecf20Sopenharmony_ci		SetPageUptodate(p);
12628c2ecf20Sopenharmony_ci		pointers[stripe++] = kmap(p);
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci		if (has_qstripe) {
12658c2ecf20Sopenharmony_ci
12668c2ecf20Sopenharmony_ci			/*
12678c2ecf20Sopenharmony_ci			 * raid6, add the qstripe and call the
12688c2ecf20Sopenharmony_ci			 * library function to fill in our p/q
12698c2ecf20Sopenharmony_ci			 */
12708c2ecf20Sopenharmony_ci			p = rbio_qstripe_page(rbio, pagenr);
12718c2ecf20Sopenharmony_ci			SetPageUptodate(p);
12728c2ecf20Sopenharmony_ci			pointers[stripe++] = kmap(p);
12738c2ecf20Sopenharmony_ci
12748c2ecf20Sopenharmony_ci			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
12758c2ecf20Sopenharmony_ci						pointers);
12768c2ecf20Sopenharmony_ci		} else {
12778c2ecf20Sopenharmony_ci			/* raid5 */
12788c2ecf20Sopenharmony_ci			copy_page(pointers[nr_data], pointers[0]);
12798c2ecf20Sopenharmony_ci			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
12808c2ecf20Sopenharmony_ci		}
12818c2ecf20Sopenharmony_ci
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < rbio->real_stripes; stripe++)
12848c2ecf20Sopenharmony_ci			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
12858c2ecf20Sopenharmony_ci	}
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_ci	/*
12888c2ecf20Sopenharmony_ci	 * time to start writing.  Make bios for everything from the
12898c2ecf20Sopenharmony_ci	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
12908c2ecf20Sopenharmony_ci	 * everything else.
12918c2ecf20Sopenharmony_ci	 */
12928c2ecf20Sopenharmony_ci	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
12938c2ecf20Sopenharmony_ci		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
12948c2ecf20Sopenharmony_ci			struct page *page;
12958c2ecf20Sopenharmony_ci
12968c2ecf20Sopenharmony_ci			/* This vertical stripe has no data, skip it. */
12978c2ecf20Sopenharmony_ci			if (!test_bit(pagenr, rbio->dbitmap))
12988c2ecf20Sopenharmony_ci				continue;
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci			if (stripe < rbio->nr_data) {
13018c2ecf20Sopenharmony_ci				page = page_in_rbio(rbio, stripe, pagenr, 1);
13028c2ecf20Sopenharmony_ci				if (!page)
13038c2ecf20Sopenharmony_ci					continue;
13048c2ecf20Sopenharmony_ci			} else {
13058c2ecf20Sopenharmony_ci			       page = rbio_stripe_page(rbio, stripe, pagenr);
13068c2ecf20Sopenharmony_ci			}
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci			ret = rbio_add_io_page(rbio, &bio_list,
13098c2ecf20Sopenharmony_ci				       page, stripe, pagenr, rbio->stripe_len);
13108c2ecf20Sopenharmony_ci			if (ret)
13118c2ecf20Sopenharmony_ci				goto cleanup;
13128c2ecf20Sopenharmony_ci		}
13138c2ecf20Sopenharmony_ci	}
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci	if (likely(!bbio->num_tgtdevs))
13168c2ecf20Sopenharmony_ci		goto write_data;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
13198c2ecf20Sopenharmony_ci		if (!bbio->tgtdev_map[stripe])
13208c2ecf20Sopenharmony_ci			continue;
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
13238c2ecf20Sopenharmony_ci			struct page *page;
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci			/* This vertical stripe has no data, skip it. */
13268c2ecf20Sopenharmony_ci			if (!test_bit(pagenr, rbio->dbitmap))
13278c2ecf20Sopenharmony_ci				continue;
13288c2ecf20Sopenharmony_ci
13298c2ecf20Sopenharmony_ci			if (stripe < rbio->nr_data) {
13308c2ecf20Sopenharmony_ci				page = page_in_rbio(rbio, stripe, pagenr, 1);
13318c2ecf20Sopenharmony_ci				if (!page)
13328c2ecf20Sopenharmony_ci					continue;
13338c2ecf20Sopenharmony_ci			} else {
13348c2ecf20Sopenharmony_ci			       page = rbio_stripe_page(rbio, stripe, pagenr);
13358c2ecf20Sopenharmony_ci			}
13368c2ecf20Sopenharmony_ci
13378c2ecf20Sopenharmony_ci			ret = rbio_add_io_page(rbio, &bio_list, page,
13388c2ecf20Sopenharmony_ci					       rbio->bbio->tgtdev_map[stripe],
13398c2ecf20Sopenharmony_ci					       pagenr, rbio->stripe_len);
13408c2ecf20Sopenharmony_ci			if (ret)
13418c2ecf20Sopenharmony_ci				goto cleanup;
13428c2ecf20Sopenharmony_ci		}
13438c2ecf20Sopenharmony_ci	}
13448c2ecf20Sopenharmony_ci
13458c2ecf20Sopenharmony_ciwrite_data:
13468c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
13478c2ecf20Sopenharmony_ci	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
13488c2ecf20Sopenharmony_ci
13498c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list))) {
13508c2ecf20Sopenharmony_ci		bio->bi_private = rbio;
13518c2ecf20Sopenharmony_ci		bio->bi_end_io = raid_write_end_io;
13528c2ecf20Sopenharmony_ci		bio->bi_opf = REQ_OP_WRITE;
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci		submit_bio(bio);
13558c2ecf20Sopenharmony_ci	}
13568c2ecf20Sopenharmony_ci	return;
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_cicleanup:
13598c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list)))
13628c2ecf20Sopenharmony_ci		bio_put(bio);
13638c2ecf20Sopenharmony_ci}
13648c2ecf20Sopenharmony_ci
13658c2ecf20Sopenharmony_ci/*
13668c2ecf20Sopenharmony_ci * helper to find the stripe number for a given bio.  Used to figure out which
13678c2ecf20Sopenharmony_ci * stripe has failed.  This expects the bio to correspond to a physical disk,
13688c2ecf20Sopenharmony_ci * so it looks up based on physical sector numbers.
13698c2ecf20Sopenharmony_ci */
13708c2ecf20Sopenharmony_cistatic int find_bio_stripe(struct btrfs_raid_bio *rbio,
13718c2ecf20Sopenharmony_ci			   struct bio *bio)
13728c2ecf20Sopenharmony_ci{
13738c2ecf20Sopenharmony_ci	u64 physical = bio->bi_iter.bi_sector;
13748c2ecf20Sopenharmony_ci	int i;
13758c2ecf20Sopenharmony_ci	struct btrfs_bio_stripe *stripe;
13768c2ecf20Sopenharmony_ci
13778c2ecf20Sopenharmony_ci	physical <<= 9;
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_ci	for (i = 0; i < rbio->bbio->num_stripes; i++) {
13808c2ecf20Sopenharmony_ci		stripe = &rbio->bbio->stripes[i];
13818c2ecf20Sopenharmony_ci		if (in_range(physical, stripe->physical, rbio->stripe_len) &&
13828c2ecf20Sopenharmony_ci		    stripe->dev->bdev &&
13838c2ecf20Sopenharmony_ci		    bio->bi_disk == stripe->dev->bdev->bd_disk &&
13848c2ecf20Sopenharmony_ci		    bio->bi_partno == stripe->dev->bdev->bd_partno) {
13858c2ecf20Sopenharmony_ci			return i;
13868c2ecf20Sopenharmony_ci		}
13878c2ecf20Sopenharmony_ci	}
13888c2ecf20Sopenharmony_ci	return -1;
13898c2ecf20Sopenharmony_ci}
13908c2ecf20Sopenharmony_ci
13918c2ecf20Sopenharmony_ci/*
13928c2ecf20Sopenharmony_ci * helper to find the stripe number for a given
13938c2ecf20Sopenharmony_ci * bio (before mapping).  Used to figure out which stripe has
13948c2ecf20Sopenharmony_ci * failed.  This looks up based on logical block numbers.
13958c2ecf20Sopenharmony_ci */
13968c2ecf20Sopenharmony_cistatic int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
13978c2ecf20Sopenharmony_ci				   struct bio *bio)
13988c2ecf20Sopenharmony_ci{
13998c2ecf20Sopenharmony_ci	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
14008c2ecf20Sopenharmony_ci	int i;
14018c2ecf20Sopenharmony_ci
14028c2ecf20Sopenharmony_ci	for (i = 0; i < rbio->nr_data; i++) {
14038c2ecf20Sopenharmony_ci		u64 stripe_start = rbio->bbio->raid_map[i];
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci		if (in_range(logical, stripe_start, rbio->stripe_len))
14068c2ecf20Sopenharmony_ci			return i;
14078c2ecf20Sopenharmony_ci	}
14088c2ecf20Sopenharmony_ci	return -1;
14098c2ecf20Sopenharmony_ci}
14108c2ecf20Sopenharmony_ci
14118c2ecf20Sopenharmony_ci/*
14128c2ecf20Sopenharmony_ci * returns -EIO if we had too many failures
14138c2ecf20Sopenharmony_ci */
14148c2ecf20Sopenharmony_cistatic int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
14158c2ecf20Sopenharmony_ci{
14168c2ecf20Sopenharmony_ci	unsigned long flags;
14178c2ecf20Sopenharmony_ci	int ret = 0;
14188c2ecf20Sopenharmony_ci
14198c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rbio->bio_list_lock, flags);
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_ci	/* we already know this stripe is bad, move on */
14228c2ecf20Sopenharmony_ci	if (rbio->faila == failed || rbio->failb == failed)
14238c2ecf20Sopenharmony_ci		goto out;
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	if (rbio->faila == -1) {
14268c2ecf20Sopenharmony_ci		/* first failure on this rbio */
14278c2ecf20Sopenharmony_ci		rbio->faila = failed;
14288c2ecf20Sopenharmony_ci		atomic_inc(&rbio->error);
14298c2ecf20Sopenharmony_ci	} else if (rbio->failb == -1) {
14308c2ecf20Sopenharmony_ci		/* second failure on this rbio */
14318c2ecf20Sopenharmony_ci		rbio->failb = failed;
14328c2ecf20Sopenharmony_ci		atomic_inc(&rbio->error);
14338c2ecf20Sopenharmony_ci	} else {
14348c2ecf20Sopenharmony_ci		ret = -EIO;
14358c2ecf20Sopenharmony_ci	}
14368c2ecf20Sopenharmony_ciout:
14378c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
14388c2ecf20Sopenharmony_ci
14398c2ecf20Sopenharmony_ci	return ret;
14408c2ecf20Sopenharmony_ci}
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci/*
14438c2ecf20Sopenharmony_ci * helper to fail a stripe based on a physical disk
14448c2ecf20Sopenharmony_ci * bio.
14458c2ecf20Sopenharmony_ci */
14468c2ecf20Sopenharmony_cistatic int fail_bio_stripe(struct btrfs_raid_bio *rbio,
14478c2ecf20Sopenharmony_ci			   struct bio *bio)
14488c2ecf20Sopenharmony_ci{
14498c2ecf20Sopenharmony_ci	int failed = find_bio_stripe(rbio, bio);
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_ci	if (failed < 0)
14528c2ecf20Sopenharmony_ci		return -EIO;
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci	return fail_rbio_index(rbio, failed);
14558c2ecf20Sopenharmony_ci}
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci/*
14588c2ecf20Sopenharmony_ci * this sets each page in the bio uptodate.  It should only be used on private
14598c2ecf20Sopenharmony_ci * rbio pages, nothing that comes in from the higher layers
14608c2ecf20Sopenharmony_ci */
14618c2ecf20Sopenharmony_cistatic void set_bio_pages_uptodate(struct bio *bio)
14628c2ecf20Sopenharmony_ci{
14638c2ecf20Sopenharmony_ci	struct bio_vec *bvec;
14648c2ecf20Sopenharmony_ci	struct bvec_iter_all iter_all;
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_ci	ASSERT(!bio_flagged(bio, BIO_CLONED));
14678c2ecf20Sopenharmony_ci
14688c2ecf20Sopenharmony_ci	bio_for_each_segment_all(bvec, bio, iter_all)
14698c2ecf20Sopenharmony_ci		SetPageUptodate(bvec->bv_page);
14708c2ecf20Sopenharmony_ci}
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_ci/*
14738c2ecf20Sopenharmony_ci * end io for the read phase of the rmw cycle.  All the bios here are physical
14748c2ecf20Sopenharmony_ci * stripe bios we've read from the disk so we can recalculate the parity of the
14758c2ecf20Sopenharmony_ci * stripe.
14768c2ecf20Sopenharmony_ci *
14778c2ecf20Sopenharmony_ci * This will usually kick off finish_rmw once all the bios are read in, but it
14788c2ecf20Sopenharmony_ci * may trigger parity reconstruction if we had any errors along the way
14798c2ecf20Sopenharmony_ci */
14808c2ecf20Sopenharmony_cistatic void raid_rmw_end_io(struct bio *bio)
14818c2ecf20Sopenharmony_ci{
14828c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio = bio->bi_private;
14838c2ecf20Sopenharmony_ci
14848c2ecf20Sopenharmony_ci	if (bio->bi_status)
14858c2ecf20Sopenharmony_ci		fail_bio_stripe(rbio, bio);
14868c2ecf20Sopenharmony_ci	else
14878c2ecf20Sopenharmony_ci		set_bio_pages_uptodate(bio);
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_ci	bio_put(bio);
14908c2ecf20Sopenharmony_ci
14918c2ecf20Sopenharmony_ci	if (!atomic_dec_and_test(&rbio->stripes_pending))
14928c2ecf20Sopenharmony_ci		return;
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
14958c2ecf20Sopenharmony_ci		goto cleanup;
14968c2ecf20Sopenharmony_ci
14978c2ecf20Sopenharmony_ci	/*
14988c2ecf20Sopenharmony_ci	 * this will normally call finish_rmw to start our write
14998c2ecf20Sopenharmony_ci	 * but if there are any failed stripes we'll reconstruct
15008c2ecf20Sopenharmony_ci	 * from parity first
15018c2ecf20Sopenharmony_ci	 */
15028c2ecf20Sopenharmony_ci	validate_rbio_for_rmw(rbio);
15038c2ecf20Sopenharmony_ci	return;
15048c2ecf20Sopenharmony_ci
15058c2ecf20Sopenharmony_cicleanup:
15068c2ecf20Sopenharmony_ci
15078c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
15088c2ecf20Sopenharmony_ci}
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci/*
15118c2ecf20Sopenharmony_ci * the stripe must be locked by the caller.  It will
15128c2ecf20Sopenharmony_ci * unlock after all the writes are done
15138c2ecf20Sopenharmony_ci */
15148c2ecf20Sopenharmony_cistatic int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
15158c2ecf20Sopenharmony_ci{
15168c2ecf20Sopenharmony_ci	int bios_to_read = 0;
15178c2ecf20Sopenharmony_ci	struct bio_list bio_list;
15188c2ecf20Sopenharmony_ci	int ret;
15198c2ecf20Sopenharmony_ci	int pagenr;
15208c2ecf20Sopenharmony_ci	int stripe;
15218c2ecf20Sopenharmony_ci	struct bio *bio;
15228c2ecf20Sopenharmony_ci
15238c2ecf20Sopenharmony_ci	bio_list_init(&bio_list);
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_ci	ret = alloc_rbio_pages(rbio);
15268c2ecf20Sopenharmony_ci	if (ret)
15278c2ecf20Sopenharmony_ci		goto cleanup;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci	index_rbio_pages(rbio);
15308c2ecf20Sopenharmony_ci
15318c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
15328c2ecf20Sopenharmony_ci	/*
15338c2ecf20Sopenharmony_ci	 * build a list of bios to read all the missing parts of this
15348c2ecf20Sopenharmony_ci	 * stripe
15358c2ecf20Sopenharmony_ci	 */
15368c2ecf20Sopenharmony_ci	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
15378c2ecf20Sopenharmony_ci		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
15388c2ecf20Sopenharmony_ci			struct page *page;
15398c2ecf20Sopenharmony_ci			/*
15408c2ecf20Sopenharmony_ci			 * we want to find all the pages missing from
15418c2ecf20Sopenharmony_ci			 * the rbio and read them from the disk.  If
15428c2ecf20Sopenharmony_ci			 * page_in_rbio finds a page in the bio list
15438c2ecf20Sopenharmony_ci			 * we don't need to read it off the stripe.
15448c2ecf20Sopenharmony_ci			 */
15458c2ecf20Sopenharmony_ci			page = page_in_rbio(rbio, stripe, pagenr, 1);
15468c2ecf20Sopenharmony_ci			if (page)
15478c2ecf20Sopenharmony_ci				continue;
15488c2ecf20Sopenharmony_ci
15498c2ecf20Sopenharmony_ci			page = rbio_stripe_page(rbio, stripe, pagenr);
15508c2ecf20Sopenharmony_ci			/*
15518c2ecf20Sopenharmony_ci			 * the bio cache may have handed us an uptodate
15528c2ecf20Sopenharmony_ci			 * page.  If so, be happy and use it
15538c2ecf20Sopenharmony_ci			 */
15548c2ecf20Sopenharmony_ci			if (PageUptodate(page))
15558c2ecf20Sopenharmony_ci				continue;
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci			ret = rbio_add_io_page(rbio, &bio_list, page,
15588c2ecf20Sopenharmony_ci				       stripe, pagenr, rbio->stripe_len);
15598c2ecf20Sopenharmony_ci			if (ret)
15608c2ecf20Sopenharmony_ci				goto cleanup;
15618c2ecf20Sopenharmony_ci		}
15628c2ecf20Sopenharmony_ci	}
15638c2ecf20Sopenharmony_ci
15648c2ecf20Sopenharmony_ci	bios_to_read = bio_list_size(&bio_list);
15658c2ecf20Sopenharmony_ci	if (!bios_to_read) {
15668c2ecf20Sopenharmony_ci		/*
15678c2ecf20Sopenharmony_ci		 * this can happen if others have merged with
15688c2ecf20Sopenharmony_ci		 * us, it means there is nothing left to read.
15698c2ecf20Sopenharmony_ci		 * But if there are missing devices it may not be
15708c2ecf20Sopenharmony_ci		 * safe to do the full stripe write yet.
15718c2ecf20Sopenharmony_ci		 */
15728c2ecf20Sopenharmony_ci		goto finish;
15738c2ecf20Sopenharmony_ci	}
15748c2ecf20Sopenharmony_ci
15758c2ecf20Sopenharmony_ci	/*
15768c2ecf20Sopenharmony_ci	 * the bbio may be freed once we submit the last bio.  Make sure
15778c2ecf20Sopenharmony_ci	 * not to touch it after that
15788c2ecf20Sopenharmony_ci	 */
15798c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, bios_to_read);
15808c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list))) {
15818c2ecf20Sopenharmony_ci		bio->bi_private = rbio;
15828c2ecf20Sopenharmony_ci		bio->bi_end_io = raid_rmw_end_io;
15838c2ecf20Sopenharmony_ci		bio->bi_opf = REQ_OP_READ;
15848c2ecf20Sopenharmony_ci
15858c2ecf20Sopenharmony_ci		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
15868c2ecf20Sopenharmony_ci
15878c2ecf20Sopenharmony_ci		submit_bio(bio);
15888c2ecf20Sopenharmony_ci	}
15898c2ecf20Sopenharmony_ci	/* the actual write will happen once the reads are done */
15908c2ecf20Sopenharmony_ci	return 0;
15918c2ecf20Sopenharmony_ci
15928c2ecf20Sopenharmony_cicleanup:
15938c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
15948c2ecf20Sopenharmony_ci
15958c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list)))
15968c2ecf20Sopenharmony_ci		bio_put(bio);
15978c2ecf20Sopenharmony_ci
15988c2ecf20Sopenharmony_ci	return -EIO;
15998c2ecf20Sopenharmony_ci
16008c2ecf20Sopenharmony_cifinish:
16018c2ecf20Sopenharmony_ci	validate_rbio_for_rmw(rbio);
16028c2ecf20Sopenharmony_ci	return 0;
16038c2ecf20Sopenharmony_ci}
16048c2ecf20Sopenharmony_ci
/*
 * Write path for a full stripe handed down by the upper layers: only the
 * parity pages are allocated and the stripe is sent down as fast as possible.
 *
 * Returns the allocation error (after freeing the rbio) if the parity pages
 * could not be allocated, 0 otherwise.  finish_rmw() runs immediately only
 * when lock_stripe_add() returns 0.
 */
static int full_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret = alloc_rbio_parity_pages(rbio);

	if (ret) {
		__free_raid_bio(rbio);
		return ret;
	}

	if (lock_stripe_add(rbio) == 0)
		finish_rmw(rbio);

	return 0;
}
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_ci/*
16268c2ecf20Sopenharmony_ci * partial stripe writes get handed over to async helpers.
16278c2ecf20Sopenharmony_ci * We're really hoping to merge a few more writes into this
16288c2ecf20Sopenharmony_ci * rbio before calculating new parity
16298c2ecf20Sopenharmony_ci */
16308c2ecf20Sopenharmony_cistatic int partial_stripe_write(struct btrfs_raid_bio *rbio)
16318c2ecf20Sopenharmony_ci{
16328c2ecf20Sopenharmony_ci	int ret;
16338c2ecf20Sopenharmony_ci
16348c2ecf20Sopenharmony_ci	ret = lock_stripe_add(rbio);
16358c2ecf20Sopenharmony_ci	if (ret == 0)
16368c2ecf20Sopenharmony_ci		start_async_work(rbio, rmw_work);
16378c2ecf20Sopenharmony_ci	return 0;
16388c2ecf20Sopenharmony_ci}
16398c2ecf20Sopenharmony_ci
/*
 * Dispatch a write rbio.  While we were reading from the drive to recalculate
 * parity, enough new bios may have come in to make a full stripe, so check
 * here: a full rbio takes the full stripe path, anything else heads off into
 * rmw land.
 */
static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
{
	if (rbio_is_full(rbio))
		return full_stripe_write(rbio);

	return partial_stripe_write(rbio);
}
16538c2ecf20Sopenharmony_ci
16548c2ecf20Sopenharmony_ci/*
16558c2ecf20Sopenharmony_ci * We use plugging call backs to collect full stripes.
16568c2ecf20Sopenharmony_ci * Any time we get a partial stripe write while plugged
16578c2ecf20Sopenharmony_ci * we collect it into a list.  When the unplug comes down,
16588c2ecf20Sopenharmony_ci * we sort the list by logical block number and merge
16598c2ecf20Sopenharmony_ci * everything we can into the same rbios
16608c2ecf20Sopenharmony_ci */
16618c2ecf20Sopenharmony_cistruct btrfs_plug_cb {
16628c2ecf20Sopenharmony_ci	struct blk_plug_cb cb;
16638c2ecf20Sopenharmony_ci	struct btrfs_fs_info *info;
16648c2ecf20Sopenharmony_ci	struct list_head rbio_list;
16658c2ecf20Sopenharmony_ci	struct btrfs_work work;
16668c2ecf20Sopenharmony_ci};
16678c2ecf20Sopenharmony_ci
16688c2ecf20Sopenharmony_ci/*
16698c2ecf20Sopenharmony_ci * rbios on the plug list are sorted for easier merging.
16708c2ecf20Sopenharmony_ci */
16718c2ecf20Sopenharmony_cistatic int plug_cmp(void *priv, const struct list_head *a,
16728c2ecf20Sopenharmony_ci		    const struct list_head *b)
16738c2ecf20Sopenharmony_ci{
16748c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
16758c2ecf20Sopenharmony_ci						 plug_list);
16768c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
16778c2ecf20Sopenharmony_ci						 plug_list);
16788c2ecf20Sopenharmony_ci	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
16798c2ecf20Sopenharmony_ci	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
16808c2ecf20Sopenharmony_ci
16818c2ecf20Sopenharmony_ci	if (a_sector < b_sector)
16828c2ecf20Sopenharmony_ci		return -1;
16838c2ecf20Sopenharmony_ci	if (a_sector > b_sector)
16848c2ecf20Sopenharmony_ci		return 1;
16858c2ecf20Sopenharmony_ci	return 0;
16868c2ecf20Sopenharmony_ci}
16878c2ecf20Sopenharmony_ci
16888c2ecf20Sopenharmony_cistatic void run_plug(struct btrfs_plug_cb *plug)
16898c2ecf20Sopenharmony_ci{
16908c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *cur;
16918c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *last = NULL;
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci	/*
16948c2ecf20Sopenharmony_ci	 * sort our plug list then try to merge
16958c2ecf20Sopenharmony_ci	 * everything we can in hopes of creating full
16968c2ecf20Sopenharmony_ci	 * stripes.
16978c2ecf20Sopenharmony_ci	 */
16988c2ecf20Sopenharmony_ci	list_sort(NULL, &plug->rbio_list, plug_cmp);
16998c2ecf20Sopenharmony_ci	while (!list_empty(&plug->rbio_list)) {
17008c2ecf20Sopenharmony_ci		cur = list_entry(plug->rbio_list.next,
17018c2ecf20Sopenharmony_ci				 struct btrfs_raid_bio, plug_list);
17028c2ecf20Sopenharmony_ci		list_del_init(&cur->plug_list);
17038c2ecf20Sopenharmony_ci
17048c2ecf20Sopenharmony_ci		if (rbio_is_full(cur)) {
17058c2ecf20Sopenharmony_ci			int ret;
17068c2ecf20Sopenharmony_ci
17078c2ecf20Sopenharmony_ci			/* we have a full stripe, send it down */
17088c2ecf20Sopenharmony_ci			ret = full_stripe_write(cur);
17098c2ecf20Sopenharmony_ci			BUG_ON(ret);
17108c2ecf20Sopenharmony_ci			continue;
17118c2ecf20Sopenharmony_ci		}
17128c2ecf20Sopenharmony_ci		if (last) {
17138c2ecf20Sopenharmony_ci			if (rbio_can_merge(last, cur)) {
17148c2ecf20Sopenharmony_ci				merge_rbio(last, cur);
17158c2ecf20Sopenharmony_ci				__free_raid_bio(cur);
17168c2ecf20Sopenharmony_ci				continue;
17178c2ecf20Sopenharmony_ci
17188c2ecf20Sopenharmony_ci			}
17198c2ecf20Sopenharmony_ci			__raid56_parity_write(last);
17208c2ecf20Sopenharmony_ci		}
17218c2ecf20Sopenharmony_ci		last = cur;
17228c2ecf20Sopenharmony_ci	}
17238c2ecf20Sopenharmony_ci	if (last) {
17248c2ecf20Sopenharmony_ci		__raid56_parity_write(last);
17258c2ecf20Sopenharmony_ci	}
17268c2ecf20Sopenharmony_ci	kfree(plug);
17278c2ecf20Sopenharmony_ci}
17288c2ecf20Sopenharmony_ci
17298c2ecf20Sopenharmony_ci/*
17308c2ecf20Sopenharmony_ci * if the unplug comes from schedule, we have to push the
17318c2ecf20Sopenharmony_ci * work off to a helper thread
17328c2ecf20Sopenharmony_ci */
17338c2ecf20Sopenharmony_cistatic void unplug_work(struct btrfs_work *work)
17348c2ecf20Sopenharmony_ci{
17358c2ecf20Sopenharmony_ci	struct btrfs_plug_cb *plug;
17368c2ecf20Sopenharmony_ci	plug = container_of(work, struct btrfs_plug_cb, work);
17378c2ecf20Sopenharmony_ci	run_plug(plug);
17388c2ecf20Sopenharmony_ci}
17398c2ecf20Sopenharmony_ci
17408c2ecf20Sopenharmony_cistatic void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
17418c2ecf20Sopenharmony_ci{
17428c2ecf20Sopenharmony_ci	struct btrfs_plug_cb *plug;
17438c2ecf20Sopenharmony_ci	plug = container_of(cb, struct btrfs_plug_cb, cb);
17448c2ecf20Sopenharmony_ci
17458c2ecf20Sopenharmony_ci	if (from_schedule) {
17468c2ecf20Sopenharmony_ci		btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
17478c2ecf20Sopenharmony_ci		btrfs_queue_work(plug->info->rmw_workers,
17488c2ecf20Sopenharmony_ci				 &plug->work);
17498c2ecf20Sopenharmony_ci		return;
17508c2ecf20Sopenharmony_ci	}
17518c2ecf20Sopenharmony_ci	run_plug(plug);
17528c2ecf20Sopenharmony_ci}
17538c2ecf20Sopenharmony_ci
17548c2ecf20Sopenharmony_ci/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
17558c2ecf20Sopenharmony_cistatic void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
17568c2ecf20Sopenharmony_ci{
17578c2ecf20Sopenharmony_ci	const struct btrfs_fs_info *fs_info = rbio->fs_info;
17588c2ecf20Sopenharmony_ci	const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
17598c2ecf20Sopenharmony_ci	const u64 full_stripe_start = rbio->bbio->raid_map[0];
17608c2ecf20Sopenharmony_ci	const u32 orig_len = orig_bio->bi_iter.bi_size;
17618c2ecf20Sopenharmony_ci	const u32 sectorsize = fs_info->sectorsize;
17628c2ecf20Sopenharmony_ci	u64 cur_logical;
17638c2ecf20Sopenharmony_ci
17648c2ecf20Sopenharmony_ci	ASSERT(orig_logical >= full_stripe_start &&
17658c2ecf20Sopenharmony_ci	       orig_logical + orig_len <= full_stripe_start +
17668c2ecf20Sopenharmony_ci	       rbio->nr_data * rbio->stripe_len);
17678c2ecf20Sopenharmony_ci
17688c2ecf20Sopenharmony_ci	bio_list_add(&rbio->bio_list, orig_bio);
17698c2ecf20Sopenharmony_ci	rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
17708c2ecf20Sopenharmony_ci
17718c2ecf20Sopenharmony_ci	/* Update the dbitmap. */
17728c2ecf20Sopenharmony_ci	for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
17738c2ecf20Sopenharmony_ci	     cur_logical += sectorsize) {
17748c2ecf20Sopenharmony_ci		int bit = ((u32)(cur_logical - full_stripe_start) >>
17758c2ecf20Sopenharmony_ci			   PAGE_SHIFT) % rbio->stripe_npages;
17768c2ecf20Sopenharmony_ci
17778c2ecf20Sopenharmony_ci		set_bit(bit, rbio->dbitmap);
17788c2ecf20Sopenharmony_ci	}
17798c2ecf20Sopenharmony_ci}
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci/*
17828c2ecf20Sopenharmony_ci * our main entry point for writes from the rest of the FS.
17838c2ecf20Sopenharmony_ci */
17848c2ecf20Sopenharmony_ciint raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
17858c2ecf20Sopenharmony_ci			struct btrfs_bio *bbio, u64 stripe_len)
17868c2ecf20Sopenharmony_ci{
17878c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
17888c2ecf20Sopenharmony_ci	struct btrfs_plug_cb *plug = NULL;
17898c2ecf20Sopenharmony_ci	struct blk_plug_cb *cb;
17908c2ecf20Sopenharmony_ci	int ret;
17918c2ecf20Sopenharmony_ci
17928c2ecf20Sopenharmony_ci	rbio = alloc_rbio(fs_info, bbio, stripe_len);
17938c2ecf20Sopenharmony_ci	if (IS_ERR(rbio)) {
17948c2ecf20Sopenharmony_ci		btrfs_put_bbio(bbio);
17958c2ecf20Sopenharmony_ci		return PTR_ERR(rbio);
17968c2ecf20Sopenharmony_ci	}
17978c2ecf20Sopenharmony_ci	rbio->operation = BTRFS_RBIO_WRITE;
17988c2ecf20Sopenharmony_ci	rbio_add_bio(rbio, bio);
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_ci	btrfs_bio_counter_inc_noblocked(fs_info);
18018c2ecf20Sopenharmony_ci	rbio->generic_bio_cnt = 1;
18028c2ecf20Sopenharmony_ci
18038c2ecf20Sopenharmony_ci	/*
18048c2ecf20Sopenharmony_ci	 * don't plug on full rbios, just get them out the door
18058c2ecf20Sopenharmony_ci	 * as quickly as we can
18068c2ecf20Sopenharmony_ci	 */
18078c2ecf20Sopenharmony_ci	if (rbio_is_full(rbio)) {
18088c2ecf20Sopenharmony_ci		ret = full_stripe_write(rbio);
18098c2ecf20Sopenharmony_ci		if (ret)
18108c2ecf20Sopenharmony_ci			btrfs_bio_counter_dec(fs_info);
18118c2ecf20Sopenharmony_ci		return ret;
18128c2ecf20Sopenharmony_ci	}
18138c2ecf20Sopenharmony_ci
18148c2ecf20Sopenharmony_ci	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
18158c2ecf20Sopenharmony_ci	if (cb) {
18168c2ecf20Sopenharmony_ci		plug = container_of(cb, struct btrfs_plug_cb, cb);
18178c2ecf20Sopenharmony_ci		if (!plug->info) {
18188c2ecf20Sopenharmony_ci			plug->info = fs_info;
18198c2ecf20Sopenharmony_ci			INIT_LIST_HEAD(&plug->rbio_list);
18208c2ecf20Sopenharmony_ci		}
18218c2ecf20Sopenharmony_ci		list_add_tail(&rbio->plug_list, &plug->rbio_list);
18228c2ecf20Sopenharmony_ci		ret = 0;
18238c2ecf20Sopenharmony_ci	} else {
18248c2ecf20Sopenharmony_ci		ret = __raid56_parity_write(rbio);
18258c2ecf20Sopenharmony_ci		if (ret)
18268c2ecf20Sopenharmony_ci			btrfs_bio_counter_dec(fs_info);
18278c2ecf20Sopenharmony_ci	}
18288c2ecf20Sopenharmony_ci	return ret;
18298c2ecf20Sopenharmony_ci}
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci/*
18328c2ecf20Sopenharmony_ci * all parity reconstruction happens here.  We've read in everything
18338c2ecf20Sopenharmony_ci * we can find from the drives and this does the heavy lifting of
18348c2ecf20Sopenharmony_ci * sorting the good from the bad.
18358c2ecf20Sopenharmony_ci */
18368c2ecf20Sopenharmony_cistatic void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
18378c2ecf20Sopenharmony_ci{
18388c2ecf20Sopenharmony_ci	int pagenr, stripe;
18398c2ecf20Sopenharmony_ci	void **pointers;
18408c2ecf20Sopenharmony_ci	int faila = -1, failb = -1;
18418c2ecf20Sopenharmony_ci	struct page *page;
18428c2ecf20Sopenharmony_ci	blk_status_t err;
18438c2ecf20Sopenharmony_ci	int i;
18448c2ecf20Sopenharmony_ci
18458c2ecf20Sopenharmony_ci	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
18468c2ecf20Sopenharmony_ci	if (!pointers) {
18478c2ecf20Sopenharmony_ci		err = BLK_STS_RESOURCE;
18488c2ecf20Sopenharmony_ci		goto cleanup_io;
18498c2ecf20Sopenharmony_ci	}
18508c2ecf20Sopenharmony_ci
18518c2ecf20Sopenharmony_ci	faila = rbio->faila;
18528c2ecf20Sopenharmony_ci	failb = rbio->failb;
18538c2ecf20Sopenharmony_ci
18548c2ecf20Sopenharmony_ci	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
18558c2ecf20Sopenharmony_ci	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
18568c2ecf20Sopenharmony_ci		spin_lock_irq(&rbio->bio_list_lock);
18578c2ecf20Sopenharmony_ci		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
18588c2ecf20Sopenharmony_ci		spin_unlock_irq(&rbio->bio_list_lock);
18598c2ecf20Sopenharmony_ci	}
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_ci	index_rbio_pages(rbio);
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
18648c2ecf20Sopenharmony_ci		/*
18658c2ecf20Sopenharmony_ci		 * Now we just use bitmap to mark the horizontal stripes in
18668c2ecf20Sopenharmony_ci		 * which we have data when doing parity scrub.
18678c2ecf20Sopenharmony_ci		 */
18688c2ecf20Sopenharmony_ci		if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
18698c2ecf20Sopenharmony_ci		    !test_bit(pagenr, rbio->dbitmap))
18708c2ecf20Sopenharmony_ci			continue;
18718c2ecf20Sopenharmony_ci
18728c2ecf20Sopenharmony_ci		/* setup our array of pointers with pages
18738c2ecf20Sopenharmony_ci		 * from each stripe
18748c2ecf20Sopenharmony_ci		 */
18758c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
18768c2ecf20Sopenharmony_ci			/*
18778c2ecf20Sopenharmony_ci			 * if we're rebuilding a read, we have to use
18788c2ecf20Sopenharmony_ci			 * pages from the bio list
18798c2ecf20Sopenharmony_ci			 */
18808c2ecf20Sopenharmony_ci			if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
18818c2ecf20Sopenharmony_ci			     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
18828c2ecf20Sopenharmony_ci			    (stripe == faila || stripe == failb)) {
18838c2ecf20Sopenharmony_ci				page = page_in_rbio(rbio, stripe, pagenr, 0);
18848c2ecf20Sopenharmony_ci			} else {
18858c2ecf20Sopenharmony_ci				page = rbio_stripe_page(rbio, stripe, pagenr);
18868c2ecf20Sopenharmony_ci			}
18878c2ecf20Sopenharmony_ci			pointers[stripe] = kmap(page);
18888c2ecf20Sopenharmony_ci		}
18898c2ecf20Sopenharmony_ci
18908c2ecf20Sopenharmony_ci		/* all raid6 handling here */
18918c2ecf20Sopenharmony_ci		if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
18928c2ecf20Sopenharmony_ci			/*
18938c2ecf20Sopenharmony_ci			 * single failure, rebuild from parity raid5
18948c2ecf20Sopenharmony_ci			 * style
18958c2ecf20Sopenharmony_ci			 */
18968c2ecf20Sopenharmony_ci			if (failb < 0) {
18978c2ecf20Sopenharmony_ci				if (faila == rbio->nr_data) {
18988c2ecf20Sopenharmony_ci					/*
18998c2ecf20Sopenharmony_ci					 * Just the P stripe has failed, without
19008c2ecf20Sopenharmony_ci					 * a bad data or Q stripe.
19018c2ecf20Sopenharmony_ci					 * TODO, we should redo the xor here.
19028c2ecf20Sopenharmony_ci					 */
19038c2ecf20Sopenharmony_ci					err = BLK_STS_IOERR;
19048c2ecf20Sopenharmony_ci					goto cleanup;
19058c2ecf20Sopenharmony_ci				}
19068c2ecf20Sopenharmony_ci				/*
19078c2ecf20Sopenharmony_ci				 * a single failure in raid6 is rebuilt
19088c2ecf20Sopenharmony_ci				 * in the pstripe code below
19098c2ecf20Sopenharmony_ci				 */
19108c2ecf20Sopenharmony_ci				goto pstripe;
19118c2ecf20Sopenharmony_ci			}
19128c2ecf20Sopenharmony_ci
19138c2ecf20Sopenharmony_ci			/* make sure our ps and qs are in order */
19148c2ecf20Sopenharmony_ci			if (faila > failb)
19158c2ecf20Sopenharmony_ci				swap(faila, failb);
19168c2ecf20Sopenharmony_ci
19178c2ecf20Sopenharmony_ci			/* if the q stripe is failed, do a pstripe reconstruction
19188c2ecf20Sopenharmony_ci			 * from the xors.
19198c2ecf20Sopenharmony_ci			 * If both the q stripe and the P stripe are failed, we're
19208c2ecf20Sopenharmony_ci			 * here due to a crc mismatch and we can't give them the
19218c2ecf20Sopenharmony_ci			 * data they want
19228c2ecf20Sopenharmony_ci			 */
19238c2ecf20Sopenharmony_ci			if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
19248c2ecf20Sopenharmony_ci				if (rbio->bbio->raid_map[faila] ==
19258c2ecf20Sopenharmony_ci				    RAID5_P_STRIPE) {
19268c2ecf20Sopenharmony_ci					err = BLK_STS_IOERR;
19278c2ecf20Sopenharmony_ci					goto cleanup;
19288c2ecf20Sopenharmony_ci				}
19298c2ecf20Sopenharmony_ci				/*
19308c2ecf20Sopenharmony_ci				 * otherwise we have one bad data stripe and
19318c2ecf20Sopenharmony_ci				 * a good P stripe.  raid5!
19328c2ecf20Sopenharmony_ci				 */
19338c2ecf20Sopenharmony_ci				goto pstripe;
19348c2ecf20Sopenharmony_ci			}
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci			if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
19378c2ecf20Sopenharmony_ci				raid6_datap_recov(rbio->real_stripes,
19388c2ecf20Sopenharmony_ci						  PAGE_SIZE, faila, pointers);
19398c2ecf20Sopenharmony_ci			} else {
19408c2ecf20Sopenharmony_ci				raid6_2data_recov(rbio->real_stripes,
19418c2ecf20Sopenharmony_ci						  PAGE_SIZE, faila, failb,
19428c2ecf20Sopenharmony_ci						  pointers);
19438c2ecf20Sopenharmony_ci			}
19448c2ecf20Sopenharmony_ci		} else {
19458c2ecf20Sopenharmony_ci			void *p;
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci			/* rebuild from P stripe here (raid5 or raid6) */
19488c2ecf20Sopenharmony_ci			BUG_ON(failb != -1);
19498c2ecf20Sopenharmony_cipstripe:
19508c2ecf20Sopenharmony_ci			/* Copy parity block into failed block to start with */
19518c2ecf20Sopenharmony_ci			copy_page(pointers[faila], pointers[rbio->nr_data]);
19528c2ecf20Sopenharmony_ci
19538c2ecf20Sopenharmony_ci			/* rearrange the pointer array */
19548c2ecf20Sopenharmony_ci			p = pointers[faila];
19558c2ecf20Sopenharmony_ci			for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
19568c2ecf20Sopenharmony_ci				pointers[stripe] = pointers[stripe + 1];
19578c2ecf20Sopenharmony_ci			pointers[rbio->nr_data - 1] = p;
19588c2ecf20Sopenharmony_ci
19598c2ecf20Sopenharmony_ci			/* xor in the rest */
19608c2ecf20Sopenharmony_ci			run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
19618c2ecf20Sopenharmony_ci		}
19628c2ecf20Sopenharmony_ci		/* if we're doing this rebuild as part of an rmw, go through
19638c2ecf20Sopenharmony_ci		 * and set all of our private rbio pages in the
19648c2ecf20Sopenharmony_ci		 * failed stripes as uptodate.  This way finish_rmw will
19658c2ecf20Sopenharmony_ci		 * know they can be trusted.  If this was a read reconstruction,
19668c2ecf20Sopenharmony_ci		 * other endio functions will fiddle the uptodate bits
19678c2ecf20Sopenharmony_ci		 */
19688c2ecf20Sopenharmony_ci		if (rbio->operation == BTRFS_RBIO_WRITE) {
19698c2ecf20Sopenharmony_ci			for (i = 0;  i < rbio->stripe_npages; i++) {
19708c2ecf20Sopenharmony_ci				if (faila != -1) {
19718c2ecf20Sopenharmony_ci					page = rbio_stripe_page(rbio, faila, i);
19728c2ecf20Sopenharmony_ci					SetPageUptodate(page);
19738c2ecf20Sopenharmony_ci				}
19748c2ecf20Sopenharmony_ci				if (failb != -1) {
19758c2ecf20Sopenharmony_ci					page = rbio_stripe_page(rbio, failb, i);
19768c2ecf20Sopenharmony_ci					SetPageUptodate(page);
19778c2ecf20Sopenharmony_ci				}
19788c2ecf20Sopenharmony_ci			}
19798c2ecf20Sopenharmony_ci		}
19808c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
19818c2ecf20Sopenharmony_ci			/*
19828c2ecf20Sopenharmony_ci			 * if we're rebuilding a read, we have to use
19838c2ecf20Sopenharmony_ci			 * pages from the bio list
19848c2ecf20Sopenharmony_ci			 */
19858c2ecf20Sopenharmony_ci			if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
19868c2ecf20Sopenharmony_ci			     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
19878c2ecf20Sopenharmony_ci			    (stripe == faila || stripe == failb)) {
19888c2ecf20Sopenharmony_ci				page = page_in_rbio(rbio, stripe, pagenr, 0);
19898c2ecf20Sopenharmony_ci			} else {
19908c2ecf20Sopenharmony_ci				page = rbio_stripe_page(rbio, stripe, pagenr);
19918c2ecf20Sopenharmony_ci			}
19928c2ecf20Sopenharmony_ci			kunmap(page);
19938c2ecf20Sopenharmony_ci		}
19948c2ecf20Sopenharmony_ci	}
19958c2ecf20Sopenharmony_ci
19968c2ecf20Sopenharmony_ci	err = BLK_STS_OK;
19978c2ecf20Sopenharmony_cicleanup:
19988c2ecf20Sopenharmony_ci	kfree(pointers);
19998c2ecf20Sopenharmony_ci
20008c2ecf20Sopenharmony_cicleanup_io:
20018c2ecf20Sopenharmony_ci	/*
20028c2ecf20Sopenharmony_ci	 * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
20038c2ecf20Sopenharmony_ci	 * valid rbio which is consistent with ondisk content, thus such a
20048c2ecf20Sopenharmony_ci	 * valid rbio can be cached to avoid further disk reads.
20058c2ecf20Sopenharmony_ci	 */
20068c2ecf20Sopenharmony_ci	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
20078c2ecf20Sopenharmony_ci	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
20088c2ecf20Sopenharmony_ci		/*
20098c2ecf20Sopenharmony_ci		 * - In case of two failures, where rbio->failb != -1:
20108c2ecf20Sopenharmony_ci		 *
20118c2ecf20Sopenharmony_ci		 *   Do not cache this rbio since the above read reconstruction
20128c2ecf20Sopenharmony_ci		 *   (raid6_datap_recov() or raid6_2data_recov()) may have
20138c2ecf20Sopenharmony_ci		 *   changed some content of stripes which are not identical to
20148c2ecf20Sopenharmony_ci		 *   on-disk content any more, otherwise, a later write/recover
20158c2ecf20Sopenharmony_ci		 *   may steal stripe_pages from this rbio and end up with
20168c2ecf20Sopenharmony_ci		 *   corruptions or rebuild failures.
20178c2ecf20Sopenharmony_ci		 *
20188c2ecf20Sopenharmony_ci		 * - In case of single failure, where rbio->failb == -1:
20198c2ecf20Sopenharmony_ci		 *
20208c2ecf20Sopenharmony_ci		 *   Cache this rbio iff the above read reconstruction is
20218c2ecf20Sopenharmony_ci		 *   executed without problems.
20228c2ecf20Sopenharmony_ci		 */
20238c2ecf20Sopenharmony_ci		if (err == BLK_STS_OK && rbio->failb < 0)
20248c2ecf20Sopenharmony_ci			cache_rbio_pages(rbio);
20258c2ecf20Sopenharmony_ci		else
20268c2ecf20Sopenharmony_ci			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
20278c2ecf20Sopenharmony_ci
20288c2ecf20Sopenharmony_ci		rbio_orig_end_io(rbio, err);
20298c2ecf20Sopenharmony_ci	} else if (err == BLK_STS_OK) {
20308c2ecf20Sopenharmony_ci		rbio->faila = -1;
20318c2ecf20Sopenharmony_ci		rbio->failb = -1;
20328c2ecf20Sopenharmony_ci
20338c2ecf20Sopenharmony_ci		if (rbio->operation == BTRFS_RBIO_WRITE)
20348c2ecf20Sopenharmony_ci			finish_rmw(rbio);
20358c2ecf20Sopenharmony_ci		else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
20368c2ecf20Sopenharmony_ci			finish_parity_scrub(rbio, 0);
20378c2ecf20Sopenharmony_ci		else
20388c2ecf20Sopenharmony_ci			BUG();
20398c2ecf20Sopenharmony_ci	} else {
20408c2ecf20Sopenharmony_ci		rbio_orig_end_io(rbio, err);
20418c2ecf20Sopenharmony_ci	}
20428c2ecf20Sopenharmony_ci}
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci/*
20458c2ecf20Sopenharmony_ci * This is called only for stripes we've read from disk to
20468c2ecf20Sopenharmony_ci * reconstruct the parity.
20478c2ecf20Sopenharmony_ci */
20488c2ecf20Sopenharmony_cistatic void raid_recover_end_io(struct bio *bio)
20498c2ecf20Sopenharmony_ci{
20508c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio = bio->bi_private;
20518c2ecf20Sopenharmony_ci
20528c2ecf20Sopenharmony_ci	/*
20538c2ecf20Sopenharmony_ci	 * we only read stripe pages off the disk, set them
20548c2ecf20Sopenharmony_ci	 * up to date if there were no errors
20558c2ecf20Sopenharmony_ci	 */
20568c2ecf20Sopenharmony_ci	if (bio->bi_status)
20578c2ecf20Sopenharmony_ci		fail_bio_stripe(rbio, bio);
20588c2ecf20Sopenharmony_ci	else
20598c2ecf20Sopenharmony_ci		set_bio_pages_uptodate(bio);
20608c2ecf20Sopenharmony_ci	bio_put(bio);
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci	if (!atomic_dec_and_test(&rbio->stripes_pending))
20638c2ecf20Sopenharmony_ci		return;
20648c2ecf20Sopenharmony_ci
20658c2ecf20Sopenharmony_ci	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
20668c2ecf20Sopenharmony_ci		rbio_orig_end_io(rbio, BLK_STS_IOERR);
20678c2ecf20Sopenharmony_ci	else
20688c2ecf20Sopenharmony_ci		__raid_recover_end_io(rbio);
20698c2ecf20Sopenharmony_ci}
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ci/*
20728c2ecf20Sopenharmony_ci * reads everything we need off the disk to reconstruct
20738c2ecf20Sopenharmony_ci * the parity. endio handlers trigger final reconstruction
20748c2ecf20Sopenharmony_ci * when the IO is done.
20758c2ecf20Sopenharmony_ci *
20768c2ecf20Sopenharmony_ci * This is used both for reads from the higher layers and for
20778c2ecf20Sopenharmony_ci * parity construction required to finish a rmw cycle.
20788c2ecf20Sopenharmony_ci */
20798c2ecf20Sopenharmony_cistatic int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
20808c2ecf20Sopenharmony_ci{
20818c2ecf20Sopenharmony_ci	int bios_to_read = 0;
20828c2ecf20Sopenharmony_ci	struct bio_list bio_list;
20838c2ecf20Sopenharmony_ci	int ret;
20848c2ecf20Sopenharmony_ci	int pagenr;
20858c2ecf20Sopenharmony_ci	int stripe;
20868c2ecf20Sopenharmony_ci	struct bio *bio;
20878c2ecf20Sopenharmony_ci
20888c2ecf20Sopenharmony_ci	bio_list_init(&bio_list);
20898c2ecf20Sopenharmony_ci
20908c2ecf20Sopenharmony_ci	ret = alloc_rbio_pages(rbio);
20918c2ecf20Sopenharmony_ci	if (ret)
20928c2ecf20Sopenharmony_ci		goto cleanup;
20938c2ecf20Sopenharmony_ci
20948c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
20958c2ecf20Sopenharmony_ci
20968c2ecf20Sopenharmony_ci	/*
20978c2ecf20Sopenharmony_ci	 * Read everything that hasn't failed. However this time we will
20988c2ecf20Sopenharmony_ci	 * not trust any cached sector.
20998c2ecf20Sopenharmony_ci	 * As we may read out some stale data but higher layer is not reading
21008c2ecf20Sopenharmony_ci	 * that stale part.
21018c2ecf20Sopenharmony_ci	 *
21028c2ecf20Sopenharmony_ci	 * So here we always re-read everything in recovery path.
21038c2ecf20Sopenharmony_ci	 */
21048c2ecf20Sopenharmony_ci	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
21058c2ecf20Sopenharmony_ci		if (rbio->faila == stripe || rbio->failb == stripe) {
21068c2ecf20Sopenharmony_ci			atomic_inc(&rbio->error);
21078c2ecf20Sopenharmony_ci			continue;
21088c2ecf20Sopenharmony_ci		}
21098c2ecf20Sopenharmony_ci
21108c2ecf20Sopenharmony_ci		for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
21118c2ecf20Sopenharmony_ci			ret = rbio_add_io_page(rbio, &bio_list,
21128c2ecf20Sopenharmony_ci				       rbio_stripe_page(rbio, stripe, pagenr),
21138c2ecf20Sopenharmony_ci				       stripe, pagenr, rbio->stripe_len);
21148c2ecf20Sopenharmony_ci			if (ret < 0)
21158c2ecf20Sopenharmony_ci				goto cleanup;
21168c2ecf20Sopenharmony_ci		}
21178c2ecf20Sopenharmony_ci	}
21188c2ecf20Sopenharmony_ci
21198c2ecf20Sopenharmony_ci	bios_to_read = bio_list_size(&bio_list);
21208c2ecf20Sopenharmony_ci	if (!bios_to_read) {
21218c2ecf20Sopenharmony_ci		/*
21228c2ecf20Sopenharmony_ci		 * we might have no bios to read just because the pages
21238c2ecf20Sopenharmony_ci		 * were up to date, or we might have no bios to read because
21248c2ecf20Sopenharmony_ci		 * the devices were gone.
21258c2ecf20Sopenharmony_ci		 */
21268c2ecf20Sopenharmony_ci		if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
21278c2ecf20Sopenharmony_ci			__raid_recover_end_io(rbio);
21288c2ecf20Sopenharmony_ci			return 0;
21298c2ecf20Sopenharmony_ci		} else {
21308c2ecf20Sopenharmony_ci			goto cleanup;
21318c2ecf20Sopenharmony_ci		}
21328c2ecf20Sopenharmony_ci	}
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_ci	/*
21358c2ecf20Sopenharmony_ci	 * the bbio may be freed once we submit the last bio.  Make sure
21368c2ecf20Sopenharmony_ci	 * not to touch it after that
21378c2ecf20Sopenharmony_ci	 */
21388c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, bios_to_read);
21398c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list))) {
21408c2ecf20Sopenharmony_ci		bio->bi_private = rbio;
21418c2ecf20Sopenharmony_ci		bio->bi_end_io = raid_recover_end_io;
21428c2ecf20Sopenharmony_ci		bio->bi_opf = REQ_OP_READ;
21438c2ecf20Sopenharmony_ci
21448c2ecf20Sopenharmony_ci		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci		submit_bio(bio);
21478c2ecf20Sopenharmony_ci	}
21488c2ecf20Sopenharmony_ci
21498c2ecf20Sopenharmony_ci	return 0;
21508c2ecf20Sopenharmony_ci
21518c2ecf20Sopenharmony_cicleanup:
21528c2ecf20Sopenharmony_ci	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
21538c2ecf20Sopenharmony_ci	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
21548c2ecf20Sopenharmony_ci		rbio_orig_end_io(rbio, BLK_STS_IOERR);
21558c2ecf20Sopenharmony_ci
21568c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list)))
21578c2ecf20Sopenharmony_ci		bio_put(bio);
21588c2ecf20Sopenharmony_ci
21598c2ecf20Sopenharmony_ci	return -EIO;
21608c2ecf20Sopenharmony_ci}
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_ci/*
21638c2ecf20Sopenharmony_ci * the main entry point for reads from the higher layers.  This
21648c2ecf20Sopenharmony_ci * is really only called when the normal read path had a failure,
21658c2ecf20Sopenharmony_ci * so we assume the bio they send down corresponds to a failed part
21668c2ecf20Sopenharmony_ci * of the drive.
21678c2ecf20Sopenharmony_ci */
21688c2ecf20Sopenharmony_ciint raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
21698c2ecf20Sopenharmony_ci			  struct btrfs_bio *bbio, u64 stripe_len,
21708c2ecf20Sopenharmony_ci			  int mirror_num, int generic_io)
21718c2ecf20Sopenharmony_ci{
21728c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
21738c2ecf20Sopenharmony_ci	int ret;
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci	if (generic_io) {
21768c2ecf20Sopenharmony_ci		ASSERT(bbio->mirror_num == mirror_num);
21778c2ecf20Sopenharmony_ci		btrfs_io_bio(bio)->mirror_num = mirror_num;
21788c2ecf20Sopenharmony_ci	}
21798c2ecf20Sopenharmony_ci
21808c2ecf20Sopenharmony_ci	rbio = alloc_rbio(fs_info, bbio, stripe_len);
21818c2ecf20Sopenharmony_ci	if (IS_ERR(rbio)) {
21828c2ecf20Sopenharmony_ci		if (generic_io)
21838c2ecf20Sopenharmony_ci			btrfs_put_bbio(bbio);
21848c2ecf20Sopenharmony_ci		return PTR_ERR(rbio);
21858c2ecf20Sopenharmony_ci	}
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci	rbio->operation = BTRFS_RBIO_READ_REBUILD;
21888c2ecf20Sopenharmony_ci	rbio_add_bio(rbio, bio);
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci	rbio->faila = find_logical_bio_stripe(rbio, bio);
21918c2ecf20Sopenharmony_ci	if (rbio->faila == -1) {
21928c2ecf20Sopenharmony_ci		btrfs_warn(fs_info,
21938c2ecf20Sopenharmony_ci	"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
21948c2ecf20Sopenharmony_ci			   __func__, (u64)bio->bi_iter.bi_sector << 9,
21958c2ecf20Sopenharmony_ci			   (u64)bio->bi_iter.bi_size, bbio->map_type);
21968c2ecf20Sopenharmony_ci		if (generic_io)
21978c2ecf20Sopenharmony_ci			btrfs_put_bbio(bbio);
21988c2ecf20Sopenharmony_ci		kfree(rbio);
21998c2ecf20Sopenharmony_ci		return -EIO;
22008c2ecf20Sopenharmony_ci	}
22018c2ecf20Sopenharmony_ci
22028c2ecf20Sopenharmony_ci	if (generic_io) {
22038c2ecf20Sopenharmony_ci		btrfs_bio_counter_inc_noblocked(fs_info);
22048c2ecf20Sopenharmony_ci		rbio->generic_bio_cnt = 1;
22058c2ecf20Sopenharmony_ci	} else {
22068c2ecf20Sopenharmony_ci		btrfs_get_bbio(bbio);
22078c2ecf20Sopenharmony_ci	}
22088c2ecf20Sopenharmony_ci
22098c2ecf20Sopenharmony_ci	/*
22108c2ecf20Sopenharmony_ci	 * Loop retry:
22118c2ecf20Sopenharmony_ci	 * for 'mirror == 2', reconstruct from all other stripes.
22128c2ecf20Sopenharmony_ci	 * for 'mirror_num > 2', select a stripe to fail on every retry.
22138c2ecf20Sopenharmony_ci	 */
22148c2ecf20Sopenharmony_ci	if (mirror_num > 2) {
22158c2ecf20Sopenharmony_ci		/*
22168c2ecf20Sopenharmony_ci		 * 'mirror == 3' is to fail the p stripe and
22178c2ecf20Sopenharmony_ci		 * reconstruct from the q stripe.  'mirror > 3' is to
22188c2ecf20Sopenharmony_ci		 * fail a data stripe and reconstruct from p+q stripe.
22198c2ecf20Sopenharmony_ci		 */
22208c2ecf20Sopenharmony_ci		rbio->failb = rbio->real_stripes - (mirror_num - 1);
22218c2ecf20Sopenharmony_ci		ASSERT(rbio->failb > 0);
22228c2ecf20Sopenharmony_ci		if (rbio->failb <= rbio->faila)
22238c2ecf20Sopenharmony_ci			rbio->failb--;
22248c2ecf20Sopenharmony_ci	}
22258c2ecf20Sopenharmony_ci
22268c2ecf20Sopenharmony_ci	ret = lock_stripe_add(rbio);
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci	/*
22298c2ecf20Sopenharmony_ci	 * __raid56_parity_recover will end the bio with
22308c2ecf20Sopenharmony_ci	 * any errors it hits.  We don't want to return
22318c2ecf20Sopenharmony_ci	 * its error value up the stack because our caller
22328c2ecf20Sopenharmony_ci	 * will end up calling bio_endio with any nonzero
22338c2ecf20Sopenharmony_ci	 * return
22348c2ecf20Sopenharmony_ci	 */
22358c2ecf20Sopenharmony_ci	if (ret == 0)
22368c2ecf20Sopenharmony_ci		__raid56_parity_recover(rbio);
22378c2ecf20Sopenharmony_ci	/*
22388c2ecf20Sopenharmony_ci	 * our rbio has been added to the list of
22398c2ecf20Sopenharmony_ci	 * rbios that will be handled after the
22408c2ecf20Sopenharmony_ci	 * currently lock owner is done
22418c2ecf20Sopenharmony_ci	 */
22428c2ecf20Sopenharmony_ci	return 0;
22438c2ecf20Sopenharmony_ci
22448c2ecf20Sopenharmony_ci}
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_cistatic void rmw_work(struct btrfs_work *work)
22478c2ecf20Sopenharmony_ci{
22488c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci	rbio = container_of(work, struct btrfs_raid_bio, work);
22518c2ecf20Sopenharmony_ci	raid56_rmw_stripe(rbio);
22528c2ecf20Sopenharmony_ci}
22538c2ecf20Sopenharmony_ci
22548c2ecf20Sopenharmony_cistatic void read_rebuild_work(struct btrfs_work *work)
22558c2ecf20Sopenharmony_ci{
22568c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
22578c2ecf20Sopenharmony_ci
22588c2ecf20Sopenharmony_ci	rbio = container_of(work, struct btrfs_raid_bio, work);
22598c2ecf20Sopenharmony_ci	__raid56_parity_recover(rbio);
22608c2ecf20Sopenharmony_ci}
22618c2ecf20Sopenharmony_ci
22628c2ecf20Sopenharmony_ci/*
22638c2ecf20Sopenharmony_ci * The following code is used to scrub/replace the parity stripe
22648c2ecf20Sopenharmony_ci *
22658c2ecf20Sopenharmony_ci * Caller must have already increased bio_counter for getting @bbio.
22668c2ecf20Sopenharmony_ci *
22678c2ecf20Sopenharmony_ci * Note: We need make sure all the pages that add into the scrub/replace
22688c2ecf20Sopenharmony_ci * raid bio are correct and not be changed during the scrub/replace. That
22698c2ecf20Sopenharmony_ci * is those pages just hold metadata or file data with checksum.
22708c2ecf20Sopenharmony_ci */
22718c2ecf20Sopenharmony_ci
22728c2ecf20Sopenharmony_cistruct btrfs_raid_bio *
22738c2ecf20Sopenharmony_ciraid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
22748c2ecf20Sopenharmony_ci			       struct btrfs_bio *bbio, u64 stripe_len,
22758c2ecf20Sopenharmony_ci			       struct btrfs_device *scrub_dev,
22768c2ecf20Sopenharmony_ci			       unsigned long *dbitmap, int stripe_nsectors)
22778c2ecf20Sopenharmony_ci{
22788c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
22798c2ecf20Sopenharmony_ci	int i;
22808c2ecf20Sopenharmony_ci
22818c2ecf20Sopenharmony_ci	rbio = alloc_rbio(fs_info, bbio, stripe_len);
22828c2ecf20Sopenharmony_ci	if (IS_ERR(rbio))
22838c2ecf20Sopenharmony_ci		return NULL;
22848c2ecf20Sopenharmony_ci	bio_list_add(&rbio->bio_list, bio);
22858c2ecf20Sopenharmony_ci	/*
22868c2ecf20Sopenharmony_ci	 * This is a special bio which is used to hold the completion handler
22878c2ecf20Sopenharmony_ci	 * and make the scrub rbio is similar to the other types
22888c2ecf20Sopenharmony_ci	 */
22898c2ecf20Sopenharmony_ci	ASSERT(!bio->bi_iter.bi_size);
22908c2ecf20Sopenharmony_ci	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
22918c2ecf20Sopenharmony_ci
22928c2ecf20Sopenharmony_ci	/*
22938c2ecf20Sopenharmony_ci	 * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
22948c2ecf20Sopenharmony_ci	 * to the end position, so this search can start from the first parity
22958c2ecf20Sopenharmony_ci	 * stripe.
22968c2ecf20Sopenharmony_ci	 */
22978c2ecf20Sopenharmony_ci	for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
22988c2ecf20Sopenharmony_ci		if (bbio->stripes[i].dev == scrub_dev) {
22998c2ecf20Sopenharmony_ci			rbio->scrubp = i;
23008c2ecf20Sopenharmony_ci			break;
23018c2ecf20Sopenharmony_ci		}
23028c2ecf20Sopenharmony_ci	}
23038c2ecf20Sopenharmony_ci	ASSERT(i < rbio->real_stripes);
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_ci	/* Now we just support the sectorsize equals to page size */
23068c2ecf20Sopenharmony_ci	ASSERT(fs_info->sectorsize == PAGE_SIZE);
23078c2ecf20Sopenharmony_ci	ASSERT(rbio->stripe_npages == stripe_nsectors);
23088c2ecf20Sopenharmony_ci	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
23098c2ecf20Sopenharmony_ci
23108c2ecf20Sopenharmony_ci	/*
23118c2ecf20Sopenharmony_ci	 * We have already increased bio_counter when getting bbio, record it
23128c2ecf20Sopenharmony_ci	 * so we can free it at rbio_orig_end_io().
23138c2ecf20Sopenharmony_ci	 */
23148c2ecf20Sopenharmony_ci	rbio->generic_bio_cnt = 1;
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci	return rbio;
23178c2ecf20Sopenharmony_ci}
23188c2ecf20Sopenharmony_ci
23198c2ecf20Sopenharmony_ci/* Used for both parity scrub and missing. */
23208c2ecf20Sopenharmony_civoid raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
23218c2ecf20Sopenharmony_ci			    u64 logical)
23228c2ecf20Sopenharmony_ci{
23238c2ecf20Sopenharmony_ci	int stripe_offset;
23248c2ecf20Sopenharmony_ci	int index;
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_ci	ASSERT(logical >= rbio->bbio->raid_map[0]);
23278c2ecf20Sopenharmony_ci	ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
23288c2ecf20Sopenharmony_ci				rbio->stripe_len * rbio->nr_data);
23298c2ecf20Sopenharmony_ci	stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
23308c2ecf20Sopenharmony_ci	index = stripe_offset >> PAGE_SHIFT;
23318c2ecf20Sopenharmony_ci	rbio->bio_pages[index] = page;
23328c2ecf20Sopenharmony_ci}
23338c2ecf20Sopenharmony_ci
23348c2ecf20Sopenharmony_ci/*
23358c2ecf20Sopenharmony_ci * We just scrub the parity that we have correct data on the same horizontal,
23368c2ecf20Sopenharmony_ci * so we needn't allocate all pages for all the stripes.
23378c2ecf20Sopenharmony_ci */
23388c2ecf20Sopenharmony_cistatic int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
23398c2ecf20Sopenharmony_ci{
23408c2ecf20Sopenharmony_ci	int i;
23418c2ecf20Sopenharmony_ci	int bit;
23428c2ecf20Sopenharmony_ci	int index;
23438c2ecf20Sopenharmony_ci	struct page *page;
23448c2ecf20Sopenharmony_ci
23458c2ecf20Sopenharmony_ci	for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
23468c2ecf20Sopenharmony_ci		for (i = 0; i < rbio->real_stripes; i++) {
23478c2ecf20Sopenharmony_ci			index = i * rbio->stripe_npages + bit;
23488c2ecf20Sopenharmony_ci			if (rbio->stripe_pages[index])
23498c2ecf20Sopenharmony_ci				continue;
23508c2ecf20Sopenharmony_ci
23518c2ecf20Sopenharmony_ci			page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
23528c2ecf20Sopenharmony_ci			if (!page)
23538c2ecf20Sopenharmony_ci				return -ENOMEM;
23548c2ecf20Sopenharmony_ci			rbio->stripe_pages[index] = page;
23558c2ecf20Sopenharmony_ci		}
23568c2ecf20Sopenharmony_ci	}
23578c2ecf20Sopenharmony_ci	return 0;
23588c2ecf20Sopenharmony_ci}
23598c2ecf20Sopenharmony_ci
23608c2ecf20Sopenharmony_cistatic noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
23618c2ecf20Sopenharmony_ci					 int need_check)
23628c2ecf20Sopenharmony_ci{
23638c2ecf20Sopenharmony_ci	struct btrfs_bio *bbio = rbio->bbio;
23648c2ecf20Sopenharmony_ci	void **pointers = rbio->finish_pointers;
23658c2ecf20Sopenharmony_ci	unsigned long *pbitmap = rbio->finish_pbitmap;
23668c2ecf20Sopenharmony_ci	int nr_data = rbio->nr_data;
23678c2ecf20Sopenharmony_ci	int stripe;
23688c2ecf20Sopenharmony_ci	int pagenr;
23698c2ecf20Sopenharmony_ci	bool has_qstripe;
23708c2ecf20Sopenharmony_ci	struct page *p_page = NULL;
23718c2ecf20Sopenharmony_ci	struct page *q_page = NULL;
23728c2ecf20Sopenharmony_ci	struct bio_list bio_list;
23738c2ecf20Sopenharmony_ci	struct bio *bio;
23748c2ecf20Sopenharmony_ci	int is_replace = 0;
23758c2ecf20Sopenharmony_ci	int ret;
23768c2ecf20Sopenharmony_ci
23778c2ecf20Sopenharmony_ci	bio_list_init(&bio_list);
23788c2ecf20Sopenharmony_ci
23798c2ecf20Sopenharmony_ci	if (rbio->real_stripes - rbio->nr_data == 1)
23808c2ecf20Sopenharmony_ci		has_qstripe = false;
23818c2ecf20Sopenharmony_ci	else if (rbio->real_stripes - rbio->nr_data == 2)
23828c2ecf20Sopenharmony_ci		has_qstripe = true;
23838c2ecf20Sopenharmony_ci	else
23848c2ecf20Sopenharmony_ci		BUG();
23858c2ecf20Sopenharmony_ci
23868c2ecf20Sopenharmony_ci	if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
23878c2ecf20Sopenharmony_ci		is_replace = 1;
23888c2ecf20Sopenharmony_ci		bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
23898c2ecf20Sopenharmony_ci	}
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci	/*
23928c2ecf20Sopenharmony_ci	 * Because the higher layers(scrubber) are unlikely to
23938c2ecf20Sopenharmony_ci	 * use this area of the disk again soon, so don't cache
23948c2ecf20Sopenharmony_ci	 * it.
23958c2ecf20Sopenharmony_ci	 */
23968c2ecf20Sopenharmony_ci	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
23978c2ecf20Sopenharmony_ci
23988c2ecf20Sopenharmony_ci	if (!need_check)
23998c2ecf20Sopenharmony_ci		goto writeback;
24008c2ecf20Sopenharmony_ci
24018c2ecf20Sopenharmony_ci	p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
24028c2ecf20Sopenharmony_ci	if (!p_page)
24038c2ecf20Sopenharmony_ci		goto cleanup;
24048c2ecf20Sopenharmony_ci	SetPageUptodate(p_page);
24058c2ecf20Sopenharmony_ci
24068c2ecf20Sopenharmony_ci	if (has_qstripe) {
24078c2ecf20Sopenharmony_ci		/* RAID6, allocate and map temp space for the Q stripe */
24088c2ecf20Sopenharmony_ci		q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
24098c2ecf20Sopenharmony_ci		if (!q_page) {
24108c2ecf20Sopenharmony_ci			__free_page(p_page);
24118c2ecf20Sopenharmony_ci			goto cleanup;
24128c2ecf20Sopenharmony_ci		}
24138c2ecf20Sopenharmony_ci		SetPageUptodate(q_page);
24148c2ecf20Sopenharmony_ci		pointers[rbio->real_stripes - 1] = kmap(q_page);
24158c2ecf20Sopenharmony_ci	}
24168c2ecf20Sopenharmony_ci
24178c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
24188c2ecf20Sopenharmony_ci
24198c2ecf20Sopenharmony_ci	/* Map the parity stripe just once */
24208c2ecf20Sopenharmony_ci	pointers[nr_data] = kmap(p_page);
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
24238c2ecf20Sopenharmony_ci		struct page *p;
24248c2ecf20Sopenharmony_ci		void *parity;
24258c2ecf20Sopenharmony_ci		/* first collect one page from each data stripe */
24268c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < nr_data; stripe++) {
24278c2ecf20Sopenharmony_ci			p = page_in_rbio(rbio, stripe, pagenr, 0);
24288c2ecf20Sopenharmony_ci			pointers[stripe] = kmap(p);
24298c2ecf20Sopenharmony_ci		}
24308c2ecf20Sopenharmony_ci
24318c2ecf20Sopenharmony_ci		if (has_qstripe) {
24328c2ecf20Sopenharmony_ci			/* RAID6, call the library function to fill in our P/Q */
24338c2ecf20Sopenharmony_ci			raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
24348c2ecf20Sopenharmony_ci						pointers);
24358c2ecf20Sopenharmony_ci		} else {
24368c2ecf20Sopenharmony_ci			/* raid5 */
24378c2ecf20Sopenharmony_ci			copy_page(pointers[nr_data], pointers[0]);
24388c2ecf20Sopenharmony_ci			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
24398c2ecf20Sopenharmony_ci		}
24408c2ecf20Sopenharmony_ci
24418c2ecf20Sopenharmony_ci		/* Check scrubbing parity and repair it */
24428c2ecf20Sopenharmony_ci		p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
24438c2ecf20Sopenharmony_ci		parity = kmap(p);
24448c2ecf20Sopenharmony_ci		if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
24458c2ecf20Sopenharmony_ci			copy_page(parity, pointers[rbio->scrubp]);
24468c2ecf20Sopenharmony_ci		else
24478c2ecf20Sopenharmony_ci			/* Parity is right, needn't writeback */
24488c2ecf20Sopenharmony_ci			bitmap_clear(rbio->dbitmap, pagenr, 1);
24498c2ecf20Sopenharmony_ci		kunmap(p);
24508c2ecf20Sopenharmony_ci
24518c2ecf20Sopenharmony_ci		for (stripe = 0; stripe < nr_data; stripe++)
24528c2ecf20Sopenharmony_ci			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
24538c2ecf20Sopenharmony_ci	}
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	kunmap(p_page);
24568c2ecf20Sopenharmony_ci	__free_page(p_page);
24578c2ecf20Sopenharmony_ci	if (q_page) {
24588c2ecf20Sopenharmony_ci		kunmap(q_page);
24598c2ecf20Sopenharmony_ci		__free_page(q_page);
24608c2ecf20Sopenharmony_ci	}
24618c2ecf20Sopenharmony_ci
24628c2ecf20Sopenharmony_ciwriteback:
24638c2ecf20Sopenharmony_ci	/*
24648c2ecf20Sopenharmony_ci	 * time to start writing.  Make bios for everything from the
24658c2ecf20Sopenharmony_ci	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
24668c2ecf20Sopenharmony_ci	 * everything else.
24678c2ecf20Sopenharmony_ci	 */
24688c2ecf20Sopenharmony_ci	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
24698c2ecf20Sopenharmony_ci		struct page *page;
24708c2ecf20Sopenharmony_ci
24718c2ecf20Sopenharmony_ci		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
24728c2ecf20Sopenharmony_ci		ret = rbio_add_io_page(rbio, &bio_list,
24738c2ecf20Sopenharmony_ci			       page, rbio->scrubp, pagenr, rbio->stripe_len);
24748c2ecf20Sopenharmony_ci		if (ret)
24758c2ecf20Sopenharmony_ci			goto cleanup;
24768c2ecf20Sopenharmony_ci	}
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci	if (!is_replace)
24798c2ecf20Sopenharmony_ci		goto submit_write;
24808c2ecf20Sopenharmony_ci
24818c2ecf20Sopenharmony_ci	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
24828c2ecf20Sopenharmony_ci		struct page *page;
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
24858c2ecf20Sopenharmony_ci		ret = rbio_add_io_page(rbio, &bio_list, page,
24868c2ecf20Sopenharmony_ci				       bbio->tgtdev_map[rbio->scrubp],
24878c2ecf20Sopenharmony_ci				       pagenr, rbio->stripe_len);
24888c2ecf20Sopenharmony_ci		if (ret)
24898c2ecf20Sopenharmony_ci			goto cleanup;
24908c2ecf20Sopenharmony_ci	}
24918c2ecf20Sopenharmony_ci
24928c2ecf20Sopenharmony_cisubmit_write:
24938c2ecf20Sopenharmony_ci	nr_data = bio_list_size(&bio_list);
24948c2ecf20Sopenharmony_ci	if (!nr_data) {
24958c2ecf20Sopenharmony_ci		/* Every parity is right */
24968c2ecf20Sopenharmony_ci		rbio_orig_end_io(rbio, BLK_STS_OK);
24978c2ecf20Sopenharmony_ci		return;
24988c2ecf20Sopenharmony_ci	}
24998c2ecf20Sopenharmony_ci
25008c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, nr_data);
25018c2ecf20Sopenharmony_ci
25028c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list))) {
25038c2ecf20Sopenharmony_ci		bio->bi_private = rbio;
25048c2ecf20Sopenharmony_ci		bio->bi_end_io = raid_write_end_io;
25058c2ecf20Sopenharmony_ci		bio->bi_opf = REQ_OP_WRITE;
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci		submit_bio(bio);
25088c2ecf20Sopenharmony_ci	}
25098c2ecf20Sopenharmony_ci	return;
25108c2ecf20Sopenharmony_ci
25118c2ecf20Sopenharmony_cicleanup:
25128c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
25138c2ecf20Sopenharmony_ci
25148c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list)))
25158c2ecf20Sopenharmony_ci		bio_put(bio);
25168c2ecf20Sopenharmony_ci}
25178c2ecf20Sopenharmony_ci
25188c2ecf20Sopenharmony_cistatic inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
25198c2ecf20Sopenharmony_ci{
25208c2ecf20Sopenharmony_ci	if (stripe >= 0 && stripe < rbio->nr_data)
25218c2ecf20Sopenharmony_ci		return 1;
25228c2ecf20Sopenharmony_ci	return 0;
25238c2ecf20Sopenharmony_ci}
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_ci/*
25268c2ecf20Sopenharmony_ci * While we're doing the parity check and repair, we could have errors
25278c2ecf20Sopenharmony_ci * in reading pages off the disk.  This checks for errors and if we're
25288c2ecf20Sopenharmony_ci * not able to read the page it'll trigger parity reconstruction.  The
25298c2ecf20Sopenharmony_ci * parity scrub will be finished after we've reconstructed the failed
25308c2ecf20Sopenharmony_ci * stripes
25318c2ecf20Sopenharmony_ci */
25328c2ecf20Sopenharmony_cistatic void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
25338c2ecf20Sopenharmony_ci{
25348c2ecf20Sopenharmony_ci	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
25358c2ecf20Sopenharmony_ci		goto cleanup;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci	if (rbio->faila >= 0 || rbio->failb >= 0) {
25388c2ecf20Sopenharmony_ci		int dfail = 0, failp = -1;
25398c2ecf20Sopenharmony_ci
25408c2ecf20Sopenharmony_ci		if (is_data_stripe(rbio, rbio->faila))
25418c2ecf20Sopenharmony_ci			dfail++;
25428c2ecf20Sopenharmony_ci		else if (is_parity_stripe(rbio->faila))
25438c2ecf20Sopenharmony_ci			failp = rbio->faila;
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ci		if (is_data_stripe(rbio, rbio->failb))
25468c2ecf20Sopenharmony_ci			dfail++;
25478c2ecf20Sopenharmony_ci		else if (is_parity_stripe(rbio->failb))
25488c2ecf20Sopenharmony_ci			failp = rbio->failb;
25498c2ecf20Sopenharmony_ci
25508c2ecf20Sopenharmony_ci		/*
25518c2ecf20Sopenharmony_ci		 * Because we can not use a scrubbing parity to repair
25528c2ecf20Sopenharmony_ci		 * the data, so the capability of the repair is declined.
25538c2ecf20Sopenharmony_ci		 * (In the case of RAID5, we can not repair anything)
25548c2ecf20Sopenharmony_ci		 */
25558c2ecf20Sopenharmony_ci		if (dfail > rbio->bbio->max_errors - 1)
25568c2ecf20Sopenharmony_ci			goto cleanup;
25578c2ecf20Sopenharmony_ci
25588c2ecf20Sopenharmony_ci		/*
25598c2ecf20Sopenharmony_ci		 * If all data is good, only parity is correctly, just
25608c2ecf20Sopenharmony_ci		 * repair the parity.
25618c2ecf20Sopenharmony_ci		 */
25628c2ecf20Sopenharmony_ci		if (dfail == 0) {
25638c2ecf20Sopenharmony_ci			finish_parity_scrub(rbio, 0);
25648c2ecf20Sopenharmony_ci			return;
25658c2ecf20Sopenharmony_ci		}
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ci		/*
25688c2ecf20Sopenharmony_ci		 * Here means we got one corrupted data stripe and one
25698c2ecf20Sopenharmony_ci		 * corrupted parity on RAID6, if the corrupted parity
25708c2ecf20Sopenharmony_ci		 * is scrubbing parity, luckily, use the other one to repair
25718c2ecf20Sopenharmony_ci		 * the data, or we can not repair the data stripe.
25728c2ecf20Sopenharmony_ci		 */
25738c2ecf20Sopenharmony_ci		if (failp != rbio->scrubp)
25748c2ecf20Sopenharmony_ci			goto cleanup;
25758c2ecf20Sopenharmony_ci
25768c2ecf20Sopenharmony_ci		__raid_recover_end_io(rbio);
25778c2ecf20Sopenharmony_ci	} else {
25788c2ecf20Sopenharmony_ci		finish_parity_scrub(rbio, 1);
25798c2ecf20Sopenharmony_ci	}
25808c2ecf20Sopenharmony_ci	return;
25818c2ecf20Sopenharmony_ci
25828c2ecf20Sopenharmony_cicleanup:
25838c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
25848c2ecf20Sopenharmony_ci}
25858c2ecf20Sopenharmony_ci
25868c2ecf20Sopenharmony_ci/*
25878c2ecf20Sopenharmony_ci * end io for the read phase of the rmw cycle.  All the bios here are physical
25888c2ecf20Sopenharmony_ci * stripe bios we've read from the disk so we can recalculate the parity of the
25898c2ecf20Sopenharmony_ci * stripe.
25908c2ecf20Sopenharmony_ci *
25918c2ecf20Sopenharmony_ci * This will usually kick off finish_rmw once all the bios are read in, but it
25928c2ecf20Sopenharmony_ci * may trigger parity reconstruction if we had any errors along the way
25938c2ecf20Sopenharmony_ci */
25948c2ecf20Sopenharmony_cistatic void raid56_parity_scrub_end_io(struct bio *bio)
25958c2ecf20Sopenharmony_ci{
25968c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio = bio->bi_private;
25978c2ecf20Sopenharmony_ci
25988c2ecf20Sopenharmony_ci	if (bio->bi_status)
25998c2ecf20Sopenharmony_ci		fail_bio_stripe(rbio, bio);
26008c2ecf20Sopenharmony_ci	else
26018c2ecf20Sopenharmony_ci		set_bio_pages_uptodate(bio);
26028c2ecf20Sopenharmony_ci
26038c2ecf20Sopenharmony_ci	bio_put(bio);
26048c2ecf20Sopenharmony_ci
26058c2ecf20Sopenharmony_ci	if (!atomic_dec_and_test(&rbio->stripes_pending))
26068c2ecf20Sopenharmony_ci		return;
26078c2ecf20Sopenharmony_ci
26088c2ecf20Sopenharmony_ci	/*
26098c2ecf20Sopenharmony_ci	 * this will normally call finish_rmw to start our write
26108c2ecf20Sopenharmony_ci	 * but if there are any failed stripes we'll reconstruct
26118c2ecf20Sopenharmony_ci	 * from parity first
26128c2ecf20Sopenharmony_ci	 */
26138c2ecf20Sopenharmony_ci	validate_rbio_for_parity_scrub(rbio);
26148c2ecf20Sopenharmony_ci}
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_cistatic void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
26178c2ecf20Sopenharmony_ci{
26188c2ecf20Sopenharmony_ci	int bios_to_read = 0;
26198c2ecf20Sopenharmony_ci	struct bio_list bio_list;
26208c2ecf20Sopenharmony_ci	int ret;
26218c2ecf20Sopenharmony_ci	int pagenr;
26228c2ecf20Sopenharmony_ci	int stripe;
26238c2ecf20Sopenharmony_ci	struct bio *bio;
26248c2ecf20Sopenharmony_ci
26258c2ecf20Sopenharmony_ci	bio_list_init(&bio_list);
26268c2ecf20Sopenharmony_ci
26278c2ecf20Sopenharmony_ci	ret = alloc_rbio_essential_pages(rbio);
26288c2ecf20Sopenharmony_ci	if (ret)
26298c2ecf20Sopenharmony_ci		goto cleanup;
26308c2ecf20Sopenharmony_ci
26318c2ecf20Sopenharmony_ci	atomic_set(&rbio->error, 0);
26328c2ecf20Sopenharmony_ci	/*
26338c2ecf20Sopenharmony_ci	 * build a list of bios to read all the missing parts of this
26348c2ecf20Sopenharmony_ci	 * stripe
26358c2ecf20Sopenharmony_ci	 */
26368c2ecf20Sopenharmony_ci	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
26378c2ecf20Sopenharmony_ci		for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
26388c2ecf20Sopenharmony_ci			struct page *page;
26398c2ecf20Sopenharmony_ci			/*
26408c2ecf20Sopenharmony_ci			 * we want to find all the pages missing from
26418c2ecf20Sopenharmony_ci			 * the rbio and read them from the disk.  If
26428c2ecf20Sopenharmony_ci			 * page_in_rbio finds a page in the bio list
26438c2ecf20Sopenharmony_ci			 * we don't need to read it off the stripe.
26448c2ecf20Sopenharmony_ci			 */
26458c2ecf20Sopenharmony_ci			page = page_in_rbio(rbio, stripe, pagenr, 1);
26468c2ecf20Sopenharmony_ci			if (page)
26478c2ecf20Sopenharmony_ci				continue;
26488c2ecf20Sopenharmony_ci
26498c2ecf20Sopenharmony_ci			page = rbio_stripe_page(rbio, stripe, pagenr);
26508c2ecf20Sopenharmony_ci			/*
26518c2ecf20Sopenharmony_ci			 * the bio cache may have handed us an uptodate
26528c2ecf20Sopenharmony_ci			 * page.  If so, be happy and use it
26538c2ecf20Sopenharmony_ci			 */
26548c2ecf20Sopenharmony_ci			if (PageUptodate(page))
26558c2ecf20Sopenharmony_ci				continue;
26568c2ecf20Sopenharmony_ci
26578c2ecf20Sopenharmony_ci			ret = rbio_add_io_page(rbio, &bio_list, page,
26588c2ecf20Sopenharmony_ci				       stripe, pagenr, rbio->stripe_len);
26598c2ecf20Sopenharmony_ci			if (ret)
26608c2ecf20Sopenharmony_ci				goto cleanup;
26618c2ecf20Sopenharmony_ci		}
26628c2ecf20Sopenharmony_ci	}
26638c2ecf20Sopenharmony_ci
26648c2ecf20Sopenharmony_ci	bios_to_read = bio_list_size(&bio_list);
26658c2ecf20Sopenharmony_ci	if (!bios_to_read) {
26668c2ecf20Sopenharmony_ci		/*
26678c2ecf20Sopenharmony_ci		 * this can happen if others have merged with
26688c2ecf20Sopenharmony_ci		 * us, it means there is nothing left to read.
26698c2ecf20Sopenharmony_ci		 * But if there are missing devices it may not be
26708c2ecf20Sopenharmony_ci		 * safe to do the full stripe write yet.
26718c2ecf20Sopenharmony_ci		 */
26728c2ecf20Sopenharmony_ci		goto finish;
26738c2ecf20Sopenharmony_ci	}
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci	/*
26768c2ecf20Sopenharmony_ci	 * the bbio may be freed once we submit the last bio.  Make sure
26778c2ecf20Sopenharmony_ci	 * not to touch it after that
26788c2ecf20Sopenharmony_ci	 */
26798c2ecf20Sopenharmony_ci	atomic_set(&rbio->stripes_pending, bios_to_read);
26808c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list))) {
26818c2ecf20Sopenharmony_ci		bio->bi_private = rbio;
26828c2ecf20Sopenharmony_ci		bio->bi_end_io = raid56_parity_scrub_end_io;
26838c2ecf20Sopenharmony_ci		bio->bi_opf = REQ_OP_READ;
26848c2ecf20Sopenharmony_ci
26858c2ecf20Sopenharmony_ci		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
26868c2ecf20Sopenharmony_ci
26878c2ecf20Sopenharmony_ci		submit_bio(bio);
26888c2ecf20Sopenharmony_ci	}
26898c2ecf20Sopenharmony_ci	/* the actual write will happen once the reads are done */
26908c2ecf20Sopenharmony_ci	return;
26918c2ecf20Sopenharmony_ci
26928c2ecf20Sopenharmony_cicleanup:
26938c2ecf20Sopenharmony_ci	rbio_orig_end_io(rbio, BLK_STS_IOERR);
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_list)))
26968c2ecf20Sopenharmony_ci		bio_put(bio);
26978c2ecf20Sopenharmony_ci
26988c2ecf20Sopenharmony_ci	return;
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_cifinish:
27018c2ecf20Sopenharmony_ci	validate_rbio_for_parity_scrub(rbio);
27028c2ecf20Sopenharmony_ci}
27038c2ecf20Sopenharmony_ci
27048c2ecf20Sopenharmony_cistatic void scrub_parity_work(struct btrfs_work *work)
27058c2ecf20Sopenharmony_ci{
27068c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
27078c2ecf20Sopenharmony_ci
27088c2ecf20Sopenharmony_ci	rbio = container_of(work, struct btrfs_raid_bio, work);
27098c2ecf20Sopenharmony_ci	raid56_parity_scrub_stripe(rbio);
27108c2ecf20Sopenharmony_ci}
27118c2ecf20Sopenharmony_ci
27128c2ecf20Sopenharmony_civoid raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
27138c2ecf20Sopenharmony_ci{
27148c2ecf20Sopenharmony_ci	if (!lock_stripe_add(rbio))
27158c2ecf20Sopenharmony_ci		start_async_work(rbio, scrub_parity_work);
27168c2ecf20Sopenharmony_ci}
27178c2ecf20Sopenharmony_ci
27188c2ecf20Sopenharmony_ci/* The following code is used for dev replace of a missing RAID 5/6 device. */
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_cistruct btrfs_raid_bio *
27218c2ecf20Sopenharmony_ciraid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
27228c2ecf20Sopenharmony_ci			  struct btrfs_bio *bbio, u64 length)
27238c2ecf20Sopenharmony_ci{
27248c2ecf20Sopenharmony_ci	struct btrfs_raid_bio *rbio;
27258c2ecf20Sopenharmony_ci
27268c2ecf20Sopenharmony_ci	rbio = alloc_rbio(fs_info, bbio, length);
27278c2ecf20Sopenharmony_ci	if (IS_ERR(rbio))
27288c2ecf20Sopenharmony_ci		return NULL;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
27318c2ecf20Sopenharmony_ci	bio_list_add(&rbio->bio_list, bio);
27328c2ecf20Sopenharmony_ci	/*
27338c2ecf20Sopenharmony_ci	 * This is a special bio which is used to hold the completion handler
27348c2ecf20Sopenharmony_ci	 * and make the scrub rbio is similar to the other types
27358c2ecf20Sopenharmony_ci	 */
27368c2ecf20Sopenharmony_ci	ASSERT(!bio->bi_iter.bi_size);
27378c2ecf20Sopenharmony_ci
27388c2ecf20Sopenharmony_ci	rbio->faila = find_logical_bio_stripe(rbio, bio);
27398c2ecf20Sopenharmony_ci	if (rbio->faila == -1) {
27408c2ecf20Sopenharmony_ci		BUG();
27418c2ecf20Sopenharmony_ci		kfree(rbio);
27428c2ecf20Sopenharmony_ci		return NULL;
27438c2ecf20Sopenharmony_ci	}
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci	/*
27468c2ecf20Sopenharmony_ci	 * When we get bbio, we have already increased bio_counter, record it
27478c2ecf20Sopenharmony_ci	 * so we can free it at rbio_orig_end_io()
27488c2ecf20Sopenharmony_ci	 */
27498c2ecf20Sopenharmony_ci	rbio->generic_bio_cnt = 1;
27508c2ecf20Sopenharmony_ci
27518c2ecf20Sopenharmony_ci	return rbio;
27528c2ecf20Sopenharmony_ci}
27538c2ecf20Sopenharmony_ci
27548c2ecf20Sopenharmony_civoid raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
27558c2ecf20Sopenharmony_ci{
27568c2ecf20Sopenharmony_ci	if (!lock_stripe_add(rbio))
27578c2ecf20Sopenharmony_ci		start_async_work(rbio, read_rebuild_work);
27588c2ecf20Sopenharmony_ci}
2759