// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Fusion-io  All rights reserved.
 * Copyright (C) 2012 Intel Corp. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"

/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * set when this rbio is sitting in the hash, but it is just a cache
 * of past RMW
 */
#define RBIO_CACHE_BIT		2

/*
 * set when it is safe to trust the stripe_pages for caching
 */
#define RBIO_CACHE_READY_BIT	3

#define RBIO_CACHE_SIZE 1024

#define BTRFS_STRIPE_HASH_TABLE_BITS	11

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};

struct btrfs_raid_bio {
	struct btrfs_fs_info *fs_info;
	struct btrfs_bio *bbio;

	/* while we're doing rmw on a stripe
	 * we put it into a hash table so we can
	 * lock the stripe and merge more rbios
	 * into it.
	 */
	struct list_head hash_list;

	/*
	 * LRU list for the stripe cache
	 */
	struct list_head stripe_cache;

	/*
	 * for scheduling work in the helper threads
	 */
	struct btrfs_work work;

	/*
	 * bio list and bio_list_lock are used
	 * to add more bios into the stripe
	 * in hopes of avoiding the full rmw
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/* also protected by the bio_list_lock, the
	 * plug list is used by the plugging code
	 * to collect partial bios while plugged.  The
	 * stripe locking code also uses it to hand off
	 * the stripe lock to the next pending IO
	 */
	struct list_head plug_list;

	/*
	 * flags that tell us if it is safe to
	 * merge with this bio
	 */
	unsigned long flags;

	/* size of each individual stripe on disk */
	int stripe_len;

	/* number of data stripes (no p/q) */
	int nr_data;

	int real_stripes;

	int stripe_npages;
	/*
	 * set if we're doing a parity rebuild
	 * for a read from higher up, which is handled
	 * differently from a parity rebuild as part of
	 * rmw
	 */
	enum btrfs_rbio_ops operation;

	/* first bad stripe */
	int faila;

	/* second bad stripe (for raid6 use) */
	int failb;

	int scrubp;
	/*
	 * number of pages needed to represent the full
	 * stripe
	 */
	int nr_pages;

	/*
	 * size of all the bios in the bio_list.  This
	 * helps us decide if the rbio maps to a full
	 * stripe or not
	 */
	int bio_list_bytes;

	int generic_bio_cnt;

	refcount_t refs;

	atomic_t stripes_pending;

	atomic_t error;
	/*
	 * these are two arrays of pointers.  We allocate the
	 * rbio big enough to hold them both and setup their
	 * locations when the rbio is allocated
	 */

	/* pointers to pages that we allocated for
	 * reading/writing stripes directly from the disk (including P/Q)
	 */
	struct page **stripe_pages;

	/*
	 * pointers to the pages in the bio_list.  Stored
	 * here for faster lookup
	 */
	struct page **bio_pages;

	/*
	 * bitmap to record which horizontal stripe has data
	 */
	unsigned long *dbitmap;

	/* allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/* allocated with stripe_npages-many bits for finish_*() calls */
	unsigned long *finish_pbitmap;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check);
static void scrub_parity_work(struct btrfs_work *work);

static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
}

/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, starting with order 4 and can go as high as
	 * order 7 in case lock debugging is turned on.
	 *
	 * Try harder to allocate and fallback to vmalloc to lower the chance
	 * of a failing mount.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	if (x)
		kvfree(x);
	return 0;
}

/*
 * caching an rbio means to copy anything from the
 * bio_pages array into the stripe_pages array.  We
 * use the page uptodate bit in the stripe cache array
 * to indicate if it has valid data
 *
 * once the caching is done, we set the cache ready
 * bit.
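 *
 * pages that never had a matching entry in the bio_pages array are
 * skipped, so only data that actually came from the bio list is copied
 * and marked uptodate.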
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	char *s;
	char *d;
	int ret;

	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (!rbio->bio_pages[i])
			continue;

		s = kmap(rbio->bio_pages[i]);
		d = kmap(rbio->stripe_pages[i]);

		copy_page(d, s);

		kunmap(rbio->bio_pages[i]);
		kunmap(rbio->stripe_pages[i]);
		SetPageUptodate(rbio->stripe_pages[i]);
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/*
 * we hash on the first logical address of the stripe
 */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bbio->raid_map[0];

	/*
	 * we shift down quite a bit.  We're using byte
	 * addressing, and most of the lower bits are zeros.
	 * This tends to upset hash_64, and it consistently
	 * returns just one or two different values.
	 *
	 * shifting off the lower bits fixes things.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}

/*
 * stealing an rbio means taking all the uptodate pages from the stripe
 * array in the source rbio and putting them into the destination rbio
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;
	struct page *d;

	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !PageUptodate(s)) {
			continue;
		}

		d = dest->stripe_pages[i];
		if (d)
			__free_page(d);

		dest->stripe_pages[i] = s;
		src->stripe_pages[i] = NULL;
	}
}

/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination.  The victim should
 * be discarded afterwards.
 *
 * must be called with dest->bio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	/* Also inherit the bitmaps from @victim. */
	bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
		  dest->stripe_npages);
	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

/*
 * used to prune items that are in the cache.  The caller
 * must hold the hash table lock.
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/* hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/* if the bio list isn't empty, this rbio is
		 * still involved in an IO.  We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_table, and drop
		 * the corresponding ref
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				refcount_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	if (freeit)
		__free_raid_bio(rbio);
}

/*
 * prune a given rbio from the cache
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	__remove_rbio_from_cache(rbio);
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove all cached entries and free the hash table
 * used by unmount
 */
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
{
	if (!info->stripe_hash_table)
		return;
	btrfs_clear_rbio_cache(info);
	kvfree(info->stripe_hash_table);
	info->stripe_hash_table = NULL;
}

/*
 * insert an rbio into the stripe cache.  It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
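 * The victim comes from the tail of the LRU list, and we never evict
 * the rbio that is currently being inserted.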
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * helper function to run the xor_blocks api.  It is only
 * able to do MAX_XOR_BLOCKS at a time, so we need to
 * loop through.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}

/*
 * Returns true if the bio list inside this rbio covers an entire stripe (no
 * rmw required).
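 *
 * "Full" means bio_list_bytes equals nr_data * stripe_len, i.e. every data
 * page in the stripe is covered by the pending bios.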
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long flags;
	unsigned long size = rbio->bio_list_bytes;
	int ret = 1;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);
	if (size != rbio->nr_data * rbio->stripe_len)
		ret = 0;
	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);

	return ret;
}

/*
 * returns 1 if it is safe to merge two rbios together.
 * The merging is safe if the two rbios correspond to
 * the same stripe and if they are both going in the same
 * direction (read vs write), and if neither one is
 * locked for final IO
 *
 * The caller is responsible for locking such that
 * rmw_locked is safe to test
 */
static int rbio_can_merge(struct btrfs_raid_bio *last,
			  struct btrfs_raid_bio *cur)
{
	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
		return 0;

	/*
	 * we can't merge with cached rbios, since the
	 * idea is that when we merge the destination
	 * rbio is going to run our IO for us.  We can
	 * steal from cached rbios though, other functions
	 * handle that.
	 */
	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
	    test_bit(RBIO_CACHE_BIT, &cur->flags))
		return 0;

	if (last->bbio->raid_map[0] !=
	    cur->bbio->raid_map[0])
		return 0;

	/* we can't merge with different operations */
	if (last->operation != cur->operation)
		return 0;
	/*
	 * We need to read the full stripe from the drive, check and repair
	 * the parity and write the new results.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;

	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
		return 0;

	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
		int fa = last->faila;
		int fb = last->failb;
		int cur_fa = cur->faila;
		int cur_fb = cur->failb;

		if (last->faila >= last->failb) {
			fa = last->failb;
			fb = last->faila;
		}

		if (cur->faila >= cur->failb) {
			cur_fa = cur->failb;
			cur_fb = cur->faila;
		}

		if (fa != cur_fa || fb != cur_fb)
			return 0;
	}
	return 1;
}

static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				  int index)
{
	return stripe * rbio->stripe_npages + index;
}

/*
 * these are just the pages from the rbio array, not from anything
 * the FS sent down to us
 */
static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
				     int index)
{
	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
}

/*
 * helper to index into the pstripe
 */
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	return rbio_stripe_page(rbio, rbio->nr_data, index);
}

/*
 * helper to index into the qstripe, returns null
 * if there is no qstripe
 */
static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;
	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}

/*
 * The first stripe in the table for a logical address
 * has the lock.  rbios are added in one of three ways:
 *
 * 1) Nobody has the stripe locked yet.  The rbio is given
 * the lock and 0 is returned.  The caller must start the IO
 * themselves.
 *
 * 2) Someone has the stripe locked, but we're able to merge
 * with the lock owner.  The rbio is freed and the IO will
 * start automatically along with the existing rbio.  1 is returned.
 *
 * 3) Someone has the stripe locked, but we're not able to merge.
 * The rbio is added to the lock owner's plug list, or merged into
 * an rbio already on the plug list.  When the lock owner unlocks,
 * the next rbio on the list is run and the IO is started automatically.
 * 1 is returned
 *
 * If we return 0, the caller still owns the rbio and must continue with
 * IO submission.  If we return 1, the caller must assume the rbio has
 * already been freed.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash *h;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	unsigned long flags;
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;

	h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

	spin_lock_irqsave(&h->lock, flags);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
			continue;

		spin_lock(&cur->bio_list_lock);

		/* Can we steal this cached rbio's pages? */
		if (bio_list_empty(&cur->bio_list) &&
		    list_empty(&cur->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
			list_del_init(&cur->hash_list);
			refcount_dec(&cur->refs);

			steal_rbio(cur, rbio);
			cache_drop = cur;
			spin_unlock(&cur->bio_list_lock);

			goto lockit;
		}

		/* Can we merge into the lock owner? */
		if (rbio_can_merge(cur, rbio)) {
			merge_rbio(cur, rbio);
			spin_unlock(&cur->bio_list_lock);
			freeit = rbio;
			ret = 1;
			goto out;
		}

		/*
		 * We couldn't merge with the running rbio, see if we can merge
		 * with the pending ones.  We don't have to check for rmw_locked
		 * because there is no way they are inside finish_rmw right now
		 */
		list_for_each_entry(pending, &cur->plug_list, plug_list) {
			if (rbio_can_merge(pending, rbio)) {
				merge_rbio(pending, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}
		}

		/*
		 * No merging, put us on the tail of the plug list, our rbio
		 * will be started when the currently running rbio unlocks
		 */
		list_add_tail(&rbio->plug_list, &cur->plug_list);
		spin_unlock(&cur->bio_list_lock);
		ret = 1;
		goto out;
	}
lockit:
	refcount_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock_irqrestore(&h->lock, flags);
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		__free_raid_bio(freeit);
	return ret;
}

/*
 * called as rmw or parity rebuild is completed.  If the plug list has more
 * rbios waiting for this stripe, the next one on the list will be started
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	unsigned long flags;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->fs_info->stripe_hash_table->table + bucket;

	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	spin_lock_irqsave(&h->lock, flags);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * if we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		refcount_dec(&rbio->refs);
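		/*
		 * this drops the ref that was held for the hash list; the
		 * caller still owns its own ref, so the rbio stays valid
		 * for the rest of this function
		 */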

		/*
		 * we use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			list_add(&next->hash_list, &h->hash_list);
			refcount_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

			if (next->operation == BTRFS_RBIO_READ_REBUILD)
				start_async_work(next, read_rebuild_work);
			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				steal_rbio(rbio, next);
				start_async_work(next, read_rebuild_work);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_work);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_parity_work);
			}

			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock_irqrestore(&h->lock, flags);

done_nolock:
	if (!keep_cache)
		remove_rbio_from_cache(rbio);
}

static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;

	if (!refcount_dec_and_test(&rbio->refs))
		return;

	WARN_ON(!list_empty(&rbio->stripe_cache));
	WARN_ON(!list_empty(&rbio->hash_list));
	WARN_ON(!bio_list_empty(&rbio->bio_list));

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i]) {
			__free_page(rbio->stripe_pages[i]);
			rbio->stripe_pages[i] = NULL;
		}
	}

	btrfs_put_bbio(rbio->bbio);
	kfree(rbio);
}

static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
	struct bio *next;

	while (cur) {
		next = cur->bi_next;
		cur->bi_next = NULL;
		cur->bi_status = err;
		bio_endio(cur);
		cur = next;
	}
}

/*
 * this frees the rbio and runs through all the bios in the
 * bio_list and calls end_io on them
 */
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *extra;

	if (rbio->generic_bio_cnt)
		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
	/*
	 * Clear the data bitmap, as the rbio may be cached for later usage.
	 * Do this before unlock_stripe() so there will be no new bio for this
	 * rbio.
	 */
	bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);

	/*
	 * At this moment, rbio->bio_list is empty, however since rbio does not
	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
	 * hash list, rbio may be merged with others so that rbio->bio_list
	 * becomes non-empty.
	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
	 * more and we can call bio_endio() on all queued bios.
	 */
	unlock_stripe(rbio);
	extra = bio_list_get(&rbio->bio_list);
	__free_raid_bio(rbio);

	rbio_endio_bio_list(cur, err);
	if (extra)
		rbio_endio_bio_list(extra, err);
}

/*
 * end io function used by finish_rmw.  When we finally
 * get here, we've written a full stripe
 */
static void raid_write_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;
	blk_status_t err = bio->bi_status;
	int max_errors;

	if (err)
		fail_bio_stripe(rbio, bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = BLK_STS_OK;

	/* OK, we have written all the stripes we need to. */
	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
		     0 : rbio->bbio->max_errors;
	if (atomic_read(&rbio->error) > max_errors)
		err = BLK_STS_IOERR;

	rbio_orig_end_io(rbio, err);
}

/*
 * the read/modify/write code wants to use the original bio for
 * any pages it included, and then use the rbio for everything
 * else.  This function decides if a given index (stripe number)
 * and page number in that stripe fall inside the original bio
 * or the rbio.
 *
 * if you set bio_list_only, you'll get a NULL back for any ranges
 * that are outside the bio_list
 *
 * This doesn't take any refs on anything, you get a bare page pointer
 * and the caller must bump refs as required.
 *
 * You must call index_rbio_pages once before you can trust
 * the answers from this function.
 */
static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
				 int index, int pagenr, int bio_list_only)
{
	int chunk_page;
	struct page *p = NULL;

	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;

	spin_lock_irq(&rbio->bio_list_lock);
	p = rbio->bio_pages[chunk_page];
	spin_unlock_irq(&rbio->bio_list_lock);

	if (p || bio_list_only)
		return p;

	return rbio->stripe_pages[chunk_page];
}

/*
 * number of pages we need for the entire stripe across all the
 * drives
 */
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
}

/*
 * allocation and initial setup for the btrfs_raid_bio.  Note that
 * this does not allocate any pages for rbio->pages.
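 *
 * The stripe_pages, bio_pages and finish_pointers arrays and the two
 * bitmaps all live in this one allocation, directly after the struct.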
 */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
					 struct btrfs_bio *bbio,
					 u64 stripe_len)
{
	struct btrfs_raid_bio *rbio;
	int nr_data = 0;
	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
	void *p;

	rbio = kzalloc(sizeof(*rbio) +
		       sizeof(*rbio->stripe_pages) * num_pages +
		       sizeof(*rbio->bio_pages) * num_pages +
		       sizeof(*rbio->finish_pointers) * real_stripes +
		       sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
		       sizeof(*rbio->finish_pbitmap) *
				BITS_TO_LONGS(stripe_npages),
		       GFP_NOFS);
	if (!rbio)
		return ERR_PTR(-ENOMEM);

	bio_list_init(&rbio->bio_list);
	INIT_LIST_HEAD(&rbio->plug_list);
	spin_lock_init(&rbio->bio_list_lock);
	INIT_LIST_HEAD(&rbio->stripe_cache);
	INIT_LIST_HEAD(&rbio->hash_list);
	rbio->bbio = bbio;
	rbio->fs_info = fs_info;
	rbio->stripe_len = stripe_len;
	rbio->nr_pages = num_pages;
	rbio->real_stripes = real_stripes;
	rbio->stripe_npages = stripe_npages;
	rbio->faila = -1;
	rbio->failb = -1;
	refcount_set(&rbio->refs, 1);
	atomic_set(&rbio->error, 0);
	atomic_set(&rbio->stripes_pending, 0);

	/*
	 * the stripe_pages, bio_pages, etc arrays point to the extra
	 * memory we allocated past the end of the rbio
	 */
	p = rbio + 1;
#define CONSUME_ALLOC(ptr, count)	do {				\
		ptr = p;						\
		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
	} while (0)
	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
	CONSUME_ALLOC(rbio->bio_pages, num_pages);
	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
	CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
	CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
#undef CONSUME_ALLOC

	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		nr_data = real_stripes - 1;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		nr_data = real_stripes - 2;
	else
		BUG();

	rbio->nr_data = nr_data;
	return rbio;
}

/* allocate pages for all the stripes in the bio, including parity */
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	struct page *page;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i])
			continue;
		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
		rbio->stripe_pages[i] = page;
	}
	return 0;
}

/* only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	struct page *page;

	i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);

	for (; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i])
			continue;
		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
		rbio->stripe_pages[i] = page;
	}
	return 0;
}

/*
 * add a single page from a specific stripe into our list of bios for IO
 * this will try to merge into existing bios if possible, and returns
 * zero if all went well.
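 *
 * If the device backing this stripe is missing, the stripe is marked as
 * failed via fail_rbio_index() instead of queueing any IO for it.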
 */
static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
			    struct bio_list *bio_list,
			    struct page *page,
			    int stripe_nr,
			    unsigned long page_index,
			    unsigned long bio_max_len)
{
	struct bio *last = bio_list->tail;
	int ret;
	struct bio *bio;
	struct btrfs_bio_stripe *stripe;
	u64 disk_start;

	stripe = &rbio->bbio->stripes[stripe_nr];
	disk_start = stripe->physical + (page_index << PAGE_SHIFT);

	/* if the device is missing, just fail this stripe */
	if (!stripe->dev->bdev)
		return fail_rbio_index(rbio, stripe_nr);

	/* see if we can add this page onto our existing bio */
	if (last) {
		u64 last_end = (u64)last->bi_iter.bi_sector << 9;
		last_end += last->bi_iter.bi_size;

		/*
		 * we can't merge these if they are from different
		 * devices or if they are not contiguous
		 */
		if (last_end == disk_start && !last->bi_status &&
		    last->bi_disk == stripe->dev->bdev->bd_disk &&
		    last->bi_partno == stripe->dev->bdev->bd_partno) {
			ret = bio_add_page(last, page, PAGE_SIZE, 0);
			if (ret == PAGE_SIZE)
				return 0;
		}
	}

	/* put a new bio on the list */
	bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
	btrfs_io_bio(bio)->device = stripe->dev;
	bio->bi_iter.bi_size = 0;
	bio_set_dev(bio, stripe->dev->bdev);
	bio->bi_iter.bi_sector = disk_start >> 9;

	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_list_add(bio_list, bio);
	return 0;
}

/*
 * while we're doing the read/modify/write cycle, we could
 * have errors in reading pages off the disk.  This checks
 * for errors and if we're not able to read the page it'll
 * trigger parity reconstruction.  The rmw will be finished
The rmw will be finished 11428c2ecf20Sopenharmony_ci * after we've reconstructed the failed stripes 11438c2ecf20Sopenharmony_ci */ 11448c2ecf20Sopenharmony_cistatic void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) 11458c2ecf20Sopenharmony_ci{ 11468c2ecf20Sopenharmony_ci if (rbio->faila >= 0 || rbio->failb >= 0) { 11478c2ecf20Sopenharmony_ci BUG_ON(rbio->faila == rbio->real_stripes - 1); 11488c2ecf20Sopenharmony_ci __raid56_parity_recover(rbio); 11498c2ecf20Sopenharmony_ci } else { 11508c2ecf20Sopenharmony_ci finish_rmw(rbio); 11518c2ecf20Sopenharmony_ci } 11528c2ecf20Sopenharmony_ci} 11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_ci/* 11558c2ecf20Sopenharmony_ci * helper function to walk our bio list and populate the bio_pages array with 11568c2ecf20Sopenharmony_ci * the result. This seems expensive, but it is faster than constantly 11578c2ecf20Sopenharmony_ci * searching through the bio list as we setup the IO in finish_rmw or stripe 11588c2ecf20Sopenharmony_ci * reconstruction. 11598c2ecf20Sopenharmony_ci * 11608c2ecf20Sopenharmony_ci * This must be called before you trust the answers from page_in_rbio 11618c2ecf20Sopenharmony_ci */ 11628c2ecf20Sopenharmony_cistatic void index_rbio_pages(struct btrfs_raid_bio *rbio) 11638c2ecf20Sopenharmony_ci{ 11648c2ecf20Sopenharmony_ci struct bio *bio; 11658c2ecf20Sopenharmony_ci u64 start; 11668c2ecf20Sopenharmony_ci unsigned long stripe_offset; 11678c2ecf20Sopenharmony_ci unsigned long page_index; 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci spin_lock_irq(&rbio->bio_list_lock); 11708c2ecf20Sopenharmony_ci bio_list_for_each(bio, &rbio->bio_list) { 11718c2ecf20Sopenharmony_ci struct bio_vec bvec; 11728c2ecf20Sopenharmony_ci struct bvec_iter iter; 11738c2ecf20Sopenharmony_ci int i = 0; 11748c2ecf20Sopenharmony_ci 11758c2ecf20Sopenharmony_ci start = (u64)bio->bi_iter.bi_sector << 9; 11768c2ecf20Sopenharmony_ci stripe_offset = start - rbio->bbio->raid_map[0]; 11778c2ecf20Sopenharmony_ci page_index = stripe_offset >> PAGE_SHIFT; 11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci if (bio_flagged(bio, BIO_CLONED)) 11808c2ecf20Sopenharmony_ci bio->bi_iter = btrfs_io_bio(bio)->iter; 11818c2ecf20Sopenharmony_ci 11828c2ecf20Sopenharmony_ci bio_for_each_segment(bvec, bio, iter) { 11838c2ecf20Sopenharmony_ci rbio->bio_pages[page_index + i] = bvec.bv_page; 11848c2ecf20Sopenharmony_ci i++; 11858c2ecf20Sopenharmony_ci } 11868c2ecf20Sopenharmony_ci } 11878c2ecf20Sopenharmony_ci spin_unlock_irq(&rbio->bio_list_lock); 11888c2ecf20Sopenharmony_ci} 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci/* 11918c2ecf20Sopenharmony_ci * this is called from one of two situations. We either 11928c2ecf20Sopenharmony_ci * have a full stripe from the higher layers, or we've read all 11938c2ecf20Sopenharmony_ci * the missing bits off disk. 11948c2ecf20Sopenharmony_ci * 11958c2ecf20Sopenharmony_ci * This will calculate the parity and then send down any 11968c2ecf20Sopenharmony_ci * changed blocks. 
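 *
 * Roughly, for raid5 the parity page is just the xor of the data pages in
 * the same row:
 *
 *   P = D0 ^ D1 ^ ... ^ D(nr_data - 1)
 *
 * while for raid6 the q page is the GF(2^8) Reed-Solomon syndrome that
 * raid6_call.gen_syndrome() computes over that row.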
11978c2ecf20Sopenharmony_ci */ 11988c2ecf20Sopenharmony_cistatic noinline void finish_rmw(struct btrfs_raid_bio *rbio) 11998c2ecf20Sopenharmony_ci{ 12008c2ecf20Sopenharmony_ci struct btrfs_bio *bbio = rbio->bbio; 12018c2ecf20Sopenharmony_ci void **pointers = rbio->finish_pointers; 12028c2ecf20Sopenharmony_ci int nr_data = rbio->nr_data; 12038c2ecf20Sopenharmony_ci int stripe; 12048c2ecf20Sopenharmony_ci int pagenr; 12058c2ecf20Sopenharmony_ci bool has_qstripe; 12068c2ecf20Sopenharmony_ci struct bio_list bio_list; 12078c2ecf20Sopenharmony_ci struct bio *bio; 12088c2ecf20Sopenharmony_ci int ret; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci bio_list_init(&bio_list); 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_ci if (rbio->real_stripes - rbio->nr_data == 1) 12138c2ecf20Sopenharmony_ci has_qstripe = false; 12148c2ecf20Sopenharmony_ci else if (rbio->real_stripes - rbio->nr_data == 2) 12158c2ecf20Sopenharmony_ci has_qstripe = true; 12168c2ecf20Sopenharmony_ci else 12178c2ecf20Sopenharmony_ci BUG(); 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci /* We should have at least one data sector. */ 12208c2ecf20Sopenharmony_ci ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages)); 12218c2ecf20Sopenharmony_ci 12228c2ecf20Sopenharmony_ci /* at this point we either have a full stripe, 12238c2ecf20Sopenharmony_ci * or we've read the full stripe from the drive. 12248c2ecf20Sopenharmony_ci * recalculate the parity and write the new results. 12258c2ecf20Sopenharmony_ci * 12268c2ecf20Sopenharmony_ci * We're not allowed to add any new bios to the 12278c2ecf20Sopenharmony_ci * bio list here, anyone else that wants to 12288c2ecf20Sopenharmony_ci * change this stripe needs to do their own rmw. 12298c2ecf20Sopenharmony_ci */ 12308c2ecf20Sopenharmony_ci spin_lock_irq(&rbio->bio_list_lock); 12318c2ecf20Sopenharmony_ci set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 12328c2ecf20Sopenharmony_ci spin_unlock_irq(&rbio->bio_list_lock); 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci atomic_set(&rbio->error, 0); 12358c2ecf20Sopenharmony_ci 12368c2ecf20Sopenharmony_ci /* 12378c2ecf20Sopenharmony_ci * now that we've set rmw_locked, run through the 12388c2ecf20Sopenharmony_ci * bio list one last time and map the page pointers 12398c2ecf20Sopenharmony_ci * 12408c2ecf20Sopenharmony_ci * We don't cache full rbios because we're assuming 12418c2ecf20Sopenharmony_ci * the higher layers are unlikely to use this area of 12428c2ecf20Sopenharmony_ci * the disk again soon. If they do use it again, 12438c2ecf20Sopenharmony_ci * hopefully they will send another full bio. 
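 *
 * (In other words, only partial rbios get their pages copied into the
 *  stripe cache below; a full-stripe write simply drops the cache-ready
 *  bit instead.)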
12448c2ecf20Sopenharmony_ci */ 12458c2ecf20Sopenharmony_ci index_rbio_pages(rbio); 12468c2ecf20Sopenharmony_ci if (!rbio_is_full(rbio)) 12478c2ecf20Sopenharmony_ci cache_rbio_pages(rbio); 12488c2ecf20Sopenharmony_ci else 12498c2ecf20Sopenharmony_ci clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 12508c2ecf20Sopenharmony_ci 12518c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 12528c2ecf20Sopenharmony_ci struct page *p; 12538c2ecf20Sopenharmony_ci /* first collect one page from each data stripe */ 12548c2ecf20Sopenharmony_ci for (stripe = 0; stripe < nr_data; stripe++) { 12558c2ecf20Sopenharmony_ci p = page_in_rbio(rbio, stripe, pagenr, 0); 12568c2ecf20Sopenharmony_ci pointers[stripe] = kmap(p); 12578c2ecf20Sopenharmony_ci } 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci /* then add the parity stripe */ 12608c2ecf20Sopenharmony_ci p = rbio_pstripe_page(rbio, pagenr); 12618c2ecf20Sopenharmony_ci SetPageUptodate(p); 12628c2ecf20Sopenharmony_ci pointers[stripe++] = kmap(p); 12638c2ecf20Sopenharmony_ci 12648c2ecf20Sopenharmony_ci if (has_qstripe) { 12658c2ecf20Sopenharmony_ci 12668c2ecf20Sopenharmony_ci /* 12678c2ecf20Sopenharmony_ci * raid6, add the qstripe and call the 12688c2ecf20Sopenharmony_ci * library function to fill in our p/q 12698c2ecf20Sopenharmony_ci */ 12708c2ecf20Sopenharmony_ci p = rbio_qstripe_page(rbio, pagenr); 12718c2ecf20Sopenharmony_ci SetPageUptodate(p); 12728c2ecf20Sopenharmony_ci pointers[stripe++] = kmap(p); 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 12758c2ecf20Sopenharmony_ci pointers); 12768c2ecf20Sopenharmony_ci } else { 12778c2ecf20Sopenharmony_ci /* raid5 */ 12788c2ecf20Sopenharmony_ci copy_page(pointers[nr_data], pointers[0]); 12798c2ecf20Sopenharmony_ci run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 12808c2ecf20Sopenharmony_ci } 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) 12848c2ecf20Sopenharmony_ci kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 12858c2ecf20Sopenharmony_ci } 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_ci /* 12888c2ecf20Sopenharmony_ci * time to start writing. Make bios for everything from the 12898c2ecf20Sopenharmony_ci * higher layers (the bio_list in our rbio) and our p/q. Ignore 12908c2ecf20Sopenharmony_ci * everything else. 12918c2ecf20Sopenharmony_ci */ 12928c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 12938c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 12948c2ecf20Sopenharmony_ci struct page *page; 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci /* This vertical stripe has no data, skip it. 
*/ 12978c2ecf20Sopenharmony_ci if (!test_bit(pagenr, rbio->dbitmap)) 12988c2ecf20Sopenharmony_ci continue; 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci if (stripe < rbio->nr_data) { 13018c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 1); 13028c2ecf20Sopenharmony_ci if (!page) 13038c2ecf20Sopenharmony_ci continue; 13048c2ecf20Sopenharmony_ci } else { 13058c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr); 13068c2ecf20Sopenharmony_ci } 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, 13098c2ecf20Sopenharmony_ci page, stripe, pagenr, rbio->stripe_len); 13108c2ecf20Sopenharmony_ci if (ret) 13118c2ecf20Sopenharmony_ci goto cleanup; 13128c2ecf20Sopenharmony_ci } 13138c2ecf20Sopenharmony_ci } 13148c2ecf20Sopenharmony_ci 13158c2ecf20Sopenharmony_ci if (likely(!bbio->num_tgtdevs)) 13168c2ecf20Sopenharmony_ci goto write_data; 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 13198c2ecf20Sopenharmony_ci if (!bbio->tgtdev_map[stripe]) 13208c2ecf20Sopenharmony_ci continue; 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 13238c2ecf20Sopenharmony_ci struct page *page; 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ci /* This vertical stripe has no data, skip it. */ 13268c2ecf20Sopenharmony_ci if (!test_bit(pagenr, rbio->dbitmap)) 13278c2ecf20Sopenharmony_ci continue; 13288c2ecf20Sopenharmony_ci 13298c2ecf20Sopenharmony_ci if (stripe < rbio->nr_data) { 13308c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 1); 13318c2ecf20Sopenharmony_ci if (!page) 13328c2ecf20Sopenharmony_ci continue; 13338c2ecf20Sopenharmony_ci } else { 13348c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr); 13358c2ecf20Sopenharmony_ci } 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, page, 13388c2ecf20Sopenharmony_ci rbio->bbio->tgtdev_map[stripe], 13398c2ecf20Sopenharmony_ci pagenr, rbio->stripe_len); 13408c2ecf20Sopenharmony_ci if (ret) 13418c2ecf20Sopenharmony_ci goto cleanup; 13428c2ecf20Sopenharmony_ci } 13438c2ecf20Sopenharmony_ci } 13448c2ecf20Sopenharmony_ci 13458c2ecf20Sopenharmony_ciwrite_data: 13468c2ecf20Sopenharmony_ci atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); 13478c2ecf20Sopenharmony_ci BUG_ON(atomic_read(&rbio->stripes_pending) == 0); 13488c2ecf20Sopenharmony_ci 13498c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) { 13508c2ecf20Sopenharmony_ci bio->bi_private = rbio; 13518c2ecf20Sopenharmony_ci bio->bi_end_io = raid_write_end_io; 13528c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_WRITE; 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci submit_bio(bio); 13558c2ecf20Sopenharmony_ci } 13568c2ecf20Sopenharmony_ci return; 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_cicleanup: 13598c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) 13628c2ecf20Sopenharmony_ci bio_put(bio); 13638c2ecf20Sopenharmony_ci} 13648c2ecf20Sopenharmony_ci 13658c2ecf20Sopenharmony_ci/* 13668c2ecf20Sopenharmony_ci * helper to find the stripe number for a given bio. Used to figure out which 13678c2ecf20Sopenharmony_ci * stripe has failed. This expects the bio to correspond to a physical disk, 13688c2ecf20Sopenharmony_ci * so it looks up based on physical sector numbers. 
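 *
 * (A stripe matches when the bio's physical address falls inside
 *  [stripe->physical, stripe->physical + stripe_len) and the bio points at
 *  the same disk and partition as that stripe's device.)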
13698c2ecf20Sopenharmony_ci */ 13708c2ecf20Sopenharmony_cistatic int find_bio_stripe(struct btrfs_raid_bio *rbio, 13718c2ecf20Sopenharmony_ci struct bio *bio) 13728c2ecf20Sopenharmony_ci{ 13738c2ecf20Sopenharmony_ci u64 physical = bio->bi_iter.bi_sector; 13748c2ecf20Sopenharmony_ci int i; 13758c2ecf20Sopenharmony_ci struct btrfs_bio_stripe *stripe; 13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci physical <<= 9; 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_ci for (i = 0; i < rbio->bbio->num_stripes; i++) { 13808c2ecf20Sopenharmony_ci stripe = &rbio->bbio->stripes[i]; 13818c2ecf20Sopenharmony_ci if (in_range(physical, stripe->physical, rbio->stripe_len) && 13828c2ecf20Sopenharmony_ci stripe->dev->bdev && 13838c2ecf20Sopenharmony_ci bio->bi_disk == stripe->dev->bdev->bd_disk && 13848c2ecf20Sopenharmony_ci bio->bi_partno == stripe->dev->bdev->bd_partno) { 13858c2ecf20Sopenharmony_ci return i; 13868c2ecf20Sopenharmony_ci } 13878c2ecf20Sopenharmony_ci } 13888c2ecf20Sopenharmony_ci return -1; 13898c2ecf20Sopenharmony_ci} 13908c2ecf20Sopenharmony_ci 13918c2ecf20Sopenharmony_ci/* 13928c2ecf20Sopenharmony_ci * helper to find the stripe number for a given 13938c2ecf20Sopenharmony_ci * bio (before mapping). Used to figure out which stripe has 13948c2ecf20Sopenharmony_ci * failed. This looks up based on logical block numbers. 13958c2ecf20Sopenharmony_ci */ 13968c2ecf20Sopenharmony_cistatic int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, 13978c2ecf20Sopenharmony_ci struct bio *bio) 13988c2ecf20Sopenharmony_ci{ 13998c2ecf20Sopenharmony_ci u64 logical = (u64)bio->bi_iter.bi_sector << 9; 14008c2ecf20Sopenharmony_ci int i; 14018c2ecf20Sopenharmony_ci 14028c2ecf20Sopenharmony_ci for (i = 0; i < rbio->nr_data; i++) { 14038c2ecf20Sopenharmony_ci u64 stripe_start = rbio->bbio->raid_map[i]; 14048c2ecf20Sopenharmony_ci 14058c2ecf20Sopenharmony_ci if (in_range(logical, stripe_start, rbio->stripe_len)) 14068c2ecf20Sopenharmony_ci return i; 14078c2ecf20Sopenharmony_ci } 14088c2ecf20Sopenharmony_ci return -1; 14098c2ecf20Sopenharmony_ci} 14108c2ecf20Sopenharmony_ci 14118c2ecf20Sopenharmony_ci/* 14128c2ecf20Sopenharmony_ci * returns -EIO if we had too many failures 14138c2ecf20Sopenharmony_ci */ 14148c2ecf20Sopenharmony_cistatic int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) 14158c2ecf20Sopenharmony_ci{ 14168c2ecf20Sopenharmony_ci unsigned long flags; 14178c2ecf20Sopenharmony_ci int ret = 0; 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci spin_lock_irqsave(&rbio->bio_list_lock, flags); 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci /* we already know this stripe is bad, move on */ 14228c2ecf20Sopenharmony_ci if (rbio->faila == failed || rbio->failb == failed) 14238c2ecf20Sopenharmony_ci goto out; 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_ci if (rbio->faila == -1) { 14268c2ecf20Sopenharmony_ci /* first failure on this rbio */ 14278c2ecf20Sopenharmony_ci rbio->faila = failed; 14288c2ecf20Sopenharmony_ci atomic_inc(&rbio->error); 14298c2ecf20Sopenharmony_ci } else if (rbio->failb == -1) { 14308c2ecf20Sopenharmony_ci /* second failure on this rbio */ 14318c2ecf20Sopenharmony_ci rbio->failb = failed; 14328c2ecf20Sopenharmony_ci atomic_inc(&rbio->error); 14338c2ecf20Sopenharmony_ci } else { 14348c2ecf20Sopenharmony_ci ret = -EIO; 14358c2ecf20Sopenharmony_ci } 14368c2ecf20Sopenharmony_ciout: 14378c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 14388c2ecf20Sopenharmony_ci 14398c2ecf20Sopenharmony_ci return ret; 
14408c2ecf20Sopenharmony_ci} 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci/* 14438c2ecf20Sopenharmony_ci * helper to fail a stripe based on a physical disk 14448c2ecf20Sopenharmony_ci * bio. 14458c2ecf20Sopenharmony_ci */ 14468c2ecf20Sopenharmony_cistatic int fail_bio_stripe(struct btrfs_raid_bio *rbio, 14478c2ecf20Sopenharmony_ci struct bio *bio) 14488c2ecf20Sopenharmony_ci{ 14498c2ecf20Sopenharmony_ci int failed = find_bio_stripe(rbio, bio); 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci if (failed < 0) 14528c2ecf20Sopenharmony_ci return -EIO; 14538c2ecf20Sopenharmony_ci 14548c2ecf20Sopenharmony_ci return fail_rbio_index(rbio, failed); 14558c2ecf20Sopenharmony_ci} 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_ci/* 14588c2ecf20Sopenharmony_ci * this sets each page in the bio uptodate. It should only be used on private 14598c2ecf20Sopenharmony_ci * rbio pages, nothing that comes in from the higher layers 14608c2ecf20Sopenharmony_ci */ 14618c2ecf20Sopenharmony_cistatic void set_bio_pages_uptodate(struct bio *bio) 14628c2ecf20Sopenharmony_ci{ 14638c2ecf20Sopenharmony_ci struct bio_vec *bvec; 14648c2ecf20Sopenharmony_ci struct bvec_iter_all iter_all; 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci ASSERT(!bio_flagged(bio, BIO_CLONED)); 14678c2ecf20Sopenharmony_ci 14688c2ecf20Sopenharmony_ci bio_for_each_segment_all(bvec, bio, iter_all) 14698c2ecf20Sopenharmony_ci SetPageUptodate(bvec->bv_page); 14708c2ecf20Sopenharmony_ci} 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci/* 14738c2ecf20Sopenharmony_ci * end io for the read phase of the rmw cycle. All the bios here are physical 14748c2ecf20Sopenharmony_ci * stripe bios we've read from the disk so we can recalculate the parity of the 14758c2ecf20Sopenharmony_ci * stripe. 14768c2ecf20Sopenharmony_ci * 14778c2ecf20Sopenharmony_ci * This will usually kick off finish_rmw once all the bios are read in, but it 14788c2ecf20Sopenharmony_ci * may trigger parity reconstruction if we had any errors along the way 14798c2ecf20Sopenharmony_ci */ 14808c2ecf20Sopenharmony_cistatic void raid_rmw_end_io(struct bio *bio) 14818c2ecf20Sopenharmony_ci{ 14828c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio = bio->bi_private; 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci if (bio->bi_status) 14858c2ecf20Sopenharmony_ci fail_bio_stripe(rbio, bio); 14868c2ecf20Sopenharmony_ci else 14878c2ecf20Sopenharmony_ci set_bio_pages_uptodate(bio); 14888c2ecf20Sopenharmony_ci 14898c2ecf20Sopenharmony_ci bio_put(bio); 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&rbio->stripes_pending)) 14928c2ecf20Sopenharmony_ci return; 14938c2ecf20Sopenharmony_ci 14948c2ecf20Sopenharmony_ci if (atomic_read(&rbio->error) > rbio->bbio->max_errors) 14958c2ecf20Sopenharmony_ci goto cleanup; 14968c2ecf20Sopenharmony_ci 14978c2ecf20Sopenharmony_ci /* 14988c2ecf20Sopenharmony_ci * this will normally call finish_rmw to start our write 14998c2ecf20Sopenharmony_ci * but if there are any failed stripes we'll reconstruct 15008c2ecf20Sopenharmony_ci * from parity first 15018c2ecf20Sopenharmony_ci */ 15028c2ecf20Sopenharmony_ci validate_rbio_for_rmw(rbio); 15038c2ecf20Sopenharmony_ci return; 15048c2ecf20Sopenharmony_ci 15058c2ecf20Sopenharmony_cicleanup: 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 15088c2ecf20Sopenharmony_ci} 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_ci/* 15118c2ecf20Sopenharmony_ci * the stripe must be locked by the caller. 
It will 15128c2ecf20Sopenharmony_ci * unlock after all the writes are done 15138c2ecf20Sopenharmony_ci */ 15148c2ecf20Sopenharmony_cistatic int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) 15158c2ecf20Sopenharmony_ci{ 15168c2ecf20Sopenharmony_ci int bios_to_read = 0; 15178c2ecf20Sopenharmony_ci struct bio_list bio_list; 15188c2ecf20Sopenharmony_ci int ret; 15198c2ecf20Sopenharmony_ci int pagenr; 15208c2ecf20Sopenharmony_ci int stripe; 15218c2ecf20Sopenharmony_ci struct bio *bio; 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_ci bio_list_init(&bio_list); 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci ret = alloc_rbio_pages(rbio); 15268c2ecf20Sopenharmony_ci if (ret) 15278c2ecf20Sopenharmony_ci goto cleanup; 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_ci index_rbio_pages(rbio); 15308c2ecf20Sopenharmony_ci 15318c2ecf20Sopenharmony_ci atomic_set(&rbio->error, 0); 15328c2ecf20Sopenharmony_ci /* 15338c2ecf20Sopenharmony_ci * build a list of bios to read all the missing parts of this 15348c2ecf20Sopenharmony_ci * stripe 15358c2ecf20Sopenharmony_ci */ 15368c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->nr_data; stripe++) { 15378c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 15388c2ecf20Sopenharmony_ci struct page *page; 15398c2ecf20Sopenharmony_ci /* 15408c2ecf20Sopenharmony_ci * we want to find all the pages missing from 15418c2ecf20Sopenharmony_ci * the rbio and read them from the disk. If 15428c2ecf20Sopenharmony_ci * page_in_rbio finds a page in the bio list 15438c2ecf20Sopenharmony_ci * we don't need to read it off the stripe. 15448c2ecf20Sopenharmony_ci */ 15458c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 1); 15468c2ecf20Sopenharmony_ci if (page) 15478c2ecf20Sopenharmony_ci continue; 15488c2ecf20Sopenharmony_ci 15498c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr); 15508c2ecf20Sopenharmony_ci /* 15518c2ecf20Sopenharmony_ci * the bio cache may have handed us an uptodate 15528c2ecf20Sopenharmony_ci * page. If so, be happy and use it 15538c2ecf20Sopenharmony_ci */ 15548c2ecf20Sopenharmony_ci if (PageUptodate(page)) 15558c2ecf20Sopenharmony_ci continue; 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, page, 15588c2ecf20Sopenharmony_ci stripe, pagenr, rbio->stripe_len); 15598c2ecf20Sopenharmony_ci if (ret) 15608c2ecf20Sopenharmony_ci goto cleanup; 15618c2ecf20Sopenharmony_ci } 15628c2ecf20Sopenharmony_ci } 15638c2ecf20Sopenharmony_ci 15648c2ecf20Sopenharmony_ci bios_to_read = bio_list_size(&bio_list); 15658c2ecf20Sopenharmony_ci if (!bios_to_read) { 15668c2ecf20Sopenharmony_ci /* 15678c2ecf20Sopenharmony_ci * this can happen if others have merged with 15688c2ecf20Sopenharmony_ci * us, it means there is nothing left to read. 15698c2ecf20Sopenharmony_ci * But if there are missing devices it may not be 15708c2ecf20Sopenharmony_ci * safe to do the full stripe write yet. 15718c2ecf20Sopenharmony_ci */ 15728c2ecf20Sopenharmony_ci goto finish; 15738c2ecf20Sopenharmony_ci } 15748c2ecf20Sopenharmony_ci 15758c2ecf20Sopenharmony_ci /* 15768c2ecf20Sopenharmony_ci * the bbio may be freed once we submit the last bio. 
Make sure 15778c2ecf20Sopenharmony_ci * not to touch it after that 15788c2ecf20Sopenharmony_ci */ 15798c2ecf20Sopenharmony_ci atomic_set(&rbio->stripes_pending, bios_to_read); 15808c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) { 15818c2ecf20Sopenharmony_ci bio->bi_private = rbio; 15828c2ecf20Sopenharmony_ci bio->bi_end_io = raid_rmw_end_io; 15838c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_READ; 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 15868c2ecf20Sopenharmony_ci 15878c2ecf20Sopenharmony_ci submit_bio(bio); 15888c2ecf20Sopenharmony_ci } 15898c2ecf20Sopenharmony_ci /* the actual write will happen once the reads are done */ 15908c2ecf20Sopenharmony_ci return 0; 15918c2ecf20Sopenharmony_ci 15928c2ecf20Sopenharmony_cicleanup: 15938c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) 15968c2ecf20Sopenharmony_ci bio_put(bio); 15978c2ecf20Sopenharmony_ci 15988c2ecf20Sopenharmony_ci return -EIO; 15998c2ecf20Sopenharmony_ci 16008c2ecf20Sopenharmony_cifinish: 16018c2ecf20Sopenharmony_ci validate_rbio_for_rmw(rbio); 16028c2ecf20Sopenharmony_ci return 0; 16038c2ecf20Sopenharmony_ci} 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci/* 16068c2ecf20Sopenharmony_ci * if the upper layers pass in a full stripe, we thank them by only allocating 16078c2ecf20Sopenharmony_ci * enough pages to hold the parity, and sending it all down quickly. 16088c2ecf20Sopenharmony_ci */ 16098c2ecf20Sopenharmony_cistatic int full_stripe_write(struct btrfs_raid_bio *rbio) 16108c2ecf20Sopenharmony_ci{ 16118c2ecf20Sopenharmony_ci int ret; 16128c2ecf20Sopenharmony_ci 16138c2ecf20Sopenharmony_ci ret = alloc_rbio_parity_pages(rbio); 16148c2ecf20Sopenharmony_ci if (ret) { 16158c2ecf20Sopenharmony_ci __free_raid_bio(rbio); 16168c2ecf20Sopenharmony_ci return ret; 16178c2ecf20Sopenharmony_ci } 16188c2ecf20Sopenharmony_ci 16198c2ecf20Sopenharmony_ci ret = lock_stripe_add(rbio); 16208c2ecf20Sopenharmony_ci if (ret == 0) 16218c2ecf20Sopenharmony_ci finish_rmw(rbio); 16228c2ecf20Sopenharmony_ci return 0; 16238c2ecf20Sopenharmony_ci} 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci/* 16268c2ecf20Sopenharmony_ci * partial stripe writes get handed over to async helpers. 16278c2ecf20Sopenharmony_ci * We're really hoping to merge a few more writes into this 16288c2ecf20Sopenharmony_ci * rbio before calculating new parity 16298c2ecf20Sopenharmony_ci */ 16308c2ecf20Sopenharmony_cistatic int partial_stripe_write(struct btrfs_raid_bio *rbio) 16318c2ecf20Sopenharmony_ci{ 16328c2ecf20Sopenharmony_ci int ret; 16338c2ecf20Sopenharmony_ci 16348c2ecf20Sopenharmony_ci ret = lock_stripe_add(rbio); 16358c2ecf20Sopenharmony_ci if (ret == 0) 16368c2ecf20Sopenharmony_ci start_async_work(rbio, rmw_work); 16378c2ecf20Sopenharmony_ci return 0; 16388c2ecf20Sopenharmony_ci} 16398c2ecf20Sopenharmony_ci 16408c2ecf20Sopenharmony_ci/* 16418c2ecf20Sopenharmony_ci * sometimes while we were reading from the drive to 16428c2ecf20Sopenharmony_ci * recalculate parity, enough new bios come into create 16438c2ecf20Sopenharmony_ci * a full stripe. 
So we do a check here to see if we can 16448c2ecf20Sopenharmony_ci * go directly to finish_rmw 16458c2ecf20Sopenharmony_ci */ 16468c2ecf20Sopenharmony_cistatic int __raid56_parity_write(struct btrfs_raid_bio *rbio) 16478c2ecf20Sopenharmony_ci{ 16488c2ecf20Sopenharmony_ci /* head off into rmw land if we don't have a full stripe */ 16498c2ecf20Sopenharmony_ci if (!rbio_is_full(rbio)) 16508c2ecf20Sopenharmony_ci return partial_stripe_write(rbio); 16518c2ecf20Sopenharmony_ci return full_stripe_write(rbio); 16528c2ecf20Sopenharmony_ci} 16538c2ecf20Sopenharmony_ci 16548c2ecf20Sopenharmony_ci/* 16558c2ecf20Sopenharmony_ci * We use plugging call backs to collect full stripes. 16568c2ecf20Sopenharmony_ci * Any time we get a partial stripe write while plugged 16578c2ecf20Sopenharmony_ci * we collect it into a list. When the unplug comes down, 16588c2ecf20Sopenharmony_ci * we sort the list by logical block number and merge 16598c2ecf20Sopenharmony_ci * everything we can into the same rbios 16608c2ecf20Sopenharmony_ci */ 16618c2ecf20Sopenharmony_cistruct btrfs_plug_cb { 16628c2ecf20Sopenharmony_ci struct blk_plug_cb cb; 16638c2ecf20Sopenharmony_ci struct btrfs_fs_info *info; 16648c2ecf20Sopenharmony_ci struct list_head rbio_list; 16658c2ecf20Sopenharmony_ci struct btrfs_work work; 16668c2ecf20Sopenharmony_ci}; 16678c2ecf20Sopenharmony_ci 16688c2ecf20Sopenharmony_ci/* 16698c2ecf20Sopenharmony_ci * rbios on the plug list are sorted for easier merging. 16708c2ecf20Sopenharmony_ci */ 16718c2ecf20Sopenharmony_cistatic int plug_cmp(void *priv, const struct list_head *a, 16728c2ecf20Sopenharmony_ci const struct list_head *b) 16738c2ecf20Sopenharmony_ci{ 16748c2ecf20Sopenharmony_ci struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, 16758c2ecf20Sopenharmony_ci plug_list); 16768c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 16778c2ecf20Sopenharmony_ci plug_list); 16788c2ecf20Sopenharmony_ci u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; 16798c2ecf20Sopenharmony_ci u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; 16808c2ecf20Sopenharmony_ci 16818c2ecf20Sopenharmony_ci if (a_sector < b_sector) 16828c2ecf20Sopenharmony_ci return -1; 16838c2ecf20Sopenharmony_ci if (a_sector > b_sector) 16848c2ecf20Sopenharmony_ci return 1; 16858c2ecf20Sopenharmony_ci return 0; 16868c2ecf20Sopenharmony_ci} 16878c2ecf20Sopenharmony_ci 16888c2ecf20Sopenharmony_cistatic void run_plug(struct btrfs_plug_cb *plug) 16898c2ecf20Sopenharmony_ci{ 16908c2ecf20Sopenharmony_ci struct btrfs_raid_bio *cur; 16918c2ecf20Sopenharmony_ci struct btrfs_raid_bio *last = NULL; 16928c2ecf20Sopenharmony_ci 16938c2ecf20Sopenharmony_ci /* 16948c2ecf20Sopenharmony_ci * sort our plug list then try to merge 16958c2ecf20Sopenharmony_ci * everything we can in hopes of creating full 16968c2ecf20Sopenharmony_ci * stripes. 
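 *
 * (plug_cmp orders the rbios by the starting sector of their first bio, so
 *  writes that land in the same full stripe end up next to each other and
 *  can be merged before being sent down.)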
16978c2ecf20Sopenharmony_ci */ 16988c2ecf20Sopenharmony_ci list_sort(NULL, &plug->rbio_list, plug_cmp); 16998c2ecf20Sopenharmony_ci while (!list_empty(&plug->rbio_list)) { 17008c2ecf20Sopenharmony_ci cur = list_entry(plug->rbio_list.next, 17018c2ecf20Sopenharmony_ci struct btrfs_raid_bio, plug_list); 17028c2ecf20Sopenharmony_ci list_del_init(&cur->plug_list); 17038c2ecf20Sopenharmony_ci 17048c2ecf20Sopenharmony_ci if (rbio_is_full(cur)) { 17058c2ecf20Sopenharmony_ci int ret; 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_ci /* we have a full stripe, send it down */ 17088c2ecf20Sopenharmony_ci ret = full_stripe_write(cur); 17098c2ecf20Sopenharmony_ci BUG_ON(ret); 17108c2ecf20Sopenharmony_ci continue; 17118c2ecf20Sopenharmony_ci } 17128c2ecf20Sopenharmony_ci if (last) { 17138c2ecf20Sopenharmony_ci if (rbio_can_merge(last, cur)) { 17148c2ecf20Sopenharmony_ci merge_rbio(last, cur); 17158c2ecf20Sopenharmony_ci __free_raid_bio(cur); 17168c2ecf20Sopenharmony_ci continue; 17178c2ecf20Sopenharmony_ci 17188c2ecf20Sopenharmony_ci } 17198c2ecf20Sopenharmony_ci __raid56_parity_write(last); 17208c2ecf20Sopenharmony_ci } 17218c2ecf20Sopenharmony_ci last = cur; 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci if (last) { 17248c2ecf20Sopenharmony_ci __raid56_parity_write(last); 17258c2ecf20Sopenharmony_ci } 17268c2ecf20Sopenharmony_ci kfree(plug); 17278c2ecf20Sopenharmony_ci} 17288c2ecf20Sopenharmony_ci 17298c2ecf20Sopenharmony_ci/* 17308c2ecf20Sopenharmony_ci * if the unplug comes from schedule, we have to push the 17318c2ecf20Sopenharmony_ci * work off to a helper thread 17328c2ecf20Sopenharmony_ci */ 17338c2ecf20Sopenharmony_cistatic void unplug_work(struct btrfs_work *work) 17348c2ecf20Sopenharmony_ci{ 17358c2ecf20Sopenharmony_ci struct btrfs_plug_cb *plug; 17368c2ecf20Sopenharmony_ci plug = container_of(work, struct btrfs_plug_cb, work); 17378c2ecf20Sopenharmony_ci run_plug(plug); 17388c2ecf20Sopenharmony_ci} 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_cistatic void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) 17418c2ecf20Sopenharmony_ci{ 17428c2ecf20Sopenharmony_ci struct btrfs_plug_cb *plug; 17438c2ecf20Sopenharmony_ci plug = container_of(cb, struct btrfs_plug_cb, cb); 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci if (from_schedule) { 17468c2ecf20Sopenharmony_ci btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 17478c2ecf20Sopenharmony_ci btrfs_queue_work(plug->info->rmw_workers, 17488c2ecf20Sopenharmony_ci &plug->work); 17498c2ecf20Sopenharmony_ci return; 17508c2ecf20Sopenharmony_ci } 17518c2ecf20Sopenharmony_ci run_plug(plug); 17528c2ecf20Sopenharmony_ci} 17538c2ecf20Sopenharmony_ci 17548c2ecf20Sopenharmony_ci/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. 
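 *
 * The bit number used below is the page row inside a single data stripe:
 *
 *   bit = ((logical - full_stripe_start) >> PAGE_SHIFT) % stripe_npages
 *
 * e.g. assuming a 64K stripe_len and 4K pages (example values only), a 4K
 * write at full_stripe_start + 68K is page 17 of the data area and so sets
 * bit 17 % 16 = 1.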
*/ 17558c2ecf20Sopenharmony_cistatic void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) 17568c2ecf20Sopenharmony_ci{ 17578c2ecf20Sopenharmony_ci const struct btrfs_fs_info *fs_info = rbio->fs_info; 17588c2ecf20Sopenharmony_ci const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; 17598c2ecf20Sopenharmony_ci const u64 full_stripe_start = rbio->bbio->raid_map[0]; 17608c2ecf20Sopenharmony_ci const u32 orig_len = orig_bio->bi_iter.bi_size; 17618c2ecf20Sopenharmony_ci const u32 sectorsize = fs_info->sectorsize; 17628c2ecf20Sopenharmony_ci u64 cur_logical; 17638c2ecf20Sopenharmony_ci 17648c2ecf20Sopenharmony_ci ASSERT(orig_logical >= full_stripe_start && 17658c2ecf20Sopenharmony_ci orig_logical + orig_len <= full_stripe_start + 17668c2ecf20Sopenharmony_ci rbio->nr_data * rbio->stripe_len); 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_ci bio_list_add(&rbio->bio_list, orig_bio); 17698c2ecf20Sopenharmony_ci rbio->bio_list_bytes += orig_bio->bi_iter.bi_size; 17708c2ecf20Sopenharmony_ci 17718c2ecf20Sopenharmony_ci /* Update the dbitmap. */ 17728c2ecf20Sopenharmony_ci for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len; 17738c2ecf20Sopenharmony_ci cur_logical += sectorsize) { 17748c2ecf20Sopenharmony_ci int bit = ((u32)(cur_logical - full_stripe_start) >> 17758c2ecf20Sopenharmony_ci PAGE_SHIFT) % rbio->stripe_npages; 17768c2ecf20Sopenharmony_ci 17778c2ecf20Sopenharmony_ci set_bit(bit, rbio->dbitmap); 17788c2ecf20Sopenharmony_ci } 17798c2ecf20Sopenharmony_ci} 17808c2ecf20Sopenharmony_ci 17818c2ecf20Sopenharmony_ci/* 17828c2ecf20Sopenharmony_ci * our main entry point for writes from the rest of the FS. 17838c2ecf20Sopenharmony_ci */ 17848c2ecf20Sopenharmony_ciint raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, 17858c2ecf20Sopenharmony_ci struct btrfs_bio *bbio, u64 stripe_len) 17868c2ecf20Sopenharmony_ci{ 17878c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio; 17888c2ecf20Sopenharmony_ci struct btrfs_plug_cb *plug = NULL; 17898c2ecf20Sopenharmony_ci struct blk_plug_cb *cb; 17908c2ecf20Sopenharmony_ci int ret; 17918c2ecf20Sopenharmony_ci 17928c2ecf20Sopenharmony_ci rbio = alloc_rbio(fs_info, bbio, stripe_len); 17938c2ecf20Sopenharmony_ci if (IS_ERR(rbio)) { 17948c2ecf20Sopenharmony_ci btrfs_put_bbio(bbio); 17958c2ecf20Sopenharmony_ci return PTR_ERR(rbio); 17968c2ecf20Sopenharmony_ci } 17978c2ecf20Sopenharmony_ci rbio->operation = BTRFS_RBIO_WRITE; 17988c2ecf20Sopenharmony_ci rbio_add_bio(rbio, bio); 17998c2ecf20Sopenharmony_ci 18008c2ecf20Sopenharmony_ci btrfs_bio_counter_inc_noblocked(fs_info); 18018c2ecf20Sopenharmony_ci rbio->generic_bio_cnt = 1; 18028c2ecf20Sopenharmony_ci 18038c2ecf20Sopenharmony_ci /* 18048c2ecf20Sopenharmony_ci * don't plug on full rbios, just get them out the door 18058c2ecf20Sopenharmony_ci * as quickly as we can 18068c2ecf20Sopenharmony_ci */ 18078c2ecf20Sopenharmony_ci if (rbio_is_full(rbio)) { 18088c2ecf20Sopenharmony_ci ret = full_stripe_write(rbio); 18098c2ecf20Sopenharmony_ci if (ret) 18108c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 18118c2ecf20Sopenharmony_ci return ret; 18128c2ecf20Sopenharmony_ci } 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug)); 18158c2ecf20Sopenharmony_ci if (cb) { 18168c2ecf20Sopenharmony_ci plug = container_of(cb, struct btrfs_plug_cb, cb); 18178c2ecf20Sopenharmony_ci if (!plug->info) { 18188c2ecf20Sopenharmony_ci plug->info = fs_info; 18198c2ecf20Sopenharmony_ci 
INIT_LIST_HEAD(&plug->rbio_list); 18208c2ecf20Sopenharmony_ci } 18218c2ecf20Sopenharmony_ci list_add_tail(&rbio->plug_list, &plug->rbio_list); 18228c2ecf20Sopenharmony_ci ret = 0; 18238c2ecf20Sopenharmony_ci } else { 18248c2ecf20Sopenharmony_ci ret = __raid56_parity_write(rbio); 18258c2ecf20Sopenharmony_ci if (ret) 18268c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 18278c2ecf20Sopenharmony_ci } 18288c2ecf20Sopenharmony_ci return ret; 18298c2ecf20Sopenharmony_ci} 18308c2ecf20Sopenharmony_ci 18318c2ecf20Sopenharmony_ci/* 18328c2ecf20Sopenharmony_ci * all parity reconstruction happens here. We've read in everything 18338c2ecf20Sopenharmony_ci * we can find from the drives and this does the heavy lifting of 18348c2ecf20Sopenharmony_ci * sorting the good from the bad. 18358c2ecf20Sopenharmony_ci */ 18368c2ecf20Sopenharmony_cistatic void __raid_recover_end_io(struct btrfs_raid_bio *rbio) 18378c2ecf20Sopenharmony_ci{ 18388c2ecf20Sopenharmony_ci int pagenr, stripe; 18398c2ecf20Sopenharmony_ci void **pointers; 18408c2ecf20Sopenharmony_ci int faila = -1, failb = -1; 18418c2ecf20Sopenharmony_ci struct page *page; 18428c2ecf20Sopenharmony_ci blk_status_t err; 18438c2ecf20Sopenharmony_ci int i; 18448c2ecf20Sopenharmony_ci 18458c2ecf20Sopenharmony_ci pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 18468c2ecf20Sopenharmony_ci if (!pointers) { 18478c2ecf20Sopenharmony_ci err = BLK_STS_RESOURCE; 18488c2ecf20Sopenharmony_ci goto cleanup_io; 18498c2ecf20Sopenharmony_ci } 18508c2ecf20Sopenharmony_ci 18518c2ecf20Sopenharmony_ci faila = rbio->faila; 18528c2ecf20Sopenharmony_ci failb = rbio->failb; 18538c2ecf20Sopenharmony_ci 18548c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 18558c2ecf20Sopenharmony_ci rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 18568c2ecf20Sopenharmony_ci spin_lock_irq(&rbio->bio_list_lock); 18578c2ecf20Sopenharmony_ci set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 18588c2ecf20Sopenharmony_ci spin_unlock_irq(&rbio->bio_list_lock); 18598c2ecf20Sopenharmony_ci } 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci index_rbio_pages(rbio); 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 18648c2ecf20Sopenharmony_ci /* 18658c2ecf20Sopenharmony_ci * Now we just use bitmap to mark the horizontal stripes in 18668c2ecf20Sopenharmony_ci * which we have data when doing parity scrub. 
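 *
 * (The dbitmap test below only applies to BTRFS_RBIO_PARITY_SCRUB; for
 *  read-rebuild and missing-device rebuilds every row is processed.)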
18678c2ecf20Sopenharmony_ci */ 18688c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 18698c2ecf20Sopenharmony_ci !test_bit(pagenr, rbio->dbitmap)) 18708c2ecf20Sopenharmony_ci continue; 18718c2ecf20Sopenharmony_ci 18728c2ecf20Sopenharmony_ci /* setup our array of pointers with pages 18738c2ecf20Sopenharmony_ci * from each stripe 18748c2ecf20Sopenharmony_ci */ 18758c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 18768c2ecf20Sopenharmony_ci /* 18778c2ecf20Sopenharmony_ci * if we're rebuilding a read, we have to use 18788c2ecf20Sopenharmony_ci * pages from the bio list 18798c2ecf20Sopenharmony_ci */ 18808c2ecf20Sopenharmony_ci if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || 18818c2ecf20Sopenharmony_ci rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && 18828c2ecf20Sopenharmony_ci (stripe == faila || stripe == failb)) { 18838c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 0); 18848c2ecf20Sopenharmony_ci } else { 18858c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr); 18868c2ecf20Sopenharmony_ci } 18878c2ecf20Sopenharmony_ci pointers[stripe] = kmap(page); 18888c2ecf20Sopenharmony_ci } 18898c2ecf20Sopenharmony_ci 18908c2ecf20Sopenharmony_ci /* all raid6 handling here */ 18918c2ecf20Sopenharmony_ci if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) { 18928c2ecf20Sopenharmony_ci /* 18938c2ecf20Sopenharmony_ci * single failure, rebuild from parity raid5 18948c2ecf20Sopenharmony_ci * style 18958c2ecf20Sopenharmony_ci */ 18968c2ecf20Sopenharmony_ci if (failb < 0) { 18978c2ecf20Sopenharmony_ci if (faila == rbio->nr_data) { 18988c2ecf20Sopenharmony_ci /* 18998c2ecf20Sopenharmony_ci * Just the P stripe has failed, without 19008c2ecf20Sopenharmony_ci * a bad data or Q stripe. 19018c2ecf20Sopenharmony_ci * TODO, we should redo the xor here. 19028c2ecf20Sopenharmony_ci */ 19038c2ecf20Sopenharmony_ci err = BLK_STS_IOERR; 19048c2ecf20Sopenharmony_ci goto cleanup; 19058c2ecf20Sopenharmony_ci } 19068c2ecf20Sopenharmony_ci /* 19078c2ecf20Sopenharmony_ci * a single failure in raid6 is rebuilt 19088c2ecf20Sopenharmony_ci * in the pstripe code below 19098c2ecf20Sopenharmony_ci */ 19108c2ecf20Sopenharmony_ci goto pstripe; 19118c2ecf20Sopenharmony_ci } 19128c2ecf20Sopenharmony_ci 19138c2ecf20Sopenharmony_ci /* make sure our ps and qs are in order */ 19148c2ecf20Sopenharmony_ci if (faila > failb) 19158c2ecf20Sopenharmony_ci swap(faila, failb); 19168c2ecf20Sopenharmony_ci 19178c2ecf20Sopenharmony_ci /* if the q stripe is failed, do a pstripe reconstruction 19188c2ecf20Sopenharmony_ci * from the xors. 19198c2ecf20Sopenharmony_ci * If both the q stripe and the P stripe are failed, we're 19208c2ecf20Sopenharmony_ci * here due to a crc mismatch and we can't give them the 19218c2ecf20Sopenharmony_ci * data they want 19228c2ecf20Sopenharmony_ci */ 19238c2ecf20Sopenharmony_ci if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { 19248c2ecf20Sopenharmony_ci if (rbio->bbio->raid_map[faila] == 19258c2ecf20Sopenharmony_ci RAID5_P_STRIPE) { 19268c2ecf20Sopenharmony_ci err = BLK_STS_IOERR; 19278c2ecf20Sopenharmony_ci goto cleanup; 19288c2ecf20Sopenharmony_ci } 19298c2ecf20Sopenharmony_ci /* 19308c2ecf20Sopenharmony_ci * otherwise we have one bad data stripe and 19318c2ecf20Sopenharmony_ci * a good P stripe. raid5! 
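 *
 * (The pstripe path below copies P into the failed slot and xors the
 *  surviving data blocks over it, i.e. the missing block is rebuilt as
 *
 *    D_fail = P ^ D0 ^ ... ^ D(nr_data - 1)   [skipping D_fail itself]
 *
 *  which is just the raid5 parity equation solved for the failed block.)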
19328c2ecf20Sopenharmony_ci */ 19338c2ecf20Sopenharmony_ci goto pstripe; 19348c2ecf20Sopenharmony_ci } 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_ci if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) { 19378c2ecf20Sopenharmony_ci raid6_datap_recov(rbio->real_stripes, 19388c2ecf20Sopenharmony_ci PAGE_SIZE, faila, pointers); 19398c2ecf20Sopenharmony_ci } else { 19408c2ecf20Sopenharmony_ci raid6_2data_recov(rbio->real_stripes, 19418c2ecf20Sopenharmony_ci PAGE_SIZE, faila, failb, 19428c2ecf20Sopenharmony_ci pointers); 19438c2ecf20Sopenharmony_ci } 19448c2ecf20Sopenharmony_ci } else { 19458c2ecf20Sopenharmony_ci void *p; 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_ci /* rebuild from P stripe here (raid5 or raid6) */ 19488c2ecf20Sopenharmony_ci BUG_ON(failb != -1); 19498c2ecf20Sopenharmony_cipstripe: 19508c2ecf20Sopenharmony_ci /* Copy parity block into failed block to start with */ 19518c2ecf20Sopenharmony_ci copy_page(pointers[faila], pointers[rbio->nr_data]); 19528c2ecf20Sopenharmony_ci 19538c2ecf20Sopenharmony_ci /* rearrange the pointer array */ 19548c2ecf20Sopenharmony_ci p = pointers[faila]; 19558c2ecf20Sopenharmony_ci for (stripe = faila; stripe < rbio->nr_data - 1; stripe++) 19568c2ecf20Sopenharmony_ci pointers[stripe] = pointers[stripe + 1]; 19578c2ecf20Sopenharmony_ci pointers[rbio->nr_data - 1] = p; 19588c2ecf20Sopenharmony_ci 19598c2ecf20Sopenharmony_ci /* xor in the rest */ 19608c2ecf20Sopenharmony_ci run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE); 19618c2ecf20Sopenharmony_ci } 19628c2ecf20Sopenharmony_ci /* if we're doing this rebuild as part of an rmw, go through 19638c2ecf20Sopenharmony_ci * and set all of our private rbio pages in the 19648c2ecf20Sopenharmony_ci * failed stripes as uptodate. This way finish_rmw will 19658c2ecf20Sopenharmony_ci * know they can be trusted. 
If this was a read reconstruction, 19668c2ecf20Sopenharmony_ci * other endio functions will fiddle the uptodate bits 19678c2ecf20Sopenharmony_ci */ 19688c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_WRITE) { 19698c2ecf20Sopenharmony_ci for (i = 0; i < rbio->stripe_npages; i++) { 19708c2ecf20Sopenharmony_ci if (faila != -1) { 19718c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, faila, i); 19728c2ecf20Sopenharmony_ci SetPageUptodate(page); 19738c2ecf20Sopenharmony_ci } 19748c2ecf20Sopenharmony_ci if (failb != -1) { 19758c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, failb, i); 19768c2ecf20Sopenharmony_ci SetPageUptodate(page); 19778c2ecf20Sopenharmony_ci } 19788c2ecf20Sopenharmony_ci } 19798c2ecf20Sopenharmony_ci } 19808c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 19818c2ecf20Sopenharmony_ci /* 19828c2ecf20Sopenharmony_ci * if we're rebuilding a read, we have to use 19838c2ecf20Sopenharmony_ci * pages from the bio list 19848c2ecf20Sopenharmony_ci */ 19858c2ecf20Sopenharmony_ci if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || 19868c2ecf20Sopenharmony_ci rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && 19878c2ecf20Sopenharmony_ci (stripe == faila || stripe == failb)) { 19888c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 0); 19898c2ecf20Sopenharmony_ci } else { 19908c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr); 19918c2ecf20Sopenharmony_ci } 19928c2ecf20Sopenharmony_ci kunmap(page); 19938c2ecf20Sopenharmony_ci } 19948c2ecf20Sopenharmony_ci } 19958c2ecf20Sopenharmony_ci 19968c2ecf20Sopenharmony_ci err = BLK_STS_OK; 19978c2ecf20Sopenharmony_cicleanup: 19988c2ecf20Sopenharmony_ci kfree(pointers); 19998c2ecf20Sopenharmony_ci 20008c2ecf20Sopenharmony_cicleanup_io: 20018c2ecf20Sopenharmony_ci /* 20028c2ecf20Sopenharmony_ci * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a 20038c2ecf20Sopenharmony_ci * valid rbio which is consistent with ondisk content, thus such a 20048c2ecf20Sopenharmony_ci * valid rbio can be cached to avoid further disk reads. 20058c2ecf20Sopenharmony_ci */ 20068c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 20078c2ecf20Sopenharmony_ci rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 20088c2ecf20Sopenharmony_ci /* 20098c2ecf20Sopenharmony_ci * - In case of two failures, where rbio->failb != -1: 20108c2ecf20Sopenharmony_ci * 20118c2ecf20Sopenharmony_ci * Do not cache this rbio since the above read reconstruction 20128c2ecf20Sopenharmony_ci * (raid6_datap_recov() or raid6_2data_recov()) may have 20138c2ecf20Sopenharmony_ci * changed some content of stripes which are not identical to 20148c2ecf20Sopenharmony_ci * on-disk content any more, otherwise, a later write/recover 20158c2ecf20Sopenharmony_ci * may steal stripe_pages from this rbio and end up with 20168c2ecf20Sopenharmony_ci * corruptions or rebuild failures. 20178c2ecf20Sopenharmony_ci * 20188c2ecf20Sopenharmony_ci * - In case of single failure, where rbio->failb == -1: 20198c2ecf20Sopenharmony_ci * 20208c2ecf20Sopenharmony_ci * Cache this rbio iff the above read reconstruction is 20218c2ecf20Sopenharmony_ci * executed without problems. 
20228c2ecf20Sopenharmony_ci */ 20238c2ecf20Sopenharmony_ci if (err == BLK_STS_OK && rbio->failb < 0) 20248c2ecf20Sopenharmony_ci cache_rbio_pages(rbio); 20258c2ecf20Sopenharmony_ci else 20268c2ecf20Sopenharmony_ci clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 20278c2ecf20Sopenharmony_ci 20288c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, err); 20298c2ecf20Sopenharmony_ci } else if (err == BLK_STS_OK) { 20308c2ecf20Sopenharmony_ci rbio->faila = -1; 20318c2ecf20Sopenharmony_ci rbio->failb = -1; 20328c2ecf20Sopenharmony_ci 20338c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_WRITE) 20348c2ecf20Sopenharmony_ci finish_rmw(rbio); 20358c2ecf20Sopenharmony_ci else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) 20368c2ecf20Sopenharmony_ci finish_parity_scrub(rbio, 0); 20378c2ecf20Sopenharmony_ci else 20388c2ecf20Sopenharmony_ci BUG(); 20398c2ecf20Sopenharmony_ci } else { 20408c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, err); 20418c2ecf20Sopenharmony_ci } 20428c2ecf20Sopenharmony_ci} 20438c2ecf20Sopenharmony_ci 20448c2ecf20Sopenharmony_ci/* 20458c2ecf20Sopenharmony_ci * This is called only for stripes we've read from disk to 20468c2ecf20Sopenharmony_ci * reconstruct the parity. 20478c2ecf20Sopenharmony_ci */ 20488c2ecf20Sopenharmony_cistatic void raid_recover_end_io(struct bio *bio) 20498c2ecf20Sopenharmony_ci{ 20508c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio = bio->bi_private; 20518c2ecf20Sopenharmony_ci 20528c2ecf20Sopenharmony_ci /* 20538c2ecf20Sopenharmony_ci * we only read stripe pages off the disk, set them 20548c2ecf20Sopenharmony_ci * up to date if there were no errors 20558c2ecf20Sopenharmony_ci */ 20568c2ecf20Sopenharmony_ci if (bio->bi_status) 20578c2ecf20Sopenharmony_ci fail_bio_stripe(rbio, bio); 20588c2ecf20Sopenharmony_ci else 20598c2ecf20Sopenharmony_ci set_bio_pages_uptodate(bio); 20608c2ecf20Sopenharmony_ci bio_put(bio); 20618c2ecf20Sopenharmony_ci 20628c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&rbio->stripes_pending)) 20638c2ecf20Sopenharmony_ci return; 20648c2ecf20Sopenharmony_ci 20658c2ecf20Sopenharmony_ci if (atomic_read(&rbio->error) > rbio->bbio->max_errors) 20668c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 20678c2ecf20Sopenharmony_ci else 20688c2ecf20Sopenharmony_ci __raid_recover_end_io(rbio); 20698c2ecf20Sopenharmony_ci} 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci/* 20728c2ecf20Sopenharmony_ci * reads everything we need off the disk to reconstruct 20738c2ecf20Sopenharmony_ci * the parity. endio handlers trigger final reconstruction 20748c2ecf20Sopenharmony_ci * when the IO is done. 20758c2ecf20Sopenharmony_ci * 20768c2ecf20Sopenharmony_ci * This is used both for reads from the higher layers and for 20778c2ecf20Sopenharmony_ci * parity construction required to finish a rmw cycle. 
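 *
 * (Stripes already marked as failed are not read at all; they are counted
 *  into rbio->error up front so the max_errors check in the endio path
 *  still works, and every page of the surviving stripes is re-read.)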
20788c2ecf20Sopenharmony_ci */ 20798c2ecf20Sopenharmony_cistatic int __raid56_parity_recover(struct btrfs_raid_bio *rbio) 20808c2ecf20Sopenharmony_ci{ 20818c2ecf20Sopenharmony_ci int bios_to_read = 0; 20828c2ecf20Sopenharmony_ci struct bio_list bio_list; 20838c2ecf20Sopenharmony_ci int ret; 20848c2ecf20Sopenharmony_ci int pagenr; 20858c2ecf20Sopenharmony_ci int stripe; 20868c2ecf20Sopenharmony_ci struct bio *bio; 20878c2ecf20Sopenharmony_ci 20888c2ecf20Sopenharmony_ci bio_list_init(&bio_list); 20898c2ecf20Sopenharmony_ci 20908c2ecf20Sopenharmony_ci ret = alloc_rbio_pages(rbio); 20918c2ecf20Sopenharmony_ci if (ret) 20928c2ecf20Sopenharmony_ci goto cleanup; 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci atomic_set(&rbio->error, 0); 20958c2ecf20Sopenharmony_ci 20968c2ecf20Sopenharmony_ci /* 20978c2ecf20Sopenharmony_ci * Read everything that hasn't failed. However this time we will 20988c2ecf20Sopenharmony_ci * not trust any cached sector. 20998c2ecf20Sopenharmony_ci * As we may read out some stale data but higher layer is not reading 21008c2ecf20Sopenharmony_ci * that stale part. 21018c2ecf20Sopenharmony_ci * 21028c2ecf20Sopenharmony_ci * So here we always re-read everything in recovery path. 21038c2ecf20Sopenharmony_ci */ 21048c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 21058c2ecf20Sopenharmony_ci if (rbio->faila == stripe || rbio->failb == stripe) { 21068c2ecf20Sopenharmony_ci atomic_inc(&rbio->error); 21078c2ecf20Sopenharmony_ci continue; 21088c2ecf20Sopenharmony_ci } 21098c2ecf20Sopenharmony_ci 21108c2ecf20Sopenharmony_ci for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 21118c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, 21128c2ecf20Sopenharmony_ci rbio_stripe_page(rbio, stripe, pagenr), 21138c2ecf20Sopenharmony_ci stripe, pagenr, rbio->stripe_len); 21148c2ecf20Sopenharmony_ci if (ret < 0) 21158c2ecf20Sopenharmony_ci goto cleanup; 21168c2ecf20Sopenharmony_ci } 21178c2ecf20Sopenharmony_ci } 21188c2ecf20Sopenharmony_ci 21198c2ecf20Sopenharmony_ci bios_to_read = bio_list_size(&bio_list); 21208c2ecf20Sopenharmony_ci if (!bios_to_read) { 21218c2ecf20Sopenharmony_ci /* 21228c2ecf20Sopenharmony_ci * we might have no bios to read just because the pages 21238c2ecf20Sopenharmony_ci * were up to date, or we might have no bios to read because 21248c2ecf20Sopenharmony_ci * the devices were gone. 21258c2ecf20Sopenharmony_ci */ 21268c2ecf20Sopenharmony_ci if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { 21278c2ecf20Sopenharmony_ci __raid_recover_end_io(rbio); 21288c2ecf20Sopenharmony_ci return 0; 21298c2ecf20Sopenharmony_ci } else { 21308c2ecf20Sopenharmony_ci goto cleanup; 21318c2ecf20Sopenharmony_ci } 21328c2ecf20Sopenharmony_ci } 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_ci /* 21358c2ecf20Sopenharmony_ci * the bbio may be freed once we submit the last bio. 
Make sure 21368c2ecf20Sopenharmony_ci * not to touch it after that 21378c2ecf20Sopenharmony_ci */ 21388c2ecf20Sopenharmony_ci atomic_set(&rbio->stripes_pending, bios_to_read); 21398c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) { 21408c2ecf20Sopenharmony_ci bio->bi_private = rbio; 21418c2ecf20Sopenharmony_ci bio->bi_end_io = raid_recover_end_io; 21428c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_READ; 21438c2ecf20Sopenharmony_ci 21448c2ecf20Sopenharmony_ci btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 21458c2ecf20Sopenharmony_ci 21468c2ecf20Sopenharmony_ci submit_bio(bio); 21478c2ecf20Sopenharmony_ci } 21488c2ecf20Sopenharmony_ci 21498c2ecf20Sopenharmony_ci return 0; 21508c2ecf20Sopenharmony_ci 21518c2ecf20Sopenharmony_cicleanup: 21528c2ecf20Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 21538c2ecf20Sopenharmony_ci rbio->operation == BTRFS_RBIO_REBUILD_MISSING) 21548c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 21558c2ecf20Sopenharmony_ci 21568c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) 21578c2ecf20Sopenharmony_ci bio_put(bio); 21588c2ecf20Sopenharmony_ci 21598c2ecf20Sopenharmony_ci return -EIO; 21608c2ecf20Sopenharmony_ci} 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_ci/* 21638c2ecf20Sopenharmony_ci * the main entry point for reads from the higher layers. This 21648c2ecf20Sopenharmony_ci * is really only called when the normal read path had a failure, 21658c2ecf20Sopenharmony_ci * so we assume the bio they send down corresponds to a failed part 21668c2ecf20Sopenharmony_ci * of the drive. 21678c2ecf20Sopenharmony_ci */ 21688c2ecf20Sopenharmony_ciint raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, 21698c2ecf20Sopenharmony_ci struct btrfs_bio *bbio, u64 stripe_len, 21708c2ecf20Sopenharmony_ci int mirror_num, int generic_io) 21718c2ecf20Sopenharmony_ci{ 21728c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio; 21738c2ecf20Sopenharmony_ci int ret; 21748c2ecf20Sopenharmony_ci 21758c2ecf20Sopenharmony_ci if (generic_io) { 21768c2ecf20Sopenharmony_ci ASSERT(bbio->mirror_num == mirror_num); 21778c2ecf20Sopenharmony_ci btrfs_io_bio(bio)->mirror_num = mirror_num; 21788c2ecf20Sopenharmony_ci } 21798c2ecf20Sopenharmony_ci 21808c2ecf20Sopenharmony_ci rbio = alloc_rbio(fs_info, bbio, stripe_len); 21818c2ecf20Sopenharmony_ci if (IS_ERR(rbio)) { 21828c2ecf20Sopenharmony_ci if (generic_io) 21838c2ecf20Sopenharmony_ci btrfs_put_bbio(bbio); 21848c2ecf20Sopenharmony_ci return PTR_ERR(rbio); 21858c2ecf20Sopenharmony_ci } 21868c2ecf20Sopenharmony_ci 21878c2ecf20Sopenharmony_ci rbio->operation = BTRFS_RBIO_READ_REBUILD; 21888c2ecf20Sopenharmony_ci rbio_add_bio(rbio, bio); 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci rbio->faila = find_logical_bio_stripe(rbio, bio); 21918c2ecf20Sopenharmony_ci if (rbio->faila == -1) { 21928c2ecf20Sopenharmony_ci btrfs_warn(fs_info, 21938c2ecf20Sopenharmony_ci "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)", 21948c2ecf20Sopenharmony_ci __func__, (u64)bio->bi_iter.bi_sector << 9, 21958c2ecf20Sopenharmony_ci (u64)bio->bi_iter.bi_size, bbio->map_type); 21968c2ecf20Sopenharmony_ci if (generic_io) 21978c2ecf20Sopenharmony_ci btrfs_put_bbio(bbio); 21988c2ecf20Sopenharmony_ci kfree(rbio); 21998c2ecf20Sopenharmony_ci return -EIO; 22008c2ecf20Sopenharmony_ci } 22018c2ecf20Sopenharmony_ci 22028c2ecf20Sopenharmony_ci if (generic_io) { 22038c2ecf20Sopenharmony_ci 
		btrfs_bio_counter_inc_noblocked(fs_info);
		rbio->generic_bio_cnt = 1;
	} else {
		btrfs_get_bbio(bbio);
	}

	/*
	 * Loop retry:
	 * for 'mirror == 2', reconstruct from all other stripes.
	 * for 'mirror_num > 2', select a stripe to fail on every retry.
	 */
	if (mirror_num > 2) {
		/*
		 * 'mirror == 3' is to fail the p stripe and
		 * reconstruct from the q stripe.  'mirror > 3' is to
		 * fail a data stripe and reconstruct from p+q stripes.
		 */
		rbio->failb = rbio->real_stripes - (mirror_num - 1);
		ASSERT(rbio->failb > 0);
		if (rbio->failb <= rbio->faila)
			rbio->failb--;
	}

	ret = lock_stripe_add(rbio);

	/*
	 * __raid56_parity_recover will end the bio with any errors it hits.
	 * We don't want to return its error value up the stack because our
	 * caller will end up calling bio_endio with any nonzero return.
	 */
	if (ret == 0)
		__raid56_parity_recover(rbio);
	/*
	 * Otherwise our rbio has been added to the list of rbios that will
	 * be handled after the current lock owner is done.
	 */
	return 0;

}

static void rmw_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	raid56_rmw_stripe(rbio);
}

static void read_rebuild_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	__raid56_parity_recover(rbio);
}

/*
 * The following code is used to scrub/replace the parity stripe.
 *
 * The caller must have already increased bio_counter when getting @bbio.
 *
 * Note: we must make sure all the pages that are added to the scrub/replace
 * raid bio are correct and are not changed during the scrub/replace.  That
 * is, those pages only hold metadata or file data with checksums.
22708c2ecf20Sopenharmony_ci */
22718c2ecf20Sopenharmony_ci
22728c2ecf20Sopenharmony_cistruct btrfs_raid_bio *
22738c2ecf20Sopenharmony_ciraid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
22748c2ecf20Sopenharmony_ci struct btrfs_bio *bbio, u64 stripe_len,
22758c2ecf20Sopenharmony_ci struct btrfs_device *scrub_dev,
22768c2ecf20Sopenharmony_ci unsigned long *dbitmap, int stripe_nsectors)
22778c2ecf20Sopenharmony_ci{
22788c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio;
22798c2ecf20Sopenharmony_ci int i;
22808c2ecf20Sopenharmony_ci
22818c2ecf20Sopenharmony_ci rbio = alloc_rbio(fs_info, bbio, stripe_len);
22828c2ecf20Sopenharmony_ci if (IS_ERR(rbio))
22838c2ecf20Sopenharmony_ci return NULL;
22848c2ecf20Sopenharmony_ci bio_list_add(&rbio->bio_list, bio);
22858c2ecf20Sopenharmony_ci /*
22868c2ecf20Sopenharmony_ci * This is a special bio which is used to hold the completion handler
22878c2ecf20Sopenharmony_ci * and make the scrub rbio similar to the other types
22888c2ecf20Sopenharmony_ci */
22898c2ecf20Sopenharmony_ci ASSERT(!bio->bi_iter.bi_size);
22908c2ecf20Sopenharmony_ci rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
22918c2ecf20Sopenharmony_ci
22928c2ecf20Sopenharmony_ci /*
22938c2ecf20Sopenharmony_ci * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
22948c2ecf20Sopenharmony_ci * to the end position, so this search can start from the first parity
22958c2ecf20Sopenharmony_ci * stripe.
22968c2ecf20Sopenharmony_ci */
22978c2ecf20Sopenharmony_ci for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
22988c2ecf20Sopenharmony_ci if (bbio->stripes[i].dev == scrub_dev) {
22998c2ecf20Sopenharmony_ci rbio->scrubp = i;
23008c2ecf20Sopenharmony_ci break;
23018c2ecf20Sopenharmony_ci }
23028c2ecf20Sopenharmony_ci }
23038c2ecf20Sopenharmony_ci ASSERT(i < rbio->real_stripes);
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_ci /* For now we only support the case where sectorsize equals page size */
23068c2ecf20Sopenharmony_ci ASSERT(fs_info->sectorsize == PAGE_SIZE);
23078c2ecf20Sopenharmony_ci ASSERT(rbio->stripe_npages == stripe_nsectors);
23088c2ecf20Sopenharmony_ci bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
23098c2ecf20Sopenharmony_ci
23108c2ecf20Sopenharmony_ci /*
23118c2ecf20Sopenharmony_ci * We have already increased bio_counter when getting bbio; record it
23128c2ecf20Sopenharmony_ci * so we can free it at rbio_orig_end_io().
23138c2ecf20Sopenharmony_ci */
23148c2ecf20Sopenharmony_ci rbio->generic_bio_cnt = 1;
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci return rbio;
23178c2ecf20Sopenharmony_ci}
23188c2ecf20Sopenharmony_ci
23198c2ecf20Sopenharmony_ci/* Used for both parity scrub and rebuilding a missing device.
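 *
 * The page is indexed by its logical offset inside the data portion of the
 * full stripe: index = (logical - raid_map[0]) >> PAGE_SHIFT.  As a purely
 * illustrative example (made-up numbers): with 4K pages, a page at
 * raid_map[0] + 20K lands in bio_pages[5].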
*/ 23208c2ecf20Sopenharmony_civoid raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, 23218c2ecf20Sopenharmony_ci u64 logical) 23228c2ecf20Sopenharmony_ci{ 23238c2ecf20Sopenharmony_ci int stripe_offset; 23248c2ecf20Sopenharmony_ci int index; 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_ci ASSERT(logical >= rbio->bbio->raid_map[0]); 23278c2ecf20Sopenharmony_ci ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + 23288c2ecf20Sopenharmony_ci rbio->stripe_len * rbio->nr_data); 23298c2ecf20Sopenharmony_ci stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); 23308c2ecf20Sopenharmony_ci index = stripe_offset >> PAGE_SHIFT; 23318c2ecf20Sopenharmony_ci rbio->bio_pages[index] = page; 23328c2ecf20Sopenharmony_ci} 23338c2ecf20Sopenharmony_ci 23348c2ecf20Sopenharmony_ci/* 23358c2ecf20Sopenharmony_ci * We just scrub the parity that we have correct data on the same horizontal, 23368c2ecf20Sopenharmony_ci * so we needn't allocate all pages for all the stripes. 23378c2ecf20Sopenharmony_ci */ 23388c2ecf20Sopenharmony_cistatic int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) 23398c2ecf20Sopenharmony_ci{ 23408c2ecf20Sopenharmony_ci int i; 23418c2ecf20Sopenharmony_ci int bit; 23428c2ecf20Sopenharmony_ci int index; 23438c2ecf20Sopenharmony_ci struct page *page; 23448c2ecf20Sopenharmony_ci 23458c2ecf20Sopenharmony_ci for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { 23468c2ecf20Sopenharmony_ci for (i = 0; i < rbio->real_stripes; i++) { 23478c2ecf20Sopenharmony_ci index = i * rbio->stripe_npages + bit; 23488c2ecf20Sopenharmony_ci if (rbio->stripe_pages[index]) 23498c2ecf20Sopenharmony_ci continue; 23508c2ecf20Sopenharmony_ci 23518c2ecf20Sopenharmony_ci page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 23528c2ecf20Sopenharmony_ci if (!page) 23538c2ecf20Sopenharmony_ci return -ENOMEM; 23548c2ecf20Sopenharmony_ci rbio->stripe_pages[index] = page; 23558c2ecf20Sopenharmony_ci } 23568c2ecf20Sopenharmony_ci } 23578c2ecf20Sopenharmony_ci return 0; 23588c2ecf20Sopenharmony_ci} 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_cistatic noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 23618c2ecf20Sopenharmony_ci int need_check) 23628c2ecf20Sopenharmony_ci{ 23638c2ecf20Sopenharmony_ci struct btrfs_bio *bbio = rbio->bbio; 23648c2ecf20Sopenharmony_ci void **pointers = rbio->finish_pointers; 23658c2ecf20Sopenharmony_ci unsigned long *pbitmap = rbio->finish_pbitmap; 23668c2ecf20Sopenharmony_ci int nr_data = rbio->nr_data; 23678c2ecf20Sopenharmony_ci int stripe; 23688c2ecf20Sopenharmony_ci int pagenr; 23698c2ecf20Sopenharmony_ci bool has_qstripe; 23708c2ecf20Sopenharmony_ci struct page *p_page = NULL; 23718c2ecf20Sopenharmony_ci struct page *q_page = NULL; 23728c2ecf20Sopenharmony_ci struct bio_list bio_list; 23738c2ecf20Sopenharmony_ci struct bio *bio; 23748c2ecf20Sopenharmony_ci int is_replace = 0; 23758c2ecf20Sopenharmony_ci int ret; 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci bio_list_init(&bio_list); 23788c2ecf20Sopenharmony_ci 23798c2ecf20Sopenharmony_ci if (rbio->real_stripes - rbio->nr_data == 1) 23808c2ecf20Sopenharmony_ci has_qstripe = false; 23818c2ecf20Sopenharmony_ci else if (rbio->real_stripes - rbio->nr_data == 2) 23828c2ecf20Sopenharmony_ci has_qstripe = true; 23838c2ecf20Sopenharmony_ci else 23848c2ecf20Sopenharmony_ci BUG(); 23858c2ecf20Sopenharmony_ci 23868c2ecf20Sopenharmony_ci if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { 23878c2ecf20Sopenharmony_ci is_replace = 1; 23888c2ecf20Sopenharmony_ci bitmap_copy(pbitmap, 
rbio->dbitmap, rbio->stripe_npages); 23898c2ecf20Sopenharmony_ci } 23908c2ecf20Sopenharmony_ci 23918c2ecf20Sopenharmony_ci /* 23928c2ecf20Sopenharmony_ci * Because the higher layers(scrubber) are unlikely to 23938c2ecf20Sopenharmony_ci * use this area of the disk again soon, so don't cache 23948c2ecf20Sopenharmony_ci * it. 23958c2ecf20Sopenharmony_ci */ 23968c2ecf20Sopenharmony_ci clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 23978c2ecf20Sopenharmony_ci 23988c2ecf20Sopenharmony_ci if (!need_check) 23998c2ecf20Sopenharmony_ci goto writeback; 24008c2ecf20Sopenharmony_ci 24018c2ecf20Sopenharmony_ci p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 24028c2ecf20Sopenharmony_ci if (!p_page) 24038c2ecf20Sopenharmony_ci goto cleanup; 24048c2ecf20Sopenharmony_ci SetPageUptodate(p_page); 24058c2ecf20Sopenharmony_ci 24068c2ecf20Sopenharmony_ci if (has_qstripe) { 24078c2ecf20Sopenharmony_ci /* RAID6, allocate and map temp space for the Q stripe */ 24088c2ecf20Sopenharmony_ci q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 24098c2ecf20Sopenharmony_ci if (!q_page) { 24108c2ecf20Sopenharmony_ci __free_page(p_page); 24118c2ecf20Sopenharmony_ci goto cleanup; 24128c2ecf20Sopenharmony_ci } 24138c2ecf20Sopenharmony_ci SetPageUptodate(q_page); 24148c2ecf20Sopenharmony_ci pointers[rbio->real_stripes - 1] = kmap(q_page); 24158c2ecf20Sopenharmony_ci } 24168c2ecf20Sopenharmony_ci 24178c2ecf20Sopenharmony_ci atomic_set(&rbio->error, 0); 24188c2ecf20Sopenharmony_ci 24198c2ecf20Sopenharmony_ci /* Map the parity stripe just once */ 24208c2ecf20Sopenharmony_ci pointers[nr_data] = kmap(p_page); 24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_ci for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 24238c2ecf20Sopenharmony_ci struct page *p; 24248c2ecf20Sopenharmony_ci void *parity; 24258c2ecf20Sopenharmony_ci /* first collect one page from each data stripe */ 24268c2ecf20Sopenharmony_ci for (stripe = 0; stripe < nr_data; stripe++) { 24278c2ecf20Sopenharmony_ci p = page_in_rbio(rbio, stripe, pagenr, 0); 24288c2ecf20Sopenharmony_ci pointers[stripe] = kmap(p); 24298c2ecf20Sopenharmony_ci } 24308c2ecf20Sopenharmony_ci 24318c2ecf20Sopenharmony_ci if (has_qstripe) { 24328c2ecf20Sopenharmony_ci /* RAID6, call the library function to fill in our P/Q */ 24338c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 24348c2ecf20Sopenharmony_ci pointers); 24358c2ecf20Sopenharmony_ci } else { 24368c2ecf20Sopenharmony_ci /* raid5 */ 24378c2ecf20Sopenharmony_ci copy_page(pointers[nr_data], pointers[0]); 24388c2ecf20Sopenharmony_ci run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 24398c2ecf20Sopenharmony_ci } 24408c2ecf20Sopenharmony_ci 24418c2ecf20Sopenharmony_ci /* Check scrubbing parity and repair it */ 24428c2ecf20Sopenharmony_ci p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 24438c2ecf20Sopenharmony_ci parity = kmap(p); 24448c2ecf20Sopenharmony_ci if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) 24458c2ecf20Sopenharmony_ci copy_page(parity, pointers[rbio->scrubp]); 24468c2ecf20Sopenharmony_ci else 24478c2ecf20Sopenharmony_ci /* Parity is right, needn't writeback */ 24488c2ecf20Sopenharmony_ci bitmap_clear(rbio->dbitmap, pagenr, 1); 24498c2ecf20Sopenharmony_ci kunmap(p); 24508c2ecf20Sopenharmony_ci 24518c2ecf20Sopenharmony_ci for (stripe = 0; stripe < nr_data; stripe++) 24528c2ecf20Sopenharmony_ci kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 24538c2ecf20Sopenharmony_ci } 24548c2ecf20Sopenharmony_ci 24558c2ecf20Sopenharmony_ci kunmap(p_page); 24568c2ecf20Sopenharmony_ci __free_page(p_page); 
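 /*
 * The loop above effectively recomputes the expected parity and compares
 * it with what is on disk.  A minimal model of the RAID5 case (illustrative
 * sketch only, not kernel code; d0..dn and parity are the mapped pages):
 *
 *	u8 expect;
 *	for (i = 0; i < PAGE_SIZE; i++) {
 *		expect = d0[i] ^ d1[i] ^ ... ^ dn[i];
 *		if (expect != parity[i])
 *			rewrite_this_parity_page = true;
 *	}
 *
 * The real code does this per page with copy_page() + run_xor() and a
 * single memcmp(); for RAID6 the P/Q pair comes from
 * raid6_call.gen_syndrome() instead.  On a mismatch the recomputed parity
 * is copied over the old one and the page stays marked in dbitmap so the
 * writeback below will rewrite it; matching pages are cleared from dbitmap.
 */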
24578c2ecf20Sopenharmony_ci if (q_page) { 24588c2ecf20Sopenharmony_ci kunmap(q_page); 24598c2ecf20Sopenharmony_ci __free_page(q_page); 24608c2ecf20Sopenharmony_ci } 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ciwriteback: 24638c2ecf20Sopenharmony_ci /* 24648c2ecf20Sopenharmony_ci * time to start writing. Make bios for everything from the 24658c2ecf20Sopenharmony_ci * higher layers (the bio_list in our rbio) and our p/q. Ignore 24668c2ecf20Sopenharmony_ci * everything else. 24678c2ecf20Sopenharmony_ci */ 24688c2ecf20Sopenharmony_ci for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 24698c2ecf20Sopenharmony_ci struct page *page; 24708c2ecf20Sopenharmony_ci 24718c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 24728c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, 24738c2ecf20Sopenharmony_ci page, rbio->scrubp, pagenr, rbio->stripe_len); 24748c2ecf20Sopenharmony_ci if (ret) 24758c2ecf20Sopenharmony_ci goto cleanup; 24768c2ecf20Sopenharmony_ci } 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_ci if (!is_replace) 24798c2ecf20Sopenharmony_ci goto submit_write; 24808c2ecf20Sopenharmony_ci 24818c2ecf20Sopenharmony_ci for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { 24828c2ecf20Sopenharmony_ci struct page *page; 24838c2ecf20Sopenharmony_ci 24848c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 24858c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, page, 24868c2ecf20Sopenharmony_ci bbio->tgtdev_map[rbio->scrubp], 24878c2ecf20Sopenharmony_ci pagenr, rbio->stripe_len); 24888c2ecf20Sopenharmony_ci if (ret) 24898c2ecf20Sopenharmony_ci goto cleanup; 24908c2ecf20Sopenharmony_ci } 24918c2ecf20Sopenharmony_ci 24928c2ecf20Sopenharmony_cisubmit_write: 24938c2ecf20Sopenharmony_ci nr_data = bio_list_size(&bio_list); 24948c2ecf20Sopenharmony_ci if (!nr_data) { 24958c2ecf20Sopenharmony_ci /* Every parity is right */ 24968c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_OK); 24978c2ecf20Sopenharmony_ci return; 24988c2ecf20Sopenharmony_ci } 24998c2ecf20Sopenharmony_ci 25008c2ecf20Sopenharmony_ci atomic_set(&rbio->stripes_pending, nr_data); 25018c2ecf20Sopenharmony_ci 25028c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) { 25038c2ecf20Sopenharmony_ci bio->bi_private = rbio; 25048c2ecf20Sopenharmony_ci bio->bi_end_io = raid_write_end_io; 25058c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_WRITE; 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci submit_bio(bio); 25088c2ecf20Sopenharmony_ci } 25098c2ecf20Sopenharmony_ci return; 25108c2ecf20Sopenharmony_ci 25118c2ecf20Sopenharmony_cicleanup: 25128c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 25138c2ecf20Sopenharmony_ci 25148c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) 25158c2ecf20Sopenharmony_ci bio_put(bio); 25168c2ecf20Sopenharmony_ci} 25178c2ecf20Sopenharmony_ci 25188c2ecf20Sopenharmony_cistatic inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) 25198c2ecf20Sopenharmony_ci{ 25208c2ecf20Sopenharmony_ci if (stripe >= 0 && stripe < rbio->nr_data) 25218c2ecf20Sopenharmony_ci return 1; 25228c2ecf20Sopenharmony_ci return 0; 25238c2ecf20Sopenharmony_ci} 25248c2ecf20Sopenharmony_ci 25258c2ecf20Sopenharmony_ci/* 25268c2ecf20Sopenharmony_ci * While we're doing the parity check and repair, we could have errors 25278c2ecf20Sopenharmony_ci * in reading pages off the disk. This checks for errors and if we're 25288c2ecf20Sopenharmony_ci * not able to read the page it'll trigger parity reconstruction. 
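 *
 * As a summary of the checks below: if only parity failed, we simply
 * regenerate it; a failed data stripe on RAID5 cannot be repaired here at
 * all, because the only parity is the one being scrubbed; on RAID6 a failed
 * data stripe is recovered only when the other failure is the parity stripe
 * being scrubbed (so the remaining, trusted parity can rebuild the data);
 * every other combination ends the rbio with an error.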
The
25298c2ecf20Sopenharmony_ci * parity scrub will be finished after we've reconstructed the failed
25308c2ecf20Sopenharmony_ci * stripes
25318c2ecf20Sopenharmony_ci */
25328c2ecf20Sopenharmony_cistatic void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
25338c2ecf20Sopenharmony_ci{
25348c2ecf20Sopenharmony_ci if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
25358c2ecf20Sopenharmony_ci goto cleanup;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci if (rbio->faila >= 0 || rbio->failb >= 0) {
25388c2ecf20Sopenharmony_ci int dfail = 0, failp = -1;
25398c2ecf20Sopenharmony_ci
25408c2ecf20Sopenharmony_ci if (is_data_stripe(rbio, rbio->faila))
25418c2ecf20Sopenharmony_ci dfail++;
25428c2ecf20Sopenharmony_ci else if (is_parity_stripe(rbio->faila))
25438c2ecf20Sopenharmony_ci failp = rbio->faila;
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ci if (is_data_stripe(rbio, rbio->failb))
25468c2ecf20Sopenharmony_ci dfail++;
25478c2ecf20Sopenharmony_ci else if (is_parity_stripe(rbio->failb))
25488c2ecf20Sopenharmony_ci failp = rbio->failb;
25498c2ecf20Sopenharmony_ci
25508c2ecf20Sopenharmony_ci /*
25518c2ecf20Sopenharmony_ci * Because we cannot use the parity stripe we are scrubbing to
25528c2ecf20Sopenharmony_ci * repair data, our repair capability is reduced.
25538c2ecf20Sopenharmony_ci * (In the case of RAID5 we cannot repair anything.)
25548c2ecf20Sopenharmony_ci */
25558c2ecf20Sopenharmony_ci if (dfail > rbio->bbio->max_errors - 1)
25568c2ecf20Sopenharmony_ci goto cleanup;
25578c2ecf20Sopenharmony_ci
25588c2ecf20Sopenharmony_ci /*
25598c2ecf20Sopenharmony_ci * If all the data stripes are good, only the parity is bad;
25608c2ecf20Sopenharmony_ci * just repair the parity.
25618c2ecf20Sopenharmony_ci */
25628c2ecf20Sopenharmony_ci if (dfail == 0) {
25638c2ecf20Sopenharmony_ci finish_parity_scrub(rbio, 0);
25648c2ecf20Sopenharmony_ci return;
25658c2ecf20Sopenharmony_ci }
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ci /*
25688c2ecf20Sopenharmony_ci * Getting here means we have one corrupted data stripe on RAID6.
25698c2ecf20Sopenharmony_ci * We can repair it only if the other failed stripe is the parity
25708c2ecf20Sopenharmony_ci * being scrubbed, in which case the remaining, trusted parity can
25718c2ecf20Sopenharmony_ci * rebuild the data; otherwise we cannot repair the data stripe.
25728c2ecf20Sopenharmony_ci */
25738c2ecf20Sopenharmony_ci if (failp != rbio->scrubp)
25748c2ecf20Sopenharmony_ci goto cleanup;
25758c2ecf20Sopenharmony_ci
25768c2ecf20Sopenharmony_ci __raid_recover_end_io(rbio);
25778c2ecf20Sopenharmony_ci } else {
25788c2ecf20Sopenharmony_ci finish_parity_scrub(rbio, 1);
25798c2ecf20Sopenharmony_ci }
25808c2ecf20Sopenharmony_ci return;
25818c2ecf20Sopenharmony_ci
25828c2ecf20Sopenharmony_cicleanup:
25838c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR);
25848c2ecf20Sopenharmony_ci}
25858c2ecf20Sopenharmony_ci
25868c2ecf20Sopenharmony_ci/*
25878c2ecf20Sopenharmony_ci * end io for the read phase of the parity scrub. All the bios here are
25888c2ecf20Sopenharmony_ci * physical stripe bios we've read from the disk so we can check and
25898c2ecf20Sopenharmony_ci * recalculate the parity of the stripe.
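 *
 * Completion counting follows the same scheme as the other submit paths in
 * this file: stripes_pending is set to the number of bios submitted, every
 * end_io decrements it, and only the bio that drops it to zero (via
 * atomic_dec_and_test) advances the state machine.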
25908c2ecf20Sopenharmony_ci *
25918c2ecf20Sopenharmony_ci * This will usually kick off finish_parity_scrub once all the bios are read
25928c2ecf20Sopenharmony_ci * in, but it may trigger parity reconstruction if we had any errors along the way
25938c2ecf20Sopenharmony_ci */
25948c2ecf20Sopenharmony_cistatic void raid56_parity_scrub_end_io(struct bio *bio)
25958c2ecf20Sopenharmony_ci{
25968c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio = bio->bi_private;
25978c2ecf20Sopenharmony_ci
25988c2ecf20Sopenharmony_ci if (bio->bi_status)
25998c2ecf20Sopenharmony_ci fail_bio_stripe(rbio, bio);
26008c2ecf20Sopenharmony_ci else
26018c2ecf20Sopenharmony_ci set_bio_pages_uptodate(bio);
26028c2ecf20Sopenharmony_ci
26038c2ecf20Sopenharmony_ci bio_put(bio);
26048c2ecf20Sopenharmony_ci
26058c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&rbio->stripes_pending))
26068c2ecf20Sopenharmony_ci return;
26078c2ecf20Sopenharmony_ci
26088c2ecf20Sopenharmony_ci /*
26098c2ecf20Sopenharmony_ci * this will normally call finish_parity_scrub to start our write
26108c2ecf20Sopenharmony_ci * but if there are any failed stripes we'll reconstruct
26118c2ecf20Sopenharmony_ci * from parity first
26128c2ecf20Sopenharmony_ci */
26138c2ecf20Sopenharmony_ci validate_rbio_for_parity_scrub(rbio);
26148c2ecf20Sopenharmony_ci}
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_cistatic void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
26178c2ecf20Sopenharmony_ci{
26188c2ecf20Sopenharmony_ci int bios_to_read = 0;
26198c2ecf20Sopenharmony_ci struct bio_list bio_list;
26208c2ecf20Sopenharmony_ci int ret;
26218c2ecf20Sopenharmony_ci int pagenr;
26228c2ecf20Sopenharmony_ci int stripe;
26238c2ecf20Sopenharmony_ci struct bio *bio;
26248c2ecf20Sopenharmony_ci
26258c2ecf20Sopenharmony_ci bio_list_init(&bio_list);
26268c2ecf20Sopenharmony_ci
26278c2ecf20Sopenharmony_ci ret = alloc_rbio_essential_pages(rbio);
26288c2ecf20Sopenharmony_ci if (ret)
26298c2ecf20Sopenharmony_ci goto cleanup;
26308c2ecf20Sopenharmony_ci
26318c2ecf20Sopenharmony_ci atomic_set(&rbio->error, 0);
26328c2ecf20Sopenharmony_ci /*
26338c2ecf20Sopenharmony_ci * build a list of bios to read all the missing parts of this
26348c2ecf20Sopenharmony_ci * stripe
26358c2ecf20Sopenharmony_ci */
26368c2ecf20Sopenharmony_ci for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
26378c2ecf20Sopenharmony_ci for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
26388c2ecf20Sopenharmony_ci struct page *page;
26398c2ecf20Sopenharmony_ci /*
26408c2ecf20Sopenharmony_ci * we want to find all the pages missing from
26418c2ecf20Sopenharmony_ci * the rbio and read them from the disk. If
26428c2ecf20Sopenharmony_ci * page_in_rbio finds a page in the bio list
26438c2ecf20Sopenharmony_ci * we don't need to read it off the stripe.
26448c2ecf20Sopenharmony_ci */
26458c2ecf20Sopenharmony_ci page = page_in_rbio(rbio, stripe, pagenr, 1);
26468c2ecf20Sopenharmony_ci if (page)
26478c2ecf20Sopenharmony_ci continue;
26488c2ecf20Sopenharmony_ci
26498c2ecf20Sopenharmony_ci page = rbio_stripe_page(rbio, stripe, pagenr);
26508c2ecf20Sopenharmony_ci /*
26518c2ecf20Sopenharmony_ci * the bio cache may have handed us an uptodate
26528c2ecf20Sopenharmony_ci * page.
If so, be happy and use it 26538c2ecf20Sopenharmony_ci */ 26548c2ecf20Sopenharmony_ci if (PageUptodate(page)) 26558c2ecf20Sopenharmony_ci continue; 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_ci ret = rbio_add_io_page(rbio, &bio_list, page, 26588c2ecf20Sopenharmony_ci stripe, pagenr, rbio->stripe_len); 26598c2ecf20Sopenharmony_ci if (ret) 26608c2ecf20Sopenharmony_ci goto cleanup; 26618c2ecf20Sopenharmony_ci } 26628c2ecf20Sopenharmony_ci } 26638c2ecf20Sopenharmony_ci 26648c2ecf20Sopenharmony_ci bios_to_read = bio_list_size(&bio_list); 26658c2ecf20Sopenharmony_ci if (!bios_to_read) { 26668c2ecf20Sopenharmony_ci /* 26678c2ecf20Sopenharmony_ci * this can happen if others have merged with 26688c2ecf20Sopenharmony_ci * us, it means there is nothing left to read. 26698c2ecf20Sopenharmony_ci * But if there are missing devices it may not be 26708c2ecf20Sopenharmony_ci * safe to do the full stripe write yet. 26718c2ecf20Sopenharmony_ci */ 26728c2ecf20Sopenharmony_ci goto finish; 26738c2ecf20Sopenharmony_ci } 26748c2ecf20Sopenharmony_ci 26758c2ecf20Sopenharmony_ci /* 26768c2ecf20Sopenharmony_ci * the bbio may be freed once we submit the last bio. Make sure 26778c2ecf20Sopenharmony_ci * not to touch it after that 26788c2ecf20Sopenharmony_ci */ 26798c2ecf20Sopenharmony_ci atomic_set(&rbio->stripes_pending, bios_to_read); 26808c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) { 26818c2ecf20Sopenharmony_ci bio->bi_private = rbio; 26828c2ecf20Sopenharmony_ci bio->bi_end_io = raid56_parity_scrub_end_io; 26838c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_READ; 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 26868c2ecf20Sopenharmony_ci 26878c2ecf20Sopenharmony_ci submit_bio(bio); 26888c2ecf20Sopenharmony_ci } 26898c2ecf20Sopenharmony_ci /* the actual write will happen once the reads are done */ 26908c2ecf20Sopenharmony_ci return; 26918c2ecf20Sopenharmony_ci 26928c2ecf20Sopenharmony_cicleanup: 26938c2ecf20Sopenharmony_ci rbio_orig_end_io(rbio, BLK_STS_IOERR); 26948c2ecf20Sopenharmony_ci 26958c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&bio_list))) 26968c2ecf20Sopenharmony_ci bio_put(bio); 26978c2ecf20Sopenharmony_ci 26988c2ecf20Sopenharmony_ci return; 26998c2ecf20Sopenharmony_ci 27008c2ecf20Sopenharmony_cifinish: 27018c2ecf20Sopenharmony_ci validate_rbio_for_parity_scrub(rbio); 27028c2ecf20Sopenharmony_ci} 27038c2ecf20Sopenharmony_ci 27048c2ecf20Sopenharmony_cistatic void scrub_parity_work(struct btrfs_work *work) 27058c2ecf20Sopenharmony_ci{ 27068c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio; 27078c2ecf20Sopenharmony_ci 27088c2ecf20Sopenharmony_ci rbio = container_of(work, struct btrfs_raid_bio, work); 27098c2ecf20Sopenharmony_ci raid56_parity_scrub_stripe(rbio); 27108c2ecf20Sopenharmony_ci} 27118c2ecf20Sopenharmony_ci 27128c2ecf20Sopenharmony_civoid raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) 27138c2ecf20Sopenharmony_ci{ 27148c2ecf20Sopenharmony_ci if (!lock_stripe_add(rbio)) 27158c2ecf20Sopenharmony_ci start_async_work(rbio, scrub_parity_work); 27168c2ecf20Sopenharmony_ci} 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci/* The following code is used for dev replace of a missing RAID 5/6 device. 
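 *
 * A rough sketch of how a caller might drive it (illustrative only; the
 * real user is the scrub code, and building @bbio and the pages to be
 * rebuilt is assumed to happen there):
 *
 *	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
 *	if (!rbio)
 *		return -ENOMEM;
 *	for (each page to be rebuilt, at logical address 'logical')
 *		raid56_add_scrub_pages(rbio, page, logical);
 *	raid56_submit_missing_rbio(rbio);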
*/
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_cistruct btrfs_raid_bio *
27218c2ecf20Sopenharmony_ciraid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
27228c2ecf20Sopenharmony_ci struct btrfs_bio *bbio, u64 length)
27238c2ecf20Sopenharmony_ci{
27248c2ecf20Sopenharmony_ci struct btrfs_raid_bio *rbio;
27258c2ecf20Sopenharmony_ci
27268c2ecf20Sopenharmony_ci rbio = alloc_rbio(fs_info, bbio, length);
27278c2ecf20Sopenharmony_ci if (IS_ERR(rbio))
27288c2ecf20Sopenharmony_ci return NULL;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
27318c2ecf20Sopenharmony_ci bio_list_add(&rbio->bio_list, bio);
27328c2ecf20Sopenharmony_ci /*
27338c2ecf20Sopenharmony_ci * This is a special bio which is used to hold the completion handler
27348c2ecf20Sopenharmony_ci * and make the rebuild rbio similar to the other types
27358c2ecf20Sopenharmony_ci */
27368c2ecf20Sopenharmony_ci ASSERT(!bio->bi_iter.bi_size);
27378c2ecf20Sopenharmony_ci
27388c2ecf20Sopenharmony_ci rbio->faila = find_logical_bio_stripe(rbio, bio);
27398c2ecf20Sopenharmony_ci if (rbio->faila == -1) {
27408c2ecf20Sopenharmony_ci BUG();
27418c2ecf20Sopenharmony_ci kfree(rbio);
27428c2ecf20Sopenharmony_ci return NULL;
27438c2ecf20Sopenharmony_ci }
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci /*
27468c2ecf20Sopenharmony_ci * When we get the bbio, we have already increased bio_counter; record
27478c2ecf20Sopenharmony_ci * it so we can free it at rbio_orig_end_io()
27488c2ecf20Sopenharmony_ci */
27498c2ecf20Sopenharmony_ci rbio->generic_bio_cnt = 1;
27508c2ecf20Sopenharmony_ci
27518c2ecf20Sopenharmony_ci return rbio;
27528c2ecf20Sopenharmony_ci}
27538c2ecf20Sopenharmony_ci
27548c2ecf20Sopenharmony_civoid raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
27558c2ecf20Sopenharmony_ci{
27568c2ecf20Sopenharmony_ci if (!lock_stripe_add(rbio))
27578c2ecf20Sopenharmony_ci start_async_work(rbio, read_rebuild_work);
27588c2ecf20Sopenharmony_ci}
2759