162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2012 Fusion-io All rights reserved. 462306a36Sopenharmony_ci * Copyright (C) 2012 Intel Corp. All rights reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/sched.h> 862306a36Sopenharmony_ci#include <linux/bio.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <linux/blkdev.h> 1162306a36Sopenharmony_ci#include <linux/raid/pq.h> 1262306a36Sopenharmony_ci#include <linux/hash.h> 1362306a36Sopenharmony_ci#include <linux/list_sort.h> 1462306a36Sopenharmony_ci#include <linux/raid/xor.h> 1562306a36Sopenharmony_ci#include <linux/mm.h> 1662306a36Sopenharmony_ci#include "messages.h" 1762306a36Sopenharmony_ci#include "misc.h" 1862306a36Sopenharmony_ci#include "ctree.h" 1962306a36Sopenharmony_ci#include "disk-io.h" 2062306a36Sopenharmony_ci#include "volumes.h" 2162306a36Sopenharmony_ci#include "raid56.h" 2262306a36Sopenharmony_ci#include "async-thread.h" 2362306a36Sopenharmony_ci#include "file-item.h" 2462306a36Sopenharmony_ci#include "btrfs_inode.h" 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/* set when additional merges to this rbio are not allowed */ 2762306a36Sopenharmony_ci#define RBIO_RMW_LOCKED_BIT 1 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci/* 3062306a36Sopenharmony_ci * set when this rbio is sitting in the hash, but it is just a cache 3162306a36Sopenharmony_ci * of past RMW 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#define RBIO_CACHE_BIT 2 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * set when it is safe to trust the stripe_pages for caching 3762306a36Sopenharmony_ci */ 3862306a36Sopenharmony_ci#define RBIO_CACHE_READY_BIT 3 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci#define RBIO_CACHE_SIZE 1024 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#define BTRFS_STRIPE_HASH_TABLE_BITS 11 

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

/*
 * A bvec like structure to present a sector inside a page.
 *
 * Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
 */
struct sector_ptr {
	struct page *page;
	unsigned int pgoff:24;
	unsigned int uptodate:8;
};

static void rmw_rbio_work(struct work_struct *work);
static void rmw_rbio_work_locked(struct work_struct *work);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
static void scrub_rbio_work_locked(struct work_struct *work);

/* Free the per-rbio pointer arrays, but not the rbio structure itself. */
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
{
	bitmap_free(rbio->error_bitmap);
	kfree(rbio->stripe_pages);
	kfree(rbio->bio_sectors);
	kfree(rbio->stripe_sectors);
	kfree(rbio->finish_pointers);
}

/*
 * Drop one reference; on the last reference free the stripe pages, the
 * bioc reference and the rbio itself.
 */
static void free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;

	if (!refcount_dec_and_test(&rbio->refs))
		return;

	/* The last ref must never be dropped while the rbio is still cached,
	 * hashed or has pending bios. */
	WARN_ON(!list_empty(&rbio->stripe_cache));
	WARN_ON(!list_empty(&rbio->hash_list));
	WARN_ON(!bio_list_empty(&rbio->bio_list));

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i]) {
			__free_page(rbio->stripe_pages[i]);
			rbio->stripe_pages[i] = NULL;
		}
	}

	btrfs_put_bioc(rbio->bioc);
	free_raid_bio_pointers(rbio);
	kfree(rbio);
}

/* Queue @work_func for this rbio on the filesystem's RMW workqueue. */
static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func)
{
	INIT_WORK(&rbio->work, work_func);
	queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}

/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, starting with order 4 and can go as high as
	 * order 7 in case lock debugging is turned on.
	 *
	 * Try harder to allocate and fallback to vmalloc to lower the chance
	 * of a failing mount.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	/*
	 * Publish the table unless another thread raced us and installed one
	 * first; in that case cmpxchg() returns our unused copy via @x and we
	 * free it (kvfree(NULL) is a no-op when we won the race).
	 */
	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	kvfree(x);
	return 0;
}

/*
 * caching an rbio means to copy anything from the
 * bio_sectors array into the stripe_pages array.  We
 * use the page uptodate bit in the stripe cache array
 * to indicate if it has valid data
 *
 * once the caching is done, we set the cache ready
 * bit.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	int ret;

	/* If we can't back the cache with pages, silently skip caching. */
	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_sectors; i++) {
		/* Some range not covered by bio (partial write), skip it */
		if (!rbio->bio_sectors[i].page) {
			/*
			 * Even if the sector is not covered by bio, if it is
			 * a data sector it should still be uptodate as it is
			 * read from disk.
			 */
			if (i < rbio->nr_data * rbio->stripe_nsectors)
				ASSERT(rbio->stripe_sectors[i].uptodate);
			continue;
		}

		ASSERT(rbio->stripe_sectors[i].page);
		memcpy_page(rbio->stripe_sectors[i].page,
			    rbio->stripe_sectors[i].pgoff,
			    rbio->bio_sectors[i].page,
			    rbio->bio_sectors[i].pgoff,
			    rbio->bioc->fs_info->sectorsize);
		rbio->stripe_sectors[i].uptodate = 1;
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/*
 * we hash on the first logical address of the stripe
 */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bioc->full_stripe_logical;

	/*
	 * we shift down quite a bit.  We're using byte
	 * addressing, and most of the lower bits are zeros.
	 * This tends to upset hash_64, and it consistently
	 * returns just one or two different values.
	 *
	 * shifting off the lower bits fixes things.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}

/*
 * Return true if every sector inside page @page_nr of the stripe cache is
 * marked uptodate.
 */
static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
				       unsigned int page_nr)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
	int i;

	ASSERT(page_nr < rbio->nr_pages);

	for (i = sectors_per_page * page_nr;
	     i < sectors_per_page * page_nr + sectors_per_page;
	     i++) {
		if (!rbio->stripe_sectors[i].uptodate)
			return false;
	}
	return true;
}

/*
 * Update the stripe_sectors[] array to use correct page and pgoff
 *
 * Should be called every time any page pointer in stripes_pages[] got modified.
 */
static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	u32 offset;
	int i;

	for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
		int page_index = offset >> PAGE_SHIFT;

		ASSERT(page_index < rbio->nr_pages);
		rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
		rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
	}
}

/*
 * Move page @page_nr from @src to @dest (freeing any page @dest already
 * held there) and mark all of its sectors uptodate in @dest.
 */
static void steal_rbio_page(struct btrfs_raid_bio *src,
			    struct btrfs_raid_bio *dest, int page_nr)
{
	const u32 sectorsize = src->bioc->fs_info->sectorsize;
	const u32 sectors_per_page = PAGE_SIZE / sectorsize;
	int i;

	if (dest->stripe_pages[page_nr])
		__free_page(dest->stripe_pages[page_nr]);
	dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
	src->stripe_pages[page_nr] = NULL;

	/* Also update the sector->uptodate bits. */
	for (i = sectors_per_page * page_nr;
	     i < sectors_per_page * page_nr + sectors_per_page; i++)
		dest->stripe_sectors[i].uptodate = true;
}

/* Return true if stripe page @page_nr belongs to a data stripe (not P/Q). */
static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
{
	const int sector_nr = (page_nr << PAGE_SHIFT) >>
			      rbio->bioc->fs_info->sectorsize_bits;

	/*
	 * We have ensured PAGE_SIZE is aligned with sectorsize, thus
	 * we won't have a page which is half data half parity.
	 *
	 * Thus if the first sector of the page belongs to data stripes, then
	 * the full page belongs to data stripes.
	 */
	return (sector_nr < rbio->nr_data * rbio->stripe_nsectors);
}

/*
 * Stealing an rbio means taking all the uptodate pages from the stripe array
 * in the source rbio and putting them into the destination rbio.
 *
 * This will also update the involved stripe_sectors[] which are referring to
 * the old pages.
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;

	/* Only an rbio whose cache was fully populated can donate pages. */
	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		struct page *p = src->stripe_pages[i];

		/*
		 * We don't need to steal P/Q pages as they will always be
		 * regenerated for RMW or full write anyway.
		 */
		if (!is_data_stripe_page(src, i))
			continue;

		/*
		 * If @src already has RBIO_CACHE_READY_BIT, it should have
		 * all data stripe pages present and uptodate.
		 */
		ASSERT(p);
		ASSERT(full_page_sectors_uptodate(src, i));
		steal_rbio_page(src, dest, i);
	}
	index_stripe_sectors(dest);
	index_stripe_sectors(src);
}

/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination.  The victim should
 * be discarded afterwards.
 *
 * must be called with dest->rbio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	/* Also inherit the bitmaps from @victim. */
	bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
		  dest->stripe_nsectors);
	bio_list_init(&victim->bio_list);
}

/*
 * used to prune items that are in the cache.  The caller
 * must hold the hash table lock.
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/* hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/* if the bio list isn't empty, this rbio is
		 * still involved in an IO.  We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_table, and drop
		 * the corresponding ref
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				refcount_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	/* Drop the cache-list ref outside of all the spinlocks. */
	if (freeit)
		free_raid_bio(rbio);
}

/*
 * prune a given rbio from the cache
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;

	/* Unlocked fast path; rechecked under cache_lock by the helper. */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock(&table->cache_lock);
	__remove_rbio_from_cache(rbio);
	spin_unlock(&table->cache_lock);
}

/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

	spin_lock(&table->cache_lock);
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock(&table->cache_lock);
}

/*
 * remove all cached entries and free the hash table
 * used by unmount
 */
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
{
	if (!info->stripe_hash_table)
		return;
	btrfs_clear_rbio_cache(info);
	kvfree(info->stripe_hash_table);
	info->stripe_hash_table = NULL;
}

/*
 * insert an rbio into the stripe cache.  It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;

	/* Only an rbio with fully cached pages may enter the cache. */
	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock(&table->cache_lock);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	/* Over budget: evict the LRU tail, unless it is the rbio we just added. */
	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock(&table->cache_lock);
}

/*
 * helper function to run the xor_blocks api.  It is only
 * able to do MAX_XOR_BLOCKS at a time, so we need to
 * loop through.  pages[src_cnt] is the destination.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}

/*
 * Returns true if the bio list inside this rbio covers an entire stripe (no
 * rmw required).
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long size = rbio->bio_list_bytes;
	int ret = 1;

	spin_lock(&rbio->bio_list_lock);
	if (size != rbio->nr_data * BTRFS_STRIPE_LEN)
		ret = 0;
	BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN);
	spin_unlock(&rbio->bio_list_lock);

	return ret;
}

/*
 * returns 1 if it is safe to merge two rbios together.
 * The merging is safe if the two rbios correspond to
 * the same stripe and if they are both going in the same
 * direction (read vs write), and if neither one is
 * locked for final IO
 *
 * The caller is responsible for locking such that
 * rmw_locked is safe to test
 */
static int rbio_can_merge(struct btrfs_raid_bio *last,
			  struct btrfs_raid_bio *cur)
{
	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
		return 0;

	/*
	 * we can't merge with cached rbios, since the
	 * idea is that when we merge the destination
	 * rbio is going to run our IO for us.  We can
	 * steal from cached rbios though, other functions
	 * handle that.
	 */
	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
	    test_bit(RBIO_CACHE_BIT, &cur->flags))
		return 0;

	if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
		return 0;

	/* we can't merge with different operations */
	if (last->operation != cur->operation)
		return 0;
	/*
	 * We've need read the full stripe from the drive.
	 * check and repair the parity and write the new results.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;

	if (last->operation == BTRFS_RBIO_READ_REBUILD)
		return 0;

	return 1;
}

/*
 * Map (stripe_nr, sector_nr) to a flat index into the per-rbio sector arrays.
 */
static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
					     unsigned int stripe_nr,
					     unsigned int sector_nr)
{
	ASSERT(stripe_nr < rbio->real_stripes);
	ASSERT(sector_nr < rbio->stripe_nsectors);

	return stripe_nr * rbio->stripe_nsectors + sector_nr;
}

/* Return a sector from rbio->stripe_sectors, not from the bio list */
static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
					     unsigned int stripe_nr,
					     unsigned int sector_nr)
{
	return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
							      sector_nr)];
}

/* Grab a sector inside P stripe */
static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
					      unsigned int sector_nr)
{
	return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
}

/* Grab a sector inside Q stripe, return NULL if not RAID6 */
static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
					      unsigned int sector_nr)
{
	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;
	return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
}

/*
 * The first stripe in the table for a logical address
 * has the lock.  rbios are added in one of three ways:
 *
 * 1) Nobody has the stripe locked yet.  The rbio is given
 * the lock and 0 is returned.  The caller must start the IO
 * themselves.
 *
 * 2) Someone has the stripe locked, but we're able to merge
 * with the lock owner.  The rbio is freed and the IO will
 * start automatically along with the existing rbio.  1 is returned.
 *
 * 3) Someone has the stripe locked, but we're not able to merge.
 * The rbio is added to the lock owner's plug list, or merged into
 * an rbio already on the plug list.  When the lock owner unlocks,
 * the next rbio on the list is run and the IO is started automatically.
 * 1 is returned
 *
 * If we return 0, the caller still owns the rbio and must continue with
 * IO submission.
If we return 1, the caller must assume the rbio has
 * already been freed.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash *h;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;

	/* The bucket is derived from the full stripe's logical address. */
	h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

	spin_lock(&h->lock);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
			continue;

		spin_lock(&cur->bio_list_lock);

		/* Can we steal this cached rbio's pages? */
		if (bio_list_empty(&cur->bio_list) &&
		    list_empty(&cur->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
			list_del_init(&cur->hash_list);
			refcount_dec(&cur->refs);

			steal_rbio(cur, rbio);
			cache_drop = cur;
			spin_unlock(&cur->bio_list_lock);

			goto lockit;
		}

		/* Can we merge into the lock owner? */
		if (rbio_can_merge(cur, rbio)) {
			merge_rbio(cur, rbio);
			spin_unlock(&cur->bio_list_lock);
			freeit = rbio;
			ret = 1;
			goto out;
		}

		/*
		 * We couldn't merge with the running rbio, see if we can merge
		 * with the pending ones.  We don't have to check for rmw_locked
		 * because there is no way they are inside finish_rmw right now.
		 */
		list_for_each_entry(pending, &cur->plug_list, plug_list) {
			if (rbio_can_merge(pending, rbio)) {
				merge_rbio(pending, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}
		}

		/*
		 * No merging, put us on the tail of the plug list, our rbio
		 * will be started when the currently running rbio unlocks.
		 */
		list_add_tail(&rbio->plug_list, &cur->plug_list);
		spin_unlock(&cur->bio_list_lock);
		ret = 1;
		goto out;
	}
lockit:
	refcount_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock(&h->lock);
	/* Drop the cached rbio / free the merged one outside of the hash lock. */
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		free_raid_bio(freeit);
	return ret;
}

static void recover_rbio_work_locked(struct work_struct *work);

/*
 * Called as rmw or parity rebuild is completed.  If the plug list has more
 * rbios waiting for this stripe, the next one on the list will be started.
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;

	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	/* Lock order: hash bucket lock first, then the rbio's bio_list_lock. */
	spin_lock(&h->lock);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * If we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later.
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		refcount_dec(&rbio->refs);

		/*
		 * We use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * Hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			/* The next rbio inherits the hash bucket slot. */
			list_add(&next->hash_list, &h->hash_list);
			refcount_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock(&h->lock);

			/* Kick off the handed-over rbio per its operation type. */
			if (next->operation == BTRFS_RBIO_READ_REBUILD) {
				start_async_work(next, recover_rbio_work_locked);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_rbio_work_locked);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_rbio_work_locked);
			}

			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

done_nolock:
	if (!keep_cache)
		remove_rbio_from_cache(rbio);
}

/* Complete every bio on the singly linked (bi_next) chain with status @err. */
static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
	struct bio *next;

	while (cur) {
		next = cur->bi_next;
		cur->bi_next = NULL;
		cur->bi_status = err;
		bio_endio(cur);
		cur = next;
	}
}

/*
 * This frees the rbio and runs through all the bios in the
 * bio_list and calls end_io on them.
 */
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *extra;

	kfree(rbio->csum_buf);
	bitmap_free(rbio->csum_bitmap);
	rbio->csum_buf = NULL;
	rbio->csum_bitmap = NULL;

	/*
	 * Clear the data bitmap, as the rbio may be cached for later usage.
	 * Do this before unlock_stripe() so no new bio can be added for
	 * this rbio.
	 */
	bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors);

	/*
	 * At this moment, rbio->bio_list is empty, however since rbio does not
	 * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
	 * hash list, rbio may be merged with others so that rbio->bio_list
	 * becomes non-empty.
	 * Once unlock_stripe() is done, rbio->bio_list will not be updated any
	 * more and we can call bio_endio() on all queued bios.
	 */
	unlock_stripe(rbio);
	extra = bio_list_get(&rbio->bio_list);
	free_raid_bio(rbio);

	rbio_endio_bio_list(cur, err);
	if (extra)
		rbio_endio_bio_list(extra, err);
}

/*
 * Get a sector pointer specified by its @stripe_nr and @sector_nr.
 *
 * @rbio:               The raid bio
 * @stripe_nr:          Stripe number, valid range [0, real_stripe)
 * @sector_nr:          Sector number inside the stripe,
 *                      valid range [0, stripe_nsectors)
 * @bio_list_only:      Whether to use sectors inside the bio list only.
 *
 * The read/modify/write code wants to reuse the original bio page as much
 * as possible, and only use stripe_sectors as fallback.
 */
static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
					 int stripe_nr, int sector_nr,
					 bool bio_list_only)
{
	struct sector_ptr *sector;
	int index;

	ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
	ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);

	index = stripe_nr * rbio->stripe_nsectors + sector_nr;
	ASSERT(index >= 0 && index < rbio->nr_sectors);

	/* bio_list_lock protects bio_sectors against concurrent indexing. */
	spin_lock(&rbio->bio_list_lock);
	sector = &rbio->bio_sectors[index];
	if (sector->page || bio_list_only) {
		/* Don't return sector without a valid page pointer */
		if (!sector->page)
			sector = NULL;
		spin_unlock(&rbio->bio_list_lock);
		return sector;
	}
	spin_unlock(&rbio->bio_list_lock);

	/* Fall back to the rbio's own stripe pages. */
	return &rbio->stripe_sectors[index];
}

/*
 * Allocation and initial setup for the btrfs_raid_bio.  Note that
 * this does not allocate any pages for rbio->pages.
 */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
					 struct btrfs_io_context *bioc)
{
	/* Replace-target stripes are not part of the "real" stripe count. */
	const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes;
	const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
	const unsigned int num_pages = stripe_npages * real_stripes;
	const unsigned int stripe_nsectors =
		BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
	const unsigned int num_sectors = stripe_nsectors * real_stripes;
	struct btrfs_raid_bio *rbio;

	/* PAGE_SIZE must also be aligned to sectorsize for subpage support */
	ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
	/*
	 * Our current stripe len should be fixed to 64k thus stripe_nsectors
	 * (at most 16) should be no larger than BITS_PER_LONG.
	 */
	ASSERT(stripe_nsectors <= BITS_PER_LONG);

	rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
	if (!rbio)
		return ERR_PTR(-ENOMEM);
	rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
				     GFP_NOFS);
	rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
				    GFP_NOFS);
	rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
				       GFP_NOFS);
	rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
	rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);

	/* Check all allocations at once; partial failure frees everything. */
	if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
	    !rbio->finish_pointers || !rbio->error_bitmap) {
		free_raid_bio_pointers(rbio);
		kfree(rbio);
		return ERR_PTR(-ENOMEM);
	}

	bio_list_init(&rbio->bio_list);
	init_waitqueue_head(&rbio->io_wait);
	INIT_LIST_HEAD(&rbio->plug_list);
	spin_lock_init(&rbio->bio_list_lock);
	INIT_LIST_HEAD(&rbio->stripe_cache);
	INIT_LIST_HEAD(&rbio->hash_list);
	/* The rbio holds a reference on the io_context for its lifetime. */
	btrfs_get_bioc(bioc);
	rbio->bioc = bioc;
	rbio->nr_pages = num_pages;
	rbio->nr_sectors = num_sectors;
	rbio->real_stripes = real_stripes;
	rbio->stripe_npages = stripe_npages;
	rbio->stripe_nsectors = stripe_nsectors;
	refcount_set(&rbio->refs, 1);
	atomic_set(&rbio->stripes_pending, 0);

	ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
	rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);

	return rbio;
}

/* Allocate pages for all the stripes in the bio, including parity */
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
	if (ret < 0)
		return ret;
	/* Mapping all sectors */
	index_stripe_sectors(rbio);
	return 0;
}

/* Only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
	const int data_pages = rbio->nr_data * rbio->stripe_npages;
	int ret;

	ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
				     rbio->stripe_pages + data_pages);
	if (ret < 0)
		return ret;

	index_stripe_sectors(rbio);
	return 0;
}

/*
 * Return the total number of errors found in the
vertical stripe of @sector_nr.
 *
 * @faila and @failb will also be updated to the first and second stripe
 * number of the errors.
 *
 * (The "veritical" spelling in the function name is a historical typo kept
 * for consistency with existing callers.)
 */
static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
				     int *faila, int *failb)
{
	int stripe_nr;
	int found_errors = 0;

	if (faila || failb) {
		/*
		 * Both @faila and @failb should be valid pointers if any of
		 * them is specified.
		 */
		ASSERT(faila && failb);
		*faila = -1;
		*failb = -1;
	}

	/* Walk the same sector_nr across every stripe of the full stripe. */
	for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
		int total_sector_nr = stripe_nr * rbio->stripe_nsectors + sector_nr;

		if (test_bit(total_sector_nr, rbio->error_bitmap)) {
			found_errors++;
			if (faila) {
				/* Update faila and failb. */
				if (*faila < 0)
					*faila = stripe_nr;
				else if (*failb < 0)
					*failb = stripe_nr;
			}
		}
	}
	return found_errors;
}

/*
 * Add a single sector @sector into our list of bios for IO.
 *
 * Return 0 if everything went well.
 * Return <0 for error.
 */
static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
			      struct bio_list *bio_list,
			      struct sector_ptr *sector,
			      unsigned int stripe_nr,
			      unsigned int sector_nr,
			      enum req_op op)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	struct bio *last = bio_list->tail;
	int ret;
	struct bio *bio;
	struct btrfs_io_stripe *stripe;
	u64 disk_start;

	/*
	 * Note: here stripe_nr has taken device replace into consideration,
	 * thus it can be larger than rbio->real_stripe.
	 * So here we check against bioc->num_stripes, not rbio->real_stripes.
	 */
	ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
	ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
	ASSERT(sector->page);

	stripe = &rbio->bioc->stripes[stripe_nr];
	disk_start = stripe->physical + sector_nr * sectorsize;

	/* If the device is missing, just fail this stripe. */
	if (!stripe->dev->bdev) {
		int found_errors;

		set_bit(stripe_nr * rbio->stripe_nsectors + sector_nr,
			rbio->error_bitmap);

		/* Check if we have reached tolerance early. */
		found_errors = get_rbio_veritical_errors(rbio, sector_nr,
							 NULL, NULL);
		if (found_errors > rbio->bioc->max_errors)
			return -EIO;
		return 0;
	}

	/* See if we can add this page onto our existing bio. */
	if (last) {
		u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT;
		last_end += last->bi_iter.bi_size;

		/*
		 * We can't merge these if they are from different
		 * devices or if they are not contiguous.
		 */
		if (last_end == disk_start && !last->bi_status &&
		    last->bi_bdev == stripe->dev->bdev) {
			ret = bio_add_page(last, sector->page, sectorsize,
					   sector->pgoff);
			if (ret == sectorsize)
				return 0;
		}
	}

	/* Put a new bio on the list. */
	bio = bio_alloc(stripe->dev->bdev,
			max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
			op, GFP_NOFS);
	bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
	bio->bi_private = rbio;

	__bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
	bio_list_add(bio_list, bio);
	return 0;
}

/*
 * Record each sector-sized chunk of @bio into rbio->bio_sectors, indexed by
 * its offset from the start of the full stripe.
 */
static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	struct bio_vec bvec;
	struct bvec_iter iter;
	u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
		     rbio->bioc->full_stripe_logical;

	bio_for_each_segment(bvec, bio, iter) {
		u32 bvec_offset;

		for (bvec_offset = 0; bvec_offset < bvec.bv_len;
		     bvec_offset += sectorsize, offset += sectorsize) {
			int index = offset / sectorsize;
			struct sector_ptr *sector = &rbio->bio_sectors[index];

			sector->page = bvec.bv_page;
			sector->pgoff = bvec.bv_offset + bvec_offset;
			ASSERT(sector->pgoff < PAGE_SIZE);
		}
	}
}

/*
 * Helper function to walk our bio list and populate the bio_pages array with
 * the result.  This seems expensive, but it is faster than constantly
 * searching through the bio list as we setup the IO in finish_rmw or stripe
 * reconstruction.
 *
 * This must be called before you trust the answers from page_in_rbio.
 */
static void index_rbio_pages(struct btrfs_raid_bio *rbio)
{
	struct bio *bio;

	spin_lock(&rbio->bio_list_lock);
	bio_list_for_each(bio, &rbio->bio_list)
		index_one_bio(rbio, bio);

	spin_unlock(&rbio->bio_list_lock);
}

/* Fill @trace_info (stripe_nr/devid/offset) for tracepoints from @bio. */
static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
			       struct raid56_bio_trace_info *trace_info)
{
	const struct btrfs_io_context *bioc = rbio->bioc;
	int i;

	ASSERT(bioc);

	/* We rely on bio->bi_bdev to find the stripe number.
 */
	if (!bio->bi_bdev)
		goto not_found;

	for (i = 0; i < bioc->num_stripes; i++) {
		if (bio->bi_bdev != bioc->stripes[i].dev->bdev)
			continue;
		trace_info->stripe_nr = i;
		trace_info->devid = bioc->stripes[i].dev->devid;
		trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
				     bioc->stripes[i].physical;
		return;
	}

not_found:
	/* No matching stripe: report sentinel values. */
	trace_info->devid = -1;
	trace_info->offset = -1;
	trace_info->stripe_nr = -1;
}

/* Drop the submission reference on every bio queued on @bio_list. */
static inline void bio_list_put(struct bio_list *bio_list)
{
	struct bio *bio;

	while ((bio = bio_list_pop(bio_list)))
		bio_put(bio);
}

/* Generate PQ for one vertical stripe. */
static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
{
	void **pointers = rbio->finish_pointers;
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	struct sector_ptr *sector;
	int stripe;
	const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6;

	/* First collect one sector from each data stripe */
	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
		sector = sector_in_rbio(rbio, stripe, sectornr, 0);
		pointers[stripe] = kmap_local_page(sector->page) +
				   sector->pgoff;
	}

	/* Then add the parity stripe */
	sector = rbio_pstripe_sector(rbio, sectornr);
	sector->uptodate = 1;
	pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;

	if (has_qstripe) {
		/*
		 * RAID6, add the qstripe and call the library function
		 * to fill in our p/q
		 */
		sector = rbio_qstripe_sector(rbio, sectornr);
		sector->uptodate = 1;
		pointers[stripe++] = kmap_local_page(sector->page) +
				     sector->pgoff;

		raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
					pointers);
	} else {
		/* raid5: P is the XOR of all data sectors. */
		memcpy(pointers[rbio->nr_data], pointers[0], sectorsize);
		run_xor(pointers + 1, rbio->nr_data - 1, sectorsize);
	}
	/* Unmap in reverse order, as required for nested kmap_local_page(). */
	for (stripe = stripe - 1; stripe >= 0; stripe--)
		kunmap_local(pointers[stripe]);
}

static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
				   struct bio_list *bio_list)
{
	/* The total sector number inside the full stripe. */
	int total_sector_nr;
	int sectornr;
	int stripe;
	int ret;

	ASSERT(bio_list_size(bio_list) == 0);

	/* We should have at least one data sector. */
	ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));

	/*
	 * Reset errors, as we may have errors inherited from a degraded
	 * write.
	 */
	bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);

	/*
	 * Start assembly.  Make bios for everything from the higher layers (the
	 * bio_list in our rbio) and our P/Q.  Ignore everything else.
124962306a36Sopenharmony_ci */ 125062306a36Sopenharmony_ci for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 125162306a36Sopenharmony_ci total_sector_nr++) { 125262306a36Sopenharmony_ci struct sector_ptr *sector; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci stripe = total_sector_nr / rbio->stripe_nsectors; 125562306a36Sopenharmony_ci sectornr = total_sector_nr % rbio->stripe_nsectors; 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci /* This vertical stripe has no data, skip it. */ 125862306a36Sopenharmony_ci if (!test_bit(sectornr, &rbio->dbitmap)) 125962306a36Sopenharmony_ci continue; 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_ci if (stripe < rbio->nr_data) { 126262306a36Sopenharmony_ci sector = sector_in_rbio(rbio, stripe, sectornr, 1); 126362306a36Sopenharmony_ci if (!sector) 126462306a36Sopenharmony_ci continue; 126562306a36Sopenharmony_ci } else { 126662306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe, sectornr); 126762306a36Sopenharmony_ci } 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, bio_list, sector, stripe, 127062306a36Sopenharmony_ci sectornr, REQ_OP_WRITE); 127162306a36Sopenharmony_ci if (ret) 127262306a36Sopenharmony_ci goto error; 127362306a36Sopenharmony_ci } 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci if (likely(!rbio->bioc->replace_nr_stripes)) 127662306a36Sopenharmony_ci return 0; 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci /* 127962306a36Sopenharmony_ci * Make a copy for the replace target device. 128062306a36Sopenharmony_ci * 128162306a36Sopenharmony_ci * Thus the source stripe number (in replace_stripe_src) should be valid. 
128262306a36Sopenharmony_ci */ 128362306a36Sopenharmony_ci ASSERT(rbio->bioc->replace_stripe_src >= 0); 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 128662306a36Sopenharmony_ci total_sector_nr++) { 128762306a36Sopenharmony_ci struct sector_ptr *sector; 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci stripe = total_sector_nr / rbio->stripe_nsectors; 129062306a36Sopenharmony_ci sectornr = total_sector_nr % rbio->stripe_nsectors; 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci /* 129362306a36Sopenharmony_ci * For RAID56, there is only one device that can be replaced, 129462306a36Sopenharmony_ci * and replace_stripe_src[0] indicates the stripe number we 129562306a36Sopenharmony_ci * need to copy from. 129662306a36Sopenharmony_ci */ 129762306a36Sopenharmony_ci if (stripe != rbio->bioc->replace_stripe_src) { 129862306a36Sopenharmony_ci /* 129962306a36Sopenharmony_ci * We can skip the whole stripe completely, note 130062306a36Sopenharmony_ci * total_sector_nr will be increased by one anyway. 130162306a36Sopenharmony_ci */ 130262306a36Sopenharmony_ci ASSERT(sectornr == 0); 130362306a36Sopenharmony_ci total_sector_nr += rbio->stripe_nsectors - 1; 130462306a36Sopenharmony_ci continue; 130562306a36Sopenharmony_ci } 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci /* This vertical stripe has no data, skip it. 
*/ 130862306a36Sopenharmony_ci if (!test_bit(sectornr, &rbio->dbitmap)) 130962306a36Sopenharmony_ci continue; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci if (stripe < rbio->nr_data) { 131262306a36Sopenharmony_ci sector = sector_in_rbio(rbio, stripe, sectornr, 1); 131362306a36Sopenharmony_ci if (!sector) 131462306a36Sopenharmony_ci continue; 131562306a36Sopenharmony_ci } else { 131662306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe, sectornr); 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, bio_list, sector, 132062306a36Sopenharmony_ci rbio->real_stripes, 132162306a36Sopenharmony_ci sectornr, REQ_OP_WRITE); 132262306a36Sopenharmony_ci if (ret) 132362306a36Sopenharmony_ci goto error; 132462306a36Sopenharmony_ci } 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci return 0; 132762306a36Sopenharmony_cierror: 132862306a36Sopenharmony_ci bio_list_put(bio_list); 132962306a36Sopenharmony_ci return -EIO; 133062306a36Sopenharmony_ci} 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_cistatic void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio) 133362306a36Sopenharmony_ci{ 133462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 133562306a36Sopenharmony_ci u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - 133662306a36Sopenharmony_ci rbio->bioc->full_stripe_logical; 133762306a36Sopenharmony_ci int total_nr_sector = offset >> fs_info->sectorsize_bits; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors); 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci bitmap_set(rbio->error_bitmap, total_nr_sector, 134262306a36Sopenharmony_ci bio->bi_iter.bi_size >> fs_info->sectorsize_bits); 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci /* 134562306a36Sopenharmony_ci * Special handling for raid56_alloc_missing_rbio() used by 134662306a36Sopenharmony_ci * 
scrub/replace. Unlike call path in raid56_parity_recover(), they 134762306a36Sopenharmony_ci * pass an empty bio here. Thus we have to find out the missing device 134862306a36Sopenharmony_ci * and mark the stripe error instead. 134962306a36Sopenharmony_ci */ 135062306a36Sopenharmony_ci if (bio->bi_iter.bi_size == 0) { 135162306a36Sopenharmony_ci bool found_missing = false; 135262306a36Sopenharmony_ci int stripe_nr; 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { 135562306a36Sopenharmony_ci if (!rbio->bioc->stripes[stripe_nr].dev->bdev) { 135662306a36Sopenharmony_ci found_missing = true; 135762306a36Sopenharmony_ci bitmap_set(rbio->error_bitmap, 135862306a36Sopenharmony_ci stripe_nr * rbio->stripe_nsectors, 135962306a36Sopenharmony_ci rbio->stripe_nsectors); 136062306a36Sopenharmony_ci } 136162306a36Sopenharmony_ci } 136262306a36Sopenharmony_ci ASSERT(found_missing); 136362306a36Sopenharmony_ci } 136462306a36Sopenharmony_ci} 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_ci/* 136762306a36Sopenharmony_ci * For subpage case, we can no longer set page Up-to-date directly for 136862306a36Sopenharmony_ci * stripe_pages[], thus we need to locate the sector. 
136962306a36Sopenharmony_ci */ 137062306a36Sopenharmony_cistatic struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio, 137162306a36Sopenharmony_ci struct page *page, 137262306a36Sopenharmony_ci unsigned int pgoff) 137362306a36Sopenharmony_ci{ 137462306a36Sopenharmony_ci int i; 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci for (i = 0; i < rbio->nr_sectors; i++) { 137762306a36Sopenharmony_ci struct sector_ptr *sector = &rbio->stripe_sectors[i]; 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci if (sector->page == page && sector->pgoff == pgoff) 138062306a36Sopenharmony_ci return sector; 138162306a36Sopenharmony_ci } 138262306a36Sopenharmony_ci return NULL; 138362306a36Sopenharmony_ci} 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci/* 138662306a36Sopenharmony_ci * this sets each page in the bio uptodate. It should only be used on private 138762306a36Sopenharmony_ci * rbio pages, nothing that comes in from the higher layers 138862306a36Sopenharmony_ci */ 138962306a36Sopenharmony_cistatic void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) 139062306a36Sopenharmony_ci{ 139162306a36Sopenharmony_ci const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 139262306a36Sopenharmony_ci struct bio_vec *bvec; 139362306a36Sopenharmony_ci struct bvec_iter_all iter_all; 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci ASSERT(!bio_flagged(bio, BIO_CLONED)); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci bio_for_each_segment_all(bvec, bio, iter_all) { 139862306a36Sopenharmony_ci struct sector_ptr *sector; 139962306a36Sopenharmony_ci int pgoff; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len; 140262306a36Sopenharmony_ci pgoff += sectorsize) { 140362306a36Sopenharmony_ci sector = find_stripe_sector(rbio, bvec->bv_page, pgoff); 140462306a36Sopenharmony_ci ASSERT(sector); 140562306a36Sopenharmony_ci if (sector) 140662306a36Sopenharmony_ci 
sector->uptodate = 1; 140762306a36Sopenharmony_ci } 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci} 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_cistatic int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio) 141262306a36Sopenharmony_ci{ 141362306a36Sopenharmony_ci struct bio_vec *bv = bio_first_bvec_all(bio); 141462306a36Sopenharmony_ci int i; 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci for (i = 0; i < rbio->nr_sectors; i++) { 141762306a36Sopenharmony_ci struct sector_ptr *sector; 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci sector = &rbio->stripe_sectors[i]; 142062306a36Sopenharmony_ci if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset) 142162306a36Sopenharmony_ci break; 142262306a36Sopenharmony_ci sector = &rbio->bio_sectors[i]; 142362306a36Sopenharmony_ci if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset) 142462306a36Sopenharmony_ci break; 142562306a36Sopenharmony_ci } 142662306a36Sopenharmony_ci ASSERT(i < rbio->nr_sectors); 142762306a36Sopenharmony_ci return i; 142862306a36Sopenharmony_ci} 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_cistatic void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bio) 143162306a36Sopenharmony_ci{ 143262306a36Sopenharmony_ci int total_sector_nr = get_bio_sector_nr(rbio, bio); 143362306a36Sopenharmony_ci u32 bio_size = 0; 143462306a36Sopenharmony_ci struct bio_vec *bvec; 143562306a36Sopenharmony_ci int i; 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci bio_for_each_bvec_all(bvec, bio, i) 143862306a36Sopenharmony_ci bio_size += bvec->bv_len; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci /* 144162306a36Sopenharmony_ci * Since we can have multiple bios touching the error_bitmap, we cannot 144262306a36Sopenharmony_ci * call bitmap_set() without protection. 144362306a36Sopenharmony_ci * 144462306a36Sopenharmony_ci * Instead use set_bit() for each bit, as set_bit() itself is atomic. 
144562306a36Sopenharmony_ci */ 144662306a36Sopenharmony_ci for (i = total_sector_nr; i < total_sector_nr + 144762306a36Sopenharmony_ci (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++) 144862306a36Sopenharmony_ci set_bit(i, rbio->error_bitmap); 144962306a36Sopenharmony_ci} 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci/* Verify the data sectors at read time. */ 145262306a36Sopenharmony_cistatic void verify_bio_data_sectors(struct btrfs_raid_bio *rbio, 145362306a36Sopenharmony_ci struct bio *bio) 145462306a36Sopenharmony_ci{ 145562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 145662306a36Sopenharmony_ci int total_sector_nr = get_bio_sector_nr(rbio, bio); 145762306a36Sopenharmony_ci struct bio_vec *bvec; 145862306a36Sopenharmony_ci struct bvec_iter_all iter_all; 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci /* No data csum for the whole stripe, no need to verify. */ 146162306a36Sopenharmony_ci if (!rbio->csum_bitmap || !rbio->csum_buf) 146262306a36Sopenharmony_ci return; 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci /* P/Q stripes, they have no data csum to verify against. 
*/ 146562306a36Sopenharmony_ci if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors) 146662306a36Sopenharmony_ci return; 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci bio_for_each_segment_all(bvec, bio, iter_all) { 146962306a36Sopenharmony_ci int bv_offset; 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci for (bv_offset = bvec->bv_offset; 147262306a36Sopenharmony_ci bv_offset < bvec->bv_offset + bvec->bv_len; 147362306a36Sopenharmony_ci bv_offset += fs_info->sectorsize, total_sector_nr++) { 147462306a36Sopenharmony_ci u8 csum_buf[BTRFS_CSUM_SIZE]; 147562306a36Sopenharmony_ci u8 *expected_csum = rbio->csum_buf + 147662306a36Sopenharmony_ci total_sector_nr * fs_info->csum_size; 147762306a36Sopenharmony_ci int ret; 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci /* No csum for this sector, skip to the next sector. */ 148062306a36Sopenharmony_ci if (!test_bit(total_sector_nr, rbio->csum_bitmap)) 148162306a36Sopenharmony_ci continue; 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci ret = btrfs_check_sector_csum(fs_info, bvec->bv_page, 148462306a36Sopenharmony_ci bv_offset, csum_buf, expected_csum); 148562306a36Sopenharmony_ci if (ret < 0) 148662306a36Sopenharmony_ci set_bit(total_sector_nr, rbio->error_bitmap); 148762306a36Sopenharmony_ci } 148862306a36Sopenharmony_ci } 148962306a36Sopenharmony_ci} 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_cistatic void raid_wait_read_end_io(struct bio *bio) 149262306a36Sopenharmony_ci{ 149362306a36Sopenharmony_ci struct btrfs_raid_bio *rbio = bio->bi_private; 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci if (bio->bi_status) { 149662306a36Sopenharmony_ci rbio_update_error_bitmap(rbio, bio); 149762306a36Sopenharmony_ci } else { 149862306a36Sopenharmony_ci set_bio_pages_uptodate(rbio, bio); 149962306a36Sopenharmony_ci verify_bio_data_sectors(rbio, bio); 150062306a36Sopenharmony_ci } 150162306a36Sopenharmony_ci 150262306a36Sopenharmony_ci bio_put(bio); 150362306a36Sopenharmony_ci 
if (atomic_dec_and_test(&rbio->stripes_pending)) 150462306a36Sopenharmony_ci wake_up(&rbio->io_wait); 150562306a36Sopenharmony_ci} 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_cistatic void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio, 150862306a36Sopenharmony_ci struct bio_list *bio_list) 150962306a36Sopenharmony_ci{ 151062306a36Sopenharmony_ci struct bio *bio; 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); 151362306a36Sopenharmony_ci while ((bio = bio_list_pop(bio_list))) { 151462306a36Sopenharmony_ci bio->bi_end_io = raid_wait_read_end_io; 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci if (trace_raid56_read_enabled()) { 151762306a36Sopenharmony_ci struct raid56_bio_trace_info trace_info = { 0 }; 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci bio_get_trace_info(rbio, bio, &trace_info); 152062306a36Sopenharmony_ci trace_raid56_read(rbio, bio, &trace_info); 152162306a36Sopenharmony_ci } 152262306a36Sopenharmony_ci submit_bio(bio); 152362306a36Sopenharmony_ci } 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); 152662306a36Sopenharmony_ci} 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_cistatic int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio) 152962306a36Sopenharmony_ci{ 153062306a36Sopenharmony_ci const int data_pages = rbio->nr_data * rbio->stripe_npages; 153162306a36Sopenharmony_ci int ret; 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages); 153462306a36Sopenharmony_ci if (ret < 0) 153562306a36Sopenharmony_ci return ret; 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci index_stripe_sectors(rbio); 153862306a36Sopenharmony_ci return 0; 153962306a36Sopenharmony_ci} 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci/* 154262306a36Sopenharmony_ci * We use plugging call backs to collect full stripes. 
154362306a36Sopenharmony_ci * Any time we get a partial stripe write while plugged 154462306a36Sopenharmony_ci * we collect it into a list. When the unplug comes down, 154562306a36Sopenharmony_ci * we sort the list by logical block number and merge 154662306a36Sopenharmony_ci * everything we can into the same rbios 154762306a36Sopenharmony_ci */ 154862306a36Sopenharmony_cistruct btrfs_plug_cb { 154962306a36Sopenharmony_ci struct blk_plug_cb cb; 155062306a36Sopenharmony_ci struct btrfs_fs_info *info; 155162306a36Sopenharmony_ci struct list_head rbio_list; 155262306a36Sopenharmony_ci struct work_struct work; 155362306a36Sopenharmony_ci}; 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci/* 155662306a36Sopenharmony_ci * rbios on the plug list are sorted for easier merging. 155762306a36Sopenharmony_ci */ 155862306a36Sopenharmony_cistatic int plug_cmp(void *priv, const struct list_head *a, 155962306a36Sopenharmony_ci const struct list_head *b) 156062306a36Sopenharmony_ci{ 156162306a36Sopenharmony_ci const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, 156262306a36Sopenharmony_ci plug_list); 156362306a36Sopenharmony_ci const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 156462306a36Sopenharmony_ci plug_list); 156562306a36Sopenharmony_ci u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; 156662306a36Sopenharmony_ci u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; 156762306a36Sopenharmony_ci 156862306a36Sopenharmony_ci if (a_sector < b_sector) 156962306a36Sopenharmony_ci return -1; 157062306a36Sopenharmony_ci if (a_sector > b_sector) 157162306a36Sopenharmony_ci return 1; 157262306a36Sopenharmony_ci return 0; 157362306a36Sopenharmony_ci} 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_cistatic void raid_unplug(struct blk_plug_cb *cb, bool from_schedule) 157662306a36Sopenharmony_ci{ 157762306a36Sopenharmony_ci struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb); 157862306a36Sopenharmony_ci struct 
btrfs_raid_bio *cur; 157962306a36Sopenharmony_ci struct btrfs_raid_bio *last = NULL; 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci list_sort(NULL, &plug->rbio_list, plug_cmp); 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci while (!list_empty(&plug->rbio_list)) { 158462306a36Sopenharmony_ci cur = list_entry(plug->rbio_list.next, 158562306a36Sopenharmony_ci struct btrfs_raid_bio, plug_list); 158662306a36Sopenharmony_ci list_del_init(&cur->plug_list); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci if (rbio_is_full(cur)) { 158962306a36Sopenharmony_ci /* We have a full stripe, queue it down. */ 159062306a36Sopenharmony_ci start_async_work(cur, rmw_rbio_work); 159162306a36Sopenharmony_ci continue; 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci if (last) { 159462306a36Sopenharmony_ci if (rbio_can_merge(last, cur)) { 159562306a36Sopenharmony_ci merge_rbio(last, cur); 159662306a36Sopenharmony_ci free_raid_bio(cur); 159762306a36Sopenharmony_ci continue; 159862306a36Sopenharmony_ci } 159962306a36Sopenharmony_ci start_async_work(last, rmw_rbio_work); 160062306a36Sopenharmony_ci } 160162306a36Sopenharmony_ci last = cur; 160262306a36Sopenharmony_ci } 160362306a36Sopenharmony_ci if (last) 160462306a36Sopenharmony_ci start_async_work(last, rmw_rbio_work); 160562306a36Sopenharmony_ci kfree(plug); 160662306a36Sopenharmony_ci} 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. 
*/ 160962306a36Sopenharmony_cistatic void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) 161062306a36Sopenharmony_ci{ 161162306a36Sopenharmony_ci const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 161262306a36Sopenharmony_ci const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; 161362306a36Sopenharmony_ci const u64 full_stripe_start = rbio->bioc->full_stripe_logical; 161462306a36Sopenharmony_ci const u32 orig_len = orig_bio->bi_iter.bi_size; 161562306a36Sopenharmony_ci const u32 sectorsize = fs_info->sectorsize; 161662306a36Sopenharmony_ci u64 cur_logical; 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci ASSERT(orig_logical >= full_stripe_start && 161962306a36Sopenharmony_ci orig_logical + orig_len <= full_stripe_start + 162062306a36Sopenharmony_ci rbio->nr_data * BTRFS_STRIPE_LEN); 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci bio_list_add(&rbio->bio_list, orig_bio); 162362306a36Sopenharmony_ci rbio->bio_list_bytes += orig_bio->bi_iter.bi_size; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci /* Update the dbitmap. */ 162662306a36Sopenharmony_ci for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len; 162762306a36Sopenharmony_ci cur_logical += sectorsize) { 162862306a36Sopenharmony_ci int bit = ((u32)(cur_logical - full_stripe_start) >> 162962306a36Sopenharmony_ci fs_info->sectorsize_bits) % rbio->stripe_nsectors; 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_ci set_bit(bit, &rbio->dbitmap); 163262306a36Sopenharmony_ci } 163362306a36Sopenharmony_ci} 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci/* 163662306a36Sopenharmony_ci * our main entry point for writes from the rest of the FS. 
163762306a36Sopenharmony_ci */ 163862306a36Sopenharmony_civoid raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc) 163962306a36Sopenharmony_ci{ 164062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = bioc->fs_info; 164162306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 164262306a36Sopenharmony_ci struct btrfs_plug_cb *plug = NULL; 164362306a36Sopenharmony_ci struct blk_plug_cb *cb; 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci rbio = alloc_rbio(fs_info, bioc); 164662306a36Sopenharmony_ci if (IS_ERR(rbio)) { 164762306a36Sopenharmony_ci bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); 164862306a36Sopenharmony_ci bio_endio(bio); 164962306a36Sopenharmony_ci return; 165062306a36Sopenharmony_ci } 165162306a36Sopenharmony_ci rbio->operation = BTRFS_RBIO_WRITE; 165262306a36Sopenharmony_ci rbio_add_bio(rbio, bio); 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci /* 165562306a36Sopenharmony_ci * Don't plug on full rbios, just get them out the door 165662306a36Sopenharmony_ci * as quickly as we can 165762306a36Sopenharmony_ci */ 165862306a36Sopenharmony_ci if (!rbio_is_full(rbio)) { 165962306a36Sopenharmony_ci cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug)); 166062306a36Sopenharmony_ci if (cb) { 166162306a36Sopenharmony_ci plug = container_of(cb, struct btrfs_plug_cb, cb); 166262306a36Sopenharmony_ci if (!plug->info) { 166362306a36Sopenharmony_ci plug->info = fs_info; 166462306a36Sopenharmony_ci INIT_LIST_HEAD(&plug->rbio_list); 166562306a36Sopenharmony_ci } 166662306a36Sopenharmony_ci list_add_tail(&rbio->plug_list, &plug->rbio_list); 166762306a36Sopenharmony_ci return; 166862306a36Sopenharmony_ci } 166962306a36Sopenharmony_ci } 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci /* 167262306a36Sopenharmony_ci * Either we don't have any existing plug, or we're doing a full stripe, 167362306a36Sopenharmony_ci * queue the rmw work now. 
167462306a36Sopenharmony_ci */ 167562306a36Sopenharmony_ci start_async_work(rbio, rmw_rbio_work); 167662306a36Sopenharmony_ci} 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_cistatic int verify_one_sector(struct btrfs_raid_bio *rbio, 167962306a36Sopenharmony_ci int stripe_nr, int sector_nr) 168062306a36Sopenharmony_ci{ 168162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 168262306a36Sopenharmony_ci struct sector_ptr *sector; 168362306a36Sopenharmony_ci u8 csum_buf[BTRFS_CSUM_SIZE]; 168462306a36Sopenharmony_ci u8 *csum_expected; 168562306a36Sopenharmony_ci int ret; 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci if (!rbio->csum_bitmap || !rbio->csum_buf) 168862306a36Sopenharmony_ci return 0; 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci /* No way to verify P/Q as they are not covered by data csum. */ 169162306a36Sopenharmony_ci if (stripe_nr >= rbio->nr_data) 169262306a36Sopenharmony_ci return 0; 169362306a36Sopenharmony_ci /* 169462306a36Sopenharmony_ci * If we're rebuilding a read, we have to use pages from the 169562306a36Sopenharmony_ci * bio list if possible. 
169662306a36Sopenharmony_ci */ 169762306a36Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 169862306a36Sopenharmony_ci sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); 169962306a36Sopenharmony_ci } else { 170062306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); 170162306a36Sopenharmony_ci } 170262306a36Sopenharmony_ci 170362306a36Sopenharmony_ci ASSERT(sector->page); 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci csum_expected = rbio->csum_buf + 170662306a36Sopenharmony_ci (stripe_nr * rbio->stripe_nsectors + sector_nr) * 170762306a36Sopenharmony_ci fs_info->csum_size; 170862306a36Sopenharmony_ci ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff, 170962306a36Sopenharmony_ci csum_buf, csum_expected); 171062306a36Sopenharmony_ci return ret; 171162306a36Sopenharmony_ci} 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci/* 171462306a36Sopenharmony_ci * Recover a vertical stripe specified by @sector_nr. 171562306a36Sopenharmony_ci * @*pointers are the pre-allocated pointers by the caller, so we don't 171662306a36Sopenharmony_ci * need to allocate/free the pointers again and again. 
171762306a36Sopenharmony_ci */ 171862306a36Sopenharmony_cistatic int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, 171962306a36Sopenharmony_ci void **pointers, void **unmap_array) 172062306a36Sopenharmony_ci{ 172162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 172262306a36Sopenharmony_ci struct sector_ptr *sector; 172362306a36Sopenharmony_ci const u32 sectorsize = fs_info->sectorsize; 172462306a36Sopenharmony_ci int found_errors; 172562306a36Sopenharmony_ci int faila; 172662306a36Sopenharmony_ci int failb; 172762306a36Sopenharmony_ci int stripe_nr; 172862306a36Sopenharmony_ci int ret = 0; 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci /* 173162306a36Sopenharmony_ci * Now we just use bitmap to mark the horizontal stripes in 173262306a36Sopenharmony_ci * which we have data when doing parity scrub. 173362306a36Sopenharmony_ci */ 173462306a36Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 173562306a36Sopenharmony_ci !test_bit(sector_nr, &rbio->dbitmap)) 173662306a36Sopenharmony_ci return 0; 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila, 173962306a36Sopenharmony_ci &failb); 174062306a36Sopenharmony_ci /* 174162306a36Sopenharmony_ci * No errors in the vertical stripe, skip it. Can happen for recovery 174262306a36Sopenharmony_ci * which only part of a stripe failed csum check. 
174362306a36Sopenharmony_ci */ 174462306a36Sopenharmony_ci if (!found_errors) 174562306a36Sopenharmony_ci return 0; 174662306a36Sopenharmony_ci 174762306a36Sopenharmony_ci if (found_errors > rbio->bioc->max_errors) 174862306a36Sopenharmony_ci return -EIO; 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci /* 175162306a36Sopenharmony_ci * Setup our array of pointers with sectors from each stripe 175262306a36Sopenharmony_ci * 175362306a36Sopenharmony_ci * NOTE: store a duplicate array of pointers to preserve the 175462306a36Sopenharmony_ci * pointer order. 175562306a36Sopenharmony_ci */ 175662306a36Sopenharmony_ci for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) { 175762306a36Sopenharmony_ci /* 175862306a36Sopenharmony_ci * If we're rebuilding a read, we have to use pages from the 175962306a36Sopenharmony_ci * bio list if possible. 176062306a36Sopenharmony_ci */ 176162306a36Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 176262306a36Sopenharmony_ci sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0); 176362306a36Sopenharmony_ci } else { 176462306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr); 176562306a36Sopenharmony_ci } 176662306a36Sopenharmony_ci ASSERT(sector->page); 176762306a36Sopenharmony_ci pointers[stripe_nr] = kmap_local_page(sector->page) + 176862306a36Sopenharmony_ci sector->pgoff; 176962306a36Sopenharmony_ci unmap_array[stripe_nr] = pointers[stripe_nr]; 177062306a36Sopenharmony_ci } 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci /* All raid6 handling here */ 177362306a36Sopenharmony_ci if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { 177462306a36Sopenharmony_ci /* Single failure, rebuild from parity raid5 style */ 177562306a36Sopenharmony_ci if (failb < 0) { 177662306a36Sopenharmony_ci if (faila == rbio->nr_data) 177762306a36Sopenharmony_ci /* 177862306a36Sopenharmony_ci * Just the P stripe has failed, without 177962306a36Sopenharmony_ci * a bad data or Q stripe. 
178062306a36Sopenharmony_ci * We have nothing to do, just skip the 178162306a36Sopenharmony_ci * recovery for this stripe. 178262306a36Sopenharmony_ci */ 178362306a36Sopenharmony_ci goto cleanup; 178462306a36Sopenharmony_ci /* 178562306a36Sopenharmony_ci * a single failure in raid6 is rebuilt 178662306a36Sopenharmony_ci * in the pstripe code below 178762306a36Sopenharmony_ci */ 178862306a36Sopenharmony_ci goto pstripe; 178962306a36Sopenharmony_ci } 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci /* 179262306a36Sopenharmony_ci * If the q stripe is failed, do a pstripe reconstruction from 179362306a36Sopenharmony_ci * the xors. 179462306a36Sopenharmony_ci * If both the q stripe and the P stripe are failed, we're 179562306a36Sopenharmony_ci * here due to a crc mismatch and we can't give them the 179662306a36Sopenharmony_ci * data they want. 179762306a36Sopenharmony_ci */ 179862306a36Sopenharmony_ci if (failb == rbio->real_stripes - 1) { 179962306a36Sopenharmony_ci if (faila == rbio->real_stripes - 2) 180062306a36Sopenharmony_ci /* 180162306a36Sopenharmony_ci * Only P and Q are corrupted. 180262306a36Sopenharmony_ci * We only care about data stripes recovery, 180362306a36Sopenharmony_ci * can skip this vertical stripe. 180462306a36Sopenharmony_ci */ 180562306a36Sopenharmony_ci goto cleanup; 180662306a36Sopenharmony_ci /* 180762306a36Sopenharmony_ci * Otherwise we have one bad data stripe and 180862306a36Sopenharmony_ci * a good P stripe. raid5! 
/*
 * NOTE(review): continuation of recover_vertical() (its definition starts
 * above this chunk).  This part selects the RAID6 two-failure recovery
 * routine: data+P failure goes through raid6_datap_recov(), two data
 * failures through raid6_2data_recov(); otherwise it falls through to the
 * P-stripe XOR rebuild path shared with RAID5 (copy parity over the failed
 * block, rotate the pointer array, then run_xor() the remaining stripes).
 */
180962306a36Sopenharmony_ci */ 181062306a36Sopenharmony_ci goto pstripe; 181162306a36Sopenharmony_ci } 181262306a36Sopenharmony_ci 181362306a36Sopenharmony_ci if (failb == rbio->real_stripes - 2) { 181462306a36Sopenharmony_ci raid6_datap_recov(rbio->real_stripes, sectorsize, 181562306a36Sopenharmony_ci faila, pointers); 181662306a36Sopenharmony_ci } else { 181762306a36Sopenharmony_ci raid6_2data_recov(rbio->real_stripes, sectorsize, 181862306a36Sopenharmony_ci faila, failb, pointers); 181962306a36Sopenharmony_ci } 182062306a36Sopenharmony_ci } else { 182162306a36Sopenharmony_ci void *p; 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_ci /* Rebuild from P stripe here (raid5 or raid6). */ 182462306a36Sopenharmony_ci ASSERT(failb == -1); 182562306a36Sopenharmony_cipstripe: 182662306a36Sopenharmony_ci /* Copy parity block into failed block to start with */ 182762306a36Sopenharmony_ci memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize); 182862306a36Sopenharmony_ci 182962306a36Sopenharmony_ci /* Rearrange the pointer array */ 183062306a36Sopenharmony_ci p = pointers[faila]; 183162306a36Sopenharmony_ci for (stripe_nr = faila; stripe_nr < rbio->nr_data - 1; 183262306a36Sopenharmony_ci stripe_nr++) 183362306a36Sopenharmony_ci pointers[stripe_nr] = pointers[stripe_nr + 1]; 183462306a36Sopenharmony_ci pointers[rbio->nr_data - 1] = p; 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci /* Xor in the rest */ 183762306a36Sopenharmony_ci run_xor(pointers, rbio->nr_data - 1, sectorsize); 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci } 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci /* 184262306a36Sopenharmony_ci * No matter if this is a RMW or recovery, we should have all 184362306a36Sopenharmony_ci * failed sectors repaired in the vertical stripe, thus they are now 184462306a36Sopenharmony_ci * uptodate.
/*
 * Repaired sectors are verified against their data checksums where
 * possible (verify_one_sector()) and then marked uptodate; the cleanup
 * label unmaps every mapped stripe page in reverse order before
 * returning.  recover_sectors(), starting below, drives
 * recover_vertical() for every vertical stripe of the rbio.
 */
184562306a36Sopenharmony_ci * Especially if we determine to cache the rbio, we need to 184662306a36Sopenharmony_ci * have at least all data sectors uptodate. 184762306a36Sopenharmony_ci * 184862306a36Sopenharmony_ci * If possible, also check if the repaired sector matches its data 184962306a36Sopenharmony_ci * checksum. 185062306a36Sopenharmony_ci */ 185162306a36Sopenharmony_ci if (faila >= 0) { 185262306a36Sopenharmony_ci ret = verify_one_sector(rbio, faila, sector_nr); 185362306a36Sopenharmony_ci if (ret < 0) 185462306a36Sopenharmony_ci goto cleanup; 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, faila, sector_nr); 185762306a36Sopenharmony_ci sector->uptodate = 1; 185862306a36Sopenharmony_ci } 185962306a36Sopenharmony_ci if (failb >= 0) { 186062306a36Sopenharmony_ci ret = verify_one_sector(rbio, failb, sector_nr); 186162306a36Sopenharmony_ci if (ret < 0) 186262306a36Sopenharmony_ci goto cleanup; 186362306a36Sopenharmony_ci 186462306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, failb, sector_nr); 186562306a36Sopenharmony_ci sector->uptodate = 1; 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_cicleanup: 186962306a36Sopenharmony_ci for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--) 187062306a36Sopenharmony_ci kunmap_local(unmap_array[stripe_nr]); 187162306a36Sopenharmony_ci return ret; 187262306a36Sopenharmony_ci} 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_cistatic int recover_sectors(struct btrfs_raid_bio *rbio) 187562306a36Sopenharmony_ci{ 187662306a36Sopenharmony_ci void **pointers = NULL; 187762306a36Sopenharmony_ci void **unmap_array = NULL; 187862306a36Sopenharmony_ci int sectornr; 187962306a36Sopenharmony_ci int ret = 0; 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci /* 188262306a36Sopenharmony_ci * @pointers array stores the pointer for each sector.
/*
 * recover_sectors() allocates the two per-stripe scratch arrays
 * (@pointers is reordered during rebuild, @unmap_array keeps the
 * original mapping for kunmap_local), takes RBIO_RMW_LOCKED_BIT for
 * READ_REBUILD rbios so no further merges happen, and recovers each
 * vertical stripe in turn, bailing out on the first failure.
 */
188362306a36Sopenharmony_ci * 188462306a36Sopenharmony_ci * @unmap_array stores copy of pointers that does not get reordered 188562306a36Sopenharmony_ci * during reconstruction so that kunmap_local works. 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_ci pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 188862306a36Sopenharmony_ci unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 188962306a36Sopenharmony_ci if (!pointers || !unmap_array) { 189062306a36Sopenharmony_ci ret = -ENOMEM; 189162306a36Sopenharmony_ci goto out; 189262306a36Sopenharmony_ci } 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { 189562306a36Sopenharmony_ci spin_lock(&rbio->bio_list_lock); 189662306a36Sopenharmony_ci set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 189762306a36Sopenharmony_ci spin_unlock(&rbio->bio_list_lock); 189862306a36Sopenharmony_ci } 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci index_rbio_pages(rbio); 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { 190362306a36Sopenharmony_ci ret = recover_vertical(rbio, sectornr, pointers, unmap_array); 190462306a36Sopenharmony_ci if (ret < 0) 190562306a36Sopenharmony_ci break; 190662306a36Sopenharmony_ci } 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ciout: 190962306a36Sopenharmony_ci kfree(pointers); 191062306a36Sopenharmony_ci kfree(unmap_array); 191162306a36Sopenharmony_ci return ret; 191262306a36Sopenharmony_ci} 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_cistatic void recover_rbio(struct btrfs_raid_bio *rbio) 191562306a36Sopenharmony_ci{ 191662306a36Sopenharmony_ci struct bio_list bio_list = BIO_EMPTY_LIST; 191762306a36Sopenharmony_ci int total_sector_nr; 191862306a36Sopenharmony_ci int ret = 0; 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ci /* 192162306a36Sopenharmony_ci * Either we're doing recover for a read failure or degraded
/*
 * recover_rbio() allocates all stripe pages (P/Q included) and re-reads
 * every sector that is not already flagged in the error bitmap; cached
 * sectors are deliberately not trusted on the recovery path since they
 * may hold stale data the higher layer never validated.
 */
write, 192262306a36Sopenharmony_ci * caller should have set error bitmap correctly. 192362306a36Sopenharmony_ci */ 192462306a36Sopenharmony_ci ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors)); 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci /* For recovery, we need to read all sectors including P/Q. */ 192762306a36Sopenharmony_ci ret = alloc_rbio_pages(rbio); 192862306a36Sopenharmony_ci if (ret < 0) 192962306a36Sopenharmony_ci goto out; 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci index_rbio_pages(rbio); 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci /* 193462306a36Sopenharmony_ci * Read everything that hasn't failed. However this time we will 193562306a36Sopenharmony_ci * not trust any cached sector. 193662306a36Sopenharmony_ci * As we may read out some stale data but higher layer is not reading 193762306a36Sopenharmony_ci * that stale part. 193862306a36Sopenharmony_ci * 193962306a36Sopenharmony_ci * So here we always re-read everything in recovery path. 194062306a36Sopenharmony_ci */ 194162306a36Sopenharmony_ci for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 194262306a36Sopenharmony_ci total_sector_nr++) { 194362306a36Sopenharmony_ci int stripe = total_sector_nr / rbio->stripe_nsectors; 194462306a36Sopenharmony_ci int sectornr = total_sector_nr % rbio->stripe_nsectors; 194562306a36Sopenharmony_ci struct sector_ptr *sector; 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci /* 194862306a36Sopenharmony_ci * Skip the range which has error. It can be a range which is 194962306a36Sopenharmony_ci * marked error (for csum mismatch), or it can be a missing 195062306a36Sopenharmony_ci * device.
/*
 * Sectors on missing devices or already-errored ranges are skipped and
 * (for missing devices) flagged in the error bitmap; everything else is
 * read synchronously via submit_read_wait_bio_list(), then
 * recover_sectors() rebuilds the failures and the original bio is
 * completed.  recover_rbio_work()/recover_rbio_work_locked() are the
 * workqueue entry points; the former must still win the stripe lock,
 * the latter is used when the lock is already held.
 */
195162306a36Sopenharmony_ci */ 195262306a36Sopenharmony_ci if (!rbio->bioc->stripes[stripe].dev->bdev || 195362306a36Sopenharmony_ci test_bit(total_sector_nr, rbio->error_bitmap)) { 195462306a36Sopenharmony_ci /* 195562306a36Sopenharmony_ci * Also set the error bit for missing device, which 195662306a36Sopenharmony_ci * may not yet have its error bit set. 195762306a36Sopenharmony_ci */ 195862306a36Sopenharmony_ci set_bit(total_sector_nr, rbio->error_bitmap); 195962306a36Sopenharmony_ci continue; 196062306a36Sopenharmony_ci } 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe, sectornr); 196362306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe, 196462306a36Sopenharmony_ci sectornr, REQ_OP_READ); 196562306a36Sopenharmony_ci if (ret < 0) { 196662306a36Sopenharmony_ci bio_list_put(&bio_list); 196762306a36Sopenharmony_ci goto out; 196862306a36Sopenharmony_ci } 196962306a36Sopenharmony_ci } 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci submit_read_wait_bio_list(rbio, &bio_list); 197262306a36Sopenharmony_ci ret = recover_sectors(rbio); 197362306a36Sopenharmony_ciout: 197462306a36Sopenharmony_ci rbio_orig_end_io(rbio, errno_to_blk_status(ret)); 197562306a36Sopenharmony_ci} 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_cistatic void recover_rbio_work(struct work_struct *work) 197862306a36Sopenharmony_ci{ 197962306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci rbio = container_of(work, struct btrfs_raid_bio, work); 198262306a36Sopenharmony_ci if (!lock_stripe_add(rbio)) 198362306a36Sopenharmony_ci recover_rbio(rbio); 198462306a36Sopenharmony_ci} 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_cistatic void recover_rbio_work_locked(struct work_struct *work) 198762306a36Sopenharmony_ci{ 198862306a36Sopenharmony_ci recover_rbio(container_of(work, struct btrfs_raid_bio, work)); 198962306a36Sopenharmony_ci}
/*
 * set_rbio_raid6_extra_error(): for RAID6 retry mirrors (mirror_num > 2),
 * mark one additional stripe as failed in every errored vertical stripe
 * so that the next rebuild attempt uses a different stripe combination.
 * The extra stripe index is derived from mirror_num and shifted past the
 * originally failed stripe when they would collide.
 */
199062306a36Sopenharmony_ci 199162306a36Sopenharmony_cistatic void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num) 199262306a36Sopenharmony_ci{ 199362306a36Sopenharmony_ci bool found = false; 199462306a36Sopenharmony_ci int sector_nr; 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci /* 199762306a36Sopenharmony_ci * This is for RAID6 extra recovery tries, thus mirror number should 199862306a36Sopenharmony_ci * be large than 2. 199962306a36Sopenharmony_ci * Mirror 1 means read from data stripes. Mirror 2 means rebuild using 200062306a36Sopenharmony_ci * RAID5 methods. 200162306a36Sopenharmony_ci */ 200262306a36Sopenharmony_ci ASSERT(mirror_num > 2); 200362306a36Sopenharmony_ci for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) { 200462306a36Sopenharmony_ci int found_errors; 200562306a36Sopenharmony_ci int faila; 200662306a36Sopenharmony_ci int failb; 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci found_errors = get_rbio_veritical_errors(rbio, sector_nr, 200962306a36Sopenharmony_ci &faila, &failb); 201062306a36Sopenharmony_ci /* This vertical stripe doesn't have errors. */ 201162306a36Sopenharmony_ci if (!found_errors) 201262306a36Sopenharmony_ci continue; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci /* 201562306a36Sopenharmony_ci * If we found errors, there should be only one error marked 201662306a36Sopenharmony_ci * by previous set_rbio_range_error(). 201762306a36Sopenharmony_ci */ 201862306a36Sopenharmony_ci ASSERT(found_errors == 1); 201962306a36Sopenharmony_ci found = true; 202062306a36Sopenharmony_ci 202162306a36Sopenharmony_ci /* Now select another stripe to mark as error. */ 202262306a36Sopenharmony_ci failb = rbio->real_stripes - (mirror_num - 1); 202362306a36Sopenharmony_ci if (failb <= faila) 202462306a36Sopenharmony_ci failb--; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci /* Set the extra bit in error bitmap.
/*
 * raid56_parity_recover() below is the read-repair entry point: it is
 * only reached after the normal read path failed.  It builds an rbio in
 * BTRFS_RBIO_READ_REBUILD mode, attaches the failed bio, seeds the
 * error bitmap from that bio's range, and queues recover_rbio_work().
 */
*/ 202762306a36Sopenharmony_ci if (failb >= 0) 202862306a36Sopenharmony_ci set_bit(failb * rbio->stripe_nsectors + sector_nr, 202962306a36Sopenharmony_ci rbio->error_bitmap); 203062306a36Sopenharmony_ci } 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci /* We should found at least one vertical stripe with error.*/ 203362306a36Sopenharmony_ci ASSERT(found); 203462306a36Sopenharmony_ci} 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci/* 203762306a36Sopenharmony_ci * the main entry point for reads from the higher layers. This 203862306a36Sopenharmony_ci * is really only called when the normal read path had a failure, 203962306a36Sopenharmony_ci * so we assume the bio they send down corresponds to a failed part 204062306a36Sopenharmony_ci * of the drive. 204162306a36Sopenharmony_ci */ 204262306a36Sopenharmony_civoid raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, 204362306a36Sopenharmony_ci int mirror_num) 204462306a36Sopenharmony_ci{ 204562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = bioc->fs_info; 204662306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ci rbio = alloc_rbio(fs_info, bioc); 204962306a36Sopenharmony_ci if (IS_ERR(rbio)) { 205062306a36Sopenharmony_ci bio->bi_status = errno_to_blk_status(PTR_ERR(rbio)); 205162306a36Sopenharmony_ci bio_endio(bio); 205262306a36Sopenharmony_ci return; 205362306a36Sopenharmony_ci } 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci rbio->operation = BTRFS_RBIO_READ_REBUILD; 205662306a36Sopenharmony_ci rbio_add_bio(rbio, bio); 205762306a36Sopenharmony_ci 205862306a36Sopenharmony_ci set_rbio_range_error(rbio, bio); 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci /* 206162306a36Sopenharmony_ci * Loop retry: 206262306a36Sopenharmony_ci * for 'mirror == 2', reconstruct from all other stripes. 206362306a36Sopenharmony_ci * for 'mirror_num > 2', select a stripe to fail on every retry.
/*
 * fill_data_csums() below preloads the data checksums covering the
 * whole full stripe so the RMW read path can verify data sectors as
 * they come in.
 */
206462306a36Sopenharmony_ci */ 206562306a36Sopenharmony_ci if (mirror_num > 2) 206662306a36Sopenharmony_ci set_rbio_raid6_extra_error(rbio, mirror_num); 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci start_async_work(rbio, recover_rbio_work); 206962306a36Sopenharmony_ci} 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_cistatic void fill_data_csums(struct btrfs_raid_bio *rbio) 207262306a36Sopenharmony_ci{ 207362306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; 207462306a36Sopenharmony_ci struct btrfs_root *csum_root = btrfs_csum_root(fs_info, 207562306a36Sopenharmony_ci rbio->bioc->full_stripe_logical); 207662306a36Sopenharmony_ci const u64 start = rbio->bioc->full_stripe_logical; 207762306a36Sopenharmony_ci const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << 207862306a36Sopenharmony_ci fs_info->sectorsize_bits; 207962306a36Sopenharmony_ci int ret; 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci /* The rbio should not have its csum buffer initialized. */ 208262306a36Sopenharmony_ci ASSERT(!rbio->csum_buf && !rbio->csum_bitmap); 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci /* 208562306a36Sopenharmony_ci * Skip the csum search if: 208662306a36Sopenharmony_ci * 208762306a36Sopenharmony_ci * - The rbio doesn't belong to data block groups 208862306a36Sopenharmony_ci * Then we are doing IO for tree blocks, no need to search csums. 208962306a36Sopenharmony_ci * 209062306a36Sopenharmony_ci * - The rbio belongs to mixed block groups 209162306a36Sopenharmony_ci * This is to avoid deadlock, as we're already holding the full 209262306a36Sopenharmony_ci * stripe lock, if we trigger a metadata read, and it needs to do 209362306a36Sopenharmony_ci * raid56 recovery, we will deadlock.
/*
 * Csum lookup is skipped for pure-metadata and for mixed block groups
 * (the latter to avoid deadlocking on the full stripe lock).  A failure
 * here is deliberately non-fatal: a rate-limited warning is emitted and
 * the csum buffers are released, so RMW simply proceeds without read
 * verification for this sub-stripe write.
 */
209462306a36Sopenharmony_ci */ 209562306a36Sopenharmony_ci if (!(rbio->bioc->map_type & BTRFS_BLOCK_GROUP_DATA) || 209662306a36Sopenharmony_ci rbio->bioc->map_type & BTRFS_BLOCK_GROUP_METADATA) 209762306a36Sopenharmony_ci return; 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci rbio->csum_buf = kzalloc(rbio->nr_data * rbio->stripe_nsectors * 210062306a36Sopenharmony_ci fs_info->csum_size, GFP_NOFS); 210162306a36Sopenharmony_ci rbio->csum_bitmap = bitmap_zalloc(rbio->nr_data * rbio->stripe_nsectors, 210262306a36Sopenharmony_ci GFP_NOFS); 210362306a36Sopenharmony_ci if (!rbio->csum_buf || !rbio->csum_bitmap) { 210462306a36Sopenharmony_ci ret = -ENOMEM; 210562306a36Sopenharmony_ci goto error; 210662306a36Sopenharmony_ci } 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1, 210962306a36Sopenharmony_ci rbio->csum_buf, rbio->csum_bitmap); 211062306a36Sopenharmony_ci if (ret < 0) 211162306a36Sopenharmony_ci goto error; 211262306a36Sopenharmony_ci if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits)) 211362306a36Sopenharmony_ci goto no_csum; 211462306a36Sopenharmony_ci return; 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_cierror: 211762306a36Sopenharmony_ci /* 211862306a36Sopenharmony_ci * We failed to allocate memory or grab the csum, but it's not fatal, 211962306a36Sopenharmony_ci * we can still continue. But better to warn users that RMW is no 212062306a36Sopenharmony_ci * longer safe for this particular sub-stripe write.
/*
 * rmw_read_wait_recover(): fills the csum cache, then queues reads for
 * every sector of the full stripe (data and P/Q alike) so the later
 * verification/recovery has complete input.
 */
212162306a36Sopenharmony_ci */ 212262306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 212362306a36Sopenharmony_ci"sub-stripe write for full stripe %llu is not safe, failed to get csum: %d", 212462306a36Sopenharmony_ci rbio->bioc->full_stripe_logical, ret); 212562306a36Sopenharmony_cino_csum: 212662306a36Sopenharmony_ci kfree(rbio->csum_buf); 212762306a36Sopenharmony_ci bitmap_free(rbio->csum_bitmap); 212862306a36Sopenharmony_ci rbio->csum_buf = NULL; 212962306a36Sopenharmony_ci rbio->csum_bitmap = NULL; 213062306a36Sopenharmony_ci} 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_cistatic int rmw_read_wait_recover(struct btrfs_raid_bio *rbio) 213362306a36Sopenharmony_ci{ 213462306a36Sopenharmony_ci struct bio_list bio_list = BIO_EMPTY_LIST; 213562306a36Sopenharmony_ci int total_sector_nr; 213662306a36Sopenharmony_ci int ret = 0; 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci /* 213962306a36Sopenharmony_ci * Fill the data csums we need for data verification. We need to fill 214062306a36Sopenharmony_ci * the csum_bitmap/csum_buf first, as our endio function will try to 214162306a36Sopenharmony_ci * verify the data sectors. 214262306a36Sopenharmony_ci */ 214362306a36Sopenharmony_ci fill_data_csums(rbio); 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci /* 214662306a36Sopenharmony_ci * Build a list of bios to read all sectors (including data and P/Q). 214762306a36Sopenharmony_ci * 214862306a36Sopenharmony_ci * This behavior is to compensate the later csum verification and recovery.
/* Queue one read per sector; on failure, drop the whole pending list. */
214962306a36Sopenharmony_ci */ 215062306a36Sopenharmony_ci for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 215162306a36Sopenharmony_ci total_sector_nr++) { 215262306a36Sopenharmony_ci struct sector_ptr *sector; 215362306a36Sopenharmony_ci int stripe = total_sector_nr / rbio->stripe_nsectors; 215462306a36Sopenharmony_ci int sectornr = total_sector_nr % rbio->stripe_nsectors; 215562306a36Sopenharmony_ci 215662306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, stripe, sectornr); 215762306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, &bio_list, sector, 215862306a36Sopenharmony_ci stripe, sectornr, REQ_OP_READ); 215962306a36Sopenharmony_ci if (ret) { 216062306a36Sopenharmony_ci bio_list_put(&bio_list); 216162306a36Sopenharmony_ci return ret; 216262306a36Sopenharmony_ci } 216362306a36Sopenharmony_ci } 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci /* 216662306a36Sopenharmony_ci * We may or may not have any corrupted sectors (including missing dev 216762306a36Sopenharmony_ci * and csum mismatch), just let recover_sectors() to handle them all.
/*
 * raid_wait_write_end_io(): write-completion handler; records any error
 * in the rbio error bitmap, drops the bio, and wakes the waiter once
 * the last pending stripe write finishes.  submit_write_bios() submits
 * every assembled write bio, attaching this handler (plus optional
 * tracepoint info).
 */
216862306a36Sopenharmony_ci */ 216962306a36Sopenharmony_ci submit_read_wait_bio_list(rbio, &bio_list); 217062306a36Sopenharmony_ci return recover_sectors(rbio); 217162306a36Sopenharmony_ci} 217262306a36Sopenharmony_ci 217362306a36Sopenharmony_cistatic void raid_wait_write_end_io(struct bio *bio) 217462306a36Sopenharmony_ci{ 217562306a36Sopenharmony_ci struct btrfs_raid_bio *rbio = bio->bi_private; 217662306a36Sopenharmony_ci blk_status_t err = bio->bi_status; 217762306a36Sopenharmony_ci 217862306a36Sopenharmony_ci if (err) 217962306a36Sopenharmony_ci rbio_update_error_bitmap(rbio, bio); 218062306a36Sopenharmony_ci bio_put(bio); 218162306a36Sopenharmony_ci if (atomic_dec_and_test(&rbio->stripes_pending)) 218262306a36Sopenharmony_ci wake_up(&rbio->io_wait); 218362306a36Sopenharmony_ci} 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_cistatic void submit_write_bios(struct btrfs_raid_bio *rbio, 218662306a36Sopenharmony_ci struct bio_list *bio_list) 218762306a36Sopenharmony_ci{ 218862306a36Sopenharmony_ci struct bio *bio; 218962306a36Sopenharmony_ci 219062306a36Sopenharmony_ci atomic_set(&rbio->stripes_pending, bio_list_size(bio_list)); 219162306a36Sopenharmony_ci while ((bio = bio_list_pop(bio_list))) { 219262306a36Sopenharmony_ci bio->bi_end_io = raid_wait_write_end_io; 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_ci if (trace_raid56_write_enabled()) { 219562306a36Sopenharmony_ci struct raid56_bio_trace_info trace_info = { 0 }; 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci bio_get_trace_info(rbio, bio, &trace_info); 219862306a36Sopenharmony_ci trace_raid56_write(rbio, bio, &trace_info); 219962306a36Sopenharmony_ci } 220062306a36Sopenharmony_ci submit_bio(bio); 220162306a36Sopenharmony_ci } 220262306a36Sopenharmony_ci} 220362306a36Sopenharmony_ci 220462306a36Sopenharmony_ci/* 220562306a36Sopenharmony_ci * To determine if we need to read any sector from the disk.
/*
 * need_read_stripe_sectors(): a sub-stripe write may skip the read phase
 * only when every data sector already has a page and is uptodate (i.e.
 * the rbio came from the stripe cache).  rmw_rbio() below is the main
 * read-modify-write sequence for a write rbio.
 */
220662306a36Sopenharmony_ci * Should only be utilized in RMW path, to skip cached rbio. 220762306a36Sopenharmony_ci */ 220862306a36Sopenharmony_cistatic bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio) 220962306a36Sopenharmony_ci{ 221062306a36Sopenharmony_ci int i; 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) { 221362306a36Sopenharmony_ci struct sector_ptr *sector = &rbio->stripe_sectors[i]; 221462306a36Sopenharmony_ci 221562306a36Sopenharmony_ci /* 221662306a36Sopenharmony_ci * We have a sector which doesn't have page nor uptodate, 221762306a36Sopenharmony_ci * thus this rbio can not be cached one, as cached one must 221862306a36Sopenharmony_ci * have all its data sectors present and uptodate. 221962306a36Sopenharmony_ci */ 222062306a36Sopenharmony_ci if (!sector->page || !sector->uptodate) 222162306a36Sopenharmony_ci return true; 222262306a36Sopenharmony_ci } 222362306a36Sopenharmony_ci return false; 222462306a36Sopenharmony_ci} 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_cistatic void rmw_rbio(struct btrfs_raid_bio *rbio) 222762306a36Sopenharmony_ci{ 222862306a36Sopenharmony_ci struct bio_list bio_list; 222962306a36Sopenharmony_ci int sectornr; 223062306a36Sopenharmony_ci int ret = 0; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci /* 223362306a36Sopenharmony_ci * Allocate the pages for parity first, as P/Q pages will always be 223462306a36Sopenharmony_ci * needed for both full-stripe and sub-stripe writes. 223562306a36Sopenharmony_ci */ 223662306a36Sopenharmony_ci ret = alloc_rbio_parity_pages(rbio); 223762306a36Sopenharmony_ci if (ret < 0) 223862306a36Sopenharmony_ci goto out; 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ci /* 224162306a36Sopenharmony_ci * Either full stripe write, or we have every data sector already 224262306a36Sopenharmony_ci * cached, can go to write path immediately.
/*
 * Sub-stripe writes must first read in (and, if needed, recover) all
 * data stripes; after that point no further bios may be merged into
 * this rbio (RBIO_RMW_LOCKED_BIT) and the error bitmap is reset before
 * the write phase.
 */
224362306a36Sopenharmony_ci */ 224462306a36Sopenharmony_ci if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) { 224562306a36Sopenharmony_ci /* 224662306a36Sopenharmony_ci * Now we're doing sub-stripe write, also need all data stripes 224762306a36Sopenharmony_ci * to do the full RMW. 224862306a36Sopenharmony_ci */ 224962306a36Sopenharmony_ci ret = alloc_rbio_data_pages(rbio); 225062306a36Sopenharmony_ci if (ret < 0) 225162306a36Sopenharmony_ci goto out; 225262306a36Sopenharmony_ci 225362306a36Sopenharmony_ci index_rbio_pages(rbio); 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci ret = rmw_read_wait_recover(rbio); 225662306a36Sopenharmony_ci if (ret < 0) 225762306a36Sopenharmony_ci goto out; 225862306a36Sopenharmony_ci } 225962306a36Sopenharmony_ci 226062306a36Sopenharmony_ci /* 226162306a36Sopenharmony_ci * At this stage we're not allowed to add any new bios to the 226262306a36Sopenharmony_ci * bio list any more, anyone else that wants to change this stripe 226362306a36Sopenharmony_ci * needs to do their own rmw. 226462306a36Sopenharmony_ci */ 226562306a36Sopenharmony_ci spin_lock(&rbio->bio_list_lock); 226662306a36Sopenharmony_ci set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 226762306a36Sopenharmony_ci spin_unlock(&rbio->bio_list_lock); 226862306a36Sopenharmony_ci 226962306a36Sopenharmony_ci bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci index_rbio_pages(rbio); 227262306a36Sopenharmony_ci 227362306a36Sopenharmony_ci /* 227462306a36Sopenharmony_ci * We don't cache full rbios because we're assuming 227562306a36Sopenharmony_ci * the higher layers are unlikely to use this area of 227662306a36Sopenharmony_ci * the disk again soon. If they do use it again, 227762306a36Sopenharmony_ci * hopefully they will send another full bio.
/*
 * Non-full rbios are cached for future sub-stripe writes; P/Q is then
 * regenerated per vertical stripe, the write bios are assembled and
 * submitted, and the code waits for all stripe writes to complete.
 */
227862306a36Sopenharmony_ci */ 227962306a36Sopenharmony_ci if (!rbio_is_full(rbio)) 228062306a36Sopenharmony_ci cache_rbio_pages(rbio); 228162306a36Sopenharmony_ci else 228262306a36Sopenharmony_ci clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 228362306a36Sopenharmony_ci 228462306a36Sopenharmony_ci for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) 228562306a36Sopenharmony_ci generate_pq_vertical(rbio, sectornr); 228662306a36Sopenharmony_ci 228762306a36Sopenharmony_ci bio_list_init(&bio_list); 228862306a36Sopenharmony_ci ret = rmw_assemble_write_bios(rbio, &bio_list); 228962306a36Sopenharmony_ci if (ret < 0) 229062306a36Sopenharmony_ci goto out; 229162306a36Sopenharmony_ci 229262306a36Sopenharmony_ci /* We should have at least one bio assembled. */ 229362306a36Sopenharmony_ci ASSERT(bio_list_size(&bio_list)); 229462306a36Sopenharmony_ci submit_write_bios(rbio, &bio_list); 229562306a36Sopenharmony_ci wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0); 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ci /* We may have more errors than our tolerance during the read.
/*
 * Fail the rbio with -EIO if any vertical stripe accumulated more
 * errors than the redundancy tolerates.  rmw_rbio_work() (takes the
 * stripe lock first) and rmw_rbio_work_locked() (lock already held)
 * are the workqueue wrappers.
 */
*/ 229862306a36Sopenharmony_ci for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) { 229962306a36Sopenharmony_ci int found_errors; 230062306a36Sopenharmony_ci 230162306a36Sopenharmony_ci found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL); 230262306a36Sopenharmony_ci if (found_errors > rbio->bioc->max_errors) { 230362306a36Sopenharmony_ci ret = -EIO; 230462306a36Sopenharmony_ci break; 230562306a36Sopenharmony_ci } 230662306a36Sopenharmony_ci } 230762306a36Sopenharmony_ciout: 230862306a36Sopenharmony_ci rbio_orig_end_io(rbio, errno_to_blk_status(ret)); 230962306a36Sopenharmony_ci} 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_cistatic void rmw_rbio_work(struct work_struct *work) 231262306a36Sopenharmony_ci{ 231362306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci rbio = container_of(work, struct btrfs_raid_bio, work); 231662306a36Sopenharmony_ci if (lock_stripe_add(rbio) == 0) 231762306a36Sopenharmony_ci rmw_rbio(rbio); 231862306a36Sopenharmony_ci} 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_cistatic void rmw_rbio_work_locked(struct work_struct *work) 232162306a36Sopenharmony_ci{ 232262306a36Sopenharmony_ci rmw_rbio(container_of(work, struct btrfs_raid_bio, work)); 232362306a36Sopenharmony_ci} 232462306a36Sopenharmony_ci 232562306a36Sopenharmony_ci/* 232662306a36Sopenharmony_ci * The following code is used to scrub/replace the parity stripe 232762306a36Sopenharmony_ci * 232862306a36Sopenharmony_ci * Caller must have already increased bio_counter for getting @bioc. 232962306a36Sopenharmony_ci * 233062306a36Sopenharmony_ci * Note: We need make sure all the pages that add into the scrub/replace 233162306a36Sopenharmony_ci * raid bio are correct and not be changed during the scrub/replace. That 233262306a36Sopenharmony_ci * is those pages just hold metadata or file data with checksum.
/*
 * raid56_parity_alloc_scrub_rbio(): builds a PARITY_SCRUB rbio around a
 * zero-size carrier bio (used only to hold the completion handler),
 * locates which stripe index belongs to the device being scrubbed
 * (rbio->scrubp) and copies the caller's dirty-sector bitmap.
 * NOTE(review): on alloc_rbio() failure this returns NULL rather than
 * propagating the error pointer — callers must check for NULL.
 */
233362306a36Sopenharmony_ci */ 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_cistruct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, 233662306a36Sopenharmony_ci struct btrfs_io_context *bioc, 233762306a36Sopenharmony_ci struct btrfs_device *scrub_dev, 233862306a36Sopenharmony_ci unsigned long *dbitmap, int stripe_nsectors) 233962306a36Sopenharmony_ci{ 234062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = bioc->fs_info; 234162306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 234262306a36Sopenharmony_ci int i; 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci rbio = alloc_rbio(fs_info, bioc); 234562306a36Sopenharmony_ci if (IS_ERR(rbio)) 234662306a36Sopenharmony_ci return NULL; 234762306a36Sopenharmony_ci bio_list_add(&rbio->bio_list, bio); 234862306a36Sopenharmony_ci /* 234962306a36Sopenharmony_ci * This is a special bio which is used to hold the completion handler 235062306a36Sopenharmony_ci * and make the scrub rbio is similar to the other types 235162306a36Sopenharmony_ci */ 235262306a36Sopenharmony_ci ASSERT(!bio->bi_iter.bi_size); 235362306a36Sopenharmony_ci rbio->operation = BTRFS_RBIO_PARITY_SCRUB; 235462306a36Sopenharmony_ci 235562306a36Sopenharmony_ci /* 235662306a36Sopenharmony_ci * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted 235762306a36Sopenharmony_ci * to the end position, so this search can start from the first parity 235862306a36Sopenharmony_ci * stripe.
/*
 * alloc_rbio_essential_pages(): scrub only needs pages for the sectors
 * set in dbitmap, so allocate just those, skipping any page already
 * present.
 */
235962306a36Sopenharmony_ci */ 236062306a36Sopenharmony_ci for (i = rbio->nr_data; i < rbio->real_stripes; i++) { 236162306a36Sopenharmony_ci if (bioc->stripes[i].dev == scrub_dev) { 236262306a36Sopenharmony_ci rbio->scrubp = i; 236362306a36Sopenharmony_ci break; 236462306a36Sopenharmony_ci } 236562306a36Sopenharmony_ci } 236662306a36Sopenharmony_ci ASSERT(i < rbio->real_stripes); 236762306a36Sopenharmony_ci 236862306a36Sopenharmony_ci bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors); 236962306a36Sopenharmony_ci return rbio; 237062306a36Sopenharmony_ci} 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci/* 237362306a36Sopenharmony_ci * We just scrub the parity that we have correct data on the same horizontal, 237462306a36Sopenharmony_ci * so we needn't allocate all pages for all the stripes. 237562306a36Sopenharmony_ci */ 237662306a36Sopenharmony_cistatic int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) 237762306a36Sopenharmony_ci{ 237862306a36Sopenharmony_ci const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 237962306a36Sopenharmony_ci int total_sector_nr; 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; 238262306a36Sopenharmony_ci total_sector_nr++) { 238362306a36Sopenharmony_ci struct page *page; 238462306a36Sopenharmony_ci int sectornr = total_sector_nr % rbio->stripe_nsectors; 238562306a36Sopenharmony_ci int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT; 238662306a36Sopenharmony_ci 238762306a36Sopenharmony_ci if (!test_bit(sectornr, &rbio->dbitmap)) 238862306a36Sopenharmony_ci continue; 238962306a36Sopenharmony_ci if (rbio->stripe_pages[index]) 239062306a36Sopenharmony_ci continue; 239162306a36Sopenharmony_ci page = alloc_page(GFP_NOFS); 239262306a36Sopenharmony_ci if (!page) 239362306a36Sopenharmony_ci return -ENOMEM; 239462306a36Sopenharmony_ci rbio->stripe_pages[index] = page; 239562306a36Sopenharmony_ci } 239662306a36Sopenharmony_ci
/*
 * finish_parity_scrub(): recomputes P (and Q for RAID6) for the dirty
 * sectors into temporary pages, compares against the on-disk parity,
 * and writes back only the parity sectors that differ; the write may be
 * duplicated to a dev-replace target.
 */
index_stripe_sectors(rbio); 239762306a36Sopenharmony_ci return 0; 239862306a36Sopenharmony_ci} 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_cistatic int finish_parity_scrub(struct btrfs_raid_bio *rbio) 240162306a36Sopenharmony_ci{ 240262306a36Sopenharmony_ci struct btrfs_io_context *bioc = rbio->bioc; 240362306a36Sopenharmony_ci const u32 sectorsize = bioc->fs_info->sectorsize; 240462306a36Sopenharmony_ci void **pointers = rbio->finish_pointers; 240562306a36Sopenharmony_ci unsigned long *pbitmap = &rbio->finish_pbitmap; 240662306a36Sopenharmony_ci int nr_data = rbio->nr_data; 240762306a36Sopenharmony_ci int stripe; 240862306a36Sopenharmony_ci int sectornr; 240962306a36Sopenharmony_ci bool has_qstripe; 241062306a36Sopenharmony_ci struct sector_ptr p_sector = { 0 }; 241162306a36Sopenharmony_ci struct sector_ptr q_sector = { 0 }; 241262306a36Sopenharmony_ci struct bio_list bio_list; 241362306a36Sopenharmony_ci int is_replace = 0; 241462306a36Sopenharmony_ci int ret; 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_ci bio_list_init(&bio_list); 241762306a36Sopenharmony_ci 241862306a36Sopenharmony_ci if (rbio->real_stripes - rbio->nr_data == 1) 241962306a36Sopenharmony_ci has_qstripe = false; 242062306a36Sopenharmony_ci else if (rbio->real_stripes - rbio->nr_data == 2) 242162306a36Sopenharmony_ci has_qstripe = true; 242262306a36Sopenharmony_ci else 242362306a36Sopenharmony_ci BUG(); 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci /* 242662306a36Sopenharmony_ci * Replace is running and our P/Q stripe is being replaced, then we 242762306a36Sopenharmony_ci * need to duplicate the final write to replace target.
/*
 * If dev-replace is running against the parity stripe being scrubbed,
 * remember which sectors must be duplicated to the target.  Scrub
 * results are never cached (RBIO_CACHE_READY_BIT cleared).  Temporary
 * pages are allocated and kmapped to hold the freshly computed P (and,
 * for RAID6, Q) values.
 */
242862306a36Sopenharmony_ci */ 242962306a36Sopenharmony_ci if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) { 243062306a36Sopenharmony_ci is_replace = 1; 243162306a36Sopenharmony_ci bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors); 243262306a36Sopenharmony_ci } 243362306a36Sopenharmony_ci 243462306a36Sopenharmony_ci /* 243562306a36Sopenharmony_ci * Because the higher layers(scrubber) are unlikely to 243662306a36Sopenharmony_ci * use this area of the disk again soon, so don't cache 243762306a36Sopenharmony_ci * it. 243862306a36Sopenharmony_ci */ 243962306a36Sopenharmony_ci clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 244062306a36Sopenharmony_ci 244162306a36Sopenharmony_ci p_sector.page = alloc_page(GFP_NOFS); 244262306a36Sopenharmony_ci if (!p_sector.page) 244362306a36Sopenharmony_ci return -ENOMEM; 244462306a36Sopenharmony_ci p_sector.pgoff = 0; 244562306a36Sopenharmony_ci p_sector.uptodate = 1; 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci if (has_qstripe) { 244862306a36Sopenharmony_ci /* RAID6, allocate and map temp space for the Q stripe */ 244962306a36Sopenharmony_ci q_sector.page = alloc_page(GFP_NOFS); 245062306a36Sopenharmony_ci if (!q_sector.page) { 245162306a36Sopenharmony_ci __free_page(p_sector.page); 245262306a36Sopenharmony_ci p_sector.page = NULL; 245362306a36Sopenharmony_ci return -ENOMEM; 245462306a36Sopenharmony_ci } 245562306a36Sopenharmony_ci q_sector.pgoff = 0; 245662306a36Sopenharmony_ci q_sector.uptodate = 1; 245762306a36Sopenharmony_ci pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page); 245862306a36Sopenharmony_ci } 245962306a36Sopenharmony_ci 246062306a36Sopenharmony_ci bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); 246162306a36Sopenharmony_ci 246262306a36Sopenharmony_ci /* Map the parity stripe just once */ 246362306a36Sopenharmony_ci pointers[nr_data] = kmap_local_page(p_sector.page); 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci for_each_set_bit(sectornr,
/*
 * Per dirty vertical stripe: map the data sectors, regenerate parity
 * (XOR for RAID5, raid6_call.gen_syndrome() for RAID6), and overwrite
 * the on-disk parity sector only when it mismatches; matching sectors
 * are cleared from dbitmap so they are not written back.
 */
&rbio->dbitmap, rbio->stripe_nsectors) { 246662306a36Sopenharmony_ci struct sector_ptr *sector; 246762306a36Sopenharmony_ci void *parity; 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci /* first collect one page from each data stripe */ 247062306a36Sopenharmony_ci for (stripe = 0; stripe < nr_data; stripe++) { 247162306a36Sopenharmony_ci sector = sector_in_rbio(rbio, stripe, sectornr, 0); 247262306a36Sopenharmony_ci pointers[stripe] = kmap_local_page(sector->page) + 247362306a36Sopenharmony_ci sector->pgoff; 247462306a36Sopenharmony_ci } 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci if (has_qstripe) { 247762306a36Sopenharmony_ci /* RAID6, call the library function to fill in our P/Q */ 247862306a36Sopenharmony_ci raid6_call.gen_syndrome(rbio->real_stripes, sectorsize, 247962306a36Sopenharmony_ci pointers); 248062306a36Sopenharmony_ci } else { 248162306a36Sopenharmony_ci /* raid5 */ 248262306a36Sopenharmony_ci memcpy(pointers[nr_data], pointers[0], sectorsize); 248362306a36Sopenharmony_ci run_xor(pointers + 1, nr_data - 1, sectorsize); 248462306a36Sopenharmony_ci } 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci /* Check scrubbing parity and repair it */ 248762306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 248862306a36Sopenharmony_ci parity = kmap_local_page(sector->page) + sector->pgoff; 248962306a36Sopenharmony_ci if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0) 249062306a36Sopenharmony_ci memcpy(parity, pointers[rbio->scrubp], sectorsize); 249162306a36Sopenharmony_ci else 249262306a36Sopenharmony_ci /* Parity is right, needn't writeback */ 249362306a36Sopenharmony_ci bitmap_clear(&rbio->dbitmap, sectornr, 1); 249462306a36Sopenharmony_ci kunmap_local(parity); 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci for (stripe = nr_data - 1; stripe >= 0; stripe--) 249762306a36Sopenharmony_ci kunmap_local(pointers[stripe]); 249862306a36Sopenharmony_ci } 249962306a36Sopenharmony_ci
/*
 * Tear down the temporary P/Q mappings and pages, then assemble write
 * bios for every parity sector still marked dirty in dbitmap.
 */
250062306a36Sopenharmony_ci kunmap_local(pointers[nr_data]); 250162306a36Sopenharmony_ci __free_page(p_sector.page); 250262306a36Sopenharmony_ci p_sector.page = NULL; 250362306a36Sopenharmony_ci if (q_sector.page) { 250462306a36Sopenharmony_ci kunmap_local(pointers[rbio->real_stripes - 1]); 250562306a36Sopenharmony_ci __free_page(q_sector.page); 250662306a36Sopenharmony_ci q_sector.page = NULL; 250762306a36Sopenharmony_ci } 250862306a36Sopenharmony_ci 250962306a36Sopenharmony_ci /* 251062306a36Sopenharmony_ci * time to start writing. Make bios for everything from the 251162306a36Sopenharmony_ci * higher layers (the bio_list in our rbio) and our p/q. Ignore 251262306a36Sopenharmony_ci * everything else. 251362306a36Sopenharmony_ci */ 251462306a36Sopenharmony_ci for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) { 251562306a36Sopenharmony_ci struct sector_ptr *sector; 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 251862306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp, 251962306a36Sopenharmony_ci sectornr, REQ_OP_WRITE); 252062306a36Sopenharmony_ci if (ret) 252162306a36Sopenharmony_ci goto cleanup; 252262306a36Sopenharmony_ci } 252362306a36Sopenharmony_ci 252462306a36Sopenharmony_ci if (!is_replace) 252562306a36Sopenharmony_ci goto submit_write; 252662306a36Sopenharmony_ci 252762306a36Sopenharmony_ci /* 252862306a36Sopenharmony_ci * Replace is running and our parity stripe needs to be duplicated to 252962306a36Sopenharmony_ci * the target device. Check we have a valid source stripe number.
/*
 * Duplicate the parity writes to the replace target when needed, then
 * submit everything.  is_data_stripe() is a trivial predicate for a
 * data (vs parity) stripe index.  recover_scrub_rbio() starts at the
 * bottom but continues past this view.
 */
253062306a36Sopenharmony_ci */ 253162306a36Sopenharmony_ci ASSERT(rbio->bioc->replace_stripe_src >= 0); 253262306a36Sopenharmony_ci for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { 253362306a36Sopenharmony_ci struct sector_ptr *sector; 253462306a36Sopenharmony_ci 253562306a36Sopenharmony_ci sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); 253662306a36Sopenharmony_ci ret = rbio_add_io_sector(rbio, &bio_list, sector, 253762306a36Sopenharmony_ci rbio->real_stripes, 253862306a36Sopenharmony_ci sectornr, REQ_OP_WRITE); 253962306a36Sopenharmony_ci if (ret) 254062306a36Sopenharmony_ci goto cleanup; 254162306a36Sopenharmony_ci } 254262306a36Sopenharmony_ci 254362306a36Sopenharmony_cisubmit_write: 254462306a36Sopenharmony_ci submit_write_bios(rbio, &bio_list); 254562306a36Sopenharmony_ci return 0; 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_cicleanup: 254862306a36Sopenharmony_ci bio_list_put(&bio_list); 254962306a36Sopenharmony_ci return ret; 255062306a36Sopenharmony_ci} 255162306a36Sopenharmony_ci 255262306a36Sopenharmony_cistatic inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) 255362306a36Sopenharmony_ci{ 255462306a36Sopenharmony_ci if (stripe >= 0 && stripe < rbio->nr_data) 255562306a36Sopenharmony_ci return 1; 255662306a36Sopenharmony_ci return 0; 255762306a36Sopenharmony_ci} 255862306a36Sopenharmony_ci 255962306a36Sopenharmony_cistatic int recover_scrub_rbio(struct btrfs_raid_bio *rbio) 256062306a36Sopenharmony_ci{ 256162306a36Sopenharmony_ci void **pointers = NULL; 256262306a36Sopenharmony_ci void **unmap_array = NULL; 256362306a36Sopenharmony_ci int sector_nr; 256462306a36Sopenharmony_ci int ret = 0; 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci /* 256762306a36Sopenharmony_ci * @pointers array stores the pointer for each sector.
	 *
	 * @unmap_array stores copy of pointers that does not get reordered
	 * during reconstruction so that kunmap_local works.
	 */
	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!pointers || !unmap_array) {
		ret = -ENOMEM;
		goto out;
	}

	/* Walk every vertical stripe (one sector from each stripe member). */
	for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
		int dfail = 0, failp = -1;
		int faila;
		int failb;
		int found_errors;

		/* NOTE: "veritical" is a pre-existing typo in the helper's name. */
		found_errors = get_rbio_veritical_errors(rbio, sector_nr,
							 &faila, &failb);
		if (found_errors > rbio->bioc->max_errors) {
			ret = -EIO;
			goto out;
		}
		if (found_errors == 0)
			continue;

		/* We should have at least one error here. */
		ASSERT(faila >= 0 || failb >= 0);

		/* Classify the failures: count data stripes, remember a parity. */
		if (is_data_stripe(rbio, faila))
			dfail++;
		else if (is_parity_stripe(faila))
			failp = faila;

		if (is_data_stripe(rbio, failb))
			dfail++;
		else if (is_parity_stripe(failb))
			failp = failb;
		/*
		 * Because we can not use the scrubbing parity to repair data,
		 * the repair capability is reduced by one.  (In the case of
		 * RAID5, we can not repair anything.)
		 */
		if (dfail > rbio->bioc->max_errors - 1) {
			ret = -EIO;
			goto out;
		}
		/*
		 * If all data is good and only parity is bad, just repair the
		 * parity, no need to recover data stripes.
		 */
		if (dfail == 0)
			continue;

		/*
		 * Here we have one corrupted data stripe and one corrupted
		 * parity on RAID6.  If the corrupted parity is the scrubbing
		 * parity, we can luckily use the other one to repair the
		 * data; otherwise we can not repair the data stripe.
		 */
		if (failp != rbio->scrubp) {
			ret = -EIO;
			goto out;
		}

		ret = recover_vertical(rbio, sector_nr, pointers, unmap_array);
		if (ret < 0)
			goto out;
	}
out:
	kfree(pointers);
	kfree(unmap_array);
	return ret;
}

/*
 * Queue read bios for every sector inside the scrub range (rbio->dbitmap)
 * that is not already present or uptodate, submit them and wait for
 * completion.  Returns 0 on success or a negative errno.
 */
static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
{
	struct bio_list bio_list = BIO_EMPTY_LIST;
	int total_sector_nr;
	int ret = 0;

	/* Build a list of bios to read all the missing parts. */
	for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
	     total_sector_nr++) {
		int sectornr = total_sector_nr % rbio->stripe_nsectors;
		int stripe = total_sector_nr / rbio->stripe_nsectors;
		struct sector_ptr *sector;

		/* No data in this vertical stripe, no need to read. */
		if (!test_bit(sectornr, &rbio->dbitmap))
			continue;

		/*
		 * We want to find all the sectors missing from the rbio and
		 * read them from the disk.  If sector_in_rbio() finds a sector
		 * in the bio list we don't need to read it off the stripe.
		 */
		sector = sector_in_rbio(rbio, stripe, sectornr, 1);
		if (sector)
			continue;

		sector = rbio_stripe_sector(rbio, stripe, sectornr);
		/*
		 * The bio cache may have handed us an uptodate sector.  If so,
		 * use it.
		 */
		if (sector->uptodate)
			continue;

		ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
					 sectornr, REQ_OP_READ);
		if (ret) {
			/* Drop the bios queued so far before bailing out. */
			bio_list_put(&bio_list);
			return ret;
		}
	}

	/* Submit all the queued reads and wait for them to finish. */
	submit_read_wait_bio_list(rbio, &bio_list);
	return 0;
}

/*
 * Main scrub path for one rbio: read the needed sectors, repair any failed
 * data stripes, then verify/rewrite the parity being scrubbed.  Always ends
 * the original bio with the resulting status.
 */
static void scrub_rbio(struct btrfs_raid_bio *rbio)
{
	int sector_nr;
	int ret;

	ret = alloc_rbio_essential_pages(rbio);
	if (ret)
		goto out;

	/* Start with a clean error bitmap; the reads below will fill it. */
	bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);

	ret = scrub_assemble_read_bios(rbio);
	if (ret < 0)
		goto out;

	/* We may have some failures, recover the failed sectors first. */
	ret = recover_scrub_rbio(rbio);
	if (ret < 0)
		goto out;

	/*
	 * We have every sector properly prepared.  Can finish the scrub
	 * and writeback the good content.
	 */
	ret = finish_parity_scrub(rbio);
	wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
	/* Any vertical stripe with too many errors means the scrub failed. */
	for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
		int found_errors;

		found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
		if (found_errors > rbio->bioc->max_errors) {
			ret = -EIO;
			break;
		}
	}
out:
	rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}

/* Work-queue entry for a scrub rbio that already holds the stripe lock. */
static void scrub_rbio_work_locked(struct work_struct *work)
{
	scrub_rbio(container_of(work, struct btrfs_raid_bio, work));
}

/*
 * Submit a parity-scrub rbio.  Only kick off the scrub work when
 * lock_stripe_add() returns 0 — presumably meaning we acquired the stripe
 * lock; otherwise the rbio was queued behind the current holder and will
 * run later (TODO confirm against lock_stripe_add()).
 */
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
	if (!lock_stripe_add(rbio))
		start_async_work(rbio, scrub_rbio_work_locked);
}

/*
 * This is for scrub call sites where we already have correct data contents.
 * This allows us to avoid reading data stripes again.
274262306a36Sopenharmony_ci * 274362306a36Sopenharmony_ci * Unfortunately here we have to do page copy, other than reusing the pages. 274462306a36Sopenharmony_ci * This is due to the fact rbio has its own page management for its cache. 274562306a36Sopenharmony_ci */ 274662306a36Sopenharmony_civoid raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio, 274762306a36Sopenharmony_ci struct page **data_pages, u64 data_logical) 274862306a36Sopenharmony_ci{ 274962306a36Sopenharmony_ci const u64 offset_in_full_stripe = data_logical - 275062306a36Sopenharmony_ci rbio->bioc->full_stripe_logical; 275162306a36Sopenharmony_ci const int page_index = offset_in_full_stripe >> PAGE_SHIFT; 275262306a36Sopenharmony_ci const u32 sectorsize = rbio->bioc->fs_info->sectorsize; 275362306a36Sopenharmony_ci const u32 sectors_per_page = PAGE_SIZE / sectorsize; 275462306a36Sopenharmony_ci int ret; 275562306a36Sopenharmony_ci 275662306a36Sopenharmony_ci /* 275762306a36Sopenharmony_ci * If we hit ENOMEM temporarily, but later at 275862306a36Sopenharmony_ci * raid56_parity_submit_scrub_rbio() time it succeeded, we just do 275962306a36Sopenharmony_ci * the extra read, not a big deal. 276062306a36Sopenharmony_ci * 276162306a36Sopenharmony_ci * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time, 276262306a36Sopenharmony_ci * the bio would got proper error number set. 276362306a36Sopenharmony_ci */ 276462306a36Sopenharmony_ci ret = alloc_rbio_data_pages(rbio); 276562306a36Sopenharmony_ci if (ret < 0) 276662306a36Sopenharmony_ci return; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci /* data_logical must be at stripe boundary and inside the full stripe. 
*/ 276962306a36Sopenharmony_ci ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN)); 277062306a36Sopenharmony_ci ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT)); 277162306a36Sopenharmony_ci 277262306a36Sopenharmony_ci for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) { 277362306a36Sopenharmony_ci struct page *dst = rbio->stripe_pages[page_nr + page_index]; 277462306a36Sopenharmony_ci struct page *src = data_pages[page_nr]; 277562306a36Sopenharmony_ci 277662306a36Sopenharmony_ci memcpy_page(dst, 0, src, 0, PAGE_SIZE); 277762306a36Sopenharmony_ci for (int sector_nr = sectors_per_page * page_index; 277862306a36Sopenharmony_ci sector_nr < sectors_per_page * (page_index + 1); 277962306a36Sopenharmony_ci sector_nr++) 278062306a36Sopenharmony_ci rbio->stripe_sectors[sector_nr].uptodate = true; 278162306a36Sopenharmony_ci } 278262306a36Sopenharmony_ci} 2783