162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2011, 2012 STRATO. All rights reserved. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/blkdev.h> 762306a36Sopenharmony_ci#include <linux/ratelimit.h> 862306a36Sopenharmony_ci#include <linux/sched/mm.h> 962306a36Sopenharmony_ci#include <crypto/hash.h> 1062306a36Sopenharmony_ci#include "ctree.h" 1162306a36Sopenharmony_ci#include "discard.h" 1262306a36Sopenharmony_ci#include "volumes.h" 1362306a36Sopenharmony_ci#include "disk-io.h" 1462306a36Sopenharmony_ci#include "ordered-data.h" 1562306a36Sopenharmony_ci#include "transaction.h" 1662306a36Sopenharmony_ci#include "backref.h" 1762306a36Sopenharmony_ci#include "extent_io.h" 1862306a36Sopenharmony_ci#include "dev-replace.h" 1962306a36Sopenharmony_ci#include "check-integrity.h" 2062306a36Sopenharmony_ci#include "raid56.h" 2162306a36Sopenharmony_ci#include "block-group.h" 2262306a36Sopenharmony_ci#include "zoned.h" 2362306a36Sopenharmony_ci#include "fs.h" 2462306a36Sopenharmony_ci#include "accessors.h" 2562306a36Sopenharmony_ci#include "file-item.h" 2662306a36Sopenharmony_ci#include "scrub.h" 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci/* 2962306a36Sopenharmony_ci * This is only the first step towards a full-features scrub. It reads all 3062306a36Sopenharmony_ci * extent and super block and verifies the checksums. In case a bad checksum 3162306a36Sopenharmony_ci * is found or the extent cannot be read, good data will be written back if 3262306a36Sopenharmony_ci * any can be found. 3362306a36Sopenharmony_ci * 3462306a36Sopenharmony_ci * Future enhancements: 3562306a36Sopenharmony_ci * - In case an unrepairable extent is encountered, track which files are 3662306a36Sopenharmony_ci * affected and report them 3762306a36Sopenharmony_ci * - track and record media errors, throw out bad devices 3862306a36Sopenharmony_ci * - add a mode to also read unallocated space 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistruct scrub_ctx; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* 4462306a36Sopenharmony_ci * The following value only influences the performance. 4562306a36Sopenharmony_ci * 4662306a36Sopenharmony_ci * This detemines how many stripes would be submitted in one go, 4762306a36Sopenharmony_ci * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP). 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_ci#define SCRUB_STRIPES_PER_GROUP 8 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci/* 5262306a36Sopenharmony_ci * How many groups we have for each sctx. 5362306a36Sopenharmony_ci * 5462306a36Sopenharmony_ci * This would be 8M per device, the same value as the old scrub in-flight bios 5562306a36Sopenharmony_ci * size limit. 5662306a36Sopenharmony_ci */ 5762306a36Sopenharmony_ci#define SCRUB_GROUPS_PER_SCTX 16 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci#define SCRUB_TOTAL_STRIPES (SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP) 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci/* 6262306a36Sopenharmony_ci * The following value times PAGE_SIZE needs to be large enough to match the 6362306a36Sopenharmony_ci * largest node/leaf/sector size that shall be supported. 6462306a36Sopenharmony_ci */ 6562306a36Sopenharmony_ci#define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K) 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci/* Represent one sector and its needed info to verify the content. */ 6862306a36Sopenharmony_cistruct scrub_sector_verification { 6962306a36Sopenharmony_ci bool is_metadata; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci union { 7262306a36Sopenharmony_ci /* 7362306a36Sopenharmony_ci * Csum pointer for data csum verification. Should point to a 7462306a36Sopenharmony_ci * sector csum inside scrub_stripe::csums. 7562306a36Sopenharmony_ci * 7662306a36Sopenharmony_ci * NULL if this data sector has no csum. 7762306a36Sopenharmony_ci */ 7862306a36Sopenharmony_ci u8 *csum; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci /* 8162306a36Sopenharmony_ci * Extra info for metadata verification. All sectors inside a 8262306a36Sopenharmony_ci * tree block share the same generation. 8362306a36Sopenharmony_ci */ 8462306a36Sopenharmony_ci u64 generation; 8562306a36Sopenharmony_ci }; 8662306a36Sopenharmony_ci}; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cienum scrub_stripe_flags { 8962306a36Sopenharmony_ci /* Set when @mirror_num, @dev, @physical and @logical are set. */ 9062306a36Sopenharmony_ci SCRUB_STRIPE_FLAG_INITIALIZED, 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci /* Set when the read-repair is finished. */ 9362306a36Sopenharmony_ci SCRUB_STRIPE_FLAG_REPAIR_DONE, 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci /* 9662306a36Sopenharmony_ci * Set for data stripes if it's triggered from P/Q stripe. 9762306a36Sopenharmony_ci * During such scrub, we should not report errors in data stripes, nor 9862306a36Sopenharmony_ci * update the accounting. 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_ci SCRUB_STRIPE_FLAG_NO_REPORT, 10162306a36Sopenharmony_ci}; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE) 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci/* 10662306a36Sopenharmony_ci * Represent one contiguous range with a length of BTRFS_STRIPE_LEN. 10762306a36Sopenharmony_ci */ 10862306a36Sopenharmony_cistruct scrub_stripe { 10962306a36Sopenharmony_ci struct scrub_ctx *sctx; 11062306a36Sopenharmony_ci struct btrfs_block_group *bg; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci struct page *pages[SCRUB_STRIPE_PAGES]; 11362306a36Sopenharmony_ci struct scrub_sector_verification *sectors; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci struct btrfs_device *dev; 11662306a36Sopenharmony_ci u64 logical; 11762306a36Sopenharmony_ci u64 physical; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci u16 mirror_num; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci /* Should be BTRFS_STRIPE_LEN / sectorsize. */ 12262306a36Sopenharmony_ci u16 nr_sectors; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci /* 12562306a36Sopenharmony_ci * How many data/meta extents are in this stripe. Only for scrub status 12662306a36Sopenharmony_ci * reporting purposes. 12762306a36Sopenharmony_ci */ 12862306a36Sopenharmony_ci u16 nr_data_extents; 12962306a36Sopenharmony_ci u16 nr_meta_extents; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci atomic_t pending_io; 13262306a36Sopenharmony_ci wait_queue_head_t io_wait; 13362306a36Sopenharmony_ci wait_queue_head_t repair_wait; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci /* 13662306a36Sopenharmony_ci * Indicate the states of the stripe. Bits are defined in 13762306a36Sopenharmony_ci * scrub_stripe_flags enum. 13862306a36Sopenharmony_ci */ 13962306a36Sopenharmony_ci unsigned long state; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci /* Indicate which sectors are covered by extent items. */ 14262306a36Sopenharmony_ci unsigned long extent_sector_bitmap; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* 14562306a36Sopenharmony_ci * The errors hit during the initial read of the stripe. 14662306a36Sopenharmony_ci * 14762306a36Sopenharmony_ci * Would be utilized for error reporting and repair. 14862306a36Sopenharmony_ci * 14962306a36Sopenharmony_ci * The remaining init_nr_* records the number of errors hit, only used 15062306a36Sopenharmony_ci * by error reporting. 15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_ci unsigned long init_error_bitmap; 15362306a36Sopenharmony_ci unsigned int init_nr_io_errors; 15462306a36Sopenharmony_ci unsigned int init_nr_csum_errors; 15562306a36Sopenharmony_ci unsigned int init_nr_meta_errors; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* 15862306a36Sopenharmony_ci * The following error bitmaps are all for the current status. 15962306a36Sopenharmony_ci * Every time we submit a new read, these bitmaps may be updated. 16062306a36Sopenharmony_ci * 16162306a36Sopenharmony_ci * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap; 16262306a36Sopenharmony_ci * 16362306a36Sopenharmony_ci * IO and csum errors can happen for both metadata and data. 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_ci unsigned long error_bitmap; 16662306a36Sopenharmony_ci unsigned long io_error_bitmap; 16762306a36Sopenharmony_ci unsigned long csum_error_bitmap; 16862306a36Sopenharmony_ci unsigned long meta_error_bitmap; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci /* For writeback (repair or replace) error reporting. */ 17162306a36Sopenharmony_ci unsigned long write_error_bitmap; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci /* Writeback can be concurrent, thus we need to protect the bitmap. */ 17462306a36Sopenharmony_ci spinlock_t write_error_lock; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci /* 17762306a36Sopenharmony_ci * Checksum for the whole stripe if this stripe is inside a data block 17862306a36Sopenharmony_ci * group. 17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_ci u8 *csums; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci struct work_struct work; 18362306a36Sopenharmony_ci}; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistruct scrub_ctx { 18662306a36Sopenharmony_ci struct scrub_stripe stripes[SCRUB_TOTAL_STRIPES]; 18762306a36Sopenharmony_ci struct scrub_stripe *raid56_data_stripes; 18862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info; 18962306a36Sopenharmony_ci struct btrfs_path extent_path; 19062306a36Sopenharmony_ci struct btrfs_path csum_path; 19162306a36Sopenharmony_ci int first_free; 19262306a36Sopenharmony_ci int cur_stripe; 19362306a36Sopenharmony_ci atomic_t cancel_req; 19462306a36Sopenharmony_ci int readonly; 19562306a36Sopenharmony_ci int sectors_per_bio; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci /* State of IO submission throttling affecting the associated device */ 19862306a36Sopenharmony_ci ktime_t throttle_deadline; 19962306a36Sopenharmony_ci u64 throttle_sent; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci int is_dev_replace; 20262306a36Sopenharmony_ci u64 write_pointer; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci struct mutex wr_lock; 20562306a36Sopenharmony_ci struct btrfs_device *wr_tgtdev; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci /* 20862306a36Sopenharmony_ci * statistics 20962306a36Sopenharmony_ci */ 21062306a36Sopenharmony_ci struct btrfs_scrub_progress stat; 21162306a36Sopenharmony_ci spinlock_t stat_lock; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci /* 21462306a36Sopenharmony_ci * Use a ref counter to avoid use-after-free issues. Scrub workers 21562306a36Sopenharmony_ci * decrement bios_in_flight and workers_pending and then do a wakeup 21662306a36Sopenharmony_ci * on the list_wait wait queue. We must ensure the main scrub task 21762306a36Sopenharmony_ci * doesn't free the scrub context before or while the workers are 21862306a36Sopenharmony_ci * doing the wakeup() call. 21962306a36Sopenharmony_ci */ 22062306a36Sopenharmony_ci refcount_t refs; 22162306a36Sopenharmony_ci}; 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cistruct scrub_warning { 22462306a36Sopenharmony_ci struct btrfs_path *path; 22562306a36Sopenharmony_ci u64 extent_item_size; 22662306a36Sopenharmony_ci const char *errstr; 22762306a36Sopenharmony_ci u64 physical; 22862306a36Sopenharmony_ci u64 logical; 22962306a36Sopenharmony_ci struct btrfs_device *dev; 23062306a36Sopenharmony_ci}; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_cistatic void release_scrub_stripe(struct scrub_stripe *stripe) 23362306a36Sopenharmony_ci{ 23462306a36Sopenharmony_ci if (!stripe) 23562306a36Sopenharmony_ci return; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) { 23862306a36Sopenharmony_ci if (stripe->pages[i]) 23962306a36Sopenharmony_ci __free_page(stripe->pages[i]); 24062306a36Sopenharmony_ci stripe->pages[i] = NULL; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci kfree(stripe->sectors); 24362306a36Sopenharmony_ci kfree(stripe->csums); 24462306a36Sopenharmony_ci stripe->sectors = NULL; 24562306a36Sopenharmony_ci stripe->csums = NULL; 24662306a36Sopenharmony_ci stripe->sctx = NULL; 24762306a36Sopenharmony_ci stripe->state = 0; 24862306a36Sopenharmony_ci} 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_cistatic int init_scrub_stripe(struct btrfs_fs_info *fs_info, 25162306a36Sopenharmony_ci struct scrub_stripe *stripe) 25262306a36Sopenharmony_ci{ 25362306a36Sopenharmony_ci int ret; 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci memset(stripe, 0, sizeof(*stripe)); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci stripe->nr_sectors = BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits; 25862306a36Sopenharmony_ci stripe->state = 0; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci init_waitqueue_head(&stripe->io_wait); 26162306a36Sopenharmony_ci init_waitqueue_head(&stripe->repair_wait); 26262306a36Sopenharmony_ci atomic_set(&stripe->pending_io, 0); 26362306a36Sopenharmony_ci spin_lock_init(&stripe->write_error_lock); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages); 26662306a36Sopenharmony_ci if (ret < 0) 26762306a36Sopenharmony_ci goto error; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci stripe->sectors = kcalloc(stripe->nr_sectors, 27062306a36Sopenharmony_ci sizeof(struct scrub_sector_verification), 27162306a36Sopenharmony_ci GFP_KERNEL); 27262306a36Sopenharmony_ci if (!stripe->sectors) 27362306a36Sopenharmony_ci goto error; 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci stripe->csums = kcalloc(BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits, 27662306a36Sopenharmony_ci fs_info->csum_size, GFP_KERNEL); 27762306a36Sopenharmony_ci if (!stripe->csums) 27862306a36Sopenharmony_ci goto error; 27962306a36Sopenharmony_ci return 0; 28062306a36Sopenharmony_cierror: 28162306a36Sopenharmony_ci release_scrub_stripe(stripe); 28262306a36Sopenharmony_ci return -ENOMEM; 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic void wait_scrub_stripe_io(struct scrub_stripe *stripe) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0); 28862306a36Sopenharmony_ci} 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_cistatic void scrub_put_ctx(struct scrub_ctx *sctx); 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_cistatic void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci while (atomic_read(&fs_info->scrub_pause_req)) { 29562306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 29662306a36Sopenharmony_ci wait_event(fs_info->scrub_pause_wait, 29762306a36Sopenharmony_ci atomic_read(&fs_info->scrub_pause_req) == 0); 29862306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_cistatic void scrub_pause_on(struct btrfs_fs_info *fs_info) 30362306a36Sopenharmony_ci{ 30462306a36Sopenharmony_ci atomic_inc(&fs_info->scrubs_paused); 30562306a36Sopenharmony_ci wake_up(&fs_info->scrub_pause_wait); 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic void scrub_pause_off(struct btrfs_fs_info *fs_info) 30962306a36Sopenharmony_ci{ 31062306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 31162306a36Sopenharmony_ci __scrub_blocked_if_needed(fs_info); 31262306a36Sopenharmony_ci atomic_dec(&fs_info->scrubs_paused); 31362306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci wake_up(&fs_info->scrub_pause_wait); 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_cistatic void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) 31962306a36Sopenharmony_ci{ 32062306a36Sopenharmony_ci scrub_pause_on(fs_info); 32162306a36Sopenharmony_ci scrub_pause_off(fs_info); 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_cistatic noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci int i; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci if (!sctx) 32962306a36Sopenharmony_ci return; 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) 33262306a36Sopenharmony_ci release_scrub_stripe(&sctx->stripes[i]); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci kvfree(sctx); 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_cistatic void scrub_put_ctx(struct scrub_ctx *sctx) 33862306a36Sopenharmony_ci{ 33962306a36Sopenharmony_ci if (refcount_dec_and_test(&sctx->refs)) 34062306a36Sopenharmony_ci scrub_free_ctx(sctx); 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic noinline_for_stack struct scrub_ctx *scrub_setup_ctx( 34462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info, int is_dev_replace) 34562306a36Sopenharmony_ci{ 34662306a36Sopenharmony_ci struct scrub_ctx *sctx; 34762306a36Sopenharmony_ci int i; 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci /* Since sctx has inline 128 stripes, it can go beyond 64K easily. Use 35062306a36Sopenharmony_ci * kvzalloc(). 35162306a36Sopenharmony_ci */ 35262306a36Sopenharmony_ci sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL); 35362306a36Sopenharmony_ci if (!sctx) 35462306a36Sopenharmony_ci goto nomem; 35562306a36Sopenharmony_ci refcount_set(&sctx->refs, 1); 35662306a36Sopenharmony_ci sctx->is_dev_replace = is_dev_replace; 35762306a36Sopenharmony_ci sctx->fs_info = fs_info; 35862306a36Sopenharmony_ci sctx->extent_path.search_commit_root = 1; 35962306a36Sopenharmony_ci sctx->extent_path.skip_locking = 1; 36062306a36Sopenharmony_ci sctx->csum_path.search_commit_root = 1; 36162306a36Sopenharmony_ci sctx->csum_path.skip_locking = 1; 36262306a36Sopenharmony_ci for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) { 36362306a36Sopenharmony_ci int ret; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci ret = init_scrub_stripe(fs_info, &sctx->stripes[i]); 36662306a36Sopenharmony_ci if (ret < 0) 36762306a36Sopenharmony_ci goto nomem; 36862306a36Sopenharmony_ci sctx->stripes[i].sctx = sctx; 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci sctx->first_free = 0; 37162306a36Sopenharmony_ci atomic_set(&sctx->cancel_req, 0); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci spin_lock_init(&sctx->stat_lock); 37462306a36Sopenharmony_ci sctx->throttle_deadline = 0; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci mutex_init(&sctx->wr_lock); 37762306a36Sopenharmony_ci if (is_dev_replace) { 37862306a36Sopenharmony_ci WARN_ON(!fs_info->dev_replace.tgtdev); 37962306a36Sopenharmony_ci sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; 38062306a36Sopenharmony_ci } 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci return sctx; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_cinomem: 38562306a36Sopenharmony_ci scrub_free_ctx(sctx); 38662306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 38762306a36Sopenharmony_ci} 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_cistatic int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, 39062306a36Sopenharmony_ci u64 root, void *warn_ctx) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci u32 nlink; 39362306a36Sopenharmony_ci int ret; 39462306a36Sopenharmony_ci int i; 39562306a36Sopenharmony_ci unsigned nofs_flag; 39662306a36Sopenharmony_ci struct extent_buffer *eb; 39762306a36Sopenharmony_ci struct btrfs_inode_item *inode_item; 39862306a36Sopenharmony_ci struct scrub_warning *swarn = warn_ctx; 39962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = swarn->dev->fs_info; 40062306a36Sopenharmony_ci struct inode_fs_paths *ipath = NULL; 40162306a36Sopenharmony_ci struct btrfs_root *local_root; 40262306a36Sopenharmony_ci struct btrfs_key key; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci local_root = btrfs_get_fs_root(fs_info, root, true); 40562306a36Sopenharmony_ci if (IS_ERR(local_root)) { 40662306a36Sopenharmony_ci ret = PTR_ERR(local_root); 40762306a36Sopenharmony_ci goto err; 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci /* 41162306a36Sopenharmony_ci * this makes the path point to (inum INODE_ITEM ioff) 41262306a36Sopenharmony_ci */ 41362306a36Sopenharmony_ci key.objectid = inum; 41462306a36Sopenharmony_ci key.type = BTRFS_INODE_ITEM_KEY; 41562306a36Sopenharmony_ci key.offset = 0; 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0); 41862306a36Sopenharmony_ci if (ret) { 41962306a36Sopenharmony_ci btrfs_put_root(local_root); 42062306a36Sopenharmony_ci btrfs_release_path(swarn->path); 42162306a36Sopenharmony_ci goto err; 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci eb = swarn->path->nodes[0]; 42562306a36Sopenharmony_ci inode_item = btrfs_item_ptr(eb, swarn->path->slots[0], 42662306a36Sopenharmony_ci struct btrfs_inode_item); 42762306a36Sopenharmony_ci nlink = btrfs_inode_nlink(eb, inode_item); 42862306a36Sopenharmony_ci btrfs_release_path(swarn->path); 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci /* 43162306a36Sopenharmony_ci * init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub 43262306a36Sopenharmony_ci * uses GFP_NOFS in this context, so we keep it consistent but it does 43362306a36Sopenharmony_ci * not seem to be strictly necessary. 43462306a36Sopenharmony_ci */ 43562306a36Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 43662306a36Sopenharmony_ci ipath = init_ipath(4096, local_root, swarn->path); 43762306a36Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 43862306a36Sopenharmony_ci if (IS_ERR(ipath)) { 43962306a36Sopenharmony_ci btrfs_put_root(local_root); 44062306a36Sopenharmony_ci ret = PTR_ERR(ipath); 44162306a36Sopenharmony_ci ipath = NULL; 44262306a36Sopenharmony_ci goto err; 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci ret = paths_from_inode(inum, ipath); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci if (ret < 0) 44762306a36Sopenharmony_ci goto err; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci /* 45062306a36Sopenharmony_ci * we deliberately ignore the bit ipath might have been too small to 45162306a36Sopenharmony_ci * hold all of the paths here 45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_ci for (i = 0; i < ipath->fspath->elem_cnt; ++i) 45462306a36Sopenharmony_ci btrfs_warn_in_rcu(fs_info, 45562306a36Sopenharmony_ci"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", 45662306a36Sopenharmony_ci swarn->errstr, swarn->logical, 45762306a36Sopenharmony_ci btrfs_dev_name(swarn->dev), 45862306a36Sopenharmony_ci swarn->physical, 45962306a36Sopenharmony_ci root, inum, offset, 46062306a36Sopenharmony_ci fs_info->sectorsize, nlink, 46162306a36Sopenharmony_ci (char *)(unsigned long)ipath->fspath->val[i]); 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci btrfs_put_root(local_root); 46462306a36Sopenharmony_ci free_ipath(ipath); 46562306a36Sopenharmony_ci return 0; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_cierr: 46862306a36Sopenharmony_ci btrfs_warn_in_rcu(fs_info, 46962306a36Sopenharmony_ci "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", 47062306a36Sopenharmony_ci swarn->errstr, swarn->logical, 47162306a36Sopenharmony_ci btrfs_dev_name(swarn->dev), 47262306a36Sopenharmony_ci swarn->physical, 47362306a36Sopenharmony_ci root, inum, offset, ret); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci free_ipath(ipath); 47662306a36Sopenharmony_ci return 0; 47762306a36Sopenharmony_ci} 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_cistatic void scrub_print_common_warning(const char *errstr, struct btrfs_device *dev, 48062306a36Sopenharmony_ci bool is_super, u64 logical, u64 physical) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = dev->fs_info; 48362306a36Sopenharmony_ci struct btrfs_path *path; 48462306a36Sopenharmony_ci struct btrfs_key found_key; 48562306a36Sopenharmony_ci struct extent_buffer *eb; 48662306a36Sopenharmony_ci struct btrfs_extent_item *ei; 48762306a36Sopenharmony_ci struct scrub_warning swarn; 48862306a36Sopenharmony_ci u64 flags = 0; 48962306a36Sopenharmony_ci u32 item_size; 49062306a36Sopenharmony_ci int ret; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci /* Super block error, no need to search extent tree. */ 49362306a36Sopenharmony_ci if (is_super) { 49462306a36Sopenharmony_ci btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", 49562306a36Sopenharmony_ci errstr, btrfs_dev_name(dev), physical); 49662306a36Sopenharmony_ci return; 49762306a36Sopenharmony_ci } 49862306a36Sopenharmony_ci path = btrfs_alloc_path(); 49962306a36Sopenharmony_ci if (!path) 50062306a36Sopenharmony_ci return; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci swarn.physical = physical; 50362306a36Sopenharmony_ci swarn.logical = logical; 50462306a36Sopenharmony_ci swarn.errstr = errstr; 50562306a36Sopenharmony_ci swarn.dev = NULL; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, 50862306a36Sopenharmony_ci &flags); 50962306a36Sopenharmony_ci if (ret < 0) 51062306a36Sopenharmony_ci goto out; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci swarn.extent_item_size = found_key.offset; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci eb = path->nodes[0]; 51562306a36Sopenharmony_ci ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 51662306a36Sopenharmony_ci item_size = btrfs_item_size(eb, path->slots[0]); 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 51962306a36Sopenharmony_ci unsigned long ptr = 0; 52062306a36Sopenharmony_ci u8 ref_level; 52162306a36Sopenharmony_ci u64 ref_root; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci while (true) { 52462306a36Sopenharmony_ci ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, 52562306a36Sopenharmony_ci item_size, &ref_root, 52662306a36Sopenharmony_ci &ref_level); 52762306a36Sopenharmony_ci if (ret < 0) { 52862306a36Sopenharmony_ci btrfs_warn(fs_info, 52962306a36Sopenharmony_ci "failed to resolve tree backref for logical %llu: %d", 53062306a36Sopenharmony_ci swarn.logical, ret); 53162306a36Sopenharmony_ci break; 53262306a36Sopenharmony_ci } 53362306a36Sopenharmony_ci if (ret > 0) 53462306a36Sopenharmony_ci break; 53562306a36Sopenharmony_ci btrfs_warn_in_rcu(fs_info, 53662306a36Sopenharmony_ci"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", 53762306a36Sopenharmony_ci errstr, swarn.logical, btrfs_dev_name(dev), 53862306a36Sopenharmony_ci swarn.physical, (ref_level ? "node" : "leaf"), 53962306a36Sopenharmony_ci ref_level, ref_root); 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci btrfs_release_path(path); 54262306a36Sopenharmony_ci } else { 54362306a36Sopenharmony_ci struct btrfs_backref_walk_ctx ctx = { 0 }; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci btrfs_release_path(path); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci ctx.bytenr = found_key.objectid; 54862306a36Sopenharmony_ci ctx.extent_item_pos = swarn.logical - found_key.objectid; 54962306a36Sopenharmony_ci ctx.fs_info = fs_info; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci swarn.path = path; 55262306a36Sopenharmony_ci swarn.dev = dev; 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ciout: 55862306a36Sopenharmony_ci btrfs_free_path(path); 55962306a36Sopenharmony_ci} 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_cistatic int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci int ret = 0; 56462306a36Sopenharmony_ci u64 length; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci if (!btrfs_is_zoned(sctx->fs_info)) 56762306a36Sopenharmony_ci return 0; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci if (!btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) 57062306a36Sopenharmony_ci return 0; 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci if (sctx->write_pointer < physical) { 57362306a36Sopenharmony_ci length = physical - sctx->write_pointer; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci ret = btrfs_zoned_issue_zeroout(sctx->wr_tgtdev, 57662306a36Sopenharmony_ci sctx->write_pointer, length); 57762306a36Sopenharmony_ci if (!ret) 57862306a36Sopenharmony_ci sctx->write_pointer = physical; 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci return ret; 58162306a36Sopenharmony_ci} 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_cistatic struct page *scrub_stripe_get_page(struct scrub_stripe *stripe, int sector_nr) 58462306a36Sopenharmony_ci{ 58562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 58662306a36Sopenharmony_ci int page_index = (sector_nr << fs_info->sectorsize_bits) >> PAGE_SHIFT; 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci return stripe->pages[page_index]; 58962306a36Sopenharmony_ci} 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_cistatic unsigned int scrub_stripe_get_page_offset(struct scrub_stripe *stripe, 59262306a36Sopenharmony_ci int sector_nr) 59362306a36Sopenharmony_ci{ 59462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci return offset_in_page(sector_nr << fs_info->sectorsize_bits); 59762306a36Sopenharmony_ci} 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_cistatic void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr) 60062306a36Sopenharmony_ci{ 60162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 60262306a36Sopenharmony_ci const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; 60362306a36Sopenharmony_ci const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits); 60462306a36Sopenharmony_ci const struct page *first_page = scrub_stripe_get_page(stripe, sector_nr); 60562306a36Sopenharmony_ci const unsigned int first_off = scrub_stripe_get_page_offset(stripe, sector_nr); 60662306a36Sopenharmony_ci SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 60762306a36Sopenharmony_ci u8 on_disk_csum[BTRFS_CSUM_SIZE]; 60862306a36Sopenharmony_ci u8 calculated_csum[BTRFS_CSUM_SIZE]; 60962306a36Sopenharmony_ci struct btrfs_header *header; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci /* 61262306a36Sopenharmony_ci * Here we don't have a good way to attach the pages (and subpages) 61362306a36Sopenharmony_ci * to a dummy extent buffer, thus we have to directly grab the members 61462306a36Sopenharmony_ci * from pages. 61562306a36Sopenharmony_ci */ 61662306a36Sopenharmony_ci header = (struct btrfs_header *)(page_address(first_page) + first_off); 61762306a36Sopenharmony_ci memcpy(on_disk_csum, header->csum, fs_info->csum_size); 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci if (logical != btrfs_stack_header_bytenr(header)) { 62062306a36Sopenharmony_ci bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); 62162306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); 62262306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 62362306a36Sopenharmony_ci "tree block %llu mirror %u has bad bytenr, has %llu want %llu", 62462306a36Sopenharmony_ci logical, stripe->mirror_num, 62562306a36Sopenharmony_ci btrfs_stack_header_bytenr(header), logical); 62662306a36Sopenharmony_ci return; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid, 62962306a36Sopenharmony_ci BTRFS_FSID_SIZE) != 0) { 63062306a36Sopenharmony_ci bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); 63162306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); 63262306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 63362306a36Sopenharmony_ci "tree block %llu mirror %u has bad fsid, has %pU want %pU", 63462306a36Sopenharmony_ci logical, stripe->mirror_num, 63562306a36Sopenharmony_ci header->fsid, fs_info->fs_devices->fsid); 63662306a36Sopenharmony_ci return; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci if (memcmp(header->chunk_tree_uuid, fs_info->chunk_tree_uuid, 63962306a36Sopenharmony_ci BTRFS_UUID_SIZE) != 0) { 64062306a36Sopenharmony_ci bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); 64162306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); 64262306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 64362306a36Sopenharmony_ci "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", 64462306a36Sopenharmony_ci logical, stripe->mirror_num, 64562306a36Sopenharmony_ci header->chunk_tree_uuid, fs_info->chunk_tree_uuid); 64662306a36Sopenharmony_ci return; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci /* Now check tree block csum. */ 65062306a36Sopenharmony_ci shash->tfm = fs_info->csum_shash; 65162306a36Sopenharmony_ci crypto_shash_init(shash); 65262306a36Sopenharmony_ci crypto_shash_update(shash, page_address(first_page) + first_off + 65362306a36Sopenharmony_ci BTRFS_CSUM_SIZE, fs_info->sectorsize - BTRFS_CSUM_SIZE); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) { 65662306a36Sopenharmony_ci struct page *page = scrub_stripe_get_page(stripe, i); 65762306a36Sopenharmony_ci unsigned int page_off = scrub_stripe_get_page_offset(stripe, i); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci crypto_shash_update(shash, page_address(page) + page_off, 66062306a36Sopenharmony_ci fs_info->sectorsize); 66162306a36Sopenharmony_ci } 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci crypto_shash_final(shash, calculated_csum); 66462306a36Sopenharmony_ci if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) { 66562306a36Sopenharmony_ci bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); 66662306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); 66762306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 66862306a36Sopenharmony_ci "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, 66962306a36Sopenharmony_ci logical, stripe->mirror_num, 67062306a36Sopenharmony_ci CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), 67162306a36Sopenharmony_ci CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); 67262306a36Sopenharmony_ci return; 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci if (stripe->sectors[sector_nr].generation != 67562306a36Sopenharmony_ci btrfs_stack_header_generation(header)) { 67662306a36Sopenharmony_ci bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); 67762306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); 67862306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 67962306a36Sopenharmony_ci "tree block %llu mirror %u has bad generation, has %llu want %llu", 68062306a36Sopenharmony_ci logical, stripe->mirror_num, 68162306a36Sopenharmony_ci btrfs_stack_header_generation(header), 68262306a36Sopenharmony_ci stripe->sectors[sector_nr].generation); 68362306a36Sopenharmony_ci return; 68462306a36Sopenharmony_ci } 68562306a36Sopenharmony_ci bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree); 68662306a36Sopenharmony_ci bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); 68762306a36Sopenharmony_ci bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); 68862306a36Sopenharmony_ci} 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_cistatic void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 69362306a36Sopenharmony_ci struct scrub_sector_verification *sector = &stripe->sectors[sector_nr]; 69462306a36Sopenharmony_ci const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; 69562306a36Sopenharmony_ci struct page *page = scrub_stripe_get_page(stripe, sector_nr); 69662306a36Sopenharmony_ci unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr); 69762306a36Sopenharmony_ci u8 csum_buf[BTRFS_CSUM_SIZE]; 69862306a36Sopenharmony_ci int ret; 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci ASSERT(sector_nr >= 0 && sector_nr < stripe->nr_sectors); 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci /* Sector not utilized, skip it. */ 70362306a36Sopenharmony_ci if (!test_bit(sector_nr, &stripe->extent_sector_bitmap)) 70462306a36Sopenharmony_ci return; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci /* IO error, no need to check. */ 70762306a36Sopenharmony_ci if (test_bit(sector_nr, &stripe->io_error_bitmap)) 70862306a36Sopenharmony_ci return; 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci /* Metadata, verify the full tree block. */ 71162306a36Sopenharmony_ci if (sector->is_metadata) { 71262306a36Sopenharmony_ci /* 71362306a36Sopenharmony_ci * Check if the tree block crosses the stripe boudary. If 71462306a36Sopenharmony_ci * crossed the boundary, we cannot verify it but only give a 71562306a36Sopenharmony_ci * warning. 71662306a36Sopenharmony_ci * 71762306a36Sopenharmony_ci * This can only happen on a very old filesystem where chunks 71862306a36Sopenharmony_ci * are not ensured to be stripe aligned. 71962306a36Sopenharmony_ci */ 72062306a36Sopenharmony_ci if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { 72162306a36Sopenharmony_ci btrfs_warn_rl(fs_info, 72262306a36Sopenharmony_ci "tree block at %llu crosses stripe boundary %llu", 72362306a36Sopenharmony_ci stripe->logical + 72462306a36Sopenharmony_ci (sector_nr << fs_info->sectorsize_bits), 72562306a36Sopenharmony_ci stripe->logical); 72662306a36Sopenharmony_ci return; 72762306a36Sopenharmony_ci } 72862306a36Sopenharmony_ci scrub_verify_one_metadata(stripe, sector_nr); 72962306a36Sopenharmony_ci return; 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci /* 73362306a36Sopenharmony_ci * Data is easier, we just verify the data csum (if we have it). For 73462306a36Sopenharmony_ci * cases without csum, we have no other choice but to trust it. 73562306a36Sopenharmony_ci */ 73662306a36Sopenharmony_ci if (!sector->csum) { 73762306a36Sopenharmony_ci clear_bit(sector_nr, &stripe->error_bitmap); 73862306a36Sopenharmony_ci return; 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci ret = btrfs_check_sector_csum(fs_info, page, pgoff, csum_buf, sector->csum); 74262306a36Sopenharmony_ci if (ret < 0) { 74362306a36Sopenharmony_ci set_bit(sector_nr, &stripe->csum_error_bitmap); 74462306a36Sopenharmony_ci set_bit(sector_nr, &stripe->error_bitmap); 74562306a36Sopenharmony_ci } else { 74662306a36Sopenharmony_ci clear_bit(sector_nr, &stripe->csum_error_bitmap); 74762306a36Sopenharmony_ci clear_bit(sector_nr, &stripe->error_bitmap); 74862306a36Sopenharmony_ci } 74962306a36Sopenharmony_ci} 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci/* Verify specified sectors of a stripe. */ 75262306a36Sopenharmony_cistatic void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) 75362306a36Sopenharmony_ci{ 75462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 75562306a36Sopenharmony_ci const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; 75662306a36Sopenharmony_ci int sector_nr; 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci for_each_set_bit(sector_nr, &bitmap, stripe->nr_sectors) { 75962306a36Sopenharmony_ci scrub_verify_one_sector(stripe, sector_nr); 76062306a36Sopenharmony_ci if (stripe->sectors[sector_nr].is_metadata) 76162306a36Sopenharmony_ci sector_nr += sectors_per_tree - 1; 76262306a36Sopenharmony_ci } 76362306a36Sopenharmony_ci} 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_cistatic int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first_bvec) 76662306a36Sopenharmony_ci{ 76762306a36Sopenharmony_ci int i; 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci for (i = 0; i < stripe->nr_sectors; i++) { 77062306a36Sopenharmony_ci if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page && 77162306a36Sopenharmony_ci scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset) 77262306a36Sopenharmony_ci break; 77362306a36Sopenharmony_ci } 77462306a36Sopenharmony_ci ASSERT(i < stripe->nr_sectors); 77562306a36Sopenharmony_ci return i; 77662306a36Sopenharmony_ci} 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci/* 77962306a36Sopenharmony_ci * Repair read is different to the regular read: 78062306a36Sopenharmony_ci * 78162306a36Sopenharmony_ci * - Only reads the failed sectors 78262306a36Sopenharmony_ci * - May have extra blocksize limits 78362306a36Sopenharmony_ci */ 78462306a36Sopenharmony_cistatic void scrub_repair_read_endio(struct btrfs_bio *bbio) 78562306a36Sopenharmony_ci{ 78662306a36Sopenharmony_ci struct scrub_stripe *stripe = bbio->private; 78762306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 78862306a36Sopenharmony_ci struct bio_vec *bvec; 78962306a36Sopenharmony_ci int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); 79062306a36Sopenharmony_ci u32 bio_size = 0; 79162306a36Sopenharmony_ci int i; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci ASSERT(sector_nr < stripe->nr_sectors); 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci bio_for_each_bvec_all(bvec, &bbio->bio, i) 79662306a36Sopenharmony_ci bio_size += bvec->bv_len; 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci if (bbio->bio.bi_status) { 79962306a36Sopenharmony_ci bitmap_set(&stripe->io_error_bitmap, sector_nr, 80062306a36Sopenharmony_ci bio_size >> fs_info->sectorsize_bits); 80162306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, 80262306a36Sopenharmony_ci bio_size >> fs_info->sectorsize_bits); 80362306a36Sopenharmony_ci } else { 80462306a36Sopenharmony_ci bitmap_clear(&stripe->io_error_bitmap, sector_nr, 80562306a36Sopenharmony_ci bio_size >> fs_info->sectorsize_bits); 80662306a36Sopenharmony_ci } 80762306a36Sopenharmony_ci bio_put(&bbio->bio); 80862306a36Sopenharmony_ci if (atomic_dec_and_test(&stripe->pending_io)) 80962306a36Sopenharmony_ci wake_up(&stripe->io_wait); 81062306a36Sopenharmony_ci} 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_cistatic int calc_next_mirror(int mirror, int num_copies) 81362306a36Sopenharmony_ci{ 81462306a36Sopenharmony_ci ASSERT(mirror <= num_copies); 81562306a36Sopenharmony_ci return (mirror + 1 > num_copies) ? 1 : mirror + 1; 81662306a36Sopenharmony_ci} 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_cistatic void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe, 81962306a36Sopenharmony_ci int mirror, int blocksize, bool wait) 82062306a36Sopenharmony_ci{ 82162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 82262306a36Sopenharmony_ci struct btrfs_bio *bbio = NULL; 82362306a36Sopenharmony_ci const unsigned long old_error_bitmap = stripe->error_bitmap; 82462306a36Sopenharmony_ci int i; 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci ASSERT(stripe->mirror_num >= 1); 82762306a36Sopenharmony_ci ASSERT(atomic_read(&stripe->pending_io) == 0); 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) { 83062306a36Sopenharmony_ci struct page *page; 83162306a36Sopenharmony_ci int pgoff; 83262306a36Sopenharmony_ci int ret; 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci page = scrub_stripe_get_page(stripe, i); 83562306a36Sopenharmony_ci pgoff = scrub_stripe_get_page_offset(stripe, i); 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci /* The current sector cannot be merged, submit the bio. */ 83862306a36Sopenharmony_ci if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) || 83962306a36Sopenharmony_ci bbio->bio.bi_iter.bi_size >= blocksize)) { 84062306a36Sopenharmony_ci ASSERT(bbio->bio.bi_iter.bi_size); 84162306a36Sopenharmony_ci atomic_inc(&stripe->pending_io); 84262306a36Sopenharmony_ci btrfs_submit_bio(bbio, mirror); 84362306a36Sopenharmony_ci if (wait) 84462306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 84562306a36Sopenharmony_ci bbio = NULL; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci if (!bbio) { 84962306a36Sopenharmony_ci bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, 85062306a36Sopenharmony_ci fs_info, scrub_repair_read_endio, stripe); 85162306a36Sopenharmony_ci bbio->bio.bi_iter.bi_sector = (stripe->logical + 85262306a36Sopenharmony_ci (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; 85362306a36Sopenharmony_ci } 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); 85662306a36Sopenharmony_ci ASSERT(ret == fs_info->sectorsize); 85762306a36Sopenharmony_ci } 85862306a36Sopenharmony_ci if (bbio) { 85962306a36Sopenharmony_ci ASSERT(bbio->bio.bi_iter.bi_size); 86062306a36Sopenharmony_ci atomic_inc(&stripe->pending_io); 86162306a36Sopenharmony_ci btrfs_submit_bio(bbio, mirror); 86262306a36Sopenharmony_ci if (wait) 86362306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 86462306a36Sopenharmony_ci } 86562306a36Sopenharmony_ci} 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_cistatic void scrub_stripe_report_errors(struct scrub_ctx *sctx, 86862306a36Sopenharmony_ci struct scrub_stripe *stripe) 86962306a36Sopenharmony_ci{ 87062306a36Sopenharmony_ci static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, 87162306a36Sopenharmony_ci DEFAULT_RATELIMIT_BURST); 87262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 87362306a36Sopenharmony_ci struct btrfs_device *dev = NULL; 87462306a36Sopenharmony_ci u64 physical = 0; 87562306a36Sopenharmony_ci int nr_data_sectors = 0; 87662306a36Sopenharmony_ci int nr_meta_sectors = 0; 87762306a36Sopenharmony_ci int nr_nodatacsum_sectors = 0; 87862306a36Sopenharmony_ci int nr_repaired_sectors = 0; 87962306a36Sopenharmony_ci int sector_nr; 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci if (test_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state)) 88262306a36Sopenharmony_ci return; 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci /* 88562306a36Sopenharmony_ci * Init needed infos for error reporting. 88662306a36Sopenharmony_ci * 88762306a36Sopenharmony_ci * Although our scrub_stripe infrastucture is mostly based on btrfs_submit_bio() 88862306a36Sopenharmony_ci * thus no need for dev/physical, error reporting still needs dev and physical. 88962306a36Sopenharmony_ci */ 89062306a36Sopenharmony_ci if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) { 89162306a36Sopenharmony_ci u64 mapped_len = fs_info->sectorsize; 89262306a36Sopenharmony_ci struct btrfs_io_context *bioc = NULL; 89362306a36Sopenharmony_ci int stripe_index = stripe->mirror_num - 1; 89462306a36Sopenharmony_ci int ret; 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci /* For scrub, our mirror_num should always start at 1. */ 89762306a36Sopenharmony_ci ASSERT(stripe->mirror_num >= 1); 89862306a36Sopenharmony_ci ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, 89962306a36Sopenharmony_ci stripe->logical, &mapped_len, &bioc, 90062306a36Sopenharmony_ci NULL, NULL, 1); 90162306a36Sopenharmony_ci /* 90262306a36Sopenharmony_ci * If we failed, dev will be NULL, and later detailed reports 90362306a36Sopenharmony_ci * will just be skipped. 90462306a36Sopenharmony_ci */ 90562306a36Sopenharmony_ci if (ret < 0) 90662306a36Sopenharmony_ci goto skip; 90762306a36Sopenharmony_ci physical = bioc->stripes[stripe_index].physical; 90862306a36Sopenharmony_ci dev = bioc->stripes[stripe_index].dev; 90962306a36Sopenharmony_ci btrfs_put_bioc(bioc); 91062306a36Sopenharmony_ci } 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ciskip: 91362306a36Sopenharmony_ci for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) { 91462306a36Sopenharmony_ci bool repaired = false; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci if (stripe->sectors[sector_nr].is_metadata) { 91762306a36Sopenharmony_ci nr_meta_sectors++; 91862306a36Sopenharmony_ci } else { 91962306a36Sopenharmony_ci nr_data_sectors++; 92062306a36Sopenharmony_ci if (!stripe->sectors[sector_nr].csum) 92162306a36Sopenharmony_ci nr_nodatacsum_sectors++; 92262306a36Sopenharmony_ci } 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci if (test_bit(sector_nr, &stripe->init_error_bitmap) && 92562306a36Sopenharmony_ci !test_bit(sector_nr, &stripe->error_bitmap)) { 92662306a36Sopenharmony_ci nr_repaired_sectors++; 92762306a36Sopenharmony_ci repaired = true; 92862306a36Sopenharmony_ci } 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci /* Good sector from the beginning, nothing need to be done. */ 93162306a36Sopenharmony_ci if (!test_bit(sector_nr, &stripe->init_error_bitmap)) 93262306a36Sopenharmony_ci continue; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci /* 93562306a36Sopenharmony_ci * Report error for the corrupted sectors. If repaired, just 93662306a36Sopenharmony_ci * output the message of repaired message. 93762306a36Sopenharmony_ci */ 93862306a36Sopenharmony_ci if (repaired) { 93962306a36Sopenharmony_ci if (dev) { 94062306a36Sopenharmony_ci btrfs_err_rl_in_rcu(fs_info, 94162306a36Sopenharmony_ci "fixed up error at logical %llu on dev %s physical %llu", 94262306a36Sopenharmony_ci stripe->logical, btrfs_dev_name(dev), 94362306a36Sopenharmony_ci physical); 94462306a36Sopenharmony_ci } else { 94562306a36Sopenharmony_ci btrfs_err_rl_in_rcu(fs_info, 94662306a36Sopenharmony_ci "fixed up error at logical %llu on mirror %u", 94762306a36Sopenharmony_ci stripe->logical, stripe->mirror_num); 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci continue; 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci /* The remaining are all for unrepaired. */ 95362306a36Sopenharmony_ci if (dev) { 95462306a36Sopenharmony_ci btrfs_err_rl_in_rcu(fs_info, 95562306a36Sopenharmony_ci "unable to fixup (regular) error at logical %llu on dev %s physical %llu", 95662306a36Sopenharmony_ci stripe->logical, btrfs_dev_name(dev), 95762306a36Sopenharmony_ci physical); 95862306a36Sopenharmony_ci } else { 95962306a36Sopenharmony_ci btrfs_err_rl_in_rcu(fs_info, 96062306a36Sopenharmony_ci "unable to fixup (regular) error at logical %llu on mirror %u", 96162306a36Sopenharmony_ci stripe->logical, stripe->mirror_num); 96262306a36Sopenharmony_ci } 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (test_bit(sector_nr, &stripe->io_error_bitmap)) 96562306a36Sopenharmony_ci if (__ratelimit(&rs) && dev) 96662306a36Sopenharmony_ci scrub_print_common_warning("i/o error", dev, false, 96762306a36Sopenharmony_ci stripe->logical, physical); 96862306a36Sopenharmony_ci if (test_bit(sector_nr, &stripe->csum_error_bitmap)) 96962306a36Sopenharmony_ci if (__ratelimit(&rs) && dev) 97062306a36Sopenharmony_ci scrub_print_common_warning("checksum error", dev, false, 97162306a36Sopenharmony_ci stripe->logical, physical); 97262306a36Sopenharmony_ci if (test_bit(sector_nr, &stripe->meta_error_bitmap)) 97362306a36Sopenharmony_ci if (__ratelimit(&rs) && dev) 97462306a36Sopenharmony_ci scrub_print_common_warning("header error", dev, false, 97562306a36Sopenharmony_ci stripe->logical, physical); 97662306a36Sopenharmony_ci } 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 97962306a36Sopenharmony_ci sctx->stat.data_extents_scrubbed += stripe->nr_data_extents; 98062306a36Sopenharmony_ci sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents; 98162306a36Sopenharmony_ci sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits; 98262306a36Sopenharmony_ci sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits; 98362306a36Sopenharmony_ci sctx->stat.no_csum += nr_nodatacsum_sectors; 98462306a36Sopenharmony_ci sctx->stat.read_errors += stripe->init_nr_io_errors; 98562306a36Sopenharmony_ci sctx->stat.csum_errors += stripe->init_nr_csum_errors; 98662306a36Sopenharmony_ci sctx->stat.verify_errors += stripe->init_nr_meta_errors; 98762306a36Sopenharmony_ci sctx->stat.uncorrectable_errors += 98862306a36Sopenharmony_ci bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); 98962306a36Sopenharmony_ci sctx->stat.corrected_errors += nr_repaired_sectors; 99062306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 99162306a36Sopenharmony_ci} 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_cistatic void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe, 99462306a36Sopenharmony_ci unsigned long write_bitmap, bool dev_replace); 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci/* 99762306a36Sopenharmony_ci * The main entrance for all read related scrub work, including: 99862306a36Sopenharmony_ci * 99962306a36Sopenharmony_ci * - Wait for the initial read to finish 100062306a36Sopenharmony_ci * - Verify and locate any bad sectors 100162306a36Sopenharmony_ci * - Go through the remaining mirrors and try to read as large blocksize as 100262306a36Sopenharmony_ci * possible 100362306a36Sopenharmony_ci * - Go through all mirrors (including the failed mirror) sector-by-sector 100462306a36Sopenharmony_ci * - Submit writeback for repaired sectors 100562306a36Sopenharmony_ci * 100662306a36Sopenharmony_ci * Writeback for dev-replace does not happen here, it needs extra 100762306a36Sopenharmony_ci * synchronization for zoned devices. 100862306a36Sopenharmony_ci */ 100962306a36Sopenharmony_cistatic void scrub_stripe_read_repair_worker(struct work_struct *work) 101062306a36Sopenharmony_ci{ 101162306a36Sopenharmony_ci struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work); 101262306a36Sopenharmony_ci struct scrub_ctx *sctx = stripe->sctx; 101362306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 101462306a36Sopenharmony_ci int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, 101562306a36Sopenharmony_ci stripe->bg->length); 101662306a36Sopenharmony_ci int mirror; 101762306a36Sopenharmony_ci int i; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci ASSERT(stripe->mirror_num > 0); 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 102262306a36Sopenharmony_ci scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); 102362306a36Sopenharmony_ci /* Save the initial failed bitmap for later repair and report usage. */ 102462306a36Sopenharmony_ci stripe->init_error_bitmap = stripe->error_bitmap; 102562306a36Sopenharmony_ci stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap, 102662306a36Sopenharmony_ci stripe->nr_sectors); 102762306a36Sopenharmony_ci stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap, 102862306a36Sopenharmony_ci stripe->nr_sectors); 102962306a36Sopenharmony_ci stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, 103062306a36Sopenharmony_ci stripe->nr_sectors); 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) 103362306a36Sopenharmony_ci goto out; 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci /* 103662306a36Sopenharmony_ci * Try all remaining mirrors. 103762306a36Sopenharmony_ci * 103862306a36Sopenharmony_ci * Here we still try to read as large block as possible, as this is 103962306a36Sopenharmony_ci * faster and we have extra safety nets to rely on. 104062306a36Sopenharmony_ci */ 104162306a36Sopenharmony_ci for (mirror = calc_next_mirror(stripe->mirror_num, num_copies); 104262306a36Sopenharmony_ci mirror != stripe->mirror_num; 104362306a36Sopenharmony_ci mirror = calc_next_mirror(mirror, num_copies)) { 104462306a36Sopenharmony_ci const unsigned long old_error_bitmap = stripe->error_bitmap; 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci scrub_stripe_submit_repair_read(stripe, mirror, 104762306a36Sopenharmony_ci BTRFS_STRIPE_LEN, false); 104862306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 104962306a36Sopenharmony_ci scrub_verify_one_stripe(stripe, old_error_bitmap); 105062306a36Sopenharmony_ci if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) 105162306a36Sopenharmony_ci goto out; 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci /* 105562306a36Sopenharmony_ci * Last safety net, try re-checking all mirrors, including the failed 105662306a36Sopenharmony_ci * one, sector-by-sector. 105762306a36Sopenharmony_ci * 105862306a36Sopenharmony_ci * As if one sector failed the drive's internal csum, the whole read 105962306a36Sopenharmony_ci * containing the offending sector would be marked as error. 106062306a36Sopenharmony_ci * Thus here we do sector-by-sector read. 106162306a36Sopenharmony_ci * 106262306a36Sopenharmony_ci * This can be slow, thus we only try it as the last resort. 106362306a36Sopenharmony_ci */ 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci for (i = 0, mirror = stripe->mirror_num; 106662306a36Sopenharmony_ci i < num_copies; 106762306a36Sopenharmony_ci i++, mirror = calc_next_mirror(mirror, num_copies)) { 106862306a36Sopenharmony_ci const unsigned long old_error_bitmap = stripe->error_bitmap; 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci scrub_stripe_submit_repair_read(stripe, mirror, 107162306a36Sopenharmony_ci fs_info->sectorsize, true); 107262306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 107362306a36Sopenharmony_ci scrub_verify_one_stripe(stripe, old_error_bitmap); 107462306a36Sopenharmony_ci if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) 107562306a36Sopenharmony_ci goto out; 107662306a36Sopenharmony_ci } 107762306a36Sopenharmony_ciout: 107862306a36Sopenharmony_ci /* 107962306a36Sopenharmony_ci * Submit the repaired sectors. For zoned case, we cannot do repair 108062306a36Sopenharmony_ci * in-place, but queue the bg to be relocated. 108162306a36Sopenharmony_ci */ 108262306a36Sopenharmony_ci if (btrfs_is_zoned(fs_info)) { 108362306a36Sopenharmony_ci if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) 108462306a36Sopenharmony_ci btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start); 108562306a36Sopenharmony_ci } else if (!sctx->readonly) { 108662306a36Sopenharmony_ci unsigned long repaired; 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci bitmap_andnot(&repaired, &stripe->init_error_bitmap, 108962306a36Sopenharmony_ci &stripe->error_bitmap, stripe->nr_sectors); 109062306a36Sopenharmony_ci scrub_write_sectors(sctx, stripe, repaired, false); 109162306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 109262306a36Sopenharmony_ci } 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci scrub_stripe_report_errors(sctx, stripe); 109562306a36Sopenharmony_ci set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state); 109662306a36Sopenharmony_ci wake_up(&stripe->repair_wait); 109762306a36Sopenharmony_ci} 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_cistatic void scrub_read_endio(struct btrfs_bio *bbio) 110062306a36Sopenharmony_ci{ 110162306a36Sopenharmony_ci struct scrub_stripe *stripe = bbio->private; 110262306a36Sopenharmony_ci struct bio_vec *bvec; 110362306a36Sopenharmony_ci int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); 110462306a36Sopenharmony_ci int num_sectors; 110562306a36Sopenharmony_ci u32 bio_size = 0; 110662306a36Sopenharmony_ci int i; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci ASSERT(sector_nr < stripe->nr_sectors); 110962306a36Sopenharmony_ci bio_for_each_bvec_all(bvec, &bbio->bio, i) 111062306a36Sopenharmony_ci bio_size += bvec->bv_len; 111162306a36Sopenharmony_ci num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits; 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci if (bbio->bio.bi_status) { 111462306a36Sopenharmony_ci bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors); 111562306a36Sopenharmony_ci bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors); 111662306a36Sopenharmony_ci } else { 111762306a36Sopenharmony_ci bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors); 111862306a36Sopenharmony_ci } 111962306a36Sopenharmony_ci bio_put(&bbio->bio); 112062306a36Sopenharmony_ci if (atomic_dec_and_test(&stripe->pending_io)) { 112162306a36Sopenharmony_ci wake_up(&stripe->io_wait); 112262306a36Sopenharmony_ci INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); 112362306a36Sopenharmony_ci queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci} 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_cistatic void scrub_write_endio(struct btrfs_bio *bbio) 112862306a36Sopenharmony_ci{ 112962306a36Sopenharmony_ci struct scrub_stripe *stripe = bbio->private; 113062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 113162306a36Sopenharmony_ci struct bio_vec *bvec; 113262306a36Sopenharmony_ci int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); 113362306a36Sopenharmony_ci u32 bio_size = 0; 113462306a36Sopenharmony_ci int i; 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci bio_for_each_bvec_all(bvec, &bbio->bio, i) 113762306a36Sopenharmony_ci bio_size += bvec->bv_len; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci if (bbio->bio.bi_status) { 114062306a36Sopenharmony_ci unsigned long flags; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci spin_lock_irqsave(&stripe->write_error_lock, flags); 114362306a36Sopenharmony_ci bitmap_set(&stripe->write_error_bitmap, sector_nr, 114462306a36Sopenharmony_ci bio_size >> fs_info->sectorsize_bits); 114562306a36Sopenharmony_ci spin_unlock_irqrestore(&stripe->write_error_lock, flags); 114662306a36Sopenharmony_ci } 114762306a36Sopenharmony_ci bio_put(&bbio->bio); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci if (atomic_dec_and_test(&stripe->pending_io)) 115062306a36Sopenharmony_ci wake_up(&stripe->io_wait); 115162306a36Sopenharmony_ci} 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_cistatic void scrub_submit_write_bio(struct scrub_ctx *sctx, 115462306a36Sopenharmony_ci struct scrub_stripe *stripe, 115562306a36Sopenharmony_ci struct btrfs_bio *bbio, bool dev_replace) 115662306a36Sopenharmony_ci{ 115762306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 115862306a36Sopenharmony_ci u32 bio_len = bbio->bio.bi_iter.bi_size; 115962306a36Sopenharmony_ci u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) - 116062306a36Sopenharmony_ci stripe->logical; 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci fill_writer_pointer_gap(sctx, stripe->physical + bio_off); 116362306a36Sopenharmony_ci atomic_inc(&stripe->pending_io); 116462306a36Sopenharmony_ci btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace); 116562306a36Sopenharmony_ci if (!btrfs_is_zoned(fs_info)) 116662306a36Sopenharmony_ci return; 116762306a36Sopenharmony_ci /* 116862306a36Sopenharmony_ci * For zoned writeback, queue depth must be 1, thus we must wait for 116962306a36Sopenharmony_ci * the write to finish before the next write. 117062306a36Sopenharmony_ci */ 117162306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci /* 117462306a36Sopenharmony_ci * And also need to update the write pointer if write finished 117562306a36Sopenharmony_ci * successfully. 117662306a36Sopenharmony_ci */ 117762306a36Sopenharmony_ci if (!test_bit(bio_off >> fs_info->sectorsize_bits, 117862306a36Sopenharmony_ci &stripe->write_error_bitmap)) 117962306a36Sopenharmony_ci sctx->write_pointer += bio_len; 118062306a36Sopenharmony_ci} 118162306a36Sopenharmony_ci 118262306a36Sopenharmony_ci/* 118362306a36Sopenharmony_ci * Submit the write bio(s) for the sectors specified by @write_bitmap. 118462306a36Sopenharmony_ci * 118562306a36Sopenharmony_ci * Here we utilize btrfs_submit_repair_write(), which has some extra benefits: 118662306a36Sopenharmony_ci * 118762306a36Sopenharmony_ci * - Only needs logical bytenr and mirror_num 118862306a36Sopenharmony_ci * Just like the scrub read path 118962306a36Sopenharmony_ci * 119062306a36Sopenharmony_ci * - Would only result in writes to the specified mirror 119162306a36Sopenharmony_ci * Unlike the regular writeback path, which would write back to all stripes 119262306a36Sopenharmony_ci * 119362306a36Sopenharmony_ci * - Handle dev-replace and read-repair writeback differently 119462306a36Sopenharmony_ci */ 119562306a36Sopenharmony_cistatic void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe, 119662306a36Sopenharmony_ci unsigned long write_bitmap, bool dev_replace) 119762306a36Sopenharmony_ci{ 119862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 119962306a36Sopenharmony_ci struct btrfs_bio *bbio = NULL; 120062306a36Sopenharmony_ci int sector_nr; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) { 120362306a36Sopenharmony_ci struct page *page = scrub_stripe_get_page(stripe, sector_nr); 120462306a36Sopenharmony_ci unsigned int pgoff = scrub_stripe_get_page_offset(stripe, sector_nr); 120562306a36Sopenharmony_ci int ret; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci /* We should only writeback sectors covered by an extent. */ 120862306a36Sopenharmony_ci ASSERT(test_bit(sector_nr, &stripe->extent_sector_bitmap)); 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci /* Cannot merge with previous sector, submit the current one. */ 121162306a36Sopenharmony_ci if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) { 121262306a36Sopenharmony_ci scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); 121362306a36Sopenharmony_ci bbio = NULL; 121462306a36Sopenharmony_ci } 121562306a36Sopenharmony_ci if (!bbio) { 121662306a36Sopenharmony_ci bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_WRITE, 121762306a36Sopenharmony_ci fs_info, scrub_write_endio, stripe); 121862306a36Sopenharmony_ci bbio->bio.bi_iter.bi_sector = (stripe->logical + 121962306a36Sopenharmony_ci (sector_nr << fs_info->sectorsize_bits)) >> 122062306a36Sopenharmony_ci SECTOR_SHIFT; 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); 122362306a36Sopenharmony_ci ASSERT(ret == fs_info->sectorsize); 122462306a36Sopenharmony_ci } 122562306a36Sopenharmony_ci if (bbio) 122662306a36Sopenharmony_ci scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); 122762306a36Sopenharmony_ci} 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci/* 123062306a36Sopenharmony_ci * Throttling of IO submission, bandwidth-limit based, the timeslice is 1 123162306a36Sopenharmony_ci * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max. 123262306a36Sopenharmony_ci */ 123362306a36Sopenharmony_cistatic void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *device, 123462306a36Sopenharmony_ci unsigned int bio_size) 123562306a36Sopenharmony_ci{ 123662306a36Sopenharmony_ci const int time_slice = 1000; 123762306a36Sopenharmony_ci s64 delta; 123862306a36Sopenharmony_ci ktime_t now; 123962306a36Sopenharmony_ci u32 div; 124062306a36Sopenharmony_ci u64 bwlimit; 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci bwlimit = READ_ONCE(device->scrub_speed_max); 124362306a36Sopenharmony_ci if (bwlimit == 0) 124462306a36Sopenharmony_ci return; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci /* 124762306a36Sopenharmony_ci * Slice is divided into intervals when the IO is submitted, adjust by 124862306a36Sopenharmony_ci * bwlimit and maximum of 64 intervals. 124962306a36Sopenharmony_ci */ 125062306a36Sopenharmony_ci div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024))); 125162306a36Sopenharmony_ci div = min_t(u32, 64, div); 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci /* Start new epoch, set deadline */ 125462306a36Sopenharmony_ci now = ktime_get(); 125562306a36Sopenharmony_ci if (sctx->throttle_deadline == 0) { 125662306a36Sopenharmony_ci sctx->throttle_deadline = ktime_add_ms(now, time_slice / div); 125762306a36Sopenharmony_ci sctx->throttle_sent = 0; 125862306a36Sopenharmony_ci } 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci /* Still in the time to send? */ 126162306a36Sopenharmony_ci if (ktime_before(now, sctx->throttle_deadline)) { 126262306a36Sopenharmony_ci /* If current bio is within the limit, send it */ 126362306a36Sopenharmony_ci sctx->throttle_sent += bio_size; 126462306a36Sopenharmony_ci if (sctx->throttle_sent <= div_u64(bwlimit, div)) 126562306a36Sopenharmony_ci return; 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci /* We're over the limit, sleep until the rest of the slice */ 126862306a36Sopenharmony_ci delta = ktime_ms_delta(sctx->throttle_deadline, now); 126962306a36Sopenharmony_ci } else { 127062306a36Sopenharmony_ci /* New request after deadline, start new epoch */ 127162306a36Sopenharmony_ci delta = 0; 127262306a36Sopenharmony_ci } 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci if (delta) { 127562306a36Sopenharmony_ci long timeout; 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci timeout = div_u64(delta * HZ, 1000); 127862306a36Sopenharmony_ci schedule_timeout_interruptible(timeout); 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci /* Next call will start the deadline period */ 128262306a36Sopenharmony_ci sctx->throttle_deadline = 0; 128362306a36Sopenharmony_ci} 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci/* 128662306a36Sopenharmony_ci * Given a physical address, this will calculate it's 128762306a36Sopenharmony_ci * logical offset. if this is a parity stripe, it will return 128862306a36Sopenharmony_ci * the most left data stripe's logical offset. 128962306a36Sopenharmony_ci * 129062306a36Sopenharmony_ci * return 0 if it is a data stripe, 1 means parity stripe. 129162306a36Sopenharmony_ci */ 129262306a36Sopenharmony_cistatic int get_raid56_logic_offset(u64 physical, int num, 129362306a36Sopenharmony_ci struct map_lookup *map, u64 *offset, 129462306a36Sopenharmony_ci u64 *stripe_start) 129562306a36Sopenharmony_ci{ 129662306a36Sopenharmony_ci int i; 129762306a36Sopenharmony_ci int j = 0; 129862306a36Sopenharmony_ci u64 last_offset; 129962306a36Sopenharmony_ci const int data_stripes = nr_data_stripes(map); 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci last_offset = (physical - map->stripes[num].physical) * data_stripes; 130262306a36Sopenharmony_ci if (stripe_start) 130362306a36Sopenharmony_ci *stripe_start = last_offset; 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci *offset = last_offset; 130662306a36Sopenharmony_ci for (i = 0; i < data_stripes; i++) { 130762306a36Sopenharmony_ci u32 stripe_nr; 130862306a36Sopenharmony_ci u32 stripe_index; 130962306a36Sopenharmony_ci u32 rot; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci *offset = last_offset + btrfs_stripe_nr_to_offset(i); 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes; 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci /* Work out the disk rotation on this stripe-set */ 131662306a36Sopenharmony_ci rot = stripe_nr % map->num_stripes; 131762306a36Sopenharmony_ci /* calculate which stripe this data locates */ 131862306a36Sopenharmony_ci rot += i; 131962306a36Sopenharmony_ci stripe_index = rot % map->num_stripes; 132062306a36Sopenharmony_ci if (stripe_index == num) 132162306a36Sopenharmony_ci return 0; 132262306a36Sopenharmony_ci if (stripe_index < num) 132362306a36Sopenharmony_ci j++; 132462306a36Sopenharmony_ci } 132562306a36Sopenharmony_ci *offset = last_offset + btrfs_stripe_nr_to_offset(j); 132662306a36Sopenharmony_ci return 1; 132762306a36Sopenharmony_ci} 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci/* 133062306a36Sopenharmony_ci * Return 0 if the extent item range covers any byte of the range. 133162306a36Sopenharmony_ci * Return <0 if the extent item is before @search_start. 133262306a36Sopenharmony_ci * Return >0 if the extent item is after @start_start + @search_len. 133362306a36Sopenharmony_ci */ 133462306a36Sopenharmony_cistatic int compare_extent_item_range(struct btrfs_path *path, 133562306a36Sopenharmony_ci u64 search_start, u64 search_len) 133662306a36Sopenharmony_ci{ 133762306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = path->nodes[0]->fs_info; 133862306a36Sopenharmony_ci u64 len; 133962306a36Sopenharmony_ci struct btrfs_key key; 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 134262306a36Sopenharmony_ci ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY || 134362306a36Sopenharmony_ci key.type == BTRFS_METADATA_ITEM_KEY); 134462306a36Sopenharmony_ci if (key.type == BTRFS_METADATA_ITEM_KEY) 134562306a36Sopenharmony_ci len = fs_info->nodesize; 134662306a36Sopenharmony_ci else 134762306a36Sopenharmony_ci len = key.offset; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci if (key.objectid + len <= search_start) 135062306a36Sopenharmony_ci return -1; 135162306a36Sopenharmony_ci if (key.objectid >= search_start + search_len) 135262306a36Sopenharmony_ci return 1; 135362306a36Sopenharmony_ci return 0; 135462306a36Sopenharmony_ci} 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci/* 135762306a36Sopenharmony_ci * Locate one extent item which covers any byte in range 135862306a36Sopenharmony_ci * [@search_start, @search_start + @search_length) 135962306a36Sopenharmony_ci * 136062306a36Sopenharmony_ci * If the path is not initialized, we will initialize the search by doing 136162306a36Sopenharmony_ci * a btrfs_search_slot(). 136262306a36Sopenharmony_ci * If the path is already initialized, we will use the path as the initial 136362306a36Sopenharmony_ci * slot, to avoid duplicated btrfs_search_slot() calls. 136462306a36Sopenharmony_ci * 136562306a36Sopenharmony_ci * NOTE: If an extent item starts before @search_start, we will still 136662306a36Sopenharmony_ci * return the extent item. This is for data extent crossing stripe boundary. 136762306a36Sopenharmony_ci * 136862306a36Sopenharmony_ci * Return 0 if we found such extent item, and @path will point to the extent item. 136962306a36Sopenharmony_ci * Return >0 if no such extent item can be found, and @path will be released. 137062306a36Sopenharmony_ci * Return <0 if hit fatal error, and @path will be released. 137162306a36Sopenharmony_ci */ 137262306a36Sopenharmony_cistatic int find_first_extent_item(struct btrfs_root *extent_root, 137362306a36Sopenharmony_ci struct btrfs_path *path, 137462306a36Sopenharmony_ci u64 search_start, u64 search_len) 137562306a36Sopenharmony_ci{ 137662306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = extent_root->fs_info; 137762306a36Sopenharmony_ci struct btrfs_key key; 137862306a36Sopenharmony_ci int ret; 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci /* Continue using the existing path */ 138162306a36Sopenharmony_ci if (path->nodes[0]) 138262306a36Sopenharmony_ci goto search_forward; 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_ci if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 138562306a36Sopenharmony_ci key.type = BTRFS_METADATA_ITEM_KEY; 138662306a36Sopenharmony_ci else 138762306a36Sopenharmony_ci key.type = BTRFS_EXTENT_ITEM_KEY; 138862306a36Sopenharmony_ci key.objectid = search_start; 138962306a36Sopenharmony_ci key.offset = (u64)-1; 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 139262306a36Sopenharmony_ci if (ret < 0) 139362306a36Sopenharmony_ci return ret; 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci ASSERT(ret > 0); 139662306a36Sopenharmony_ci /* 139762306a36Sopenharmony_ci * Here we intentionally pass 0 as @min_objectid, as there could be 139862306a36Sopenharmony_ci * an extent item starting before @search_start. 139962306a36Sopenharmony_ci */ 140062306a36Sopenharmony_ci ret = btrfs_previous_extent_item(extent_root, path, 0); 140162306a36Sopenharmony_ci if (ret < 0) 140262306a36Sopenharmony_ci return ret; 140362306a36Sopenharmony_ci /* 140462306a36Sopenharmony_ci * No matter whether we have found an extent item, the next loop will 140562306a36Sopenharmony_ci * properly do every check on the key. 140662306a36Sopenharmony_ci */ 140762306a36Sopenharmony_cisearch_forward: 140862306a36Sopenharmony_ci while (true) { 140962306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 141062306a36Sopenharmony_ci if (key.objectid >= search_start + search_len) 141162306a36Sopenharmony_ci break; 141262306a36Sopenharmony_ci if (key.type != BTRFS_METADATA_ITEM_KEY && 141362306a36Sopenharmony_ci key.type != BTRFS_EXTENT_ITEM_KEY) 141462306a36Sopenharmony_ci goto next; 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci ret = compare_extent_item_range(path, search_start, search_len); 141762306a36Sopenharmony_ci if (ret == 0) 141862306a36Sopenharmony_ci return ret; 141962306a36Sopenharmony_ci if (ret > 0) 142062306a36Sopenharmony_ci break; 142162306a36Sopenharmony_cinext: 142262306a36Sopenharmony_ci path->slots[0]++; 142362306a36Sopenharmony_ci if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 142462306a36Sopenharmony_ci ret = btrfs_next_leaf(extent_root, path); 142562306a36Sopenharmony_ci if (ret) { 142662306a36Sopenharmony_ci /* Either no more item or fatal error */ 142762306a36Sopenharmony_ci btrfs_release_path(path); 142862306a36Sopenharmony_ci return ret; 142962306a36Sopenharmony_ci } 143062306a36Sopenharmony_ci } 143162306a36Sopenharmony_ci } 143262306a36Sopenharmony_ci btrfs_release_path(path); 143362306a36Sopenharmony_ci return 1; 143462306a36Sopenharmony_ci} 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_cistatic void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret, 143762306a36Sopenharmony_ci u64 *size_ret, u64 *flags_ret, u64 *generation_ret) 143862306a36Sopenharmony_ci{ 143962306a36Sopenharmony_ci struct btrfs_key key; 144062306a36Sopenharmony_ci struct btrfs_extent_item *ei; 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 144362306a36Sopenharmony_ci ASSERT(key.type == BTRFS_METADATA_ITEM_KEY || 144462306a36Sopenharmony_ci key.type == BTRFS_EXTENT_ITEM_KEY); 144562306a36Sopenharmony_ci *extent_start_ret = key.objectid; 144662306a36Sopenharmony_ci if (key.type == BTRFS_METADATA_ITEM_KEY) 144762306a36Sopenharmony_ci *size_ret = path->nodes[0]->fs_info->nodesize; 144862306a36Sopenharmony_ci else 144962306a36Sopenharmony_ci *size_ret = key.offset; 145062306a36Sopenharmony_ci ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); 145162306a36Sopenharmony_ci *flags_ret = btrfs_extent_flags(path->nodes[0], ei); 145262306a36Sopenharmony_ci *generation_ret = btrfs_extent_generation(path->nodes[0], ei); 145362306a36Sopenharmony_ci} 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_cistatic int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, 145662306a36Sopenharmony_ci u64 physical, u64 physical_end) 145762306a36Sopenharmony_ci{ 145862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 145962306a36Sopenharmony_ci int ret = 0; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci if (!btrfs_is_zoned(fs_info)) 146262306a36Sopenharmony_ci return 0; 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci mutex_lock(&sctx->wr_lock); 146562306a36Sopenharmony_ci if (sctx->write_pointer < physical_end) { 146662306a36Sopenharmony_ci ret = btrfs_sync_zone_write_pointer(sctx->wr_tgtdev, logical, 146762306a36Sopenharmony_ci physical, 146862306a36Sopenharmony_ci sctx->write_pointer); 146962306a36Sopenharmony_ci if (ret) 147062306a36Sopenharmony_ci btrfs_err(fs_info, 147162306a36Sopenharmony_ci "zoned: failed to recover write pointer"); 147262306a36Sopenharmony_ci } 147362306a36Sopenharmony_ci mutex_unlock(&sctx->wr_lock); 147462306a36Sopenharmony_ci btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci return ret; 147762306a36Sopenharmony_ci} 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_cistatic void fill_one_extent_info(struct btrfs_fs_info *fs_info, 148062306a36Sopenharmony_ci struct scrub_stripe *stripe, 148162306a36Sopenharmony_ci u64 extent_start, u64 extent_len, 148262306a36Sopenharmony_ci u64 extent_flags, u64 extent_gen) 148362306a36Sopenharmony_ci{ 148462306a36Sopenharmony_ci for (u64 cur_logical = max(stripe->logical, extent_start); 148562306a36Sopenharmony_ci cur_logical < min(stripe->logical + BTRFS_STRIPE_LEN, 148662306a36Sopenharmony_ci extent_start + extent_len); 148762306a36Sopenharmony_ci cur_logical += fs_info->sectorsize) { 148862306a36Sopenharmony_ci const int nr_sector = (cur_logical - stripe->logical) >> 148962306a36Sopenharmony_ci fs_info->sectorsize_bits; 149062306a36Sopenharmony_ci struct scrub_sector_verification *sector = 149162306a36Sopenharmony_ci &stripe->sectors[nr_sector]; 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci set_bit(nr_sector, &stripe->extent_sector_bitmap); 149462306a36Sopenharmony_ci if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 149562306a36Sopenharmony_ci sector->is_metadata = true; 149662306a36Sopenharmony_ci sector->generation = extent_gen; 149762306a36Sopenharmony_ci } 149862306a36Sopenharmony_ci } 149962306a36Sopenharmony_ci} 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_cistatic void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) 150262306a36Sopenharmony_ci{ 150362306a36Sopenharmony_ci stripe->extent_sector_bitmap = 0; 150462306a36Sopenharmony_ci stripe->init_error_bitmap = 0; 150562306a36Sopenharmony_ci stripe->init_nr_io_errors = 0; 150662306a36Sopenharmony_ci stripe->init_nr_csum_errors = 0; 150762306a36Sopenharmony_ci stripe->init_nr_meta_errors = 0; 150862306a36Sopenharmony_ci stripe->error_bitmap = 0; 150962306a36Sopenharmony_ci stripe->io_error_bitmap = 0; 151062306a36Sopenharmony_ci stripe->csum_error_bitmap = 0; 151162306a36Sopenharmony_ci stripe->meta_error_bitmap = 0; 151262306a36Sopenharmony_ci} 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_ci/* 151562306a36Sopenharmony_ci * Locate one stripe which has at least one extent in its range. 151662306a36Sopenharmony_ci * 151762306a36Sopenharmony_ci * Return 0 if found such stripe, and store its info into @stripe. 151862306a36Sopenharmony_ci * Return >0 if there is no such stripe in the specified range. 151962306a36Sopenharmony_ci * Return <0 for error. 152062306a36Sopenharmony_ci */ 152162306a36Sopenharmony_cistatic int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, 152262306a36Sopenharmony_ci struct btrfs_path *extent_path, 152362306a36Sopenharmony_ci struct btrfs_path *csum_path, 152462306a36Sopenharmony_ci struct btrfs_device *dev, u64 physical, 152562306a36Sopenharmony_ci int mirror_num, u64 logical_start, 152662306a36Sopenharmony_ci u32 logical_len, 152762306a36Sopenharmony_ci struct scrub_stripe *stripe) 152862306a36Sopenharmony_ci{ 152962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = bg->fs_info; 153062306a36Sopenharmony_ci struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start); 153162306a36Sopenharmony_ci struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start); 153262306a36Sopenharmony_ci const u64 logical_end = logical_start + logical_len; 153362306a36Sopenharmony_ci u64 cur_logical = logical_start; 153462306a36Sopenharmony_ci u64 stripe_end; 153562306a36Sopenharmony_ci u64 extent_start; 153662306a36Sopenharmony_ci u64 extent_len; 153762306a36Sopenharmony_ci u64 extent_flags; 153862306a36Sopenharmony_ci u64 extent_gen; 153962306a36Sopenharmony_ci int ret; 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * 154262306a36Sopenharmony_ci stripe->nr_sectors); 154362306a36Sopenharmony_ci scrub_stripe_reset_bitmaps(stripe); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci /* The range must be inside the bg. */ 154662306a36Sopenharmony_ci ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_ci ret = find_first_extent_item(extent_root, extent_path, logical_start, 154962306a36Sopenharmony_ci logical_len); 155062306a36Sopenharmony_ci /* Either error or not found. */ 155162306a36Sopenharmony_ci if (ret) 155262306a36Sopenharmony_ci goto out; 155362306a36Sopenharmony_ci get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags, 155462306a36Sopenharmony_ci &extent_gen); 155562306a36Sopenharmony_ci if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 155662306a36Sopenharmony_ci stripe->nr_meta_extents++; 155762306a36Sopenharmony_ci if (extent_flags & BTRFS_EXTENT_FLAG_DATA) 155862306a36Sopenharmony_ci stripe->nr_data_extents++; 155962306a36Sopenharmony_ci cur_logical = max(extent_start, cur_logical); 156062306a36Sopenharmony_ci 156162306a36Sopenharmony_ci /* 156262306a36Sopenharmony_ci * Round down to stripe boundary. 156362306a36Sopenharmony_ci * 156462306a36Sopenharmony_ci * The extra calculation against bg->start is to handle block groups 156562306a36Sopenharmony_ci * whose logical bytenr is not BTRFS_STRIPE_LEN aligned. 156662306a36Sopenharmony_ci */ 156762306a36Sopenharmony_ci stripe->logical = round_down(cur_logical - bg->start, BTRFS_STRIPE_LEN) + 156862306a36Sopenharmony_ci bg->start; 156962306a36Sopenharmony_ci stripe->physical = physical + stripe->logical - logical_start; 157062306a36Sopenharmony_ci stripe->dev = dev; 157162306a36Sopenharmony_ci stripe->bg = bg; 157262306a36Sopenharmony_ci stripe->mirror_num = mirror_num; 157362306a36Sopenharmony_ci stripe_end = stripe->logical + BTRFS_STRIPE_LEN - 1; 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci /* Fill the first extent info into stripe->sectors[] array. */ 157662306a36Sopenharmony_ci fill_one_extent_info(fs_info, stripe, extent_start, extent_len, 157762306a36Sopenharmony_ci extent_flags, extent_gen); 157862306a36Sopenharmony_ci cur_logical = extent_start + extent_len; 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci /* Fill the extent info for the remaining sectors. */ 158162306a36Sopenharmony_ci while (cur_logical <= stripe_end) { 158262306a36Sopenharmony_ci ret = find_first_extent_item(extent_root, extent_path, cur_logical, 158362306a36Sopenharmony_ci stripe_end - cur_logical + 1); 158462306a36Sopenharmony_ci if (ret < 0) 158562306a36Sopenharmony_ci goto out; 158662306a36Sopenharmony_ci if (ret > 0) { 158762306a36Sopenharmony_ci ret = 0; 158862306a36Sopenharmony_ci break; 158962306a36Sopenharmony_ci } 159062306a36Sopenharmony_ci get_extent_info(extent_path, &extent_start, &extent_len, 159162306a36Sopenharmony_ci &extent_flags, &extent_gen); 159262306a36Sopenharmony_ci if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 159362306a36Sopenharmony_ci stripe->nr_meta_extents++; 159462306a36Sopenharmony_ci if (extent_flags & BTRFS_EXTENT_FLAG_DATA) 159562306a36Sopenharmony_ci stripe->nr_data_extents++; 159662306a36Sopenharmony_ci fill_one_extent_info(fs_info, stripe, extent_start, extent_len, 159762306a36Sopenharmony_ci extent_flags, extent_gen); 159862306a36Sopenharmony_ci cur_logical = extent_start + extent_len; 159962306a36Sopenharmony_ci } 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci /* Now fill the data csum. */ 160262306a36Sopenharmony_ci if (bg->flags & BTRFS_BLOCK_GROUP_DATA) { 160362306a36Sopenharmony_ci int sector_nr; 160462306a36Sopenharmony_ci unsigned long csum_bitmap = 0; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci /* Csum space should have already been allocated. */ 160762306a36Sopenharmony_ci ASSERT(stripe->csums); 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci /* 161062306a36Sopenharmony_ci * Our csum bitmap should be large enough, as BTRFS_STRIPE_LEN 161162306a36Sopenharmony_ci * should contain at most 16 sectors. 161262306a36Sopenharmony_ci */ 161362306a36Sopenharmony_ci ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); 161462306a36Sopenharmony_ci 161562306a36Sopenharmony_ci ret = btrfs_lookup_csums_bitmap(csum_root, csum_path, 161662306a36Sopenharmony_ci stripe->logical, stripe_end, 161762306a36Sopenharmony_ci stripe->csums, &csum_bitmap); 161862306a36Sopenharmony_ci if (ret < 0) 161962306a36Sopenharmony_ci goto out; 162062306a36Sopenharmony_ci if (ret > 0) 162162306a36Sopenharmony_ci ret = 0; 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci for_each_set_bit(sector_nr, &csum_bitmap, stripe->nr_sectors) { 162462306a36Sopenharmony_ci stripe->sectors[sector_nr].csum = stripe->csums + 162562306a36Sopenharmony_ci sector_nr * fs_info->csum_size; 162662306a36Sopenharmony_ci } 162762306a36Sopenharmony_ci } 162862306a36Sopenharmony_ci set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); 162962306a36Sopenharmony_ciout: 163062306a36Sopenharmony_ci return ret; 163162306a36Sopenharmony_ci} 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_cistatic void scrub_reset_stripe(struct scrub_stripe *stripe) 163462306a36Sopenharmony_ci{ 163562306a36Sopenharmony_ci scrub_stripe_reset_bitmaps(stripe); 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci stripe->nr_meta_extents = 0; 163862306a36Sopenharmony_ci stripe->nr_data_extents = 0; 163962306a36Sopenharmony_ci stripe->state = 0; 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci for (int i = 0; i < stripe->nr_sectors; i++) { 164262306a36Sopenharmony_ci stripe->sectors[i].is_metadata = false; 164362306a36Sopenharmony_ci stripe->sectors[i].csum = NULL; 164462306a36Sopenharmony_ci stripe->sectors[i].generation = 0; 164562306a36Sopenharmony_ci } 164662306a36Sopenharmony_ci} 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_cistatic void scrub_submit_initial_read(struct scrub_ctx *sctx, 164962306a36Sopenharmony_ci struct scrub_stripe *stripe) 165062306a36Sopenharmony_ci{ 165162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 165262306a36Sopenharmony_ci struct btrfs_bio *bbio; 165362306a36Sopenharmony_ci unsigned int nr_sectors = min_t(u64, BTRFS_STRIPE_LEN, stripe->bg->start + 165462306a36Sopenharmony_ci stripe->bg->length - stripe->logical) >> 165562306a36Sopenharmony_ci fs_info->sectorsize_bits; 165662306a36Sopenharmony_ci int mirror = stripe->mirror_num; 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci ASSERT(stripe->bg); 165962306a36Sopenharmony_ci ASSERT(stripe->mirror_num > 0); 166062306a36Sopenharmony_ci ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, 166362306a36Sopenharmony_ci scrub_read_endio, stripe); 166462306a36Sopenharmony_ci 166562306a36Sopenharmony_ci bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT; 166662306a36Sopenharmony_ci /* Read the whole range inside the chunk boundary. */ 166762306a36Sopenharmony_ci for (unsigned int cur = 0; cur < nr_sectors; cur++) { 166862306a36Sopenharmony_ci struct page *page = scrub_stripe_get_page(stripe, cur); 166962306a36Sopenharmony_ci unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur); 167062306a36Sopenharmony_ci int ret; 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); 167362306a36Sopenharmony_ci /* We should have allocated enough bio vectors. */ 167462306a36Sopenharmony_ci ASSERT(ret == fs_info->sectorsize); 167562306a36Sopenharmony_ci } 167662306a36Sopenharmony_ci atomic_inc(&stripe->pending_io); 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci /* 167962306a36Sopenharmony_ci * For dev-replace, either user asks to avoid the source dev, or 168062306a36Sopenharmony_ci * the device is missing, we try the next mirror instead. 168162306a36Sopenharmony_ci */ 168262306a36Sopenharmony_ci if (sctx->is_dev_replace && 168362306a36Sopenharmony_ci (fs_info->dev_replace.cont_reading_from_srcdev_mode == 168462306a36Sopenharmony_ci BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID || 168562306a36Sopenharmony_ci !stripe->dev->bdev)) { 168662306a36Sopenharmony_ci int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, 168762306a36Sopenharmony_ci stripe->bg->length); 168862306a36Sopenharmony_ci 168962306a36Sopenharmony_ci mirror = calc_next_mirror(mirror, num_copies); 169062306a36Sopenharmony_ci } 169162306a36Sopenharmony_ci btrfs_submit_bio(bbio, mirror); 169262306a36Sopenharmony_ci} 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_cistatic bool stripe_has_metadata_error(struct scrub_stripe *stripe) 169562306a36Sopenharmony_ci{ 169662306a36Sopenharmony_ci int i; 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci for_each_set_bit(i, &stripe->error_bitmap, stripe->nr_sectors) { 169962306a36Sopenharmony_ci if (stripe->sectors[i].is_metadata) { 170062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = stripe->bg->fs_info; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci btrfs_err(fs_info, 170362306a36Sopenharmony_ci "stripe %llu has unrepaired metadata sector at %llu", 170462306a36Sopenharmony_ci stripe->logical, 170562306a36Sopenharmony_ci stripe->logical + (i << fs_info->sectorsize_bits)); 170662306a36Sopenharmony_ci return true; 170762306a36Sopenharmony_ci } 170862306a36Sopenharmony_ci } 170962306a36Sopenharmony_ci return false; 171062306a36Sopenharmony_ci} 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_cistatic void submit_initial_group_read(struct scrub_ctx *sctx, 171362306a36Sopenharmony_ci unsigned int first_slot, 171462306a36Sopenharmony_ci unsigned int nr_stripes) 171562306a36Sopenharmony_ci{ 171662306a36Sopenharmony_ci struct blk_plug plug; 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_ci ASSERT(first_slot < SCRUB_TOTAL_STRIPES); 171962306a36Sopenharmony_ci ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES); 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_ci scrub_throttle_dev_io(sctx, sctx->stripes[0].dev, 172262306a36Sopenharmony_ci btrfs_stripe_nr_to_offset(nr_stripes)); 172362306a36Sopenharmony_ci blk_start_plug(&plug); 172462306a36Sopenharmony_ci for (int i = 0; i < nr_stripes; i++) { 172562306a36Sopenharmony_ci struct scrub_stripe *stripe = &sctx->stripes[first_slot + i]; 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci /* Those stripes should be initialized. */ 172862306a36Sopenharmony_ci ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); 172962306a36Sopenharmony_ci scrub_submit_initial_read(sctx, stripe); 173062306a36Sopenharmony_ci } 173162306a36Sopenharmony_ci blk_finish_plug(&plug); 173262306a36Sopenharmony_ci} 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_cistatic int flush_scrub_stripes(struct scrub_ctx *sctx) 173562306a36Sopenharmony_ci{ 173662306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 173762306a36Sopenharmony_ci struct scrub_stripe *stripe; 173862306a36Sopenharmony_ci const int nr_stripes = sctx->cur_stripe; 173962306a36Sopenharmony_ci int ret = 0; 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci if (!nr_stripes) 174262306a36Sopenharmony_ci return 0; 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state)); 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci /* Submit the stripes which are populated but not submitted. */ 174762306a36Sopenharmony_ci if (nr_stripes % SCRUB_STRIPES_PER_GROUP) { 174862306a36Sopenharmony_ci const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP); 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot); 175162306a36Sopenharmony_ci } 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci for (int i = 0; i < nr_stripes; i++) { 175462306a36Sopenharmony_ci stripe = &sctx->stripes[i]; 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci wait_event(stripe->repair_wait, 175762306a36Sopenharmony_ci test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state)); 175862306a36Sopenharmony_ci } 175962306a36Sopenharmony_ci 176062306a36Sopenharmony_ci /* Submit for dev-replace. */ 176162306a36Sopenharmony_ci if (sctx->is_dev_replace) { 176262306a36Sopenharmony_ci /* 176362306a36Sopenharmony_ci * For dev-replace, if we know there is something wrong with 176462306a36Sopenharmony_ci * metadata, we should immedately abort. 176562306a36Sopenharmony_ci */ 176662306a36Sopenharmony_ci for (int i = 0; i < nr_stripes; i++) { 176762306a36Sopenharmony_ci if (stripe_has_metadata_error(&sctx->stripes[i])) { 176862306a36Sopenharmony_ci ret = -EIO; 176962306a36Sopenharmony_ci goto out; 177062306a36Sopenharmony_ci } 177162306a36Sopenharmony_ci } 177262306a36Sopenharmony_ci for (int i = 0; i < nr_stripes; i++) { 177362306a36Sopenharmony_ci unsigned long good; 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci stripe = &sctx->stripes[i]; 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci ASSERT(stripe->dev == fs_info->dev_replace.srcdev); 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_ci bitmap_andnot(&good, &stripe->extent_sector_bitmap, 178062306a36Sopenharmony_ci &stripe->error_bitmap, stripe->nr_sectors); 178162306a36Sopenharmony_ci scrub_write_sectors(sctx, stripe, good, true); 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci } 178462306a36Sopenharmony_ci 178562306a36Sopenharmony_ci /* Wait for the above writebacks to finish. */ 178662306a36Sopenharmony_ci for (int i = 0; i < nr_stripes; i++) { 178762306a36Sopenharmony_ci stripe = &sctx->stripes[i]; 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_ci wait_scrub_stripe_io(stripe); 179062306a36Sopenharmony_ci scrub_reset_stripe(stripe); 179162306a36Sopenharmony_ci } 179262306a36Sopenharmony_ciout: 179362306a36Sopenharmony_ci sctx->cur_stripe = 0; 179462306a36Sopenharmony_ci return ret; 179562306a36Sopenharmony_ci} 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_cistatic void raid56_scrub_wait_endio(struct bio *bio) 179862306a36Sopenharmony_ci{ 179962306a36Sopenharmony_ci complete(bio->bi_private); 180062306a36Sopenharmony_ci} 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_cistatic int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg, 180362306a36Sopenharmony_ci struct btrfs_device *dev, int mirror_num, 180462306a36Sopenharmony_ci u64 logical, u32 length, u64 physical, 180562306a36Sopenharmony_ci u64 *found_logical_ret) 180662306a36Sopenharmony_ci{ 180762306a36Sopenharmony_ci struct scrub_stripe *stripe; 180862306a36Sopenharmony_ci int ret; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci /* 181162306a36Sopenharmony_ci * There should always be one slot left, as caller filling the last 181262306a36Sopenharmony_ci * slot should flush them all. 181362306a36Sopenharmony_ci */ 181462306a36Sopenharmony_ci ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES); 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci /* @found_logical_ret must be specified. */ 181762306a36Sopenharmony_ci ASSERT(found_logical_ret); 181862306a36Sopenharmony_ci 181962306a36Sopenharmony_ci stripe = &sctx->stripes[sctx->cur_stripe]; 182062306a36Sopenharmony_ci scrub_reset_stripe(stripe); 182162306a36Sopenharmony_ci ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, 182262306a36Sopenharmony_ci &sctx->csum_path, dev, physical, 182362306a36Sopenharmony_ci mirror_num, logical, length, stripe); 182462306a36Sopenharmony_ci /* Either >0 as no more extents or <0 for error. */ 182562306a36Sopenharmony_ci if (ret) 182662306a36Sopenharmony_ci return ret; 182762306a36Sopenharmony_ci *found_logical_ret = stripe->logical; 182862306a36Sopenharmony_ci sctx->cur_stripe++; 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci /* We filled one group, submit it. */ 183162306a36Sopenharmony_ci if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) { 183262306a36Sopenharmony_ci const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP); 183562306a36Sopenharmony_ci } 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci /* Last slot used, flush them all. */ 183862306a36Sopenharmony_ci if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES) 183962306a36Sopenharmony_ci return flush_scrub_stripes(sctx); 184062306a36Sopenharmony_ci return 0; 184162306a36Sopenharmony_ci} 184262306a36Sopenharmony_ci 184362306a36Sopenharmony_cistatic int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, 184462306a36Sopenharmony_ci struct btrfs_device *scrub_dev, 184562306a36Sopenharmony_ci struct btrfs_block_group *bg, 184662306a36Sopenharmony_ci struct map_lookup *map, 184762306a36Sopenharmony_ci u64 full_stripe_start) 184862306a36Sopenharmony_ci{ 184962306a36Sopenharmony_ci DECLARE_COMPLETION_ONSTACK(io_done); 185062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 185162306a36Sopenharmony_ci struct btrfs_raid_bio *rbio; 185262306a36Sopenharmony_ci struct btrfs_io_context *bioc = NULL; 185362306a36Sopenharmony_ci struct btrfs_path extent_path = { 0 }; 185462306a36Sopenharmony_ci struct btrfs_path csum_path = { 0 }; 185562306a36Sopenharmony_ci struct bio *bio; 185662306a36Sopenharmony_ci struct scrub_stripe *stripe; 185762306a36Sopenharmony_ci bool all_empty = true; 185862306a36Sopenharmony_ci const int data_stripes = nr_data_stripes(map); 185962306a36Sopenharmony_ci unsigned long extent_bitmap = 0; 186062306a36Sopenharmony_ci u64 length = btrfs_stripe_nr_to_offset(data_stripes); 186162306a36Sopenharmony_ci int ret; 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci ASSERT(sctx->raid56_data_stripes); 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci /* 186662306a36Sopenharmony_ci * For data stripe search, we cannot re-use the same extent/csum paths, 186762306a36Sopenharmony_ci * as the data stripe bytenr may be smaller than previous extent. Thus 186862306a36Sopenharmony_ci * we have to use our own extent/csum paths. 186962306a36Sopenharmony_ci */ 187062306a36Sopenharmony_ci extent_path.search_commit_root = 1; 187162306a36Sopenharmony_ci extent_path.skip_locking = 1; 187262306a36Sopenharmony_ci csum_path.search_commit_root = 1; 187362306a36Sopenharmony_ci csum_path.skip_locking = 1; 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 187662306a36Sopenharmony_ci int stripe_index; 187762306a36Sopenharmony_ci int rot; 187862306a36Sopenharmony_ci u64 physical; 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 188162306a36Sopenharmony_ci rot = div_u64(full_stripe_start - bg->start, 188262306a36Sopenharmony_ci data_stripes) >> BTRFS_STRIPE_LEN_SHIFT; 188362306a36Sopenharmony_ci stripe_index = (i + rot) % map->num_stripes; 188462306a36Sopenharmony_ci physical = map->stripes[stripe_index].physical + 188562306a36Sopenharmony_ci btrfs_stripe_nr_to_offset(rot); 188662306a36Sopenharmony_ci 188762306a36Sopenharmony_ci scrub_reset_stripe(stripe); 188862306a36Sopenharmony_ci set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state); 188962306a36Sopenharmony_ci ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path, 189062306a36Sopenharmony_ci map->stripes[stripe_index].dev, physical, 1, 189162306a36Sopenharmony_ci full_stripe_start + btrfs_stripe_nr_to_offset(i), 189262306a36Sopenharmony_ci BTRFS_STRIPE_LEN, stripe); 189362306a36Sopenharmony_ci if (ret < 0) 189462306a36Sopenharmony_ci goto out; 189562306a36Sopenharmony_ci /* 189662306a36Sopenharmony_ci * No extent in this data stripe, need to manually mark them 189762306a36Sopenharmony_ci * initialized to make later read submission happy. 189862306a36Sopenharmony_ci */ 189962306a36Sopenharmony_ci if (ret > 0) { 190062306a36Sopenharmony_ci stripe->logical = full_stripe_start + 190162306a36Sopenharmony_ci btrfs_stripe_nr_to_offset(i); 190262306a36Sopenharmony_ci stripe->dev = map->stripes[stripe_index].dev; 190362306a36Sopenharmony_ci stripe->mirror_num = 1; 190462306a36Sopenharmony_ci set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci } 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci /* Check if all data stripes are empty. */ 190962306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 191062306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 191162306a36Sopenharmony_ci if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) { 191262306a36Sopenharmony_ci all_empty = false; 191362306a36Sopenharmony_ci break; 191462306a36Sopenharmony_ci } 191562306a36Sopenharmony_ci } 191662306a36Sopenharmony_ci if (all_empty) { 191762306a36Sopenharmony_ci ret = 0; 191862306a36Sopenharmony_ci goto out; 191962306a36Sopenharmony_ci } 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 192262306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 192362306a36Sopenharmony_ci scrub_submit_initial_read(sctx, stripe); 192462306a36Sopenharmony_ci } 192562306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 192662306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci wait_event(stripe->repair_wait, 192962306a36Sopenharmony_ci test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state)); 193062306a36Sopenharmony_ci } 193162306a36Sopenharmony_ci /* For now, no zoned support for RAID56. */ 193262306a36Sopenharmony_ci ASSERT(!btrfs_is_zoned(sctx->fs_info)); 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci /* 193562306a36Sopenharmony_ci * Now all data stripes are properly verified. Check if we have any 193662306a36Sopenharmony_ci * unrepaired, if so abort immediately or we could further corrupt the 193762306a36Sopenharmony_ci * P/Q stripes. 193862306a36Sopenharmony_ci * 193962306a36Sopenharmony_ci * During the loop, also populate extent_bitmap. 194062306a36Sopenharmony_ci */ 194162306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 194262306a36Sopenharmony_ci unsigned long error; 194362306a36Sopenharmony_ci 194462306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci /* 194762306a36Sopenharmony_ci * We should only check the errors where there is an extent. 194862306a36Sopenharmony_ci * As we may hit an empty data stripe while it's missing. 194962306a36Sopenharmony_ci */ 195062306a36Sopenharmony_ci bitmap_and(&error, &stripe->error_bitmap, 195162306a36Sopenharmony_ci &stripe->extent_sector_bitmap, stripe->nr_sectors); 195262306a36Sopenharmony_ci if (!bitmap_empty(&error, stripe->nr_sectors)) { 195362306a36Sopenharmony_ci btrfs_err(fs_info, 195462306a36Sopenharmony_ci"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", 195562306a36Sopenharmony_ci full_stripe_start, i, stripe->nr_sectors, 195662306a36Sopenharmony_ci &error); 195762306a36Sopenharmony_ci ret = -EIO; 195862306a36Sopenharmony_ci goto out; 195962306a36Sopenharmony_ci } 196062306a36Sopenharmony_ci bitmap_or(&extent_bitmap, &extent_bitmap, 196162306a36Sopenharmony_ci &stripe->extent_sector_bitmap, stripe->nr_sectors); 196262306a36Sopenharmony_ci } 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci /* Now we can check and regenerate the P/Q stripe. */ 196562306a36Sopenharmony_ci bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS); 196662306a36Sopenharmony_ci bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT; 196762306a36Sopenharmony_ci bio->bi_private = &io_done; 196862306a36Sopenharmony_ci bio->bi_end_io = raid56_scrub_wait_endio; 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci btrfs_bio_counter_inc_blocked(fs_info); 197162306a36Sopenharmony_ci ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, 197262306a36Sopenharmony_ci &length, &bioc, NULL, NULL, 1); 197362306a36Sopenharmony_ci if (ret < 0) { 197462306a36Sopenharmony_ci btrfs_put_bioc(bioc); 197562306a36Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 197662306a36Sopenharmony_ci goto out; 197762306a36Sopenharmony_ci } 197862306a36Sopenharmony_ci rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap, 197962306a36Sopenharmony_ci BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); 198062306a36Sopenharmony_ci btrfs_put_bioc(bioc); 198162306a36Sopenharmony_ci if (!rbio) { 198262306a36Sopenharmony_ci ret = -ENOMEM; 198362306a36Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 198462306a36Sopenharmony_ci goto out; 198562306a36Sopenharmony_ci } 198662306a36Sopenharmony_ci /* Use the recovered stripes as cache to avoid read them from disk again. */ 198762306a36Sopenharmony_ci for (int i = 0; i < data_stripes; i++) { 198862306a36Sopenharmony_ci stripe = &sctx->raid56_data_stripes[i]; 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci raid56_parity_cache_data_pages(rbio, stripe->pages, 199162306a36Sopenharmony_ci full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT)); 199262306a36Sopenharmony_ci } 199362306a36Sopenharmony_ci raid56_parity_submit_scrub_rbio(rbio); 199462306a36Sopenharmony_ci wait_for_completion_io(&io_done); 199562306a36Sopenharmony_ci ret = blk_status_to_errno(bio->bi_status); 199662306a36Sopenharmony_ci bio_put(bio); 199762306a36Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci btrfs_release_path(&extent_path); 200062306a36Sopenharmony_ci btrfs_release_path(&csum_path); 200162306a36Sopenharmony_ciout: 200262306a36Sopenharmony_ci return ret; 200362306a36Sopenharmony_ci} 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci/* 200662306a36Sopenharmony_ci * Scrub one range which can only has simple mirror based profile. 200762306a36Sopenharmony_ci * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in 200862306a36Sopenharmony_ci * RAID0/RAID10). 200962306a36Sopenharmony_ci * 201062306a36Sopenharmony_ci * Since we may need to handle a subset of block group, we need @logical_start 201162306a36Sopenharmony_ci * and @logical_length parameter. 201262306a36Sopenharmony_ci */ 201362306a36Sopenharmony_cistatic int scrub_simple_mirror(struct scrub_ctx *sctx, 201462306a36Sopenharmony_ci struct btrfs_block_group *bg, 201562306a36Sopenharmony_ci struct map_lookup *map, 201662306a36Sopenharmony_ci u64 logical_start, u64 logical_length, 201762306a36Sopenharmony_ci struct btrfs_device *device, 201862306a36Sopenharmony_ci u64 physical, int mirror_num) 201962306a36Sopenharmony_ci{ 202062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 202162306a36Sopenharmony_ci const u64 logical_end = logical_start + logical_length; 202262306a36Sopenharmony_ci u64 cur_logical = logical_start; 202362306a36Sopenharmony_ci int ret; 202462306a36Sopenharmony_ci 202562306a36Sopenharmony_ci /* The range must be inside the bg */ 202662306a36Sopenharmony_ci ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci /* Go through each extent items inside the logical range */ 202962306a36Sopenharmony_ci while (cur_logical < logical_end) { 203062306a36Sopenharmony_ci u64 found_logical = U64_MAX; 203162306a36Sopenharmony_ci u64 cur_physical = physical + cur_logical - logical_start; 203262306a36Sopenharmony_ci 203362306a36Sopenharmony_ci /* Canceled? */ 203462306a36Sopenharmony_ci if (atomic_read(&fs_info->scrub_cancel_req) || 203562306a36Sopenharmony_ci atomic_read(&sctx->cancel_req)) { 203662306a36Sopenharmony_ci ret = -ECANCELED; 203762306a36Sopenharmony_ci break; 203862306a36Sopenharmony_ci } 203962306a36Sopenharmony_ci /* Paused? */ 204062306a36Sopenharmony_ci if (atomic_read(&fs_info->scrub_pause_req)) { 204162306a36Sopenharmony_ci /* Push queued extents */ 204262306a36Sopenharmony_ci scrub_blocked_if_needed(fs_info); 204362306a36Sopenharmony_ci } 204462306a36Sopenharmony_ci /* Block group removed? */ 204562306a36Sopenharmony_ci spin_lock(&bg->lock); 204662306a36Sopenharmony_ci if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) { 204762306a36Sopenharmony_ci spin_unlock(&bg->lock); 204862306a36Sopenharmony_ci ret = 0; 204962306a36Sopenharmony_ci break; 205062306a36Sopenharmony_ci } 205162306a36Sopenharmony_ci spin_unlock(&bg->lock); 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci ret = queue_scrub_stripe(sctx, bg, device, mirror_num, 205462306a36Sopenharmony_ci cur_logical, logical_end - cur_logical, 205562306a36Sopenharmony_ci cur_physical, &found_logical); 205662306a36Sopenharmony_ci if (ret > 0) { 205762306a36Sopenharmony_ci /* No more extent, just update the accounting */ 205862306a36Sopenharmony_ci sctx->stat.last_physical = physical + logical_length; 205962306a36Sopenharmony_ci ret = 0; 206062306a36Sopenharmony_ci break; 206162306a36Sopenharmony_ci } 206262306a36Sopenharmony_ci if (ret < 0) 206362306a36Sopenharmony_ci break; 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci /* queue_scrub_stripe() returned 0, @found_logical must be updated. */ 206662306a36Sopenharmony_ci ASSERT(found_logical != U64_MAX); 206762306a36Sopenharmony_ci cur_logical = found_logical + BTRFS_STRIPE_LEN; 206862306a36Sopenharmony_ci 206962306a36Sopenharmony_ci /* Don't hold CPU for too long time */ 207062306a36Sopenharmony_ci cond_resched(); 207162306a36Sopenharmony_ci } 207262306a36Sopenharmony_ci return ret; 207362306a36Sopenharmony_ci} 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci/* Calculate the full stripe length for simple stripe based profiles */ 207662306a36Sopenharmony_cistatic u64 simple_stripe_full_stripe_len(const struct map_lookup *map) 207762306a36Sopenharmony_ci{ 207862306a36Sopenharmony_ci ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | 207962306a36Sopenharmony_ci BTRFS_BLOCK_GROUP_RAID10)); 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci return btrfs_stripe_nr_to_offset(map->num_stripes / map->sub_stripes); 208262306a36Sopenharmony_ci} 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci/* Get the logical bytenr for the stripe */ 208562306a36Sopenharmony_cistatic u64 simple_stripe_get_logical(struct map_lookup *map, 208662306a36Sopenharmony_ci struct btrfs_block_group *bg, 208762306a36Sopenharmony_ci int stripe_index) 208862306a36Sopenharmony_ci{ 208962306a36Sopenharmony_ci ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | 209062306a36Sopenharmony_ci BTRFS_BLOCK_GROUP_RAID10)); 209162306a36Sopenharmony_ci ASSERT(stripe_index < map->num_stripes); 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci /* 209462306a36Sopenharmony_ci * (stripe_index / sub_stripes) gives how many data stripes we need to 209562306a36Sopenharmony_ci * skip. 209662306a36Sopenharmony_ci */ 209762306a36Sopenharmony_ci return btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes) + 209862306a36Sopenharmony_ci bg->start; 209962306a36Sopenharmony_ci} 210062306a36Sopenharmony_ci 210162306a36Sopenharmony_ci/* Get the mirror number for the stripe */ 210262306a36Sopenharmony_cistatic int simple_stripe_mirror_num(struct map_lookup *map, int stripe_index) 210362306a36Sopenharmony_ci{ 210462306a36Sopenharmony_ci ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | 210562306a36Sopenharmony_ci BTRFS_BLOCK_GROUP_RAID10)); 210662306a36Sopenharmony_ci ASSERT(stripe_index < map->num_stripes); 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci /* For RAID0, it's fixed to 1, for RAID10 it's 0,1,0,1... */ 210962306a36Sopenharmony_ci return stripe_index % map->sub_stripes + 1; 211062306a36Sopenharmony_ci} 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_cistatic int scrub_simple_stripe(struct scrub_ctx *sctx, 211362306a36Sopenharmony_ci struct btrfs_block_group *bg, 211462306a36Sopenharmony_ci struct map_lookup *map, 211562306a36Sopenharmony_ci struct btrfs_device *device, 211662306a36Sopenharmony_ci int stripe_index) 211762306a36Sopenharmony_ci{ 211862306a36Sopenharmony_ci const u64 logical_increment = simple_stripe_full_stripe_len(map); 211962306a36Sopenharmony_ci const u64 orig_logical = simple_stripe_get_logical(map, bg, stripe_index); 212062306a36Sopenharmony_ci const u64 orig_physical = map->stripes[stripe_index].physical; 212162306a36Sopenharmony_ci const int mirror_num = simple_stripe_mirror_num(map, stripe_index); 212262306a36Sopenharmony_ci u64 cur_logical = orig_logical; 212362306a36Sopenharmony_ci u64 cur_physical = orig_physical; 212462306a36Sopenharmony_ci int ret = 0; 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci while (cur_logical < bg->start + bg->length) { 212762306a36Sopenharmony_ci /* 212862306a36Sopenharmony_ci * Inside each stripe, RAID0 is just SINGLE, and RAID10 is 212962306a36Sopenharmony_ci * just RAID1, so we can reuse scrub_simple_mirror() to scrub 213062306a36Sopenharmony_ci * this stripe. 213162306a36Sopenharmony_ci */ 213262306a36Sopenharmony_ci ret = scrub_simple_mirror(sctx, bg, map, cur_logical, 213362306a36Sopenharmony_ci BTRFS_STRIPE_LEN, device, cur_physical, 213462306a36Sopenharmony_ci mirror_num); 213562306a36Sopenharmony_ci if (ret) 213662306a36Sopenharmony_ci return ret; 213762306a36Sopenharmony_ci /* Skip to next stripe which belongs to the target device */ 213862306a36Sopenharmony_ci cur_logical += logical_increment; 213962306a36Sopenharmony_ci /* For physical offset, we just go to next stripe */ 214062306a36Sopenharmony_ci cur_physical += BTRFS_STRIPE_LEN; 214162306a36Sopenharmony_ci } 214262306a36Sopenharmony_ci return ret; 214362306a36Sopenharmony_ci} 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_cistatic noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, 214662306a36Sopenharmony_ci struct btrfs_block_group *bg, 214762306a36Sopenharmony_ci struct extent_map *em, 214862306a36Sopenharmony_ci struct btrfs_device *scrub_dev, 214962306a36Sopenharmony_ci int stripe_index) 215062306a36Sopenharmony_ci{ 215162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 215262306a36Sopenharmony_ci struct map_lookup *map = em->map_lookup; 215362306a36Sopenharmony_ci const u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; 215462306a36Sopenharmony_ci const u64 chunk_logical = bg->start; 215562306a36Sopenharmony_ci int ret; 215662306a36Sopenharmony_ci int ret2; 215762306a36Sopenharmony_ci u64 physical = map->stripes[stripe_index].physical; 215862306a36Sopenharmony_ci const u64 dev_stripe_len = btrfs_calc_stripe_length(em); 215962306a36Sopenharmony_ci const u64 physical_end = physical + dev_stripe_len; 216062306a36Sopenharmony_ci u64 logical; 216162306a36Sopenharmony_ci u64 logic_end; 216262306a36Sopenharmony_ci /* The logical increment after finishing one stripe */ 216362306a36Sopenharmony_ci u64 increment; 216462306a36Sopenharmony_ci /* Offset inside the chunk */ 216562306a36Sopenharmony_ci u64 offset; 216662306a36Sopenharmony_ci u64 stripe_logical; 216762306a36Sopenharmony_ci int stop_loop = 0; 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci /* Extent_path should be released by now. */ 217062306a36Sopenharmony_ci ASSERT(sctx->extent_path.nodes[0] == NULL); 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci scrub_blocked_if_needed(fs_info); 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci if (sctx->is_dev_replace && 217562306a36Sopenharmony_ci btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) { 217662306a36Sopenharmony_ci mutex_lock(&sctx->wr_lock); 217762306a36Sopenharmony_ci sctx->write_pointer = physical; 217862306a36Sopenharmony_ci mutex_unlock(&sctx->wr_lock); 217962306a36Sopenharmony_ci } 218062306a36Sopenharmony_ci 218162306a36Sopenharmony_ci /* Prepare the extra data stripes used by RAID56. */ 218262306a36Sopenharmony_ci if (profile & BTRFS_BLOCK_GROUP_RAID56_MASK) { 218362306a36Sopenharmony_ci ASSERT(sctx->raid56_data_stripes == NULL); 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ci sctx->raid56_data_stripes = kcalloc(nr_data_stripes(map), 218662306a36Sopenharmony_ci sizeof(struct scrub_stripe), 218762306a36Sopenharmony_ci GFP_KERNEL); 218862306a36Sopenharmony_ci if (!sctx->raid56_data_stripes) { 218962306a36Sopenharmony_ci ret = -ENOMEM; 219062306a36Sopenharmony_ci goto out; 219162306a36Sopenharmony_ci } 219262306a36Sopenharmony_ci for (int i = 0; i < nr_data_stripes(map); i++) { 219362306a36Sopenharmony_ci ret = init_scrub_stripe(fs_info, 219462306a36Sopenharmony_ci &sctx->raid56_data_stripes[i]); 219562306a36Sopenharmony_ci if (ret < 0) 219662306a36Sopenharmony_ci goto out; 219762306a36Sopenharmony_ci sctx->raid56_data_stripes[i].bg = bg; 219862306a36Sopenharmony_ci sctx->raid56_data_stripes[i].sctx = sctx; 219962306a36Sopenharmony_ci } 220062306a36Sopenharmony_ci } 220162306a36Sopenharmony_ci /* 220262306a36Sopenharmony_ci * There used to be a big double loop to handle all profiles using the 220362306a36Sopenharmony_ci * same routine, which grows larger and more gross over time. 220462306a36Sopenharmony_ci * 220562306a36Sopenharmony_ci * So here we handle each profile differently, so simpler profiles 220662306a36Sopenharmony_ci * have simpler scrubbing function. 220762306a36Sopenharmony_ci */ 220862306a36Sopenharmony_ci if (!(profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10 | 220962306a36Sopenharmony_ci BTRFS_BLOCK_GROUP_RAID56_MASK))) { 221062306a36Sopenharmony_ci /* 221162306a36Sopenharmony_ci * Above check rules out all complex profile, the remaining 221262306a36Sopenharmony_ci * profiles are SINGLE|DUP|RAID1|RAID1C*, which is simple 221362306a36Sopenharmony_ci * mirrored duplication without stripe. 221462306a36Sopenharmony_ci * 221562306a36Sopenharmony_ci * Only @physical and @mirror_num needs to calculated using 221662306a36Sopenharmony_ci * @stripe_index. 221762306a36Sopenharmony_ci */ 221862306a36Sopenharmony_ci ret = scrub_simple_mirror(sctx, bg, map, bg->start, bg->length, 221962306a36Sopenharmony_ci scrub_dev, map->stripes[stripe_index].physical, 222062306a36Sopenharmony_ci stripe_index + 1); 222162306a36Sopenharmony_ci offset = 0; 222262306a36Sopenharmony_ci goto out; 222362306a36Sopenharmony_ci } 222462306a36Sopenharmony_ci if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { 222562306a36Sopenharmony_ci ret = scrub_simple_stripe(sctx, bg, map, scrub_dev, stripe_index); 222662306a36Sopenharmony_ci offset = btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes); 222762306a36Sopenharmony_ci goto out; 222862306a36Sopenharmony_ci } 222962306a36Sopenharmony_ci 223062306a36Sopenharmony_ci /* Only RAID56 goes through the old code */ 223162306a36Sopenharmony_ci ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK); 223262306a36Sopenharmony_ci ret = 0; 223362306a36Sopenharmony_ci 223462306a36Sopenharmony_ci /* Calculate the logical end of the stripe */ 223562306a36Sopenharmony_ci get_raid56_logic_offset(physical_end, stripe_index, 223662306a36Sopenharmony_ci map, &logic_end, NULL); 223762306a36Sopenharmony_ci logic_end += chunk_logical; 223862306a36Sopenharmony_ci 223962306a36Sopenharmony_ci /* Initialize @offset in case we need to go to out: label */ 224062306a36Sopenharmony_ci get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL); 224162306a36Sopenharmony_ci increment = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci /* 224462306a36Sopenharmony_ci * Due to the rotation, for RAID56 it's better to iterate each stripe 224562306a36Sopenharmony_ci * using their physical offset. 224662306a36Sopenharmony_ci */ 224762306a36Sopenharmony_ci while (physical < physical_end) { 224862306a36Sopenharmony_ci ret = get_raid56_logic_offset(physical, stripe_index, map, 224962306a36Sopenharmony_ci &logical, &stripe_logical); 225062306a36Sopenharmony_ci logical += chunk_logical; 225162306a36Sopenharmony_ci if (ret) { 225262306a36Sopenharmony_ci /* it is parity strip */ 225362306a36Sopenharmony_ci stripe_logical += chunk_logical; 225462306a36Sopenharmony_ci ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, 225562306a36Sopenharmony_ci map, stripe_logical); 225662306a36Sopenharmony_ci if (ret) 225762306a36Sopenharmony_ci goto out; 225862306a36Sopenharmony_ci goto next; 225962306a36Sopenharmony_ci } 226062306a36Sopenharmony_ci 226162306a36Sopenharmony_ci /* 226262306a36Sopenharmony_ci * Now we're at a data stripe, scrub each extents in the range. 226362306a36Sopenharmony_ci * 226462306a36Sopenharmony_ci * At this stage, if we ignore the repair part, inside each data 226562306a36Sopenharmony_ci * stripe it is no different than SINGLE profile. 226662306a36Sopenharmony_ci * We can reuse scrub_simple_mirror() here, as the repair part 226762306a36Sopenharmony_ci * is still based on @mirror_num. 226862306a36Sopenharmony_ci */ 226962306a36Sopenharmony_ci ret = scrub_simple_mirror(sctx, bg, map, logical, BTRFS_STRIPE_LEN, 227062306a36Sopenharmony_ci scrub_dev, physical, 1); 227162306a36Sopenharmony_ci if (ret < 0) 227262306a36Sopenharmony_ci goto out; 227362306a36Sopenharmony_cinext: 227462306a36Sopenharmony_ci logical += increment; 227562306a36Sopenharmony_ci physical += BTRFS_STRIPE_LEN; 227662306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 227762306a36Sopenharmony_ci if (stop_loop) 227862306a36Sopenharmony_ci sctx->stat.last_physical = 227962306a36Sopenharmony_ci map->stripes[stripe_index].physical + dev_stripe_len; 228062306a36Sopenharmony_ci else 228162306a36Sopenharmony_ci sctx->stat.last_physical = physical; 228262306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 228362306a36Sopenharmony_ci if (stop_loop) 228462306a36Sopenharmony_ci break; 228562306a36Sopenharmony_ci } 228662306a36Sopenharmony_ciout: 228762306a36Sopenharmony_ci ret2 = flush_scrub_stripes(sctx); 228862306a36Sopenharmony_ci if (!ret) 228962306a36Sopenharmony_ci ret = ret2; 229062306a36Sopenharmony_ci btrfs_release_path(&sctx->extent_path); 229162306a36Sopenharmony_ci btrfs_release_path(&sctx->csum_path); 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci if (sctx->raid56_data_stripes) { 229462306a36Sopenharmony_ci for (int i = 0; i < nr_data_stripes(map); i++) 229562306a36Sopenharmony_ci release_scrub_stripe(&sctx->raid56_data_stripes[i]); 229662306a36Sopenharmony_ci kfree(sctx->raid56_data_stripes); 229762306a36Sopenharmony_ci sctx->raid56_data_stripes = NULL; 229862306a36Sopenharmony_ci } 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci if (sctx->is_dev_replace && ret >= 0) { 230162306a36Sopenharmony_ci int ret2; 230262306a36Sopenharmony_ci 230362306a36Sopenharmony_ci ret2 = sync_write_pointer_for_zoned(sctx, 230462306a36Sopenharmony_ci chunk_logical + offset, 230562306a36Sopenharmony_ci map->stripes[stripe_index].physical, 230662306a36Sopenharmony_ci physical_end); 230762306a36Sopenharmony_ci if (ret2) 230862306a36Sopenharmony_ci ret = ret2; 230962306a36Sopenharmony_ci } 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_ci return ret < 0 ? ret : 0; 231262306a36Sopenharmony_ci} 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_cistatic noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, 231562306a36Sopenharmony_ci struct btrfs_block_group *bg, 231662306a36Sopenharmony_ci struct btrfs_device *scrub_dev, 231762306a36Sopenharmony_ci u64 dev_offset, 231862306a36Sopenharmony_ci u64 dev_extent_len) 231962306a36Sopenharmony_ci{ 232062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 232162306a36Sopenharmony_ci struct extent_map_tree *map_tree = &fs_info->mapping_tree; 232262306a36Sopenharmony_ci struct map_lookup *map; 232362306a36Sopenharmony_ci struct extent_map *em; 232462306a36Sopenharmony_ci int i; 232562306a36Sopenharmony_ci int ret = 0; 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci read_lock(&map_tree->lock); 232862306a36Sopenharmony_ci em = lookup_extent_mapping(map_tree, bg->start, bg->length); 232962306a36Sopenharmony_ci read_unlock(&map_tree->lock); 233062306a36Sopenharmony_ci 233162306a36Sopenharmony_ci if (!em) { 233262306a36Sopenharmony_ci /* 233362306a36Sopenharmony_ci * Might have been an unused block group deleted by the cleaner 233462306a36Sopenharmony_ci * kthread or relocation. 233562306a36Sopenharmony_ci */ 233662306a36Sopenharmony_ci spin_lock(&bg->lock); 233762306a36Sopenharmony_ci if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &bg->runtime_flags)) 233862306a36Sopenharmony_ci ret = -EINVAL; 233962306a36Sopenharmony_ci spin_unlock(&bg->lock); 234062306a36Sopenharmony_ci 234162306a36Sopenharmony_ci return ret; 234262306a36Sopenharmony_ci } 234362306a36Sopenharmony_ci if (em->start != bg->start) 234462306a36Sopenharmony_ci goto out; 234562306a36Sopenharmony_ci if (em->len < dev_extent_len) 234662306a36Sopenharmony_ci goto out; 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_ci map = em->map_lookup; 234962306a36Sopenharmony_ci for (i = 0; i < map->num_stripes; ++i) { 235062306a36Sopenharmony_ci if (map->stripes[i].dev->bdev == scrub_dev->bdev && 235162306a36Sopenharmony_ci map->stripes[i].physical == dev_offset) { 235262306a36Sopenharmony_ci ret = scrub_stripe(sctx, bg, em, scrub_dev, i); 235362306a36Sopenharmony_ci if (ret) 235462306a36Sopenharmony_ci goto out; 235562306a36Sopenharmony_ci } 235662306a36Sopenharmony_ci } 235762306a36Sopenharmony_ciout: 235862306a36Sopenharmony_ci free_extent_map(em); 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ci return ret; 236162306a36Sopenharmony_ci} 236262306a36Sopenharmony_ci 236362306a36Sopenharmony_cistatic int finish_extent_writes_for_zoned(struct btrfs_root *root, 236462306a36Sopenharmony_ci struct btrfs_block_group *cache) 236562306a36Sopenharmony_ci{ 236662306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = cache->fs_info; 236762306a36Sopenharmony_ci struct btrfs_trans_handle *trans; 236862306a36Sopenharmony_ci 236962306a36Sopenharmony_ci if (!btrfs_is_zoned(fs_info)) 237062306a36Sopenharmony_ci return 0; 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci btrfs_wait_block_group_reservations(cache); 237362306a36Sopenharmony_ci btrfs_wait_nocow_writers(cache); 237462306a36Sopenharmony_ci btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, cache->length); 237562306a36Sopenharmony_ci 237662306a36Sopenharmony_ci trans = btrfs_join_transaction(root); 237762306a36Sopenharmony_ci if (IS_ERR(trans)) 237862306a36Sopenharmony_ci return PTR_ERR(trans); 237962306a36Sopenharmony_ci return btrfs_commit_transaction(trans); 238062306a36Sopenharmony_ci} 238162306a36Sopenharmony_ci 238262306a36Sopenharmony_cistatic noinline_for_stack 238362306a36Sopenharmony_ciint scrub_enumerate_chunks(struct scrub_ctx *sctx, 238462306a36Sopenharmony_ci struct btrfs_device *scrub_dev, u64 start, u64 end) 238562306a36Sopenharmony_ci{ 238662306a36Sopenharmony_ci struct btrfs_dev_extent *dev_extent = NULL; 238762306a36Sopenharmony_ci struct btrfs_path *path; 238862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 238962306a36Sopenharmony_ci struct btrfs_root *root = fs_info->dev_root; 239062306a36Sopenharmony_ci u64 chunk_offset; 239162306a36Sopenharmony_ci int ret = 0; 239262306a36Sopenharmony_ci int ro_set; 239362306a36Sopenharmony_ci int slot; 239462306a36Sopenharmony_ci struct extent_buffer *l; 239562306a36Sopenharmony_ci struct btrfs_key key; 239662306a36Sopenharmony_ci struct btrfs_key found_key; 239762306a36Sopenharmony_ci struct btrfs_block_group *cache; 239862306a36Sopenharmony_ci struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_ci path = btrfs_alloc_path(); 240162306a36Sopenharmony_ci if (!path) 240262306a36Sopenharmony_ci return -ENOMEM; 240362306a36Sopenharmony_ci 240462306a36Sopenharmony_ci path->reada = READA_FORWARD; 240562306a36Sopenharmony_ci path->search_commit_root = 1; 240662306a36Sopenharmony_ci path->skip_locking = 1; 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci key.objectid = scrub_dev->devid; 240962306a36Sopenharmony_ci key.offset = 0ull; 241062306a36Sopenharmony_ci key.type = BTRFS_DEV_EXTENT_KEY; 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_ci while (1) { 241362306a36Sopenharmony_ci u64 dev_extent_len; 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 241662306a36Sopenharmony_ci if (ret < 0) 241762306a36Sopenharmony_ci break; 241862306a36Sopenharmony_ci if (ret > 0) { 241962306a36Sopenharmony_ci if (path->slots[0] >= 242062306a36Sopenharmony_ci btrfs_header_nritems(path->nodes[0])) { 242162306a36Sopenharmony_ci ret = btrfs_next_leaf(root, path); 242262306a36Sopenharmony_ci if (ret < 0) 242362306a36Sopenharmony_ci break; 242462306a36Sopenharmony_ci if (ret > 0) { 242562306a36Sopenharmony_ci ret = 0; 242662306a36Sopenharmony_ci break; 242762306a36Sopenharmony_ci } 242862306a36Sopenharmony_ci } else { 242962306a36Sopenharmony_ci ret = 0; 243062306a36Sopenharmony_ci } 243162306a36Sopenharmony_ci } 243262306a36Sopenharmony_ci 243362306a36Sopenharmony_ci l = path->nodes[0]; 243462306a36Sopenharmony_ci slot = path->slots[0]; 243562306a36Sopenharmony_ci 243662306a36Sopenharmony_ci btrfs_item_key_to_cpu(l, &found_key, slot); 243762306a36Sopenharmony_ci 243862306a36Sopenharmony_ci if (found_key.objectid != scrub_dev->devid) 243962306a36Sopenharmony_ci break; 244062306a36Sopenharmony_ci 244162306a36Sopenharmony_ci if (found_key.type != BTRFS_DEV_EXTENT_KEY) 244262306a36Sopenharmony_ci break; 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci if (found_key.offset >= end) 244562306a36Sopenharmony_ci break; 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci if (found_key.offset < key.offset) 244862306a36Sopenharmony_ci break; 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 245162306a36Sopenharmony_ci dev_extent_len = btrfs_dev_extent_length(l, dev_extent); 245262306a36Sopenharmony_ci 245362306a36Sopenharmony_ci if (found_key.offset + dev_extent_len <= start) 245462306a36Sopenharmony_ci goto skip; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci /* 245962306a36Sopenharmony_ci * get a reference on the corresponding block group to prevent 246062306a36Sopenharmony_ci * the chunk from going away while we scrub it 246162306a36Sopenharmony_ci */ 246262306a36Sopenharmony_ci cache = btrfs_lookup_block_group(fs_info, chunk_offset); 246362306a36Sopenharmony_ci 246462306a36Sopenharmony_ci /* some chunks are removed but not committed to disk yet, 246562306a36Sopenharmony_ci * continue scrubbing */ 246662306a36Sopenharmony_ci if (!cache) 246762306a36Sopenharmony_ci goto skip; 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci ASSERT(cache->start <= chunk_offset); 247062306a36Sopenharmony_ci /* 247162306a36Sopenharmony_ci * We are using the commit root to search for device extents, so 247262306a36Sopenharmony_ci * that means we could have found a device extent item from a 247362306a36Sopenharmony_ci * block group that was deleted in the current transaction. The 247462306a36Sopenharmony_ci * logical start offset of the deleted block group, stored at 247562306a36Sopenharmony_ci * @chunk_offset, might be part of the logical address range of 247662306a36Sopenharmony_ci * a new block group (which uses different physical extents). 247762306a36Sopenharmony_ci * In this case btrfs_lookup_block_group() has returned the new 247862306a36Sopenharmony_ci * block group, and its start address is less than @chunk_offset. 247962306a36Sopenharmony_ci * 248062306a36Sopenharmony_ci * We skip such new block groups, because it's pointless to 248162306a36Sopenharmony_ci * process them, as we won't find their extents because we search 248262306a36Sopenharmony_ci * for them using the commit root of the extent tree. For a device 248362306a36Sopenharmony_ci * replace it's also fine to skip it, we won't miss copying them 248462306a36Sopenharmony_ci * to the target device because we have the write duplication 248562306a36Sopenharmony_ci * setup through the regular write path (by btrfs_map_block()), 248662306a36Sopenharmony_ci * and we have committed a transaction when we started the device 248762306a36Sopenharmony_ci * replace, right after setting up the device replace state. 248862306a36Sopenharmony_ci */ 248962306a36Sopenharmony_ci if (cache->start < chunk_offset) { 249062306a36Sopenharmony_ci btrfs_put_block_group(cache); 249162306a36Sopenharmony_ci goto skip; 249262306a36Sopenharmony_ci } 249362306a36Sopenharmony_ci 249462306a36Sopenharmony_ci if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) { 249562306a36Sopenharmony_ci if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) { 249662306a36Sopenharmony_ci btrfs_put_block_group(cache); 249762306a36Sopenharmony_ci goto skip; 249862306a36Sopenharmony_ci } 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci /* 250262306a36Sopenharmony_ci * Make sure that while we are scrubbing the corresponding block 250362306a36Sopenharmony_ci * group doesn't get its logical address and its device extents 250462306a36Sopenharmony_ci * reused for another block group, which can possibly be of a 250562306a36Sopenharmony_ci * different type and different profile. We do this to prevent 250662306a36Sopenharmony_ci * false error detections and crashes due to bogus attempts to 250762306a36Sopenharmony_ci * repair extents. 250862306a36Sopenharmony_ci */ 250962306a36Sopenharmony_ci spin_lock(&cache->lock); 251062306a36Sopenharmony_ci if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) { 251162306a36Sopenharmony_ci spin_unlock(&cache->lock); 251262306a36Sopenharmony_ci btrfs_put_block_group(cache); 251362306a36Sopenharmony_ci goto skip; 251462306a36Sopenharmony_ci } 251562306a36Sopenharmony_ci btrfs_freeze_block_group(cache); 251662306a36Sopenharmony_ci spin_unlock(&cache->lock); 251762306a36Sopenharmony_ci 251862306a36Sopenharmony_ci /* 251962306a36Sopenharmony_ci * we need call btrfs_inc_block_group_ro() with scrubs_paused, 252062306a36Sopenharmony_ci * to avoid deadlock caused by: 252162306a36Sopenharmony_ci * btrfs_inc_block_group_ro() 252262306a36Sopenharmony_ci * -> btrfs_wait_for_commit() 252362306a36Sopenharmony_ci * -> btrfs_commit_transaction() 252462306a36Sopenharmony_ci * -> btrfs_scrub_pause() 252562306a36Sopenharmony_ci */ 252662306a36Sopenharmony_ci scrub_pause_on(fs_info); 252762306a36Sopenharmony_ci 252862306a36Sopenharmony_ci /* 252962306a36Sopenharmony_ci * Don't do chunk preallocation for scrub. 253062306a36Sopenharmony_ci * 253162306a36Sopenharmony_ci * This is especially important for SYSTEM bgs, or we can hit 253262306a36Sopenharmony_ci * -EFBIG from btrfs_finish_chunk_alloc() like: 253362306a36Sopenharmony_ci * 1. The only SYSTEM bg is marked RO. 253462306a36Sopenharmony_ci * Since SYSTEM bg is small, that's pretty common. 253562306a36Sopenharmony_ci * 2. New SYSTEM bg will be allocated 253662306a36Sopenharmony_ci * Due to regular version will allocate new chunk. 253762306a36Sopenharmony_ci * 3. New SYSTEM bg is empty and will get cleaned up 253862306a36Sopenharmony_ci * Before cleanup really happens, it's marked RO again. 253962306a36Sopenharmony_ci * 4. Empty SYSTEM bg get scrubbed 254062306a36Sopenharmony_ci * We go back to 2. 254162306a36Sopenharmony_ci * 254262306a36Sopenharmony_ci * This can easily boost the amount of SYSTEM chunks if cleaner 254362306a36Sopenharmony_ci * thread can't be triggered fast enough, and use up all space 254462306a36Sopenharmony_ci * of btrfs_super_block::sys_chunk_array 254562306a36Sopenharmony_ci * 254662306a36Sopenharmony_ci * While for dev replace, we need to try our best to mark block 254762306a36Sopenharmony_ci * group RO, to prevent race between: 254862306a36Sopenharmony_ci * - Write duplication 254962306a36Sopenharmony_ci * Contains latest data 255062306a36Sopenharmony_ci * - Scrub copy 255162306a36Sopenharmony_ci * Contains data from commit tree 255262306a36Sopenharmony_ci * 255362306a36Sopenharmony_ci * If target block group is not marked RO, nocow writes can 255462306a36Sopenharmony_ci * be overwritten by scrub copy, causing data corruption. 255562306a36Sopenharmony_ci * So for dev-replace, it's not allowed to continue if a block 255662306a36Sopenharmony_ci * group is not RO. 255762306a36Sopenharmony_ci */ 255862306a36Sopenharmony_ci ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace); 255962306a36Sopenharmony_ci if (!ret && sctx->is_dev_replace) { 256062306a36Sopenharmony_ci ret = finish_extent_writes_for_zoned(root, cache); 256162306a36Sopenharmony_ci if (ret) { 256262306a36Sopenharmony_ci btrfs_dec_block_group_ro(cache); 256362306a36Sopenharmony_ci scrub_pause_off(fs_info); 256462306a36Sopenharmony_ci btrfs_put_block_group(cache); 256562306a36Sopenharmony_ci break; 256662306a36Sopenharmony_ci } 256762306a36Sopenharmony_ci } 256862306a36Sopenharmony_ci 256962306a36Sopenharmony_ci if (ret == 0) { 257062306a36Sopenharmony_ci ro_set = 1; 257162306a36Sopenharmony_ci } else if (ret == -ENOSPC && !sctx->is_dev_replace && 257262306a36Sopenharmony_ci !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) { 257362306a36Sopenharmony_ci /* 257462306a36Sopenharmony_ci * btrfs_inc_block_group_ro return -ENOSPC when it 257562306a36Sopenharmony_ci * failed in creating new chunk for metadata. 257662306a36Sopenharmony_ci * It is not a problem for scrub, because 257762306a36Sopenharmony_ci * metadata are always cowed, and our scrub paused 257862306a36Sopenharmony_ci * commit_transactions. 257962306a36Sopenharmony_ci * 258062306a36Sopenharmony_ci * For RAID56 chunks, we have to mark them read-only 258162306a36Sopenharmony_ci * for scrub, as later we would use our own cache 258262306a36Sopenharmony_ci * out of RAID56 realm. 258362306a36Sopenharmony_ci * Thus we want the RAID56 bg to be marked RO to 258462306a36Sopenharmony_ci * prevent RMW from screwing up out cache. 258562306a36Sopenharmony_ci */ 258662306a36Sopenharmony_ci ro_set = 0; 258762306a36Sopenharmony_ci } else if (ret == -ETXTBSY) { 258862306a36Sopenharmony_ci btrfs_warn(fs_info, 258962306a36Sopenharmony_ci "skipping scrub of block group %llu due to active swapfile", 259062306a36Sopenharmony_ci cache->start); 259162306a36Sopenharmony_ci scrub_pause_off(fs_info); 259262306a36Sopenharmony_ci ret = 0; 259362306a36Sopenharmony_ci goto skip_unfreeze; 259462306a36Sopenharmony_ci } else { 259562306a36Sopenharmony_ci btrfs_warn(fs_info, 259662306a36Sopenharmony_ci "failed setting block group ro: %d", ret); 259762306a36Sopenharmony_ci btrfs_unfreeze_block_group(cache); 259862306a36Sopenharmony_ci btrfs_put_block_group(cache); 259962306a36Sopenharmony_ci scrub_pause_off(fs_info); 260062306a36Sopenharmony_ci break; 260162306a36Sopenharmony_ci } 260262306a36Sopenharmony_ci 260362306a36Sopenharmony_ci /* 260462306a36Sopenharmony_ci * Now the target block is marked RO, wait for nocow writes to 260562306a36Sopenharmony_ci * finish before dev-replace. 260662306a36Sopenharmony_ci * COW is fine, as COW never overwrites extents in commit tree. 260762306a36Sopenharmony_ci */ 260862306a36Sopenharmony_ci if (sctx->is_dev_replace) { 260962306a36Sopenharmony_ci btrfs_wait_nocow_writers(cache); 261062306a36Sopenharmony_ci btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, 261162306a36Sopenharmony_ci cache->length); 261262306a36Sopenharmony_ci } 261362306a36Sopenharmony_ci 261462306a36Sopenharmony_ci scrub_pause_off(fs_info); 261562306a36Sopenharmony_ci down_write(&dev_replace->rwsem); 261662306a36Sopenharmony_ci dev_replace->cursor_right = found_key.offset + dev_extent_len; 261762306a36Sopenharmony_ci dev_replace->cursor_left = found_key.offset; 261862306a36Sopenharmony_ci dev_replace->item_needs_writeback = 1; 261962306a36Sopenharmony_ci up_write(&dev_replace->rwsem); 262062306a36Sopenharmony_ci 262162306a36Sopenharmony_ci ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset, 262262306a36Sopenharmony_ci dev_extent_len); 262362306a36Sopenharmony_ci if (sctx->is_dev_replace && 262462306a36Sopenharmony_ci !btrfs_finish_block_group_to_copy(dev_replace->srcdev, 262562306a36Sopenharmony_ci cache, found_key.offset)) 262662306a36Sopenharmony_ci ro_set = 0; 262762306a36Sopenharmony_ci 262862306a36Sopenharmony_ci down_write(&dev_replace->rwsem); 262962306a36Sopenharmony_ci dev_replace->cursor_left = dev_replace->cursor_right; 263062306a36Sopenharmony_ci dev_replace->item_needs_writeback = 1; 263162306a36Sopenharmony_ci up_write(&dev_replace->rwsem); 263262306a36Sopenharmony_ci 263362306a36Sopenharmony_ci if (ro_set) 263462306a36Sopenharmony_ci btrfs_dec_block_group_ro(cache); 263562306a36Sopenharmony_ci 263662306a36Sopenharmony_ci /* 263762306a36Sopenharmony_ci * We might have prevented the cleaner kthread from deleting 263862306a36Sopenharmony_ci * this block group if it was already unused because we raced 263962306a36Sopenharmony_ci * and set it to RO mode first. So add it back to the unused 264062306a36Sopenharmony_ci * list, otherwise it might not ever be deleted unless a manual 264162306a36Sopenharmony_ci * balance is triggered or it becomes used and unused again. 264262306a36Sopenharmony_ci */ 264362306a36Sopenharmony_ci spin_lock(&cache->lock); 264462306a36Sopenharmony_ci if (!test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags) && 264562306a36Sopenharmony_ci !cache->ro && cache->reserved == 0 && cache->used == 0) { 264662306a36Sopenharmony_ci spin_unlock(&cache->lock); 264762306a36Sopenharmony_ci if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) 264862306a36Sopenharmony_ci btrfs_discard_queue_work(&fs_info->discard_ctl, 264962306a36Sopenharmony_ci cache); 265062306a36Sopenharmony_ci else 265162306a36Sopenharmony_ci btrfs_mark_bg_unused(cache); 265262306a36Sopenharmony_ci } else { 265362306a36Sopenharmony_ci spin_unlock(&cache->lock); 265462306a36Sopenharmony_ci } 265562306a36Sopenharmony_ciskip_unfreeze: 265662306a36Sopenharmony_ci btrfs_unfreeze_block_group(cache); 265762306a36Sopenharmony_ci btrfs_put_block_group(cache); 265862306a36Sopenharmony_ci if (ret) 265962306a36Sopenharmony_ci break; 266062306a36Sopenharmony_ci if (sctx->is_dev_replace && 266162306a36Sopenharmony_ci atomic64_read(&dev_replace->num_write_errors) > 0) { 266262306a36Sopenharmony_ci ret = -EIO; 266362306a36Sopenharmony_ci break; 266462306a36Sopenharmony_ci } 266562306a36Sopenharmony_ci if (sctx->stat.malloc_errors > 0) { 266662306a36Sopenharmony_ci ret = -ENOMEM; 266762306a36Sopenharmony_ci break; 266862306a36Sopenharmony_ci } 266962306a36Sopenharmony_ciskip: 267062306a36Sopenharmony_ci key.offset = found_key.offset + dev_extent_len; 267162306a36Sopenharmony_ci btrfs_release_path(path); 267262306a36Sopenharmony_ci } 267362306a36Sopenharmony_ci 267462306a36Sopenharmony_ci btrfs_free_path(path); 267562306a36Sopenharmony_ci 267662306a36Sopenharmony_ci return ret; 267762306a36Sopenharmony_ci} 267862306a36Sopenharmony_ci 267962306a36Sopenharmony_cistatic int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, 268062306a36Sopenharmony_ci struct page *page, u64 physical, u64 generation) 268162306a36Sopenharmony_ci{ 268262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 268362306a36Sopenharmony_ci struct bio_vec bvec; 268462306a36Sopenharmony_ci struct bio bio; 268562306a36Sopenharmony_ci struct btrfs_super_block *sb = page_address(page); 268662306a36Sopenharmony_ci int ret; 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_READ); 268962306a36Sopenharmony_ci bio.bi_iter.bi_sector = physical >> SECTOR_SHIFT; 269062306a36Sopenharmony_ci __bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0); 269162306a36Sopenharmony_ci ret = submit_bio_wait(&bio); 269262306a36Sopenharmony_ci bio_uninit(&bio); 269362306a36Sopenharmony_ci 269462306a36Sopenharmony_ci if (ret < 0) 269562306a36Sopenharmony_ci return ret; 269662306a36Sopenharmony_ci ret = btrfs_check_super_csum(fs_info, sb); 269762306a36Sopenharmony_ci if (ret != 0) { 269862306a36Sopenharmony_ci btrfs_err_rl(fs_info, 269962306a36Sopenharmony_ci "super block at physical %llu devid %llu has bad csum", 270062306a36Sopenharmony_ci physical, dev->devid); 270162306a36Sopenharmony_ci return -EIO; 270262306a36Sopenharmony_ci } 270362306a36Sopenharmony_ci if (btrfs_super_generation(sb) != generation) { 270462306a36Sopenharmony_ci btrfs_err_rl(fs_info, 270562306a36Sopenharmony_ci"super block at physical %llu devid %llu has bad generation %llu expect %llu", 270662306a36Sopenharmony_ci physical, dev->devid, 270762306a36Sopenharmony_ci btrfs_super_generation(sb), generation); 270862306a36Sopenharmony_ci return -EUCLEAN; 270962306a36Sopenharmony_ci } 271062306a36Sopenharmony_ci 271162306a36Sopenharmony_ci return btrfs_validate_super(fs_info, sb, -1); 271262306a36Sopenharmony_ci} 271362306a36Sopenharmony_ci 271462306a36Sopenharmony_cistatic noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, 271562306a36Sopenharmony_ci struct btrfs_device *scrub_dev) 271662306a36Sopenharmony_ci{ 271762306a36Sopenharmony_ci int i; 271862306a36Sopenharmony_ci u64 bytenr; 271962306a36Sopenharmony_ci u64 gen; 272062306a36Sopenharmony_ci int ret = 0; 272162306a36Sopenharmony_ci struct page *page; 272262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = sctx->fs_info; 272362306a36Sopenharmony_ci 272462306a36Sopenharmony_ci if (BTRFS_FS_ERROR(fs_info)) 272562306a36Sopenharmony_ci return -EROFS; 272662306a36Sopenharmony_ci 272762306a36Sopenharmony_ci page = alloc_page(GFP_KERNEL); 272862306a36Sopenharmony_ci if (!page) { 272962306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 273062306a36Sopenharmony_ci sctx->stat.malloc_errors++; 273162306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 273262306a36Sopenharmony_ci return -ENOMEM; 273362306a36Sopenharmony_ci } 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci /* Seed devices of a new filesystem has their own generation. */ 273662306a36Sopenharmony_ci if (scrub_dev->fs_devices != fs_info->fs_devices) 273762306a36Sopenharmony_ci gen = scrub_dev->generation; 273862306a36Sopenharmony_ci else 273962306a36Sopenharmony_ci gen = fs_info->last_trans_committed; 274062306a36Sopenharmony_ci 274162306a36Sopenharmony_ci for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 274262306a36Sopenharmony_ci bytenr = btrfs_sb_offset(i); 274362306a36Sopenharmony_ci if (bytenr + BTRFS_SUPER_INFO_SIZE > 274462306a36Sopenharmony_ci scrub_dev->commit_total_bytes) 274562306a36Sopenharmony_ci break; 274662306a36Sopenharmony_ci if (!btrfs_check_super_location(scrub_dev, bytenr)) 274762306a36Sopenharmony_ci continue; 274862306a36Sopenharmony_ci 274962306a36Sopenharmony_ci ret = scrub_one_super(sctx, scrub_dev, page, bytenr, gen); 275062306a36Sopenharmony_ci if (ret) { 275162306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 275262306a36Sopenharmony_ci sctx->stat.super_errors++; 275362306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 275462306a36Sopenharmony_ci } 275562306a36Sopenharmony_ci } 275662306a36Sopenharmony_ci __free_page(page); 275762306a36Sopenharmony_ci return 0; 275862306a36Sopenharmony_ci} 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_cistatic void scrub_workers_put(struct btrfs_fs_info *fs_info) 276162306a36Sopenharmony_ci{ 276262306a36Sopenharmony_ci if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt, 276362306a36Sopenharmony_ci &fs_info->scrub_lock)) { 276462306a36Sopenharmony_ci struct workqueue_struct *scrub_workers = fs_info->scrub_workers; 276562306a36Sopenharmony_ci 276662306a36Sopenharmony_ci fs_info->scrub_workers = NULL; 276762306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 276862306a36Sopenharmony_ci 276962306a36Sopenharmony_ci if (scrub_workers) 277062306a36Sopenharmony_ci destroy_workqueue(scrub_workers); 277162306a36Sopenharmony_ci } 277262306a36Sopenharmony_ci} 277362306a36Sopenharmony_ci 277462306a36Sopenharmony_ci/* 277562306a36Sopenharmony_ci * get a reference count on fs_info->scrub_workers. start worker if necessary 277662306a36Sopenharmony_ci */ 277762306a36Sopenharmony_cistatic noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info) 277862306a36Sopenharmony_ci{ 277962306a36Sopenharmony_ci struct workqueue_struct *scrub_workers = NULL; 278062306a36Sopenharmony_ci unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; 278162306a36Sopenharmony_ci int max_active = fs_info->thread_pool_size; 278262306a36Sopenharmony_ci int ret = -ENOMEM; 278362306a36Sopenharmony_ci 278462306a36Sopenharmony_ci if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt)) 278562306a36Sopenharmony_ci return 0; 278662306a36Sopenharmony_ci 278762306a36Sopenharmony_ci scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active); 278862306a36Sopenharmony_ci if (!scrub_workers) 278962306a36Sopenharmony_ci return -ENOMEM; 279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 279262306a36Sopenharmony_ci if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) { 279362306a36Sopenharmony_ci ASSERT(fs_info->scrub_workers == NULL); 279462306a36Sopenharmony_ci fs_info->scrub_workers = scrub_workers; 279562306a36Sopenharmony_ci refcount_set(&fs_info->scrub_workers_refcnt, 1); 279662306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 279762306a36Sopenharmony_ci return 0; 279862306a36Sopenharmony_ci } 279962306a36Sopenharmony_ci /* Other thread raced in and created the workers for us */ 280062306a36Sopenharmony_ci refcount_inc(&fs_info->scrub_workers_refcnt); 280162306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 280262306a36Sopenharmony_ci 280362306a36Sopenharmony_ci ret = 0; 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_ci destroy_workqueue(scrub_workers); 280662306a36Sopenharmony_ci return ret; 280762306a36Sopenharmony_ci} 280862306a36Sopenharmony_ci 280962306a36Sopenharmony_ciint btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, 281062306a36Sopenharmony_ci u64 end, struct btrfs_scrub_progress *progress, 281162306a36Sopenharmony_ci int readonly, int is_dev_replace) 281262306a36Sopenharmony_ci{ 281362306a36Sopenharmony_ci struct btrfs_dev_lookup_args args = { .devid = devid }; 281462306a36Sopenharmony_ci struct scrub_ctx *sctx; 281562306a36Sopenharmony_ci int ret; 281662306a36Sopenharmony_ci struct btrfs_device *dev; 281762306a36Sopenharmony_ci unsigned int nofs_flag; 281862306a36Sopenharmony_ci bool need_commit = false; 281962306a36Sopenharmony_ci 282062306a36Sopenharmony_ci if (btrfs_fs_closing(fs_info)) 282162306a36Sopenharmony_ci return -EAGAIN; 282262306a36Sopenharmony_ci 282362306a36Sopenharmony_ci /* At mount time we have ensured nodesize is in the range of [4K, 64K]. */ 282462306a36Sopenharmony_ci ASSERT(fs_info->nodesize <= BTRFS_STRIPE_LEN); 282562306a36Sopenharmony_ci 282662306a36Sopenharmony_ci /* 282762306a36Sopenharmony_ci * SCRUB_MAX_SECTORS_PER_BLOCK is calculated using the largest possible 282862306a36Sopenharmony_ci * value (max nodesize / min sectorsize), thus nodesize should always 282962306a36Sopenharmony_ci * be fine. 283062306a36Sopenharmony_ci */ 283162306a36Sopenharmony_ci ASSERT(fs_info->nodesize <= 283262306a36Sopenharmony_ci SCRUB_MAX_SECTORS_PER_BLOCK << fs_info->sectorsize_bits); 283362306a36Sopenharmony_ci 283462306a36Sopenharmony_ci /* Allocate outside of device_list_mutex */ 283562306a36Sopenharmony_ci sctx = scrub_setup_ctx(fs_info, is_dev_replace); 283662306a36Sopenharmony_ci if (IS_ERR(sctx)) 283762306a36Sopenharmony_ci return PTR_ERR(sctx); 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci ret = scrub_workers_get(fs_info); 284062306a36Sopenharmony_ci if (ret) 284162306a36Sopenharmony_ci goto out_free_ctx; 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_ci mutex_lock(&fs_info->fs_devices->device_list_mutex); 284462306a36Sopenharmony_ci dev = btrfs_find_device(fs_info->fs_devices, &args); 284562306a36Sopenharmony_ci if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && 284662306a36Sopenharmony_ci !is_dev_replace)) { 284762306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 284862306a36Sopenharmony_ci ret = -ENODEV; 284962306a36Sopenharmony_ci goto out; 285062306a36Sopenharmony_ci } 285162306a36Sopenharmony_ci 285262306a36Sopenharmony_ci if (!is_dev_replace && !readonly && 285362306a36Sopenharmony_ci !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { 285462306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 285562306a36Sopenharmony_ci btrfs_err_in_rcu(fs_info, 285662306a36Sopenharmony_ci "scrub on devid %llu: filesystem on %s is not writable", 285762306a36Sopenharmony_ci devid, btrfs_dev_name(dev)); 285862306a36Sopenharmony_ci ret = -EROFS; 285962306a36Sopenharmony_ci goto out; 286062306a36Sopenharmony_ci } 286162306a36Sopenharmony_ci 286262306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 286362306a36Sopenharmony_ci if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || 286462306a36Sopenharmony_ci test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) { 286562306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 286662306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 286762306a36Sopenharmony_ci ret = -EIO; 286862306a36Sopenharmony_ci goto out; 286962306a36Sopenharmony_ci } 287062306a36Sopenharmony_ci 287162306a36Sopenharmony_ci down_read(&fs_info->dev_replace.rwsem); 287262306a36Sopenharmony_ci if (dev->scrub_ctx || 287362306a36Sopenharmony_ci (!is_dev_replace && 287462306a36Sopenharmony_ci btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { 287562306a36Sopenharmony_ci up_read(&fs_info->dev_replace.rwsem); 287662306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 287762306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 287862306a36Sopenharmony_ci ret = -EINPROGRESS; 287962306a36Sopenharmony_ci goto out; 288062306a36Sopenharmony_ci } 288162306a36Sopenharmony_ci up_read(&fs_info->dev_replace.rwsem); 288262306a36Sopenharmony_ci 288362306a36Sopenharmony_ci sctx->readonly = readonly; 288462306a36Sopenharmony_ci dev->scrub_ctx = sctx; 288562306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 288662306a36Sopenharmony_ci 288762306a36Sopenharmony_ci /* 288862306a36Sopenharmony_ci * checking @scrub_pause_req here, we can avoid 288962306a36Sopenharmony_ci * race between committing transaction and scrubbing. 289062306a36Sopenharmony_ci */ 289162306a36Sopenharmony_ci __scrub_blocked_if_needed(fs_info); 289262306a36Sopenharmony_ci atomic_inc(&fs_info->scrubs_running); 289362306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci /* 289662306a36Sopenharmony_ci * In order to avoid deadlock with reclaim when there is a transaction 289762306a36Sopenharmony_ci * trying to pause scrub, make sure we use GFP_NOFS for all the 289862306a36Sopenharmony_ci * allocations done at btrfs_scrub_sectors() and scrub_sectors_for_parity() 289962306a36Sopenharmony_ci * invoked by our callees. The pausing request is done when the 290062306a36Sopenharmony_ci * transaction commit starts, and it blocks the transaction until scrub 290162306a36Sopenharmony_ci * is paused (done at specific points at scrub_stripe() or right above 290262306a36Sopenharmony_ci * before incrementing fs_info->scrubs_running). 290362306a36Sopenharmony_ci */ 290462306a36Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 290562306a36Sopenharmony_ci if (!is_dev_replace) { 290662306a36Sopenharmony_ci u64 old_super_errors; 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 290962306a36Sopenharmony_ci old_super_errors = sctx->stat.super_errors; 291062306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_ci btrfs_info(fs_info, "scrub: started on devid %llu", devid); 291362306a36Sopenharmony_ci /* 291462306a36Sopenharmony_ci * by holding device list mutex, we can 291562306a36Sopenharmony_ci * kick off writing super in log tree sync. 291662306a36Sopenharmony_ci */ 291762306a36Sopenharmony_ci mutex_lock(&fs_info->fs_devices->device_list_mutex); 291862306a36Sopenharmony_ci ret = scrub_supers(sctx, dev); 291962306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci spin_lock(&sctx->stat_lock); 292262306a36Sopenharmony_ci /* 292362306a36Sopenharmony_ci * Super block errors found, but we can not commit transaction 292462306a36Sopenharmony_ci * at current context, since btrfs_commit_transaction() needs 292562306a36Sopenharmony_ci * to pause the current running scrub (hold by ourselves). 292662306a36Sopenharmony_ci */ 292762306a36Sopenharmony_ci if (sctx->stat.super_errors > old_super_errors && !sctx->readonly) 292862306a36Sopenharmony_ci need_commit = true; 292962306a36Sopenharmony_ci spin_unlock(&sctx->stat_lock); 293062306a36Sopenharmony_ci } 293162306a36Sopenharmony_ci 293262306a36Sopenharmony_ci if (!ret) 293362306a36Sopenharmony_ci ret = scrub_enumerate_chunks(sctx, dev, start, end); 293462306a36Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 293562306a36Sopenharmony_ci 293662306a36Sopenharmony_ci atomic_dec(&fs_info->scrubs_running); 293762306a36Sopenharmony_ci wake_up(&fs_info->scrub_pause_wait); 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci if (progress) 294062306a36Sopenharmony_ci memcpy(progress, &sctx->stat, sizeof(*progress)); 294162306a36Sopenharmony_ci 294262306a36Sopenharmony_ci if (!is_dev_replace) 294362306a36Sopenharmony_ci btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d", 294462306a36Sopenharmony_ci ret ? "not finished" : "finished", devid, ret); 294562306a36Sopenharmony_ci 294662306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 294762306a36Sopenharmony_ci dev->scrub_ctx = NULL; 294862306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 294962306a36Sopenharmony_ci 295062306a36Sopenharmony_ci scrub_workers_put(fs_info); 295162306a36Sopenharmony_ci scrub_put_ctx(sctx); 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_ci /* 295462306a36Sopenharmony_ci * We found some super block errors before, now try to force a 295562306a36Sopenharmony_ci * transaction commit, as scrub has finished. 295662306a36Sopenharmony_ci */ 295762306a36Sopenharmony_ci if (need_commit) { 295862306a36Sopenharmony_ci struct btrfs_trans_handle *trans; 295962306a36Sopenharmony_ci 296062306a36Sopenharmony_ci trans = btrfs_start_transaction(fs_info->tree_root, 0); 296162306a36Sopenharmony_ci if (IS_ERR(trans)) { 296262306a36Sopenharmony_ci ret = PTR_ERR(trans); 296362306a36Sopenharmony_ci btrfs_err(fs_info, 296462306a36Sopenharmony_ci "scrub: failed to start transaction to fix super block errors: %d", ret); 296562306a36Sopenharmony_ci return ret; 296662306a36Sopenharmony_ci } 296762306a36Sopenharmony_ci ret = btrfs_commit_transaction(trans); 296862306a36Sopenharmony_ci if (ret < 0) 296962306a36Sopenharmony_ci btrfs_err(fs_info, 297062306a36Sopenharmony_ci "scrub: failed to commit transaction to fix super block errors: %d", ret); 297162306a36Sopenharmony_ci } 297262306a36Sopenharmony_ci return ret; 297362306a36Sopenharmony_ciout: 297462306a36Sopenharmony_ci scrub_workers_put(fs_info); 297562306a36Sopenharmony_ciout_free_ctx: 297662306a36Sopenharmony_ci scrub_free_ctx(sctx); 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci return ret; 297962306a36Sopenharmony_ci} 298062306a36Sopenharmony_ci 298162306a36Sopenharmony_civoid btrfs_scrub_pause(struct btrfs_fs_info *fs_info) 298262306a36Sopenharmony_ci{ 298362306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 298462306a36Sopenharmony_ci atomic_inc(&fs_info->scrub_pause_req); 298562306a36Sopenharmony_ci while (atomic_read(&fs_info->scrubs_paused) != 298662306a36Sopenharmony_ci atomic_read(&fs_info->scrubs_running)) { 298762306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 298862306a36Sopenharmony_ci wait_event(fs_info->scrub_pause_wait, 298962306a36Sopenharmony_ci atomic_read(&fs_info->scrubs_paused) == 299062306a36Sopenharmony_ci atomic_read(&fs_info->scrubs_running)); 299162306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 299262306a36Sopenharmony_ci } 299362306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 299462306a36Sopenharmony_ci} 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_civoid btrfs_scrub_continue(struct btrfs_fs_info *fs_info) 299762306a36Sopenharmony_ci{ 299862306a36Sopenharmony_ci atomic_dec(&fs_info->scrub_pause_req); 299962306a36Sopenharmony_ci wake_up(&fs_info->scrub_pause_wait); 300062306a36Sopenharmony_ci} 300162306a36Sopenharmony_ci 300262306a36Sopenharmony_ciint btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) 300362306a36Sopenharmony_ci{ 300462306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 300562306a36Sopenharmony_ci if (!atomic_read(&fs_info->scrubs_running)) { 300662306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 300762306a36Sopenharmony_ci return -ENOTCONN; 300862306a36Sopenharmony_ci } 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci atomic_inc(&fs_info->scrub_cancel_req); 301162306a36Sopenharmony_ci while (atomic_read(&fs_info->scrubs_running)) { 301262306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 301362306a36Sopenharmony_ci wait_event(fs_info->scrub_pause_wait, 301462306a36Sopenharmony_ci atomic_read(&fs_info->scrubs_running) == 0); 301562306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 301662306a36Sopenharmony_ci } 301762306a36Sopenharmony_ci atomic_dec(&fs_info->scrub_cancel_req); 301862306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_ci return 0; 302162306a36Sopenharmony_ci} 302262306a36Sopenharmony_ci 302362306a36Sopenharmony_ciint btrfs_scrub_cancel_dev(struct btrfs_device *dev) 302462306a36Sopenharmony_ci{ 302562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = dev->fs_info; 302662306a36Sopenharmony_ci struct scrub_ctx *sctx; 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 302962306a36Sopenharmony_ci sctx = dev->scrub_ctx; 303062306a36Sopenharmony_ci if (!sctx) { 303162306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 303262306a36Sopenharmony_ci return -ENOTCONN; 303362306a36Sopenharmony_ci } 303462306a36Sopenharmony_ci atomic_inc(&sctx->cancel_req); 303562306a36Sopenharmony_ci while (dev->scrub_ctx) { 303662306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 303762306a36Sopenharmony_ci wait_event(fs_info->scrub_pause_wait, 303862306a36Sopenharmony_ci dev->scrub_ctx == NULL); 303962306a36Sopenharmony_ci mutex_lock(&fs_info->scrub_lock); 304062306a36Sopenharmony_ci } 304162306a36Sopenharmony_ci mutex_unlock(&fs_info->scrub_lock); 304262306a36Sopenharmony_ci 304362306a36Sopenharmony_ci return 0; 304462306a36Sopenharmony_ci} 304562306a36Sopenharmony_ci 304662306a36Sopenharmony_ciint btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, 304762306a36Sopenharmony_ci struct btrfs_scrub_progress *progress) 304862306a36Sopenharmony_ci{ 304962306a36Sopenharmony_ci struct btrfs_dev_lookup_args args = { .devid = devid }; 305062306a36Sopenharmony_ci struct btrfs_device *dev; 305162306a36Sopenharmony_ci struct scrub_ctx *sctx = NULL; 305262306a36Sopenharmony_ci 305362306a36Sopenharmony_ci mutex_lock(&fs_info->fs_devices->device_list_mutex); 305462306a36Sopenharmony_ci dev = btrfs_find_device(fs_info->fs_devices, &args); 305562306a36Sopenharmony_ci if (dev) 305662306a36Sopenharmony_ci sctx = dev->scrub_ctx; 305762306a36Sopenharmony_ci if (sctx) 305862306a36Sopenharmony_ci memcpy(progress, &sctx->stat, sizeof(*progress)); 305962306a36Sopenharmony_ci mutex_unlock(&fs_info->fs_devices->device_list_mutex); 306062306a36Sopenharmony_ci 306162306a36Sopenharmony_ci return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; 306262306a36Sopenharmony_ci} 3063