Lines Matching refs:conf
22 * conf->seq_write is the number of the last batch successfully written.
23 * conf->seq_flush is the number of the last batch that was closed to new additions.
70 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
72 int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK;
73 return &conf->stripe_hashtbl[hash];
76 static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
78 return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK;
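
Both helpers reduce a sector to a bucket index by shifting off the per-stripe sector count and masking. A standalone model of that arithmetic, assuming the 4 KiB default stripe size set up at lines 7198-7200 (so a shift of 3 sector bits) and the bucket counts of a typical 4 KiB-page, 64-bit build (512 hash buckets, 8 hash locks; the real values come from HASH_MASK and STRIPE_HASH_LOCKS_MASK in the headers):

/* Illustrative only; not the kernel helpers. */
#include <stdio.h>

#define STRIPE_SHIFT            3          /* log2(4096 >> 9): 8 sectors per stripe */
#define HASH_MASK               (512 - 1)  /* assumed bucket count */
#define STRIPE_HASH_LOCKS_MASK  (8 - 1)    /* assumed lock count */

int main(void)
{
        unsigned long long sect = 1000000;

        printf("bucket %llu, hash lock %llu\n",
               (sect >> STRIPE_SHIFT) & HASH_MASK,
               (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK);
        return 0;       /* prints "bucket 72, hash lock 0" */
}
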
81 static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
83 spin_lock_irq(conf->hash_locks + hash);
84 spin_lock(&conf->device_lock);
87 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
89 spin_unlock(&conf->device_lock);
90 spin_unlock_irq(conf->hash_locks + hash);
93 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
96 spin_lock_irq(conf->hash_locks);
98 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
99 spin_lock(&conf->device_lock);
102 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
105 spin_unlock(&conf->device_lock);
107 spin_unlock(conf->hash_locks + i);
108 spin_unlock_irq(conf->hash_locks);
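
Lines 81-108 pin down the lock ordering used throughout the file: a single hash lock is always taken before conf->device_lock, the _all_ variants take every hash lock in ascending index order before device_lock, and the unlock paths release in exact reverse order. A minimal userspace sketch of that ordering, with pthread mutexes standing in for the kernel spinlocks (illustrative only, not the kernel code):

/* Userspace model of the hash-lock/device-lock ordering; not kernel code. */
#include <pthread.h>

#define NR_STRIPE_HASH_LOCKS 8

static pthread_mutex_t hash_locks[NR_STRIPE_HASH_LOCKS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};
static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_device_hash_lock(int hash)
{
        pthread_mutex_lock(&hash_locks[hash]);    /* outer: one hash bucket */
        pthread_mutex_lock(&device_lock);         /* inner: global device lock */
}

static void unlock_device_hash_lock(int hash)
{
        pthread_mutex_unlock(&device_lock);
        pthread_mutex_unlock(&hash_locks[hash]);
}

static void lock_all_device_hash_locks(void)
{
        for (int i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
                pthread_mutex_lock(&hash_locks[i]);   /* always ascending order */
        pthread_mutex_lock(&device_lock);
}

static void unlock_all_device_hash_locks(void)
{
        pthread_mutex_unlock(&device_lock);
        for (int i = NR_STRIPE_HASH_LOCKS - 1; i >= 0; i--)
                pthread_mutex_unlock(&hash_locks[i]);
}

int main(void)
{
        lock_device_hash_lock(3);
        unlock_device_hash_lock(3);
        lock_all_device_hash_locks();
        unlock_all_device_hash_locks();
        return 0;
}
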
150 static void print_raid5_conf (struct r5conf *conf);
168 struct r5conf *conf = sh->raid_conf;
180 group = conf->worker_groups + cpu_to_group(cpu);
189 if (conf->worker_cnt_per_group == 0) {
190 md_wakeup_thread(conf->mddev->thread);
194 group = conf->worker_groups + cpu_to_group(sh->cpu);
202 for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
212 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
219 BUG_ON(atomic_read(&conf->active_stripes)==0);
221 if (r5c_is_writeback(conf->log))
233 (conf->quiesce && r5c_is_writeback(conf->log) &&
243 list_add_tail(&sh->lru, &conf->delayed_list);
245 sh->bm_seq - conf->seq_write > 0)
246 list_add_tail(&sh->lru, &conf->bitmap_list);
250 if (conf->worker_cnt_per_group == 0) {
253 &conf->loprio_list);
256 &conf->handle_list);
262 md_wakeup_thread(conf->mddev->thread);
266 if (atomic_dec_return(&conf->preread_active_stripes)
268 md_wakeup_thread(conf->mddev->thread);
269 atomic_dec(&conf->active_stripes);
271 if (!r5c_is_writeback(conf->log))
277 else if (injournal == conf->raid_disks - conf->max_degraded) {
280 atomic_inc(&conf->r5c_cached_full_stripes);
282 atomic_dec(&conf->r5c_cached_partial_stripes);
283 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
284 r5c_check_cached_full_stripe(conf);
291 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
297 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
301 do_release_stripe(conf, sh, temp_inactive_list);
311 static void release_inactive_stripe_list(struct r5conf *conf,
332 spin_lock_irqsave(conf->hash_locks + hash, flags);
333 if (list_empty(conf->inactive_list + hash) &&
335 atomic_dec(&conf->empty_inactive_list_nr);
336 list_splice_tail_init(list, conf->inactive_list + hash);
338 spin_unlock_irqrestore(conf->hash_locks + hash, flags);
345 wake_up(&conf->wait_for_stripe);
346 if (atomic_read(&conf->active_stripes) == 0)
347 wake_up(&conf->wait_for_quiescent);
348 if (conf->retry_read_aligned)
349 md_wakeup_thread(conf->mddev->thread);
353 /* should hold conf->device_lock already */
354 static int release_stripe_list(struct r5conf *conf,
361 head = llist_del_all(&conf->released_stripes);
375 __release_stripe(conf, sh, &temp_inactive_list[hash]);
384 struct r5conf *conf = sh->raid_conf;
395 if (unlikely(!conf->mddev->thread) ||
398 wakeup = llist_add(&sh->release_list, &conf->released_stripes);
400 md_wakeup_thread(conf->mddev->thread);
404 if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
407 do_release_stripe(conf, sh, &list);
408 spin_unlock_irqrestore(&conf->device_lock, flags);
409 release_inactive_stripe_list(conf, &list, hash);
421 static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
423 struct hlist_head *hp = stripe_hash(conf, sh->sector);
432 static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
437 if (list_empty(conf->inactive_list + hash))
439 first = (conf->inactive_list + hash)->next;
443 atomic_inc(&conf->active_stripes);
445 if (list_empty(conf->inactive_list + hash))
446 atomic_inc(&conf->empty_inactive_list_nr);
490 init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks)
497 /* Each of the sh->dev[i] need one conf->stripe_size */
498 cnt = PAGE_SIZE / conf->stripe_size;
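
Worked example for the shared-page sizing at line 498: cnt is how many per-device stripe_size chunks fit in a single page. On a 64 KiB-page build with the default 4 KiB stripe size, cnt = 65536 / 4096 = 16, so sixteen sh->dev[] slots share each backing page and a 20-device stripe can be carved out of two shared pages; with 4 KiB pages cnt is 1 and there is nothing to share.
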
562 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
567 struct r5conf *conf = sh->raid_conf;
578 seq = read_seqcount_begin(&conf->gen_lock);
579 sh->generation = conf->generation - previous;
580 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
582 stripe_set_idx(sector, conf, previous, sh);
599 if (read_seqcount_retry(&conf->gen_lock, seq))
602 insert_hash(conf, sh);
607 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
613 hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
633 int raid5_calc_degraded(struct r5conf *conf)
640 for (i = 0; i < conf->previous_raid_disks; i++) {
641 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
643 rdev = rcu_dereference(conf->disks[i].replacement);
658 if (conf->raid_disks >= conf->previous_raid_disks)
662 if (conf->raid_disks == conf->previous_raid_disks)
666 for (i = 0; i < conf->raid_disks; i++) {
667 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
669 rdev = rcu_dereference(conf->disks[i].replacement);
680 if (conf->raid_disks <= conf->previous_raid_disks)
689 static bool has_failed(struct r5conf *conf)
691 int degraded = conf->mddev->degraded;
693 if (test_bit(MD_BROKEN, &conf->mddev->flags))
696 if (conf->mddev->reshape_position != MaxSector)
697 degraded = raid5_calc_degraded(conf);
699 return degraded > conf->max_degraded;
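
Put concretely, has_failed() reports the array as failed once more members are lost than the parity can absorb: with RAID6's max_degraded of 2 (line 7331) a third failed device trips it, while for RAID5/RAID4 (max_degraded of 1, line 7337) a second failure does.
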
703 raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
707 int hash = stripe_hash_locks_hash(conf, sector);
712 spin_lock_irq(conf->hash_locks + hash);
715 wait_event_lock_irq(conf->wait_for_quiescent,
716 conf->quiesce == 0 || noquiesce,
717 *(conf->hash_locks + hash));
718 sh = __find_stripe(conf, sector, conf->generation - previous);
720 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
721 sh = get_free_stripe(conf, hash);
723 &conf->cache_state))
725 &conf->cache_state);
730 r5c_check_stripe_cache_usage(conf);
733 &conf->cache_state);
734 r5l_wake_reclaim(conf->log, 0);
736 conf->wait_for_stripe,
737 !list_empty(conf->inactive_list + hash) &&
738 (atomic_read(&conf->active_stripes)
739 < (conf->max_nr_stripes * 3 / 4)
741 &conf->cache_state)),
742 *(conf->hash_locks + hash));
744 &conf->cache_state);
750 spin_lock(&conf->device_lock);
753 atomic_inc(&conf->active_stripes);
757 if (!list_empty(conf->inactive_list + hash))
760 if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
761 atomic_inc(&conf->empty_inactive_list_nr);
768 spin_unlock(&conf->device_lock);
772 spin_unlock_irq(conf->hash_locks + hash);
806 struct r5conf *conf = sh->raid_conf;
808 if (raid5_has_log(conf) || raid5_has_ppl(conf))
816 static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
826 if (!sector_div(tmp_sec, conf->chunk_sectors))
828 head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
830 hash = stripe_hash_locks_hash(conf, head_sector);
831 spin_lock_irq(conf->hash_locks + hash);
832 head = __find_stripe(conf, head_sector, conf->generation);
834 spin_lock(&conf->device_lock);
837 atomic_inc(&conf->active_stripes);
841 if (!list_empty(conf->inactive_list + hash))
844 if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
845 atomic_inc(&conf->empty_inactive_list_nr);
852 spin_unlock(&conf->device_lock);
854 spin_unlock_irq(conf->hash_locks + hash);
907 if (atomic_dec_return(&conf->preread_active_stripes)
909 md_wakeup_thread(conf->mddev->thread);
930 static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
932 sector_t progress = conf->reshape_progress;
934 * of conf->generation, or ->data_offset that was set before
940 if (sh->generation == conf->generation - 1)
970 static void dispatch_defer_bios(struct r5conf *conf, int target,
977 if (conf->pending_data_cnt == 0)
980 list_sort(NULL, &conf->pending_list, cmp_stripe);
982 first = conf->pending_list.next;
985 if (conf->next_pending_data)
986 list_move_tail(&conf->pending_list,
987 &conf->next_pending_data->sibling);
989 while (!list_empty(&conf->pending_list)) {
990 data = list_first_entry(&conf->pending_list,
997 list_move(&data->sibling, &conf->free_list);
1002 conf->pending_data_cnt -= cnt;
1003 BUG_ON(conf->pending_data_cnt < 0 || cnt < target);
1005 if (next != &conf->pending_list)
1006 conf->next_pending_data = list_entry(next,
1009 conf->next_pending_data = NULL;
1011 if (first != &conf->pending_list)
1012 list_move_tail(&conf->pending_list, first);
1015 static void flush_deferred_bios(struct r5conf *conf)
1019 if (conf->pending_data_cnt == 0)
1022 spin_lock(&conf->pending_bios_lock);
1023 dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp);
1024 BUG_ON(conf->pending_data_cnt != 0);
1025 spin_unlock(&conf->pending_bios_lock);
1030 static void defer_issue_bios(struct r5conf *conf, sector_t sector,
1036 spin_lock(&conf->pending_bios_lock);
1037 ent = list_first_entry(&conf->free_list, struct r5pending_data,
1039 list_move_tail(&ent->sibling, &conf->pending_list);
1043 conf->pending_data_cnt++;
1044 if (conf->pending_data_cnt >= PENDING_IO_MAX)
1045 dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp);
1047 spin_unlock(&conf->pending_bios_lock);
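
Lines 970-1047 are the deferred-bio path enabled by batch_bio_dispatch: defer_issue_bios() parks a stripe's bios on an entry taken from conf->free_list, moves it to conf->pending_list and bumps pending_data_cnt; once the counter reaches PENDING_IO_MAX, dispatch_defer_bios() pushes out a sector-sorted batch, and flush_deferred_bios() later drains whatever is still queued. A toy model of just that counter-and-threshold behaviour (invented names and placeholder constants, not the kernel structures):

/* Toy model of the defer/flush counter above; not kernel code. */
#include <stdio.h>

#define PENDING_IO_MAX       256   /* placeholder threshold */
#define PENDING_IO_ONE_FLUSH 128   /* placeholder batch size */

static int pending_cnt;

static void dispatch(int target)
{
        int n = target < pending_cnt ? target : pending_cnt;

        printf("dispatching %d deferred entries\n", n);
        pending_cnt -= n;
}

static void defer_issue(void)
{
        pending_cnt++;                          /* park this stripe's bios */
        if (pending_cnt >= PENDING_IO_MAX)      /* overflow: push out one batch */
                dispatch(PENDING_IO_ONE_FLUSH);
}

static void flush_deferred(void)
{
        if (pending_cnt)                        /* raid5d/worker: drain the rest */
                dispatch(pending_cnt);
}

int main(void)
{
        for (int i = 0; i < 300; i++)
                defer_issue();
        flush_deferred();   /* prints a 128-entry batch, then the remaining 172 */
        return 0;
}
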
1059 struct r5conf *conf = sh->raid_conf;
1070 should_defer = conf->batch_bio_dispatch && conf->group_cnt;
1101 rrdev = rcu_dereference(conf->disks[i].replacement);
1103 rdev = rcu_dereference(conf->disks[i].rdev);
1138 int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
1145 if (!conf->mddev->external &&
1146 conf->mddev->sb_flags) {
1151 md_check_recovery(conf->mddev);
1159 md_wait_for_blocked_rdev(rdev, conf->mddev);
1162 rdev_dec_pending(rdev, conf->mddev);
1170 md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf));
1187 if (use_new_offset(conf, sh))
1210 bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
1212 bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
1225 if (conf->mddev->gendisk)
1227 bi, disk_devt(conf->mddev->gendisk),
1237 md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf));
1254 if (use_new_offset(conf, sh))
1264 rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf);
1266 rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf);
1275 if (conf->mddev->gendisk)
1277 rbi, disk_devt(conf->mddev->gendisk),
1302 defer_issue_bios(conf, head_sh->sector, &pending_bios);
1316 struct r5conf *conf = sh->raid_conf;
1338 if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf))
1339 clen = RAID5_STRIPE_SIZE(conf) - page_offset;
1347 if (conf->skip_copy &&
1349 clen == RAID5_STRIPE_SIZE(conf) &&
1374 struct r5conf *conf = sh->raid_conf;
1395 dev->sector + RAID5_STRIPE_SECTORS(conf)) {
1396 rbi2 = r5_next_bio(conf, rbi, dev->sector);
1413 struct r5conf *conf = sh->raid_conf;
1428 dev->sector + RAID5_STRIPE_SECTORS(conf)) {
1432 rbi = r5_next_bio(conf, rbi, dev->sector);
1858 struct r5conf *conf = sh->raid_conf;
1891 dev->sector + RAID5_STRIPE_SECTORS(conf)) {
1902 r5c_is_writeback(conf->log));
1904 !r5c_is_writeback(conf->log)) {
1910 wbi = r5_next_bio(conf, wbi, dev->sector);
2215 struct r5conf *conf = sh->raid_conf;
2216 int level = conf->level;
2221 percpu = per_cpu_ptr(conf->percpu, cpu);
2294 int disks, struct r5conf *conf)
2308 sh->raid_conf = conf;
2317 if (raid5_has_ppl(conf)) {
2325 if (init_stripe_shared_pages(sh, conf, disks)) {
2333 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
2337 sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
2343 free_stripe(conf->slab_cache, sh);
2347 conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
2349 atomic_inc(&conf->active_stripes);
2352 conf->max_nr_stripes++;
2356 static int grow_stripes(struct r5conf *conf, int num)
2359 size_t namelen = sizeof(conf->cache_name[0]);
2360 int devs = max(conf->raid_disks, conf->previous_raid_disks);
2362 if (conf->mddev->gendisk)
2363 snprintf(conf->cache_name[0], namelen,
2364 "raid%d-%s", conf->level, mdname(conf->mddev));
2366 snprintf(conf->cache_name[0], namelen,
2367 "raid%d-%p", conf->level, conf->mddev);
2368 snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
2370 conf->active_name = 0;
2371 sc = kmem_cache_create(conf->cache_name[conf->active_name],
2376 conf->slab_cache = sc;
2377 conf->pool_size = devs;
2379 if (!grow_one_stripe(conf, GFP_KERNEL))
2426 static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
2436 if (conf->scribble_disks >= new_disks &&
2437 conf->scribble_sectors >= new_sectors)
2439 mddev_suspend(conf->mddev);
2445 percpu = per_cpu_ptr(conf->percpu, cpu);
2447 new_sectors / RAID5_STRIPE_SECTORS(conf));
2453 mddev_resume(conf->mddev);
2455 conf->scribble_disks = new_disks;
2456 conf->scribble_sectors = new_sectors;
2461 static int resize_stripes(struct r5conf *conf, int newsize)
2475 * 3/ reallocate conf->disks to be suitable bigger. If this fails,
2494 md_allow_write(conf->mddev);
2497 sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
2504 mutex_lock(&conf->cache_size_mutex);
2506 for (i = conf->max_nr_stripes; i; i--) {
2507 nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
2521 mutex_unlock(&conf->cache_size_mutex);
2531 lock_device_hash_lock(conf, hash);
2532 wait_event_cmd(conf->wait_for_stripe,
2533 !list_empty(conf->inactive_list + hash),
2534 unlock_device_hash_lock(conf, hash),
2535 lock_device_hash_lock(conf, hash));
2536 osh = get_free_stripe(conf, hash);
2537 unlock_device_hash_lock(conf, hash);
2545 for(i=0; i<conf->pool_size; i++) {
2551 free_stripe(conf->slab_cache, osh);
2553 if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
2554 !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
2559 kmem_cache_destroy(conf->slab_cache);
2564 * conf->disks and the scribble region
2568 for (i = 0; i < conf->pool_size; i++)
2569 ndisks[i] = conf->disks[i];
2571 for (i = conf->pool_size; i < newsize; i++) {
2578 for (i = conf->pool_size; i < newsize; i++)
2583 kfree(conf->disks);
2584 conf->disks = ndisks;
2589 conf->slab_cache = sc;
2590 conf->active_name = 1-conf->active_name;
2606 for (i = conf->raid_disks; i < newsize; i++) {
2614 for (i=conf->raid_disks; i < newsize; i++)
2629 conf->pool_size = newsize;
2630 mutex_unlock(&conf->cache_size_mutex);
2635 static int drop_one_stripe(struct r5conf *conf)
2638 int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
2640 spin_lock_irq(conf->hash_locks + hash);
2641 sh = get_free_stripe(conf, hash);
2642 spin_unlock_irq(conf->hash_locks + hash);
2647 free_stripe(conf->slab_cache, sh);
2648 atomic_dec(&conf->active_stripes);
2649 conf->max_nr_stripes--;
2653 static void shrink_stripes(struct r5conf *conf)
2655 while (conf->max_nr_stripes &&
2656 drop_one_stripe(conf))
2659 kmem_cache_destroy(conf->slab_cache);
2660 conf->slab_cache = NULL;
2666 struct r5conf *conf = sh->raid_conf;
2690 rdev = conf->disks[i].replacement;
2692 rdev = conf->disks[i].rdev;
2694 if (use_new_offset(conf, sh))
2707 mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf),
2710 atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors);
2736 mdname(conf->mddev),
2739 else if (conf->mddev->degraded >= conf->max_degraded) {
2743 mdname(conf->mddev),
2751 mdname(conf->mddev),
2755 > conf->max_nr_stripes) {
2758 mdname(conf->mddev),
2760 conf->max_nr_stripes);
2762 mdname(conf->mddev), bdn);
2783 rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
2784 md_error(conf->mddev, rdev);
2787 rdev_dec_pending(rdev, conf->mddev);
2797 struct r5conf *conf = sh->raid_conf;
2806 rdev = conf->disks[i].rdev;
2810 rdev = conf->disks[i].replacement;
2818 rdev = conf->disks[i].rdev;
2833 md_error(conf->mddev, rdev);
2835 RAID5_STRIPE_SECTORS(conf),
2847 RAID5_STRIPE_SECTORS(conf),
2858 rdev_dec_pending(rdev, conf->mddev);
2876 struct r5conf *conf = mddev->private;
2883 spin_lock_irqsave(&conf->device_lock, flags);
2886 mddev->degraded = raid5_calc_degraded(conf);
2888 if (has_failed(conf)) {
2889 set_bit(MD_BROKEN, &conf->mddev->flags);
2890 conf->recovery_disabled = mddev->recovery_disabled;
2893 mdname(mddev), mddev->degraded, conf->raid_disks);
2896 mdname(mddev), conf->raid_disks - mddev->degraded);
2899 spin_unlock_irqrestore(&conf->device_lock, flags);
2912 sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
2922 int algorithm = previous ? conf->prev_algo
2923 : conf->algorithm;
2924 int sectors_per_chunk = previous ? conf->prev_chunk_sectors
2925 : conf->chunk_sectors;
2926 int raid_disks = previous ? conf->previous_raid_disks
2927 : conf->raid_disks;
2928 int data_disks = raid_disks - conf->max_degraded;
2948 switch(conf->level) {
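
The densest arithmetic in this listing lives in raid5_compute_sector() (lines 2912-2948): the array-relative sector is split into a chunk number and an in-chunk offset, the chunk number is spread across the data disks, and the parity disk index is derived from the layout algorithm before the per-device sector is returned. A simplified standalone version for the RAID5 left-symmetric layout only, written as a sketch under that assumption (it omits RAID4/RAID6, the DDF layouts and the previous/reshape geometry the real function handles):

/* Simplified RAID5 left-symmetric mapping; illustrative only. */
#include <stdio.h>

static unsigned long long compute_sector(unsigned long long r_sector,
                                         int raid_disks, int sectors_per_chunk,
                                         int *dd_idx, int *pd_idx)
{
        int data_disks = raid_disks - 1;                    /* one parity device */
        unsigned long long chunk_number = r_sector / sectors_per_chunk;
        unsigned int chunk_offset = r_sector % sectors_per_chunk;
        unsigned long long stripe = chunk_number / data_disks;

        *dd_idx = chunk_number % data_disks;
        *pd_idx = data_disks - (int)(stripe % raid_disks);  /* parity rotates */
        *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;     /* data follows it */

        return stripe * sectors_per_chunk + chunk_offset;   /* sector on that disk */
}

int main(void)
{
        int dd, pd;
        /* 4-device RAID5, 512 KiB chunks (1024 sectors), array sector 5000 */
        unsigned long long s = compute_sector(5000, 4, 1024, &dd, &pd);

        printf("device sector %llu, data disk %d, parity disk %d\n", s, dd, pd);
        return 0;   /* prints "device sector 1928, data disk 0, parity disk 2" */
}
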
3116 struct r5conf *conf = sh->raid_conf;
3118 int data_disks = raid_disks - conf->max_degraded;
3120 int sectors_per_chunk = previous ? conf->prev_chunk_sectors
3121 : conf->chunk_sectors;
3122 int algorithm = previous ? conf->prev_algo
3123 : conf->algorithm;
3136 switch(conf->level) {
3223 check = raid5_compute_sector(conf, r_sector,
3228 mdname(conf->mddev));
3255 * stripe, we need to reserve (conf->raid_disk + 1) pages per stripe
3257 * operation, we only need (conf->max_degraded + 1) pages per stripe.
3272 static inline bool delay_towrite(struct r5conf *conf,
3281 if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) &&
3295 struct r5conf *conf = sh->raid_conf;
3296 int level = conf->level;
3310 if (dev->towrite && !delay_towrite(conf, dev, s)) {
3336 if (s->locked + conf->max_degraded == disks)
3338 atomic_inc(&conf->pending_full_writes);
3408 struct r5conf *conf = sh->raid_conf;
3434 if (forwrite && raid5_has_ppl(conf)) {
3460 if (first + conf->chunk_sectors * (count - 1) != last)
3472 md_write_inc(conf->mddev, bi);
3478 sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) &&
3480 bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) {
3484 if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf))
3493 if (conf->mddev->bitmap && firstwrite) {
3508 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
3509 RAID5_STRIPE_SECTORS(conf), 0);
3513 sh->bm_seq = conf->seq_flush+1;
3520 stripe_add_to_batch_list(conf, sh);
3529 static void end_reshape(struct r5conf *conf);
3531 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
3535 previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
3538 int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
3540 raid5_compute_sector(conf,
3541 stripe * (disks - conf->max_degraded)
3548 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3560 rdev = rcu_dereference(conf->disks[i].rdev);
3571 RAID5_STRIPE_SECTORS(conf), 0))
3572 md_error(conf->mddev, rdev);
3573 rdev_dec_pending(rdev, conf->mddev);
3588 wake_up(&conf->wait_for_overlap);
3591 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3592 struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector);
3594 md_write_end(conf->mddev);
3599 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
3600 RAID5_STRIPE_SECTORS(conf), 0, 0);
3612 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3613 struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
3615 md_write_end(conf->mddev);
3624 s->failed > conf->max_degraded &&
3632 wake_up(&conf->wait_for_overlap);
3636 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3638 r5_next_bio(conf, bi, sh->dev[i].sector);
3645 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
3646 RAID5_STRIPE_SECTORS(conf), 0, 0);
3656 if (atomic_dec_and_test(&conf->pending_full_writes))
3657 md_wakeup_thread(conf->mddev->thread);
3661 handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
3670 wake_up(&conf->wait_for_overlap);
3680 if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
3685 for (i = 0; i < conf->raid_disks; i++) {
3686 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
3691 RAID5_STRIPE_SECTORS(conf), 0))
3693 rdev = rcu_dereference(conf->disks[i].replacement);
3698 RAID5_STRIPE_SECTORS(conf), 0))
3703 conf->recovery_disabled =
3704 conf->mddev->recovery_disabled;
3706 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort);
3960 static void handle_stripe_clean_event(struct r5conf *conf,
3991 dev->sector + RAID5_STRIPE_SECTORS(conf)) {
3992 wbi2 = r5_next_bio(conf, wbi, dev->sector);
3993 md_write_end(conf->mddev);
3997 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
3998 RAID5_STRIPE_SECTORS(conf),
4036 spin_lock_irq(conf->hash_locks + hash);
4038 spin_unlock_irq(conf->hash_locks + hash);
4053 if (atomic_dec_and_test(&conf->pending_full_writes))
4054 md_wakeup_thread(conf->mddev->thread);
4075 static int handle_stripe_dirtying(struct r5conf *conf,
4081 sector_t recovery_cp = conf->mddev->recovery_cp;
4090 if (conf->rmw_level == PARITY_DISABLE_RMW ||
4098 conf->rmw_level, (unsigned long long)recovery_cp,
4103 if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
4130 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
4132 if (conf->mddev->queue)
4133 blk_add_trace_msg(conf->mddev->queue,
4154 &conf->cache_state)) {
4168 if (((dev->towrite && !delay_towrite(conf, dev, s)) ||
4187 if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) {
4212 if (rcw && conf->mddev->queue)
4213 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
4239 static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
4301 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
4302 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
4306 "%llu-%llu\n", mdname(conf->mddev),
4309 RAID5_STRIPE_SECTORS(conf));
4332 static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
4428 mdname(conf->mddev),
4466 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches);
4467 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {
4471 "%llu-%llu\n", mdname(conf->mddev),
4474 RAID5_STRIPE_SECTORS(conf));
4509 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
4526 sector_t s = raid5_compute_sector(conf, bn, 0,
4528 sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1);
4546 sh->dev[i].offset, RAID5_STRIPE_SIZE(conf),
4551 for (j = 0; j < conf->raid_disks; j++)
4556 if (j == conf->raid_disks) {
4583 struct r5conf *conf = sh->raid_conf;
4595 s->log_failed = r5l_log_disk_error(conf);
4643 rdev = rcu_dereference(conf->disks[i].replacement);
4645 rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
4646 !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
4654 rdev = rcu_dereference(conf->disks[i].rdev);
4660 is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
4687 else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
4702 conf->disks[i].rdev);
4715 conf->disks[i].rdev);
4724 conf->disks[i].replacement);
4746 conf->disks[i].replacement);
4767 sh->sector >= conf->mddev->recovery_cp ||
4768 test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
4878 struct r5conf *conf = sh->raid_conf;
4935 test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) {
4947 rdev_dec_pending(s.blocked_rdev, conf->mddev);
4967 if (s.failed > conf->max_degraded ||
4973 handle_failed_stripe(conf, sh, &s, disks);
4975 handle_failed_sync(conf, sh, &s);
5028 || conf->level < 6;
5039 handle_stripe_clean_event(conf, sh, disks);
5042 r5c_handle_cached_data_endio(conf, sh, disks);
5061 r5c_finish_stripe_write_out(conf, sh, &s);
5073 if (!r5c_is_writeback(conf->log)) {
5075 handle_stripe_dirtying(conf, sh, &s, disks);
5081 ret = r5c_try_caching_write(conf, sh, &s,
5094 ret = handle_stripe_dirtying(conf, sh, &s,
5111 if (conf->level == 6)
5112 handle_parity_checks6(conf, sh, &s, disks);
5114 handle_parity_checks5(conf, sh, &s, disks);
5121 for (i = 0; i < conf->raid_disks; i++)
5135 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
5138 wake_up(&conf->wait_for_overlap);
5144 if (s.failed <= conf->max_degraded && !conf->mddev->ro)
5165 = raid5_get_active_stripe(conf, sh->sector, 1, 1, 1);
5174 atomic_inc(&conf->preread_active_stripes);
5183 for (i = conf->raid_disks; i--; ) {
5193 sh->disks = conf->raid_disks;
5194 stripe_set_idx(sh->sector, conf, 0, sh);
5198 atomic_dec(&conf->reshape_stripes);
5199 wake_up(&conf->wait_for_overlap);
5200 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
5205 handle_stripe_expansion(conf, sh);
5210 if (conf->mddev->external)
5212 conf->mddev);
5219 conf->mddev);
5228 rdev = conf->disks[i].rdev;
5230 RAID5_STRIPE_SECTORS(conf), 0))
5231 md_error(conf->mddev, rdev);
5232 rdev_dec_pending(rdev, conf->mddev);
5235 rdev = conf->disks[i].rdev;
5237 RAID5_STRIPE_SECTORS(conf), 0);
5238 rdev_dec_pending(rdev, conf->mddev);
5241 rdev = conf->disks[i].replacement;
5244 rdev = conf->disks[i].rdev;
5246 RAID5_STRIPE_SECTORS(conf), 0);
5247 rdev_dec_pending(rdev, conf->mddev);
5261 atomic_dec(&conf->preread_active_stripes);
5262 if (atomic_read(&conf->preread_active_stripes) <
5264 md_wakeup_thread(conf->mddev->thread);
5270 static void raid5_activate_delayed(struct r5conf *conf)
5272 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
5273 while (!list_empty(&conf->delayed_list)) {
5274 struct list_head *l = conf->delayed_list.next;
5280 atomic_inc(&conf->preread_active_stripes);
5281 list_add_tail(&sh->lru, &conf->hold_list);
5287 static void activate_bit_delay(struct r5conf *conf,
5292 list_add(&head, &conf->bitmap_list);
5293 list_del_init(&conf->bitmap_list);
5300 __release_stripe(conf, sh, &temp_inactive_list[hash]);
5306 struct r5conf *conf = mddev->private;
5313 chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
5322 static void add_bio_to_retry(struct bio *bi,struct r5conf *conf)
5326 spin_lock_irqsave(&conf->device_lock, flags);
5328 bi->bi_next = conf->retry_read_aligned_list;
5329 conf->retry_read_aligned_list = bi;
5331 spin_unlock_irqrestore(&conf->device_lock, flags);
5332 md_wakeup_thread(conf->mddev->thread);
5335 static struct bio *remove_bio_from_retry(struct r5conf *conf,
5340 bi = conf->retry_read_aligned;
5342 *offset = conf->retry_read_offset;
5343 conf->retry_read_aligned = NULL;
5346 bi = conf->retry_read_aligned_list;
5348 conf->retry_read_aligned_list = bi->bi_next;
5366 struct r5conf *conf;
5375 conf = mddev->private;
5377 rdev_dec_pending(rdev, conf->mddev);
5381 if (atomic_dec_and_test(&conf->active_aligned_reads))
5382 wake_up(&conf->wait_for_quiescent);
5388 add_bio_to_retry(raid_bi, conf);
5393 struct r5conf *conf = mddev->private;
5419 raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
5424 rdev = rcu_dereference(conf->disks[dd_idx].replacement);
5427 rdev = rcu_dereference(conf->disks[dd_idx].rdev);
5435 if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
5461 spin_lock_irq(&conf->device_lock);
5462 wait_event_lock_irq(conf->wait_for_quiescent,
5463 conf->quiesce == 0,
5464 conf->device_lock);
5465 atomic_inc(&conf->active_aligned_reads);
5466 spin_unlock_irq(&conf->device_lock);
5489 struct r5conf *conf = mddev->private;
5490 split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
5512 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
5517 bool second_try = !r5c_is_writeback(conf->log) &&
5518 !r5l_log_disk_error(conf);
5519 bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) ||
5520 r5l_log_disk_error(conf);
5525 if (conf->worker_cnt_per_group == 0) {
5526 handle_list = try_loprio ? &conf->loprio_list :
5527 &conf->handle_list;
5529 handle_list = try_loprio ? &conf->worker_groups[group].loprio_list :
5530 &conf->worker_groups[group].handle_list;
5531 wg = &conf->worker_groups[group];
5534 for (i = 0; i < conf->group_cnt; i++) {
5535 handle_list = try_loprio ? &conf->worker_groups[i].loprio_list :
5536 &conf->worker_groups[i].handle_list;
5537 wg = &conf->worker_groups[i];
5546 list_empty(&conf->hold_list) ? "empty" : "busy",
5547 atomic_read(&conf->pending_full_writes), conf->bypass_count);
5552 if (list_empty(&conf->hold_list))
5553 conf->bypass_count = 0;
5555 if (conf->hold_list.next == conf->last_hold)
5556 conf->bypass_count++;
5558 conf->last_hold = conf->hold_list.next;
5559 conf->bypass_count -= conf->bypass_threshold;
5560 if (conf->bypass_count < 0)
5561 conf->bypass_count = 0;
5564 } else if (!list_empty(&conf->hold_list) &&
5565 ((conf->bypass_threshold &&
5566 conf->bypass_count > conf->bypass_threshold) ||
5567 atomic_read(&conf->pending_full_writes) == 0)) {
5569 list_for_each_entry(tmp, &conf->hold_list, lru) {
5570 if (conf->worker_cnt_per_group == 0 ||
5580 conf->bypass_count -= conf->bypass_threshold;
5581 if (conf->bypass_count < 0)
5582 conf->bypass_count = 0;
5616 struct r5conf *conf = mddev->private;
5621 spin_lock_irq(&conf->device_lock);
5637 __release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
5640 spin_unlock_irq(&conf->device_lock);
5642 release_inactive_stripe_list(conf, cb->temp_inactive_list,
5679 struct r5conf *conf = mddev->private;
5688 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
5693 stripe_sectors = conf->chunk_sectors *
5694 (conf->raid_disks - conf->max_degraded);
5699 logical_sector *= conf->chunk_sectors;
5700 last_sector *= conf->chunk_sectors;
5703 logical_sector += RAID5_STRIPE_SECTORS(conf)) {
5707 sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0);
5708 prepare_to_wait(&conf->wait_for_overlap, &w,
5718 for (d = 0; d < conf->raid_disks; d++) {
5730 finish_wait(&conf->wait_for_overlap, &w);
5732 for (d = 0; d < conf->raid_disks; d++) {
5742 if (conf->mddev->bitmap) {
5744 d < conf->raid_disks - conf->max_degraded;
5748 RAID5_STRIPE_SECTORS(conf),
5750 sh->bm_seq = conf->seq_flush + 1;
5757 atomic_inc(&conf->preread_active_stripes);
5766 struct r5conf *conf = mddev->private;
5777 int ret = log_handle_flush_request(conf, bi);
5813 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
5817 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
5818 for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
5824 seq = read_seqcount_begin(&conf->gen_lock);
5827 prepare_to_wait(&conf->wait_for_overlap, &w,
5829 if (unlikely(conf->reshape_progress != MaxSector)) {
5838 spin_lock_irq(&conf->device_lock);
5840 ? logical_sector < conf->reshape_progress
5841 : logical_sector >= conf->reshape_progress) {
5845 ? logical_sector < conf->reshape_safe
5846 : logical_sector >= conf->reshape_safe) {
5847 spin_unlock_irq(&conf->device_lock);
5853 spin_unlock_irq(&conf->device_lock);
5856 new_sector = raid5_compute_sector(conf, logical_sector,
5863 sh = raid5_get_active_stripe(conf, new_sector, previous,
5876 spin_lock_irq(&conf->device_lock);
5878 ? logical_sector >= conf->reshape_progress
5879 : logical_sector < conf->reshape_progress)
5882 spin_unlock_irq(&conf->device_lock);
5890 if (read_seqcount_retry(&conf->gen_lock, seq)) {
5921 atomic_inc(&conf->preread_active_stripes);
5929 finish_wait(&conf->wait_for_overlap, &w);
5950 struct r5conf *conf = mddev->private;
5954 int raid_disks = conf->previous_raid_disks;
5955 int data_disks = raid_disks - conf->max_degraded;
5956 int new_data_disks = conf->raid_disks - conf->max_degraded;
5968 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
5970 - conf->reshape_progress;
5972 conf->reshape_progress == MaxSector) {
5976 conf->reshape_progress > 0)
5977 sector_nr = conf->reshape_progress;
5993 reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
6001 writepos = conf->reshape_progress;
6003 readpos = conf->reshape_progress;
6005 safepos = conf->reshape_safe;
6026 BUG_ON(conf->reshape_progress == 0);
6057 if (conf->min_offset_diff < 0) {
6058 safepos += -conf->min_offset_diff;
6059 readpos += -conf->min_offset_diff;
6061 writepos += conf->min_offset_diff;
6066 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
6068 wait_event(conf->wait_for_overlap,
6069 atomic_read(&conf->reshape_stripes)==0
6071 if (atomic_read(&conf->reshape_stripes) != 0)
6073 mddev->reshape_position = conf->reshape_progress;
6084 conf->reshape_checkpoint = jiffies;
6091 spin_lock_irq(&conf->device_lock);
6092 conf->reshape_safe = mddev->reshape_position;
6093 spin_unlock_irq(&conf->device_lock);
6094 wake_up(&conf->wait_for_overlap);
6099 for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) {
6102 sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
6104 atomic_inc(&conf->reshape_stripes);
6112 if (conf->level == 6 &&
6120 memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf));
6130 spin_lock_irq(&conf->device_lock);
6132 conf->reshape_progress -= reshape_sectors * new_data_disks;
6134 conf->reshape_progress += reshape_sectors * new_data_disks;
6135 spin_unlock_irq(&conf->device_lock);
6142 raid5_compute_sector(conf, stripe_addr*(new_data_disks),
6145 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
6151 sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1);
6155 first_sector += RAID5_STRIPE_SECTORS(conf);
6175 wait_event(conf->wait_for_overlap,
6176 atomic_read(&conf->reshape_stripes) == 0
6178 if (atomic_read(&conf->reshape_stripes) != 0)
6180 mddev->reshape_position = conf->reshape_progress;
6190 conf->reshape_checkpoint = jiffies;
6198 spin_lock_irq(&conf->device_lock);
6199 conf->reshape_safe = mddev->reshape_position;
6200 spin_unlock_irq(&conf->device_lock);
6201 wake_up(&conf->wait_for_overlap);
6211 struct r5conf *conf = mddev->private;
6222 end_reshape(conf);
6230 conf->fullsync = 0;
6237 wait_event(conf->wait_for_overlap, conf->quiesce != 2);
6252 if (mddev->degraded >= conf->max_degraded &&
6259 !conf->fullsync &&
6261 sync_blocks >= RAID5_STRIPE_SECTORS(conf)) {
6263 do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf));
6266 return sync_blocks * RAID5_STRIPE_SECTORS(conf);
6271 sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0);
6273 sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0);
6284 for (i = 0; i < conf->raid_disks; i++) {
6285 struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
6299 return RAID5_STRIPE_SECTORS(conf);
6302 static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
6322 ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
6323 sector = raid5_compute_sector(conf, logical_sector,
6328 logical_sector += RAID5_STRIPE_SECTORS(conf),
6329 sector += RAID5_STRIPE_SECTORS(conf),
6336 sh = raid5_get_active_stripe(conf, sector, 0, 1, 1);
6340 conf->retry_read_aligned = raid_bio;
6341 conf->retry_read_offset = scnt;
6347 conf->retry_read_aligned = raid_bio;
6348 conf->retry_read_offset = scnt;
6360 if (atomic_dec_and_test(&conf->active_aligned_reads))
6361 wake_up(&conf->wait_for_quiescent);
6365 static int handle_active_stripes(struct r5conf *conf, int group,
6368 __releases(&conf->device_lock)
6369 __acquires(&conf->device_lock)
6376 (sh = __get_priority_stripe(conf, group)) != NULL)
6384 spin_unlock_irq(&conf->device_lock);
6385 log_flush_stripe_to_raid(conf);
6386 spin_lock_irq(&conf->device_lock);
6391 spin_unlock_irq(&conf->device_lock);
6393 release_inactive_stripe_list(conf, temp_inactive_list,
6396 r5l_flush_stripe_to_raid(conf->log);
6398 spin_lock_irq(&conf->device_lock);
6404 log_write_stripe_run(conf);
6408 spin_lock_irq(&conf->device_lock);
6411 __release_stripe(conf, batch[i], &temp_inactive_list[hash]);
6420 struct r5conf *conf = group->conf;
6421 struct mddev *mddev = conf->mddev;
6422 int group_id = group - conf->worker_groups;
6430 spin_lock_irq(&conf->device_lock);
6434 released = release_stripe_list(conf, worker->temp_inactive_list);
6436 batch_size = handle_active_stripes(conf, group_id, worker,
6444 conf->device_lock);
6448 spin_unlock_irq(&conf->device_lock);
6450 flush_deferred_bios(conf);
6452 r5l_flush_stripe_to_raid(conf->log);
6470 struct r5conf *conf = mddev->private;
6480 spin_lock_irq(&conf->device_lock);
6486 released = release_stripe_list(conf, conf->temp_inactive_list);
6488 clear_bit(R5_DID_ALLOC, &conf->cache_state);
6491 !list_empty(&conf->bitmap_list)) {
6493 conf->seq_flush++;
6494 spin_unlock_irq(&conf->device_lock);
6496 spin_lock_irq(&conf->device_lock);
6497 conf->seq_write = conf->seq_flush;
6498 activate_bit_delay(conf, conf->temp_inactive_list);
6500 raid5_activate_delayed(conf);
6502 while ((bio = remove_bio_from_retry(conf, &offset))) {
6504 spin_unlock_irq(&conf->device_lock);
6505 ok = retry_aligned_read(conf, bio, offset);
6506 spin_lock_irq(&conf->device_lock);
6512 batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
6513 conf->temp_inactive_list);
6519 spin_unlock_irq(&conf->device_lock);
6521 spin_lock_irq(&conf->device_lock);
6526 spin_unlock_irq(&conf->device_lock);
6527 if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
6528 mutex_trylock(&conf->cache_size_mutex)) {
6529 grow_one_stripe(conf, __GFP_NOWARN);
6533 set_bit(R5_DID_ALLOC, &conf->cache_state);
6534 mutex_unlock(&conf->cache_size_mutex);
6537 flush_deferred_bios(conf);
6539 r5l_flush_stripe_to_raid(conf->log);
6550 struct r5conf *conf;
6553 conf = mddev->private;
6554 if (conf)
6555 ret = sprintf(page, "%d\n", conf->min_nr_stripes);
6564 struct r5conf *conf = mddev->private;
6569 conf->min_nr_stripes = size;
6570 mutex_lock(&conf->cache_size_mutex);
6571 while (size < conf->max_nr_stripes &&
6572 drop_one_stripe(conf))
6574 mutex_unlock(&conf->cache_size_mutex);
6578 mutex_lock(&conf->cache_size_mutex);
6579 while (size > conf->max_nr_stripes)
6580 if (!grow_one_stripe(conf, GFP_KERNEL)) {
6581 conf->min_nr_stripes = conf->max_nr_stripes;
6585 mutex_unlock(&conf->cache_size_mutex);
6594 struct r5conf *conf;
6605 conf = mddev->private;
6606 if (!conf)
6623 struct r5conf *conf = mddev->private;
6624 if (conf)
6625 return sprintf(page, "%d\n", conf->rmw_level);
6633 struct r5conf *conf = mddev->private;
6636 if (!conf)
6653 conf->rmw_level = new;
6665 struct r5conf *conf;
6669 conf = mddev->private;
6670 if (conf)
6671 ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf));
6680 struct r5conf *conf;
6704 conf = mddev->private;
6705 if (!conf) {
6710 if (new == conf->stripe_size)
6714 conf->stripe_size, new);
6725 mutex_lock(&conf->cache_size_mutex);
6726 size = conf->max_nr_stripes;
6728 shrink_stripes(conf);
6730 conf->stripe_size = new;
6731 conf->stripe_shift = ilog2(new) - 9;
6732 conf->stripe_sectors = new >> 9;
6733 if (grow_stripes(conf, size)) {
6738 mutex_unlock(&conf->cache_size_mutex);
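
Worked example for the stripe-size arithmetic at lines 6730-6732 (the same fields get their defaults at lines 7198-7200), for the default 4096-byte stripe and a hypothetical 16 KiB one:

    stripe_size 4096:  stripe_shift = ilog2(4096)  - 9 = 3,  stripe_sectors = 4096  >> 9 = 8
    stripe_size 16384: stripe_shift = ilog2(16384) - 9 = 5,  stripe_sectors = 16384 >> 9 = 32
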
6760 struct r5conf *conf;
6763 conf = mddev->private;
6764 if (conf)
6765 ret = sprintf(page, "%d\n", conf->bypass_threshold);
6773 struct r5conf *conf;
6785 conf = mddev->private;
6786 if (!conf)
6788 else if (new > conf->min_nr_stripes)
6791 conf->bypass_threshold = new;
6805 struct r5conf *conf;
6808 conf = mddev->private;
6809 if (conf)
6810 ret = sprintf(page, "%d\n", conf->skip_copy);
6818 struct r5conf *conf;
6831 conf = mddev->private;
6832 if (!conf)
6834 else if (new != conf->skip_copy) {
6838 conf->skip_copy = new;
6857 struct r5conf *conf = mddev->private;
6858 if (conf)
6859 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
6870 struct r5conf *conf;
6873 conf = mddev->private;
6874 if (conf)
6875 ret = sprintf(page, "%d\n", conf->worker_cnt_per_group);
6880 static int alloc_thread_groups(struct r5conf *conf, int cnt,
6886 struct r5conf *conf;
6903 conf = mddev->private;
6904 if (!conf)
6906 else if (new != conf->worker_cnt_per_group) {
6909 old_groups = conf->worker_groups;
6913 err = alloc_thread_groups(conf, new, &group_cnt, &new_groups);
6915 spin_lock_irq(&conf->device_lock);
6916 conf->group_cnt = group_cnt;
6917 conf->worker_cnt_per_group = new;
6918 conf->worker_groups = new_groups;
6919 spin_unlock_irq(&conf->device_lock);
6954 static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
6983 group->conf = conf;
6999 static void free_thread_groups(struct r5conf *conf)
7001 if (conf->worker_groups)
7002 kfree(conf->worker_groups[0].workers);
7003 kfree(conf->worker_groups);
7004 conf->worker_groups = NULL;
7010 struct r5conf *conf = mddev->private;
7016 raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
7018 sectors &= ~((sector_t)conf->chunk_sectors - 1);
7019 sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
7020 return sectors * (raid_disks - conf->max_degraded);
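
Worked example for raid5_size(), assuming no reshape is pending so both chunk masks agree: a 4-device RAID5 (max_degraded of 1) with 1024-sector chunks and 1953525168 usable sectors per member (a 1 TB device):

    1953525168 & ~(1024 - 1) = 1953524736 sectors per device (rounded to whole chunks)
    1953524736 * (4 - 1)     = 5860574208 sectors exported, about 2.7 TiB
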
7023 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
7031 static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
7033 if (conf->level == 6 && !percpu->spare_page) {
7040 max(conf->raid_disks,
7041 conf->previous_raid_disks),
7042 max(conf->chunk_sectors,
7043 conf->prev_chunk_sectors)
7044 / RAID5_STRIPE_SECTORS(conf))) {
7045 free_scratch_buffer(conf, percpu);
7054 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
7056 free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
7060 static void raid5_free_percpu(struct r5conf *conf)
7062 if (!conf->percpu)
7065 cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
7066 free_percpu(conf->percpu);
7069 static void free_conf(struct r5conf *conf)
7073 log_exit(conf);
7075 unregister_shrinker(&conf->shrinker);
7076 free_thread_groups(conf);
7077 shrink_stripes(conf);
7078 raid5_free_percpu(conf);
7079 for (i = 0; i < conf->pool_size; i++)
7080 if (conf->disks[i].extra_page)
7081 put_page(conf->disks[i].extra_page);
7082 kfree(conf->disks);
7083 bioset_exit(&conf->bio_split);
7084 kfree(conf->stripe_hashtbl);
7085 kfree(conf->pending_data);
7086 kfree(conf);
7091 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
7092 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
7094 if (alloc_scratch_buffer(conf, percpu)) {
7102 static int raid5_alloc_percpu(struct r5conf *conf)
7106 conf->percpu = alloc_percpu(struct raid5_percpu);
7107 if (!conf->percpu)
7110 err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
7112 conf->scribble_disks = max(conf->raid_disks,
7113 conf->previous_raid_disks);
7114 conf->scribble_sectors = max(conf->chunk_sectors,
7115 conf->prev_chunk_sectors);
7123 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
7126 if (mutex_trylock(&conf->cache_size_mutex)) {
7129 conf->max_nr_stripes > conf->min_nr_stripes) {
7130 if (drop_one_stripe(conf) == 0) {
7136 mutex_unlock(&conf->cache_size_mutex);
7144 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
7146 if (conf->max_nr_stripes < conf->min_nr_stripes)
7149 return conf->max_nr_stripes - conf->min_nr_stripes;
7154 struct r5conf *conf;
7193 conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
7194 if (conf == NULL)
7198 conf->stripe_size = DEFAULT_STRIPE_SIZE;
7199 conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9;
7200 conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9;
7202 INIT_LIST_HEAD(&conf->free_list);
7203 INIT_LIST_HEAD(&conf->pending_list);
7204 conf->pending_data = kcalloc(PENDING_IO_MAX,
7207 if (!conf->pending_data)
7210 list_add(&conf->pending_data[i].sibling, &conf->free_list);
7212 if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) {
7213 conf->group_cnt = group_cnt;
7214 conf->worker_cnt_per_group = 0;
7215 conf->worker_groups = new_group;
7218 spin_lock_init(&conf->device_lock);
7219 seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
7220 mutex_init(&conf->cache_size_mutex);
7221 init_waitqueue_head(&conf->wait_for_quiescent);
7222 init_waitqueue_head(&conf->wait_for_stripe);
7223 init_waitqueue_head(&conf->wait_for_overlap);
7224 INIT_LIST_HEAD(&conf->handle_list);
7225 INIT_LIST_HEAD(&conf->loprio_list);
7226 INIT_LIST_HEAD(&conf->hold_list);
7227 INIT_LIST_HEAD(&conf->delayed_list);
7228 INIT_LIST_HEAD(&conf->bitmap_list);
7229 init_llist_head(&conf->released_stripes);
7230 atomic_set(&conf->active_stripes, 0);
7231 atomic_set(&conf->preread_active_stripes, 0);
7232 atomic_set(&conf->active_aligned_reads, 0);
7233 spin_lock_init(&conf->pending_bios_lock);
7234 conf->batch_bio_dispatch = true;
7239 conf->batch_bio_dispatch = false;
7244 conf->bypass_threshold = BYPASS_THRESHOLD;
7245 conf->recovery_disabled = mddev->recovery_disabled - 1;
7247 conf->raid_disks = mddev->raid_disks;
7249 conf->previous_raid_disks = mddev->raid_disks;
7251 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
7252 max_disks = max(conf->raid_disks, conf->previous_raid_disks);
7254 conf->disks = kcalloc(max_disks, sizeof(struct disk_info),
7257 if (!conf->disks)
7261 conf->disks[i].extra_page = alloc_page(GFP_KERNEL);
7262 if (!conf->disks[i].extra_page)
7266 ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
7269 conf->mddev = mddev;
7271 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
7279 spin_lock_init(conf->hash_locks);
7281 spin_lock_init(conf->hash_locks + i);
7284 INIT_LIST_HEAD(conf->inactive_list + i);
7287 INIT_LIST_HEAD(conf->temp_inactive_list + i);
7289 atomic_set(&conf->r5c_cached_full_stripes, 0);
7290 INIT_LIST_HEAD(&conf->r5c_full_stripe_list);
7291 atomic_set(&conf->r5c_cached_partial_stripes, 0);
7292 INIT_LIST_HEAD(&conf->r5c_partial_stripe_list);
7293 atomic_set(&conf->r5c_flushing_full_stripes, 0);
7294 atomic_set(&conf->r5c_flushing_partial_stripes, 0);
7296 conf->level = mddev->new_level;
7297 conf->chunk_sectors = mddev->new_chunk_sectors;
7298 if (raid5_alloc_percpu(conf) != 0)
7308 disk = conf->disks + raid_disk;
7326 conf->fullsync = 1;
7329 conf->level = mddev->new_level;
7330 if (conf->level == 6) {
7331 conf->max_degraded = 2;
7333 conf->rmw_level = PARITY_ENABLE_RMW;
7335 conf->rmw_level = PARITY_DISABLE_RMW;
7337 conf->max_degraded = 1;
7338 conf->rmw_level = PARITY_ENABLE_RMW;
7340 conf->algorithm = mddev->new_layout;
7341 conf->reshape_progress = mddev->reshape_position;
7342 if (conf->reshape_progress != MaxSector) {
7343 conf->prev_chunk_sectors = mddev->chunk_sectors;
7344 conf->prev_algo = mddev->layout;
7346 conf->prev_chunk_sectors = conf->chunk_sectors;
7347 conf->prev_algo = conf->algorithm;
7350 conf->min_nr_stripes = NR_STRIPES;
7353 ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4,
7354 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4);
7355 conf->min_nr_stripes = max(NR_STRIPES, stripes);
7356 if (conf->min_nr_stripes != NR_STRIPES)
7358 mdname(mddev), conf->min_nr_stripes);
7360 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
7362 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
7363 if (grow_stripes(conf, conf->min_nr_stripes)) {
7374 conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
7375 conf->shrinker.scan_objects = raid5_cache_scan;
7376 conf->shrinker.count_objects = raid5_cache_count;
7377 conf->shrinker.batch = 128;
7378 conf->shrinker.flags = 0;
7379 if (register_shrinker(&conf->shrinker)) {
7386 conf->thread = md_register_thread(raid5d, mddev, pers_name);
7387 if (!conf->thread) {
7393 return conf;
7396 if (conf) {
7397 free_conf(conf);
7429 static void raid5_set_io_opt(struct r5conf *conf)
7431 blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
7432 (conf->raid_disks - conf->max_degraded));
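
raid5_set_io_opt() advertises one full stripe of data as the optimal I/O size, i.e. chunk bytes times the number of data disks, so that suitably aligned writes cover whole stripes and skip the read-modify-write path. For example, assuming a 6-device RAID6 (max_degraded of 2) with 512 KiB chunks:

    io_opt = (1024 << 9) * (6 - 2) = 512 KiB * 4 = 2 MiB
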
7437 struct r5conf *conf;
7578 conf = setup_conf(mddev);
7580 conf = mddev->private;
7582 if (IS_ERR(conf))
7583 return PTR_ERR(conf);
7595 conf->min_offset_diff = min_offset_diff;
7596 mddev->thread = conf->thread;
7597 conf->thread = NULL;
7598 mddev->private = conf;
7600 for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
7602 rdev = conf->disks[i].rdev;
7603 if (!rdev && conf->disks[i].replacement) {
7605 rdev = conf->disks[i].replacement;
7606 conf->disks[i].replacement = NULL;
7608 conf->disks[i].rdev = rdev;
7612 if (conf->disks[i].replacement &&
7613 conf->reshape_progress != MaxSector) {
7638 conf->algorithm,
7639 conf->raid_disks,
7640 conf->max_degraded))
7644 conf->prev_algo,
7645 conf->previous_raid_disks,
7646 conf->max_degraded))
7654 mddev->degraded = raid5_calc_degraded(conf);
7656 if (has_failed(conf)) {
7658 mdname(mddev), mddev->degraded, conf->raid_disks);
7682 mdname(mddev), conf->level,
7686 print_raid5_conf(conf);
7688 if (conf->reshape_progress != MaxSector) {
7689 conf->reshape_safe = conf->reshape_progress;
7690 atomic_set(&conf->reshape_stripes, 0);
7716 int data_disks = conf->previous_raid_disks - conf->max_degraded;
7722 raid5_set_io_opt(conf);
7773 if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
7779 print_raid5_conf(conf);
7780 free_conf(conf);
7788 struct r5conf *conf = priv;
7790 free_conf(conf);
7796 struct r5conf *conf = mddev->private;
7800 conf->chunk_sectors / 2, mddev->layout);
7801 seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
7803 for (i = 0; i < conf->raid_disks; i++) {
7804 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
7811 static void print_raid5_conf (struct r5conf *conf)
7816 pr_debug("RAID conf printout:\n");
7817 if (!conf) {
7818 pr_debug("(conf==NULL)\n");
7821 pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level,
7822 conf->raid_disks,
7823 conf->raid_disks - conf->mddev->degraded);
7825 for (i = 0; i < conf->raid_disks; i++) {
7827 tmp = conf->disks + i;
7838 struct r5conf *conf = mddev->private;
7843 for (i = 0; i < conf->raid_disks; i++) {
7844 tmp = conf->disks + i;
7871 spin_lock_irqsave(&conf->device_lock, flags);
7872 mddev->degraded = raid5_calc_degraded(conf);
7873 spin_unlock_irqrestore(&conf->device_lock, flags);
7874 print_raid5_conf(conf);
7880 struct r5conf *conf = mddev->private;
7884 struct disk_info *p = conf->disks + number;
7886 print_raid5_conf(conf);
7887 if (test_bit(Journal, &rdev->flags) && conf->log) {
7894 if (atomic_read(&conf->active_stripes) ||
7895 atomic_read(&conf->r5c_cached_full_stripes) ||
7896 atomic_read(&conf->r5c_cached_partial_stripes)) {
7899 log_exit(conf);
7909 if (number >= conf->raid_disks &&
7910 conf->reshape_progress == MaxSector)
7922 mddev->recovery_disabled != conf->recovery_disabled &&
7923 !has_failed(conf) &&
7925 number < conf->raid_disks) {
7939 err = log_modify(conf, rdev, false);
7953 err = log_modify(conf, p->rdev, true);
7959 print_raid5_conf(conf);
7965 struct r5conf *conf = mddev->private;
7970 int last = conf->raid_disks - 1;
7973 if (conf->log)
7981 ret = log_init(conf, rdev, false);
7985 ret = r5l_start(conf->log);
7991 if (mddev->recovery_disabled == conf->recovery_disabled)
7994 if (rdev->saved_raid_disk < 0 && has_failed(conf))
8008 conf->disks[rdev->saved_raid_disk].rdev == NULL)
8012 p = conf->disks + disk;
8017 conf->fullsync = 1;
8020 err = log_modify(conf, rdev, true);
8026 p = conf->disks + disk;
8033 conf->fullsync = 1;
8039 print_raid5_conf(conf);
8053 struct r5conf *conf = mddev->private;
8055 if (raid5_has_log(conf) || raid5_has_ppl(conf))
8057 sectors &= ~((sector_t)conf->chunk_sectors - 1);
8088 struct r5conf *conf = mddev->private;
8089 if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
8090 > conf->min_nr_stripes ||
8091 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
8092 > conf->min_nr_stripes) {
8096 / RAID5_STRIPE_SIZE(conf))*4);
8104 struct r5conf *conf = mddev->private;
8106 if (raid5_has_log(conf) || raid5_has_ppl(conf))
8112 if (has_failed(conf))
8132 if (resize_chunks(conf,
8133 conf->previous_raid_disks
8140 if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
8142 return resize_stripes(conf, (conf->previous_raid_disks
8148 struct r5conf *conf = mddev->private;
8159 if (has_failed(conf))
8168 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
8178 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
8185 atomic_set(&conf->reshape_stripes, 0);
8186 spin_lock_irq(&conf->device_lock);
8187 write_seqcount_begin(&conf->gen_lock);
8188 conf->previous_raid_disks = conf->raid_disks;
8189 conf->raid_disks += mddev->delta_disks;
8190 conf->prev_chunk_sectors = conf->chunk_sectors;
8191 conf->chunk_sectors = mddev->new_chunk_sectors;
8192 conf->prev_algo = conf->algorithm;
8193 conf->algorithm = mddev->new_layout;
8194 conf->generation++;
8200 conf->reshape_progress = raid5_size(mddev, 0, 0);
8202 conf->reshape_progress = 0;
8203 conf->reshape_safe = conf->reshape_progress;
8204 write_seqcount_end(&conf->gen_lock);
8205 spin_unlock_irq(&conf->device_lock);
8227 >= conf->previous_raid_disks)
8235 } else if (rdev->raid_disk >= conf->previous_raid_disks
8245 spin_lock_irqsave(&conf->device_lock, flags);
8246 mddev->degraded = raid5_calc_degraded(conf);
8247 spin_unlock_irqrestore(&conf->device_lock, flags);
8249 mddev->raid_disks = conf->raid_disks;
8250 mddev->reshape_position = conf->reshape_progress;
8262 spin_lock_irq(&conf->device_lock);
8263 write_seqcount_begin(&conf->gen_lock);
8264 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
8266 conf->chunk_sectors = conf->prev_chunk_sectors;
8267 mddev->new_layout = conf->algorithm = conf->prev_algo;
8271 conf->generation --;
8272 conf->reshape_progress = MaxSector;
8274 write_seqcount_end(&conf->gen_lock);
8275 spin_unlock_irq(&conf->device_lock);
8278 conf->reshape_checkpoint = jiffies;
8285 * changes needed in 'conf'
8287 static void end_reshape(struct r5conf *conf)
8290 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
8293 spin_lock_irq(&conf->device_lock);
8294 conf->previous_raid_disks = conf->raid_disks;
8295 md_finish_reshape(conf->mddev);
8297 conf->reshape_progress = MaxSector;
8298 conf->mddev->reshape_position = MaxSector;
8299 rdev_for_each(rdev, conf->mddev)
8304 spin_unlock_irq(&conf->device_lock);
8305 wake_up(&conf->wait_for_overlap);
8307 if (conf->mddev->queue)
8308 raid5_set_io_opt(conf);
8317 struct r5conf *conf = mddev->private;
8323 spin_lock_irq(&conf->device_lock);
8324 mddev->degraded = raid5_calc_degraded(conf);
8325 spin_unlock_irq(&conf->device_lock);
8326 for (d = conf->raid_disks ;
8327 d < conf->raid_disks - mddev->delta_disks;
8329 struct md_rdev *rdev = conf->disks[d].rdev;
8332 rdev = conf->disks[d].replacement;
8337 mddev->layout = conf->algorithm;
8338 mddev->chunk_sectors = conf->chunk_sectors;
8347 struct r5conf *conf = mddev->private;
8351 lock_all_device_hash_locks_irq(conf);
8355 r5c_flush_cache(conf, INT_MAX);
8356 conf->quiesce = 2;
8357 wait_event_cmd(conf->wait_for_quiescent,
8358 atomic_read(&conf->active_stripes) == 0 &&
8359 atomic_read(&conf->active_aligned_reads) == 0,
8360 unlock_all_device_hash_locks_irq(conf),
8361 lock_all_device_hash_locks_irq(conf));
8362 conf->quiesce = 1;
8363 unlock_all_device_hash_locks_irq(conf);
8365 wake_up(&conf->wait_for_overlap);
8368 lock_all_device_hash_locks_irq(conf);
8369 conf->quiesce = 0;
8370 wake_up(&conf->wait_for_quiescent);
8371 wake_up(&conf->wait_for_overlap);
8372 unlock_all_device_hash_locks_irq(conf);
8374 log_quiesce(conf, quiesce);
8475 struct r5conf *conf = mddev->private;
8495 conf->algorithm = mddev->new_layout;
8499 conf->chunk_sectors = new_chunk ;
8618 struct r5conf *conf;
8624 conf = mddev->private;
8625 if (!conf) {
8632 if (!raid5_has_ppl(conf) && conf->level == 5) {
8633 err = log_init(conf, NULL, true);
8635 err = resize_stripes(conf, conf->pool_size);
8637 log_exit(conf);
8642 if (raid5_has_ppl(conf)) {
8644 log_exit(conf);
8646 err = resize_stripes(conf, conf->pool_size);
8647 } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
8648 r5l_log_disk_error(conf)) {
8680 struct r5conf *conf = mddev->private;
8682 return r5l_start(conf->log);