Lines matching defs:mddev (drivers/md/md.c)

74  * mddev->thread when the mutex cannot be held.
90 static int remove_and_add_spares(struct mddev *mddev,
92 static void mddev_detach(struct mddev *mddev);
117 static inline int speed_min(struct mddev *mddev)
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
123 static inline int speed_max(struct mddev *mddev)
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
138 static void rdevs_uninit_serial(struct mddev *mddev)
142 rdev_for_each(rdev, mddev)
174 static int rdevs_init_serial(struct mddev *mddev)
179 rdev_for_each(rdev, mddev) {
186 if (ret && !mddev->serial_info_pool)
187 rdevs_uninit_serial(mddev);
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
209 void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
219 mddev_suspend(mddev);
222 ret = rdevs_init_serial(mddev);
228 if (mddev->serial_info_pool == NULL) {
233 mddev->serial_info_pool =
236 if (!mddev->serial_info_pool) {
237 rdevs_uninit_serial(mddev);
244 mddev_resume(mddev);
253 void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
259 if (mddev->serial_info_pool) {
264 mddev_suspend(mddev);
265 rdev_for_each(temp, mddev) {
267 if (!mddev->serialize_policy ||
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
287 mddev_resume(mddev);
344 struct mddev *mddev)
346 if (!mddev || !bioset_initialized(&mddev->bio_set))
349 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
353 static struct bio *md_bio_alloc_sync(struct mddev *mddev)
355 if (!mddev || !bioset_initialized(&mddev->sync_set))
358 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
373 void md_new_event(struct mddev *mddev)
392 * a reference to the current mddev and must mddev_put it.
400 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
403 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
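
A minimal sketch of the rule stated in the comment at 392: code that breaks out of for_each_mddev() still owns a reference to the current array and must drop it itself (compare the overlap check around 3469-3482 below); the predicate here is hypothetical.

	struct mddev *mddev;
	struct list_head *tmp;

	for_each_mddev(mddev, tmp) {
		if (should_stop(mddev)) {	/* hypothetical predicate */
			mddev_put(mddev);	/* drop the reference before leaving early */
			break;
		}
		/* on normal termination the macro itself drops the last reference */
	}
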
416 static bool is_suspended(struct mddev *mddev, struct bio *bio)
418 if (mddev->suspended)
422 if (mddev->suspend_lo >= mddev->suspend_hi)
424 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
426 if (bio_end_sector(bio) < mddev->suspend_lo)
431 void md_handle_request(struct mddev *mddev, struct bio *bio)
435 if (is_suspended(mddev, bio)) {
438 prepare_to_wait(&mddev->sb_wait, &__wait,
440 if (!is_suspended(mddev, bio))
446 finish_wait(&mddev->sb_wait, &__wait);
448 atomic_inc(&mddev->active_io);
451 if (!mddev->pers->make_request(mddev, bio)) {
452 atomic_dec(&mddev->active_io);
453 wake_up(&mddev->sb_wait);
457 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
458 wake_up(&mddev->sb_wait);
465 struct mddev *mddev = bio->bi_disk->private_data;
467 if (mddev == NULL || mddev->pers == NULL) {
472 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
479 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
489 md_handle_request(mddev, bio);
500 void mddev_suspend(struct mddev *mddev)
502 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
503 lockdep_assert_held(&mddev->reconfig_mutex);
504 if (mddev->suspended++)
507 wake_up(&mddev->sb_wait);
508 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
510 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
511 mddev->pers->quiesce(mddev, 1);
512 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
513 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
515 del_timer_sync(&mddev->safemode_timer);
517 mddev->noio_flag = memalloc_noio_save();
521 void mddev_resume(struct mddev *mddev)
524 memalloc_noio_restore(mddev->noio_flag);
525 lockdep_assert_held(&mddev->reconfig_mutex);
526 if (--mddev->suspended)
528 wake_up(&mddev->sb_wait);
529 mddev->pers->quiesce(mddev, 0);
531 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
532 md_wakeup_thread(mddev->thread);
533 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
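
The suspend/resume pair above brackets reconfiguration elsewhere in this listing (mddev_create_serial_pool() at 219/244, suspend_lo_store() at 5229-5231). A minimal sketch of that pattern, assuming reconfig_mutex is already held; the helper name is hypothetical.

	static void reconfigure_quiesced(struct mddev *mddev)	/* hypothetical helper */
	{
		lockdep_assert_held(&mddev->reconfig_mutex);

		mddev_suspend(mddev);	/* drains active_io and quiesces the personality */
		/* ... change state that must not race with make_request ... */
		mddev_resume(mddev);	/* un-quiesces and wakes mddev->thread / sync_thread */
	}
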
544 struct mddev *mddev = rdev->mddev;
548 rdev_dec_pending(rdev, mddev);
550 if (atomic_dec_and_test(&mddev->flush_pending)) {
552 queue_work(md_wq, &mddev->flush_work);
560 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
563 mddev->start_flush = ktime_get_boottime();
564 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
565 atomic_set(&mddev->flush_pending, 1);
567 rdev_for_each_rcu(rdev, mddev)
578 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
583 atomic_inc(&mddev->flush_pending);
586 rdev_dec_pending(rdev, mddev);
589 if (atomic_dec_and_test(&mddev->flush_pending))
590 queue_work(md_wq, &mddev->flush_work);
595 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
596 struct bio *bio = mddev->flush_bio;
604 spin_lock_irq(&mddev->lock);
605 mddev->last_flush = mddev->start_flush;
606 mddev->flush_bio = NULL;
607 spin_unlock_irq(&mddev->lock);
608 wake_up(&mddev->sb_wait);
615 md_handle_request(mddev, bio);
625 bool md_flush_request(struct mddev *mddev, struct bio *bio)
628 spin_lock_irq(&mddev->lock);
629 wait_event_lock_irq(mddev->sb_wait,
630 !mddev->flush_bio ||
631 ktime_after(mddev->last_flush, start),
632 mddev->lock);
633 if (!ktime_after(mddev->last_flush, start)) {
634 WARN_ON(mddev->flush_bio);
635 mddev->flush_bio = bio;
638 spin_unlock_irq(&mddev->lock);
641 INIT_WORK(&mddev->flush_work, submit_flushes);
642 queue_work(md_wq, &mddev->flush_work);
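
A hedged sketch of the caller side of md_flush_request(): a personality's ->make_request() offers REQ_PREFLUSH bios to it first and only carries on when it reports that the bio still needs data handling (raid0/raid1 follow this shape; the entry-point name here is illustrative).

	static bool example_make_request(struct mddev *mddev, struct bio *bio)	/* hypothetical */
	{
		if (unlikely(bio->bi_opf & REQ_PREFLUSH) &&
		    md_flush_request(mddev, bio))
			return true;	/* flush consolidated or queued; nothing left to do */

		/* ... map and submit the data portion of the bio ... */
		return true;
	}
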
657 static inline struct mddev *mddev_get(struct mddev *mddev)
659 atomic_inc(&mddev->active);
660 return mddev;
665 static void mddev_put(struct mddev *mddev)
667 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
669 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
670 mddev->ctime == 0 && !mddev->hold_active) {
673 list_del_init(&mddev->all_mddevs);
680 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
681 queue_work(md_misc_wq, &mddev->del_work);
688 void mddev_init(struct mddev *mddev)
690 kobject_init(&mddev->kobj, &md_ktype);
691 mutex_init(&mddev->open_mutex);
692 mutex_init(&mddev->reconfig_mutex);
693 mutex_init(&mddev->bitmap_info.mutex);
694 INIT_LIST_HEAD(&mddev->disks);
695 INIT_LIST_HEAD(&mddev->all_mddevs);
696 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
697 atomic_set(&mddev->active, 1);
698 atomic_set(&mddev->openers, 0);
699 atomic_set(&mddev->active_io, 0);
700 spin_lock_init(&mddev->lock);
701 atomic_set(&mddev->flush_pending, 0);
702 init_waitqueue_head(&mddev->sb_wait);
703 init_waitqueue_head(&mddev->recovery_wait);
704 mddev->reshape_position = MaxSector;
705 mddev->reshape_backwards = 0;
706 mddev->last_sync_action = "none";
707 mddev->resync_min = 0;
708 mddev->resync_max = MaxSector;
709 mddev->level = LEVEL_NONE;
713 static struct mddev *mddev_find_locked(dev_t unit)
715 struct mddev *mddev;
717 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
718 if (mddev->unit == unit)
719 return mddev;
724 static struct mddev *mddev_find(dev_t unit)
726 struct mddev *mddev;
732 mddev = mddev_find_locked(unit);
733 if (mddev)
734 mddev_get(mddev);
737 return mddev;
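
mddev_find() hands back the array with a reference already taken (the mddev_get() under all_mddevs_lock above), so callers pair it with mddev_put(); a short usage sketch, the device number being purely illustrative.

	struct mddev *mddev = mddev_find(MKDEV(MD_MAJOR, 0));

	if (mddev) {
		/* ... inspect or operate on the array ... */
		mddev_put(mddev);
	}
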
740 static struct mddev *mddev_find_or_alloc(dev_t unit)
742 struct mddev *mddev, *new = NULL;
751 mddev = mddev_find_locked(unit);
752 if (mddev) {
753 mddev_get(mddev);
756 return mddev;
811 void mddev_unlock(struct mddev *mddev)
813 if (mddev->to_remove) {
826 struct attribute_group *to_remove = mddev->to_remove;
827 mddev->to_remove = NULL;
828 mddev->sysfs_active = 1;
829 mutex_unlock(&mddev->reconfig_mutex);
831 if (mddev->kobj.sd) {
833 sysfs_remove_group(&mddev->kobj, to_remove);
834 if (mddev->pers == NULL ||
835 mddev->pers->sync_request == NULL) {
836 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
837 if (mddev->sysfs_action)
838 sysfs_put(mddev->sysfs_action);
839 if (mddev->sysfs_completed)
840 sysfs_put(mddev->sysfs_completed);
841 if (mddev->sysfs_degraded)
842 sysfs_put(mddev->sysfs_degraded);
843 mddev->sysfs_action = NULL;
844 mddev->sysfs_completed = NULL;
845 mddev->sysfs_degraded = NULL;
848 mddev->sysfs_active = 0;
850 mutex_unlock(&mddev->reconfig_mutex);
856 md_wakeup_thread(mddev->thread);
857 wake_up(&mddev->sb_wait);
862 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
866 rdev_for_each_rcu(rdev, mddev)
874 static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
878 rdev_for_each(rdev, mddev)
885 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
889 rdev_for_each_rcu(rdev, mddev)
944 struct mddev *mddev = rdev->mddev;
949 md_error(mddev, rdev);
952 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
960 rdev_dec_pending(rdev, mddev);
962 if (atomic_dec_and_test(&mddev->pending_writes))
963 wake_up(&mddev->sb_wait);
966 void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
970 * Increment mddev->pending_writes before returning
984 bio = md_bio_alloc_sync(mddev);
994 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
1000 atomic_inc(&mddev->pending_writes);
1004 int md_super_wait(struct mddev *mddev)
1007 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1008 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
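
The comment at 970 and the negative return of md_super_wait() on MD_SB_NEED_REWRITE combine into the write-and-retry loop visible at 1600-1602 and 2260-2262 below; sketched here in full (the final page argument comes from the surrounding kernel source and is not part of this listing).

	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
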
1016 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
1026 else if (rdev->mddev->reshape_position != MaxSector &&
1027 (rdev->mddev->reshape_backwards ==
1028 (sector >= rdev->mddev->reshape_position)))
1151 * int validate_super(struct mddev *mddev, struct md_rdev *dev)
1152 * Verify that dev is acceptable into mddev.
1153 * The first time, mddev->raid_disks will be 0, and data from
1157 * void sync_super(struct mddev *mddev, struct md_rdev *dev)
1158 * Update the superblock for rdev with data in mddev
1169 int (*validate_super)(struct mddev *mddev,
1172 void (*sync_super)(struct mddev *mddev,
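
An illustrative, trimmed view of how these hooks sit in the super_types[] table that the rest of the file indexes by mddev->major_version (see 2328-2330); only the two methods documented above are shown, while the real table also carries load_super, rdev_size_change and friends.

	static struct super_type super_types[] = {
		[0] = {
			.name		= "0.90.0",
			.validate_super	= super_90_validate,
			.sync_super	= super_90_sync,
		},
		[1] = {
			.name		= "md-1",
			.validate_super	= super_1_validate,
			.sync_super	= super_1_sync,
		},
	};
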
1181 * Check that the given mddev has no bitmap.
1184 * support bitmaps. It prints an error message and returns non-zero if mddev
1188 int md_check_no_bitmap(struct mddev *mddev)
1190 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1193 mdname(mddev), mddev->pers->name);
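
md_check_no_bitmap() is intended to be called early from the ->run() of a personality that cannot support a write-intent bitmap; a minimal sketch mirroring the raid0 pattern, with a hypothetical function name.

	static int example_run(struct mddev *mddev)	/* hypothetical ->run() hook */
	{
		if (md_check_no_bitmap(mddev))
			return -EINVAL;	/* a bitmap is configured but unsupported */
		/* ... normal personality setup ... */
		return 0;
	}
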
1311 static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
1323 if (mddev->raid_disks == 0) {
1324 mddev->major_version = 0;
1325 mddev->minor_version = sb->minor_version;
1326 mddev->patch_version = sb->patch_version;
1327 mddev->external = 0;
1328 mddev->chunk_sectors = sb->chunk_size >> 9;
1329 mddev->ctime = sb->ctime;
1330 mddev->utime = sb->utime;
1331 mddev->level = sb->level;
1332 mddev->clevel[0] = 0;
1333 mddev->layout = sb->layout;
1334 mddev->raid_disks = sb->raid_disks;
1335 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1336 mddev->events = ev1;
1337 mddev->bitmap_info.offset = 0;
1338 mddev->bitmap_info.space = 0;
1340 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1341 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1342 mddev->reshape_backwards = 0;
1344 if (mddev->minor_version >= 91) {
1345 mddev->reshape_position = sb->reshape_position;
1346 mddev->delta_disks = sb->delta_disks;
1347 mddev->new_level = sb->new_level;
1348 mddev->new_layout = sb->new_layout;
1349 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1350 if (mddev->delta_disks < 0)
1351 mddev->reshape_backwards = 1;
1353 mddev->reshape_position = MaxSector;
1354 mddev->delta_disks = 0;
1355 mddev->new_level = mddev->level;
1356 mddev->new_layout = mddev->layout;
1357 mddev->new_chunk_sectors = mddev->chunk_sectors;
1359 if (mddev->level == 0)
1360 mddev->layout = -1;
1363 mddev->recovery_cp = MaxSector;
1367 mddev->recovery_cp = sb->recovery_cp;
1369 mddev->recovery_cp = 0;
1372 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1373 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1374 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1375 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1377 mddev->max_disks = MD_SB_DISKS;
1380 mddev->bitmap_info.file == NULL) {
1381 mddev->bitmap_info.offset =
1382 mddev->bitmap_info.default_offset;
1383 mddev->bitmap_info.space =
1384 mddev->bitmap_info.default_space;
1387 } else if (mddev->pers == NULL) {
1393 if (ev1 < mddev->events)
1395 } else if (mddev->bitmap) {
1399 if (ev1 < mddev->bitmap->events_cleared)
1401 if (ev1 < mddev->events)
1404 if (ev1 < mddev->events)
1409 if (mddev->level != LEVEL_MULTIPATH) {
1415 desc->raid_disk < mddev->raid_disks */) {
1423 if (mddev->minor_version >= 91) {
1440 static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1444 int next_spare = mddev->raid_disks;
1446 /* make rdev->sb match mddev data..
1466 sb->major_version = mddev->major_version;
1467 sb->patch_version = mddev->patch_version;
1469 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1470 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1471 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1472 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1474 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1475 sb->level = mddev->level;
1476 sb->size = mddev->dev_sectors / 2;
1477 sb->raid_disks = mddev->raid_disks;
1478 sb->md_minor = mddev->md_minor;
1480 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1482 sb->events_hi = (mddev->events>>32);
1483 sb->events_lo = (u32)mddev->events;
1485 if (mddev->reshape_position == MaxSector)
1489 sb->reshape_position = mddev->reshape_position;
1490 sb->new_level = mddev->new_level;
1491 sb->delta_disks = mddev->delta_disks;
1492 sb->new_layout = mddev->new_layout;
1493 sb->new_chunk = mddev->new_chunk_sectors << 9;
1495 mddev->minor_version = sb->minor_version;
1496 if (mddev->in_sync)
1498 sb->recovery_cp = mddev->recovery_cp;
1499 sb->cp_events_hi = (mddev->events>>32);
1500 sb->cp_events_lo = (u32)mddev->events;
1501 if (mddev->recovery_cp == MaxSector)
1506 sb->layout = mddev->layout;
1507 sb->chunk_size = mddev->chunk_sectors << 9;
1509 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1513 rdev_for_each(rdev2, mddev) {
1561 for (i=0 ; i < mddev->raid_disks ; i++) {
1587 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1589 if (rdev->mddev->bitmap_info.offset)
1597 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1600 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1602 } while (md_super_wait(rdev->mddev) < 0);
1826 static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
1837 if (mddev->raid_disks == 0) {
1838 mddev->major_version = 1;
1839 mddev->patch_version = 0;
1840 mddev->external = 0;
1841 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1842 mddev->ctime = le64_to_cpu(sb->ctime);
1843 mddev->utime = le64_to_cpu(sb->utime);
1844 mddev->level = le32_to_cpu(sb->level);
1845 mddev->clevel[0] = 0;
1846 mddev->layout = le32_to_cpu(sb->layout);
1847 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1848 mddev->dev_sectors = le64_to_cpu(sb->size);
1849 mddev->events = ev1;
1850 mddev->bitmap_info.offset = 0;
1851 mddev->bitmap_info.space = 0;
1855 mddev->bitmap_info.default_offset = 1024 >> 9;
1856 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1857 mddev->reshape_backwards = 0;
1859 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1860 memcpy(mddev->uuid, sb->set_uuid, 16);
1862 mddev->max_disks = (4096-256)/2;
1865 mddev->bitmap_info.file == NULL) {
1866 mddev->bitmap_info.offset =
1873 if (mddev->minor_version > 0)
1874 mddev->bitmap_info.space = 0;
1875 else if (mddev->bitmap_info.offset > 0)
1876 mddev->bitmap_info.space =
1877 8 - mddev->bitmap_info.offset;
1879 mddev->bitmap_info.space =
1880 -mddev->bitmap_info.offset;
1884 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1885 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1886 mddev->new_level = le32_to_cpu(sb->new_level);
1887 mddev->new_layout = le32_to_cpu(sb->new_layout);
1888 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1889 if (mddev->delta_disks < 0 ||
1890 (mddev->delta_disks == 0 &&
1893 mddev->reshape_backwards = 1;
1895 mddev->reshape_position = MaxSector;
1896 mddev->delta_disks = 0;
1897 mddev->new_level = mddev->level;
1898 mddev->new_layout = mddev->layout;
1899 mddev->new_chunk_sectors = mddev->chunk_sectors;
1902 if (mddev->level == 0 &&
1904 mddev->layout = -1;
1907 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1918 set_bit(MD_HAS_PPL, &mddev->flags);
1920 } else if (mddev->pers == NULL) {
1930 if (ev1 + 1 < mddev->events)
1932 } else if (mddev->bitmap) {
1936 if (ev1 < mddev->bitmap->events_cleared)
1938 if (ev1 < mddev->events)
1941 if (ev1 < mddev->events)
1945 if (mddev->level != LEVEL_MULTIPATH) {
1951 } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
1972 mdname(mddev), rdev->bdev, rdev->desc_nr,
1979 mdname(mddev), rdev->bdev, role, role, freshest->bdev);
2013 &mddev->recovery))
2031 static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
2036 /* make rdev->sb match mddev and rdev data. */
2045 sb->utime = cpu_to_le64((__u64)mddev->utime);
2046 sb->events = cpu_to_le64(mddev->events);
2047 if (mddev->in_sync)
2048 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2049 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2056 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2057 sb->size = cpu_to_le64(mddev->dev_sectors);
2058 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2059 sb->level = cpu_to_le32(mddev->level);
2060 sb->layout = cpu_to_le32(mddev->layout);
2073 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2074 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2084 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2095 if (mddev->reshape_position != MaxSector) {
2097 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2098 sb->new_layout = cpu_to_le32(mddev->new_layout);
2099 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2100 sb->new_level = cpu_to_le32(mddev->new_level);
2101 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2102 if (mddev->delta_disks == 0 &&
2103 mddev->reshape_backwards)
2114 if (mddev_is_clustered(mddev))
2121 md_error(mddev, rdev);
2152 rdev_for_each(rdev2, mddev)
2169 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2172 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2173 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2182 rdev_for_each(rdev2, mddev) {
2222 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2232 } else if (rdev->mddev->bitmap_info.offset) {
2260 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2262 } while (md_super_wait(rdev->mddev) < 0);
2278 if (rdev->mddev->minor_version == 0)
2289 bitmap = rdev->mddev->bitmap;
2290 if (bitmap && !rdev->mddev->bitmap_info.file &&
2291 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2321 static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2323 if (mddev->sync_super) {
2324 mddev->sync_super(mddev, rdev);
2328 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2330 super_types[mddev->major_version].sync_super(mddev, rdev);
2333 static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2361 * Try to register data integrity profile for an mddev
2367 int md_integrity_register(struct mddev *mddev)
2371 if (list_empty(&mddev->disks))
2373 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2375 rdev_for_each(rdev, mddev) {
2397 blk_integrity_register(mddev->gendisk,
2400 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2401 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2403 mdname(mddev));
2414 int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2419 if (!mddev->gendisk)
2422 bi_mddev = blk_get_integrity(mddev->gendisk);
2427 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2429 mdname(mddev), bdevname(rdev->bdev, name));
2437 static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2444 if (find_rdev(mddev, rdev->bdev->bd_dev))
2448 mddev->pers)
2451 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2454 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2455 if (mddev->pers) {
2457 * If mddev->level <= 0, then we don't care
2460 if (mddev->level > 0)
2463 mddev->dev_sectors = rdev->sectors;
2473 if (mddev->pers)
2474 choice = mddev->raid_disks;
2475 while (md_find_rdev_nr_rcu(mddev, choice))
2479 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2486 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2488 mdname(mddev), mddev->max_disks);
2494 rdev->mddev = mddev;
2497 if (mddev->raid_disks)
2498 mddev_create_serial_pool(mddev, rdev, false);
2500 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2512 list_add_rcu(&rdev->same_set, &mddev->disks);
2513 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2516 mddev->recovery_disabled++;
2522 b, mdname(mddev));
2523 mddev_destroy_serial_pool(mddev, rdev, false);
2538 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2541 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2542 rdev->mddev = NULL;
2612 static void export_array(struct mddev *mddev)
2616 while (!list_empty(&mddev->disks)) {
2617 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2621 mddev->raid_disks = 0;
2622 mddev->major_version = 0;
2625 static bool set_in_sync(struct mddev *mddev)
2627 lockdep_assert_held(&mddev->lock);
2628 if (!mddev->in_sync) {
2629 mddev->sync_checkers++;
2630 spin_unlock(&mddev->lock);
2631 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2632 spin_lock(&mddev->lock);
2633 if (!mddev->in_sync &&
2634 percpu_ref_is_zero(&mddev->writes_pending)) {
2635 mddev->in_sync = 1;
2641 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2642 sysfs_notify_dirent_safe(mddev->sysfs_state);
2644 if (--mddev->sync_checkers == 0)
2645 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2647 if (mddev->safemode == 1)
2648 mddev->safemode = 0;
2649 return mddev->in_sync;
2652 static void sync_sbs(struct mddev *mddev, int nospares)
2661 rdev_for_each(rdev, mddev) {
2662 if (rdev->sb_events == mddev->events ||
2665 rdev->sb_events+1 == mddev->events)) {
2669 sync_super(mddev, rdev);
2675 static bool does_sb_need_changing(struct mddev *mddev)
2682 rdev_for_each(iter, mddev)
2694 rdev_for_each(rdev, mddev) {
2705 /* Check if any mddev parameters have changed */
2706 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2707 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2708 (mddev->layout != le32_to_cpu(sb->layout)) ||
2709 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2710 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2716 void md_update_sb(struct mddev *mddev, int force_change)
2724 if (mddev->ro) {
2726 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2731 if (mddev_is_clustered(mddev)) {
2732 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2734 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2736 ret = md_cluster_ops->metadata_update_start(mddev);
2738 if (!does_sb_need_changing(mddev)) {
2740 md_cluster_ops->metadata_update_cancel(mddev);
2741 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2754 rdev_for_each(rdev, mddev) {
2756 mddev->delta_disks >= 0 &&
2757 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2758 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2759 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2762 mddev->curr_resync_completed > rdev->recovery_offset)
2763 rdev->recovery_offset = mddev->curr_resync_completed;
2766 if (!mddev->persistent) {
2767 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2768 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2769 if (!mddev->external) {
2770 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2771 rdev_for_each(rdev, mddev) {
2775 md_error(mddev, rdev);
2782 wake_up(&mddev->sb_wait);
2786 spin_lock(&mddev->lock);
2788 mddev->utime = ktime_get_real_seconds();
2790 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2792 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2800 if (mddev->degraded)
2812 sync_req = mddev->in_sync;
2817 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2818 && mddev->can_decrease_events
2819 && mddev->events != 1) {
2820 mddev->events--;
2821 mddev->can_decrease_events = 0;
2824 mddev->events ++;
2825 mddev->can_decrease_events = nospares;
2833 WARN_ON(mddev->events == 0);
2835 rdev_for_each(rdev, mddev) {
2842 sync_sbs(mddev, nospares);
2843 spin_unlock(&mddev->lock);
2846 mdname(mddev), mddev->in_sync);
2848 if (mddev->queue)
2849 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2851 md_bitmap_update_sb(mddev->bitmap);
2852 rdev_for_each(rdev, mddev) {
2859 md_super_write(mddev,rdev,
2865 rdev->sb_events = mddev->events;
2867 md_super_write(mddev, rdev,
2878 if (mddev->level == LEVEL_MULTIPATH)
2882 if (md_super_wait(mddev) < 0)
2886 if (mddev_is_clustered(mddev) && ret == 0)
2887 md_cluster_ops->metadata_update_finish(mddev);
2889 if (mddev->in_sync != sync_req ||
2890 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2894 wake_up(&mddev->sb_wait);
2895 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2896 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2898 rdev_for_each(rdev, mddev) {
2912 struct mddev *mddev = rdev->mddev;
2916 if (!mddev->pers->hot_remove_disk || add_journal) {
2921 super_types[mddev->major_version].
2922 validate_super(mddev, NULL/*freshest*/, rdev);
2924 mddev_suspend(mddev);
2925 err = mddev->pers->hot_add_disk(mddev, rdev);
2927 mddev_resume(mddev);
2935 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2936 if (mddev->degraded)
2937 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2938 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2939 md_new_event(mddev);
2940 md_wakeup_thread(mddev->thread);
3030 struct mddev *mddev = rdev->mddev;
3034 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
3035 md_error(rdev->mddev, rdev);
3041 if (rdev->mddev->pers) {
3043 remove_and_add_spares(rdev->mddev, rdev);
3049 if (mddev_is_clustered(mddev))
3050 err = md_cluster_ops->remove_disk(mddev, rdev);
3054 if (mddev->pers) {
3055 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3056 md_wakeup_thread(mddev->thread);
3058 md_new_event(mddev);
3063 mddev_create_serial_pool(rdev->mddev, rdev, false);
3067 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3081 md_error(rdev->mddev, rdev);
3086 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3087 md_wakeup_thread(rdev->mddev->thread);
3103 if (rdev->mddev->pers == NULL) {
3124 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3125 md_wakeup_thread(rdev->mddev->thread);
3138 if (rdev->mddev->pers)
3146 if (rdev->mddev->pers)
3153 if (!rdev->mddev->pers)
3163 if (!mddev_is_clustered(rdev->mddev) ||
3170 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3174 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3179 md_update_sb(mddev, 1);
3237 if (rdev->mddev->pers && slot == -1) {
3248 if (rdev->mddev->pers->hot_remove_disk == NULL)
3251 remove_and_add_spares(rdev->mddev, rdev);
3254 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3255 md_wakeup_thread(rdev->mddev->thread);
3256 } else if (rdev->mddev->pers) {
3265 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3268 if (rdev->mddev->pers->hot_add_disk == NULL)
3271 if (slot >= rdev->mddev->raid_disks &&
3272 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3282 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3289 sysfs_link_rdev(rdev->mddev, rdev);
3292 if (slot >= rdev->mddev->raid_disks &&
3293 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3320 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3322 if (rdev->sectors && rdev->mddev->external)
3344 struct mddev *mddev = rdev->mddev;
3349 if (mddev->sync_thread ||
3350 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3358 + mddev->dev_sectors > rdev->sectors)
3367 mddev->reshape_backwards)
3374 !mddev->reshape_backwards)
3377 if (mddev->pers && mddev->persistent &&
3378 !super_types[mddev->major_version]
3383 mddev->reshape_backwards = 1;
3385 mddev->reshape_backwards = 0;
3430 struct mddev *my_mddev = rdev->mddev;
3464 struct mddev *mddev;
3469 for_each_mddev(mddev, tmp) {
3472 rdev_for_each(rdev2, mddev)
3482 mddev_put(mddev);
3524 if (rdev->mddev->pers &&
3592 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3596 if (rdev->mddev->persistent) {
3597 if (rdev->mddev->major_version == 0)
3605 } else if (!rdev->mddev->external) {
3629 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3633 if (rdev->mddev->persistent) {
3634 if (rdev->mddev->major_version == 0)
3638 } else if (!rdev->mddev->external) {
3670 if (!rdev->mddev)
3682 struct mddev *mddev = rdev->mddev;
3688 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3690 if (rdev->mddev == NULL)
3694 mddev_unlock(mddev);
3812 static int analyze_sbs(struct mddev *mddev)
3819 rdev_for_each_safe(rdev, tmp, mddev)
3820 switch (super_types[mddev->major_version].
3821 load_super(rdev, freshest, mddev->minor_version)) {
3839 super_types[mddev->major_version].
3840 validate_super(mddev, NULL/*freshest*/, freshest);
3843 rdev_for_each_safe(rdev, tmp, mddev) {
3844 if (mddev->max_disks &&
3845 (rdev->desc_nr >= mddev->max_disks ||
3846 i > mddev->max_disks)) {
3848 mdname(mddev), bdevname(rdev->bdev, b),
3849 mddev->max_disks);
3854 if (super_types[mddev->major_version].
3855 validate_super(mddev, freshest, rdev)) {
3862 if (mddev->level == LEVEL_MULTIPATH) {
3867 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3914 safe_delay_show(struct mddev *mddev, char *page)
3916 unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
3921 safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3925 if (mddev_is_clustered(mddev)) {
3933 mddev->safemode_delay = 0;
3935 unsigned long old_delay = mddev->safemode_delay;
3940 mddev->safemode_delay = new_delay;
3942 mod_timer(&mddev->safemode_timer, jiffies+1);
3950 level_show(struct mddev *mddev, char *page)
3954 spin_lock(&mddev->lock);
3955 p = mddev->pers;
3958 else if (mddev->clevel[0])
3959 ret = sprintf(page, "%s\n", mddev->clevel);
3960 else if (mddev->level != LEVEL_NONE)
3961 ret = sprintf(page, "%d\n", mddev->level);
3964 spin_unlock(&mddev->lock);
3969 level_store(struct mddev *mddev, const char *buf, size_t len)
3982 rv = mddev_lock(mddev);
3986 if (mddev->pers == NULL) {
3987 strncpy(mddev->clevel, buf, slen);
3988 if (mddev->clevel[slen-1] == '\n')
3990 mddev->clevel[slen] = 0;
3991 mddev->level = LEVEL_NONE;
3996 if (mddev->ro)
4006 if (mddev->sync_thread ||
4007 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4008 mddev->reshape_position != MaxSector ||
4009 mddev->sysfs_active)
4013 if (!mddev->pers->quiesce) {
4015 mdname(mddev), mddev->pers->name);
4039 if (pers == mddev->pers) {
4048 mdname(mddev), clevel);
4053 rdev_for_each(rdev, mddev)
4059 priv = pers->takeover(mddev);
4061 mddev->new_level = mddev->level;
4062 mddev->new_layout = mddev->layout;
4063 mddev->new_chunk_sectors = mddev->chunk_sectors;
4064 mddev->raid_disks -= mddev->delta_disks;
4065 mddev->delta_disks = 0;
4066 mddev->reshape_backwards = 0;
4069 mdname(mddev), clevel);
4075 mddev_suspend(mddev);
4076 mddev_detach(mddev);
4078 spin_lock(&mddev->lock);
4079 oldpers = mddev->pers;
4080 oldpriv = mddev->private;
4081 mddev->pers = pers;
4082 mddev->private = priv;
4083 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4084 mddev->level = mddev->new_level;
4085 mddev->layout = mddev->new_layout;
4086 mddev->chunk_sectors = mddev->new_chunk_sectors;
4087 mddev->delta_disks = 0;
4088 mddev->reshape_backwards = 0;
4089 mddev->degraded = 0;
4090 spin_unlock(&mddev->lock);
4093 mddev->external) {
4101 mddev->in_sync = 0;
4102 mddev->safemode_delay = 0;
4103 mddev->safemode = 0;
4106 oldpers->free(mddev, oldpriv);
4111 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4113 mdname(mddev));
4114 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4115 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4116 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4121 if (mddev->to_remove == NULL)
4122 mddev->to_remove = &md_redundancy_group;
4127 rdev_for_each(rdev, mddev) {
4130 if (rdev->new_raid_disk >= mddev->raid_disks)
4134 sysfs_unlink_rdev(mddev, rdev);
4136 rdev_for_each(rdev, mddev) {
4145 if (sysfs_link_rdev(mddev, rdev))
4147 rdev->raid_disk, mdname(mddev));
4155 mddev->in_sync = 1;
4156 del_timer_sync(&mddev->safemode_timer);
4158 blk_set_stacking_limits(&mddev->queue->limits);
4159 pers->run(mddev);
4160 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4161 mddev_resume(mddev);
4162 if (!mddev->thread)
4163 md_update_sb(mddev, 1);
4164 sysfs_notify_dirent_safe(mddev->sysfs_level);
4165 md_new_event(mddev);
4168 mddev_unlock(mddev);
4176 layout_show(struct mddev *mddev, char *page)
4179 if (mddev->reshape_position != MaxSector &&
4180 mddev->layout != mddev->new_layout)
4182 mddev->new_layout, mddev->layout);
4183 return sprintf(page, "%d\n", mddev->layout);
4187 layout_store(struct mddev *mddev, const char *buf, size_t len)
4195 err = mddev_lock(mddev);
4199 if (mddev->pers) {
4200 if (mddev->pers->check_reshape == NULL)
4202 else if (mddev->ro)
4205 mddev->new_layout = n;
4206 err = mddev->pers->check_reshape(mddev);
4208 mddev->new_layout = mddev->layout;
4211 mddev->new_layout = n;
4212 if (mddev->reshape_position == MaxSector)
4213 mddev->layout = n;
4215 mddev_unlock(mddev);
4222 raid_disks_show(struct mddev *mddev, char *page)
4224 if (mddev->raid_disks == 0)
4226 if (mddev->reshape_position != MaxSector &&
4227 mddev->delta_disks != 0)
4228 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4229 mddev->raid_disks - mddev->delta_disks);
4230 return sprintf(page, "%d\n", mddev->raid_disks);
4233 static int update_raid_disks(struct mddev *mddev, int raid_disks);
4236 raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4245 err = mddev_lock(mddev);
4248 if (mddev->pers)
4249 err = update_raid_disks(mddev, n);
4250 else if (mddev->reshape_position != MaxSector) {
4252 int olddisks = mddev->raid_disks - mddev->delta_disks;
4255 rdev_for_each(rdev, mddev) {
4264 mddev->delta_disks = n - olddisks;
4265 mddev->raid_disks = n;
4266 mddev->reshape_backwards = (mddev->delta_disks < 0);
4268 mddev->raid_disks = n;
4270 mddev_unlock(mddev);
4277 uuid_show(struct mddev *mddev, char *page)
4279 return sprintf(page, "%pU\n", mddev->uuid);
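
Each show/store pair in this region is exported through a struct md_sysfs_entry under /sys/block/mdX/md/. The attribute definitions themselves do not appear in this listing (they do not mention mddev); the wiring sketched below follows the file's __ATTR convention from memory and is illustrative, not verbatim.

	static struct md_sysfs_entry md_uuid = __ATTR_RO(uuid);
	static struct md_sysfs_entry md_layout =
		__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
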
4285 chunk_size_show(struct mddev *mddev, char *page)
4287 if (mddev->reshape_position != MaxSector &&
4288 mddev->chunk_sectors != mddev->new_chunk_sectors)
4290 mddev->new_chunk_sectors << 9,
4291 mddev->chunk_sectors << 9);
4292 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4296 chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4305 err = mddev_lock(mddev);
4308 if (mddev->pers) {
4309 if (mddev->pers->check_reshape == NULL)
4311 else if (mddev->ro)
4314 mddev->new_chunk_sectors = n >> 9;
4315 err = mddev->pers->check_reshape(mddev);
4317 mddev->new_chunk_sectors = mddev->chunk_sectors;
4320 mddev->new_chunk_sectors = n >> 9;
4321 if (mddev->reshape_position == MaxSector)
4322 mddev->chunk_sectors = n >> 9;
4324 mddev_unlock(mddev);
4331 resync_start_show(struct mddev *mddev, char *page)
4333 if (mddev->recovery_cp == MaxSector)
4335 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4339 resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4354 err = mddev_lock(mddev);
4357 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4361 mddev->recovery_cp = n;
4362 if (mddev->pers)
4363 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4365 mddev_unlock(mddev);
4429 array_state_show(struct mddev *mddev, char *page)
4433 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4434 switch(mddev->ro) {
4442 spin_lock(&mddev->lock);
4443 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4445 else if (mddev->in_sync)
4447 else if (mddev->safemode)
4451 spin_unlock(&mddev->lock);
4454 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4457 if (list_empty(&mddev->disks) &&
4458 mddev->raid_disks == 0 &&
4459 mddev->dev_sectors == 0)
4467 static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4468 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4469 static int restart_array(struct mddev *mddev);
4472 array_state_store(struct mddev *mddev, const char *buf, size_t len)
4477 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4481 spin_lock(&mddev->lock);
4483 restart_array(mddev);
4484 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4485 md_wakeup_thread(mddev->thread);
4486 wake_up(&mddev->sb_wait);
4488 restart_array(mddev);
4489 if (!set_in_sync(mddev))
4493 sysfs_notify_dirent_safe(mddev->sysfs_state);
4494 spin_unlock(&mddev->lock);
4497 err = mddev_lock(mddev);
4506 err = do_md_stop(mddev, 0, NULL);
4510 if (mddev->pers)
4511 err = do_md_stop(mddev, 2, NULL);
4518 if (mddev->pers)
4519 err = md_set_readonly(mddev, NULL);
4521 mddev->ro = 1;
4522 set_disk_ro(mddev->gendisk, 1);
4523 err = do_md_run(mddev);
4527 if (mddev->pers) {
4528 if (mddev->ro == 0)
4529 err = md_set_readonly(mddev, NULL);
4530 else if (mddev->ro == 1)
4531 err = restart_array(mddev);
4533 mddev->ro = 2;
4534 set_disk_ro(mddev->gendisk, 0);
4537 mddev->ro = 2;
4538 err = do_md_run(mddev);
4542 if (mddev->pers) {
4543 err = restart_array(mddev);
4546 spin_lock(&mddev->lock);
4547 if (!set_in_sync(mddev))
4549 spin_unlock(&mddev->lock);
4554 if (mddev->pers) {
4555 err = restart_array(mddev);
4558 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4559 wake_up(&mddev->sb_wait);
4562 mddev->ro = 0;
4563 set_disk_ro(mddev->gendisk, 0);
4564 err = do_md_run(mddev);
4575 if (mddev->hold_active == UNTIL_IOCTL)
4576 mddev->hold_active = 0;
4577 sysfs_notify_dirent_safe(mddev->sysfs_state);
4579 mddev_unlock(mddev);
4586 max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4588 atomic_read(&mddev->max_corr_read_errors));
4592 max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4602 atomic_set(&mddev->max_corr_read_errors, n);
4611 null_show(struct mddev *mddev, char *page)
4617 static void flush_rdev_wq(struct mddev *mddev)
4622 rdev_for_each_rcu(rdev, mddev)
4631 new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4657 flush_rdev_wq(mddev);
4658 err = mddev_lock(mddev);
4661 if (mddev->persistent) {
4662 rdev = md_import_device(dev, mddev->major_version,
4663 mddev->minor_version);
4664 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4666 = list_entry(mddev->disks.next,
4668 err = super_types[mddev->major_version]
4669 .load_super(rdev, rdev0, mddev->minor_version);
4673 } else if (mddev->external)
4679 mddev_unlock(mddev);
4682 err = bind_rdev_to_array(rdev, mddev);
4686 mddev_unlock(mddev);
4688 md_new_event(mddev);
4696 bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4702 err = mddev_lock(mddev);
4705 if (!mddev->bitmap)
4717 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4720 md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
4722 mddev_unlock(mddev);
4730 size_show(struct mddev *mddev, char *page)
4733 (unsigned long long)mddev->dev_sectors / 2);
4736 static int update_size(struct mddev *mddev, sector_t num_sectors);
4739 size_store(struct mddev *mddev, const char *buf, size_t len)
4750 err = mddev_lock(mddev);
4753 if (mddev->pers) {
4754 err = update_size(mddev, sectors);
4756 md_update_sb(mddev, 1);
4758 if (mddev->dev_sectors == 0 ||
4759 mddev->dev_sectors > sectors)
4760 mddev->dev_sectors = sectors;
4764 mddev_unlock(mddev);
4778 metadata_show(struct mddev *mddev, char *page)
4780 if (mddev->persistent)
4782 mddev->major_version, mddev->minor_version);
4783 else if (mddev->external)
4784 return sprintf(page, "external:%s\n", mddev->metadata_type);
4790 metadata_store(struct mddev *mddev, const char *buf, size_t len)
4800 err = mddev_lock(mddev);
4804 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4806 else if (!list_empty(&mddev->disks))
4811 mddev->persistent = 0;
4812 mddev->external = 0;
4813 mddev->major_version = 0;
4814 mddev->minor_version = 90;
4819 if (namelen >= sizeof(mddev->metadata_type))
4820 namelen = sizeof(mddev->metadata_type)-1;
4821 strncpy(mddev->metadata_type, buf+9, namelen);
4822 mddev->metadata_type[namelen] = 0;
4823 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4824 mddev->metadata_type[--namelen] = 0;
4825 mddev->persistent = 0;
4826 mddev->external = 1;
4827 mddev->major_version = 0;
4828 mddev->minor_version = 90;
4842 mddev->major_version = major;
4843 mddev->minor_version = minor;
4844 mddev->persistent = 1;
4845 mddev->external = 0;
4848 mddev_unlock(mddev);
4856 action_show(struct mddev *mddev, char *page)
4859 unsigned long recovery = mddev->recovery;
4863 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4875 else if (mddev->reshape_position != MaxSector)
4882 action_store(struct mddev *mddev, const char *page, size_t len)
4884 if (!mddev->pers || !mddev->pers->sync_request)
4890 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4892 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4893 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4894 mddev_lock(mddev) == 0) {
4895 if (work_pending(&mddev->del_work))
4897 if (mddev->sync_thread) {
4898 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4899 md_reap_sync_thread(mddev);
4901 mddev_unlock(mddev);
4903 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4906 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4908 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4909 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4912 if (mddev->pers->start_reshape == NULL)
4914 err = mddev_lock(mddev);
4916 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4918 } else if (mddev->reshape_position == MaxSector ||
4919 mddev->pers->check_reshape == NULL ||
4920 mddev->pers->check_reshape(mddev)) {
4921 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4922 err = mddev->pers->start_reshape(mddev);
4930 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4932 mddev_unlock(mddev);
4936 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4939 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4942 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4943 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4944 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4946 if (mddev->ro == 2) {
4950 mddev->ro = 0;
4951 md_wakeup_thread(mddev->sync_thread);
4953 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4954 md_wakeup_thread(mddev->thread);
4955 sysfs_notify_dirent_safe(mddev->sysfs_action);
4963 last_sync_action_show(struct mddev *mddev, char *page)
4965 return sprintf(page, "%s\n", mddev->last_sync_action);
4971 mismatch_cnt_show(struct mddev *mddev, char *page)
4975 atomic64_read(&mddev->resync_mismatches));
4981 sync_min_show(struct mddev *mddev, char *page)
4983 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4984 mddev->sync_speed_min ? "local": "system");
4988 sync_min_store(struct mddev *mddev, const char *buf, size_t len)
5002 mddev->sync_speed_min = min;
5010 sync_max_show(struct mddev *mddev, char *page)
5012 return sprintf(page, "%d (%s)\n", speed_max(mddev),
5013 mddev->sync_speed_max ? "local": "system");
5017 sync_max_store(struct mddev *mddev, const char *buf, size_t len)
5031 mddev->sync_speed_max = max;
5039 degraded_show(struct mddev *mddev, char *page)
5041 return sprintf(page, "%d\n", mddev->degraded);
5046 sync_force_parallel_show(struct mddev *mddev, char *page)
5048 return sprintf(page, "%d\n", mddev->parallel_resync);
5052 sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
5062 mddev->parallel_resync = n;
5064 if (mddev->sync_thread)
5076 sync_speed_show(struct mddev *mddev, char *page)
5079 if (mddev->curr_resync == 0)
5081 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5082 dt = (jiffies - mddev->resync_mark) / HZ;
5084 db = resync - mddev->resync_mark_cnt;
5091 sync_completed_show(struct mddev *mddev, char *page)
5095 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5098 if (mddev->curr_resync == 1 ||
5099 mddev->curr_resync == 2)
5102 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5103 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5104 max_sectors = mddev->resync_max_sectors;
5106 max_sectors = mddev->dev_sectors;
5108 resync = mddev->curr_resync_completed;
5116 min_sync_show(struct mddev *mddev, char *page)
5119 (unsigned long long)mddev->resync_min);
5122 min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5130 spin_lock(&mddev->lock);
5132 if (min > mddev->resync_max)
5136 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5140 mddev->resync_min = round_down(min, 8);
5144 spin_unlock(&mddev->lock);
5152 max_sync_show(struct mddev *mddev, char *page)
5154 if (mddev->resync_max == MaxSector)
5158 (unsigned long long)mddev->resync_max);
5161 max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5164 spin_lock(&mddev->lock);
5166 mddev->resync_max = MaxSector;
5174 if (max < mddev->resync_min)
5178 if (max < mddev->resync_max &&
5179 mddev->ro == 0 &&
5180 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5184 chunk = mddev->chunk_sectors;
5192 mddev->resync_max = max;
5194 wake_up(&mddev->recovery_wait);
5197 spin_unlock(&mddev->lock);
5205 suspend_lo_show(struct mddev *mddev, char *page)
5207 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5211 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5222 err = mddev_lock(mddev);
5226 if (mddev->pers == NULL ||
5227 mddev->pers->quiesce == NULL)
5229 mddev_suspend(mddev);
5230 mddev->suspend_lo = new;
5231 mddev_resume(mddev);
5235 mddev_unlock(mddev);
5242 suspend_hi_show(struct mddev *mddev, char *page)
5244 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5248 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5259 err = mddev_lock(mddev);
5263 if (mddev->pers == NULL)
5266 mddev_suspend(mddev);
5267 mddev->suspend_hi = new;
5268 mddev_resume(mddev);
5272 mddev_unlock(mddev);
5279 reshape_position_show(struct mddev *mddev, char *page)
5281 if (mddev->reshape_position != MaxSector)
5283 (unsigned long long)mddev->reshape_position);
5289 reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5300 err = mddev_lock(mddev);
5304 if (mddev->pers)
5306 mddev->reshape_position = new;
5307 mddev->delta_disks = 0;
5308 mddev->reshape_backwards = 0;
5309 mddev->new_level = mddev->level;
5310 mddev->new_layout = mddev->layout;
5311 mddev->new_chunk_sectors = mddev->chunk_sectors;
5312 rdev_for_each(rdev, mddev)
5316 mddev_unlock(mddev);
5325 reshape_direction_show(struct mddev *mddev, char *page)
5328 mddev->reshape_backwards ? "backwards" : "forwards");
5332 reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5343 if (mddev->reshape_backwards == backwards)
5346 err = mddev_lock(mddev);
5350 if (mddev->delta_disks)
5352 else if (mddev->persistent &&
5353 mddev->major_version == 0)
5356 mddev->reshape_backwards = backwards;
5357 mddev_unlock(mddev);
5366 array_size_show(struct mddev *mddev, char *page)
5368 if (mddev->external_size)
5370 (unsigned long long)mddev->array_sectors/2);
5376 array_size_store(struct mddev *mddev, const char *buf, size_t len)
5381 err = mddev_lock(mddev);
5386 if (mddev_is_clustered(mddev)) {
5387 mddev_unlock(mddev);
5392 if (mddev->pers)
5393 sectors = mddev->pers->size(mddev, 0, 0);
5395 sectors = mddev->array_sectors;
5397 mddev->external_size = 0;
5401 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5404 mddev->external_size = 1;
5408 mddev->array_sectors = sectors;
5409 if (mddev->pers) {
5410 set_capacity(mddev->gendisk, mddev->array_sectors);
5411 revalidate_disk_size(mddev->gendisk, true);
5414 mddev_unlock(mddev);
5423 consistency_policy_show(struct mddev *mddev, char *page)
5427 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5429 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5431 } else if (mddev->bitmap) {
5433 } else if (mddev->pers) {
5434 if (mddev->pers->sync_request)
5446 consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5450 if (mddev->pers) {
5451 if (mddev->pers->change_consistency_policy)
5452 err = mddev->pers->change_consistency_policy(mddev, buf);
5455 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5456 set_bit(MD_HAS_PPL, &mddev->flags);
5468 static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5470 return sprintf(page, "%d\n", mddev->fail_last_dev);
5478 fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5487 if (value != mddev->fail_last_dev)
5488 mddev->fail_last_dev = value;
5496 static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5498 if (mddev->pers == NULL || (mddev->pers->level != 1))
5501 return sprintf(page, "%d\n", mddev->serialize_policy);
5509 serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5518 if (value == mddev->serialize_policy)
5521 err = mddev_lock(mddev);
5524 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5530 mddev_suspend(mddev);
5532 mddev_create_serial_pool(mddev, NULL, true);
5534 mddev_destroy_serial_pool(mddev, NULL, true);
5535 mddev->serialize_policy = value;
5536 mddev_resume(mddev);
5538 mddev_unlock(mddev);
5595 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5601 if (list_empty(&mddev->all_mddevs)) {
5605 mddev_get(mddev);
5608 rv = entry->show(mddev, page);
5609 mddev_put(mddev);
5618 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5626 if (list_empty(&mddev->all_mddevs)) {
5630 mddev_get(mddev);
5632 rv = entry->store(mddev, page, length);
5633 mddev_put(mddev);
5639 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5641 if (mddev->sysfs_state)
5642 sysfs_put(mddev->sysfs_state);
5643 if (mddev->sysfs_level)
5644 sysfs_put(mddev->sysfs_level);
5646 if (mddev->gendisk)
5647 del_gendisk(mddev->gendisk);
5648 if (mddev->queue)
5649 blk_cleanup_queue(mddev->queue);
5650 if (mddev->gendisk)
5651 put_disk(mddev->gendisk);
5652 percpu_ref_exit(&mddev->writes_pending);
5654 bioset_exit(&mddev->bio_set);
5655 bioset_exit(&mddev->sync_set);
5656 kfree(mddev);
5673 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5675 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5676 kobject_del(&mddev->kobj);
5677 kobject_put(&mddev->kobj);
5682 int mddev_init_writes_pending(struct mddev *mddev)
5684 if (mddev->writes_pending.percpu_count_ptr)
5686 if (percpu_ref_init(&mddev->writes_pending, no_op,
5690 percpu_ref_put(&mddev->writes_pending);
5707 struct mddev *mddev = mddev_find_or_alloc(dev);
5714 if (!mddev)
5717 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5719 unit = MINOR(mddev->unit) >> shift;
5729 if (mddev->gendisk)
5735 struct mddev *mddev2;
5750 mddev->hold_active = UNTIL_STOP;
5753 mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
5754 if (!mddev->queue)
5757 blk_set_stacking_limits(&mddev->queue->limits);
5761 blk_cleanup_queue(mddev->queue);
5762 mddev->queue = NULL;
5765 disk->major = MAJOR(mddev->unit);
5774 disk->private_data = mddev;
5775 disk->queue = mddev->queue;
5776 blk_queue_write_cache(mddev->queue, true, true);
5783 mddev->gendisk = disk;
5786 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5795 if (mddev->kobj.sd &&
5796 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5800 if (!error && mddev->kobj.sd) {
5801 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5802 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5803 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5805 mddev_put(mddev);
5847 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5849 mddev->safemode = 1;
5850 if (mddev->external)
5851 sysfs_notify_dirent_safe(mddev->sysfs_state);
5853 md_wakeup_thread(mddev->thread);
5858 int md_run(struct mddev *mddev)
5864 if (list_empty(&mddev->disks))
5868 if (mddev->pers)
5871 if (mddev->sysfs_active)
5877 if (!mddev->raid_disks) {
5878 if (!mddev->persistent)
5880 err = analyze_sbs(mddev);
5885 if (mddev->level != LEVEL_NONE)
5886 request_module("md-level-%d", mddev->level);
5887 else if (mddev->clevel[0])
5888 request_module("md-%s", mddev->clevel);
5895 mddev->has_superblocks = false;
5896 rdev_for_each(rdev, mddev) {
5901 if (mddev->ro != 1 &&
5904 mddev->ro = 1;
5905 if (mddev->gendisk)
5906 set_disk_ro(mddev->gendisk, 1);
5910 mddev->has_superblocks = true;
5919 if (mddev->dev_sectors &&
5920 rdev->data_offset + mddev->dev_sectors
5923 mdname(mddev));
5930 mdname(mddev));
5937 if (!bioset_initialized(&mddev->bio_set)) {
5938 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5942 if (!bioset_initialized(&mddev->sync_set)) {
5943 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5949 pers = find_pers(mddev->level, mddev->clevel);
5952 if (mddev->level != LEVEL_NONE)
5954 mddev->level);
5957 mddev->clevel);
5962 if (mddev->level != pers->level) {
5963 mddev->level = pers->level;
5964 mddev->new_level = pers->level;
5966 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5968 if (mddev->reshape_position != MaxSector &&
5984 rdev_for_each(rdev, mddev)
5985 rdev_for_each(rdev2, mddev) {
5990 mdname(mddev),
6001 mddev->recovery = 0;
6003 mddev->resync_max_sectors = mddev->dev_sectors;
6005 mddev->ok_start_degraded = start_dirty_degraded;
6007 if (start_readonly && mddev->ro == 0)
6008 mddev->ro = 2; /* read-only, but switch on first write */
6010 err = pers->run(mddev);
6013 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
6014 WARN_ONCE(!mddev->external_size,
6018 (unsigned long long)mddev->array_sectors / 2,
6019 (unsigned long long)pers->size(mddev, 0, 0) / 2);
6023 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
6026 bitmap = md_bitmap_create(mddev, -1);
6030 mdname(mddev), err);
6032 mddev->bitmap = bitmap;
6038 if (mddev->bitmap_info.max_write_behind > 0) {
6041 rdev_for_each(rdev, mddev) {
6046 if (create_pool && mddev->serial_info_pool == NULL) {
6047 mddev->serial_info_pool =
6050 if (!mddev->serial_info_pool) {
6057 if (mddev->queue) {
6060 rdev_for_each(rdev, mddev) {
6067 if (mddev->degraded)
6070 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6072 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6075 if (mddev->kobj.sd &&
6076 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6078 mdname(mddev));
6079 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6080 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6081 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6082 } else if (mddev->ro == 2) /* auto-readonly not meaningful */
6083 mddev->ro = 0;
6085 atomic_set(&mddev->max_corr_read_errors,
6087 mddev->safemode = 0;
6088 if (mddev_is_clustered(mddev))
6089 mddev->safemode_delay = 0;
6091 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6092 mddev->in_sync = 1;
6094 spin_lock(&mddev->lock);
6095 mddev->pers = pers;
6096 spin_unlock(&mddev->lock);
6097 rdev_for_each(rdev, mddev)
6099 sysfs_link_rdev(mddev, rdev); /* failure here is OK */
6101 if (mddev->degraded && !mddev->ro)
6105 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6106 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6108 if (mddev->sb_flags)
6109 md_update_sb(mddev, 0);
6111 md_new_event(mddev);
6115 mddev_detach(mddev);
6116 if (mddev->private)
6117 pers->free(mddev, mddev->private);
6118 mddev->private = NULL;
6120 md_bitmap_destroy(mddev);
6122 bioset_exit(&mddev->bio_set);
6123 bioset_exit(&mddev->sync_set);
6128 int do_md_run(struct mddev *mddev)
6132 set_bit(MD_NOT_READY, &mddev->flags);
6133 err = md_run(mddev);
6136 err = md_bitmap_load(mddev);
6138 md_bitmap_destroy(mddev);
6142 if (mddev_is_clustered(mddev))
6143 md_allow_write(mddev);
6146 md_start(mddev);
6148 md_wakeup_thread(mddev->thread);
6149 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
6151 set_capacity(mddev->gendisk, mddev->array_sectors);
6152 revalidate_disk_size(mddev->gendisk, true);
6153 clear_bit(MD_NOT_READY, &mddev->flags);
6154 mddev->changed = 1;
6155 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6156 sysfs_notify_dirent_safe(mddev->sysfs_state);
6157 sysfs_notify_dirent_safe(mddev->sysfs_action);
6158 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6160 clear_bit(MD_NOT_READY, &mddev->flags);
6164 int md_start(struct mddev *mddev)
6168 if (mddev->pers->start) {
6169 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6170 md_wakeup_thread(mddev->thread);
6171 ret = mddev->pers->start(mddev);
6172 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6173 md_wakeup_thread(mddev->sync_thread);
6179 static int restart_array(struct mddev *mddev)
6181 struct gendisk *disk = mddev->gendisk;
6187 if (list_empty(&mddev->disks))
6189 if (!mddev->pers)
6191 if (!mddev->ro)
6195 rdev_for_each_rcu(rdev, mddev) {
6203 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6209 mddev->safemode = 0;
6210 mddev->ro = 0;
6212 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6214 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6215 md_wakeup_thread(mddev->thread);
6216 md_wakeup_thread(mddev->sync_thread);
6217 sysfs_notify_dirent_safe(mddev->sysfs_state);
6221 static void md_clean(struct mddev *mddev)
6223 mddev->array_sectors = 0;
6224 mddev->external_size = 0;
6225 mddev->dev_sectors = 0;
6226 mddev->raid_disks = 0;
6227 mddev->recovery_cp = 0;
6228 mddev->resync_min = 0;
6229 mddev->resync_max = MaxSector;
6230 mddev->reshape_position = MaxSector;
6231 mddev->external = 0;
6232 mddev->persistent = 0;
6233 mddev->level = LEVEL_NONE;
6234 mddev->clevel[0] = 0;
6235 mddev->flags = 0;
6236 mddev->sb_flags = 0;
6237 mddev->ro = 0;
6238 mddev->metadata_type[0] = 0;
6239 mddev->chunk_sectors = 0;
6240 mddev->ctime = mddev->utime = 0;
6241 mddev->layout = 0;
6242 mddev->max_disks = 0;
6243 mddev->events = 0;
6244 mddev->can_decrease_events = 0;
6245 mddev->delta_disks = 0;
6246 mddev->reshape_backwards = 0;
6247 mddev->new_level = LEVEL_NONE;
6248 mddev->new_layout = 0;
6249 mddev->new_chunk_sectors = 0;
6250 mddev->curr_resync = 0;
6251 atomic64_set(&mddev->resync_mismatches, 0);
6252 mddev->suspend_lo = mddev->suspend_hi = 0;
6253 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6254 mddev->recovery = 0;
6255 mddev->in_sync = 0;
6256 mddev->changed = 0;
6257 mddev->degraded = 0;
6258 mddev->safemode = 0;
6259 mddev->private = NULL;
6260 mddev->cluster_info = NULL;
6261 mddev->bitmap_info.offset = 0;
6262 mddev->bitmap_info.default_offset = 0;
6263 mddev->bitmap_info.default_space = 0;
6264 mddev->bitmap_info.chunksize = 0;
6265 mddev->bitmap_info.daemon_sleep = 0;
6266 mddev->bitmap_info.max_write_behind = 0;
6267 mddev->bitmap_info.nodes = 0;
6270 static void __md_stop_writes(struct mddev *mddev)
6272 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6273 if (work_pending(&mddev->del_work))
6275 if (mddev->sync_thread) {
6276 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6277 md_reap_sync_thread(mddev);
6280 del_timer_sync(&mddev->safemode_timer);
6282 if (mddev->pers && mddev->pers->quiesce) {
6283 mddev->pers->quiesce(mddev, 1);
6284 mddev->pers->quiesce(mddev, 0);
6286 md_bitmap_flush(mddev);
6288 if (mddev->ro == 0 &&
6289 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6290 mddev->sb_flags)) {
6292 if (!mddev_is_clustered(mddev))
6293 mddev->in_sync = 1;
6294 md_update_sb(mddev, 1);
6297 mddev->serialize_policy = 0;
6298 mddev_destroy_serial_pool(mddev, NULL, true);
6301 void md_stop_writes(struct mddev *mddev)
6303 mddev_lock_nointr(mddev);
6304 __md_stop_writes(mddev);
6305 mddev_unlock(mddev);
6309 static void mddev_detach(struct mddev *mddev)
6311 md_bitmap_wait_behind_writes(mddev);
6312 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6313 mddev->pers->quiesce(mddev, 1);
6314 mddev->pers->quiesce(mddev, 0);
6316 md_unregister_thread(&mddev->thread);
6317 if (mddev->queue)
6318 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
6321 static void __md_stop(struct mddev *mddev)
6323 struct md_personality *pers = mddev->pers;
6324 md_bitmap_destroy(mddev);
6325 mddev_detach(mddev);
6327 if (mddev->event_work.func)
6329 spin_lock(&mddev->lock);
6330 mddev->pers = NULL;
6331 spin_unlock(&mddev->lock);
6332 pers->free(mddev, mddev->private);
6333 mddev->private = NULL;
6334 if (pers->sync_request && mddev->to_remove == NULL)
6335 mddev->to_remove = &md_redundancy_group;
6337 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6340 void md_stop(struct mddev *mddev)
6342 lockdep_assert_held(&mddev->reconfig_mutex);
6347 __md_stop_writes(mddev);
6348 __md_stop(mddev);
6349 bioset_exit(&mddev->bio_set);
6350 bioset_exit(&mddev->sync_set);
6355 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6360 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6362 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6363 md_wakeup_thread(mddev->thread);
6365 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6366 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6367 if (mddev->sync_thread)
6370 wake_up_process(mddev->sync_thread->tsk);
6372 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6374 mddev_unlock(mddev);
6376 &mddev->recovery));
6377 wait_event(mddev->sb_wait,
6378 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6379 mddev_lock_nointr(mddev);
6381 mutex_lock(&mddev->open_mutex);
6382 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6383 mddev->sync_thread ||
6384 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6385 pr_warn("md: %s still in use.\n", mdname(mddev));
6387 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6388 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6389 md_wakeup_thread(mddev->thread);
6394 if (mddev->pers) {
6395 __md_stop_writes(mddev);
6398 if (mddev->ro == 1)
6400 mddev->ro = 1;
6401 set_disk_ro(mddev->gendisk, 1);
6402 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6403 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6404 md_wakeup_thread(mddev->thread);
6405 sysfs_notify_dirent_safe(mddev->sysfs_state);
6409 mutex_unlock(&mddev->open_mutex);
6417 static int do_md_stop(struct mddev *mddev, int mode,
6420 struct gendisk *disk = mddev->gendisk;
6424 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6426 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6427 md_wakeup_thread(mddev->thread);
6429 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6430 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6431 if (mddev->sync_thread)
6434 wake_up_process(mddev->sync_thread->tsk);
6436 mddev_unlock(mddev);
6437 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6439 &mddev->recovery)));
6440 mddev_lock_nointr(mddev);
6442 mutex_lock(&mddev->open_mutex);
6443 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6444 mddev->sysfs_active ||
6445 mddev->sync_thread ||
6446 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6447 pr_warn("md: %s still in use.\n", mdname(mddev));
6448 mutex_unlock(&mddev->open_mutex);
6450 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6451 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6452 md_wakeup_thread(mddev->thread);
6456 if (mddev->pers) {
6457 if (mddev->ro)
6460 __md_stop_writes(mddev);
6461 __md_stop(mddev);
6464 sysfs_notify_dirent_safe(mddev->sysfs_state);
6466 rdev_for_each(rdev, mddev)
6468 sysfs_unlink_rdev(mddev, rdev);
6471 mutex_unlock(&mddev->open_mutex);
6472 mddev->changed = 1;
6475 if (mddev->ro)
6476 mddev->ro = 0;
6478 mutex_unlock(&mddev->open_mutex);
6483 pr_info("md: %s stopped.\n", mdname(mddev));
6485 if (mddev->bitmap_info.file) {
6486 struct file *f = mddev->bitmap_info.file;
6487 spin_lock(&mddev->lock);
6488 mddev->bitmap_info.file = NULL;
6489 spin_unlock(&mddev->lock);
6492 mddev->bitmap_info.offset = 0;
6494 export_array(mddev);
6496 md_clean(mddev);
6497 if (mddev->hold_active == UNTIL_STOP)
6498 mddev->hold_active = 0;
6500 md_new_event(mddev);
6501 sysfs_notify_dirent_safe(mddev->sysfs_state);
6506 static void autorun_array(struct mddev *mddev)
6511 if (list_empty(&mddev->disks))
6516 rdev_for_each(rdev, mddev) {
6522 err = do_md_run(mddev);
6525 do_md_stop(mddev, 0, NULL);
6544 struct mddev *mddev;
6566 * mddev.
6583 mddev = mddev_find(dev);
6584 if (!mddev)
6587 if (mddev_lock(mddev))
6588 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6589 else if (mddev->raid_disks || mddev->major_version
6590 || !list_empty(&mddev->disks)) {
6592 mdname(mddev), bdevname(rdev0->bdev, b));
6593 mddev_unlock(mddev);
6595 pr_debug("md: created %s\n", mdname(mddev));
6596 mddev->persistent = 1;
6599 if (bind_rdev_to_array(rdev, mddev))
6602 autorun_array(mddev);
6603 mddev_unlock(mddev);
6612 mddev_put(mddev);
6632 static int get_array_info(struct mddev *mddev, void __user *arg)
6640 rdev_for_each_rcu(rdev, mddev) {
6657 info.major_version = mddev->major_version;
6658 info.minor_version = mddev->minor_version;
6660 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6661 info.level = mddev->level;
6662 info.size = mddev->dev_sectors / 2;
6663 if (info.size != mddev->dev_sectors / 2) /* overflow */
6666 info.raid_disks = mddev->raid_disks;
6667 info.md_minor = mddev->md_minor;
6668 info.not_persistent = !mddev->persistent;
6670 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6672 if (mddev->in_sync)
6674 if (mddev->bitmap && mddev->bitmap_info.offset)
6676 if (mddev_is_clustered(mddev))
6683 info.layout = mddev->layout;
6684 info.chunk_size = mddev->chunk_sectors << 9;
6692 static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6703 spin_lock(&mddev->lock);
6705 if (mddev->bitmap_info.file) {
6706 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6714 spin_unlock(&mddev->lock);
6724 static int get_disk_info(struct mddev *mddev, void __user * arg)
6733 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6764 int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
6770 if (mddev_is_clustered(mddev) &&
6772 pr_warn("%s: Cannot add to clustered mddev.\n",
6773 mdname(mddev));
6780 if (!mddev->raid_disks) {
6783 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6789 if (!list_empty(&mddev->disks)) {
6791 = list_entry(mddev->disks.next,
6793 err = super_types[mddev->major_version]
6794 .load_super(rdev, rdev0, mddev->minor_version);
6803 err = bind_rdev_to_array(rdev, mddev);
6814 if (mddev->pers) {
6816 if (!mddev->pers->hot_add_disk) {
6818 mdname(mddev));
6821 if (mddev->persistent)
6822 rdev = md_import_device(dev, mddev->major_version,
6823 mddev->minor_version);
6832 if (!mddev->persistent) {
6834 info->raid_disk < mddev->raid_disks) {
6842 super_types[mddev->major_version].
6843 validate_super(mddev, NULL/*freshest*/, rdev);
6868 rdev_for_each(rdev2, mddev) {
6874 if (has_journal || mddev->bitmap) {
6883 if (mddev_is_clustered(mddev)) {
6888 err = md_cluster_ops->add_new_disk(mddev, rdev);
6897 err = bind_rdev_to_array(rdev, mddev);
6902 if (mddev_is_clustered(mddev)) {
6905 err = md_cluster_ops->new_disk_ack(mddev,
6912 md_cluster_ops->add_new_disk_cancel(mddev);
6926 if (mddev->major_version != 0) {
6927 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6940 if (info->raid_disk < mddev->raid_disks)
6945 if (rdev->raid_disk < mddev->raid_disks)
6954 if (!mddev->persistent) {
6961 err = bind_rdev_to_array(rdev, mddev);
6971 static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6976 if (!mddev->pers)
6979 rdev = find_rdev(mddev, dev);
6987 remove_and_add_spares(mddev, rdev);
6993 if (mddev_is_clustered(mddev)) {
6994 if (md_cluster_ops->remove_disk(mddev, rdev))
6999 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7000 if (mddev->thread)
7001 md_wakeup_thread(mddev->thread);
7003 md_update_sb(mddev, 1);
7004 md_new_event(mddev);
7009 bdevname(rdev->bdev, b), mdname(mddev));
7013 static int hot_add_disk(struct mddev *mddev, dev_t dev)
7019 if (!mddev->pers)
7022 if (mddev->major_version != 0) {
7024 mdname(mddev));
7027 if (!mddev->pers->hot_add_disk) {
7029 mdname(mddev));
7040 if (mddev->persistent)
7049 bdevname(rdev->bdev, b), mdname(mddev));
7057 err = bind_rdev_to_array(rdev, mddev);
7068 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7069 if (!mddev->thread)
7070 md_update_sb(mddev, 1);
7075 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7076 md_wakeup_thread(mddev->thread);
7077 md_new_event(mddev);
7085 static int set_bitmap_file(struct mddev *mddev, int fd)
7089 if (mddev->pers) {
7090 if (!mddev->pers->quiesce || !mddev->thread)
7092 if (mddev->recovery || mddev->sync_thread)
7101 if (mddev->bitmap || mddev->bitmap_info.file)
7107 mdname(mddev));
7114 mdname(mddev));
7118 mdname(mddev));
7122 mdname(mddev));
7129 mddev->bitmap_info.file = f;
7130 mddev->bitmap_info.offset = 0; /* file overrides offset */
7131 } else if (mddev->bitmap == NULL)
7134 if (mddev->pers) {
7138 bitmap = md_bitmap_create(mddev, -1);
7139 mddev_suspend(mddev);
7141 mddev->bitmap = bitmap;
7142 err = md_bitmap_load(mddev);
7146 md_bitmap_destroy(mddev);
7149 mddev_resume(mddev);
7151 mddev_suspend(mddev);
7152 md_bitmap_destroy(mddev);
7153 mddev_resume(mddev);
7157 struct file *f = mddev->bitmap_info.file;
7159 spin_lock(&mddev->lock);
7160 mddev->bitmap_info.file = NULL;
7161 spin_unlock(&mddev->lock);
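
set_bitmap_file() above follows the standard reconfiguration pattern: the bitmap is only created, loaded or destroyed while the array is quiesced with mddev_suspend(), and I/O is released again with mddev_resume(). A minimal sketch of that bracketing, assuming the caller already holds the reconfig mutex (error handling omitted; the bitmap step itself is only indicated by a comment):

	mddev_suspend(mddev);                  /* drain in-flight I/O first */
	/* ... create, load or destroy mddev->bitmap here ... */
	mddev_resume(mddev);
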
7182 int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
7194 mddev->major_version = info->major_version;
7195 mddev->minor_version = info->minor_version;
7196 mddev->patch_version = info->patch_version;
7197 mddev->persistent = !info->not_persistent;
7201 mddev->ctime = ktime_get_real_seconds();
7204 mddev->major_version = MD_MAJOR_VERSION;
7205 mddev->minor_version = MD_MINOR_VERSION;
7206 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7207 mddev->ctime = ktime_get_real_seconds();
7209 mddev->level = info->level;
7210 mddev->clevel[0] = 0;
7211 mddev->dev_sectors = 2 * (sector_t)info->size;
7212 mddev->raid_disks = info->raid_disks;
7217 mddev->recovery_cp = MaxSector;
7219 mddev->recovery_cp = 0;
7220 mddev->persistent = !info->not_persistent;
7221 mddev->external = 0;
7223 mddev->layout = info->layout;
7224 if (mddev->level == 0)
7226 mddev->layout = -1;
7227 mddev->chunk_sectors = info->chunk_size >> 9;
7229 if (mddev->persistent) {
7230 mddev->max_disks = MD_SB_DISKS;
7231 mddev->flags = 0;
7232 mddev->sb_flags = 0;
7234 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7236 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7237 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7238 mddev->bitmap_info.offset = 0;
7240 mddev->reshape_position = MaxSector;
7245 get_random_bytes(mddev->uuid, 16);
7247 mddev->new_level = mddev->level;
7248 mddev->new_chunk_sectors = mddev->chunk_sectors;
7249 mddev->new_layout = mddev->layout;
7250 mddev->delta_disks = 0;
7251 mddev->reshape_backwards = 0;
7256 void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7258 lockdep_assert_held(&mddev->reconfig_mutex);
7260 if (mddev->external_size)
7263 mddev->array_sectors = array_sectors;
7267 static int update_size(struct mddev *mddev, sector_t num_sectors)
7272 sector_t old_dev_sectors = mddev->dev_sectors;
7274 if (mddev->pers->resize == NULL)
7285 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7286 mddev->sync_thread)
7288 if (mddev->ro)
7291 rdev_for_each(rdev, mddev) {
7299 rv = mddev->pers->resize(mddev, num_sectors);
7301 if (mddev_is_clustered(mddev))
7302 md_cluster_ops->update_size(mddev, old_dev_sectors);
7303 else if (mddev->queue) {
7304 set_capacity(mddev->gendisk, mddev->array_sectors);
7305 revalidate_disk_size(mddev->gendisk, true);
7311 static int update_raid_disks(struct mddev *mddev, int raid_disks)
7316 if (mddev->pers->check_reshape == NULL)
7318 if (mddev->ro)
7321 (mddev->max_disks && raid_disks >= mddev->max_disks))
7323 if (mddev->sync_thread ||
7324 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7325 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7326 mddev->reshape_position != MaxSector)
7329 rdev_for_each(rdev, mddev) {
7330 if (mddev->raid_disks < raid_disks &&
7333 if (mddev->raid_disks > raid_disks &&
7338 mddev->delta_disks = raid_disks - mddev->raid_disks;
7339 if (mddev->delta_disks < 0)
7340 mddev->reshape_backwards = 1;
7341 else if (mddev->delta_disks > 0)
7342 mddev->reshape_backwards = 0;
7344 rv = mddev->pers->check_reshape(mddev);
7346 mddev->delta_disks = 0;
7347 mddev->reshape_backwards = 0;
7360 static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7367 if (mddev->bitmap && mddev->bitmap_info.offset)
7370 if (mddev->major_version != info->major_version ||
7371 mddev->minor_version != info->minor_version ||
7372 /* mddev->patch_version != info->patch_version || */
7373 mddev->ctime != info->ctime ||
7374 mddev->level != info->level ||
7375 /* mddev->layout != info->layout || */
7376 mddev->persistent != !info->not_persistent ||
7377 mddev->chunk_sectors != info->chunk_size >> 9 ||
7383 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7385 if (mddev->raid_disks != info->raid_disks)
7387 if (mddev->layout != info->layout)
7396 if (mddev->layout != info->layout) {
7401 if (mddev->pers->check_reshape == NULL)
7404 mddev->new_layout = info->layout;
7405 rv = mddev->pers->check_reshape(mddev);
7407 mddev->new_layout = mddev->layout;
7411 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7412 rv = update_size(mddev, (sector_t)info->size * 2);
7414 if (mddev->raid_disks != info->raid_disks)
7415 rv = update_raid_disks(mddev, info->raid_disks);
7418 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7422 if (mddev->recovery || mddev->sync_thread) {
7429 if (mddev->bitmap) {
7433 if (mddev->bitmap_info.default_offset == 0) {
7437 mddev->bitmap_info.offset =
7438 mddev->bitmap_info.default_offset;
7439 mddev->bitmap_info.space =
7440 mddev->bitmap_info.default_space;
7441 bitmap = md_bitmap_create(mddev, -1);
7442 mddev_suspend(mddev);
7444 mddev->bitmap = bitmap;
7445 rv = md_bitmap_load(mddev);
7449 md_bitmap_destroy(mddev);
7450 mddev_resume(mddev);
7453 if (!mddev->bitmap) {
7457 if (mddev->bitmap->storage.file) {
7461 if (mddev->bitmap_info.nodes) {
7463 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7466 md_cluster_ops->unlock_all_bitmaps(mddev);
7470 mddev->bitmap_info.nodes = 0;
7471 md_cluster_ops->leave(mddev);
7473 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7475 mddev_suspend(mddev);
7476 md_bitmap_destroy(mddev);
7477 mddev_resume(mddev);
7478 mddev->bitmap_info.offset = 0;
7481 md_update_sb(mddev, 1);
7487 static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7492 if (mddev->pers == NULL)
7496 rdev = md_find_rdev_rcu(mddev, dev);
7500 md_error(mddev, rdev);
7516 struct mddev *mddev = bdev->bd_disk->private_data;
7520 geo->cylinders = mddev->array_sectors / 8;
7554 struct mddev *mddev = NULL;
7586 mddev = bdev->bd_disk->private_data;
7588 if (!mddev) {
7596 if (!mddev->raid_disks && !mddev->external)
7599 err = get_array_info(mddev, argp);
7603 if (!mddev->raid_disks && !mddev->external)
7606 err = get_disk_info(mddev, argp);
7610 err = set_disk_faulty(mddev, new_decode_dev(arg));
7614 err = get_bitmap_file(mddev, argp);
7620 flush_rdev_wq(mddev);
7624 wait_event_interruptible_timeout(mddev->sb_wait,
7626 &mddev->recovery),
7632 mutex_lock(&mddev->open_mutex);
7633 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7634 mutex_unlock(&mddev->open_mutex);
7638 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7639 mutex_unlock(&mddev->open_mutex);
7644 mutex_unlock(&mddev->open_mutex);
7647 err = mddev_lock(mddev);
7662 if (mddev->pers) {
7663 err = update_array_info(mddev, &info);
7670 if (!list_empty(&mddev->disks)) {
7671 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7675 if (mddev->raid_disks) {
7676 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7680 err = md_set_array_info(mddev, &info);
7693 if ((!mddev->raid_disks && !mddev->external)
7706 err = restart_array(mddev);
7710 err = do_md_stop(mddev, 0, bdev);
7714 err = md_set_readonly(mddev, bdev);
7718 err = hot_remove_disk(mddev, new_decode_dev(arg));
7724 * So require mddev->pers and MD_DISK_SYNC.
7726 if (mddev->pers) {
7734 err = md_add_new_disk(mddev, &info);
7746 /* if the bdev is going readonly the value of mddev->ro
7753 if (mddev->ro != 1)
7759 if (mddev->pers) {
7760 err = restart_array(mddev);
7762 mddev->ro = 2;
7763 set_disk_ro(mddev->gendisk, 0);
7773 if (mddev->ro && mddev->pers) {
7774 if (mddev->ro == 2) {
7775 mddev->ro = 0;
7776 sysfs_notify_dirent_safe(mddev->sysfs_state);
7777 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7782 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7783 mddev_unlock(mddev);
7784 wait_event(mddev->sb_wait,
7785 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7786 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7787 mddev_lock_nointr(mddev);
7802 err = md_add_new_disk(mddev, &info);
7807 if (mddev_is_clustered(mddev))
7808 md_cluster_ops->new_disk_ack(mddev, false);
7814 err = hot_add_disk(mddev, new_decode_dev(arg));
7818 err = do_md_run(mddev);
7822 err = set_bitmap_file(mddev, (int)arg);
7831 if (mddev->hold_active == UNTIL_IOCTL &&
7833 mddev->hold_active = 0;
7834 mddev_unlock(mddev);
7837 clear_bit(MD_CLOSING, &mddev->flags);
7863 * Succeed if we can lock the mddev, which confirms that
7866 struct mddev *mddev = mddev_find(bdev->bd_dev);
7869 if (!mddev)
7872 if (mddev->gendisk != bdev->bd_disk) {
7876 mddev_put(mddev);
7878 if (work_pending(&mddev->del_work))
7882 BUG_ON(mddev != bdev->bd_disk->private_data);
7884 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7887 if (test_bit(MD_CLOSING, &mddev->flags)) {
7888 mutex_unlock(&mddev->open_mutex);
7894 atomic_inc(&mddev->openers);
7895 mutex_unlock(&mddev->open_mutex);
7900 mddev_put(mddev);
7906 struct mddev *mddev = disk->private_data;
7908 BUG_ON(!mddev);
7909 atomic_dec(&mddev->openers);
7910 mddev_put(mddev);
7915 struct mddev *mddev = disk->private_data;
7918 if (mddev->changed)
7920 mddev->changed = 0;
7992 struct mddev *mddev, const char *name)
8003 thread->mddev = mddev;
8007 mdname(thread->mddev),
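
md_register_thread() takes the run callback, the owning mddev and a short name that is combined with mdname() to form the kthread's name; md_start_sync() further down in this listing uses it to create mddev->sync_thread around md_do_sync(). A hedged sketch of registering a helper thread from a personality (my_run and "myhelper" are illustrative names, not from md.c):

	static void my_run(struct md_thread *thread)
	{
		struct mddev *mddev = thread->mddev;

		md_check_recovery(mddev);   /* what raid personalities typically do first */
		/* ... then service the personality's own pending work ... */
	}

	/* e.g. in the personality's setup path */
	mddev->thread = md_register_thread(my_run, mddev, "myhelper");
	if (!mddev->thread)
		return -ENOMEM;
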
8040 void md_error(struct mddev *mddev, struct md_rdev *rdev)
8045 if (!mddev->pers || !mddev->pers->error_handler)
8047 mddev->pers->error_handler(mddev, rdev);
8048 if (mddev->degraded)
8049 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8051 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8052 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8053 md_wakeup_thread(mddev->thread);
8054 if (mddev->event_work.func)
8055 queue_work(md_misc_wq, &mddev->event_work);
8056 md_new_event(mddev);
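
md_error() is how a personality reports a failed member device: it forwards to pers->error_handler(), flags recovery and wakes the main thread. A minimal sketch of a typical caller, assuming a bio completion handler whose bi_private points at the md_rdev (that wiring is an assumption for the sketch, not taken from md.c):

	static void my_write_end_io(struct bio *bio)
	{
		struct md_rdev *rdev = bio->bi_private;

		if (bio->bi_status)
			md_error(rdev->mddev, rdev);   /* mark the member device Faulty */
		bio_put(bio);
	}
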
8081 static int status_resync(struct seq_file *seq, struct mddev *mddev)
8089 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8090 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8091 max_sectors = mddev->resync_max_sectors;
8093 max_sectors = mddev->dev_sectors;
8095 resync = mddev->curr_resync;
8097 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8103 resync -= atomic_read(&mddev->recovery_active);
8106 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8109 rdev_for_each(rdev, mddev)
8117 if (mddev->reshape_position != MaxSector)
8123 if (mddev->recovery_cp < MaxSector) {
8160 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8162 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8164 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8187 dt = ((jiffies - mddev->resync_mark) / HZ);
8190 curr_mark_cnt = mddev->curr_mark_cnt;
8191 recovery_active = atomic_read(&mddev->recovery_active);
8192 resync_mark_cnt = mddev->resync_mark_cnt;
8213 struct mddev *mddev;
8228 mddev = list_entry(tmp, struct mddev, all_mddevs);
8229 mddev_get(mddev);
8231 return mddev;
8242 struct mddev *next_mddev, *mddev = v;
8252 tmp = mddev->all_mddevs.next;
8254 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8262 mddev_put(mddev);
8269 struct mddev *mddev = v;
8271 if (mddev && v != (void*)1 && v != (void*)2)
8272 mddev_put(mddev);
8277 struct mddev *mddev = v;
8298 spin_lock(&mddev->lock);
8299 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8300 seq_printf(seq, "%s : %sactive", mdname(mddev),
8301 mddev->pers ? "" : "in");
8302 if (mddev->pers) {
8303 if (mddev->ro == 1)
8305 if (mddev->ro == 2)
8307 seq_printf(seq, " %s", mddev->pers->name);
8312 rdev_for_each_rcu(rdev, mddev) {
8332 if (!list_empty(&mddev->disks)) {
8333 if (mddev->pers)
8336 mddev->array_sectors / 2);
8341 if (mddev->persistent) {
8342 if (mddev->major_version != 0 ||
8343 mddev->minor_version != 90) {
8345 mddev->major_version,
8346 mddev->minor_version);
8348 } else if (mddev->external)
8350 mddev->metadata_type);
8354 if (mddev->pers) {
8355 mddev->pers->status(seq, mddev);
8357 if (mddev->pers->sync_request) {
8358 if (status_resync(seq, mddev))
8364 md_bitmap_status(seq, mddev->bitmap);
8368 spin_unlock(&mddev->lock);
8466 int md_setup_cluster(struct mddev *mddev, int nodes)
8480 ret = md_cluster_ops->join(mddev, nodes);
8482 mddev->safemode_delay = 0;
8486 void md_cluster_stop(struct mddev *mddev)
8490 md_cluster_ops->leave(mddev);
8494 static int is_mddev_idle(struct mddev *mddev, int init)
8502 rdev_for_each_rcu(rdev, mddev) {
8537 void md_done_sync(struct mddev *mddev, int blocks, int ok)
8540 atomic_sub(blocks, &mddev->recovery_active);
8541 wake_up(&mddev->recovery_wait);
8543 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8544 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8545 md_wakeup_thread(mddev->thread);
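
md_done_sync() is the personality's way of telling md_do_sync() that a batch of resync/recovery I/O has finished: the sector count is subtracted from mddev->recovery_active and ok == 0 aborts the resync. A short sketch (nr_sectors and ok are illustrative variable names):

	/* when a resync request of 'nr_sectors' completes; 'ok' is whether all I/O succeeded */
	md_done_sync(mddev, nr_sectors, ok);
	/* on failure (ok == 0) this sets MD_RECOVERY_INTR and MD_RECOVERY_ERROR */
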
8551 /* md_write_start(mddev, bi)
8558 bool md_write_start(struct mddev *mddev, struct bio *bi)
8565 BUG_ON(mddev->ro == 1);
8566 if (mddev->ro == 2) {
8568 mddev->ro = 0;
8569 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8570 md_wakeup_thread(mddev->thread);
8571 md_wakeup_thread(mddev->sync_thread);
8575 percpu_ref_get(&mddev->writes_pending);
8577 if (mddev->safemode == 1)
8578 mddev->safemode = 0;
8580 if (mddev->in_sync || mddev->sync_checkers) {
8581 spin_lock(&mddev->lock);
8582 if (mddev->in_sync) {
8583 mddev->in_sync = 0;
8584 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8585 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8586 md_wakeup_thread(mddev->thread);
8589 spin_unlock(&mddev->lock);
8593 sysfs_notify_dirent_safe(mddev->sysfs_state);
8594 if (!mddev->has_superblocks)
8596 wait_event(mddev->sb_wait,
8597 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8598 mddev->suspended);
8599 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8600 percpu_ref_put(&mddev->writes_pending);
8615 void md_write_inc(struct mddev *mddev, struct bio *bi)
8619 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8620 percpu_ref_get(&mddev->writes_pending);
8624 void md_write_end(struct mddev *mddev)
8626 percpu_ref_put(&mddev->writes_pending);
8628 if (mddev->safemode == 2)
8629 md_wakeup_thread(mddev->thread);
8630 else if (mddev->safemode_delay)
8634 mod_timer(&mddev->safemode_timer,
8635 roundup(jiffies, mddev->safemode_delay) +
8636 mddev->safemode_delay);
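
md_write_start() and md_write_end() bracket every write a personality handles: start may block until the "array dirty" superblock state is committed and returns false if the array is being suspended; end drops the writes_pending reference and re-arms the safemode timer. A minimal sketch of how a make_request method might use the pair (everything except the two md_* calls is illustrative; real drivers call md_write_end() from the completion path, it is shown inline only to keep the sketch short):

	static bool my_make_request(struct mddev *mddev, struct bio *bio)
	{
		if (bio_data_dir(bio) == WRITE) {
			if (!md_write_start(mddev, bio))
				return false;      /* array is suspending; md core will retry */
			/* ... clone and submit the write to member devices ... */
			md_write_end(mddev);
		}
		return true;
	}
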
8641 /* md_allow_write(mddev)
8644 * attempting a GFP_KERNEL allocation while holding the mddev lock.
8647 void md_allow_write(struct mddev *mddev)
8649 if (!mddev->pers)
8651 if (mddev->ro)
8653 if (!mddev->pers->sync_request)
8656 spin_lock(&mddev->lock);
8657 if (mddev->in_sync) {
8658 mddev->in_sync = 0;
8659 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8660 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8661 if (mddev->safemode_delay &&
8662 mddev->safemode == 0)
8663 mddev->safemode = 1;
8664 spin_unlock(&mddev->lock);
8665 md_update_sb(mddev, 0);
8666 sysfs_notify_dirent_safe(mddev->sysfs_state);
8668 wait_event(mddev->sb_wait,
8669 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8671 spin_unlock(&mddev->lock);
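
md_allow_write() exists so that a caller holding the reconfig mutex can mark the array dirty (and push the superblock out) before doing a GFP_KERNEL allocation; otherwise the allocation could block on writeback that itself needs a superblock update. A sketch of the intended pattern, with new_conf purely illustrative:

	/* called with mddev->reconfig_mutex held */
	md_allow_write(mddev);

	new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL);   /* now safe to sleep here */
	if (!new_conf)
		return -ENOMEM;
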
8680 struct mddev *mddev = thread->mddev;
8681 struct mddev *mddev2;
8697 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8698 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8700 if (mddev->ro) {/* never try to sync a read-only array */
8701 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8705 if (mddev_is_clustered(mddev)) {
8706 ret = md_cluster_ops->resync_start(mddev);
8710 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8711 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8712 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8713 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8714 && ((unsigned long long)mddev->curr_resync_completed
8715 < (unsigned long long)mddev->resync_max_sectors))
8719 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8720 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8723 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8728 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8733 mddev->last_sync_action = action ?: desc;
8746 * to 1 if we choose to yield (based arbitrarily on address of mddev structure).
8753 mddev->curr_resync = 2;
8756 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8759 if (mddev2 == mddev)
8761 if (!mddev->parallel_resync
8763 && match_mddev_units(mddev, mddev2)) {
8765 if (mddev < mddev2 && mddev->curr_resync == 2) {
8767 mddev->curr_resync = 1;
8770 if (mddev > mddev2 && mddev->curr_resync == 1)
8780 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8781 mddev2->curr_resync >= mddev->curr_resync) {
8785 desc, mdname(mddev),
8798 } while (mddev->curr_resync < 2);
8801 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8805 max_sectors = mddev->resync_max_sectors;
8806 atomic64_set(&mddev->resync_mismatches, 0);
8808 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8809 j = mddev->resync_min;
8810 else if (!mddev->bitmap)
8811 j = mddev->recovery_cp;
8813 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8814 max_sectors = mddev->resync_max_sectors;
8820 if (mddev_is_clustered(mddev) &&
8821 mddev->reshape_position != MaxSector)
8822 j = mddev->reshape_position;
8825 max_sectors = mddev->dev_sectors;
8828 rdev_for_each_rcu(rdev, mddev)
8845 if (mddev->bitmap) {
8846 mddev->pers->quiesce(mddev, 1);
8847 mddev->pers->quiesce(mddev, 0);
8851 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8852 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8854 speed_max(mddev), desc);
8856 is_mddev_idle(mddev, 1); /* this initializes IO event counters */
8864 mddev->resync_mark = mark[last_mark];
8865 mddev->resync_mark_cnt = mark_cnt[last_mark];
8874 atomic_set(&mddev->recovery_active, 0);
8879 desc, mdname(mddev));
8880 mddev->curr_resync = j;
8882 mddev->curr_resync = 3; /* no longer delayed */
8883 mddev->curr_resync_completed = j;
8884 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8885 md_new_event(mddev);
8894 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8895 ((mddev->curr_resync > mddev->curr_resync_completed &&
8896 (mddev->curr_resync - mddev->curr_resync_completed)
8899 (j - mddev->curr_resync_completed)*2
8900 >= mddev->resync_max - mddev->curr_resync_completed ||
8901 mddev->curr_resync_completed > mddev->resync_max
8904 wait_event(mddev->recovery_wait,
8905 atomic_read(&mddev->recovery_active) == 0);
8906 mddev->curr_resync_completed = j;
8907 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8908 j > mddev->recovery_cp)
8909 mddev->recovery_cp = j;
8911 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8912 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8915 while (j >= mddev->resync_max &&
8916 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8922 wait_event_interruptible(mddev->recovery_wait,
8923 mddev->resync_max > j
8925 &mddev->recovery));
8928 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8931 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8933 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8939 atomic_add(sectors, &mddev->recovery_active);
8942 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8950 mddev->curr_resync = j;
8951 mddev->curr_mark_cnt = io_sectors;
8956 md_new_event(mddev);
8967 mddev->resync_mark = mark[next];
8968 mddev->resync_mark_cnt = mark_cnt[next];
8970 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8974 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8987 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8988 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8989 / ((jiffies - mddev->resync_mark)/HZ + 1) + 1;
8991 if (currspeed > speed_min(mddev)) {
8992 if (currspeed > speed_max(mddev)) {
8996 if (!is_mddev_idle(mddev, 0)) {
9001 wait_event(mddev->recovery_wait,
9002 !atomic_read(&mddev->recovery_active));
9006 pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
9007 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
9013 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
9015 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9016 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9017 mddev->curr_resync > 3) {
9018 mddev->curr_resync_completed = mddev->curr_resync;
9019 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9021 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9023 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9024 mddev->curr_resync > 3) {
9025 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9026 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9027 if (mddev->curr_resync >= mddev->recovery_cp) {
9029 desc, mdname(mddev));
9031 &mddev->recovery))
9032 mddev->recovery_cp =
9033 mddev->curr_resync_completed;
9035 mddev->recovery_cp =
9036 mddev->curr_resync;
9039 mddev->recovery_cp = MaxSector;
9041 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9042 mddev->curr_resync = MaxSector;
9043 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9044 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9046 rdev_for_each_rcu(rdev, mddev)
9048 mddev->delta_disks >= 0 &&
9052 rdev->recovery_offset < mddev->curr_resync)
9053 rdev->recovery_offset = mddev->curr_resync;
9062 set_mask_bits(&mddev->sb_flags, 0,
9065 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9066 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9067 mddev->delta_disks > 0 &&
9068 mddev->pers->finish_reshape &&
9069 mddev->pers->size &&
9070 mddev->queue) {
9071 mddev_lock_nointr(mddev);
9072 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9073 mddev_unlock(mddev);
9074 if (!mddev_is_clustered(mddev)) {
9075 set_capacity(mddev->gendisk, mddev->array_sectors);
9076 revalidate_disk_size(mddev->gendisk, true);
9080 spin_lock(&mddev->lock);
9081 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9083 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9084 mddev->resync_min = 0;
9085 mddev->resync_max = MaxSector;
9086 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9087 mddev->resync_min = mddev->curr_resync_completed;
9088 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9089 mddev->curr_resync = 0;
9090 spin_unlock(&mddev->lock);
9093 md_wakeup_thread(mddev->thread);
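
The currspeed computation in md_do_sync() above is the resync throttle: sectors completed since the last mark are halved to KB and divided by the elapsed seconds, with +1 fudge terms to avoid a zero divisor and a zero rate. A worked example with made-up numbers:

	/*
	 * 409600 sectors (~200 MiB) done since resync_mark, 10 s elapsed:
	 *   currspeed = (409600 / 2) / (10 + 1) + 1 = 18619 KB/sec
	 * Above speed_max() the thread sleeps; above speed_min() it only
	 * backs off when is_mddev_idle() reports other I/O on the members.
	 */
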
9098 static int remove_and_add_spares(struct mddev *mddev,
9106 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9110 rdev_for_each(rdev, mddev) {
9128 rdev_for_each(rdev, mddev) {
9136 if (mddev->pers->hot_remove_disk(
9137 mddev, rdev) == 0) {
9138 sysfs_unlink_rdev(mddev, rdev);
9148 if (removed && mddev->kobj.sd)
9149 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9154 rdev_for_each(rdev, mddev) {
9169 if (mddev->ro &&
9176 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9178 sysfs_link_rdev(mddev, rdev);
9181 md_new_event(mddev);
9182 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9187 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9193 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9195 mddev->sync_thread = md_register_thread(md_do_sync,
9196 mddev,
9198 if (!mddev->sync_thread) {
9200 mdname(mddev));
9202 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9203 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9204 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9205 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9206 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9209 &mddev->recovery))
9210 if (mddev->sysfs_action)
9211 sysfs_notify_dirent_safe(mddev->sysfs_action);
9213 md_wakeup_thread(mddev->sync_thread);
9214 sysfs_notify_dirent_safe(mddev->sysfs_action);
9215 md_new_event(mddev);
9240 void md_check_recovery(struct mddev *mddev)
9242 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9246 set_bit(MD_UPDATING_SB, &mddev->flags);
9248 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9249 md_update_sb(mddev, 0);
9250 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9251 wake_up(&mddev->sb_wait);
9254 if (mddev->suspended)
9257 if (mddev->bitmap)
9258 md_bitmap_daemon_work(mddev);
9261 if (mddev->pers->sync_request && !mddev->external) {
9263 mdname(mddev));
9264 mddev->safemode = 2;
9269 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9272 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9273 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9274 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9275 (mddev->external == 0 && mddev->safemode == 1) ||
9276 (mddev->safemode == 2
9277 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9281 if (mddev_trylock(mddev)) {
9283 bool try_set_sync = mddev->safemode != 0;
9285 if (!mddev->external && mddev->safemode == 1)
9286 mddev->safemode = 0;
9288 if (mddev->ro) {
9290 if (!mddev->external && mddev->in_sync)
9296 rdev_for_each(rdev, mddev)
9305 remove_and_add_spares(mddev, NULL);
9309 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9310 md_reap_sync_thread(mddev);
9311 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9312 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9313 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9317 if (mddev_is_clustered(mddev)) {
9322 rdev_for_each_safe(rdev, tmp, mddev) {
9329 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9330 spin_lock(&mddev->lock);
9331 set_in_sync(mddev);
9332 spin_unlock(&mddev->lock);
9335 if (mddev->sb_flags)
9336 md_update_sb(mddev, 0);
9338 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9339 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9341 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9344 if (mddev->sync_thread) {
9345 md_reap_sync_thread(mddev);
9351 mddev->curr_resync_completed = 0;
9352 spin_lock(&mddev->lock);
9353 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9354 spin_unlock(&mddev->lock);
9358 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9359 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9361 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9362 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9371 if (mddev->reshape_position != MaxSector) {
9372 if (mddev->pers->check_reshape == NULL ||
9373 mddev->pers->check_reshape(mddev) != 0)
9376 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9377 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9378 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9379 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9380 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9381 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9382 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9383 } else if (mddev->recovery_cp < MaxSector) {
9384 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9385 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9386 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9390 if (mddev->pers->sync_request) {
9396 md_bitmap_write_all(mddev->bitmap);
9398 INIT_WORK(&mddev->del_work, md_start_sync);
9399 queue_work(md_misc_wq, &mddev->del_work);
9403 if (!mddev->sync_thread) {
9404 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9407 &mddev->recovery))
9408 if (mddev->sysfs_action)
9409 sysfs_notify_dirent_safe(mddev->sysfs_action);
9412 wake_up(&mddev->sb_wait);
9413 mddev_unlock(mddev);
9418 void md_reap_sync_thread(struct mddev *mddev)
9421 sector_t old_dev_sectors = mddev->dev_sectors;
9425 md_unregister_thread(&mddev->sync_thread);
9426 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9427 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9428 mddev->degraded != mddev->raid_disks) {
9431 if (mddev->pers->spare_active(mddev)) {
9432 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9433 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9436 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9437 mddev->pers->finish_reshape) {
9438 mddev->pers->finish_reshape(mddev);
9439 if (mddev_is_clustered(mddev))
9446 if (!mddev->degraded)
9447 rdev_for_each(rdev, mddev)
9450 md_update_sb(mddev, 1);
9454 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9455 md_cluster_ops->resync_finish(mddev);
9456 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9457 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9458 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9459 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9460 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9461 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9467 if (mddev_is_clustered(mddev) && is_reshaped
9468 && !test_bit(MD_CLOSING, &mddev->flags))
9469 md_cluster_ops->update_size(mddev, old_dev_sectors);
9472 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9473 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9474 sysfs_notify_dirent_safe(mddev->sysfs_action);
9475 md_new_event(mddev);
9476 if (mddev->event_work.func)
9477 queue_work(md_misc_wq, &mddev->event_work);
9481 void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9488 rdev_dec_pending(rdev, mddev);
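
md_wait_for_blocked_rdev() parks the caller until the Blocked flag clears on a member device (i.e. until the pending superblock or bad-block update has been recorded), then drops the nr_pending reference the caller took. A hedged sketch of the usual write-path pattern, borrowed in spirit from the raid personalities (variable names are illustrative):

	if (test_bit(Blocked, &rdev->flags)) {
		atomic_inc(&rdev->nr_pending);     /* md_wait_for_blocked_rdev() drops this */
		md_wait_for_blocked_rdev(rdev, mddev);
	}
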
9492 void md_finish_reshape(struct mddev *mddev)
9497 rdev_for_each(rdev, mddev) {
9513 struct mddev *mddev = rdev->mddev;
9525 set_mask_bits(&mddev->sb_flags, 0,
9527 md_wakeup_thread(rdev->mddev->thread);
9553 struct mddev *mddev;
9556 for_each_mddev(mddev, tmp) {
9557 if (mddev_trylock(mddev)) {
9558 if (mddev->pers)
9559 __md_stop_writes(mddev);
9560 if (mddev->persistent)
9561 mddev->safemode = 2;
9562 mddev_unlock(mddev);
9637 static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9648 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9649 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9653 md_bitmap_update_sb(mddev->bitmap);
9657 rdev_for_each_safe(rdev2, tmp, mddev) {
9682 ret = remove_and_add_spares(mddev, rdev2);
9685 /* wakeup mddev->thread here, so array could
9687 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9688 md_wakeup_thread(mddev->thread);
9696 md_error(mddev, rdev2);
9702 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9703 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9709 * Since mddev->delta_disks has already updated in update_raid_disks,
9712 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9718 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9719 if (mddev->pers->update_reshape_pos)
9720 mddev->pers->update_reshape_pos(mddev);
9721 if (mddev->pers->start_reshape)
9722 mddev->pers->start_reshape(mddev);
9723 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9724 mddev->reshape_position != MaxSector &&
9727 mddev->reshape_position = MaxSector;
9728 if (mddev->pers->update_reshape_pos)
9729 mddev->pers->update_reshape_pos(mddev);
9733 mddev->events = le64_to_cpu(sb->events);
9736 static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9750 err = super_types[mddev->major_version].
9751 load_super(rdev, NULL, mddev->minor_version);
9772 * device In_sync and mddev->degraded
9776 mddev->pers->spare_active(mddev))
9777 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9783 void md_reload_sb(struct mddev *mddev, int nr)
9789 rdev_for_each_rcu(iter, mddev) {
9801 err = read_rdev(mddev, rdev);
9805 check_sb_changes(mddev, rdev);
9808 rdev_for_each_rcu(rdev, mddev) {
9810 read_rdev(mddev, rdev);
9886 struct mddev *mddev;
9910 for_each_mddev(mddev, tmp) {
9911 export_array(mddev);
9912 mddev->ctime = 0;
9913 mddev->hold_active = 0;
9916 * iteration. As the mddev is now fully clear, this will
9917 * schedule the mddev for destruction by a workqueue, and the