Lines Matching refs:sh
26 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
119 static inline int raid6_d0(struct stripe_head *sh)
121 if (sh->ddf_layout)
125 if (sh->qd_idx == sh->disks - 1)
128 return sh->qd_idx + 1;
141 static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
146 if (sh->ddf_layout)
148 if (idx == sh->pd_idx)
150 if (idx == sh->qd_idx)
152 if (!sh->ddf_layout)
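For readers following the refs, the two helpers matched at 119-152 can be reconstructed almost entirely from the lines above. The sketch below fills in the elided branches from my reading of upstream raid5.c and should be treated as an approximation, not the verbatim source:

static inline int raid6_d0(struct stripe_head *sh)
{
	if (sh->ddf_layout)
		/* DDF layouts walk the syndrome sources starting at device 0 */
		return 0;
	if (sh->qd_idx == sh->disks - 1)
		/* MD layouts start just after the Q block, wrapping to 0 */
		return 0;
	else
		return sh->qd_idx + 1;
}

static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
			     int *count, int syndrome_disks)
{
	int slot = *count;

	if (sh->ddf_layout)
		(*count)++;
	if (idx == sh->pd_idx)
		return syndrome_disks;		/* P goes in the second-to-last slot */
	if (idx == sh->qd_idx)
		return syndrome_disks + 1;	/* Q goes in the last slot */
	if (!sh->ddf_layout)
		(*count)++;
	return slot;
}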
159 static int stripe_operations_active(struct stripe_head *sh)
161 return sh->check_state || sh->reconstruct_state ||
162 test_bit(STRIPE_BIOFILL_RUN, &sh->state) ||
163 test_bit(STRIPE_COMPUTE_RUN, &sh->state);
166 static bool stripe_is_lowprio(struct stripe_head *sh)
168 return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) ||
169 test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) &&
170 !test_bit(STRIPE_R5C_CACHING, &sh->state);
173 static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
174 __must_hold(&sh->raid_conf->device_lock)
176 struct r5conf *conf = sh->raid_conf;
179 int i, cpu = sh->cpu;
183 sh->cpu = cpu;
186 if (list_empty(&sh->lru)) {
189 if (stripe_is_lowprio(sh))
190 list_add_tail(&sh->lru, &group->loprio_list);
192 list_add_tail(&sh->lru, &group->handle_list);
194 sh->group = group;
202 group = conf->worker_groups + cpu_to_group(sh->cpu);
206 queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
213 queue_work_on(sh->cpu, raid5_wq,
220 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
227 BUG_ON(!list_empty(&sh->lru));
231 for (i = sh->disks; i--; )
232 if (test_bit(R5_InJournal, &sh->dev[i].flags))
241 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) ||
243 !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) {
244 if (test_bit(STRIPE_R5C_CACHING, &sh->state))
245 r5c_make_stripe_write_out(sh);
246 set_bit(STRIPE_HANDLE, &sh->state);
249 if (test_bit(STRIPE_HANDLE, &sh->state)) {
250 if (test_bit(STRIPE_DELAYED, &sh->state) &&
251 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
252 list_add_tail(&sh->lru, &conf->delayed_list);
253 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
254 sh->bm_seq - conf->seq_write > 0)
255 list_add_tail(&sh->lru, &conf->bitmap_list);
257 clear_bit(STRIPE_DELAYED, &sh->state);
258 clear_bit(STRIPE_BIT_DELAY, &sh->state);
260 if (stripe_is_lowprio(sh))
261 list_add_tail(&sh->lru,
264 list_add_tail(&sh->lru,
267 raid5_wakeup_stripe_thread(sh);
273 BUG_ON(stripe_operations_active(sh));
274 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
279 if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
281 list_add_tail(&sh->lru, temp_inactive_list);
283 WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags));
285 list_add_tail(&sh->lru, temp_inactive_list);
288 if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state))
290 if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state))
292 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list);
300 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list);
306 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
310 if (atomic_dec_and_test(&sh->count))
311 do_release_stripe(conf, sh, temp_inactive_list);
367 struct stripe_head *sh, *t;
373 llist_for_each_entry_safe(sh, t, head, release_list) {
376 /* sh could be re-added after STRIPE_ON_RELEASE_LIST is cleared */
378 clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
384 hash = sh->hash_lock_index;
385 __release_stripe(conf, sh, &temp_inactive_list[hash]);
392 void raid5_release_stripe(struct stripe_head *sh)
394 struct r5conf *conf = sh->raid_conf;
402 if (atomic_add_unless(&sh->count, -1, 1))
406 test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
408 wakeup = llist_add(&sh->release_list, &conf->released_stripes);
414 if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
416 hash = sh->hash_lock_index;
417 do_release_stripe(conf, sh, &list);
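The raid5_release_stripe() fragments at 392-417 show a lockless fast path; below is a condensed sketch of that flow. The thread-wakeup and inactive-list handling are filled in from my reading of upstream, so the calls not present in the matched lines are assumptions:

void raid5_release_stripe(struct stripe_head *sh)
{
	struct r5conf *conf = sh->raid_conf;
	struct list_head list;
	unsigned long flags;
	int hash;

	/* Fast path: count is still > 1, so this cannot be the final drop. */
	if (atomic_add_unless(&sh->count, -1, 1))
		return;

	/* Defer the final drop to a lock-free list drained by the md thread. */
	if (conf->mddev->thread &&
	    !test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) {
		if (llist_add(&sh->release_list, &conf->released_stripes))
			md_wakeup_thread(conf->mddev->thread);
		return;
	}

	/* Slow path: take device_lock only when dropping the last reference. */
	if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) {
		INIT_LIST_HEAD(&list);
		hash = sh->hash_lock_index;
		do_release_stripe(conf, sh, &list);
		spin_unlock_irqrestore(&conf->device_lock, flags);
		release_inactive_stripe_list(conf, &list, hash);
	}
}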
423 static inline void remove_hash(struct stripe_head *sh)
426 (unsigned long long)sh->sector);
428 hlist_del_init(&sh->hash);
431 static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
433 struct hlist_head *hp = stripe_hash(conf, sh->sector);
436 (unsigned long long)sh->sector);
438 hlist_add_head(&sh->hash, hp);
444 struct stripe_head *sh = NULL;
450 sh = list_entry(first, struct stripe_head, lru);
452 remove_hash(sh);
454 BUG_ON(hash != sh->hash_lock_index);
458 return sh;
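get_free_stripe() (lines 444-458) pops a stripe from the per-hash inactive list; filling in the lines the search did not match (the inactive-list bookkeeping is from my recollection of upstream, so an assumption), it looks roughly like:

static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
{
	struct stripe_head *sh = NULL;
	struct list_head *first;

	if (list_empty(conf->inactive_list + hash))
		goto out;
	first = (conf->inactive_list + hash)->next;
	sh = list_entry(first, struct stripe_head, lru);
	list_del_init(first);
	remove_hash(sh);			/* no longer findable by sector */
	atomic_inc(&conf->active_stripes);
	BUG_ON(hash != sh->hash_lock_index);
	if (list_empty(conf->inactive_list + hash))
		atomic_inc(&conf->empty_inactive_list_nr);
out:
	return sh;
}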
462 static void free_stripe_pages(struct stripe_head *sh)
468 if (!sh->pages)
471 for (i = 0; i < sh->nr_pages; i++) {
472 p = sh->pages[i];
475 sh->pages[i] = NULL;
479 static int alloc_stripe_pages(struct stripe_head *sh, gfp_t gfp)
484 for (i = 0; i < sh->nr_pages; i++) {
486 if (sh->pages[i])
491 free_stripe_pages(sh);
494 sh->pages[i] = p;
500 init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks)
504 if (sh->pages)
507 /* Each of the sh->dev[i] needs one conf->stripe_size */
511 sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
512 if (!sh->pages)
514 sh->nr_pages = nr_pages;
515 sh->stripes_per_page = cnt;
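The sizing math elided from init_stripe_shared_pages() (lines 500-515) packs several stripe_size units into each page; a sketch with the arithmetic filled in (the two computed lines are my assumption of the upstream formula):

static int
init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks)
{
	int nr_pages, cnt;

	if (sh->pages)
		return 0;

	/* Each of the sh->dev[i] needs one conf->stripe_size worth of space */
	cnt = PAGE_SIZE / conf->stripe_size;	/* stripes that fit in one page */
	nr_pages = (disks + cnt - 1) / cnt;	/* round up to cover all devices */

	sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!sh->pages)
		return -ENOMEM;
	sh->nr_pages = nr_pages;
	sh->stripes_per_page = cnt;
	return 0;
}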
520 static void shrink_buffers(struct stripe_head *sh)
523 int num = sh->raid_conf->pool_size;
529 WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
530 p = sh->dev[i].page;
533 sh->dev[i].page = NULL;
538 sh->dev[i].page = NULL;
539 free_stripe_pages(sh); /* Free pages */
543 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
546 int num = sh->raid_conf->pool_size;
555 sh->dev[i].page = page;
556 sh->dev[i].orig_page = page;
557 sh->dev[i].offset = 0;
560 if (alloc_stripe_pages(sh, gfp))
564 sh->dev[i].page = raid5_get_dev_page(sh, i);
565 sh->dev[i].orig_page = sh->dev[i].page;
566 sh->dev[i].offset = raid5_get_page_offset(sh, i);
573 struct stripe_head *sh);
575 static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
577 struct r5conf *conf = sh->raid_conf;
580 BUG_ON(atomic_read(&sh->count) != 0);
581 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
582 BUG_ON(stripe_operations_active(sh));
583 BUG_ON(sh->batch_head);
589 sh->generation = conf->generation - previous;
590 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
591 sh->sector = sector;
592 stripe_set_idx(sector, conf, previous, sh);
593 sh->state = 0;
595 for (i = sh->disks; i--; ) {
596 struct r5dev *dev = &sh->dev[i];
601 (unsigned long long)sh->sector, i, dev->toread,
607 dev->sector = raid5_compute_blocknr(sh, i, previous);
611 sh->overwrite_disks = 0;
612 insert_hash(conf, sh);
613 sh->cpu = smp_processor_id();
614 set_bit(STRIPE_BATCH_READY, &sh->state);
620 struct stripe_head *sh;
623 hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
624 if (sh->sector == sector && sh->generation == generation)
625 return sh;
634 struct stripe_head *sh;
636 sh = __find_stripe(conf, sector, generation);
637 if (!sh)
640 if (atomic_inc_not_zero(&sh->count))
641 return sh;
645 * be on a list (sh->lru). Must remove the stripe from the list that
650 if (!atomic_read(&sh->count)) {
651 if (!test_bit(STRIPE_HANDLE, &sh->state))
653 BUG_ON(list_empty(&sh->lru) &&
654 !test_bit(STRIPE_EXPANDING, &sh->state));
658 list_del_init(&sh->lru);
662 if (sh->group) {
663 sh->group->stripes_cnt--;
664 sh->group = NULL;
667 atomic_inc(&sh->count);
670 return sh;
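The lookup fragments at 620-670 combine a hash walk with a two-speed refcount grab; here is a condensed sketch (stripe accounting and lockdep annotations omitted, and the device_lock section simplified from my reading of upstream):

static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
					 short generation)
{
	struct stripe_head *sh;

	hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
		if (sh->sector == sector && sh->generation == generation)
			return sh;
	return NULL;
}

static struct stripe_head *find_get_stripe(struct r5conf *conf,
		sector_t sector, short generation, int hash)
{
	struct stripe_head *sh = __find_stripe(conf, sector, generation);

	if (!sh)
		return NULL;
	if (atomic_inc_not_zero(&sh->count))
		return sh;

	/*
	 * The stripe is idle (count == 0) but still hashed, so it must
	 * be on a list (sh->lru).  Must remove the stripe from that list
	 * before raising the count.
	 */
	spin_lock(&conf->device_lock);
	if (!atomic_read(&sh->count)) {
		BUG_ON(list_empty(&sh->lru) &&
		       !test_bit(STRIPE_EXPANDING, &sh->state));
		list_del_init(&sh->lru);
		if (sh->group) {
			sh->group->stripes_cnt--;
			sh->group = NULL;
		}
	}
	atomic_inc(&sh->count);
	spin_unlock(&conf->device_lock);
	return sh;
}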
807 struct stripe_head *sh;
834 sh = find_get_stripe(conf, sector, conf->generation - previous,
836 if (sh)
840 sh = get_free_stripe(conf, hash);
841 if (sh) {
843 init_stripe(sh, sector, previous);
844 atomic_inc(&sh->count);
871 return sh;
874 static bool is_full_stripe_write(struct stripe_head *sh)
876 BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
877 return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded);
902 static bool stripe_can_batch(struct stripe_head *sh)
904 struct r5conf *conf = sh->raid_conf;
908 return test_bit(STRIPE_BATCH_READY, &sh->state) &&
909 !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
910 is_full_stripe_write(sh);
915 struct stripe_head *sh, struct stripe_head *last_sh)
923 tmp_sec = sh->sector;
926 head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
943 lock_two_stripes(head, sh);
945 if (!stripe_can_batch(head) || !stripe_can_batch(sh))
948 if (sh->batch_head)
952 while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
954 if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf ||
955 bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite))
972 sh->batch_head = head->batch_head;
978 list_add(&sh->batch_list, &head->batch_list);
982 sh->batch_head = head->batch_head;
984 list_add_tail(&sh->batch_list, &head->batch_list);
988 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
993 if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
994 int seq = sh->bm_seq;
995 if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
996 sh->batch_head->bm_seq > seq)
997 seq = sh->batch_head->bm_seq;
998 set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
999 sh->batch_head->bm_seq = seq;
1002 atomic_inc(&sh->count);
1004 unlock_two_stripes(head, sh);
1012 static int use_new_offset(struct r5conf *conf, struct stripe_head *sh)
1022 if (sh->generation == conf->generation - 1)
1139 static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
1141 struct r5conf *conf = sh->raid_conf;
1142 int i, disks = sh->disks;
1143 struct stripe_head *head_sh = sh;
1150 if (log_stripe(sh, s) == 0)
1162 sh = head_sh;
1163 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
1165 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
1167 if (test_bit(R5_Discard, &sh->dev[i].flags))
1169 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
1172 &sh->dev[i].flags)) {
1177 if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
1181 dev = &sh->dev[i];
1223 int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
1257 set_bit(STRIPE_IO_STARTED, &sh->state);
1263 bi->bi_private = sh;
1266 __func__, (unsigned long long)sh->sector,
1268 atomic_inc(&sh->count);
1269 if (sh != head_sh)
1271 if (use_new_offset(conf, sh))
1272 bi->bi_iter.bi_sector = (sh->sector
1275 bi->bi_iter.bi_sector = (sh->sector
1280 if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
1281 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
1284 test_bit(R5_InJournal, &sh->dev[i].flags))
1290 sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
1292 sh->dev[i].vec.bv_page = sh->dev[i].page;
1295 bi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
1304 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
1309 sh->dev[i].sector);
1320 set_bit(STRIPE_IO_STARTED, &sh->state);
1325 rbi->bi_private = sh;
1329 __func__, (unsigned long long)sh->sector,
1331 atomic_inc(&sh->count);
1332 if (sh != head_sh)
1334 if (use_new_offset(conf, sh))
1335 rbi->bi_iter.bi_sector = (sh->sector
1338 rbi->bi_iter.bi_sector = (sh->sector
1340 if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
1341 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
1342 sh->dev[i].rvec.bv_page = sh->dev[i].page;
1345 rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset;
1356 sh->dev[i].sector);
1364 set_bit(STRIPE_DEGRADED, &sh->state);
1366 bi->bi_opf, i, (unsigned long long)sh->sector);
1367 clear_bit(R5_LOCKED, &sh->dev[i].flags);
1368 set_bit(STRIPE_HANDLE, &sh->state);
1373 sh = list_first_entry(&sh->batch_list, struct stripe_head,
1375 if (sh != head_sh)
1386 struct stripe_head *sh, int no_skipcopy)
1394 struct r5conf *conf = sh->raid_conf;
1450 struct stripe_head *sh = stripe_head_ref;
1452 struct r5conf *conf = sh->raid_conf;
1455 (unsigned long long)sh->sector);
1458 for (i = sh->disks; i--; ) {
1459 struct r5dev *dev = &sh->dev[i];
1480 clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
1482 set_bit(STRIPE_HANDLE, &sh->state);
1483 raid5_release_stripe(sh);
1486 static void ops_run_biofill(struct stripe_head *sh)
1491 struct r5conf *conf = sh->raid_conf;
1493 BUG_ON(sh->batch_head);
1495 (unsigned long long)sh->sector);
1497 for (i = sh->disks; i--; ) {
1498 struct r5dev *dev = &sh->dev[i];
1501 spin_lock_irq(&sh->stripe_lock);
1504 spin_unlock_irq(&sh->stripe_lock);
1509 dev->sector, tx, sh, 0);
1515 atomic_inc(&sh->count);
1516 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
1520 static void mark_target_uptodate(struct stripe_head *sh, int target)
1527 tgt = &sh->dev[target];
1535 struct stripe_head *sh = stripe_head_ref;
1538 (unsigned long long)sh->sector);
1541 mark_target_uptodate(sh, sh->ops.target);
1542 mark_target_uptodate(sh, sh->ops.target2);
1544 clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
1545 if (sh->check_state == check_state_compute_run)
1546 sh->check_state = check_state_compute_result;
1547 set_bit(STRIPE_HANDLE, &sh->state);
1548 raid5_release_stripe(sh);
1558 static addr_conv_t *to_addr_conv(struct stripe_head *sh,
1561 return (void *) (to_addr_page(percpu, i) + sh->disks + 2);
1568 to_addr_offs(struct stripe_head *sh, struct raid5_percpu *percpu)
1570 return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2);
1574 ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
1576 int disks = sh->disks;
1578 unsigned int *off_srcs = to_addr_offs(sh, percpu);
1579 int target = sh->ops.target;
1580 struct r5dev *tgt = &sh->dev[target];
1588 BUG_ON(sh->batch_head);
1591 __func__, (unsigned long long)sh->sector, target);
1596 off_srcs[count] = sh->dev[i].offset;
1597 xor_srcs[count++] = sh->dev[i].page;
1601 atomic_inc(&sh->count);
1604 ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
1607 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1610 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1616 * @srcs - (struct page *) array of size sh->disks
1618 * @sh - stripe_head to parse
1627 struct stripe_head *sh,
1630 int disks = sh->disks;
1631 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1632 int d0_idx = raid6_d0(sh);
1642 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1643 struct r5dev *dev = &sh->dev[i];
1645 if (i == sh->qd_idx || i == sh->pd_idx ||
1654 srcs[slot] = sh->dev[i].orig_page;
1656 srcs[slot] = sh->dev[i].page;
1662 offs[slot] = sh->dev[i].offset;
1671 ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
1673 int disks = sh->disks;
1675 unsigned int *offs = to_addr_offs(sh, percpu);
1677 int qd_idx = sh->qd_idx;
1686 BUG_ON(sh->batch_head);
1687 if (sh->ops.target < 0)
1688 target = sh->ops.target2;
1689 else if (sh->ops.target2 < 0)
1690 target = sh->ops.target;
1696 __func__, (unsigned long long)sh->sector, target);
1698 tgt = &sh->dev[target];
1703 atomic_inc(&sh->count);
1706 count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL);
1710 ops_complete_compute, sh,
1711 to_addr_conv(sh, percpu, 0));
1713 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1720 offs[count] = sh->dev[i].offset;
1721 blocks[count++] = sh->dev[i].page;
1725 NULL, ops_complete_compute, sh,
1726 to_addr_conv(sh, percpu, 0));
1728 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1735 ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
1737 int i, count, disks = sh->disks;
1738 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1739 int d0_idx = raid6_d0(sh);
1741 int target = sh->ops.target;
1742 int target2 = sh->ops.target2;
1743 struct r5dev *tgt = &sh->dev[target];
1744 struct r5dev *tgt2 = &sh->dev[target2];
1747 unsigned int *offs = to_addr_offs(sh, percpu);
1750 BUG_ON(sh->batch_head);
1752 __func__, (unsigned long long)sh->sector, target, target2);
1767 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1769 offs[slot] = sh->dev[i].offset;
1770 blocks[slot] = sh->dev[i].page;
1783 __func__, (unsigned long long)sh->sector, faila, failb);
1785 atomic_inc(&sh->count);
1792 ops_complete_compute, sh,
1793 to_addr_conv(sh, percpu, 0));
1795 RAID5_STRIPE_SIZE(sh->raid_conf),
1801 int qd_idx = sh->qd_idx;
1813 offs[count] = sh->dev[i].offset;
1814 blocks[count++] = sh->dev[i].page;
1816 dest = sh->dev[data_target].page;
1817 dest_off = sh->dev[data_target].offset;
1821 to_addr_conv(sh, percpu, 0));
1823 RAID5_STRIPE_SIZE(sh->raid_conf),
1826 count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL);
1828 ops_complete_compute, sh,
1829 to_addr_conv(sh, percpu, 0));
1831 RAID5_STRIPE_SIZE(sh->raid_conf),
1836 ops_complete_compute, sh,
1837 to_addr_conv(sh, percpu, 0));
1841 RAID5_STRIPE_SIZE(sh->raid_conf),
1847 RAID5_STRIPE_SIZE(sh->raid_conf),
1856 struct stripe_head *sh = stripe_head_ref;
1859 (unsigned long long)sh->sector);
1861 if (r5c_is_writeback(sh->raid_conf->log))
1866 r5c_release_extra_page(sh);
1870 ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
1873 int disks = sh->disks;
1875 unsigned int *off_srcs = to_addr_offs(sh, percpu);
1876 int count = 0, pd_idx = sh->pd_idx, i;
1880 unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset;
1881 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
1883 BUG_ON(sh->batch_head);
1885 (unsigned long long)sh->sector);
1888 struct r5dev *dev = &sh->dev[i];
1904 ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
1906 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1912 ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
1916 unsigned int *offs = to_addr_offs(sh, percpu);
1921 (unsigned long long)sh->sector);
1923 count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN);
1926 ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
1928 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
1934 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1936 struct r5conf *conf = sh->raid_conf;
1937 int disks = sh->disks;
1939 struct stripe_head *head_sh = sh;
1942 (unsigned long long)sh->sector);
1948 sh = head_sh;
1953 dev = &sh->dev[i];
1959 spin_lock_irq(&sh->stripe_lock);
1962 sh->overwrite_disks = 0;
1965 spin_unlock_irq(&sh->stripe_lock);
1979 dev->sector, tx, sh,
1992 sh = list_first_entry(&sh->batch_list,
1995 if (sh == head_sh)
2007 struct stripe_head *sh = stripe_head_ref;
2008 int disks = sh->disks;
2009 int pd_idx = sh->pd_idx;
2010 int qd_idx = sh->qd_idx;
2015 (unsigned long long)sh->sector);
2018 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
2019 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags);
2020 discard |= test_bit(R5_Discard, &sh->dev[i].flags);
2024 struct r5dev *dev = &sh->dev[i];
2029 if (test_bit(STRIPE_EXPAND_READY, &sh->state))
2039 if (sh->reconstruct_state == reconstruct_state_drain_run)
2040 sh->reconstruct_state = reconstruct_state_drain_result;
2041 else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run)
2042 sh->reconstruct_state = reconstruct_state_prexor_drain_result;
2044 BUG_ON(sh->reconstruct_state != reconstruct_state_run);
2045 sh->reconstruct_state = reconstruct_state_result;
2048 set_bit(STRIPE_HANDLE, &sh->state);
2049 raid5_release_stripe(sh);
2053 ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
2056 int disks = sh->disks;
2060 int count, pd_idx = sh->pd_idx, i;
2066 struct stripe_head *head_sh = sh;
2070 (unsigned long long)sh->sector);
2072 for (i = 0; i < sh->disks; i++) {
2075 if (!test_bit(R5_Discard, &sh->dev[i].flags))
2078 if (i >= sh->disks) {
2079 atomic_inc(&sh->count);
2080 set_bit(R5_Discard, &sh->dev[pd_idx].flags);
2081 ops_complete_reconstruct(sh);
2087 off_srcs = to_addr_offs(sh, percpu);
2093 off_dest = off_srcs[count] = sh->dev[pd_idx].offset;
2094 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
2096 struct r5dev *dev = &sh->dev[i];
2104 xor_dest = sh->dev[pd_idx].page;
2105 off_dest = sh->dev[pd_idx].offset;
2107 struct r5dev *dev = &sh->dev[i];
2121 list_first_entry(&sh->batch_list,
2129 to_addr_conv(sh, percpu, j));
2133 to_addr_conv(sh, percpu, j));
2138 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
2141 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
2144 sh = list_first_entry(&sh->batch_list, struct stripe_head,
2151 ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
2158 struct stripe_head *head_sh = sh;
2163 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
2165 for (i = 0; i < sh->disks; i++) {
2166 if (sh->pd_idx == i || sh->qd_idx == i)
2168 if (!test_bit(R5_Discard, &sh->dev[i].flags))
2171 if (i >= sh->disks) {
2172 atomic_inc(&sh->count);
2173 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
2174 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
2175 ops_complete_reconstruct(sh);
2181 offs = to_addr_offs(sh, percpu);
2183 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
2191 count = set_syndrome_sources(blocks, offs, sh, synflags);
2193 list_first_entry(&sh->batch_list,
2199 head_sh, to_addr_conv(sh, percpu, j));
2202 to_addr_conv(sh, percpu, j));
2204 RAID5_STRIPE_SIZE(sh->raid_conf), &submit);
2207 sh = list_first_entry(&sh->batch_list, struct stripe_head,
2215 struct stripe_head *sh = stripe_head_ref;
2218 (unsigned long long)sh->sector);
2220 sh->check_state = check_state_check_result;
2221 set_bit(STRIPE_HANDLE, &sh->state);
2222 raid5_release_stripe(sh);
2225 static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
2227 int disks = sh->disks;
2228 int pd_idx = sh->pd_idx;
2229 int qd_idx = sh->qd_idx;
2233 unsigned int *off_srcs = to_addr_offs(sh, percpu);
2240 (unsigned long long)sh->sector);
2242 BUG_ON(sh->batch_head);
2244 xor_dest = sh->dev[pd_idx].page;
2245 off_dest = sh->dev[pd_idx].offset;
2251 off_srcs[count] = sh->dev[i].offset;
2252 xor_srcs[count++] = sh->dev[i].page;
2256 to_addr_conv(sh, percpu, 0));
2258 RAID5_STRIPE_SIZE(sh->raid_conf),
2259 &sh->ops.zero_sum_result, &submit);
2261 atomic_inc(&sh->count);
2262 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
2266 static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
2269 unsigned int *offs = to_addr_offs(sh, percpu);
2274 (unsigned long long)sh->sector, checkp);
2276 BUG_ON(sh->batch_head);
2277 count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL);
2281 atomic_inc(&sh->count);
2283 sh, to_addr_conv(sh, percpu, 0));
2285 RAID5_STRIPE_SIZE(sh->raid_conf),
2286 &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit);
2289 static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
2291 int overlap_clear = 0, i, disks = sh->disks;
2293 struct r5conf *conf = sh->raid_conf;
2300 ops_run_biofill(sh);
2306 tx = ops_run_compute5(sh, percpu);
2308 if (sh->ops.target2 < 0 || sh->ops.target < 0)
2309 tx = ops_run_compute6_1(sh, percpu);
2311 tx = ops_run_compute6_2(sh, percpu);
2320 tx = ops_run_prexor5(sh, percpu, tx);
2322 tx = ops_run_prexor6(sh, percpu, tx);
2326 tx = ops_run_partial_parity(sh, percpu, tx);
2329 tx = ops_run_biodrain(sh, tx);
2335 ops_run_reconstruct5(sh, percpu, tx);
2337 ops_run_reconstruct6(sh, percpu, tx);
2341 if (sh->check_state == check_state_run)
2342 ops_run_check_p(sh, percpu);
2343 else if (sh->check_state == check_state_run_q)
2344 ops_run_check_pq(sh, percpu, 0);
2345 else if (sh->check_state == check_state_run_pq)
2346 ops_run_check_pq(sh, percpu, 1);
2351 if (overlap_clear && !sh->batch_head) {
2353 struct r5dev *dev = &sh->dev[i];
2355 wake_up(&sh->raid_conf->wait_for_overlap);
2361 static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
2364 kfree(sh->pages);
2366 if (sh->ppl_page)
2367 __free_page(sh->ppl_page);
2368 kmem_cache_free(sc, sh);
2374 struct stripe_head *sh;
2376 sh = kmem_cache_zalloc(sc, gfp);
2377 if (sh) {
2378 spin_lock_init(&sh->stripe_lock);
2379 spin_lock_init(&sh->batch_lock);
2380 INIT_LIST_HEAD(&sh->batch_list);
2381 INIT_LIST_HEAD(&sh->lru);
2382 INIT_LIST_HEAD(&sh->r5c);
2383 INIT_LIST_HEAD(&sh->log_list);
2384 atomic_set(&sh->count, 1);
2385 sh->raid_conf = conf;
2386 sh->log_start = MaxSector;
2389 sh->ppl_page = alloc_page(gfp);
2390 if (!sh->ppl_page) {
2391 free_stripe(sc, sh);
2396 if (init_stripe_shared_pages(sh, conf, disks)) {
2397 free_stripe(sc, sh);
2402 return sh;
2406 struct stripe_head *sh;
2408 sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
2409 if (!sh)
2412 if (grow_buffers(sh, gfp)) {
2413 shrink_buffers(sh);
2414 free_stripe(conf->slab_cache, sh);
2417 sh->hash_lock_index =
2422 raid5_release_stripe(sh);
2708 struct stripe_head *sh;
2712 sh = get_free_stripe(conf, hash);
2714 if (!sh)
2716 BUG_ON(atomic_read(&sh->count));
2717 shrink_buffers(sh);
2718 free_stripe(conf->slab_cache, sh);
2758 struct stripe_head *sh = bi->bi_private;
2759 struct r5conf *conf = sh->raid_conf;
2760 int disks = sh->disks, i;
2765 if (bi == &sh->dev[i].req)
2769 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2775 if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
2785 if (use_new_offset(conf, sh))
2786 s = sh->sector + rdev->new_data_offset;
2788 s = sh->sector + rdev->data_offset;
2790 set_bit(R5_UPTODATE, &sh->dev[i].flags);
2791 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
2802 clear_bit(R5_ReadError, &sh->dev[i].flags);
2803 clear_bit(R5_ReWrite, &sh->dev[i].flags);
2804 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2805 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
2807 if (test_bit(R5_InJournal, &sh->dev[i].flags))
2812 set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
2820 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
2823 if (test_bit(R5_ReadRepl, &sh->dev[i].flags))
2836 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
2857 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
2860 if (sh->qd_idx >= 0 && sh->pd_idx == i)
2861 set_bit(R5_ReadError, &sh->dev[i].flags);
2862 else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
2863 set_bit(R5_ReadError, &sh->dev[i].flags);
2864 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
2866 set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
2868 clear_bit(R5_ReadError, &sh->dev[i].flags);
2869 clear_bit(R5_ReWrite, &sh->dev[i].flags);
2873 rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
2879 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2880 set_bit(STRIPE_HANDLE, &sh->state);
2881 raid5_release_stripe(sh);
2886 struct stripe_head *sh = bi->bi_private;
2887 struct r5conf *conf = sh->raid_conf;
2888 int disks = sh->disks, i;
2895 if (bi == &sh->dev[i].req) {
2899 if (bi == &sh->dev[i].rreq) {
2913 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2923 else if (is_badblock(rdev, sh->sector,
2926 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
2929 set_bit(STRIPE_DEGRADED, &sh->state);
2931 set_bit(R5_WriteError, &sh->dev[i].flags);
2935 } else if (is_badblock(rdev, sh->sector,
2938 set_bit(R5_MadeGood, &sh->dev[i].flags);
2939 if (test_bit(R5_ReadError, &sh->dev[i].flags))
2944 set_bit(R5_ReWrite, &sh->dev[i].flags);
2949 if (sh->batch_head && bi->bi_status && !replacement)
2950 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
2953 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
2954 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2955 set_bit(STRIPE_HANDLE, &sh->state);
2957 if (sh->batch_head && sh != sh->batch_head)
2958 raid5_release_stripe(sh->batch_head);
2959 raid5_release_stripe(sh);
3002 struct stripe_head *sh)
3190 if (sh) {
3191 sh->pd_idx = pd_idx;
3192 sh->qd_idx = qd_idx;
3193 sh->ddf_layout = ddf_layout;
3202 sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
3204 struct r5conf *conf = sh->raid_conf;
3205 int raid_disks = sh->disks;
3207 sector_t new_sector = sh->sector, check;
3222 if (i == sh->pd_idx)
3230 if (i > sh->pd_idx)
3235 if (i < sh->pd_idx)
3237 i -= (sh->pd_idx + 1);
3249 if (i == sh->qd_idx)
3256 if (sh->pd_idx == raid_disks-1)
3258 else if (i > sh->pd_idx)
3263 if (sh->pd_idx == raid_disks-1)
3267 if (i < sh->pd_idx)
3269 i -= (sh->pd_idx + 2);
3279 if (sh->pd_idx == 0)
3283 if (i < sh->pd_idx)
3285 i -= (sh->pd_idx + 1);
3290 if (i > sh->pd_idx)
3295 if (i < sh->pd_idx)
3297 i -= (sh->pd_idx + 1);
3313 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
3314 || sh2.qd_idx != sh->qd_idx) {
3379 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
3382 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
3383 struct r5conf *conf = sh->raid_conf;
3393 r5c_release_extra_page(sh);
3396 struct r5dev *dev = &sh->dev[i];
3417 sh->reconstruct_state = reconstruct_state_drain_run;
3420 sh->reconstruct_state = reconstruct_state_run;
3425 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
3428 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
3429 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
3431 (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
3432 test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
3435 struct r5dev *dev = &sh->dev[i];
3454 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
3463 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
3464 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
3468 int qd_idx = sh->qd_idx;
3469 struct r5dev *dev = &sh->dev[qd_idx];
3476 if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
3478 !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
3479 test_bit(R5_Insync, &sh->dev[pd_idx].flags))
3483 __func__, (unsigned long long)sh->sector,
3487 static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
3490 struct r5conf *conf = sh->raid_conf;
3494 bi->bi_iter.bi_sector, sh->sector);
3497 if (sh->batch_head)
3501 bip = &sh->dev[dd_idx].towrite;
3503 bip = &sh->dev[dd_idx].toread;
3528 for (i = 0; i < sh->disks; i++) {
3529 if (i != sh->pd_idx &&
3530 (i == dd_idx || sh->dev[i].towrite)) {
3531 sector = sh->dev[i].sector;
3547 static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
3550 struct r5conf *conf = sh->raid_conf;
3555 bip = &sh->dev[dd_idx].towrite;
3559 bip = &sh->dev[dd_idx].toread;
3566 clear_bit(STRIPE_BATCH_READY, &sh->state);
3577 sector_t sector = sh->dev[dd_idx].sector;
3578 for (bi=sh->dev[dd_idx].towrite;
3579 sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) &&
3581 bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) {
3585 if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf))
3586 if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags))
3587 sh->overwrite_disks++;
3591 (*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
3592 sh->dev[dd_idx].sector);
3607 set_bit(STRIPE_BITMAP_PENDING, &sh->state);
3608 spin_unlock_irq(&sh->stripe_lock);
3609 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
3611 spin_lock_irq(&sh->stripe_lock);
3612 clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
3613 if (!sh->batch_head) {
3614 sh->bm_seq = conf->seq_flush+1;
3615 set_bit(STRIPE_BIT_DELAY, &sh->state);
3625 static bool add_stripe_bio(struct stripe_head *sh, struct bio *bi,
3628 spin_lock_irq(&sh->stripe_lock);
3630 if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
3631 set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
3632 spin_unlock_irq(&sh->stripe_lock);
3636 __add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
3637 spin_unlock_irq(&sh->stripe_lock);
3644 struct stripe_head *sh)
3656 &dd_idx, sh);
3660 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3664 BUG_ON(sh->batch_head);
3669 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
3682 sh->sector,
3688 spin_lock_irq(&sh->stripe_lock);
3690 bi = sh->dev[i].towrite;
3691 sh->dev[i].towrite = NULL;
3692 sh->overwrite_disks = 0;
3693 spin_unlock_irq(&sh->stripe_lock);
3697 log_stripe_write_finished(sh);
3699 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
3703 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3704 struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector);
3711 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
3715 bi = sh->dev[i].written;
3716 sh->dev[i].written = NULL;
3717 if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
3718 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
3719 sh->dev[i].page = sh->dev[i].orig_page;
3724 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3725 struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
3735 if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
3737 (!test_bit(R5_Insync, &sh->dev[i].flags) ||
3738 test_bit(R5_ReadError, &sh->dev[i].flags))) {
3739 spin_lock_irq(&sh->stripe_lock);
3740 bi = sh->dev[i].toread;
3741 sh->dev[i].toread = NULL;
3742 spin_unlock_irq(&sh->stripe_lock);
3743 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
3748 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
3750 r5_next_bio(conf, bi, sh->dev[i].sector);
3757 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
3762 clear_bit(R5_LOCKED, &sh->dev[i].flags);
3767 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
3773 handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
3779 BUG_ON(sh->batch_head);
3780 clear_bit(STRIPE_SYNCING, &sh->state);
3781 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
3802 && !rdev_set_badblocks(rdev, sh->sector,
3809 && !rdev_set_badblocks(rdev, sh->sector,
3821 static int want_replace(struct stripe_head *sh, int disk_idx)
3827 rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
3831 && (rdev->recovery_offset <= sh->sector
3832 || rdev->mddev->recovery_cp <= sh->sector))
3838 static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
3841 struct r5dev *dev = &sh->dev[disk_idx];
3842 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
3843 &sh->dev[s->failed_num[1]] };
3845 bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);
3861 (s->replacing && want_replace(sh, disk_idx)))
3886 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
3907 s->failed_num[i] == sh->pd_idx ||
3908 s->failed_num[i] == sh->qd_idx) &&
3925 sh->sector < sh->raid_conf->mddev->recovery_cp)
3929 if (s->failed_num[i] != sh->pd_idx &&
3930 s->failed_num[i] != sh->qd_idx &&
3945 static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
3948 struct r5dev *dev = &sh->dev[disk_idx];
3951 if (need_this_block(sh, s, disk_idx, disks)) {
3957 BUG_ON(sh->batch_head);
3969 ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) ||
3976 (unsigned long long)sh->sector, disk_idx);
3977 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
3980 sh->ops.target = disk_idx;
3981 sh->ops.target2 = -1; /* no 2nd target */
4000 &sh->dev[other].flags))
4005 (unsigned long long)sh->sector,
4007 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
4009 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
4010 set_bit(R5_Wantcompute, &sh->dev[other].flags);
4011 sh->ops.target = disk_idx;
4012 sh->ops.target2 = other;
4031 static void handle_stripe_fill(struct stripe_head *sh,
4041 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
4042 !sh->reconstruct_state) {
4052 if (test_bit(STRIPE_R5C_CACHING, &sh->state))
4053 r5c_make_stripe_write_out(sh);
4058 if (fetch_block(sh, s, i, disks))
4062 set_bit(STRIPE_HANDLE, &sh->state);
4073 struct stripe_head *sh, int disks)
4078 struct stripe_head *head_sh = sh;
4082 if (sh->dev[i].written) {
4083 dev = &sh->dev[i];
4109 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
4111 !test_bit(STRIPE_DEGRADED, &sh->state),
4114 sh = list_first_entry(&sh->batch_list,
4117 if (sh != head_sh) {
4118 dev = &sh->dev[i];
4122 sh = head_sh;
4123 dev = &sh->dev[i];
4128 log_stripe_write_finished(sh);
4131 test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
4133 clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
4134 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
4135 if (sh->qd_idx >= 0) {
4136 clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
4137 clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
4140 clear_bit(STRIPE_DISCARD, &sh->state);
4147 hash = sh->hash_lock_index;
4149 remove_hash(sh);
4152 sh = list_first_entry(&sh->batch_list,
4154 if (sh != head_sh)
4157 sh = head_sh;
4159 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
4160 set_bit(STRIPE_HANDLE, &sh->state);
4164 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
4188 struct stripe_head *sh,
4203 (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
4209 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
4211 (unsigned long long)sh->sector);
4214 struct r5dev *dev = &sh->dev[i];
4216 i == sh->pd_idx || i == sh->qd_idx ||
4228 i != sh->pd_idx && i != sh->qd_idx &&
4240 (unsigned long long)sh->sector, sh->state, rmw, rcw);
4241 set_bit(STRIPE_HANDLE, &sh->state);
4247 (unsigned long long)sh->sector, rmw);
4249 struct r5dev *dev = &sh->dev[i];
4252 !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
4267 r5c_use_extra_page(sh);
4272 set_bit(STRIPE_DELAYED, &sh->state);
4279 struct r5dev *dev = &sh->dev[i];
4281 i == sh->pd_idx || i == sh->qd_idx ||
4288 &sh->state)) {
4295 set_bit(STRIPE_DELAYED, &sh->state);
4304 struct r5dev *dev = &sh->dev[i];
4306 i != sh->pd_idx && i != sh->qd_idx &&
4313 &sh->state)) {
4321 set_bit(STRIPE_DELAYED, &sh->state);
4326 (unsigned long long)sh->sector,
4327 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
4331 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
4332 set_bit(STRIPE_DELAYED, &sh->state);
4344 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
4346 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
4347 schedule_reconstruction(sh, s, rcw == 0, 0);
4351 static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
4356 BUG_ON(sh->batch_head);
4357 set_bit(STRIPE_HANDLE, &sh->state);
4359 switch (sh->check_state) {
4364 sh->check_state = check_state_run;
4366 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
4370 dev = &sh->dev[s->failed_num[0]];
4373 sh->check_state = check_state_idle;
4375 dev = &sh->dev[sh->pd_idx];
4378 if (test_bit(STRIPE_INSYNC, &sh->state))
4389 clear_bit(STRIPE_DEGRADED, &sh->state);
4390 set_bit(STRIPE_INSYNC, &sh->state);
4395 sh->check_state = check_state_idle;
4407 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
4411 set_bit(STRIPE_INSYNC, &sh->state);
4416 set_bit(STRIPE_INSYNC, &sh->state);
4419 (unsigned long long) sh->sector,
4420 (unsigned long long) sh->sector +
4423 sh->check_state = check_state_compute_run;
4424 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
4427 &sh->dev[sh->pd_idx].flags);
4428 sh->ops.target = sh->pd_idx;
4429 sh->ops.target2 = -1;
4438 __func__, sh->check_state,
4439 (unsigned long long) sh->sector);
4444 static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
4448 int pd_idx = sh->pd_idx;
4449 int qd_idx = sh->qd_idx;
4452 BUG_ON(sh->batch_head);
4453 set_bit(STRIPE_HANDLE, &sh->state);
4463 switch (sh->check_state) {
4471 sh->check_state = check_state_run;
4477 if (sh->check_state == check_state_run)
4478 sh->check_state = check_state_run_pq;
4480 sh->check_state = check_state_run_q;
4484 sh->ops.zero_sum_result = 0;
4486 if (sh->check_state == check_state_run) {
4488 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
4491 if (sh->check_state >= check_state_run &&
4492 sh->check_state <= check_state_run_pq) {
4504 sh->check_state = check_state_idle;
4507 if (test_bit(STRIPE_INSYNC, &sh->state))
4515 dev = &sh->dev[s->failed_num[1]];
4521 dev = &sh->dev[s->failed_num[0]];
4526 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
4527 dev = &sh->dev[pd_idx];
4532 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
4533 dev = &sh->dev[qd_idx];
4541 dev - (struct r5dev *) &sh->dev)) {
4546 clear_bit(STRIPE_DEGRADED, &sh->state);
4548 set_bit(STRIPE_INSYNC, &sh->state);
4555 sh->check_state = check_state_idle;
4561 if (sh->ops.zero_sum_result == 0) {
4564 set_bit(STRIPE_INSYNC, &sh->state);
4570 sh->check_state = check_state_compute_result;
4581 set_bit(STRIPE_INSYNC, &sh->state);
4584 (unsigned long long) sh->sector,
4585 (unsigned long long) sh->sector +
4588 int *target = &sh->ops.target;
4590 sh->ops.target = -1;
4591 sh->ops.target2 = -1;
4592 sh->check_state = check_state_compute_run;
4593 set_bit(STRIPE_COMPUTE_RUN, &sh->state);
4595 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
4597 &sh->dev[pd_idx].flags);
4599 target = &sh->ops.target2;
4602 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
4604 &sh->dev[qd_idx].flags);
4615 __func__, sh->check_state,
4616 (unsigned long long) sh->sector);
4621 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
4629 BUG_ON(sh->batch_head);
4630 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
4631 for (i = 0; i < sh->disks; i++)
4632 if (i != sh->pd_idx && i != sh->qd_idx) {
4637 sector_t bn = raid5_compute_blocknr(sh, i, 1);
4658 sh->dev[i].page, sh2->dev[dd_idx].offset,
4659 sh->dev[i].offset, RAID5_STRIPE_SIZE(conf),
4694 static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
4696 struct r5conf *conf = sh->raid_conf;
4697 int disks = sh->disks;
4704 s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head;
4705 s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
4718 dev = &sh->dev[i];
4729 !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
4758 rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
4759 !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
4773 is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
4800 else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
4870 if (test_bit(STRIPE_SYNCING, &sh->state)) {
4880 sh->sector >= conf->mddev->recovery_cp ||
4893 static int clear_batch_ready(struct stripe_head *sh)
4896 if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
4897 return (sh->batch_head && sh->batch_head != sh);
4898 spin_lock(&sh->stripe_lock);
4899 if (!sh->batch_head) {
4900 spin_unlock(&sh->stripe_lock);
4908 if (sh->batch_head != sh) {
4909 spin_unlock(&sh->stripe_lock);
4912 spin_lock(&sh->batch_lock);
4913 list_for_each_entry(tmp, &sh->batch_list, batch_list)
4915 spin_unlock(&sh->batch_lock);
4916 spin_unlock(&sh->stripe_lock);
4928 struct stripe_head *sh, *next;
4932 list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
4934 list_del_init(&sh->batch_list);
4936 WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
4948 "stripe state: %lx\n", sh->state);
4953 set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
4959 sh->check_state = head_sh->check_state;
4960 sh->reconstruct_state = head_sh->reconstruct_state;
4961 spin_lock_irq(&sh->stripe_lock);
4962 sh->batch_head = NULL;
4963 spin_unlock_irq(&sh->stripe_lock);
4964 for (i = 0; i < sh->disks; i++) {
4965 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
4967 sh->dev[i].flags = head_sh->dev[i].flags &
4971 sh->state & handle_flags)
4972 set_bit(STRIPE_HANDLE, &sh->state);
4973 raid5_release_stripe(sh);
4988 static void handle_stripe(struct stripe_head *sh)
4991 struct r5conf *conf = sh->raid_conf;
4994 int disks = sh->disks;
4997 clear_bit(STRIPE_HANDLE, &sh->state);
5005 if (clear_batch_ready(sh))
5008 if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) {
5011 set_bit(STRIPE_HANDLE, &sh->state);
5015 if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
5016 break_stripe_batch_list(sh, 0);
5018 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
5019 spin_lock(&sh->stripe_lock);
5024 if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) &&
5025 !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) &&
5026 !test_bit(STRIPE_DISCARD, &sh->state) &&
5027 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
5028 set_bit(STRIPE_SYNCING, &sh->state);
5029 clear_bit(STRIPE_INSYNC, &sh->state);
5030 clear_bit(STRIPE_REPLACED, &sh->state);
5032 spin_unlock(&sh->stripe_lock);
5034 clear_bit(STRIPE_DELAYED, &sh->state);
5038 (unsigned long long)sh->sector, sh->state,
5039 atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
5040 sh->check_state, sh->reconstruct_state);
5042 analyse_stripe(sh, &s);
5044 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
5049 set_bit(STRIPE_HANDLE, &sh->state);
5056 set_bit(STRIPE_HANDLE, &sh->state);
5064 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
5066 set_bit(STRIPE_BIOFILL_RUN, &sh->state);
5082 sh->check_state = 0;
5083 sh->reconstruct_state = 0;
5084 break_stripe_batch_list(sh, 0);
5086 handle_failed_stripe(conf, sh, &s, disks);
5088 handle_failed_sync(conf, sh, &s);
5095 if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
5097 if (sh->reconstruct_state == reconstruct_state_drain_result ||
5098 sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
5099 sh->reconstruct_state = reconstruct_state_idle;
5104 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) &&
5105 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags));
5106 BUG_ON(sh->qd_idx >= 0 &&
5107 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) &&
5108 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags));
5110 struct r5dev *dev = &sh->dev[i];
5112 (i == sh->pd_idx || i == sh->qd_idx ||
5122 ((i == sh->pd_idx || i == sh->qd_idx) &&
5124 set_bit(STRIPE_INSYNC, &sh->state);
5127 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
5135 pdev = &sh->dev[sh->pd_idx];
5136 s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
5137 || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
5138 qdev = &sh->dev[sh->qd_idx];
5139 s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
5140 || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
5152 handle_stripe_clean_event(conf, sh, disks);
5155 r5c_handle_cached_data_endio(conf, sh, disks);
5156 log_stripe_write_finished(sh);
5167 handle_stripe_fill(sh, &s, disks);
5174 r5c_finish_stripe_write_out(conf, sh, &s);
5185 if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) {
5188 handle_stripe_dirtying(conf, sh, &s, disks);
5194 ret = r5c_try_caching_write(conf, sh, &s,
5205 (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
5207 ret = handle_stripe_dirtying(conf, sh, &s,
5220 if (sh->check_state ||
5222 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
5223 !test_bit(STRIPE_INSYNC, &sh->state))) {
5225 handle_parity_checks6(conf, sh, &s, disks);
5227 handle_parity_checks5(conf, sh, &s, disks);
5231 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
5232 && !test_bit(STRIPE_REPLACED, &sh->state)) {
5235 if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
5236 WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
5237 set_bit(R5_WantReplace, &sh->dev[i].flags);
5238 set_bit(R5_LOCKED, &sh->dev[i].flags);
5242 set_bit(STRIPE_INSYNC, &sh->state);
5243 set_bit(STRIPE_REPLACED, &sh->state);
5246 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
5247 test_bit(STRIPE_INSYNC, &sh->state)) {
5249 clear_bit(STRIPE_SYNCING, &sh->state);
5250 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
5259 struct r5dev *dev = &sh->dev[s.failed_num[i]];
5276 if (sh->reconstruct_state == reconstruct_state_result) {
5278 = raid5_get_active_stripe(conf, NULL, sh->sector,
5282 /* sh cannot be written until sh_src has been read.
5283 * so arrange for sh to be delayed a little
5285 set_bit(STRIPE_DELAYED, &sh->state);
5286 set_bit(STRIPE_HANDLE, &sh->state);
5296 sh->reconstruct_state = reconstruct_state_idle;
5297 clear_bit(STRIPE_EXPANDING, &sh->state);
5299 set_bit(R5_Wantwrite, &sh->dev[i].flags);
5300 set_bit(R5_LOCKED, &sh->dev[i].flags);
5305 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
5306 !sh->reconstruct_state) {
5308 sh->disks = conf->raid_disks;
5309 stripe_set_idx(sh->sector, conf, 0, sh);
5310 schedule_reconstruction(sh, &s, 1, 1);
5311 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
5312 clear_bit(STRIPE_EXPAND_READY, &sh->state);
5319 !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
5320 handle_stripe_expansion(conf, sh);
5340 struct r5dev *dev = &sh->dev[i];
5344 if (!rdev_set_badblocks(rdev, sh->sector,
5351 rdev_clear_badblocks(rdev, sh->sector,
5360 rdev_clear_badblocks(rdev, sh->sector,
5367 raid_run_ops(sh, s.ops_request);
5369 ops_run_io(sh, &s);
5382 clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
5391 struct stripe_head *sh;
5392 sh = list_entry(l, struct stripe_head, lru);
5394 clear_bit(STRIPE_DELAYED, &sh->state);
5395 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
5397 list_add_tail(&sh->lru, &conf->hold_list);
5398 raid5_wakeup_stripe_thread(sh);
5411 struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
5413 list_del_init(&sh->lru);
5414 atomic_inc(&sh->count);
5415 hash = sh->hash_lock_index;
5416 __release_stripe(conf, sh, &temp_inactive_list[hash]);
5619 struct stripe_head *sh, *tmp;
5629 sh = NULL;
5655 sh = list_entry(handle_list->next, typeof(*sh), lru);
5659 else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) {
5679 sh = tmp;
5684 if (sh) {
5692 if (!sh) {
5702 sh->group = NULL;
5704 list_del_init(&sh->lru);
5705 BUG_ON(atomic_inc_return(&sh->count) != 1);
5706 return sh;
5719 struct stripe_head *sh;
5728 sh = list_first_entry(&cb->list, struct stripe_head, lru);
5729 list_del_init(&sh->lru);
5736 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
5741 hash = sh->hash_lock_index;
5742 __release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
5755 struct stripe_head *sh)
5763 raid5_release_stripe(sh);
5776 if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
5777 list_add_tail(&sh->lru, &cb->list);
5779 raid5_release_stripe(sh);
5786 struct stripe_head *sh;
5816 sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0);
5819 set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
5820 if (test_bit(STRIPE_SYNCING, &sh->state)) {
5821 raid5_release_stripe(sh);
5825 clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
5826 spin_lock_irq(&sh->stripe_lock);
5828 if (d == sh->pd_idx || d == sh->qd_idx)
5830 if (sh->dev[d].towrite || sh->dev[d].toread) {
5831 set_bit(R5_Overlap, &sh->dev[d].flags);
5832 spin_unlock_irq(&sh->stripe_lock);
5833 raid5_release_stripe(sh);
5838 set_bit(STRIPE_DISCARD, &sh->state);
5840 sh->overwrite_disks = 0;
5842 if (d == sh->pd_idx || d == sh->qd_idx)
5844 sh->dev[d].towrite = bi;
5845 set_bit(R5_OVERWRITE, &sh->dev[d].flags);
5848 sh->overwrite_disks++;
5850 spin_unlock_irq(&sh->stripe_lock);
5856 sh->sector,
5859 sh->bm_seq = conf->seq_flush + 1;
5860 set_bit(STRIPE_BIT_DELAY, &sh->state);
5863 set_bit(STRIPE_HANDLE, &sh->state);
5864 clear_bit(STRIPE_DELAYED, &sh->state);
5865 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
5867 release_stripe_plug(mddev, sh);
5888 struct stripe_head *sh)
5894 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
5895 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
5898 min_sector = min(min_sector, sh->dev[dd_idx].sector);
5899 max_sector = max(max_sector, sh->dev[dd_idx].sector);
5915 struct stripe_request_ctx *ctx, struct stripe_head *sh,
5921 spin_lock_irq(&sh->stripe_lock);
5923 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
5924 struct r5dev *dev = &sh->dev[dd_idx];
5926 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
5933 if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
5943 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
5944 struct r5dev *dev = &sh->dev[dd_idx];
5946 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
5953 __add_stripe_bio(sh, bi, dd_idx, forwrite, previous);
5959 spin_unlock_irq(&sh->stripe_lock);
5982 struct stripe_head *sh;
6023 sh = raid5_get_active_stripe(conf, ctx, new_sector, flags);
6024 if (unlikely(!sh)) {
6031 stripe_ahead_of_reshape(mddev, conf, sh)) {
6036 * 'sh', we know that if that happens,
6050 if (test_bit(STRIPE_EXPANDING, &sh->state) ||
6051 !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
6061 if (stripe_can_batch(sh)) {
6062 stripe_add_to_batch_list(conf, sh, ctx->batch_last);
6065 atomic_inc(&sh->count);
6066 ctx->batch_last = sh;
6070 set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
6075 set_bit(STRIPE_HANDLE, &sh->state);
6076 clear_bit(STRIPE_DELAYED, &sh->state);
6077 if ((!sh->batch_head || sh == sh->batch_head) &&
6079 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
6082 release_stripe_plug(mddev, sh);
6086 raid5_release_stripe(sh);
6109 struct stripe_head sh;
6115 sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh);
6124 while (dd_idx == sh.pd_idx || dd_idx == sh.qd_idx)
6273 struct stripe_head *sh;
6424 sh = raid5_get_active_stripe(conf, NULL, stripe_addr+i,
6426 set_bit(STRIPE_EXPANDING, &sh->state);
6431 for (j=sh->disks; j--;) {
6433 if (j == sh->pd_idx)
6436 j == sh->qd_idx)
6438 s = raid5_compute_blocknr(sh, j, 0);
6443 memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf));
6444 set_bit(R5_Expanded, &sh->dev[j].flags);
6445 set_bit(R5_UPTODATE, &sh->dev[j].flags);
6448 set_bit(STRIPE_EXPAND_READY, &sh->state);
6449 set_bit(STRIPE_HANDLE, &sh->state);
6451 list_add(&sh->lru, &stripes);
6474 sh = raid5_get_active_stripe(conf, NULL, first_sector,
6476 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
6477 set_bit(STRIPE_HANDLE, &sh->state);
6478 raid5_release_stripe(sh);
6485 sh = list_entry(stripes.next, struct stripe_head, lru);
6486 list_del_init(&sh->lru);
6487 raid5_release_stripe(sh);
6536 struct stripe_head *sh;
6595 sh = raid5_get_active_stripe(conf, NULL, sector_nr,
6597 if (sh == NULL) {
6598 sh = raid5_get_active_stripe(conf, NULL, sector_nr, 0);
6619 set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
6620 set_bit(STRIPE_HANDLE, &sh->state);
6622 raid5_release_stripe(sh);
6640 struct stripe_head *sh;
6661 sh = raid5_get_active_stripe(conf, NULL, sector,
6663 if (!sh) {
6670 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
6671 raid5_release_stripe(sh);
6677 set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
6678 handle_stripe(sh);
6679 raid5_release_stripe(sh);
6695 struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
6700 (sh = __get_priority_stripe(conf, group)) != NULL)
6701 batch[batch_size++] = sh;