1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13#ifndef __KERNEL__ 14#include "jfs_user.h" 15#else 16#include <linux/time.h> 17#include <linux/fs.h> 18#include <linux/jbd2.h> 19#include <linux/errno.h> 20#include <linux/crc32.h> 21#include <linux/blkdev.h> 22#endif 23 24/* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28struct recovery_info 29{ 30 tid_t start_transaction; 31 tid_t end_transaction; 32 33 int nr_replays; 34 int nr_revokes; 35 int nr_revoke_hits; 36}; 37 38static int do_one_pass(journal_t *journal, 39 struct recovery_info *info, enum passtype pass); 40static int scan_revoke_records(journal_t *, struct buffer_head *, 41 tid_t, struct recovery_info *); 42 43#ifdef __KERNEL__ 44 45/* Release readahead buffers after use */ 46static void journal_brelse_array(struct buffer_head *b[], int n) 47{ 48 while (--n >= 0) 49 brelse (b[n]); 50} 51 52 53/* 54 * When reading from the journal, we are going through the block device 55 * layer directly and so there is no readahead being done for us. We 56 * need to implement any readahead ourselves if we want it to happen at 57 * all. Recovery is basically one long sequential read, so make sure we 58 * do the IO in reasonably large chunks. 59 * 60 * This is not so critical that we need to be enormously clever about 61 * the readahead size, though. 128K is a purely arbitrary, good-enough 62 * fixed value. 63 */ 64 65#define MAXBUF 8 66static int do_readahead(journal_t *journal, unsigned int start) 67{ 68 int err; 69 unsigned int max, nbufs, next; 70 unsigned long long blocknr; 71 struct buffer_head *bh; 72 73 struct buffer_head * bufs[MAXBUF]; 74 75 /* Do up to 128K of readahead */ 76 max = start + (128 * 1024 / journal->j_blocksize); 77 if (max > journal->j_total_len) 78 max = journal->j_total_len; 79 80 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 81 * a time to the block device IO layer. */ 82 83 nbufs = 0; 84 85 for (next = start; next < max; next++) { 86 err = jbd2_journal_bmap(journal, next, &blocknr); 87 88 if (err) { 89 printk(KERN_ERR "JBD2: bad block at offset %u\n", 90 next); 91 goto failed; 92 } 93 94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 95 if (!bh) { 96 err = -ENOMEM; 97 goto failed; 98 } 99 100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 101 bufs[nbufs++] = bh; 102 if (nbufs == MAXBUF) { 103 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); 104 journal_brelse_array(bufs, nbufs); 105 nbufs = 0; 106 } 107 } else 108 brelse(bh); 109 } 110 111 if (nbufs) 112 ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); 113 err = 0; 114 115failed: 116 if (nbufs) 117 journal_brelse_array(bufs, nbufs); 118 return err; 119} 120 121#endif /* __KERNEL__ */ 122 123 124/* 125 * Read a block from the journal 126 */ 127 128static int jread(struct buffer_head **bhp, journal_t *journal, 129 unsigned int offset) 130{ 131 int err; 132 unsigned long long blocknr; 133 struct buffer_head *bh; 134 135 *bhp = NULL; 136 137 if (offset >= journal->j_total_len) { 138 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 139 return -EFSCORRUPTED; 140 } 141 142 err = jbd2_journal_bmap(journal, offset, &blocknr); 143 144 if (err) { 145 printk(KERN_ERR "JBD2: bad block at offset %u\n", 146 offset); 147 return err; 148 } 149 150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 151 if (!bh) 152 return -ENOMEM; 153 154 if (!buffer_uptodate(bh)) { 155 /* If this is a brand new buffer, start readahead. 156 Otherwise, we assume we are already reading it. */ 157 if (!buffer_req(bh)) 158 do_readahead(journal, offset); 159 wait_on_buffer(bh); 160 } 161 162 if (!buffer_uptodate(bh)) { 163 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 164 offset); 165 brelse(bh); 166 return -EIO; 167 } 168 169 *bhp = bh; 170 return 0; 171} 172 173static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 174{ 175 struct jbd2_journal_block_tail *tail; 176 __be32 provided; 177 __u32 calculated; 178 179 if (!jbd2_journal_has_csum_v2or3(j)) 180 return 1; 181 182 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 183 sizeof(struct jbd2_journal_block_tail)); 184 provided = tail->t_checksum; 185 tail->t_checksum = 0; 186 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 187 tail->t_checksum = provided; 188 189 return provided == cpu_to_be32(calculated); 190} 191 192/* 193 * Count the number of in-use tags in a journal descriptor block. 194 */ 195 196static int count_tags(journal_t *journal, struct buffer_head *bh) 197{ 198 char * tagp; 199 journal_block_tag_t * tag; 200 int nr = 0, size = journal->j_blocksize; 201 int tag_bytes = journal_tag_bytes(journal); 202 203 if (jbd2_journal_has_csum_v2or3(journal)) 204 size -= sizeof(struct jbd2_journal_block_tail); 205 206 tagp = &bh->b_data[sizeof(journal_header_t)]; 207 208 while ((tagp - bh->b_data + tag_bytes) <= size) { 209 tag = (journal_block_tag_t *) tagp; 210 211 nr++; 212 tagp += tag_bytes; 213 if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 214 tagp += 16; 215 216 if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 217 break; 218 } 219 220 return nr; 221} 222 223 224/* Make sure we wrap around the log correctly! */ 225#define wrap(journal, var) \ 226do { \ 227 unsigned long _wrap_last = \ 228 jbd2_has_feature_fast_commit(journal) ? \ 229 (journal)->j_fc_last : (journal)->j_last; \ 230 \ 231 if (var >= _wrap_last) \ 232 var -= (_wrap_last - (journal)->j_first); \ 233} while (0) 234 235static int fc_do_one_pass(journal_t *journal, 236 struct recovery_info *info, enum passtype pass) 237{ 238 unsigned int expected_commit_id = info->end_transaction; 239 unsigned long next_fc_block; 240 struct buffer_head *bh; 241 int err = 0; 242 243 next_fc_block = journal->j_fc_first; 244 if (!journal->j_fc_replay_callback) 245 return 0; 246 247 while (next_fc_block <= journal->j_fc_last) { 248 jbd_debug(3, "Fast commit replay: next block %ld\n", 249 next_fc_block); 250 err = jread(&bh, journal, next_fc_block); 251 if (err) { 252 jbd_debug(3, "Fast commit replay: read error\n"); 253 break; 254 } 255 256 err = journal->j_fc_replay_callback(journal, bh, pass, 257 next_fc_block - journal->j_fc_first, 258 expected_commit_id); 259 brelse(bh); 260 next_fc_block++; 261 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 262 break; 263 err = 0; 264 } 265 266 if (err) 267 jbd_debug(3, "Fast commit replay failed, err = %d\n", err); 268 269 return err; 270} 271 272/** 273 * jbd2_journal_recover - recovers a on-disk journal 274 * @journal: the journal to recover 275 * 276 * The primary function for recovering the log contents when mounting a 277 * journaled device. 278 * 279 * Recovery is done in three passes. In the first pass, we look for the 280 * end of the log. In the second, we assemble the list of revoke 281 * blocks. In the third and final pass, we replay any un-revoked blocks 282 * in the log. 283 */ 284int jbd2_journal_recover(journal_t *journal) 285{ 286 int err, err2; 287 journal_superblock_t * sb; 288 289 struct recovery_info info; 290 errseq_t wb_err; 291 struct address_space *mapping; 292 293 memset(&info, 0, sizeof(info)); 294 sb = journal->j_superblock; 295 296 /* 297 * The journal superblock's s_start field (the current log head) 298 * is always zero if, and only if, the journal was cleanly 299 * unmounted. 300 */ 301 302 if (!sb->s_start) { 303 jbd_debug(1, "No recovery required, last transaction %d\n", 304 be32_to_cpu(sb->s_sequence)); 305 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 306 return 0; 307 } 308 309 wb_err = 0; 310 mapping = journal->j_fs_dev->bd_inode->i_mapping; 311 errseq_check_and_advance(&mapping->wb_err, &wb_err); 312 err = do_one_pass(journal, &info, PASS_SCAN); 313 if (!err) 314 err = do_one_pass(journal, &info, PASS_REVOKE); 315 if (!err) 316 err = do_one_pass(journal, &info, PASS_REPLAY); 317 318 jbd_debug(1, "JBD2: recovery, exit status %d, " 319 "recovered transactions %u to %u\n", 320 err, info.start_transaction, info.end_transaction); 321 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 322 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 323 324 /* Restart the log at the next transaction ID, thus invalidating 325 * any existing commit records in the log. */ 326 journal->j_transaction_sequence = ++info.end_transaction; 327 328 jbd2_journal_clear_revoke(journal); 329 err2 = sync_blockdev(journal->j_fs_dev); 330 if (!err) 331 err = err2; 332 err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err); 333 if (!err) 334 err = err2; 335 /* Make sure all replayed data is on permanent storage */ 336 if (journal->j_flags & JBD2_BARRIER) { 337 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL); 338 if (!err) 339 err = err2; 340 } 341 return err; 342} 343 344/** 345 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 346 * @journal: journal to startup 347 * 348 * Locate any valid recovery information from the journal and set up the 349 * journal structures in memory to ignore it (presumably because the 350 * caller has evidence that it is out of date). 351 * This function doesn't appear to be exported.. 352 * 353 * We perform one pass over the journal to allow us to tell the user how 354 * much recovery information is being erased, and to let us initialise 355 * the journal transaction sequence numbers to the next unused ID. 356 */ 357int jbd2_journal_skip_recovery(journal_t *journal) 358{ 359 int err; 360 361 struct recovery_info info; 362 363 memset (&info, 0, sizeof(info)); 364 365 err = do_one_pass(journal, &info, PASS_SCAN); 366 367 if (err) { 368 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 369 ++journal->j_transaction_sequence; 370 } else { 371#ifdef CONFIG_JBD2_DEBUG 372 int dropped = info.end_transaction - 373 be32_to_cpu(journal->j_superblock->s_sequence); 374 jbd_debug(1, 375 "JBD2: ignoring %d transaction%s from the journal.\n", 376 dropped, (dropped == 1) ? "" : "s"); 377#endif 378 journal->j_transaction_sequence = ++info.end_transaction; 379 } 380 381 journal->j_tail = 0; 382 return err; 383} 384 385static inline unsigned long long read_tag_block(journal_t *journal, 386 journal_block_tag_t *tag) 387{ 388 unsigned long long block = be32_to_cpu(tag->t_blocknr); 389 if (jbd2_has_feature_64bit(journal)) 390 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 391 return block; 392} 393 394/* 395 * calc_chksums calculates the checksums for the blocks described in the 396 * descriptor block. 397 */ 398static int calc_chksums(journal_t *journal, struct buffer_head *bh, 399 unsigned long *next_log_block, __u32 *crc32_sum) 400{ 401 int i, num_blks, err; 402 unsigned long io_block; 403 struct buffer_head *obh; 404 405 num_blks = count_tags(journal, bh); 406 /* Calculate checksum of the descriptor block. */ 407 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 408 409 for (i = 0; i < num_blks; i++) { 410 io_block = (*next_log_block)++; 411 wrap(journal, *next_log_block); 412 err = jread(&obh, journal, io_block); 413 if (err) { 414 printk(KERN_ERR "JBD2: IO error %d recovering block " 415 "%lu in log\n", err, io_block); 416 return 1; 417 } else { 418 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 419 obh->b_size); 420 } 421 put_bh(obh); 422 } 423 return 0; 424} 425 426static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 427{ 428 struct commit_header *h; 429 __be32 provided; 430 __u32 calculated; 431 432 if (!jbd2_journal_has_csum_v2or3(j)) 433 return 1; 434 435 h = buf; 436 provided = h->h_chksum[0]; 437 h->h_chksum[0] = 0; 438 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 439 h->h_chksum[0] = provided; 440 441 return provided == cpu_to_be32(calculated); 442} 443 444static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 445 void *buf, __u32 sequence) 446{ 447 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; 448 __u32 csum32; 449 __be32 seq; 450 451 if (!jbd2_journal_has_csum_v2or3(j)) 452 return 1; 453 454 seq = cpu_to_be32(sequence); 455 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 456 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 457 458 if (jbd2_has_feature_csum3(j)) 459 return tag3->t_checksum == cpu_to_be32(csum32); 460 else 461 return tag->t_checksum == cpu_to_be16(csum32); 462} 463 464static int do_one_pass(journal_t *journal, 465 struct recovery_info *info, enum passtype pass) 466{ 467 unsigned int first_commit_ID, next_commit_ID; 468 unsigned long next_log_block; 469 int err, success = 0; 470 journal_superblock_t * sb; 471 journal_header_t * tmp; 472 struct buffer_head * bh; 473 unsigned int sequence; 474 int blocktype; 475 int tag_bytes = journal_tag_bytes(journal); 476 __u32 crc32_sum = ~0; /* Transactional Checksums */ 477 int descr_csum_size = 0; 478 int block_error = 0; 479 bool need_check_commit_time = false; 480 __u64 last_trans_commit_time = 0, commit_time; 481 482 /* 483 * First thing is to establish what we expect to find in the log 484 * (in terms of transaction IDs), and where (in terms of log 485 * block offsets): query the superblock. 486 */ 487 488 sb = journal->j_superblock; 489 next_commit_ID = be32_to_cpu(sb->s_sequence); 490 next_log_block = be32_to_cpu(sb->s_start); 491 492 first_commit_ID = next_commit_ID; 493 if (pass == PASS_SCAN) 494 info->start_transaction = first_commit_ID; 495 496 jbd_debug(1, "Starting recovery pass %d\n", pass); 497 498 /* 499 * Now we walk through the log, transaction by transaction, 500 * making sure that each transaction has a commit block in the 501 * expected place. Each complete transaction gets replayed back 502 * into the main filesystem. 503 */ 504 505 while (1) { 506 int flags; 507 char * tagp; 508 journal_block_tag_t * tag; 509 struct buffer_head * obh; 510 struct buffer_head * nbh; 511 512 cond_resched(); 513 514 /* If we already know where to stop the log traversal, 515 * check right now that we haven't gone past the end of 516 * the log. */ 517 518 if (pass != PASS_SCAN) 519 if (tid_geq(next_commit_ID, info->end_transaction)) 520 break; 521 522 jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 523 next_commit_ID, next_log_block, 524 jbd2_has_feature_fast_commit(journal) ? 525 journal->j_fc_last : journal->j_last); 526 527 /* Skip over each chunk of the transaction looking 528 * either the next descriptor block or the final commit 529 * record. */ 530 531 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); 532 err = jread(&bh, journal, next_log_block); 533 if (err) 534 goto failed; 535 536 next_log_block++; 537 wrap(journal, next_log_block); 538 539 /* What kind of buffer is it? 540 * 541 * If it is a descriptor block, check that it has the 542 * expected sequence number. Otherwise, we're all done 543 * here. */ 544 545 tmp = (journal_header_t *)bh->b_data; 546 547 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 548 brelse(bh); 549 break; 550 } 551 552 blocktype = be32_to_cpu(tmp->h_blocktype); 553 sequence = be32_to_cpu(tmp->h_sequence); 554 jbd_debug(3, "Found magic %d, sequence %d\n", 555 blocktype, sequence); 556 557 if (sequence != next_commit_ID) { 558 brelse(bh); 559 break; 560 } 561 562 /* OK, we have a valid descriptor block which matches 563 * all of the sequence number checks. What are we going 564 * to do with it? That depends on the pass... */ 565 566 switch(blocktype) { 567 case JBD2_DESCRIPTOR_BLOCK: 568 /* Verify checksum first */ 569 if (jbd2_journal_has_csum_v2or3(journal)) 570 descr_csum_size = 571 sizeof(struct jbd2_journal_block_tail); 572 if (descr_csum_size > 0 && 573 !jbd2_descriptor_block_csum_verify(journal, 574 bh->b_data)) { 575 /* 576 * PASS_SCAN can see stale blocks due to lazy 577 * journal init. Don't error out on those yet. 578 */ 579 if (pass != PASS_SCAN) { 580 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 581 next_log_block); 582 err = -EFSBADCRC; 583 brelse(bh); 584 goto failed; 585 } 586 need_check_commit_time = true; 587 jbd_debug(1, 588 "invalid descriptor block found in %lu\n", 589 next_log_block); 590 } 591 592 /* If it is a valid descriptor block, replay it 593 * in pass REPLAY; if journal_checksums enabled, then 594 * calculate checksums in PASS_SCAN, otherwise, 595 * just skip over the blocks it describes. */ 596 if (pass != PASS_REPLAY) { 597 if (pass == PASS_SCAN && 598 jbd2_has_feature_checksum(journal) && 599 !need_check_commit_time && 600 !info->end_transaction) { 601 if (calc_chksums(journal, bh, 602 &next_log_block, 603 &crc32_sum)) { 604 put_bh(bh); 605 break; 606 } 607 put_bh(bh); 608 continue; 609 } 610 next_log_block += count_tags(journal, bh); 611 wrap(journal, next_log_block); 612 put_bh(bh); 613 continue; 614 } 615 616 /* A descriptor block: we can now write all of 617 * the data blocks. Yay, useful work is finally 618 * getting done here! */ 619 620 tagp = &bh->b_data[sizeof(journal_header_t)]; 621 while ((tagp - bh->b_data + tag_bytes) 622 <= journal->j_blocksize - descr_csum_size) { 623 unsigned long io_block; 624 625 tag = (journal_block_tag_t *) tagp; 626 flags = be16_to_cpu(tag->t_flags); 627 628 io_block = next_log_block++; 629 wrap(journal, next_log_block); 630 err = jread(&obh, journal, io_block); 631 if (err) { 632 /* Recover what we can, but 633 * report failure at the end. */ 634 success = err; 635 printk(KERN_ERR 636 "JBD2: IO error %d recovering " 637 "block %ld in log\n", 638 err, io_block); 639 } else { 640 unsigned long long blocknr; 641 642 J_ASSERT(obh != NULL); 643 blocknr = read_tag_block(journal, 644 tag); 645 646 /* If the block has been 647 * revoked, then we're all done 648 * here. */ 649 if (jbd2_journal_test_revoke 650 (journal, blocknr, 651 next_commit_ID)) { 652 brelse(obh); 653 ++info->nr_revoke_hits; 654 goto skip_write; 655 } 656 657 /* Look for block corruption */ 658 if (!jbd2_block_tag_csum_verify( 659 journal, tag, obh->b_data, 660 be32_to_cpu(tmp->h_sequence))) { 661 brelse(obh); 662 success = -EFSBADCRC; 663 printk(KERN_ERR "JBD2: Invalid " 664 "checksum recovering " 665 "data block %llu in " 666 "log\n", blocknr); 667 block_error = 1; 668 goto skip_write; 669 } 670 671 /* Find a buffer for the new 672 * data being restored */ 673 nbh = __getblk(journal->j_fs_dev, 674 blocknr, 675 journal->j_blocksize); 676 if (nbh == NULL) { 677 printk(KERN_ERR 678 "JBD2: Out of memory " 679 "during recovery.\n"); 680 err = -ENOMEM; 681 brelse(bh); 682 brelse(obh); 683 goto failed; 684 } 685 686 lock_buffer(nbh); 687 memcpy(nbh->b_data, obh->b_data, 688 journal->j_blocksize); 689 if (flags & JBD2_FLAG_ESCAPE) { 690 *((__be32 *)nbh->b_data) = 691 cpu_to_be32(JBD2_MAGIC_NUMBER); 692 } 693 694 BUFFER_TRACE(nbh, "marking dirty"); 695 set_buffer_uptodate(nbh); 696 mark_buffer_dirty(nbh); 697 BUFFER_TRACE(nbh, "marking uptodate"); 698 ++info->nr_replays; 699 /* ll_rw_block(WRITE, 1, &nbh); */ 700 unlock_buffer(nbh); 701 brelse(obh); 702 brelse(nbh); 703 } 704 705 skip_write: 706 tagp += tag_bytes; 707 if (!(flags & JBD2_FLAG_SAME_UUID)) 708 tagp += 16; 709 710 if (flags & JBD2_FLAG_LAST_TAG) 711 break; 712 } 713 714 brelse(bh); 715 continue; 716 717 case JBD2_COMMIT_BLOCK: 718 /* How to differentiate between interrupted commit 719 * and journal corruption ? 720 * 721 * {nth transaction} 722 * Checksum Verification Failed 723 * | 724 * ____________________ 725 * | | 726 * async_commit sync_commit 727 * | | 728 * | GO TO NEXT "Journal Corruption" 729 * | TRANSACTION 730 * | 731 * {(n+1)th transanction} 732 * | 733 * _______|______________ 734 * | | 735 * Commit block found Commit block not found 736 * | | 737 * "Journal Corruption" | 738 * _____________|_________ 739 * | | 740 * nth trans corrupt OR nth trans 741 * and (n+1)th interrupted interrupted 742 * before commit block 743 * could reach the disk. 744 * (Cannot find the difference in above 745 * mentioned conditions. Hence assume 746 * "Interrupted Commit".) 747 */ 748 commit_time = be64_to_cpu( 749 ((struct commit_header *)bh->b_data)->h_commit_sec); 750 /* 751 * If need_check_commit_time is set, it means we are in 752 * PASS_SCAN and csum verify failed before. If 753 * commit_time is increasing, it's the same journal, 754 * otherwise it is stale journal block, just end this 755 * recovery. 756 */ 757 if (need_check_commit_time) { 758 if (commit_time >= last_trans_commit_time) { 759 pr_err("JBD2: Invalid checksum found in transaction %u\n", 760 next_commit_ID); 761 err = -EFSBADCRC; 762 brelse(bh); 763 goto failed; 764 } 765 ignore_crc_mismatch: 766 /* 767 * It likely does not belong to same journal, 768 * just end this recovery with success. 769 */ 770 jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 771 next_commit_ID); 772 err = 0; 773 brelse(bh); 774 goto done; 775 } 776 777 /* 778 * Found an expected commit block: if checksums 779 * are present, verify them in PASS_SCAN; else not 780 * much to do other than move on to the next sequence 781 * number. 782 */ 783 if (pass == PASS_SCAN && 784 jbd2_has_feature_checksum(journal)) { 785 struct commit_header *cbh = 786 (struct commit_header *)bh->b_data; 787 unsigned found_chksum = 788 be32_to_cpu(cbh->h_chksum[0]); 789 790 if (info->end_transaction) { 791 journal->j_failed_commit = 792 info->end_transaction; 793 brelse(bh); 794 break; 795 } 796 797 /* Neither checksum match nor unused? */ 798 if (!((crc32_sum == found_chksum && 799 cbh->h_chksum_type == 800 JBD2_CRC32_CHKSUM && 801 cbh->h_chksum_size == 802 JBD2_CRC32_CHKSUM_SIZE) || 803 (cbh->h_chksum_type == 0 && 804 cbh->h_chksum_size == 0 && 805 found_chksum == 0))) 806 goto chksum_error; 807 808 crc32_sum = ~0; 809 } 810 if (pass == PASS_SCAN && 811 !jbd2_commit_block_csum_verify(journal, 812 bh->b_data)) { 813 chksum_error: 814 if (commit_time < last_trans_commit_time) 815 goto ignore_crc_mismatch; 816 info->end_transaction = next_commit_ID; 817 818 if (!jbd2_has_feature_async_commit(journal)) { 819 journal->j_failed_commit = 820 next_commit_ID; 821 brelse(bh); 822 break; 823 } 824 } 825 if (pass == PASS_SCAN) 826 last_trans_commit_time = commit_time; 827 brelse(bh); 828 next_commit_ID++; 829 continue; 830 831 case JBD2_REVOKE_BLOCK: 832 /* 833 * Check revoke block crc in pass_scan, if csum verify 834 * failed, check commit block time later. 835 */ 836 if (pass == PASS_SCAN && 837 !jbd2_descriptor_block_csum_verify(journal, 838 bh->b_data)) { 839 jbd_debug(1, "JBD2: invalid revoke block found in %lu\n", 840 next_log_block); 841 need_check_commit_time = true; 842 } 843 /* If we aren't in the REVOKE pass, then we can 844 * just skip over this block. */ 845 if (pass != PASS_REVOKE) { 846 brelse(bh); 847 continue; 848 } 849 850 err = scan_revoke_records(journal, bh, 851 next_commit_ID, info); 852 brelse(bh); 853 if (err) 854 goto failed; 855 continue; 856 857 default: 858 jbd_debug(3, "Unrecognised magic %d, end of scan.\n", 859 blocktype); 860 brelse(bh); 861 goto done; 862 } 863 } 864 865 done: 866 /* 867 * We broke out of the log scan loop: either we came to the 868 * known end of the log or we found an unexpected block in the 869 * log. If the latter happened, then we know that the "current" 870 * transaction marks the end of the valid log. 871 */ 872 873 if (pass == PASS_SCAN) { 874 if (!info->end_transaction) 875 info->end_transaction = next_commit_ID; 876 } else { 877 /* It's really bad news if different passes end up at 878 * different places (but possible due to IO errors). */ 879 if (info->end_transaction != next_commit_ID) { 880 printk(KERN_ERR "JBD2: recovery pass %d ended at " 881 "transaction %u, expected %u\n", 882 pass, next_commit_ID, info->end_transaction); 883 if (!success) 884 success = -EIO; 885 } 886 } 887 888 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 889 err = fc_do_one_pass(journal, info, pass); 890 if (err) 891 success = err; 892 } 893 894 if (block_error && success == 0) 895 success = -EIO; 896 return success; 897 898 failed: 899 return err; 900} 901 902/* Scan a revoke record, marking all blocks mentioned as revoked. */ 903 904static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 905 tid_t sequence, struct recovery_info *info) 906{ 907 jbd2_journal_revoke_header_t *header; 908 int offset, max; 909 int csum_size = 0; 910 __u32 rcount; 911 int record_len = 4; 912 913 header = (jbd2_journal_revoke_header_t *) bh->b_data; 914 offset = sizeof(jbd2_journal_revoke_header_t); 915 rcount = be32_to_cpu(header->r_count); 916 917 if (jbd2_journal_has_csum_v2or3(journal)) 918 csum_size = sizeof(struct jbd2_journal_block_tail); 919 if (rcount > journal->j_blocksize - csum_size) 920 return -EINVAL; 921 max = rcount; 922 923 if (jbd2_has_feature_64bit(journal)) 924 record_len = 8; 925 926 while (offset + record_len <= max) { 927 unsigned long long blocknr; 928 int err; 929 930 if (record_len == 4) 931 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 932 else 933 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 934 offset += record_len; 935 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 936 if (err) 937 return err; 938 ++info->nr_revokes; 939 } 940 return 0; 941} 942