1// SPDX-License-Identifier: GPL-2.0 2#include <linux/fs.h> 3#include <linux/random.h> 4#include <linux/buffer_head.h> 5#include <linux/utsname.h> 6#include <linux/kthread.h> 7 8#include "ext4.h" 9 10/* Checksumming functions */ 11static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 12{ 13 struct ext4_sb_info *sbi = EXT4_SB(sb); 14 int offset = offsetof(struct mmp_struct, mmp_checksum); 15 __u32 csum; 16 17 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); 18 19 return cpu_to_le32(csum); 20} 21 22static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 23{ 24 if (!ext4_has_metadata_csum(sb)) 25 return 1; 26 27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 28} 29 30static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 31{ 32 if (!ext4_has_metadata_csum(sb)) 33 return; 34 35 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 36} 37 38/* 39 * Write the MMP block using REQ_SYNC to try to get the block on-disk 40 * faster. 41 */ 42static int write_mmp_block_thawed(struct super_block *sb, 43 struct buffer_head *bh) 44{ 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 46 47 ext4_mmp_csum_set(sb, mmp); 48 lock_buffer(bh); 49 bh->b_end_io = end_buffer_write_sync; 50 get_bh(bh); 51 submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); 52 wait_on_buffer(bh); 53 if (unlikely(!buffer_uptodate(bh))) 54 return -EIO; 55 return 0; 56} 57 58static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 59{ 60 int err; 61 62 /* 63 * We protect against freezing so that we don't create dirty buffers 64 * on frozen filesystem. 65 */ 66 sb_start_write(sb); 67 err = write_mmp_block_thawed(sb, bh); 68 sb_end_write(sb); 69 return err; 70} 71 72/* 73 * Read the MMP block. It _must_ be read from disk and hence we clear the 74 * uptodate flag on the buffer. 75 */ 76static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 77 ext4_fsblk_t mmp_block) 78{ 79 struct mmp_struct *mmp; 80 int ret; 81 82 if (*bh) 83 clear_buffer_uptodate(*bh); 84 85 /* This would be sb_bread(sb, mmp_block), except we need to be sure 86 * that the MD RAID device cache has been bypassed, and that the read 87 * is not blocked in the elevator. */ 88 if (!*bh) { 89 *bh = sb_getblk(sb, mmp_block); 90 if (!*bh) { 91 ret = -ENOMEM; 92 goto warn_exit; 93 } 94 } 95 96 lock_buffer(*bh); 97 ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL); 98 if (ret) 99 goto warn_exit; 100 101 mmp = (struct mmp_struct *)((*bh)->b_data); 102 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) { 103 ret = -EFSCORRUPTED; 104 goto warn_exit; 105 } 106 if (!ext4_mmp_csum_verify(sb, mmp)) { 107 ret = -EFSBADCRC; 108 goto warn_exit; 109 } 110 return 0; 111warn_exit: 112 brelse(*bh); 113 *bh = NULL; 114 ext4_warning(sb, "Error %d while reading MMP block %llu", 115 ret, mmp_block); 116 return ret; 117} 118 119/* 120 * Dump as much information as possible to help the admin. 121 */ 122void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 123 const char *function, unsigned int line, const char *msg) 124{ 125 __ext4_warning(sb, function, line, "%s", msg); 126 __ext4_warning(sb, function, line, 127 "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", 128 (unsigned long long)le64_to_cpu(mmp->mmp_time), 129 (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, 130 (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); 131} 132 133/* 134 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 135 */ 136static int kmmpd(void *data) 137{ 138 struct super_block *sb = (struct super_block *) data; 139 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 140 struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; 141 struct mmp_struct *mmp; 142 ext4_fsblk_t mmp_block; 143 u32 seq = 0; 144 unsigned long failed_writes = 0; 145 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 146 unsigned mmp_check_interval; 147 unsigned long last_update_time; 148 unsigned long diff; 149 int retval = 0; 150 151 mmp_block = le64_to_cpu(es->s_mmp_block); 152 mmp = (struct mmp_struct *)(bh->b_data); 153 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 154 /* 155 * Start with the higher mmp_check_interval and reduce it if 156 * the MMP block is being updated on time. 157 */ 158 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 159 EXT4_MMP_MIN_CHECK_INTERVAL); 160 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 161 BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); 162 bdevname(bh->b_bdev, mmp->mmp_bdevname); 163 164 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 165 sizeof(mmp->mmp_nodename)); 166 167 while (!kthread_should_stop() && !sb_rdonly(sb)) { 168 if (!ext4_has_feature_mmp(sb)) { 169 ext4_warning(sb, "kmmpd being stopped since MMP feature" 170 " has been disabled."); 171 goto wait_to_exit; 172 } 173 if (++seq > EXT4_MMP_SEQ_MAX) 174 seq = 1; 175 176 mmp->mmp_seq = cpu_to_le32(seq); 177 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 178 last_update_time = jiffies; 179 180 retval = write_mmp_block(sb, bh); 181 /* 182 * Don't spew too many error messages. Print one every 183 * (s_mmp_update_interval * 60) seconds. 184 */ 185 if (retval) { 186 if ((failed_writes % 60) == 0) { 187 ext4_error_err(sb, -retval, 188 "Error writing to MMP block"); 189 } 190 failed_writes++; 191 } 192 193 diff = jiffies - last_update_time; 194 if (diff < mmp_update_interval * HZ) 195 schedule_timeout_interruptible(mmp_update_interval * 196 HZ - diff); 197 198 /* 199 * We need to make sure that more than mmp_check_interval 200 * seconds have not passed since writing. If that has happened 201 * we need to check if the MMP block is as we left it. 202 */ 203 diff = jiffies - last_update_time; 204 if (diff > mmp_check_interval * HZ) { 205 struct buffer_head *bh_check = NULL; 206 struct mmp_struct *mmp_check; 207 208 retval = read_mmp_block(sb, &bh_check, mmp_block); 209 if (retval) { 210 ext4_error_err(sb, -retval, 211 "error reading MMP data: %d", 212 retval); 213 goto wait_to_exit; 214 } 215 216 mmp_check = (struct mmp_struct *)(bh_check->b_data); 217 if (mmp->mmp_seq != mmp_check->mmp_seq || 218 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 219 sizeof(mmp->mmp_nodename))) { 220 dump_mmp_msg(sb, mmp_check, 221 "Error while updating MMP info. " 222 "The filesystem seems to have been" 223 " multiply mounted."); 224 ext4_error_err(sb, EBUSY, "abort"); 225 put_bh(bh_check); 226 retval = -EBUSY; 227 goto wait_to_exit; 228 } 229 put_bh(bh_check); 230 } 231 232 /* 233 * Adjust the mmp_check_interval depending on how much time 234 * it took for the MMP block to be written. 235 */ 236 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, 237 EXT4_MMP_MAX_CHECK_INTERVAL), 238 EXT4_MMP_MIN_CHECK_INTERVAL); 239 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 240 } 241 242 /* 243 * Unmount seems to be clean. 244 */ 245 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 246 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 247 248 retval = write_mmp_block(sb, bh); 249 250wait_to_exit: 251 while (!kthread_should_stop()) { 252 set_current_state(TASK_INTERRUPTIBLE); 253 if (!kthread_should_stop()) 254 schedule(); 255 } 256 set_current_state(TASK_RUNNING); 257 return retval; 258} 259 260void ext4_stop_mmpd(struct ext4_sb_info *sbi) 261{ 262 if (sbi->s_mmp_tsk) { 263 kthread_stop(sbi->s_mmp_tsk); 264 brelse(sbi->s_mmp_bh); 265 sbi->s_mmp_tsk = NULL; 266 } 267} 268 269/* 270 * Get a random new sequence number but make sure it is not greater than 271 * EXT4_MMP_SEQ_MAX. 272 */ 273static unsigned int mmp_new_seq(void) 274{ 275 u32 new_seq; 276 277 do { 278 new_seq = prandom_u32(); 279 } while (new_seq > EXT4_MMP_SEQ_MAX); 280 281 return new_seq; 282} 283 284/* 285 * Protect the filesystem from being mounted more than once. 286 */ 287int ext4_multi_mount_protect(struct super_block *sb, 288 ext4_fsblk_t mmp_block) 289{ 290 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 291 struct buffer_head *bh = NULL; 292 struct mmp_struct *mmp = NULL; 293 u32 seq; 294 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 295 unsigned int wait_time = 0; 296 int retval; 297 298 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 299 mmp_block >= ext4_blocks_count(es)) { 300 ext4_warning(sb, "Invalid MMP block in superblock"); 301 retval = -EINVAL; 302 goto failed; 303 } 304 305 retval = read_mmp_block(sb, &bh, mmp_block); 306 if (retval) 307 goto failed; 308 309 mmp = (struct mmp_struct *)(bh->b_data); 310 311 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 312 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 313 314 /* 315 * If check_interval in MMP block is larger, use that instead of 316 * update_interval from the superblock. 317 */ 318 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 319 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 320 321 seq = le32_to_cpu(mmp->mmp_seq); 322 if (seq == EXT4_MMP_SEQ_CLEAN) 323 goto skip; 324 325 if (seq == EXT4_MMP_SEQ_FSCK) { 326 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 327 retval = -EBUSY; 328 goto failed; 329 } 330 331 wait_time = min(mmp_check_interval * 2 + 1, 332 mmp_check_interval + 60); 333 334 /* Print MMP interval if more than 20 secs. */ 335 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 336 ext4_warning(sb, "MMP interval %u higher than expected, please" 337 " wait.\n", wait_time * 2); 338 339 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 340 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 341 retval = -ETIMEDOUT; 342 goto failed; 343 } 344 345 retval = read_mmp_block(sb, &bh, mmp_block); 346 if (retval) 347 goto failed; 348 mmp = (struct mmp_struct *)(bh->b_data); 349 if (seq != le32_to_cpu(mmp->mmp_seq)) { 350 dump_mmp_msg(sb, mmp, 351 "Device is already active on another node."); 352 retval = -EBUSY; 353 goto failed; 354 } 355 356skip: 357 /* 358 * write a new random sequence number. 359 */ 360 seq = mmp_new_seq(); 361 mmp->mmp_seq = cpu_to_le32(seq); 362 363 /* 364 * On mount / remount we are protected against fs freezing (by s_umount 365 * semaphore) and grabbing freeze protection upsets lockdep 366 */ 367 retval = write_mmp_block_thawed(sb, bh); 368 if (retval) 369 goto failed; 370 371 /* 372 * wait for MMP interval and check mmp_seq. 373 */ 374 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 375 ext4_warning(sb, "MMP startup interrupted, failing mount"); 376 retval = -ETIMEDOUT; 377 goto failed; 378 } 379 380 retval = read_mmp_block(sb, &bh, mmp_block); 381 if (retval) 382 goto failed; 383 mmp = (struct mmp_struct *)(bh->b_data); 384 if (seq != le32_to_cpu(mmp->mmp_seq)) { 385 dump_mmp_msg(sb, mmp, 386 "Device is already active on another node."); 387 retval = -EBUSY; 388 goto failed; 389 } 390 391 EXT4_SB(sb)->s_mmp_bh = bh; 392 393 /* 394 * Start a kernel thread to update the MMP block periodically. 395 */ 396 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s", 397 (int)sizeof(mmp->mmp_bdevname), 398 bdevname(bh->b_bdev, 399 mmp->mmp_bdevname)); 400 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 401 EXT4_SB(sb)->s_mmp_tsk = NULL; 402 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 403 sb->s_id); 404 retval = -ENOMEM; 405 goto failed; 406 } 407 408 return 0; 409 410failed: 411 brelse(bh); 412 return retval; 413} 414