// SPDX-License-Identifier: GPL-2.0

/*
 * fs/ext4/fast_commit.c
 *
 * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
 *
 * Ext4 fast commits routines.
 */
#include "ext4.h"
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "mballoc.h"

/*
 * Ext4 Fast Commits
 * -----------------
 *
 * Ext4 fast commits implement fine grained journalling for Ext4.
 *
 * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
 * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
 * TLV during the recovery phase. For the scenarios for which we currently
 * don't have replay code, fast commit falls back to full commits.
 * Fast commits record delta in one of the following three categories.
 *
 * (A) Directory entry updates:
 *
 * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
 * - EXT4_FC_TAG_LINK		- records directory entry link
 * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
 *
 * (B) File specific data range updates:
 *
 * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
 * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
 *
 * (C) Inode metadata (mtime / ctime etc):
 *
 * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
 *				  during recovery. Note that iblocks field is
 *				  not replayed and instead derived during
 *				  replay.
 * Commit Operation
 * ----------------
 * With fast commits, we maintain all the directory entry operations in the
 * order in which they are issued in an in-memory queue. This queue is flushed
 * to disk during the commit operation. We also maintain a list of inodes
 * that need to be committed during a fast commit in another in memory queue of
 * inodes. During the commit operation, we commit in the following order:
 *
 * [1] Lock inodes for any further data updates by setting COMMITTING state
 * [2] Submit data buffers of all the inodes
 * [3] Wait for [2] to complete
 * [4] Commit all the directory entry updates in the fast commit space
 * [5] Commit all the changed inode structures
 * [6] Write tail tag (this tag ensures the atomicity, please read the following
 *     section for more details).
 * [7] Wait for [4], [5] and [6] to complete.
 *
 * All the inode updates must call ext4_fc_start_update() before starting an
 * update. If such an ongoing update is present, fast commit waits for it to
 * complete. The completion of such an update is marked by
 * ext4_fc_stop_update().
 *
 * Fast Commit Ineligibility
 * -------------------------
 *
 * Not all operations are supported by fast commits today (e.g. extended
 * attributes). Fast commit ineligibility is marked by calling
 * ext4_fc_mark_ineligible(): This makes the next fast commit operation fall
 * back to a full commit.
 *
 * Atomicity of commits
 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
 * tag contains CRC of the contents and TID of the transaction after which
 * this fast commit should be applied. Recovery code replays fast commit
 * logs only if there's at least 1 valid tail present. For every fast commit
 * operation, there is 1 tail. This means, we may end up with multiple tails
 * in the fast commit space. Here's an example:
 *
 * - Create a new file A and remove existing file B
 * - fsync()
 * - Append contents to file A
 * - Truncate file A
 * - fsync()
 *
 * The fast commit space at the end of above operations would look like this:
 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
 *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
 *
 * Replay code should thus check for all the valid tails in the FC area.
 *
 * Fast Commit Replay Idempotence
 * ------------------------------
 *
 * Fast commit tags are idempotent in nature provided the recovery code follows
 * certain rules. The guiding principle that the commit path follows while
 * committing is that it stores the result of a particular operation instead of
 * storing the procedure.
 *
 * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
 * was associated with inode 10. During fast commit, instead of storing this
 * operation as a procedure "rename a to b", we store the resulting file system
 * state as a "series" of outcomes:
 *
 * - Link dirent b to inode 10
 * - Unlink dirent a
 * - Inode <10> with valid refcount
 *
 * Now when recovery code runs, it needs to "enforce" this state on the file
 * system. This is what guarantees idempotence of fast commit replay.
 *
 * Let's take an example of a procedure that is not idempotent and see how fast
 * commits make it idempotent. Consider the following sequence of operations:
 *
 *     rm A;    mv B A;    read A
 *  (x)     (y)        (z)
 *
 * (x), (y) and (z) are the points at which we can crash. If we store this
 * sequence of operations as is then the replay is not idempotent. Let's say
 * while in replay, we crash at (z). During the second replay, file A (which was
 * actually created as a result of "mv B A" operation) would get deleted. Thus,
 * file named A would be absent when we try to read A. So, this sequence of
 * operations is not idempotent. However, as mentioned above, instead of storing
 * the procedure fast commits store the outcome of each procedure. Thus the fast
 * commit log for above procedure would be as follows:
 *
 * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
 * inode 11 before the replay)
 *
 *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
 * (w)          (x)                    (y)          (z)
 *
 * If we crash at (z), we will have file A linked to inode 11. During the second
 * replay, we will remove file A (inode 11). But we will create it back and make
 * it point to inode 11. We won't find B, so we'll just skip that step. At this
 * point, the refcount for inode 11 is not reliable, but that gets fixed by the
 * replay of last inode 11 tag. Crashes at points (w), (x) and (y) get handled
 * similarly. Thus, by converting a non-idempotent procedure into a series of
 * idempotent outcomes, fast commits ensured idempotence during the replay.
 *
 * TODOs
 * -----
 *
 * 0) Fast commit replay path hardening: Fast commit replay code should use
 *    journal handles to make sure all the updates it does during the replay
 *    path are atomic. With that if we crash during fast commit replay, after
 *    trying to do recovery again, we will find a file system where fast commit
 *    area is invalid (because new full commit would be found). In order to deal
 *    with that, fast commit replay code should ensure that the "FC_REPLAY"
 *    superblock state is persisted before starting the replay, so that after
 *    the crash, fast commit recovery code can look at that flag and perform
 *    fast commit recovery even if that area is invalidated by later full
 *    commits.
 *
 * 1) Fast commit's commit path locks the entire file system during fast
 *    commit. This has significant performance penalty. Instead of that, we
 *    should use ext4_fc_start/stop_update functions to start inode level
 *    updates from ext4_journal_start/stop. Once we do that we can drop file
 *    system locking during commit path.
 *
 * 2) Handle more ineligible cases.
16662306a36Sopenharmony_ci */ 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci#include <trace/events/ext4.h> 16962306a36Sopenharmony_cistatic struct kmem_cache *ext4_fc_dentry_cachep; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_cistatic void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci BUFFER_TRACE(bh, ""); 17462306a36Sopenharmony_ci if (uptodate) { 17562306a36Sopenharmony_ci ext4_debug("%s: Block %lld up-to-date", 17662306a36Sopenharmony_ci __func__, bh->b_blocknr); 17762306a36Sopenharmony_ci set_buffer_uptodate(bh); 17862306a36Sopenharmony_ci } else { 17962306a36Sopenharmony_ci ext4_debug("%s: Block %lld not up-to-date", 18062306a36Sopenharmony_ci __func__, bh->b_blocknr); 18162306a36Sopenharmony_ci clear_buffer_uptodate(bh); 18262306a36Sopenharmony_ci } 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci unlock_buffer(bh); 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_cistatic inline void ext4_fc_reset_inode(struct inode *inode) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci ei->i_fc_lblk_start = 0; 19262306a36Sopenharmony_ci ei->i_fc_lblk_len = 0; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_civoid ext4_fc_init_inode(struct inode *inode) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci ext4_fc_reset_inode(inode); 20062306a36Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING); 20162306a36Sopenharmony_ci INIT_LIST_HEAD(&ei->i_fc_list); 20262306a36Sopenharmony_ci INIT_LIST_HEAD(&ei->i_fc_dilist); 20362306a36Sopenharmony_ci init_waitqueue_head(&ei->i_fc_wait); 20462306a36Sopenharmony_ci atomic_set(&ei->i_fc_updates, 0); 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 
20762306a36Sopenharmony_ci/* This function must be called with sbi->s_fc_lock held. */ 20862306a36Sopenharmony_cistatic void ext4_fc_wait_committing_inode(struct inode *inode) 20962306a36Sopenharmony_ci__releases(&EXT4_SB(inode->i_sb)->s_fc_lock) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci wait_queue_head_t *wq; 21262306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci#if (BITS_PER_LONG < 64) 21562306a36Sopenharmony_ci DEFINE_WAIT_BIT(wait, &ei->i_state_flags, 21662306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING); 21762306a36Sopenharmony_ci wq = bit_waitqueue(&ei->i_state_flags, 21862306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING); 21962306a36Sopenharmony_ci#else 22062306a36Sopenharmony_ci DEFINE_WAIT_BIT(wait, &ei->i_flags, 22162306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING); 22262306a36Sopenharmony_ci wq = bit_waitqueue(&ei->i_flags, 22362306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING); 22462306a36Sopenharmony_ci#endif 22562306a36Sopenharmony_ci lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock); 22662306a36Sopenharmony_ci prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); 22762306a36Sopenharmony_ci spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 22862306a36Sopenharmony_ci schedule(); 22962306a36Sopenharmony_ci finish_wait(wq, &wait.wq_entry); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_cistatic bool ext4_fc_disabled(struct super_block *sb) 23362306a36Sopenharmony_ci{ 23462306a36Sopenharmony_ci return (!test_opt2(sb, JOURNAL_FAST_COMMIT) || 23562306a36Sopenharmony_ci (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)); 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci/* 23962306a36Sopenharmony_ci * Inform Ext4's fast about start of an inode update 24062306a36Sopenharmony_ci * 24162306a36Sopenharmony_ci * This function is called by the high level call VFS callbacks before 24262306a36Sopenharmony_ci * performing any 
inode update. This function blocks if there's an ongoing 24362306a36Sopenharmony_ci * fast commit on the inode in question. 24462306a36Sopenharmony_ci */ 24562306a36Sopenharmony_civoid ext4_fc_start_update(struct inode *inode) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 25062306a36Sopenharmony_ci return; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_cirestart: 25362306a36Sopenharmony_ci spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 25462306a36Sopenharmony_ci if (list_empty(&ei->i_fc_list)) 25562306a36Sopenharmony_ci goto out; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 25862306a36Sopenharmony_ci ext4_fc_wait_committing_inode(inode); 25962306a36Sopenharmony_ci goto restart; 26062306a36Sopenharmony_ci } 26162306a36Sopenharmony_ciout: 26262306a36Sopenharmony_ci atomic_inc(&ei->i_fc_updates); 26362306a36Sopenharmony_ci spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci/* 26762306a36Sopenharmony_ci * Stop inode update and wake up waiting fast commits if any. 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_civoid ext4_fc_stop_update(struct inode *inode) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 27462306a36Sopenharmony_ci return; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci if (atomic_dec_and_test(&ei->i_fc_updates)) 27762306a36Sopenharmony_ci wake_up_all(&ei->i_fc_wait); 27862306a36Sopenharmony_ci} 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci/* 28162306a36Sopenharmony_ci * Remove inode from fast commit list. If the inode is being committed 28262306a36Sopenharmony_ci * we wait until inode commit is done. 
28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_civoid ext4_fc_del(struct inode *inode) 28562306a36Sopenharmony_ci{ 28662306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 28762306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 28862306a36Sopenharmony_ci struct ext4_fc_dentry_update *fc_dentry; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 29162306a36Sopenharmony_ci return; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_cirestart: 29462306a36Sopenharmony_ci spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock); 29562306a36Sopenharmony_ci if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) { 29662306a36Sopenharmony_ci spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock); 29762306a36Sopenharmony_ci return; 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) { 30162306a36Sopenharmony_ci ext4_fc_wait_committing_inode(inode); 30262306a36Sopenharmony_ci goto restart; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (!list_empty(&ei->i_fc_list)) 30662306a36Sopenharmony_ci list_del_init(&ei->i_fc_list); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci /* 30962306a36Sopenharmony_ci * Since this inode is getting removed, let's also remove all FC 31062306a36Sopenharmony_ci * dentry create references, since it is not needed to log it anyways. 
31162306a36Sopenharmony_ci */ 31262306a36Sopenharmony_ci if (list_empty(&ei->i_fc_dilist)) { 31362306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 31462306a36Sopenharmony_ci return; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist); 31862306a36Sopenharmony_ci WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT); 31962306a36Sopenharmony_ci list_del_init(&fc_dentry->fcd_list); 32062306a36Sopenharmony_ci list_del_init(&fc_dentry->fcd_dilist); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci WARN_ON(!list_empty(&ei->i_fc_dilist)); 32362306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (fc_dentry->fcd_name.name && 32662306a36Sopenharmony_ci fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 32762306a36Sopenharmony_ci kfree(fc_dentry->fcd_name.name); 32862306a36Sopenharmony_ci kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci return; 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci/* 33462306a36Sopenharmony_ci * Mark file system as fast commit ineligible, and record latest 33562306a36Sopenharmony_ci * ineligible transaction tid. This means until the recorded 33662306a36Sopenharmony_ci * transaction, commit operation would result in a full jbd2 commit. 
33762306a36Sopenharmony_ci */ 33862306a36Sopenharmony_civoid ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) 33962306a36Sopenharmony_ci{ 34062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 34162306a36Sopenharmony_ci tid_t tid; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci if (ext4_fc_disabled(sb)) 34462306a36Sopenharmony_ci return; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 34762306a36Sopenharmony_ci if (handle && !IS_ERR(handle)) 34862306a36Sopenharmony_ci tid = handle->h_transaction->t_tid; 34962306a36Sopenharmony_ci else { 35062306a36Sopenharmony_ci read_lock(&sbi->s_journal->j_state_lock); 35162306a36Sopenharmony_ci tid = sbi->s_journal->j_running_transaction ? 35262306a36Sopenharmony_ci sbi->s_journal->j_running_transaction->t_tid : 0; 35362306a36Sopenharmony_ci read_unlock(&sbi->s_journal->j_state_lock); 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 35662306a36Sopenharmony_ci if (sbi->s_fc_ineligible_tid < tid) 35762306a36Sopenharmony_ci sbi->s_fc_ineligible_tid = tid; 35862306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 35962306a36Sopenharmony_ci WARN_ON(reason >= EXT4_FC_REASON_MAX); 36062306a36Sopenharmony_ci sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; 36162306a36Sopenharmony_ci} 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci/* 36462306a36Sopenharmony_ci * Generic fast commit tracking function. If this is the first time this we are 36562306a36Sopenharmony_ci * called after a full commit, we initialize fast commit fields and then call 36662306a36Sopenharmony_ci * __fc_track_fn() with update = 0. If we have already been called after a full 36762306a36Sopenharmony_ci * commit, we pass update = 1. 
Based on that, the track function can determine 36862306a36Sopenharmony_ci * if it needs to track a field for the first time or if it needs to just 36962306a36Sopenharmony_ci * update the previously tracked value. 37062306a36Sopenharmony_ci * 37162306a36Sopenharmony_ci * If enqueue is set, this function enqueues the inode in fast commit list. 37262306a36Sopenharmony_ci */ 37362306a36Sopenharmony_cistatic int ext4_fc_track_template( 37462306a36Sopenharmony_ci handle_t *handle, struct inode *inode, 37562306a36Sopenharmony_ci int (*__fc_track_fn)(struct inode *, void *, bool), 37662306a36Sopenharmony_ci void *args, int enqueue) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci bool update = false; 37962306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 38062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 38162306a36Sopenharmony_ci tid_t tid = 0; 38262306a36Sopenharmony_ci int ret; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci tid = handle->h_transaction->t_tid; 38562306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 38662306a36Sopenharmony_ci if (tid == ei->i_sync_tid) { 38762306a36Sopenharmony_ci update = true; 38862306a36Sopenharmony_ci } else { 38962306a36Sopenharmony_ci ext4_fc_reset_inode(inode); 39062306a36Sopenharmony_ci ei->i_sync_tid = tid; 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci ret = __fc_track_fn(inode, args, update); 39362306a36Sopenharmony_ci mutex_unlock(&ei->i_fc_lock); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (!enqueue) 39662306a36Sopenharmony_ci return ret; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 39962306a36Sopenharmony_ci if (list_empty(&EXT4_I(inode)->i_fc_list)) 40062306a36Sopenharmony_ci list_add_tail(&EXT4_I(inode)->i_fc_list, 40162306a36Sopenharmony_ci (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 40262306a36Sopenharmony_ci sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? 
40362306a36Sopenharmony_ci &sbi->s_fc_q[FC_Q_STAGING] : 40462306a36Sopenharmony_ci &sbi->s_fc_q[FC_Q_MAIN]); 40562306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci return ret; 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_cistruct __track_dentry_update_args { 41162306a36Sopenharmony_ci struct dentry *dentry; 41262306a36Sopenharmony_ci int op; 41362306a36Sopenharmony_ci}; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */ 41662306a36Sopenharmony_cistatic int __track_dentry_update(struct inode *inode, void *arg, bool update) 41762306a36Sopenharmony_ci{ 41862306a36Sopenharmony_ci struct ext4_fc_dentry_update *node; 41962306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 42062306a36Sopenharmony_ci struct __track_dentry_update_args *dentry_update = 42162306a36Sopenharmony_ci (struct __track_dentry_update_args *)arg; 42262306a36Sopenharmony_ci struct dentry *dentry = dentry_update->dentry; 42362306a36Sopenharmony_ci struct inode *dir = dentry->d_parent->d_inode; 42462306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 42562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci mutex_unlock(&ei->i_fc_lock); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (IS_ENCRYPTED(dir)) { 43062306a36Sopenharmony_ci ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, 43162306a36Sopenharmony_ci NULL); 43262306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 43362306a36Sopenharmony_ci return -EOPNOTSUPP; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); 43762306a36Sopenharmony_ci if (!node) { 43862306a36Sopenharmony_ci ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); 43962306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 
44062306a36Sopenharmony_ci return -ENOMEM; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci node->fcd_op = dentry_update->op; 44462306a36Sopenharmony_ci node->fcd_parent = dir->i_ino; 44562306a36Sopenharmony_ci node->fcd_ino = inode->i_ino; 44662306a36Sopenharmony_ci if (dentry->d_name.len > DNAME_INLINE_LEN) { 44762306a36Sopenharmony_ci node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); 44862306a36Sopenharmony_ci if (!node->fcd_name.name) { 44962306a36Sopenharmony_ci kmem_cache_free(ext4_fc_dentry_cachep, node); 45062306a36Sopenharmony_ci ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); 45162306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 45262306a36Sopenharmony_ci return -ENOMEM; 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci memcpy((u8 *)node->fcd_name.name, dentry->d_name.name, 45562306a36Sopenharmony_ci dentry->d_name.len); 45662306a36Sopenharmony_ci } else { 45762306a36Sopenharmony_ci memcpy(node->fcd_iname, dentry->d_name.name, 45862306a36Sopenharmony_ci dentry->d_name.len); 45962306a36Sopenharmony_ci node->fcd_name.name = node->fcd_iname; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci node->fcd_name.len = dentry->d_name.len; 46262306a36Sopenharmony_ci INIT_LIST_HEAD(&node->fcd_dilist); 46362306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 46462306a36Sopenharmony_ci if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 46562306a36Sopenharmony_ci sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) 46662306a36Sopenharmony_ci list_add_tail(&node->fcd_list, 46762306a36Sopenharmony_ci &sbi->s_fc_dentry_q[FC_Q_STAGING]); 46862306a36Sopenharmony_ci else 46962306a36Sopenharmony_ci list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]); 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci /* 47262306a36Sopenharmony_ci * This helps us keep a track of all fc_dentry updates which is part of 47362306a36Sopenharmony_ci * this ext4 inode. 
So in case the inode is getting unlinked, before 47462306a36Sopenharmony_ci * even we get a chance to fsync, we could remove all fc_dentry 47562306a36Sopenharmony_ci * references while evicting the inode in ext4_fc_del(). 47662306a36Sopenharmony_ci * Also with this, we don't need to loop over all the inodes in 47762306a36Sopenharmony_ci * sbi->s_fc_q to get the corresponding inode in 47862306a36Sopenharmony_ci * ext4_fc_commit_dentry_updates(). 47962306a36Sopenharmony_ci */ 48062306a36Sopenharmony_ci if (dentry_update->op == EXT4_FC_TAG_CREAT) { 48162306a36Sopenharmony_ci WARN_ON(!list_empty(&ei->i_fc_dilist)); 48262306a36Sopenharmony_ci list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist); 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 48562306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci return 0; 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_civoid __ext4_fc_track_unlink(handle_t *handle, 49162306a36Sopenharmony_ci struct inode *inode, struct dentry *dentry) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci struct __track_dentry_update_args args; 49462306a36Sopenharmony_ci int ret; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci args.dentry = dentry; 49762306a36Sopenharmony_ci args.op = EXT4_FC_TAG_UNLINK; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 50062306a36Sopenharmony_ci (void *)&args, 0); 50162306a36Sopenharmony_ci trace_ext4_fc_track_unlink(handle, inode, dentry, ret); 50262306a36Sopenharmony_ci} 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_civoid ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry) 50562306a36Sopenharmony_ci{ 50662306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 50962306a36Sopenharmony_ci return; 
51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 51262306a36Sopenharmony_ci return; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci __ext4_fc_track_unlink(handle, inode, dentry); 51562306a36Sopenharmony_ci} 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_civoid __ext4_fc_track_link(handle_t *handle, 51862306a36Sopenharmony_ci struct inode *inode, struct dentry *dentry) 51962306a36Sopenharmony_ci{ 52062306a36Sopenharmony_ci struct __track_dentry_update_args args; 52162306a36Sopenharmony_ci int ret; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci args.dentry = dentry; 52462306a36Sopenharmony_ci args.op = EXT4_FC_TAG_LINK; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 52762306a36Sopenharmony_ci (void *)&args, 0); 52862306a36Sopenharmony_ci trace_ext4_fc_track_link(handle, inode, dentry, ret); 52962306a36Sopenharmony_ci} 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_civoid ext4_fc_track_link(handle_t *handle, struct dentry *dentry) 53262306a36Sopenharmony_ci{ 53362306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 53662306a36Sopenharmony_ci return; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 53962306a36Sopenharmony_ci return; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci __ext4_fc_track_link(handle, inode, dentry); 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_civoid __ext4_fc_track_create(handle_t *handle, struct inode *inode, 54562306a36Sopenharmony_ci struct dentry *dentry) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci struct __track_dentry_update_args args; 54862306a36Sopenharmony_ci int ret; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci args.dentry = dentry; 
55162306a36Sopenharmony_ci args.op = EXT4_FC_TAG_CREAT; 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci ret = ext4_fc_track_template(handle, inode, __track_dentry_update, 55462306a36Sopenharmony_ci (void *)&args, 0); 55562306a36Sopenharmony_ci trace_ext4_fc_track_create(handle, inode, dentry, ret); 55662306a36Sopenharmony_ci} 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_civoid ext4_fc_track_create(handle_t *handle, struct dentry *dentry) 55962306a36Sopenharmony_ci{ 56062306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 56362306a36Sopenharmony_ci return; 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 56662306a36Sopenharmony_ci return; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci __ext4_fc_track_create(handle, inode, dentry); 56962306a36Sopenharmony_ci} 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci/* __track_fn for inode tracking */ 57262306a36Sopenharmony_cistatic int __track_inode(struct inode *inode, void *arg, bool update) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci if (update) 57562306a36Sopenharmony_ci return -EEXIST; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci EXT4_I(inode)->i_fc_lblk_len = 0; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci return 0; 58062306a36Sopenharmony_ci} 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_civoid ext4_fc_track_inode(handle_t *handle, struct inode *inode) 58362306a36Sopenharmony_ci{ 58462306a36Sopenharmony_ci int ret; 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 58762306a36Sopenharmony_ci return; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 59062306a36Sopenharmony_ci return; 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) { 59362306a36Sopenharmony_ci 
ext4_fc_mark_ineligible(inode->i_sb, 59462306a36Sopenharmony_ci EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); 59562306a36Sopenharmony_ci return; 59662306a36Sopenharmony_ci } 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 59962306a36Sopenharmony_ci return; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1); 60262306a36Sopenharmony_ci trace_ext4_fc_track_inode(handle, inode, ret); 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_cistruct __track_range_args { 60662306a36Sopenharmony_ci ext4_lblk_t start, end; 60762306a36Sopenharmony_ci}; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci/* __track_fn for tracking data updates */ 61062306a36Sopenharmony_cistatic int __track_range(struct inode *inode, void *arg, bool update) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 61362306a36Sopenharmony_ci ext4_lblk_t oldstart; 61462306a36Sopenharmony_ci struct __track_range_args *__arg = 61562306a36Sopenharmony_ci (struct __track_range_args *)arg; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) { 61862306a36Sopenharmony_ci ext4_debug("Special inode %ld being modified\n", inode->i_ino); 61962306a36Sopenharmony_ci return -ECANCELED; 62062306a36Sopenharmony_ci } 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci oldstart = ei->i_fc_lblk_start; 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci if (update && ei->i_fc_lblk_len > 0) { 62562306a36Sopenharmony_ci ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start); 62662306a36Sopenharmony_ci ei->i_fc_lblk_len = 62762306a36Sopenharmony_ci max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) - 62862306a36Sopenharmony_ci ei->i_fc_lblk_start + 1; 62962306a36Sopenharmony_ci } else { 63062306a36Sopenharmony_ci ei->i_fc_lblk_start = __arg->start; 
63162306a36Sopenharmony_ci ei->i_fc_lblk_len = __arg->end - __arg->start + 1; 63262306a36Sopenharmony_ci } 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci return 0; 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_civoid ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start, 63862306a36Sopenharmony_ci ext4_lblk_t end) 63962306a36Sopenharmony_ci{ 64062306a36Sopenharmony_ci struct __track_range_args args; 64162306a36Sopenharmony_ci int ret; 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 64462306a36Sopenharmony_ci return; 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci if (ext4_fc_disabled(inode->i_sb)) 64762306a36Sopenharmony_ci return; 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) 65062306a36Sopenharmony_ci return; 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci args.start = start; 65362306a36Sopenharmony_ci args.end = end; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci trace_ext4_fc_track_range(handle, inode, start, end, ret); 65862306a36Sopenharmony_ci} 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_cistatic void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci blk_opf_t write_flags = REQ_SYNC; 66362306a36Sopenharmony_ci struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci /* Add REQ_FUA | REQ_PREFLUSH only its tail */ 66662306a36Sopenharmony_ci if (test_opt(sb, BARRIER) && is_tail) 66762306a36Sopenharmony_ci write_flags |= REQ_FUA | REQ_PREFLUSH; 66862306a36Sopenharmony_ci lock_buffer(bh); 66962306a36Sopenharmony_ci set_buffer_dirty(bh); 67062306a36Sopenharmony_ci set_buffer_uptodate(bh); 67162306a36Sopenharmony_ci bh->b_end_io = 
ext4_end_buffer_io_sync; 67262306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE | write_flags, bh); 67362306a36Sopenharmony_ci EXT4_SB(sb)->s_fc_bh = NULL; 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci/* Ext4 commit path routines */ 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci/* 67962306a36Sopenharmony_ci * Allocate len bytes on a fast commit buffer. 68062306a36Sopenharmony_ci * 68162306a36Sopenharmony_ci * During the commit time this function is used to manage fast commit 68262306a36Sopenharmony_ci * block space. We don't split a fast commit log onto different 68362306a36Sopenharmony_ci * blocks. So this function makes sure that if there's not enough space 68462306a36Sopenharmony_ci * on the current block, the remaining space in the current block is 68562306a36Sopenharmony_ci * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case, 68662306a36Sopenharmony_ci * new block is from jbd2 and CRC is updated to reflect the padding 68762306a36Sopenharmony_ci * we added. 68862306a36Sopenharmony_ci */ 68962306a36Sopenharmony_cistatic u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) 69062306a36Sopenharmony_ci{ 69162306a36Sopenharmony_ci struct ext4_fc_tl tl; 69262306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 69362306a36Sopenharmony_ci struct buffer_head *bh; 69462306a36Sopenharmony_ci int bsize = sbi->s_journal->j_blocksize; 69562306a36Sopenharmony_ci int ret, off = sbi->s_fc_bytes % bsize; 69662306a36Sopenharmony_ci int remaining; 69762306a36Sopenharmony_ci u8 *dst; 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci /* 70062306a36Sopenharmony_ci * If 'len' is too long to fit in any block alongside a PAD tlv, then we 70162306a36Sopenharmony_ci * cannot fulfill the request. 
70262306a36Sopenharmony_ci */ 70362306a36Sopenharmony_ci if (len > bsize - EXT4_FC_TAG_BASE_LEN) 70462306a36Sopenharmony_ci return NULL; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci if (!sbi->s_fc_bh) { 70762306a36Sopenharmony_ci ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 70862306a36Sopenharmony_ci if (ret) 70962306a36Sopenharmony_ci return NULL; 71062306a36Sopenharmony_ci sbi->s_fc_bh = bh; 71162306a36Sopenharmony_ci } 71262306a36Sopenharmony_ci dst = sbi->s_fc_bh->b_data + off; 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci /* 71562306a36Sopenharmony_ci * Allocate the bytes in the current block if we can do so while still 71662306a36Sopenharmony_ci * leaving enough space for a PAD tlv. 71762306a36Sopenharmony_ci */ 71862306a36Sopenharmony_ci remaining = bsize - EXT4_FC_TAG_BASE_LEN - off; 71962306a36Sopenharmony_ci if (len <= remaining) { 72062306a36Sopenharmony_ci sbi->s_fc_bytes += len; 72162306a36Sopenharmony_ci return dst; 72262306a36Sopenharmony_ci } 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci /* 72562306a36Sopenharmony_ci * Else, terminate the current block with a PAD tlv, then allocate a new 72662306a36Sopenharmony_ci * block and allocate the bytes at the start of that new block. 
72762306a36Sopenharmony_ci */ 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); 73062306a36Sopenharmony_ci tl.fc_len = cpu_to_le16(remaining); 73162306a36Sopenharmony_ci memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 73262306a36Sopenharmony_ci memset(dst + EXT4_FC_TAG_BASE_LEN, 0, remaining); 73362306a36Sopenharmony_ci *crc = ext4_chksum(sbi, *crc, sbi->s_fc_bh->b_data, bsize); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci ext4_fc_submit_bh(sb, false); 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); 73862306a36Sopenharmony_ci if (ret) 73962306a36Sopenharmony_ci return NULL; 74062306a36Sopenharmony_ci sbi->s_fc_bh = bh; 74162306a36Sopenharmony_ci sbi->s_fc_bytes += bsize - off + len; 74262306a36Sopenharmony_ci return sbi->s_fc_bh->b_data; 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci/* 74662306a36Sopenharmony_ci * Complete a fast commit by writing tail tag. 74762306a36Sopenharmony_ci * 74862306a36Sopenharmony_ci * Writing tail tag marks the end of a fast commit. In order to guarantee 74962306a36Sopenharmony_ci * atomicity, after writing tail tag, even if there's space remaining 75062306a36Sopenharmony_ci * in the block, next commit shouldn't use it. That's why tail tag 75162306a36Sopenharmony_ci * has the length as that of the remaining space on the block. 
75262306a36Sopenharmony_ci */ 75362306a36Sopenharmony_cistatic int ext4_fc_write_tail(struct super_block *sb, u32 crc) 75462306a36Sopenharmony_ci{ 75562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 75662306a36Sopenharmony_ci struct ext4_fc_tl tl; 75762306a36Sopenharmony_ci struct ext4_fc_tail tail; 75862306a36Sopenharmony_ci int off, bsize = sbi->s_journal->j_blocksize; 75962306a36Sopenharmony_ci u8 *dst; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci /* 76262306a36Sopenharmony_ci * ext4_fc_reserve_space takes care of allocating an extra block if 76362306a36Sopenharmony_ci * there's no enough space on this block for accommodating this tail. 76462306a36Sopenharmony_ci */ 76562306a36Sopenharmony_ci dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); 76662306a36Sopenharmony_ci if (!dst) 76762306a36Sopenharmony_ci return -ENOSPC; 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci off = sbi->s_fc_bytes % bsize; 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); 77262306a36Sopenharmony_ci tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail)); 77362306a36Sopenharmony_ci sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 77662306a36Sopenharmony_ci dst += EXT4_FC_TAG_BASE_LEN; 77762306a36Sopenharmony_ci tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); 77862306a36Sopenharmony_ci memcpy(dst, &tail.fc_tid, sizeof(tail.fc_tid)); 77962306a36Sopenharmony_ci dst += sizeof(tail.fc_tid); 78062306a36Sopenharmony_ci crc = ext4_chksum(sbi, crc, sbi->s_fc_bh->b_data, 78162306a36Sopenharmony_ci dst - (u8 *)sbi->s_fc_bh->b_data); 78262306a36Sopenharmony_ci tail.fc_crc = cpu_to_le32(crc); 78362306a36Sopenharmony_ci memcpy(dst, &tail.fc_crc, sizeof(tail.fc_crc)); 78462306a36Sopenharmony_ci dst += sizeof(tail.fc_crc); 78562306a36Sopenharmony_ci memset(dst, 0, bsize - 
off); /* Don't leak uninitialized memory. */ 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci ext4_fc_submit_bh(sb, true); 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci return 0; 79062306a36Sopenharmony_ci} 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci/* 79362306a36Sopenharmony_ci * Adds tag, length, value and updates CRC. Returns true if tlv was added. 79462306a36Sopenharmony_ci * Returns false if there's not enough space. 79562306a36Sopenharmony_ci */ 79662306a36Sopenharmony_cistatic bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, 79762306a36Sopenharmony_ci u32 *crc) 79862306a36Sopenharmony_ci{ 79962306a36Sopenharmony_ci struct ext4_fc_tl tl; 80062306a36Sopenharmony_ci u8 *dst; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); 80362306a36Sopenharmony_ci if (!dst) 80462306a36Sopenharmony_ci return false; 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci tl.fc_tag = cpu_to_le16(tag); 80762306a36Sopenharmony_ci tl.fc_len = cpu_to_le16(len); 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 81062306a36Sopenharmony_ci memcpy(dst + EXT4_FC_TAG_BASE_LEN, val, len); 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci return true; 81362306a36Sopenharmony_ci} 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci/* Same as above, but adds dentry tlv. 
*/ 81662306a36Sopenharmony_cistatic bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, 81762306a36Sopenharmony_ci struct ext4_fc_dentry_update *fc_dentry) 81862306a36Sopenharmony_ci{ 81962306a36Sopenharmony_ci struct ext4_fc_dentry_info fcd; 82062306a36Sopenharmony_ci struct ext4_fc_tl tl; 82162306a36Sopenharmony_ci int dlen = fc_dentry->fcd_name.len; 82262306a36Sopenharmony_ci u8 *dst = ext4_fc_reserve_space(sb, 82362306a36Sopenharmony_ci EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci if (!dst) 82662306a36Sopenharmony_ci return false; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent); 82962306a36Sopenharmony_ci fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); 83062306a36Sopenharmony_ci tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); 83162306a36Sopenharmony_ci tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); 83262306a36Sopenharmony_ci memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 83362306a36Sopenharmony_ci dst += EXT4_FC_TAG_BASE_LEN; 83462306a36Sopenharmony_ci memcpy(dst, &fcd, sizeof(fcd)); 83562306a36Sopenharmony_ci dst += sizeof(fcd); 83662306a36Sopenharmony_ci memcpy(dst, fc_dentry->fcd_name.name, dlen); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci return true; 83962306a36Sopenharmony_ci} 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci/* 84262306a36Sopenharmony_ci * Writes inode in the fast commit space under TLV with tag @tag. 84362306a36Sopenharmony_ci * Returns 0 on success, error on failure. 
84462306a36Sopenharmony_ci */ 84562306a36Sopenharmony_cistatic int ext4_fc_write_inode(struct inode *inode, u32 *crc) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 84862306a36Sopenharmony_ci int inode_len = EXT4_GOOD_OLD_INODE_SIZE; 84962306a36Sopenharmony_ci int ret; 85062306a36Sopenharmony_ci struct ext4_iloc iloc; 85162306a36Sopenharmony_ci struct ext4_fc_inode fc_inode; 85262306a36Sopenharmony_ci struct ext4_fc_tl tl; 85362306a36Sopenharmony_ci u8 *dst; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci ret = ext4_get_inode_loc(inode, &iloc); 85662306a36Sopenharmony_ci if (ret) 85762306a36Sopenharmony_ci return ret; 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 86062306a36Sopenharmony_ci inode_len = EXT4_INODE_SIZE(inode->i_sb); 86162306a36Sopenharmony_ci else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) 86262306a36Sopenharmony_ci inode_len += ei->i_extra_isize; 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci fc_inode.fc_ino = cpu_to_le32(inode->i_ino); 86562306a36Sopenharmony_ci tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); 86662306a36Sopenharmony_ci tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci ret = -ECANCELED; 86962306a36Sopenharmony_ci dst = ext4_fc_reserve_space(inode->i_sb, 87062306a36Sopenharmony_ci EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); 87162306a36Sopenharmony_ci if (!dst) 87262306a36Sopenharmony_ci goto err; 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci memcpy(dst, &tl, EXT4_FC_TAG_BASE_LEN); 87562306a36Sopenharmony_ci dst += EXT4_FC_TAG_BASE_LEN; 87662306a36Sopenharmony_ci memcpy(dst, &fc_inode, sizeof(fc_inode)); 87762306a36Sopenharmony_ci dst += sizeof(fc_inode); 87862306a36Sopenharmony_ci memcpy(dst, (u8 *)ext4_raw_inode(&iloc), inode_len); 87962306a36Sopenharmony_ci ret = 0; 88062306a36Sopenharmony_cierr: 
88162306a36Sopenharmony_ci brelse(iloc.bh); 88262306a36Sopenharmony_ci return ret; 88362306a36Sopenharmony_ci} 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci/* 88662306a36Sopenharmony_ci * Writes updated data ranges for the inode in question. Updates CRC. 88762306a36Sopenharmony_ci * Returns 0 on success, error otherwise. 88862306a36Sopenharmony_ci */ 88962306a36Sopenharmony_cistatic int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) 89062306a36Sopenharmony_ci{ 89162306a36Sopenharmony_ci ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size; 89262306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 89362306a36Sopenharmony_ci struct ext4_map_blocks map; 89462306a36Sopenharmony_ci struct ext4_fc_add_range fc_ext; 89562306a36Sopenharmony_ci struct ext4_fc_del_range lrange; 89662306a36Sopenharmony_ci struct ext4_extent *ex; 89762306a36Sopenharmony_ci int ret; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci mutex_lock(&ei->i_fc_lock); 90062306a36Sopenharmony_ci if (ei->i_fc_lblk_len == 0) { 90162306a36Sopenharmony_ci mutex_unlock(&ei->i_fc_lock); 90262306a36Sopenharmony_ci return 0; 90362306a36Sopenharmony_ci } 90462306a36Sopenharmony_ci old_blk_size = ei->i_fc_lblk_start; 90562306a36Sopenharmony_ci new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1; 90662306a36Sopenharmony_ci ei->i_fc_lblk_len = 0; 90762306a36Sopenharmony_ci mutex_unlock(&ei->i_fc_lock); 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci cur_lblk_off = old_blk_size; 91062306a36Sopenharmony_ci ext4_debug("will try writing %d to %d for inode %ld\n", 91162306a36Sopenharmony_ci cur_lblk_off, new_blk_size, inode->i_ino); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci while (cur_lblk_off <= new_blk_size) { 91462306a36Sopenharmony_ci map.m_lblk = cur_lblk_off; 91562306a36Sopenharmony_ci map.m_len = new_blk_size - cur_lblk_off + 1; 91662306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 91762306a36Sopenharmony_ci if (ret < 0) 
91862306a36Sopenharmony_ci return -ECANCELED; 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci if (map.m_len == 0) { 92162306a36Sopenharmony_ci cur_lblk_off++; 92262306a36Sopenharmony_ci continue; 92362306a36Sopenharmony_ci } 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci if (ret == 0) { 92662306a36Sopenharmony_ci lrange.fc_ino = cpu_to_le32(inode->i_ino); 92762306a36Sopenharmony_ci lrange.fc_lblk = cpu_to_le32(map.m_lblk); 92862306a36Sopenharmony_ci lrange.fc_len = cpu_to_le32(map.m_len); 92962306a36Sopenharmony_ci if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE, 93062306a36Sopenharmony_ci sizeof(lrange), (u8 *)&lrange, crc)) 93162306a36Sopenharmony_ci return -ENOSPC; 93262306a36Sopenharmony_ci } else { 93362306a36Sopenharmony_ci unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? 93462306a36Sopenharmony_ci EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci /* Limit the number of blocks in one extent */ 93762306a36Sopenharmony_ci map.m_len = min(max, map.m_len); 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci fc_ext.fc_ino = cpu_to_le32(inode->i_ino); 94062306a36Sopenharmony_ci ex = (struct ext4_extent *)&fc_ext.fc_ex; 94162306a36Sopenharmony_ci ex->ee_block = cpu_to_le32(map.m_lblk); 94262306a36Sopenharmony_ci ex->ee_len = cpu_to_le16(map.m_len); 94362306a36Sopenharmony_ci ext4_ext_store_pblock(ex, map.m_pblk); 94462306a36Sopenharmony_ci if (map.m_flags & EXT4_MAP_UNWRITTEN) 94562306a36Sopenharmony_ci ext4_ext_mark_unwritten(ex); 94662306a36Sopenharmony_ci else 94762306a36Sopenharmony_ci ext4_ext_mark_initialized(ex); 94862306a36Sopenharmony_ci if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE, 94962306a36Sopenharmony_ci sizeof(fc_ext), (u8 *)&fc_ext, crc)) 95062306a36Sopenharmony_ci return -ENOSPC; 95162306a36Sopenharmony_ci } 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci cur_lblk_off += map.m_len; 95462306a36Sopenharmony_ci } 95562306a36Sopenharmony_ci 
95662306a36Sopenharmony_ci return 0; 95762306a36Sopenharmony_ci} 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* Submit data for all the fast commit inodes */ 96162306a36Sopenharmony_cistatic int ext4_fc_submit_inode_data_all(journal_t *journal) 96262306a36Sopenharmony_ci{ 96362306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 96462306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 96562306a36Sopenharmony_ci struct ext4_inode_info *ei; 96662306a36Sopenharmony_ci int ret = 0; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 96962306a36Sopenharmony_ci list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 97062306a36Sopenharmony_ci ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); 97162306a36Sopenharmony_ci while (atomic_read(&ei->i_fc_updates)) { 97262306a36Sopenharmony_ci DEFINE_WAIT(wait); 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci prepare_to_wait(&ei->i_fc_wait, &wait, 97562306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 97662306a36Sopenharmony_ci if (atomic_read(&ei->i_fc_updates)) { 97762306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 97862306a36Sopenharmony_ci schedule(); 97962306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 98062306a36Sopenharmony_ci } 98162306a36Sopenharmony_ci finish_wait(&ei->i_fc_wait, &wait); 98262306a36Sopenharmony_ci } 98362306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 98462306a36Sopenharmony_ci ret = jbd2_submit_inode_data(journal, ei->jinode); 98562306a36Sopenharmony_ci if (ret) 98662306a36Sopenharmony_ci return ret; 98762306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 98862306a36Sopenharmony_ci } 98962306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci return ret; 99262306a36Sopenharmony_ci} 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci/* Wait for completion of data for all the fast commit inodes */ 99562306a36Sopenharmony_cistatic int 
ext4_fc_wait_inode_data_all(journal_t *journal) 99662306a36Sopenharmony_ci{ 99762306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 99862306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 99962306a36Sopenharmony_ci struct ext4_inode_info *pos, *n; 100062306a36Sopenharmony_ci int ret = 0; 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 100362306a36Sopenharmony_ci list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 100462306a36Sopenharmony_ci if (!ext4_test_inode_state(&pos->vfs_inode, 100562306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING)) 100662306a36Sopenharmony_ci continue; 100762306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci ret = jbd2_wait_inode_data(journal, pos->jinode); 101062306a36Sopenharmony_ci if (ret) 101162306a36Sopenharmony_ci return ret; 101262306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 101362306a36Sopenharmony_ci } 101462306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci return 0; 101762306a36Sopenharmony_ci} 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci/* Commit all the directory entry updates */ 102062306a36Sopenharmony_cistatic int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) 102162306a36Sopenharmony_ci__acquires(&sbi->s_fc_lock) 102262306a36Sopenharmony_ci__releases(&sbi->s_fc_lock) 102362306a36Sopenharmony_ci{ 102462306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 102562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 102662306a36Sopenharmony_ci struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; 102762306a36Sopenharmony_ci struct inode *inode; 102862306a36Sopenharmony_ci struct ext4_inode_info *ei; 102962306a36Sopenharmony_ci int ret; 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) 103262306a36Sopenharmony_ci return 0; 
103362306a36Sopenharmony_ci list_for_each_entry_safe(fc_dentry, fc_dentry_n, 103462306a36Sopenharmony_ci &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) { 103562306a36Sopenharmony_ci if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) { 103662306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 103762306a36Sopenharmony_ci if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 103862306a36Sopenharmony_ci ret = -ENOSPC; 103962306a36Sopenharmony_ci goto lock_and_exit; 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 104262306a36Sopenharmony_ci continue; 104362306a36Sopenharmony_ci } 104462306a36Sopenharmony_ci /* 104562306a36Sopenharmony_ci * With fcd_dilist we need not loop in sbi->s_fc_q to get the 104662306a36Sopenharmony_ci * corresponding inode pointer 104762306a36Sopenharmony_ci */ 104862306a36Sopenharmony_ci WARN_ON(list_empty(&fc_dentry->fcd_dilist)); 104962306a36Sopenharmony_ci ei = list_first_entry(&fc_dentry->fcd_dilist, 105062306a36Sopenharmony_ci struct ext4_inode_info, i_fc_dilist); 105162306a36Sopenharmony_ci inode = &ei->vfs_inode; 105262306a36Sopenharmony_ci WARN_ON(inode->i_ino != fc_dentry->fcd_ino); 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci /* 105762306a36Sopenharmony_ci * We first write the inode and then the create dirent. This 105862306a36Sopenharmony_ci * allows the recovery code to create an unnamed inode first 105962306a36Sopenharmony_ci * and then link it to a directory entry. This allows us 106062306a36Sopenharmony_ci * to use namei.c routines almost as is and simplifies 106162306a36Sopenharmony_ci * the recovery code. 
106262306a36Sopenharmony_ci */ 106362306a36Sopenharmony_ci ret = ext4_fc_write_inode(inode, crc); 106462306a36Sopenharmony_ci if (ret) 106562306a36Sopenharmony_ci goto lock_and_exit; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci ret = ext4_fc_write_inode_data(inode, crc); 106862306a36Sopenharmony_ci if (ret) 106962306a36Sopenharmony_ci goto lock_and_exit; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) { 107262306a36Sopenharmony_ci ret = -ENOSPC; 107362306a36Sopenharmony_ci goto lock_and_exit; 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 107762306a36Sopenharmony_ci } 107862306a36Sopenharmony_ci return 0; 107962306a36Sopenharmony_cilock_and_exit: 108062306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 108162306a36Sopenharmony_ci return ret; 108262306a36Sopenharmony_ci} 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_cistatic int ext4_fc_perform_commit(journal_t *journal) 108562306a36Sopenharmony_ci{ 108662306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 108762306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 108862306a36Sopenharmony_ci struct ext4_inode_info *iter; 108962306a36Sopenharmony_ci struct ext4_fc_head head; 109062306a36Sopenharmony_ci struct inode *inode; 109162306a36Sopenharmony_ci struct blk_plug plug; 109262306a36Sopenharmony_ci int ret = 0; 109362306a36Sopenharmony_ci u32 crc = 0; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci ret = ext4_fc_submit_inode_data_all(journal); 109662306a36Sopenharmony_ci if (ret) 109762306a36Sopenharmony_ci return ret; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci ret = ext4_fc_wait_inode_data_all(journal); 110062306a36Sopenharmony_ci if (ret) 110162306a36Sopenharmony_ci return ret; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci /* 110462306a36Sopenharmony_ci * If file system device is different from journal device, issue a 
cache 110562306a36Sopenharmony_ci * flush before we start writing fast commit blocks. 110662306a36Sopenharmony_ci */ 110762306a36Sopenharmony_ci if (journal->j_fs_dev != journal->j_dev) 110862306a36Sopenharmony_ci blkdev_issue_flush(journal->j_fs_dev); 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci blk_start_plug(&plug); 111162306a36Sopenharmony_ci if (sbi->s_fc_bytes == 0) { 111262306a36Sopenharmony_ci /* 111362306a36Sopenharmony_ci * Add a head tag only if this is the first fast commit 111462306a36Sopenharmony_ci * in this TID. 111562306a36Sopenharmony_ci */ 111662306a36Sopenharmony_ci head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES); 111762306a36Sopenharmony_ci head.fc_tid = cpu_to_le32( 111862306a36Sopenharmony_ci sbi->s_journal->j_running_transaction->t_tid); 111962306a36Sopenharmony_ci if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head), 112062306a36Sopenharmony_ci (u8 *)&head, &crc)) { 112162306a36Sopenharmony_ci ret = -ENOSPC; 112262306a36Sopenharmony_ci goto out; 112362306a36Sopenharmony_ci } 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 112762306a36Sopenharmony_ci ret = ext4_fc_commit_dentry_updates(journal, &crc); 112862306a36Sopenharmony_ci if (ret) { 112962306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 113062306a36Sopenharmony_ci goto out; 113162306a36Sopenharmony_ci } 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { 113462306a36Sopenharmony_ci inode = &iter->vfs_inode; 113562306a36Sopenharmony_ci if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) 113662306a36Sopenharmony_ci continue; 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 113962306a36Sopenharmony_ci ret = ext4_fc_write_inode_data(inode, &crc); 114062306a36Sopenharmony_ci if (ret) 114162306a36Sopenharmony_ci goto out; 114262306a36Sopenharmony_ci ret = ext4_fc_write_inode(inode, 
&crc); 114362306a36Sopenharmony_ci if (ret) 114462306a36Sopenharmony_ci goto out; 114562306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 114662306a36Sopenharmony_ci } 114762306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci ret = ext4_fc_write_tail(sb, crc); 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ciout: 115262306a36Sopenharmony_ci blk_finish_plug(&plug); 115362306a36Sopenharmony_ci return ret; 115462306a36Sopenharmony_ci} 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_cistatic void ext4_fc_update_stats(struct super_block *sb, int status, 115762306a36Sopenharmony_ci u64 commit_time, int nblks, tid_t commit_tid) 115862306a36Sopenharmony_ci{ 115962306a36Sopenharmony_ci struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci ext4_debug("Fast commit ended with status = %d for tid %u", 116262306a36Sopenharmony_ci status, commit_tid); 116362306a36Sopenharmony_ci if (status == EXT4_FC_STATUS_OK) { 116462306a36Sopenharmony_ci stats->fc_num_commits++; 116562306a36Sopenharmony_ci stats->fc_numblks += nblks; 116662306a36Sopenharmony_ci if (likely(stats->s_fc_avg_commit_time)) 116762306a36Sopenharmony_ci stats->s_fc_avg_commit_time = 116862306a36Sopenharmony_ci (commit_time + 116962306a36Sopenharmony_ci stats->s_fc_avg_commit_time * 3) / 4; 117062306a36Sopenharmony_ci else 117162306a36Sopenharmony_ci stats->s_fc_avg_commit_time = commit_time; 117262306a36Sopenharmony_ci } else if (status == EXT4_FC_STATUS_FAILED || 117362306a36Sopenharmony_ci status == EXT4_FC_STATUS_INELIGIBLE) { 117462306a36Sopenharmony_ci if (status == EXT4_FC_STATUS_FAILED) 117562306a36Sopenharmony_ci stats->fc_failed_commits++; 117662306a36Sopenharmony_ci stats->fc_ineligible_commits++; 117762306a36Sopenharmony_ci } else { 117862306a36Sopenharmony_ci stats->fc_skipped_commits++; 117962306a36Sopenharmony_ci } 118062306a36Sopenharmony_ci trace_ext4_fc_commit_stop(sb, nblks, 
status, commit_tid); 118162306a36Sopenharmony_ci} 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci/* 118462306a36Sopenharmony_ci * The main commit entry point. Performs a fast commit for transaction 118562306a36Sopenharmony_ci * commit_tid if needed. If it's not possible to perform a fast commit 118662306a36Sopenharmony_ci * due to various reasons, we fall back to full commit. Returns 0 118762306a36Sopenharmony_ci * on success, error otherwise. 118862306a36Sopenharmony_ci */ 118962306a36Sopenharmony_ciint ext4_fc_commit(journal_t *journal, tid_t commit_tid) 119062306a36Sopenharmony_ci{ 119162306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 119262306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 119362306a36Sopenharmony_ci int nblks = 0, ret, bsize = journal->j_blocksize; 119462306a36Sopenharmony_ci int subtid = atomic_read(&sbi->s_fc_subtid); 119562306a36Sopenharmony_ci int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; 119662306a36Sopenharmony_ci ktime_t start_time, commit_time; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 119962306a36Sopenharmony_ci return jbd2_complete_transaction(journal, commit_tid); 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci trace_ext4_fc_commit_start(sb, commit_tid); 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_ci start_time = ktime_get(); 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_cirestart_fc: 120662306a36Sopenharmony_ci ret = jbd2_fc_begin_commit(journal, commit_tid); 120762306a36Sopenharmony_ci if (ret == -EALREADY) { 120862306a36Sopenharmony_ci /* There was an ongoing commit, check if we need to restart */ 120962306a36Sopenharmony_ci if (atomic_read(&sbi->s_fc_subtid) <= subtid && 121062306a36Sopenharmony_ci commit_tid > journal->j_commit_sequence) 121162306a36Sopenharmony_ci goto restart_fc; 121262306a36Sopenharmony_ci ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, 121362306a36Sopenharmony_ci commit_tid); 
121462306a36Sopenharmony_ci return 0; 121562306a36Sopenharmony_ci } else if (ret) { 121662306a36Sopenharmony_ci /* 121762306a36Sopenharmony_ci * Commit couldn't start. Just update stats and perform a 121862306a36Sopenharmony_ci * full commit. 121962306a36Sopenharmony_ci */ 122062306a36Sopenharmony_ci ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0, 122162306a36Sopenharmony_ci commit_tid); 122262306a36Sopenharmony_ci return jbd2_complete_transaction(journal, commit_tid); 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci /* 122662306a36Sopenharmony_ci * After establishing journal barrier via jbd2_fc_begin_commit(), check 122762306a36Sopenharmony_ci * if we are fast commit ineligible. 122862306a36Sopenharmony_ci */ 122962306a36Sopenharmony_ci if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { 123062306a36Sopenharmony_ci status = EXT4_FC_STATUS_INELIGIBLE; 123162306a36Sopenharmony_ci goto fallback; 123262306a36Sopenharmony_ci } 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; 123562306a36Sopenharmony_ci ret = ext4_fc_perform_commit(journal); 123662306a36Sopenharmony_ci if (ret < 0) { 123762306a36Sopenharmony_ci status = EXT4_FC_STATUS_FAILED; 123862306a36Sopenharmony_ci goto fallback; 123962306a36Sopenharmony_ci } 124062306a36Sopenharmony_ci nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; 124162306a36Sopenharmony_ci ret = jbd2_fc_wait_bufs(journal, nblks); 124262306a36Sopenharmony_ci if (ret < 0) { 124362306a36Sopenharmony_ci status = EXT4_FC_STATUS_FAILED; 124462306a36Sopenharmony_ci goto fallback; 124562306a36Sopenharmony_ci } 124662306a36Sopenharmony_ci atomic_inc(&sbi->s_fc_subtid); 124762306a36Sopenharmony_ci ret = jbd2_fc_end_commit(journal); 124862306a36Sopenharmony_ci /* 124962306a36Sopenharmony_ci * weight the commit time higher than the average time so we 125062306a36Sopenharmony_ci * don't react too strongly to vast changes in the 
commit time 125162306a36Sopenharmony_ci */ 125262306a36Sopenharmony_ci commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); 125362306a36Sopenharmony_ci ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid); 125462306a36Sopenharmony_ci return ret; 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_cifallback: 125762306a36Sopenharmony_ci ret = jbd2_fc_end_commit_fallback(journal); 125862306a36Sopenharmony_ci ext4_fc_update_stats(sb, status, 0, 0, commit_tid); 125962306a36Sopenharmony_ci return ret; 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci/* 126362306a36Sopenharmony_ci * Fast commit cleanup routine. This is called after every fast commit and 126462306a36Sopenharmony_ci * full commit. full is true if we are called after a full commit. 126562306a36Sopenharmony_ci */ 126662306a36Sopenharmony_cistatic void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) 126762306a36Sopenharmony_ci{ 126862306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 126962306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 127062306a36Sopenharmony_ci struct ext4_inode_info *iter, *iter_n; 127162306a36Sopenharmony_ci struct ext4_fc_dentry_update *fc_dentry; 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci if (full && sbi->s_fc_bh) 127462306a36Sopenharmony_ci sbi->s_fc_bh = NULL; 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci trace_ext4_fc_cleanup(journal, full, tid); 127762306a36Sopenharmony_ci jbd2_fc_release_bufs(journal); 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 128062306a36Sopenharmony_ci list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN], 128162306a36Sopenharmony_ci i_fc_list) { 128262306a36Sopenharmony_ci list_del_init(&iter->i_fc_list); 128362306a36Sopenharmony_ci ext4_clear_inode_state(&iter->vfs_inode, 128462306a36Sopenharmony_ci EXT4_STATE_FC_COMMITTING); 128562306a36Sopenharmony_ci if (iter->i_sync_tid <= tid) 
128662306a36Sopenharmony_ci ext4_fc_reset_inode(&iter->vfs_inode); 128762306a36Sopenharmony_ci /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ 128862306a36Sopenharmony_ci smp_mb(); 128962306a36Sopenharmony_ci#if (BITS_PER_LONG < 64) 129062306a36Sopenharmony_ci wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING); 129162306a36Sopenharmony_ci#else 129262306a36Sopenharmony_ci wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING); 129362306a36Sopenharmony_ci#endif 129462306a36Sopenharmony_ci } 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) { 129762306a36Sopenharmony_ci fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN], 129862306a36Sopenharmony_ci struct ext4_fc_dentry_update, 129962306a36Sopenharmony_ci fcd_list); 130062306a36Sopenharmony_ci list_del_init(&fc_dentry->fcd_list); 130162306a36Sopenharmony_ci list_del_init(&fc_dentry->fcd_dilist); 130262306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci if (fc_dentry->fcd_name.name && 130562306a36Sopenharmony_ci fc_dentry->fcd_name.len > DNAME_INLINE_LEN) 130662306a36Sopenharmony_ci kfree(fc_dentry->fcd_name.name); 130762306a36Sopenharmony_ci kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry); 130862306a36Sopenharmony_ci spin_lock(&sbi->s_fc_lock); 130962306a36Sopenharmony_ci } 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], 131262306a36Sopenharmony_ci &sbi->s_fc_dentry_q[FC_Q_MAIN]); 131362306a36Sopenharmony_ci list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], 131462306a36Sopenharmony_ci &sbi->s_fc_q[FC_Q_MAIN]); 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci if (tid >= sbi->s_fc_ineligible_tid) { 131762306a36Sopenharmony_ci sbi->s_fc_ineligible_tid = 0; 131862306a36Sopenharmony_ci ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); 131962306a36Sopenharmony_ci } 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci if 
(full) 132262306a36Sopenharmony_ci sbi->s_fc_bytes = 0; 132362306a36Sopenharmony_ci spin_unlock(&sbi->s_fc_lock); 132462306a36Sopenharmony_ci trace_ext4_fc_stats(sb); 132562306a36Sopenharmony_ci} 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci/* Ext4 Replay Path Routines */ 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci/* Helper struct for dentry replay routines */ 133062306a36Sopenharmony_cistruct dentry_info_args { 133162306a36Sopenharmony_ci int parent_ino, dname_len, ino, inode_len; 133262306a36Sopenharmony_ci char *dname; 133362306a36Sopenharmony_ci}; 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci/* Same as struct ext4_fc_tl, but uses native endianness fields */ 133662306a36Sopenharmony_cistruct ext4_fc_tl_mem { 133762306a36Sopenharmony_ci u16 fc_tag; 133862306a36Sopenharmony_ci u16 fc_len; 133962306a36Sopenharmony_ci}; 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_cistatic inline void tl_to_darg(struct dentry_info_args *darg, 134262306a36Sopenharmony_ci struct ext4_fc_tl_mem *tl, u8 *val) 134362306a36Sopenharmony_ci{ 134462306a36Sopenharmony_ci struct ext4_fc_dentry_info fcd; 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci memcpy(&fcd, val, sizeof(fcd)); 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); 134962306a36Sopenharmony_ci darg->ino = le32_to_cpu(fcd.fc_ino); 135062306a36Sopenharmony_ci darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); 135162306a36Sopenharmony_ci darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); 135262306a36Sopenharmony_ci} 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_cistatic inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val) 135562306a36Sopenharmony_ci{ 135662306a36Sopenharmony_ci struct ext4_fc_tl tl_disk; 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN); 135962306a36Sopenharmony_ci tl->fc_len = le16_to_cpu(tl_disk.fc_len); 
136062306a36Sopenharmony_ci tl->fc_tag = le16_to_cpu(tl_disk.fc_tag); 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci/* Unlink replay function */ 136462306a36Sopenharmony_cistatic int ext4_fc_replay_unlink(struct super_block *sb, 136562306a36Sopenharmony_ci struct ext4_fc_tl_mem *tl, u8 *val) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci struct inode *inode, *old_parent; 136862306a36Sopenharmony_ci struct qstr entry; 136962306a36Sopenharmony_ci struct dentry_info_args darg; 137062306a36Sopenharmony_ci int ret = 0; 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci tl_to_darg(&darg, tl, val); 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino, 137562306a36Sopenharmony_ci darg.parent_ino, darg.dname_len); 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci entry.name = darg.dname; 137862306a36Sopenharmony_ci entry.len = darg.dname_len; 137962306a36Sopenharmony_ci inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci if (IS_ERR(inode)) { 138262306a36Sopenharmony_ci ext4_debug("Inode %d not found", darg.ino); 138362306a36Sopenharmony_ci return 0; 138462306a36Sopenharmony_ci } 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci old_parent = ext4_iget(sb, darg.parent_ino, 138762306a36Sopenharmony_ci EXT4_IGET_NORMAL); 138862306a36Sopenharmony_ci if (IS_ERR(old_parent)) { 138962306a36Sopenharmony_ci ext4_debug("Dir with inode %d not found", darg.parent_ino); 139062306a36Sopenharmony_ci iput(inode); 139162306a36Sopenharmony_ci return 0; 139262306a36Sopenharmony_ci } 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci ret = __ext4_unlink(old_parent, &entry, inode, NULL); 139562306a36Sopenharmony_ci /* -ENOENT ok coz it might not exist anymore. 
*/ 139662306a36Sopenharmony_ci if (ret == -ENOENT) 139762306a36Sopenharmony_ci ret = 0; 139862306a36Sopenharmony_ci iput(old_parent); 139962306a36Sopenharmony_ci iput(inode); 140062306a36Sopenharmony_ci return ret; 140162306a36Sopenharmony_ci} 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_cistatic int ext4_fc_replay_link_internal(struct super_block *sb, 140462306a36Sopenharmony_ci struct dentry_info_args *darg, 140562306a36Sopenharmony_ci struct inode *inode) 140662306a36Sopenharmony_ci{ 140762306a36Sopenharmony_ci struct inode *dir = NULL; 140862306a36Sopenharmony_ci struct dentry *dentry_dir = NULL, *dentry_inode = NULL; 140962306a36Sopenharmony_ci struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len); 141062306a36Sopenharmony_ci int ret = 0; 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); 141362306a36Sopenharmony_ci if (IS_ERR(dir)) { 141462306a36Sopenharmony_ci ext4_debug("Dir with inode %d not found.", darg->parent_ino); 141562306a36Sopenharmony_ci dir = NULL; 141662306a36Sopenharmony_ci goto out; 141762306a36Sopenharmony_ci } 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci dentry_dir = d_obtain_alias(dir); 142062306a36Sopenharmony_ci if (IS_ERR(dentry_dir)) { 142162306a36Sopenharmony_ci ext4_debug("Failed to obtain dentry"); 142262306a36Sopenharmony_ci dentry_dir = NULL; 142362306a36Sopenharmony_ci goto out; 142462306a36Sopenharmony_ci } 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci dentry_inode = d_alloc(dentry_dir, &qstr_dname); 142762306a36Sopenharmony_ci if (!dentry_inode) { 142862306a36Sopenharmony_ci ext4_debug("Inode dentry not created."); 142962306a36Sopenharmony_ci ret = -ENOMEM; 143062306a36Sopenharmony_ci goto out; 143162306a36Sopenharmony_ci } 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci ret = __ext4_link(dir, inode, dentry_inode); 143462306a36Sopenharmony_ci /* 143562306a36Sopenharmony_ci * It's possible that link already existed since 
data blocks 143662306a36Sopenharmony_ci * for the dir in question got persisted before we crashed OR 143762306a36Sopenharmony_ci * we replayed this tag and crashed before the entire replay 143862306a36Sopenharmony_ci * could complete. 143962306a36Sopenharmony_ci */ 144062306a36Sopenharmony_ci if (ret && ret != -EEXIST) { 144162306a36Sopenharmony_ci ext4_debug("Failed to link\n"); 144262306a36Sopenharmony_ci goto out; 144362306a36Sopenharmony_ci } 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci ret = 0; 144662306a36Sopenharmony_ciout: 144762306a36Sopenharmony_ci if (dentry_dir) { 144862306a36Sopenharmony_ci d_drop(dentry_dir); 144962306a36Sopenharmony_ci dput(dentry_dir); 145062306a36Sopenharmony_ci } else if (dir) { 145162306a36Sopenharmony_ci iput(dir); 145262306a36Sopenharmony_ci } 145362306a36Sopenharmony_ci if (dentry_inode) { 145462306a36Sopenharmony_ci d_drop(dentry_inode); 145562306a36Sopenharmony_ci dput(dentry_inode); 145662306a36Sopenharmony_ci } 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_ci return ret; 145962306a36Sopenharmony_ci} 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci/* Link replay function */ 146262306a36Sopenharmony_cistatic int ext4_fc_replay_link(struct super_block *sb, 146362306a36Sopenharmony_ci struct ext4_fc_tl_mem *tl, u8 *val) 146462306a36Sopenharmony_ci{ 146562306a36Sopenharmony_ci struct inode *inode; 146662306a36Sopenharmony_ci struct dentry_info_args darg; 146762306a36Sopenharmony_ci int ret = 0; 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_ci tl_to_darg(&darg, tl, val); 147062306a36Sopenharmony_ci trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino, 147162306a36Sopenharmony_ci darg.parent_ino, darg.dname_len); 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 147462306a36Sopenharmony_ci if (IS_ERR(inode)) { 147562306a36Sopenharmony_ci ext4_debug("Inode not found."); 147662306a36Sopenharmony_ci return 0; 147762306a36Sopenharmony_ci } 
147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci ret = ext4_fc_replay_link_internal(sb, &darg, inode); 148062306a36Sopenharmony_ci iput(inode); 148162306a36Sopenharmony_ci return ret; 148262306a36Sopenharmony_ci} 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci/* 148562306a36Sopenharmony_ci * Record all the modified inodes during replay. We use this later to setup 148662306a36Sopenharmony_ci * block bitmaps correctly. 148762306a36Sopenharmony_ci */ 148862306a36Sopenharmony_cistatic int ext4_fc_record_modified_inode(struct super_block *sb, int ino) 148962306a36Sopenharmony_ci{ 149062306a36Sopenharmony_ci struct ext4_fc_replay_state *state; 149162306a36Sopenharmony_ci int i; 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci state = &EXT4_SB(sb)->s_fc_replay_state; 149462306a36Sopenharmony_ci for (i = 0; i < state->fc_modified_inodes_used; i++) 149562306a36Sopenharmony_ci if (state->fc_modified_inodes[i] == ino) 149662306a36Sopenharmony_ci return 0; 149762306a36Sopenharmony_ci if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { 149862306a36Sopenharmony_ci int *fc_modified_inodes; 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci fc_modified_inodes = krealloc(state->fc_modified_inodes, 150162306a36Sopenharmony_ci sizeof(int) * (state->fc_modified_inodes_size + 150262306a36Sopenharmony_ci EXT4_FC_REPLAY_REALLOC_INCREMENT), 150362306a36Sopenharmony_ci GFP_KERNEL); 150462306a36Sopenharmony_ci if (!fc_modified_inodes) 150562306a36Sopenharmony_ci return -ENOMEM; 150662306a36Sopenharmony_ci state->fc_modified_inodes = fc_modified_inodes; 150762306a36Sopenharmony_ci state->fc_modified_inodes_size += 150862306a36Sopenharmony_ci EXT4_FC_REPLAY_REALLOC_INCREMENT; 150962306a36Sopenharmony_ci } 151062306a36Sopenharmony_ci state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; 151162306a36Sopenharmony_ci return 0; 151262306a36Sopenharmony_ci} 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_ci/* 
151562306a36Sopenharmony_ci * Inode replay function 151662306a36Sopenharmony_ci */ 151762306a36Sopenharmony_cistatic int ext4_fc_replay_inode(struct super_block *sb, 151862306a36Sopenharmony_ci struct ext4_fc_tl_mem *tl, u8 *val) 151962306a36Sopenharmony_ci{ 152062306a36Sopenharmony_ci struct ext4_fc_inode fc_inode; 152162306a36Sopenharmony_ci struct ext4_inode *raw_inode; 152262306a36Sopenharmony_ci struct ext4_inode *raw_fc_inode; 152362306a36Sopenharmony_ci struct inode *inode = NULL; 152462306a36Sopenharmony_ci struct ext4_iloc iloc; 152562306a36Sopenharmony_ci int inode_len, ino, ret, tag = tl->fc_tag; 152662306a36Sopenharmony_ci struct ext4_extent_header *eh; 152762306a36Sopenharmony_ci size_t off_gen = offsetof(struct ext4_inode, i_generation); 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci memcpy(&fc_inode, val, sizeof(fc_inode)); 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci ino = le32_to_cpu(fc_inode.fc_ino); 153262306a36Sopenharmony_ci trace_ext4_fc_replay(sb, tag, ino, 0, 0); 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 153562306a36Sopenharmony_ci if (!IS_ERR(inode)) { 153662306a36Sopenharmony_ci ext4_ext_clear_bb(inode); 153762306a36Sopenharmony_ci iput(inode); 153862306a36Sopenharmony_ci } 153962306a36Sopenharmony_ci inode = NULL; 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci ret = ext4_fc_record_modified_inode(sb, ino); 154262306a36Sopenharmony_ci if (ret) 154362306a36Sopenharmony_ci goto out; 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci raw_fc_inode = (struct ext4_inode *) 154662306a36Sopenharmony_ci (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); 154762306a36Sopenharmony_ci ret = ext4_get_fc_inode_loc(sb, ino, &iloc); 154862306a36Sopenharmony_ci if (ret) 154962306a36Sopenharmony_ci goto out; 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); 155262306a36Sopenharmony_ci raw_inode = 
ext4_raw_inode(&iloc); 155362306a36Sopenharmony_ci 155462306a36Sopenharmony_ci memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); 155562306a36Sopenharmony_ci memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, 155662306a36Sopenharmony_ci inode_len - off_gen); 155762306a36Sopenharmony_ci if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { 155862306a36Sopenharmony_ci eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); 155962306a36Sopenharmony_ci if (eh->eh_magic != EXT4_EXT_MAGIC) { 156062306a36Sopenharmony_ci memset(eh, 0, sizeof(*eh)); 156162306a36Sopenharmony_ci eh->eh_magic = EXT4_EXT_MAGIC; 156262306a36Sopenharmony_ci eh->eh_max = cpu_to_le16( 156362306a36Sopenharmony_ci (sizeof(raw_inode->i_block) - 156462306a36Sopenharmony_ci sizeof(struct ext4_extent_header)) 156562306a36Sopenharmony_ci / sizeof(struct ext4_extent)); 156662306a36Sopenharmony_ci } 156762306a36Sopenharmony_ci } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) { 156862306a36Sopenharmony_ci memcpy(raw_inode->i_block, raw_fc_inode->i_block, 156962306a36Sopenharmony_ci sizeof(raw_inode->i_block)); 157062306a36Sopenharmony_ci } 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci /* Immediately update the inode on disk. */ 157362306a36Sopenharmony_ci ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 157462306a36Sopenharmony_ci if (ret) 157562306a36Sopenharmony_ci goto out; 157662306a36Sopenharmony_ci ret = sync_dirty_buffer(iloc.bh); 157762306a36Sopenharmony_ci if (ret) 157862306a36Sopenharmony_ci goto out; 157962306a36Sopenharmony_ci ret = ext4_mark_inode_used(sb, ino); 158062306a36Sopenharmony_ci if (ret) 158162306a36Sopenharmony_ci goto out; 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci /* Given that we just wrote the inode on disk, this SHOULD succeed. 
*/ 158462306a36Sopenharmony_ci inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); 158562306a36Sopenharmony_ci if (IS_ERR(inode)) { 158662306a36Sopenharmony_ci ext4_debug("Inode not found."); 158762306a36Sopenharmony_ci return -EFSCORRUPTED; 158862306a36Sopenharmony_ci } 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci /* 159162306a36Sopenharmony_ci * Our allocator could have made different decisions than before 159262306a36Sopenharmony_ci * crashing. This should be fixed but until then, we calculate 159362306a36Sopenharmony_ci * the number of blocks the inode. 159462306a36Sopenharmony_ci */ 159562306a36Sopenharmony_ci if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) 159662306a36Sopenharmony_ci ext4_ext_replay_set_iblocks(inode); 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation); 159962306a36Sopenharmony_ci ext4_reset_inode_seed(inode); 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode)); 160262306a36Sopenharmony_ci ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh); 160362306a36Sopenharmony_ci sync_dirty_buffer(iloc.bh); 160462306a36Sopenharmony_ci brelse(iloc.bh); 160562306a36Sopenharmony_ciout: 160662306a36Sopenharmony_ci iput(inode); 160762306a36Sopenharmony_ci if (!ret) 160862306a36Sopenharmony_ci blkdev_issue_flush(sb->s_bdev); 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci return 0; 161162306a36Sopenharmony_ci} 161262306a36Sopenharmony_ci 161362306a36Sopenharmony_ci/* 161462306a36Sopenharmony_ci * Dentry create replay function. 161562306a36Sopenharmony_ci * 161662306a36Sopenharmony_ci * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the 161762306a36Sopenharmony_ci * inode for which we are trying to create a dentry here, should already have 161862306a36Sopenharmony_ci * been replayed before we start here. 
161962306a36Sopenharmony_ci */ 162062306a36Sopenharmony_cistatic int ext4_fc_replay_create(struct super_block *sb, 162162306a36Sopenharmony_ci struct ext4_fc_tl_mem *tl, u8 *val) 162262306a36Sopenharmony_ci{ 162362306a36Sopenharmony_ci int ret = 0; 162462306a36Sopenharmony_ci struct inode *inode = NULL; 162562306a36Sopenharmony_ci struct inode *dir = NULL; 162662306a36Sopenharmony_ci struct dentry_info_args darg; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci tl_to_darg(&darg, tl, val); 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino, 163162306a36Sopenharmony_ci darg.parent_ino, darg.dname_len); 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci /* This takes care of update group descriptor and other metadata */ 163462306a36Sopenharmony_ci ret = ext4_mark_inode_used(sb, darg.ino); 163562306a36Sopenharmony_ci if (ret) 163662306a36Sopenharmony_ci goto out; 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); 163962306a36Sopenharmony_ci if (IS_ERR(inode)) { 164062306a36Sopenharmony_ci ext4_debug("inode %d not found.", darg.ino); 164162306a36Sopenharmony_ci inode = NULL; 164262306a36Sopenharmony_ci ret = -EINVAL; 164362306a36Sopenharmony_ci goto out; 164462306a36Sopenharmony_ci } 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 164762306a36Sopenharmony_ci /* 164862306a36Sopenharmony_ci * If we are creating a directory, we need to make sure that the 164962306a36Sopenharmony_ci * dot and dot dot dirents are setup properly. 
165062306a36Sopenharmony_ci */ 165162306a36Sopenharmony_ci dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); 165262306a36Sopenharmony_ci if (IS_ERR(dir)) { 165362306a36Sopenharmony_ci ext4_debug("Dir %d not found.", darg.ino); 165462306a36Sopenharmony_ci goto out; 165562306a36Sopenharmony_ci } 165662306a36Sopenharmony_ci ret = ext4_init_new_dir(NULL, dir, inode); 165762306a36Sopenharmony_ci iput(dir); 165862306a36Sopenharmony_ci if (ret) { 165962306a36Sopenharmony_ci ret = 0; 166062306a36Sopenharmony_ci goto out; 166162306a36Sopenharmony_ci } 166262306a36Sopenharmony_ci } 166362306a36Sopenharmony_ci ret = ext4_fc_replay_link_internal(sb, &darg, inode); 166462306a36Sopenharmony_ci if (ret) 166562306a36Sopenharmony_ci goto out; 166662306a36Sopenharmony_ci set_nlink(inode, 1); 166762306a36Sopenharmony_ci ext4_mark_inode_dirty(NULL, inode); 166862306a36Sopenharmony_ciout: 166962306a36Sopenharmony_ci iput(inode); 167062306a36Sopenharmony_ci return ret; 167162306a36Sopenharmony_ci} 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci/* 167462306a36Sopenharmony_ci * Record physical disk regions which are in use as per fast commit area, 167562306a36Sopenharmony_ci * and used by inodes during replay phase. Our simple replay phase 167662306a36Sopenharmony_ci * allocator excludes these regions from allocation. 167762306a36Sopenharmony_ci */ 167862306a36Sopenharmony_ciint ext4_fc_record_regions(struct super_block *sb, int ino, 167962306a36Sopenharmony_ci ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) 168062306a36Sopenharmony_ci{ 168162306a36Sopenharmony_ci struct ext4_fc_replay_state *state; 168262306a36Sopenharmony_ci struct ext4_fc_alloc_region *region; 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_ci state = &EXT4_SB(sb)->s_fc_replay_state; 168562306a36Sopenharmony_ci /* 168662306a36Sopenharmony_ci * during replay phase, the fc_regions_valid may not same as 168762306a36Sopenharmony_ci * fc_regions_used, update it when do new additions. 
/*
 * Replay add range tag.
 *
 * The TLV value carries a struct ext4_fc_add_range: an inode number plus one
 * extent (logical start, physical start, length, unwritten state). The
 * logical range is walked in chunks as reported by ext4_map_blocks() and
 * each chunk is handled in one of three ways:
 *
 *  - not mapped yet: a new extent with the recorded physical location is
 *    inserted;
 *  - mapped to a different physical location: the mapping is updated to the
 *    recorded location and the previously mapped blocks are marked free;
 *  - mapped to the recorded location: only the written/unwritten state is
 *    updated.
 *
 * This function always returns 0: a missing inode and any error hit while
 * processing the range end the replay of this tag without being reported to
 * the caller.
 */
static int ext4_fc_replay_add_range(struct super_block *sb,
				    struct ext4_fc_tl_mem *tl, u8 *val)
{
	struct ext4_fc_add_range fc_add_ex;
	struct ext4_extent newex, *ex;
	struct inode *inode;
	ext4_lblk_t start, cur;
	int remaining, len;
	ext4_fsblk_t start_pblk;
	struct ext4_map_blocks map;
	struct ext4_ext_path *path = NULL;
	int ret;

	/* Work on a local copy of the TLV value. */
	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
		ext4_ext_get_actual_len(ex));

	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
	if (IS_ERR(inode)) {
		ext4_debug("Inode not found.");
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
	if (ret)
		goto out;

	start = le32_to_cpu(ex->ee_block);
	start_pblk = ext4_ext_pblock(ex);
	len = ext4_ext_get_actual_len(ex);

	/* cur/remaining track the part of the range still to be processed. */
	cur = start;
	remaining = len;
	ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
		  inode->i_ino);

	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;
		map.m_pblk = 0;
		ret = ext4_map_blocks(NULL, inode, &map, 0);

		if (ret < 0)
			goto out;

		if (ret == 0) {
			/* Range is not mapped */
			path = ext4_find_extent(inode, cur, NULL, 0);
			if (IS_ERR(path))
				goto out;
			memset(&newex, 0, sizeof(newex));
			newex.ee_block = cpu_to_le32(cur);
			/* Physical block matching logical block "cur". */
			ext4_ext_store_pblock(
				&newex, start_pblk + cur - start);
			newex.ee_len = cpu_to_le16(map.m_len);
			if (ext4_ext_is_unwritten(ex))
				ext4_ext_mark_unwritten(&newex);
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_ext_insert_extent(
				NULL, inode, &path, &newex, 0);
			up_write((&EXT4_I(inode)->i_data_sem));
			/* The path is freed whether or not the insert worked. */
			ext4_free_ext_path(path);
			if (ret)
				goto out;
			goto next;
		}

		if (start_pblk + cur - start != map.m_pblk) {
			/*
			 * Logical to physical mapping changed. This can happen
			 * if this range was removed and then reallocated to
			 * map to new physical blocks during a fast commit.
			 */
			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex),
					start_pblk + cur - start);
			if (ret)
				goto out;
			/*
			 * Mark the old blocks as free since they aren't used
			 * anymore. We maintain an array of all the modified
			 * inodes. In case these blocks are still used at either
			 * a different logical range in the same inode or in
			 * some different inode, we will mark them as allocated
			 * at the end of the FC replay using our array of
			 * modified inodes.
			 */
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			goto next;
		}

		/* Range is mapped and needs a state change */
		ext4_debug("Converting from %ld to %d %lld",
				map.m_flags & EXT4_MAP_UNWRITTEN,
			ext4_ext_is_unwritten(ex), map.m_pblk);
		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex), map.m_pblk);
		if (ret)
			goto out;
		/*
		 * We may have split the extent tree while toggling the state.
		 * Try to shrink the extent tree now.
		 */
		ext4_ext_replay_shrink_inode(inode, start + len);
next:
		/* Advance by the chunk size ext4_map_blocks() reported. */
		cur += map.m_len;
		remaining -= map.m_len;
	}
	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
					sb->s_blocksize_bits);
out:
	iput(inode);
	/* Errors are intentionally not returned; see the function comment. */
	return 0;
}
186362306a36Sopenharmony_ci goto out; 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n", 186662306a36Sopenharmony_ci inode->i_ino, le32_to_cpu(lrange.fc_lblk), 186762306a36Sopenharmony_ci le32_to_cpu(lrange.fc_len)); 186862306a36Sopenharmony_ci while (remaining > 0) { 186962306a36Sopenharmony_ci map.m_lblk = cur; 187062306a36Sopenharmony_ci map.m_len = remaining; 187162306a36Sopenharmony_ci 187262306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 187362306a36Sopenharmony_ci if (ret < 0) 187462306a36Sopenharmony_ci goto out; 187562306a36Sopenharmony_ci if (ret > 0) { 187662306a36Sopenharmony_ci remaining -= ret; 187762306a36Sopenharmony_ci cur += ret; 187862306a36Sopenharmony_ci ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); 187962306a36Sopenharmony_ci } else { 188062306a36Sopenharmony_ci remaining -= map.m_len; 188162306a36Sopenharmony_ci cur += map.m_len; 188262306a36Sopenharmony_ci } 188362306a36Sopenharmony_ci } 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 188662306a36Sopenharmony_ci ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), 188762306a36Sopenharmony_ci le32_to_cpu(lrange.fc_lblk) + 188862306a36Sopenharmony_ci le32_to_cpu(lrange.fc_len) - 1); 188962306a36Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 189062306a36Sopenharmony_ci if (ret) 189162306a36Sopenharmony_ci goto out; 189262306a36Sopenharmony_ci ext4_ext_replay_shrink_inode(inode, 189362306a36Sopenharmony_ci i_size_read(inode) >> sb->s_blocksize_bits); 189462306a36Sopenharmony_ci ext4_mark_inode_dirty(NULL, inode); 189562306a36Sopenharmony_ciout: 189662306a36Sopenharmony_ci iput(inode); 189762306a36Sopenharmony_ci return 0; 189862306a36Sopenharmony_ci} 189962306a36Sopenharmony_ci 190062306a36Sopenharmony_cistatic void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) 190162306a36Sopenharmony_ci{ 190262306a36Sopenharmony_ci struct 
ext4_fc_replay_state *state; 190362306a36Sopenharmony_ci struct inode *inode; 190462306a36Sopenharmony_ci struct ext4_ext_path *path = NULL; 190562306a36Sopenharmony_ci struct ext4_map_blocks map; 190662306a36Sopenharmony_ci int i, ret, j; 190762306a36Sopenharmony_ci ext4_lblk_t cur, end; 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci state = &EXT4_SB(sb)->s_fc_replay_state; 191062306a36Sopenharmony_ci for (i = 0; i < state->fc_modified_inodes_used; i++) { 191162306a36Sopenharmony_ci inode = ext4_iget(sb, state->fc_modified_inodes[i], 191262306a36Sopenharmony_ci EXT4_IGET_NORMAL); 191362306a36Sopenharmony_ci if (IS_ERR(inode)) { 191462306a36Sopenharmony_ci ext4_debug("Inode %d not found.", 191562306a36Sopenharmony_ci state->fc_modified_inodes[i]); 191662306a36Sopenharmony_ci continue; 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci cur = 0; 191962306a36Sopenharmony_ci end = EXT_MAX_BLOCKS; 192062306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) { 192162306a36Sopenharmony_ci iput(inode); 192262306a36Sopenharmony_ci continue; 192362306a36Sopenharmony_ci } 192462306a36Sopenharmony_ci while (cur < end) { 192562306a36Sopenharmony_ci map.m_lblk = cur; 192662306a36Sopenharmony_ci map.m_len = end - cur; 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 192962306a36Sopenharmony_ci if (ret < 0) 193062306a36Sopenharmony_ci break; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci if (ret > 0) { 193362306a36Sopenharmony_ci path = ext4_find_extent(inode, map.m_lblk, NULL, 0); 193462306a36Sopenharmony_ci if (!IS_ERR(path)) { 193562306a36Sopenharmony_ci for (j = 0; j < path->p_depth; j++) 193662306a36Sopenharmony_ci ext4_mb_mark_bb(inode->i_sb, 193762306a36Sopenharmony_ci path[j].p_block, 1, 1); 193862306a36Sopenharmony_ci ext4_free_ext_path(path); 193962306a36Sopenharmony_ci } 194062306a36Sopenharmony_ci cur += ret; 194162306a36Sopenharmony_ci ext4_mb_mark_bb(inode->i_sb, 
map.m_pblk, 194262306a36Sopenharmony_ci map.m_len, 1); 194362306a36Sopenharmony_ci } else { 194462306a36Sopenharmony_ci cur = cur + (map.m_len ? map.m_len : 1); 194562306a36Sopenharmony_ci } 194662306a36Sopenharmony_ci } 194762306a36Sopenharmony_ci iput(inode); 194862306a36Sopenharmony_ci } 194962306a36Sopenharmony_ci} 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci/* 195262306a36Sopenharmony_ci * Check if block is in excluded regions for block allocation. The simple 195362306a36Sopenharmony_ci * allocator that runs during replay phase is calls this function to see 195462306a36Sopenharmony_ci * if it is okay to use a block. 195562306a36Sopenharmony_ci */ 195662306a36Sopenharmony_cibool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk) 195762306a36Sopenharmony_ci{ 195862306a36Sopenharmony_ci int i; 195962306a36Sopenharmony_ci struct ext4_fc_replay_state *state; 196062306a36Sopenharmony_ci 196162306a36Sopenharmony_ci state = &EXT4_SB(sb)->s_fc_replay_state; 196262306a36Sopenharmony_ci for (i = 0; i < state->fc_regions_valid; i++) { 196362306a36Sopenharmony_ci if (state->fc_regions[i].ino == 0 || 196462306a36Sopenharmony_ci state->fc_regions[i].len == 0) 196562306a36Sopenharmony_ci continue; 196662306a36Sopenharmony_ci if (in_range(blk, state->fc_regions[i].pblk, 196762306a36Sopenharmony_ci state->fc_regions[i].len)) 196862306a36Sopenharmony_ci return true; 196962306a36Sopenharmony_ci } 197062306a36Sopenharmony_ci return false; 197162306a36Sopenharmony_ci} 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_ci/* Cleanup function called after replay */ 197462306a36Sopenharmony_civoid ext4_fc_replay_cleanup(struct super_block *sb) 197562306a36Sopenharmony_ci{ 197662306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci sbi->s_mount_state &= ~EXT4_FC_REPLAY; 197962306a36Sopenharmony_ci kfree(sbi->s_fc_replay_state.fc_regions); 198062306a36Sopenharmony_ci 
kfree(sbi->s_fc_replay_state.fc_modified_inodes); 198162306a36Sopenharmony_ci} 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_cistatic bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi, 198462306a36Sopenharmony_ci int tag, int len) 198562306a36Sopenharmony_ci{ 198662306a36Sopenharmony_ci switch (tag) { 198762306a36Sopenharmony_ci case EXT4_FC_TAG_ADD_RANGE: 198862306a36Sopenharmony_ci return len == sizeof(struct ext4_fc_add_range); 198962306a36Sopenharmony_ci case EXT4_FC_TAG_DEL_RANGE: 199062306a36Sopenharmony_ci return len == sizeof(struct ext4_fc_del_range); 199162306a36Sopenharmony_ci case EXT4_FC_TAG_CREAT: 199262306a36Sopenharmony_ci case EXT4_FC_TAG_LINK: 199362306a36Sopenharmony_ci case EXT4_FC_TAG_UNLINK: 199462306a36Sopenharmony_ci len -= sizeof(struct ext4_fc_dentry_info); 199562306a36Sopenharmony_ci return len >= 1 && len <= EXT4_NAME_LEN; 199662306a36Sopenharmony_ci case EXT4_FC_TAG_INODE: 199762306a36Sopenharmony_ci len -= sizeof(struct ext4_fc_inode); 199862306a36Sopenharmony_ci return len >= EXT4_GOOD_OLD_INODE_SIZE && 199962306a36Sopenharmony_ci len <= sbi->s_inode_size; 200062306a36Sopenharmony_ci case EXT4_FC_TAG_PAD: 200162306a36Sopenharmony_ci return true; /* padding can have any length */ 200262306a36Sopenharmony_ci case EXT4_FC_TAG_TAIL: 200362306a36Sopenharmony_ci return len >= sizeof(struct ext4_fc_tail); 200462306a36Sopenharmony_ci case EXT4_FC_TAG_HEAD: 200562306a36Sopenharmony_ci return len == sizeof(struct ext4_fc_head); 200662306a36Sopenharmony_ci } 200762306a36Sopenharmony_ci return false; 200862306a36Sopenharmony_ci} 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci/* 201162306a36Sopenharmony_ci * Recovery Scan phase handler 201262306a36Sopenharmony_ci * 201362306a36Sopenharmony_ci * This function is called during the scan phase and is responsible 201462306a36Sopenharmony_ci * for doing following things: 201562306a36Sopenharmony_ci * - Make sure the fast commit area has valid tags for replay 
201662306a36Sopenharmony_ci * - Count number of tags that need to be replayed by the replay handler 201762306a36Sopenharmony_ci * - Verify CRC 201862306a36Sopenharmony_ci * - Create a list of excluded blocks for allocation during replay phase 201962306a36Sopenharmony_ci * 202062306a36Sopenharmony_ci * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is 202162306a36Sopenharmony_ci * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP 202262306a36Sopenharmony_ci * to indicate that scan has finished and JBD2 can now start replay phase. 202362306a36Sopenharmony_ci * It returns a negative error to indicate that there was an error. At the end 202462306a36Sopenharmony_ci * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set 202562306a36Sopenharmony_ci * to indicate the number of tags that need to replayed during the replay phase. 202662306a36Sopenharmony_ci */ 202762306a36Sopenharmony_cistatic int ext4_fc_replay_scan(journal_t *journal, 202862306a36Sopenharmony_ci struct buffer_head *bh, int off, 202962306a36Sopenharmony_ci tid_t expected_tid) 203062306a36Sopenharmony_ci{ 203162306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 203262306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 203362306a36Sopenharmony_ci struct ext4_fc_replay_state *state; 203462306a36Sopenharmony_ci int ret = JBD2_FC_REPLAY_CONTINUE; 203562306a36Sopenharmony_ci struct ext4_fc_add_range ext; 203662306a36Sopenharmony_ci struct ext4_fc_tl_mem tl; 203762306a36Sopenharmony_ci struct ext4_fc_tail tail; 203862306a36Sopenharmony_ci __u8 *start, *end, *cur, *val; 203962306a36Sopenharmony_ci struct ext4_fc_head head; 204062306a36Sopenharmony_ci struct ext4_extent *ex; 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci state = &sbi->s_fc_replay_state; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci start = (u8 *)bh->b_data; 204562306a36Sopenharmony_ci end = start + journal->j_blocksize; 
204662306a36Sopenharmony_ci 204762306a36Sopenharmony_ci if (state->fc_replay_expected_off == 0) { 204862306a36Sopenharmony_ci state->fc_cur_tag = 0; 204962306a36Sopenharmony_ci state->fc_replay_num_tags = 0; 205062306a36Sopenharmony_ci state->fc_crc = 0; 205162306a36Sopenharmony_ci state->fc_regions = NULL; 205262306a36Sopenharmony_ci state->fc_regions_valid = state->fc_regions_used = 205362306a36Sopenharmony_ci state->fc_regions_size = 0; 205462306a36Sopenharmony_ci /* Check if we can stop early */ 205562306a36Sopenharmony_ci if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag) 205662306a36Sopenharmony_ci != EXT4_FC_TAG_HEAD) 205762306a36Sopenharmony_ci return 0; 205862306a36Sopenharmony_ci } 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci if (off != state->fc_replay_expected_off) { 206162306a36Sopenharmony_ci ret = -EFSCORRUPTED; 206262306a36Sopenharmony_ci goto out_err; 206362306a36Sopenharmony_ci } 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci state->fc_replay_expected_off++; 206662306a36Sopenharmony_ci for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; 206762306a36Sopenharmony_ci cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 206862306a36Sopenharmony_ci ext4_fc_get_tl(&tl, cur); 206962306a36Sopenharmony_ci val = cur + EXT4_FC_TAG_BASE_LEN; 207062306a36Sopenharmony_ci if (tl.fc_len > end - val || 207162306a36Sopenharmony_ci !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) { 207262306a36Sopenharmony_ci ret = state->fc_replay_num_tags ? 
207362306a36Sopenharmony_ci JBD2_FC_REPLAY_STOP : -ECANCELED; 207462306a36Sopenharmony_ci goto out_err; 207562306a36Sopenharmony_ci } 207662306a36Sopenharmony_ci ext4_debug("Scan phase, tag:%s, blk %lld\n", 207762306a36Sopenharmony_ci tag2str(tl.fc_tag), bh->b_blocknr); 207862306a36Sopenharmony_ci switch (tl.fc_tag) { 207962306a36Sopenharmony_ci case EXT4_FC_TAG_ADD_RANGE: 208062306a36Sopenharmony_ci memcpy(&ext, val, sizeof(ext)); 208162306a36Sopenharmony_ci ex = (struct ext4_extent *)&ext.fc_ex; 208262306a36Sopenharmony_ci ret = ext4_fc_record_regions(sb, 208362306a36Sopenharmony_ci le32_to_cpu(ext.fc_ino), 208462306a36Sopenharmony_ci le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), 208562306a36Sopenharmony_ci ext4_ext_get_actual_len(ex), 0); 208662306a36Sopenharmony_ci if (ret < 0) 208762306a36Sopenharmony_ci break; 208862306a36Sopenharmony_ci ret = JBD2_FC_REPLAY_CONTINUE; 208962306a36Sopenharmony_ci fallthrough; 209062306a36Sopenharmony_ci case EXT4_FC_TAG_DEL_RANGE: 209162306a36Sopenharmony_ci case EXT4_FC_TAG_LINK: 209262306a36Sopenharmony_ci case EXT4_FC_TAG_UNLINK: 209362306a36Sopenharmony_ci case EXT4_FC_TAG_CREAT: 209462306a36Sopenharmony_ci case EXT4_FC_TAG_INODE: 209562306a36Sopenharmony_ci case EXT4_FC_TAG_PAD: 209662306a36Sopenharmony_ci state->fc_cur_tag++; 209762306a36Sopenharmony_ci state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 209862306a36Sopenharmony_ci EXT4_FC_TAG_BASE_LEN + tl.fc_len); 209962306a36Sopenharmony_ci break; 210062306a36Sopenharmony_ci case EXT4_FC_TAG_TAIL: 210162306a36Sopenharmony_ci state->fc_cur_tag++; 210262306a36Sopenharmony_ci memcpy(&tail, val, sizeof(tail)); 210362306a36Sopenharmony_ci state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 210462306a36Sopenharmony_ci EXT4_FC_TAG_BASE_LEN + 210562306a36Sopenharmony_ci offsetof(struct ext4_fc_tail, 210662306a36Sopenharmony_ci fc_crc)); 210762306a36Sopenharmony_ci if (le32_to_cpu(tail.fc_tid) == expected_tid && 210862306a36Sopenharmony_ci le32_to_cpu(tail.fc_crc) == 
state->fc_crc) { 210962306a36Sopenharmony_ci state->fc_replay_num_tags = state->fc_cur_tag; 211062306a36Sopenharmony_ci state->fc_regions_valid = 211162306a36Sopenharmony_ci state->fc_regions_used; 211262306a36Sopenharmony_ci } else { 211362306a36Sopenharmony_ci ret = state->fc_replay_num_tags ? 211462306a36Sopenharmony_ci JBD2_FC_REPLAY_STOP : -EFSBADCRC; 211562306a36Sopenharmony_ci } 211662306a36Sopenharmony_ci state->fc_crc = 0; 211762306a36Sopenharmony_ci break; 211862306a36Sopenharmony_ci case EXT4_FC_TAG_HEAD: 211962306a36Sopenharmony_ci memcpy(&head, val, sizeof(head)); 212062306a36Sopenharmony_ci if (le32_to_cpu(head.fc_features) & 212162306a36Sopenharmony_ci ~EXT4_FC_SUPPORTED_FEATURES) { 212262306a36Sopenharmony_ci ret = -EOPNOTSUPP; 212362306a36Sopenharmony_ci break; 212462306a36Sopenharmony_ci } 212562306a36Sopenharmony_ci if (le32_to_cpu(head.fc_tid) != expected_tid) { 212662306a36Sopenharmony_ci ret = JBD2_FC_REPLAY_STOP; 212762306a36Sopenharmony_ci break; 212862306a36Sopenharmony_ci } 212962306a36Sopenharmony_ci state->fc_cur_tag++; 213062306a36Sopenharmony_ci state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, 213162306a36Sopenharmony_ci EXT4_FC_TAG_BASE_LEN + tl.fc_len); 213262306a36Sopenharmony_ci break; 213362306a36Sopenharmony_ci default: 213462306a36Sopenharmony_ci ret = state->fc_replay_num_tags ? 213562306a36Sopenharmony_ci JBD2_FC_REPLAY_STOP : -ECANCELED; 213662306a36Sopenharmony_ci } 213762306a36Sopenharmony_ci if (ret < 0 || ret == JBD2_FC_REPLAY_STOP) 213862306a36Sopenharmony_ci break; 213962306a36Sopenharmony_ci } 214062306a36Sopenharmony_ci 214162306a36Sopenharmony_ciout_err: 214262306a36Sopenharmony_ci trace_ext4_fc_replay_scan(sb, ret, off); 214362306a36Sopenharmony_ci return ret; 214462306a36Sopenharmony_ci} 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_ci/* 214762306a36Sopenharmony_ci * Main recovery path entry point. 214862306a36Sopenharmony_ci * The meaning of return codes is similar as above. 
214962306a36Sopenharmony_ci */ 215062306a36Sopenharmony_cistatic int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, 215162306a36Sopenharmony_ci enum passtype pass, int off, tid_t expected_tid) 215262306a36Sopenharmony_ci{ 215362306a36Sopenharmony_ci struct super_block *sb = journal->j_private; 215462306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 215562306a36Sopenharmony_ci struct ext4_fc_tl_mem tl; 215662306a36Sopenharmony_ci __u8 *start, *end, *cur, *val; 215762306a36Sopenharmony_ci int ret = JBD2_FC_REPLAY_CONTINUE; 215862306a36Sopenharmony_ci struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; 215962306a36Sopenharmony_ci struct ext4_fc_tail tail; 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci if (pass == PASS_SCAN) { 216262306a36Sopenharmony_ci state->fc_current_pass = PASS_SCAN; 216362306a36Sopenharmony_ci return ext4_fc_replay_scan(journal, bh, off, expected_tid); 216462306a36Sopenharmony_ci } 216562306a36Sopenharmony_ci 216662306a36Sopenharmony_ci if (state->fc_current_pass != pass) { 216762306a36Sopenharmony_ci state->fc_current_pass = pass; 216862306a36Sopenharmony_ci sbi->s_mount_state |= EXT4_FC_REPLAY; 216962306a36Sopenharmony_ci } 217062306a36Sopenharmony_ci if (!sbi->s_fc_replay_state.fc_replay_num_tags) { 217162306a36Sopenharmony_ci ext4_debug("Replay stops\n"); 217262306a36Sopenharmony_ci ext4_fc_set_bitmaps_and_counters(sb); 217362306a36Sopenharmony_ci return 0; 217462306a36Sopenharmony_ci } 217562306a36Sopenharmony_ci 217662306a36Sopenharmony_ci#ifdef CONFIG_EXT4_DEBUG 217762306a36Sopenharmony_ci if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) { 217862306a36Sopenharmony_ci pr_warn("Dropping fc block %d because max_replay set\n", off); 217962306a36Sopenharmony_ci return JBD2_FC_REPLAY_STOP; 218062306a36Sopenharmony_ci } 218162306a36Sopenharmony_ci#endif 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_ci start = (u8 *)bh->b_data; 218462306a36Sopenharmony_ci end = start + 
journal->j_blocksize; 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; 218762306a36Sopenharmony_ci cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { 218862306a36Sopenharmony_ci ext4_fc_get_tl(&tl, cur); 218962306a36Sopenharmony_ci val = cur + EXT4_FC_TAG_BASE_LEN; 219062306a36Sopenharmony_ci 219162306a36Sopenharmony_ci if (state->fc_replay_num_tags == 0) { 219262306a36Sopenharmony_ci ret = JBD2_FC_REPLAY_STOP; 219362306a36Sopenharmony_ci ext4_fc_set_bitmaps_and_counters(sb); 219462306a36Sopenharmony_ci break; 219562306a36Sopenharmony_ci } 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); 219862306a36Sopenharmony_ci state->fc_replay_num_tags--; 219962306a36Sopenharmony_ci switch (tl.fc_tag) { 220062306a36Sopenharmony_ci case EXT4_FC_TAG_LINK: 220162306a36Sopenharmony_ci ret = ext4_fc_replay_link(sb, &tl, val); 220262306a36Sopenharmony_ci break; 220362306a36Sopenharmony_ci case EXT4_FC_TAG_UNLINK: 220462306a36Sopenharmony_ci ret = ext4_fc_replay_unlink(sb, &tl, val); 220562306a36Sopenharmony_ci break; 220662306a36Sopenharmony_ci case EXT4_FC_TAG_ADD_RANGE: 220762306a36Sopenharmony_ci ret = ext4_fc_replay_add_range(sb, &tl, val); 220862306a36Sopenharmony_ci break; 220962306a36Sopenharmony_ci case EXT4_FC_TAG_CREAT: 221062306a36Sopenharmony_ci ret = ext4_fc_replay_create(sb, &tl, val); 221162306a36Sopenharmony_ci break; 221262306a36Sopenharmony_ci case EXT4_FC_TAG_DEL_RANGE: 221362306a36Sopenharmony_ci ret = ext4_fc_replay_del_range(sb, &tl, val); 221462306a36Sopenharmony_ci break; 221562306a36Sopenharmony_ci case EXT4_FC_TAG_INODE: 221662306a36Sopenharmony_ci ret = ext4_fc_replay_inode(sb, &tl, val); 221762306a36Sopenharmony_ci break; 221862306a36Sopenharmony_ci case EXT4_FC_TAG_PAD: 221962306a36Sopenharmony_ci trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, 222062306a36Sopenharmony_ci tl.fc_len, 0); 222162306a36Sopenharmony_ci break; 
222262306a36Sopenharmony_ci case EXT4_FC_TAG_TAIL: 222362306a36Sopenharmony_ci trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 222462306a36Sopenharmony_ci 0, tl.fc_len, 0); 222562306a36Sopenharmony_ci memcpy(&tail, val, sizeof(tail)); 222662306a36Sopenharmony_ci WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); 222762306a36Sopenharmony_ci break; 222862306a36Sopenharmony_ci case EXT4_FC_TAG_HEAD: 222962306a36Sopenharmony_ci break; 223062306a36Sopenharmony_ci default: 223162306a36Sopenharmony_ci trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); 223262306a36Sopenharmony_ci ret = -ECANCELED; 223362306a36Sopenharmony_ci break; 223462306a36Sopenharmony_ci } 223562306a36Sopenharmony_ci if (ret < 0) 223662306a36Sopenharmony_ci break; 223762306a36Sopenharmony_ci ret = JBD2_FC_REPLAY_CONTINUE; 223862306a36Sopenharmony_ci } 223962306a36Sopenharmony_ci return ret; 224062306a36Sopenharmony_ci} 224162306a36Sopenharmony_ci 224262306a36Sopenharmony_civoid ext4_fc_init(struct super_block *sb, journal_t *journal) 224362306a36Sopenharmony_ci{ 224462306a36Sopenharmony_ci /* 224562306a36Sopenharmony_ci * We set replay callback even if fast commit disabled because we may 224662306a36Sopenharmony_ci * could still have fast commit blocks that need to be replayed even if 224762306a36Sopenharmony_ci * fast commit has now been turned off. 
224862306a36Sopenharmony_ci */ 224962306a36Sopenharmony_ci journal->j_fc_replay_callback = ext4_fc_replay; 225062306a36Sopenharmony_ci if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) 225162306a36Sopenharmony_ci return; 225262306a36Sopenharmony_ci journal->j_fc_cleanup_callback = ext4_fc_cleanup; 225362306a36Sopenharmony_ci} 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_cistatic const char * const fc_ineligible_reasons[] = { 225662306a36Sopenharmony_ci [EXT4_FC_REASON_XATTR] = "Extended attributes changed", 225762306a36Sopenharmony_ci [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename", 225862306a36Sopenharmony_ci [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed", 225962306a36Sopenharmony_ci [EXT4_FC_REASON_NOMEM] = "Insufficient memory", 226062306a36Sopenharmony_ci [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot", 226162306a36Sopenharmony_ci [EXT4_FC_REASON_RESIZE] = "Resize", 226262306a36Sopenharmony_ci [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed", 226362306a36Sopenharmony_ci [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op", 226462306a36Sopenharmony_ci [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling", 226562306a36Sopenharmony_ci [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename", 226662306a36Sopenharmony_ci}; 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_ciint ext4_fc_info_show(struct seq_file *seq, void *v) 226962306a36Sopenharmony_ci{ 227062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private); 227162306a36Sopenharmony_ci struct ext4_fc_stats *stats = &sbi->s_fc_stats; 227262306a36Sopenharmony_ci int i; 227362306a36Sopenharmony_ci 227462306a36Sopenharmony_ci if (v != SEQ_START_TOKEN) 227562306a36Sopenharmony_ci return 0; 227662306a36Sopenharmony_ci 227762306a36Sopenharmony_ci seq_printf(seq, 227862306a36Sopenharmony_ci "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", 227962306a36Sopenharmony_ci stats->fc_num_commits, stats->fc_ineligible_commits, 
228062306a36Sopenharmony_ci stats->fc_numblks, 228162306a36Sopenharmony_ci div_u64(stats->s_fc_avg_commit_time, 1000)); 228262306a36Sopenharmony_ci seq_puts(seq, "Ineligible reasons:\n"); 228362306a36Sopenharmony_ci for (i = 0; i < EXT4_FC_REASON_MAX; i++) 228462306a36Sopenharmony_ci seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], 228562306a36Sopenharmony_ci stats->fc_ineligible_reason_count[i]); 228662306a36Sopenharmony_ci 228762306a36Sopenharmony_ci return 0; 228862306a36Sopenharmony_ci} 228962306a36Sopenharmony_ci 229062306a36Sopenharmony_ciint __init ext4_fc_init_dentry_cache(void) 229162306a36Sopenharmony_ci{ 229262306a36Sopenharmony_ci ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update, 229362306a36Sopenharmony_ci SLAB_RECLAIM_ACCOUNT); 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci if (ext4_fc_dentry_cachep == NULL) 229662306a36Sopenharmony_ci return -ENOMEM; 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci return 0; 229962306a36Sopenharmony_ci} 230062306a36Sopenharmony_ci 230162306a36Sopenharmony_civoid ext4_fc_destroy_dentry_cache(void) 230262306a36Sopenharmony_ci{ 230362306a36Sopenharmony_ci kmem_cache_destroy(ext4_fc_dentry_cachep); 230462306a36Sopenharmony_ci} 2305