162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * journal.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Defines functions of journalling api 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Copyright (C) 2003, 2004 Oracle. All rights reserved. 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/fs.h> 1162306a36Sopenharmony_ci#include <linux/types.h> 1262306a36Sopenharmony_ci#include <linux/slab.h> 1362306a36Sopenharmony_ci#include <linux/highmem.h> 1462306a36Sopenharmony_ci#include <linux/kthread.h> 1562306a36Sopenharmony_ci#include <linux/time.h> 1662306a36Sopenharmony_ci#include <linux/random.h> 1762306a36Sopenharmony_ci#include <linux/delay.h> 1862306a36Sopenharmony_ci#include <linux/writeback.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <cluster/masklog.h> 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#include "ocfs2.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci#include "alloc.h" 2562306a36Sopenharmony_ci#include "blockcheck.h" 2662306a36Sopenharmony_ci#include "dir.h" 2762306a36Sopenharmony_ci#include "dlmglue.h" 2862306a36Sopenharmony_ci#include "extent_map.h" 2962306a36Sopenharmony_ci#include "heartbeat.h" 3062306a36Sopenharmony_ci#include "inode.h" 3162306a36Sopenharmony_ci#include "journal.h" 3262306a36Sopenharmony_ci#include "localalloc.h" 3362306a36Sopenharmony_ci#include "slot_map.h" 3462306a36Sopenharmony_ci#include "super.h" 3562306a36Sopenharmony_ci#include "sysfile.h" 3662306a36Sopenharmony_ci#include "uptodate.h" 3762306a36Sopenharmony_ci#include "quota.h" 3862306a36Sopenharmony_ci#include "file.h" 3962306a36Sopenharmony_ci#include "namei.h" 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#include "buffer_head_io.h" 4262306a36Sopenharmony_ci#include "ocfs2_trace.h" 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ciDEFINE_SPINLOCK(trans_inc_lock); 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cistatic int ocfs2_force_read_journal(struct inode *inode); 4962306a36Sopenharmony_cistatic int ocfs2_recover_node(struct ocfs2_super *osb, 5062306a36Sopenharmony_ci int node_num, int slot_num); 5162306a36Sopenharmony_cistatic int __ocfs2_recovery_thread(void *arg); 5262306a36Sopenharmony_cistatic int ocfs2_commit_cache(struct ocfs2_super *osb); 5362306a36Sopenharmony_cistatic int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); 5462306a36Sopenharmony_cistatic int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 5562306a36Sopenharmony_ci int dirty, int replayed); 5662306a36Sopenharmony_cistatic int ocfs2_trylock_journal(struct ocfs2_super *osb, 5762306a36Sopenharmony_ci int slot_num); 5862306a36Sopenharmony_cistatic int ocfs2_recover_orphans(struct ocfs2_super *osb, 5962306a36Sopenharmony_ci int slot, 6062306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type); 6162306a36Sopenharmony_cistatic int ocfs2_commit_thread(void *arg); 6262306a36Sopenharmony_cistatic void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 6362306a36Sopenharmony_ci int slot_num, 6462306a36Sopenharmony_ci struct ocfs2_dinode *la_dinode, 6562306a36Sopenharmony_ci struct ocfs2_dinode *tl_dinode, 6662306a36Sopenharmony_ci struct ocfs2_quota_recovery *qrec, 6762306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci return __ocfs2_wait_on_mount(osb, 0); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistatic inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci return __ocfs2_wait_on_mount(osb, 1); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci/* 8062306a36Sopenharmony_ci * This replay_map is to track online/offline slots, so we could recover 8162306a36Sopenharmony_ci * offline slots during recovery and mount 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cienum ocfs2_replay_state { 8562306a36Sopenharmony_ci REPLAY_UNNEEDED = 0, /* Replay is not needed, so ignore this map */ 8662306a36Sopenharmony_ci REPLAY_NEEDED, /* Replay slots marked in rm_replay_slots */ 8762306a36Sopenharmony_ci REPLAY_DONE /* Replay was already queued */ 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistruct ocfs2_replay_map { 9162306a36Sopenharmony_ci unsigned int rm_slots; 9262306a36Sopenharmony_ci enum ocfs2_replay_state rm_state; 9362306a36Sopenharmony_ci unsigned char rm_replay_slots[]; 9462306a36Sopenharmony_ci}; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_cistatic void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) 9762306a36Sopenharmony_ci{ 9862306a36Sopenharmony_ci if (!osb->replay_map) 9962306a36Sopenharmony_ci return; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci /* If we've already queued the replay, we don't have any more to do */ 10262306a36Sopenharmony_ci if (osb->replay_map->rm_state == REPLAY_DONE) 10362306a36Sopenharmony_ci return; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci osb->replay_map->rm_state = state; 10662306a36Sopenharmony_ci} 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ciint ocfs2_compute_replay_slots(struct ocfs2_super *osb) 10962306a36Sopenharmony_ci{ 11062306a36Sopenharmony_ci struct ocfs2_replay_map *replay_map; 11162306a36Sopenharmony_ci int i, node_num; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci /* If replay map is already set, we don't do it again */ 11462306a36Sopenharmony_ci if (osb->replay_map) 11562306a36Sopenharmony_ci return 0; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci replay_map = kzalloc(struct_size(replay_map, rm_replay_slots, 11862306a36Sopenharmony_ci osb->max_slots), 11962306a36Sopenharmony_ci GFP_KERNEL); 12062306a36Sopenharmony_ci if (!replay_map) { 12162306a36Sopenharmony_ci mlog_errno(-ENOMEM); 12262306a36Sopenharmony_ci return -ENOMEM; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci replay_map->rm_slots = osb->max_slots; 12862306a36Sopenharmony_ci replay_map->rm_state = REPLAY_UNNEEDED; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* set rm_replay_slots for offline slot(s) */ 13162306a36Sopenharmony_ci for (i = 0; i < replay_map->rm_slots; i++) { 13262306a36Sopenharmony_ci if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT) 13362306a36Sopenharmony_ci replay_map->rm_replay_slots[i] = 1; 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci osb->replay_map = replay_map; 13762306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 13862306a36Sopenharmony_ci return 0; 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistatic void ocfs2_queue_replay_slots(struct ocfs2_super *osb, 14262306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct ocfs2_replay_map *replay_map = osb->replay_map; 14562306a36Sopenharmony_ci int i; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (!replay_map) 14862306a36Sopenharmony_ci return; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci if (replay_map->rm_state != REPLAY_NEEDED) 15162306a36Sopenharmony_ci return; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci for (i = 0; i < replay_map->rm_slots; i++) 15462306a36Sopenharmony_ci if (replay_map->rm_replay_slots[i]) 15562306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, i, NULL, 15662306a36Sopenharmony_ci NULL, NULL, 15762306a36Sopenharmony_ci orphan_reco_type); 15862306a36Sopenharmony_ci replay_map->rm_state = REPLAY_DONE; 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_civoid ocfs2_free_replay_slots(struct ocfs2_super *osb) 16262306a36Sopenharmony_ci{ 16362306a36Sopenharmony_ci struct ocfs2_replay_map *replay_map = osb->replay_map; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci if (!osb->replay_map) 16662306a36Sopenharmony_ci return; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci kfree(replay_map); 16962306a36Sopenharmony_ci osb->replay_map = NULL; 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ciint ocfs2_recovery_init(struct ocfs2_super *osb) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci struct ocfs2_recovery_map *rm; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci mutex_init(&osb->recovery_lock); 17762306a36Sopenharmony_ci osb->disable_recovery = 0; 17862306a36Sopenharmony_ci osb->recovery_thread_task = NULL; 17962306a36Sopenharmony_ci init_waitqueue_head(&osb->recovery_event); 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci rm = kzalloc(struct_size(rm, rm_entries, osb->max_slots), 18262306a36Sopenharmony_ci GFP_KERNEL); 18362306a36Sopenharmony_ci if (!rm) { 18462306a36Sopenharmony_ci mlog_errno(-ENOMEM); 18562306a36Sopenharmony_ci return -ENOMEM; 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci osb->recovery_map = rm; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci return 0; 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci/* we can't grab the goofy sem lock from inside wait_event, so we use 19462306a36Sopenharmony_ci * memory barriers to make sure that we'll see the null task before 19562306a36Sopenharmony_ci * being woken up */ 19662306a36Sopenharmony_cistatic int ocfs2_recovery_thread_running(struct ocfs2_super *osb) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci mb(); 19962306a36Sopenharmony_ci return osb->recovery_thread_task != NULL; 20062306a36Sopenharmony_ci} 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_civoid ocfs2_recovery_exit(struct ocfs2_super *osb) 20362306a36Sopenharmony_ci{ 20462306a36Sopenharmony_ci struct ocfs2_recovery_map *rm; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci /* disable any new recovery threads and wait for any currently 20762306a36Sopenharmony_ci * running ones to exit. Do this before setting the vol_state. */ 20862306a36Sopenharmony_ci mutex_lock(&osb->recovery_lock); 20962306a36Sopenharmony_ci osb->disable_recovery = 1; 21062306a36Sopenharmony_ci mutex_unlock(&osb->recovery_lock); 21162306a36Sopenharmony_ci wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci /* At this point, we know that no more recovery threads can be 21462306a36Sopenharmony_ci * launched, so wait for any recovery completion work to 21562306a36Sopenharmony_ci * complete. */ 21662306a36Sopenharmony_ci if (osb->ocfs2_wq) 21762306a36Sopenharmony_ci flush_workqueue(osb->ocfs2_wq); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci /* 22062306a36Sopenharmony_ci * Now that recovery is shut down, and the osb is about to be 22162306a36Sopenharmony_ci * freed, the osb_lock is not taken here. 22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci rm = osb->recovery_map; 22462306a36Sopenharmony_ci /* XXX: Should we bug if there are dirty entries? */ 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci kfree(rm); 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic int __ocfs2_recovery_map_test(struct ocfs2_super *osb, 23062306a36Sopenharmony_ci unsigned int node_num) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci int i; 23362306a36Sopenharmony_ci struct ocfs2_recovery_map *rm = osb->recovery_map; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci assert_spin_locked(&osb->osb_lock); 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci for (i = 0; i < rm->rm_used; i++) { 23862306a36Sopenharmony_ci if (rm->rm_entries[i] == node_num) 23962306a36Sopenharmony_ci return 1; 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci return 0; 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci/* Behaves like test-and-set. Returns the previous value */ 24662306a36Sopenharmony_cistatic int ocfs2_recovery_map_set(struct ocfs2_super *osb, 24762306a36Sopenharmony_ci unsigned int node_num) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct ocfs2_recovery_map *rm = osb->recovery_map; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 25262306a36Sopenharmony_ci if (__ocfs2_recovery_map_test(osb, node_num)) { 25362306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 25462306a36Sopenharmony_ci return 1; 25562306a36Sopenharmony_ci } 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci /* XXX: Can this be exploited? Not from o2dlm... */ 25862306a36Sopenharmony_ci BUG_ON(rm->rm_used >= osb->max_slots); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci rm->rm_entries[rm->rm_used] = node_num; 26162306a36Sopenharmony_ci rm->rm_used++; 26262306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci return 0; 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_cistatic void ocfs2_recovery_map_clear(struct ocfs2_super *osb, 26862306a36Sopenharmony_ci unsigned int node_num) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci int i; 27162306a36Sopenharmony_ci struct ocfs2_recovery_map *rm = osb->recovery_map; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci for (i = 0; i < rm->rm_used; i++) { 27662306a36Sopenharmony_ci if (rm->rm_entries[i] == node_num) 27762306a36Sopenharmony_ci break; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci if (i < rm->rm_used) { 28162306a36Sopenharmony_ci /* XXX: be careful with the pointer math */ 28262306a36Sopenharmony_ci memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), 28362306a36Sopenharmony_ci (rm->rm_used - i - 1) * sizeof(unsigned int)); 28462306a36Sopenharmony_ci rm->rm_used--; 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 28862306a36Sopenharmony_ci} 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_cistatic int ocfs2_commit_cache(struct ocfs2_super *osb) 29162306a36Sopenharmony_ci{ 29262306a36Sopenharmony_ci int status = 0; 29362306a36Sopenharmony_ci unsigned int flushed; 29462306a36Sopenharmony_ci struct ocfs2_journal *journal = NULL; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci journal = osb->journal; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci /* Flush all pending commits and checkpoint the journal. */ 29962306a36Sopenharmony_ci down_write(&journal->j_trans_barrier); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci flushed = atomic_read(&journal->j_num_trans); 30262306a36Sopenharmony_ci trace_ocfs2_commit_cache_begin(flushed); 30362306a36Sopenharmony_ci if (flushed == 0) { 30462306a36Sopenharmony_ci up_write(&journal->j_trans_barrier); 30562306a36Sopenharmony_ci goto finally; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci jbd2_journal_lock_updates(journal->j_journal); 30962306a36Sopenharmony_ci status = jbd2_journal_flush(journal->j_journal, 0); 31062306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal->j_journal); 31162306a36Sopenharmony_ci if (status < 0) { 31262306a36Sopenharmony_ci up_write(&journal->j_trans_barrier); 31362306a36Sopenharmony_ci mlog_errno(status); 31462306a36Sopenharmony_ci goto finally; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci ocfs2_inc_trans_id(journal); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci flushed = atomic_read(&journal->j_num_trans); 32062306a36Sopenharmony_ci atomic_set(&journal->j_num_trans, 0); 32162306a36Sopenharmony_ci up_write(&journal->j_trans_barrier); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci ocfs2_wake_downconvert_thread(osb); 32662306a36Sopenharmony_ci wake_up(&journal->j_checkpointed); 32762306a36Sopenharmony_cifinally: 32862306a36Sopenharmony_ci return status; 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cihandle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci journal_t *journal = osb->journal->j_journal; 33462306a36Sopenharmony_ci handle_t *handle; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci BUG_ON(!osb || !osb->journal->j_journal); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci if (ocfs2_is_hard_readonly(osb)) 33962306a36Sopenharmony_ci return ERR_PTR(-EROFS); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); 34262306a36Sopenharmony_ci BUG_ON(max_buffs <= 0); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci /* Nested transaction? Just return the handle... */ 34562306a36Sopenharmony_ci if (journal_current_handle()) 34662306a36Sopenharmony_ci return jbd2_journal_start(journal, max_buffs); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci sb_start_intwrite(osb->sb); 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci down_read(&osb->journal->j_trans_barrier); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci handle = jbd2_journal_start(journal, max_buffs); 35362306a36Sopenharmony_ci if (IS_ERR(handle)) { 35462306a36Sopenharmony_ci up_read(&osb->journal->j_trans_barrier); 35562306a36Sopenharmony_ci sb_end_intwrite(osb->sb); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci mlog_errno(PTR_ERR(handle)); 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci if (is_journal_aborted(journal)) { 36062306a36Sopenharmony_ci ocfs2_abort(osb->sb, "Detected aborted journal\n"); 36162306a36Sopenharmony_ci handle = ERR_PTR(-EROFS); 36262306a36Sopenharmony_ci } 36362306a36Sopenharmony_ci } else { 36462306a36Sopenharmony_ci if (!ocfs2_mount_local(osb)) 36562306a36Sopenharmony_ci atomic_inc(&(osb->journal->j_num_trans)); 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci return handle; 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ciint ocfs2_commit_trans(struct ocfs2_super *osb, 37262306a36Sopenharmony_ci handle_t *handle) 37362306a36Sopenharmony_ci{ 37462306a36Sopenharmony_ci int ret, nested; 37562306a36Sopenharmony_ci struct ocfs2_journal *journal = osb->journal; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci BUG_ON(!handle); 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci nested = handle->h_ref > 1; 38062306a36Sopenharmony_ci ret = jbd2_journal_stop(handle); 38162306a36Sopenharmony_ci if (ret < 0) 38262306a36Sopenharmony_ci mlog_errno(ret); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci if (!nested) { 38562306a36Sopenharmony_ci up_read(&journal->j_trans_barrier); 38662306a36Sopenharmony_ci sb_end_intwrite(osb->sb); 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci return ret; 39062306a36Sopenharmony_ci} 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci/* 39362306a36Sopenharmony_ci * 'nblocks' is what you want to add to the current transaction. 39462306a36Sopenharmony_ci * 39562306a36Sopenharmony_ci * This might call jbd2_journal_restart() which will commit dirty buffers 39662306a36Sopenharmony_ci * and then restart the transaction. Before calling 39762306a36Sopenharmony_ci * ocfs2_extend_trans(), any changed blocks should have been 39862306a36Sopenharmony_ci * dirtied. After calling it, all blocks which need to be changed must 39962306a36Sopenharmony_ci * go through another set of journal_access/journal_dirty calls. 40062306a36Sopenharmony_ci * 40162306a36Sopenharmony_ci * WARNING: This will not release any semaphores or disk locks taken 40262306a36Sopenharmony_ci * during the transaction, so make sure they were taken *before* 40362306a36Sopenharmony_ci * start_trans or we'll have ordering deadlocks. 40462306a36Sopenharmony_ci * 40562306a36Sopenharmony_ci * WARNING2: Note that we do *not* drop j_trans_barrier here. This is 40662306a36Sopenharmony_ci * good because transaction ids haven't yet been recorded on the 40762306a36Sopenharmony_ci * cluster locks associated with this handle. 40862306a36Sopenharmony_ci */ 40962306a36Sopenharmony_ciint ocfs2_extend_trans(handle_t *handle, int nblocks) 41062306a36Sopenharmony_ci{ 41162306a36Sopenharmony_ci int status, old_nblocks; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci BUG_ON(!handle); 41462306a36Sopenharmony_ci BUG_ON(nblocks < 0); 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci if (!nblocks) 41762306a36Sopenharmony_ci return 0; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci old_nblocks = jbd2_handle_buffer_credits(handle); 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci trace_ocfs2_extend_trans(old_nblocks, nblocks); 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci#ifdef CONFIG_OCFS2_DEBUG_FS 42462306a36Sopenharmony_ci status = 1; 42562306a36Sopenharmony_ci#else 42662306a36Sopenharmony_ci status = jbd2_journal_extend(handle, nblocks, 0); 42762306a36Sopenharmony_ci if (status < 0) { 42862306a36Sopenharmony_ci mlog_errno(status); 42962306a36Sopenharmony_ci goto bail; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci#endif 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci if (status > 0) { 43462306a36Sopenharmony_ci trace_ocfs2_extend_trans_restart(old_nblocks + nblocks); 43562306a36Sopenharmony_ci status = jbd2_journal_restart(handle, 43662306a36Sopenharmony_ci old_nblocks + nblocks); 43762306a36Sopenharmony_ci if (status < 0) { 43862306a36Sopenharmony_ci mlog_errno(status); 43962306a36Sopenharmony_ci goto bail; 44062306a36Sopenharmony_ci } 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci status = 0; 44462306a36Sopenharmony_cibail: 44562306a36Sopenharmony_ci return status; 44662306a36Sopenharmony_ci} 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci/* 44962306a36Sopenharmony_ci * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. 45062306a36Sopenharmony_ci * If that fails, restart the transaction & regain write access for the 45162306a36Sopenharmony_ci * buffer head which is used for metadata modifications. 45262306a36Sopenharmony_ci * Taken from Ext4: extend_or_restart_transaction() 45362306a36Sopenharmony_ci */ 45462306a36Sopenharmony_ciint ocfs2_allocate_extend_trans(handle_t *handle, int thresh) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci int status, old_nblks; 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci BUG_ON(!handle); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci old_nblks = jbd2_handle_buffer_credits(handle); 46162306a36Sopenharmony_ci trace_ocfs2_allocate_extend_trans(old_nblks, thresh); 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci if (old_nblks < thresh) 46462306a36Sopenharmony_ci return 0; 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0); 46762306a36Sopenharmony_ci if (status < 0) { 46862306a36Sopenharmony_ci mlog_errno(status); 46962306a36Sopenharmony_ci goto bail; 47062306a36Sopenharmony_ci } 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci if (status > 0) { 47362306a36Sopenharmony_ci status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); 47462306a36Sopenharmony_ci if (status < 0) 47562306a36Sopenharmony_ci mlog_errno(status); 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cibail: 47962306a36Sopenharmony_ci return status; 48062306a36Sopenharmony_ci} 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cistruct ocfs2_triggers { 48462306a36Sopenharmony_ci struct jbd2_buffer_trigger_type ot_triggers; 48562306a36Sopenharmony_ci int ot_offset; 48662306a36Sopenharmony_ci}; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_cistatic inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) 48962306a36Sopenharmony_ci{ 49062306a36Sopenharmony_ci return container_of(triggers, struct ocfs2_triggers, ot_triggers); 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_cistatic void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, 49462306a36Sopenharmony_ci struct buffer_head *bh, 49562306a36Sopenharmony_ci void *data, size_t size) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci /* 50062306a36Sopenharmony_ci * We aren't guaranteed to have the superblock here, so we 50162306a36Sopenharmony_ci * must unconditionally compute the ecc data. 50262306a36Sopenharmony_ci * __ocfs2_journal_access() will only set the triggers if 50362306a36Sopenharmony_ci * metaecc is enabled. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_ci ocfs2_block_check_compute(data, size, data + ot->ot_offset); 50662306a36Sopenharmony_ci} 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci/* 50962306a36Sopenharmony_ci * Quota blocks have their own trigger because the struct ocfs2_block_check 51062306a36Sopenharmony_ci * offset depends on the blocksize. 51162306a36Sopenharmony_ci */ 51262306a36Sopenharmony_cistatic void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, 51362306a36Sopenharmony_ci struct buffer_head *bh, 51462306a36Sopenharmony_ci void *data, size_t size) 51562306a36Sopenharmony_ci{ 51662306a36Sopenharmony_ci struct ocfs2_disk_dqtrailer *dqt = 51762306a36Sopenharmony_ci ocfs2_block_dqtrailer(size, data); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci /* 52062306a36Sopenharmony_ci * We aren't guaranteed to have the superblock here, so we 52162306a36Sopenharmony_ci * must unconditionally compute the ecc data. 52262306a36Sopenharmony_ci * __ocfs2_journal_access() will only set the triggers if 52362306a36Sopenharmony_ci * metaecc is enabled. 52462306a36Sopenharmony_ci */ 52562306a36Sopenharmony_ci ocfs2_block_check_compute(data, size, &dqt->dq_check); 52662306a36Sopenharmony_ci} 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci/* 52962306a36Sopenharmony_ci * Directory blocks also have their own trigger because the 53062306a36Sopenharmony_ci * struct ocfs2_block_check offset depends on the blocksize. 53162306a36Sopenharmony_ci */ 53262306a36Sopenharmony_cistatic void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, 53362306a36Sopenharmony_ci struct buffer_head *bh, 53462306a36Sopenharmony_ci void *data, size_t size) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct ocfs2_dir_block_trailer *trailer = 53762306a36Sopenharmony_ci ocfs2_dir_trailer_from_size(size, data); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci /* 54062306a36Sopenharmony_ci * We aren't guaranteed to have the superblock here, so we 54162306a36Sopenharmony_ci * must unconditionally compute the ecc data. 54262306a36Sopenharmony_ci * __ocfs2_journal_access() will only set the triggers if 54362306a36Sopenharmony_ci * metaecc is enabled. 54462306a36Sopenharmony_ci */ 54562306a36Sopenharmony_ci ocfs2_block_check_compute(data, size, &trailer->db_check); 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistatic void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, 54962306a36Sopenharmony_ci struct buffer_head *bh) 55062306a36Sopenharmony_ci{ 55162306a36Sopenharmony_ci mlog(ML_ERROR, 55262306a36Sopenharmony_ci "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " 55362306a36Sopenharmony_ci "bh->b_blocknr = %llu\n", 55462306a36Sopenharmony_ci (unsigned long)bh, 55562306a36Sopenharmony_ci (unsigned long long)bh->b_blocknr); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci ocfs2_error(bh->b_assoc_map->host->i_sb, 55862306a36Sopenharmony_ci "JBD2 has aborted our journal, ocfs2 cannot continue\n"); 55962306a36Sopenharmony_ci} 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_cistatic struct ocfs2_triggers di_triggers = { 56262306a36Sopenharmony_ci .ot_triggers = { 56362306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 56462306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 56562306a36Sopenharmony_ci }, 56662306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_dinode, i_check), 56762306a36Sopenharmony_ci}; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic struct ocfs2_triggers eb_triggers = { 57062306a36Sopenharmony_ci .ot_triggers = { 57162306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 57262306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 57362306a36Sopenharmony_ci }, 57462306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 57562306a36Sopenharmony_ci}; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_cistatic struct ocfs2_triggers rb_triggers = { 57862306a36Sopenharmony_ci .ot_triggers = { 57962306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 58062306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 58162306a36Sopenharmony_ci }, 58262306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), 58362306a36Sopenharmony_ci}; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_cistatic struct ocfs2_triggers gd_triggers = { 58662306a36Sopenharmony_ci .ot_triggers = { 58762306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 58862306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 58962306a36Sopenharmony_ci }, 59062306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), 59162306a36Sopenharmony_ci}; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_cistatic struct ocfs2_triggers db_triggers = { 59462306a36Sopenharmony_ci .ot_triggers = { 59562306a36Sopenharmony_ci .t_frozen = ocfs2_db_frozen_trigger, 59662306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 59762306a36Sopenharmony_ci }, 59862306a36Sopenharmony_ci}; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_cistatic struct ocfs2_triggers xb_triggers = { 60162306a36Sopenharmony_ci .ot_triggers = { 60262306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 60362306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 60462306a36Sopenharmony_ci }, 60562306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), 60662306a36Sopenharmony_ci}; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic struct ocfs2_triggers dq_triggers = { 60962306a36Sopenharmony_ci .ot_triggers = { 61062306a36Sopenharmony_ci .t_frozen = ocfs2_dq_frozen_trigger, 61162306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 61262306a36Sopenharmony_ci }, 61362306a36Sopenharmony_ci}; 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_cistatic struct ocfs2_triggers dr_triggers = { 61662306a36Sopenharmony_ci .ot_triggers = { 61762306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 61862306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 61962306a36Sopenharmony_ci }, 62062306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), 62162306a36Sopenharmony_ci}; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_cistatic struct ocfs2_triggers dl_triggers = { 62462306a36Sopenharmony_ci .ot_triggers = { 62562306a36Sopenharmony_ci .t_frozen = ocfs2_frozen_trigger, 62662306a36Sopenharmony_ci .t_abort = ocfs2_abort_trigger, 62762306a36Sopenharmony_ci }, 62862306a36Sopenharmony_ci .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), 62962306a36Sopenharmony_ci}; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_cistatic int __ocfs2_journal_access(handle_t *handle, 63262306a36Sopenharmony_ci struct ocfs2_caching_info *ci, 63362306a36Sopenharmony_ci struct buffer_head *bh, 63462306a36Sopenharmony_ci struct ocfs2_triggers *triggers, 63562306a36Sopenharmony_ci int type) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci int status; 63862306a36Sopenharmony_ci struct ocfs2_super *osb = 63962306a36Sopenharmony_ci OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci BUG_ON(!ci || !ci->ci_ops); 64262306a36Sopenharmony_ci BUG_ON(!handle); 64362306a36Sopenharmony_ci BUG_ON(!bh); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci trace_ocfs2_journal_access( 64662306a36Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci), 64762306a36Sopenharmony_ci (unsigned long long)bh->b_blocknr, type, bh->b_size); 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci /* we can safely remove this assertion after testing. */ 65062306a36Sopenharmony_ci if (!buffer_uptodate(bh)) { 65162306a36Sopenharmony_ci mlog(ML_ERROR, "giving me a buffer that's not uptodate!\n"); 65262306a36Sopenharmony_ci mlog(ML_ERROR, "b_blocknr=%llu, b_state=0x%lx\n", 65362306a36Sopenharmony_ci (unsigned long long)bh->b_blocknr, bh->b_state); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci lock_buffer(bh); 65662306a36Sopenharmony_ci /* 65762306a36Sopenharmony_ci * A previous transaction with a couple of buffer heads fail 65862306a36Sopenharmony_ci * to checkpoint, so all the bhs are marked as BH_Write_EIO. 65962306a36Sopenharmony_ci * For current transaction, the bh is just among those error 66062306a36Sopenharmony_ci * bhs which previous transaction handle. We can't just clear 66162306a36Sopenharmony_ci * its BH_Write_EIO and reuse directly, since other bhs are 66262306a36Sopenharmony_ci * not written to disk yet and that will cause metadata 66362306a36Sopenharmony_ci * inconsistency. So we should set fs read-only to avoid 66462306a36Sopenharmony_ci * further damage. 66562306a36Sopenharmony_ci */ 66662306a36Sopenharmony_ci if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) { 66762306a36Sopenharmony_ci unlock_buffer(bh); 66862306a36Sopenharmony_ci return ocfs2_error(osb->sb, "A previous attempt to " 66962306a36Sopenharmony_ci "write this buffer head failed\n"); 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci unlock_buffer(bh); 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci /* Set the current transaction information on the ci so 67562306a36Sopenharmony_ci * that the locking code knows whether it can drop it's locks 67662306a36Sopenharmony_ci * on this ci or not. We're protected from the commit 67762306a36Sopenharmony_ci * thread updating the current transaction id until 67862306a36Sopenharmony_ci * ocfs2_commit_trans() because ocfs2_start_trans() took 67962306a36Sopenharmony_ci * j_trans_barrier for us. */ 68062306a36Sopenharmony_ci ocfs2_set_ci_lock_trans(osb->journal, ci); 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci ocfs2_metadata_cache_io_lock(ci); 68362306a36Sopenharmony_ci switch (type) { 68462306a36Sopenharmony_ci case OCFS2_JOURNAL_ACCESS_CREATE: 68562306a36Sopenharmony_ci case OCFS2_JOURNAL_ACCESS_WRITE: 68662306a36Sopenharmony_ci status = jbd2_journal_get_write_access(handle, bh); 68762306a36Sopenharmony_ci break; 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci case OCFS2_JOURNAL_ACCESS_UNDO: 69062306a36Sopenharmony_ci status = jbd2_journal_get_undo_access(handle, bh); 69162306a36Sopenharmony_ci break; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci default: 69462306a36Sopenharmony_ci status = -EINVAL; 69562306a36Sopenharmony_ci mlog(ML_ERROR, "Unknown access type!\n"); 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci if (!status && ocfs2_meta_ecc(osb) && triggers) 69862306a36Sopenharmony_ci jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 69962306a36Sopenharmony_ci ocfs2_metadata_cache_io_unlock(ci); 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci if (status < 0) 70262306a36Sopenharmony_ci mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 70362306a36Sopenharmony_ci status, type); 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci return status; 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ciint ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, 70962306a36Sopenharmony_ci struct buffer_head *bh, int type) 71062306a36Sopenharmony_ci{ 71162306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type); 71262306a36Sopenharmony_ci} 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ciint ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, 71562306a36Sopenharmony_ci struct buffer_head *bh, int type) 71662306a36Sopenharmony_ci{ 71762306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type); 71862306a36Sopenharmony_ci} 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ciint ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, 72162306a36Sopenharmony_ci struct buffer_head *bh, int type) 72262306a36Sopenharmony_ci{ 72362306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, 72462306a36Sopenharmony_ci type); 72562306a36Sopenharmony_ci} 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ciint ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, 72862306a36Sopenharmony_ci struct buffer_head *bh, int type) 72962306a36Sopenharmony_ci{ 73062306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type); 73162306a36Sopenharmony_ci} 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ciint ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, 73462306a36Sopenharmony_ci struct buffer_head *bh, int type) 73562306a36Sopenharmony_ci{ 73662306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type); 73762306a36Sopenharmony_ci} 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ciint ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, 74062306a36Sopenharmony_ci struct buffer_head *bh, int type) 74162306a36Sopenharmony_ci{ 74262306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type); 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ciint ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, 74662306a36Sopenharmony_ci struct buffer_head *bh, int type) 74762306a36Sopenharmony_ci{ 74862306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type); 74962306a36Sopenharmony_ci} 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ciint ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, 75262306a36Sopenharmony_ci struct buffer_head *bh, int type) 75362306a36Sopenharmony_ci{ 75462306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type); 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ciint ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci, 75862306a36Sopenharmony_ci struct buffer_head *bh, int type) 75962306a36Sopenharmony_ci{ 76062306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type); 76162306a36Sopenharmony_ci} 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ciint ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, 76462306a36Sopenharmony_ci struct buffer_head *bh, int type) 76562306a36Sopenharmony_ci{ 76662306a36Sopenharmony_ci return __ocfs2_journal_access(handle, ci, bh, NULL, type); 76762306a36Sopenharmony_ci} 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_civoid ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci int status; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr); 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci status = jbd2_journal_dirty_metadata(handle, bh); 77662306a36Sopenharmony_ci if (status) { 77762306a36Sopenharmony_ci mlog_errno(status); 77862306a36Sopenharmony_ci if (!is_handle_aborted(handle)) { 77962306a36Sopenharmony_ci journal_t *journal = handle->h_transaction->t_journal; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. " 78262306a36Sopenharmony_ci "Aborting transaction and journal.\n"); 78362306a36Sopenharmony_ci handle->h_err = status; 78462306a36Sopenharmony_ci jbd2_journal_abort_handle(handle); 78562306a36Sopenharmony_ci jbd2_journal_abort(journal, status); 78662306a36Sopenharmony_ci ocfs2_abort(bh->b_assoc_map->host->i_sb, 78762306a36Sopenharmony_ci "Journal already aborted.\n"); 78862306a36Sopenharmony_ci } 78962306a36Sopenharmony_ci } 79062306a36Sopenharmony_ci} 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_civoid ocfs2_set_journal_params(struct ocfs2_super *osb) 79562306a36Sopenharmony_ci{ 79662306a36Sopenharmony_ci journal_t *journal = osb->journal->j_journal; 79762306a36Sopenharmony_ci unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (osb->osb_commit_interval) 80062306a36Sopenharmony_ci commit_interval = osb->osb_commit_interval; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 80362306a36Sopenharmony_ci journal->j_commit_interval = commit_interval; 80462306a36Sopenharmony_ci if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) 80562306a36Sopenharmony_ci journal->j_flags |= JBD2_BARRIER; 80662306a36Sopenharmony_ci else 80762306a36Sopenharmony_ci journal->j_flags &= ~JBD2_BARRIER; 80862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci/* 81262306a36Sopenharmony_ci * alloc & initialize skeleton for journal structure. 81362306a36Sopenharmony_ci * ocfs2_journal_init() will make fs have journal ability. 81462306a36Sopenharmony_ci */ 81562306a36Sopenharmony_ciint ocfs2_journal_alloc(struct ocfs2_super *osb) 81662306a36Sopenharmony_ci{ 81762306a36Sopenharmony_ci int status = 0; 81862306a36Sopenharmony_ci struct ocfs2_journal *journal; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); 82162306a36Sopenharmony_ci if (!journal) { 82262306a36Sopenharmony_ci mlog(ML_ERROR, "unable to alloc journal\n"); 82362306a36Sopenharmony_ci status = -ENOMEM; 82462306a36Sopenharmony_ci goto bail; 82562306a36Sopenharmony_ci } 82662306a36Sopenharmony_ci osb->journal = journal; 82762306a36Sopenharmony_ci journal->j_osb = osb; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci atomic_set(&journal->j_num_trans, 0); 83062306a36Sopenharmony_ci init_rwsem(&journal->j_trans_barrier); 83162306a36Sopenharmony_ci init_waitqueue_head(&journal->j_checkpointed); 83262306a36Sopenharmony_ci spin_lock_init(&journal->j_lock); 83362306a36Sopenharmony_ci journal->j_trans_id = 1UL; 83462306a36Sopenharmony_ci INIT_LIST_HEAD(&journal->j_la_cleanups); 83562306a36Sopenharmony_ci INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); 83662306a36Sopenharmony_ci journal->j_state = OCFS2_JOURNAL_FREE; 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_cibail: 83962306a36Sopenharmony_ci return status; 84062306a36Sopenharmony_ci} 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_cistatic int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) 84362306a36Sopenharmony_ci{ 84462306a36Sopenharmony_ci struct address_space *mapping = jinode->i_vfs_inode->i_mapping; 84562306a36Sopenharmony_ci struct writeback_control wbc = { 84662306a36Sopenharmony_ci .sync_mode = WB_SYNC_ALL, 84762306a36Sopenharmony_ci .nr_to_write = mapping->nrpages * 2, 84862306a36Sopenharmony_ci .range_start = jinode->i_dirty_start, 84962306a36Sopenharmony_ci .range_end = jinode->i_dirty_end, 85062306a36Sopenharmony_ci }; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci return filemap_fdatawrite_wbc(mapping, &wbc); 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ciint ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) 85662306a36Sopenharmony_ci{ 85762306a36Sopenharmony_ci int status = -1; 85862306a36Sopenharmony_ci struct inode *inode = NULL; /* the journal inode */ 85962306a36Sopenharmony_ci journal_t *j_journal = NULL; 86062306a36Sopenharmony_ci struct ocfs2_journal *journal = osb->journal; 86162306a36Sopenharmony_ci struct ocfs2_dinode *di = NULL; 86262306a36Sopenharmony_ci struct buffer_head *bh = NULL; 86362306a36Sopenharmony_ci int inode_lock = 0; 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci BUG_ON(!journal); 86662306a36Sopenharmony_ci /* already have the inode for our journal */ 86762306a36Sopenharmony_ci inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 86862306a36Sopenharmony_ci osb->slot_num); 86962306a36Sopenharmony_ci if (inode == NULL) { 87062306a36Sopenharmony_ci status = -EACCES; 87162306a36Sopenharmony_ci mlog_errno(status); 87262306a36Sopenharmony_ci goto done; 87362306a36Sopenharmony_ci } 87462306a36Sopenharmony_ci if (is_bad_inode(inode)) { 87562306a36Sopenharmony_ci mlog(ML_ERROR, "access error (bad inode)\n"); 87662306a36Sopenharmony_ci iput(inode); 87762306a36Sopenharmony_ci inode = NULL; 87862306a36Sopenharmony_ci status = -EACCES; 87962306a36Sopenharmony_ci goto done; 88062306a36Sopenharmony_ci } 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci SET_INODE_JOURNAL(inode); 88362306a36Sopenharmony_ci OCFS2_I(inode)->ip_open_count++; 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci /* Skip recovery waits here - journal inode metadata never 88662306a36Sopenharmony_ci * changes in a live cluster so it can be considered an 88762306a36Sopenharmony_ci * exception to the rule. */ 88862306a36Sopenharmony_ci status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 88962306a36Sopenharmony_ci if (status < 0) { 89062306a36Sopenharmony_ci if (status != -ERESTARTSYS) 89162306a36Sopenharmony_ci mlog(ML_ERROR, "Could not get lock on journal!\n"); 89262306a36Sopenharmony_ci goto done; 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci inode_lock = 1; 89662306a36Sopenharmony_ci di = (struct ocfs2_dinode *)bh->b_data; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { 89962306a36Sopenharmony_ci mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", 90062306a36Sopenharmony_ci i_size_read(inode)); 90162306a36Sopenharmony_ci status = -EINVAL; 90262306a36Sopenharmony_ci goto done; 90362306a36Sopenharmony_ci } 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci trace_ocfs2_journal_init(i_size_read(inode), 90662306a36Sopenharmony_ci (unsigned long long)inode->i_blocks, 90762306a36Sopenharmony_ci OCFS2_I(inode)->ip_clusters); 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci /* call the kernels journal init function now */ 91062306a36Sopenharmony_ci j_journal = jbd2_journal_init_inode(inode); 91162306a36Sopenharmony_ci if (IS_ERR(j_journal)) { 91262306a36Sopenharmony_ci mlog(ML_ERROR, "Linux journal layer error\n"); 91362306a36Sopenharmony_ci status = PTR_ERR(j_journal); 91462306a36Sopenharmony_ci goto done; 91562306a36Sopenharmony_ci } 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci trace_ocfs2_journal_init_maxlen(j_journal->j_total_len); 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 92062306a36Sopenharmony_ci OCFS2_JOURNAL_DIRTY_FL); 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci journal->j_journal = j_journal; 92362306a36Sopenharmony_ci journal->j_journal->j_submit_inode_data_buffers = 92462306a36Sopenharmony_ci ocfs2_journal_submit_inode_data_buffers; 92562306a36Sopenharmony_ci journal->j_journal->j_finish_inode_data_buffers = 92662306a36Sopenharmony_ci jbd2_journal_finish_inode_data_buffers; 92762306a36Sopenharmony_ci journal->j_inode = inode; 92862306a36Sopenharmony_ci journal->j_bh = bh; 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci ocfs2_set_journal_params(osb); 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci journal->j_state = OCFS2_JOURNAL_LOADED; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci status = 0; 93562306a36Sopenharmony_cidone: 93662306a36Sopenharmony_ci if (status < 0) { 93762306a36Sopenharmony_ci if (inode_lock) 93862306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 93962306a36Sopenharmony_ci brelse(bh); 94062306a36Sopenharmony_ci if (inode) { 94162306a36Sopenharmony_ci OCFS2_I(inode)->ip_open_count--; 94262306a36Sopenharmony_ci iput(inode); 94362306a36Sopenharmony_ci } 94462306a36Sopenharmony_ci } 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci return status; 94762306a36Sopenharmony_ci} 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_cistatic void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) 95062306a36Sopenharmony_ci{ 95162306a36Sopenharmony_ci le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); 95262306a36Sopenharmony_ci} 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_cistatic u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) 95562306a36Sopenharmony_ci{ 95662306a36Sopenharmony_ci return le32_to_cpu(di->id1.journal1.ij_recovery_generation); 95762306a36Sopenharmony_ci} 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_cistatic int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 96062306a36Sopenharmony_ci int dirty, int replayed) 96162306a36Sopenharmony_ci{ 96262306a36Sopenharmony_ci int status; 96362306a36Sopenharmony_ci unsigned int flags; 96462306a36Sopenharmony_ci struct ocfs2_journal *journal = osb->journal; 96562306a36Sopenharmony_ci struct buffer_head *bh = journal->j_bh; 96662306a36Sopenharmony_ci struct ocfs2_dinode *fe; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci fe = (struct ocfs2_dinode *)bh->b_data; 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci /* The journal bh on the osb always comes from ocfs2_journal_init() 97162306a36Sopenharmony_ci * and was validated there inside ocfs2_inode_lock_full(). It's a 97262306a36Sopenharmony_ci * code bug if we mess it up. */ 97362306a36Sopenharmony_ci BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci flags = le32_to_cpu(fe->id1.journal1.ij_flags); 97662306a36Sopenharmony_ci if (dirty) 97762306a36Sopenharmony_ci flags |= OCFS2_JOURNAL_DIRTY_FL; 97862306a36Sopenharmony_ci else 97962306a36Sopenharmony_ci flags &= ~OCFS2_JOURNAL_DIRTY_FL; 98062306a36Sopenharmony_ci fe->id1.journal1.ij_flags = cpu_to_le32(flags); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci if (replayed) 98362306a36Sopenharmony_ci ocfs2_bump_recovery_generation(fe); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 98662306a36Sopenharmony_ci status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode)); 98762306a36Sopenharmony_ci if (status < 0) 98862306a36Sopenharmony_ci mlog_errno(status); 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci return status; 99162306a36Sopenharmony_ci} 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci/* 99462306a36Sopenharmony_ci * If the journal has been kmalloc'd it needs to be freed after this 99562306a36Sopenharmony_ci * call. 99662306a36Sopenharmony_ci */ 99762306a36Sopenharmony_civoid ocfs2_journal_shutdown(struct ocfs2_super *osb) 99862306a36Sopenharmony_ci{ 99962306a36Sopenharmony_ci struct ocfs2_journal *journal = NULL; 100062306a36Sopenharmony_ci int status = 0; 100162306a36Sopenharmony_ci struct inode *inode = NULL; 100262306a36Sopenharmony_ci int num_running_trans = 0; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci BUG_ON(!osb); 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci journal = osb->journal; 100762306a36Sopenharmony_ci if (!journal) 100862306a36Sopenharmony_ci goto done; 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci inode = journal->j_inode; 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci if (journal->j_state != OCFS2_JOURNAL_LOADED) 101362306a36Sopenharmony_ci goto done; 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci /* need to inc inode use count - jbd2_journal_destroy will iput. */ 101662306a36Sopenharmony_ci if (!igrab(inode)) 101762306a36Sopenharmony_ci BUG(); 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci num_running_trans = atomic_read(&(osb->journal->j_num_trans)); 102062306a36Sopenharmony_ci trace_ocfs2_journal_shutdown(num_running_trans); 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci /* Do a commit_cache here. It will flush our journal, *and* 102362306a36Sopenharmony_ci * release any locks that are still held. 102462306a36Sopenharmony_ci * set the SHUTDOWN flag and release the trans lock. 102562306a36Sopenharmony_ci * the commit thread will take the trans lock for us below. */ 102662306a36Sopenharmony_ci journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN; 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not 102962306a36Sopenharmony_ci * drop the trans_lock (which we want to hold until we 103062306a36Sopenharmony_ci * completely destroy the journal. */ 103162306a36Sopenharmony_ci if (osb->commit_task) { 103262306a36Sopenharmony_ci /* Wait for the commit thread */ 103362306a36Sopenharmony_ci trace_ocfs2_journal_shutdown_wait(osb->commit_task); 103462306a36Sopenharmony_ci kthread_stop(osb->commit_task); 103562306a36Sopenharmony_ci osb->commit_task = NULL; 103662306a36Sopenharmony_ci } 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci if (ocfs2_mount_local(osb)) { 104162306a36Sopenharmony_ci jbd2_journal_lock_updates(journal->j_journal); 104262306a36Sopenharmony_ci status = jbd2_journal_flush(journal->j_journal, 0); 104362306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal->j_journal); 104462306a36Sopenharmony_ci if (status < 0) 104562306a36Sopenharmony_ci mlog_errno(status); 104662306a36Sopenharmony_ci } 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci /* Shutdown the kernel journal system */ 104962306a36Sopenharmony_ci if (!jbd2_journal_destroy(journal->j_journal) && !status) { 105062306a36Sopenharmony_ci /* 105162306a36Sopenharmony_ci * Do not toggle if flush was unsuccessful otherwise 105262306a36Sopenharmony_ci * will leave dirty metadata in a "clean" journal 105362306a36Sopenharmony_ci */ 105462306a36Sopenharmony_ci status = ocfs2_journal_toggle_dirty(osb, 0, 0); 105562306a36Sopenharmony_ci if (status < 0) 105662306a36Sopenharmony_ci mlog_errno(status); 105762306a36Sopenharmony_ci } 105862306a36Sopenharmony_ci journal->j_journal = NULL; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci OCFS2_I(inode)->ip_open_count--; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci /* unlock our journal */ 106362306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci brelse(journal->j_bh); 106662306a36Sopenharmony_ci journal->j_bh = NULL; 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci journal->j_state = OCFS2_JOURNAL_FREE; 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_cidone: 107162306a36Sopenharmony_ci iput(inode); 107262306a36Sopenharmony_ci kfree(journal); 107362306a36Sopenharmony_ci osb->journal = NULL; 107462306a36Sopenharmony_ci} 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_cistatic void ocfs2_clear_journal_error(struct super_block *sb, 107762306a36Sopenharmony_ci journal_t *journal, 107862306a36Sopenharmony_ci int slot) 107962306a36Sopenharmony_ci{ 108062306a36Sopenharmony_ci int olderr; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci olderr = jbd2_journal_errno(journal); 108362306a36Sopenharmony_ci if (olderr) { 108462306a36Sopenharmony_ci mlog(ML_ERROR, "File system error %d recorded in " 108562306a36Sopenharmony_ci "journal %u.\n", olderr, slot); 108662306a36Sopenharmony_ci mlog(ML_ERROR, "File system on device %s needs checking.\n", 108762306a36Sopenharmony_ci sb->s_id); 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci jbd2_journal_ack_err(journal); 109062306a36Sopenharmony_ci jbd2_journal_clear_err(journal); 109162306a36Sopenharmony_ci } 109262306a36Sopenharmony_ci} 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ciint ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci int status = 0; 109762306a36Sopenharmony_ci struct ocfs2_super *osb; 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci BUG_ON(!journal); 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci osb = journal->j_osb; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci status = jbd2_journal_load(journal->j_journal); 110462306a36Sopenharmony_ci if (status < 0) { 110562306a36Sopenharmony_ci mlog(ML_ERROR, "Failed to load journal!\n"); 110662306a36Sopenharmony_ci goto done; 110762306a36Sopenharmony_ci } 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci if (replayed) { 111262306a36Sopenharmony_ci jbd2_journal_lock_updates(journal->j_journal); 111362306a36Sopenharmony_ci status = jbd2_journal_flush(journal->j_journal, 0); 111462306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal->j_journal); 111562306a36Sopenharmony_ci if (status < 0) 111662306a36Sopenharmony_ci mlog_errno(status); 111762306a36Sopenharmony_ci } 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci status = ocfs2_journal_toggle_dirty(osb, 1, replayed); 112062306a36Sopenharmony_ci if (status < 0) { 112162306a36Sopenharmony_ci mlog_errno(status); 112262306a36Sopenharmony_ci goto done; 112362306a36Sopenharmony_ci } 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci /* Launch the commit thread */ 112662306a36Sopenharmony_ci if (!local) { 112762306a36Sopenharmony_ci osb->commit_task = kthread_run(ocfs2_commit_thread, osb, 112862306a36Sopenharmony_ci "ocfs2cmt-%s", osb->uuid_str); 112962306a36Sopenharmony_ci if (IS_ERR(osb->commit_task)) { 113062306a36Sopenharmony_ci status = PTR_ERR(osb->commit_task); 113162306a36Sopenharmony_ci osb->commit_task = NULL; 113262306a36Sopenharmony_ci mlog(ML_ERROR, "unable to launch ocfs2commit thread, " 113362306a36Sopenharmony_ci "error=%d", status); 113462306a36Sopenharmony_ci goto done; 113562306a36Sopenharmony_ci } 113662306a36Sopenharmony_ci } else 113762306a36Sopenharmony_ci osb->commit_task = NULL; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_cidone: 114062306a36Sopenharmony_ci return status; 114162306a36Sopenharmony_ci} 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci/* 'full' flag tells us whether we clear out all blocks or if we just 114562306a36Sopenharmony_ci * mark the journal clean */ 114662306a36Sopenharmony_ciint ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) 114762306a36Sopenharmony_ci{ 114862306a36Sopenharmony_ci int status; 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci BUG_ON(!journal); 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci status = jbd2_journal_wipe(journal->j_journal, full); 115362306a36Sopenharmony_ci if (status < 0) { 115462306a36Sopenharmony_ci mlog_errno(status); 115562306a36Sopenharmony_ci goto bail; 115662306a36Sopenharmony_ci } 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); 115962306a36Sopenharmony_ci if (status < 0) 116062306a36Sopenharmony_ci mlog_errno(status); 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_cibail: 116362306a36Sopenharmony_ci return status; 116462306a36Sopenharmony_ci} 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_cistatic int ocfs2_recovery_completed(struct ocfs2_super *osb) 116762306a36Sopenharmony_ci{ 116862306a36Sopenharmony_ci int empty; 116962306a36Sopenharmony_ci struct ocfs2_recovery_map *rm = osb->recovery_map; 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 117262306a36Sopenharmony_ci empty = (rm->rm_used == 0); 117362306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci return empty; 117662306a36Sopenharmony_ci} 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_civoid ocfs2_wait_for_recovery(struct ocfs2_super *osb) 117962306a36Sopenharmony_ci{ 118062306a36Sopenharmony_ci wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); 118162306a36Sopenharmony_ci} 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci/* 118462306a36Sopenharmony_ci * JBD Might read a cached version of another nodes journal file. We 118562306a36Sopenharmony_ci * don't want this as this file changes often and we get no 118662306a36Sopenharmony_ci * notification on those changes. The only way to be sure that we've 118762306a36Sopenharmony_ci * got the most up to date version of those blocks then is to force 118862306a36Sopenharmony_ci * read them off disk. Just searching through the buffer cache won't 118962306a36Sopenharmony_ci * work as there may be pages backing this file which are still marked 119062306a36Sopenharmony_ci * up to date. We know things can't change on this file underneath us 119162306a36Sopenharmony_ci * as we have the lock by now :) 119262306a36Sopenharmony_ci */ 119362306a36Sopenharmony_cistatic int ocfs2_force_read_journal(struct inode *inode) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci int status = 0; 119662306a36Sopenharmony_ci int i; 119762306a36Sopenharmony_ci u64 v_blkno, p_blkno, p_blocks, num_blocks; 119862306a36Sopenharmony_ci struct buffer_head *bh = NULL; 119962306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 120262306a36Sopenharmony_ci v_blkno = 0; 120362306a36Sopenharmony_ci while (v_blkno < num_blocks) { 120462306a36Sopenharmony_ci status = ocfs2_extent_map_get_blocks(inode, v_blkno, 120562306a36Sopenharmony_ci &p_blkno, &p_blocks, NULL); 120662306a36Sopenharmony_ci if (status < 0) { 120762306a36Sopenharmony_ci mlog_errno(status); 120862306a36Sopenharmony_ci goto bail; 120962306a36Sopenharmony_ci } 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci for (i = 0; i < p_blocks; i++, p_blkno++) { 121262306a36Sopenharmony_ci bh = __find_get_block(osb->sb->s_bdev, p_blkno, 121362306a36Sopenharmony_ci osb->sb->s_blocksize); 121462306a36Sopenharmony_ci /* block not cached. */ 121562306a36Sopenharmony_ci if (!bh) 121662306a36Sopenharmony_ci continue; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci brelse(bh); 121962306a36Sopenharmony_ci bh = NULL; 122062306a36Sopenharmony_ci /* We are reading journal data which should not 122162306a36Sopenharmony_ci * be put in the uptodate cache. 122262306a36Sopenharmony_ci */ 122362306a36Sopenharmony_ci status = ocfs2_read_blocks_sync(osb, p_blkno, 1, &bh); 122462306a36Sopenharmony_ci if (status < 0) { 122562306a36Sopenharmony_ci mlog_errno(status); 122662306a36Sopenharmony_ci goto bail; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci brelse(bh); 123062306a36Sopenharmony_ci bh = NULL; 123162306a36Sopenharmony_ci } 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci v_blkno += p_blocks; 123462306a36Sopenharmony_ci } 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_cibail: 123762306a36Sopenharmony_ci return status; 123862306a36Sopenharmony_ci} 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_cistruct ocfs2_la_recovery_item { 124162306a36Sopenharmony_ci struct list_head lri_list; 124262306a36Sopenharmony_ci int lri_slot; 124362306a36Sopenharmony_ci struct ocfs2_dinode *lri_la_dinode; 124462306a36Sopenharmony_ci struct ocfs2_dinode *lri_tl_dinode; 124562306a36Sopenharmony_ci struct ocfs2_quota_recovery *lri_qrec; 124662306a36Sopenharmony_ci enum ocfs2_orphan_reco_type lri_orphan_reco_type; 124762306a36Sopenharmony_ci}; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci/* Does the second half of the recovery process. By this point, the 125062306a36Sopenharmony_ci * node is marked clean and can actually be considered recovered, 125162306a36Sopenharmony_ci * hence it's no longer in the recovery map, but there's still some 125262306a36Sopenharmony_ci * cleanup we can do which shouldn't happen within the recovery thread 125362306a36Sopenharmony_ci * as locking in that context becomes very difficult if we are to take 125462306a36Sopenharmony_ci * recovering nodes into account. 125562306a36Sopenharmony_ci * 125662306a36Sopenharmony_ci * NOTE: This function can and will sleep on recovery of other nodes 125762306a36Sopenharmony_ci * during cluster locking, just like any other ocfs2 process. 125862306a36Sopenharmony_ci */ 125962306a36Sopenharmony_civoid ocfs2_complete_recovery(struct work_struct *work) 126062306a36Sopenharmony_ci{ 126162306a36Sopenharmony_ci int ret = 0; 126262306a36Sopenharmony_ci struct ocfs2_journal *journal = 126362306a36Sopenharmony_ci container_of(work, struct ocfs2_journal, j_recovery_work); 126462306a36Sopenharmony_ci struct ocfs2_super *osb = journal->j_osb; 126562306a36Sopenharmony_ci struct ocfs2_dinode *la_dinode, *tl_dinode; 126662306a36Sopenharmony_ci struct ocfs2_la_recovery_item *item, *n; 126762306a36Sopenharmony_ci struct ocfs2_quota_recovery *qrec; 126862306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type; 126962306a36Sopenharmony_ci LIST_HEAD(tmp_la_list); 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci trace_ocfs2_complete_recovery( 127262306a36Sopenharmony_ci (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno); 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci spin_lock(&journal->j_lock); 127562306a36Sopenharmony_ci list_splice_init(&journal->j_la_cleanups, &tmp_la_list); 127662306a36Sopenharmony_ci spin_unlock(&journal->j_lock); 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { 127962306a36Sopenharmony_ci list_del_init(&item->lri_list); 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci ocfs2_wait_on_quotas(osb); 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci la_dinode = item->lri_la_dinode; 128462306a36Sopenharmony_ci tl_dinode = item->lri_tl_dinode; 128562306a36Sopenharmony_ci qrec = item->lri_qrec; 128662306a36Sopenharmony_ci orphan_reco_type = item->lri_orphan_reco_type; 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci trace_ocfs2_complete_recovery_slot(item->lri_slot, 128962306a36Sopenharmony_ci la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, 129062306a36Sopenharmony_ci tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0, 129162306a36Sopenharmony_ci qrec); 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci if (la_dinode) { 129462306a36Sopenharmony_ci ret = ocfs2_complete_local_alloc_recovery(osb, 129562306a36Sopenharmony_ci la_dinode); 129662306a36Sopenharmony_ci if (ret < 0) 129762306a36Sopenharmony_ci mlog_errno(ret); 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci kfree(la_dinode); 130062306a36Sopenharmony_ci } 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci if (tl_dinode) { 130362306a36Sopenharmony_ci ret = ocfs2_complete_truncate_log_recovery(osb, 130462306a36Sopenharmony_ci tl_dinode); 130562306a36Sopenharmony_ci if (ret < 0) 130662306a36Sopenharmony_ci mlog_errno(ret); 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci kfree(tl_dinode); 130962306a36Sopenharmony_ci } 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci ret = ocfs2_recover_orphans(osb, item->lri_slot, 131262306a36Sopenharmony_ci orphan_reco_type); 131362306a36Sopenharmony_ci if (ret < 0) 131462306a36Sopenharmony_ci mlog_errno(ret); 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci if (qrec) { 131762306a36Sopenharmony_ci ret = ocfs2_finish_quota_recovery(osb, qrec, 131862306a36Sopenharmony_ci item->lri_slot); 131962306a36Sopenharmony_ci if (ret < 0) 132062306a36Sopenharmony_ci mlog_errno(ret); 132162306a36Sopenharmony_ci /* Recovery info is already freed now */ 132262306a36Sopenharmony_ci } 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci kfree(item); 132562306a36Sopenharmony_ci } 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci trace_ocfs2_complete_recovery_end(ret); 132862306a36Sopenharmony_ci} 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_ci/* NOTE: This function always eats your references to la_dinode and 133162306a36Sopenharmony_ci * tl_dinode, either manually on error, or by passing them to 133262306a36Sopenharmony_ci * ocfs2_complete_recovery */ 133362306a36Sopenharmony_cistatic void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 133462306a36Sopenharmony_ci int slot_num, 133562306a36Sopenharmony_ci struct ocfs2_dinode *la_dinode, 133662306a36Sopenharmony_ci struct ocfs2_dinode *tl_dinode, 133762306a36Sopenharmony_ci struct ocfs2_quota_recovery *qrec, 133862306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type) 133962306a36Sopenharmony_ci{ 134062306a36Sopenharmony_ci struct ocfs2_la_recovery_item *item; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); 134362306a36Sopenharmony_ci if (!item) { 134462306a36Sopenharmony_ci /* Though we wish to avoid it, we are in fact safe in 134562306a36Sopenharmony_ci * skipping local alloc cleanup as fsck.ocfs2 is more 134662306a36Sopenharmony_ci * than capable of reclaiming unused space. */ 134762306a36Sopenharmony_ci kfree(la_dinode); 134862306a36Sopenharmony_ci kfree(tl_dinode); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci if (qrec) 135162306a36Sopenharmony_ci ocfs2_free_quota_recovery(qrec); 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci mlog_errno(-ENOMEM); 135462306a36Sopenharmony_ci return; 135562306a36Sopenharmony_ci } 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_ci INIT_LIST_HEAD(&item->lri_list); 135862306a36Sopenharmony_ci item->lri_la_dinode = la_dinode; 135962306a36Sopenharmony_ci item->lri_slot = slot_num; 136062306a36Sopenharmony_ci item->lri_tl_dinode = tl_dinode; 136162306a36Sopenharmony_ci item->lri_qrec = qrec; 136262306a36Sopenharmony_ci item->lri_orphan_reco_type = orphan_reco_type; 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci spin_lock(&journal->j_lock); 136562306a36Sopenharmony_ci list_add_tail(&item->lri_list, &journal->j_la_cleanups); 136662306a36Sopenharmony_ci queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work); 136762306a36Sopenharmony_ci spin_unlock(&journal->j_lock); 136862306a36Sopenharmony_ci} 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci/* Called by the mount code to queue recovery the last part of 137162306a36Sopenharmony_ci * recovery for it's own and offline slot(s). */ 137262306a36Sopenharmony_civoid ocfs2_complete_mount_recovery(struct ocfs2_super *osb) 137362306a36Sopenharmony_ci{ 137462306a36Sopenharmony_ci struct ocfs2_journal *journal = osb->journal; 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci if (ocfs2_is_hard_readonly(osb)) 137762306a36Sopenharmony_ci return; 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci /* No need to queue up our truncate_log as regular cleanup will catch 138062306a36Sopenharmony_ci * that */ 138162306a36Sopenharmony_ci ocfs2_queue_recovery_completion(journal, osb->slot_num, 138262306a36Sopenharmony_ci osb->local_alloc_copy, NULL, NULL, 138362306a36Sopenharmony_ci ORPHAN_NEED_TRUNCATE); 138462306a36Sopenharmony_ci ocfs2_schedule_truncate_log_flush(osb, 0); 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci osb->local_alloc_copy = NULL; 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci /* queue to recover orphan slots for all offline slots */ 138962306a36Sopenharmony_ci ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); 139062306a36Sopenharmony_ci ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); 139162306a36Sopenharmony_ci ocfs2_free_replay_slots(osb); 139262306a36Sopenharmony_ci} 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_civoid ocfs2_complete_quota_recovery(struct ocfs2_super *osb) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci if (osb->quota_rec) { 139762306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, 139862306a36Sopenharmony_ci osb->slot_num, 139962306a36Sopenharmony_ci NULL, 140062306a36Sopenharmony_ci NULL, 140162306a36Sopenharmony_ci osb->quota_rec, 140262306a36Sopenharmony_ci ORPHAN_NEED_TRUNCATE); 140362306a36Sopenharmony_ci osb->quota_rec = NULL; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci} 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_cistatic int __ocfs2_recovery_thread(void *arg) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci int status, node_num, slot_num; 141062306a36Sopenharmony_ci struct ocfs2_super *osb = arg; 141162306a36Sopenharmony_ci struct ocfs2_recovery_map *rm = osb->recovery_map; 141262306a36Sopenharmony_ci int *rm_quota = NULL; 141362306a36Sopenharmony_ci int rm_quota_used = 0, i; 141462306a36Sopenharmony_ci struct ocfs2_quota_recovery *qrec; 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci /* Whether the quota supported. */ 141762306a36Sopenharmony_ci int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, 141862306a36Sopenharmony_ci OCFS2_FEATURE_RO_COMPAT_USRQUOTA) 141962306a36Sopenharmony_ci || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, 142062306a36Sopenharmony_ci OCFS2_FEATURE_RO_COMPAT_GRPQUOTA); 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_ci status = ocfs2_wait_on_mount(osb); 142362306a36Sopenharmony_ci if (status < 0) { 142462306a36Sopenharmony_ci goto bail; 142562306a36Sopenharmony_ci } 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci if (quota_enabled) { 142862306a36Sopenharmony_ci rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS); 142962306a36Sopenharmony_ci if (!rm_quota) { 143062306a36Sopenharmony_ci status = -ENOMEM; 143162306a36Sopenharmony_ci goto bail; 143262306a36Sopenharmony_ci } 143362306a36Sopenharmony_ci } 143462306a36Sopenharmony_cirestart: 143562306a36Sopenharmony_ci status = ocfs2_super_lock(osb, 1); 143662306a36Sopenharmony_ci if (status < 0) { 143762306a36Sopenharmony_ci mlog_errno(status); 143862306a36Sopenharmony_ci goto bail; 143962306a36Sopenharmony_ci } 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci status = ocfs2_compute_replay_slots(osb); 144262306a36Sopenharmony_ci if (status < 0) 144362306a36Sopenharmony_ci mlog_errno(status); 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci /* queue recovery for our own slot */ 144662306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, 144762306a36Sopenharmony_ci NULL, NULL, ORPHAN_NO_NEED_TRUNCATE); 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 145062306a36Sopenharmony_ci while (rm->rm_used) { 145162306a36Sopenharmony_ci /* It's always safe to remove entry zero, as we won't 145262306a36Sopenharmony_ci * clear it until ocfs2_recover_node() has succeeded. */ 145362306a36Sopenharmony_ci node_num = rm->rm_entries[0]; 145462306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 145562306a36Sopenharmony_ci slot_num = ocfs2_node_num_to_slot(osb, node_num); 145662306a36Sopenharmony_ci trace_ocfs2_recovery_thread_node(node_num, slot_num); 145762306a36Sopenharmony_ci if (slot_num == -ENOENT) { 145862306a36Sopenharmony_ci status = 0; 145962306a36Sopenharmony_ci goto skip_recovery; 146062306a36Sopenharmony_ci } 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci /* It is a bit subtle with quota recovery. We cannot do it 146362306a36Sopenharmony_ci * immediately because we have to obtain cluster locks from 146462306a36Sopenharmony_ci * quota files and we also don't want to just skip it because 146562306a36Sopenharmony_ci * then quota usage would be out of sync until some node takes 146662306a36Sopenharmony_ci * the slot. So we remember which nodes need quota recovery 146762306a36Sopenharmony_ci * and when everything else is done, we recover quotas. */ 146862306a36Sopenharmony_ci if (quota_enabled) { 146962306a36Sopenharmony_ci for (i = 0; i < rm_quota_used 147062306a36Sopenharmony_ci && rm_quota[i] != slot_num; i++) 147162306a36Sopenharmony_ci ; 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci if (i == rm_quota_used) 147462306a36Sopenharmony_ci rm_quota[rm_quota_used++] = slot_num; 147562306a36Sopenharmony_ci } 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci status = ocfs2_recover_node(osb, node_num, slot_num); 147862306a36Sopenharmony_ciskip_recovery: 147962306a36Sopenharmony_ci if (!status) { 148062306a36Sopenharmony_ci ocfs2_recovery_map_clear(osb, node_num); 148162306a36Sopenharmony_ci } else { 148262306a36Sopenharmony_ci mlog(ML_ERROR, 148362306a36Sopenharmony_ci "Error %d recovering node %d on device (%u,%u)!\n", 148462306a36Sopenharmony_ci status, node_num, 148562306a36Sopenharmony_ci MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 148662306a36Sopenharmony_ci mlog(ML_ERROR, "Volume requires unmount.\n"); 148762306a36Sopenharmony_ci } 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 149262306a36Sopenharmony_ci trace_ocfs2_recovery_thread_end(status); 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci /* Refresh all journal recovery generations from disk */ 149562306a36Sopenharmony_ci status = ocfs2_check_journals_nolocks(osb); 149662306a36Sopenharmony_ci status = (status == -EROFS) ? 0 : status; 149762306a36Sopenharmony_ci if (status < 0) 149862306a36Sopenharmony_ci mlog_errno(status); 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci /* Now it is right time to recover quotas... We have to do this under 150162306a36Sopenharmony_ci * superblock lock so that no one can start using the slot (and crash) 150262306a36Sopenharmony_ci * before we recover it */ 150362306a36Sopenharmony_ci if (quota_enabled) { 150462306a36Sopenharmony_ci for (i = 0; i < rm_quota_used; i++) { 150562306a36Sopenharmony_ci qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 150662306a36Sopenharmony_ci if (IS_ERR(qrec)) { 150762306a36Sopenharmony_ci status = PTR_ERR(qrec); 150862306a36Sopenharmony_ci mlog_errno(status); 150962306a36Sopenharmony_ci continue; 151062306a36Sopenharmony_ci } 151162306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, 151262306a36Sopenharmony_ci rm_quota[i], 151362306a36Sopenharmony_ci NULL, NULL, qrec, 151462306a36Sopenharmony_ci ORPHAN_NEED_TRUNCATE); 151562306a36Sopenharmony_ci } 151662306a36Sopenharmony_ci } 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci ocfs2_super_unlock(osb, 1); 151962306a36Sopenharmony_ci 152062306a36Sopenharmony_ci /* queue recovery for offline slots */ 152162306a36Sopenharmony_ci ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_cibail: 152462306a36Sopenharmony_ci mutex_lock(&osb->recovery_lock); 152562306a36Sopenharmony_ci if (!status && !ocfs2_recovery_completed(osb)) { 152662306a36Sopenharmony_ci mutex_unlock(&osb->recovery_lock); 152762306a36Sopenharmony_ci goto restart; 152862306a36Sopenharmony_ci } 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci ocfs2_free_replay_slots(osb); 153162306a36Sopenharmony_ci osb->recovery_thread_task = NULL; 153262306a36Sopenharmony_ci mb(); /* sync with ocfs2_recovery_thread_running */ 153362306a36Sopenharmony_ci wake_up(&osb->recovery_event); 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci mutex_unlock(&osb->recovery_lock); 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci if (quota_enabled) 153862306a36Sopenharmony_ci kfree(rm_quota); 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci return status; 154162306a36Sopenharmony_ci} 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_civoid ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) 154462306a36Sopenharmony_ci{ 154562306a36Sopenharmony_ci mutex_lock(&osb->recovery_lock); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci trace_ocfs2_recovery_thread(node_num, osb->node_num, 154862306a36Sopenharmony_ci osb->disable_recovery, osb->recovery_thread_task, 154962306a36Sopenharmony_ci osb->disable_recovery ? 155062306a36Sopenharmony_ci -1 : ocfs2_recovery_map_set(osb, node_num)); 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci if (osb->disable_recovery) 155362306a36Sopenharmony_ci goto out; 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci if (osb->recovery_thread_task) 155662306a36Sopenharmony_ci goto out; 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ci osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, 155962306a36Sopenharmony_ci "ocfs2rec-%s", osb->uuid_str); 156062306a36Sopenharmony_ci if (IS_ERR(osb->recovery_thread_task)) { 156162306a36Sopenharmony_ci mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); 156262306a36Sopenharmony_ci osb->recovery_thread_task = NULL; 156362306a36Sopenharmony_ci } 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ciout: 156662306a36Sopenharmony_ci mutex_unlock(&osb->recovery_lock); 156762306a36Sopenharmony_ci wake_up(&osb->recovery_event); 156862306a36Sopenharmony_ci} 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_cistatic int ocfs2_read_journal_inode(struct ocfs2_super *osb, 157162306a36Sopenharmony_ci int slot_num, 157262306a36Sopenharmony_ci struct buffer_head **bh, 157362306a36Sopenharmony_ci struct inode **ret_inode) 157462306a36Sopenharmony_ci{ 157562306a36Sopenharmony_ci int status = -EACCES; 157662306a36Sopenharmony_ci struct inode *inode = NULL; 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ci BUG_ON(slot_num >= osb->max_slots); 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 158162306a36Sopenharmony_ci slot_num); 158262306a36Sopenharmony_ci if (!inode || is_bad_inode(inode)) { 158362306a36Sopenharmony_ci mlog_errno(status); 158462306a36Sopenharmony_ci goto bail; 158562306a36Sopenharmony_ci } 158662306a36Sopenharmony_ci SET_INODE_JOURNAL(inode); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); 158962306a36Sopenharmony_ci if (status < 0) { 159062306a36Sopenharmony_ci mlog_errno(status); 159162306a36Sopenharmony_ci goto bail; 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci status = 0; 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_cibail: 159762306a36Sopenharmony_ci if (inode) { 159862306a36Sopenharmony_ci if (status || !ret_inode) 159962306a36Sopenharmony_ci iput(inode); 160062306a36Sopenharmony_ci else 160162306a36Sopenharmony_ci *ret_inode = inode; 160262306a36Sopenharmony_ci } 160362306a36Sopenharmony_ci return status; 160462306a36Sopenharmony_ci} 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci/* Does the actual journal replay and marks the journal inode as 160762306a36Sopenharmony_ci * clean. Will only replay if the journal inode is marked dirty. */ 160862306a36Sopenharmony_cistatic int ocfs2_replay_journal(struct ocfs2_super *osb, 160962306a36Sopenharmony_ci int node_num, 161062306a36Sopenharmony_ci int slot_num) 161162306a36Sopenharmony_ci{ 161262306a36Sopenharmony_ci int status; 161362306a36Sopenharmony_ci int got_lock = 0; 161462306a36Sopenharmony_ci unsigned int flags; 161562306a36Sopenharmony_ci struct inode *inode = NULL; 161662306a36Sopenharmony_ci struct ocfs2_dinode *fe; 161762306a36Sopenharmony_ci journal_t *journal = NULL; 161862306a36Sopenharmony_ci struct buffer_head *bh = NULL; 161962306a36Sopenharmony_ci u32 slot_reco_gen; 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode); 162262306a36Sopenharmony_ci if (status) { 162362306a36Sopenharmony_ci mlog_errno(status); 162462306a36Sopenharmony_ci goto done; 162562306a36Sopenharmony_ci } 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci fe = (struct ocfs2_dinode *)bh->b_data; 162862306a36Sopenharmony_ci slot_reco_gen = ocfs2_get_recovery_generation(fe); 162962306a36Sopenharmony_ci brelse(bh); 163062306a36Sopenharmony_ci bh = NULL; 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci /* 163362306a36Sopenharmony_ci * As the fs recovery is asynchronous, there is a small chance that 163462306a36Sopenharmony_ci * another node mounted (and recovered) the slot before the recovery 163562306a36Sopenharmony_ci * thread could get the lock. To handle that, we dirty read the journal 163662306a36Sopenharmony_ci * inode for that slot to get the recovery generation. If it is 163762306a36Sopenharmony_ci * different than what we expected, the slot has been recovered. 163862306a36Sopenharmony_ci * If not, it needs recovery. 163962306a36Sopenharmony_ci */ 164062306a36Sopenharmony_ci if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { 164162306a36Sopenharmony_ci trace_ocfs2_replay_journal_recovered(slot_num, 164262306a36Sopenharmony_ci osb->slot_recovery_generations[slot_num], slot_reco_gen); 164362306a36Sopenharmony_ci osb->slot_recovery_generations[slot_num] = slot_reco_gen; 164462306a36Sopenharmony_ci status = -EBUSY; 164562306a36Sopenharmony_ci goto done; 164662306a36Sopenharmony_ci } 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci /* Continue with recovery as the journal has not yet been recovered */ 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 165162306a36Sopenharmony_ci if (status < 0) { 165262306a36Sopenharmony_ci trace_ocfs2_replay_journal_lock_err(status); 165362306a36Sopenharmony_ci if (status != -ERESTARTSYS) 165462306a36Sopenharmony_ci mlog(ML_ERROR, "Could not lock journal!\n"); 165562306a36Sopenharmony_ci goto done; 165662306a36Sopenharmony_ci } 165762306a36Sopenharmony_ci got_lock = 1; 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ci fe = (struct ocfs2_dinode *) bh->b_data; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci flags = le32_to_cpu(fe->id1.journal1.ij_flags); 166262306a36Sopenharmony_ci slot_reco_gen = ocfs2_get_recovery_generation(fe); 166362306a36Sopenharmony_ci 166462306a36Sopenharmony_ci if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 166562306a36Sopenharmony_ci trace_ocfs2_replay_journal_skip(node_num); 166662306a36Sopenharmony_ci /* Refresh recovery generation for the slot */ 166762306a36Sopenharmony_ci osb->slot_recovery_generations[slot_num] = slot_reco_gen; 166862306a36Sopenharmony_ci goto done; 166962306a36Sopenharmony_ci } 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci /* we need to run complete recovery for offline orphan slots */ 167262306a36Sopenharmony_ci ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\ 167562306a36Sopenharmony_ci "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), 167662306a36Sopenharmony_ci MINOR(osb->sb->s_dev)); 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci status = ocfs2_force_read_journal(inode); 168162306a36Sopenharmony_ci if (status < 0) { 168262306a36Sopenharmony_ci mlog_errno(status); 168362306a36Sopenharmony_ci goto done; 168462306a36Sopenharmony_ci } 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci journal = jbd2_journal_init_inode(inode); 168762306a36Sopenharmony_ci if (IS_ERR(journal)) { 168862306a36Sopenharmony_ci mlog(ML_ERROR, "Linux journal layer error\n"); 168962306a36Sopenharmony_ci status = PTR_ERR(journal); 169062306a36Sopenharmony_ci goto done; 169162306a36Sopenharmony_ci } 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci status = jbd2_journal_load(journal); 169462306a36Sopenharmony_ci if (status < 0) { 169562306a36Sopenharmony_ci mlog_errno(status); 169662306a36Sopenharmony_ci BUG_ON(!igrab(inode)); 169762306a36Sopenharmony_ci jbd2_journal_destroy(journal); 169862306a36Sopenharmony_ci goto done; 169962306a36Sopenharmony_ci } 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci ocfs2_clear_journal_error(osb->sb, journal, slot_num); 170262306a36Sopenharmony_ci 170362306a36Sopenharmony_ci /* wipe the journal */ 170462306a36Sopenharmony_ci jbd2_journal_lock_updates(journal); 170562306a36Sopenharmony_ci status = jbd2_journal_flush(journal, 0); 170662306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal); 170762306a36Sopenharmony_ci if (status < 0) 170862306a36Sopenharmony_ci mlog_errno(status); 170962306a36Sopenharmony_ci 171062306a36Sopenharmony_ci /* This will mark the node clean */ 171162306a36Sopenharmony_ci flags = le32_to_cpu(fe->id1.journal1.ij_flags); 171262306a36Sopenharmony_ci flags &= ~OCFS2_JOURNAL_DIRTY_FL; 171362306a36Sopenharmony_ci fe->id1.journal1.ij_flags = cpu_to_le32(flags); 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci /* Increment recovery generation to indicate successful recovery */ 171662306a36Sopenharmony_ci ocfs2_bump_recovery_generation(fe); 171762306a36Sopenharmony_ci osb->slot_recovery_generations[slot_num] = 171862306a36Sopenharmony_ci ocfs2_get_recovery_generation(fe); 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 172162306a36Sopenharmony_ci status = ocfs2_write_block(osb, bh, INODE_CACHE(inode)); 172262306a36Sopenharmony_ci if (status < 0) 172362306a36Sopenharmony_ci mlog_errno(status); 172462306a36Sopenharmony_ci 172562306a36Sopenharmony_ci BUG_ON(!igrab(inode)); 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci jbd2_journal_destroy(journal); 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\ 173062306a36Sopenharmony_ci "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), 173162306a36Sopenharmony_ci MINOR(osb->sb->s_dev)); 173262306a36Sopenharmony_cidone: 173362306a36Sopenharmony_ci /* drop the lock on this nodes journal */ 173462306a36Sopenharmony_ci if (got_lock) 173562306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_ci iput(inode); 173862306a36Sopenharmony_ci brelse(bh); 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_ci return status; 174162306a36Sopenharmony_ci} 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci/* 174462306a36Sopenharmony_ci * Do the most important parts of node recovery: 174562306a36Sopenharmony_ci * - Replay it's journal 174662306a36Sopenharmony_ci * - Stamp a clean local allocator file 174762306a36Sopenharmony_ci * - Stamp a clean truncate log 174862306a36Sopenharmony_ci * - Mark the node clean 174962306a36Sopenharmony_ci * 175062306a36Sopenharmony_ci * If this function completes without error, a node in OCFS2 can be 175162306a36Sopenharmony_ci * said to have been safely recovered. As a result, failure during the 175262306a36Sopenharmony_ci * second part of a nodes recovery process (local alloc recovery) is 175362306a36Sopenharmony_ci * far less concerning. 175462306a36Sopenharmony_ci */ 175562306a36Sopenharmony_cistatic int ocfs2_recover_node(struct ocfs2_super *osb, 175662306a36Sopenharmony_ci int node_num, int slot_num) 175762306a36Sopenharmony_ci{ 175862306a36Sopenharmony_ci int status = 0; 175962306a36Sopenharmony_ci struct ocfs2_dinode *la_copy = NULL; 176062306a36Sopenharmony_ci struct ocfs2_dinode *tl_copy = NULL; 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci trace_ocfs2_recover_node(node_num, slot_num, osb->node_num); 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci /* Should not ever be called to recover ourselves -- in that 176562306a36Sopenharmony_ci * case we should've called ocfs2_journal_load instead. */ 176662306a36Sopenharmony_ci BUG_ON(osb->node_num == node_num); 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_ci status = ocfs2_replay_journal(osb, node_num, slot_num); 176962306a36Sopenharmony_ci if (status < 0) { 177062306a36Sopenharmony_ci if (status == -EBUSY) { 177162306a36Sopenharmony_ci trace_ocfs2_recover_node_skip(slot_num, node_num); 177262306a36Sopenharmony_ci status = 0; 177362306a36Sopenharmony_ci goto done; 177462306a36Sopenharmony_ci } 177562306a36Sopenharmony_ci mlog_errno(status); 177662306a36Sopenharmony_ci goto done; 177762306a36Sopenharmony_ci } 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_ci /* Stamp a clean local alloc file AFTER recovering the journal... */ 178062306a36Sopenharmony_ci status = ocfs2_begin_local_alloc_recovery(osb, slot_num, &la_copy); 178162306a36Sopenharmony_ci if (status < 0) { 178262306a36Sopenharmony_ci mlog_errno(status); 178362306a36Sopenharmony_ci goto done; 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci /* An error from begin_truncate_log_recovery is not 178762306a36Sopenharmony_ci * serious enough to warrant halting the rest of 178862306a36Sopenharmony_ci * recovery. */ 178962306a36Sopenharmony_ci status = ocfs2_begin_truncate_log_recovery(osb, slot_num, &tl_copy); 179062306a36Sopenharmony_ci if (status < 0) 179162306a36Sopenharmony_ci mlog_errno(status); 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci /* Likewise, this would be a strange but ultimately not so 179462306a36Sopenharmony_ci * harmful place to get an error... */ 179562306a36Sopenharmony_ci status = ocfs2_clear_slot(osb, slot_num); 179662306a36Sopenharmony_ci if (status < 0) 179762306a36Sopenharmony_ci mlog_errno(status); 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_ci /* This will kfree the memory pointed to by la_copy and tl_copy */ 180062306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, 180162306a36Sopenharmony_ci tl_copy, NULL, ORPHAN_NEED_TRUNCATE); 180262306a36Sopenharmony_ci 180362306a36Sopenharmony_ci status = 0; 180462306a36Sopenharmony_cidone: 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci return status; 180762306a36Sopenharmony_ci} 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci/* Test node liveness by trylocking his journal. If we get the lock, 181062306a36Sopenharmony_ci * we drop it here. Return 0 if we got the lock, -EAGAIN if node is 181162306a36Sopenharmony_ci * still alive (we couldn't get the lock) and < 0 on error. */ 181262306a36Sopenharmony_cistatic int ocfs2_trylock_journal(struct ocfs2_super *osb, 181362306a36Sopenharmony_ci int slot_num) 181462306a36Sopenharmony_ci{ 181562306a36Sopenharmony_ci int status, flags; 181662306a36Sopenharmony_ci struct inode *inode = NULL; 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 181962306a36Sopenharmony_ci slot_num); 182062306a36Sopenharmony_ci if (inode == NULL) { 182162306a36Sopenharmony_ci mlog(ML_ERROR, "access error\n"); 182262306a36Sopenharmony_ci status = -EACCES; 182362306a36Sopenharmony_ci goto bail; 182462306a36Sopenharmony_ci } 182562306a36Sopenharmony_ci if (is_bad_inode(inode)) { 182662306a36Sopenharmony_ci mlog(ML_ERROR, "access error (bad inode)\n"); 182762306a36Sopenharmony_ci iput(inode); 182862306a36Sopenharmony_ci inode = NULL; 182962306a36Sopenharmony_ci status = -EACCES; 183062306a36Sopenharmony_ci goto bail; 183162306a36Sopenharmony_ci } 183262306a36Sopenharmony_ci SET_INODE_JOURNAL(inode); 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE; 183562306a36Sopenharmony_ci status = ocfs2_inode_lock_full(inode, NULL, 1, flags); 183662306a36Sopenharmony_ci if (status < 0) { 183762306a36Sopenharmony_ci if (status != -EAGAIN) 183862306a36Sopenharmony_ci mlog_errno(status); 183962306a36Sopenharmony_ci goto bail; 184062306a36Sopenharmony_ci } 184162306a36Sopenharmony_ci 184262306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 184362306a36Sopenharmony_cibail: 184462306a36Sopenharmony_ci iput(inode); 184562306a36Sopenharmony_ci 184662306a36Sopenharmony_ci return status; 184762306a36Sopenharmony_ci} 184862306a36Sopenharmony_ci 184962306a36Sopenharmony_ci/* Call this underneath ocfs2_super_lock. It also assumes that the 185062306a36Sopenharmony_ci * slot info struct has been updated from disk. */ 185162306a36Sopenharmony_ciint ocfs2_mark_dead_nodes(struct ocfs2_super *osb) 185262306a36Sopenharmony_ci{ 185362306a36Sopenharmony_ci unsigned int node_num; 185462306a36Sopenharmony_ci int status, i; 185562306a36Sopenharmony_ci u32 gen; 185662306a36Sopenharmony_ci struct buffer_head *bh = NULL; 185762306a36Sopenharmony_ci struct ocfs2_dinode *di; 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci /* This is called with the super block cluster lock, so we 186062306a36Sopenharmony_ci * know that the slot map can't change underneath us. */ 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci for (i = 0; i < osb->max_slots; i++) { 186362306a36Sopenharmony_ci /* Read journal inode to get the recovery generation */ 186462306a36Sopenharmony_ci status = ocfs2_read_journal_inode(osb, i, &bh, NULL); 186562306a36Sopenharmony_ci if (status) { 186662306a36Sopenharmony_ci mlog_errno(status); 186762306a36Sopenharmony_ci goto bail; 186862306a36Sopenharmony_ci } 186962306a36Sopenharmony_ci di = (struct ocfs2_dinode *)bh->b_data; 187062306a36Sopenharmony_ci gen = ocfs2_get_recovery_generation(di); 187162306a36Sopenharmony_ci brelse(bh); 187262306a36Sopenharmony_ci bh = NULL; 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 187562306a36Sopenharmony_ci osb->slot_recovery_generations[i] = gen; 187662306a36Sopenharmony_ci 187762306a36Sopenharmony_ci trace_ocfs2_mark_dead_nodes(i, 187862306a36Sopenharmony_ci osb->slot_recovery_generations[i]); 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci if (i == osb->slot_num) { 188162306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 188262306a36Sopenharmony_ci continue; 188362306a36Sopenharmony_ci } 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); 188662306a36Sopenharmony_ci if (status == -ENOENT) { 188762306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 188862306a36Sopenharmony_ci continue; 188962306a36Sopenharmony_ci } 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci if (__ocfs2_recovery_map_test(osb, node_num)) { 189262306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 189362306a36Sopenharmony_ci continue; 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci /* Ok, we have a slot occupied by another node which 189862306a36Sopenharmony_ci * is not in the recovery map. We trylock his journal 189962306a36Sopenharmony_ci * file here to test if he's alive. */ 190062306a36Sopenharmony_ci status = ocfs2_trylock_journal(osb, i); 190162306a36Sopenharmony_ci if (!status) { 190262306a36Sopenharmony_ci /* Since we're called from mount, we know that 190362306a36Sopenharmony_ci * the recovery thread can't race us on 190462306a36Sopenharmony_ci * setting / checking the recovery bits. */ 190562306a36Sopenharmony_ci ocfs2_recovery_thread(osb, node_num); 190662306a36Sopenharmony_ci } else if ((status < 0) && (status != -EAGAIN)) { 190762306a36Sopenharmony_ci mlog_errno(status); 190862306a36Sopenharmony_ci goto bail; 190962306a36Sopenharmony_ci } 191062306a36Sopenharmony_ci } 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci status = 0; 191362306a36Sopenharmony_cibail: 191462306a36Sopenharmony_ci return status; 191562306a36Sopenharmony_ci} 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ci/* 191862306a36Sopenharmony_ci * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some 191962306a36Sopenharmony_ci * randomness to the timeout to minimize multple nodes firing the timer at the 192062306a36Sopenharmony_ci * same time. 192162306a36Sopenharmony_ci */ 192262306a36Sopenharmony_cistatic inline unsigned long ocfs2_orphan_scan_timeout(void) 192362306a36Sopenharmony_ci{ 192462306a36Sopenharmony_ci unsigned long time; 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci get_random_bytes(&time, sizeof(time)); 192762306a36Sopenharmony_ci time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000); 192862306a36Sopenharmony_ci return msecs_to_jiffies(time); 192962306a36Sopenharmony_ci} 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci/* 193262306a36Sopenharmony_ci * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for 193362306a36Sopenharmony_ci * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This 193462306a36Sopenharmony_ci * is done to catch any orphans that are left over in orphan directories. 193562306a36Sopenharmony_ci * 193662306a36Sopenharmony_ci * It scans all slots, even ones that are in use. It does so to handle the 193762306a36Sopenharmony_ci * case described below: 193862306a36Sopenharmony_ci * 193962306a36Sopenharmony_ci * Node 1 has an inode it was using. The dentry went away due to memory 194062306a36Sopenharmony_ci * pressure. Node 1 closes the inode, but it's on the free list. The node 194162306a36Sopenharmony_ci * has the open lock. 194262306a36Sopenharmony_ci * Node 2 unlinks the inode. It grabs the dentry lock to notify others, 194362306a36Sopenharmony_ci * but node 1 has no dentry and doesn't get the message. It trylocks the 194462306a36Sopenharmony_ci * open lock, sees that another node has a PR, and does nothing. 194562306a36Sopenharmony_ci * Later node 2 runs its orphan dir. It igets the inode, trylocks the 194662306a36Sopenharmony_ci * open lock, sees the PR still, and does nothing. 194762306a36Sopenharmony_ci * Basically, we have to trigger an orphan iput on node 1. The only way 194862306a36Sopenharmony_ci * for this to happen is if node 1 runs node 2's orphan dir. 194962306a36Sopenharmony_ci * 195062306a36Sopenharmony_ci * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT 195162306a36Sopenharmony_ci * seconds. It gets an EX lock on os_lockres and checks sequence number 195262306a36Sopenharmony_ci * stored in LVB. If the sequence number has changed, it means some other 195362306a36Sopenharmony_ci * node has done the scan. This node skips the scan and tracks the 195462306a36Sopenharmony_ci * sequence number. If the sequence number didn't change, it means a scan 195562306a36Sopenharmony_ci * hasn't happened. The node queues a scan and increments the 195662306a36Sopenharmony_ci * sequence number in the LVB. 195762306a36Sopenharmony_ci */ 195862306a36Sopenharmony_cistatic void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) 195962306a36Sopenharmony_ci{ 196062306a36Sopenharmony_ci struct ocfs2_orphan_scan *os; 196162306a36Sopenharmony_ci int status, i; 196262306a36Sopenharmony_ci u32 seqno = 0; 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci os = &osb->osb_orphan_scan; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 196762306a36Sopenharmony_ci goto out; 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_ci trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno, 197062306a36Sopenharmony_ci atomic_read(&os->os_state)); 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci status = ocfs2_orphan_scan_lock(osb, &seqno); 197362306a36Sopenharmony_ci if (status < 0) { 197462306a36Sopenharmony_ci if (status != -EAGAIN) 197562306a36Sopenharmony_ci mlog_errno(status); 197662306a36Sopenharmony_ci goto out; 197762306a36Sopenharmony_ci } 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci /* Do no queue the tasks if the volume is being umounted */ 198062306a36Sopenharmony_ci if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 198162306a36Sopenharmony_ci goto unlock; 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_ci if (os->os_seqno != seqno) { 198462306a36Sopenharmony_ci os->os_seqno = seqno; 198562306a36Sopenharmony_ci goto unlock; 198662306a36Sopenharmony_ci } 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci for (i = 0; i < osb->max_slots; i++) 198962306a36Sopenharmony_ci ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, 199062306a36Sopenharmony_ci NULL, ORPHAN_NO_NEED_TRUNCATE); 199162306a36Sopenharmony_ci /* 199262306a36Sopenharmony_ci * We queued a recovery on orphan slots, increment the sequence 199362306a36Sopenharmony_ci * number and update LVB so other node will skip the scan for a while 199462306a36Sopenharmony_ci */ 199562306a36Sopenharmony_ci seqno++; 199662306a36Sopenharmony_ci os->os_count++; 199762306a36Sopenharmony_ci os->os_scantime = ktime_get_seconds(); 199862306a36Sopenharmony_ciunlock: 199962306a36Sopenharmony_ci ocfs2_orphan_scan_unlock(osb, seqno); 200062306a36Sopenharmony_ciout: 200162306a36Sopenharmony_ci trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno, 200262306a36Sopenharmony_ci atomic_read(&os->os_state)); 200362306a36Sopenharmony_ci return; 200462306a36Sopenharmony_ci} 200562306a36Sopenharmony_ci 200662306a36Sopenharmony_ci/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */ 200762306a36Sopenharmony_cistatic void ocfs2_orphan_scan_work(struct work_struct *work) 200862306a36Sopenharmony_ci{ 200962306a36Sopenharmony_ci struct ocfs2_orphan_scan *os; 201062306a36Sopenharmony_ci struct ocfs2_super *osb; 201162306a36Sopenharmony_ci 201262306a36Sopenharmony_ci os = container_of(work, struct ocfs2_orphan_scan, 201362306a36Sopenharmony_ci os_orphan_scan_work.work); 201462306a36Sopenharmony_ci osb = os->os_osb; 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci mutex_lock(&os->os_lock); 201762306a36Sopenharmony_ci ocfs2_queue_orphan_scan(osb); 201862306a36Sopenharmony_ci if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) 201962306a36Sopenharmony_ci queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work, 202062306a36Sopenharmony_ci ocfs2_orphan_scan_timeout()); 202162306a36Sopenharmony_ci mutex_unlock(&os->os_lock); 202262306a36Sopenharmony_ci} 202362306a36Sopenharmony_ci 202462306a36Sopenharmony_civoid ocfs2_orphan_scan_stop(struct ocfs2_super *osb) 202562306a36Sopenharmony_ci{ 202662306a36Sopenharmony_ci struct ocfs2_orphan_scan *os; 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci os = &osb->osb_orphan_scan; 202962306a36Sopenharmony_ci if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) { 203062306a36Sopenharmony_ci atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 203162306a36Sopenharmony_ci mutex_lock(&os->os_lock); 203262306a36Sopenharmony_ci cancel_delayed_work(&os->os_orphan_scan_work); 203362306a36Sopenharmony_ci mutex_unlock(&os->os_lock); 203462306a36Sopenharmony_ci } 203562306a36Sopenharmony_ci} 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_civoid ocfs2_orphan_scan_init(struct ocfs2_super *osb) 203862306a36Sopenharmony_ci{ 203962306a36Sopenharmony_ci struct ocfs2_orphan_scan *os; 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_ci os = &osb->osb_orphan_scan; 204262306a36Sopenharmony_ci os->os_osb = osb; 204362306a36Sopenharmony_ci os->os_count = 0; 204462306a36Sopenharmony_ci os->os_seqno = 0; 204562306a36Sopenharmony_ci mutex_init(&os->os_lock); 204662306a36Sopenharmony_ci INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 204762306a36Sopenharmony_ci} 204862306a36Sopenharmony_ci 204962306a36Sopenharmony_civoid ocfs2_orphan_scan_start(struct ocfs2_super *osb) 205062306a36Sopenharmony_ci{ 205162306a36Sopenharmony_ci struct ocfs2_orphan_scan *os; 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci os = &osb->osb_orphan_scan; 205462306a36Sopenharmony_ci os->os_scantime = ktime_get_seconds(); 205562306a36Sopenharmony_ci if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 205662306a36Sopenharmony_ci atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 205762306a36Sopenharmony_ci else { 205862306a36Sopenharmony_ci atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 205962306a36Sopenharmony_ci queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work, 206062306a36Sopenharmony_ci ocfs2_orphan_scan_timeout()); 206162306a36Sopenharmony_ci } 206262306a36Sopenharmony_ci} 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_cistruct ocfs2_orphan_filldir_priv { 206562306a36Sopenharmony_ci struct dir_context ctx; 206662306a36Sopenharmony_ci struct inode *head; 206762306a36Sopenharmony_ci struct ocfs2_super *osb; 206862306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type; 206962306a36Sopenharmony_ci}; 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_cistatic bool ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, 207262306a36Sopenharmony_ci int name_len, loff_t pos, u64 ino, 207362306a36Sopenharmony_ci unsigned type) 207462306a36Sopenharmony_ci{ 207562306a36Sopenharmony_ci struct ocfs2_orphan_filldir_priv *p = 207662306a36Sopenharmony_ci container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx); 207762306a36Sopenharmony_ci struct inode *iter; 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_ci if (name_len == 1 && !strncmp(".", name, 1)) 208062306a36Sopenharmony_ci return true; 208162306a36Sopenharmony_ci if (name_len == 2 && !strncmp("..", name, 2)) 208262306a36Sopenharmony_ci return true; 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci /* do not include dio entry in case of orphan scan */ 208562306a36Sopenharmony_ci if ((p->orphan_reco_type == ORPHAN_NO_NEED_TRUNCATE) && 208662306a36Sopenharmony_ci (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX, 208762306a36Sopenharmony_ci OCFS2_DIO_ORPHAN_PREFIX_LEN))) 208862306a36Sopenharmony_ci return true; 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci /* Skip bad inodes so that recovery can continue */ 209162306a36Sopenharmony_ci iter = ocfs2_iget(p->osb, ino, 209262306a36Sopenharmony_ci OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0); 209362306a36Sopenharmony_ci if (IS_ERR(iter)) 209462306a36Sopenharmony_ci return true; 209562306a36Sopenharmony_ci 209662306a36Sopenharmony_ci if (!strncmp(name, OCFS2_DIO_ORPHAN_PREFIX, 209762306a36Sopenharmony_ci OCFS2_DIO_ORPHAN_PREFIX_LEN)) 209862306a36Sopenharmony_ci OCFS2_I(iter)->ip_flags |= OCFS2_INODE_DIO_ORPHAN_ENTRY; 209962306a36Sopenharmony_ci 210062306a36Sopenharmony_ci /* Skip inodes which are already added to recover list, since dio may 210162306a36Sopenharmony_ci * happen concurrently with unlink/rename */ 210262306a36Sopenharmony_ci if (OCFS2_I(iter)->ip_next_orphan) { 210362306a36Sopenharmony_ci iput(iter); 210462306a36Sopenharmony_ci return true; 210562306a36Sopenharmony_ci } 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_ci trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno); 210862306a36Sopenharmony_ci /* No locking is required for the next_orphan queue as there 210962306a36Sopenharmony_ci * is only ever a single process doing orphan recovery. */ 211062306a36Sopenharmony_ci OCFS2_I(iter)->ip_next_orphan = p->head; 211162306a36Sopenharmony_ci p->head = iter; 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_ci return true; 211462306a36Sopenharmony_ci} 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_cistatic int ocfs2_queue_orphans(struct ocfs2_super *osb, 211762306a36Sopenharmony_ci int slot, 211862306a36Sopenharmony_ci struct inode **head, 211962306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type) 212062306a36Sopenharmony_ci{ 212162306a36Sopenharmony_ci int status; 212262306a36Sopenharmony_ci struct inode *orphan_dir_inode = NULL; 212362306a36Sopenharmony_ci struct ocfs2_orphan_filldir_priv priv = { 212462306a36Sopenharmony_ci .ctx.actor = ocfs2_orphan_filldir, 212562306a36Sopenharmony_ci .osb = osb, 212662306a36Sopenharmony_ci .head = *head, 212762306a36Sopenharmony_ci .orphan_reco_type = orphan_reco_type 212862306a36Sopenharmony_ci }; 212962306a36Sopenharmony_ci 213062306a36Sopenharmony_ci orphan_dir_inode = ocfs2_get_system_file_inode(osb, 213162306a36Sopenharmony_ci ORPHAN_DIR_SYSTEM_INODE, 213262306a36Sopenharmony_ci slot); 213362306a36Sopenharmony_ci if (!orphan_dir_inode) { 213462306a36Sopenharmony_ci status = -ENOENT; 213562306a36Sopenharmony_ci mlog_errno(status); 213662306a36Sopenharmony_ci return status; 213762306a36Sopenharmony_ci } 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci inode_lock(orphan_dir_inode); 214062306a36Sopenharmony_ci status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); 214162306a36Sopenharmony_ci if (status < 0) { 214262306a36Sopenharmony_ci mlog_errno(status); 214362306a36Sopenharmony_ci goto out; 214462306a36Sopenharmony_ci } 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_ci status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx); 214762306a36Sopenharmony_ci if (status) { 214862306a36Sopenharmony_ci mlog_errno(status); 214962306a36Sopenharmony_ci goto out_cluster; 215062306a36Sopenharmony_ci } 215162306a36Sopenharmony_ci 215262306a36Sopenharmony_ci *head = priv.head; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ciout_cluster: 215562306a36Sopenharmony_ci ocfs2_inode_unlock(orphan_dir_inode, 0); 215662306a36Sopenharmony_ciout: 215762306a36Sopenharmony_ci inode_unlock(orphan_dir_inode); 215862306a36Sopenharmony_ci iput(orphan_dir_inode); 215962306a36Sopenharmony_ci return status; 216062306a36Sopenharmony_ci} 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_cistatic int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, 216362306a36Sopenharmony_ci int slot) 216462306a36Sopenharmony_ci{ 216562306a36Sopenharmony_ci int ret; 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 216862306a36Sopenharmony_ci ret = !osb->osb_orphan_wipes[slot]; 216962306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 217062306a36Sopenharmony_ci return ret; 217162306a36Sopenharmony_ci} 217262306a36Sopenharmony_ci 217362306a36Sopenharmony_cistatic void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, 217462306a36Sopenharmony_ci int slot) 217562306a36Sopenharmony_ci{ 217662306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 217762306a36Sopenharmony_ci /* Mark ourselves such that new processes in delete_inode() 217862306a36Sopenharmony_ci * know to quit early. */ 217962306a36Sopenharmony_ci ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); 218062306a36Sopenharmony_ci while (osb->osb_orphan_wipes[slot]) { 218162306a36Sopenharmony_ci /* If any processes are already in the middle of an 218262306a36Sopenharmony_ci * orphan wipe on this dir, then we need to wait for 218362306a36Sopenharmony_ci * them. */ 218462306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 218562306a36Sopenharmony_ci wait_event_interruptible(osb->osb_wipe_event, 218662306a36Sopenharmony_ci ocfs2_orphan_recovery_can_continue(osb, slot)); 218762306a36Sopenharmony_ci spin_lock(&osb->osb_lock); 218862306a36Sopenharmony_ci } 218962306a36Sopenharmony_ci spin_unlock(&osb->osb_lock); 219062306a36Sopenharmony_ci} 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_cistatic void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, 219362306a36Sopenharmony_ci int slot) 219462306a36Sopenharmony_ci{ 219562306a36Sopenharmony_ci ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); 219662306a36Sopenharmony_ci} 219762306a36Sopenharmony_ci 219862306a36Sopenharmony_ci/* 219962306a36Sopenharmony_ci * Orphan recovery. Each mounted node has it's own orphan dir which we 220062306a36Sopenharmony_ci * must run during recovery. Our strategy here is to build a list of 220162306a36Sopenharmony_ci * the inodes in the orphan dir and iget/iput them. The VFS does 220262306a36Sopenharmony_ci * (most) of the rest of the work. 220362306a36Sopenharmony_ci * 220462306a36Sopenharmony_ci * Orphan recovery can happen at any time, not just mount so we have a 220562306a36Sopenharmony_ci * couple of extra considerations. 220662306a36Sopenharmony_ci * 220762306a36Sopenharmony_ci * - We grab as many inodes as we can under the orphan dir lock - 220862306a36Sopenharmony_ci * doing iget() outside the orphan dir risks getting a reference on 220962306a36Sopenharmony_ci * an invalid inode. 221062306a36Sopenharmony_ci * - We must be sure not to deadlock with other processes on the 221162306a36Sopenharmony_ci * system wanting to run delete_inode(). This can happen when they go 221262306a36Sopenharmony_ci * to lock the orphan dir and the orphan recovery process attempts to 221362306a36Sopenharmony_ci * iget() inside the orphan dir lock. This can be avoided by 221462306a36Sopenharmony_ci * advertising our state to ocfs2_delete_inode(). 221562306a36Sopenharmony_ci */ 221662306a36Sopenharmony_cistatic int ocfs2_recover_orphans(struct ocfs2_super *osb, 221762306a36Sopenharmony_ci int slot, 221862306a36Sopenharmony_ci enum ocfs2_orphan_reco_type orphan_reco_type) 221962306a36Sopenharmony_ci{ 222062306a36Sopenharmony_ci int ret = 0; 222162306a36Sopenharmony_ci struct inode *inode = NULL; 222262306a36Sopenharmony_ci struct inode *iter; 222362306a36Sopenharmony_ci struct ocfs2_inode_info *oi; 222462306a36Sopenharmony_ci struct buffer_head *di_bh = NULL; 222562306a36Sopenharmony_ci struct ocfs2_dinode *di = NULL; 222662306a36Sopenharmony_ci 222762306a36Sopenharmony_ci trace_ocfs2_recover_orphans(slot); 222862306a36Sopenharmony_ci 222962306a36Sopenharmony_ci ocfs2_mark_recovering_orphan_dir(osb, slot); 223062306a36Sopenharmony_ci ret = ocfs2_queue_orphans(osb, slot, &inode, orphan_reco_type); 223162306a36Sopenharmony_ci ocfs2_clear_recovering_orphan_dir(osb, slot); 223262306a36Sopenharmony_ci 223362306a36Sopenharmony_ci /* Error here should be noted, but we want to continue with as 223462306a36Sopenharmony_ci * many queued inodes as we've got. */ 223562306a36Sopenharmony_ci if (ret) 223662306a36Sopenharmony_ci mlog_errno(ret); 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci while (inode) { 223962306a36Sopenharmony_ci oi = OCFS2_I(inode); 224062306a36Sopenharmony_ci trace_ocfs2_recover_orphans_iput( 224162306a36Sopenharmony_ci (unsigned long long)oi->ip_blkno); 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci iter = oi->ip_next_orphan; 224462306a36Sopenharmony_ci oi->ip_next_orphan = NULL; 224562306a36Sopenharmony_ci 224662306a36Sopenharmony_ci if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) { 224762306a36Sopenharmony_ci inode_lock(inode); 224862306a36Sopenharmony_ci ret = ocfs2_rw_lock(inode, 1); 224962306a36Sopenharmony_ci if (ret < 0) { 225062306a36Sopenharmony_ci mlog_errno(ret); 225162306a36Sopenharmony_ci goto unlock_mutex; 225262306a36Sopenharmony_ci } 225362306a36Sopenharmony_ci /* 225462306a36Sopenharmony_ci * We need to take and drop the inode lock to 225562306a36Sopenharmony_ci * force read inode from disk. 225662306a36Sopenharmony_ci */ 225762306a36Sopenharmony_ci ret = ocfs2_inode_lock(inode, &di_bh, 1); 225862306a36Sopenharmony_ci if (ret) { 225962306a36Sopenharmony_ci mlog_errno(ret); 226062306a36Sopenharmony_ci goto unlock_rw; 226162306a36Sopenharmony_ci } 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci di = (struct ocfs2_dinode *)di_bh->b_data; 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_ci if (di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)) { 226662306a36Sopenharmony_ci ret = ocfs2_truncate_file(inode, di_bh, 226762306a36Sopenharmony_ci i_size_read(inode)); 226862306a36Sopenharmony_ci if (ret < 0) { 226962306a36Sopenharmony_ci if (ret != -ENOSPC) 227062306a36Sopenharmony_ci mlog_errno(ret); 227162306a36Sopenharmony_ci goto unlock_inode; 227262306a36Sopenharmony_ci } 227362306a36Sopenharmony_ci 227462306a36Sopenharmony_ci ret = ocfs2_del_inode_from_orphan(osb, inode, 227562306a36Sopenharmony_ci di_bh, 0, 0); 227662306a36Sopenharmony_ci if (ret) 227762306a36Sopenharmony_ci mlog_errno(ret); 227862306a36Sopenharmony_ci } 227962306a36Sopenharmony_ciunlock_inode: 228062306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 228162306a36Sopenharmony_ci brelse(di_bh); 228262306a36Sopenharmony_ci di_bh = NULL; 228362306a36Sopenharmony_ciunlock_rw: 228462306a36Sopenharmony_ci ocfs2_rw_unlock(inode, 1); 228562306a36Sopenharmony_ciunlock_mutex: 228662306a36Sopenharmony_ci inode_unlock(inode); 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci /* clear dio flag in ocfs2_inode_info */ 228962306a36Sopenharmony_ci oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY; 229062306a36Sopenharmony_ci } else { 229162306a36Sopenharmony_ci spin_lock(&oi->ip_lock); 229262306a36Sopenharmony_ci /* Set the proper information to get us going into 229362306a36Sopenharmony_ci * ocfs2_delete_inode. */ 229462306a36Sopenharmony_ci oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 229562306a36Sopenharmony_ci spin_unlock(&oi->ip_lock); 229662306a36Sopenharmony_ci } 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci iput(inode); 229962306a36Sopenharmony_ci inode = iter; 230062306a36Sopenharmony_ci } 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci return ret; 230362306a36Sopenharmony_ci} 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_cistatic int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota) 230662306a36Sopenharmony_ci{ 230762306a36Sopenharmony_ci /* This check is good because ocfs2 will wait on our recovery 230862306a36Sopenharmony_ci * thread before changing it to something other than MOUNTED 230962306a36Sopenharmony_ci * or DISABLED. */ 231062306a36Sopenharmony_ci wait_event(osb->osb_mount_event, 231162306a36Sopenharmony_ci (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) || 231262306a36Sopenharmony_ci atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS || 231362306a36Sopenharmony_ci atomic_read(&osb->vol_state) == VOLUME_DISABLED); 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci /* If there's an error on mount, then we may never get to the 231662306a36Sopenharmony_ci * MOUNTED flag, but this is set right before 231762306a36Sopenharmony_ci * dismount_volume() so we can trust it. */ 231862306a36Sopenharmony_ci if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { 231962306a36Sopenharmony_ci trace_ocfs2_wait_on_mount(VOLUME_DISABLED); 232062306a36Sopenharmony_ci mlog(0, "mount error, exiting!\n"); 232162306a36Sopenharmony_ci return -EBUSY; 232262306a36Sopenharmony_ci } 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci return 0; 232562306a36Sopenharmony_ci} 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_cistatic int ocfs2_commit_thread(void *arg) 232862306a36Sopenharmony_ci{ 232962306a36Sopenharmony_ci int status; 233062306a36Sopenharmony_ci struct ocfs2_super *osb = arg; 233162306a36Sopenharmony_ci struct ocfs2_journal *journal = osb->journal; 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci /* we can trust j_num_trans here because _should_stop() is only set in 233462306a36Sopenharmony_ci * shutdown and nobody other than ourselves should be able to start 233562306a36Sopenharmony_ci * transactions. committing on shutdown might take a few iterations 233662306a36Sopenharmony_ci * as final transactions put deleted inodes on the list */ 233762306a36Sopenharmony_ci while (!(kthread_should_stop() && 233862306a36Sopenharmony_ci atomic_read(&journal->j_num_trans) == 0)) { 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci wait_event_interruptible(osb->checkpoint_event, 234162306a36Sopenharmony_ci atomic_read(&journal->j_num_trans) 234262306a36Sopenharmony_ci || kthread_should_stop()); 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci status = ocfs2_commit_cache(osb); 234562306a36Sopenharmony_ci if (status < 0) { 234662306a36Sopenharmony_ci static unsigned long abort_warn_time; 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_ci /* Warn about this once per minute */ 234962306a36Sopenharmony_ci if (printk_timed_ratelimit(&abort_warn_time, 60*HZ)) 235062306a36Sopenharmony_ci mlog(ML_ERROR, "status = %d, journal is " 235162306a36Sopenharmony_ci "already aborted.\n", status); 235262306a36Sopenharmony_ci /* 235362306a36Sopenharmony_ci * After ocfs2_commit_cache() fails, j_num_trans has a 235462306a36Sopenharmony_ci * non-zero value. Sleep here to avoid a busy-wait 235562306a36Sopenharmony_ci * loop. 235662306a36Sopenharmony_ci */ 235762306a36Sopenharmony_ci msleep_interruptible(1000); 235862306a36Sopenharmony_ci } 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ci if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){ 236162306a36Sopenharmony_ci mlog(ML_KTHREAD, 236262306a36Sopenharmony_ci "commit_thread: %u transactions pending on " 236362306a36Sopenharmony_ci "shutdown\n", 236462306a36Sopenharmony_ci atomic_read(&journal->j_num_trans)); 236562306a36Sopenharmony_ci } 236662306a36Sopenharmony_ci } 236762306a36Sopenharmony_ci 236862306a36Sopenharmony_ci return 0; 236962306a36Sopenharmony_ci} 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci/* Reads all the journal inodes without taking any cluster locks. Used 237262306a36Sopenharmony_ci * for hard readonly access to determine whether any journal requires 237362306a36Sopenharmony_ci * recovery. Also used to refresh the recovery generation numbers after 237462306a36Sopenharmony_ci * a journal has been recovered by another node. 237562306a36Sopenharmony_ci */ 237662306a36Sopenharmony_ciint ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 237762306a36Sopenharmony_ci{ 237862306a36Sopenharmony_ci int ret = 0; 237962306a36Sopenharmony_ci unsigned int slot; 238062306a36Sopenharmony_ci struct buffer_head *di_bh = NULL; 238162306a36Sopenharmony_ci struct ocfs2_dinode *di; 238262306a36Sopenharmony_ci int journal_dirty = 0; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci for(slot = 0; slot < osb->max_slots; slot++) { 238562306a36Sopenharmony_ci ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL); 238662306a36Sopenharmony_ci if (ret) { 238762306a36Sopenharmony_ci mlog_errno(ret); 238862306a36Sopenharmony_ci goto out; 238962306a36Sopenharmony_ci } 239062306a36Sopenharmony_ci 239162306a36Sopenharmony_ci di = (struct ocfs2_dinode *) di_bh->b_data; 239262306a36Sopenharmony_ci 239362306a36Sopenharmony_ci osb->slot_recovery_generations[slot] = 239462306a36Sopenharmony_ci ocfs2_get_recovery_generation(di); 239562306a36Sopenharmony_ci 239662306a36Sopenharmony_ci if (le32_to_cpu(di->id1.journal1.ij_flags) & 239762306a36Sopenharmony_ci OCFS2_JOURNAL_DIRTY_FL) 239862306a36Sopenharmony_ci journal_dirty = 1; 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_ci brelse(di_bh); 240162306a36Sopenharmony_ci di_bh = NULL; 240262306a36Sopenharmony_ci } 240362306a36Sopenharmony_ci 240462306a36Sopenharmony_ciout: 240562306a36Sopenharmony_ci if (journal_dirty) 240662306a36Sopenharmony_ci ret = -EROFS; 240762306a36Sopenharmony_ci return ret; 240862306a36Sopenharmony_ci} 2409