18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/****************************************************************************** 38c2ecf20Sopenharmony_ci******************************************************************************* 48c2ecf20Sopenharmony_ci** 58c2ecf20Sopenharmony_ci** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 68c2ecf20Sopenharmony_ci** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 78c2ecf20Sopenharmony_ci** 88c2ecf20Sopenharmony_ci** 98c2ecf20Sopenharmony_ci******************************************************************************* 108c2ecf20Sopenharmony_ci******************************************************************************/ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include "dlm_internal.h" 138c2ecf20Sopenharmony_ci#include "lockspace.h" 148c2ecf20Sopenharmony_ci#include "dir.h" 158c2ecf20Sopenharmony_ci#include "config.h" 168c2ecf20Sopenharmony_ci#include "ast.h" 178c2ecf20Sopenharmony_ci#include "memory.h" 188c2ecf20Sopenharmony_ci#include "rcom.h" 198c2ecf20Sopenharmony_ci#include "lock.h" 208c2ecf20Sopenharmony_ci#include "lowcomms.h" 218c2ecf20Sopenharmony_ci#include "member.h" 228c2ecf20Sopenharmony_ci#include "recover.h" 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci/* 268c2ecf20Sopenharmony_ci * Recovery waiting routines: these functions wait for a particular reply from 278c2ecf20Sopenharmony_ci * a remote node, or for the remote node to report a certain status. They need 288c2ecf20Sopenharmony_ci * to abort if the lockspace is stopped indicating a node has failed (perhaps 298c2ecf20Sopenharmony_ci * the one being waited for). 308c2ecf20Sopenharmony_ci */ 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* 338c2ecf20Sopenharmony_ci * Wait until given function returns non-zero or lockspace is stopped 348c2ecf20Sopenharmony_ci * (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another 358c2ecf20Sopenharmony_ci * function thinks it could have completed the waited-on task, they should wake 368c2ecf20Sopenharmony_ci * up ls_wait_general to get an immediate response rather than waiting for the 378c2ecf20Sopenharmony_ci * timeout. This uses a timeout so it can check periodically if the wait 388c2ecf20Sopenharmony_ci * should abort due to node failure (which doesn't cause a wake_up). 398c2ecf20Sopenharmony_ci * This should only be called by the dlm_recoverd thread. 408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ciint dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) 438c2ecf20Sopenharmony_ci{ 448c2ecf20Sopenharmony_ci int error = 0; 458c2ecf20Sopenharmony_ci int rv; 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci while (1) { 488c2ecf20Sopenharmony_ci rv = wait_event_timeout(ls->ls_wait_general, 498c2ecf20Sopenharmony_ci testfn(ls) || dlm_recovery_stopped(ls), 508c2ecf20Sopenharmony_ci dlm_config.ci_recover_timer * HZ); 518c2ecf20Sopenharmony_ci if (rv) 528c2ecf20Sopenharmony_ci break; 538c2ecf20Sopenharmony_ci if (test_bit(LSFL_RCOM_WAIT, &ls->ls_flags)) { 548c2ecf20Sopenharmony_ci log_debug(ls, "dlm_wait_function timed out"); 558c2ecf20Sopenharmony_ci return -ETIMEDOUT; 568c2ecf20Sopenharmony_ci } 578c2ecf20Sopenharmony_ci } 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 608c2ecf20Sopenharmony_ci log_debug(ls, "dlm_wait_function aborted"); 618c2ecf20Sopenharmony_ci error = -EINTR; 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci return error; 648c2ecf20Sopenharmony_ci} 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci/* 678c2ecf20Sopenharmony_ci * An efficient way for all nodes to wait for all others to have a certain 688c2ecf20Sopenharmony_ci * status. The node with the lowest nodeid polls all the others for their 698c2ecf20Sopenharmony_ci * status (wait_status_all) and all the others poll the node with the low id 708c2ecf20Sopenharmony_ci * for its accumulated result (wait_status_low). When all nodes have set 718c2ecf20Sopenharmony_ci * status flag X, then status flag X_ALL will be set on the low nodeid. 728c2ecf20Sopenharmony_ci */ 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ciuint32_t dlm_recover_status(struct dlm_ls *ls) 758c2ecf20Sopenharmony_ci{ 768c2ecf20Sopenharmony_ci uint32_t status; 778c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_lock); 788c2ecf20Sopenharmony_ci status = ls->ls_recover_status; 798c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_lock); 808c2ecf20Sopenharmony_ci return status; 818c2ecf20Sopenharmony_ci} 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_cistatic void _set_recover_status(struct dlm_ls *ls, uint32_t status) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci ls->ls_recover_status |= status; 868c2ecf20Sopenharmony_ci} 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_civoid dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) 898c2ecf20Sopenharmony_ci{ 908c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_lock); 918c2ecf20Sopenharmony_ci _set_recover_status(ls, status); 928c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_lock); 938c2ecf20Sopenharmony_ci} 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_cistatic int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, 968c2ecf20Sopenharmony_ci int save_slots) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci struct dlm_rcom *rc = ls->ls_recover_buf; 998c2ecf20Sopenharmony_ci struct dlm_member *memb; 1008c2ecf20Sopenharmony_ci int error = 0, delay; 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci list_for_each_entry(memb, &ls->ls_nodes, list) { 1038c2ecf20Sopenharmony_ci delay = 0; 1048c2ecf20Sopenharmony_ci for (;;) { 1058c2ecf20Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 1068c2ecf20Sopenharmony_ci error = -EINTR; 1078c2ecf20Sopenharmony_ci goto out; 1088c2ecf20Sopenharmony_ci } 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci error = dlm_rcom_status(ls, memb->nodeid, 0); 1118c2ecf20Sopenharmony_ci if (error) 1128c2ecf20Sopenharmony_ci goto out; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci if (save_slots) 1158c2ecf20Sopenharmony_ci dlm_slot_save(ls, rc, memb); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci if (rc->rc_result & wait_status) 1188c2ecf20Sopenharmony_ci break; 1198c2ecf20Sopenharmony_ci if (delay < 1000) 1208c2ecf20Sopenharmony_ci delay += 20; 1218c2ecf20Sopenharmony_ci msleep(delay); 1228c2ecf20Sopenharmony_ci } 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci out: 1258c2ecf20Sopenharmony_ci return error; 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_cistatic int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, 1298c2ecf20Sopenharmony_ci uint32_t status_flags) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci struct dlm_rcom *rc = ls->ls_recover_buf; 1328c2ecf20Sopenharmony_ci int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci for (;;) { 1358c2ecf20Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 1368c2ecf20Sopenharmony_ci error = -EINTR; 1378c2ecf20Sopenharmony_ci goto out; 1388c2ecf20Sopenharmony_ci } 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci error = dlm_rcom_status(ls, nodeid, status_flags); 1418c2ecf20Sopenharmony_ci if (error) 1428c2ecf20Sopenharmony_ci break; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci if (rc->rc_result & wait_status) 1458c2ecf20Sopenharmony_ci break; 1468c2ecf20Sopenharmony_ci if (delay < 1000) 1478c2ecf20Sopenharmony_ci delay += 20; 1488c2ecf20Sopenharmony_ci msleep(delay); 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci out: 1518c2ecf20Sopenharmony_ci return error; 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_cistatic int wait_status(struct dlm_ls *ls, uint32_t status) 1558c2ecf20Sopenharmony_ci{ 1568c2ecf20Sopenharmony_ci uint32_t status_all = status << 1; 1578c2ecf20Sopenharmony_ci int error; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci if (ls->ls_low_nodeid == dlm_our_nodeid()) { 1608c2ecf20Sopenharmony_ci error = wait_status_all(ls, status, 0); 1618c2ecf20Sopenharmony_ci if (!error) 1628c2ecf20Sopenharmony_ci dlm_set_recover_status(ls, status_all); 1638c2ecf20Sopenharmony_ci } else 1648c2ecf20Sopenharmony_ci error = wait_status_low(ls, status_all, 0); 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci return error; 1678c2ecf20Sopenharmony_ci} 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ciint dlm_recover_members_wait(struct dlm_ls *ls) 1708c2ecf20Sopenharmony_ci{ 1718c2ecf20Sopenharmony_ci struct dlm_member *memb; 1728c2ecf20Sopenharmony_ci struct dlm_slot *slots; 1738c2ecf20Sopenharmony_ci int num_slots, slots_size; 1748c2ecf20Sopenharmony_ci int error, rv; 1758c2ecf20Sopenharmony_ci uint32_t gen; 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci list_for_each_entry(memb, &ls->ls_nodes, list) { 1788c2ecf20Sopenharmony_ci memb->slot = -1; 1798c2ecf20Sopenharmony_ci memb->generation = 0; 1808c2ecf20Sopenharmony_ci } 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci if (ls->ls_low_nodeid == dlm_our_nodeid()) { 1838c2ecf20Sopenharmony_ci error = wait_status_all(ls, DLM_RS_NODES, 1); 1848c2ecf20Sopenharmony_ci if (error) 1858c2ecf20Sopenharmony_ci goto out; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci /* slots array is sparse, slots_size may be > num_slots */ 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); 1908c2ecf20Sopenharmony_ci if (!rv) { 1918c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_lock); 1928c2ecf20Sopenharmony_ci _set_recover_status(ls, DLM_RS_NODES_ALL); 1938c2ecf20Sopenharmony_ci ls->ls_num_slots = num_slots; 1948c2ecf20Sopenharmony_ci ls->ls_slots_size = slots_size; 1958c2ecf20Sopenharmony_ci ls->ls_slots = slots; 1968c2ecf20Sopenharmony_ci ls->ls_generation = gen; 1978c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_lock); 1988c2ecf20Sopenharmony_ci } else { 1998c2ecf20Sopenharmony_ci dlm_set_recover_status(ls, DLM_RS_NODES_ALL); 2008c2ecf20Sopenharmony_ci } 2018c2ecf20Sopenharmony_ci } else { 2028c2ecf20Sopenharmony_ci error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); 2038c2ecf20Sopenharmony_ci if (error) 2048c2ecf20Sopenharmony_ci goto out; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci dlm_slots_copy_in(ls); 2078c2ecf20Sopenharmony_ci } 2088c2ecf20Sopenharmony_ci out: 2098c2ecf20Sopenharmony_ci return error; 2108c2ecf20Sopenharmony_ci} 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ciint dlm_recover_directory_wait(struct dlm_ls *ls) 2138c2ecf20Sopenharmony_ci{ 2148c2ecf20Sopenharmony_ci return wait_status(ls, DLM_RS_DIR); 2158c2ecf20Sopenharmony_ci} 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ciint dlm_recover_locks_wait(struct dlm_ls *ls) 2188c2ecf20Sopenharmony_ci{ 2198c2ecf20Sopenharmony_ci return wait_status(ls, DLM_RS_LOCKS); 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ciint dlm_recover_done_wait(struct dlm_ls *ls) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci return wait_status(ls, DLM_RS_DONE); 2258c2ecf20Sopenharmony_ci} 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci/* 2288c2ecf20Sopenharmony_ci * The recover_list contains all the rsb's for which we've requested the new 2298c2ecf20Sopenharmony_ci * master nodeid. As replies are returned from the resource directories the 2308c2ecf20Sopenharmony_ci * rsb's are removed from the list. When the list is empty we're done. 2318c2ecf20Sopenharmony_ci * 2328c2ecf20Sopenharmony_ci * The recover_list is later similarly used for all rsb's for which we've sent 2338c2ecf20Sopenharmony_ci * new lkb's and need to receive new corresponding lkid's. 2348c2ecf20Sopenharmony_ci * 2358c2ecf20Sopenharmony_ci * We use the address of the rsb struct as a simple local identifier for the 2368c2ecf20Sopenharmony_ci * rsb so we can match an rcom reply with the rsb it was sent for. 2378c2ecf20Sopenharmony_ci */ 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_cistatic int recover_list_empty(struct dlm_ls *ls) 2408c2ecf20Sopenharmony_ci{ 2418c2ecf20Sopenharmony_ci int empty; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_list_lock); 2448c2ecf20Sopenharmony_ci empty = list_empty(&ls->ls_recover_list); 2458c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_list_lock); 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci return empty; 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_cistatic void recover_list_add(struct dlm_rsb *r) 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_list_lock); 2558c2ecf20Sopenharmony_ci if (list_empty(&r->res_recover_list)) { 2568c2ecf20Sopenharmony_ci list_add_tail(&r->res_recover_list, &ls->ls_recover_list); 2578c2ecf20Sopenharmony_ci ls->ls_recover_list_count++; 2588c2ecf20Sopenharmony_ci dlm_hold_rsb(r); 2598c2ecf20Sopenharmony_ci } 2608c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_list_lock); 2618c2ecf20Sopenharmony_ci} 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_cistatic void recover_list_del(struct dlm_rsb *r) 2648c2ecf20Sopenharmony_ci{ 2658c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_list_lock); 2688c2ecf20Sopenharmony_ci list_del_init(&r->res_recover_list); 2698c2ecf20Sopenharmony_ci ls->ls_recover_list_count--; 2708c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_list_lock); 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci dlm_put_rsb(r); 2738c2ecf20Sopenharmony_ci} 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_cistatic void recover_list_clear(struct dlm_ls *ls) 2768c2ecf20Sopenharmony_ci{ 2778c2ecf20Sopenharmony_ci struct dlm_rsb *r, *s; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_list_lock); 2808c2ecf20Sopenharmony_ci list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) { 2818c2ecf20Sopenharmony_ci list_del_init(&r->res_recover_list); 2828c2ecf20Sopenharmony_ci r->res_recover_locks_count = 0; 2838c2ecf20Sopenharmony_ci dlm_put_rsb(r); 2848c2ecf20Sopenharmony_ci ls->ls_recover_list_count--; 2858c2ecf20Sopenharmony_ci } 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci if (ls->ls_recover_list_count != 0) { 2888c2ecf20Sopenharmony_ci log_error(ls, "warning: recover_list_count %d", 2898c2ecf20Sopenharmony_ci ls->ls_recover_list_count); 2908c2ecf20Sopenharmony_ci ls->ls_recover_list_count = 0; 2918c2ecf20Sopenharmony_ci } 2928c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_list_lock); 2938c2ecf20Sopenharmony_ci} 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_cistatic int recover_idr_empty(struct dlm_ls *ls) 2968c2ecf20Sopenharmony_ci{ 2978c2ecf20Sopenharmony_ci int empty = 1; 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_idr_lock); 3008c2ecf20Sopenharmony_ci if (ls->ls_recover_list_count) 3018c2ecf20Sopenharmony_ci empty = 0; 3028c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_idr_lock); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci return empty; 3058c2ecf20Sopenharmony_ci} 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_cistatic int recover_idr_add(struct dlm_rsb *r) 3088c2ecf20Sopenharmony_ci{ 3098c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 3108c2ecf20Sopenharmony_ci int rv; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci idr_preload(GFP_NOFS); 3138c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_idr_lock); 3148c2ecf20Sopenharmony_ci if (r->res_id) { 3158c2ecf20Sopenharmony_ci rv = -1; 3168c2ecf20Sopenharmony_ci goto out_unlock; 3178c2ecf20Sopenharmony_ci } 3188c2ecf20Sopenharmony_ci rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT); 3198c2ecf20Sopenharmony_ci if (rv < 0) 3208c2ecf20Sopenharmony_ci goto out_unlock; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci r->res_id = rv; 3238c2ecf20Sopenharmony_ci ls->ls_recover_list_count++; 3248c2ecf20Sopenharmony_ci dlm_hold_rsb(r); 3258c2ecf20Sopenharmony_ci rv = 0; 3268c2ecf20Sopenharmony_ciout_unlock: 3278c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_idr_lock); 3288c2ecf20Sopenharmony_ci idr_preload_end(); 3298c2ecf20Sopenharmony_ci return rv; 3308c2ecf20Sopenharmony_ci} 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_cistatic void recover_idr_del(struct dlm_rsb *r) 3338c2ecf20Sopenharmony_ci{ 3348c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_idr_lock); 3378c2ecf20Sopenharmony_ci idr_remove(&ls->ls_recover_idr, r->res_id); 3388c2ecf20Sopenharmony_ci r->res_id = 0; 3398c2ecf20Sopenharmony_ci ls->ls_recover_list_count--; 3408c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_idr_lock); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci dlm_put_rsb(r); 3438c2ecf20Sopenharmony_ci} 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_cistatic struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) 3468c2ecf20Sopenharmony_ci{ 3478c2ecf20Sopenharmony_ci struct dlm_rsb *r; 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_idr_lock); 3508c2ecf20Sopenharmony_ci r = idr_find(&ls->ls_recover_idr, (int)id); 3518c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_idr_lock); 3528c2ecf20Sopenharmony_ci return r; 3538c2ecf20Sopenharmony_ci} 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_cistatic void recover_idr_clear(struct dlm_ls *ls) 3568c2ecf20Sopenharmony_ci{ 3578c2ecf20Sopenharmony_ci struct dlm_rsb *r; 3588c2ecf20Sopenharmony_ci int id; 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci spin_lock(&ls->ls_recover_idr_lock); 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci idr_for_each_entry(&ls->ls_recover_idr, r, id) { 3638c2ecf20Sopenharmony_ci idr_remove(&ls->ls_recover_idr, id); 3648c2ecf20Sopenharmony_ci r->res_id = 0; 3658c2ecf20Sopenharmony_ci r->res_recover_locks_count = 0; 3668c2ecf20Sopenharmony_ci ls->ls_recover_list_count--; 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci dlm_put_rsb(r); 3698c2ecf20Sopenharmony_ci } 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci if (ls->ls_recover_list_count != 0) { 3728c2ecf20Sopenharmony_ci log_error(ls, "warning: recover_list_count %d", 3738c2ecf20Sopenharmony_ci ls->ls_recover_list_count); 3748c2ecf20Sopenharmony_ci ls->ls_recover_list_count = 0; 3758c2ecf20Sopenharmony_ci } 3768c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_recover_idr_lock); 3778c2ecf20Sopenharmony_ci} 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci/* Master recovery: find new master node for rsb's that were 3818c2ecf20Sopenharmony_ci mastered on nodes that have been removed. 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci dlm_recover_masters 3848c2ecf20Sopenharmony_ci recover_master 3858c2ecf20Sopenharmony_ci dlm_send_rcom_lookup -> receive_rcom_lookup 3868c2ecf20Sopenharmony_ci dlm_dir_lookup 3878c2ecf20Sopenharmony_ci receive_rcom_lookup_reply <- 3888c2ecf20Sopenharmony_ci dlm_recover_master_reply 3898c2ecf20Sopenharmony_ci set_new_master 3908c2ecf20Sopenharmony_ci set_master_lkbs 3918c2ecf20Sopenharmony_ci set_lock_master 3928c2ecf20Sopenharmony_ci*/ 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci/* 3958c2ecf20Sopenharmony_ci * Set the lock master for all LKBs in a lock queue 3968c2ecf20Sopenharmony_ci * If we are the new master of the rsb, we may have received new 3978c2ecf20Sopenharmony_ci * MSTCPY locks from other nodes already which we need to ignore 3988c2ecf20Sopenharmony_ci * when setting the new nodeid. 3998c2ecf20Sopenharmony_ci */ 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_cistatic void set_lock_master(struct list_head *queue, int nodeid) 4028c2ecf20Sopenharmony_ci{ 4038c2ecf20Sopenharmony_ci struct dlm_lkb *lkb; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci list_for_each_entry(lkb, queue, lkb_statequeue) { 4068c2ecf20Sopenharmony_ci if (!(lkb->lkb_flags & DLM_IFL_MSTCPY)) { 4078c2ecf20Sopenharmony_ci lkb->lkb_nodeid = nodeid; 4088c2ecf20Sopenharmony_ci lkb->lkb_remid = 0; 4098c2ecf20Sopenharmony_ci } 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_cistatic void set_master_lkbs(struct dlm_rsb *r) 4148c2ecf20Sopenharmony_ci{ 4158c2ecf20Sopenharmony_ci set_lock_master(&r->res_grantqueue, r->res_nodeid); 4168c2ecf20Sopenharmony_ci set_lock_master(&r->res_convertqueue, r->res_nodeid); 4178c2ecf20Sopenharmony_ci set_lock_master(&r->res_waitqueue, r->res_nodeid); 4188c2ecf20Sopenharmony_ci} 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci/* 4218c2ecf20Sopenharmony_ci * Propagate the new master nodeid to locks 4228c2ecf20Sopenharmony_ci * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. 4238c2ecf20Sopenharmony_ci * The NEW_MASTER2 flag tells recover_lvb() and recover_grant() which 4248c2ecf20Sopenharmony_ci * rsb's to consider. 4258c2ecf20Sopenharmony_ci */ 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_cistatic void set_new_master(struct dlm_rsb *r) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci set_master_lkbs(r); 4308c2ecf20Sopenharmony_ci rsb_set_flag(r, RSB_NEW_MASTER); 4318c2ecf20Sopenharmony_ci rsb_set_flag(r, RSB_NEW_MASTER2); 4328c2ecf20Sopenharmony_ci} 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci/* 4358c2ecf20Sopenharmony_ci * We do async lookups on rsb's that need new masters. The rsb's 4368c2ecf20Sopenharmony_ci * waiting for a lookup reply are kept on the recover_list. 4378c2ecf20Sopenharmony_ci * 4388c2ecf20Sopenharmony_ci * Another node recovering the master may have sent us a rcom lookup, 4398c2ecf20Sopenharmony_ci * and our dlm_master_lookup() set it as the new master, along with 4408c2ecf20Sopenharmony_ci * NEW_MASTER so that we'll recover it here (this implies dir_nodeid 4418c2ecf20Sopenharmony_ci * equals our_nodeid below). 4428c2ecf20Sopenharmony_ci */ 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic int recover_master(struct dlm_rsb *r, unsigned int *count) 4458c2ecf20Sopenharmony_ci{ 4468c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 4478c2ecf20Sopenharmony_ci int our_nodeid, dir_nodeid; 4488c2ecf20Sopenharmony_ci int is_removed = 0; 4498c2ecf20Sopenharmony_ci int error; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci if (is_master(r)) 4528c2ecf20Sopenharmony_ci return 0; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci is_removed = dlm_is_removed(ls, r->res_nodeid); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci if (!is_removed && !rsb_flag(r, RSB_NEW_MASTER)) 4578c2ecf20Sopenharmony_ci return 0; 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci our_nodeid = dlm_our_nodeid(); 4608c2ecf20Sopenharmony_ci dir_nodeid = dlm_dir_nodeid(r); 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci if (dir_nodeid == our_nodeid) { 4638c2ecf20Sopenharmony_ci if (is_removed) { 4648c2ecf20Sopenharmony_ci r->res_master_nodeid = our_nodeid; 4658c2ecf20Sopenharmony_ci r->res_nodeid = 0; 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci /* set master of lkbs to ourself when is_removed, or to 4698c2ecf20Sopenharmony_ci another new master which we set along with NEW_MASTER 4708c2ecf20Sopenharmony_ci in dlm_master_lookup */ 4718c2ecf20Sopenharmony_ci set_new_master(r); 4728c2ecf20Sopenharmony_ci error = 0; 4738c2ecf20Sopenharmony_ci } else { 4748c2ecf20Sopenharmony_ci recover_idr_add(r); 4758c2ecf20Sopenharmony_ci error = dlm_send_rcom_lookup(r, dir_nodeid); 4768c2ecf20Sopenharmony_ci } 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci (*count)++; 4798c2ecf20Sopenharmony_ci return error; 4808c2ecf20Sopenharmony_ci} 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci/* 4838c2ecf20Sopenharmony_ci * All MSTCPY locks are purged and rebuilt, even if the master stayed the same. 4848c2ecf20Sopenharmony_ci * This is necessary because recovery can be started, aborted and restarted, 4858c2ecf20Sopenharmony_ci * causing the master nodeid to briefly change during the aborted recovery, and 4868c2ecf20Sopenharmony_ci * change back to the original value in the second recovery. The MSTCPY locks 4878c2ecf20Sopenharmony_ci * may or may not have been purged during the aborted recovery. Another node 4888c2ecf20Sopenharmony_ci * with an outstanding request in waiters list and a request reply saved in the 4898c2ecf20Sopenharmony_ci * requestqueue, cannot know whether it should ignore the reply and resend the 4908c2ecf20Sopenharmony_ci * request, or accept the reply and complete the request. It must do the 4918c2ecf20Sopenharmony_ci * former if the remote node purged MSTCPY locks, and it must do the later if 4928c2ecf20Sopenharmony_ci * the remote node did not. This is solved by always purging MSTCPY locks, in 4938c2ecf20Sopenharmony_ci * which case, the request reply would always be ignored and the request 4948c2ecf20Sopenharmony_ci * resent. 4958c2ecf20Sopenharmony_ci */ 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_cistatic int recover_master_static(struct dlm_rsb *r, unsigned int *count) 4988c2ecf20Sopenharmony_ci{ 4998c2ecf20Sopenharmony_ci int dir_nodeid = dlm_dir_nodeid(r); 5008c2ecf20Sopenharmony_ci int new_master = dir_nodeid; 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci if (dir_nodeid == dlm_our_nodeid()) 5038c2ecf20Sopenharmony_ci new_master = 0; 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci dlm_purge_mstcpy_locks(r); 5068c2ecf20Sopenharmony_ci r->res_master_nodeid = dir_nodeid; 5078c2ecf20Sopenharmony_ci r->res_nodeid = new_master; 5088c2ecf20Sopenharmony_ci set_new_master(r); 5098c2ecf20Sopenharmony_ci (*count)++; 5108c2ecf20Sopenharmony_ci return 0; 5118c2ecf20Sopenharmony_ci} 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci/* 5148c2ecf20Sopenharmony_ci * Go through local root resources and for each rsb which has a master which 5158c2ecf20Sopenharmony_ci * has departed, get the new master nodeid from the directory. The dir will 5168c2ecf20Sopenharmony_ci * assign mastery to the first node to look up the new master. That means 5178c2ecf20Sopenharmony_ci * we'll discover in this lookup if we're the new master of any rsb's. 5188c2ecf20Sopenharmony_ci * 5198c2ecf20Sopenharmony_ci * We fire off all the dir lookup requests individually and asynchronously to 5208c2ecf20Sopenharmony_ci * the correct dir node. 5218c2ecf20Sopenharmony_ci */ 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_ciint dlm_recover_masters(struct dlm_ls *ls) 5248c2ecf20Sopenharmony_ci{ 5258c2ecf20Sopenharmony_ci struct dlm_rsb *r; 5268c2ecf20Sopenharmony_ci unsigned int total = 0; 5278c2ecf20Sopenharmony_ci unsigned int count = 0; 5288c2ecf20Sopenharmony_ci int nodir = dlm_no_directory(ls); 5298c2ecf20Sopenharmony_ci int error; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_masters"); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 5348c2ecf20Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 5358c2ecf20Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 5368c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 5378c2ecf20Sopenharmony_ci error = -EINTR; 5388c2ecf20Sopenharmony_ci goto out; 5398c2ecf20Sopenharmony_ci } 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci lock_rsb(r); 5428c2ecf20Sopenharmony_ci if (nodir) 5438c2ecf20Sopenharmony_ci error = recover_master_static(r, &count); 5448c2ecf20Sopenharmony_ci else 5458c2ecf20Sopenharmony_ci error = recover_master(r, &count); 5468c2ecf20Sopenharmony_ci unlock_rsb(r); 5478c2ecf20Sopenharmony_ci cond_resched(); 5488c2ecf20Sopenharmony_ci total++; 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci if (error) { 5518c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 5528c2ecf20Sopenharmony_ci goto out; 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci } 5558c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_masters %u of %u", count, total); 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci error = dlm_wait_function(ls, &recover_idr_empty); 5608c2ecf20Sopenharmony_ci out: 5618c2ecf20Sopenharmony_ci if (error) 5628c2ecf20Sopenharmony_ci recover_idr_clear(ls); 5638c2ecf20Sopenharmony_ci return error; 5648c2ecf20Sopenharmony_ci} 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ciint dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) 5678c2ecf20Sopenharmony_ci{ 5688c2ecf20Sopenharmony_ci struct dlm_rsb *r; 5698c2ecf20Sopenharmony_ci int ret_nodeid, new_master; 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci r = recover_idr_find(ls, rc->rc_id); 5728c2ecf20Sopenharmony_ci if (!r) { 5738c2ecf20Sopenharmony_ci log_error(ls, "dlm_recover_master_reply no id %llx", 5748c2ecf20Sopenharmony_ci (unsigned long long)rc->rc_id); 5758c2ecf20Sopenharmony_ci goto out; 5768c2ecf20Sopenharmony_ci } 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci ret_nodeid = rc->rc_result; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci if (ret_nodeid == dlm_our_nodeid()) 5818c2ecf20Sopenharmony_ci new_master = 0; 5828c2ecf20Sopenharmony_ci else 5838c2ecf20Sopenharmony_ci new_master = ret_nodeid; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci lock_rsb(r); 5868c2ecf20Sopenharmony_ci r->res_master_nodeid = ret_nodeid; 5878c2ecf20Sopenharmony_ci r->res_nodeid = new_master; 5888c2ecf20Sopenharmony_ci set_new_master(r); 5898c2ecf20Sopenharmony_ci unlock_rsb(r); 5908c2ecf20Sopenharmony_ci recover_idr_del(r); 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci if (recover_idr_empty(ls)) 5938c2ecf20Sopenharmony_ci wake_up(&ls->ls_wait_general); 5948c2ecf20Sopenharmony_ci out: 5958c2ecf20Sopenharmony_ci return 0; 5968c2ecf20Sopenharmony_ci} 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci/* Lock recovery: rebuild the process-copy locks we hold on a 6008c2ecf20Sopenharmony_ci remastered rsb on the new rsb master. 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci dlm_recover_locks 6038c2ecf20Sopenharmony_ci recover_locks 6048c2ecf20Sopenharmony_ci recover_locks_queue 6058c2ecf20Sopenharmony_ci dlm_send_rcom_lock -> receive_rcom_lock 6068c2ecf20Sopenharmony_ci dlm_recover_master_copy 6078c2ecf20Sopenharmony_ci receive_rcom_lock_reply <- 6088c2ecf20Sopenharmony_ci dlm_recover_process_copy 6098c2ecf20Sopenharmony_ci*/ 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci/* 6138c2ecf20Sopenharmony_ci * keep a count of the number of lkb's we send to the new master; when we get 6148c2ecf20Sopenharmony_ci * an equal number of replies then recovery for the rsb is done 6158c2ecf20Sopenharmony_ci */ 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_cistatic int recover_locks_queue(struct dlm_rsb *r, struct list_head *head) 6188c2ecf20Sopenharmony_ci{ 6198c2ecf20Sopenharmony_ci struct dlm_lkb *lkb; 6208c2ecf20Sopenharmony_ci int error = 0; 6218c2ecf20Sopenharmony_ci 6228c2ecf20Sopenharmony_ci list_for_each_entry(lkb, head, lkb_statequeue) { 6238c2ecf20Sopenharmony_ci error = dlm_send_rcom_lock(r, lkb); 6248c2ecf20Sopenharmony_ci if (error) 6258c2ecf20Sopenharmony_ci break; 6268c2ecf20Sopenharmony_ci r->res_recover_locks_count++; 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci return error; 6308c2ecf20Sopenharmony_ci} 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_cistatic int recover_locks(struct dlm_rsb *r) 6338c2ecf20Sopenharmony_ci{ 6348c2ecf20Sopenharmony_ci int error = 0; 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci lock_rsb(r); 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r);); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci error = recover_locks_queue(r, &r->res_grantqueue); 6418c2ecf20Sopenharmony_ci if (error) 6428c2ecf20Sopenharmony_ci goto out; 6438c2ecf20Sopenharmony_ci error = recover_locks_queue(r, &r->res_convertqueue); 6448c2ecf20Sopenharmony_ci if (error) 6458c2ecf20Sopenharmony_ci goto out; 6468c2ecf20Sopenharmony_ci error = recover_locks_queue(r, &r->res_waitqueue); 6478c2ecf20Sopenharmony_ci if (error) 6488c2ecf20Sopenharmony_ci goto out; 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci if (r->res_recover_locks_count) 6518c2ecf20Sopenharmony_ci recover_list_add(r); 6528c2ecf20Sopenharmony_ci else 6538c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_NEW_MASTER); 6548c2ecf20Sopenharmony_ci out: 6558c2ecf20Sopenharmony_ci unlock_rsb(r); 6568c2ecf20Sopenharmony_ci return error; 6578c2ecf20Sopenharmony_ci} 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ciint dlm_recover_locks(struct dlm_ls *ls) 6608c2ecf20Sopenharmony_ci{ 6618c2ecf20Sopenharmony_ci struct dlm_rsb *r; 6628c2ecf20Sopenharmony_ci int error, count = 0; 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 6658c2ecf20Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 6668c2ecf20Sopenharmony_ci if (is_master(r)) { 6678c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_NEW_MASTER); 6688c2ecf20Sopenharmony_ci continue; 6698c2ecf20Sopenharmony_ci } 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci if (!rsb_flag(r, RSB_NEW_MASTER)) 6728c2ecf20Sopenharmony_ci continue; 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci if (dlm_recovery_stopped(ls)) { 6758c2ecf20Sopenharmony_ci error = -EINTR; 6768c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 6778c2ecf20Sopenharmony_ci goto out; 6788c2ecf20Sopenharmony_ci } 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci error = recover_locks(r); 6818c2ecf20Sopenharmony_ci if (error) { 6828c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 6838c2ecf20Sopenharmony_ci goto out; 6848c2ecf20Sopenharmony_ci } 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci count += r->res_recover_locks_count; 6878c2ecf20Sopenharmony_ci } 6888c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_locks %d out", count); 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci error = dlm_wait_function(ls, &recover_list_empty); 6938c2ecf20Sopenharmony_ci out: 6948c2ecf20Sopenharmony_ci if (error) 6958c2ecf20Sopenharmony_ci recover_list_clear(ls); 6968c2ecf20Sopenharmony_ci return error; 6978c2ecf20Sopenharmony_ci} 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_civoid dlm_recovered_lock(struct dlm_rsb *r) 7008c2ecf20Sopenharmony_ci{ 7018c2ecf20Sopenharmony_ci DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r);); 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci r->res_recover_locks_count--; 7048c2ecf20Sopenharmony_ci if (!r->res_recover_locks_count) { 7058c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_NEW_MASTER); 7068c2ecf20Sopenharmony_ci recover_list_del(r); 7078c2ecf20Sopenharmony_ci } 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci if (recover_list_empty(r->res_ls)) 7108c2ecf20Sopenharmony_ci wake_up(&r->res_ls->ls_wait_general); 7118c2ecf20Sopenharmony_ci} 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci/* 7148c2ecf20Sopenharmony_ci * The lvb needs to be recovered on all master rsb's. This includes setting 7158c2ecf20Sopenharmony_ci * the VALNOTVALID flag if necessary, and determining the correct lvb contents 7168c2ecf20Sopenharmony_ci * based on the lvb's of the locks held on the rsb. 7178c2ecf20Sopenharmony_ci * 7188c2ecf20Sopenharmony_ci * RSB_VALNOTVALID is set in two cases: 7198c2ecf20Sopenharmony_ci * 7208c2ecf20Sopenharmony_ci * 1. we are master, but not new, and we purged an EX/PW lock held by a 7218c2ecf20Sopenharmony_ci * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL) 7228c2ecf20Sopenharmony_ci * 7238c2ecf20Sopenharmony_ci * 2. we are a new master, and there are only NL/CR locks left. 7248c2ecf20Sopenharmony_ci * (We could probably improve this by only invaliding in this way when 7258c2ecf20Sopenharmony_ci * the previous master left uncleanly. VMS docs mention that.) 7268c2ecf20Sopenharmony_ci * 7278c2ecf20Sopenharmony_ci * The LVB contents are only considered for changing when this is a new master 7288c2ecf20Sopenharmony_ci * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with 7298c2ecf20Sopenharmony_ci * mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken 7308c2ecf20Sopenharmony_ci * from the lkb with the largest lvb sequence number. 7318c2ecf20Sopenharmony_ci */ 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_cistatic void recover_lvb(struct dlm_rsb *r) 7348c2ecf20Sopenharmony_ci{ 7358c2ecf20Sopenharmony_ci struct dlm_lkb *big_lkb = NULL, *iter, *high_lkb = NULL; 7368c2ecf20Sopenharmony_ci uint32_t high_seq = 0; 7378c2ecf20Sopenharmony_ci int lock_lvb_exists = 0; 7388c2ecf20Sopenharmony_ci int lvblen = r->res_ls->ls_lvblen; 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci if (!rsb_flag(r, RSB_NEW_MASTER2) && 7418c2ecf20Sopenharmony_ci rsb_flag(r, RSB_RECOVER_LVB_INVAL)) { 7428c2ecf20Sopenharmony_ci /* case 1 above */ 7438c2ecf20Sopenharmony_ci rsb_set_flag(r, RSB_VALNOTVALID); 7448c2ecf20Sopenharmony_ci return; 7458c2ecf20Sopenharmony_ci } 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci if (!rsb_flag(r, RSB_NEW_MASTER2)) 7488c2ecf20Sopenharmony_ci return; 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci /* we are the new master, so figure out if VALNOTVALID should 7518c2ecf20Sopenharmony_ci be set, and set the rsb lvb from the best lkb available. */ 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci list_for_each_entry(iter, &r->res_grantqueue, lkb_statequeue) { 7548c2ecf20Sopenharmony_ci if (!(iter->lkb_exflags & DLM_LKF_VALBLK)) 7558c2ecf20Sopenharmony_ci continue; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci lock_lvb_exists = 1; 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_ci if (iter->lkb_grmode > DLM_LOCK_CR) { 7608c2ecf20Sopenharmony_ci big_lkb = iter; 7618c2ecf20Sopenharmony_ci goto setflag; 7628c2ecf20Sopenharmony_ci } 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) { 7658c2ecf20Sopenharmony_ci high_lkb = iter; 7668c2ecf20Sopenharmony_ci high_seq = iter->lkb_lvbseq; 7678c2ecf20Sopenharmony_ci } 7688c2ecf20Sopenharmony_ci } 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_ci list_for_each_entry(iter, &r->res_convertqueue, lkb_statequeue) { 7718c2ecf20Sopenharmony_ci if (!(iter->lkb_exflags & DLM_LKF_VALBLK)) 7728c2ecf20Sopenharmony_ci continue; 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci lock_lvb_exists = 1; 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci if (iter->lkb_grmode > DLM_LOCK_CR) { 7778c2ecf20Sopenharmony_ci big_lkb = iter; 7788c2ecf20Sopenharmony_ci goto setflag; 7798c2ecf20Sopenharmony_ci } 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) { 7828c2ecf20Sopenharmony_ci high_lkb = iter; 7838c2ecf20Sopenharmony_ci high_seq = iter->lkb_lvbseq; 7848c2ecf20Sopenharmony_ci } 7858c2ecf20Sopenharmony_ci } 7868c2ecf20Sopenharmony_ci 7878c2ecf20Sopenharmony_ci setflag: 7888c2ecf20Sopenharmony_ci if (!lock_lvb_exists) 7898c2ecf20Sopenharmony_ci goto out; 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci /* lvb is invalidated if only NL/CR locks remain */ 7928c2ecf20Sopenharmony_ci if (!big_lkb) 7938c2ecf20Sopenharmony_ci rsb_set_flag(r, RSB_VALNOTVALID); 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci if (!r->res_lvbptr) { 7968c2ecf20Sopenharmony_ci r->res_lvbptr = dlm_allocate_lvb(r->res_ls); 7978c2ecf20Sopenharmony_ci if (!r->res_lvbptr) 7988c2ecf20Sopenharmony_ci goto out; 7998c2ecf20Sopenharmony_ci } 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci if (big_lkb) { 8028c2ecf20Sopenharmony_ci r->res_lvbseq = big_lkb->lkb_lvbseq; 8038c2ecf20Sopenharmony_ci memcpy(r->res_lvbptr, big_lkb->lkb_lvbptr, lvblen); 8048c2ecf20Sopenharmony_ci } else if (high_lkb) { 8058c2ecf20Sopenharmony_ci r->res_lvbseq = high_lkb->lkb_lvbseq; 8068c2ecf20Sopenharmony_ci memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen); 8078c2ecf20Sopenharmony_ci } else { 8088c2ecf20Sopenharmony_ci r->res_lvbseq = 0; 8098c2ecf20Sopenharmony_ci memset(r->res_lvbptr, 0, lvblen); 8108c2ecf20Sopenharmony_ci } 8118c2ecf20Sopenharmony_ci out: 8128c2ecf20Sopenharmony_ci return; 8138c2ecf20Sopenharmony_ci} 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci/* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks 8168c2ecf20Sopenharmony_ci converting PR->CW or CW->PR need to have their lkb_grmode set. */ 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_cistatic void recover_conversion(struct dlm_rsb *r) 8198c2ecf20Sopenharmony_ci{ 8208c2ecf20Sopenharmony_ci struct dlm_ls *ls = r->res_ls; 8218c2ecf20Sopenharmony_ci struct dlm_lkb *lkb; 8228c2ecf20Sopenharmony_ci int grmode = -1; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { 8258c2ecf20Sopenharmony_ci if (lkb->lkb_grmode == DLM_LOCK_PR || 8268c2ecf20Sopenharmony_ci lkb->lkb_grmode == DLM_LOCK_CW) { 8278c2ecf20Sopenharmony_ci grmode = lkb->lkb_grmode; 8288c2ecf20Sopenharmony_ci break; 8298c2ecf20Sopenharmony_ci } 8308c2ecf20Sopenharmony_ci } 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) { 8338c2ecf20Sopenharmony_ci if (lkb->lkb_grmode != DLM_LOCK_IV) 8348c2ecf20Sopenharmony_ci continue; 8358c2ecf20Sopenharmony_ci if (grmode == -1) { 8368c2ecf20Sopenharmony_ci log_debug(ls, "recover_conversion %x set gr to rq %d", 8378c2ecf20Sopenharmony_ci lkb->lkb_id, lkb->lkb_rqmode); 8388c2ecf20Sopenharmony_ci lkb->lkb_grmode = lkb->lkb_rqmode; 8398c2ecf20Sopenharmony_ci } else { 8408c2ecf20Sopenharmony_ci log_debug(ls, "recover_conversion %x set gr %d", 8418c2ecf20Sopenharmony_ci lkb->lkb_id, grmode); 8428c2ecf20Sopenharmony_ci lkb->lkb_grmode = grmode; 8438c2ecf20Sopenharmony_ci } 8448c2ecf20Sopenharmony_ci } 8458c2ecf20Sopenharmony_ci} 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci/* We've become the new master for this rsb and waiting/converting locks may 8488c2ecf20Sopenharmony_ci need to be granted in dlm_recover_grant() due to locks that may have 8498c2ecf20Sopenharmony_ci existed from a removed node. */ 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_cistatic void recover_grant(struct dlm_rsb *r) 8528c2ecf20Sopenharmony_ci{ 8538c2ecf20Sopenharmony_ci if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue)) 8548c2ecf20Sopenharmony_ci rsb_set_flag(r, RSB_RECOVER_GRANT); 8558c2ecf20Sopenharmony_ci} 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_civoid dlm_recover_rsbs(struct dlm_ls *ls) 8588c2ecf20Sopenharmony_ci{ 8598c2ecf20Sopenharmony_ci struct dlm_rsb *r; 8608c2ecf20Sopenharmony_ci unsigned int count = 0; 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_ci down_read(&ls->ls_root_sem); 8638c2ecf20Sopenharmony_ci list_for_each_entry(r, &ls->ls_root_list, res_root_list) { 8648c2ecf20Sopenharmony_ci lock_rsb(r); 8658c2ecf20Sopenharmony_ci if (is_master(r)) { 8668c2ecf20Sopenharmony_ci if (rsb_flag(r, RSB_RECOVER_CONVERT)) 8678c2ecf20Sopenharmony_ci recover_conversion(r); 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci /* recover lvb before granting locks so the updated 8708c2ecf20Sopenharmony_ci lvb/VALNOTVALID is presented in the completion */ 8718c2ecf20Sopenharmony_ci recover_lvb(r); 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci if (rsb_flag(r, RSB_NEW_MASTER2)) 8748c2ecf20Sopenharmony_ci recover_grant(r); 8758c2ecf20Sopenharmony_ci count++; 8768c2ecf20Sopenharmony_ci } else { 8778c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_VALNOTVALID); 8788c2ecf20Sopenharmony_ci } 8798c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_RECOVER_CONVERT); 8808c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL); 8818c2ecf20Sopenharmony_ci rsb_clear_flag(r, RSB_NEW_MASTER2); 8828c2ecf20Sopenharmony_ci unlock_rsb(r); 8838c2ecf20Sopenharmony_ci } 8848c2ecf20Sopenharmony_ci up_read(&ls->ls_root_sem); 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci if (count) 8878c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_recover_rsbs %d done", count); 8888c2ecf20Sopenharmony_ci} 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci/* Create a single list of all root rsb's to be used during recovery */ 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ciint dlm_create_root_list(struct dlm_ls *ls) 8938c2ecf20Sopenharmony_ci{ 8948c2ecf20Sopenharmony_ci struct rb_node *n; 8958c2ecf20Sopenharmony_ci struct dlm_rsb *r; 8968c2ecf20Sopenharmony_ci int i, error = 0; 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci down_write(&ls->ls_root_sem); 8998c2ecf20Sopenharmony_ci if (!list_empty(&ls->ls_root_list)) { 9008c2ecf20Sopenharmony_ci log_error(ls, "root list not empty"); 9018c2ecf20Sopenharmony_ci error = -EINVAL; 9028c2ecf20Sopenharmony_ci goto out; 9038c2ecf20Sopenharmony_ci } 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci for (i = 0; i < ls->ls_rsbtbl_size; i++) { 9068c2ecf20Sopenharmony_ci spin_lock(&ls->ls_rsbtbl[i].lock); 9078c2ecf20Sopenharmony_ci for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { 9088c2ecf20Sopenharmony_ci r = rb_entry(n, struct dlm_rsb, res_hashnode); 9098c2ecf20Sopenharmony_ci list_add(&r->res_root_list, &ls->ls_root_list); 9108c2ecf20Sopenharmony_ci dlm_hold_rsb(r); 9118c2ecf20Sopenharmony_ci } 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[i].toss)) 9148c2ecf20Sopenharmony_ci log_error(ls, "dlm_create_root_list toss not empty"); 9158c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_rsbtbl[i].lock); 9168c2ecf20Sopenharmony_ci } 9178c2ecf20Sopenharmony_ci out: 9188c2ecf20Sopenharmony_ci up_write(&ls->ls_root_sem); 9198c2ecf20Sopenharmony_ci return error; 9208c2ecf20Sopenharmony_ci} 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_civoid dlm_release_root_list(struct dlm_ls *ls) 9238c2ecf20Sopenharmony_ci{ 9248c2ecf20Sopenharmony_ci struct dlm_rsb *r, *safe; 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci down_write(&ls->ls_root_sem); 9278c2ecf20Sopenharmony_ci list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) { 9288c2ecf20Sopenharmony_ci list_del_init(&r->res_root_list); 9298c2ecf20Sopenharmony_ci dlm_put_rsb(r); 9308c2ecf20Sopenharmony_ci } 9318c2ecf20Sopenharmony_ci up_write(&ls->ls_root_sem); 9328c2ecf20Sopenharmony_ci} 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_civoid dlm_clear_toss(struct dlm_ls *ls) 9358c2ecf20Sopenharmony_ci{ 9368c2ecf20Sopenharmony_ci struct rb_node *n, *next; 9378c2ecf20Sopenharmony_ci struct dlm_rsb *r; 9388c2ecf20Sopenharmony_ci unsigned int count = 0; 9398c2ecf20Sopenharmony_ci int i; 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci for (i = 0; i < ls->ls_rsbtbl_size; i++) { 9428c2ecf20Sopenharmony_ci spin_lock(&ls->ls_rsbtbl[i].lock); 9438c2ecf20Sopenharmony_ci for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { 9448c2ecf20Sopenharmony_ci next = rb_next(n); 9458c2ecf20Sopenharmony_ci r = rb_entry(n, struct dlm_rsb, res_hashnode); 9468c2ecf20Sopenharmony_ci rb_erase(n, &ls->ls_rsbtbl[i].toss); 9478c2ecf20Sopenharmony_ci dlm_free_rsb(r); 9488c2ecf20Sopenharmony_ci count++; 9498c2ecf20Sopenharmony_ci } 9508c2ecf20Sopenharmony_ci spin_unlock(&ls->ls_rsbtbl[i].lock); 9518c2ecf20Sopenharmony_ci } 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci if (count) 9548c2ecf20Sopenharmony_ci log_rinfo(ls, "dlm_clear_toss %u done", count); 9558c2ecf20Sopenharmony_ci} 9568c2ecf20Sopenharmony_ci 957