18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * dlmconvert.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * underlying calls for lock conversion 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright (C) 2004 Oracle. All rights reserved. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/module.h> 148c2ecf20Sopenharmony_ci#include <linux/fs.h> 158c2ecf20Sopenharmony_ci#include <linux/types.h> 168c2ecf20Sopenharmony_ci#include <linux/highmem.h> 178c2ecf20Sopenharmony_ci#include <linux/init.h> 188c2ecf20Sopenharmony_ci#include <linux/sysctl.h> 198c2ecf20Sopenharmony_ci#include <linux/random.h> 208c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 218c2ecf20Sopenharmony_ci#include <linux/socket.h> 228c2ecf20Sopenharmony_ci#include <linux/inet.h> 238c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#include "../cluster/heartbeat.h" 278c2ecf20Sopenharmony_ci#include "../cluster/nodemanager.h" 288c2ecf20Sopenharmony_ci#include "../cluster/tcp.h" 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#include "dlmapi.h" 318c2ecf20Sopenharmony_ci#include "dlmcommon.h" 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci#include "dlmconvert.h" 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci#define MLOG_MASK_PREFIX ML_DLM 368c2ecf20Sopenharmony_ci#include "../cluster/masklog.h" 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci/* NOTE: __dlmconvert_master is the only function in here that 398c2ecf20Sopenharmony_ci * needs a spinlock held on entry (res->spinlock) and it is the 408c2ecf20Sopenharmony_ci * only one that holds a lock on exit (res->spinlock). 418c2ecf20Sopenharmony_ci * All other functions in here need no locks and drop all of 428c2ecf20Sopenharmony_ci * the locks that they acquire. */ 438c2ecf20Sopenharmony_cistatic enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, 448c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 458c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, 468c2ecf20Sopenharmony_ci int type, int *call_ast, 478c2ecf20Sopenharmony_ci int *kick_thread); 488c2ecf20Sopenharmony_cistatic enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, 498c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 508c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, int type); 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci/* 538c2ecf20Sopenharmony_ci * this is only called directly by dlmlock(), and only when the 548c2ecf20Sopenharmony_ci * local node is the owner of the lockres 558c2ecf20Sopenharmony_ci * locking: 568c2ecf20Sopenharmony_ci * caller needs: none 578c2ecf20Sopenharmony_ci * taken: takes and drops res->spinlock 588c2ecf20Sopenharmony_ci * held on exit: none 598c2ecf20Sopenharmony_ci * returns: see __dlmconvert_master 608c2ecf20Sopenharmony_ci */ 618c2ecf20Sopenharmony_cienum dlm_status dlmconvert_master(struct dlm_ctxt *dlm, 628c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 638c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, int type) 648c2ecf20Sopenharmony_ci{ 658c2ecf20Sopenharmony_ci int call_ast = 0, kick_thread = 0; 668c2ecf20Sopenharmony_ci enum dlm_status status; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 698c2ecf20Sopenharmony_ci /* we are not in a network handler, this is fine */ 708c2ecf20Sopenharmony_ci __dlm_wait_on_lockres(res); 718c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 728c2ecf20Sopenharmony_ci res->state |= DLM_LOCK_RES_IN_PROGRESS; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci status = __dlmconvert_master(dlm, res, lock, flags, type, 758c2ecf20Sopenharmony_ci &call_ast, &kick_thread); 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 788c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 798c2ecf20Sopenharmony_ci wake_up(&res->wq); 808c2ecf20Sopenharmony_ci if (status != DLM_NORMAL && status != DLM_NOTQUEUED) 818c2ecf20Sopenharmony_ci dlm_error(status); 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci /* either queue the ast or release it */ 848c2ecf20Sopenharmony_ci if (call_ast) 858c2ecf20Sopenharmony_ci dlm_queue_ast(dlm, lock); 868c2ecf20Sopenharmony_ci else 878c2ecf20Sopenharmony_ci dlm_lockres_release_ast(dlm, res); 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci if (kick_thread) 908c2ecf20Sopenharmony_ci dlm_kick_thread(dlm, res); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci return status; 938c2ecf20Sopenharmony_ci} 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci/* performs lock conversion at the lockres master site 968c2ecf20Sopenharmony_ci * locking: 978c2ecf20Sopenharmony_ci * caller needs: res->spinlock 988c2ecf20Sopenharmony_ci * taken: takes and drops lock->spinlock 998c2ecf20Sopenharmony_ci * held on exit: res->spinlock 1008c2ecf20Sopenharmony_ci * returns: DLM_NORMAL, DLM_NOTQUEUED, DLM_DENIED 1018c2ecf20Sopenharmony_ci * call_ast: whether ast should be called for this lock 1028c2ecf20Sopenharmony_ci * kick_thread: whether dlm_kick_thread should be called 1038c2ecf20Sopenharmony_ci */ 1048c2ecf20Sopenharmony_cistatic enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, 1058c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 1068c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, 1078c2ecf20Sopenharmony_ci int type, int *call_ast, 1088c2ecf20Sopenharmony_ci int *kick_thread) 1098c2ecf20Sopenharmony_ci{ 1108c2ecf20Sopenharmony_ci enum dlm_status status = DLM_NORMAL; 1118c2ecf20Sopenharmony_ci struct dlm_lock *tmplock=NULL; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci mlog(0, "type=%d, convert_type=%d, new convert_type=%d\n", 1168c2ecf20Sopenharmony_ci lock->ml.type, lock->ml.convert_type, type); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci spin_lock(&lock->spinlock); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* already converting? */ 1218c2ecf20Sopenharmony_ci if (lock->ml.convert_type != LKM_IVMODE) { 1228c2ecf20Sopenharmony_ci mlog(ML_ERROR, "attempted to convert a lock with a lock " 1238c2ecf20Sopenharmony_ci "conversion pending\n"); 1248c2ecf20Sopenharmony_ci status = DLM_DENIED; 1258c2ecf20Sopenharmony_ci goto unlock_exit; 1268c2ecf20Sopenharmony_ci } 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci /* must be on grant queue to convert */ 1298c2ecf20Sopenharmony_ci if (!dlm_lock_on_list(&res->granted, lock)) { 1308c2ecf20Sopenharmony_ci mlog(ML_ERROR, "attempted to convert a lock not on grant " 1318c2ecf20Sopenharmony_ci "queue\n"); 1328c2ecf20Sopenharmony_ci status = DLM_DENIED; 1338c2ecf20Sopenharmony_ci goto unlock_exit; 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci if (flags & LKM_VALBLK) { 1378c2ecf20Sopenharmony_ci switch (lock->ml.type) { 1388c2ecf20Sopenharmony_ci case LKM_EXMODE: 1398c2ecf20Sopenharmony_ci /* EX + LKM_VALBLK + convert == set lvb */ 1408c2ecf20Sopenharmony_ci mlog(0, "will set lvb: converting %s->%s\n", 1418c2ecf20Sopenharmony_ci dlm_lock_mode_name(lock->ml.type), 1428c2ecf20Sopenharmony_ci dlm_lock_mode_name(type)); 1438c2ecf20Sopenharmony_ci lock->lksb->flags |= DLM_LKSB_PUT_LVB; 1448c2ecf20Sopenharmony_ci break; 1458c2ecf20Sopenharmony_ci case LKM_PRMODE: 1468c2ecf20Sopenharmony_ci case LKM_NLMODE: 1478c2ecf20Sopenharmony_ci /* refetch if new level is not NL */ 1488c2ecf20Sopenharmony_ci if (type > LKM_NLMODE) { 1498c2ecf20Sopenharmony_ci mlog(0, "will fetch new value into " 1508c2ecf20Sopenharmony_ci "lvb: converting %s->%s\n", 1518c2ecf20Sopenharmony_ci dlm_lock_mode_name(lock->ml.type), 1528c2ecf20Sopenharmony_ci dlm_lock_mode_name(type)); 1538c2ecf20Sopenharmony_ci lock->lksb->flags |= DLM_LKSB_GET_LVB; 1548c2ecf20Sopenharmony_ci } else { 1558c2ecf20Sopenharmony_ci mlog(0, "will NOT fetch new value " 1568c2ecf20Sopenharmony_ci "into lvb: converting %s->%s\n", 1578c2ecf20Sopenharmony_ci dlm_lock_mode_name(lock->ml.type), 1588c2ecf20Sopenharmony_ci dlm_lock_mode_name(type)); 1598c2ecf20Sopenharmony_ci flags &= ~(LKM_VALBLK); 1608c2ecf20Sopenharmony_ci } 1618c2ecf20Sopenharmony_ci break; 1628c2ecf20Sopenharmony_ci } 1638c2ecf20Sopenharmony_ci } 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci /* in-place downconvert? */ 1678c2ecf20Sopenharmony_ci if (type <= lock->ml.type) 1688c2ecf20Sopenharmony_ci goto grant; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* upconvert from here on */ 1718c2ecf20Sopenharmony_ci status = DLM_NORMAL; 1728c2ecf20Sopenharmony_ci list_for_each_entry(tmplock, &res->granted, list) { 1738c2ecf20Sopenharmony_ci if (tmplock == lock) 1748c2ecf20Sopenharmony_ci continue; 1758c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(tmplock->ml.type, type)) 1768c2ecf20Sopenharmony_ci goto switch_queues; 1778c2ecf20Sopenharmony_ci } 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci list_for_each_entry(tmplock, &res->converting, list) { 1808c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(tmplock->ml.type, type)) 1818c2ecf20Sopenharmony_ci goto switch_queues; 1828c2ecf20Sopenharmony_ci /* existing conversion requests take precedence */ 1838c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(tmplock->ml.convert_type, type)) 1848c2ecf20Sopenharmony_ci goto switch_queues; 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci /* fall thru to grant */ 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_cigrant: 1908c2ecf20Sopenharmony_ci mlog(0, "res %.*s, granting %s lock\n", res->lockname.len, 1918c2ecf20Sopenharmony_ci res->lockname.name, dlm_lock_mode_name(type)); 1928c2ecf20Sopenharmony_ci /* immediately grant the new lock type */ 1938c2ecf20Sopenharmony_ci lock->lksb->status = DLM_NORMAL; 1948c2ecf20Sopenharmony_ci if (lock->ml.node == dlm->node_num) 1958c2ecf20Sopenharmony_ci mlog(0, "doing in-place convert for nonlocal lock\n"); 1968c2ecf20Sopenharmony_ci lock->ml.type = type; 1978c2ecf20Sopenharmony_ci if (lock->lksb->flags & DLM_LKSB_PUT_LVB) 1988c2ecf20Sopenharmony_ci memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci /* 2018c2ecf20Sopenharmony_ci * Move the lock to the tail because it may be the only lock which has 2028c2ecf20Sopenharmony_ci * an invalid lvb. 2038c2ecf20Sopenharmony_ci */ 2048c2ecf20Sopenharmony_ci list_move_tail(&lock->list, &res->granted); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci status = DLM_NORMAL; 2078c2ecf20Sopenharmony_ci *call_ast = 1; 2088c2ecf20Sopenharmony_ci goto unlock_exit; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ciswitch_queues: 2118c2ecf20Sopenharmony_ci if (flags & LKM_NOQUEUE) { 2128c2ecf20Sopenharmony_ci mlog(0, "failed to convert NOQUEUE lock %.*s from " 2138c2ecf20Sopenharmony_ci "%d to %d...\n", res->lockname.len, res->lockname.name, 2148c2ecf20Sopenharmony_ci lock->ml.type, type); 2158c2ecf20Sopenharmony_ci status = DLM_NOTQUEUED; 2168c2ecf20Sopenharmony_ci goto unlock_exit; 2178c2ecf20Sopenharmony_ci } 2188c2ecf20Sopenharmony_ci mlog(0, "res %.*s, queueing...\n", res->lockname.len, 2198c2ecf20Sopenharmony_ci res->lockname.name); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci lock->ml.convert_type = type; 2228c2ecf20Sopenharmony_ci /* do not alter lock refcount. switching lists. */ 2238c2ecf20Sopenharmony_ci list_move_tail(&lock->list, &res->converting); 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ciunlock_exit: 2268c2ecf20Sopenharmony_ci spin_unlock(&lock->spinlock); 2278c2ecf20Sopenharmony_ci if (status == DLM_DENIED) { 2288c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci if (status == DLM_NORMAL) 2318c2ecf20Sopenharmony_ci *kick_thread = 1; 2328c2ecf20Sopenharmony_ci return status; 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_civoid dlm_revert_pending_convert(struct dlm_lock_resource *res, 2368c2ecf20Sopenharmony_ci struct dlm_lock *lock) 2378c2ecf20Sopenharmony_ci{ 2388c2ecf20Sopenharmony_ci /* do not alter lock refcount. switching lists. */ 2398c2ecf20Sopenharmony_ci list_move_tail(&lock->list, &res->granted); 2408c2ecf20Sopenharmony_ci lock->ml.convert_type = LKM_IVMODE; 2418c2ecf20Sopenharmony_ci lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci/* messages the master site to do lock conversion 2458c2ecf20Sopenharmony_ci * locking: 2468c2ecf20Sopenharmony_ci * caller needs: none 2478c2ecf20Sopenharmony_ci * taken: takes and drops res->spinlock, uses DLM_LOCK_RES_IN_PROGRESS 2488c2ecf20Sopenharmony_ci * held on exit: none 2498c2ecf20Sopenharmony_ci * returns: DLM_NORMAL, DLM_RECOVERING, status from remote node 2508c2ecf20Sopenharmony_ci */ 2518c2ecf20Sopenharmony_cienum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, 2528c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 2538c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, int type) 2548c2ecf20Sopenharmony_ci{ 2558c2ecf20Sopenharmony_ci enum dlm_status status; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, 2588c2ecf20Sopenharmony_ci lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 2618c2ecf20Sopenharmony_ci if (res->state & DLM_LOCK_RES_RECOVERING) { 2628c2ecf20Sopenharmony_ci mlog(0, "bailing out early since res is RECOVERING " 2638c2ecf20Sopenharmony_ci "on secondary queue\n"); 2648c2ecf20Sopenharmony_ci /* __dlm_print_one_lock_resource(res); */ 2658c2ecf20Sopenharmony_ci status = DLM_RECOVERING; 2668c2ecf20Sopenharmony_ci goto bail; 2678c2ecf20Sopenharmony_ci } 2688c2ecf20Sopenharmony_ci /* will exit this call with spinlock held */ 2698c2ecf20Sopenharmony_ci __dlm_wait_on_lockres(res); 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci if (lock->ml.convert_type != LKM_IVMODE) { 2728c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 2738c2ecf20Sopenharmony_ci mlog(ML_ERROR, "converting a remote lock that is already " 2748c2ecf20Sopenharmony_ci "converting! (cookie=%u:%llu, conv=%d)\n", 2758c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), 2768c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), 2778c2ecf20Sopenharmony_ci lock->ml.convert_type); 2788c2ecf20Sopenharmony_ci status = DLM_DENIED; 2798c2ecf20Sopenharmony_ci goto bail; 2808c2ecf20Sopenharmony_ci } 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) { 2838c2ecf20Sopenharmony_ci mlog(0, "last convert request returned DLM_RECOVERING, but " 2848c2ecf20Sopenharmony_ci "owner has already queued and sent ast to me. res %.*s, " 2858c2ecf20Sopenharmony_ci "(cookie=%u:%llu, type=%d, conv=%d)\n", 2868c2ecf20Sopenharmony_ci res->lockname.len, res->lockname.name, 2878c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), 2888c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), 2898c2ecf20Sopenharmony_ci lock->ml.type, lock->ml.convert_type); 2908c2ecf20Sopenharmony_ci status = DLM_NORMAL; 2918c2ecf20Sopenharmony_ci goto bail; 2928c2ecf20Sopenharmony_ci } 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci res->state |= DLM_LOCK_RES_IN_PROGRESS; 2958c2ecf20Sopenharmony_ci /* move lock to local convert queue */ 2968c2ecf20Sopenharmony_ci /* do not alter lock refcount. switching lists. */ 2978c2ecf20Sopenharmony_ci list_move_tail(&lock->list, &res->converting); 2988c2ecf20Sopenharmony_ci lock->convert_pending = 1; 2998c2ecf20Sopenharmony_ci lock->ml.convert_type = type; 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci if (flags & LKM_VALBLK) { 3028c2ecf20Sopenharmony_ci if (lock->ml.type == LKM_EXMODE) { 3038c2ecf20Sopenharmony_ci flags |= LKM_PUT_LVB; 3048c2ecf20Sopenharmony_ci lock->lksb->flags |= DLM_LKSB_PUT_LVB; 3058c2ecf20Sopenharmony_ci } else { 3068c2ecf20Sopenharmony_ci if (lock->ml.convert_type == LKM_NLMODE) 3078c2ecf20Sopenharmony_ci flags &= ~LKM_VALBLK; 3088c2ecf20Sopenharmony_ci else { 3098c2ecf20Sopenharmony_ci flags |= LKM_GET_LVB; 3108c2ecf20Sopenharmony_ci lock->lksb->flags |= DLM_LKSB_GET_LVB; 3118c2ecf20Sopenharmony_ci } 3128c2ecf20Sopenharmony_ci } 3138c2ecf20Sopenharmony_ci } 3148c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci /* no locks held here. 3178c2ecf20Sopenharmony_ci * need to wait for a reply as to whether it got queued or not. */ 3188c2ecf20Sopenharmony_ci status = dlm_send_remote_convert_request(dlm, res, lock, flags, type); 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 3218c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 3228c2ecf20Sopenharmony_ci /* if it failed, move it back to granted queue. 3238c2ecf20Sopenharmony_ci * if master returns DLM_NORMAL and then down before sending ast, 3248c2ecf20Sopenharmony_ci * it may have already been moved to granted queue, reset to 3258c2ecf20Sopenharmony_ci * DLM_RECOVERING and retry convert */ 3268c2ecf20Sopenharmony_ci if (status != DLM_NORMAL) { 3278c2ecf20Sopenharmony_ci if (status != DLM_NOTQUEUED) 3288c2ecf20Sopenharmony_ci dlm_error(status); 3298c2ecf20Sopenharmony_ci dlm_revert_pending_convert(res, lock); 3308c2ecf20Sopenharmony_ci } else if (!lock->convert_pending) { 3318c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, owner died and lock has been moved back " 3328c2ecf20Sopenharmony_ci "to granted list, retry convert.\n", 3338c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 3348c2ecf20Sopenharmony_ci status = DLM_RECOVERING; 3358c2ecf20Sopenharmony_ci } 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci lock->convert_pending = 0; 3388c2ecf20Sopenharmony_cibail: 3398c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci /* TODO: should this be a wake_one? */ 3428c2ecf20Sopenharmony_ci /* wake up any IN_PROGRESS waiters */ 3438c2ecf20Sopenharmony_ci wake_up(&res->wq); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci return status; 3468c2ecf20Sopenharmony_ci} 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci/* sends DLM_CONVERT_LOCK_MSG to master site 3498c2ecf20Sopenharmony_ci * locking: 3508c2ecf20Sopenharmony_ci * caller needs: none 3518c2ecf20Sopenharmony_ci * taken: none 3528c2ecf20Sopenharmony_ci * held on exit: none 3538c2ecf20Sopenharmony_ci * returns: DLM_NOLOCKMGR, status from remote node 3548c2ecf20Sopenharmony_ci */ 3558c2ecf20Sopenharmony_cistatic enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, 3568c2ecf20Sopenharmony_ci struct dlm_lock_resource *res, 3578c2ecf20Sopenharmony_ci struct dlm_lock *lock, int flags, int type) 3588c2ecf20Sopenharmony_ci{ 3598c2ecf20Sopenharmony_ci struct dlm_convert_lock convert; 3608c2ecf20Sopenharmony_ci int tmpret; 3618c2ecf20Sopenharmony_ci enum dlm_status ret; 3628c2ecf20Sopenharmony_ci int status = 0; 3638c2ecf20Sopenharmony_ci struct kvec vec[2]; 3648c2ecf20Sopenharmony_ci size_t veclen = 1; 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci mlog(0, "%.*s\n", res->lockname.len, res->lockname.name); 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci memset(&convert, 0, sizeof(struct dlm_convert_lock)); 3698c2ecf20Sopenharmony_ci convert.node_idx = dlm->node_num; 3708c2ecf20Sopenharmony_ci convert.requested_type = type; 3718c2ecf20Sopenharmony_ci convert.cookie = lock->ml.cookie; 3728c2ecf20Sopenharmony_ci convert.namelen = res->lockname.len; 3738c2ecf20Sopenharmony_ci convert.flags = cpu_to_be32(flags); 3748c2ecf20Sopenharmony_ci memcpy(convert.name, res->lockname.name, convert.namelen); 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci vec[0].iov_len = sizeof(struct dlm_convert_lock); 3778c2ecf20Sopenharmony_ci vec[0].iov_base = &convert; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci if (flags & LKM_PUT_LVB) { 3808c2ecf20Sopenharmony_ci /* extra data to send if we are updating lvb */ 3818c2ecf20Sopenharmony_ci vec[1].iov_len = DLM_LVB_LEN; 3828c2ecf20Sopenharmony_ci vec[1].iov_base = lock->lksb->lvb; 3838c2ecf20Sopenharmony_ci veclen++; 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci tmpret = o2net_send_message_vec(DLM_CONVERT_LOCK_MSG, dlm->key, 3878c2ecf20Sopenharmony_ci vec, veclen, res->owner, &status); 3888c2ecf20Sopenharmony_ci if (tmpret >= 0) { 3898c2ecf20Sopenharmony_ci // successfully sent and received 3908c2ecf20Sopenharmony_ci ret = status; // this is already a dlm_status 3918c2ecf20Sopenharmony_ci if (ret == DLM_RECOVERING) { 3928c2ecf20Sopenharmony_ci mlog(0, "node %u returned DLM_RECOVERING from convert " 3938c2ecf20Sopenharmony_ci "message!\n", res->owner); 3948c2ecf20Sopenharmony_ci } else if (ret == DLM_MIGRATING) { 3958c2ecf20Sopenharmony_ci mlog(0, "node %u returned DLM_MIGRATING from convert " 3968c2ecf20Sopenharmony_ci "message!\n", res->owner); 3978c2ecf20Sopenharmony_ci } else if (ret == DLM_FORWARD) { 3988c2ecf20Sopenharmony_ci mlog(0, "node %u returned DLM_FORWARD from convert " 3998c2ecf20Sopenharmony_ci "message!\n", res->owner); 4008c2ecf20Sopenharmony_ci } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED) 4018c2ecf20Sopenharmony_ci dlm_error(ret); 4028c2ecf20Sopenharmony_ci } else { 4038c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 4048c2ecf20Sopenharmony_ci "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key, 4058c2ecf20Sopenharmony_ci res->owner); 4068c2ecf20Sopenharmony_ci if (dlm_is_host_down(tmpret)) { 4078c2ecf20Sopenharmony_ci /* instead of logging the same network error over 4088c2ecf20Sopenharmony_ci * and over, sleep here and wait for the heartbeat 4098c2ecf20Sopenharmony_ci * to notice the node is dead. times out after 5s. */ 4108c2ecf20Sopenharmony_ci dlm_wait_for_node_death(dlm, res->owner, 4118c2ecf20Sopenharmony_ci DLM_NODE_DEATH_WAIT_MAX); 4128c2ecf20Sopenharmony_ci ret = DLM_RECOVERING; 4138c2ecf20Sopenharmony_ci mlog(0, "node %u died so returning DLM_RECOVERING " 4148c2ecf20Sopenharmony_ci "from convert message!\n", res->owner); 4158c2ecf20Sopenharmony_ci } else { 4168c2ecf20Sopenharmony_ci ret = dlm_err_to_dlm_status(tmpret); 4178c2ecf20Sopenharmony_ci } 4188c2ecf20Sopenharmony_ci } 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci return ret; 4218c2ecf20Sopenharmony_ci} 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci/* handler for DLM_CONVERT_LOCK_MSG on master site 4248c2ecf20Sopenharmony_ci * locking: 4258c2ecf20Sopenharmony_ci * caller needs: none 4268c2ecf20Sopenharmony_ci * taken: takes and drop res->spinlock 4278c2ecf20Sopenharmony_ci * held on exit: none 4288c2ecf20Sopenharmony_ci * returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS, 4298c2ecf20Sopenharmony_ci * status from __dlmconvert_master 4308c2ecf20Sopenharmony_ci */ 4318c2ecf20Sopenharmony_ciint dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, 4328c2ecf20Sopenharmony_ci void **ret_data) 4338c2ecf20Sopenharmony_ci{ 4348c2ecf20Sopenharmony_ci struct dlm_ctxt *dlm = data; 4358c2ecf20Sopenharmony_ci struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; 4368c2ecf20Sopenharmony_ci struct dlm_lock_resource *res = NULL; 4378c2ecf20Sopenharmony_ci struct dlm_lock *lock = NULL; 4388c2ecf20Sopenharmony_ci struct dlm_lock *tmp_lock; 4398c2ecf20Sopenharmony_ci struct dlm_lockstatus *lksb; 4408c2ecf20Sopenharmony_ci enum dlm_status status = DLM_NORMAL; 4418c2ecf20Sopenharmony_ci u32 flags; 4428c2ecf20Sopenharmony_ci int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0; 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci if (!dlm_grab(dlm)) { 4458c2ecf20Sopenharmony_ci dlm_error(DLM_REJECTED); 4468c2ecf20Sopenharmony_ci return DLM_REJECTED; 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci mlog_bug_on_msg(!dlm_domain_fully_joined(dlm), 4508c2ecf20Sopenharmony_ci "Domain %s not fully joined!\n", dlm->name); 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci if (cnv->namelen > DLM_LOCKID_NAME_MAX) { 4538c2ecf20Sopenharmony_ci status = DLM_IVBUFLEN; 4548c2ecf20Sopenharmony_ci dlm_error(status); 4558c2ecf20Sopenharmony_ci goto leave; 4568c2ecf20Sopenharmony_ci } 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci flags = be32_to_cpu(cnv->flags); 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == 4618c2ecf20Sopenharmony_ci (LKM_PUT_LVB|LKM_GET_LVB)) { 4628c2ecf20Sopenharmony_ci mlog(ML_ERROR, "both PUT and GET lvb specified\n"); 4638c2ecf20Sopenharmony_ci status = DLM_BADARGS; 4648c2ecf20Sopenharmony_ci goto leave; 4658c2ecf20Sopenharmony_ci } 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : 4688c2ecf20Sopenharmony_ci (flags & LKM_GET_LVB ? "get lvb" : "none")); 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci status = DLM_IVLOCKID; 4718c2ecf20Sopenharmony_ci res = dlm_lookup_lockres(dlm, cnv->name, cnv->namelen); 4728c2ecf20Sopenharmony_ci if (!res) { 4738c2ecf20Sopenharmony_ci dlm_error(status); 4748c2ecf20Sopenharmony_ci goto leave; 4758c2ecf20Sopenharmony_ci } 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 4788c2ecf20Sopenharmony_ci status = __dlm_lockres_state_to_status(res); 4798c2ecf20Sopenharmony_ci if (status != DLM_NORMAL) { 4808c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 4818c2ecf20Sopenharmony_ci dlm_error(status); 4828c2ecf20Sopenharmony_ci goto leave; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci list_for_each_entry(tmp_lock, &res->granted, list) { 4858c2ecf20Sopenharmony_ci if (tmp_lock->ml.cookie == cnv->cookie && 4868c2ecf20Sopenharmony_ci tmp_lock->ml.node == cnv->node_idx) { 4878c2ecf20Sopenharmony_ci lock = tmp_lock; 4888c2ecf20Sopenharmony_ci dlm_lock_get(lock); 4898c2ecf20Sopenharmony_ci break; 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci } 4928c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 4938c2ecf20Sopenharmony_ci if (!lock) { 4948c2ecf20Sopenharmony_ci status = DLM_IVLOCKID; 4958c2ecf20Sopenharmony_ci mlog(ML_ERROR, "did not find lock to convert on grant queue! " 4968c2ecf20Sopenharmony_ci "cookie=%u:%llu\n", 4978c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)), 4988c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie))); 4998c2ecf20Sopenharmony_ci dlm_print_one_lock_resource(res); 5008c2ecf20Sopenharmony_ci goto leave; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci /* found the lock */ 5048c2ecf20Sopenharmony_ci lksb = lock->lksb; 5058c2ecf20Sopenharmony_ci 5068c2ecf20Sopenharmony_ci /* see if caller needed to get/put lvb */ 5078c2ecf20Sopenharmony_ci if (flags & LKM_PUT_LVB) { 5088c2ecf20Sopenharmony_ci BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); 5098c2ecf20Sopenharmony_ci lksb->flags |= DLM_LKSB_PUT_LVB; 5108c2ecf20Sopenharmony_ci memcpy(&lksb->lvb[0], &cnv->lvb[0], DLM_LVB_LEN); 5118c2ecf20Sopenharmony_ci } else if (flags & LKM_GET_LVB) { 5128c2ecf20Sopenharmony_ci BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB)); 5138c2ecf20Sopenharmony_ci lksb->flags |= DLM_LKSB_GET_LVB; 5148c2ecf20Sopenharmony_ci } 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 5178c2ecf20Sopenharmony_ci status = __dlm_lockres_state_to_status(res); 5188c2ecf20Sopenharmony_ci if (status == DLM_NORMAL) { 5198c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 5208c2ecf20Sopenharmony_ci ast_reserved = 1; 5218c2ecf20Sopenharmony_ci res->state |= DLM_LOCK_RES_IN_PROGRESS; 5228c2ecf20Sopenharmony_ci status = __dlmconvert_master(dlm, res, lock, flags, 5238c2ecf20Sopenharmony_ci cnv->requested_type, 5248c2ecf20Sopenharmony_ci &call_ast, &kick_thread); 5258c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 5268c2ecf20Sopenharmony_ci wake = 1; 5278c2ecf20Sopenharmony_ci } 5288c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 5298c2ecf20Sopenharmony_ci if (wake) 5308c2ecf20Sopenharmony_ci wake_up(&res->wq); 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci if (status != DLM_NORMAL) { 5338c2ecf20Sopenharmony_ci if (status != DLM_NOTQUEUED) 5348c2ecf20Sopenharmony_ci dlm_error(status); 5358c2ecf20Sopenharmony_ci lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB); 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_cileave: 5398c2ecf20Sopenharmony_ci if (lock) 5408c2ecf20Sopenharmony_ci dlm_lock_put(lock); 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci /* either queue the ast or release it, if reserved */ 5438c2ecf20Sopenharmony_ci if (call_ast) 5448c2ecf20Sopenharmony_ci dlm_queue_ast(dlm, lock); 5458c2ecf20Sopenharmony_ci else if (ast_reserved) 5468c2ecf20Sopenharmony_ci dlm_lockres_release_ast(dlm, res); 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci if (kick_thread) 5498c2ecf20Sopenharmony_ci dlm_kick_thread(dlm, res); 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci if (res) 5528c2ecf20Sopenharmony_ci dlm_lockres_put(res); 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci dlm_put(dlm); 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci return status; 5578c2ecf20Sopenharmony_ci} 558