18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * dlmthread.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * standalone DLM module 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright (C) 2004 Oracle. All rights reserved. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/module.h> 148c2ecf20Sopenharmony_ci#include <linux/fs.h> 158c2ecf20Sopenharmony_ci#include <linux/types.h> 168c2ecf20Sopenharmony_ci#include <linux/highmem.h> 178c2ecf20Sopenharmony_ci#include <linux/init.h> 188c2ecf20Sopenharmony_ci#include <linux/sysctl.h> 198c2ecf20Sopenharmony_ci#include <linux/random.h> 208c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 218c2ecf20Sopenharmony_ci#include <linux/socket.h> 228c2ecf20Sopenharmony_ci#include <linux/inet.h> 238c2ecf20Sopenharmony_ci#include <linux/timer.h> 248c2ecf20Sopenharmony_ci#include <linux/kthread.h> 258c2ecf20Sopenharmony_ci#include <linux/delay.h> 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci#include "../cluster/heartbeat.h" 298c2ecf20Sopenharmony_ci#include "../cluster/nodemanager.h" 308c2ecf20Sopenharmony_ci#include "../cluster/tcp.h" 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci#include "dlmapi.h" 338c2ecf20Sopenharmony_ci#include "dlmcommon.h" 348c2ecf20Sopenharmony_ci#include "dlmdomain.h" 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD) 378c2ecf20Sopenharmony_ci#include "../cluster/masklog.h" 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistatic int dlm_thread(void *data); 408c2ecf20Sopenharmony_cistatic void dlm_flush_asts(struct dlm_ctxt *dlm); 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci/* will exit holding res->spinlock, but may drop in function */ 438c2ecf20Sopenharmony_ci/* waits until flags are cleared on res->state */ 448c2ecf20Sopenharmony_civoid __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags) 458c2ecf20Sopenharmony_ci{ 468c2ecf20Sopenharmony_ci DECLARE_WAITQUEUE(wait, current); 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci add_wait_queue(&res->wq, &wait); 518c2ecf20Sopenharmony_cirepeat: 528c2ecf20Sopenharmony_ci set_current_state(TASK_UNINTERRUPTIBLE); 538c2ecf20Sopenharmony_ci if (res->state & flags) { 548c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 558c2ecf20Sopenharmony_ci schedule(); 568c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 578c2ecf20Sopenharmony_ci goto repeat; 588c2ecf20Sopenharmony_ci } 598c2ecf20Sopenharmony_ci remove_wait_queue(&res->wq, &wait); 608c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 618c2ecf20Sopenharmony_ci} 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ciint __dlm_lockres_has_locks(struct dlm_lock_resource *res) 648c2ecf20Sopenharmony_ci{ 658c2ecf20Sopenharmony_ci if (list_empty(&res->granted) && 668c2ecf20Sopenharmony_ci list_empty(&res->converting) && 678c2ecf20Sopenharmony_ci list_empty(&res->blocked)) 688c2ecf20Sopenharmony_ci return 0; 698c2ecf20Sopenharmony_ci return 1; 708c2ecf20Sopenharmony_ci} 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci/* "unused": the lockres has no locks, is not on the dirty list, 738c2ecf20Sopenharmony_ci * has no inflight locks (in the gap between mastery and acquiring 748c2ecf20Sopenharmony_ci * the first lock), and has no bits in its refmap. 758c2ecf20Sopenharmony_ci * truly ready to be freed. */ 768c2ecf20Sopenharmony_ciint __dlm_lockres_unused(struct dlm_lock_resource *res) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci int bit; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci if (__dlm_lockres_has_locks(res)) 838c2ecf20Sopenharmony_ci return 0; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci /* Locks are in the process of being created */ 868c2ecf20Sopenharmony_ci if (res->inflight_locks) 878c2ecf20Sopenharmony_ci return 0; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) 908c2ecf20Sopenharmony_ci return 0; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci if (res->state & (DLM_LOCK_RES_RECOVERING| 938c2ecf20Sopenharmony_ci DLM_LOCK_RES_RECOVERY_WAITING)) 948c2ecf20Sopenharmony_ci return 0; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci /* Another node has this resource with this node as the master */ 978c2ecf20Sopenharmony_ci bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); 988c2ecf20Sopenharmony_ci if (bit < O2NM_MAX_NODES) 998c2ecf20Sopenharmony_ci return 0; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return 1; 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci/* Call whenever you may have added or deleted something from one of 1068c2ecf20Sopenharmony_ci * the lockres queue's. This will figure out whether it belongs on the 1078c2ecf20Sopenharmony_ci * unused list or not and does the appropriate thing. */ 1088c2ecf20Sopenharmony_civoid __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 1098c2ecf20Sopenharmony_ci struct dlm_lock_resource *res) 1108c2ecf20Sopenharmony_ci{ 1118c2ecf20Sopenharmony_ci assert_spin_locked(&dlm->spinlock); 1128c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci if (__dlm_lockres_unused(res)){ 1158c2ecf20Sopenharmony_ci if (list_empty(&res->purge)) { 1168c2ecf20Sopenharmony_ci mlog(0, "%s: Adding res %.*s to purge list\n", 1178c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci res->last_used = jiffies; 1208c2ecf20Sopenharmony_ci dlm_lockres_get(res); 1218c2ecf20Sopenharmony_ci list_add_tail(&res->purge, &dlm->purge_list); 1228c2ecf20Sopenharmony_ci dlm->purge_count++; 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci } else if (!list_empty(&res->purge)) { 1258c2ecf20Sopenharmony_ci mlog(0, "%s: Removing res %.*s from purge list\n", 1268c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci list_del_init(&res->purge); 1298c2ecf20Sopenharmony_ci dlm_lockres_put(res); 1308c2ecf20Sopenharmony_ci dlm->purge_count--; 1318c2ecf20Sopenharmony_ci } 1328c2ecf20Sopenharmony_ci} 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_civoid dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 1358c2ecf20Sopenharmony_ci struct dlm_lock_resource *res) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 1388c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci __dlm_lockres_calc_usage(dlm, res); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 1438c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci/* 1478c2ecf20Sopenharmony_ci * Do the real purge work: 1488c2ecf20Sopenharmony_ci * unhash the lockres, and 1498c2ecf20Sopenharmony_ci * clear flag DLM_LOCK_RES_DROPPING_REF. 1508c2ecf20Sopenharmony_ci * It requires dlm and lockres spinlock to be taken. 1518c2ecf20Sopenharmony_ci */ 1528c2ecf20Sopenharmony_civoid __dlm_do_purge_lockres(struct dlm_ctxt *dlm, 1538c2ecf20Sopenharmony_ci struct dlm_lock_resource *res) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci assert_spin_locked(&dlm->spinlock); 1568c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci if (!list_empty(&res->purge)) { 1598c2ecf20Sopenharmony_ci mlog(0, "%s: Removing res %.*s from purgelist\n", 1608c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 1618c2ecf20Sopenharmony_ci list_del_init(&res->purge); 1628c2ecf20Sopenharmony_ci dlm_lockres_put(res); 1638c2ecf20Sopenharmony_ci dlm->purge_count--; 1648c2ecf20Sopenharmony_ci } 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci if (!__dlm_lockres_unused(res)) { 1678c2ecf20Sopenharmony_ci mlog(ML_ERROR, "%s: res %.*s in use after deref\n", 1688c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 1698c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 1708c2ecf20Sopenharmony_ci BUG(); 1718c2ecf20Sopenharmony_ci } 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci __dlm_unhash_lockres(dlm, res); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci spin_lock(&dlm->track_lock); 1768c2ecf20Sopenharmony_ci if (!list_empty(&res->tracking)) 1778c2ecf20Sopenharmony_ci list_del_init(&res->tracking); 1788c2ecf20Sopenharmony_ci else { 1798c2ecf20Sopenharmony_ci mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n", 1808c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 1818c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 1828c2ecf20Sopenharmony_ci } 1838c2ecf20Sopenharmony_ci spin_unlock(&dlm->track_lock); 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci /* 1868c2ecf20Sopenharmony_ci * lockres is not in the hash now. drop the flag and wake up 1878c2ecf20Sopenharmony_ci * any processes waiting in dlm_get_lock_resource. 1888c2ecf20Sopenharmony_ci */ 1898c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_DROPPING_REF; 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_cistatic void dlm_purge_lockres(struct dlm_ctxt *dlm, 1938c2ecf20Sopenharmony_ci struct dlm_lock_resource *res) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci int master; 1968c2ecf20Sopenharmony_ci int ret = 0; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci assert_spin_locked(&dlm->spinlock); 1998c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci master = (res->owner == dlm->node_num); 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name, 2048c2ecf20Sopenharmony_ci res->lockname.len, res->lockname.name, master); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci if (!master) { 2078c2ecf20Sopenharmony_ci if (res->state & DLM_LOCK_RES_DROPPING_REF) { 2088c2ecf20Sopenharmony_ci mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n", 2098c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 2108c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2118c2ecf20Sopenharmony_ci return; 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci res->state |= DLM_LOCK_RES_DROPPING_REF; 2158c2ecf20Sopenharmony_ci /* drop spinlock... retake below */ 2168c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2178c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 2208c2ecf20Sopenharmony_ci /* This ensures that clear refmap is sent after the set */ 2218c2ecf20Sopenharmony_ci __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); 2228c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci /* clear our bit from the master's refmap, ignore errors */ 2258c2ecf20Sopenharmony_ci ret = dlm_drop_lockres_ref(dlm, res); 2268c2ecf20Sopenharmony_ci if (ret < 0) { 2278c2ecf20Sopenharmony_ci if (!dlm_is_host_down(ret)) 2288c2ecf20Sopenharmony_ci BUG(); 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 2318c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 2328c2ecf20Sopenharmony_ci } 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci if (!list_empty(&res->purge)) { 2358c2ecf20Sopenharmony_ci mlog(0, "%s: Removing res %.*s from purgelist, master %d\n", 2368c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name, master); 2378c2ecf20Sopenharmony_ci list_del_init(&res->purge); 2388c2ecf20Sopenharmony_ci dlm_lockres_put(res); 2398c2ecf20Sopenharmony_ci dlm->purge_count--; 2408c2ecf20Sopenharmony_ci } 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if (!master && ret == DLM_DEREF_RESPONSE_INPROG) { 2438c2ecf20Sopenharmony_ci mlog(0, "%s: deref %.*s in progress\n", 2448c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 2458c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2468c2ecf20Sopenharmony_ci return; 2478c2ecf20Sopenharmony_ci } 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci if (!__dlm_lockres_unused(res)) { 2508c2ecf20Sopenharmony_ci mlog(ML_ERROR, "%s: res %.*s in use after deref\n", 2518c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 2528c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 2538c2ecf20Sopenharmony_ci BUG(); 2548c2ecf20Sopenharmony_ci } 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci __dlm_unhash_lockres(dlm, res); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci spin_lock(&dlm->track_lock); 2598c2ecf20Sopenharmony_ci if (!list_empty(&res->tracking)) 2608c2ecf20Sopenharmony_ci list_del_init(&res->tracking); 2618c2ecf20Sopenharmony_ci else { 2628c2ecf20Sopenharmony_ci mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", 2638c2ecf20Sopenharmony_ci res->lockname.len, res->lockname.name); 2648c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 2658c2ecf20Sopenharmony_ci } 2668c2ecf20Sopenharmony_ci spin_unlock(&dlm->track_lock); 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci /* lockres is not in the hash now. drop the flag and wake up 2698c2ecf20Sopenharmony_ci * any processes waiting in dlm_get_lock_resource. */ 2708c2ecf20Sopenharmony_ci if (!master) { 2718c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_DROPPING_REF; 2728c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2738c2ecf20Sopenharmony_ci wake_up(&res->wq); 2748c2ecf20Sopenharmony_ci } else 2758c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistatic void dlm_run_purge_list(struct dlm_ctxt *dlm, 2798c2ecf20Sopenharmony_ci int purge_now) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci unsigned int run_max, unused; 2828c2ecf20Sopenharmony_ci unsigned long purge_jiffies; 2838c2ecf20Sopenharmony_ci struct dlm_lock_resource *lockres; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 2868c2ecf20Sopenharmony_ci run_max = dlm->purge_count; 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci while(run_max && !list_empty(&dlm->purge_list)) { 2898c2ecf20Sopenharmony_ci run_max--; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci lockres = list_entry(dlm->purge_list.next, 2928c2ecf20Sopenharmony_ci struct dlm_lock_resource, purge); 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci spin_lock(&lockres->spinlock); 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci purge_jiffies = lockres->last_used + 2978c2ecf20Sopenharmony_ci msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci /* Make sure that we want to be processing this guy at 3008c2ecf20Sopenharmony_ci * this time. */ 3018c2ecf20Sopenharmony_ci if (!purge_now && time_after(purge_jiffies, jiffies)) { 3028c2ecf20Sopenharmony_ci /* Since resources are added to the purge list 3038c2ecf20Sopenharmony_ci * in tail order, we can stop at the first 3048c2ecf20Sopenharmony_ci * unpurgable resource -- anyone added after 3058c2ecf20Sopenharmony_ci * him will have a greater last_used value */ 3068c2ecf20Sopenharmony_ci spin_unlock(&lockres->spinlock); 3078c2ecf20Sopenharmony_ci break; 3088c2ecf20Sopenharmony_ci } 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci /* Status of the lockres *might* change so double 3118c2ecf20Sopenharmony_ci * check. If the lockres is unused, holding the dlm 3128c2ecf20Sopenharmony_ci * spinlock will prevent people from getting and more 3138c2ecf20Sopenharmony_ci * refs on it. */ 3148c2ecf20Sopenharmony_ci unused = __dlm_lockres_unused(lockres); 3158c2ecf20Sopenharmony_ci if (!unused || 3168c2ecf20Sopenharmony_ci (lockres->state & DLM_LOCK_RES_MIGRATING) || 3178c2ecf20Sopenharmony_ci (lockres->inflight_assert_workers != 0)) { 3188c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s is in use or being remastered, " 3198c2ecf20Sopenharmony_ci "used %d, state %d, assert master workers %u\n", 3208c2ecf20Sopenharmony_ci dlm->name, lockres->lockname.len, 3218c2ecf20Sopenharmony_ci lockres->lockname.name, 3228c2ecf20Sopenharmony_ci !unused, lockres->state, 3238c2ecf20Sopenharmony_ci lockres->inflight_assert_workers); 3248c2ecf20Sopenharmony_ci list_move_tail(&lockres->purge, &dlm->purge_list); 3258c2ecf20Sopenharmony_ci spin_unlock(&lockres->spinlock); 3268c2ecf20Sopenharmony_ci continue; 3278c2ecf20Sopenharmony_ci } 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci dlm_lockres_get(lockres); 3308c2ecf20Sopenharmony_ci 3318c2ecf20Sopenharmony_ci dlm_purge_lockres(dlm, lockres); 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci dlm_lockres_put(lockres); 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci /* Avoid adding any scheduling latencies */ 3368c2ecf20Sopenharmony_ci cond_resched_lock(&dlm->spinlock); 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 3408c2ecf20Sopenharmony_ci} 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_cistatic void dlm_shuffle_lists(struct dlm_ctxt *dlm, 3438c2ecf20Sopenharmony_ci struct dlm_lock_resource *res) 3448c2ecf20Sopenharmony_ci{ 3458c2ecf20Sopenharmony_ci struct dlm_lock *lock, *target; 3468c2ecf20Sopenharmony_ci int can_grant = 1; 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci /* 3498c2ecf20Sopenharmony_ci * Because this function is called with the lockres 3508c2ecf20Sopenharmony_ci * spinlock, and because we know that it is not migrating/ 3518c2ecf20Sopenharmony_ci * recovering/in-progress, it is fine to reserve asts and 3528c2ecf20Sopenharmony_ci * basts right before queueing them all throughout 3538c2ecf20Sopenharmony_ci */ 3548c2ecf20Sopenharmony_ci assert_spin_locked(&dlm->ast_lock); 3558c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 3568c2ecf20Sopenharmony_ci BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 3578c2ecf20Sopenharmony_ci DLM_LOCK_RES_RECOVERING| 3588c2ecf20Sopenharmony_ci DLM_LOCK_RES_IN_PROGRESS))); 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ciconverting: 3618c2ecf20Sopenharmony_ci if (list_empty(&res->converting)) 3628c2ecf20Sopenharmony_ci goto blocked; 3638c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name, 3648c2ecf20Sopenharmony_ci res->lockname.len, res->lockname.name); 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci target = list_entry(res->converting.next, struct dlm_lock, list); 3678c2ecf20Sopenharmony_ci if (target->ml.convert_type == LKM_IVMODE) { 3688c2ecf20Sopenharmony_ci mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n", 3698c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name); 3708c2ecf20Sopenharmony_ci BUG(); 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci list_for_each_entry(lock, &res->granted, list) { 3738c2ecf20Sopenharmony_ci if (lock==target) 3748c2ecf20Sopenharmony_ci continue; 3758c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(lock->ml.type, 3768c2ecf20Sopenharmony_ci target->ml.convert_type)) { 3778c2ecf20Sopenharmony_ci can_grant = 0; 3788c2ecf20Sopenharmony_ci /* queue the BAST if not already */ 3798c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked == LKM_IVMODE) { 3808c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 3818c2ecf20Sopenharmony_ci __dlm_queue_bast(dlm, lock); 3828c2ecf20Sopenharmony_ci } 3838c2ecf20Sopenharmony_ci /* update the highest_blocked if needed */ 3848c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked < target->ml.convert_type) 3858c2ecf20Sopenharmony_ci lock->ml.highest_blocked = 3868c2ecf20Sopenharmony_ci target->ml.convert_type; 3878c2ecf20Sopenharmony_ci } 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci list_for_each_entry(lock, &res->converting, list) { 3918c2ecf20Sopenharmony_ci if (lock==target) 3928c2ecf20Sopenharmony_ci continue; 3938c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(lock->ml.type, 3948c2ecf20Sopenharmony_ci target->ml.convert_type)) { 3958c2ecf20Sopenharmony_ci can_grant = 0; 3968c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked == LKM_IVMODE) { 3978c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 3988c2ecf20Sopenharmony_ci __dlm_queue_bast(dlm, lock); 3998c2ecf20Sopenharmony_ci } 4008c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked < target->ml.convert_type) 4018c2ecf20Sopenharmony_ci lock->ml.highest_blocked = 4028c2ecf20Sopenharmony_ci target->ml.convert_type; 4038c2ecf20Sopenharmony_ci } 4048c2ecf20Sopenharmony_ci } 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci /* we can convert the lock */ 4078c2ecf20Sopenharmony_ci if (can_grant) { 4088c2ecf20Sopenharmony_ci spin_lock(&target->spinlock); 4098c2ecf20Sopenharmony_ci BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type " 4128c2ecf20Sopenharmony_ci "%d => %d, node %u\n", dlm->name, res->lockname.len, 4138c2ecf20Sopenharmony_ci res->lockname.name, 4148c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), 4158c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), 4168c2ecf20Sopenharmony_ci target->ml.type, 4178c2ecf20Sopenharmony_ci target->ml.convert_type, target->ml.node); 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_ci target->ml.type = target->ml.convert_type; 4208c2ecf20Sopenharmony_ci target->ml.convert_type = LKM_IVMODE; 4218c2ecf20Sopenharmony_ci list_move_tail(&target->list, &res->granted); 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci BUG_ON(!target->lksb); 4248c2ecf20Sopenharmony_ci target->lksb->status = DLM_NORMAL; 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ci spin_unlock(&target->spinlock); 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 4298c2ecf20Sopenharmony_ci __dlm_queue_ast(dlm, target); 4308c2ecf20Sopenharmony_ci /* go back and check for more */ 4318c2ecf20Sopenharmony_ci goto converting; 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ciblocked: 4358c2ecf20Sopenharmony_ci if (list_empty(&res->blocked)) 4368c2ecf20Sopenharmony_ci goto leave; 4378c2ecf20Sopenharmony_ci target = list_entry(res->blocked.next, struct dlm_lock, list); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci list_for_each_entry(lock, &res->granted, list) { 4408c2ecf20Sopenharmony_ci if (lock==target) 4418c2ecf20Sopenharmony_ci continue; 4428c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 4438c2ecf20Sopenharmony_ci can_grant = 0; 4448c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked == LKM_IVMODE) { 4458c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 4468c2ecf20Sopenharmony_ci __dlm_queue_bast(dlm, lock); 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked < target->ml.type) 4498c2ecf20Sopenharmony_ci lock->ml.highest_blocked = target->ml.type; 4508c2ecf20Sopenharmony_ci } 4518c2ecf20Sopenharmony_ci } 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci list_for_each_entry(lock, &res->converting, list) { 4548c2ecf20Sopenharmony_ci if (lock==target) 4558c2ecf20Sopenharmony_ci continue; 4568c2ecf20Sopenharmony_ci if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 4578c2ecf20Sopenharmony_ci can_grant = 0; 4588c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked == LKM_IVMODE) { 4598c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 4608c2ecf20Sopenharmony_ci __dlm_queue_bast(dlm, lock); 4618c2ecf20Sopenharmony_ci } 4628c2ecf20Sopenharmony_ci if (lock->ml.highest_blocked < target->ml.type) 4638c2ecf20Sopenharmony_ci lock->ml.highest_blocked = target->ml.type; 4648c2ecf20Sopenharmony_ci } 4658c2ecf20Sopenharmony_ci } 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci /* we can grant the blocked lock (only 4688c2ecf20Sopenharmony_ci * possible if converting list empty) */ 4698c2ecf20Sopenharmony_ci if (can_grant) { 4708c2ecf20Sopenharmony_ci spin_lock(&target->spinlock); 4718c2ecf20Sopenharmony_ci BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, " 4748c2ecf20Sopenharmony_ci "node %u\n", dlm->name, res->lockname.len, 4758c2ecf20Sopenharmony_ci res->lockname.name, 4768c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), 4778c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), 4788c2ecf20Sopenharmony_ci target->ml.type, target->ml.node); 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci /* target->ml.type is already correct */ 4818c2ecf20Sopenharmony_ci list_move_tail(&target->list, &res->granted); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci BUG_ON(!target->lksb); 4848c2ecf20Sopenharmony_ci target->lksb->status = DLM_NORMAL; 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_ci spin_unlock(&target->spinlock); 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci __dlm_lockres_reserve_ast(res); 4898c2ecf20Sopenharmony_ci __dlm_queue_ast(dlm, target); 4908c2ecf20Sopenharmony_ci /* go back and check for more */ 4918c2ecf20Sopenharmony_ci goto converting; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_cileave: 4958c2ecf20Sopenharmony_ci return; 4968c2ecf20Sopenharmony_ci} 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci/* must have NO locks when calling this with res !=NULL * */ 4998c2ecf20Sopenharmony_civoid dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 5008c2ecf20Sopenharmony_ci{ 5018c2ecf20Sopenharmony_ci if (res) { 5028c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 5038c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 5048c2ecf20Sopenharmony_ci __dlm_dirty_lockres(dlm, res); 5058c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 5068c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 5078c2ecf20Sopenharmony_ci } 5088c2ecf20Sopenharmony_ci wake_up(&dlm->dlm_thread_wq); 5098c2ecf20Sopenharmony_ci} 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_civoid __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 5128c2ecf20Sopenharmony_ci{ 5138c2ecf20Sopenharmony_ci assert_spin_locked(&dlm->spinlock); 5148c2ecf20Sopenharmony_ci assert_spin_locked(&res->spinlock); 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci /* don't shuffle secondary queues */ 5178c2ecf20Sopenharmony_ci if (res->owner == dlm->node_num) { 5188c2ecf20Sopenharmony_ci if (res->state & (DLM_LOCK_RES_MIGRATING | 5198c2ecf20Sopenharmony_ci DLM_LOCK_RES_BLOCK_DIRTY)) 5208c2ecf20Sopenharmony_ci return; 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci if (list_empty(&res->dirty)) { 5238c2ecf20Sopenharmony_ci /* ref for dirty_list */ 5248c2ecf20Sopenharmony_ci dlm_lockres_get(res); 5258c2ecf20Sopenharmony_ci list_add_tail(&res->dirty, &dlm->dirty_list); 5268c2ecf20Sopenharmony_ci res->state |= DLM_LOCK_RES_DIRTY; 5278c2ecf20Sopenharmony_ci } 5288c2ecf20Sopenharmony_ci } 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, 5318c2ecf20Sopenharmony_ci res->lockname.name); 5328c2ecf20Sopenharmony_ci} 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci/* Launch the NM thread for the mounted volume */ 5368c2ecf20Sopenharmony_ciint dlm_launch_thread(struct dlm_ctxt *dlm) 5378c2ecf20Sopenharmony_ci{ 5388c2ecf20Sopenharmony_ci mlog(0, "Starting dlm_thread...\n"); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm-%s", 5418c2ecf20Sopenharmony_ci dlm->name); 5428c2ecf20Sopenharmony_ci if (IS_ERR(dlm->dlm_thread_task)) { 5438c2ecf20Sopenharmony_ci mlog_errno(PTR_ERR(dlm->dlm_thread_task)); 5448c2ecf20Sopenharmony_ci dlm->dlm_thread_task = NULL; 5458c2ecf20Sopenharmony_ci return -EINVAL; 5468c2ecf20Sopenharmony_ci } 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci return 0; 5498c2ecf20Sopenharmony_ci} 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_civoid dlm_complete_thread(struct dlm_ctxt *dlm) 5528c2ecf20Sopenharmony_ci{ 5538c2ecf20Sopenharmony_ci if (dlm->dlm_thread_task) { 5548c2ecf20Sopenharmony_ci mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n"); 5558c2ecf20Sopenharmony_ci kthread_stop(dlm->dlm_thread_task); 5568c2ecf20Sopenharmony_ci dlm->dlm_thread_task = NULL; 5578c2ecf20Sopenharmony_ci } 5588c2ecf20Sopenharmony_ci} 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_cistatic int dlm_dirty_list_empty(struct dlm_ctxt *dlm) 5618c2ecf20Sopenharmony_ci{ 5628c2ecf20Sopenharmony_ci int empty; 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 5658c2ecf20Sopenharmony_ci empty = list_empty(&dlm->dirty_list); 5668c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci return empty; 5698c2ecf20Sopenharmony_ci} 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_cistatic void dlm_flush_asts(struct dlm_ctxt *dlm) 5728c2ecf20Sopenharmony_ci{ 5738c2ecf20Sopenharmony_ci int ret; 5748c2ecf20Sopenharmony_ci struct dlm_lock *lock; 5758c2ecf20Sopenharmony_ci struct dlm_lock_resource *res; 5768c2ecf20Sopenharmony_ci u8 hi; 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci spin_lock(&dlm->ast_lock); 5798c2ecf20Sopenharmony_ci while (!list_empty(&dlm->pending_asts)) { 5808c2ecf20Sopenharmony_ci lock = list_entry(dlm->pending_asts.next, 5818c2ecf20Sopenharmony_ci struct dlm_lock, ast_list); 5828c2ecf20Sopenharmony_ci /* get an extra ref on lock */ 5838c2ecf20Sopenharmony_ci dlm_lock_get(lock); 5848c2ecf20Sopenharmony_ci res = lock->lockres; 5858c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, " 5868c2ecf20Sopenharmony_ci "node %u\n", dlm->name, res->lockname.len, 5878c2ecf20Sopenharmony_ci res->lockname.name, 5888c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), 5898c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), 5908c2ecf20Sopenharmony_ci lock->ml.type, lock->ml.node); 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci BUG_ON(!lock->ast_pending); 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci /* remove from list (including ref) */ 5958c2ecf20Sopenharmony_ci list_del_init(&lock->ast_list); 5968c2ecf20Sopenharmony_ci dlm_lock_put(lock); 5978c2ecf20Sopenharmony_ci spin_unlock(&dlm->ast_lock); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci if (lock->ml.node != dlm->node_num) { 6008c2ecf20Sopenharmony_ci ret = dlm_do_remote_ast(dlm, res, lock); 6018c2ecf20Sopenharmony_ci if (ret < 0) 6028c2ecf20Sopenharmony_ci mlog_errno(ret); 6038c2ecf20Sopenharmony_ci } else 6048c2ecf20Sopenharmony_ci dlm_do_local_ast(dlm, res, lock); 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci spin_lock(&dlm->ast_lock); 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci /* possible that another ast was queued while 6098c2ecf20Sopenharmony_ci * we were delivering the last one */ 6108c2ecf20Sopenharmony_ci if (!list_empty(&lock->ast_list)) { 6118c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, AST queued while flushing last " 6128c2ecf20Sopenharmony_ci "one\n", dlm->name, res->lockname.len, 6138c2ecf20Sopenharmony_ci res->lockname.name); 6148c2ecf20Sopenharmony_ci } else 6158c2ecf20Sopenharmony_ci lock->ast_pending = 0; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci /* drop the extra ref. 6188c2ecf20Sopenharmony_ci * this may drop it completely. */ 6198c2ecf20Sopenharmony_ci dlm_lock_put(lock); 6208c2ecf20Sopenharmony_ci dlm_lockres_release_ast(dlm, res); 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci while (!list_empty(&dlm->pending_basts)) { 6248c2ecf20Sopenharmony_ci lock = list_entry(dlm->pending_basts.next, 6258c2ecf20Sopenharmony_ci struct dlm_lock, bast_list); 6268c2ecf20Sopenharmony_ci /* get an extra ref on lock */ 6278c2ecf20Sopenharmony_ci dlm_lock_get(lock); 6288c2ecf20Sopenharmony_ci res = lock->lockres; 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci BUG_ON(!lock->bast_pending); 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci /* get the highest blocked lock, and reset */ 6338c2ecf20Sopenharmony_ci spin_lock(&lock->spinlock); 6348c2ecf20Sopenharmony_ci BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE); 6358c2ecf20Sopenharmony_ci hi = lock->ml.highest_blocked; 6368c2ecf20Sopenharmony_ci lock->ml.highest_blocked = LKM_IVMODE; 6378c2ecf20Sopenharmony_ci spin_unlock(&lock->spinlock); 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci /* remove from list (including ref) */ 6408c2ecf20Sopenharmony_ci list_del_init(&lock->bast_list); 6418c2ecf20Sopenharmony_ci dlm_lock_put(lock); 6428c2ecf20Sopenharmony_ci spin_unlock(&dlm->ast_lock); 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, " 6458c2ecf20Sopenharmony_ci "blocked %d, node %u\n", 6468c2ecf20Sopenharmony_ci dlm->name, res->lockname.len, res->lockname.name, 6478c2ecf20Sopenharmony_ci dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), 6488c2ecf20Sopenharmony_ci dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), 6498c2ecf20Sopenharmony_ci hi, lock->ml.node); 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci if (lock->ml.node != dlm->node_num) { 6528c2ecf20Sopenharmony_ci ret = dlm_send_proxy_bast(dlm, res, lock, hi); 6538c2ecf20Sopenharmony_ci if (ret < 0) 6548c2ecf20Sopenharmony_ci mlog_errno(ret); 6558c2ecf20Sopenharmony_ci } else 6568c2ecf20Sopenharmony_ci dlm_do_local_bast(dlm, res, lock, hi); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci spin_lock(&dlm->ast_lock); 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci /* possible that another bast was queued while 6618c2ecf20Sopenharmony_ci * we were delivering the last one */ 6628c2ecf20Sopenharmony_ci if (!list_empty(&lock->bast_list)) { 6638c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, BAST queued while flushing last " 6648c2ecf20Sopenharmony_ci "one\n", dlm->name, res->lockname.len, 6658c2ecf20Sopenharmony_ci res->lockname.name); 6668c2ecf20Sopenharmony_ci } else 6678c2ecf20Sopenharmony_ci lock->bast_pending = 0; 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci /* drop the extra ref. 6708c2ecf20Sopenharmony_ci * this may drop it completely. */ 6718c2ecf20Sopenharmony_ci dlm_lock_put(lock); 6728c2ecf20Sopenharmony_ci dlm_lockres_release_ast(dlm, res); 6738c2ecf20Sopenharmony_ci } 6748c2ecf20Sopenharmony_ci wake_up(&dlm->ast_wq); 6758c2ecf20Sopenharmony_ci spin_unlock(&dlm->ast_lock); 6768c2ecf20Sopenharmony_ci} 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci#define DLM_THREAD_TIMEOUT_MS (4 * 1000) 6808c2ecf20Sopenharmony_ci#define DLM_THREAD_MAX_DIRTY 100 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_cistatic int dlm_thread(void *data) 6838c2ecf20Sopenharmony_ci{ 6848c2ecf20Sopenharmony_ci struct dlm_lock_resource *res; 6858c2ecf20Sopenharmony_ci struct dlm_ctxt *dlm = data; 6868c2ecf20Sopenharmony_ci unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS); 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci mlog(0, "dlm thread running for %s...\n", dlm->name); 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci while (!kthread_should_stop()) { 6918c2ecf20Sopenharmony_ci int n = DLM_THREAD_MAX_DIRTY; 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci /* dlm_shutting_down is very point-in-time, but that 6948c2ecf20Sopenharmony_ci * doesn't matter as we'll just loop back around if we 6958c2ecf20Sopenharmony_ci * get false on the leading edge of a state 6968c2ecf20Sopenharmony_ci * transition. */ 6978c2ecf20Sopenharmony_ci dlm_run_purge_list(dlm, dlm_shutting_down(dlm)); 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci /* We really don't want to hold dlm->spinlock while 7008c2ecf20Sopenharmony_ci * calling dlm_shuffle_lists on each lockres that 7018c2ecf20Sopenharmony_ci * needs to have its queues adjusted and AST/BASTs 7028c2ecf20Sopenharmony_ci * run. So let's pull each entry off the dirty_list 7038c2ecf20Sopenharmony_ci * and drop dlm->spinlock ASAP. Once off the list, 7048c2ecf20Sopenharmony_ci * res->spinlock needs to be taken again to protect 7058c2ecf20Sopenharmony_ci * the queues while calling dlm_shuffle_lists. */ 7068c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 7078c2ecf20Sopenharmony_ci while (!list_empty(&dlm->dirty_list)) { 7088c2ecf20Sopenharmony_ci int delay = 0; 7098c2ecf20Sopenharmony_ci res = list_entry(dlm->dirty_list.next, 7108c2ecf20Sopenharmony_ci struct dlm_lock_resource, dirty); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci /* peel a lockres off, remove it from the list, 7138c2ecf20Sopenharmony_ci * unset the dirty flag and drop the dlm lock */ 7148c2ecf20Sopenharmony_ci BUG_ON(!res); 7158c2ecf20Sopenharmony_ci dlm_lockres_get(res); 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 7188c2ecf20Sopenharmony_ci /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */ 7198c2ecf20Sopenharmony_ci list_del_init(&res->dirty); 7208c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 7218c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 7228c2ecf20Sopenharmony_ci /* Drop dirty_list ref */ 7238c2ecf20Sopenharmony_ci dlm_lockres_put(res); 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci /* lockres can be re-dirtied/re-added to the 7268c2ecf20Sopenharmony_ci * dirty_list in this gap, but that is ok */ 7278c2ecf20Sopenharmony_ci 7288c2ecf20Sopenharmony_ci spin_lock(&dlm->ast_lock); 7298c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 7308c2ecf20Sopenharmony_ci if (res->owner != dlm->node_num) { 7318c2ecf20Sopenharmony_ci __dlm_print_one_lock_resource(res); 7328c2ecf20Sopenharmony_ci mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d," 7338c2ecf20Sopenharmony_ci " dirty %d\n", dlm->name, 7348c2ecf20Sopenharmony_ci !!(res->state & DLM_LOCK_RES_IN_PROGRESS), 7358c2ecf20Sopenharmony_ci !!(res->state & DLM_LOCK_RES_MIGRATING), 7368c2ecf20Sopenharmony_ci !!(res->state & DLM_LOCK_RES_RECOVERING), 7378c2ecf20Sopenharmony_ci !!(res->state & DLM_LOCK_RES_DIRTY)); 7388c2ecf20Sopenharmony_ci } 7398c2ecf20Sopenharmony_ci BUG_ON(res->owner != dlm->node_num); 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci /* it is now ok to move lockreses in these states 7428c2ecf20Sopenharmony_ci * to the dirty list, assuming that they will only be 7438c2ecf20Sopenharmony_ci * dirty for a short while. */ 7448c2ecf20Sopenharmony_ci BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); 7458c2ecf20Sopenharmony_ci if (res->state & (DLM_LOCK_RES_IN_PROGRESS | 7468c2ecf20Sopenharmony_ci DLM_LOCK_RES_RECOVERING | 7478c2ecf20Sopenharmony_ci DLM_LOCK_RES_RECOVERY_WAITING)) { 7488c2ecf20Sopenharmony_ci /* move it to the tail and keep going */ 7498c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_DIRTY; 7508c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 7518c2ecf20Sopenharmony_ci spin_unlock(&dlm->ast_lock); 7528c2ecf20Sopenharmony_ci mlog(0, "%s: res %.*s, inprogress, delay list " 7538c2ecf20Sopenharmony_ci "shuffle, state %d\n", dlm->name, 7548c2ecf20Sopenharmony_ci res->lockname.len, res->lockname.name, 7558c2ecf20Sopenharmony_ci res->state); 7568c2ecf20Sopenharmony_ci delay = 1; 7578c2ecf20Sopenharmony_ci goto in_progress; 7588c2ecf20Sopenharmony_ci } 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci /* at this point the lockres is not migrating/ 7618c2ecf20Sopenharmony_ci * recovering/in-progress. we have the lockres 7628c2ecf20Sopenharmony_ci * spinlock and do NOT have the dlm lock. 7638c2ecf20Sopenharmony_ci * safe to reserve/queue asts and run the lists. */ 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci /* called while holding lockres lock */ 7668c2ecf20Sopenharmony_ci dlm_shuffle_lists(dlm, res); 7678c2ecf20Sopenharmony_ci res->state &= ~DLM_LOCK_RES_DIRTY; 7688c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 7698c2ecf20Sopenharmony_ci spin_unlock(&dlm->ast_lock); 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci dlm_lockres_calc_usage(dlm, res); 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ciin_progress: 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci spin_lock(&dlm->spinlock); 7768c2ecf20Sopenharmony_ci /* if the lock was in-progress, stick 7778c2ecf20Sopenharmony_ci * it on the back of the list */ 7788c2ecf20Sopenharmony_ci if (delay) { 7798c2ecf20Sopenharmony_ci spin_lock(&res->spinlock); 7808c2ecf20Sopenharmony_ci __dlm_dirty_lockres(dlm, res); 7818c2ecf20Sopenharmony_ci spin_unlock(&res->spinlock); 7828c2ecf20Sopenharmony_ci } 7838c2ecf20Sopenharmony_ci dlm_lockres_put(res); 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci /* unlikely, but we may need to give time to 7868c2ecf20Sopenharmony_ci * other tasks */ 7878c2ecf20Sopenharmony_ci if (!--n) { 7888c2ecf20Sopenharmony_ci mlog(0, "%s: Throttling dlm thread\n", 7898c2ecf20Sopenharmony_ci dlm->name); 7908c2ecf20Sopenharmony_ci break; 7918c2ecf20Sopenharmony_ci } 7928c2ecf20Sopenharmony_ci } 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci spin_unlock(&dlm->spinlock); 7958c2ecf20Sopenharmony_ci dlm_flush_asts(dlm); 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci /* yield and continue right away if there is more work to do */ 7988c2ecf20Sopenharmony_ci if (!n) { 7998c2ecf20Sopenharmony_ci cond_resched(); 8008c2ecf20Sopenharmony_ci continue; 8018c2ecf20Sopenharmony_ci } 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci wait_event_interruptible_timeout(dlm->dlm_thread_wq, 8048c2ecf20Sopenharmony_ci !dlm_dirty_list_empty(dlm) || 8058c2ecf20Sopenharmony_ci kthread_should_stop(), 8068c2ecf20Sopenharmony_ci timeout); 8078c2ecf20Sopenharmony_ci } 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_ci mlog(0, "quitting DLM thread\n"); 8108c2ecf20Sopenharmony_ci return 0; 8118c2ecf20Sopenharmony_ci} 812