162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * dlmthread.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * standalone DLM module
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Copyright (C) 2004 Oracle.  All rights reserved.
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/module.h>
1262306a36Sopenharmony_ci#include <linux/fs.h>
1362306a36Sopenharmony_ci#include <linux/types.h>
1462306a36Sopenharmony_ci#include <linux/highmem.h>
1562306a36Sopenharmony_ci#include <linux/init.h>
1662306a36Sopenharmony_ci#include <linux/sysctl.h>
1762306a36Sopenharmony_ci#include <linux/random.h>
1862306a36Sopenharmony_ci#include <linux/blkdev.h>
1962306a36Sopenharmony_ci#include <linux/socket.h>
2062306a36Sopenharmony_ci#include <linux/inet.h>
2162306a36Sopenharmony_ci#include <linux/timer.h>
2262306a36Sopenharmony_ci#include <linux/kthread.h>
2362306a36Sopenharmony_ci#include <linux/delay.h>
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include "../cluster/heartbeat.h"
2762306a36Sopenharmony_ci#include "../cluster/nodemanager.h"
2862306a36Sopenharmony_ci#include "../cluster/tcp.h"
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include "dlmapi.h"
3162306a36Sopenharmony_ci#include "dlmcommon.h"
3262306a36Sopenharmony_ci#include "dlmdomain.h"
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD)
3562306a36Sopenharmony_ci#include "../cluster/masklog.h"
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic int dlm_thread(void *data);
3862306a36Sopenharmony_cistatic void dlm_flush_asts(struct dlm_ctxt *dlm);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci/* will exit holding res->spinlock, but may drop in function */
4162306a36Sopenharmony_ci/* waits until flags are cleared on res->state */
4262306a36Sopenharmony_civoid __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
4362306a36Sopenharmony_ci{
4462306a36Sopenharmony_ci	DECLARE_WAITQUEUE(wait, current);
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	add_wait_queue(&res->wq, &wait);
4962306a36Sopenharmony_cirepeat:
5062306a36Sopenharmony_ci	set_current_state(TASK_UNINTERRUPTIBLE);
5162306a36Sopenharmony_ci	if (res->state & flags) {
5262306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
5362306a36Sopenharmony_ci		schedule();
5462306a36Sopenharmony_ci		spin_lock(&res->spinlock);
5562306a36Sopenharmony_ci		goto repeat;
5662306a36Sopenharmony_ci	}
5762306a36Sopenharmony_ci	remove_wait_queue(&res->wq, &wait);
5862306a36Sopenharmony_ci	__set_current_state(TASK_RUNNING);
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ciint __dlm_lockres_has_locks(struct dlm_lock_resource *res)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	if (list_empty(&res->granted) &&
6462306a36Sopenharmony_ci	    list_empty(&res->converting) &&
6562306a36Sopenharmony_ci	    list_empty(&res->blocked))
6662306a36Sopenharmony_ci		return 0;
6762306a36Sopenharmony_ci	return 1;
6862306a36Sopenharmony_ci}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci/* "unused": the lockres has no locks, is not on the dirty list,
7162306a36Sopenharmony_ci * has no inflight locks (in the gap between mastery and acquiring
7262306a36Sopenharmony_ci * the first lock), and has no bits in its refmap.
7362306a36Sopenharmony_ci * truly ready to be freed. */
7462306a36Sopenharmony_ciint __dlm_lockres_unused(struct dlm_lock_resource *res)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	int bit;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	if (__dlm_lockres_has_locks(res))
8162306a36Sopenharmony_ci		return 0;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	/* Locks are in the process of being created */
8462306a36Sopenharmony_ci	if (res->inflight_locks)
8562306a36Sopenharmony_ci		return 0;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY)
8862306a36Sopenharmony_ci		return 0;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	if (res->state & (DLM_LOCK_RES_RECOVERING|
9162306a36Sopenharmony_ci			DLM_LOCK_RES_RECOVERY_WAITING))
9262306a36Sopenharmony_ci		return 0;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	/* Another node has this resource with this node as the master */
9562306a36Sopenharmony_ci	bit = find_first_bit(res->refmap, O2NM_MAX_NODES);
9662306a36Sopenharmony_ci	if (bit < O2NM_MAX_NODES)
9762306a36Sopenharmony_ci		return 0;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	return 1;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci/* Call whenever you may have added or deleted something from one of
10462306a36Sopenharmony_ci * the lockres queue's. This will figure out whether it belongs on the
10562306a36Sopenharmony_ci * unused list or not and does the appropriate thing. */
10662306a36Sopenharmony_civoid __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
10762306a36Sopenharmony_ci			      struct dlm_lock_resource *res)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
11062306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (__dlm_lockres_unused(res)){
11362306a36Sopenharmony_ci		if (list_empty(&res->purge)) {
11462306a36Sopenharmony_ci			mlog(0, "%s: Adding res %.*s to purge list\n",
11562306a36Sopenharmony_ci			     dlm->name, res->lockname.len, res->lockname.name);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci			res->last_used = jiffies;
11862306a36Sopenharmony_ci			dlm_lockres_get(res);
11962306a36Sopenharmony_ci			list_add_tail(&res->purge, &dlm->purge_list);
12062306a36Sopenharmony_ci			dlm->purge_count++;
12162306a36Sopenharmony_ci		}
12262306a36Sopenharmony_ci	} else if (!list_empty(&res->purge)) {
12362306a36Sopenharmony_ci		mlog(0, "%s: Removing res %.*s from purge list\n",
12462306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci		list_del_init(&res->purge);
12762306a36Sopenharmony_ci		dlm_lockres_put(res);
12862306a36Sopenharmony_ci		dlm->purge_count--;
12962306a36Sopenharmony_ci	}
13062306a36Sopenharmony_ci}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_civoid dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
13362306a36Sopenharmony_ci			    struct dlm_lock_resource *res)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	spin_lock(&dlm->spinlock);
13662306a36Sopenharmony_ci	spin_lock(&res->spinlock);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	__dlm_lockres_calc_usage(dlm, res);
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	spin_unlock(&res->spinlock);
14162306a36Sopenharmony_ci	spin_unlock(&dlm->spinlock);
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci/*
14562306a36Sopenharmony_ci * Do the real purge work:
14662306a36Sopenharmony_ci *     unhash the lockres, and
14762306a36Sopenharmony_ci *     clear flag DLM_LOCK_RES_DROPPING_REF.
14862306a36Sopenharmony_ci * It requires dlm and lockres spinlock to be taken.
14962306a36Sopenharmony_ci */
15062306a36Sopenharmony_civoid __dlm_do_purge_lockres(struct dlm_ctxt *dlm,
15162306a36Sopenharmony_ci		struct dlm_lock_resource *res)
15262306a36Sopenharmony_ci{
15362306a36Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
15462306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	if (!list_empty(&res->purge)) {
15762306a36Sopenharmony_ci		mlog(0, "%s: Removing res %.*s from purgelist\n",
15862306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
15962306a36Sopenharmony_ci		list_del_init(&res->purge);
16062306a36Sopenharmony_ci		dlm_lockres_put(res);
16162306a36Sopenharmony_ci		dlm->purge_count--;
16262306a36Sopenharmony_ci	}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	if (!__dlm_lockres_unused(res)) {
16562306a36Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
16662306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
16762306a36Sopenharmony_ci		__dlm_print_one_lock_resource(res);
16862306a36Sopenharmony_ci		BUG();
16962306a36Sopenharmony_ci	}
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	__dlm_unhash_lockres(dlm, res);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	spin_lock(&dlm->track_lock);
17462306a36Sopenharmony_ci	if (!list_empty(&res->tracking))
17562306a36Sopenharmony_ci		list_del_init(&res->tracking);
17662306a36Sopenharmony_ci	else {
17762306a36Sopenharmony_ci		mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
17862306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
17962306a36Sopenharmony_ci		__dlm_print_one_lock_resource(res);
18062306a36Sopenharmony_ci	}
18162306a36Sopenharmony_ci	spin_unlock(&dlm->track_lock);
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	/*
18462306a36Sopenharmony_ci	 * lockres is not in the hash now. drop the flag and wake up
18562306a36Sopenharmony_ci	 * any processes waiting in dlm_get_lock_resource.
18662306a36Sopenharmony_ci	 */
18762306a36Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_DROPPING_REF;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_cistatic void dlm_purge_lockres(struct dlm_ctxt *dlm,
19162306a36Sopenharmony_ci			     struct dlm_lock_resource *res)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	int master;
19462306a36Sopenharmony_ci	int ret = 0;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
19762306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	master = (res->owner == dlm->node_num);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
20262306a36Sopenharmony_ci	     res->lockname.len, res->lockname.name, master);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	if (!master) {
20562306a36Sopenharmony_ci		if (res->state & DLM_LOCK_RES_DROPPING_REF) {
20662306a36Sopenharmony_ci			mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n",
20762306a36Sopenharmony_ci				dlm->name, res->lockname.len, res->lockname.name);
20862306a36Sopenharmony_ci			spin_unlock(&res->spinlock);
20962306a36Sopenharmony_ci			return;
21062306a36Sopenharmony_ci		}
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci		res->state |= DLM_LOCK_RES_DROPPING_REF;
21362306a36Sopenharmony_ci		/* drop spinlock...  retake below */
21462306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
21562306a36Sopenharmony_ci		spin_unlock(&dlm->spinlock);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci		spin_lock(&res->spinlock);
21862306a36Sopenharmony_ci		/* This ensures that clear refmap is sent after the set */
21962306a36Sopenharmony_ci		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
22062306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci		/* clear our bit from the master's refmap, ignore errors */
22362306a36Sopenharmony_ci		ret = dlm_drop_lockres_ref(dlm, res);
22462306a36Sopenharmony_ci		if (ret < 0) {
22562306a36Sopenharmony_ci			if (!dlm_is_host_down(ret))
22662306a36Sopenharmony_ci				BUG();
22762306a36Sopenharmony_ci		}
22862306a36Sopenharmony_ci		spin_lock(&dlm->spinlock);
22962306a36Sopenharmony_ci		spin_lock(&res->spinlock);
23062306a36Sopenharmony_ci	}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	if (!list_empty(&res->purge)) {
23362306a36Sopenharmony_ci		mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
23462306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name, master);
23562306a36Sopenharmony_ci		list_del_init(&res->purge);
23662306a36Sopenharmony_ci		dlm_lockres_put(res);
23762306a36Sopenharmony_ci		dlm->purge_count--;
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	if (!master && ret == DLM_DEREF_RESPONSE_INPROG) {
24162306a36Sopenharmony_ci		mlog(0, "%s: deref %.*s in progress\n",
24262306a36Sopenharmony_ci			dlm->name, res->lockname.len, res->lockname.name);
24362306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
24462306a36Sopenharmony_ci		return;
24562306a36Sopenharmony_ci	}
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	if (!__dlm_lockres_unused(res)) {
24862306a36Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
24962306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
25062306a36Sopenharmony_ci		__dlm_print_one_lock_resource(res);
25162306a36Sopenharmony_ci		BUG();
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	__dlm_unhash_lockres(dlm, res);
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	spin_lock(&dlm->track_lock);
25762306a36Sopenharmony_ci	if (!list_empty(&res->tracking))
25862306a36Sopenharmony_ci		list_del_init(&res->tracking);
25962306a36Sopenharmony_ci	else {
26062306a36Sopenharmony_ci		mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
26162306a36Sopenharmony_ci				res->lockname.len, res->lockname.name);
26262306a36Sopenharmony_ci		__dlm_print_one_lock_resource(res);
26362306a36Sopenharmony_ci	}
26462306a36Sopenharmony_ci	spin_unlock(&dlm->track_lock);
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	/* lockres is not in the hash now.  drop the flag and wake up
26762306a36Sopenharmony_ci	 * any processes waiting in dlm_get_lock_resource. */
26862306a36Sopenharmony_ci	if (!master) {
26962306a36Sopenharmony_ci		res->state &= ~DLM_LOCK_RES_DROPPING_REF;
27062306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
27162306a36Sopenharmony_ci		wake_up(&res->wq);
27262306a36Sopenharmony_ci	} else
27362306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic void dlm_run_purge_list(struct dlm_ctxt *dlm,
27762306a36Sopenharmony_ci			       int purge_now)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	unsigned int run_max, unused;
28062306a36Sopenharmony_ci	unsigned long purge_jiffies;
28162306a36Sopenharmony_ci	struct dlm_lock_resource *lockres;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	spin_lock(&dlm->spinlock);
28462306a36Sopenharmony_ci	run_max = dlm->purge_count;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	while(run_max && !list_empty(&dlm->purge_list)) {
28762306a36Sopenharmony_ci		run_max--;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci		lockres = list_entry(dlm->purge_list.next,
29062306a36Sopenharmony_ci				     struct dlm_lock_resource, purge);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci		spin_lock(&lockres->spinlock);
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci		purge_jiffies = lockres->last_used +
29562306a36Sopenharmony_ci			msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci		/* Make sure that we want to be processing this guy at
29862306a36Sopenharmony_ci		 * this time. */
29962306a36Sopenharmony_ci		if (!purge_now && time_after(purge_jiffies, jiffies)) {
30062306a36Sopenharmony_ci			/* Since resources are added to the purge list
30162306a36Sopenharmony_ci			 * in tail order, we can stop at the first
30262306a36Sopenharmony_ci			 * unpurgable resource -- anyone added after
30362306a36Sopenharmony_ci			 * him will have a greater last_used value */
30462306a36Sopenharmony_ci			spin_unlock(&lockres->spinlock);
30562306a36Sopenharmony_ci			break;
30662306a36Sopenharmony_ci		}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci		/* Status of the lockres *might* change so double
30962306a36Sopenharmony_ci		 * check. If the lockres is unused, holding the dlm
31062306a36Sopenharmony_ci		 * spinlock will prevent people from getting and more
31162306a36Sopenharmony_ci		 * refs on it. */
31262306a36Sopenharmony_ci		unused = __dlm_lockres_unused(lockres);
31362306a36Sopenharmony_ci		if (!unused ||
31462306a36Sopenharmony_ci		    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
31562306a36Sopenharmony_ci		    (lockres->inflight_assert_workers != 0)) {
31662306a36Sopenharmony_ci			mlog(0, "%s: res %.*s is in use or being remastered, "
31762306a36Sopenharmony_ci			     "used %d, state %d, assert master workers %u\n",
31862306a36Sopenharmony_ci			     dlm->name, lockres->lockname.len,
31962306a36Sopenharmony_ci			     lockres->lockname.name,
32062306a36Sopenharmony_ci			     !unused, lockres->state,
32162306a36Sopenharmony_ci			     lockres->inflight_assert_workers);
32262306a36Sopenharmony_ci			list_move_tail(&lockres->purge, &dlm->purge_list);
32362306a36Sopenharmony_ci			spin_unlock(&lockres->spinlock);
32462306a36Sopenharmony_ci			continue;
32562306a36Sopenharmony_ci		}
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci		dlm_lockres_get(lockres);
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci		dlm_purge_lockres(dlm, lockres);
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci		dlm_lockres_put(lockres);
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci		/* Avoid adding any scheduling latencies */
33462306a36Sopenharmony_ci		cond_resched_lock(&dlm->spinlock);
33562306a36Sopenharmony_ci	}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	spin_unlock(&dlm->spinlock);
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_cistatic void dlm_shuffle_lists(struct dlm_ctxt *dlm,
34162306a36Sopenharmony_ci			      struct dlm_lock_resource *res)
34262306a36Sopenharmony_ci{
34362306a36Sopenharmony_ci	struct dlm_lock *lock, *target;
34462306a36Sopenharmony_ci	int can_grant = 1;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * Because this function is called with the lockres
34862306a36Sopenharmony_ci	 * spinlock, and because we know that it is not migrating/
34962306a36Sopenharmony_ci	 * recovering/in-progress, it is fine to reserve asts and
35062306a36Sopenharmony_ci	 * basts right before queueing them all throughout
35162306a36Sopenharmony_ci	 */
35262306a36Sopenharmony_ci	assert_spin_locked(&dlm->ast_lock);
35362306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
35462306a36Sopenharmony_ci	BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
35562306a36Sopenharmony_ci			      DLM_LOCK_RES_RECOVERING|
35662306a36Sopenharmony_ci			      DLM_LOCK_RES_IN_PROGRESS)));
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ciconverting:
35962306a36Sopenharmony_ci	if (list_empty(&res->converting))
36062306a36Sopenharmony_ci		goto blocked;
36162306a36Sopenharmony_ci	mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
36262306a36Sopenharmony_ci	     res->lockname.len, res->lockname.name);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	target = list_entry(res->converting.next, struct dlm_lock, list);
36562306a36Sopenharmony_ci	if (target->ml.convert_type == LKM_IVMODE) {
36662306a36Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
36762306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
36862306a36Sopenharmony_ci		BUG();
36962306a36Sopenharmony_ci	}
37062306a36Sopenharmony_ci	list_for_each_entry(lock, &res->granted, list) {
37162306a36Sopenharmony_ci		if (lock==target)
37262306a36Sopenharmony_ci			continue;
37362306a36Sopenharmony_ci		if (!dlm_lock_compatible(lock->ml.type,
37462306a36Sopenharmony_ci					 target->ml.convert_type)) {
37562306a36Sopenharmony_ci			can_grant = 0;
37662306a36Sopenharmony_ci			/* queue the BAST if not already */
37762306a36Sopenharmony_ci			if (lock->ml.highest_blocked == LKM_IVMODE) {
37862306a36Sopenharmony_ci				__dlm_lockres_reserve_ast(res);
37962306a36Sopenharmony_ci				__dlm_queue_bast(dlm, lock);
38062306a36Sopenharmony_ci			}
38162306a36Sopenharmony_ci			/* update the highest_blocked if needed */
38262306a36Sopenharmony_ci			if (lock->ml.highest_blocked < target->ml.convert_type)
38362306a36Sopenharmony_ci				lock->ml.highest_blocked =
38462306a36Sopenharmony_ci					target->ml.convert_type;
38562306a36Sopenharmony_ci		}
38662306a36Sopenharmony_ci	}
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	list_for_each_entry(lock, &res->converting, list) {
38962306a36Sopenharmony_ci		if (lock==target)
39062306a36Sopenharmony_ci			continue;
39162306a36Sopenharmony_ci		if (!dlm_lock_compatible(lock->ml.type,
39262306a36Sopenharmony_ci					 target->ml.convert_type)) {
39362306a36Sopenharmony_ci			can_grant = 0;
39462306a36Sopenharmony_ci			if (lock->ml.highest_blocked == LKM_IVMODE) {
39562306a36Sopenharmony_ci				__dlm_lockres_reserve_ast(res);
39662306a36Sopenharmony_ci				__dlm_queue_bast(dlm, lock);
39762306a36Sopenharmony_ci			}
39862306a36Sopenharmony_ci			if (lock->ml.highest_blocked < target->ml.convert_type)
39962306a36Sopenharmony_ci				lock->ml.highest_blocked =
40062306a36Sopenharmony_ci					target->ml.convert_type;
40162306a36Sopenharmony_ci		}
40262306a36Sopenharmony_ci	}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	/* we can convert the lock */
40562306a36Sopenharmony_ci	if (can_grant) {
40662306a36Sopenharmony_ci		spin_lock(&target->spinlock);
40762306a36Sopenharmony_ci		BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci		mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
41062306a36Sopenharmony_ci		     "%d => %d, node %u\n", dlm->name, res->lockname.len,
41162306a36Sopenharmony_ci		     res->lockname.name,
41262306a36Sopenharmony_ci		     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
41362306a36Sopenharmony_ci		     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
41462306a36Sopenharmony_ci		     target->ml.type,
41562306a36Sopenharmony_ci		     target->ml.convert_type, target->ml.node);
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci		target->ml.type = target->ml.convert_type;
41862306a36Sopenharmony_ci		target->ml.convert_type = LKM_IVMODE;
41962306a36Sopenharmony_ci		list_move_tail(&target->list, &res->granted);
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci		BUG_ON(!target->lksb);
42262306a36Sopenharmony_ci		target->lksb->status = DLM_NORMAL;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci		spin_unlock(&target->spinlock);
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci		__dlm_lockres_reserve_ast(res);
42762306a36Sopenharmony_ci		__dlm_queue_ast(dlm, target);
42862306a36Sopenharmony_ci		/* go back and check for more */
42962306a36Sopenharmony_ci		goto converting;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ciblocked:
43362306a36Sopenharmony_ci	if (list_empty(&res->blocked))
43462306a36Sopenharmony_ci		goto leave;
43562306a36Sopenharmony_ci	target = list_entry(res->blocked.next, struct dlm_lock, list);
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	list_for_each_entry(lock, &res->granted, list) {
43862306a36Sopenharmony_ci		if (lock==target)
43962306a36Sopenharmony_ci			continue;
44062306a36Sopenharmony_ci		if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
44162306a36Sopenharmony_ci			can_grant = 0;
44262306a36Sopenharmony_ci			if (lock->ml.highest_blocked == LKM_IVMODE) {
44362306a36Sopenharmony_ci				__dlm_lockres_reserve_ast(res);
44462306a36Sopenharmony_ci				__dlm_queue_bast(dlm, lock);
44562306a36Sopenharmony_ci			}
44662306a36Sopenharmony_ci			if (lock->ml.highest_blocked < target->ml.type)
44762306a36Sopenharmony_ci				lock->ml.highest_blocked = target->ml.type;
44862306a36Sopenharmony_ci		}
44962306a36Sopenharmony_ci	}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	list_for_each_entry(lock, &res->converting, list) {
45262306a36Sopenharmony_ci		if (lock==target)
45362306a36Sopenharmony_ci			continue;
45462306a36Sopenharmony_ci		if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
45562306a36Sopenharmony_ci			can_grant = 0;
45662306a36Sopenharmony_ci			if (lock->ml.highest_blocked == LKM_IVMODE) {
45762306a36Sopenharmony_ci				__dlm_lockres_reserve_ast(res);
45862306a36Sopenharmony_ci				__dlm_queue_bast(dlm, lock);
45962306a36Sopenharmony_ci			}
46062306a36Sopenharmony_ci			if (lock->ml.highest_blocked < target->ml.type)
46162306a36Sopenharmony_ci				lock->ml.highest_blocked = target->ml.type;
46262306a36Sopenharmony_ci		}
46362306a36Sopenharmony_ci	}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	/* we can grant the blocked lock (only
46662306a36Sopenharmony_ci	 * possible if converting list empty) */
46762306a36Sopenharmony_ci	if (can_grant) {
46862306a36Sopenharmony_ci		spin_lock(&target->spinlock);
46962306a36Sopenharmony_ci		BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci		mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
47262306a36Sopenharmony_ci		     "node %u\n", dlm->name, res->lockname.len,
47362306a36Sopenharmony_ci		     res->lockname.name,
47462306a36Sopenharmony_ci		     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
47562306a36Sopenharmony_ci		     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
47662306a36Sopenharmony_ci		     target->ml.type, target->ml.node);
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci		/* target->ml.type is already correct */
47962306a36Sopenharmony_ci		list_move_tail(&target->list, &res->granted);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci		BUG_ON(!target->lksb);
48262306a36Sopenharmony_ci		target->lksb->status = DLM_NORMAL;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci		spin_unlock(&target->spinlock);
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci		__dlm_lockres_reserve_ast(res);
48762306a36Sopenharmony_ci		__dlm_queue_ast(dlm, target);
48862306a36Sopenharmony_ci		/* go back and check for more */
48962306a36Sopenharmony_ci		goto converting;
49062306a36Sopenharmony_ci	}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_cileave:
49362306a36Sopenharmony_ci	return;
49462306a36Sopenharmony_ci}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci/* must have NO locks when calling this with res !=NULL * */
49762306a36Sopenharmony_civoid dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	if (res) {
50062306a36Sopenharmony_ci		spin_lock(&dlm->spinlock);
50162306a36Sopenharmony_ci		spin_lock(&res->spinlock);
50262306a36Sopenharmony_ci		__dlm_dirty_lockres(dlm, res);
50362306a36Sopenharmony_ci		spin_unlock(&res->spinlock);
50462306a36Sopenharmony_ci		spin_unlock(&dlm->spinlock);
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci	wake_up(&dlm->dlm_thread_wq);
50762306a36Sopenharmony_ci}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_civoid __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
51262306a36Sopenharmony_ci	assert_spin_locked(&res->spinlock);
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	/* don't shuffle secondary queues */
51562306a36Sopenharmony_ci	if (res->owner == dlm->node_num) {
51662306a36Sopenharmony_ci		if (res->state & (DLM_LOCK_RES_MIGRATING |
51762306a36Sopenharmony_ci				  DLM_LOCK_RES_BLOCK_DIRTY))
51862306a36Sopenharmony_ci		    return;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci		if (list_empty(&res->dirty)) {
52162306a36Sopenharmony_ci			/* ref for dirty_list */
52262306a36Sopenharmony_ci			dlm_lockres_get(res);
52362306a36Sopenharmony_ci			list_add_tail(&res->dirty, &dlm->dirty_list);
52462306a36Sopenharmony_ci			res->state |= DLM_LOCK_RES_DIRTY;
52562306a36Sopenharmony_ci		}
52662306a36Sopenharmony_ci	}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
52962306a36Sopenharmony_ci	     res->lockname.name);
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci/* Launch the NM thread for the mounted volume */
53462306a36Sopenharmony_ciint dlm_launch_thread(struct dlm_ctxt *dlm)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	mlog(0, "Starting dlm_thread...\n");
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm-%s",
53962306a36Sopenharmony_ci			dlm->name);
54062306a36Sopenharmony_ci	if (IS_ERR(dlm->dlm_thread_task)) {
54162306a36Sopenharmony_ci		mlog_errno(PTR_ERR(dlm->dlm_thread_task));
54262306a36Sopenharmony_ci		dlm->dlm_thread_task = NULL;
54362306a36Sopenharmony_ci		return -EINVAL;
54462306a36Sopenharmony_ci	}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci	return 0;
54762306a36Sopenharmony_ci}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_civoid dlm_complete_thread(struct dlm_ctxt *dlm)
55062306a36Sopenharmony_ci{
55162306a36Sopenharmony_ci	if (dlm->dlm_thread_task) {
55262306a36Sopenharmony_ci		mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
55362306a36Sopenharmony_ci		kthread_stop(dlm->dlm_thread_task);
55462306a36Sopenharmony_ci		dlm->dlm_thread_task = NULL;
55562306a36Sopenharmony_ci	}
55662306a36Sopenharmony_ci}
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_cistatic int dlm_dirty_list_empty(struct dlm_ctxt *dlm)
55962306a36Sopenharmony_ci{
56062306a36Sopenharmony_ci	int empty;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	spin_lock(&dlm->spinlock);
56362306a36Sopenharmony_ci	empty = list_empty(&dlm->dirty_list);
56462306a36Sopenharmony_ci	spin_unlock(&dlm->spinlock);
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci	return empty;
56762306a36Sopenharmony_ci}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_cistatic void dlm_flush_asts(struct dlm_ctxt *dlm)
57062306a36Sopenharmony_ci{
57162306a36Sopenharmony_ci	int ret;
57262306a36Sopenharmony_ci	struct dlm_lock *lock;
57362306a36Sopenharmony_ci	struct dlm_lock_resource *res;
57462306a36Sopenharmony_ci	u8 hi;
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	spin_lock(&dlm->ast_lock);
57762306a36Sopenharmony_ci	while (!list_empty(&dlm->pending_asts)) {
57862306a36Sopenharmony_ci		lock = list_entry(dlm->pending_asts.next,
57962306a36Sopenharmony_ci				  struct dlm_lock, ast_list);
58062306a36Sopenharmony_ci		/* get an extra ref on lock */
58162306a36Sopenharmony_ci		dlm_lock_get(lock);
58262306a36Sopenharmony_ci		res = lock->lockres;
58362306a36Sopenharmony_ci		mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
58462306a36Sopenharmony_ci		     "node %u\n", dlm->name, res->lockname.len,
58562306a36Sopenharmony_ci		     res->lockname.name,
58662306a36Sopenharmony_ci		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
58762306a36Sopenharmony_ci		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
58862306a36Sopenharmony_ci		     lock->ml.type, lock->ml.node);
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci		BUG_ON(!lock->ast_pending);
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci		/* remove from list (including ref) */
59362306a36Sopenharmony_ci		list_del_init(&lock->ast_list);
59462306a36Sopenharmony_ci		dlm_lock_put(lock);
59562306a36Sopenharmony_ci		spin_unlock(&dlm->ast_lock);
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci		if (lock->ml.node != dlm->node_num) {
59862306a36Sopenharmony_ci			ret = dlm_do_remote_ast(dlm, res, lock);
59962306a36Sopenharmony_ci			if (ret < 0)
60062306a36Sopenharmony_ci				mlog_errno(ret);
60162306a36Sopenharmony_ci		} else
60262306a36Sopenharmony_ci			dlm_do_local_ast(dlm, res, lock);
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci		spin_lock(&dlm->ast_lock);
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci		/* possible that another ast was queued while
60762306a36Sopenharmony_ci		 * we were delivering the last one */
60862306a36Sopenharmony_ci		if (!list_empty(&lock->ast_list)) {
60962306a36Sopenharmony_ci			mlog(0, "%s: res %.*s, AST queued while flushing last "
61062306a36Sopenharmony_ci			     "one\n", dlm->name, res->lockname.len,
61162306a36Sopenharmony_ci			     res->lockname.name);
61262306a36Sopenharmony_ci		} else
61362306a36Sopenharmony_ci			lock->ast_pending = 0;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci		/* drop the extra ref.
61662306a36Sopenharmony_ci		 * this may drop it completely. */
61762306a36Sopenharmony_ci		dlm_lock_put(lock);
61862306a36Sopenharmony_ci		dlm_lockres_release_ast(dlm, res);
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	while (!list_empty(&dlm->pending_basts)) {
62262306a36Sopenharmony_ci		lock = list_entry(dlm->pending_basts.next,
62362306a36Sopenharmony_ci				  struct dlm_lock, bast_list);
62462306a36Sopenharmony_ci		/* get an extra ref on lock */
62562306a36Sopenharmony_ci		dlm_lock_get(lock);
62662306a36Sopenharmony_ci		res = lock->lockres;
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci		BUG_ON(!lock->bast_pending);
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci		/* get the highest blocked lock, and reset */
63162306a36Sopenharmony_ci		spin_lock(&lock->spinlock);
63262306a36Sopenharmony_ci		BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE);
63362306a36Sopenharmony_ci		hi = lock->ml.highest_blocked;
63462306a36Sopenharmony_ci		lock->ml.highest_blocked = LKM_IVMODE;
63562306a36Sopenharmony_ci		spin_unlock(&lock->spinlock);
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci		/* remove from list (including ref) */
63862306a36Sopenharmony_ci		list_del_init(&lock->bast_list);
63962306a36Sopenharmony_ci		dlm_lock_put(lock);
64062306a36Sopenharmony_ci		spin_unlock(&dlm->ast_lock);
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci		mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
64362306a36Sopenharmony_ci		     "blocked %d, node %u\n",
64462306a36Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name,
64562306a36Sopenharmony_ci		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
64662306a36Sopenharmony_ci		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
64762306a36Sopenharmony_ci		     hi, lock->ml.node);
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci		if (lock->ml.node != dlm->node_num) {
65062306a36Sopenharmony_ci			ret = dlm_send_proxy_bast(dlm, res, lock, hi);
65162306a36Sopenharmony_ci			if (ret < 0)
65262306a36Sopenharmony_ci				mlog_errno(ret);
65362306a36Sopenharmony_ci		} else
65462306a36Sopenharmony_ci			dlm_do_local_bast(dlm, res, lock, hi);
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci		spin_lock(&dlm->ast_lock);
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci		/* possible that another bast was queued while
65962306a36Sopenharmony_ci		 * we were delivering the last one */
66062306a36Sopenharmony_ci		if (!list_empty(&lock->bast_list)) {
66162306a36Sopenharmony_ci			mlog(0, "%s: res %.*s, BAST queued while flushing last "
66262306a36Sopenharmony_ci			     "one\n", dlm->name, res->lockname.len,
66362306a36Sopenharmony_ci			     res->lockname.name);
66462306a36Sopenharmony_ci		} else
66562306a36Sopenharmony_ci			lock->bast_pending = 0;
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci		/* drop the extra ref.
66862306a36Sopenharmony_ci		 * this may drop it completely. */
66962306a36Sopenharmony_ci		dlm_lock_put(lock);
67062306a36Sopenharmony_ci		dlm_lockres_release_ast(dlm, res);
67162306a36Sopenharmony_ci	}
67262306a36Sopenharmony_ci	wake_up(&dlm->ast_wq);
67362306a36Sopenharmony_ci	spin_unlock(&dlm->ast_lock);
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci#define DLM_THREAD_TIMEOUT_MS (4 * 1000)
67862306a36Sopenharmony_ci#define DLM_THREAD_MAX_DIRTY  100
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_cistatic int dlm_thread(void *data)
68162306a36Sopenharmony_ci{
68262306a36Sopenharmony_ci	struct dlm_lock_resource *res;
68362306a36Sopenharmony_ci	struct dlm_ctxt *dlm = data;
68462306a36Sopenharmony_ci	unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS);
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci	mlog(0, "dlm thread running for %s...\n", dlm->name);
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	while (!kthread_should_stop()) {
68962306a36Sopenharmony_ci		int n = DLM_THREAD_MAX_DIRTY;
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci		/* dlm_shutting_down is very point-in-time, but that
69262306a36Sopenharmony_ci		 * doesn't matter as we'll just loop back around if we
69362306a36Sopenharmony_ci		 * get false on the leading edge of a state
69462306a36Sopenharmony_ci		 * transition. */
69562306a36Sopenharmony_ci		dlm_run_purge_list(dlm, dlm_shutting_down(dlm));
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci		/* We really don't want to hold dlm->spinlock while
69862306a36Sopenharmony_ci		 * calling dlm_shuffle_lists on each lockres that
69962306a36Sopenharmony_ci		 * needs to have its queues adjusted and AST/BASTs
70062306a36Sopenharmony_ci		 * run.  So let's pull each entry off the dirty_list
70162306a36Sopenharmony_ci		 * and drop dlm->spinlock ASAP.  Once off the list,
70262306a36Sopenharmony_ci		 * res->spinlock needs to be taken again to protect
70362306a36Sopenharmony_ci		 * the queues while calling dlm_shuffle_lists.  */
70462306a36Sopenharmony_ci		spin_lock(&dlm->spinlock);
70562306a36Sopenharmony_ci		while (!list_empty(&dlm->dirty_list)) {
70662306a36Sopenharmony_ci			int delay = 0;
70762306a36Sopenharmony_ci			res = list_entry(dlm->dirty_list.next,
70862306a36Sopenharmony_ci					 struct dlm_lock_resource, dirty);
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci			/* peel a lockres off, remove it from the list,
71162306a36Sopenharmony_ci			 * unset the dirty flag and drop the dlm lock */
71262306a36Sopenharmony_ci			BUG_ON(!res);
71362306a36Sopenharmony_ci			dlm_lockres_get(res);
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci			spin_lock(&res->spinlock);
71662306a36Sopenharmony_ci			/* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */
71762306a36Sopenharmony_ci			list_del_init(&res->dirty);
71862306a36Sopenharmony_ci			spin_unlock(&res->spinlock);
71962306a36Sopenharmony_ci			spin_unlock(&dlm->spinlock);
72062306a36Sopenharmony_ci			/* Drop dirty_list ref */
72162306a36Sopenharmony_ci			dlm_lockres_put(res);
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci		 	/* lockres can be re-dirtied/re-added to the
72462306a36Sopenharmony_ci			 * dirty_list in this gap, but that is ok */
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci			spin_lock(&dlm->ast_lock);
72762306a36Sopenharmony_ci			spin_lock(&res->spinlock);
72862306a36Sopenharmony_ci			if (res->owner != dlm->node_num) {
72962306a36Sopenharmony_ci				__dlm_print_one_lock_resource(res);
73062306a36Sopenharmony_ci				mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
73162306a36Sopenharmony_ci				     " dirty %d\n", dlm->name,
73262306a36Sopenharmony_ci				     !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
73362306a36Sopenharmony_ci				     !!(res->state & DLM_LOCK_RES_MIGRATING),
73462306a36Sopenharmony_ci				     !!(res->state & DLM_LOCK_RES_RECOVERING),
73562306a36Sopenharmony_ci				     !!(res->state & DLM_LOCK_RES_DIRTY));
73662306a36Sopenharmony_ci			}
73762306a36Sopenharmony_ci			BUG_ON(res->owner != dlm->node_num);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci			/* it is now ok to move lockreses in these states
74062306a36Sopenharmony_ci			 * to the dirty list, assuming that they will only be
74162306a36Sopenharmony_ci			 * dirty for a short while. */
74262306a36Sopenharmony_ci			BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
74362306a36Sopenharmony_ci			if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
74462306a36Sopenharmony_ci					  DLM_LOCK_RES_RECOVERING |
74562306a36Sopenharmony_ci					  DLM_LOCK_RES_RECOVERY_WAITING)) {
74662306a36Sopenharmony_ci				/* move it to the tail and keep going */
74762306a36Sopenharmony_ci				res->state &= ~DLM_LOCK_RES_DIRTY;
74862306a36Sopenharmony_ci				spin_unlock(&res->spinlock);
74962306a36Sopenharmony_ci				spin_unlock(&dlm->ast_lock);
75062306a36Sopenharmony_ci				mlog(0, "%s: res %.*s, inprogress, delay list "
75162306a36Sopenharmony_ci				     "shuffle, state %d\n", dlm->name,
75262306a36Sopenharmony_ci				     res->lockname.len, res->lockname.name,
75362306a36Sopenharmony_ci				     res->state);
75462306a36Sopenharmony_ci				delay = 1;
75562306a36Sopenharmony_ci				goto in_progress;
75662306a36Sopenharmony_ci			}
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci			/* at this point the lockres is not migrating/
75962306a36Sopenharmony_ci			 * recovering/in-progress.  we have the lockres
76062306a36Sopenharmony_ci			 * spinlock and do NOT have the dlm lock.
76162306a36Sopenharmony_ci			 * safe to reserve/queue asts and run the lists. */
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci			/* called while holding lockres lock */
76462306a36Sopenharmony_ci			dlm_shuffle_lists(dlm, res);
76562306a36Sopenharmony_ci			res->state &= ~DLM_LOCK_RES_DIRTY;
76662306a36Sopenharmony_ci			spin_unlock(&res->spinlock);
76762306a36Sopenharmony_ci			spin_unlock(&dlm->ast_lock);
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci			dlm_lockres_calc_usage(dlm, res);
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ciin_progress:
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci			spin_lock(&dlm->spinlock);
77462306a36Sopenharmony_ci			/* if the lock was in-progress, stick
77562306a36Sopenharmony_ci			 * it on the back of the list */
77662306a36Sopenharmony_ci			if (delay) {
77762306a36Sopenharmony_ci				spin_lock(&res->spinlock);
77862306a36Sopenharmony_ci				__dlm_dirty_lockres(dlm, res);
77962306a36Sopenharmony_ci				spin_unlock(&res->spinlock);
78062306a36Sopenharmony_ci			}
78162306a36Sopenharmony_ci			dlm_lockres_put(res);
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci			/* unlikely, but we may need to give time to
78462306a36Sopenharmony_ci			 * other tasks */
78562306a36Sopenharmony_ci			if (!--n) {
78662306a36Sopenharmony_ci				mlog(0, "%s: Throttling dlm thread\n",
78762306a36Sopenharmony_ci				     dlm->name);
78862306a36Sopenharmony_ci				break;
78962306a36Sopenharmony_ci			}
79062306a36Sopenharmony_ci		}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci		spin_unlock(&dlm->spinlock);
79362306a36Sopenharmony_ci		dlm_flush_asts(dlm);
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci		/* yield and continue right away if there is more work to do */
79662306a36Sopenharmony_ci		if (!n) {
79762306a36Sopenharmony_ci			cond_resched();
79862306a36Sopenharmony_ci			continue;
79962306a36Sopenharmony_ci		}
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci		wait_event_interruptible_timeout(dlm->dlm_thread_wq,
80262306a36Sopenharmony_ci						 !dlm_dirty_list_empty(dlm) ||
80362306a36Sopenharmony_ci						 kthread_should_stop(),
80462306a36Sopenharmony_ci						 timeout);
80562306a36Sopenharmony_ci	}
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	mlog(0, "quitting DLM thread\n");
80862306a36Sopenharmony_ci	return 0;
80962306a36Sopenharmony_ci}
810