xref: /kernel/linux/linux-5.10/fs/ocfs2/dlm/dlmmaster.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*-
38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0:
48c2ecf20Sopenharmony_ci *
 * dlmmaster.c
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * standalone DLM module
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Copyright (C) 2004 Oracle.  All rights reserved.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/module.h>
148c2ecf20Sopenharmony_ci#include <linux/fs.h>
158c2ecf20Sopenharmony_ci#include <linux/types.h>
168c2ecf20Sopenharmony_ci#include <linux/slab.h>
178c2ecf20Sopenharmony_ci#include <linux/highmem.h>
188c2ecf20Sopenharmony_ci#include <linux/init.h>
198c2ecf20Sopenharmony_ci#include <linux/sysctl.h>
208c2ecf20Sopenharmony_ci#include <linux/random.h>
218c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
228c2ecf20Sopenharmony_ci#include <linux/socket.h>
238c2ecf20Sopenharmony_ci#include <linux/inet.h>
248c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
258c2ecf20Sopenharmony_ci#include <linux/delay.h>
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci#include "../cluster/heartbeat.h"
298c2ecf20Sopenharmony_ci#include "../cluster/nodemanager.h"
308c2ecf20Sopenharmony_ci#include "../cluster/tcp.h"
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include "dlmapi.h"
338c2ecf20Sopenharmony_ci#include "dlmcommon.h"
348c2ecf20Sopenharmony_ci#include "dlmdomain.h"
358c2ecf20Sopenharmony_ci#include "dlmdebug.h"
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
388c2ecf20Sopenharmony_ci#include "../cluster/masklog.h"
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cistatic void dlm_mle_node_down(struct dlm_ctxt *dlm,
418c2ecf20Sopenharmony_ci			      struct dlm_master_list_entry *mle,
428c2ecf20Sopenharmony_ci			      struct o2nm_node *node,
438c2ecf20Sopenharmony_ci			      int idx);
448c2ecf20Sopenharmony_cistatic void dlm_mle_node_up(struct dlm_ctxt *dlm,
458c2ecf20Sopenharmony_ci			    struct dlm_master_list_entry *mle,
468c2ecf20Sopenharmony_ci			    struct o2nm_node *node,
478c2ecf20Sopenharmony_ci			    int idx);
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_cistatic void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
508c2ecf20Sopenharmony_cistatic int dlm_do_assert_master(struct dlm_ctxt *dlm,
518c2ecf20Sopenharmony_ci				struct dlm_lock_resource *res,
528c2ecf20Sopenharmony_ci				void *nodemap, u32 flags);
538c2ecf20Sopenharmony_cistatic void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic inline int dlm_mle_equal(struct dlm_ctxt *dlm,
568c2ecf20Sopenharmony_ci				struct dlm_master_list_entry *mle,
578c2ecf20Sopenharmony_ci				const char *name,
588c2ecf20Sopenharmony_ci				unsigned int namelen)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	if (dlm != mle->dlm)
618c2ecf20Sopenharmony_ci		return 0;
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	if (namelen != mle->mnamelen ||
648c2ecf20Sopenharmony_ci	    memcmp(name, mle->mname, namelen) != 0)
658c2ecf20Sopenharmony_ci		return 0;
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	return 1;
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic struct kmem_cache *dlm_lockres_cache;
718c2ecf20Sopenharmony_cistatic struct kmem_cache *dlm_lockname_cache;
728c2ecf20Sopenharmony_cistatic struct kmem_cache *dlm_mle_cache;
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_cistatic void dlm_mle_release(struct kref *kref);
758c2ecf20Sopenharmony_cistatic void dlm_init_mle(struct dlm_master_list_entry *mle,
768c2ecf20Sopenharmony_ci			enum dlm_mle_type type,
778c2ecf20Sopenharmony_ci			struct dlm_ctxt *dlm,
788c2ecf20Sopenharmony_ci			struct dlm_lock_resource *res,
798c2ecf20Sopenharmony_ci			const char *name,
808c2ecf20Sopenharmony_ci			unsigned int namelen);
818c2ecf20Sopenharmony_cistatic void dlm_put_mle(struct dlm_master_list_entry *mle);
828c2ecf20Sopenharmony_cistatic void __dlm_put_mle(struct dlm_master_list_entry *mle);
838c2ecf20Sopenharmony_cistatic int dlm_find_mle(struct dlm_ctxt *dlm,
848c2ecf20Sopenharmony_ci			struct dlm_master_list_entry **mle,
858c2ecf20Sopenharmony_ci			char *name, unsigned int namelen);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic int dlm_do_master_request(struct dlm_lock_resource *res,
888c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry *mle, int to);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_cistatic int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
928c2ecf20Sopenharmony_ci				     struct dlm_lock_resource *res,
938c2ecf20Sopenharmony_ci				     struct dlm_master_list_entry *mle,
948c2ecf20Sopenharmony_ci				     int *blocked);
958c2ecf20Sopenharmony_cistatic int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
968c2ecf20Sopenharmony_ci				    struct dlm_lock_resource *res,
978c2ecf20Sopenharmony_ci				    struct dlm_master_list_entry *mle,
988c2ecf20Sopenharmony_ci				    int blocked);
998c2ecf20Sopenharmony_cistatic int dlm_add_migration_mle(struct dlm_ctxt *dlm,
1008c2ecf20Sopenharmony_ci				 struct dlm_lock_resource *res,
1018c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry *mle,
1028c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry **oldmle,
1038c2ecf20Sopenharmony_ci				 const char *name, unsigned int namelen,
1048c2ecf20Sopenharmony_ci				 u8 new_master, u8 master);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_cistatic u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
1078c2ecf20Sopenharmony_ci				    struct dlm_lock_resource *res);
1088c2ecf20Sopenharmony_cistatic void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
1098c2ecf20Sopenharmony_ci				      struct dlm_lock_resource *res);
1108c2ecf20Sopenharmony_cistatic int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
1118c2ecf20Sopenharmony_ci				       struct dlm_lock_resource *res,
1128c2ecf20Sopenharmony_ci				       u8 target);
1138c2ecf20Sopenharmony_cistatic int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
1148c2ecf20Sopenharmony_ci				       struct dlm_lock_resource *res);
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci
/*
 * dlm_is_host_down - classify an error code as "the remote host is gone".
 * @err: error value to classify; the recognised values are the negated
 *       errno constants listed in the switch below.
 *
 * Returns 1 if @err is one of the listed codes, 0 for anything else
 * (including 0 and positive values).
 *
 * The parameter is intentionally not spelled "errno": that identifier is
 * reserved by the C standard and is a macro wherever <errno.h> is visible,
 * which would make this definition fail to compile there.  C arguments are
 * positional, so callers are unaffected by the rename.
 */
int dlm_is_host_down(int err)
{
	switch (err) {
	case -EBADF:
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -ECONNRESET:
	case -EPIPE:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ETIMEDOUT:
	case -ECONNABORTED:
	case -ENETDOWN:
	case -ENETUNREACH:
	case -ENETRESET:
	case -ESHUTDOWN:
	case -ENOPROTOOPT:
	case -EINVAL:	/* if returned from our tcp code,
			   this means there is no socket */
		return 1;
	default:
		return 0;
	}
}
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci/*
1438c2ecf20Sopenharmony_ci * MASTER LIST FUNCTIONS
1448c2ecf20Sopenharmony_ci */
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci/*
1488c2ecf20Sopenharmony_ci * regarding master list entries and heartbeat callbacks:
1498c2ecf20Sopenharmony_ci *
1508c2ecf20Sopenharmony_ci * in order to avoid sleeping and allocation that occurs in
1518c2ecf20Sopenharmony_ci * heartbeat, master list entries are simply attached to the
1528c2ecf20Sopenharmony_ci * dlm's established heartbeat callbacks.  the mle is attached
1538c2ecf20Sopenharmony_ci * when it is created, and since the dlm->spinlock is held at
1548c2ecf20Sopenharmony_ci * that time, any heartbeat event will be properly discovered
1558c2ecf20Sopenharmony_ci * by the mle.  the mle needs to be detached from the
1568c2ecf20Sopenharmony_ci * dlm->mle_hb_events list as soon as heartbeat events are no
1578c2ecf20Sopenharmony_ci * longer useful to the mle, and before the mle is freed.
1588c2ecf20Sopenharmony_ci *
1598c2ecf20Sopenharmony_ci * as a general rule, heartbeat events are no longer needed by
1608c2ecf20Sopenharmony_ci * the mle once an "answer" regarding the lock master has been
1618c2ecf20Sopenharmony_ci * received.
1628c2ecf20Sopenharmony_ci */
1638c2ecf20Sopenharmony_cistatic inline void __dlm_mle_attach_hb_events(struct dlm_ctxt *dlm,
1648c2ecf20Sopenharmony_ci					      struct dlm_master_list_entry *mle)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	list_add_tail(&mle->hb_events, &dlm->mle_hb_events);
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_cistatic inline void __dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
1738c2ecf20Sopenharmony_ci					      struct dlm_master_list_entry *mle)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	if (!list_empty(&mle->hb_events))
1768c2ecf20Sopenharmony_ci		list_del_init(&mle->hb_events);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_cistatic inline void dlm_mle_detach_hb_events(struct dlm_ctxt *dlm,
1818c2ecf20Sopenharmony_ci					    struct dlm_master_list_entry *mle)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
1848c2ecf20Sopenharmony_ci	__dlm_mle_detach_hb_events(dlm, mle);
1858c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
1868c2ecf20Sopenharmony_ci}
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_cistatic void dlm_get_mle_inuse(struct dlm_master_list_entry *mle)
1898c2ecf20Sopenharmony_ci{
1908c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
1918c2ecf20Sopenharmony_ci	dlm = mle->dlm;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
1948c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
1958c2ecf20Sopenharmony_ci	mle->inuse++;
1968c2ecf20Sopenharmony_ci	kref_get(&mle->mle_refs);
1978c2ecf20Sopenharmony_ci}
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_cistatic void dlm_put_mle_inuse(struct dlm_master_list_entry *mle)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
2028c2ecf20Sopenharmony_ci	dlm = mle->dlm;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
2058c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
2068c2ecf20Sopenharmony_ci	mle->inuse--;
2078c2ecf20Sopenharmony_ci	__dlm_put_mle(mle);
2088c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
2098c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci}
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci/* remove from list and free */
2148c2ecf20Sopenharmony_cistatic void __dlm_put_mle(struct dlm_master_list_entry *mle)
2158c2ecf20Sopenharmony_ci{
2168c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
2178c2ecf20Sopenharmony_ci	dlm = mle->dlm;
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
2208c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
2218c2ecf20Sopenharmony_ci	if (!kref_read(&mle->mle_refs)) {
2228c2ecf20Sopenharmony_ci		/* this may or may not crash, but who cares.
2238c2ecf20Sopenharmony_ci		 * it's a BUG. */
2248c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "bad mle: %p\n", mle);
2258c2ecf20Sopenharmony_ci		dlm_print_one_mle(mle);
2268c2ecf20Sopenharmony_ci		BUG();
2278c2ecf20Sopenharmony_ci	} else
2288c2ecf20Sopenharmony_ci		kref_put(&mle->mle_refs, dlm_mle_release);
2298c2ecf20Sopenharmony_ci}
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci/* must not have any spinlocks coming in */
2338c2ecf20Sopenharmony_cistatic void dlm_put_mle(struct dlm_master_list_entry *mle)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
2368c2ecf20Sopenharmony_ci	dlm = mle->dlm;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
2398c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
2408c2ecf20Sopenharmony_ci	__dlm_put_mle(mle);
2418c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
2428c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
2438c2ecf20Sopenharmony_ci}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_cistatic inline void dlm_get_mle(struct dlm_master_list_entry *mle)
2468c2ecf20Sopenharmony_ci{
2478c2ecf20Sopenharmony_ci	kref_get(&mle->mle_refs);
2488c2ecf20Sopenharmony_ci}
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_cistatic void dlm_init_mle(struct dlm_master_list_entry *mle,
2518c2ecf20Sopenharmony_ci			enum dlm_mle_type type,
2528c2ecf20Sopenharmony_ci			struct dlm_ctxt *dlm,
2538c2ecf20Sopenharmony_ci			struct dlm_lock_resource *res,
2548c2ecf20Sopenharmony_ci			const char *name,
2558c2ecf20Sopenharmony_ci			unsigned int namelen)
2568c2ecf20Sopenharmony_ci{
2578c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	mle->dlm = dlm;
2608c2ecf20Sopenharmony_ci	mle->type = type;
2618c2ecf20Sopenharmony_ci	INIT_HLIST_NODE(&mle->master_hash_node);
2628c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&mle->hb_events);
2638c2ecf20Sopenharmony_ci	memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
2648c2ecf20Sopenharmony_ci	spin_lock_init(&mle->spinlock);
2658c2ecf20Sopenharmony_ci	init_waitqueue_head(&mle->wq);
2668c2ecf20Sopenharmony_ci	atomic_set(&mle->woken, 0);
2678c2ecf20Sopenharmony_ci	kref_init(&mle->mle_refs);
2688c2ecf20Sopenharmony_ci	memset(mle->response_map, 0, sizeof(mle->response_map));
2698c2ecf20Sopenharmony_ci	mle->master = O2NM_MAX_NODES;
2708c2ecf20Sopenharmony_ci	mle->new_master = O2NM_MAX_NODES;
2718c2ecf20Sopenharmony_ci	mle->inuse = 0;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	BUG_ON(mle->type != DLM_MLE_BLOCK &&
2748c2ecf20Sopenharmony_ci	       mle->type != DLM_MLE_MASTER &&
2758c2ecf20Sopenharmony_ci	       mle->type != DLM_MLE_MIGRATION);
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	if (mle->type == DLM_MLE_MASTER) {
2788c2ecf20Sopenharmony_ci		BUG_ON(!res);
2798c2ecf20Sopenharmony_ci		mle->mleres = res;
2808c2ecf20Sopenharmony_ci		memcpy(mle->mname, res->lockname.name, res->lockname.len);
2818c2ecf20Sopenharmony_ci		mle->mnamelen = res->lockname.len;
2828c2ecf20Sopenharmony_ci		mle->mnamehash = res->lockname.hash;
2838c2ecf20Sopenharmony_ci	} else {
2848c2ecf20Sopenharmony_ci		BUG_ON(!name);
2858c2ecf20Sopenharmony_ci		mle->mleres = NULL;
2868c2ecf20Sopenharmony_ci		memcpy(mle->mname, name, namelen);
2878c2ecf20Sopenharmony_ci		mle->mnamelen = namelen;
2888c2ecf20Sopenharmony_ci		mle->mnamehash = dlm_lockid_hash(name, namelen);
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	atomic_inc(&dlm->mle_tot_count[mle->type]);
2928c2ecf20Sopenharmony_ci	atomic_inc(&dlm->mle_cur_count[mle->type]);
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci	/* copy off the node_map and register hb callbacks on our copy */
2958c2ecf20Sopenharmony_ci	memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
2968c2ecf20Sopenharmony_ci	memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
2978c2ecf20Sopenharmony_ci	clear_bit(dlm->node_num, mle->vote_map);
2988c2ecf20Sopenharmony_ci	clear_bit(dlm->node_num, mle->node_map);
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	/* attach the mle to the domain node up/down events */
3018c2ecf20Sopenharmony_ci	__dlm_mle_attach_hb_events(dlm, mle);
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_civoid __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
3058c2ecf20Sopenharmony_ci{
3068c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
3078c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	if (!hlist_unhashed(&mle->master_hash_node))
3108c2ecf20Sopenharmony_ci		hlist_del_init(&mle->master_hash_node);
3118c2ecf20Sopenharmony_ci}
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_civoid __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle)
3148c2ecf20Sopenharmony_ci{
3158c2ecf20Sopenharmony_ci	struct hlist_head *bucket;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	bucket = dlm_master_hash(dlm, mle->mnamehash);
3208c2ecf20Sopenharmony_ci	hlist_add_head(&mle->master_hash_node, bucket);
3218c2ecf20Sopenharmony_ci}
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci/* returns 1 if found, 0 if not */
3248c2ecf20Sopenharmony_cistatic int dlm_find_mle(struct dlm_ctxt *dlm,
3258c2ecf20Sopenharmony_ci			struct dlm_master_list_entry **mle,
3268c2ecf20Sopenharmony_ci			char *name, unsigned int namelen)
3278c2ecf20Sopenharmony_ci{
3288c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *tmpmle;
3298c2ecf20Sopenharmony_ci	struct hlist_head *bucket;
3308c2ecf20Sopenharmony_ci	unsigned int hash;
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
3358c2ecf20Sopenharmony_ci	bucket = dlm_master_hash(dlm, hash);
3368c2ecf20Sopenharmony_ci	hlist_for_each_entry(tmpmle, bucket, master_hash_node) {
3378c2ecf20Sopenharmony_ci		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
3388c2ecf20Sopenharmony_ci			continue;
3398c2ecf20Sopenharmony_ci		dlm_get_mle(tmpmle);
3408c2ecf20Sopenharmony_ci		*mle = tmpmle;
3418c2ecf20Sopenharmony_ci		return 1;
3428c2ecf20Sopenharmony_ci	}
3438c2ecf20Sopenharmony_ci	return 0;
3448c2ecf20Sopenharmony_ci}
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_civoid dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
3538c2ecf20Sopenharmony_ci		if (node_up)
3548c2ecf20Sopenharmony_ci			dlm_mle_node_up(dlm, mle, NULL, idx);
3558c2ecf20Sopenharmony_ci		else
3568c2ecf20Sopenharmony_ci			dlm_mle_node_down(dlm, mle, NULL, idx);
3578c2ecf20Sopenharmony_ci	}
3588c2ecf20Sopenharmony_ci}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_cistatic void dlm_mle_node_down(struct dlm_ctxt *dlm,
3618c2ecf20Sopenharmony_ci			      struct dlm_master_list_entry *mle,
3628c2ecf20Sopenharmony_ci			      struct o2nm_node *node, int idx)
3638c2ecf20Sopenharmony_ci{
3648c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	if (!test_bit(idx, mle->node_map))
3678c2ecf20Sopenharmony_ci		mlog(0, "node %u already removed from nodemap!\n", idx);
3688c2ecf20Sopenharmony_ci	else
3698c2ecf20Sopenharmony_ci		clear_bit(idx, mle->node_map);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	spin_unlock(&mle->spinlock);
3728c2ecf20Sopenharmony_ci}
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_cistatic void dlm_mle_node_up(struct dlm_ctxt *dlm,
3758c2ecf20Sopenharmony_ci			    struct dlm_master_list_entry *mle,
3768c2ecf20Sopenharmony_ci			    struct o2nm_node *node, int idx)
3778c2ecf20Sopenharmony_ci{
3788c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	if (test_bit(idx, mle->node_map))
3818c2ecf20Sopenharmony_ci		mlog(0, "node %u already in node map!\n", idx);
3828c2ecf20Sopenharmony_ci	else
3838c2ecf20Sopenharmony_ci		set_bit(idx, mle->node_map);
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci	spin_unlock(&mle->spinlock);
3868c2ecf20Sopenharmony_ci}
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_ciint dlm_init_mle_cache(void)
3908c2ecf20Sopenharmony_ci{
3918c2ecf20Sopenharmony_ci	dlm_mle_cache = kmem_cache_create("o2dlm_mle",
3928c2ecf20Sopenharmony_ci					  sizeof(struct dlm_master_list_entry),
3938c2ecf20Sopenharmony_ci					  0, SLAB_HWCACHE_ALIGN,
3948c2ecf20Sopenharmony_ci					  NULL);
3958c2ecf20Sopenharmony_ci	if (dlm_mle_cache == NULL)
3968c2ecf20Sopenharmony_ci		return -ENOMEM;
3978c2ecf20Sopenharmony_ci	return 0;
3988c2ecf20Sopenharmony_ci}
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_civoid dlm_destroy_mle_cache(void)
4018c2ecf20Sopenharmony_ci{
4028c2ecf20Sopenharmony_ci	kmem_cache_destroy(dlm_mle_cache);
4038c2ecf20Sopenharmony_ci}
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_cistatic void dlm_mle_release(struct kref *kref)
4068c2ecf20Sopenharmony_ci{
4078c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle;
4088c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	mle = container_of(kref, struct dlm_master_list_entry, mle_refs);
4118c2ecf20Sopenharmony_ci	dlm = mle->dlm;
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
4148c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname,
4178c2ecf20Sopenharmony_ci	     mle->type);
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci	/* remove from list if not already */
4208c2ecf20Sopenharmony_ci	__dlm_unlink_mle(dlm, mle);
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	/* detach the mle from the domain node up/down events */
4238c2ecf20Sopenharmony_ci	__dlm_mle_detach_hb_events(dlm, mle);
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	atomic_dec(&dlm->mle_cur_count[mle->type]);
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	/* NOTE: kfree under spinlock here.
4288c2ecf20Sopenharmony_ci	 * if this is bad, we can move this to a freelist. */
4298c2ecf20Sopenharmony_ci	kmem_cache_free(dlm_mle_cache, mle);
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci/*
4348c2ecf20Sopenharmony_ci * LOCK RESOURCE FUNCTIONS
4358c2ecf20Sopenharmony_ci */
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ciint dlm_init_master_caches(void)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	dlm_lockres_cache = kmem_cache_create("o2dlm_lockres",
4408c2ecf20Sopenharmony_ci					      sizeof(struct dlm_lock_resource),
4418c2ecf20Sopenharmony_ci					      0, SLAB_HWCACHE_ALIGN, NULL);
4428c2ecf20Sopenharmony_ci	if (!dlm_lockres_cache)
4438c2ecf20Sopenharmony_ci		goto bail;
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci	dlm_lockname_cache = kmem_cache_create("o2dlm_lockname",
4468c2ecf20Sopenharmony_ci					       DLM_LOCKID_NAME_MAX, 0,
4478c2ecf20Sopenharmony_ci					       SLAB_HWCACHE_ALIGN, NULL);
4488c2ecf20Sopenharmony_ci	if (!dlm_lockname_cache)
4498c2ecf20Sopenharmony_ci		goto bail;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	return 0;
4528c2ecf20Sopenharmony_cibail:
4538c2ecf20Sopenharmony_ci	dlm_destroy_master_caches();
4548c2ecf20Sopenharmony_ci	return -ENOMEM;
4558c2ecf20Sopenharmony_ci}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_civoid dlm_destroy_master_caches(void)
4588c2ecf20Sopenharmony_ci{
4598c2ecf20Sopenharmony_ci	kmem_cache_destroy(dlm_lockname_cache);
4608c2ecf20Sopenharmony_ci	dlm_lockname_cache = NULL;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	kmem_cache_destroy(dlm_lockres_cache);
4638c2ecf20Sopenharmony_ci	dlm_lockres_cache = NULL;
4648c2ecf20Sopenharmony_ci}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_cistatic void dlm_lockres_release(struct kref *kref)
4678c2ecf20Sopenharmony_ci{
4688c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res;
4698c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	res = container_of(kref, struct dlm_lock_resource, refs);
4728c2ecf20Sopenharmony_ci	dlm = res->dlm;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	/* This should not happen -- all lockres' have a name
4758c2ecf20Sopenharmony_ci	 * associated with them at init time. */
4768c2ecf20Sopenharmony_ci	BUG_ON(!res->lockname.name);
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	mlog(0, "destroying lockres %.*s\n", res->lockname.len,
4798c2ecf20Sopenharmony_ci	     res->lockname.name);
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	atomic_dec(&dlm->res_cur_count);
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci	if (!hlist_unhashed(&res->hash_node) ||
4848c2ecf20Sopenharmony_ci	    !list_empty(&res->granted) ||
4858c2ecf20Sopenharmony_ci	    !list_empty(&res->converting) ||
4868c2ecf20Sopenharmony_ci	    !list_empty(&res->blocked) ||
4878c2ecf20Sopenharmony_ci	    !list_empty(&res->dirty) ||
4888c2ecf20Sopenharmony_ci	    !list_empty(&res->recovering) ||
4898c2ecf20Sopenharmony_ci	    !list_empty(&res->purge)) {
4908c2ecf20Sopenharmony_ci		mlog(ML_ERROR,
4918c2ecf20Sopenharmony_ci		     "Going to BUG for resource %.*s."
4928c2ecf20Sopenharmony_ci		     "  We're on a list! [%c%c%c%c%c%c%c]\n",
4938c2ecf20Sopenharmony_ci		     res->lockname.len, res->lockname.name,
4948c2ecf20Sopenharmony_ci		     !hlist_unhashed(&res->hash_node) ? 'H' : ' ',
4958c2ecf20Sopenharmony_ci		     !list_empty(&res->granted) ? 'G' : ' ',
4968c2ecf20Sopenharmony_ci		     !list_empty(&res->converting) ? 'C' : ' ',
4978c2ecf20Sopenharmony_ci		     !list_empty(&res->blocked) ? 'B' : ' ',
4988c2ecf20Sopenharmony_ci		     !list_empty(&res->dirty) ? 'D' : ' ',
4998c2ecf20Sopenharmony_ci		     !list_empty(&res->recovering) ? 'R' : ' ',
5008c2ecf20Sopenharmony_ci		     !list_empty(&res->purge) ? 'P' : ' ');
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_ci		dlm_print_one_lock_resource(res);
5038c2ecf20Sopenharmony_ci	}
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	/* By the time we're ready to blow this guy away, we shouldn't
5068c2ecf20Sopenharmony_ci	 * be on any lists. */
5078c2ecf20Sopenharmony_ci	BUG_ON(!hlist_unhashed(&res->hash_node));
5088c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->granted));
5098c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->converting));
5108c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->blocked));
5118c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->dirty));
5128c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->recovering));
5138c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&res->purge));
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci	kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	kmem_cache_free(dlm_lockres_cache, res);
5188c2ecf20Sopenharmony_ci}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_civoid dlm_lockres_put(struct dlm_lock_resource *res)
5218c2ecf20Sopenharmony_ci{
5228c2ecf20Sopenharmony_ci	kref_put(&res->refs, dlm_lockres_release);
5238c2ecf20Sopenharmony_ci}
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_cistatic void dlm_init_lockres(struct dlm_ctxt *dlm,
5268c2ecf20Sopenharmony_ci			     struct dlm_lock_resource *res,
5278c2ecf20Sopenharmony_ci			     const char *name, unsigned int namelen)
5288c2ecf20Sopenharmony_ci{
5298c2ecf20Sopenharmony_ci	char *qname;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	/* If we memset here, we lose our reference to the kmalloc'd
5328c2ecf20Sopenharmony_ci	 * res->lockname.name, so be sure to init every field
5338c2ecf20Sopenharmony_ci	 * correctly! */
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci	qname = (char *) res->lockname.name;
5368c2ecf20Sopenharmony_ci	memcpy(qname, name, namelen);
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	res->lockname.len = namelen;
5398c2ecf20Sopenharmony_ci	res->lockname.hash = dlm_lockid_hash(name, namelen);
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci	init_waitqueue_head(&res->wq);
5428c2ecf20Sopenharmony_ci	spin_lock_init(&res->spinlock);
5438c2ecf20Sopenharmony_ci	INIT_HLIST_NODE(&res->hash_node);
5448c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->granted);
5458c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->converting);
5468c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->blocked);
5478c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->dirty);
5488c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->recovering);
5498c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->purge);
5508c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->tracking);
5518c2ecf20Sopenharmony_ci	atomic_set(&res->asts_reserved, 0);
5528c2ecf20Sopenharmony_ci	res->migration_pending = 0;
5538c2ecf20Sopenharmony_ci	res->inflight_locks = 0;
5548c2ecf20Sopenharmony_ci	res->inflight_assert_workers = 0;
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	res->dlm = dlm;
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ci	kref_init(&res->refs);
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	atomic_inc(&dlm->res_tot_count);
5618c2ecf20Sopenharmony_ci	atomic_inc(&dlm->res_cur_count);
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	/* just for consistency */
5648c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
5658c2ecf20Sopenharmony_ci	dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
5668c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	res->state = DLM_LOCK_RES_IN_PROGRESS;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	res->last_used = 0;
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	spin_lock(&dlm->track_lock);
5738c2ecf20Sopenharmony_ci	list_add_tail(&res->tracking, &dlm->tracking_list);
5748c2ecf20Sopenharmony_ci	spin_unlock(&dlm->track_lock);
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	memset(res->lvb, 0, DLM_LVB_LEN);
5778c2ecf20Sopenharmony_ci	memset(res->refmap, 0, sizeof(res->refmap));
5788c2ecf20Sopenharmony_ci}
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_cistruct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
5818c2ecf20Sopenharmony_ci				   const char *name,
5828c2ecf20Sopenharmony_ci				   unsigned int namelen)
5838c2ecf20Sopenharmony_ci{
5848c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
5878c2ecf20Sopenharmony_ci	if (!res)
5888c2ecf20Sopenharmony_ci		goto error;
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci	res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
5918c2ecf20Sopenharmony_ci	if (!res->lockname.name)
5928c2ecf20Sopenharmony_ci		goto error;
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	dlm_init_lockres(dlm, res, name, namelen);
5958c2ecf20Sopenharmony_ci	return res;
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_cierror:
5988c2ecf20Sopenharmony_ci	if (res)
5998c2ecf20Sopenharmony_ci		kmem_cache_free(dlm_lockres_cache, res);
6008c2ecf20Sopenharmony_ci	return NULL;
6018c2ecf20Sopenharmony_ci}
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_civoid dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
6048c2ecf20Sopenharmony_ci				struct dlm_lock_resource *res, int bit)
6058c2ecf20Sopenharmony_ci{
6068c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len,
6098c2ecf20Sopenharmony_ci	     res->lockname.name, bit, __builtin_return_address(0));
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	set_bit(bit, res->refmap);
6128c2ecf20Sopenharmony_ci}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_civoid dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
6158c2ecf20Sopenharmony_ci				  struct dlm_lock_resource *res, int bit)
6168c2ecf20Sopenharmony_ci{
6178c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len,
6208c2ecf20Sopenharmony_ci	     res->lockname.name, bit, __builtin_return_address(0));
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	clear_bit(bit, res->refmap);
6238c2ecf20Sopenharmony_ci}
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_cistatic void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
6268c2ecf20Sopenharmony_ci				   struct dlm_lock_resource *res)
6278c2ecf20Sopenharmony_ci{
6288c2ecf20Sopenharmony_ci	res->inflight_locks++;
6298c2ecf20Sopenharmony_ci
6308c2ecf20Sopenharmony_ci	mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
6318c2ecf20Sopenharmony_ci	     res->lockname.len, res->lockname.name, res->inflight_locks,
6328c2ecf20Sopenharmony_ci	     __builtin_return_address(0));
6338c2ecf20Sopenharmony_ci}
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_civoid dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
6368c2ecf20Sopenharmony_ci				   struct dlm_lock_resource *res)
6378c2ecf20Sopenharmony_ci{
6388c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6398c2ecf20Sopenharmony_ci	__dlm_lockres_grab_inflight_ref(dlm, res);
6408c2ecf20Sopenharmony_ci}
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_civoid dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
6438c2ecf20Sopenharmony_ci				   struct dlm_lock_resource *res)
6448c2ecf20Sopenharmony_ci{
6458c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	BUG_ON(res->inflight_locks == 0);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	res->inflight_locks--;
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name,
6528c2ecf20Sopenharmony_ci	     res->lockname.len, res->lockname.name, res->inflight_locks,
6538c2ecf20Sopenharmony_ci	     __builtin_return_address(0));
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	wake_up(&res->wq);
6568c2ecf20Sopenharmony_ci}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_civoid __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
6598c2ecf20Sopenharmony_ci		struct dlm_lock_resource *res)
6608c2ecf20Sopenharmony_ci{
6618c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6628c2ecf20Sopenharmony_ci	res->inflight_assert_workers++;
6638c2ecf20Sopenharmony_ci	mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
6648c2ecf20Sopenharmony_ci			dlm->name, res->lockname.len, res->lockname.name,
6658c2ecf20Sopenharmony_ci			res->inflight_assert_workers);
6668c2ecf20Sopenharmony_ci}
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_cistatic void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
6698c2ecf20Sopenharmony_ci		struct dlm_lock_resource *res)
6708c2ecf20Sopenharmony_ci{
6718c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
6728c2ecf20Sopenharmony_ci	BUG_ON(res->inflight_assert_workers == 0);
6738c2ecf20Sopenharmony_ci	res->inflight_assert_workers--;
6748c2ecf20Sopenharmony_ci	mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
6758c2ecf20Sopenharmony_ci			dlm->name, res->lockname.len, res->lockname.name,
6768c2ecf20Sopenharmony_ci			res->inflight_assert_workers);
6778c2ecf20Sopenharmony_ci}
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_cistatic void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
6808c2ecf20Sopenharmony_ci		struct dlm_lock_resource *res)
6818c2ecf20Sopenharmony_ci{
6828c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
6838c2ecf20Sopenharmony_ci	__dlm_lockres_drop_inflight_worker(dlm, res);
6848c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci/*
6888c2ecf20Sopenharmony_ci * lookup a lock resource by name.
6898c2ecf20Sopenharmony_ci * may already exist in the hashtable.
6908c2ecf20Sopenharmony_ci * lockid is null terminated
6918c2ecf20Sopenharmony_ci *
6928c2ecf20Sopenharmony_ci * if not, allocate enough for the lockres and for
6938c2ecf20Sopenharmony_ci * the temporary structure used in doing the mastering.
6948c2ecf20Sopenharmony_ci *
6958c2ecf20Sopenharmony_ci * also, do a lookup in the dlm->master_list to see
6968c2ecf20Sopenharmony_ci * if another node has begun mastering the same lock.
6978c2ecf20Sopenharmony_ci * if so, there should be a block entry in there
6988c2ecf20Sopenharmony_ci * for this name, and we should *not* attempt to master
6998c2ecf20Sopenharmony_ci * the lock here.   need to wait around for that node
7008c2ecf20Sopenharmony_ci * to assert_master (or die).
7018c2ecf20Sopenharmony_ci *
7028c2ecf20Sopenharmony_ci */
7038c2ecf20Sopenharmony_cistruct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
7048c2ecf20Sopenharmony_ci					  const char *lockid,
7058c2ecf20Sopenharmony_ci					  int namelen,
7068c2ecf20Sopenharmony_ci					  int flags)
7078c2ecf20Sopenharmony_ci{
7088c2ecf20Sopenharmony_ci	struct dlm_lock_resource *tmpres=NULL, *res=NULL;
7098c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle = NULL;
7108c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *alloc_mle = NULL;
7118c2ecf20Sopenharmony_ci	int blocked = 0;
7128c2ecf20Sopenharmony_ci	int ret, nodenum;
7138c2ecf20Sopenharmony_ci	struct dlm_node_iter iter;
7148c2ecf20Sopenharmony_ci	unsigned int hash;
7158c2ecf20Sopenharmony_ci	int tries = 0;
7168c2ecf20Sopenharmony_ci	int bit, wait_on_recovery = 0;
7178c2ecf20Sopenharmony_ci
7188c2ecf20Sopenharmony_ci	BUG_ON(!lockid);
7198c2ecf20Sopenharmony_ci
7208c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(lockid, namelen);
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_cilookup:
7258c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
7268c2ecf20Sopenharmony_ci	tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
7278c2ecf20Sopenharmony_ci	if (tmpres) {
7288c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
7298c2ecf20Sopenharmony_ci		spin_lock(&tmpres->spinlock);
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci		/*
7328c2ecf20Sopenharmony_ci		 * Right after dlm spinlock was released, dlm_thread could have
7338c2ecf20Sopenharmony_ci		 * purged the lockres. Check if lockres got unhashed. If so
7348c2ecf20Sopenharmony_ci		 * start over.
7358c2ecf20Sopenharmony_ci		 */
7368c2ecf20Sopenharmony_ci		if (hlist_unhashed(&tmpres->hash_node)) {
7378c2ecf20Sopenharmony_ci			spin_unlock(&tmpres->spinlock);
7388c2ecf20Sopenharmony_ci			dlm_lockres_put(tmpres);
7398c2ecf20Sopenharmony_ci			tmpres = NULL;
7408c2ecf20Sopenharmony_ci			goto lookup;
7418c2ecf20Sopenharmony_ci		}
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_ci		/* Wait on the thread that is mastering the resource */
7448c2ecf20Sopenharmony_ci		if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
7458c2ecf20Sopenharmony_ci			__dlm_wait_on_lockres(tmpres);
7468c2ecf20Sopenharmony_ci			BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
7478c2ecf20Sopenharmony_ci			spin_unlock(&tmpres->spinlock);
7488c2ecf20Sopenharmony_ci			dlm_lockres_put(tmpres);
7498c2ecf20Sopenharmony_ci			tmpres = NULL;
7508c2ecf20Sopenharmony_ci			goto lookup;
7518c2ecf20Sopenharmony_ci		}
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci		/* Wait on the resource purge to complete before continuing */
7548c2ecf20Sopenharmony_ci		if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
7558c2ecf20Sopenharmony_ci			BUG_ON(tmpres->owner == dlm->node_num);
7568c2ecf20Sopenharmony_ci			__dlm_wait_on_lockres_flags(tmpres,
7578c2ecf20Sopenharmony_ci						    DLM_LOCK_RES_DROPPING_REF);
7588c2ecf20Sopenharmony_ci			spin_unlock(&tmpres->spinlock);
7598c2ecf20Sopenharmony_ci			dlm_lockres_put(tmpres);
7608c2ecf20Sopenharmony_ci			tmpres = NULL;
7618c2ecf20Sopenharmony_ci			goto lookup;
7628c2ecf20Sopenharmony_ci		}
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci		/* Grab inflight ref to pin the resource */
7658c2ecf20Sopenharmony_ci		dlm_lockres_grab_inflight_ref(dlm, tmpres);
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci		spin_unlock(&tmpres->spinlock);
7688c2ecf20Sopenharmony_ci		if (res) {
7698c2ecf20Sopenharmony_ci			spin_lock(&dlm->track_lock);
7708c2ecf20Sopenharmony_ci			if (!list_empty(&res->tracking))
7718c2ecf20Sopenharmony_ci				list_del_init(&res->tracking);
7728c2ecf20Sopenharmony_ci			else
7738c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "Resource %.*s not "
7748c2ecf20Sopenharmony_ci						"on the Tracking list\n",
7758c2ecf20Sopenharmony_ci						res->lockname.len,
7768c2ecf20Sopenharmony_ci						res->lockname.name);
7778c2ecf20Sopenharmony_ci			spin_unlock(&dlm->track_lock);
7788c2ecf20Sopenharmony_ci			dlm_lockres_put(res);
7798c2ecf20Sopenharmony_ci		}
7808c2ecf20Sopenharmony_ci		res = tmpres;
7818c2ecf20Sopenharmony_ci		goto leave;
7828c2ecf20Sopenharmony_ci	}
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_ci	if (!res) {
7858c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
7868c2ecf20Sopenharmony_ci		mlog(0, "allocating a new resource\n");
7878c2ecf20Sopenharmony_ci		/* nothing found and we need to allocate one. */
7888c2ecf20Sopenharmony_ci		alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
7898c2ecf20Sopenharmony_ci		if (!alloc_mle)
7908c2ecf20Sopenharmony_ci			goto leave;
7918c2ecf20Sopenharmony_ci		res = dlm_new_lockres(dlm, lockid, namelen);
7928c2ecf20Sopenharmony_ci		if (!res)
7938c2ecf20Sopenharmony_ci			goto leave;
7948c2ecf20Sopenharmony_ci		goto lookup;
7958c2ecf20Sopenharmony_ci	}
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	mlog(0, "no lockres found, allocated our own: %p\n", res);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	if (flags & LKM_LOCAL) {
8008c2ecf20Sopenharmony_ci		/* caller knows it's safe to assume it's not mastered elsewhere
8018c2ecf20Sopenharmony_ci		 * DONE!  return right away */
8028c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
8038c2ecf20Sopenharmony_ci		dlm_change_lockres_owner(dlm, res, dlm->node_num);
8048c2ecf20Sopenharmony_ci		__dlm_insert_lockres(dlm, res);
8058c2ecf20Sopenharmony_ci		dlm_lockres_grab_inflight_ref(dlm, res);
8068c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
8078c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
8088c2ecf20Sopenharmony_ci		/* lockres still marked IN_PROGRESS */
8098c2ecf20Sopenharmony_ci		goto wake_waiters;
8108c2ecf20Sopenharmony_ci	}
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	/* check master list to see if another node has started mastering it */
8138c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	/* if we found a block, wait for lock to be mastered by another node */
8168c2ecf20Sopenharmony_ci	blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen);
8178c2ecf20Sopenharmony_ci	if (blocked) {
8188c2ecf20Sopenharmony_ci		int mig;
8198c2ecf20Sopenharmony_ci		if (mle->type == DLM_MLE_MASTER) {
8208c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "master entry for nonexistent lock!\n");
8218c2ecf20Sopenharmony_ci			BUG();
8228c2ecf20Sopenharmony_ci		}
8238c2ecf20Sopenharmony_ci		mig = (mle->type == DLM_MLE_MIGRATION);
8248c2ecf20Sopenharmony_ci		/* if there is a migration in progress, let the migration
8258c2ecf20Sopenharmony_ci		 * finish before continuing.  we can wait for the absence
8268c2ecf20Sopenharmony_ci		 * of the MIGRATION mle: either the migrate finished or
8278c2ecf20Sopenharmony_ci		 * one of the nodes died and the mle was cleaned up.
8288c2ecf20Sopenharmony_ci		 * if there is a BLOCK here, but it already has a master
8298c2ecf20Sopenharmony_ci		 * set, we are too late.  the master does not have a ref
8308c2ecf20Sopenharmony_ci		 * for us in the refmap.  detach the mle and drop it.
8318c2ecf20Sopenharmony_ci		 * either way, go back to the top and start over. */
8328c2ecf20Sopenharmony_ci		if (mig || mle->master != O2NM_MAX_NODES) {
8338c2ecf20Sopenharmony_ci			BUG_ON(mig && mle->master == dlm->node_num);
8348c2ecf20Sopenharmony_ci			/* we arrived too late.  the master does not
8358c2ecf20Sopenharmony_ci			 * have a ref for us. retry. */
8368c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: late on %s\n",
8378c2ecf20Sopenharmony_ci			     dlm->name, namelen, lockid,
8388c2ecf20Sopenharmony_ci			     mig ?  "MIGRATION" : "BLOCK");
8398c2ecf20Sopenharmony_ci			spin_unlock(&dlm->master_lock);
8408c2ecf20Sopenharmony_ci			spin_unlock(&dlm->spinlock);
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ci			/* master is known, detach */
8438c2ecf20Sopenharmony_ci			if (!mig)
8448c2ecf20Sopenharmony_ci				dlm_mle_detach_hb_events(dlm, mle);
8458c2ecf20Sopenharmony_ci			dlm_put_mle(mle);
8468c2ecf20Sopenharmony_ci			mle = NULL;
8478c2ecf20Sopenharmony_ci			/* this is lame, but we can't wait on either
8488c2ecf20Sopenharmony_ci			 * the mle or lockres waitqueue here */
8498c2ecf20Sopenharmony_ci			if (mig)
8508c2ecf20Sopenharmony_ci				msleep(100);
8518c2ecf20Sopenharmony_ci			goto lookup;
8528c2ecf20Sopenharmony_ci		}
8538c2ecf20Sopenharmony_ci	} else {
8548c2ecf20Sopenharmony_ci		/* go ahead and try to master lock on this node */
8558c2ecf20Sopenharmony_ci		mle = alloc_mle;
8568c2ecf20Sopenharmony_ci		/* make sure this does not get freed below */
8578c2ecf20Sopenharmony_ci		alloc_mle = NULL;
8588c2ecf20Sopenharmony_ci		dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0);
8598c2ecf20Sopenharmony_ci		set_bit(dlm->node_num, mle->maybe_map);
8608c2ecf20Sopenharmony_ci		__dlm_insert_mle(dlm, mle);
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		/* still holding the dlm spinlock, check the recovery map
8638c2ecf20Sopenharmony_ci		 * to see if there are any nodes that still need to be
8648c2ecf20Sopenharmony_ci		 * considered.  these will not appear in the mle nodemap
8658c2ecf20Sopenharmony_ci		 * but they might own this lockres.  wait on them. */
8668c2ecf20Sopenharmony_ci		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
8678c2ecf20Sopenharmony_ci		if (bit < O2NM_MAX_NODES) {
8688c2ecf20Sopenharmony_ci			mlog(0, "%s: res %.*s, At least one node (%d) "
8698c2ecf20Sopenharmony_ci			     "to recover before lock mastery can begin\n",
8708c2ecf20Sopenharmony_ci			     dlm->name, namelen, (char *)lockid, bit);
8718c2ecf20Sopenharmony_ci			wait_on_recovery = 1;
8728c2ecf20Sopenharmony_ci		}
8738c2ecf20Sopenharmony_ci	}
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci	/* at this point there is either a DLM_MLE_BLOCK or a
8768c2ecf20Sopenharmony_ci	 * DLM_MLE_MASTER on the master list, so it's safe to add the
8778c2ecf20Sopenharmony_ci	 * lockres to the hashtable.  anyone who finds the lock will
8788c2ecf20Sopenharmony_ci	 * still have to wait on the IN_PROGRESS. */
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci	/* finally add the lockres to its hash bucket */
8818c2ecf20Sopenharmony_ci	__dlm_insert_lockres(dlm, res);
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci	/* since this lockres is new it doesn't not require the spinlock */
8848c2ecf20Sopenharmony_ci	__dlm_lockres_grab_inflight_ref(dlm, res);
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	/* get an extra ref on the mle in case this is a BLOCK
8878c2ecf20Sopenharmony_ci	 * if so, the creator of the BLOCK may try to put the last
8888c2ecf20Sopenharmony_ci	 * ref at this time in the assert master handler, so we
8898c2ecf20Sopenharmony_ci	 * need an extra one to keep from a bad ptr deref. */
8908c2ecf20Sopenharmony_ci	dlm_get_mle_inuse(mle);
8918c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
8928c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ciredo_request:
8958c2ecf20Sopenharmony_ci	while (wait_on_recovery) {
8968c2ecf20Sopenharmony_ci		/* any cluster changes that occurred after dropping the
8978c2ecf20Sopenharmony_ci		 * dlm spinlock would be detectable be a change on the mle,
8988c2ecf20Sopenharmony_ci		 * so we only need to clear out the recovery map once. */
8998c2ecf20Sopenharmony_ci		if (dlm_is_recovery_lock(lockid, namelen)) {
9008c2ecf20Sopenharmony_ci			mlog(0, "%s: Recovery map is not empty, but must "
9018c2ecf20Sopenharmony_ci			     "master $RECOVERY lock now\n", dlm->name);
9028c2ecf20Sopenharmony_ci			if (!dlm_pre_master_reco_lockres(dlm, res))
9038c2ecf20Sopenharmony_ci				wait_on_recovery = 0;
9048c2ecf20Sopenharmony_ci			else {
9058c2ecf20Sopenharmony_ci				mlog(0, "%s: waiting 500ms for heartbeat state "
9068c2ecf20Sopenharmony_ci				    "change\n", dlm->name);
9078c2ecf20Sopenharmony_ci				msleep(500);
9088c2ecf20Sopenharmony_ci			}
9098c2ecf20Sopenharmony_ci			continue;
9108c2ecf20Sopenharmony_ci		}
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci		dlm_kick_recovery_thread(dlm);
9138c2ecf20Sopenharmony_ci		msleep(1000);
9148c2ecf20Sopenharmony_ci		dlm_wait_for_recovery(dlm);
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci		spin_lock(&dlm->spinlock);
9178c2ecf20Sopenharmony_ci		bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
9188c2ecf20Sopenharmony_ci		if (bit < O2NM_MAX_NODES) {
9198c2ecf20Sopenharmony_ci			mlog(0, "%s: res %.*s, At least one node (%d) "
9208c2ecf20Sopenharmony_ci			     "to recover before lock mastery can begin\n",
9218c2ecf20Sopenharmony_ci			     dlm->name, namelen, (char *)lockid, bit);
9228c2ecf20Sopenharmony_ci			wait_on_recovery = 1;
9238c2ecf20Sopenharmony_ci		} else
9248c2ecf20Sopenharmony_ci			wait_on_recovery = 0;
9258c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci		if (wait_on_recovery)
9288c2ecf20Sopenharmony_ci			dlm_wait_for_node_recovery(dlm, bit, 10000);
9298c2ecf20Sopenharmony_ci	}
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	/* must wait for lock to be mastered elsewhere */
9328c2ecf20Sopenharmony_ci	if (blocked)
9338c2ecf20Sopenharmony_ci		goto wait;
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci	ret = -EINVAL;
9368c2ecf20Sopenharmony_ci	dlm_node_iter_init(mle->vote_map, &iter);
9378c2ecf20Sopenharmony_ci	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
9388c2ecf20Sopenharmony_ci		ret = dlm_do_master_request(res, mle, nodenum);
9398c2ecf20Sopenharmony_ci		if (ret < 0)
9408c2ecf20Sopenharmony_ci			mlog_errno(ret);
9418c2ecf20Sopenharmony_ci		if (mle->master != O2NM_MAX_NODES) {
9428c2ecf20Sopenharmony_ci			/* found a master ! */
9438c2ecf20Sopenharmony_ci			if (mle->master <= nodenum)
9448c2ecf20Sopenharmony_ci				break;
9458c2ecf20Sopenharmony_ci			/* if our master request has not reached the master
9468c2ecf20Sopenharmony_ci			 * yet, keep going until it does.  this is how the
9478c2ecf20Sopenharmony_ci			 * master will know that asserts are needed back to
9488c2ecf20Sopenharmony_ci			 * the lower nodes. */
9498c2ecf20Sopenharmony_ci			mlog(0, "%s: res %.*s, Requests only up to %u but "
9508c2ecf20Sopenharmony_ci			     "master is %u, keep going\n", dlm->name, namelen,
9518c2ecf20Sopenharmony_ci			     lockid, nodenum, mle->master);
9528c2ecf20Sopenharmony_ci		}
9538c2ecf20Sopenharmony_ci	}
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ciwait:
9568c2ecf20Sopenharmony_ci	/* keep going until the response map includes all nodes */
9578c2ecf20Sopenharmony_ci	ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
9588c2ecf20Sopenharmony_ci	if (ret < 0) {
9598c2ecf20Sopenharmony_ci		wait_on_recovery = 1;
9608c2ecf20Sopenharmony_ci		mlog(0, "%s: res %.*s, Node map changed, redo the master "
9618c2ecf20Sopenharmony_ci		     "request now, blocked=%d\n", dlm->name, res->lockname.len,
9628c2ecf20Sopenharmony_ci		     res->lockname.name, blocked);
9638c2ecf20Sopenharmony_ci		if (++tries > 20) {
9648c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "%s: res %.*s, Spinning on "
9658c2ecf20Sopenharmony_ci			     "dlm_wait_for_lock_mastery, blocked = %d\n",
9668c2ecf20Sopenharmony_ci			     dlm->name, res->lockname.len,
9678c2ecf20Sopenharmony_ci			     res->lockname.name, blocked);
9688c2ecf20Sopenharmony_ci			dlm_print_one_lock_resource(res);
9698c2ecf20Sopenharmony_ci			dlm_print_one_mle(mle);
9708c2ecf20Sopenharmony_ci			tries = 0;
9718c2ecf20Sopenharmony_ci		}
9728c2ecf20Sopenharmony_ci		goto redo_request;
9738c2ecf20Sopenharmony_ci	}
9748c2ecf20Sopenharmony_ci
9758c2ecf20Sopenharmony_ci	mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len,
9768c2ecf20Sopenharmony_ci	     res->lockname.name, res->owner);
9778c2ecf20Sopenharmony_ci	/* make sure we never continue without this */
9788c2ecf20Sopenharmony_ci	BUG_ON(res->owner == O2NM_MAX_NODES);
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	/* master is known, detach if not already detached */
9818c2ecf20Sopenharmony_ci	dlm_mle_detach_hb_events(dlm, mle);
9828c2ecf20Sopenharmony_ci	dlm_put_mle(mle);
9838c2ecf20Sopenharmony_ci	/* put the extra ref */
9848c2ecf20Sopenharmony_ci	dlm_put_mle_inuse(mle);
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ciwake_waiters:
9878c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
9888c2ecf20Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
9898c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
9908c2ecf20Sopenharmony_ci	wake_up(&res->wq);
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_cileave:
9938c2ecf20Sopenharmony_ci	/* need to free the unused mle */
9948c2ecf20Sopenharmony_ci	if (alloc_mle)
9958c2ecf20Sopenharmony_ci		kmem_cache_free(dlm_mle_cache, alloc_mle);
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	return res;
9988c2ecf20Sopenharmony_ci}
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci#define DLM_MASTERY_TIMEOUT_MS   5000
10028c2ecf20Sopenharmony_ci
10038c2ecf20Sopenharmony_cistatic int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
10048c2ecf20Sopenharmony_ci				     struct dlm_lock_resource *res,
10058c2ecf20Sopenharmony_ci				     struct dlm_master_list_entry *mle,
10068c2ecf20Sopenharmony_ci				     int *blocked)
10078c2ecf20Sopenharmony_ci{
10088c2ecf20Sopenharmony_ci	u8 m;
10098c2ecf20Sopenharmony_ci	int ret, bit;
10108c2ecf20Sopenharmony_ci	int map_changed, voting_done;
10118c2ecf20Sopenharmony_ci	int assert, sleep;
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_cirecheck:
10148c2ecf20Sopenharmony_ci	ret = 0;
10158c2ecf20Sopenharmony_ci	assert = 0;
10168c2ecf20Sopenharmony_ci
10178c2ecf20Sopenharmony_ci	/* check if another node has already become the owner */
10188c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
10198c2ecf20Sopenharmony_ci	if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
10208c2ecf20Sopenharmony_ci		mlog(0, "%s:%.*s: owner is suddenly %u\n", dlm->name,
10218c2ecf20Sopenharmony_ci		     res->lockname.len, res->lockname.name, res->owner);
10228c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
10238c2ecf20Sopenharmony_ci		/* this will cause the master to re-assert across
10248c2ecf20Sopenharmony_ci		 * the whole cluster, freeing up mles */
10258c2ecf20Sopenharmony_ci		if (res->owner != dlm->node_num) {
10268c2ecf20Sopenharmony_ci			ret = dlm_do_master_request(res, mle, res->owner);
10278c2ecf20Sopenharmony_ci			if (ret < 0) {
10288c2ecf20Sopenharmony_ci				/* give recovery a chance to run */
10298c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
10308c2ecf20Sopenharmony_ci				msleep(500);
10318c2ecf20Sopenharmony_ci				goto recheck;
10328c2ecf20Sopenharmony_ci			}
10338c2ecf20Sopenharmony_ci		}
10348c2ecf20Sopenharmony_ci		ret = 0;
10358c2ecf20Sopenharmony_ci		goto leave;
10368c2ecf20Sopenharmony_ci	}
10378c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
10388c2ecf20Sopenharmony_ci
10398c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
10408c2ecf20Sopenharmony_ci	m = mle->master;
10418c2ecf20Sopenharmony_ci	map_changed = (memcmp(mle->vote_map, mle->node_map,
10428c2ecf20Sopenharmony_ci			      sizeof(mle->vote_map)) != 0);
10438c2ecf20Sopenharmony_ci	voting_done = (memcmp(mle->vote_map, mle->response_map,
10448c2ecf20Sopenharmony_ci			     sizeof(mle->vote_map)) == 0);
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci	/* restart if we hit any errors */
10478c2ecf20Sopenharmony_ci	if (map_changed) {
10488c2ecf20Sopenharmony_ci		int b;
10498c2ecf20Sopenharmony_ci		mlog(0, "%s: %.*s: node map changed, restarting\n",
10508c2ecf20Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name);
10518c2ecf20Sopenharmony_ci		ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
10528c2ecf20Sopenharmony_ci		b = (mle->type == DLM_MLE_BLOCK);
10538c2ecf20Sopenharmony_ci		if ((*blocked && !b) || (!*blocked && b)) {
10548c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
10558c2ecf20Sopenharmony_ci			     dlm->name, res->lockname.len, res->lockname.name,
10568c2ecf20Sopenharmony_ci			     *blocked, b);
10578c2ecf20Sopenharmony_ci			*blocked = b;
10588c2ecf20Sopenharmony_ci		}
10598c2ecf20Sopenharmony_ci		spin_unlock(&mle->spinlock);
10608c2ecf20Sopenharmony_ci		if (ret < 0) {
10618c2ecf20Sopenharmony_ci			mlog_errno(ret);
10628c2ecf20Sopenharmony_ci			goto leave;
10638c2ecf20Sopenharmony_ci		}
10648c2ecf20Sopenharmony_ci		mlog(0, "%s:%.*s: restart lock mastery succeeded, "
10658c2ecf20Sopenharmony_ci		     "rechecking now\n", dlm->name, res->lockname.len,
10668c2ecf20Sopenharmony_ci		     res->lockname.name);
10678c2ecf20Sopenharmony_ci		goto recheck;
10688c2ecf20Sopenharmony_ci	} else {
10698c2ecf20Sopenharmony_ci		if (!voting_done) {
10708c2ecf20Sopenharmony_ci			mlog(0, "map not changed and voting not done "
10718c2ecf20Sopenharmony_ci			     "for %s:%.*s\n", dlm->name, res->lockname.len,
10728c2ecf20Sopenharmony_ci			     res->lockname.name);
10738c2ecf20Sopenharmony_ci		}
10748c2ecf20Sopenharmony_ci	}
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci	if (m != O2NM_MAX_NODES) {
10778c2ecf20Sopenharmony_ci		/* another node has done an assert!
10788c2ecf20Sopenharmony_ci		 * all done! */
10798c2ecf20Sopenharmony_ci		sleep = 0;
10808c2ecf20Sopenharmony_ci	} else {
10818c2ecf20Sopenharmony_ci		sleep = 1;
10828c2ecf20Sopenharmony_ci		/* have all nodes responded? */
10838c2ecf20Sopenharmony_ci		if (voting_done && !*blocked) {
10848c2ecf20Sopenharmony_ci			bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
10858c2ecf20Sopenharmony_ci			if (dlm->node_num <= bit) {
10868c2ecf20Sopenharmony_ci				/* my node number is lowest.
10878c2ecf20Sopenharmony_ci			 	 * now tell other nodes that I am
10888c2ecf20Sopenharmony_ci				 * mastering this. */
10898c2ecf20Sopenharmony_ci				mle->master = dlm->node_num;
10908c2ecf20Sopenharmony_ci				/* ref was grabbed in get_lock_resource
10918c2ecf20Sopenharmony_ci				 * will be dropped in dlmlock_master */
10928c2ecf20Sopenharmony_ci				assert = 1;
10938c2ecf20Sopenharmony_ci				sleep = 0;
10948c2ecf20Sopenharmony_ci			}
10958c2ecf20Sopenharmony_ci			/* if voting is done, but we have not received
10968c2ecf20Sopenharmony_ci			 * an assert master yet, we must sleep */
10978c2ecf20Sopenharmony_ci		}
10988c2ecf20Sopenharmony_ci	}
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_ci	spin_unlock(&mle->spinlock);
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci	/* sleep if we haven't finished voting yet */
11038c2ecf20Sopenharmony_ci	if (sleep) {
11048c2ecf20Sopenharmony_ci		unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS);
11058c2ecf20Sopenharmony_ci		atomic_set(&mle->woken, 0);
11068c2ecf20Sopenharmony_ci		(void)wait_event_timeout(mle->wq,
11078c2ecf20Sopenharmony_ci					 (atomic_read(&mle->woken) == 1),
11088c2ecf20Sopenharmony_ci					 timeo);
11098c2ecf20Sopenharmony_ci		if (res->owner == O2NM_MAX_NODES) {
11108c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: waiting again\n", dlm->name,
11118c2ecf20Sopenharmony_ci			     res->lockname.len, res->lockname.name);
11128c2ecf20Sopenharmony_ci			goto recheck;
11138c2ecf20Sopenharmony_ci		}
11148c2ecf20Sopenharmony_ci		mlog(0, "done waiting, master is %u\n", res->owner);
11158c2ecf20Sopenharmony_ci		ret = 0;
11168c2ecf20Sopenharmony_ci		goto leave;
11178c2ecf20Sopenharmony_ci	}
11188c2ecf20Sopenharmony_ci
11198c2ecf20Sopenharmony_ci	ret = 0;   /* done */
11208c2ecf20Sopenharmony_ci	if (assert) {
11218c2ecf20Sopenharmony_ci		m = dlm->node_num;
11228c2ecf20Sopenharmony_ci		mlog(0, "about to master %.*s here, this=%u\n",
11238c2ecf20Sopenharmony_ci		     res->lockname.len, res->lockname.name, m);
11248c2ecf20Sopenharmony_ci		ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0);
11258c2ecf20Sopenharmony_ci		if (ret) {
11268c2ecf20Sopenharmony_ci			/* This is a failure in the network path,
11278c2ecf20Sopenharmony_ci			 * not in the response to the assert_master
11288c2ecf20Sopenharmony_ci			 * (any nonzero response is a BUG on this node).
11298c2ecf20Sopenharmony_ci			 * Most likely a socket just got disconnected
11308c2ecf20Sopenharmony_ci			 * due to node death. */
11318c2ecf20Sopenharmony_ci			mlog_errno(ret);
11328c2ecf20Sopenharmony_ci		}
11338c2ecf20Sopenharmony_ci		/* no longer need to restart lock mastery.
11348c2ecf20Sopenharmony_ci		 * all living nodes have been contacted. */
11358c2ecf20Sopenharmony_ci		ret = 0;
11368c2ecf20Sopenharmony_ci	}
11378c2ecf20Sopenharmony_ci
11388c2ecf20Sopenharmony_ci	/* set the lockres owner */
11398c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
11408c2ecf20Sopenharmony_ci	/* mastery reference obtained either during
11418c2ecf20Sopenharmony_ci	 * assert_master_handler or in get_lock_resource */
11428c2ecf20Sopenharmony_ci	dlm_change_lockres_owner(dlm, res, m);
11438c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_cileave:
11468c2ecf20Sopenharmony_ci	return ret;
11478c2ecf20Sopenharmony_ci}
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_cistruct dlm_bitmap_diff_iter
11508c2ecf20Sopenharmony_ci{
11518c2ecf20Sopenharmony_ci	int curnode;
11528c2ecf20Sopenharmony_ci	unsigned long *orig_bm;
11538c2ecf20Sopenharmony_ci	unsigned long *cur_bm;
11548c2ecf20Sopenharmony_ci	unsigned long diff_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
11558c2ecf20Sopenharmony_ci};
11568c2ecf20Sopenharmony_ci
/* Direction in which a node's membership bit changed between two bitmaps. */
enum dlm_node_state_change {
	NODE_DOWN = -1,		/* set in the original map, clear in the current one */
	NODE_NO_CHANGE = 0,
	NODE_UP = 1		/* clear in the original map, set in the current one */
};
11638c2ecf20Sopenharmony_ci
11648c2ecf20Sopenharmony_cistatic void dlm_bitmap_diff_iter_init(struct dlm_bitmap_diff_iter *iter,
11658c2ecf20Sopenharmony_ci				      unsigned long *orig_bm,
11668c2ecf20Sopenharmony_ci				      unsigned long *cur_bm)
11678c2ecf20Sopenharmony_ci{
11688c2ecf20Sopenharmony_ci	unsigned long p1, p2;
11698c2ecf20Sopenharmony_ci	int i;
11708c2ecf20Sopenharmony_ci
11718c2ecf20Sopenharmony_ci	iter->curnode = -1;
11728c2ecf20Sopenharmony_ci	iter->orig_bm = orig_bm;
11738c2ecf20Sopenharmony_ci	iter->cur_bm = cur_bm;
11748c2ecf20Sopenharmony_ci
11758c2ecf20Sopenharmony_ci	for (i = 0; i < BITS_TO_LONGS(O2NM_MAX_NODES); i++) {
11768c2ecf20Sopenharmony_ci       		p1 = *(iter->orig_bm + i);
11778c2ecf20Sopenharmony_ci	       	p2 = *(iter->cur_bm + i);
11788c2ecf20Sopenharmony_ci		iter->diff_bm[i] = (p1 & ~p2) | (p2 & ~p1);
11798c2ecf20Sopenharmony_ci	}
11808c2ecf20Sopenharmony_ci}
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_cistatic int dlm_bitmap_diff_iter_next(struct dlm_bitmap_diff_iter *iter,
11838c2ecf20Sopenharmony_ci				     enum dlm_node_state_change *state)
11848c2ecf20Sopenharmony_ci{
11858c2ecf20Sopenharmony_ci	int bit;
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci	if (iter->curnode >= O2NM_MAX_NODES)
11888c2ecf20Sopenharmony_ci		return -ENOENT;
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci	bit = find_next_bit(iter->diff_bm, O2NM_MAX_NODES,
11918c2ecf20Sopenharmony_ci			    iter->curnode+1);
11928c2ecf20Sopenharmony_ci	if (bit >= O2NM_MAX_NODES) {
11938c2ecf20Sopenharmony_ci		iter->curnode = O2NM_MAX_NODES;
11948c2ecf20Sopenharmony_ci		return -ENOENT;
11958c2ecf20Sopenharmony_ci	}
11968c2ecf20Sopenharmony_ci
11978c2ecf20Sopenharmony_ci	/* if it was there in the original then this node died */
11988c2ecf20Sopenharmony_ci	if (test_bit(bit, iter->orig_bm))
11998c2ecf20Sopenharmony_ci		*state = NODE_DOWN;
12008c2ecf20Sopenharmony_ci	else
12018c2ecf20Sopenharmony_ci		*state = NODE_UP;
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_ci	iter->curnode = bit;
12048c2ecf20Sopenharmony_ci	return bit;
12058c2ecf20Sopenharmony_ci}
12068c2ecf20Sopenharmony_ci
12078c2ecf20Sopenharmony_ci
12088c2ecf20Sopenharmony_cistatic int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
12098c2ecf20Sopenharmony_ci				    struct dlm_lock_resource *res,
12108c2ecf20Sopenharmony_ci				    struct dlm_master_list_entry *mle,
12118c2ecf20Sopenharmony_ci				    int blocked)
12128c2ecf20Sopenharmony_ci{
12138c2ecf20Sopenharmony_ci	struct dlm_bitmap_diff_iter bdi;
12148c2ecf20Sopenharmony_ci	enum dlm_node_state_change sc;
12158c2ecf20Sopenharmony_ci	int node;
12168c2ecf20Sopenharmony_ci	int ret = 0;
12178c2ecf20Sopenharmony_ci
12188c2ecf20Sopenharmony_ci	mlog(0, "something happened such that the "
12198c2ecf20Sopenharmony_ci	     "master process may need to be restarted!\n");
12208c2ecf20Sopenharmony_ci
12218c2ecf20Sopenharmony_ci	assert_spin_locked(&mle->spinlock);
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ci	dlm_bitmap_diff_iter_init(&bdi, mle->vote_map, mle->node_map);
12248c2ecf20Sopenharmony_ci	node = dlm_bitmap_diff_iter_next(&bdi, &sc);
12258c2ecf20Sopenharmony_ci	while (node >= 0) {
12268c2ecf20Sopenharmony_ci		if (sc == NODE_UP) {
12278c2ecf20Sopenharmony_ci			/* a node came up.  clear any old vote from
12288c2ecf20Sopenharmony_ci			 * the response map and set it in the vote map
12298c2ecf20Sopenharmony_ci			 * then restart the mastery. */
12308c2ecf20Sopenharmony_ci			mlog(ML_NOTICE, "node %d up while restarting\n", node);
12318c2ecf20Sopenharmony_ci
12328c2ecf20Sopenharmony_ci			/* redo the master request, but only for the new node */
12338c2ecf20Sopenharmony_ci			mlog(0, "sending request to new node\n");
12348c2ecf20Sopenharmony_ci			clear_bit(node, mle->response_map);
12358c2ecf20Sopenharmony_ci			set_bit(node, mle->vote_map);
12368c2ecf20Sopenharmony_ci		} else {
12378c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "node down! %d\n", node);
12388c2ecf20Sopenharmony_ci			if (blocked) {
12398c2ecf20Sopenharmony_ci				int lowest = find_next_bit(mle->maybe_map,
12408c2ecf20Sopenharmony_ci						       O2NM_MAX_NODES, 0);
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci				/* act like it was never there */
12438c2ecf20Sopenharmony_ci				clear_bit(node, mle->maybe_map);
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_ci			       	if (node == lowest) {
12468c2ecf20Sopenharmony_ci					mlog(0, "expected master %u died"
12478c2ecf20Sopenharmony_ci					    " while this node was blocked "
12488c2ecf20Sopenharmony_ci					    "waiting on it!\n", node);
12498c2ecf20Sopenharmony_ci					lowest = find_next_bit(mle->maybe_map,
12508c2ecf20Sopenharmony_ci						       	O2NM_MAX_NODES,
12518c2ecf20Sopenharmony_ci						       	lowest+1);
12528c2ecf20Sopenharmony_ci					if (lowest < O2NM_MAX_NODES) {
12538c2ecf20Sopenharmony_ci						mlog(0, "%s:%.*s:still "
12548c2ecf20Sopenharmony_ci						     "blocked. waiting on %u "
12558c2ecf20Sopenharmony_ci						     "now\n", dlm->name,
12568c2ecf20Sopenharmony_ci						     res->lockname.len,
12578c2ecf20Sopenharmony_ci						     res->lockname.name,
12588c2ecf20Sopenharmony_ci						     lowest);
12598c2ecf20Sopenharmony_ci					} else {
12608c2ecf20Sopenharmony_ci						/* mle is an MLE_BLOCK, but
12618c2ecf20Sopenharmony_ci						 * there is now nothing left to
12628c2ecf20Sopenharmony_ci						 * block on.  we need to return
12638c2ecf20Sopenharmony_ci						 * all the way back out and try
12648c2ecf20Sopenharmony_ci						 * again with an MLE_MASTER.
12658c2ecf20Sopenharmony_ci						 * dlm_do_local_recovery_cleanup
12668c2ecf20Sopenharmony_ci						 * has already run, so the mle
12678c2ecf20Sopenharmony_ci						 * refcount is ok */
12688c2ecf20Sopenharmony_ci						mlog(0, "%s:%.*s: no "
12698c2ecf20Sopenharmony_ci						     "longer blocking. try to "
12708c2ecf20Sopenharmony_ci						     "master this here\n",
12718c2ecf20Sopenharmony_ci						     dlm->name,
12728c2ecf20Sopenharmony_ci						     res->lockname.len,
12738c2ecf20Sopenharmony_ci						     res->lockname.name);
12748c2ecf20Sopenharmony_ci						mle->type = DLM_MLE_MASTER;
12758c2ecf20Sopenharmony_ci						mle->mleres = res;
12768c2ecf20Sopenharmony_ci					}
12778c2ecf20Sopenharmony_ci				}
12788c2ecf20Sopenharmony_ci			}
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci			/* now blank out everything, as if we had never
12818c2ecf20Sopenharmony_ci			 * contacted anyone */
12828c2ecf20Sopenharmony_ci			memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
12838c2ecf20Sopenharmony_ci			memset(mle->response_map, 0, sizeof(mle->response_map));
12848c2ecf20Sopenharmony_ci			/* reset the vote_map to the current node_map */
12858c2ecf20Sopenharmony_ci			memcpy(mle->vote_map, mle->node_map,
12868c2ecf20Sopenharmony_ci			       sizeof(mle->node_map));
12878c2ecf20Sopenharmony_ci			/* put myself into the maybe map */
12888c2ecf20Sopenharmony_ci			if (mle->type != DLM_MLE_BLOCK)
12898c2ecf20Sopenharmony_ci				set_bit(dlm->node_num, mle->maybe_map);
12908c2ecf20Sopenharmony_ci		}
12918c2ecf20Sopenharmony_ci		ret = -EAGAIN;
12928c2ecf20Sopenharmony_ci		node = dlm_bitmap_diff_iter_next(&bdi, &sc);
12938c2ecf20Sopenharmony_ci	}
12948c2ecf20Sopenharmony_ci	return ret;
12958c2ecf20Sopenharmony_ci}
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_ci/*
12998c2ecf20Sopenharmony_ci * DLM_MASTER_REQUEST_MSG
13008c2ecf20Sopenharmony_ci *
13018c2ecf20Sopenharmony_ci * returns: 0 on success,
13028c2ecf20Sopenharmony_ci *          -errno on a network error
13038c2ecf20Sopenharmony_ci *
13048c2ecf20Sopenharmony_ci * on error, the caller should assume the target node is "dead"
13058c2ecf20Sopenharmony_ci *
13068c2ecf20Sopenharmony_ci */
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_cistatic int dlm_do_master_request(struct dlm_lock_resource *res,
13098c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry *mle, int to)
13108c2ecf20Sopenharmony_ci{
13118c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = mle->dlm;
13128c2ecf20Sopenharmony_ci	struct dlm_master_request request;
13138c2ecf20Sopenharmony_ci	int ret, response=0, resend;
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci	memset(&request, 0, sizeof(request));
13168c2ecf20Sopenharmony_ci	request.node_idx = dlm->node_num;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	BUG_ON(mle->type == DLM_MLE_MIGRATION);
13198c2ecf20Sopenharmony_ci
13208c2ecf20Sopenharmony_ci	request.namelen = (u8)mle->mnamelen;
13218c2ecf20Sopenharmony_ci	memcpy(request.name, mle->mname, request.namelen);
13228c2ecf20Sopenharmony_ci
13238c2ecf20Sopenharmony_ciagain:
13248c2ecf20Sopenharmony_ci	ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request,
13258c2ecf20Sopenharmony_ci				 sizeof(request), to, &response);
13268c2ecf20Sopenharmony_ci	if (ret < 0)  {
13278c2ecf20Sopenharmony_ci		if (ret == -ESRCH) {
13288c2ecf20Sopenharmony_ci			/* should never happen */
13298c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "TCP stack not ready!\n");
13308c2ecf20Sopenharmony_ci			BUG();
13318c2ecf20Sopenharmony_ci		} else if (ret == -EINVAL) {
13328c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "bad args passed to o2net!\n");
13338c2ecf20Sopenharmony_ci			BUG();
13348c2ecf20Sopenharmony_ci		} else if (ret == -ENOMEM) {
13358c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "out of memory while trying to send "
13368c2ecf20Sopenharmony_ci			     "network message!  retrying\n");
13378c2ecf20Sopenharmony_ci			/* this is totally crude */
13388c2ecf20Sopenharmony_ci			msleep(50);
13398c2ecf20Sopenharmony_ci			goto again;
13408c2ecf20Sopenharmony_ci		} else if (!dlm_is_host_down(ret)) {
13418c2ecf20Sopenharmony_ci			/* not a network error. bad. */
13428c2ecf20Sopenharmony_ci			mlog_errno(ret);
13438c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "unhandled error!");
13448c2ecf20Sopenharmony_ci			BUG();
13458c2ecf20Sopenharmony_ci		}
13468c2ecf20Sopenharmony_ci		/* all other errors should be network errors,
13478c2ecf20Sopenharmony_ci		 * and likely indicate node death */
13488c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "link to %d went down!\n", to);
13498c2ecf20Sopenharmony_ci		goto out;
13508c2ecf20Sopenharmony_ci	}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	ret = 0;
13538c2ecf20Sopenharmony_ci	resend = 0;
13548c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
13558c2ecf20Sopenharmony_ci	switch (response) {
13568c2ecf20Sopenharmony_ci		case DLM_MASTER_RESP_YES:
13578c2ecf20Sopenharmony_ci			set_bit(to, mle->response_map);
13588c2ecf20Sopenharmony_ci			mlog(0, "node %u is the master, response=YES\n", to);
13598c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: master node %u now knows I have a "
13608c2ecf20Sopenharmony_ci			     "reference\n", dlm->name, res->lockname.len,
13618c2ecf20Sopenharmony_ci			     res->lockname.name, to);
13628c2ecf20Sopenharmony_ci			mle->master = to;
13638c2ecf20Sopenharmony_ci			break;
13648c2ecf20Sopenharmony_ci		case DLM_MASTER_RESP_NO:
13658c2ecf20Sopenharmony_ci			mlog(0, "node %u not master, response=NO\n", to);
13668c2ecf20Sopenharmony_ci			set_bit(to, mle->response_map);
13678c2ecf20Sopenharmony_ci			break;
13688c2ecf20Sopenharmony_ci		case DLM_MASTER_RESP_MAYBE:
13698c2ecf20Sopenharmony_ci			mlog(0, "node %u not master, response=MAYBE\n", to);
13708c2ecf20Sopenharmony_ci			set_bit(to, mle->response_map);
13718c2ecf20Sopenharmony_ci			set_bit(to, mle->maybe_map);
13728c2ecf20Sopenharmony_ci			break;
13738c2ecf20Sopenharmony_ci		case DLM_MASTER_RESP_ERROR:
13748c2ecf20Sopenharmony_ci			mlog(0, "node %u hit an error, resending\n", to);
13758c2ecf20Sopenharmony_ci			resend = 1;
13768c2ecf20Sopenharmony_ci			response = 0;
13778c2ecf20Sopenharmony_ci			break;
13788c2ecf20Sopenharmony_ci		default:
13798c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "bad response! %u\n", response);
13808c2ecf20Sopenharmony_ci			BUG();
13818c2ecf20Sopenharmony_ci	}
13828c2ecf20Sopenharmony_ci	spin_unlock(&mle->spinlock);
13838c2ecf20Sopenharmony_ci	if (resend) {
13848c2ecf20Sopenharmony_ci		/* this is also totally crude */
13858c2ecf20Sopenharmony_ci		msleep(50);
13868c2ecf20Sopenharmony_ci		goto again;
13878c2ecf20Sopenharmony_ci	}
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ciout:
13908c2ecf20Sopenharmony_ci	return ret;
13918c2ecf20Sopenharmony_ci}
13928c2ecf20Sopenharmony_ci
13938c2ecf20Sopenharmony_ci/*
13948c2ecf20Sopenharmony_ci * locks that can be taken here:
13958c2ecf20Sopenharmony_ci * dlm->spinlock
13968c2ecf20Sopenharmony_ci * res->spinlock
13978c2ecf20Sopenharmony_ci * mle->spinlock
13988c2ecf20Sopenharmony_ci * dlm->master_list
13998c2ecf20Sopenharmony_ci *
14008c2ecf20Sopenharmony_ci * if possible, TRIM THIS DOWN!!!
14018c2ecf20Sopenharmony_ci */
14028c2ecf20Sopenharmony_ciint dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
14038c2ecf20Sopenharmony_ci			       void **ret_data)
14048c2ecf20Sopenharmony_ci{
14058c2ecf20Sopenharmony_ci	u8 response = DLM_MASTER_RESP_MAYBE;
14068c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
14078c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
14088c2ecf20Sopenharmony_ci	struct dlm_master_request *request = (struct dlm_master_request *) msg->buf;
14098c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL;
14108c2ecf20Sopenharmony_ci	char *name;
14118c2ecf20Sopenharmony_ci	unsigned int namelen, hash;
14128c2ecf20Sopenharmony_ci	int found, ret;
14138c2ecf20Sopenharmony_ci	int set_maybe;
14148c2ecf20Sopenharmony_ci	int dispatch_assert = 0;
14158c2ecf20Sopenharmony_ci	int dispatched = 0;
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
14188c2ecf20Sopenharmony_ci		return DLM_MASTER_RESP_NO;
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci	if (!dlm_domain_fully_joined(dlm)) {
14218c2ecf20Sopenharmony_ci		response = DLM_MASTER_RESP_NO;
14228c2ecf20Sopenharmony_ci		goto send_response;
14238c2ecf20Sopenharmony_ci	}
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	name = request->name;
14268c2ecf20Sopenharmony_ci	namelen = request->namelen;
14278c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci	if (namelen > DLM_LOCKID_NAME_MAX) {
14308c2ecf20Sopenharmony_ci		response = DLM_IVBUFLEN;
14318c2ecf20Sopenharmony_ci		goto send_response;
14328c2ecf20Sopenharmony_ci	}
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ciway_up_top:
14358c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
14368c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
14378c2ecf20Sopenharmony_ci	if (res) {
14388c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_ci		/* take care of the easy cases up front */
14418c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_ci		/*
14448c2ecf20Sopenharmony_ci		 * Right after dlm spinlock was released, dlm_thread could have
14458c2ecf20Sopenharmony_ci		 * purged the lockres. Check if lockres got unhashed. If so
14468c2ecf20Sopenharmony_ci		 * start over.
14478c2ecf20Sopenharmony_ci		 */
14488c2ecf20Sopenharmony_ci		if (hlist_unhashed(&res->hash_node)) {
14498c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
14508c2ecf20Sopenharmony_ci			dlm_lockres_put(res);
14518c2ecf20Sopenharmony_ci			goto way_up_top;
14528c2ecf20Sopenharmony_ci		}
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci		if (res->state & (DLM_LOCK_RES_RECOVERING|
14558c2ecf20Sopenharmony_ci				  DLM_LOCK_RES_MIGRATING)) {
14568c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
14578c2ecf20Sopenharmony_ci			mlog(0, "returning DLM_MASTER_RESP_ERROR since res is "
14588c2ecf20Sopenharmony_ci			     "being recovered/migrated\n");
14598c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_ERROR;
14608c2ecf20Sopenharmony_ci			if (mle)
14618c2ecf20Sopenharmony_ci				kmem_cache_free(dlm_mle_cache, mle);
14628c2ecf20Sopenharmony_ci			goto send_response;
14638c2ecf20Sopenharmony_ci		}
14648c2ecf20Sopenharmony_ci
14658c2ecf20Sopenharmony_ci		if (res->owner == dlm->node_num) {
14668c2ecf20Sopenharmony_ci			dlm_lockres_set_refmap_bit(dlm, res, request->node_idx);
14678c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
14688c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_YES;
14698c2ecf20Sopenharmony_ci			if (mle)
14708c2ecf20Sopenharmony_ci				kmem_cache_free(dlm_mle_cache, mle);
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_ci			/* this node is the owner.
14738c2ecf20Sopenharmony_ci			 * there is some extra work that needs to
14748c2ecf20Sopenharmony_ci			 * happen now.  the requesting node has
14758c2ecf20Sopenharmony_ci			 * caused all nodes up to this one to
14768c2ecf20Sopenharmony_ci			 * create mles.  this node now needs to
14778c2ecf20Sopenharmony_ci			 * go back and clean those up. */
14788c2ecf20Sopenharmony_ci			dispatch_assert = 1;
14798c2ecf20Sopenharmony_ci			goto send_response;
14808c2ecf20Sopenharmony_ci		} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
14818c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
14828c2ecf20Sopenharmony_ci			// mlog(0, "node %u is the master\n", res->owner);
14838c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_NO;
14848c2ecf20Sopenharmony_ci			if (mle)
14858c2ecf20Sopenharmony_ci				kmem_cache_free(dlm_mle_cache, mle);
14868c2ecf20Sopenharmony_ci			goto send_response;
14878c2ecf20Sopenharmony_ci		}
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_ci		/* ok, there is no owner.  either this node is
14908c2ecf20Sopenharmony_ci		 * being blocked, or it is actively trying to
14918c2ecf20Sopenharmony_ci		 * master this lock. */
14928c2ecf20Sopenharmony_ci		if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
14938c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "lock with no owner should be "
14948c2ecf20Sopenharmony_ci			     "in-progress!\n");
14958c2ecf20Sopenharmony_ci			BUG();
14968c2ecf20Sopenharmony_ci		}
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci		// mlog(0, "lockres is in progress...\n");
14998c2ecf20Sopenharmony_ci		spin_lock(&dlm->master_lock);
15008c2ecf20Sopenharmony_ci		found = dlm_find_mle(dlm, &tmpmle, name, namelen);
15018c2ecf20Sopenharmony_ci		if (!found) {
15028c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "no mle found for this lock!\n");
15038c2ecf20Sopenharmony_ci			BUG();
15048c2ecf20Sopenharmony_ci		}
15058c2ecf20Sopenharmony_ci		set_maybe = 1;
15068c2ecf20Sopenharmony_ci		spin_lock(&tmpmle->spinlock);
15078c2ecf20Sopenharmony_ci		if (tmpmle->type == DLM_MLE_BLOCK) {
15088c2ecf20Sopenharmony_ci			// mlog(0, "this node is waiting for "
15098c2ecf20Sopenharmony_ci			// "lockres to be mastered\n");
15108c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_NO;
15118c2ecf20Sopenharmony_ci		} else if (tmpmle->type == DLM_MLE_MIGRATION) {
15128c2ecf20Sopenharmony_ci			mlog(0, "node %u is master, but trying to migrate to "
15138c2ecf20Sopenharmony_ci			     "node %u.\n", tmpmle->master, tmpmle->new_master);
15148c2ecf20Sopenharmony_ci			if (tmpmle->master == dlm->node_num) {
15158c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "no owner on lockres, but this "
15168c2ecf20Sopenharmony_ci				     "node is trying to migrate it to %u?!\n",
15178c2ecf20Sopenharmony_ci				     tmpmle->new_master);
15188c2ecf20Sopenharmony_ci				BUG();
15198c2ecf20Sopenharmony_ci			} else {
15208c2ecf20Sopenharmony_ci				/* the real master can respond on its own */
15218c2ecf20Sopenharmony_ci				response = DLM_MASTER_RESP_NO;
15228c2ecf20Sopenharmony_ci			}
15238c2ecf20Sopenharmony_ci		} else if (tmpmle->master != DLM_LOCK_RES_OWNER_UNKNOWN) {
15248c2ecf20Sopenharmony_ci			set_maybe = 0;
15258c2ecf20Sopenharmony_ci			if (tmpmle->master == dlm->node_num) {
15268c2ecf20Sopenharmony_ci				response = DLM_MASTER_RESP_YES;
15278c2ecf20Sopenharmony_ci				/* this node will be the owner.
15288c2ecf20Sopenharmony_ci				 * go back and clean the mles on any
15298c2ecf20Sopenharmony_ci				 * other nodes */
15308c2ecf20Sopenharmony_ci				dispatch_assert = 1;
15318c2ecf20Sopenharmony_ci				dlm_lockres_set_refmap_bit(dlm, res,
15328c2ecf20Sopenharmony_ci							   request->node_idx);
15338c2ecf20Sopenharmony_ci			} else
15348c2ecf20Sopenharmony_ci				response = DLM_MASTER_RESP_NO;
15358c2ecf20Sopenharmony_ci		} else {
15368c2ecf20Sopenharmony_ci			// mlog(0, "this node is attempting to "
15378c2ecf20Sopenharmony_ci			// "master lockres\n");
15388c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_MAYBE;
15398c2ecf20Sopenharmony_ci		}
15408c2ecf20Sopenharmony_ci		if (set_maybe)
15418c2ecf20Sopenharmony_ci			set_bit(request->node_idx, tmpmle->maybe_map);
15428c2ecf20Sopenharmony_ci		spin_unlock(&tmpmle->spinlock);
15438c2ecf20Sopenharmony_ci
15448c2ecf20Sopenharmony_ci		spin_unlock(&dlm->master_lock);
15458c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
15468c2ecf20Sopenharmony_ci
15478c2ecf20Sopenharmony_ci		/* keep the mle attached to heartbeat events */
15488c2ecf20Sopenharmony_ci		dlm_put_mle(tmpmle);
15498c2ecf20Sopenharmony_ci		if (mle)
15508c2ecf20Sopenharmony_ci			kmem_cache_free(dlm_mle_cache, mle);
15518c2ecf20Sopenharmony_ci		goto send_response;
15528c2ecf20Sopenharmony_ci	}
15538c2ecf20Sopenharmony_ci
15548c2ecf20Sopenharmony_ci	/*
15558c2ecf20Sopenharmony_ci	 * lockres doesn't exist on this node
15568c2ecf20Sopenharmony_ci	 * if there is an MLE_BLOCK, return NO
15578c2ecf20Sopenharmony_ci	 * if there is an MLE_MASTER, return MAYBE
15588c2ecf20Sopenharmony_ci	 * otherwise, add an MLE_BLOCK, return NO
15598c2ecf20Sopenharmony_ci	 */
15608c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
15618c2ecf20Sopenharmony_ci	found = dlm_find_mle(dlm, &tmpmle, name, namelen);
15628c2ecf20Sopenharmony_ci	if (!found) {
15638c2ecf20Sopenharmony_ci		/* this lockid has never been seen on this node yet */
15648c2ecf20Sopenharmony_ci		// mlog(0, "no mle found\n");
15658c2ecf20Sopenharmony_ci		if (!mle) {
15668c2ecf20Sopenharmony_ci			spin_unlock(&dlm->master_lock);
15678c2ecf20Sopenharmony_ci			spin_unlock(&dlm->spinlock);
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci			mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
15708c2ecf20Sopenharmony_ci			if (!mle) {
15718c2ecf20Sopenharmony_ci				response = DLM_MASTER_RESP_ERROR;
15728c2ecf20Sopenharmony_ci				mlog_errno(-ENOMEM);
15738c2ecf20Sopenharmony_ci				goto send_response;
15748c2ecf20Sopenharmony_ci			}
15758c2ecf20Sopenharmony_ci			goto way_up_top;
15768c2ecf20Sopenharmony_ci		}
15778c2ecf20Sopenharmony_ci
15788c2ecf20Sopenharmony_ci		// mlog(0, "this is second time thru, already allocated, "
15798c2ecf20Sopenharmony_ci		// "add the block.\n");
15808c2ecf20Sopenharmony_ci		dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
15818c2ecf20Sopenharmony_ci		set_bit(request->node_idx, mle->maybe_map);
15828c2ecf20Sopenharmony_ci		__dlm_insert_mle(dlm, mle);
15838c2ecf20Sopenharmony_ci		response = DLM_MASTER_RESP_NO;
15848c2ecf20Sopenharmony_ci	} else {
15858c2ecf20Sopenharmony_ci		spin_lock(&tmpmle->spinlock);
15868c2ecf20Sopenharmony_ci		if (tmpmle->master == dlm->node_num) {
15878c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
15888c2ecf20Sopenharmony_ci			BUG();
15898c2ecf20Sopenharmony_ci		}
15908c2ecf20Sopenharmony_ci		if (tmpmle->type == DLM_MLE_BLOCK)
15918c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_NO;
15928c2ecf20Sopenharmony_ci		else if (tmpmle->type == DLM_MLE_MIGRATION) {
15938c2ecf20Sopenharmony_ci			mlog(0, "migration mle was found (%u->%u)\n",
15948c2ecf20Sopenharmony_ci			     tmpmle->master, tmpmle->new_master);
15958c2ecf20Sopenharmony_ci			/* real master can respond on its own */
15968c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_NO;
15978c2ecf20Sopenharmony_ci		} else
15988c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_MAYBE;
15998c2ecf20Sopenharmony_ci		set_bit(request->node_idx, tmpmle->maybe_map);
16008c2ecf20Sopenharmony_ci		spin_unlock(&tmpmle->spinlock);
16018c2ecf20Sopenharmony_ci	}
16028c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
16038c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci	if (found) {
16068c2ecf20Sopenharmony_ci		/* keep the mle attached to heartbeat events */
16078c2ecf20Sopenharmony_ci		dlm_put_mle(tmpmle);
16088c2ecf20Sopenharmony_ci	}
16098c2ecf20Sopenharmony_cisend_response:
16108c2ecf20Sopenharmony_ci	/*
16118c2ecf20Sopenharmony_ci	 * __dlm_lookup_lockres() grabbed a reference to this lockres.
16128c2ecf20Sopenharmony_ci	 * The reference is released by dlm_assert_master_worker() under
16138c2ecf20Sopenharmony_ci	 * the call to dlm_dispatch_assert_master().  If
16148c2ecf20Sopenharmony_ci	 * dlm_assert_master_worker() isn't called, we drop it here.
16158c2ecf20Sopenharmony_ci	 */
16168c2ecf20Sopenharmony_ci	if (dispatch_assert) {
16178c2ecf20Sopenharmony_ci		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
16188c2ecf20Sopenharmony_ci			     dlm->node_num, res->lockname.len, res->lockname.name);
16198c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
16208c2ecf20Sopenharmony_ci		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
16218c2ecf20Sopenharmony_ci						 DLM_ASSERT_MASTER_MLE_CLEANUP);
16228c2ecf20Sopenharmony_ci		if (ret < 0) {
16238c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "failed to dispatch assert master work\n");
16248c2ecf20Sopenharmony_ci			response = DLM_MASTER_RESP_ERROR;
16258c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
16268c2ecf20Sopenharmony_ci			dlm_lockres_put(res);
16278c2ecf20Sopenharmony_ci		} else {
16288c2ecf20Sopenharmony_ci			dispatched = 1;
16298c2ecf20Sopenharmony_ci			__dlm_lockres_grab_inflight_worker(dlm, res);
16308c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
16318c2ecf20Sopenharmony_ci		}
16328c2ecf20Sopenharmony_ci	} else {
16338c2ecf20Sopenharmony_ci		if (res)
16348c2ecf20Sopenharmony_ci			dlm_lockres_put(res);
16358c2ecf20Sopenharmony_ci	}
16368c2ecf20Sopenharmony_ci
16378c2ecf20Sopenharmony_ci	if (!dispatched)
16388c2ecf20Sopenharmony_ci		dlm_put(dlm);
16398c2ecf20Sopenharmony_ci	return response;
16408c2ecf20Sopenharmony_ci}
16418c2ecf20Sopenharmony_ci
16428c2ecf20Sopenharmony_ci/*
16438c2ecf20Sopenharmony_ci * DLM_ASSERT_MASTER_MSG
16448c2ecf20Sopenharmony_ci */
16458c2ecf20Sopenharmony_ci
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_ci/*
16488c2ecf20Sopenharmony_ci * NOTE: this can be used for debugging
16498c2ecf20Sopenharmony_ci * can periodically run all locks owned by this node
16508c2ecf20Sopenharmony_ci * and re-assert across the cluster...
16518c2ecf20Sopenharmony_ci */
16528c2ecf20Sopenharmony_cistatic int dlm_do_assert_master(struct dlm_ctxt *dlm,
16538c2ecf20Sopenharmony_ci				struct dlm_lock_resource *res,
16548c2ecf20Sopenharmony_ci				void *nodemap, u32 flags)
16558c2ecf20Sopenharmony_ci{
16568c2ecf20Sopenharmony_ci	struct dlm_assert_master assert;
16578c2ecf20Sopenharmony_ci	int to, tmpret;
16588c2ecf20Sopenharmony_ci	struct dlm_node_iter iter;
16598c2ecf20Sopenharmony_ci	int ret = 0;
16608c2ecf20Sopenharmony_ci	int reassert;
16618c2ecf20Sopenharmony_ci	const char *lockname = res->lockname.name;
16628c2ecf20Sopenharmony_ci	unsigned int namelen = res->lockname.len;
16638c2ecf20Sopenharmony_ci
16648c2ecf20Sopenharmony_ci	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
16678c2ecf20Sopenharmony_ci	res->state |= DLM_LOCK_RES_SETREF_INPROG;
16688c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
16698c2ecf20Sopenharmony_ci
16708c2ecf20Sopenharmony_ciagain:
16718c2ecf20Sopenharmony_ci	reassert = 0;
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_ci	/* note that if this nodemap is empty, it returns 0 */
16748c2ecf20Sopenharmony_ci	dlm_node_iter_init(nodemap, &iter);
16758c2ecf20Sopenharmony_ci	while ((to = dlm_node_iter_next(&iter)) >= 0) {
16768c2ecf20Sopenharmony_ci		int r = 0;
16778c2ecf20Sopenharmony_ci		struct dlm_master_list_entry *mle = NULL;
16788c2ecf20Sopenharmony_ci
16798c2ecf20Sopenharmony_ci		mlog(0, "sending assert master to %d (%.*s)\n", to,
16808c2ecf20Sopenharmony_ci		     namelen, lockname);
16818c2ecf20Sopenharmony_ci		memset(&assert, 0, sizeof(assert));
16828c2ecf20Sopenharmony_ci		assert.node_idx = dlm->node_num;
16838c2ecf20Sopenharmony_ci		assert.namelen = namelen;
16848c2ecf20Sopenharmony_ci		memcpy(assert.name, lockname, namelen);
16858c2ecf20Sopenharmony_ci		assert.flags = cpu_to_be32(flags);
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci		tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
16888c2ecf20Sopenharmony_ci					    &assert, sizeof(assert), to, &r);
16898c2ecf20Sopenharmony_ci		if (tmpret < 0) {
16908c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "Error %d when sending message %u (key "
16918c2ecf20Sopenharmony_ci			     "0x%x) to node %u\n", tmpret,
16928c2ecf20Sopenharmony_ci			     DLM_ASSERT_MASTER_MSG, dlm->key, to);
16938c2ecf20Sopenharmony_ci			if (!dlm_is_host_down(tmpret)) {
16948c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
16958c2ecf20Sopenharmony_ci				BUG();
16968c2ecf20Sopenharmony_ci			}
16978c2ecf20Sopenharmony_ci			/* a node died.  finish out the rest of the nodes. */
16988c2ecf20Sopenharmony_ci			mlog(0, "link to %d went down!\n", to);
16998c2ecf20Sopenharmony_ci			/* any nonzero status return will do */
17008c2ecf20Sopenharmony_ci			ret = tmpret;
17018c2ecf20Sopenharmony_ci			r = 0;
17028c2ecf20Sopenharmony_ci		} else if (r < 0) {
17038c2ecf20Sopenharmony_ci			/* ok, something horribly messed.  kill thyself. */
17048c2ecf20Sopenharmony_ci			mlog(ML_ERROR,"during assert master of %.*s to %u, "
17058c2ecf20Sopenharmony_ci			     "got %d.\n", namelen, lockname, to, r);
17068c2ecf20Sopenharmony_ci			spin_lock(&dlm->spinlock);
17078c2ecf20Sopenharmony_ci			spin_lock(&dlm->master_lock);
17088c2ecf20Sopenharmony_ci			if (dlm_find_mle(dlm, &mle, (char *)lockname,
17098c2ecf20Sopenharmony_ci					 namelen)) {
17108c2ecf20Sopenharmony_ci				dlm_print_one_mle(mle);
17118c2ecf20Sopenharmony_ci				__dlm_put_mle(mle);
17128c2ecf20Sopenharmony_ci			}
17138c2ecf20Sopenharmony_ci			spin_unlock(&dlm->master_lock);
17148c2ecf20Sopenharmony_ci			spin_unlock(&dlm->spinlock);
17158c2ecf20Sopenharmony_ci			BUG();
17168c2ecf20Sopenharmony_ci		}
17178c2ecf20Sopenharmony_ci
17188c2ecf20Sopenharmony_ci		if (r & DLM_ASSERT_RESPONSE_REASSERT &&
17198c2ecf20Sopenharmony_ci		    !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) {
17208c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "%.*s: very strange, "
17218c2ecf20Sopenharmony_ci				     "master MLE but no lockres on %u\n",
17228c2ecf20Sopenharmony_ci				     namelen, lockname, to);
17238c2ecf20Sopenharmony_ci		}
17248c2ecf20Sopenharmony_ci
17258c2ecf20Sopenharmony_ci		if (r & DLM_ASSERT_RESPONSE_REASSERT) {
17268c2ecf20Sopenharmony_ci			mlog(0, "%.*s: node %u create mles on other "
17278c2ecf20Sopenharmony_ci			     "nodes and requests a re-assert\n",
17288c2ecf20Sopenharmony_ci			     namelen, lockname, to);
17298c2ecf20Sopenharmony_ci			reassert = 1;
17308c2ecf20Sopenharmony_ci		}
17318c2ecf20Sopenharmony_ci		if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) {
17328c2ecf20Sopenharmony_ci			mlog(0, "%.*s: node %u has a reference to this "
17338c2ecf20Sopenharmony_ci			     "lockres, set the bit in the refmap\n",
17348c2ecf20Sopenharmony_ci			     namelen, lockname, to);
17358c2ecf20Sopenharmony_ci			spin_lock(&res->spinlock);
17368c2ecf20Sopenharmony_ci			dlm_lockres_set_refmap_bit(dlm, res, to);
17378c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
17388c2ecf20Sopenharmony_ci		}
17398c2ecf20Sopenharmony_ci	}
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	if (reassert)
17428c2ecf20Sopenharmony_ci		goto again;
17438c2ecf20Sopenharmony_ci
17448c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
17458c2ecf20Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
17468c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
17478c2ecf20Sopenharmony_ci	wake_up(&res->wq);
17488c2ecf20Sopenharmony_ci
17498c2ecf20Sopenharmony_ci	return ret;
17508c2ecf20Sopenharmony_ci}
17518c2ecf20Sopenharmony_ci
17528c2ecf20Sopenharmony_ci/*
17538c2ecf20Sopenharmony_ci * locks that can be taken here:
17548c2ecf20Sopenharmony_ci * dlm->spinlock
17558c2ecf20Sopenharmony_ci * res->spinlock
17568c2ecf20Sopenharmony_ci * mle->spinlock
17578c2ecf20Sopenharmony_ci * dlm->master_list
17588c2ecf20Sopenharmony_ci *
17598c2ecf20Sopenharmony_ci * if possible, TRIM THIS DOWN!!!
17608c2ecf20Sopenharmony_ci */
17618c2ecf20Sopenharmony_ciint dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
17628c2ecf20Sopenharmony_ci			      void **ret_data)
17638c2ecf20Sopenharmony_ci{
17648c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
17658c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle = NULL;
17668c2ecf20Sopenharmony_ci	struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf;
17678c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
17688c2ecf20Sopenharmony_ci	char *name;
17698c2ecf20Sopenharmony_ci	unsigned int namelen, hash;
17708c2ecf20Sopenharmony_ci	u32 flags;
17718c2ecf20Sopenharmony_ci	int master_request = 0, have_lockres_ref = 0;
17728c2ecf20Sopenharmony_ci	int ret = 0;
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
17758c2ecf20Sopenharmony_ci		return 0;
17768c2ecf20Sopenharmony_ci
17778c2ecf20Sopenharmony_ci	name = assert->name;
17788c2ecf20Sopenharmony_ci	namelen = assert->namelen;
17798c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
17808c2ecf20Sopenharmony_ci	flags = be32_to_cpu(assert->flags);
17818c2ecf20Sopenharmony_ci
17828c2ecf20Sopenharmony_ci	if (namelen > DLM_LOCKID_NAME_MAX) {
17838c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Invalid name length!");
17848c2ecf20Sopenharmony_ci		goto done;
17858c2ecf20Sopenharmony_ci	}
17868c2ecf20Sopenharmony_ci
17878c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
17888c2ecf20Sopenharmony_ci
17898c2ecf20Sopenharmony_ci	if (flags)
17908c2ecf20Sopenharmony_ci		mlog(0, "assert_master with flags: %u\n", flags);
17918c2ecf20Sopenharmony_ci
17928c2ecf20Sopenharmony_ci	/* find the MLE */
17938c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
17948c2ecf20Sopenharmony_ci	if (!dlm_find_mle(dlm, &mle, name, namelen)) {
17958c2ecf20Sopenharmony_ci		/* not an error, could be master just re-asserting */
17968c2ecf20Sopenharmony_ci		mlog(0, "just got an assert_master from %u, but no "
17978c2ecf20Sopenharmony_ci		     "MLE for it! (%.*s)\n", assert->node_idx,
17988c2ecf20Sopenharmony_ci		     namelen, name);
17998c2ecf20Sopenharmony_ci	} else {
18008c2ecf20Sopenharmony_ci		int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0);
18018c2ecf20Sopenharmony_ci		if (bit >= O2NM_MAX_NODES) {
18028c2ecf20Sopenharmony_ci			/* not necessarily an error, though less likely.
18038c2ecf20Sopenharmony_ci			 * could be master just re-asserting. */
18048c2ecf20Sopenharmony_ci			mlog(0, "no bits set in the maybe_map, but %u "
18058c2ecf20Sopenharmony_ci			     "is asserting! (%.*s)\n", assert->node_idx,
18068c2ecf20Sopenharmony_ci			     namelen, name);
18078c2ecf20Sopenharmony_ci		} else if (bit != assert->node_idx) {
18088c2ecf20Sopenharmony_ci			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
18098c2ecf20Sopenharmony_ci				mlog(0, "master %u was found, %u should "
18108c2ecf20Sopenharmony_ci				     "back off\n", assert->node_idx, bit);
18118c2ecf20Sopenharmony_ci			} else {
18128c2ecf20Sopenharmony_ci				/* with the fix for bug 569, a higher node
18138c2ecf20Sopenharmony_ci				 * number winning the mastery will respond
18148c2ecf20Sopenharmony_ci				 * YES to mastery requests, but this node
18158c2ecf20Sopenharmony_ci				 * had no way of knowing.  let it pass. */
18168c2ecf20Sopenharmony_ci				mlog(0, "%u is the lowest node, "
18178c2ecf20Sopenharmony_ci				     "%u is asserting. (%.*s)  %u must "
18188c2ecf20Sopenharmony_ci				     "have begun after %u won.\n", bit,
18198c2ecf20Sopenharmony_ci				     assert->node_idx, namelen, name, bit,
18208c2ecf20Sopenharmony_ci				     assert->node_idx);
18218c2ecf20Sopenharmony_ci			}
18228c2ecf20Sopenharmony_ci		}
18238c2ecf20Sopenharmony_ci		if (mle->type == DLM_MLE_MIGRATION) {
18248c2ecf20Sopenharmony_ci			if (flags & DLM_ASSERT_MASTER_MLE_CLEANUP) {
18258c2ecf20Sopenharmony_ci				mlog(0, "%s:%.*s: got cleanup assert"
18268c2ecf20Sopenharmony_ci				     " from %u for migration\n",
18278c2ecf20Sopenharmony_ci				     dlm->name, namelen, name,
18288c2ecf20Sopenharmony_ci				     assert->node_idx);
18298c2ecf20Sopenharmony_ci			} else if (!(flags & DLM_ASSERT_MASTER_FINISH_MIGRATION)) {
18308c2ecf20Sopenharmony_ci				mlog(0, "%s:%.*s: got unrelated assert"
18318c2ecf20Sopenharmony_ci				     " from %u for migration, ignoring\n",
18328c2ecf20Sopenharmony_ci				     dlm->name, namelen, name,
18338c2ecf20Sopenharmony_ci				     assert->node_idx);
18348c2ecf20Sopenharmony_ci				__dlm_put_mle(mle);
18358c2ecf20Sopenharmony_ci				spin_unlock(&dlm->master_lock);
18368c2ecf20Sopenharmony_ci				spin_unlock(&dlm->spinlock);
18378c2ecf20Sopenharmony_ci				goto done;
18388c2ecf20Sopenharmony_ci			}
18398c2ecf20Sopenharmony_ci		}
18408c2ecf20Sopenharmony_ci	}
18418c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
18428c2ecf20Sopenharmony_ci
18438c2ecf20Sopenharmony_ci	/* ok everything checks out with the MLE
18448c2ecf20Sopenharmony_ci	 * now check to see if there is a lockres */
18458c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
18468c2ecf20Sopenharmony_ci	if (res) {
18478c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
18488c2ecf20Sopenharmony_ci		if (res->state & DLM_LOCK_RES_RECOVERING)  {
18498c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "%u asserting but %.*s is "
18508c2ecf20Sopenharmony_ci			     "RECOVERING!\n", assert->node_idx, namelen, name);
18518c2ecf20Sopenharmony_ci			goto kill;
18528c2ecf20Sopenharmony_ci		}
18538c2ecf20Sopenharmony_ci		if (!mle) {
18548c2ecf20Sopenharmony_ci			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
18558c2ecf20Sopenharmony_ci			    res->owner != assert->node_idx) {
18568c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "DIE! Mastery assert from %u, "
18578c2ecf20Sopenharmony_ci				     "but current owner is %u! (%.*s)\n",
18588c2ecf20Sopenharmony_ci				     assert->node_idx, res->owner, namelen,
18598c2ecf20Sopenharmony_ci				     name);
18608c2ecf20Sopenharmony_ci				__dlm_print_one_lock_resource(res);
18618c2ecf20Sopenharmony_ci				BUG();
18628c2ecf20Sopenharmony_ci			}
18638c2ecf20Sopenharmony_ci		} else if (mle->type != DLM_MLE_MIGRATION) {
18648c2ecf20Sopenharmony_ci			if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
18658c2ecf20Sopenharmony_ci				/* owner is just re-asserting */
18668c2ecf20Sopenharmony_ci				if (res->owner == assert->node_idx) {
18678c2ecf20Sopenharmony_ci					mlog(0, "owner %u re-asserting on "
18688c2ecf20Sopenharmony_ci					     "lock %.*s\n", assert->node_idx,
18698c2ecf20Sopenharmony_ci					     namelen, name);
18708c2ecf20Sopenharmony_ci					goto ok;
18718c2ecf20Sopenharmony_ci				}
18728c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "got assert_master from "
18738c2ecf20Sopenharmony_ci				     "node %u, but %u is the owner! "
18748c2ecf20Sopenharmony_ci				     "(%.*s)\n", assert->node_idx,
18758c2ecf20Sopenharmony_ci				     res->owner, namelen, name);
18768c2ecf20Sopenharmony_ci				goto kill;
18778c2ecf20Sopenharmony_ci			}
18788c2ecf20Sopenharmony_ci			if (!(res->state & DLM_LOCK_RES_IN_PROGRESS)) {
18798c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "got assert from %u, but lock "
18808c2ecf20Sopenharmony_ci				     "with no owner should be "
18818c2ecf20Sopenharmony_ci				     "in-progress! (%.*s)\n",
18828c2ecf20Sopenharmony_ci				     assert->node_idx,
18838c2ecf20Sopenharmony_ci				     namelen, name);
18848c2ecf20Sopenharmony_ci				goto kill;
18858c2ecf20Sopenharmony_ci			}
18868c2ecf20Sopenharmony_ci		} else /* mle->type == DLM_MLE_MIGRATION */ {
18878c2ecf20Sopenharmony_ci			/* should only be getting an assert from new master */
18888c2ecf20Sopenharmony_ci			if (assert->node_idx != mle->new_master) {
18898c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "got assert from %u, but "
18908c2ecf20Sopenharmony_ci				     "new master is %u, and old master "
18918c2ecf20Sopenharmony_ci				     "was %u (%.*s)\n",
18928c2ecf20Sopenharmony_ci				     assert->node_idx, mle->new_master,
18938c2ecf20Sopenharmony_ci				     mle->master, namelen, name);
18948c2ecf20Sopenharmony_ci				goto kill;
18958c2ecf20Sopenharmony_ci			}
18968c2ecf20Sopenharmony_ci
18978c2ecf20Sopenharmony_ci		}
18988c2ecf20Sopenharmony_ciok:
18998c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
19008c2ecf20Sopenharmony_ci	}
19018c2ecf20Sopenharmony_ci
19028c2ecf20Sopenharmony_ci	// mlog(0, "woo!  got an assert_master from node %u!\n",
19038c2ecf20Sopenharmony_ci	// 	     assert->node_idx);
19048c2ecf20Sopenharmony_ci	if (mle) {
19058c2ecf20Sopenharmony_ci		int extra_ref = 0;
19068c2ecf20Sopenharmony_ci		int nn = -1;
19078c2ecf20Sopenharmony_ci		int rr, err = 0;
19088c2ecf20Sopenharmony_ci
19098c2ecf20Sopenharmony_ci		spin_lock(&mle->spinlock);
19108c2ecf20Sopenharmony_ci		if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
19118c2ecf20Sopenharmony_ci			extra_ref = 1;
19128c2ecf20Sopenharmony_ci		else {
19138c2ecf20Sopenharmony_ci			/* MASTER mle: if any bits set in the response map
19148c2ecf20Sopenharmony_ci			 * then the calling node needs to re-assert to clear
19158c2ecf20Sopenharmony_ci			 * up nodes that this node contacted */
19168c2ecf20Sopenharmony_ci			while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
19178c2ecf20Sopenharmony_ci						    nn+1)) < O2NM_MAX_NODES) {
19188c2ecf20Sopenharmony_ci				if (nn != dlm->node_num && nn != assert->node_idx) {
19198c2ecf20Sopenharmony_ci					master_request = 1;
19208c2ecf20Sopenharmony_ci					break;
19218c2ecf20Sopenharmony_ci				}
19228c2ecf20Sopenharmony_ci			}
19238c2ecf20Sopenharmony_ci		}
19248c2ecf20Sopenharmony_ci		mle->master = assert->node_idx;
19258c2ecf20Sopenharmony_ci		atomic_set(&mle->woken, 1);
19268c2ecf20Sopenharmony_ci		wake_up(&mle->wq);
19278c2ecf20Sopenharmony_ci		spin_unlock(&mle->spinlock);
19288c2ecf20Sopenharmony_ci
19298c2ecf20Sopenharmony_ci		if (res) {
19308c2ecf20Sopenharmony_ci			int wake = 0;
19318c2ecf20Sopenharmony_ci			spin_lock(&res->spinlock);
19328c2ecf20Sopenharmony_ci			if (mle->type == DLM_MLE_MIGRATION) {
19338c2ecf20Sopenharmony_ci				mlog(0, "finishing off migration of lockres %.*s, "
19348c2ecf20Sopenharmony_ci			     		"from %u to %u\n",
19358c2ecf20Sopenharmony_ci			       		res->lockname.len, res->lockname.name,
19368c2ecf20Sopenharmony_ci			       		dlm->node_num, mle->new_master);
19378c2ecf20Sopenharmony_ci				res->state &= ~DLM_LOCK_RES_MIGRATING;
19388c2ecf20Sopenharmony_ci				wake = 1;
19398c2ecf20Sopenharmony_ci				dlm_change_lockres_owner(dlm, res, mle->new_master);
19408c2ecf20Sopenharmony_ci				BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
19418c2ecf20Sopenharmony_ci			} else {
19428c2ecf20Sopenharmony_ci				dlm_change_lockres_owner(dlm, res, mle->master);
19438c2ecf20Sopenharmony_ci			}
19448c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
19458c2ecf20Sopenharmony_ci			have_lockres_ref = 1;
19468c2ecf20Sopenharmony_ci			if (wake)
19478c2ecf20Sopenharmony_ci				wake_up(&res->wq);
19488c2ecf20Sopenharmony_ci		}
19498c2ecf20Sopenharmony_ci
19508c2ecf20Sopenharmony_ci		/* master is known, detach if not already detached.
19518c2ecf20Sopenharmony_ci		 * ensures that only one assert_master call will happen
19528c2ecf20Sopenharmony_ci		 * on this mle. */
19538c2ecf20Sopenharmony_ci		spin_lock(&dlm->master_lock);
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_ci		rr = kref_read(&mle->mle_refs);
19568c2ecf20Sopenharmony_ci		if (mle->inuse > 0) {
19578c2ecf20Sopenharmony_ci			if (extra_ref && rr < 3)
19588c2ecf20Sopenharmony_ci				err = 1;
19598c2ecf20Sopenharmony_ci			else if (!extra_ref && rr < 2)
19608c2ecf20Sopenharmony_ci				err = 1;
19618c2ecf20Sopenharmony_ci		} else {
19628c2ecf20Sopenharmony_ci			if (extra_ref && rr < 2)
19638c2ecf20Sopenharmony_ci				err = 1;
19648c2ecf20Sopenharmony_ci			else if (!extra_ref && rr < 1)
19658c2ecf20Sopenharmony_ci				err = 1;
19668c2ecf20Sopenharmony_ci		}
19678c2ecf20Sopenharmony_ci		if (err) {
19688c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "%s:%.*s: got assert master from %u "
19698c2ecf20Sopenharmony_ci			     "that will mess up this node, refs=%d, extra=%d, "
19708c2ecf20Sopenharmony_ci			     "inuse=%d\n", dlm->name, namelen, name,
19718c2ecf20Sopenharmony_ci			     assert->node_idx, rr, extra_ref, mle->inuse);
19728c2ecf20Sopenharmony_ci			dlm_print_one_mle(mle);
19738c2ecf20Sopenharmony_ci		}
19748c2ecf20Sopenharmony_ci		__dlm_unlink_mle(dlm, mle);
19758c2ecf20Sopenharmony_ci		__dlm_mle_detach_hb_events(dlm, mle);
19768c2ecf20Sopenharmony_ci		__dlm_put_mle(mle);
19778c2ecf20Sopenharmony_ci		if (extra_ref) {
19788c2ecf20Sopenharmony_ci			/* the assert master message now balances the extra
19798c2ecf20Sopenharmony_ci		 	 * ref given by the master / migration request message.
19808c2ecf20Sopenharmony_ci		 	 * if this is the last put, it will be removed
19818c2ecf20Sopenharmony_ci		 	 * from the list. */
19828c2ecf20Sopenharmony_ci			__dlm_put_mle(mle);
19838c2ecf20Sopenharmony_ci		}
19848c2ecf20Sopenharmony_ci		spin_unlock(&dlm->master_lock);
19858c2ecf20Sopenharmony_ci	} else if (res) {
19868c2ecf20Sopenharmony_ci		if (res->owner != assert->node_idx) {
19878c2ecf20Sopenharmony_ci			mlog(0, "assert_master from %u, but current "
19888c2ecf20Sopenharmony_ci			     "owner is %u (%.*s), no mle\n", assert->node_idx,
19898c2ecf20Sopenharmony_ci			     res->owner, namelen, name);
19908c2ecf20Sopenharmony_ci		}
19918c2ecf20Sopenharmony_ci	}
19928c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
19938c2ecf20Sopenharmony_ci
19948c2ecf20Sopenharmony_cidone:
19958c2ecf20Sopenharmony_ci	ret = 0;
19968c2ecf20Sopenharmony_ci	if (res) {
19978c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
19988c2ecf20Sopenharmony_ci		res->state |= DLM_LOCK_RES_SETREF_INPROG;
19998c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
20008c2ecf20Sopenharmony_ci		*ret_data = (void *)res;
20018c2ecf20Sopenharmony_ci	}
20028c2ecf20Sopenharmony_ci	dlm_put(dlm);
20038c2ecf20Sopenharmony_ci	if (master_request) {
20048c2ecf20Sopenharmony_ci		mlog(0, "need to tell master to reassert\n");
20058c2ecf20Sopenharmony_ci		/* positive. negative would shoot down the node. */
20068c2ecf20Sopenharmony_ci		ret |= DLM_ASSERT_RESPONSE_REASSERT;
20078c2ecf20Sopenharmony_ci		if (!have_lockres_ref) {
20088c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "strange, got assert from %u, MASTER "
20098c2ecf20Sopenharmony_ci			     "mle present here for %s:%.*s, but no lockres!\n",
20108c2ecf20Sopenharmony_ci			     assert->node_idx, dlm->name, namelen, name);
20118c2ecf20Sopenharmony_ci		}
20128c2ecf20Sopenharmony_ci	}
20138c2ecf20Sopenharmony_ci	if (have_lockres_ref) {
20148c2ecf20Sopenharmony_ci		/* let the master know we have a reference to the lockres */
20158c2ecf20Sopenharmony_ci		ret |= DLM_ASSERT_RESPONSE_MASTERY_REF;
20168c2ecf20Sopenharmony_ci		mlog(0, "%s:%.*s: got assert from %u, need a ref\n",
20178c2ecf20Sopenharmony_ci		     dlm->name, namelen, name, assert->node_idx);
20188c2ecf20Sopenharmony_ci	}
20198c2ecf20Sopenharmony_ci	return ret;
20208c2ecf20Sopenharmony_ci
20218c2ecf20Sopenharmony_cikill:
20228c2ecf20Sopenharmony_ci	/* kill the caller! */
20238c2ecf20Sopenharmony_ci	mlog(ML_ERROR, "Bad message received from another node.  Dumping state "
20248c2ecf20Sopenharmony_ci	     "and killing the other node now!  This node is OK and can continue.\n");
20258c2ecf20Sopenharmony_ci	__dlm_print_one_lock_resource(res);
20268c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
20278c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
20288c2ecf20Sopenharmony_ci	if (mle)
20298c2ecf20Sopenharmony_ci		__dlm_put_mle(mle);
20308c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
20318c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
20328c2ecf20Sopenharmony_ci	*ret_data = (void *)res;
20338c2ecf20Sopenharmony_ci	dlm_put(dlm);
20348c2ecf20Sopenharmony_ci	return -EINVAL;
20358c2ecf20Sopenharmony_ci}
20368c2ecf20Sopenharmony_ci
20378c2ecf20Sopenharmony_civoid dlm_assert_master_post_handler(int status, void *data, void *ret_data)
20388c2ecf20Sopenharmony_ci{
20398c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = (struct dlm_lock_resource *)ret_data;
20408c2ecf20Sopenharmony_ci
20418c2ecf20Sopenharmony_ci	if (ret_data) {
20428c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
20438c2ecf20Sopenharmony_ci		res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
20448c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
20458c2ecf20Sopenharmony_ci		wake_up(&res->wq);
20468c2ecf20Sopenharmony_ci		dlm_lockres_put(res);
20478c2ecf20Sopenharmony_ci	}
20488c2ecf20Sopenharmony_ci	return;
20498c2ecf20Sopenharmony_ci}
20508c2ecf20Sopenharmony_ci
20518c2ecf20Sopenharmony_ciint dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
20528c2ecf20Sopenharmony_ci			       struct dlm_lock_resource *res,
20538c2ecf20Sopenharmony_ci			       int ignore_higher, u8 request_from, u32 flags)
20548c2ecf20Sopenharmony_ci{
20558c2ecf20Sopenharmony_ci	struct dlm_work_item *item;
20568c2ecf20Sopenharmony_ci	item = kzalloc(sizeof(*item), GFP_ATOMIC);
20578c2ecf20Sopenharmony_ci	if (!item)
20588c2ecf20Sopenharmony_ci		return -ENOMEM;
20598c2ecf20Sopenharmony_ci
20608c2ecf20Sopenharmony_ci
20618c2ecf20Sopenharmony_ci	/* queue up work for dlm_assert_master_worker */
20628c2ecf20Sopenharmony_ci	dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
20638c2ecf20Sopenharmony_ci	item->u.am.lockres = res; /* already have a ref */
20648c2ecf20Sopenharmony_ci	/* can optionally ignore node numbers higher than this node */
20658c2ecf20Sopenharmony_ci	item->u.am.ignore_higher = ignore_higher;
20668c2ecf20Sopenharmony_ci	item->u.am.request_from = request_from;
20678c2ecf20Sopenharmony_ci	item->u.am.flags = flags;
20688c2ecf20Sopenharmony_ci
20698c2ecf20Sopenharmony_ci	if (ignore_higher)
20708c2ecf20Sopenharmony_ci		mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
20718c2ecf20Sopenharmony_ci		     res->lockname.name);
20728c2ecf20Sopenharmony_ci
20738c2ecf20Sopenharmony_ci	spin_lock(&dlm->work_lock);
20748c2ecf20Sopenharmony_ci	list_add_tail(&item->list, &dlm->work_list);
20758c2ecf20Sopenharmony_ci	spin_unlock(&dlm->work_lock);
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
20788c2ecf20Sopenharmony_ci	return 0;
20798c2ecf20Sopenharmony_ci}
20808c2ecf20Sopenharmony_ci
20818c2ecf20Sopenharmony_cistatic void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
20828c2ecf20Sopenharmony_ci{
20838c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
20848c2ecf20Sopenharmony_ci	int ret = 0;
20858c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res;
20868c2ecf20Sopenharmony_ci	unsigned long nodemap[BITS_TO_LONGS(O2NM_MAX_NODES)];
20878c2ecf20Sopenharmony_ci	int ignore_higher;
20888c2ecf20Sopenharmony_ci	int bit;
20898c2ecf20Sopenharmony_ci	u8 request_from;
20908c2ecf20Sopenharmony_ci	u32 flags;
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	dlm = item->dlm;
20938c2ecf20Sopenharmony_ci	res = item->u.am.lockres;
20948c2ecf20Sopenharmony_ci	ignore_higher = item->u.am.ignore_higher;
20958c2ecf20Sopenharmony_ci	request_from = item->u.am.request_from;
20968c2ecf20Sopenharmony_ci	flags = item->u.am.flags;
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
20998c2ecf20Sopenharmony_ci	memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
21008c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
21018c2ecf20Sopenharmony_ci
21028c2ecf20Sopenharmony_ci	clear_bit(dlm->node_num, nodemap);
21038c2ecf20Sopenharmony_ci	if (ignore_higher) {
21048c2ecf20Sopenharmony_ci		/* if is this just to clear up mles for nodes below
21058c2ecf20Sopenharmony_ci		 * this node, do not send the message to the original
21068c2ecf20Sopenharmony_ci		 * caller or any node number higher than this */
21078c2ecf20Sopenharmony_ci		clear_bit(request_from, nodemap);
21088c2ecf20Sopenharmony_ci		bit = dlm->node_num;
21098c2ecf20Sopenharmony_ci		while (1) {
21108c2ecf20Sopenharmony_ci			bit = find_next_bit(nodemap, O2NM_MAX_NODES,
21118c2ecf20Sopenharmony_ci					    bit+1);
21128c2ecf20Sopenharmony_ci		       	if (bit >= O2NM_MAX_NODES)
21138c2ecf20Sopenharmony_ci				break;
21148c2ecf20Sopenharmony_ci			clear_bit(bit, nodemap);
21158c2ecf20Sopenharmony_ci		}
21168c2ecf20Sopenharmony_ci	}
21178c2ecf20Sopenharmony_ci
21188c2ecf20Sopenharmony_ci	/*
21198c2ecf20Sopenharmony_ci	 * If we're migrating this lock to someone else, we are no
21208c2ecf20Sopenharmony_ci	 * longer allowed to assert out own mastery.  OTOH, we need to
21218c2ecf20Sopenharmony_ci	 * prevent migration from starting while we're still asserting
21228c2ecf20Sopenharmony_ci	 * our dominance.  The reserved ast delays migration.
21238c2ecf20Sopenharmony_ci	 */
21248c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
21258c2ecf20Sopenharmony_ci	if (res->state & DLM_LOCK_RES_MIGRATING) {
21268c2ecf20Sopenharmony_ci		mlog(0, "Someone asked us to assert mastery, but we're "
21278c2ecf20Sopenharmony_ci		     "in the middle of migration.  Skipping assert, "
21288c2ecf20Sopenharmony_ci		     "the new master will handle that.\n");
21298c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
21308c2ecf20Sopenharmony_ci		goto put;
21318c2ecf20Sopenharmony_ci	} else
21328c2ecf20Sopenharmony_ci		__dlm_lockres_reserve_ast(res);
21338c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
21348c2ecf20Sopenharmony_ci
21358c2ecf20Sopenharmony_ci	/* this call now finishes out the nodemap
21368c2ecf20Sopenharmony_ci	 * even if one or more nodes die */
21378c2ecf20Sopenharmony_ci	mlog(0, "worker about to master %.*s here, this=%u\n",
21388c2ecf20Sopenharmony_ci		     res->lockname.len, res->lockname.name, dlm->node_num);
21398c2ecf20Sopenharmony_ci	ret = dlm_do_assert_master(dlm, res, nodemap, flags);
21408c2ecf20Sopenharmony_ci	if (ret < 0) {
21418c2ecf20Sopenharmony_ci		/* no need to restart, we are done */
21428c2ecf20Sopenharmony_ci		if (!dlm_is_host_down(ret))
21438c2ecf20Sopenharmony_ci			mlog_errno(ret);
21448c2ecf20Sopenharmony_ci	}
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci	/* Ok, we've asserted ourselves.  Let's let migration start. */
21478c2ecf20Sopenharmony_ci	dlm_lockres_release_ast(dlm, res);
21488c2ecf20Sopenharmony_ci
21498c2ecf20Sopenharmony_ciput:
21508c2ecf20Sopenharmony_ci	dlm_lockres_drop_inflight_worker(dlm, res);
21518c2ecf20Sopenharmony_ci
21528c2ecf20Sopenharmony_ci	dlm_lockres_put(res);
21538c2ecf20Sopenharmony_ci
21548c2ecf20Sopenharmony_ci	mlog(0, "finished with dlm_assert_master_worker\n");
21558c2ecf20Sopenharmony_ci}
21568c2ecf20Sopenharmony_ci
21578c2ecf20Sopenharmony_ci/* SPECIAL CASE for the $RECOVERY lock used by the recovery thread.
21588c2ecf20Sopenharmony_ci * We cannot wait for node recovery to complete to begin mastering this
21598c2ecf20Sopenharmony_ci * lockres because this lockres is used to kick off recovery! ;-)
21608c2ecf20Sopenharmony_ci * So, do a pre-check on all living nodes to see if any of those nodes
21618c2ecf20Sopenharmony_ci * think that $RECOVERY is currently mastered by a dead node.  If so,
21628c2ecf20Sopenharmony_ci * we wait a short time to allow that node to get notified by its own
21638c2ecf20Sopenharmony_ci * heartbeat stack, then check again.  All $RECOVERY lock resources
21648c2ecf20Sopenharmony_ci * mastered by dead nodes are purged when the heartbeat callback is
21658c2ecf20Sopenharmony_ci * fired, so we can know for sure that it is safe to continue once
21668c2ecf20Sopenharmony_ci * the node returns a live node or no node.  */
21678c2ecf20Sopenharmony_cistatic int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
21688c2ecf20Sopenharmony_ci				       struct dlm_lock_resource *res)
21698c2ecf20Sopenharmony_ci{
21708c2ecf20Sopenharmony_ci	struct dlm_node_iter iter;
21718c2ecf20Sopenharmony_ci	int nodenum;
21728c2ecf20Sopenharmony_ci	int ret = 0;
21738c2ecf20Sopenharmony_ci	u8 master = DLM_LOCK_RES_OWNER_UNKNOWN;
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
21768c2ecf20Sopenharmony_ci	dlm_node_iter_init(dlm->domain_map, &iter);
21778c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
21788c2ecf20Sopenharmony_ci
21798c2ecf20Sopenharmony_ci	while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
21808c2ecf20Sopenharmony_ci		/* do not send to self */
21818c2ecf20Sopenharmony_ci		if (nodenum == dlm->node_num)
21828c2ecf20Sopenharmony_ci			continue;
21838c2ecf20Sopenharmony_ci		ret = dlm_do_master_requery(dlm, res, nodenum, &master);
21848c2ecf20Sopenharmony_ci		if (ret < 0) {
21858c2ecf20Sopenharmony_ci			mlog_errno(ret);
21868c2ecf20Sopenharmony_ci			if (!dlm_is_host_down(ret))
21878c2ecf20Sopenharmony_ci				BUG();
21888c2ecf20Sopenharmony_ci			/* host is down, so answer for that node would be
21898c2ecf20Sopenharmony_ci			 * DLM_LOCK_RES_OWNER_UNKNOWN.  continue. */
21908c2ecf20Sopenharmony_ci			ret = 0;
21918c2ecf20Sopenharmony_ci		}
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_ci		if (master != DLM_LOCK_RES_OWNER_UNKNOWN) {
21948c2ecf20Sopenharmony_ci			/* check to see if this master is in the recovery map */
21958c2ecf20Sopenharmony_ci			spin_lock(&dlm->spinlock);
21968c2ecf20Sopenharmony_ci			if (test_bit(master, dlm->recovery_map)) {
21978c2ecf20Sopenharmony_ci				mlog(ML_NOTICE, "%s: node %u has not seen "
21988c2ecf20Sopenharmony_ci				     "node %u go down yet, and thinks the "
21998c2ecf20Sopenharmony_ci				     "dead node is mastering the recovery "
22008c2ecf20Sopenharmony_ci				     "lock.  must wait.\n", dlm->name,
22018c2ecf20Sopenharmony_ci				     nodenum, master);
22028c2ecf20Sopenharmony_ci				ret = -EAGAIN;
22038c2ecf20Sopenharmony_ci			}
22048c2ecf20Sopenharmony_ci			spin_unlock(&dlm->spinlock);
22058c2ecf20Sopenharmony_ci			mlog(0, "%s: reco lock master is %u\n", dlm->name,
22068c2ecf20Sopenharmony_ci			     master);
22078c2ecf20Sopenharmony_ci			break;
22088c2ecf20Sopenharmony_ci		}
22098c2ecf20Sopenharmony_ci	}
22108c2ecf20Sopenharmony_ci	return ret;
22118c2ecf20Sopenharmony_ci}
22128c2ecf20Sopenharmony_ci
22138c2ecf20Sopenharmony_ci/*
22148c2ecf20Sopenharmony_ci * DLM_DEREF_LOCKRES_MSG
22158c2ecf20Sopenharmony_ci */
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ciint dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
22188c2ecf20Sopenharmony_ci{
22198c2ecf20Sopenharmony_ci	struct dlm_deref_lockres deref;
22208c2ecf20Sopenharmony_ci	int ret = 0, r;
22218c2ecf20Sopenharmony_ci	const char *lockname;
22228c2ecf20Sopenharmony_ci	unsigned int namelen;
22238c2ecf20Sopenharmony_ci
22248c2ecf20Sopenharmony_ci	lockname = res->lockname.name;
22258c2ecf20Sopenharmony_ci	namelen = res->lockname.len;
22268c2ecf20Sopenharmony_ci	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci	memset(&deref, 0, sizeof(deref));
22298c2ecf20Sopenharmony_ci	deref.node_idx = dlm->node_num;
22308c2ecf20Sopenharmony_ci	deref.namelen = namelen;
22318c2ecf20Sopenharmony_ci	memcpy(deref.name, lockname, namelen);
22328c2ecf20Sopenharmony_ci
22338c2ecf20Sopenharmony_ci	ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
22348c2ecf20Sopenharmony_ci				 &deref, sizeof(deref), res->owner, &r);
22358c2ecf20Sopenharmony_ci	if (ret < 0)
22368c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n",
22378c2ecf20Sopenharmony_ci		     dlm->name, namelen, lockname, ret, res->owner);
22388c2ecf20Sopenharmony_ci	else if (r < 0) {
22398c2ecf20Sopenharmony_ci		/* BAD.  other node says I did not have a ref. */
22408c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
22418c2ecf20Sopenharmony_ci		     dlm->name, namelen, lockname, res->owner, r);
22428c2ecf20Sopenharmony_ci		dlm_print_one_lock_resource(res);
22438c2ecf20Sopenharmony_ci		if (r == -ENOMEM)
22448c2ecf20Sopenharmony_ci			BUG();
22458c2ecf20Sopenharmony_ci	} else
22468c2ecf20Sopenharmony_ci		ret = r;
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci	return ret;
22498c2ecf20Sopenharmony_ci}
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ciint dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
22528c2ecf20Sopenharmony_ci			      void **ret_data)
22538c2ecf20Sopenharmony_ci{
22548c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
22558c2ecf20Sopenharmony_ci	struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf;
22568c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
22578c2ecf20Sopenharmony_ci	char *name;
22588c2ecf20Sopenharmony_ci	unsigned int namelen;
22598c2ecf20Sopenharmony_ci	int ret = -EINVAL;
22608c2ecf20Sopenharmony_ci	u8 node;
22618c2ecf20Sopenharmony_ci	unsigned int hash;
22628c2ecf20Sopenharmony_ci	struct dlm_work_item *item;
22638c2ecf20Sopenharmony_ci	int cleared = 0;
22648c2ecf20Sopenharmony_ci	int dispatch = 0;
22658c2ecf20Sopenharmony_ci
22668c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
22678c2ecf20Sopenharmony_ci		return 0;
22688c2ecf20Sopenharmony_ci
22698c2ecf20Sopenharmony_ci	name = deref->name;
22708c2ecf20Sopenharmony_ci	namelen = deref->namelen;
22718c2ecf20Sopenharmony_ci	node = deref->node_idx;
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_ci	if (namelen > DLM_LOCKID_NAME_MAX) {
22748c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Invalid name length!");
22758c2ecf20Sopenharmony_ci		goto done;
22768c2ecf20Sopenharmony_ci	}
22778c2ecf20Sopenharmony_ci	if (deref->node_idx >= O2NM_MAX_NODES) {
22788c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Invalid node number: %u\n", node);
22798c2ecf20Sopenharmony_ci		goto done;
22808c2ecf20Sopenharmony_ci	}
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
22838c2ecf20Sopenharmony_ci
22848c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
22858c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
22868c2ecf20Sopenharmony_ci	if (!res) {
22878c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
22888c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
22898c2ecf20Sopenharmony_ci		     dlm->name, namelen, name);
22908c2ecf20Sopenharmony_ci		goto done;
22918c2ecf20Sopenharmony_ci	}
22928c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
22938c2ecf20Sopenharmony_ci
22948c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
22958c2ecf20Sopenharmony_ci	if (res->state & DLM_LOCK_RES_SETREF_INPROG)
22968c2ecf20Sopenharmony_ci		dispatch = 1;
22978c2ecf20Sopenharmony_ci	else {
22988c2ecf20Sopenharmony_ci		BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
22998c2ecf20Sopenharmony_ci		if (test_bit(node, res->refmap)) {
23008c2ecf20Sopenharmony_ci			dlm_lockres_clear_refmap_bit(dlm, res, node);
23018c2ecf20Sopenharmony_ci			cleared = 1;
23028c2ecf20Sopenharmony_ci		}
23038c2ecf20Sopenharmony_ci	}
23048c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
23058c2ecf20Sopenharmony_ci
23068c2ecf20Sopenharmony_ci	if (!dispatch) {
23078c2ecf20Sopenharmony_ci		if (cleared)
23088c2ecf20Sopenharmony_ci			dlm_lockres_calc_usage(dlm, res);
23098c2ecf20Sopenharmony_ci		else {
23108c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
23118c2ecf20Sopenharmony_ci		     	"but it is already dropped!\n", dlm->name,
23128c2ecf20Sopenharmony_ci		     	res->lockname.len, res->lockname.name, node);
23138c2ecf20Sopenharmony_ci			dlm_print_one_lock_resource(res);
23148c2ecf20Sopenharmony_ci		}
23158c2ecf20Sopenharmony_ci		ret = DLM_DEREF_RESPONSE_DONE;
23168c2ecf20Sopenharmony_ci		goto done;
23178c2ecf20Sopenharmony_ci	}
23188c2ecf20Sopenharmony_ci
23198c2ecf20Sopenharmony_ci	item = kzalloc(sizeof(*item), GFP_NOFS);
23208c2ecf20Sopenharmony_ci	if (!item) {
23218c2ecf20Sopenharmony_ci		ret = -ENOMEM;
23228c2ecf20Sopenharmony_ci		mlog_errno(ret);
23238c2ecf20Sopenharmony_ci		goto done;
23248c2ecf20Sopenharmony_ci	}
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_ci	dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL);
23278c2ecf20Sopenharmony_ci	item->u.dl.deref_res = res;
23288c2ecf20Sopenharmony_ci	item->u.dl.deref_node = node;
23298c2ecf20Sopenharmony_ci
23308c2ecf20Sopenharmony_ci	spin_lock(&dlm->work_lock);
23318c2ecf20Sopenharmony_ci	list_add_tail(&item->list, &dlm->work_list);
23328c2ecf20Sopenharmony_ci	spin_unlock(&dlm->work_lock);
23338c2ecf20Sopenharmony_ci
23348c2ecf20Sopenharmony_ci	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
23358c2ecf20Sopenharmony_ci	return DLM_DEREF_RESPONSE_INPROG;
23368c2ecf20Sopenharmony_ci
23378c2ecf20Sopenharmony_cidone:
23388c2ecf20Sopenharmony_ci	if (res)
23398c2ecf20Sopenharmony_ci		dlm_lockres_put(res);
23408c2ecf20Sopenharmony_ci	dlm_put(dlm);
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ci	return ret;
23438c2ecf20Sopenharmony_ci}
23448c2ecf20Sopenharmony_ci
23458c2ecf20Sopenharmony_ciint dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
23468c2ecf20Sopenharmony_ci			      void **ret_data)
23478c2ecf20Sopenharmony_ci{
23488c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
23498c2ecf20Sopenharmony_ci	struct dlm_deref_lockres_done *deref
23508c2ecf20Sopenharmony_ci			= (struct dlm_deref_lockres_done *)msg->buf;
23518c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
23528c2ecf20Sopenharmony_ci	char *name;
23538c2ecf20Sopenharmony_ci	unsigned int namelen;
23548c2ecf20Sopenharmony_ci	int ret = -EINVAL;
23558c2ecf20Sopenharmony_ci	u8 node;
23568c2ecf20Sopenharmony_ci	unsigned int hash;
23578c2ecf20Sopenharmony_ci
23588c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
23598c2ecf20Sopenharmony_ci		return 0;
23608c2ecf20Sopenharmony_ci
23618c2ecf20Sopenharmony_ci	name = deref->name;
23628c2ecf20Sopenharmony_ci	namelen = deref->namelen;
23638c2ecf20Sopenharmony_ci	node = deref->node_idx;
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci	if (namelen > DLM_LOCKID_NAME_MAX) {
23668c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Invalid name length!");
23678c2ecf20Sopenharmony_ci		goto done;
23688c2ecf20Sopenharmony_ci	}
23698c2ecf20Sopenharmony_ci	if (deref->node_idx >= O2NM_MAX_NODES) {
23708c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Invalid node number: %u\n", node);
23718c2ecf20Sopenharmony_ci		goto done;
23728c2ecf20Sopenharmony_ci	}
23738c2ecf20Sopenharmony_ci
23748c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
23758c2ecf20Sopenharmony_ci
23768c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
23778c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
23788c2ecf20Sopenharmony_ci	if (!res) {
23798c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
23808c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
23818c2ecf20Sopenharmony_ci		     dlm->name, namelen, name);
23828c2ecf20Sopenharmony_ci		goto done;
23838c2ecf20Sopenharmony_ci	}
23848c2ecf20Sopenharmony_ci
23858c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
23868c2ecf20Sopenharmony_ci	if (!(res->state & DLM_LOCK_RES_DROPPING_REF)) {
23878c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
23888c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
23898c2ecf20Sopenharmony_ci		mlog(ML_NOTICE, "%s:%.*s: node %u sends deref done "
23908c2ecf20Sopenharmony_ci			"but it is already derefed!\n", dlm->name,
23918c2ecf20Sopenharmony_ci			res->lockname.len, res->lockname.name, node);
23928c2ecf20Sopenharmony_ci		ret = 0;
23938c2ecf20Sopenharmony_ci		goto done;
23948c2ecf20Sopenharmony_ci	}
23958c2ecf20Sopenharmony_ci
23968c2ecf20Sopenharmony_ci	__dlm_do_purge_lockres(dlm, res);
23978c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
23988c2ecf20Sopenharmony_ci	wake_up(&res->wq);
23998c2ecf20Sopenharmony_ci
24008c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
24018c2ecf20Sopenharmony_ci
24028c2ecf20Sopenharmony_ci	ret = 0;
24038c2ecf20Sopenharmony_cidone:
24048c2ecf20Sopenharmony_ci	if (res)
24058c2ecf20Sopenharmony_ci		dlm_lockres_put(res);
24068c2ecf20Sopenharmony_ci	dlm_put(dlm);
24078c2ecf20Sopenharmony_ci	return ret;
24088c2ecf20Sopenharmony_ci}
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_cistatic void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm,
24118c2ecf20Sopenharmony_ci		struct dlm_lock_resource *res, u8 node)
24128c2ecf20Sopenharmony_ci{
24138c2ecf20Sopenharmony_ci	struct dlm_deref_lockres_done deref;
24148c2ecf20Sopenharmony_ci	int ret = 0, r;
24158c2ecf20Sopenharmony_ci	const char *lockname;
24168c2ecf20Sopenharmony_ci	unsigned int namelen;
24178c2ecf20Sopenharmony_ci
24188c2ecf20Sopenharmony_ci	lockname = res->lockname.name;
24198c2ecf20Sopenharmony_ci	namelen = res->lockname.len;
24208c2ecf20Sopenharmony_ci	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci	memset(&deref, 0, sizeof(deref));
24238c2ecf20Sopenharmony_ci	deref.node_idx = dlm->node_num;
24248c2ecf20Sopenharmony_ci	deref.namelen = namelen;
24258c2ecf20Sopenharmony_ci	memcpy(deref.name, lockname, namelen);
24268c2ecf20Sopenharmony_ci
24278c2ecf20Sopenharmony_ci	ret = o2net_send_message(DLM_DEREF_LOCKRES_DONE, dlm->key,
24288c2ecf20Sopenharmony_ci				 &deref, sizeof(deref), node, &r);
24298c2ecf20Sopenharmony_ci	if (ret < 0) {
24308c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF DONE "
24318c2ecf20Sopenharmony_ci				" to node %u\n", dlm->name, namelen,
24328c2ecf20Sopenharmony_ci				lockname, ret, node);
24338c2ecf20Sopenharmony_ci	} else if (r < 0) {
24348c2ecf20Sopenharmony_ci		/* ignore the error */
24358c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
24368c2ecf20Sopenharmony_ci		     dlm->name, namelen, lockname, node, r);
24378c2ecf20Sopenharmony_ci		dlm_print_one_lock_resource(res);
24388c2ecf20Sopenharmony_ci	}
24398c2ecf20Sopenharmony_ci}
24408c2ecf20Sopenharmony_ci
24418c2ecf20Sopenharmony_cistatic void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
24428c2ecf20Sopenharmony_ci{
24438c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm;
24448c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res;
24458c2ecf20Sopenharmony_ci	u8 node;
24468c2ecf20Sopenharmony_ci	u8 cleared = 0;
24478c2ecf20Sopenharmony_ci
24488c2ecf20Sopenharmony_ci	dlm = item->dlm;
24498c2ecf20Sopenharmony_ci	res = item->u.dl.deref_res;
24508c2ecf20Sopenharmony_ci	node = item->u.dl.deref_node;
24518c2ecf20Sopenharmony_ci
24528c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
24538c2ecf20Sopenharmony_ci	BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
24548c2ecf20Sopenharmony_ci	__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
24558c2ecf20Sopenharmony_ci	if (test_bit(node, res->refmap)) {
24568c2ecf20Sopenharmony_ci		dlm_lockres_clear_refmap_bit(dlm, res, node);
24578c2ecf20Sopenharmony_ci		cleared = 1;
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_ci	dlm_drop_lockres_ref_done(dlm, res, node);
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci	if (cleared) {
24648c2ecf20Sopenharmony_ci		mlog(0, "%s:%.*s node %u ref dropped in dispatch\n",
24658c2ecf20Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name, node);
24668c2ecf20Sopenharmony_ci		dlm_lockres_calc_usage(dlm, res);
24678c2ecf20Sopenharmony_ci	} else {
24688c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
24698c2ecf20Sopenharmony_ci		     "but it is already dropped!\n", dlm->name,
24708c2ecf20Sopenharmony_ci		     res->lockname.len, res->lockname.name, node);
24718c2ecf20Sopenharmony_ci		dlm_print_one_lock_resource(res);
24728c2ecf20Sopenharmony_ci	}
24738c2ecf20Sopenharmony_ci
24748c2ecf20Sopenharmony_ci	dlm_lockres_put(res);
24758c2ecf20Sopenharmony_ci}
24768c2ecf20Sopenharmony_ci
24778c2ecf20Sopenharmony_ci/*
24788c2ecf20Sopenharmony_ci * A migratable resource is one that is :
24798c2ecf20Sopenharmony_ci * 1. locally mastered, and,
24808c2ecf20Sopenharmony_ci * 2. zero local locks, and,
24818c2ecf20Sopenharmony_ci * 3. one or more non-local locks, or, one or more references
24828c2ecf20Sopenharmony_ci * Returns 1 if yes, 0 if not.
24838c2ecf20Sopenharmony_ci */
24848c2ecf20Sopenharmony_cistatic int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
24858c2ecf20Sopenharmony_ci				      struct dlm_lock_resource *res)
24868c2ecf20Sopenharmony_ci{
24878c2ecf20Sopenharmony_ci	enum dlm_lockres_list idx;
24888c2ecf20Sopenharmony_ci	int nonlocal = 0, node_ref;
24898c2ecf20Sopenharmony_ci	struct list_head *queue;
24908c2ecf20Sopenharmony_ci	struct dlm_lock *lock;
24918c2ecf20Sopenharmony_ci	u64 cookie;
24928c2ecf20Sopenharmony_ci
24938c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
24948c2ecf20Sopenharmony_ci
24958c2ecf20Sopenharmony_ci	/* delay migration when the lockres is in MIGRATING state */
24968c2ecf20Sopenharmony_ci	if (res->state & DLM_LOCK_RES_MIGRATING)
24978c2ecf20Sopenharmony_ci		return 0;
24988c2ecf20Sopenharmony_ci
24998c2ecf20Sopenharmony_ci	/* delay migration when the lockres is in RECOCERING state */
25008c2ecf20Sopenharmony_ci	if (res->state & (DLM_LOCK_RES_RECOVERING|
25018c2ecf20Sopenharmony_ci			DLM_LOCK_RES_RECOVERY_WAITING))
25028c2ecf20Sopenharmony_ci		return 0;
25038c2ecf20Sopenharmony_ci
25048c2ecf20Sopenharmony_ci	if (res->owner != dlm->node_num)
25058c2ecf20Sopenharmony_ci		return 0;
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci        for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
25088c2ecf20Sopenharmony_ci		queue = dlm_list_idx_to_ptr(res, idx);
25098c2ecf20Sopenharmony_ci		list_for_each_entry(lock, queue, list) {
25108c2ecf20Sopenharmony_ci			if (lock->ml.node != dlm->node_num) {
25118c2ecf20Sopenharmony_ci				nonlocal++;
25128c2ecf20Sopenharmony_ci				continue;
25138c2ecf20Sopenharmony_ci			}
25148c2ecf20Sopenharmony_ci			cookie = be64_to_cpu(lock->ml.cookie);
25158c2ecf20Sopenharmony_ci			mlog(0, "%s: Not migratable res %.*s, lock %u:%llu on "
25168c2ecf20Sopenharmony_ci			     "%s list\n", dlm->name, res->lockname.len,
25178c2ecf20Sopenharmony_ci			     res->lockname.name,
25188c2ecf20Sopenharmony_ci			     dlm_get_lock_cookie_node(cookie),
25198c2ecf20Sopenharmony_ci			     dlm_get_lock_cookie_seq(cookie),
25208c2ecf20Sopenharmony_ci			     dlm_list_in_text(idx));
25218c2ecf20Sopenharmony_ci			return 0;
25228c2ecf20Sopenharmony_ci		}
25238c2ecf20Sopenharmony_ci	}
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_ci	if (!nonlocal) {
25268c2ecf20Sopenharmony_ci		node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
25278c2ecf20Sopenharmony_ci		if (node_ref >= O2NM_MAX_NODES)
25288c2ecf20Sopenharmony_ci			return 0;
25298c2ecf20Sopenharmony_ci	}
25308c2ecf20Sopenharmony_ci
25318c2ecf20Sopenharmony_ci	mlog(0, "%s: res %.*s, Migratable\n", dlm->name, res->lockname.len,
25328c2ecf20Sopenharmony_ci	     res->lockname.name);
25338c2ecf20Sopenharmony_ci
25348c2ecf20Sopenharmony_ci	return 1;
25358c2ecf20Sopenharmony_ci}
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci/*
25388c2ecf20Sopenharmony_ci * DLM_MIGRATE_LOCKRES
25398c2ecf20Sopenharmony_ci */
25408c2ecf20Sopenharmony_ci
25418c2ecf20Sopenharmony_ci
/*
 * dlm_migrate_lockres() - move mastery of @res from this node to @target.
 *
 * Sequence, as implemented below:
 *   1. preallocate a page (mres) and a master list entry (mle);
 *   2. add a migration mle under dlm->spinlock + dlm->master_lock;
 *   3. dlm_mark_lockres_migrating() to get the MIGRATING flag set;
 *   4. flush dlm->dlm_worker, then send the lock state to @target with
 *      dlm_send_one_lockres(..., DLM_MRES_MIGRATION);
 *   5. wait, in 5 second slices, until mle->woken is 1 or res->owner
 *      already equals @target;
 *   6. set the owner to @target, clear MIGRATING and drop the non-local
 *      locks.
 *
 * This function sleeps (workqueue flush, wait_event), so it must not be
 * called with spinlocks held.
 *
 * Returns 0 on success.  On failure a negative errno: -EINVAL if the
 * domain cannot be grabbed, if marking the lockres fails, or if @target is
 * found dead while waiting; -ENOMEM if preallocation fails; -EEXIST if
 * dlm_add_migration_mle() reports it; otherwise the error from
 * dlm_send_one_lockres().  On any negative return the lockres is handed to
 * dlm_kick_thread() again.
 */
25428c2ecf20Sopenharmony_cistatic int dlm_migrate_lockres(struct dlm_ctxt *dlm,
25438c2ecf20Sopenharmony_ci			       struct dlm_lock_resource *res, u8 target)
25448c2ecf20Sopenharmony_ci{
25458c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle = NULL;
25468c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *oldmle = NULL;
25478c2ecf20Sopenharmony_ci 	struct dlm_migratable_lockres *mres = NULL;
25488c2ecf20Sopenharmony_ci	int ret = 0;
25498c2ecf20Sopenharmony_ci	const char *name;
25508c2ecf20Sopenharmony_ci	unsigned int namelen;
	/* becomes 1 once the migration mle was added and pinned below */
25518c2ecf20Sopenharmony_ci	int mle_added = 0;
	/* becomes 1 on every failure path that clears MIGRATING itself */
25528c2ecf20Sopenharmony_ci	int wake = 0;
25538c2ecf20Sopenharmony_ci
	/* balanced by the dlm_put() at the leave label */
25548c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
25558c2ecf20Sopenharmony_ci		return -EINVAL;
25568c2ecf20Sopenharmony_ci
25578c2ecf20Sopenharmony_ci	name = res->lockname.name;
25588c2ecf20Sopenharmony_ci	namelen = res->lockname.len;
25598c2ecf20Sopenharmony_ci
25608c2ecf20Sopenharmony_ci	mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
25618c2ecf20Sopenharmony_ci	     target);
25628c2ecf20Sopenharmony_ci
25638c2ecf20Sopenharmony_ci	/* preallocate up front. if this fails, abort */
25648c2ecf20Sopenharmony_ci	ret = -ENOMEM;
25658c2ecf20Sopenharmony_ci	mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
25668c2ecf20Sopenharmony_ci	if (!mres) {
25678c2ecf20Sopenharmony_ci		mlog_errno(ret);
25688c2ecf20Sopenharmony_ci		goto leave;
25698c2ecf20Sopenharmony_ci	}
25708c2ecf20Sopenharmony_ci
25718c2ecf20Sopenharmony_ci	mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
25728c2ecf20Sopenharmony_ci	if (!mle) {
		/* mres is released at leave */
25738c2ecf20Sopenharmony_ci		mlog_errno(ret);
25748c2ecf20Sopenharmony_ci		goto leave;
25758c2ecf20Sopenharmony_ci	}
25768c2ecf20Sopenharmony_ci	ret = 0;
25778c2ecf20Sopenharmony_ci
25788c2ecf20Sopenharmony_ci	/*
25798c2ecf20Sopenharmony_ci	 * clear any existing master requests and
25808c2ecf20Sopenharmony_ci	 * add the migration mle to the list
25818c2ecf20Sopenharmony_ci	 */
25828c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
25838c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
25848c2ecf20Sopenharmony_ci	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
25858c2ecf20Sopenharmony_ci				    namelen, target, dlm->node_num);
25868c2ecf20Sopenharmony_ci	/* get an extra reference on the mle.
25878c2ecf20Sopenharmony_ci	 * otherwise the assert_master from the new
25888c2ecf20Sopenharmony_ci	 * master will destroy this.
25898c2ecf20Sopenharmony_ci	 */
25908c2ecf20Sopenharmony_ci	if (ret != -EEXIST)
25918c2ecf20Sopenharmony_ci		dlm_get_mle_inuse(mle);
25928c2ecf20Sopenharmony_ci
25938c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
25948c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
25958c2ecf20Sopenharmony_ci
25968c2ecf20Sopenharmony_ci	if (ret == -EEXIST) {
		/* mle_added stays 0, so the fail path frees mle directly */
25978c2ecf20Sopenharmony_ci		mlog(0, "another process is already migrating it\n");
25988c2ecf20Sopenharmony_ci		goto fail;
25998c2ecf20Sopenharmony_ci	}
26008c2ecf20Sopenharmony_ci	mle_added = 1;
26018c2ecf20Sopenharmony_ci
26028c2ecf20Sopenharmony_ci	/*
26038c2ecf20Sopenharmony_ci	 * set the MIGRATING flag and flush asts
26048c2ecf20Sopenharmony_ci	 * if we fail after this we need to re-dirty the lockres
26058c2ecf20Sopenharmony_ci	 */
26068c2ecf20Sopenharmony_ci	if (dlm_mark_lockres_migrating(dlm, res, target) < 0) {
26078c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "tried to migrate %.*s to %u, but "
26088c2ecf20Sopenharmony_ci		     "the target went down.\n", res->lockname.len,
26098c2ecf20Sopenharmony_ci		     res->lockname.name, target);
26108c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
26118c2ecf20Sopenharmony_ci		res->state &= ~DLM_LOCK_RES_MIGRATING;
26128c2ecf20Sopenharmony_ci		wake = 1;
26138c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
		/* the specific error is replaced by -EINVAL for the caller */
26148c2ecf20Sopenharmony_ci		ret = -EINVAL;
26158c2ecf20Sopenharmony_ci	}
26168c2ecf20Sopenharmony_ci
	/* reached by fall-through on success as well as by the goto above */
26178c2ecf20Sopenharmony_cifail:
26188c2ecf20Sopenharmony_ci	if (ret != -EEXIST && oldmle) {
26198c2ecf20Sopenharmony_ci		/* master is known, detach if not already detached */
26208c2ecf20Sopenharmony_ci		dlm_mle_detach_hb_events(dlm, oldmle);
26218c2ecf20Sopenharmony_ci		dlm_put_mle(oldmle);
26228c2ecf20Sopenharmony_ci	}
26238c2ecf20Sopenharmony_ci
26248c2ecf20Sopenharmony_ci	if (ret < 0) {
26258c2ecf20Sopenharmony_ci		if (mle_added) {
			/* drop the mle ref and the inuse ref taken above */
26268c2ecf20Sopenharmony_ci			dlm_mle_detach_hb_events(dlm, mle);
26278c2ecf20Sopenharmony_ci			dlm_put_mle(mle);
26288c2ecf20Sopenharmony_ci			dlm_put_mle_inuse(mle);
26298c2ecf20Sopenharmony_ci		} else if (mle) {
			/* mle was not added (-EEXIST path): free it directly */
26308c2ecf20Sopenharmony_ci			kmem_cache_free(dlm_mle_cache, mle);
26318c2ecf20Sopenharmony_ci			mle = NULL;
26328c2ecf20Sopenharmony_ci		}
26338c2ecf20Sopenharmony_ci		goto leave;
26348c2ecf20Sopenharmony_ci	}
26358c2ecf20Sopenharmony_ci
26368c2ecf20Sopenharmony_ci	/*
26378c2ecf20Sopenharmony_ci	 * at this point, we have a migration target, an mle
26388c2ecf20Sopenharmony_ci	 * in the master list, and the MIGRATING flag set on
26398c2ecf20Sopenharmony_ci	 * the lockres
26408c2ecf20Sopenharmony_ci	 */
26418c2ecf20Sopenharmony_ci
26428c2ecf20Sopenharmony_ci	/* now that remote nodes are spinning on the MIGRATING flag,
26438c2ecf20Sopenharmony_ci	 * ensure that all assert_master work is flushed. */
26448c2ecf20Sopenharmony_ci	flush_workqueue(dlm->dlm_worker);
26458c2ecf20Sopenharmony_ci
26468c2ecf20Sopenharmony_ci	/* notify new node and send all lock state */
26478c2ecf20Sopenharmony_ci	/* call send_one_lockres with migration flag.
26488c2ecf20Sopenharmony_ci	 * this serves as notice to the target node that a
26498c2ecf20Sopenharmony_ci	 * migration is starting. */
26508c2ecf20Sopenharmony_ci	ret = dlm_send_one_lockres(dlm, res, mres, target,
26518c2ecf20Sopenharmony_ci				   DLM_MRES_MIGRATION);
26528c2ecf20Sopenharmony_ci
26538c2ecf20Sopenharmony_ci	if (ret < 0) {
26548c2ecf20Sopenharmony_ci		mlog(0, "migration to node %u failed with %d\n",
26558c2ecf20Sopenharmony_ci		     target, ret);
26568c2ecf20Sopenharmony_ci		/* migration failed, detach and clean up mle */
26578c2ecf20Sopenharmony_ci		dlm_mle_detach_hb_events(dlm, mle);
26588c2ecf20Sopenharmony_ci		dlm_put_mle(mle);
26598c2ecf20Sopenharmony_ci		dlm_put_mle_inuse(mle);
26608c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
26618c2ecf20Sopenharmony_ci		res->state &= ~DLM_LOCK_RES_MIGRATING;
26628c2ecf20Sopenharmony_ci		wake = 1;
26638c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
		/* if the send failed because the host is down, wait for its death */
26648c2ecf20Sopenharmony_ci		if (dlm_is_host_down(ret))
26658c2ecf20Sopenharmony_ci			dlm_wait_for_node_death(dlm, target,
26668c2ecf20Sopenharmony_ci						DLM_NODE_DEATH_WAIT_MAX);
26678c2ecf20Sopenharmony_ci		goto leave;
26688c2ecf20Sopenharmony_ci	}
26698c2ecf20Sopenharmony_ci
26708c2ecf20Sopenharmony_ci	/* at this point, the target sends a message to all nodes,
26718c2ecf20Sopenharmony_ci	 * (using dlm_do_migrate_request).  this node is skipped since
26728c2ecf20Sopenharmony_ci	 * we had to put an mle in the list to begin the process.  this
26738c2ecf20Sopenharmony_ci	 * node now waits for target to do an assert master.  this node
26748c2ecf20Sopenharmony_ci	 * will be the last one notified, ensuring that the migration
26758c2ecf20Sopenharmony_ci	 * is complete everywhere.  if the target dies while this is
26768c2ecf20Sopenharmony_ci	 * going on, some nodes could potentially see the target as the
26778c2ecf20Sopenharmony_ci	 * master, so it is important that my recovery finds the migration
26788c2ecf20Sopenharmony_ci	 * mle and sets the master to UNKNOWN. */
26798c2ecf20Sopenharmony_ci
26808c2ecf20Sopenharmony_ci
26818c2ecf20Sopenharmony_ci	/* wait for new node to assert master */
26828c2ecf20Sopenharmony_ci	while (1) {
26838c2ecf20Sopenharmony_ci		ret = wait_event_interruptible_timeout(mle->wq,
26848c2ecf20Sopenharmony_ci					(atomic_read(&mle->woken) == 1),
26858c2ecf20Sopenharmony_ci					msecs_to_jiffies(5000));
26868c2ecf20Sopenharmony_ci
		/* ret >= 0: condition met or 5s slice elapsed; ret < 0: signal */
26878c2ecf20Sopenharmony_ci		if (ret >= 0) {
26888c2ecf20Sopenharmony_ci		       	if (atomic_read(&mle->woken) == 1 ||
26898c2ecf20Sopenharmony_ci			    res->owner == target)
26908c2ecf20Sopenharmony_ci				break;
26918c2ecf20Sopenharmony_ci
26928c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: timed out during migration\n",
26938c2ecf20Sopenharmony_ci			     dlm->name, res->lockname.len, res->lockname.name);
26948c2ecf20Sopenharmony_ci			/* avoid hang during shutdown when migrating lockres
26958c2ecf20Sopenharmony_ci			 * to a node which also goes down */
26968c2ecf20Sopenharmony_ci			if (dlm_is_node_dead(dlm, target)) {
26978c2ecf20Sopenharmony_ci				mlog(0, "%s:%.*s: expected migration "
26988c2ecf20Sopenharmony_ci				     "target %u is no longer up, restarting\n",
26998c2ecf20Sopenharmony_ci				     dlm->name, res->lockname.len,
27008c2ecf20Sopenharmony_ci				     res->lockname.name, target);
27018c2ecf20Sopenharmony_ci				ret = -EINVAL;
27028c2ecf20Sopenharmony_ci				/* migration failed, detach and clean up mle */
27038c2ecf20Sopenharmony_ci				dlm_mle_detach_hb_events(dlm, mle);
27048c2ecf20Sopenharmony_ci				dlm_put_mle(mle);
27058c2ecf20Sopenharmony_ci				dlm_put_mle_inuse(mle);
27068c2ecf20Sopenharmony_ci				spin_lock(&res->spinlock);
27078c2ecf20Sopenharmony_ci				res->state &= ~DLM_LOCK_RES_MIGRATING;
27088c2ecf20Sopenharmony_ci				wake = 1;
27098c2ecf20Sopenharmony_ci				spin_unlock(&res->spinlock);
27108c2ecf20Sopenharmony_ci				goto leave;
27118c2ecf20Sopenharmony_ci			}
27128c2ecf20Sopenharmony_ci		} else
			/* a signal does not abort the migration; just wait again */
27138c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: caught signal during migration\n",
27148c2ecf20Sopenharmony_ci			     dlm->name, res->lockname.len, res->lockname.name);
27158c2ecf20Sopenharmony_ci	}
27168c2ecf20Sopenharmony_ci
27178c2ecf20Sopenharmony_ci	/* all done, set the owner, clear the flag */
27188c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
27198c2ecf20Sopenharmony_ci	dlm_set_lockres_owner(dlm, res, target);
27208c2ecf20Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_MIGRATING;
27218c2ecf20Sopenharmony_ci	dlm_remove_nonlocal_locks(dlm, res);
27228c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
27238c2ecf20Sopenharmony_ci	wake_up(&res->wq);
27248c2ecf20Sopenharmony_ci
27258c2ecf20Sopenharmony_ci	/* master is known, detach if not already detached */
27268c2ecf20Sopenharmony_ci	dlm_mle_detach_hb_events(dlm, mle);
	/* NOTE(review): only the inuse ref is dropped on success; presumably
	 * the assert_master path releases the other mle ref - confirm */
27278c2ecf20Sopenharmony_ci	dlm_put_mle_inuse(mle);
27288c2ecf20Sopenharmony_ci	ret = 0;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	dlm_lockres_calc_usage(dlm, res);
27318c2ecf20Sopenharmony_ci
	/* common exit for success and every failure path */
27328c2ecf20Sopenharmony_cileave:
27338c2ecf20Sopenharmony_ci	/* re-dirty the lockres if we failed */
27348c2ecf20Sopenharmony_ci	if (ret < 0)
27358c2ecf20Sopenharmony_ci		dlm_kick_thread(dlm, res);
27368c2ecf20Sopenharmony_ci
27378c2ecf20Sopenharmony_ci	/* wake up waiters if the MIGRATING flag got set
27388c2ecf20Sopenharmony_ci	 * but migration failed */
27398c2ecf20Sopenharmony_ci	if (wake)
27408c2ecf20Sopenharmony_ci		wake_up(&res->wq);
27418c2ecf20Sopenharmony_ci
27428c2ecf20Sopenharmony_ci	if (mres)
27438c2ecf20Sopenharmony_ci		free_page((unsigned long)mres);
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci	dlm_put(dlm);
27468c2ecf20Sopenharmony_ci
27478c2ecf20Sopenharmony_ci	mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen,
27488c2ecf20Sopenharmony_ci	     name, target, ret);
27498c2ecf20Sopenharmony_ci	return ret;
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci/*
27538c2ecf20Sopenharmony_ci * Should be called only after beginning the domain leave process.
27548c2ecf20Sopenharmony_ci * There should not be any remaining locks on nonlocal lock resources,
27558c2ecf20Sopenharmony_ci * and there should be no local locks left on locally mastered resources.
27568c2ecf20Sopenharmony_ci *
27578c2ecf20Sopenharmony_ci * Called with the dlm spinlock held, may drop it to do migration, but
27588c2ecf20Sopenharmony_ci * will re-acquire before exit.
27598c2ecf20Sopenharmony_ci *
27608c2ecf20Sopenharmony_ci * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped
27618c2ecf20Sopenharmony_ci */
27628c2ecf20Sopenharmony_ciint dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
27638c2ecf20Sopenharmony_ci	__must_hold(&dlm->spinlock)
27648c2ecf20Sopenharmony_ci{
27658c2ecf20Sopenharmony_ci	int ret;
27668c2ecf20Sopenharmony_ci	int lock_dropped = 0;
27678c2ecf20Sopenharmony_ci	u8 target = O2NM_MAX_NODES;
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
27708c2ecf20Sopenharmony_ci
27718c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
27728c2ecf20Sopenharmony_ci	if (dlm_is_lockres_migratable(dlm, res))
27738c2ecf20Sopenharmony_ci		target = dlm_pick_migration_target(dlm, res);
27748c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
27758c2ecf20Sopenharmony_ci
27768c2ecf20Sopenharmony_ci	if (target == O2NM_MAX_NODES)
27778c2ecf20Sopenharmony_ci		goto leave;
27788c2ecf20Sopenharmony_ci
27798c2ecf20Sopenharmony_ci	/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
27808c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
27818c2ecf20Sopenharmony_ci	lock_dropped = 1;
27828c2ecf20Sopenharmony_ci	ret = dlm_migrate_lockres(dlm, res, target);
27838c2ecf20Sopenharmony_ci	if (ret)
27848c2ecf20Sopenharmony_ci		mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
27858c2ecf20Sopenharmony_ci		     dlm->name, res->lockname.len, res->lockname.name,
27868c2ecf20Sopenharmony_ci		     target, ret);
27878c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
27888c2ecf20Sopenharmony_cileave:
27898c2ecf20Sopenharmony_ci	return lock_dropped;
27908c2ecf20Sopenharmony_ci}
27918c2ecf20Sopenharmony_ci
27928c2ecf20Sopenharmony_ciint dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
27938c2ecf20Sopenharmony_ci{
27948c2ecf20Sopenharmony_ci	int ret;
27958c2ecf20Sopenharmony_ci	spin_lock(&dlm->ast_lock);
27968c2ecf20Sopenharmony_ci	spin_lock(&lock->spinlock);
27978c2ecf20Sopenharmony_ci	ret = (list_empty(&lock->bast_list) && !lock->bast_pending);
27988c2ecf20Sopenharmony_ci	spin_unlock(&lock->spinlock);
27998c2ecf20Sopenharmony_ci	spin_unlock(&dlm->ast_lock);
28008c2ecf20Sopenharmony_ci	return ret;
28018c2ecf20Sopenharmony_ci}
28028c2ecf20Sopenharmony_ci
28038c2ecf20Sopenharmony_cistatic int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
28048c2ecf20Sopenharmony_ci				     struct dlm_lock_resource *res,
28058c2ecf20Sopenharmony_ci				     u8 mig_target)
28068c2ecf20Sopenharmony_ci{
28078c2ecf20Sopenharmony_ci	int can_proceed;
28088c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
28098c2ecf20Sopenharmony_ci	can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
28108c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
28118c2ecf20Sopenharmony_ci
28128c2ecf20Sopenharmony_ci	/* target has died, so make the caller break out of the
28138c2ecf20Sopenharmony_ci	 * wait_event, but caller must recheck the domain_map */
28148c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
28158c2ecf20Sopenharmony_ci	if (!test_bit(mig_target, dlm->domain_map))
28168c2ecf20Sopenharmony_ci		can_proceed = 1;
28178c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
28188c2ecf20Sopenharmony_ci	return can_proceed;
28198c2ecf20Sopenharmony_ci}
28208c2ecf20Sopenharmony_ci
28218c2ecf20Sopenharmony_cistatic int dlm_lockres_is_dirty(struct dlm_ctxt *dlm,
28228c2ecf20Sopenharmony_ci				struct dlm_lock_resource *res)
28238c2ecf20Sopenharmony_ci{
28248c2ecf20Sopenharmony_ci	int ret;
28258c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
28268c2ecf20Sopenharmony_ci	ret = !!(res->state & DLM_LOCK_RES_DIRTY);
28278c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
28288c2ecf20Sopenharmony_ci	return ret;
28298c2ecf20Sopenharmony_ci}
28308c2ecf20Sopenharmony_ci
28318c2ecf20Sopenharmony_ci
28328c2ecf20Sopenharmony_cistatic int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
28338c2ecf20Sopenharmony_ci				       struct dlm_lock_resource *res,
28348c2ecf20Sopenharmony_ci				       u8 target)
28358c2ecf20Sopenharmony_ci{
28368c2ecf20Sopenharmony_ci	int ret = 0;
28378c2ecf20Sopenharmony_ci
28388c2ecf20Sopenharmony_ci	mlog(0, "dlm_mark_lockres_migrating: %.*s, from %u to %u\n",
28398c2ecf20Sopenharmony_ci	       res->lockname.len, res->lockname.name, dlm->node_num,
28408c2ecf20Sopenharmony_ci	       target);
28418c2ecf20Sopenharmony_ci	/* need to set MIGRATING flag on lockres.  this is done by
28428c2ecf20Sopenharmony_ci	 * ensuring that all asts have been flushed for this lockres. */
28438c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
28448c2ecf20Sopenharmony_ci	BUG_ON(res->migration_pending);
28458c2ecf20Sopenharmony_ci	res->migration_pending = 1;
28468c2ecf20Sopenharmony_ci	/* strategy is to reserve an extra ast then release
28478c2ecf20Sopenharmony_ci	 * it below, letting the release do all of the work */
28488c2ecf20Sopenharmony_ci	__dlm_lockres_reserve_ast(res);
28498c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
28508c2ecf20Sopenharmony_ci
28518c2ecf20Sopenharmony_ci	/* now flush all the pending asts */
28528c2ecf20Sopenharmony_ci	dlm_kick_thread(dlm, res);
28538c2ecf20Sopenharmony_ci	/* before waiting on DIRTY, block processes which may
28548c2ecf20Sopenharmony_ci	 * try to dirty the lockres before MIGRATING is set */
28558c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
28568c2ecf20Sopenharmony_ci	BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY);
28578c2ecf20Sopenharmony_ci	res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
28588c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
28598c2ecf20Sopenharmony_ci	/* now wait on any pending asts and the DIRTY state */
28608c2ecf20Sopenharmony_ci	wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
28618c2ecf20Sopenharmony_ci	dlm_lockres_release_ast(dlm, res);
28628c2ecf20Sopenharmony_ci
28638c2ecf20Sopenharmony_ci	mlog(0, "about to wait on migration_wq, dirty=%s\n",
28648c2ecf20Sopenharmony_ci	       res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
28658c2ecf20Sopenharmony_ci	/* if the extra ref we just put was the final one, this
28668c2ecf20Sopenharmony_ci	 * will pass thru immediately.  otherwise, we need to wait
28678c2ecf20Sopenharmony_ci	 * for the last ast to finish. */
28688c2ecf20Sopenharmony_ciagain:
28698c2ecf20Sopenharmony_ci	ret = wait_event_interruptible_timeout(dlm->migration_wq,
28708c2ecf20Sopenharmony_ci		   dlm_migration_can_proceed(dlm, res, target),
28718c2ecf20Sopenharmony_ci		   msecs_to_jiffies(1000));
28728c2ecf20Sopenharmony_ci	if (ret < 0) {
28738c2ecf20Sopenharmony_ci		mlog(0, "woken again: migrating? %s, dead? %s\n",
28748c2ecf20Sopenharmony_ci		       res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
28758c2ecf20Sopenharmony_ci		       test_bit(target, dlm->domain_map) ? "no":"yes");
28768c2ecf20Sopenharmony_ci	} else {
28778c2ecf20Sopenharmony_ci		mlog(0, "all is well: migrating? %s, dead? %s\n",
28788c2ecf20Sopenharmony_ci		       res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
28798c2ecf20Sopenharmony_ci		       test_bit(target, dlm->domain_map) ? "no":"yes");
28808c2ecf20Sopenharmony_ci	}
28818c2ecf20Sopenharmony_ci	if (!dlm_migration_can_proceed(dlm, res, target)) {
28828c2ecf20Sopenharmony_ci		mlog(0, "trying again...\n");
28838c2ecf20Sopenharmony_ci		goto again;
28848c2ecf20Sopenharmony_ci	}
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_ci	ret = 0;
28878c2ecf20Sopenharmony_ci	/* did the target go down or die? */
28888c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
28898c2ecf20Sopenharmony_ci	if (!test_bit(target, dlm->domain_map)) {
28908c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "aha. migration target %u just went down\n",
28918c2ecf20Sopenharmony_ci		     target);
28928c2ecf20Sopenharmony_ci		ret = -EHOSTDOWN;
28938c2ecf20Sopenharmony_ci	}
28948c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
28958c2ecf20Sopenharmony_ci
28968c2ecf20Sopenharmony_ci	/*
28978c2ecf20Sopenharmony_ci	 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
28988c2ecf20Sopenharmony_ci	 * another try; otherwise, we are sure the MIGRATING state is there,
28998c2ecf20Sopenharmony_ci	 * drop the unneeded state which blocked threads trying to DIRTY
29008c2ecf20Sopenharmony_ci	 */
29018c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
29028c2ecf20Sopenharmony_ci	BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
29038c2ecf20Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
29048c2ecf20Sopenharmony_ci	if (!ret)
29058c2ecf20Sopenharmony_ci		BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
29068c2ecf20Sopenharmony_ci	else
29078c2ecf20Sopenharmony_ci		res->migration_pending = 0;
29088c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
29098c2ecf20Sopenharmony_ci
29108c2ecf20Sopenharmony_ci	/*
29118c2ecf20Sopenharmony_ci	 * at this point:
29128c2ecf20Sopenharmony_ci	 *
29138c2ecf20Sopenharmony_ci	 *   o the DLM_LOCK_RES_MIGRATING flag is set if target not down
29148c2ecf20Sopenharmony_ci	 *   o there are no pending asts on this lockres
29158c2ecf20Sopenharmony_ci	 *   o all processes trying to reserve an ast on this
29168c2ecf20Sopenharmony_ci	 *     lockres must wait for the MIGRATING flag to clear
29178c2ecf20Sopenharmony_ci	 */
29188c2ecf20Sopenharmony_ci	return ret;
29198c2ecf20Sopenharmony_ci}
29208c2ecf20Sopenharmony_ci
29218c2ecf20Sopenharmony_ci/* last step in the migration process.
29228c2ecf20Sopenharmony_ci * original master calls this to free all of the dlm_lock
29238c2ecf20Sopenharmony_ci * structures that used to be for other nodes. */
29248c2ecf20Sopenharmony_cistatic void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
29258c2ecf20Sopenharmony_ci				      struct dlm_lock_resource *res)
29268c2ecf20Sopenharmony_ci{
29278c2ecf20Sopenharmony_ci	struct list_head *queue = &res->granted;
29288c2ecf20Sopenharmony_ci	int i, bit;
29298c2ecf20Sopenharmony_ci	struct dlm_lock *lock, *next;
29308c2ecf20Sopenharmony_ci
29318c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
29328c2ecf20Sopenharmony_ci
29338c2ecf20Sopenharmony_ci	BUG_ON(res->owner == dlm->node_num);
29348c2ecf20Sopenharmony_ci
29358c2ecf20Sopenharmony_ci	for (i=0; i<3; i++) {
29368c2ecf20Sopenharmony_ci		list_for_each_entry_safe(lock, next, queue, list) {
29378c2ecf20Sopenharmony_ci			if (lock->ml.node != dlm->node_num) {
29388c2ecf20Sopenharmony_ci				mlog(0, "putting lock for node %u\n",
29398c2ecf20Sopenharmony_ci				     lock->ml.node);
29408c2ecf20Sopenharmony_ci				/* be extra careful */
29418c2ecf20Sopenharmony_ci				BUG_ON(!list_empty(&lock->ast_list));
29428c2ecf20Sopenharmony_ci				BUG_ON(!list_empty(&lock->bast_list));
29438c2ecf20Sopenharmony_ci				BUG_ON(lock->ast_pending);
29448c2ecf20Sopenharmony_ci				BUG_ON(lock->bast_pending);
29458c2ecf20Sopenharmony_ci				dlm_lockres_clear_refmap_bit(dlm, res,
29468c2ecf20Sopenharmony_ci							     lock->ml.node);
29478c2ecf20Sopenharmony_ci				list_del_init(&lock->list);
29488c2ecf20Sopenharmony_ci				dlm_lock_put(lock);
29498c2ecf20Sopenharmony_ci				/* In a normal unlock, we would have added a
29508c2ecf20Sopenharmony_ci				 * DLM_UNLOCK_FREE_LOCK action. Force it. */
29518c2ecf20Sopenharmony_ci				dlm_lock_put(lock);
29528c2ecf20Sopenharmony_ci			}
29538c2ecf20Sopenharmony_ci		}
29548c2ecf20Sopenharmony_ci		queue++;
29558c2ecf20Sopenharmony_ci	}
29568c2ecf20Sopenharmony_ci	bit = 0;
29578c2ecf20Sopenharmony_ci	while (1) {
29588c2ecf20Sopenharmony_ci		bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
29598c2ecf20Sopenharmony_ci		if (bit >= O2NM_MAX_NODES)
29608c2ecf20Sopenharmony_ci			break;
29618c2ecf20Sopenharmony_ci		/* do not clear the local node reference, if there is a
29628c2ecf20Sopenharmony_ci		 * process holding this, let it drop the ref itself */
29638c2ecf20Sopenharmony_ci		if (bit != dlm->node_num) {
29648c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: node %u had a ref to this "
29658c2ecf20Sopenharmony_ci			     "migrating lockres, clearing\n", dlm->name,
29668c2ecf20Sopenharmony_ci			     res->lockname.len, res->lockname.name, bit);
29678c2ecf20Sopenharmony_ci			dlm_lockres_clear_refmap_bit(dlm, res, bit);
29688c2ecf20Sopenharmony_ci		}
29698c2ecf20Sopenharmony_ci		bit++;
29708c2ecf20Sopenharmony_ci	}
29718c2ecf20Sopenharmony_ci}
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci/*
29748c2ecf20Sopenharmony_ci * Pick a node to migrate the lock resource to. This function selects a
29758c2ecf20Sopenharmony_ci * potential target based first on the locks and then on refmap. It skips
29768c2ecf20Sopenharmony_ci * nodes that are in the process of exiting the domain.
29778c2ecf20Sopenharmony_ci */
29788c2ecf20Sopenharmony_cistatic u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
29798c2ecf20Sopenharmony_ci				    struct dlm_lock_resource *res)
29808c2ecf20Sopenharmony_ci{
29818c2ecf20Sopenharmony_ci	enum dlm_lockres_list idx;
29828c2ecf20Sopenharmony_ci	struct list_head *queue = &res->granted;
29838c2ecf20Sopenharmony_ci	struct dlm_lock *lock;
29848c2ecf20Sopenharmony_ci	int noderef;
29858c2ecf20Sopenharmony_ci	u8 nodenum = O2NM_MAX_NODES;
29868c2ecf20Sopenharmony_ci
29878c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
29888c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
29898c2ecf20Sopenharmony_ci
29908c2ecf20Sopenharmony_ci	/* Go through all the locks */
29918c2ecf20Sopenharmony_ci	for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
29928c2ecf20Sopenharmony_ci		queue = dlm_list_idx_to_ptr(res, idx);
29938c2ecf20Sopenharmony_ci		list_for_each_entry(lock, queue, list) {
29948c2ecf20Sopenharmony_ci			if (lock->ml.node == dlm->node_num)
29958c2ecf20Sopenharmony_ci				continue;
29968c2ecf20Sopenharmony_ci			if (test_bit(lock->ml.node, dlm->exit_domain_map))
29978c2ecf20Sopenharmony_ci				continue;
29988c2ecf20Sopenharmony_ci			nodenum = lock->ml.node;
29998c2ecf20Sopenharmony_ci			goto bail;
30008c2ecf20Sopenharmony_ci		}
30018c2ecf20Sopenharmony_ci	}
30028c2ecf20Sopenharmony_ci
30038c2ecf20Sopenharmony_ci	/* Go thru the refmap */
30048c2ecf20Sopenharmony_ci	noderef = -1;
30058c2ecf20Sopenharmony_ci	while (1) {
30068c2ecf20Sopenharmony_ci		noderef = find_next_bit(res->refmap, O2NM_MAX_NODES,
30078c2ecf20Sopenharmony_ci					noderef + 1);
30088c2ecf20Sopenharmony_ci		if (noderef >= O2NM_MAX_NODES)
30098c2ecf20Sopenharmony_ci			break;
30108c2ecf20Sopenharmony_ci		if (noderef == dlm->node_num)
30118c2ecf20Sopenharmony_ci			continue;
30128c2ecf20Sopenharmony_ci		if (test_bit(noderef, dlm->exit_domain_map))
30138c2ecf20Sopenharmony_ci			continue;
30148c2ecf20Sopenharmony_ci		nodenum = noderef;
30158c2ecf20Sopenharmony_ci		goto bail;
30168c2ecf20Sopenharmony_ci	}
30178c2ecf20Sopenharmony_ci
30188c2ecf20Sopenharmony_cibail:
30198c2ecf20Sopenharmony_ci	return nodenum;
30208c2ecf20Sopenharmony_ci}
30218c2ecf20Sopenharmony_ci
30228c2ecf20Sopenharmony_ci/* this is called by the new master once all lockres
30238c2ecf20Sopenharmony_ci * data has been received */
30248c2ecf20Sopenharmony_cistatic int dlm_do_migrate_request(struct dlm_ctxt *dlm,
30258c2ecf20Sopenharmony_ci				  struct dlm_lock_resource *res,
30268c2ecf20Sopenharmony_ci				  u8 master, u8 new_master,
30278c2ecf20Sopenharmony_ci				  struct dlm_node_iter *iter)
30288c2ecf20Sopenharmony_ci{
30298c2ecf20Sopenharmony_ci	struct dlm_migrate_request migrate;
30308c2ecf20Sopenharmony_ci	int ret, skip, status = 0;
30318c2ecf20Sopenharmony_ci	int nodenum;
30328c2ecf20Sopenharmony_ci
30338c2ecf20Sopenharmony_ci	memset(&migrate, 0, sizeof(migrate));
30348c2ecf20Sopenharmony_ci	migrate.namelen = res->lockname.len;
30358c2ecf20Sopenharmony_ci	memcpy(migrate.name, res->lockname.name, migrate.namelen);
30368c2ecf20Sopenharmony_ci	migrate.new_master = new_master;
30378c2ecf20Sopenharmony_ci	migrate.master = master;
30388c2ecf20Sopenharmony_ci
30398c2ecf20Sopenharmony_ci	ret = 0;
30408c2ecf20Sopenharmony_ci
30418c2ecf20Sopenharmony_ci	/* send message to all nodes, except the master and myself */
30428c2ecf20Sopenharmony_ci	while ((nodenum = dlm_node_iter_next(iter)) >= 0) {
30438c2ecf20Sopenharmony_ci		if (nodenum == master ||
30448c2ecf20Sopenharmony_ci		    nodenum == new_master)
30458c2ecf20Sopenharmony_ci			continue;
30468c2ecf20Sopenharmony_ci
30478c2ecf20Sopenharmony_ci		/* We could race exit domain. If exited, skip. */
30488c2ecf20Sopenharmony_ci		spin_lock(&dlm->spinlock);
30498c2ecf20Sopenharmony_ci		skip = (!test_bit(nodenum, dlm->domain_map));
30508c2ecf20Sopenharmony_ci		spin_unlock(&dlm->spinlock);
30518c2ecf20Sopenharmony_ci		if (skip) {
30528c2ecf20Sopenharmony_ci			clear_bit(nodenum, iter->node_map);
30538c2ecf20Sopenharmony_ci			continue;
30548c2ecf20Sopenharmony_ci		}
30558c2ecf20Sopenharmony_ci
30568c2ecf20Sopenharmony_ci		ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
30578c2ecf20Sopenharmony_ci					 &migrate, sizeof(migrate), nodenum,
30588c2ecf20Sopenharmony_ci					 &status);
30598c2ecf20Sopenharmony_ci		if (ret < 0) {
30608c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "%s: res %.*s, Error %d send "
30618c2ecf20Sopenharmony_ci			     "MIGRATE_REQUEST to node %u\n", dlm->name,
30628c2ecf20Sopenharmony_ci			     migrate.namelen, migrate.name, ret, nodenum);
30638c2ecf20Sopenharmony_ci			if (!dlm_is_host_down(ret)) {
30648c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "unhandled error=%d!\n", ret);
30658c2ecf20Sopenharmony_ci				BUG();
30668c2ecf20Sopenharmony_ci			}
30678c2ecf20Sopenharmony_ci			clear_bit(nodenum, iter->node_map);
30688c2ecf20Sopenharmony_ci			ret = 0;
30698c2ecf20Sopenharmony_ci		} else if (status < 0) {
30708c2ecf20Sopenharmony_ci			mlog(0, "migrate request (node %u) returned %d!\n",
30718c2ecf20Sopenharmony_ci			     nodenum, status);
30728c2ecf20Sopenharmony_ci			ret = status;
30738c2ecf20Sopenharmony_ci		} else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) {
30748c2ecf20Sopenharmony_ci			/* during the migration request we short-circuited
30758c2ecf20Sopenharmony_ci			 * the mastery of the lockres.  make sure we have
30768c2ecf20Sopenharmony_ci			 * a mastery ref for nodenum */
30778c2ecf20Sopenharmony_ci			mlog(0, "%s:%.*s: need ref for node %u\n",
30788c2ecf20Sopenharmony_ci			     dlm->name, res->lockname.len, res->lockname.name,
30798c2ecf20Sopenharmony_ci			     nodenum);
30808c2ecf20Sopenharmony_ci			spin_lock(&res->spinlock);
30818c2ecf20Sopenharmony_ci			dlm_lockres_set_refmap_bit(dlm, res, nodenum);
30828c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
30838c2ecf20Sopenharmony_ci		}
30848c2ecf20Sopenharmony_ci	}
30858c2ecf20Sopenharmony_ci
30868c2ecf20Sopenharmony_ci	if (ret < 0)
30878c2ecf20Sopenharmony_ci		mlog_errno(ret);
30888c2ecf20Sopenharmony_ci
30898c2ecf20Sopenharmony_ci	mlog(0, "returning ret=%d\n", ret);
30908c2ecf20Sopenharmony_ci	return ret;
30918c2ecf20Sopenharmony_ci}
30928c2ecf20Sopenharmony_ci
30938c2ecf20Sopenharmony_ci
30948c2ecf20Sopenharmony_ci/* if there is an existing mle for this lockres, we now know who the master is.
30958c2ecf20Sopenharmony_ci * (the one who sent us *this* message) we can clear it up right away.
30968c2ecf20Sopenharmony_ci * since the process that put the mle on the list still has a reference to it,
30978c2ecf20Sopenharmony_ci * we can unhash it now, set the master and wake the process.  as a result,
30988c2ecf20Sopenharmony_ci * we will have no mle in the list to start with.  now we can add an mle for
30998c2ecf20Sopenharmony_ci * the migration and this should be the only one found for those scanning the
31008c2ecf20Sopenharmony_ci * list.  */
31018c2ecf20Sopenharmony_ciint dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
31028c2ecf20Sopenharmony_ci				void **ret_data)
31038c2ecf20Sopenharmony_ci{
31048c2ecf20Sopenharmony_ci	struct dlm_ctxt *dlm = data;
31058c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res = NULL;
31068c2ecf20Sopenharmony_ci	struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf;
31078c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle = NULL, *oldmle = NULL;
31088c2ecf20Sopenharmony_ci	const char *name;
31098c2ecf20Sopenharmony_ci	unsigned int namelen, hash;
31108c2ecf20Sopenharmony_ci	int ret = 0;
31118c2ecf20Sopenharmony_ci
31128c2ecf20Sopenharmony_ci	if (!dlm_grab(dlm))
31138c2ecf20Sopenharmony_ci		return 0;
31148c2ecf20Sopenharmony_ci
31158c2ecf20Sopenharmony_ci	name = migrate->name;
31168c2ecf20Sopenharmony_ci	namelen = migrate->namelen;
31178c2ecf20Sopenharmony_ci	hash = dlm_lockid_hash(name, namelen);
31188c2ecf20Sopenharmony_ci
31198c2ecf20Sopenharmony_ci	/* preallocate.. if this fails, abort */
31208c2ecf20Sopenharmony_ci	mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
31218c2ecf20Sopenharmony_ci
31228c2ecf20Sopenharmony_ci	if (!mle) {
31238c2ecf20Sopenharmony_ci		ret = -ENOMEM;
31248c2ecf20Sopenharmony_ci		goto leave;
31258c2ecf20Sopenharmony_ci	}
31268c2ecf20Sopenharmony_ci
31278c2ecf20Sopenharmony_ci	/* check for pre-existing lock */
31288c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
31298c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres(dlm, name, namelen, hash);
31308c2ecf20Sopenharmony_ci	if (res) {
31318c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
31328c2ecf20Sopenharmony_ci		if (res->state & DLM_LOCK_RES_RECOVERING) {
31338c2ecf20Sopenharmony_ci			/* if all is working ok, this can only mean that we got
31348c2ecf20Sopenharmony_ci		 	* a migrate request from a node that we now see as
31358c2ecf20Sopenharmony_ci		 	* dead.  what can we do here?  drop it to the floor? */
31368c2ecf20Sopenharmony_ci			spin_unlock(&res->spinlock);
31378c2ecf20Sopenharmony_ci			mlog(ML_ERROR, "Got a migrate request, but the "
31388c2ecf20Sopenharmony_ci			     "lockres is marked as recovering!");
31398c2ecf20Sopenharmony_ci			kmem_cache_free(dlm_mle_cache, mle);
31408c2ecf20Sopenharmony_ci			ret = -EINVAL; /* need a better solution */
31418c2ecf20Sopenharmony_ci			goto unlock;
31428c2ecf20Sopenharmony_ci		}
31438c2ecf20Sopenharmony_ci		res->state |= DLM_LOCK_RES_MIGRATING;
31448c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
31458c2ecf20Sopenharmony_ci	}
31468c2ecf20Sopenharmony_ci
31478c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
31488c2ecf20Sopenharmony_ci	/* ignore status.  only nonzero status would BUG. */
31498c2ecf20Sopenharmony_ci	ret = dlm_add_migration_mle(dlm, res, mle, &oldmle,
31508c2ecf20Sopenharmony_ci				    name, namelen,
31518c2ecf20Sopenharmony_ci				    migrate->new_master,
31528c2ecf20Sopenharmony_ci				    migrate->master);
31538c2ecf20Sopenharmony_ci
31548c2ecf20Sopenharmony_ci	if (ret < 0)
31558c2ecf20Sopenharmony_ci		kmem_cache_free(dlm_mle_cache, mle);
31568c2ecf20Sopenharmony_ci
31578c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
31588c2ecf20Sopenharmony_ciunlock:
31598c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
31608c2ecf20Sopenharmony_ci
31618c2ecf20Sopenharmony_ci	if (oldmle) {
31628c2ecf20Sopenharmony_ci		/* master is known, detach if not already detached */
31638c2ecf20Sopenharmony_ci		dlm_mle_detach_hb_events(dlm, oldmle);
31648c2ecf20Sopenharmony_ci		dlm_put_mle(oldmle);
31658c2ecf20Sopenharmony_ci	}
31668c2ecf20Sopenharmony_ci
31678c2ecf20Sopenharmony_ci	if (res)
31688c2ecf20Sopenharmony_ci		dlm_lockres_put(res);
31698c2ecf20Sopenharmony_cileave:
31708c2ecf20Sopenharmony_ci	dlm_put(dlm);
31718c2ecf20Sopenharmony_ci	return ret;
31728c2ecf20Sopenharmony_ci}
31738c2ecf20Sopenharmony_ci
31748c2ecf20Sopenharmony_ci/* must be holding dlm->spinlock and dlm->master_lock
31758c2ecf20Sopenharmony_ci * when adding a migration mle, we can clear any other mles
31768c2ecf20Sopenharmony_ci * in the master list because we know with certainty that
31778c2ecf20Sopenharmony_ci * the master is "master".  so we remove any old mle from
31788c2ecf20Sopenharmony_ci * the list after setting it's master field, and then add
31798c2ecf20Sopenharmony_ci * the new migration mle.  this way we can hold with the rule
31808c2ecf20Sopenharmony_ci * of having only one mle for a given lock name at all times. */
31818c2ecf20Sopenharmony_cistatic int dlm_add_migration_mle(struct dlm_ctxt *dlm,
31828c2ecf20Sopenharmony_ci				 struct dlm_lock_resource *res,
31838c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry *mle,
31848c2ecf20Sopenharmony_ci				 struct dlm_master_list_entry **oldmle,
31858c2ecf20Sopenharmony_ci				 const char *name, unsigned int namelen,
31868c2ecf20Sopenharmony_ci				 u8 new_master, u8 master)
31878c2ecf20Sopenharmony_ci{
31888c2ecf20Sopenharmony_ci	int found;
31898c2ecf20Sopenharmony_ci	int ret = 0;
31908c2ecf20Sopenharmony_ci
31918c2ecf20Sopenharmony_ci	*oldmle = NULL;
31928c2ecf20Sopenharmony_ci
31938c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
31948c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->master_lock);
31958c2ecf20Sopenharmony_ci
31968c2ecf20Sopenharmony_ci	/* caller is responsible for any ref taken here on oldmle */
31978c2ecf20Sopenharmony_ci	found = dlm_find_mle(dlm, oldmle, (char *)name, namelen);
31988c2ecf20Sopenharmony_ci	if (found) {
31998c2ecf20Sopenharmony_ci		struct dlm_master_list_entry *tmp = *oldmle;
32008c2ecf20Sopenharmony_ci		spin_lock(&tmp->spinlock);
32018c2ecf20Sopenharmony_ci		if (tmp->type == DLM_MLE_MIGRATION) {
32028c2ecf20Sopenharmony_ci			if (master == dlm->node_num) {
32038c2ecf20Sopenharmony_ci				/* ah another process raced me to it */
32048c2ecf20Sopenharmony_ci				mlog(0, "tried to migrate %.*s, but some "
32058c2ecf20Sopenharmony_ci				     "process beat me to it\n",
32068c2ecf20Sopenharmony_ci				     namelen, name);
32078c2ecf20Sopenharmony_ci				spin_unlock(&tmp->spinlock);
32088c2ecf20Sopenharmony_ci				return -EEXIST;
32098c2ecf20Sopenharmony_ci			} else {
32108c2ecf20Sopenharmony_ci				/* bad.  2 NODES are trying to migrate! */
32118c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "migration error  mle: "
32128c2ecf20Sopenharmony_ci				     "master=%u new_master=%u // request: "
32138c2ecf20Sopenharmony_ci				     "master=%u new_master=%u // "
32148c2ecf20Sopenharmony_ci				     "lockres=%.*s\n",
32158c2ecf20Sopenharmony_ci				     tmp->master, tmp->new_master,
32168c2ecf20Sopenharmony_ci				     master, new_master,
32178c2ecf20Sopenharmony_ci				     namelen, name);
32188c2ecf20Sopenharmony_ci				BUG();
32198c2ecf20Sopenharmony_ci			}
32208c2ecf20Sopenharmony_ci		} else {
32218c2ecf20Sopenharmony_ci			/* this is essentially what assert_master does */
32228c2ecf20Sopenharmony_ci			tmp->master = master;
32238c2ecf20Sopenharmony_ci			atomic_set(&tmp->woken, 1);
32248c2ecf20Sopenharmony_ci			wake_up(&tmp->wq);
32258c2ecf20Sopenharmony_ci			/* remove it so that only one mle will be found */
32268c2ecf20Sopenharmony_ci			__dlm_unlink_mle(dlm, tmp);
32278c2ecf20Sopenharmony_ci			__dlm_mle_detach_hb_events(dlm, tmp);
32288c2ecf20Sopenharmony_ci			if (tmp->type == DLM_MLE_MASTER) {
32298c2ecf20Sopenharmony_ci				ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
32308c2ecf20Sopenharmony_ci				mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
32318c2ecf20Sopenharmony_ci						"telling master to get ref "
32328c2ecf20Sopenharmony_ci						"for cleared out mle during "
32338c2ecf20Sopenharmony_ci						"migration\n", dlm->name,
32348c2ecf20Sopenharmony_ci						namelen, name, master,
32358c2ecf20Sopenharmony_ci						new_master);
32368c2ecf20Sopenharmony_ci			}
32378c2ecf20Sopenharmony_ci		}
32388c2ecf20Sopenharmony_ci		spin_unlock(&tmp->spinlock);
32398c2ecf20Sopenharmony_ci	}
32408c2ecf20Sopenharmony_ci
32418c2ecf20Sopenharmony_ci	/* now add a migration mle to the tail of the list */
32428c2ecf20Sopenharmony_ci	dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen);
32438c2ecf20Sopenharmony_ci	mle->new_master = new_master;
32448c2ecf20Sopenharmony_ci	/* the new master will be sending an assert master for this.
32458c2ecf20Sopenharmony_ci	 * at that point we will get the refmap reference */
32468c2ecf20Sopenharmony_ci	mle->master = master;
32478c2ecf20Sopenharmony_ci	/* do this for consistency with other mle types */
32488c2ecf20Sopenharmony_ci	set_bit(new_master, mle->maybe_map);
32498c2ecf20Sopenharmony_ci	__dlm_insert_mle(dlm, mle);
32508c2ecf20Sopenharmony_ci
32518c2ecf20Sopenharmony_ci	return ret;
32528c2ecf20Sopenharmony_ci}
32538c2ecf20Sopenharmony_ci
32548c2ecf20Sopenharmony_ci/*
32558c2ecf20Sopenharmony_ci * Sets the owner of the lockres, associated to the mle, to UNKNOWN
32568c2ecf20Sopenharmony_ci */
32578c2ecf20Sopenharmony_cistatic struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm,
32588c2ecf20Sopenharmony_ci					struct dlm_master_list_entry *mle)
32598c2ecf20Sopenharmony_ci{
32608c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res;
32618c2ecf20Sopenharmony_ci
32628c2ecf20Sopenharmony_ci	/* Find the lockres associated to the mle and set its owner to UNK */
32638c2ecf20Sopenharmony_ci	res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen,
32648c2ecf20Sopenharmony_ci				   mle->mnamehash);
32658c2ecf20Sopenharmony_ci	if (res) {
32668c2ecf20Sopenharmony_ci		spin_unlock(&dlm->master_lock);
32678c2ecf20Sopenharmony_ci
32688c2ecf20Sopenharmony_ci		/* move lockres onto recovery list */
32698c2ecf20Sopenharmony_ci		spin_lock(&res->spinlock);
32708c2ecf20Sopenharmony_ci		dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
32718c2ecf20Sopenharmony_ci		dlm_move_lockres_to_recovery_list(dlm, res);
32728c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
32738c2ecf20Sopenharmony_ci		dlm_lockres_put(res);
32748c2ecf20Sopenharmony_ci
32758c2ecf20Sopenharmony_ci		/* about to get rid of mle, detach from heartbeat */
32768c2ecf20Sopenharmony_ci		__dlm_mle_detach_hb_events(dlm, mle);
32778c2ecf20Sopenharmony_ci
32788c2ecf20Sopenharmony_ci		/* dump the mle */
32798c2ecf20Sopenharmony_ci		spin_lock(&dlm->master_lock);
32808c2ecf20Sopenharmony_ci		__dlm_put_mle(mle);
32818c2ecf20Sopenharmony_ci		spin_unlock(&dlm->master_lock);
32828c2ecf20Sopenharmony_ci	}
32838c2ecf20Sopenharmony_ci
32848c2ecf20Sopenharmony_ci	return res;
32858c2ecf20Sopenharmony_ci}
32868c2ecf20Sopenharmony_ci
32878c2ecf20Sopenharmony_cistatic void dlm_clean_migration_mle(struct dlm_ctxt *dlm,
32888c2ecf20Sopenharmony_ci				    struct dlm_master_list_entry *mle)
32898c2ecf20Sopenharmony_ci{
32908c2ecf20Sopenharmony_ci	__dlm_mle_detach_hb_events(dlm, mle);
32918c2ecf20Sopenharmony_ci
32928c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
32938c2ecf20Sopenharmony_ci	__dlm_unlink_mle(dlm, mle);
32948c2ecf20Sopenharmony_ci	atomic_set(&mle->woken, 1);
32958c2ecf20Sopenharmony_ci	spin_unlock(&mle->spinlock);
32968c2ecf20Sopenharmony_ci
32978c2ecf20Sopenharmony_ci	wake_up(&mle->wq);
32988c2ecf20Sopenharmony_ci}
32998c2ecf20Sopenharmony_ci
33008c2ecf20Sopenharmony_cistatic void dlm_clean_block_mle(struct dlm_ctxt *dlm,
33018c2ecf20Sopenharmony_ci				struct dlm_master_list_entry *mle, u8 dead_node)
33028c2ecf20Sopenharmony_ci{
33038c2ecf20Sopenharmony_ci	int bit;
33048c2ecf20Sopenharmony_ci
33058c2ecf20Sopenharmony_ci	BUG_ON(mle->type != DLM_MLE_BLOCK);
33068c2ecf20Sopenharmony_ci
33078c2ecf20Sopenharmony_ci	spin_lock(&mle->spinlock);
33088c2ecf20Sopenharmony_ci	bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
33098c2ecf20Sopenharmony_ci	if (bit != dead_node) {
33108c2ecf20Sopenharmony_ci		mlog(0, "mle found, but dead node %u would not have been "
33118c2ecf20Sopenharmony_ci		     "master\n", dead_node);
33128c2ecf20Sopenharmony_ci		spin_unlock(&mle->spinlock);
33138c2ecf20Sopenharmony_ci	} else {
33148c2ecf20Sopenharmony_ci		/* Must drop the refcount by one since the assert_master will
33158c2ecf20Sopenharmony_ci		 * never arrive. This may result in the mle being unlinked and
33168c2ecf20Sopenharmony_ci		 * freed, but there may still be a process waiting in the
33178c2ecf20Sopenharmony_ci		 * dlmlock path which is fine. */
33188c2ecf20Sopenharmony_ci		mlog(0, "node %u was expected master\n", dead_node);
33198c2ecf20Sopenharmony_ci		atomic_set(&mle->woken, 1);
33208c2ecf20Sopenharmony_ci		spin_unlock(&mle->spinlock);
33218c2ecf20Sopenharmony_ci		wake_up(&mle->wq);
33228c2ecf20Sopenharmony_ci
33238c2ecf20Sopenharmony_ci		/* Do not need events any longer, so detach from heartbeat */
33248c2ecf20Sopenharmony_ci		__dlm_mle_detach_hb_events(dlm, mle);
33258c2ecf20Sopenharmony_ci		__dlm_put_mle(mle);
33268c2ecf20Sopenharmony_ci	}
33278c2ecf20Sopenharmony_ci}
33288c2ecf20Sopenharmony_ci
33298c2ecf20Sopenharmony_civoid dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
33308c2ecf20Sopenharmony_ci{
33318c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle;
33328c2ecf20Sopenharmony_ci	struct dlm_lock_resource *res;
33338c2ecf20Sopenharmony_ci	struct hlist_head *bucket;
33348c2ecf20Sopenharmony_ci	struct hlist_node *tmp;
33358c2ecf20Sopenharmony_ci	unsigned int i;
33368c2ecf20Sopenharmony_ci
33378c2ecf20Sopenharmony_ci	mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
33388c2ecf20Sopenharmony_citop:
33398c2ecf20Sopenharmony_ci	assert_spin_locked(&dlm->spinlock);
33408c2ecf20Sopenharmony_ci
33418c2ecf20Sopenharmony_ci	/* clean the master list */
33428c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
33438c2ecf20Sopenharmony_ci	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
33448c2ecf20Sopenharmony_ci		bucket = dlm_master_hash(dlm, i);
33458c2ecf20Sopenharmony_ci		hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
33468c2ecf20Sopenharmony_ci			BUG_ON(mle->type != DLM_MLE_BLOCK &&
33478c2ecf20Sopenharmony_ci			       mle->type != DLM_MLE_MASTER &&
33488c2ecf20Sopenharmony_ci			       mle->type != DLM_MLE_MIGRATION);
33498c2ecf20Sopenharmony_ci
33508c2ecf20Sopenharmony_ci			/* MASTER mles are initiated locally. The waiting
33518c2ecf20Sopenharmony_ci			 * process will notice the node map change shortly.
33528c2ecf20Sopenharmony_ci			 * Let that happen as normal. */
33538c2ecf20Sopenharmony_ci			if (mle->type == DLM_MLE_MASTER)
33548c2ecf20Sopenharmony_ci				continue;
33558c2ecf20Sopenharmony_ci
33568c2ecf20Sopenharmony_ci			/* BLOCK mles are initiated by other nodes. Need to
33578c2ecf20Sopenharmony_ci			 * clean up if the dead node would have been the
33588c2ecf20Sopenharmony_ci			 * master. */
33598c2ecf20Sopenharmony_ci			if (mle->type == DLM_MLE_BLOCK) {
33608c2ecf20Sopenharmony_ci				dlm_clean_block_mle(dlm, mle, dead_node);
33618c2ecf20Sopenharmony_ci				continue;
33628c2ecf20Sopenharmony_ci			}
33638c2ecf20Sopenharmony_ci
33648c2ecf20Sopenharmony_ci			/* Everything else is a MIGRATION mle */
33658c2ecf20Sopenharmony_ci
33668c2ecf20Sopenharmony_ci			/* The rule for MIGRATION mles is that the master
33678c2ecf20Sopenharmony_ci			 * becomes UNKNOWN if *either* the original or the new
33688c2ecf20Sopenharmony_ci			 * master dies. All UNKNOWN lockres' are sent to
33698c2ecf20Sopenharmony_ci			 * whichever node becomes the recovery master. The new
33708c2ecf20Sopenharmony_ci			 * master is responsible for determining if there is
33718c2ecf20Sopenharmony_ci			 * still a master for this lockres, or if he needs to
33728c2ecf20Sopenharmony_ci			 * take over mastery. Either way, this node should
33738c2ecf20Sopenharmony_ci			 * expect another message to resolve this. */
33748c2ecf20Sopenharmony_ci
33758c2ecf20Sopenharmony_ci			if (mle->master != dead_node &&
33768c2ecf20Sopenharmony_ci			    mle->new_master != dead_node)
33778c2ecf20Sopenharmony_ci				continue;
33788c2ecf20Sopenharmony_ci
33798c2ecf20Sopenharmony_ci			if (mle->new_master == dead_node && mle->inuse) {
33808c2ecf20Sopenharmony_ci				mlog(ML_NOTICE, "%s: target %u died during "
33818c2ecf20Sopenharmony_ci						"migration from %u, the MLE is "
33828c2ecf20Sopenharmony_ci						"still keep used, ignore it!\n",
33838c2ecf20Sopenharmony_ci						dlm->name, dead_node,
33848c2ecf20Sopenharmony_ci						mle->master);
33858c2ecf20Sopenharmony_ci				continue;
33868c2ecf20Sopenharmony_ci			}
33878c2ecf20Sopenharmony_ci
33888c2ecf20Sopenharmony_ci			/* If we have reached this point, this mle needs to be
33898c2ecf20Sopenharmony_ci			 * removed from the list and freed. */
33908c2ecf20Sopenharmony_ci			dlm_clean_migration_mle(dlm, mle);
33918c2ecf20Sopenharmony_ci
33928c2ecf20Sopenharmony_ci			mlog(0, "%s: node %u died during migration from "
33938c2ecf20Sopenharmony_ci			     "%u to %u!\n", dlm->name, dead_node, mle->master,
33948c2ecf20Sopenharmony_ci			     mle->new_master);
33958c2ecf20Sopenharmony_ci
33968c2ecf20Sopenharmony_ci			/* If we find a lockres associated with the mle, we've
33978c2ecf20Sopenharmony_ci			 * hit this rare case that messes up our lock ordering.
33988c2ecf20Sopenharmony_ci			 * If so, we need to drop the master lock so that we can
33998c2ecf20Sopenharmony_ci			 * take the lockres lock, meaning that we will have to
34008c2ecf20Sopenharmony_ci			 * restart from the head of list. */
34018c2ecf20Sopenharmony_ci			res = dlm_reset_mleres_owner(dlm, mle);
34028c2ecf20Sopenharmony_ci			if (res)
34038c2ecf20Sopenharmony_ci				/* restart */
34048c2ecf20Sopenharmony_ci				goto top;
34058c2ecf20Sopenharmony_ci
34068c2ecf20Sopenharmony_ci			/* This may be the last reference */
34078c2ecf20Sopenharmony_ci			__dlm_put_mle(mle);
34088c2ecf20Sopenharmony_ci		}
34098c2ecf20Sopenharmony_ci	}
34108c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
34118c2ecf20Sopenharmony_ci}
34128c2ecf20Sopenharmony_ci
34138c2ecf20Sopenharmony_ciint dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
34148c2ecf20Sopenharmony_ci			 u8 old_master)
34158c2ecf20Sopenharmony_ci{
34168c2ecf20Sopenharmony_ci	struct dlm_node_iter iter;
34178c2ecf20Sopenharmony_ci	int ret = 0;
34188c2ecf20Sopenharmony_ci
34198c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
34208c2ecf20Sopenharmony_ci	dlm_node_iter_init(dlm->domain_map, &iter);
34218c2ecf20Sopenharmony_ci	clear_bit(old_master, iter.node_map);
34228c2ecf20Sopenharmony_ci	clear_bit(dlm->node_num, iter.node_map);
34238c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
34248c2ecf20Sopenharmony_ci
34258c2ecf20Sopenharmony_ci	/* ownership of the lockres is changing.  account for the
34268c2ecf20Sopenharmony_ci	 * mastery reference here since old_master will briefly have
34278c2ecf20Sopenharmony_ci	 * a reference after the migration completes */
34288c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
34298c2ecf20Sopenharmony_ci	dlm_lockres_set_refmap_bit(dlm, res, old_master);
34308c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
34318c2ecf20Sopenharmony_ci
34328c2ecf20Sopenharmony_ci	mlog(0, "now time to do a migrate request to other nodes\n");
34338c2ecf20Sopenharmony_ci	ret = dlm_do_migrate_request(dlm, res, old_master,
34348c2ecf20Sopenharmony_ci				     dlm->node_num, &iter);
34358c2ecf20Sopenharmony_ci	if (ret < 0) {
34368c2ecf20Sopenharmony_ci		mlog_errno(ret);
34378c2ecf20Sopenharmony_ci		goto leave;
34388c2ecf20Sopenharmony_ci	}
34398c2ecf20Sopenharmony_ci
34408c2ecf20Sopenharmony_ci	mlog(0, "doing assert master of %.*s to all except the original node\n",
34418c2ecf20Sopenharmony_ci	     res->lockname.len, res->lockname.name);
34428c2ecf20Sopenharmony_ci	/* this call now finishes out the nodemap
34438c2ecf20Sopenharmony_ci	 * even if one or more nodes die */
34448c2ecf20Sopenharmony_ci	ret = dlm_do_assert_master(dlm, res, iter.node_map,
34458c2ecf20Sopenharmony_ci				   DLM_ASSERT_MASTER_FINISH_MIGRATION);
34468c2ecf20Sopenharmony_ci	if (ret < 0) {
34478c2ecf20Sopenharmony_ci		/* no longer need to retry.  all living nodes contacted. */
34488c2ecf20Sopenharmony_ci		mlog_errno(ret);
34498c2ecf20Sopenharmony_ci		ret = 0;
34508c2ecf20Sopenharmony_ci	}
34518c2ecf20Sopenharmony_ci
34528c2ecf20Sopenharmony_ci	memset(iter.node_map, 0, sizeof(iter.node_map));
34538c2ecf20Sopenharmony_ci	set_bit(old_master, iter.node_map);
34548c2ecf20Sopenharmony_ci	mlog(0, "doing assert master of %.*s back to %u\n",
34558c2ecf20Sopenharmony_ci	     res->lockname.len, res->lockname.name, old_master);
34568c2ecf20Sopenharmony_ci	ret = dlm_do_assert_master(dlm, res, iter.node_map,
34578c2ecf20Sopenharmony_ci				   DLM_ASSERT_MASTER_FINISH_MIGRATION);
34588c2ecf20Sopenharmony_ci	if (ret < 0) {
34598c2ecf20Sopenharmony_ci		mlog(0, "assert master to original master failed "
34608c2ecf20Sopenharmony_ci		     "with %d.\n", ret);
34618c2ecf20Sopenharmony_ci		/* the only nonzero status here would be because of
34628c2ecf20Sopenharmony_ci		 * a dead original node.  we're done. */
34638c2ecf20Sopenharmony_ci		ret = 0;
34648c2ecf20Sopenharmony_ci	}
34658c2ecf20Sopenharmony_ci
34668c2ecf20Sopenharmony_ci	/* all done, set the owner, clear the flag */
34678c2ecf20Sopenharmony_ci	spin_lock(&res->spinlock);
34688c2ecf20Sopenharmony_ci	dlm_set_lockres_owner(dlm, res, dlm->node_num);
34698c2ecf20Sopenharmony_ci	res->state &= ~DLM_LOCK_RES_MIGRATING;
34708c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
34718c2ecf20Sopenharmony_ci	/* re-dirty it on the new master */
34728c2ecf20Sopenharmony_ci	dlm_kick_thread(dlm, res);
34738c2ecf20Sopenharmony_ci	wake_up(&res->wq);
34748c2ecf20Sopenharmony_cileave:
34758c2ecf20Sopenharmony_ci	return ret;
34768c2ecf20Sopenharmony_ci}
34778c2ecf20Sopenharmony_ci
34788c2ecf20Sopenharmony_ci/*
34798c2ecf20Sopenharmony_ci * LOCKRES AST REFCOUNT
34808c2ecf20Sopenharmony_ci * this is integral to migration
34818c2ecf20Sopenharmony_ci */
34828c2ecf20Sopenharmony_ci
34838c2ecf20Sopenharmony_ci/* for future intent to call an ast, reserve one ahead of time.
34848c2ecf20Sopenharmony_ci * this should be called only after waiting on the lockres
34858c2ecf20Sopenharmony_ci * with dlm_wait_on_lockres, and while still holding the
34868c2ecf20Sopenharmony_ci * spinlock after the call. */
34878c2ecf20Sopenharmony_civoid __dlm_lockres_reserve_ast(struct dlm_lock_resource *res)
34888c2ecf20Sopenharmony_ci{
34898c2ecf20Sopenharmony_ci	assert_spin_locked(&res->spinlock);
34908c2ecf20Sopenharmony_ci	if (res->state & DLM_LOCK_RES_MIGRATING) {
34918c2ecf20Sopenharmony_ci		__dlm_print_one_lock_resource(res);
34928c2ecf20Sopenharmony_ci	}
34938c2ecf20Sopenharmony_ci	BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
34948c2ecf20Sopenharmony_ci
34958c2ecf20Sopenharmony_ci	atomic_inc(&res->asts_reserved);
34968c2ecf20Sopenharmony_ci}
34978c2ecf20Sopenharmony_ci
34988c2ecf20Sopenharmony_ci/*
34998c2ecf20Sopenharmony_ci * used to drop the reserved ast, either because it went unused,
35008c2ecf20Sopenharmony_ci * or because the ast/bast was actually called.
35018c2ecf20Sopenharmony_ci *
35028c2ecf20Sopenharmony_ci * also, if there is a pending migration on this lockres,
35038c2ecf20Sopenharmony_ci * and this was the last pending ast on the lockres,
35048c2ecf20Sopenharmony_ci * atomically set the MIGRATING flag before we drop the lock.
35058c2ecf20Sopenharmony_ci * this is how we ensure that migration can proceed with no
35068c2ecf20Sopenharmony_ci * asts in progress.  note that it is ok if the state of the
35078c2ecf20Sopenharmony_ci * queues is such that a lock should be granted in the future
35088c2ecf20Sopenharmony_ci * or that a bast should be fired, because the new master will
35098c2ecf20Sopenharmony_ci * shuffle the lists on this lockres as soon as it is migrated.
35108c2ecf20Sopenharmony_ci */
35118c2ecf20Sopenharmony_civoid dlm_lockres_release_ast(struct dlm_ctxt *dlm,
35128c2ecf20Sopenharmony_ci			     struct dlm_lock_resource *res)
35138c2ecf20Sopenharmony_ci{
35148c2ecf20Sopenharmony_ci	if (!atomic_dec_and_lock(&res->asts_reserved, &res->spinlock))
35158c2ecf20Sopenharmony_ci		return;
35168c2ecf20Sopenharmony_ci
35178c2ecf20Sopenharmony_ci	if (!res->migration_pending) {
35188c2ecf20Sopenharmony_ci		spin_unlock(&res->spinlock);
35198c2ecf20Sopenharmony_ci		return;
35208c2ecf20Sopenharmony_ci	}
35218c2ecf20Sopenharmony_ci
35228c2ecf20Sopenharmony_ci	BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
35238c2ecf20Sopenharmony_ci	res->migration_pending = 0;
35248c2ecf20Sopenharmony_ci	res->state |= DLM_LOCK_RES_MIGRATING;
35258c2ecf20Sopenharmony_ci	spin_unlock(&res->spinlock);
35268c2ecf20Sopenharmony_ci	wake_up(&res->wq);
35278c2ecf20Sopenharmony_ci	wake_up(&dlm->migration_wq);
35288c2ecf20Sopenharmony_ci}
35298c2ecf20Sopenharmony_ci
35308c2ecf20Sopenharmony_civoid dlm_force_free_mles(struct dlm_ctxt *dlm)
35318c2ecf20Sopenharmony_ci{
35328c2ecf20Sopenharmony_ci	int i;
35338c2ecf20Sopenharmony_ci	struct hlist_head *bucket;
35348c2ecf20Sopenharmony_ci	struct dlm_master_list_entry *mle;
35358c2ecf20Sopenharmony_ci	struct hlist_node *tmp;
35368c2ecf20Sopenharmony_ci
35378c2ecf20Sopenharmony_ci	/*
35388c2ecf20Sopenharmony_ci	 * We notified all other nodes that we are exiting the domain and
35398c2ecf20Sopenharmony_ci	 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
35408c2ecf20Sopenharmony_ci	 * around we force free them and wake any processes that are waiting
35418c2ecf20Sopenharmony_ci	 * on the mles
35428c2ecf20Sopenharmony_ci	 */
35438c2ecf20Sopenharmony_ci	spin_lock(&dlm->spinlock);
35448c2ecf20Sopenharmony_ci	spin_lock(&dlm->master_lock);
35458c2ecf20Sopenharmony_ci
35468c2ecf20Sopenharmony_ci	BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
35478c2ecf20Sopenharmony_ci	BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
35488c2ecf20Sopenharmony_ci
35498c2ecf20Sopenharmony_ci	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
35508c2ecf20Sopenharmony_ci		bucket = dlm_master_hash(dlm, i);
35518c2ecf20Sopenharmony_ci		hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
35528c2ecf20Sopenharmony_ci			if (mle->type != DLM_MLE_BLOCK) {
35538c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "bad mle: %p\n", mle);
35548c2ecf20Sopenharmony_ci				dlm_print_one_mle(mle);
35558c2ecf20Sopenharmony_ci			}
35568c2ecf20Sopenharmony_ci			atomic_set(&mle->woken, 1);
35578c2ecf20Sopenharmony_ci			wake_up(&mle->wq);
35588c2ecf20Sopenharmony_ci
35598c2ecf20Sopenharmony_ci			__dlm_unlink_mle(dlm, mle);
35608c2ecf20Sopenharmony_ci			__dlm_mle_detach_hb_events(dlm, mle);
35618c2ecf20Sopenharmony_ci			__dlm_put_mle(mle);
35628c2ecf20Sopenharmony_ci		}
35638c2ecf20Sopenharmony_ci	}
35648c2ecf20Sopenharmony_ci	spin_unlock(&dlm->master_lock);
35658c2ecf20Sopenharmony_ci	spin_unlock(&dlm->spinlock);
35668c2ecf20Sopenharmony_ci}
3567