162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * userdlm.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Code which implements the kernel side of a minimal userspace
662306a36Sopenharmony_ci * interface to our DLM.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Many of the functions here are pared down versions of dlmglue.c
962306a36Sopenharmony_ci * functions.
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/signal.h>
1562306a36Sopenharmony_ci#include <linux/sched/signal.h>
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#include <linux/module.h>
1862306a36Sopenharmony_ci#include <linux/fs.h>
1962306a36Sopenharmony_ci#include <linux/types.h>
2062306a36Sopenharmony_ci#include <linux/crc32.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include "../ocfs2_lockingver.h"
2362306a36Sopenharmony_ci#include "../stackglue.h"
2462306a36Sopenharmony_ci#include "userdlm.h"
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#define MLOG_MASK_PREFIX ML_DLMFS
2762306a36Sopenharmony_ci#include "../cluster/masklog.h"
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_cistatic inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
3162306a36Sopenharmony_ci{
3262306a36Sopenharmony_ci	return container_of(lksb, struct user_lock_res, l_lksb);
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic inline int user_check_wait_flag(struct user_lock_res *lockres,
3662306a36Sopenharmony_ci				       int flag)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	int ret;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
4162306a36Sopenharmony_ci	ret = lockres->l_flags & flag;
4262306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	return ret;
4562306a36Sopenharmony_ci}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic inline void user_wait_on_busy_lock(struct user_lock_res *lockres)
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci{
5062306a36Sopenharmony_ci	wait_event(lockres->l_event,
5162306a36Sopenharmony_ci		   !user_check_wait_flag(lockres, USER_LOCK_BUSY));
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_cistatic inline void user_wait_on_blocked_lock(struct user_lock_res *lockres)
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	wait_event(lockres->l_event,
5862306a36Sopenharmony_ci		   !user_check_wait_flag(lockres, USER_LOCK_BLOCKED));
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/* I heart container_of... */
6262306a36Sopenharmony_cistatic inline struct ocfs2_cluster_connection *
6362306a36Sopenharmony_cicluster_connection_from_user_lockres(struct user_lock_res *lockres)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	struct dlmfs_inode_private *ip;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	ip = container_of(lockres,
6862306a36Sopenharmony_ci			  struct dlmfs_inode_private,
6962306a36Sopenharmony_ci			  ip_lockres);
7062306a36Sopenharmony_ci	return ip->ip_conn;
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic struct inode *
7462306a36Sopenharmony_ciuser_dlm_inode_from_user_lockres(struct user_lock_res *lockres)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	struct dlmfs_inode_private *ip;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	ip = container_of(lockres,
7962306a36Sopenharmony_ci			  struct dlmfs_inode_private,
8062306a36Sopenharmony_ci			  ip_lockres);
8162306a36Sopenharmony_ci	return &ip->ip_vfs_inode;
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
8762306a36Sopenharmony_ci	lockres->l_flags &= ~USER_LOCK_BUSY;
8862306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
/*
 * Log a failed DLM call at ML_ERROR.
 *
 * _func:    name of the call that failed (string)
 * _stat:    integer status it returned
 * _lockres: struct user_lock_res * whose l_name/l_namelen identify the
 *           resource; evaluated exactly once
 */
#define user_log_dlm_error(_func, _stat, _lockres) do {			\
	struct user_lock_res *__log_res = (_lockres);			\
	mlog(ML_ERROR, "Dlm error %d while calling %s on "		\
		"resource %.*s\n", (_stat), (_func),			\
		__log_res->l_namelen, __log_res->l_name);		\
} while (0)
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci/* WARNING: This function lives in a world where the only three lock
9862306a36Sopenharmony_ci * levels are EX, PR, and NL. It *will* have to be adjusted when more
9962306a36Sopenharmony_ci * lock types are added. */
10062306a36Sopenharmony_cistatic inline int user_highest_compat_lock_level(int level)
10162306a36Sopenharmony_ci{
10262306a36Sopenharmony_ci	int new_level = DLM_LOCK_EX;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	if (level == DLM_LOCK_EX)
10562306a36Sopenharmony_ci		new_level = DLM_LOCK_NL;
10662306a36Sopenharmony_ci	else if (level == DLM_LOCK_PR)
10762306a36Sopenharmony_ci		new_level = DLM_LOCK_PR;
10862306a36Sopenharmony_ci	return new_level;
10962306a36Sopenharmony_ci}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_cistatic void user_ast(struct ocfs2_dlm_lksb *lksb)
11262306a36Sopenharmony_ci{
11362306a36Sopenharmony_ci	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
11462306a36Sopenharmony_ci	int status;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n",
11762306a36Sopenharmony_ci	     lockres->l_namelen, lockres->l_name, lockres->l_level,
11862306a36Sopenharmony_ci	     lockres->l_requested);
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
12362306a36Sopenharmony_ci	if (status) {
12462306a36Sopenharmony_ci		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
12562306a36Sopenharmony_ci		     status, lockres->l_namelen, lockres->l_name);
12662306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
12762306a36Sopenharmony_ci		return;
12862306a36Sopenharmony_ci	}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV,
13162306a36Sopenharmony_ci			"Lockres %.*s, requested ivmode. flags 0x%x\n",
13262306a36Sopenharmony_ci			lockres->l_namelen, lockres->l_name, lockres->l_flags);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	/* we're downconverting. */
13562306a36Sopenharmony_ci	if (lockres->l_requested < lockres->l_level) {
13662306a36Sopenharmony_ci		if (lockres->l_requested <=
13762306a36Sopenharmony_ci		    user_highest_compat_lock_level(lockres->l_blocking)) {
13862306a36Sopenharmony_ci			lockres->l_blocking = DLM_LOCK_NL;
13962306a36Sopenharmony_ci			lockres->l_flags &= ~USER_LOCK_BLOCKED;
14062306a36Sopenharmony_ci		}
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	lockres->l_level = lockres->l_requested;
14462306a36Sopenharmony_ci	lockres->l_requested = DLM_LOCK_IV;
14562306a36Sopenharmony_ci	lockres->l_flags |= USER_LOCK_ATTACHED;
14662306a36Sopenharmony_ci	lockres->l_flags &= ~USER_LOCK_BUSY;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	wake_up(&lockres->l_event);
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_cistatic inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
15462306a36Sopenharmony_ci{
15562306a36Sopenharmony_ci	struct inode *inode;
15662306a36Sopenharmony_ci	inode = user_dlm_inode_from_user_lockres(lockres);
15762306a36Sopenharmony_ci	if (!igrab(inode))
15862306a36Sopenharmony_ci		BUG();
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic void user_dlm_unblock_lock(struct work_struct *work);
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cistatic void __user_dlm_queue_lockres(struct user_lock_res *lockres)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
16662306a36Sopenharmony_ci		user_dlm_grab_inode_ref(lockres);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci		queue_work(user_dlm_worker, &lockres->l_work);
17162306a36Sopenharmony_ci		lockres->l_flags |= USER_LOCK_QUEUED;
17262306a36Sopenharmony_ci	}
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	int queue = 0;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	if (!(lockres->l_flags & USER_LOCK_BLOCKED))
18062306a36Sopenharmony_ci		return;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	switch (lockres->l_blocking) {
18362306a36Sopenharmony_ci	case DLM_LOCK_EX:
18462306a36Sopenharmony_ci		if (!lockres->l_ex_holders && !lockres->l_ro_holders)
18562306a36Sopenharmony_ci			queue = 1;
18662306a36Sopenharmony_ci		break;
18762306a36Sopenharmony_ci	case DLM_LOCK_PR:
18862306a36Sopenharmony_ci		if (!lockres->l_ex_holders)
18962306a36Sopenharmony_ci			queue = 1;
19062306a36Sopenharmony_ci		break;
19162306a36Sopenharmony_ci	default:
19262306a36Sopenharmony_ci		BUG();
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if (queue)
19662306a36Sopenharmony_ci		__user_dlm_queue_lockres(lockres);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_cistatic void user_bast(struct ocfs2_dlm_lksb *lksb, int level)
20062306a36Sopenharmony_ci{
20162306a36Sopenharmony_ci	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n",
20462306a36Sopenharmony_ci	     lockres->l_namelen, lockres->l_name, level, lockres->l_level);
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
20762306a36Sopenharmony_ci	lockres->l_flags |= USER_LOCK_BLOCKED;
20862306a36Sopenharmony_ci	if (level > lockres->l_blocking)
20962306a36Sopenharmony_ci		lockres->l_blocking = level;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	__user_dlm_queue_lockres(lockres);
21262306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	wake_up(&lockres->l_event);
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_cistatic void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status)
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n",
22262306a36Sopenharmony_ci	     lockres->l_namelen, lockres->l_name, lockres->l_flags);
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	if (status)
22562306a36Sopenharmony_ci		mlog(ML_ERROR, "dlm returns status %d\n", status);
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
22862306a36Sopenharmony_ci	/* The teardown flag gets set early during the unlock process,
22962306a36Sopenharmony_ci	 * so test the cancel flag to make sure that this ast isn't
23062306a36Sopenharmony_ci	 * for a concurrent cancel. */
23162306a36Sopenharmony_ci	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
23262306a36Sopenharmony_ci	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
23362306a36Sopenharmony_ci		lockres->l_level = DLM_LOCK_IV;
23462306a36Sopenharmony_ci	} else if (status == DLM_CANCELGRANT) {
23562306a36Sopenharmony_ci		/* We tried to cancel a convert request, but it was
23662306a36Sopenharmony_ci		 * already granted. Don't clear the busy flag - the
23762306a36Sopenharmony_ci		 * ast should've done this already. */
23862306a36Sopenharmony_ci		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
23962306a36Sopenharmony_ci		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
24062306a36Sopenharmony_ci		goto out_noclear;
24162306a36Sopenharmony_ci	} else {
24262306a36Sopenharmony_ci		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
24362306a36Sopenharmony_ci		/* Cancel succeeded, we want to re-queue */
24462306a36Sopenharmony_ci		lockres->l_requested = DLM_LOCK_IV; /* cancel an
24562306a36Sopenharmony_ci						    * upconvert
24662306a36Sopenharmony_ci						    * request. */
24762306a36Sopenharmony_ci		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
24862306a36Sopenharmony_ci		/* we want the unblock thread to look at it again
24962306a36Sopenharmony_ci		 * now. */
25062306a36Sopenharmony_ci		if (lockres->l_flags & USER_LOCK_BLOCKED)
25162306a36Sopenharmony_ci			__user_dlm_queue_lockres(lockres);
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	lockres->l_flags &= ~USER_LOCK_BUSY;
25562306a36Sopenharmony_ciout_noclear:
25662306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	wake_up(&lockres->l_event);
25962306a36Sopenharmony_ci}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci/*
26262306a36Sopenharmony_ci * This is the userdlmfs locking protocol version.
26362306a36Sopenharmony_ci *
26462306a36Sopenharmony_ci * See fs/ocfs2/dlmglue.c for more details on locking versions.
26562306a36Sopenharmony_ci */
26662306a36Sopenharmony_cistatic struct ocfs2_locking_protocol user_dlm_lproto = {
26762306a36Sopenharmony_ci	.lp_max_version = {
26862306a36Sopenharmony_ci		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
26962306a36Sopenharmony_ci		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
27062306a36Sopenharmony_ci	},
27162306a36Sopenharmony_ci	.lp_lock_ast		= user_ast,
27262306a36Sopenharmony_ci	.lp_blocking_ast	= user_bast,
27362306a36Sopenharmony_ci	.lp_unlock_ast		= user_unlock_ast,
27462306a36Sopenharmony_ci};
27562306a36Sopenharmony_ci
/* Release (iput) one reference on the inode that embeds this lockres. */
static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
{
	iput(user_dlm_inode_from_user_lockres(lockres));
}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_cistatic void user_dlm_unblock_lock(struct work_struct *work)
28462306a36Sopenharmony_ci{
28562306a36Sopenharmony_ci	int new_level, status;
28662306a36Sopenharmony_ci	struct user_lock_res *lockres =
28762306a36Sopenharmony_ci		container_of(work, struct user_lock_res, l_work);
28862306a36Sopenharmony_ci	struct ocfs2_cluster_connection *conn =
28962306a36Sopenharmony_ci		cluster_connection_from_user_lockres(lockres);
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
29662306a36Sopenharmony_ci			"Lockres %.*s, flags 0x%x\n",
29762306a36Sopenharmony_ci			lockres->l_namelen, lockres->l_name, lockres->l_flags);
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
30062306a36Sopenharmony_ci	 * set, we want user_ast clear it. */
30162306a36Sopenharmony_ci	lockres->l_flags &= ~USER_LOCK_QUEUED;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	/* It's valid to get here and no longer be blocked - if we get
30462306a36Sopenharmony_ci	 * several basts in a row, we might be queued by the first
30562306a36Sopenharmony_ci	 * one, the unblock thread might run and clear the queued
30662306a36Sopenharmony_ci	 * flag, and finally we might get another bast which re-queues
30762306a36Sopenharmony_ci	 * us before our ast for the downconvert is called. */
30862306a36Sopenharmony_ci	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
30962306a36Sopenharmony_ci		mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n",
31062306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name);
31162306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
31262306a36Sopenharmony_ci		goto drop_ref;
31362306a36Sopenharmony_ci	}
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
31662306a36Sopenharmony_ci		mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n",
31762306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name);
31862306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
31962306a36Sopenharmony_ci		goto drop_ref;
32062306a36Sopenharmony_ci	}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	if (lockres->l_flags & USER_LOCK_BUSY) {
32362306a36Sopenharmony_ci		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
32462306a36Sopenharmony_ci			mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n",
32562306a36Sopenharmony_ci			     lockres->l_namelen, lockres->l_name);
32662306a36Sopenharmony_ci			spin_unlock(&lockres->l_lock);
32762306a36Sopenharmony_ci			goto drop_ref;
32862306a36Sopenharmony_ci		}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci		lockres->l_flags |= USER_LOCK_IN_CANCEL;
33162306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci		status = ocfs2_dlm_unlock(conn, &lockres->l_lksb,
33462306a36Sopenharmony_ci					  DLM_LKF_CANCEL);
33562306a36Sopenharmony_ci		if (status)
33662306a36Sopenharmony_ci			user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
33762306a36Sopenharmony_ci		goto drop_ref;
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	/* If there are still incompat holders, we can exit safely
34162306a36Sopenharmony_ci	 * without worrying about re-queueing this lock as that will
34262306a36Sopenharmony_ci	 * happen on the last call to user_cluster_unlock. */
34362306a36Sopenharmony_ci	if ((lockres->l_blocking == DLM_LOCK_EX)
34462306a36Sopenharmony_ci	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
34562306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
34662306a36Sopenharmony_ci		mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n",
34762306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name,
34862306a36Sopenharmony_ci		     lockres->l_ex_holders, lockres->l_ro_holders);
34962306a36Sopenharmony_ci		goto drop_ref;
35062306a36Sopenharmony_ci	}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	if ((lockres->l_blocking == DLM_LOCK_PR)
35362306a36Sopenharmony_ci	    && lockres->l_ex_holders) {
35462306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
35562306a36Sopenharmony_ci		mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n",
35662306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name,
35762306a36Sopenharmony_ci		     lockres->l_ex_holders);
35862306a36Sopenharmony_ci		goto drop_ref;
35962306a36Sopenharmony_ci	}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	/* yay, we can downconvert now. */
36262306a36Sopenharmony_ci	new_level = user_highest_compat_lock_level(lockres->l_blocking);
36362306a36Sopenharmony_ci	lockres->l_requested = new_level;
36462306a36Sopenharmony_ci	lockres->l_flags |= USER_LOCK_BUSY;
36562306a36Sopenharmony_ci	mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n",
36662306a36Sopenharmony_ci	     lockres->l_namelen, lockres->l_name, lockres->l_level, new_level);
36762306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	/* need lock downconvert request now... */
37062306a36Sopenharmony_ci	status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb,
37162306a36Sopenharmony_ci				DLM_LKF_CONVERT|DLM_LKF_VALBLK,
37262306a36Sopenharmony_ci				lockres->l_name,
37362306a36Sopenharmony_ci				lockres->l_namelen);
37462306a36Sopenharmony_ci	if (status) {
37562306a36Sopenharmony_ci		user_log_dlm_error("ocfs2_dlm_lock", status, lockres);
37662306a36Sopenharmony_ci		user_recover_from_dlm_error(lockres);
37762306a36Sopenharmony_ci	}
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_cidrop_ref:
38062306a36Sopenharmony_ci	user_dlm_drop_inode_ref(lockres);
38162306a36Sopenharmony_ci}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_cistatic inline void user_dlm_inc_holders(struct user_lock_res *lockres,
38462306a36Sopenharmony_ci					int level)
38562306a36Sopenharmony_ci{
38662306a36Sopenharmony_ci	switch(level) {
38762306a36Sopenharmony_ci	case DLM_LOCK_EX:
38862306a36Sopenharmony_ci		lockres->l_ex_holders++;
38962306a36Sopenharmony_ci		break;
39062306a36Sopenharmony_ci	case DLM_LOCK_PR:
39162306a36Sopenharmony_ci		lockres->l_ro_holders++;
39262306a36Sopenharmony_ci		break;
39362306a36Sopenharmony_ci	default:
39462306a36Sopenharmony_ci		BUG();
39562306a36Sopenharmony_ci	}
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci/* predict what lock level we'll be dropping down to on behalf
39962306a36Sopenharmony_ci * of another node, and return true if the currently wanted
40062306a36Sopenharmony_ci * level will be compatible with it. */
40162306a36Sopenharmony_cistatic inline int
40262306a36Sopenharmony_ciuser_may_continue_on_blocked_lock(struct user_lock_res *lockres,
40362306a36Sopenharmony_ci				  int wanted)
40462306a36Sopenharmony_ci{
40562306a36Sopenharmony_ci	BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	return wanted <= user_highest_compat_lock_level(lockres->l_blocking);
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ciint user_dlm_cluster_lock(struct user_lock_res *lockres,
41162306a36Sopenharmony_ci			  int level,
41262306a36Sopenharmony_ci			  int lkm_flags)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	int status, local_flags;
41562306a36Sopenharmony_ci	struct ocfs2_cluster_connection *conn =
41662306a36Sopenharmony_ci		cluster_connection_from_user_lockres(lockres);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	if (level != DLM_LOCK_EX &&
41962306a36Sopenharmony_ci	    level != DLM_LOCK_PR) {
42062306a36Sopenharmony_ci		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
42162306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name);
42262306a36Sopenharmony_ci		status = -EINVAL;
42362306a36Sopenharmony_ci		goto bail;
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n",
42762306a36Sopenharmony_ci	     lockres->l_namelen, lockres->l_name, level, lkm_flags);
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ciagain:
43062306a36Sopenharmony_ci	if (signal_pending(current)) {
43162306a36Sopenharmony_ci		status = -ERESTARTSYS;
43262306a36Sopenharmony_ci		goto bail;
43362306a36Sopenharmony_ci	}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
43662306a36Sopenharmony_ci	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
43762306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
43862306a36Sopenharmony_ci		status = -EAGAIN;
43962306a36Sopenharmony_ci		goto bail;
44062306a36Sopenharmony_ci	}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	/* We only compare against the currently granted level
44362306a36Sopenharmony_ci	 * here. If the lock is blocked waiting on a downconvert,
44462306a36Sopenharmony_ci	 * we'll get caught below. */
44562306a36Sopenharmony_ci	if ((lockres->l_flags & USER_LOCK_BUSY) &&
44662306a36Sopenharmony_ci	    (level > lockres->l_level)) {
44762306a36Sopenharmony_ci		/* is someone sitting in dlm_lock? If so, wait on
44862306a36Sopenharmony_ci		 * them. */
44962306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci		user_wait_on_busy_lock(lockres);
45262306a36Sopenharmony_ci		goto again;
45362306a36Sopenharmony_ci	}
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	if ((lockres->l_flags & USER_LOCK_BLOCKED) &&
45662306a36Sopenharmony_ci	    (!user_may_continue_on_blocked_lock(lockres, level))) {
45762306a36Sopenharmony_ci		/* is the lock is currently blocked on behalf of
45862306a36Sopenharmony_ci		 * another node */
45962306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci		user_wait_on_blocked_lock(lockres);
46262306a36Sopenharmony_ci		goto again;
46362306a36Sopenharmony_ci	}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci	if (level > lockres->l_level) {
46662306a36Sopenharmony_ci		local_flags = lkm_flags | DLM_LKF_VALBLK;
46762306a36Sopenharmony_ci		if (lockres->l_level != DLM_LOCK_IV)
46862306a36Sopenharmony_ci			local_flags |= DLM_LKF_CONVERT;
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci		lockres->l_requested = level;
47162306a36Sopenharmony_ci		lockres->l_flags |= USER_LOCK_BUSY;
47262306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci		BUG_ON(level == DLM_LOCK_IV);
47562306a36Sopenharmony_ci		BUG_ON(level == DLM_LOCK_NL);
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		/* call dlm_lock to upgrade lock now */
47862306a36Sopenharmony_ci		status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb,
47962306a36Sopenharmony_ci					local_flags, lockres->l_name,
48062306a36Sopenharmony_ci					lockres->l_namelen);
48162306a36Sopenharmony_ci		if (status) {
48262306a36Sopenharmony_ci			if ((lkm_flags & DLM_LKF_NOQUEUE) &&
48362306a36Sopenharmony_ci			    (status != -EAGAIN))
48462306a36Sopenharmony_ci				user_log_dlm_error("ocfs2_dlm_lock",
48562306a36Sopenharmony_ci						   status, lockres);
48662306a36Sopenharmony_ci			user_recover_from_dlm_error(lockres);
48762306a36Sopenharmony_ci			goto bail;
48862306a36Sopenharmony_ci		}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci		user_wait_on_busy_lock(lockres);
49162306a36Sopenharmony_ci		goto again;
49262306a36Sopenharmony_ci	}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	user_dlm_inc_holders(lockres, level);
49562306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	status = 0;
49862306a36Sopenharmony_cibail:
49962306a36Sopenharmony_ci	return status;
50062306a36Sopenharmony_ci}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_cistatic inline void user_dlm_dec_holders(struct user_lock_res *lockres,
50362306a36Sopenharmony_ci					int level)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	switch(level) {
50662306a36Sopenharmony_ci	case DLM_LOCK_EX:
50762306a36Sopenharmony_ci		BUG_ON(!lockres->l_ex_holders);
50862306a36Sopenharmony_ci		lockres->l_ex_holders--;
50962306a36Sopenharmony_ci		break;
51062306a36Sopenharmony_ci	case DLM_LOCK_PR:
51162306a36Sopenharmony_ci		BUG_ON(!lockres->l_ro_holders);
51262306a36Sopenharmony_ci		lockres->l_ro_holders--;
51362306a36Sopenharmony_ci		break;
51462306a36Sopenharmony_ci	default:
51562306a36Sopenharmony_ci		BUG();
51662306a36Sopenharmony_ci	}
51762306a36Sopenharmony_ci}
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_civoid user_dlm_cluster_unlock(struct user_lock_res *lockres,
52062306a36Sopenharmony_ci			     int level)
52162306a36Sopenharmony_ci{
52262306a36Sopenharmony_ci	if (level != DLM_LOCK_EX &&
52362306a36Sopenharmony_ci	    level != DLM_LOCK_PR) {
52462306a36Sopenharmony_ci		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
52562306a36Sopenharmony_ci		     lockres->l_namelen, lockres->l_name);
52662306a36Sopenharmony_ci		return;
52762306a36Sopenharmony_ci	}
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
53062306a36Sopenharmony_ci	user_dlm_dec_holders(lockres, level);
53162306a36Sopenharmony_ci	__user_dlm_cond_queue_lockres(lockres);
53262306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
53362306a36Sopenharmony_ci}
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_civoid user_dlm_write_lvb(struct inode *inode,
53662306a36Sopenharmony_ci			const char *val,
53762306a36Sopenharmony_ci			unsigned int len)
53862306a36Sopenharmony_ci{
53962306a36Sopenharmony_ci	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
54062306a36Sopenharmony_ci	char *lvb;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	BUG_ON(len > DLM_LVB_LEN);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci	BUG_ON(lockres->l_level < DLM_LOCK_EX);
54762306a36Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
54862306a36Sopenharmony_ci	memcpy(lvb, val, len);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_cibool user_dlm_read_lvb(struct inode *inode, char *val)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
55662306a36Sopenharmony_ci	char *lvb;
55762306a36Sopenharmony_ci	bool ret = true;
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	BUG_ON(lockres->l_level < DLM_LOCK_PR);
56262306a36Sopenharmony_ci	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) {
56362306a36Sopenharmony_ci		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
56462306a36Sopenharmony_ci		memcpy(val, lvb, DLM_LVB_LEN);
56562306a36Sopenharmony_ci	} else
56662306a36Sopenharmony_ci		ret = false;
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
56962306a36Sopenharmony_ci	return ret;
57062306a36Sopenharmony_ci}
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_civoid user_dlm_lock_res_init(struct user_lock_res *lockres,
57362306a36Sopenharmony_ci			    struct dentry *dentry)
57462306a36Sopenharmony_ci{
57562306a36Sopenharmony_ci	memset(lockres, 0, sizeof(*lockres));
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	spin_lock_init(&lockres->l_lock);
57862306a36Sopenharmony_ci	init_waitqueue_head(&lockres->l_event);
57962306a36Sopenharmony_ci	lockres->l_level = DLM_LOCK_IV;
58062306a36Sopenharmony_ci	lockres->l_requested = DLM_LOCK_IV;
58162306a36Sopenharmony_ci	lockres->l_blocking = DLM_LOCK_IV;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	/* should have been checked before getting here. */
58462306a36Sopenharmony_ci	BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	memcpy(lockres->l_name,
58762306a36Sopenharmony_ci	       dentry->d_name.name,
58862306a36Sopenharmony_ci	       dentry->d_name.len);
58962306a36Sopenharmony_ci	lockres->l_namelen = dentry->d_name.len;
59062306a36Sopenharmony_ci}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ciint user_dlm_destroy_lock(struct user_lock_res *lockres)
59362306a36Sopenharmony_ci{
59462306a36Sopenharmony_ci	int status = -EBUSY;
59562306a36Sopenharmony_ci	struct ocfs2_cluster_connection *conn =
59662306a36Sopenharmony_ci		cluster_connection_from_user_lockres(lockres);
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	spin_lock(&lockres->l_lock);
60162306a36Sopenharmony_ci	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
60262306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
60362306a36Sopenharmony_ci		goto bail;
60462306a36Sopenharmony_ci	}
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	while (lockres->l_flags & USER_LOCK_BUSY) {
60962306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci		user_wait_on_busy_lock(lockres);
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci		spin_lock(&lockres->l_lock);
61462306a36Sopenharmony_ci	}
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci	if (lockres->l_ro_holders || lockres->l_ex_holders) {
61762306a36Sopenharmony_ci		lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
61862306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
61962306a36Sopenharmony_ci		goto bail;
62062306a36Sopenharmony_ci	}
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	status = 0;
62362306a36Sopenharmony_ci	if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
62462306a36Sopenharmony_ci		/*
62562306a36Sopenharmony_ci		 * lock is never requested, leave USER_LOCK_IN_TEARDOWN set
62662306a36Sopenharmony_ci		 * to avoid new lock request coming in.
62762306a36Sopenharmony_ci		 */
62862306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
62962306a36Sopenharmony_ci		goto bail;
63062306a36Sopenharmony_ci	}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	lockres->l_flags |= USER_LOCK_BUSY;
63362306a36Sopenharmony_ci	spin_unlock(&lockres->l_lock);
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
63662306a36Sopenharmony_ci	if (status) {
63762306a36Sopenharmony_ci		spin_lock(&lockres->l_lock);
63862306a36Sopenharmony_ci		lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
63962306a36Sopenharmony_ci		lockres->l_flags &= ~USER_LOCK_BUSY;
64062306a36Sopenharmony_ci		spin_unlock(&lockres->l_lock);
64162306a36Sopenharmony_ci		user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
64262306a36Sopenharmony_ci		goto bail;
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	user_wait_on_busy_lock(lockres);
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	status = 0;
64862306a36Sopenharmony_cibail:
64962306a36Sopenharmony_ci	return status;
65062306a36Sopenharmony_ci}
65162306a36Sopenharmony_ci
/*
 * Recovery callback handed to ocfs2_cluster_connect_agnostic().
 * Recovery events are deliberately ignored, so both arguments are
 * unused.
 */
static void user_dlm_recovery_handler_noop(int node_num,
					   void *recovery_data)
{
}
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_civoid user_dlm_set_locking_protocol(void)
66062306a36Sopenharmony_ci{
66162306a36Sopenharmony_ci	ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version);
66262306a36Sopenharmony_ci}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_cistruct ocfs2_cluster_connection *user_dlm_register(const struct qstr *name)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	int rc;
66762306a36Sopenharmony_ci	struct ocfs2_cluster_connection *conn;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	rc = ocfs2_cluster_connect_agnostic(name->name, name->len,
67062306a36Sopenharmony_ci					    &user_dlm_lproto,
67162306a36Sopenharmony_ci					    user_dlm_recovery_handler_noop,
67262306a36Sopenharmony_ci					    NULL, &conn);
67362306a36Sopenharmony_ci	if (rc)
67462306a36Sopenharmony_ci		mlog_errno(rc);
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	return rc ? ERR_PTR(rc) : conn;
67762306a36Sopenharmony_ci}
67862306a36Sopenharmony_ci
/*
 * Close a connection obtained from user_dlm_register(); the second
 * argument to ocfs2_cluster_disconnect() is passed as 0.
 */
void user_dlm_unregister(struct ocfs2_cluster_connection *conn)
{
	ocfs2_cluster_disconnect(conn, 0);
}
683