18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*-
38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0:
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * dlmglue.c
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Code which implements an OCFS2 specific interface to our DLM.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/types.h>
138c2ecf20Sopenharmony_ci#include <linux/slab.h>
148c2ecf20Sopenharmony_ci#include <linux/highmem.h>
158c2ecf20Sopenharmony_ci#include <linux/mm.h>
168c2ecf20Sopenharmony_ci#include <linux/kthread.h>
178c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
188c2ecf20Sopenharmony_ci#include <linux/debugfs.h>
198c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
208c2ecf20Sopenharmony_ci#include <linux/time.h>
218c2ecf20Sopenharmony_ci#include <linux/quotaops.h>
228c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci#define MLOG_MASK_PREFIX ML_DLM_GLUE
258c2ecf20Sopenharmony_ci#include <cluster/masklog.h>
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci#include "ocfs2.h"
288c2ecf20Sopenharmony_ci#include "ocfs2_lockingver.h"
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#include "alloc.h"
318c2ecf20Sopenharmony_ci#include "dcache.h"
328c2ecf20Sopenharmony_ci#include "dlmglue.h"
338c2ecf20Sopenharmony_ci#include "extent_map.h"
348c2ecf20Sopenharmony_ci#include "file.h"
358c2ecf20Sopenharmony_ci#include "heartbeat.h"
368c2ecf20Sopenharmony_ci#include "inode.h"
378c2ecf20Sopenharmony_ci#include "journal.h"
388c2ecf20Sopenharmony_ci#include "stackglue.h"
398c2ecf20Sopenharmony_ci#include "slot_map.h"
408c2ecf20Sopenharmony_ci#include "super.h"
418c2ecf20Sopenharmony_ci#include "uptodate.h"
428c2ecf20Sopenharmony_ci#include "quota.h"
438c2ecf20Sopenharmony_ci#include "refcounttree.h"
448c2ecf20Sopenharmony_ci#include "acl.h"
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci#include "buffer_head_io.h"
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistruct ocfs2_mask_waiter {
498c2ecf20Sopenharmony_ci	struct list_head	mw_item;
508c2ecf20Sopenharmony_ci	int			mw_status;
518c2ecf20Sopenharmony_ci	struct completion	mw_complete;
528c2ecf20Sopenharmony_ci	unsigned long		mw_mask;
538c2ecf20Sopenharmony_ci	unsigned long		mw_goal;
548c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS
558c2ecf20Sopenharmony_ci	ktime_t			mw_lock_start;
568c2ecf20Sopenharmony_ci#endif
578c2ecf20Sopenharmony_ci};
588c2ecf20Sopenharmony_ci
/*
 * ->get_osb callbacks referenced by the ocfs2_lock_res_ops tables below;
 * the definitions follow later in this file.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
638c2ecf20Sopenharmony_ci
/*
 * Values handed back by ->downconvert_worker functions.
 *
 * They control the precise actions of ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,
	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_cistruct ocfs2_unblock_ctl {
808c2ecf20Sopenharmony_ci	int requeue;
818c2ecf20Sopenharmony_ci	enum ocfs2_unblock_action unblock_action;
828c2ecf20Sopenharmony_ci};
838c2ecf20Sopenharmony_ci
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Lockdep class keys, one per lock type; ocfs2_lock_res_init_common()
 * indexes this array by the lock type.
 */
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
888c2ecf20Sopenharmony_ci
/*
 * Lock-type specific callbacks that are plugged into the
 * ocfs2_lock_res_ops tables further down.
 */

/* Inode (meta) lock. */
static int
ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static void
ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static int
ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, int blocking);

/* Dentry lock. */
static int
ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
static void
ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
			 struct ocfs2_lock_res *lockres);

/* Quota info lock. */
static void
ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount block lock. */
static int
ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
				 int new_level);
static int
ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
1088c2ecf20Sopenharmony_ci
/*
 * Dump the meta LVB of @__lockres at log mask @__level, tagging the output
 * with the calling function and line.
 */
#define mlog_meta_lvb(__level, __lockres)				\
	ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__,		\
				 __LINE__, __lockres)
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci/* This aids in debugging situations where a bad LVB might be involved. */
1128c2ecf20Sopenharmony_cistatic void ocfs2_dump_meta_lvb_info(u64 level,
1138c2ecf20Sopenharmony_ci				     const char *function,
1148c2ecf20Sopenharmony_ci				     unsigned int line,
1158c2ecf20Sopenharmony_ci				     struct ocfs2_lock_res *lockres)
1168c2ecf20Sopenharmony_ci{
1178c2ecf20Sopenharmony_ci	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	mlog(level, "LVB information for %s (called from %s:%u):\n",
1208c2ecf20Sopenharmony_ci	     lockres->l_name, function, line);
1218c2ecf20Sopenharmony_ci	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1228c2ecf20Sopenharmony_ci	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1238c2ecf20Sopenharmony_ci	     be32_to_cpu(lvb->lvb_igeneration));
1248c2ecf20Sopenharmony_ci	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1258c2ecf20Sopenharmony_ci	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1268c2ecf20Sopenharmony_ci	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1278c2ecf20Sopenharmony_ci	     be16_to_cpu(lvb->lvb_imode));
1288c2ecf20Sopenharmony_ci	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1298c2ecf20Sopenharmony_ci	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1308c2ecf20Sopenharmony_ci	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1318c2ecf20Sopenharmony_ci	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1328c2ecf20Sopenharmony_ci	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1338c2ecf20Sopenharmony_ci	     be32_to_cpu(lvb->lvb_iattr));
1348c2ecf20Sopenharmony_ci}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks and flags that fine tune the behavior of the generic
 * dlmglue locking infrastructure.
 *
 * The most basic lock types simply point ->l_priv at their struct
 * ocfs2_super and let the default actions manage things.
 *
 * Right now every lock type must also implement an init function and
 * trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() should be
 * called once the lock is no longer needed (i.e., at object destruction
 * time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Only
	 * define this callback when ->l_priv is not an ocfs2_super
	 * pointer.
	 */
	struct ocfs2_super *(*get_osb)(struct ocfs2_lock_res *lockres);

	/*
	 * Optional. Called in the downconvert thread after a successful
	 * downconvert. The lockres is not referenced again once this
	 * callback has been called, so freeing memory etc. is safe.
	 *
	 * Exactly when it is called is controlled by
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);

	/*
	 * Lets a lock type add its own checks on whether downconverting
	 * is safe. Return 0 to re-queue the downconvert for later,
	 * nonzero to continue.
	 *
	 * For most locks the default check - no incompatible holders -
	 * is sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *lockres,
				 int new_level);

	/*
	 * Lets a lock type populate the lock value block. Called on
	 * downconvert and when a lock is dropped.
	 *
	 * Lock types that want this should set LOCK_TYPE_USES_LVB in
	 * the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *lockres);

	/*
	 * Called from the downconvert thread once it has been decided
	 * that a lock will be downconverted. No locks are held, so the
	 * function may do work that schedules (syncing out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread what to do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *lockres,
				  int blocking);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type. Each flag is described where it is defined.
	 */
	int flags;
};
2088c2ecf20Sopenharmony_ci
/*
 * Set for lock types that want to "refresh" potentially stale data the
 * first time a meaningful (PRMODE or EXMODE) level is obtained. With this
 * flag the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the lockres
 * l_flags; the locking wrapper is expected to clear
 * OCFS2_LOCK_NEEDS_REFRESH again once it has finished.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * Set for lock types that make use of the lock value block. Such a lock
 * type must define the ->set_lvb callback.
 */
#define LOCK_TYPE_USES_LVB		0x2
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
2268c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_inode_osb,
2278c2ecf20Sopenharmony_ci	.flags		= 0,
2288c2ecf20Sopenharmony_ci};
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
2318c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_inode_osb,
2328c2ecf20Sopenharmony_ci	.check_downconvert = ocfs2_check_meta_downconvert,
2338c2ecf20Sopenharmony_ci	.set_lvb	= ocfs2_set_meta_lvb,
2348c2ecf20Sopenharmony_ci	.downconvert_worker = ocfs2_data_convert_worker,
2358c2ecf20Sopenharmony_ci	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
2368c2ecf20Sopenharmony_ci};
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_super_lops = {
2398c2ecf20Sopenharmony_ci	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
2408c2ecf20Sopenharmony_ci};
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_rename_lops = {
2438c2ecf20Sopenharmony_ci	.flags		= 0,
2448c2ecf20Sopenharmony_ci};
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2478c2ecf20Sopenharmony_ci	.flags		= 0,
2488c2ecf20Sopenharmony_ci};
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
2518c2ecf20Sopenharmony_ci	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
2528c2ecf20Sopenharmony_ci};
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
2558c2ecf20Sopenharmony_ci	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
2568c2ecf20Sopenharmony_ci};
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
2598c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_dentry_osb,
2608c2ecf20Sopenharmony_ci	.post_unlock	= ocfs2_dentry_post_unlock,
2618c2ecf20Sopenharmony_ci	.downconvert_worker = ocfs2_dentry_convert_worker,
2628c2ecf20Sopenharmony_ci	.flags		= 0,
2638c2ecf20Sopenharmony_ci};
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
2668c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_inode_osb,
2678c2ecf20Sopenharmony_ci	.flags		= 0,
2688c2ecf20Sopenharmony_ci};
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_flock_lops = {
2718c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_file_osb,
2728c2ecf20Sopenharmony_ci	.flags		= 0,
2738c2ecf20Sopenharmony_ci};
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2768c2ecf20Sopenharmony_ci	.set_lvb	= ocfs2_set_qinfo_lvb,
2778c2ecf20Sopenharmony_ci	.get_osb	= ocfs2_get_qinfo_osb,
2788c2ecf20Sopenharmony_ci	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2798c2ecf20Sopenharmony_ci};
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2828c2ecf20Sopenharmony_ci	.check_downconvert = ocfs2_check_refcount_downconvert,
2838c2ecf20Sopenharmony_ci	.downconvert_worker = ocfs2_refcount_convert_worker,
2848c2ecf20Sopenharmony_ci	.flags		= 0,
2858c2ecf20Sopenharmony_ci};
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_cistatic inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
2888c2ecf20Sopenharmony_ci{
2898c2ecf20Sopenharmony_ci	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
2908c2ecf20Sopenharmony_ci		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
2918c2ecf20Sopenharmony_ci		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
2928c2ecf20Sopenharmony_ci}
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_cistatic inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
2958c2ecf20Sopenharmony_ci{
2968c2ecf20Sopenharmony_ci	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
2978c2ecf20Sopenharmony_ci}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_cistatic inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
3008c2ecf20Sopenharmony_ci{
3018c2ecf20Sopenharmony_ci	BUG_ON(!ocfs2_is_inode_lock(lockres));
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	return (struct inode *) lockres->l_priv;
3048c2ecf20Sopenharmony_ci}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_cistatic inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
3078c2ecf20Sopenharmony_ci{
3088c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	return (struct ocfs2_dentry_lock *)lockres->l_priv;
3118c2ecf20Sopenharmony_ci}
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_cistatic inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3148c2ecf20Sopenharmony_ci{
3158c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3188c2ecf20Sopenharmony_ci}
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_cistatic inline struct ocfs2_refcount_tree *
3218c2ecf20Sopenharmony_ciocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3228c2ecf20Sopenharmony_ci{
3238c2ecf20Sopenharmony_ci	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3248c2ecf20Sopenharmony_ci}
3258c2ecf20Sopenharmony_ci
3268c2ecf20Sopenharmony_cistatic inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
3278c2ecf20Sopenharmony_ci{
3288c2ecf20Sopenharmony_ci	if (lockres->l_ops->get_osb)
3298c2ecf20Sopenharmony_ci		return lockres->l_ops->get_osb(lockres);
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	return (struct ocfs2_super *)lockres->l_priv;
3328c2ecf20Sopenharmony_ci}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_cistatic int ocfs2_lock_create(struct ocfs2_super *osb,
3358c2ecf20Sopenharmony_ci			     struct ocfs2_lock_res *lockres,
3368c2ecf20Sopenharmony_ci			     int level,
3378c2ecf20Sopenharmony_ci			     u32 dlm_flags);
3388c2ecf20Sopenharmony_cistatic inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
3398c2ecf20Sopenharmony_ci						     int wanted);
3408c2ecf20Sopenharmony_cistatic void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
3418c2ecf20Sopenharmony_ci				   struct ocfs2_lock_res *lockres,
3428c2ecf20Sopenharmony_ci				   int level, unsigned long caller_ip);
3438c2ecf20Sopenharmony_cistatic inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
3448c2ecf20Sopenharmony_ci					struct ocfs2_lock_res *lockres,
3458c2ecf20Sopenharmony_ci					int level)
3468c2ecf20Sopenharmony_ci{
3478c2ecf20Sopenharmony_ci	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
3488c2ecf20Sopenharmony_ci}
3498c2ecf20Sopenharmony_ci
/* Forward declarations for helpers that are used before their definitions. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int
ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void
ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM failure at ML_ERROR. @_func names the call that failed, @_err
 * is its status and @_lockres is the lock resource involved.
 *
 * For dentry locks only the first OCFS2_DENTRY_LOCK_INO_START - 1
 * characters of l_name are printed as text, followed by the value returned
 * by ocfs2_get_dentry_lock_ino() in hex. Every other lock type prints
 * l_name as a plain string.
 *
 * All arguments are parenthesized so that expressions can be passed
 * safely. Note that @_lockres is evaluated more than once.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
/* Forward declarations for helpers that are used before their definitions. */
static int
ocfs2_downconvert_thread(void *arg);
static void
ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static int
ocfs2_inode_lock_update(struct inode *inode, struct buffer_head **bh);
static void
ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int
ocfs2_highest_compat_lock_level(int level);
static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static int
ocfs2_downconvert_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres,
		       int new_level, int lvb, unsigned int generation);
static int
ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres);
static int
ocfs2_cancel_convert(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres);
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_cistatic void ocfs2_build_lock_name(enum ocfs2_lock_type type,
3888c2ecf20Sopenharmony_ci				  u64 blkno,
3898c2ecf20Sopenharmony_ci				  u32 generation,
3908c2ecf20Sopenharmony_ci				  char *name)
3918c2ecf20Sopenharmony_ci{
3928c2ecf20Sopenharmony_ci	int len;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
3978c2ecf20Sopenharmony_ci		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
3988c2ecf20Sopenharmony_ci		       (long long)blkno, generation);
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	mlog(0, "built lock resource with name: %s\n", name);
4038c2ecf20Sopenharmony_ci}
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_cistatic void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
4088c2ecf20Sopenharmony_ci				       struct ocfs2_dlm_debug *dlm_debug)
4098c2ecf20Sopenharmony_ci{
4108c2ecf20Sopenharmony_ci	mlog(0, "Add tracking for lockres %s\n", res->l_name);
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci	spin_lock(&ocfs2_dlm_tracking_lock);
4138c2ecf20Sopenharmony_ci	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
4148c2ecf20Sopenharmony_ci	spin_unlock(&ocfs2_dlm_tracking_lock);
4158c2ecf20Sopenharmony_ci}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_cistatic void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
4188c2ecf20Sopenharmony_ci{
4198c2ecf20Sopenharmony_ci	spin_lock(&ocfs2_dlm_tracking_lock);
4208c2ecf20Sopenharmony_ci	if (!list_empty(&res->l_debug_list))
4218c2ecf20Sopenharmony_ci		list_del_init(&res->l_debug_list);
4228c2ecf20Sopenharmony_ci	spin_unlock(&ocfs2_dlm_tracking_lock);
4238c2ecf20Sopenharmony_ci}
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS
4268c2ecf20Sopenharmony_cistatic void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4278c2ecf20Sopenharmony_ci{
4288c2ecf20Sopenharmony_ci	res->l_lock_refresh = 0;
4298c2ecf20Sopenharmony_ci	res->l_lock_wait = 0;
4308c2ecf20Sopenharmony_ci	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4318c2ecf20Sopenharmony_ci	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4328c2ecf20Sopenharmony_ci}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_cistatic void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4358c2ecf20Sopenharmony_ci				    struct ocfs2_mask_waiter *mw, int ret)
4368c2ecf20Sopenharmony_ci{
4378c2ecf20Sopenharmony_ci	u32 usec;
4388c2ecf20Sopenharmony_ci	ktime_t kt;
4398c2ecf20Sopenharmony_ci	struct ocfs2_lock_stats *stats;
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	if (level == LKM_PRMODE)
4428c2ecf20Sopenharmony_ci		stats = &res->l_lock_prmode;
4438c2ecf20Sopenharmony_ci	else if (level == LKM_EXMODE)
4448c2ecf20Sopenharmony_ci		stats = &res->l_lock_exmode;
4458c2ecf20Sopenharmony_ci	else
4468c2ecf20Sopenharmony_ci		return;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4498c2ecf20Sopenharmony_ci	usec = ktime_to_us(kt);
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	stats->ls_gets++;
4528c2ecf20Sopenharmony_ci	stats->ls_total += ktime_to_ns(kt);
4538c2ecf20Sopenharmony_ci	/* overflow */
4548c2ecf20Sopenharmony_ci	if (unlikely(stats->ls_gets == 0)) {
4558c2ecf20Sopenharmony_ci		stats->ls_gets++;
4568c2ecf20Sopenharmony_ci		stats->ls_total = ktime_to_ns(kt);
4578c2ecf20Sopenharmony_ci	}
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	if (stats->ls_max < usec)
4608c2ecf20Sopenharmony_ci		stats->ls_max = usec;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	if (ret)
4638c2ecf20Sopenharmony_ci		stats->ls_fail++;
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_ci	stats->ls_last = ktime_to_us(ktime_get_real());
4668c2ecf20Sopenharmony_ci}
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_cistatic inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4698c2ecf20Sopenharmony_ci{
4708c2ecf20Sopenharmony_ci	lockres->l_lock_refresh++;
4718c2ecf20Sopenharmony_ci}
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_cistatic inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
4748c2ecf20Sopenharmony_ci{
4758c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter *mw;
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	if (list_empty(&lockres->l_mask_waiters)) {
4788c2ecf20Sopenharmony_ci		lockres->l_lock_wait = 0;
4798c2ecf20Sopenharmony_ci		return;
4808c2ecf20Sopenharmony_ci	}
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	mw = list_first_entry(&lockres->l_mask_waiters,
4838c2ecf20Sopenharmony_ci				struct ocfs2_mask_waiter, mw_item);
4848c2ecf20Sopenharmony_ci	lockres->l_lock_wait =
4858c2ecf20Sopenharmony_ci			ktime_to_us(ktime_mono_to_real(mw->mw_lock_start));
4868c2ecf20Sopenharmony_ci}
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_cistatic inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4898c2ecf20Sopenharmony_ci{
4908c2ecf20Sopenharmony_ci	mw->mw_lock_start = ktime_get();
4918c2ecf20Sopenharmony_ci}
4928c2ecf20Sopenharmony_ci#else
/*
 * CONFIG_OCFS2_FS_STATS is off: the statistics hooks are empty stubs so
 * that callers need no #ifdefs.
 */
static inline void
ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}

static inline void
ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
			struct ocfs2_mask_waiter *mw, int ret)
{
}

static inline void
ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}

static inline void
ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
{
}

static inline void
ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
5098c2ecf20Sopenharmony_ci#endif
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_cistatic void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
5128c2ecf20Sopenharmony_ci				       struct ocfs2_lock_res *res,
5138c2ecf20Sopenharmony_ci				       enum ocfs2_lock_type type,
5148c2ecf20Sopenharmony_ci				       struct ocfs2_lock_res_ops *ops,
5158c2ecf20Sopenharmony_ci				       void *priv)
5168c2ecf20Sopenharmony_ci{
5178c2ecf20Sopenharmony_ci	res->l_type          = type;
5188c2ecf20Sopenharmony_ci	res->l_ops           = ops;
5198c2ecf20Sopenharmony_ci	res->l_priv          = priv;
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	res->l_level         = DLM_LOCK_IV;
5228c2ecf20Sopenharmony_ci	res->l_requested     = DLM_LOCK_IV;
5238c2ecf20Sopenharmony_ci	res->l_blocking      = DLM_LOCK_IV;
5248c2ecf20Sopenharmony_ci	res->l_action        = OCFS2_AST_INVALID;
5258c2ecf20Sopenharmony_ci	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	res->l_flags         = OCFS2_LOCK_INITIALIZED;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	ocfs2_init_lock_stats(res);
5328c2ecf20Sopenharmony_ci#ifdef CONFIG_DEBUG_LOCK_ALLOC
5338c2ecf20Sopenharmony_ci	if (type != OCFS2_LOCK_TYPE_OPEN)
5348c2ecf20Sopenharmony_ci		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
5358c2ecf20Sopenharmony_ci				 &lockdep_keys[type], 0);
5368c2ecf20Sopenharmony_ci	else
5378c2ecf20Sopenharmony_ci		res->l_lockdep_map.key = NULL;
5388c2ecf20Sopenharmony_ci#endif
5398c2ecf20Sopenharmony_ci}
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_civoid ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
5428c2ecf20Sopenharmony_ci{
5438c2ecf20Sopenharmony_ci	/* This also clears out the lock status block */
5448c2ecf20Sopenharmony_ci	memset(res, 0, sizeof(struct ocfs2_lock_res));
5458c2ecf20Sopenharmony_ci	spin_lock_init(&res->l_lock);
5468c2ecf20Sopenharmony_ci	init_waitqueue_head(&res->l_event);
5478c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->l_blocked_list);
5488c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->l_mask_waiters);
5498c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&res->l_holders);
5508c2ecf20Sopenharmony_ci}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_civoid ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
5538c2ecf20Sopenharmony_ci			       enum ocfs2_lock_type type,
5548c2ecf20Sopenharmony_ci			       unsigned int generation,
5558c2ecf20Sopenharmony_ci			       struct inode *inode)
5568c2ecf20Sopenharmony_ci{
5578c2ecf20Sopenharmony_ci	struct ocfs2_lock_res_ops *ops;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	switch(type) {
5608c2ecf20Sopenharmony_ci		case OCFS2_LOCK_TYPE_RW:
5618c2ecf20Sopenharmony_ci			ops = &ocfs2_inode_rw_lops;
5628c2ecf20Sopenharmony_ci			break;
5638c2ecf20Sopenharmony_ci		case OCFS2_LOCK_TYPE_META:
5648c2ecf20Sopenharmony_ci			ops = &ocfs2_inode_inode_lops;
5658c2ecf20Sopenharmony_ci			break;
5668c2ecf20Sopenharmony_ci		case OCFS2_LOCK_TYPE_OPEN:
5678c2ecf20Sopenharmony_ci			ops = &ocfs2_inode_open_lops;
5688c2ecf20Sopenharmony_ci			break;
5698c2ecf20Sopenharmony_ci		default:
5708c2ecf20Sopenharmony_ci			mlog_bug_on_msg(1, "type: %d\n", type);
5718c2ecf20Sopenharmony_ci			ops = NULL; /* thanks, gcc */
5728c2ecf20Sopenharmony_ci			break;
5738c2ecf20Sopenharmony_ci	}
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
5768c2ecf20Sopenharmony_ci			      generation, res->l_name);
5778c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
5788c2ecf20Sopenharmony_ci}
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_cistatic struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
5818c2ecf20Sopenharmony_ci{
5828c2ecf20Sopenharmony_ci	struct inode *inode = ocfs2_lock_res_inode(lockres);
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	return OCFS2_SB(inode->i_sb);
5858c2ecf20Sopenharmony_ci}
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_cistatic struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5888c2ecf20Sopenharmony_ci{
5898c2ecf20Sopenharmony_ci	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	return OCFS2_SB(info->dqi_gi.dqi_sb);
5928c2ecf20Sopenharmony_ci}
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_cistatic struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
5958c2ecf20Sopenharmony_ci{
5968c2ecf20Sopenharmony_ci	struct ocfs2_file_private *fp = lockres->l_priv;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
5998c2ecf20Sopenharmony_ci}
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_cistatic __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
6028c2ecf20Sopenharmony_ci{
6038c2ecf20Sopenharmony_ci	__be64 inode_blkno_be;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
6068c2ecf20Sopenharmony_ci	       sizeof(__be64));
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	return be64_to_cpu(inode_blkno_be);
6098c2ecf20Sopenharmony_ci}
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_cistatic struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
6128c2ecf20Sopenharmony_ci{
6138c2ecf20Sopenharmony_ci	struct ocfs2_dentry_lock *dl = lockres->l_priv;
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	return OCFS2_SB(dl->dl_inode->i_sb);
6168c2ecf20Sopenharmony_ci}
6178c2ecf20Sopenharmony_ci
6188c2ecf20Sopenharmony_civoid ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
6198c2ecf20Sopenharmony_ci				u64 parent, struct inode *inode)
6208c2ecf20Sopenharmony_ci{
6218c2ecf20Sopenharmony_ci	int len;
6228c2ecf20Sopenharmony_ci	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
6238c2ecf20Sopenharmony_ci	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
6248c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(lockres);
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci	/*
6298c2ecf20Sopenharmony_ci	 * Unfortunately, the standard lock naming scheme won't work
6308c2ecf20Sopenharmony_ci	 * here because we have two 16 byte values to use. Instead,
6318c2ecf20Sopenharmony_ci	 * we'll stuff the inode number as a binary value. We still
6328c2ecf20Sopenharmony_ci	 * want error prints to show something without garbling the
6338c2ecf20Sopenharmony_ci	 * display, so drop a null byte in there before the inode
6348c2ecf20Sopenharmony_ci	 * number. A future version of OCFS2 will likely use all
6358c2ecf20Sopenharmony_ci	 * binary lock names. The stringified names have been a
6368c2ecf20Sopenharmony_ci	 * tremendous aid in debugging, but now that the debugfs
6378c2ecf20Sopenharmony_ci	 * interface exists, we can mangle things there if need be.
6388c2ecf20Sopenharmony_ci	 *
6398c2ecf20Sopenharmony_ci	 * NOTE: We also drop the standard "pad" value (the total lock
6408c2ecf20Sopenharmony_ci	 * name size stays the same though - the last part is all
6418c2ecf20Sopenharmony_ci	 * zeros due to the memset in ocfs2_lock_res_init_once()
6428c2ecf20Sopenharmony_ci	 */
6438c2ecf20Sopenharmony_ci	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
6448c2ecf20Sopenharmony_ci		       "%c%016llx",
6458c2ecf20Sopenharmony_ci		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
6468c2ecf20Sopenharmony_ci		       (long long)parent);
6478c2ecf20Sopenharmony_ci
6488c2ecf20Sopenharmony_ci	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
6518c2ecf20Sopenharmony_ci	       sizeof(__be64));
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
6548c2ecf20Sopenharmony_ci				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
6558c2ecf20Sopenharmony_ci				   dl);
6568c2ecf20Sopenharmony_ci}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_cistatic void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
6598c2ecf20Sopenharmony_ci				      struct ocfs2_super *osb)
6608c2ecf20Sopenharmony_ci{
6618c2ecf20Sopenharmony_ci	/* Superblock lockres doesn't come from a slab so we call init
6628c2ecf20Sopenharmony_ci	 * once on it manually.  */
6638c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(res);
6648c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
6658c2ecf20Sopenharmony_ci			      0, res->l_name);
6668c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
6678c2ecf20Sopenharmony_ci				   &ocfs2_super_lops, osb);
6688c2ecf20Sopenharmony_ci}
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_cistatic void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
6718c2ecf20Sopenharmony_ci				       struct ocfs2_super *osb)
6728c2ecf20Sopenharmony_ci{
6738c2ecf20Sopenharmony_ci	/* Rename lockres doesn't come from a slab so we call init
6748c2ecf20Sopenharmony_ci	 * once on it manually.  */
6758c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(res);
6768c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
6778c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
6788c2ecf20Sopenharmony_ci				   &ocfs2_rename_lops, osb);
6798c2ecf20Sopenharmony_ci}
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_cistatic void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6828c2ecf20Sopenharmony_ci					 struct ocfs2_super *osb)
6838c2ecf20Sopenharmony_ci{
6848c2ecf20Sopenharmony_ci	/* nfs_sync lockres doesn't come from a slab so we call init
6858c2ecf20Sopenharmony_ci	 * once on it manually.  */
6868c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(res);
6878c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6888c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6898c2ecf20Sopenharmony_ci				   &ocfs2_nfs_sync_lops, osb);
6908c2ecf20Sopenharmony_ci}
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_cistatic void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
6938c2ecf20Sopenharmony_ci{
6948c2ecf20Sopenharmony_ci	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
6958c2ecf20Sopenharmony_ci	init_rwsem(&osb->nfs_sync_rwlock);
6968c2ecf20Sopenharmony_ci}
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_civoid ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
6998c2ecf20Sopenharmony_ci{
7008c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_ci	/* Only one trimfs thread are allowed to work at the same time. */
7038c2ecf20Sopenharmony_ci	mutex_lock(&osb->obs_trim_fs_mutex);
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(lockres);
7068c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name);
7078c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS,
7088c2ecf20Sopenharmony_ci				   &ocfs2_trim_fs_lops, osb);
7098c2ecf20Sopenharmony_ci}
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_civoid ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb)
7128c2ecf20Sopenharmony_ci{
7138c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_ci	ocfs2_simple_drop_lockres(osb, lockres);
7168c2ecf20Sopenharmony_ci	ocfs2_lock_res_free(lockres);
7178c2ecf20Sopenharmony_ci
7188c2ecf20Sopenharmony_ci	mutex_unlock(&osb->obs_trim_fs_mutex);
7198c2ecf20Sopenharmony_ci}
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_cistatic void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
7228c2ecf20Sopenharmony_ci					    struct ocfs2_super *osb)
7238c2ecf20Sopenharmony_ci{
7248c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(res);
7258c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
7268c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
7278c2ecf20Sopenharmony_ci				   &ocfs2_orphan_scan_lops, osb);
7288c2ecf20Sopenharmony_ci}
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_civoid ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
7318c2ecf20Sopenharmony_ci			      struct ocfs2_file_private *fp)
7328c2ecf20Sopenharmony_ci{
7338c2ecf20Sopenharmony_ci	struct inode *inode = fp->fp_file->f_mapping->host;
7348c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(lockres);
7378c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
7388c2ecf20Sopenharmony_ci			      inode->i_generation, lockres->l_name);
7398c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
7408c2ecf20Sopenharmony_ci				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
7418c2ecf20Sopenharmony_ci				   fp);
7428c2ecf20Sopenharmony_ci	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
7438c2ecf20Sopenharmony_ci}
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_civoid ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7468c2ecf20Sopenharmony_ci			       struct ocfs2_mem_dqinfo *info)
7478c2ecf20Sopenharmony_ci{
7488c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(lockres);
7498c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7508c2ecf20Sopenharmony_ci			      0, lockres->l_name);
7518c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7528c2ecf20Sopenharmony_ci				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7538c2ecf20Sopenharmony_ci				   info);
7548c2ecf20Sopenharmony_ci}
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_civoid ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7578c2ecf20Sopenharmony_ci				  struct ocfs2_super *osb, u64 ref_blkno,
7588c2ecf20Sopenharmony_ci				  unsigned int generation)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_once(lockres);
7618c2ecf20Sopenharmony_ci	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7628c2ecf20Sopenharmony_ci			      generation, lockres->l_name);
7638c2ecf20Sopenharmony_ci	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7648c2ecf20Sopenharmony_ci				   &ocfs2_refcount_block_lops, osb);
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_civoid ocfs2_lock_res_free(struct ocfs2_lock_res *res)
7688c2ecf20Sopenharmony_ci{
7698c2ecf20Sopenharmony_ci	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
7708c2ecf20Sopenharmony_ci		return;
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	ocfs2_remove_lockres_tracking(res);
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
7758c2ecf20Sopenharmony_ci			"Lockres %s is on the blocked list\n",
7768c2ecf20Sopenharmony_ci			res->l_name);
7778c2ecf20Sopenharmony_ci	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
7788c2ecf20Sopenharmony_ci			"Lockres %s has mask waiters pending\n",
7798c2ecf20Sopenharmony_ci			res->l_name);
7808c2ecf20Sopenharmony_ci	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
7818c2ecf20Sopenharmony_ci			"Lockres %s is locked\n",
7828c2ecf20Sopenharmony_ci			res->l_name);
7838c2ecf20Sopenharmony_ci	mlog_bug_on_msg(res->l_ro_holders,
7848c2ecf20Sopenharmony_ci			"Lockres %s has %u ro holders\n",
7858c2ecf20Sopenharmony_ci			res->l_name, res->l_ro_holders);
7868c2ecf20Sopenharmony_ci	mlog_bug_on_msg(res->l_ex_holders,
7878c2ecf20Sopenharmony_ci			"Lockres %s has %u ex holders\n",
7888c2ecf20Sopenharmony_ci			res->l_name, res->l_ex_holders);
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_ci	/* Need to clear out the lock status block for the dlm */
7918c2ecf20Sopenharmony_ci	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	res->l_flags = 0UL;
7948c2ecf20Sopenharmony_ci}
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci/*
7978c2ecf20Sopenharmony_ci * Keep a list of processes who have interest in a lockres.
7988c2ecf20Sopenharmony_ci * Note: this is now only uesed for check recursive cluster locking.
7998c2ecf20Sopenharmony_ci */
8008c2ecf20Sopenharmony_cistatic inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
8018c2ecf20Sopenharmony_ci				   struct ocfs2_lock_holder *oh)
8028c2ecf20Sopenharmony_ci{
8038c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&oh->oh_list);
8048c2ecf20Sopenharmony_ci	oh->oh_owner_pid = get_pid(task_pid(current));
8058c2ecf20Sopenharmony_ci
8068c2ecf20Sopenharmony_ci	spin_lock(&lockres->l_lock);
8078c2ecf20Sopenharmony_ci	list_add_tail(&oh->oh_list, &lockres->l_holders);
8088c2ecf20Sopenharmony_ci	spin_unlock(&lockres->l_lock);
8098c2ecf20Sopenharmony_ci}
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_cistatic struct ocfs2_lock_holder *
8128c2ecf20Sopenharmony_ciocfs2_pid_holder(struct ocfs2_lock_res *lockres,
8138c2ecf20Sopenharmony_ci		struct pid *pid)
8148c2ecf20Sopenharmony_ci{
8158c2ecf20Sopenharmony_ci	struct ocfs2_lock_holder *oh;
8168c2ecf20Sopenharmony_ci
8178c2ecf20Sopenharmony_ci	spin_lock(&lockres->l_lock);
8188c2ecf20Sopenharmony_ci	list_for_each_entry(oh, &lockres->l_holders, oh_list) {
8198c2ecf20Sopenharmony_ci		if (oh->oh_owner_pid == pid) {
8208c2ecf20Sopenharmony_ci			spin_unlock(&lockres->l_lock);
8218c2ecf20Sopenharmony_ci			return oh;
8228c2ecf20Sopenharmony_ci		}
8238c2ecf20Sopenharmony_ci	}
8248c2ecf20Sopenharmony_ci	spin_unlock(&lockres->l_lock);
8258c2ecf20Sopenharmony_ci	return NULL;
8268c2ecf20Sopenharmony_ci}
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_cistatic inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
8298c2ecf20Sopenharmony_ci				       struct ocfs2_lock_holder *oh)
8308c2ecf20Sopenharmony_ci{
8318c2ecf20Sopenharmony_ci	spin_lock(&lockres->l_lock);
8328c2ecf20Sopenharmony_ci	list_del(&oh->oh_list);
8338c2ecf20Sopenharmony_ci	spin_unlock(&lockres->l_lock);
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci	put_pid(oh->oh_owner_pid);
8368c2ecf20Sopenharmony_ci}
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci
8398c2ecf20Sopenharmony_cistatic inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
8408c2ecf20Sopenharmony_ci				     int level)
8418c2ecf20Sopenharmony_ci{
8428c2ecf20Sopenharmony_ci	BUG_ON(!lockres);
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci	switch(level) {
8458c2ecf20Sopenharmony_ci	case DLM_LOCK_EX:
8468c2ecf20Sopenharmony_ci		lockres->l_ex_holders++;
8478c2ecf20Sopenharmony_ci		break;
8488c2ecf20Sopenharmony_ci	case DLM_LOCK_PR:
8498c2ecf20Sopenharmony_ci		lockres->l_ro_holders++;
8508c2ecf20Sopenharmony_ci		break;
8518c2ecf20Sopenharmony_ci	default:
8528c2ecf20Sopenharmony_ci		BUG();
8538c2ecf20Sopenharmony_ci	}
8548c2ecf20Sopenharmony_ci}
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_cistatic inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
8578c2ecf20Sopenharmony_ci				     int level)
8588c2ecf20Sopenharmony_ci{
8598c2ecf20Sopenharmony_ci	BUG_ON(!lockres);
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	switch(level) {
8628c2ecf20Sopenharmony_ci	case DLM_LOCK_EX:
8638c2ecf20Sopenharmony_ci		BUG_ON(!lockres->l_ex_holders);
8648c2ecf20Sopenharmony_ci		lockres->l_ex_holders--;
8658c2ecf20Sopenharmony_ci		break;
8668c2ecf20Sopenharmony_ci	case DLM_LOCK_PR:
8678c2ecf20Sopenharmony_ci		BUG_ON(!lockres->l_ro_holders);
8688c2ecf20Sopenharmony_ci		lockres->l_ro_holders--;
8698c2ecf20Sopenharmony_ci		break;
8708c2ecf20Sopenharmony_ci	default:
8718c2ecf20Sopenharmony_ci		BUG();
8728c2ecf20Sopenharmony_ci	}
8738c2ecf20Sopenharmony_ci}
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci/* WARNING: This function lives in a world where the only three lock
8768c2ecf20Sopenharmony_ci * levels are EX, PR, and NL. It *will* have to be adjusted when more
8778c2ecf20Sopenharmony_ci * lock types are added. */
8788c2ecf20Sopenharmony_cistatic inline int ocfs2_highest_compat_lock_level(int level)
8798c2ecf20Sopenharmony_ci{
8808c2ecf20Sopenharmony_ci	int new_level = DLM_LOCK_EX;
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_ci	if (level == DLM_LOCK_EX)
8838c2ecf20Sopenharmony_ci		new_level = DLM_LOCK_NL;
8848c2ecf20Sopenharmony_ci	else if (level == DLM_LOCK_PR)
8858c2ecf20Sopenharmony_ci		new_level = DLM_LOCK_PR;
8868c2ecf20Sopenharmony_ci	return new_level;
8878c2ecf20Sopenharmony_ci}
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_cistatic void lockres_set_flags(struct ocfs2_lock_res *lockres,
8908c2ecf20Sopenharmony_ci			      unsigned long newflags)
8918c2ecf20Sopenharmony_ci{
8928c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter *mw, *tmp;
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci 	assert_spin_locked(&lockres->l_lock);
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	lockres->l_flags = newflags;
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
8998c2ecf20Sopenharmony_ci		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
9008c2ecf20Sopenharmony_ci			continue;
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci		list_del_init(&mw->mw_item);
9038c2ecf20Sopenharmony_ci		mw->mw_status = 0;
9048c2ecf20Sopenharmony_ci		complete(&mw->mw_complete);
9058c2ecf20Sopenharmony_ci		ocfs2_track_lock_wait(lockres);
9068c2ecf20Sopenharmony_ci	}
9078c2ecf20Sopenharmony_ci}
9088c2ecf20Sopenharmony_cistatic void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
9098c2ecf20Sopenharmony_ci{
9108c2ecf20Sopenharmony_ci	lockres_set_flags(lockres, lockres->l_flags | or);
9118c2ecf20Sopenharmony_ci}
9128c2ecf20Sopenharmony_cistatic void lockres_clear_flags(struct ocfs2_lock_res *lockres,
9138c2ecf20Sopenharmony_ci				unsigned long clear)
9148c2ecf20Sopenharmony_ci{
9158c2ecf20Sopenharmony_ci	lockres_set_flags(lockres, lockres->l_flags & ~clear);
9168c2ecf20Sopenharmony_ci}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_cistatic inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
9198c2ecf20Sopenharmony_ci{
9208c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
9218c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
9228c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
9238c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
9248c2ecf20Sopenharmony_ci
9258c2ecf20Sopenharmony_ci	lockres->l_level = lockres->l_requested;
9268c2ecf20Sopenharmony_ci	if (lockres->l_level <=
9278c2ecf20Sopenharmony_ci	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
9288c2ecf20Sopenharmony_ci		lockres->l_blocking = DLM_LOCK_NL;
9298c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
9308c2ecf20Sopenharmony_ci	}
9318c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
9328c2ecf20Sopenharmony_ci}
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_cistatic inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
9358c2ecf20Sopenharmony_ci{
9368c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
9378c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	/* Convert from RO to EX doesn't really need anything as our
9408c2ecf20Sopenharmony_ci	 * information is already up to data. Convert from NL to
9418c2ecf20Sopenharmony_ci	 * *anything* however should mark ourselves as needing an
9428c2ecf20Sopenharmony_ci	 * update */
9438c2ecf20Sopenharmony_ci	if (lockres->l_level == DLM_LOCK_NL &&
9448c2ecf20Sopenharmony_ci	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
9458c2ecf20Sopenharmony_ci		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci	lockres->l_level = lockres->l_requested;
9488c2ecf20Sopenharmony_ci
9498c2ecf20Sopenharmony_ci	/*
9508c2ecf20Sopenharmony_ci	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
9518c2ecf20Sopenharmony_ci	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
9528c2ecf20Sopenharmony_ci	 * downconverting the lock before the upconvert has fully completed.
9538c2ecf20Sopenharmony_ci	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
9548c2ecf20Sopenharmony_ci	 * had already returned.
9558c2ecf20Sopenharmony_ci	 */
9568c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
9578c2ecf20Sopenharmony_ci		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
9588c2ecf20Sopenharmony_ci	else
9598c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
9628c2ecf20Sopenharmony_ci}
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_cistatic inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
9658c2ecf20Sopenharmony_ci{
9668c2ecf20Sopenharmony_ci	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
9678c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_ci	if (lockres->l_requested > DLM_LOCK_NL &&
9708c2ecf20Sopenharmony_ci	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
9718c2ecf20Sopenharmony_ci	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
9728c2ecf20Sopenharmony_ci		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	lockres->l_level = lockres->l_requested;
9758c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
9768c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
9778c2ecf20Sopenharmony_ci}
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_cistatic int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
9808c2ecf20Sopenharmony_ci				     int level)
9818c2ecf20Sopenharmony_ci{
9828c2ecf20Sopenharmony_ci	int needs_downconvert = 0;
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	if (level > lockres->l_blocking) {
9878c2ecf20Sopenharmony_ci		/* only schedule a downconvert if we haven't already scheduled
9888c2ecf20Sopenharmony_ci		 * one that goes low enough to satisfy the level we're
9898c2ecf20Sopenharmony_ci		 * blocking.  this also catches the case where we get
9908c2ecf20Sopenharmony_ci		 * duplicate BASTs */
9918c2ecf20Sopenharmony_ci		if (ocfs2_highest_compat_lock_level(level) <
9928c2ecf20Sopenharmony_ci		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
9938c2ecf20Sopenharmony_ci			needs_downconvert = 1;
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci		lockres->l_blocking = level;
9968c2ecf20Sopenharmony_ci	}
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9998c2ecf20Sopenharmony_ci	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
10008c2ecf20Sopenharmony_ci	     needs_downconvert);
10018c2ecf20Sopenharmony_ci
10028c2ecf20Sopenharmony_ci	if (needs_downconvert)
10038c2ecf20Sopenharmony_ci		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
10048c2ecf20Sopenharmony_ci	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
10058c2ecf20Sopenharmony_ci	return needs_downconvert;
10068c2ecf20Sopenharmony_ci}
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci/*
10098c2ecf20Sopenharmony_ci * OCFS2_LOCK_PENDING and l_pending_gen.
10108c2ecf20Sopenharmony_ci *
10118c2ecf20Sopenharmony_ci * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
10128c2ecf20Sopenharmony_ci * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
10138c2ecf20Sopenharmony_ci * for more details on the race.
10148c2ecf20Sopenharmony_ci *
10158c2ecf20Sopenharmony_ci * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
10168c2ecf20Sopenharmony_ci * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
10178c2ecf20Sopenharmony_ci * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
10188c2ecf20Sopenharmony_ci * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
10198c2ecf20Sopenharmony_ci * the caller is going to try to clear PENDING again.  If nothing else is
10208c2ecf20Sopenharmony_ci * happening, __lockres_clear_pending() sees PENDING is unset and does
10218c2ecf20Sopenharmony_ci * nothing.
10228c2ecf20Sopenharmony_ci *
10238c2ecf20Sopenharmony_ci * But what if another path (eg downconvert thread) has just started a
10248c2ecf20Sopenharmony_ci * new locking action?  The other path has re-set PENDING.  Our path
10258c2ecf20Sopenharmony_ci * cannot clear PENDING, because that will re-open the original race
10268c2ecf20Sopenharmony_ci * window.
10278c2ecf20Sopenharmony_ci *
10288c2ecf20Sopenharmony_ci * [Example]
10298c2ecf20Sopenharmony_ci *
10308c2ecf20Sopenharmony_ci * ocfs2_meta_lock()
10318c2ecf20Sopenharmony_ci *  ocfs2_cluster_lock()
10328c2ecf20Sopenharmony_ci *   set BUSY
10338c2ecf20Sopenharmony_ci *   set PENDING
10348c2ecf20Sopenharmony_ci *   drop l_lock
10358c2ecf20Sopenharmony_ci *   ocfs2_dlm_lock()
10368c2ecf20Sopenharmony_ci *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
10378c2ecf20Sopenharmony_ci *     clear PENDING			 ocfs2_unblock_lock()
 *					  take l_lock
10398c2ecf20Sopenharmony_ci *					  !BUSY
10408c2ecf20Sopenharmony_ci *					  ocfs2_prepare_downconvert()
10418c2ecf20Sopenharmony_ci *					   set BUSY
10428c2ecf20Sopenharmony_ci *					   set PENDING
10438c2ecf20Sopenharmony_ci *					  drop l_lock
10448c2ecf20Sopenharmony_ci *   take l_lock
10458c2ecf20Sopenharmony_ci *   clear PENDING
10468c2ecf20Sopenharmony_ci *   drop l_lock
10478c2ecf20Sopenharmony_ci *			<window>
10488c2ecf20Sopenharmony_ci *					  ocfs2_dlm_lock()
10498c2ecf20Sopenharmony_ci *
10508c2ecf20Sopenharmony_ci * So as you can see, we now have a window where l_lock is not held,
10518c2ecf20Sopenharmony_ci * PENDING is not set, and ocfs2_dlm_lock() has not been called.
10528c2ecf20Sopenharmony_ci *
10538c2ecf20Sopenharmony_ci * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
10548c2ecf20Sopenharmony_ci * set by ocfs2_prepare_downconvert().  That wasn't nice.
10558c2ecf20Sopenharmony_ci *
10568c2ecf20Sopenharmony_ci * To solve this we introduce l_pending_gen.  A call to
10578c2ecf20Sopenharmony_ci * lockres_clear_pending() will only do so when it is passed a generation
10588c2ecf20Sopenharmony_ci * number that matches the lockres.  lockres_set_pending() will return the
10598c2ecf20Sopenharmony_ci * current generation number.  When ocfs2_cluster_lock() goes to clear
10608c2ecf20Sopenharmony_ci * PENDING, it passes the generation it got from set_pending().  In our
10618c2ecf20Sopenharmony_ci * example above, the generation numbers will *not* match.  Thus,
10628c2ecf20Sopenharmony_ci * ocfs2_cluster_lock() will not clear the PENDING set by
10638c2ecf20Sopenharmony_ci * ocfs2_prepare_downconvert().
10648c2ecf20Sopenharmony_ci */
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci/* Unlocked version for ocfs2_locking_ast() */
10678c2ecf20Sopenharmony_cistatic void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
10688c2ecf20Sopenharmony_ci				    unsigned int generation,
10698c2ecf20Sopenharmony_ci				    struct ocfs2_super *osb)
10708c2ecf20Sopenharmony_ci{
10718c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci	/*
10748c2ecf20Sopenharmony_ci	 * The ast and locking functions can race us here.  The winner
10758c2ecf20Sopenharmony_ci	 * will clear pending, the loser will not.
10768c2ecf20Sopenharmony_ci	 */
10778c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
10788c2ecf20Sopenharmony_ci	    (lockres->l_pending_gen != generation))
10798c2ecf20Sopenharmony_ci		return;
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
10828c2ecf20Sopenharmony_ci	lockres->l_pending_gen++;
10838c2ecf20Sopenharmony_ci
10848c2ecf20Sopenharmony_ci	/*
10858c2ecf20Sopenharmony_ci	 * The downconvert thread may have skipped us because we
10868c2ecf20Sopenharmony_ci	 * were PENDING.  Wake it up.
10878c2ecf20Sopenharmony_ci	 */
10888c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
10898c2ecf20Sopenharmony_ci		ocfs2_wake_downconvert_thread(osb);
10908c2ecf20Sopenharmony_ci}
10918c2ecf20Sopenharmony_ci
10928c2ecf20Sopenharmony_ci/* Locked version for callers of ocfs2_dlm_lock() */
10938c2ecf20Sopenharmony_cistatic void lockres_clear_pending(struct ocfs2_lock_res *lockres,
10948c2ecf20Sopenharmony_ci				  unsigned int generation,
10958c2ecf20Sopenharmony_ci				  struct ocfs2_super *osb)
10968c2ecf20Sopenharmony_ci{
10978c2ecf20Sopenharmony_ci	unsigned long flags;
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
11008c2ecf20Sopenharmony_ci	__lockres_clear_pending(lockres, generation, osb);
11018c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
11028c2ecf20Sopenharmony_ci}
11038c2ecf20Sopenharmony_ci
11048c2ecf20Sopenharmony_cistatic unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
11058c2ecf20Sopenharmony_ci{
11068c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
11078c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci	return lockres->l_pending_gen;
11128c2ecf20Sopenharmony_ci}
11138c2ecf20Sopenharmony_ci
11148c2ecf20Sopenharmony_cistatic void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
11158c2ecf20Sopenharmony_ci{
11168c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
11178c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
11188c2ecf20Sopenharmony_ci	int needs_downconvert;
11198c2ecf20Sopenharmony_ci	unsigned long flags;
11208c2ecf20Sopenharmony_ci
11218c2ecf20Sopenharmony_ci	BUG_ON(level <= DLM_LOCK_NL);
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
11248c2ecf20Sopenharmony_ci	     "type %s\n", lockres->l_name, level, lockres->l_level,
11258c2ecf20Sopenharmony_ci	     ocfs2_lock_type_string(lockres->l_type));
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_ci	/*
11288c2ecf20Sopenharmony_ci	 * We can skip the bast for locks which don't enable caching -
11298c2ecf20Sopenharmony_ci	 * they'll be dropped at the earliest possible time anyway.
11308c2ecf20Sopenharmony_ci	 */
11318c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
11328c2ecf20Sopenharmony_ci		return;
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
11358c2ecf20Sopenharmony_ci	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
11368c2ecf20Sopenharmony_ci	if (needs_downconvert)
11378c2ecf20Sopenharmony_ci		ocfs2_schedule_blocked_lock(osb, lockres);
11388c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci	wake_up(&lockres->l_event);
11418c2ecf20Sopenharmony_ci
11428c2ecf20Sopenharmony_ci	ocfs2_wake_downconvert_thread(osb);
11438c2ecf20Sopenharmony_ci}
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_cistatic void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
11468c2ecf20Sopenharmony_ci{
11478c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
11488c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
11498c2ecf20Sopenharmony_ci	unsigned long flags;
11508c2ecf20Sopenharmony_ci	int status;
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ci	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
11558c2ecf20Sopenharmony_ci
11568c2ecf20Sopenharmony_ci	if (status == -EAGAIN) {
11578c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
11588c2ecf20Sopenharmony_ci		goto out;
11598c2ecf20Sopenharmony_ci	}
11608c2ecf20Sopenharmony_ci
11618c2ecf20Sopenharmony_ci	if (status) {
11628c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
11638c2ecf20Sopenharmony_ci		     lockres->l_name, status);
11648c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
11658c2ecf20Sopenharmony_ci		return;
11668c2ecf20Sopenharmony_ci	}
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
11698c2ecf20Sopenharmony_ci	     "level %d => %d\n", lockres->l_name, lockres->l_action,
11708c2ecf20Sopenharmony_ci	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci	switch(lockres->l_action) {
11738c2ecf20Sopenharmony_ci	case OCFS2_AST_ATTACH:
11748c2ecf20Sopenharmony_ci		ocfs2_generic_handle_attach_action(lockres);
11758c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
11768c2ecf20Sopenharmony_ci		break;
11778c2ecf20Sopenharmony_ci	case OCFS2_AST_CONVERT:
11788c2ecf20Sopenharmony_ci		ocfs2_generic_handle_convert_action(lockres);
11798c2ecf20Sopenharmony_ci		break;
11808c2ecf20Sopenharmony_ci	case OCFS2_AST_DOWNCONVERT:
11818c2ecf20Sopenharmony_ci		ocfs2_generic_handle_downconvert_action(lockres);
11828c2ecf20Sopenharmony_ci		break;
11838c2ecf20Sopenharmony_ci	default:
11848c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
11858c2ecf20Sopenharmony_ci		     "flags 0x%lx, unlock: %u\n",
11868c2ecf20Sopenharmony_ci		     lockres->l_name, lockres->l_action, lockres->l_flags,
11878c2ecf20Sopenharmony_ci		     lockres->l_unlock_action);
11888c2ecf20Sopenharmony_ci		BUG();
11898c2ecf20Sopenharmony_ci	}
11908c2ecf20Sopenharmony_ciout:
11918c2ecf20Sopenharmony_ci	/* set it to something invalid so if we get called again we
11928c2ecf20Sopenharmony_ci	 * can catch it. */
11938c2ecf20Sopenharmony_ci	lockres->l_action = OCFS2_AST_INVALID;
11948c2ecf20Sopenharmony_ci
11958c2ecf20Sopenharmony_ci	/* Did we try to cancel this lock?  Clear that state */
11968c2ecf20Sopenharmony_ci	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
11978c2ecf20Sopenharmony_ci		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
11988c2ecf20Sopenharmony_ci
11998c2ecf20Sopenharmony_ci	/*
12008c2ecf20Sopenharmony_ci	 * We may have beaten the locking functions here.  We certainly
12018c2ecf20Sopenharmony_ci	 * know that dlm_lock() has been called :-)
12028c2ecf20Sopenharmony_ci	 * Because we can't have two lock calls in flight at once, we
12038c2ecf20Sopenharmony_ci	 * can use lockres->l_pending_gen.
12048c2ecf20Sopenharmony_ci	 */
12058c2ecf20Sopenharmony_ci	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
12068c2ecf20Sopenharmony_ci
12078c2ecf20Sopenharmony_ci	wake_up(&lockres->l_event);
12088c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
12098c2ecf20Sopenharmony_ci}
12108c2ecf20Sopenharmony_ci
12118c2ecf20Sopenharmony_cistatic void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
12128c2ecf20Sopenharmony_ci{
12138c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
12148c2ecf20Sopenharmony_ci	unsigned long flags;
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
12178c2ecf20Sopenharmony_ci	     lockres->l_name, lockres->l_unlock_action);
12188c2ecf20Sopenharmony_ci
12198c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
12208c2ecf20Sopenharmony_ci	if (error) {
12218c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
12228c2ecf20Sopenharmony_ci		     "unlock_action %d\n", error, lockres->l_name,
12238c2ecf20Sopenharmony_ci		     lockres->l_unlock_action);
12248c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
12258c2ecf20Sopenharmony_ci		return;
12268c2ecf20Sopenharmony_ci	}
12278c2ecf20Sopenharmony_ci
12288c2ecf20Sopenharmony_ci	switch(lockres->l_unlock_action) {
12298c2ecf20Sopenharmony_ci	case OCFS2_UNLOCK_CANCEL_CONVERT:
12308c2ecf20Sopenharmony_ci		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
12318c2ecf20Sopenharmony_ci		lockres->l_action = OCFS2_AST_INVALID;
12328c2ecf20Sopenharmony_ci		/* Downconvert thread may have requeued this lock, we
12338c2ecf20Sopenharmony_ci		 * need to wake it. */
12348c2ecf20Sopenharmony_ci		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
12358c2ecf20Sopenharmony_ci			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
12368c2ecf20Sopenharmony_ci		break;
12378c2ecf20Sopenharmony_ci	case OCFS2_UNLOCK_DROP_LOCK:
12388c2ecf20Sopenharmony_ci		lockres->l_level = DLM_LOCK_IV;
12398c2ecf20Sopenharmony_ci		break;
12408c2ecf20Sopenharmony_ci	default:
12418c2ecf20Sopenharmony_ci		BUG();
12428c2ecf20Sopenharmony_ci	}
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
12458c2ecf20Sopenharmony_ci	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
12468c2ecf20Sopenharmony_ci	wake_up(&lockres->l_event);
12478c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
12488c2ecf20Sopenharmony_ci}
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_ci/*
12518c2ecf20Sopenharmony_ci * This is the filesystem locking protocol.  It provides the lock handling
12528c2ecf20Sopenharmony_ci * hooks for the underlying DLM.  It has a maximum version number.
12538c2ecf20Sopenharmony_ci * The version number allows interoperability with systems running at
12548c2ecf20Sopenharmony_ci * the same major number and an equal or smaller minor number.
12558c2ecf20Sopenharmony_ci *
12568c2ecf20Sopenharmony_ci * Whenever the filesystem does new things with locks (adds or removes a
12578c2ecf20Sopenharmony_ci * lock, orders them differently, does different things underneath a lock),
12588c2ecf20Sopenharmony_ci * the version must be changed.  The protocol is negotiated when joining
12598c2ecf20Sopenharmony_ci * the dlm domain.  A node may join the domain if its major version is
12608c2ecf20Sopenharmony_ci * identical to all other nodes and its minor version is greater than
12618c2ecf20Sopenharmony_ci * or equal to all other nodes.  When its minor version is greater than
12628c2ecf20Sopenharmony_ci * the other nodes, it will run at the minor version specified by the
12638c2ecf20Sopenharmony_ci * other nodes.
12648c2ecf20Sopenharmony_ci *
12658c2ecf20Sopenharmony_ci * If a locking change is made that will not be compatible with older
12668c2ecf20Sopenharmony_ci * versions, the major number must be increased and the minor version set
12678c2ecf20Sopenharmony_ci * to zero.  If a change merely adds a behavior that can be disabled when
12688c2ecf20Sopenharmony_ci * speaking to older versions, the minor version must be increased.  If a
12698c2ecf20Sopenharmony_ci * change adds a fully backwards compatible change (eg, LVB changes that
12708c2ecf20Sopenharmony_ci * are just ignored by older versions), the version does not need to be
12718c2ecf20Sopenharmony_ci * updated.
12728c2ecf20Sopenharmony_ci */
12738c2ecf20Sopenharmony_cistatic struct ocfs2_locking_protocol lproto = {
12748c2ecf20Sopenharmony_ci	.lp_max_version = {
12758c2ecf20Sopenharmony_ci		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
12768c2ecf20Sopenharmony_ci		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
12778c2ecf20Sopenharmony_ci	},
12788c2ecf20Sopenharmony_ci	.lp_lock_ast		= ocfs2_locking_ast,
12798c2ecf20Sopenharmony_ci	.lp_blocking_ast	= ocfs2_blocking_ast,
12808c2ecf20Sopenharmony_ci	.lp_unlock_ast		= ocfs2_unlock_ast,
12818c2ecf20Sopenharmony_ci};
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_civoid ocfs2_set_locking_protocol(void)
12848c2ecf20Sopenharmony_ci{
12858c2ecf20Sopenharmony_ci	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
12868c2ecf20Sopenharmony_ci}
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_cistatic inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
12898c2ecf20Sopenharmony_ci						int convert)
12908c2ecf20Sopenharmony_ci{
12918c2ecf20Sopenharmony_ci	unsigned long flags;
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
12948c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
12958c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
12968c2ecf20Sopenharmony_ci	if (convert)
12978c2ecf20Sopenharmony_ci		lockres->l_action = OCFS2_AST_INVALID;
12988c2ecf20Sopenharmony_ci	else
12998c2ecf20Sopenharmony_ci		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
13008c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
13018c2ecf20Sopenharmony_ci
13028c2ecf20Sopenharmony_ci	wake_up(&lockres->l_event);
13038c2ecf20Sopenharmony_ci}
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci/* Note: If we detect another process working on the lock (i.e.,
13068c2ecf20Sopenharmony_ci * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
13078c2ecf20Sopenharmony_ci * to do the right thing in that case.
13088c2ecf20Sopenharmony_ci */
13098c2ecf20Sopenharmony_cistatic int ocfs2_lock_create(struct ocfs2_super *osb,
13108c2ecf20Sopenharmony_ci			     struct ocfs2_lock_res *lockres,
13118c2ecf20Sopenharmony_ci			     int level,
13128c2ecf20Sopenharmony_ci			     u32 dlm_flags)
13138c2ecf20Sopenharmony_ci{
13148c2ecf20Sopenharmony_ci	int ret = 0;
13158c2ecf20Sopenharmony_ci	unsigned long flags;
13168c2ecf20Sopenharmony_ci	unsigned int gen;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
13198c2ecf20Sopenharmony_ci	     dlm_flags);
13208c2ecf20Sopenharmony_ci
13218c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
13228c2ecf20Sopenharmony_ci	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
13238c2ecf20Sopenharmony_ci	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
13248c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
13258c2ecf20Sopenharmony_ci		goto bail;
13268c2ecf20Sopenharmony_ci	}
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	lockres->l_action = OCFS2_AST_ATTACH;
13298c2ecf20Sopenharmony_ci	lockres->l_requested = level;
13308c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
13318c2ecf20Sopenharmony_ci	gen = lockres_set_pending(lockres);
13328c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	ret = ocfs2_dlm_lock(osb->cconn,
13358c2ecf20Sopenharmony_ci			     level,
13368c2ecf20Sopenharmony_ci			     &lockres->l_lksb,
13378c2ecf20Sopenharmony_ci			     dlm_flags,
13388c2ecf20Sopenharmony_ci			     lockres->l_name,
13398c2ecf20Sopenharmony_ci			     OCFS2_LOCK_ID_MAX_LEN - 1);
13408c2ecf20Sopenharmony_ci	lockres_clear_pending(lockres, gen, osb);
13418c2ecf20Sopenharmony_ci	if (ret) {
13428c2ecf20Sopenharmony_ci		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
13438c2ecf20Sopenharmony_ci		ocfs2_recover_from_dlm_error(lockres, 1);
13448c2ecf20Sopenharmony_ci	}
13458c2ecf20Sopenharmony_ci
13468c2ecf20Sopenharmony_ci	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
13478c2ecf20Sopenharmony_ci
13488c2ecf20Sopenharmony_cibail:
13498c2ecf20Sopenharmony_ci	return ret;
13508c2ecf20Sopenharmony_ci}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_cistatic inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
13538c2ecf20Sopenharmony_ci					int flag)
13548c2ecf20Sopenharmony_ci{
13558c2ecf20Sopenharmony_ci	unsigned long flags;
13568c2ecf20Sopenharmony_ci	int ret;
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
13598c2ecf20Sopenharmony_ci	ret = lockres->l_flags & flag;
13608c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci	return ret;
13638c2ecf20Sopenharmony_ci}
13648c2ecf20Sopenharmony_ci
13658c2ecf20Sopenharmony_cistatic inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_ci{
13688c2ecf20Sopenharmony_ci	wait_event(lockres->l_event,
13698c2ecf20Sopenharmony_ci		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
13708c2ecf20Sopenharmony_ci}
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_cistatic inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
13738c2ecf20Sopenharmony_ci
13748c2ecf20Sopenharmony_ci{
13758c2ecf20Sopenharmony_ci	wait_event(lockres->l_event,
13768c2ecf20Sopenharmony_ci		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
13778c2ecf20Sopenharmony_ci}
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_ci/* predict what lock level we'll be dropping down to on behalf
13808c2ecf20Sopenharmony_ci * of another node, and return true if the currently wanted
13818c2ecf20Sopenharmony_ci * level will be compatible with it. */
13828c2ecf20Sopenharmony_cistatic inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
13838c2ecf20Sopenharmony_ci						     int wanted)
13848c2ecf20Sopenharmony_ci{
13858c2ecf20Sopenharmony_ci	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
13888c2ecf20Sopenharmony_ci}
13898c2ecf20Sopenharmony_ci
13908c2ecf20Sopenharmony_cistatic void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
13918c2ecf20Sopenharmony_ci{
13928c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&mw->mw_item);
13938c2ecf20Sopenharmony_ci	init_completion(&mw->mw_complete);
13948c2ecf20Sopenharmony_ci	ocfs2_init_start_time(mw);
13958c2ecf20Sopenharmony_ci}
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_cistatic int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
13988c2ecf20Sopenharmony_ci{
13998c2ecf20Sopenharmony_ci	wait_for_completion(&mw->mw_complete);
14008c2ecf20Sopenharmony_ci	/* Re-arm the completion in case we want to wait on it again */
14018c2ecf20Sopenharmony_ci	reinit_completion(&mw->mw_complete);
14028c2ecf20Sopenharmony_ci	return mw->mw_status;
14038c2ecf20Sopenharmony_ci}
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_cistatic void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
14068c2ecf20Sopenharmony_ci				    struct ocfs2_mask_waiter *mw,
14078c2ecf20Sopenharmony_ci				    unsigned long mask,
14088c2ecf20Sopenharmony_ci				    unsigned long goal)
14098c2ecf20Sopenharmony_ci{
14108c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&mw->mw_item));
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
14158c2ecf20Sopenharmony_ci	mw->mw_mask = mask;
14168c2ecf20Sopenharmony_ci	mw->mw_goal = goal;
14178c2ecf20Sopenharmony_ci	ocfs2_track_lock_wait(lockres);
14188c2ecf20Sopenharmony_ci}
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci/* returns 0 if the mw that was removed was already satisfied, -EBUSY
14218c2ecf20Sopenharmony_ci * if the mask still hadn't reached its goal */
14228c2ecf20Sopenharmony_cistatic int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
14238c2ecf20Sopenharmony_ci				      struct ocfs2_mask_waiter *mw)
14248c2ecf20Sopenharmony_ci{
14258c2ecf20Sopenharmony_ci	int ret = 0;
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
14288c2ecf20Sopenharmony_ci	if (!list_empty(&mw->mw_item)) {
14298c2ecf20Sopenharmony_ci		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
14308c2ecf20Sopenharmony_ci			ret = -EBUSY;
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_ci		list_del_init(&mw->mw_item);
14338c2ecf20Sopenharmony_ci		init_completion(&mw->mw_complete);
14348c2ecf20Sopenharmony_ci		ocfs2_track_lock_wait(lockres);
14358c2ecf20Sopenharmony_ci	}
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_ci	return ret;
14388c2ecf20Sopenharmony_ci}
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_cistatic int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
14418c2ecf20Sopenharmony_ci				      struct ocfs2_mask_waiter *mw)
14428c2ecf20Sopenharmony_ci{
14438c2ecf20Sopenharmony_ci	unsigned long flags;
14448c2ecf20Sopenharmony_ci	int ret = 0;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
14478c2ecf20Sopenharmony_ci	ret = __lockres_remove_mask_waiter(lockres, mw);
14488c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
14498c2ecf20Sopenharmony_ci
14508c2ecf20Sopenharmony_ci	return ret;
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci}
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_cistatic int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
14558c2ecf20Sopenharmony_ci					     struct ocfs2_lock_res *lockres)
14568c2ecf20Sopenharmony_ci{
14578c2ecf20Sopenharmony_ci	int ret;
14588c2ecf20Sopenharmony_ci
14598c2ecf20Sopenharmony_ci	ret = wait_for_completion_interruptible(&mw->mw_complete);
14608c2ecf20Sopenharmony_ci	if (ret)
14618c2ecf20Sopenharmony_ci		lockres_remove_mask_waiter(lockres, mw);
14628c2ecf20Sopenharmony_ci	else
14638c2ecf20Sopenharmony_ci		ret = mw->mw_status;
14648c2ecf20Sopenharmony_ci	/* Re-arm the completion in case we want to wait on it again */
14658c2ecf20Sopenharmony_ci	reinit_completion(&mw->mw_complete);
14668c2ecf20Sopenharmony_ci	return ret;
14678c2ecf20Sopenharmony_ci}
14688c2ecf20Sopenharmony_ci
/*
 * Acquire the cluster lock @lockres at @level, upconverting through the DLM
 * when the currently granted level is not sufficient.
 *
 * @osb:        superblock the lock belongs to; supplies the DLM connection
 *              and the OCFS2_MOUNT_NOINTR mount option.
 * @lockres:    lock resource; must have OCFS2_LOCK_INITIALIZED set.
 * @level:      requested DLM level.
 * @lkm_flags:  DLM_LKF_* flags for ocfs2_dlm_lock().  DLM_LKF_VALBLK is
 *              added for lock types that use an LVB, and DLM_LKF_CONVERT is
 *              set or cleared here depending on whether the lock is already
 *              attached.
 * @arg_flags:  OCFS2_LOCK_NONBLOCK makes the function back off with -EAGAIN
 *              instead of sleeping on a busy/blocked lock;
 *              OCFS2_META_LOCK_NOQUEUE is only consulted for the lockdep
 *              annotation.
 * @l_subclass: lockdep subclass (CONFIG_DEBUG_LOCK_ALLOC only).
 * @caller_ip:  caller address for lockdep (CONFIG_DEBUG_LOCK_ALLOC only).
 *
 * The body is a retry loop around the "again" label: each pass re-examines
 * l_flags under l_lock and either takes the lock (bumping the holder
 * count), queues a mask waiter and sleeps, or issues a DLM request and
 * loops to wait for its AST.
 *
 * Returns 0 on success, -EINVAL for an uninitialized lockres, -ERESTARTSYS
 * when a signal is pending before any DLM request has been issued (unless
 * mounted with OCFS2_MOUNT_NOINTR), -EAGAIN for a failed NOQUEUE/NONBLOCK
 * attempt, or the error returned by ocfs2_dlm_lock() / the mask waiter.
 */
static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres,
				int level,
				u32 lkm_flags,
				int arg_flags,
				int l_subclass,
				unsigned long caller_ip)
{
	struct ocfs2_mask_waiter mw;
	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
	unsigned long flags;
	unsigned int gen;
	int noqueue_attempted = 0;
	int dlm_locked = 0;
	int kick_dc = 0;

	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
		mlog_errno(-EINVAL);
		return -EINVAL;
	}

	ocfs2_init_mask_waiter(&mw);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

again:
	wait = 0;

	spin_lock_irqsave(&lockres->l_lock, flags);

	/* catch_signals is zeroed below once a DLM request has been issued */
	if (catch_signals && signal_pending(current)) {
		ret = -ERESTARTSYS;
		goto unlock;
	}

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
	    level > lockres->l_level) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		wait = 1;
		goto unlock;
	}

	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
		/*
		 * We've upconverted. If the lock now has a level we can
		 * work with, we take it. If, however, the lock is not at the
		 * required level, we go thru the full cycle. One way this could
		 * happen is if a process requesting an upconvert to PR is
		 * closely followed by another requesting upconvert to an EX.
		 * If the process requesting EX lands here, we want it to
		 * continue attempting to upconvert and let the process
		 * requesting PR take the lock.
		 * If multiple processes request upconvert to PR, the first one
		 * here will take the lock. The others will have to go thru the
		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
		 * downconvert request.
		 */
		if (level <= lockres->l_level)
			goto update_holders;
	}

	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* is the lock currently blocked on behalf of
		 * another node? If so, wait for that to clear. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
		wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/*
		 * A NOQUEUE request gets exactly one trip into the DLM; if
		 * we come back here still needing a higher level, give up.
		 */
		if (noqueue_attempted > 0) {
			ret = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			noqueue_attempted = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* first request attaches the lock, later ones convert it */
		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* call dlm_lock to upgrade lock now */
		ret = ocfs2_dlm_lock(osb->cconn,
				     level,
				     &lockres->l_lksb,
				     lkm_flags,
				     lockres->l_name,
				     OCFS2_LOCK_ID_MAX_LEN - 1);
		lockres_clear_pending(lockres, gen, osb);
		if (ret) {
			/* -EAGAIN on a NOQUEUE request is not logged */
			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
			    (ret != -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    ret, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			goto out;
		}
		dlm_locked = 1;

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* At this point we've gone inside the dlm and need to
		 * complete our work regardless. */
		catch_signals = 0;

		/* wait for busy to clear and carry on */
		goto again;
	}

update_holders:
	/* Ok, if we get here then we're good to go. */
	ocfs2_inc_holders(lockres, level);

	ret = 0;
unlock:
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);

	/* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
	kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);

	spin_unlock_irqrestore(&lockres->l_lock, flags);
	if (kick_dc)
		ocfs2_wake_downconvert_thread(osb);
out:
	/*
	 * This is helping work around a lock inversion between the page lock
	 * and dlm locks.  One path holds the page lock while calling aops
	 * which block acquiring dlm locks.  The voting thread holds dlm
	 * locks while acquiring page locks while down converting data locks.
	 * This block is helping an aop path notice the inversion and back
	 * off to unlock its page lock before trying the dlm lock again.
	 */
	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
		wait = 0;
		spin_lock_irqsave(&lockres->l_lock, flags);
		if (__lockres_remove_mask_waiter(lockres, &mw)) {
			/*
			 * Still busy/blocked: back off.  Remember that a DLM
			 * request was already issued on this call.
			 */
			if (dlm_locked)
				lockres_or_flags(lockres,
					OCFS2_LOCK_NONBLOCK_FINISHED);
			spin_unlock_irqrestore(&lockres->l_lock, flags);
			ret = -EAGAIN;
		} else {
			/* the awaited condition was already met; retry now */
			spin_unlock_irqrestore(&lockres->l_lock, flags);
			goto again;
		}
	}
	if (wait) {
		/* sleep until the queued mask waiter is completed, then retry */
		ret = ocfs2_wait_for_mask(&mw);
		if (ret == 0)
			goto again;
		mlog_errno(ret);
	}
	ocfs2_update_lock_stats(lockres, level, &mw, ret);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* lockdep annotation: PR is recorded as a read acquire, all else as write */
	if (!ret && lockres->l_lockdep_map.key != NULL) {
		if (level == DLM_LOCK_PR)
			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
		else
			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
	}
#endif
	return ret;
}
16718c2ecf20Sopenharmony_ci
16728c2ecf20Sopenharmony_cistatic inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
16738c2ecf20Sopenharmony_ci				     struct ocfs2_lock_res *lockres,
16748c2ecf20Sopenharmony_ci				     int level,
16758c2ecf20Sopenharmony_ci				     u32 lkm_flags,
16768c2ecf20Sopenharmony_ci				     int arg_flags)
16778c2ecf20Sopenharmony_ci{
16788c2ecf20Sopenharmony_ci	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
16798c2ecf20Sopenharmony_ci				    0, _RET_IP_);
16808c2ecf20Sopenharmony_ci}
16818c2ecf20Sopenharmony_ci
16828c2ecf20Sopenharmony_ci
16838c2ecf20Sopenharmony_cistatic void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
16848c2ecf20Sopenharmony_ci				   struct ocfs2_lock_res *lockres,
16858c2ecf20Sopenharmony_ci				   int level,
16868c2ecf20Sopenharmony_ci				   unsigned long caller_ip)
16878c2ecf20Sopenharmony_ci{
16888c2ecf20Sopenharmony_ci	unsigned long flags;
16898c2ecf20Sopenharmony_ci
16908c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
16918c2ecf20Sopenharmony_ci	ocfs2_dec_holders(lockres, level);
16928c2ecf20Sopenharmony_ci	ocfs2_downconvert_on_unlock(osb, lockres);
16938c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
16948c2ecf20Sopenharmony_ci#ifdef CONFIG_DEBUG_LOCK_ALLOC
16958c2ecf20Sopenharmony_ci	if (lockres->l_lockdep_map.key != NULL)
16968c2ecf20Sopenharmony_ci		rwsem_release(&lockres->l_lockdep_map, caller_ip);
16978c2ecf20Sopenharmony_ci#endif
16988c2ecf20Sopenharmony_ci}
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_cistatic int ocfs2_create_new_lock(struct ocfs2_super *osb,
17018c2ecf20Sopenharmony_ci				 struct ocfs2_lock_res *lockres,
17028c2ecf20Sopenharmony_ci				 int ex,
17038c2ecf20Sopenharmony_ci				 int local)
17048c2ecf20Sopenharmony_ci{
17058c2ecf20Sopenharmony_ci	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
17068c2ecf20Sopenharmony_ci	unsigned long flags;
17078c2ecf20Sopenharmony_ci	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
17088c2ecf20Sopenharmony_ci
17098c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
17108c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
17118c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
17128c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
17138c2ecf20Sopenharmony_ci
17148c2ecf20Sopenharmony_ci	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
17158c2ecf20Sopenharmony_ci}
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_ci/* Grants us an EX lock on the data and metadata resources, skipping
17188c2ecf20Sopenharmony_ci * the normal cluster directory lookup. Use this ONLY on newly created
17198c2ecf20Sopenharmony_ci * inodes which other nodes can't possibly see, and which haven't been
17208c2ecf20Sopenharmony_ci * hashed in the inode hash yet. This can give us a good performance
17218c2ecf20Sopenharmony_ci * increase as it'll skip the network broadcast normally associated
17228c2ecf20Sopenharmony_ci * with creating a new lock resource. */
17238c2ecf20Sopenharmony_ciint ocfs2_create_new_inode_locks(struct inode *inode)
17248c2ecf20Sopenharmony_ci{
17258c2ecf20Sopenharmony_ci	int ret;
17268c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
17278c2ecf20Sopenharmony_ci
17288c2ecf20Sopenharmony_ci	BUG_ON(!ocfs2_inode_is_new(inode));
17298c2ecf20Sopenharmony_ci
17308c2ecf20Sopenharmony_ci	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	/* NOTE: That we don't increment any of the holder counts, nor
17338c2ecf20Sopenharmony_ci	 * do we add anything to a journal handle. Since this is
17348c2ecf20Sopenharmony_ci	 * supposed to be a new inode which the cluster doesn't know
17358c2ecf20Sopenharmony_ci	 * about yet, there is no need to.  As far as the LVB handling
17368c2ecf20Sopenharmony_ci	 * is concerned, this is basically like acquiring an EX lock
17378c2ecf20Sopenharmony_ci	 * on a resource which has an invalid one -- we'll set it
17388c2ecf20Sopenharmony_ci	 * valid when we release the EX. */
17398c2ecf20Sopenharmony_ci
17408c2ecf20Sopenharmony_ci	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
17418c2ecf20Sopenharmony_ci	if (ret) {
17428c2ecf20Sopenharmony_ci		mlog_errno(ret);
17438c2ecf20Sopenharmony_ci		goto bail;
17448c2ecf20Sopenharmony_ci	}
17458c2ecf20Sopenharmony_ci
17468c2ecf20Sopenharmony_ci	/*
17478c2ecf20Sopenharmony_ci	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
17488c2ecf20Sopenharmony_ci	 * don't use a generation in their lock names.
17498c2ecf20Sopenharmony_ci	 */
17508c2ecf20Sopenharmony_ci	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
17518c2ecf20Sopenharmony_ci	if (ret) {
17528c2ecf20Sopenharmony_ci		mlog_errno(ret);
17538c2ecf20Sopenharmony_ci		goto bail;
17548c2ecf20Sopenharmony_ci	}
17558c2ecf20Sopenharmony_ci
17568c2ecf20Sopenharmony_ci	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
17578c2ecf20Sopenharmony_ci	if (ret)
17588c2ecf20Sopenharmony_ci		mlog_errno(ret);
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_cibail:
17618c2ecf20Sopenharmony_ci	return ret;
17628c2ecf20Sopenharmony_ci}
17638c2ecf20Sopenharmony_ci
17648c2ecf20Sopenharmony_ciint ocfs2_rw_lock(struct inode *inode, int write)
17658c2ecf20Sopenharmony_ci{
17668c2ecf20Sopenharmony_ci	int status, level;
17678c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
17688c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
17698c2ecf20Sopenharmony_ci
17708c2ecf20Sopenharmony_ci	mlog(0, "inode %llu take %s RW lock\n",
17718c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
17728c2ecf20Sopenharmony_ci	     write ? "EXMODE" : "PRMODE");
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
17758c2ecf20Sopenharmony_ci		return 0;
17768c2ecf20Sopenharmony_ci
17778c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_rw_lockres;
17788c2ecf20Sopenharmony_ci
17798c2ecf20Sopenharmony_ci	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
17828c2ecf20Sopenharmony_ci	if (status < 0)
17838c2ecf20Sopenharmony_ci		mlog_errno(status);
17848c2ecf20Sopenharmony_ci
17858c2ecf20Sopenharmony_ci	return status;
17868c2ecf20Sopenharmony_ci}
17878c2ecf20Sopenharmony_ci
17888c2ecf20Sopenharmony_ciint ocfs2_try_rw_lock(struct inode *inode, int write)
17898c2ecf20Sopenharmony_ci{
17908c2ecf20Sopenharmony_ci	int status, level;
17918c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
17928c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
17938c2ecf20Sopenharmony_ci
17948c2ecf20Sopenharmony_ci	mlog(0, "inode %llu try to take %s RW lock\n",
17958c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
17968c2ecf20Sopenharmony_ci	     write ? "EXMODE" : "PRMODE");
17978c2ecf20Sopenharmony_ci
17988c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
17998c2ecf20Sopenharmony_ci		return 0;
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_rw_lockres;
18028c2ecf20Sopenharmony_ci
18038c2ecf20Sopenharmony_ci	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
18048c2ecf20Sopenharmony_ci
18058c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
18068c2ecf20Sopenharmony_ci	return status;
18078c2ecf20Sopenharmony_ci}
18088c2ecf20Sopenharmony_ci
18098c2ecf20Sopenharmony_civoid ocfs2_rw_unlock(struct inode *inode, int write)
18108c2ecf20Sopenharmony_ci{
18118c2ecf20Sopenharmony_ci	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
18128c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
18138c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
18148c2ecf20Sopenharmony_ci
18158c2ecf20Sopenharmony_ci	mlog(0, "inode %llu drop %s RW lock\n",
18168c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
18178c2ecf20Sopenharmony_ci	     write ? "EXMODE" : "PRMODE");
18188c2ecf20Sopenharmony_ci
18198c2ecf20Sopenharmony_ci	if (!ocfs2_mount_local(osb))
18208c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, level);
18218c2ecf20Sopenharmony_ci}
18228c2ecf20Sopenharmony_ci
18238c2ecf20Sopenharmony_ci/*
18248c2ecf20Sopenharmony_ci * ocfs2_open_lock always get PR mode lock.
18258c2ecf20Sopenharmony_ci */
18268c2ecf20Sopenharmony_ciint ocfs2_open_lock(struct inode *inode)
18278c2ecf20Sopenharmony_ci{
18288c2ecf20Sopenharmony_ci	int status = 0;
18298c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
18308c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
18318c2ecf20Sopenharmony_ci
18328c2ecf20Sopenharmony_ci	mlog(0, "inode %llu take PRMODE open lock\n",
18338c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
18348c2ecf20Sopenharmony_ci
18358c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
18368c2ecf20Sopenharmony_ci		goto out;
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_open_lockres;
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0);
18418c2ecf20Sopenharmony_ci	if (status < 0)
18428c2ecf20Sopenharmony_ci		mlog_errno(status);
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ciout:
18458c2ecf20Sopenharmony_ci	return status;
18468c2ecf20Sopenharmony_ci}
18478c2ecf20Sopenharmony_ci
18488c2ecf20Sopenharmony_ciint ocfs2_try_open_lock(struct inode *inode, int write)
18498c2ecf20Sopenharmony_ci{
18508c2ecf20Sopenharmony_ci	int status = 0, level;
18518c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
18528c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
18538c2ecf20Sopenharmony_ci
18548c2ecf20Sopenharmony_ci	mlog(0, "inode %llu try to take %s open lock\n",
18558c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
18568c2ecf20Sopenharmony_ci	     write ? "EXMODE" : "PRMODE");
18578c2ecf20Sopenharmony_ci
18588c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb)) {
18598c2ecf20Sopenharmony_ci		if (write)
18608c2ecf20Sopenharmony_ci			status = -EROFS;
18618c2ecf20Sopenharmony_ci		goto out;
18628c2ecf20Sopenharmony_ci	}
18638c2ecf20Sopenharmony_ci
18648c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
18658c2ecf20Sopenharmony_ci		goto out;
18668c2ecf20Sopenharmony_ci
18678c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_open_lockres;
18688c2ecf20Sopenharmony_ci
18698c2ecf20Sopenharmony_ci	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci	/*
18728c2ecf20Sopenharmony_ci	 * The file system may already holding a PRMODE/EXMODE open lock.
18738c2ecf20Sopenharmony_ci	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
18748c2ecf20Sopenharmony_ci	 * other nodes and the -EAGAIN will indicate to the caller that
18758c2ecf20Sopenharmony_ci	 * this inode is still in use.
18768c2ecf20Sopenharmony_ci	 */
18778c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
18788c2ecf20Sopenharmony_ci
18798c2ecf20Sopenharmony_ciout:
18808c2ecf20Sopenharmony_ci	return status;
18818c2ecf20Sopenharmony_ci}
18828c2ecf20Sopenharmony_ci
18838c2ecf20Sopenharmony_ci/*
18848c2ecf20Sopenharmony_ci * ocfs2_open_unlock unlock PR and EX mode open locks.
18858c2ecf20Sopenharmony_ci */
18868c2ecf20Sopenharmony_civoid ocfs2_open_unlock(struct inode *inode)
18878c2ecf20Sopenharmony_ci{
18888c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
18898c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci	mlog(0, "inode %llu drop open lock\n",
18928c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
18938c2ecf20Sopenharmony_ci
18948c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
18958c2ecf20Sopenharmony_ci		goto out;
18968c2ecf20Sopenharmony_ci
18978c2ecf20Sopenharmony_ci	if(lockres->l_ro_holders)
18988c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR);
18998c2ecf20Sopenharmony_ci	if(lockres->l_ex_holders)
19008c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
19018c2ecf20Sopenharmony_ci
19028c2ecf20Sopenharmony_ciout:
19038c2ecf20Sopenharmony_ci	return;
19048c2ecf20Sopenharmony_ci}
19058c2ecf20Sopenharmony_ci
19068c2ecf20Sopenharmony_cistatic int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
19078c2ecf20Sopenharmony_ci				     int level)
19088c2ecf20Sopenharmony_ci{
19098c2ecf20Sopenharmony_ci	int ret;
19108c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
19118c2ecf20Sopenharmony_ci	unsigned long flags;
19128c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter mw;
19138c2ecf20Sopenharmony_ci
19148c2ecf20Sopenharmony_ci	ocfs2_init_mask_waiter(&mw);
19158c2ecf20Sopenharmony_ci
19168c2ecf20Sopenharmony_ciretry_cancel:
19178c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
19188c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
19198c2ecf20Sopenharmony_ci		ret = ocfs2_prepare_cancel_convert(osb, lockres);
19208c2ecf20Sopenharmony_ci		if (ret) {
19218c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&lockres->l_lock, flags);
19228c2ecf20Sopenharmony_ci			ret = ocfs2_cancel_convert(osb, lockres);
19238c2ecf20Sopenharmony_ci			if (ret < 0) {
19248c2ecf20Sopenharmony_ci				mlog_errno(ret);
19258c2ecf20Sopenharmony_ci				goto out;
19268c2ecf20Sopenharmony_ci			}
19278c2ecf20Sopenharmony_ci			goto retry_cancel;
19288c2ecf20Sopenharmony_ci		}
19298c2ecf20Sopenharmony_ci		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
19308c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
19318c2ecf20Sopenharmony_ci
19328c2ecf20Sopenharmony_ci		ocfs2_wait_for_mask(&mw);
19338c2ecf20Sopenharmony_ci		goto retry_cancel;
19348c2ecf20Sopenharmony_ci	}
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci	ret = -ERESTARTSYS;
19378c2ecf20Sopenharmony_ci	/*
19388c2ecf20Sopenharmony_ci	 * We may still have gotten the lock, in which case there's no
19398c2ecf20Sopenharmony_ci	 * point to restarting the syscall.
19408c2ecf20Sopenharmony_ci	 */
19418c2ecf20Sopenharmony_ci	if (lockres->l_level == level)
19428c2ecf20Sopenharmony_ci		ret = 0;
19438c2ecf20Sopenharmony_ci
19448c2ecf20Sopenharmony_ci	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
19458c2ecf20Sopenharmony_ci	     lockres->l_flags, lockres->l_level, lockres->l_action);
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ciout:
19508c2ecf20Sopenharmony_ci	return ret;
19518c2ecf20Sopenharmony_ci}
19528c2ecf20Sopenharmony_ci
19538c2ecf20Sopenharmony_ci/*
19548c2ecf20Sopenharmony_ci * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
19558c2ecf20Sopenharmony_ci * flock() calls. The locking approach this requires is sufficiently
19568c2ecf20Sopenharmony_ci * different from all other cluster lock types that we implement a
19578c2ecf20Sopenharmony_ci * separate path to the "low-level" dlm calls. In particular:
19588c2ecf20Sopenharmony_ci *
19598c2ecf20Sopenharmony_ci * - No optimization of lock levels is done - we take at exactly
19608c2ecf20Sopenharmony_ci *   what's been requested.
19618c2ecf20Sopenharmony_ci *
19628c2ecf20Sopenharmony_ci * - No lock caching is employed. We immediately downconvert to
19638c2ecf20Sopenharmony_ci *   no-lock at unlock time. This also means flock locks never go on
19648c2ecf20Sopenharmony_ci *   the blocking list).
19658c2ecf20Sopenharmony_ci *
19668c2ecf20Sopenharmony_ci * - Since userspace can trivially deadlock itself with flock, we make
19678c2ecf20Sopenharmony_ci *   sure to allow cancellation of a misbehaving applications flock()
19688c2ecf20Sopenharmony_ci *   request.
19698c2ecf20Sopenharmony_ci *
19708c2ecf20Sopenharmony_ci * - Access to any flock lockres doesn't require concurrency, so we
19718c2ecf20Sopenharmony_ci *   can simplify the code by requiring the caller to guarantee
19728c2ecf20Sopenharmony_ci *   serialization of dlmglue flock calls.
19738c2ecf20Sopenharmony_ci */
19748c2ecf20Sopenharmony_ciint ocfs2_file_lock(struct file *file, int ex, int trylock)
19758c2ecf20Sopenharmony_ci{
19768c2ecf20Sopenharmony_ci	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
19778c2ecf20Sopenharmony_ci	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
19788c2ecf20Sopenharmony_ci	unsigned long flags;
19798c2ecf20Sopenharmony_ci	struct ocfs2_file_private *fp = file->private_data;
19808c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &fp->fp_flock;
19818c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
19828c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter mw;
19838c2ecf20Sopenharmony_ci
19848c2ecf20Sopenharmony_ci	ocfs2_init_mask_waiter(&mw);
19858c2ecf20Sopenharmony_ci
19868c2ecf20Sopenharmony_ci	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
19878c2ecf20Sopenharmony_ci	    (lockres->l_level > DLM_LOCK_NL)) {
19888c2ecf20Sopenharmony_ci		mlog(ML_ERROR,
19898c2ecf20Sopenharmony_ci		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
19908c2ecf20Sopenharmony_ci		     "level: %u\n", lockres->l_name, lockres->l_flags,
19918c2ecf20Sopenharmony_ci		     lockres->l_level);
19928c2ecf20Sopenharmony_ci		return -EINVAL;
19938c2ecf20Sopenharmony_ci	}
19948c2ecf20Sopenharmony_ci
19958c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
19968c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
19978c2ecf20Sopenharmony_ci		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
19988c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
19998c2ecf20Sopenharmony_ci
20008c2ecf20Sopenharmony_ci		/*
20018c2ecf20Sopenharmony_ci		 * Get the lock at NLMODE to start - that way we
20028c2ecf20Sopenharmony_ci		 * can cancel the upconvert request if need be.
20038c2ecf20Sopenharmony_ci		 */
20048c2ecf20Sopenharmony_ci		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
20058c2ecf20Sopenharmony_ci		if (ret < 0) {
20068c2ecf20Sopenharmony_ci			mlog_errno(ret);
20078c2ecf20Sopenharmony_ci			goto out;
20088c2ecf20Sopenharmony_ci		}
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci		ret = ocfs2_wait_for_mask(&mw);
20118c2ecf20Sopenharmony_ci		if (ret) {
20128c2ecf20Sopenharmony_ci			mlog_errno(ret);
20138c2ecf20Sopenharmony_ci			goto out;
20148c2ecf20Sopenharmony_ci		}
20158c2ecf20Sopenharmony_ci		spin_lock_irqsave(&lockres->l_lock, flags);
20168c2ecf20Sopenharmony_ci	}
20178c2ecf20Sopenharmony_ci
20188c2ecf20Sopenharmony_ci	lockres->l_action = OCFS2_AST_CONVERT;
20198c2ecf20Sopenharmony_ci	lkm_flags |= DLM_LKF_CONVERT;
20208c2ecf20Sopenharmony_ci	lockres->l_requested = level;
20218c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
20248c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
20258c2ecf20Sopenharmony_ci
20268c2ecf20Sopenharmony_ci	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
20278c2ecf20Sopenharmony_ci			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
20288c2ecf20Sopenharmony_ci	if (ret) {
20298c2ecf20Sopenharmony_ci		if (!trylock || (ret != -EAGAIN)) {
20308c2ecf20Sopenharmony_ci			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
20318c2ecf20Sopenharmony_ci			ret = -EINVAL;
20328c2ecf20Sopenharmony_ci		}
20338c2ecf20Sopenharmony_ci
20348c2ecf20Sopenharmony_ci		ocfs2_recover_from_dlm_error(lockres, 1);
20358c2ecf20Sopenharmony_ci		lockres_remove_mask_waiter(lockres, &mw);
20368c2ecf20Sopenharmony_ci		goto out;
20378c2ecf20Sopenharmony_ci	}
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
20408c2ecf20Sopenharmony_ci	if (ret == -ERESTARTSYS) {
20418c2ecf20Sopenharmony_ci		/*
20428c2ecf20Sopenharmony_ci		 * Userspace can cause deadlock itself with
20438c2ecf20Sopenharmony_ci		 * flock(). Current behavior locally is to allow the
20448c2ecf20Sopenharmony_ci		 * deadlock, but abort the system call if a signal is
20458c2ecf20Sopenharmony_ci		 * received. We follow this example, otherwise a
20468c2ecf20Sopenharmony_ci		 * poorly written program could sit in kernel until
20478c2ecf20Sopenharmony_ci		 * reboot.
20488c2ecf20Sopenharmony_ci		 *
20498c2ecf20Sopenharmony_ci		 * Handling this is a bit more complicated for Ocfs2
20508c2ecf20Sopenharmony_ci		 * though. We can't exit this function with an
20518c2ecf20Sopenharmony_ci		 * outstanding lock request, so a cancel convert is
20528c2ecf20Sopenharmony_ci		 * required. We intentionally overwrite 'ret' - if the
20538c2ecf20Sopenharmony_ci		 * cancel fails and the lock was granted, it's easier
20548c2ecf20Sopenharmony_ci		 * to just bubble success back up to the user.
20558c2ecf20Sopenharmony_ci		 */
20568c2ecf20Sopenharmony_ci		ret = ocfs2_flock_handle_signal(lockres, level);
20578c2ecf20Sopenharmony_ci	} else if (!ret && (level > lockres->l_level)) {
20588c2ecf20Sopenharmony_ci		/* Trylock failed asynchronously */
20598c2ecf20Sopenharmony_ci		BUG_ON(!trylock);
20608c2ecf20Sopenharmony_ci		ret = -EAGAIN;
20618c2ecf20Sopenharmony_ci	}
20628c2ecf20Sopenharmony_ci
20638c2ecf20Sopenharmony_ciout:
20648c2ecf20Sopenharmony_ci
20658c2ecf20Sopenharmony_ci	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
20668c2ecf20Sopenharmony_ci	     lockres->l_name, ex, trylock, ret);
20678c2ecf20Sopenharmony_ci	return ret;
20688c2ecf20Sopenharmony_ci}
20698c2ecf20Sopenharmony_ci
20708c2ecf20Sopenharmony_civoid ocfs2_file_unlock(struct file *file)
20718c2ecf20Sopenharmony_ci{
20728c2ecf20Sopenharmony_ci	int ret;
20738c2ecf20Sopenharmony_ci	unsigned int gen;
20748c2ecf20Sopenharmony_ci	unsigned long flags;
20758c2ecf20Sopenharmony_ci	struct ocfs2_file_private *fp = file->private_data;
20768c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &fp->fp_flock;
20778c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
20788c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter mw;
20798c2ecf20Sopenharmony_ci
20808c2ecf20Sopenharmony_ci	ocfs2_init_mask_waiter(&mw);
20818c2ecf20Sopenharmony_ci
20828c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
20838c2ecf20Sopenharmony_ci		return;
20848c2ecf20Sopenharmony_ci
20858c2ecf20Sopenharmony_ci	if (lockres->l_level == DLM_LOCK_NL)
20868c2ecf20Sopenharmony_ci		return;
20878c2ecf20Sopenharmony_ci
20888c2ecf20Sopenharmony_ci	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
20898c2ecf20Sopenharmony_ci	     lockres->l_name, lockres->l_flags, lockres->l_level,
20908c2ecf20Sopenharmony_ci	     lockres->l_action);
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
20938c2ecf20Sopenharmony_ci	/*
20948c2ecf20Sopenharmony_ci	 * Fake a blocking ast for the downconvert code.
20958c2ecf20Sopenharmony_ci	 */
20968c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
20978c2ecf20Sopenharmony_ci	lockres->l_blocking = DLM_LOCK_EX;
20988c2ecf20Sopenharmony_ci
20998c2ecf20Sopenharmony_ci	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
21008c2ecf20Sopenharmony_ci	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
21018c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
21028c2ecf20Sopenharmony_ci
21038c2ecf20Sopenharmony_ci	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
21048c2ecf20Sopenharmony_ci	if (ret) {
21058c2ecf20Sopenharmony_ci		mlog_errno(ret);
21068c2ecf20Sopenharmony_ci		return;
21078c2ecf20Sopenharmony_ci	}
21088c2ecf20Sopenharmony_ci
21098c2ecf20Sopenharmony_ci	ret = ocfs2_wait_for_mask(&mw);
21108c2ecf20Sopenharmony_ci	if (ret)
21118c2ecf20Sopenharmony_ci		mlog_errno(ret);
21128c2ecf20Sopenharmony_ci}
21138c2ecf20Sopenharmony_ci
21148c2ecf20Sopenharmony_cistatic void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
21158c2ecf20Sopenharmony_ci					struct ocfs2_lock_res *lockres)
21168c2ecf20Sopenharmony_ci{
21178c2ecf20Sopenharmony_ci	int kick = 0;
21188c2ecf20Sopenharmony_ci
21198c2ecf20Sopenharmony_ci	/* If we know that another node is waiting on our lock, kick
21208c2ecf20Sopenharmony_ci	 * the downconvert thread * pre-emptively when we reach a release
21218c2ecf20Sopenharmony_ci	 * condition. */
21228c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
21238c2ecf20Sopenharmony_ci		switch(lockres->l_blocking) {
21248c2ecf20Sopenharmony_ci		case DLM_LOCK_EX:
21258c2ecf20Sopenharmony_ci			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
21268c2ecf20Sopenharmony_ci				kick = 1;
21278c2ecf20Sopenharmony_ci			break;
21288c2ecf20Sopenharmony_ci		case DLM_LOCK_PR:
21298c2ecf20Sopenharmony_ci			if (!lockres->l_ex_holders)
21308c2ecf20Sopenharmony_ci				kick = 1;
21318c2ecf20Sopenharmony_ci			break;
21328c2ecf20Sopenharmony_ci		default:
21338c2ecf20Sopenharmony_ci			BUG();
21348c2ecf20Sopenharmony_ci		}
21358c2ecf20Sopenharmony_ci	}
21368c2ecf20Sopenharmony_ci
21378c2ecf20Sopenharmony_ci	if (kick)
21388c2ecf20Sopenharmony_ci		ocfs2_wake_downconvert_thread(osb);
21398c2ecf20Sopenharmony_ci}
21408c2ecf20Sopenharmony_ci
21418c2ecf20Sopenharmony_ci#define OCFS2_SEC_BITS   34
21428c2ecf20Sopenharmony_ci#define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
21438c2ecf20Sopenharmony_ci#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
21448c2ecf20Sopenharmony_ci
21458c2ecf20Sopenharmony_ci/* LVB only has room for 64 bits of time here so we pack it for
21468c2ecf20Sopenharmony_ci * now. */
21478c2ecf20Sopenharmony_cistatic u64 ocfs2_pack_timespec(struct timespec64 *spec)
21488c2ecf20Sopenharmony_ci{
21498c2ecf20Sopenharmony_ci	u64 res;
21508c2ecf20Sopenharmony_ci	u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
21518c2ecf20Sopenharmony_ci	u32 nsec = spec->tv_nsec;
21528c2ecf20Sopenharmony_ci
21538c2ecf20Sopenharmony_ci	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
21548c2ecf20Sopenharmony_ci
21558c2ecf20Sopenharmony_ci	return res;
21568c2ecf20Sopenharmony_ci}
21578c2ecf20Sopenharmony_ci
21588c2ecf20Sopenharmony_ci/* Call this with the lockres locked. I am reasonably sure we don't
21598c2ecf20Sopenharmony_ci * need ip_lock in this function as anyone who would be changing those
21608c2ecf20Sopenharmony_ci * values is supposed to be blocked in ocfs2_inode_lock right now. */
21618c2ecf20Sopenharmony_cistatic void __ocfs2_stuff_meta_lvb(struct inode *inode)
21628c2ecf20Sopenharmony_ci{
21638c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
21648c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
21658c2ecf20Sopenharmony_ci	struct ocfs2_meta_lvb *lvb;
21668c2ecf20Sopenharmony_ci
21678c2ecf20Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci	/*
21708c2ecf20Sopenharmony_ci	 * Invalidate the LVB of a deleted inode - this way other
21718c2ecf20Sopenharmony_ci	 * nodes are forced to go to disk and discover the new inode
21728c2ecf20Sopenharmony_ci	 * status.
21738c2ecf20Sopenharmony_ci	 */
21748c2ecf20Sopenharmony_ci	if (oi->ip_flags & OCFS2_INODE_DELETED) {
21758c2ecf20Sopenharmony_ci		lvb->lvb_version = 0;
21768c2ecf20Sopenharmony_ci		goto out;
21778c2ecf20Sopenharmony_ci	}
21788c2ecf20Sopenharmony_ci
21798c2ecf20Sopenharmony_ci	lvb->lvb_version   = OCFS2_LVB_VERSION;
21808c2ecf20Sopenharmony_ci	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
21818c2ecf20Sopenharmony_ci	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
21828c2ecf20Sopenharmony_ci	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
21838c2ecf20Sopenharmony_ci	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
21848c2ecf20Sopenharmony_ci	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
21858c2ecf20Sopenharmony_ci	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
21868c2ecf20Sopenharmony_ci	lvb->lvb_iatime_packed  =
21878c2ecf20Sopenharmony_ci		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
21888c2ecf20Sopenharmony_ci	lvb->lvb_ictime_packed =
21898c2ecf20Sopenharmony_ci		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
21908c2ecf20Sopenharmony_ci	lvb->lvb_imtime_packed =
21918c2ecf20Sopenharmony_ci		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
21928c2ecf20Sopenharmony_ci	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
21938c2ecf20Sopenharmony_ci	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
21948c2ecf20Sopenharmony_ci	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
21958c2ecf20Sopenharmony_ci
21968c2ecf20Sopenharmony_ciout:
21978c2ecf20Sopenharmony_ci	mlog_meta_lvb(0, lockres);
21988c2ecf20Sopenharmony_ci}
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_cistatic void ocfs2_unpack_timespec(struct timespec64 *spec,
22018c2ecf20Sopenharmony_ci				  u64 packed_time)
22028c2ecf20Sopenharmony_ci{
22038c2ecf20Sopenharmony_ci	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
22048c2ecf20Sopenharmony_ci	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
22058c2ecf20Sopenharmony_ci}
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_cistatic void ocfs2_refresh_inode_from_lvb(struct inode *inode)
22088c2ecf20Sopenharmony_ci{
22098c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
22108c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
22118c2ecf20Sopenharmony_ci	struct ocfs2_meta_lvb *lvb;
22128c2ecf20Sopenharmony_ci
22138c2ecf20Sopenharmony_ci	mlog_meta_lvb(0, lockres);
22148c2ecf20Sopenharmony_ci
22158c2ecf20Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci	/* We're safe here without the lockres lock... */
22188c2ecf20Sopenharmony_ci	spin_lock(&oi->ip_lock);
22198c2ecf20Sopenharmony_ci	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
22208c2ecf20Sopenharmony_ci	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
22218c2ecf20Sopenharmony_ci
22228c2ecf20Sopenharmony_ci	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
22238c2ecf20Sopenharmony_ci	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
22248c2ecf20Sopenharmony_ci	ocfs2_set_inode_flags(inode);
22258c2ecf20Sopenharmony_ci
22268c2ecf20Sopenharmony_ci	/* fast-symlinks are a special case */
22278c2ecf20Sopenharmony_ci	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
22288c2ecf20Sopenharmony_ci		inode->i_blocks = 0;
22298c2ecf20Sopenharmony_ci	else
22308c2ecf20Sopenharmony_ci		inode->i_blocks = ocfs2_inode_sector_count(inode);
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
22338c2ecf20Sopenharmony_ci	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
22348c2ecf20Sopenharmony_ci	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
22358c2ecf20Sopenharmony_ci	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
22368c2ecf20Sopenharmony_ci	ocfs2_unpack_timespec(&inode->i_atime,
22378c2ecf20Sopenharmony_ci			      be64_to_cpu(lvb->lvb_iatime_packed));
22388c2ecf20Sopenharmony_ci	ocfs2_unpack_timespec(&inode->i_mtime,
22398c2ecf20Sopenharmony_ci			      be64_to_cpu(lvb->lvb_imtime_packed));
22408c2ecf20Sopenharmony_ci	ocfs2_unpack_timespec(&inode->i_ctime,
22418c2ecf20Sopenharmony_ci			      be64_to_cpu(lvb->lvb_ictime_packed));
22428c2ecf20Sopenharmony_ci	spin_unlock(&oi->ip_lock);
22438c2ecf20Sopenharmony_ci}
22448c2ecf20Sopenharmony_ci
22458c2ecf20Sopenharmony_cistatic inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
22468c2ecf20Sopenharmony_ci					      struct ocfs2_lock_res *lockres)
22478c2ecf20Sopenharmony_ci{
22488c2ecf20Sopenharmony_ci	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
22518c2ecf20Sopenharmony_ci	    && lvb->lvb_version == OCFS2_LVB_VERSION
22528c2ecf20Sopenharmony_ci	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
22538c2ecf20Sopenharmony_ci		return 1;
22548c2ecf20Sopenharmony_ci	return 0;
22558c2ecf20Sopenharmony_ci}
22568c2ecf20Sopenharmony_ci
22578c2ecf20Sopenharmony_ci/* Determine whether a lock resource needs to be refreshed, and
22588c2ecf20Sopenharmony_ci * arbitrate who gets to refresh it.
22598c2ecf20Sopenharmony_ci *
22608c2ecf20Sopenharmony_ci *   0 means no refresh needed.
22618c2ecf20Sopenharmony_ci *
22628c2ecf20Sopenharmony_ci *   > 0 means you need to refresh this and you MUST call
22638c2ecf20Sopenharmony_ci *   ocfs2_complete_lock_res_refresh afterwards. */
22648c2ecf20Sopenharmony_cistatic int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
22658c2ecf20Sopenharmony_ci{
22668c2ecf20Sopenharmony_ci	unsigned long flags;
22678c2ecf20Sopenharmony_ci	int status = 0;
22688c2ecf20Sopenharmony_ci
22698c2ecf20Sopenharmony_cirefresh_check:
22708c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
22718c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
22728c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
22738c2ecf20Sopenharmony_ci		goto bail;
22748c2ecf20Sopenharmony_ci	}
22758c2ecf20Sopenharmony_ci
22768c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
22778c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
22788c2ecf20Sopenharmony_ci
22798c2ecf20Sopenharmony_ci		ocfs2_wait_on_refreshing_lock(lockres);
22808c2ecf20Sopenharmony_ci		goto refresh_check;
22818c2ecf20Sopenharmony_ci	}
22828c2ecf20Sopenharmony_ci
22838c2ecf20Sopenharmony_ci	/* Ok, I'll be the one to refresh this lock. */
22848c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
22858c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
22868c2ecf20Sopenharmony_ci
22878c2ecf20Sopenharmony_ci	status = 1;
22888c2ecf20Sopenharmony_cibail:
22898c2ecf20Sopenharmony_ci	mlog(0, "status %d\n", status);
22908c2ecf20Sopenharmony_ci	return status;
22918c2ecf20Sopenharmony_ci}
22928c2ecf20Sopenharmony_ci
22938c2ecf20Sopenharmony_ci/* If status is non zero, I'll mark it as not being in refresh
22948c2ecf20Sopenharmony_ci * anymroe, but i won't clear the needs refresh flag. */
22958c2ecf20Sopenharmony_cistatic inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
22968c2ecf20Sopenharmony_ci						   int status)
22978c2ecf20Sopenharmony_ci{
22988c2ecf20Sopenharmony_ci	unsigned long flags;
22998c2ecf20Sopenharmony_ci
23008c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
23018c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
23028c2ecf20Sopenharmony_ci	if (!status)
23038c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
23048c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
23058c2ecf20Sopenharmony_ci
23068c2ecf20Sopenharmony_ci	wake_up(&lockres->l_event);
23078c2ecf20Sopenharmony_ci}
23088c2ecf20Sopenharmony_ci
23098c2ecf20Sopenharmony_ci/* may or may not return a bh if it went to disk. */
23108c2ecf20Sopenharmony_cistatic int ocfs2_inode_lock_update(struct inode *inode,
23118c2ecf20Sopenharmony_ci				  struct buffer_head **bh)
23128c2ecf20Sopenharmony_ci{
23138c2ecf20Sopenharmony_ci	int status = 0;
23148c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
23158c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
23168c2ecf20Sopenharmony_ci	struct ocfs2_dinode *fe;
23178c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
23188c2ecf20Sopenharmony_ci
23198c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
23208c2ecf20Sopenharmony_ci		goto bail;
23218c2ecf20Sopenharmony_ci
23228c2ecf20Sopenharmony_ci	spin_lock(&oi->ip_lock);
23238c2ecf20Sopenharmony_ci	if (oi->ip_flags & OCFS2_INODE_DELETED) {
23248c2ecf20Sopenharmony_ci		mlog(0, "Orphaned inode %llu was deleted while we "
23258c2ecf20Sopenharmony_ci		     "were waiting on a lock. ip_flags = 0x%x\n",
23268c2ecf20Sopenharmony_ci		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
23278c2ecf20Sopenharmony_ci		spin_unlock(&oi->ip_lock);
23288c2ecf20Sopenharmony_ci		status = -ENOENT;
23298c2ecf20Sopenharmony_ci		goto bail;
23308c2ecf20Sopenharmony_ci	}
23318c2ecf20Sopenharmony_ci	spin_unlock(&oi->ip_lock);
23328c2ecf20Sopenharmony_ci
23338c2ecf20Sopenharmony_ci	if (!ocfs2_should_refresh_lock_res(lockres))
23348c2ecf20Sopenharmony_ci		goto bail;
23358c2ecf20Sopenharmony_ci
23368c2ecf20Sopenharmony_ci	/* This will discard any caching information we might have had
23378c2ecf20Sopenharmony_ci	 * for the inode metadata. */
23388c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
23398c2ecf20Sopenharmony_ci
23408c2ecf20Sopenharmony_ci	ocfs2_extent_map_trunc(inode, 0);
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ci	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
23438c2ecf20Sopenharmony_ci		mlog(0, "Trusting LVB on inode %llu\n",
23448c2ecf20Sopenharmony_ci		     (unsigned long long)oi->ip_blkno);
23458c2ecf20Sopenharmony_ci		ocfs2_refresh_inode_from_lvb(inode);
23468c2ecf20Sopenharmony_ci	} else {
23478c2ecf20Sopenharmony_ci		/* Boo, we have to go to disk. */
23488c2ecf20Sopenharmony_ci		/* read bh, cast, ocfs2_refresh_inode */
23498c2ecf20Sopenharmony_ci		status = ocfs2_read_inode_block(inode, bh);
23508c2ecf20Sopenharmony_ci		if (status < 0) {
23518c2ecf20Sopenharmony_ci			mlog_errno(status);
23528c2ecf20Sopenharmony_ci			goto bail_refresh;
23538c2ecf20Sopenharmony_ci		}
23548c2ecf20Sopenharmony_ci		fe = (struct ocfs2_dinode *) (*bh)->b_data;
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_ci		/* This is a good chance to make sure we're not
23578c2ecf20Sopenharmony_ci		 * locking an invalid object.  ocfs2_read_inode_block()
23588c2ecf20Sopenharmony_ci		 * already checked that the inode block is sane.
23598c2ecf20Sopenharmony_ci		 *
23608c2ecf20Sopenharmony_ci		 * We bug on a stale inode here because we checked
23618c2ecf20Sopenharmony_ci		 * above whether it was wiped from disk. The wiping
23628c2ecf20Sopenharmony_ci		 * node provides a guarantee that we receive that
23638c2ecf20Sopenharmony_ci		 * message and can mark the inode before dropping any
23648c2ecf20Sopenharmony_ci		 * locks associated with it. */
23658c2ecf20Sopenharmony_ci		mlog_bug_on_msg(inode->i_generation !=
23668c2ecf20Sopenharmony_ci				le32_to_cpu(fe->i_generation),
23678c2ecf20Sopenharmony_ci				"Invalid dinode %llu disk generation: %u "
23688c2ecf20Sopenharmony_ci				"inode->i_generation: %u\n",
23698c2ecf20Sopenharmony_ci				(unsigned long long)oi->ip_blkno,
23708c2ecf20Sopenharmony_ci				le32_to_cpu(fe->i_generation),
23718c2ecf20Sopenharmony_ci				inode->i_generation);
23728c2ecf20Sopenharmony_ci		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
23738c2ecf20Sopenharmony_ci				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
23748c2ecf20Sopenharmony_ci				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
23758c2ecf20Sopenharmony_ci				(unsigned long long)oi->ip_blkno,
23768c2ecf20Sopenharmony_ci				(unsigned long long)le64_to_cpu(fe->i_dtime),
23778c2ecf20Sopenharmony_ci				le32_to_cpu(fe->i_flags));
23788c2ecf20Sopenharmony_ci
23798c2ecf20Sopenharmony_ci		ocfs2_refresh_inode(inode, fe);
23808c2ecf20Sopenharmony_ci		ocfs2_track_lock_refresh(lockres);
23818c2ecf20Sopenharmony_ci	}
23828c2ecf20Sopenharmony_ci
23838c2ecf20Sopenharmony_ci	status = 0;
23848c2ecf20Sopenharmony_cibail_refresh:
23858c2ecf20Sopenharmony_ci	ocfs2_complete_lock_res_refresh(lockres, status);
23868c2ecf20Sopenharmony_cibail:
23878c2ecf20Sopenharmony_ci	return status;
23888c2ecf20Sopenharmony_ci}
23898c2ecf20Sopenharmony_ci
/*
 * Hand the caller a referenced inode buffer head in *ret_bh.
 *
 * When @passed_bh is non-NULL it is reused with an extra reference
 * taken on it; otherwise the inode block is read.  Returns 0 or the
 * negative error from ocfs2_read_inode_block().
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int err;

	if (!passed_bh) {
		/* Nothing was read earlier, so fetch the block now. */
		err = ocfs2_read_inode_block(inode, ret_bh);
		if (err < 0)
			mlog_errno(err);
		return err;
	}

	/* The update already went to disk for us; share that bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;
	return 0;
}
24118c2ecf20Sopenharmony_ci
24128c2ecf20Sopenharmony_ci/*
24138c2ecf20Sopenharmony_ci * returns < 0 error if the callback will never be called, otherwise
24148c2ecf20Sopenharmony_ci * the result of the lock will be communicated via the callback.
24158c2ecf20Sopenharmony_ci */
24168c2ecf20Sopenharmony_ciint ocfs2_inode_lock_full_nested(struct inode *inode,
24178c2ecf20Sopenharmony_ci				 struct buffer_head **ret_bh,
24188c2ecf20Sopenharmony_ci				 int ex,
24198c2ecf20Sopenharmony_ci				 int arg_flags,
24208c2ecf20Sopenharmony_ci				 int subclass)
24218c2ecf20Sopenharmony_ci{
24228c2ecf20Sopenharmony_ci	int status, level, acquired;
24238c2ecf20Sopenharmony_ci	u32 dlm_flags;
24248c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = NULL;
24258c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
24268c2ecf20Sopenharmony_ci	struct buffer_head *local_bh = NULL;
24278c2ecf20Sopenharmony_ci
24288c2ecf20Sopenharmony_ci	mlog(0, "inode %llu, take %s META lock\n",
24298c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
24308c2ecf20Sopenharmony_ci	     ex ? "EXMODE" : "PRMODE");
24318c2ecf20Sopenharmony_ci
24328c2ecf20Sopenharmony_ci	status = 0;
24338c2ecf20Sopenharmony_ci	acquired = 0;
24348c2ecf20Sopenharmony_ci	/* We'll allow faking a readonly metadata lock for
24358c2ecf20Sopenharmony_ci	 * rodevices. */
24368c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb)) {
24378c2ecf20Sopenharmony_ci		if (ex)
24388c2ecf20Sopenharmony_ci			status = -EROFS;
24398c2ecf20Sopenharmony_ci		goto getbh;
24408c2ecf20Sopenharmony_ci	}
24418c2ecf20Sopenharmony_ci
24428c2ecf20Sopenharmony_ci	if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
24438c2ecf20Sopenharmony_ci	    ocfs2_mount_local(osb))
24448c2ecf20Sopenharmony_ci		goto update;
24458c2ecf20Sopenharmony_ci
24468c2ecf20Sopenharmony_ci	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
24478c2ecf20Sopenharmony_ci		ocfs2_wait_for_recovery(osb);
24488c2ecf20Sopenharmony_ci
24498c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_inode_lockres;
24508c2ecf20Sopenharmony_ci	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
24518c2ecf20Sopenharmony_ci	dlm_flags = 0;
24528c2ecf20Sopenharmony_ci	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
24538c2ecf20Sopenharmony_ci		dlm_flags |= DLM_LKF_NOQUEUE;
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
24568c2ecf20Sopenharmony_ci				      arg_flags, subclass, _RET_IP_);
24578c2ecf20Sopenharmony_ci	if (status < 0) {
24588c2ecf20Sopenharmony_ci		if (status != -EAGAIN)
24598c2ecf20Sopenharmony_ci			mlog_errno(status);
24608c2ecf20Sopenharmony_ci		goto bail;
24618c2ecf20Sopenharmony_ci	}
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci	/* Notify the error cleanup path to drop the cluster lock. */
24648c2ecf20Sopenharmony_ci	acquired = 1;
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci	/* We wait twice because a node may have died while we were in
24678c2ecf20Sopenharmony_ci	 * the lower dlm layers. The second time though, we've
24688c2ecf20Sopenharmony_ci	 * committed to owning this lock so we don't allow signals to
24698c2ecf20Sopenharmony_ci	 * abort the operation. */
24708c2ecf20Sopenharmony_ci	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
24718c2ecf20Sopenharmony_ci		ocfs2_wait_for_recovery(osb);
24728c2ecf20Sopenharmony_ci
24738c2ecf20Sopenharmony_ciupdate:
24748c2ecf20Sopenharmony_ci	/*
24758c2ecf20Sopenharmony_ci	 * We only see this flag if we're being called from
24768c2ecf20Sopenharmony_ci	 * ocfs2_read_locked_inode(). It means we're locking an inode
24778c2ecf20Sopenharmony_ci	 * which hasn't been populated yet, so clear the refresh flag
24788c2ecf20Sopenharmony_ci	 * and let the caller handle it.
24798c2ecf20Sopenharmony_ci	 */
24808c2ecf20Sopenharmony_ci	if (inode->i_state & I_NEW) {
24818c2ecf20Sopenharmony_ci		status = 0;
24828c2ecf20Sopenharmony_ci		if (lockres)
24838c2ecf20Sopenharmony_ci			ocfs2_complete_lock_res_refresh(lockres, 0);
24848c2ecf20Sopenharmony_ci		goto bail;
24858c2ecf20Sopenharmony_ci	}
24868c2ecf20Sopenharmony_ci
24878c2ecf20Sopenharmony_ci	/* This is fun. The caller may want a bh back, or it may
24888c2ecf20Sopenharmony_ci	 * not. ocfs2_inode_lock_update definitely wants one in, but
24898c2ecf20Sopenharmony_ci	 * may or may not read one, depending on what's in the
24908c2ecf20Sopenharmony_ci	 * LVB. The result of all of this is that we've *only* gone to
24918c2ecf20Sopenharmony_ci	 * disk if we have to, so the complexity is worthwhile. */
24928c2ecf20Sopenharmony_ci	status = ocfs2_inode_lock_update(inode, &local_bh);
24938c2ecf20Sopenharmony_ci	if (status < 0) {
24948c2ecf20Sopenharmony_ci		if (status != -ENOENT)
24958c2ecf20Sopenharmony_ci			mlog_errno(status);
24968c2ecf20Sopenharmony_ci		goto bail;
24978c2ecf20Sopenharmony_ci	}
24988c2ecf20Sopenharmony_cigetbh:
24998c2ecf20Sopenharmony_ci	if (ret_bh) {
25008c2ecf20Sopenharmony_ci		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
25018c2ecf20Sopenharmony_ci		if (status < 0) {
25028c2ecf20Sopenharmony_ci			mlog_errno(status);
25038c2ecf20Sopenharmony_ci			goto bail;
25048c2ecf20Sopenharmony_ci		}
25058c2ecf20Sopenharmony_ci	}
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_cibail:
25088c2ecf20Sopenharmony_ci	if (status < 0) {
25098c2ecf20Sopenharmony_ci		if (ret_bh && (*ret_bh)) {
25108c2ecf20Sopenharmony_ci			brelse(*ret_bh);
25118c2ecf20Sopenharmony_ci			*ret_bh = NULL;
25128c2ecf20Sopenharmony_ci		}
25138c2ecf20Sopenharmony_ci		if (acquired)
25148c2ecf20Sopenharmony_ci			ocfs2_inode_unlock(inode, ex);
25158c2ecf20Sopenharmony_ci	}
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_ci	brelse(local_bh);
25188c2ecf20Sopenharmony_ci	return status;
25198c2ecf20Sopenharmony_ci}
25208c2ecf20Sopenharmony_ci
25218c2ecf20Sopenharmony_ci/*
25228c2ecf20Sopenharmony_ci * This is working around a lock inversion between tasks acquiring DLM
25238c2ecf20Sopenharmony_ci * locks while holding a page lock and the downconvert thread which
25248c2ecf20Sopenharmony_ci * blocks dlm lock acquiry while acquiring page locks.
25258c2ecf20Sopenharmony_ci *
25268c2ecf20Sopenharmony_ci * ** These _with_page variantes are only intended to be called from aop
25278c2ecf20Sopenharmony_ci * methods that hold page locks and return a very specific *positive* error
25288c2ecf20Sopenharmony_ci * code that aop methods pass up to the VFS -- test for errors with != 0. **
25298c2ecf20Sopenharmony_ci *
25308c2ecf20Sopenharmony_ci * The DLM is called such that it returns -EAGAIN if it would have
25318c2ecf20Sopenharmony_ci * blocked waiting for the downconvert thread.  In that case we unlock
25328c2ecf20Sopenharmony_ci * our page so the downconvert thread can make progress.  Once we've
25338c2ecf20Sopenharmony_ci * done this we have to return AOP_TRUNCATED_PAGE so the aop method
25348c2ecf20Sopenharmony_ci * that called us can bubble that back up into the VFS who will then
25358c2ecf20Sopenharmony_ci * immediately retry the aop call.
25368c2ecf20Sopenharmony_ci */
25378c2ecf20Sopenharmony_ciint ocfs2_inode_lock_with_page(struct inode *inode,
25388c2ecf20Sopenharmony_ci			      struct buffer_head **ret_bh,
25398c2ecf20Sopenharmony_ci			      int ex,
25408c2ecf20Sopenharmony_ci			      struct page *page)
25418c2ecf20Sopenharmony_ci{
25428c2ecf20Sopenharmony_ci	int ret;
25438c2ecf20Sopenharmony_ci
25448c2ecf20Sopenharmony_ci	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
25458c2ecf20Sopenharmony_ci	if (ret == -EAGAIN) {
25468c2ecf20Sopenharmony_ci		unlock_page(page);
25478c2ecf20Sopenharmony_ci		/*
25488c2ecf20Sopenharmony_ci		 * If we can't get inode lock immediately, we should not return
25498c2ecf20Sopenharmony_ci		 * directly here, since this will lead to a softlockup problem.
25508c2ecf20Sopenharmony_ci		 * The method is to get a blocking lock and immediately unlock
25518c2ecf20Sopenharmony_ci		 * before returning, this can avoid CPU resource waste due to
25528c2ecf20Sopenharmony_ci		 * lots of retries, and benefits fairness in getting lock.
25538c2ecf20Sopenharmony_ci		 */
25548c2ecf20Sopenharmony_ci		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
25558c2ecf20Sopenharmony_ci			ocfs2_inode_unlock(inode, ex);
25568c2ecf20Sopenharmony_ci		ret = AOP_TRUNCATED_PAGE;
25578c2ecf20Sopenharmony_ci	}
25588c2ecf20Sopenharmony_ci
25598c2ecf20Sopenharmony_ci	return ret;
25608c2ecf20Sopenharmony_ci}
25618c2ecf20Sopenharmony_ci
25628c2ecf20Sopenharmony_ciint ocfs2_inode_lock_atime(struct inode *inode,
25638c2ecf20Sopenharmony_ci			  struct vfsmount *vfsmnt,
25648c2ecf20Sopenharmony_ci			  int *level, int wait)
25658c2ecf20Sopenharmony_ci{
25668c2ecf20Sopenharmony_ci	int ret;
25678c2ecf20Sopenharmony_ci
25688c2ecf20Sopenharmony_ci	if (wait)
25698c2ecf20Sopenharmony_ci		ret = ocfs2_inode_lock(inode, NULL, 0);
25708c2ecf20Sopenharmony_ci	else
25718c2ecf20Sopenharmony_ci		ret = ocfs2_try_inode_lock(inode, NULL, 0);
25728c2ecf20Sopenharmony_ci
25738c2ecf20Sopenharmony_ci	if (ret < 0) {
25748c2ecf20Sopenharmony_ci		if (ret != -EAGAIN)
25758c2ecf20Sopenharmony_ci			mlog_errno(ret);
25768c2ecf20Sopenharmony_ci		return ret;
25778c2ecf20Sopenharmony_ci	}
25788c2ecf20Sopenharmony_ci
25798c2ecf20Sopenharmony_ci	/*
25808c2ecf20Sopenharmony_ci	 * If we should update atime, we will get EX lock,
25818c2ecf20Sopenharmony_ci	 * otherwise we just get PR lock.
25828c2ecf20Sopenharmony_ci	 */
25838c2ecf20Sopenharmony_ci	if (ocfs2_should_update_atime(inode, vfsmnt)) {
25848c2ecf20Sopenharmony_ci		struct buffer_head *bh = NULL;
25858c2ecf20Sopenharmony_ci
25868c2ecf20Sopenharmony_ci		ocfs2_inode_unlock(inode, 0);
25878c2ecf20Sopenharmony_ci		if (wait)
25888c2ecf20Sopenharmony_ci			ret = ocfs2_inode_lock(inode, &bh, 1);
25898c2ecf20Sopenharmony_ci		else
25908c2ecf20Sopenharmony_ci			ret = ocfs2_try_inode_lock(inode, &bh, 1);
25918c2ecf20Sopenharmony_ci
25928c2ecf20Sopenharmony_ci		if (ret < 0) {
25938c2ecf20Sopenharmony_ci			if (ret != -EAGAIN)
25948c2ecf20Sopenharmony_ci				mlog_errno(ret);
25958c2ecf20Sopenharmony_ci			return ret;
25968c2ecf20Sopenharmony_ci		}
25978c2ecf20Sopenharmony_ci		*level = 1;
25988c2ecf20Sopenharmony_ci		if (ocfs2_should_update_atime(inode, vfsmnt))
25998c2ecf20Sopenharmony_ci			ocfs2_update_inode_atime(inode, bh);
26008c2ecf20Sopenharmony_ci		brelse(bh);
26018c2ecf20Sopenharmony_ci	} else
26028c2ecf20Sopenharmony_ci		*level = 0;
26038c2ecf20Sopenharmony_ci
26048c2ecf20Sopenharmony_ci	return ret;
26058c2ecf20Sopenharmony_ci}
26068c2ecf20Sopenharmony_ci
26078c2ecf20Sopenharmony_civoid ocfs2_inode_unlock(struct inode *inode,
26088c2ecf20Sopenharmony_ci		       int ex)
26098c2ecf20Sopenharmony_ci{
26108c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
26118c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
26128c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
26138c2ecf20Sopenharmony_ci
26148c2ecf20Sopenharmony_ci	mlog(0, "inode %llu drop %s META lock\n",
26158c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
26168c2ecf20Sopenharmony_ci	     ex ? "EXMODE" : "PRMODE");
26178c2ecf20Sopenharmony_ci
26188c2ecf20Sopenharmony_ci	if (!ocfs2_is_hard_readonly(osb) &&
26198c2ecf20Sopenharmony_ci	    !ocfs2_mount_local(osb))
26208c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, level);
26218c2ecf20Sopenharmony_ci}
26228c2ecf20Sopenharmony_ci
26238c2ecf20Sopenharmony_ci/*
26248c2ecf20Sopenharmony_ci * This _tracker variantes are introduced to deal with the recursive cluster
26258c2ecf20Sopenharmony_ci * locking issue. The idea is to keep track of a lock holder on the stack of
26268c2ecf20Sopenharmony_ci * the current process. If there's a lock holder on the stack, we know the
26278c2ecf20Sopenharmony_ci * task context is already protected by cluster locking. Currently, they're
26288c2ecf20Sopenharmony_ci * used in some VFS entry routines.
26298c2ecf20Sopenharmony_ci *
26308c2ecf20Sopenharmony_ci * return < 0 on error, return == 0 if there's no lock holder on the stack
26318c2ecf20Sopenharmony_ci * before this call, return == 1 if this call would be a recursive locking.
26328c2ecf20Sopenharmony_ci * return == -1 if this lock attempt will cause an upgrade which is forbidden.
26338c2ecf20Sopenharmony_ci *
26348c2ecf20Sopenharmony_ci * When taking lock levels into account,we face some different situations.
26358c2ecf20Sopenharmony_ci *
26368c2ecf20Sopenharmony_ci * 1. no lock is held
26378c2ecf20Sopenharmony_ci *    In this case, just lock the inode as requested and return 0
26388c2ecf20Sopenharmony_ci *
26398c2ecf20Sopenharmony_ci * 2. We are holding a lock
26408c2ecf20Sopenharmony_ci *    For this situation, things diverges into several cases
26418c2ecf20Sopenharmony_ci *
26428c2ecf20Sopenharmony_ci *    wanted     holding	     what to do
26438c2ecf20Sopenharmony_ci *    ex		ex	    see 2.1 below
26448c2ecf20Sopenharmony_ci *    ex		pr	    see 2.2 below
26458c2ecf20Sopenharmony_ci *    pr		ex	    see 2.1 below
26468c2ecf20Sopenharmony_ci *    pr		pr	    see 2.1 below
26478c2ecf20Sopenharmony_ci *
26488c2ecf20Sopenharmony_ci *    2.1 lock level that is been held is compatible
26498c2ecf20Sopenharmony_ci *    with the wanted level, so no lock action will be tacken.
26508c2ecf20Sopenharmony_ci *
26518c2ecf20Sopenharmony_ci *    2.2 Otherwise, an upgrade is needed, but it is forbidden.
26528c2ecf20Sopenharmony_ci *
26538c2ecf20Sopenharmony_ci * Reason why upgrade within a process is forbidden is that
26548c2ecf20Sopenharmony_ci * lock upgrade may cause dead lock. The following illustrates
26558c2ecf20Sopenharmony_ci * how it happens.
26568c2ecf20Sopenharmony_ci *
26578c2ecf20Sopenharmony_ci *         thread on node1                             thread on node2
26588c2ecf20Sopenharmony_ci * ocfs2_inode_lock_tracker(ex=0)
26598c2ecf20Sopenharmony_ci *
26608c2ecf20Sopenharmony_ci *                                <======   ocfs2_inode_lock_tracker(ex=1)
26618c2ecf20Sopenharmony_ci *
26628c2ecf20Sopenharmony_ci * ocfs2_inode_lock_tracker(ex=1)
26638c2ecf20Sopenharmony_ci */
26648c2ecf20Sopenharmony_ciint ocfs2_inode_lock_tracker(struct inode *inode,
26658c2ecf20Sopenharmony_ci			     struct buffer_head **ret_bh,
26668c2ecf20Sopenharmony_ci			     int ex,
26678c2ecf20Sopenharmony_ci			     struct ocfs2_lock_holder *oh)
26688c2ecf20Sopenharmony_ci{
26698c2ecf20Sopenharmony_ci	int status = 0;
26708c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
26718c2ecf20Sopenharmony_ci	struct ocfs2_lock_holder *tmp_oh;
26728c2ecf20Sopenharmony_ci	struct pid *pid = task_pid(current);
26738c2ecf20Sopenharmony_ci
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_inode_lockres;
26768c2ecf20Sopenharmony_ci	tmp_oh = ocfs2_pid_holder(lockres, pid);
26778c2ecf20Sopenharmony_ci
26788c2ecf20Sopenharmony_ci	if (!tmp_oh) {
26798c2ecf20Sopenharmony_ci		/*
26808c2ecf20Sopenharmony_ci		 * This corresponds to the case 1.
26818c2ecf20Sopenharmony_ci		 * We haven't got any lock before.
26828c2ecf20Sopenharmony_ci		 */
26838c2ecf20Sopenharmony_ci		status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
26848c2ecf20Sopenharmony_ci		if (status < 0) {
26858c2ecf20Sopenharmony_ci			if (status != -ENOENT)
26868c2ecf20Sopenharmony_ci				mlog_errno(status);
26878c2ecf20Sopenharmony_ci			return status;
26888c2ecf20Sopenharmony_ci		}
26898c2ecf20Sopenharmony_ci
26908c2ecf20Sopenharmony_ci		oh->oh_ex = ex;
26918c2ecf20Sopenharmony_ci		ocfs2_add_holder(lockres, oh);
26928c2ecf20Sopenharmony_ci		return 0;
26938c2ecf20Sopenharmony_ci	}
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci	if (unlikely(ex && !tmp_oh->oh_ex)) {
26968c2ecf20Sopenharmony_ci		/*
26978c2ecf20Sopenharmony_ci		 * case 2.2 upgrade may cause dead lock, forbid it.
26988c2ecf20Sopenharmony_ci		 */
26998c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Recursive locking is not permitted to "
27008c2ecf20Sopenharmony_ci		     "upgrade to EX level from PR level.\n");
27018c2ecf20Sopenharmony_ci		dump_stack();
27028c2ecf20Sopenharmony_ci		return -EINVAL;
27038c2ecf20Sopenharmony_ci	}
27048c2ecf20Sopenharmony_ci
27058c2ecf20Sopenharmony_ci	/*
27068c2ecf20Sopenharmony_ci	 *  case 2.1 OCFS2_META_LOCK_GETBH flag make ocfs2_inode_lock_full.
27078c2ecf20Sopenharmony_ci	 *  ignore the lock level and just update it.
27088c2ecf20Sopenharmony_ci	 */
27098c2ecf20Sopenharmony_ci	if (ret_bh) {
27108c2ecf20Sopenharmony_ci		status = ocfs2_inode_lock_full(inode, ret_bh, ex,
27118c2ecf20Sopenharmony_ci					       OCFS2_META_LOCK_GETBH);
27128c2ecf20Sopenharmony_ci		if (status < 0) {
27138c2ecf20Sopenharmony_ci			if (status != -ENOENT)
27148c2ecf20Sopenharmony_ci				mlog_errno(status);
27158c2ecf20Sopenharmony_ci			return status;
27168c2ecf20Sopenharmony_ci		}
27178c2ecf20Sopenharmony_ci	}
27188c2ecf20Sopenharmony_ci	return tmp_oh ? 1 : 0;
27198c2ecf20Sopenharmony_ci}
27208c2ecf20Sopenharmony_ci
27218c2ecf20Sopenharmony_civoid ocfs2_inode_unlock_tracker(struct inode *inode,
27228c2ecf20Sopenharmony_ci				int ex,
27238c2ecf20Sopenharmony_ci				struct ocfs2_lock_holder *oh,
27248c2ecf20Sopenharmony_ci				int had_lock)
27258c2ecf20Sopenharmony_ci{
27268c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
27278c2ecf20Sopenharmony_ci
27288c2ecf20Sopenharmony_ci	lockres = &OCFS2_I(inode)->ip_inode_lockres;
27298c2ecf20Sopenharmony_ci	/* had_lock means that the currect process already takes the cluster
27308c2ecf20Sopenharmony_ci	 * lock previously.
27318c2ecf20Sopenharmony_ci	 * If had_lock is 1, we have nothing to do here.
27328c2ecf20Sopenharmony_ci	 * If had_lock is 0, we will release the lock.
27338c2ecf20Sopenharmony_ci	 */
27348c2ecf20Sopenharmony_ci	if (!had_lock) {
27358c2ecf20Sopenharmony_ci		ocfs2_inode_unlock(inode, oh->oh_ex);
27368c2ecf20Sopenharmony_ci		ocfs2_remove_holder(lockres, oh);
27378c2ecf20Sopenharmony_ci	}
27388c2ecf20Sopenharmony_ci}
27398c2ecf20Sopenharmony_ci
27408c2ecf20Sopenharmony_ciint ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
27418c2ecf20Sopenharmony_ci{
27428c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
27438c2ecf20Sopenharmony_ci	struct ocfs2_orphan_scan_lvb *lvb;
27448c2ecf20Sopenharmony_ci	int status = 0;
27458c2ecf20Sopenharmony_ci
27468c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
27478c2ecf20Sopenharmony_ci		return -EROFS;
27488c2ecf20Sopenharmony_ci
27498c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
27508c2ecf20Sopenharmony_ci		return 0;
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci	lockres = &osb->osb_orphan_scan.os_lockres;
27538c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
27548c2ecf20Sopenharmony_ci	if (status < 0)
27558c2ecf20Sopenharmony_ci		return status;
27568c2ecf20Sopenharmony_ci
27578c2ecf20Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
27588c2ecf20Sopenharmony_ci	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
27598c2ecf20Sopenharmony_ci	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
27608c2ecf20Sopenharmony_ci		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
27618c2ecf20Sopenharmony_ci	else
27628c2ecf20Sopenharmony_ci		*seqno = osb->osb_orphan_scan.os_seqno + 1;
27638c2ecf20Sopenharmony_ci
27648c2ecf20Sopenharmony_ci	return status;
27658c2ecf20Sopenharmony_ci}
27668c2ecf20Sopenharmony_ci
27678c2ecf20Sopenharmony_civoid ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
27688c2ecf20Sopenharmony_ci{
27698c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
27708c2ecf20Sopenharmony_ci	struct ocfs2_orphan_scan_lvb *lvb;
27718c2ecf20Sopenharmony_ci
27728c2ecf20Sopenharmony_ci	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
27738c2ecf20Sopenharmony_ci		lockres = &osb->osb_orphan_scan.os_lockres;
27748c2ecf20Sopenharmony_ci		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
27758c2ecf20Sopenharmony_ci		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
27768c2ecf20Sopenharmony_ci		lvb->lvb_os_seqno = cpu_to_be32(seqno);
27778c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
27788c2ecf20Sopenharmony_ci	}
27798c2ecf20Sopenharmony_ci}
27808c2ecf20Sopenharmony_ci
27818c2ecf20Sopenharmony_ciint ocfs2_super_lock(struct ocfs2_super *osb,
27828c2ecf20Sopenharmony_ci		     int ex)
27838c2ecf20Sopenharmony_ci{
27848c2ecf20Sopenharmony_ci	int status = 0;
27858c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
27868c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
27878c2ecf20Sopenharmony_ci
27888c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
27898c2ecf20Sopenharmony_ci		return -EROFS;
27908c2ecf20Sopenharmony_ci
27918c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
27928c2ecf20Sopenharmony_ci		goto bail;
27938c2ecf20Sopenharmony_ci
27948c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
27958c2ecf20Sopenharmony_ci	if (status < 0) {
27968c2ecf20Sopenharmony_ci		mlog_errno(status);
27978c2ecf20Sopenharmony_ci		goto bail;
27988c2ecf20Sopenharmony_ci	}
27998c2ecf20Sopenharmony_ci
28008c2ecf20Sopenharmony_ci	/* The super block lock path is really in the best position to
28018c2ecf20Sopenharmony_ci	 * know when resources covered by the lock need to be
28028c2ecf20Sopenharmony_ci	 * refreshed, so we do it here. Of course, making sense of
28038c2ecf20Sopenharmony_ci	 * everything is up to the caller :) */
28048c2ecf20Sopenharmony_ci	status = ocfs2_should_refresh_lock_res(lockres);
28058c2ecf20Sopenharmony_ci	if (status) {
28068c2ecf20Sopenharmony_ci		status = ocfs2_refresh_slot_info(osb);
28078c2ecf20Sopenharmony_ci
28088c2ecf20Sopenharmony_ci		ocfs2_complete_lock_res_refresh(lockres, status);
28098c2ecf20Sopenharmony_ci
28108c2ecf20Sopenharmony_ci		if (status < 0) {
28118c2ecf20Sopenharmony_ci			ocfs2_cluster_unlock(osb, lockres, level);
28128c2ecf20Sopenharmony_ci			mlog_errno(status);
28138c2ecf20Sopenharmony_ci		}
28148c2ecf20Sopenharmony_ci		ocfs2_track_lock_refresh(lockres);
28158c2ecf20Sopenharmony_ci	}
28168c2ecf20Sopenharmony_cibail:
28178c2ecf20Sopenharmony_ci	return status;
28188c2ecf20Sopenharmony_ci}
28198c2ecf20Sopenharmony_ci
28208c2ecf20Sopenharmony_civoid ocfs2_super_unlock(struct ocfs2_super *osb,
28218c2ecf20Sopenharmony_ci			int ex)
28228c2ecf20Sopenharmony_ci{
28238c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
28248c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
28258c2ecf20Sopenharmony_ci
28268c2ecf20Sopenharmony_ci	if (!ocfs2_mount_local(osb))
28278c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, level);
28288c2ecf20Sopenharmony_ci}
28298c2ecf20Sopenharmony_ci
28308c2ecf20Sopenharmony_ciint ocfs2_rename_lock(struct ocfs2_super *osb)
28318c2ecf20Sopenharmony_ci{
28328c2ecf20Sopenharmony_ci	int status;
28338c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
28348c2ecf20Sopenharmony_ci
28358c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
28368c2ecf20Sopenharmony_ci		return -EROFS;
28378c2ecf20Sopenharmony_ci
28388c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
28398c2ecf20Sopenharmony_ci		return 0;
28408c2ecf20Sopenharmony_ci
28418c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
28428c2ecf20Sopenharmony_ci	if (status < 0)
28438c2ecf20Sopenharmony_ci		mlog_errno(status);
28448c2ecf20Sopenharmony_ci
28458c2ecf20Sopenharmony_ci	return status;
28468c2ecf20Sopenharmony_ci}
28478c2ecf20Sopenharmony_ci
28488c2ecf20Sopenharmony_civoid ocfs2_rename_unlock(struct ocfs2_super *osb)
28498c2ecf20Sopenharmony_ci{
28508c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
28518c2ecf20Sopenharmony_ci
28528c2ecf20Sopenharmony_ci	if (!ocfs2_mount_local(osb))
28538c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
28548c2ecf20Sopenharmony_ci}
28558c2ecf20Sopenharmony_ci
28568c2ecf20Sopenharmony_ciint ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
28578c2ecf20Sopenharmony_ci{
28588c2ecf20Sopenharmony_ci	int status;
28598c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
28608c2ecf20Sopenharmony_ci
28618c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
28628c2ecf20Sopenharmony_ci		return -EROFS;
28638c2ecf20Sopenharmony_ci
28648c2ecf20Sopenharmony_ci	if (ex)
28658c2ecf20Sopenharmony_ci		down_write(&osb->nfs_sync_rwlock);
28668c2ecf20Sopenharmony_ci	else
28678c2ecf20Sopenharmony_ci		down_read(&osb->nfs_sync_rwlock);
28688c2ecf20Sopenharmony_ci
28698c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
28708c2ecf20Sopenharmony_ci		return 0;
28718c2ecf20Sopenharmony_ci
28728c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
28738c2ecf20Sopenharmony_ci				    0, 0);
28748c2ecf20Sopenharmony_ci	if (status < 0) {
28758c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
28768c2ecf20Sopenharmony_ci
28778c2ecf20Sopenharmony_ci		if (ex)
28788c2ecf20Sopenharmony_ci			up_write(&osb->nfs_sync_rwlock);
28798c2ecf20Sopenharmony_ci		else
28808c2ecf20Sopenharmony_ci			up_read(&osb->nfs_sync_rwlock);
28818c2ecf20Sopenharmony_ci	}
28828c2ecf20Sopenharmony_ci
28838c2ecf20Sopenharmony_ci	return status;
28848c2ecf20Sopenharmony_ci}
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_civoid ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
28878c2ecf20Sopenharmony_ci{
28888c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
28898c2ecf20Sopenharmony_ci
28908c2ecf20Sopenharmony_ci	if (!ocfs2_mount_local(osb))
28918c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres,
28928c2ecf20Sopenharmony_ci				     ex ? LKM_EXMODE : LKM_PRMODE);
28938c2ecf20Sopenharmony_ci	if (ex)
28948c2ecf20Sopenharmony_ci		up_write(&osb->nfs_sync_rwlock);
28958c2ecf20Sopenharmony_ci	else
28968c2ecf20Sopenharmony_ci		up_read(&osb->nfs_sync_rwlock);
28978c2ecf20Sopenharmony_ci}
28988c2ecf20Sopenharmony_ci
28998c2ecf20Sopenharmony_ciint ocfs2_trim_fs_lock(struct ocfs2_super *osb,
29008c2ecf20Sopenharmony_ci		       struct ocfs2_trim_fs_info *info, int trylock)
29018c2ecf20Sopenharmony_ci{
29028c2ecf20Sopenharmony_ci	int status;
29038c2ecf20Sopenharmony_ci	struct ocfs2_trim_fs_lvb *lvb;
29048c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
29058c2ecf20Sopenharmony_ci
29068c2ecf20Sopenharmony_ci	if (info)
29078c2ecf20Sopenharmony_ci		info->tf_valid = 0;
29088c2ecf20Sopenharmony_ci
29098c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
29108c2ecf20Sopenharmony_ci		return -EROFS;
29118c2ecf20Sopenharmony_ci
29128c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
29138c2ecf20Sopenharmony_ci		return 0;
29148c2ecf20Sopenharmony_ci
29158c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX,
29168c2ecf20Sopenharmony_ci				    trylock ? DLM_LKF_NOQUEUE : 0, 0);
29178c2ecf20Sopenharmony_ci	if (status < 0) {
29188c2ecf20Sopenharmony_ci		if (status != -EAGAIN)
29198c2ecf20Sopenharmony_ci			mlog_errno(status);
29208c2ecf20Sopenharmony_ci		return status;
29218c2ecf20Sopenharmony_ci	}
29228c2ecf20Sopenharmony_ci
29238c2ecf20Sopenharmony_ci	if (info) {
29248c2ecf20Sopenharmony_ci		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
29258c2ecf20Sopenharmony_ci		if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
29268c2ecf20Sopenharmony_ci		    lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) {
29278c2ecf20Sopenharmony_ci			info->tf_valid = 1;
29288c2ecf20Sopenharmony_ci			info->tf_success = lvb->lvb_success;
29298c2ecf20Sopenharmony_ci			info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum);
29308c2ecf20Sopenharmony_ci			info->tf_start = be64_to_cpu(lvb->lvb_start);
29318c2ecf20Sopenharmony_ci			info->tf_len = be64_to_cpu(lvb->lvb_len);
29328c2ecf20Sopenharmony_ci			info->tf_minlen = be64_to_cpu(lvb->lvb_minlen);
29338c2ecf20Sopenharmony_ci			info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen);
29348c2ecf20Sopenharmony_ci		}
29358c2ecf20Sopenharmony_ci	}
29368c2ecf20Sopenharmony_ci
29378c2ecf20Sopenharmony_ci	return status;
29388c2ecf20Sopenharmony_ci}
29398c2ecf20Sopenharmony_ci
29408c2ecf20Sopenharmony_civoid ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
29418c2ecf20Sopenharmony_ci			  struct ocfs2_trim_fs_info *info)
29428c2ecf20Sopenharmony_ci{
29438c2ecf20Sopenharmony_ci	struct ocfs2_trim_fs_lvb *lvb;
29448c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
29458c2ecf20Sopenharmony_ci
29468c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
29478c2ecf20Sopenharmony_ci		return;
29488c2ecf20Sopenharmony_ci
29498c2ecf20Sopenharmony_ci	if (info) {
29508c2ecf20Sopenharmony_ci		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
29518c2ecf20Sopenharmony_ci		lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION;
29528c2ecf20Sopenharmony_ci		lvb->lvb_success = info->tf_success;
29538c2ecf20Sopenharmony_ci		lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum);
29548c2ecf20Sopenharmony_ci		lvb->lvb_start = cpu_to_be64(info->tf_start);
29558c2ecf20Sopenharmony_ci		lvb->lvb_len = cpu_to_be64(info->tf_len);
29568c2ecf20Sopenharmony_ci		lvb->lvb_minlen = cpu_to_be64(info->tf_minlen);
29578c2ecf20Sopenharmony_ci		lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen);
29588c2ecf20Sopenharmony_ci	}
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
29618c2ecf20Sopenharmony_ci}
29628c2ecf20Sopenharmony_ci
29638c2ecf20Sopenharmony_ciint ocfs2_dentry_lock(struct dentry *dentry, int ex)
29648c2ecf20Sopenharmony_ci{
29658c2ecf20Sopenharmony_ci	int ret;
29668c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
29678c2ecf20Sopenharmony_ci	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
29688c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
29698c2ecf20Sopenharmony_ci
29708c2ecf20Sopenharmony_ci	BUG_ON(!dl);
29718c2ecf20Sopenharmony_ci
29728c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb)) {
29738c2ecf20Sopenharmony_ci		if (ex)
29748c2ecf20Sopenharmony_ci			return -EROFS;
29758c2ecf20Sopenharmony_ci		return 0;
29768c2ecf20Sopenharmony_ci	}
29778c2ecf20Sopenharmony_ci
29788c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
29798c2ecf20Sopenharmony_ci		return 0;
29808c2ecf20Sopenharmony_ci
29818c2ecf20Sopenharmony_ci	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
29828c2ecf20Sopenharmony_ci	if (ret < 0)
29838c2ecf20Sopenharmony_ci		mlog_errno(ret);
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_ci	return ret;
29868c2ecf20Sopenharmony_ci}
29878c2ecf20Sopenharmony_ci
29888c2ecf20Sopenharmony_civoid ocfs2_dentry_unlock(struct dentry *dentry, int ex)
29898c2ecf20Sopenharmony_ci{
29908c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
29918c2ecf20Sopenharmony_ci	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
29928c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
29938c2ecf20Sopenharmony_ci
29948c2ecf20Sopenharmony_ci	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
29958c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
29968c2ecf20Sopenharmony_ci}
29978c2ecf20Sopenharmony_ci
29988c2ecf20Sopenharmony_ci/* Reference counting of the dlm debug structure. We want this because
29998c2ecf20Sopenharmony_ci * open references on the debug inodes can live on after a mount, so
30008c2ecf20Sopenharmony_ci * we can't rely on the ocfs2_super to always exist. */
30018c2ecf20Sopenharmony_cistatic void ocfs2_dlm_debug_free(struct kref *kref)
30028c2ecf20Sopenharmony_ci{
30038c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug;
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
30068c2ecf20Sopenharmony_ci
30078c2ecf20Sopenharmony_ci	kfree(dlm_debug);
30088c2ecf20Sopenharmony_ci}
30098c2ecf20Sopenharmony_ci
30108c2ecf20Sopenharmony_civoid ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
30118c2ecf20Sopenharmony_ci{
30128c2ecf20Sopenharmony_ci	if (dlm_debug)
30138c2ecf20Sopenharmony_ci		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
30148c2ecf20Sopenharmony_ci}
30158c2ecf20Sopenharmony_ci
30168c2ecf20Sopenharmony_cistatic void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
30178c2ecf20Sopenharmony_ci{
30188c2ecf20Sopenharmony_ci	kref_get(&debug->d_refcnt);
30198c2ecf20Sopenharmony_ci}
30208c2ecf20Sopenharmony_ci
30218c2ecf20Sopenharmony_cistruct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
30228c2ecf20Sopenharmony_ci{
30238c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug;
30248c2ecf20Sopenharmony_ci
30258c2ecf20Sopenharmony_ci	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
30268c2ecf20Sopenharmony_ci	if (!dlm_debug) {
30278c2ecf20Sopenharmony_ci		mlog_errno(-ENOMEM);
30288c2ecf20Sopenharmony_ci		goto out;
30298c2ecf20Sopenharmony_ci	}
30308c2ecf20Sopenharmony_ci
30318c2ecf20Sopenharmony_ci	kref_init(&dlm_debug->d_refcnt);
30328c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
30338c2ecf20Sopenharmony_ci	dlm_debug->d_filter_secs = 0;
30348c2ecf20Sopenharmony_ciout:
30358c2ecf20Sopenharmony_ci	return dlm_debug;
30368c2ecf20Sopenharmony_ci}
30378c2ecf20Sopenharmony_ci
30388c2ecf20Sopenharmony_ci/* Access to this is arbitrated for us via seq_file->sem. */
30398c2ecf20Sopenharmony_cistruct ocfs2_dlm_seq_priv {
30408c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *p_dlm_debug;
30418c2ecf20Sopenharmony_ci	struct ocfs2_lock_res p_iter_res;
30428c2ecf20Sopenharmony_ci	struct ocfs2_lock_res p_tmp_res;
30438c2ecf20Sopenharmony_ci};
30448c2ecf20Sopenharmony_ci
30458c2ecf20Sopenharmony_cistatic struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
30468c2ecf20Sopenharmony_ci						 struct ocfs2_dlm_seq_priv *priv)
30478c2ecf20Sopenharmony_ci{
30488c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *iter, *ret = NULL;
30498c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
30508c2ecf20Sopenharmony_ci
30518c2ecf20Sopenharmony_ci	assert_spin_locked(&ocfs2_dlm_tracking_lock);
30528c2ecf20Sopenharmony_ci
30538c2ecf20Sopenharmony_ci	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
30548c2ecf20Sopenharmony_ci		/* discover the head of the list */
30558c2ecf20Sopenharmony_ci		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
30568c2ecf20Sopenharmony_ci			mlog(0, "End of list found, %p\n", ret);
30578c2ecf20Sopenharmony_ci			break;
30588c2ecf20Sopenharmony_ci		}
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_ci		/* We track our "dummy" iteration lockres' by a NULL
30618c2ecf20Sopenharmony_ci		 * l_ops field. */
30628c2ecf20Sopenharmony_ci		if (iter->l_ops != NULL) {
30638c2ecf20Sopenharmony_ci			ret = iter;
30648c2ecf20Sopenharmony_ci			break;
30658c2ecf20Sopenharmony_ci		}
30668c2ecf20Sopenharmony_ci	}
30678c2ecf20Sopenharmony_ci
30688c2ecf20Sopenharmony_ci	return ret;
30698c2ecf20Sopenharmony_ci}
30708c2ecf20Sopenharmony_ci
30718c2ecf20Sopenharmony_cistatic void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
30728c2ecf20Sopenharmony_ci{
30738c2ecf20Sopenharmony_ci	struct ocfs2_dlm_seq_priv *priv = m->private;
30748c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *iter;
30758c2ecf20Sopenharmony_ci
30768c2ecf20Sopenharmony_ci	spin_lock(&ocfs2_dlm_tracking_lock);
30778c2ecf20Sopenharmony_ci	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
30788c2ecf20Sopenharmony_ci	if (iter) {
30798c2ecf20Sopenharmony_ci		/* Since lockres' have the lifetime of their container
30808c2ecf20Sopenharmony_ci		 * (which can be inodes, ocfs2_supers, etc) we want to
30818c2ecf20Sopenharmony_ci		 * copy this out to a temporary lockres while still
30828c2ecf20Sopenharmony_ci		 * under the spinlock. Obviously after this we can't
30838c2ecf20Sopenharmony_ci		 * trust any pointers on the copy returned, but that's
30848c2ecf20Sopenharmony_ci		 * ok as the information we want isn't typically held
30858c2ecf20Sopenharmony_ci		 * in them. */
30868c2ecf20Sopenharmony_ci		priv->p_tmp_res = *iter;
30878c2ecf20Sopenharmony_ci		iter = &priv->p_tmp_res;
30888c2ecf20Sopenharmony_ci	}
30898c2ecf20Sopenharmony_ci	spin_unlock(&ocfs2_dlm_tracking_lock);
30908c2ecf20Sopenharmony_ci
30918c2ecf20Sopenharmony_ci	return iter;
30928c2ecf20Sopenharmony_ci}
30938c2ecf20Sopenharmony_ci
/*
 * seq_file ->stop(): intentionally empty.  ->start() and ->next() drop
 * ocfs2_dlm_tracking_lock themselves before returning, so there is
 * nothing left to release here.
 */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
30978c2ecf20Sopenharmony_ci
30988c2ecf20Sopenharmony_cistatic void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
30998c2ecf20Sopenharmony_ci{
31008c2ecf20Sopenharmony_ci	struct ocfs2_dlm_seq_priv *priv = m->private;
31018c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *iter = v;
31028c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
31038c2ecf20Sopenharmony_ci
31048c2ecf20Sopenharmony_ci	spin_lock(&ocfs2_dlm_tracking_lock);
31058c2ecf20Sopenharmony_ci	iter = ocfs2_dlm_next_res(iter, priv);
31068c2ecf20Sopenharmony_ci	list_del_init(&dummy->l_debug_list);
31078c2ecf20Sopenharmony_ci	if (iter) {
31088c2ecf20Sopenharmony_ci		list_add(&dummy->l_debug_list, &iter->l_debug_list);
31098c2ecf20Sopenharmony_ci		priv->p_tmp_res = *iter;
31108c2ecf20Sopenharmony_ci		iter = &priv->p_tmp_res;
31118c2ecf20Sopenharmony_ci	}
31128c2ecf20Sopenharmony_ci	spin_unlock(&ocfs2_dlm_tracking_lock);
31138c2ecf20Sopenharmony_ci
31148c2ecf20Sopenharmony_ci	return iter;
31158c2ecf20Sopenharmony_ci}
31168c2ecf20Sopenharmony_ci
31178c2ecf20Sopenharmony_ci/*
31188c2ecf20Sopenharmony_ci * Version is used by debugfs.ocfs2 to determine the format being used
31198c2ecf20Sopenharmony_ci *
31208c2ecf20Sopenharmony_ci * New in version 2
31218c2ecf20Sopenharmony_ci *	- Lock stats printed
31228c2ecf20Sopenharmony_ci * New in version 3
31238c2ecf20Sopenharmony_ci *	- Max time in lock stats is in usecs (instead of nsecs)
31248c2ecf20Sopenharmony_ci * New in version 4
31258c2ecf20Sopenharmony_ci *	- Add last pr/ex unlock times and first lock wait time in usecs
31268c2ecf20Sopenharmony_ci */
31278c2ecf20Sopenharmony_ci#define OCFS2_DLM_DEBUG_STR_VERSION 4
31288c2ecf20Sopenharmony_cistatic int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
31298c2ecf20Sopenharmony_ci{
31308c2ecf20Sopenharmony_ci	int i;
31318c2ecf20Sopenharmony_ci	char *lvb;
31328c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = v;
31338c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS
31348c2ecf20Sopenharmony_ci	u64 now, last;
31358c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug =
31368c2ecf20Sopenharmony_ci			((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
31378c2ecf20Sopenharmony_ci#endif
31388c2ecf20Sopenharmony_ci
31398c2ecf20Sopenharmony_ci	if (!lockres)
31408c2ecf20Sopenharmony_ci		return -EINVAL;
31418c2ecf20Sopenharmony_ci
31428c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS
31438c2ecf20Sopenharmony_ci	if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
31448c2ecf20Sopenharmony_ci		now = ktime_to_us(ktime_get_real());
31458c2ecf20Sopenharmony_ci		if (lockres->l_lock_prmode.ls_last >
31468c2ecf20Sopenharmony_ci		    lockres->l_lock_exmode.ls_last)
31478c2ecf20Sopenharmony_ci			last = lockres->l_lock_prmode.ls_last;
31488c2ecf20Sopenharmony_ci		else
31498c2ecf20Sopenharmony_ci			last = lockres->l_lock_exmode.ls_last;
31508c2ecf20Sopenharmony_ci		/*
31518c2ecf20Sopenharmony_ci		 * Use d_filter_secs field to filter lock resources dump,
31528c2ecf20Sopenharmony_ci		 * the default d_filter_secs(0) value filters nothing,
31538c2ecf20Sopenharmony_ci		 * otherwise, only dump the last N seconds active lock
31548c2ecf20Sopenharmony_ci		 * resources.
31558c2ecf20Sopenharmony_ci		 */
31568c2ecf20Sopenharmony_ci		if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
31578c2ecf20Sopenharmony_ci			return 0;
31588c2ecf20Sopenharmony_ci	}
31598c2ecf20Sopenharmony_ci#endif
31608c2ecf20Sopenharmony_ci
31618c2ecf20Sopenharmony_ci	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
31648c2ecf20Sopenharmony_ci		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
31658c2ecf20Sopenharmony_ci			   lockres->l_name,
31668c2ecf20Sopenharmony_ci			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
31678c2ecf20Sopenharmony_ci	else
31688c2ecf20Sopenharmony_ci		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
31698c2ecf20Sopenharmony_ci
31708c2ecf20Sopenharmony_ci	seq_printf(m, "%d\t"
31718c2ecf20Sopenharmony_ci		   "0x%lx\t"
31728c2ecf20Sopenharmony_ci		   "0x%x\t"
31738c2ecf20Sopenharmony_ci		   "0x%x\t"
31748c2ecf20Sopenharmony_ci		   "%u\t"
31758c2ecf20Sopenharmony_ci		   "%u\t"
31768c2ecf20Sopenharmony_ci		   "%d\t"
31778c2ecf20Sopenharmony_ci		   "%d\t",
31788c2ecf20Sopenharmony_ci		   lockres->l_level,
31798c2ecf20Sopenharmony_ci		   lockres->l_flags,
31808c2ecf20Sopenharmony_ci		   lockres->l_action,
31818c2ecf20Sopenharmony_ci		   lockres->l_unlock_action,
31828c2ecf20Sopenharmony_ci		   lockres->l_ro_holders,
31838c2ecf20Sopenharmony_ci		   lockres->l_ex_holders,
31848c2ecf20Sopenharmony_ci		   lockres->l_requested,
31858c2ecf20Sopenharmony_ci		   lockres->l_blocking);
31868c2ecf20Sopenharmony_ci
31878c2ecf20Sopenharmony_ci	/* Dump the raw LVB */
31888c2ecf20Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
31898c2ecf20Sopenharmony_ci	for(i = 0; i < DLM_LVB_LEN; i++)
31908c2ecf20Sopenharmony_ci		seq_printf(m, "0x%x\t", lvb[i]);
31918c2ecf20Sopenharmony_ci
31928c2ecf20Sopenharmony_ci#ifdef CONFIG_OCFS2_FS_STATS
31938c2ecf20Sopenharmony_ci# define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
31948c2ecf20Sopenharmony_ci# define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
31958c2ecf20Sopenharmony_ci# define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
31968c2ecf20Sopenharmony_ci# define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
31978c2ecf20Sopenharmony_ci# define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
31988c2ecf20Sopenharmony_ci# define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
31998c2ecf20Sopenharmony_ci# define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
32008c2ecf20Sopenharmony_ci# define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
32018c2ecf20Sopenharmony_ci# define lock_refresh(_l)		((_l)->l_lock_refresh)
32028c2ecf20Sopenharmony_ci# define lock_last_prmode(_l)		((_l)->l_lock_prmode.ls_last)
32038c2ecf20Sopenharmony_ci# define lock_last_exmode(_l)		((_l)->l_lock_exmode.ls_last)
32048c2ecf20Sopenharmony_ci# define lock_wait(_l)			((_l)->l_lock_wait)
32058c2ecf20Sopenharmony_ci#else
32068c2ecf20Sopenharmony_ci# define lock_num_prmode(_l)		(0)
32078c2ecf20Sopenharmony_ci# define lock_num_exmode(_l)		(0)
32088c2ecf20Sopenharmony_ci# define lock_num_prmode_failed(_l)	(0)
32098c2ecf20Sopenharmony_ci# define lock_num_exmode_failed(_l)	(0)
32108c2ecf20Sopenharmony_ci# define lock_total_prmode(_l)		(0ULL)
32118c2ecf20Sopenharmony_ci# define lock_total_exmode(_l)		(0ULL)
32128c2ecf20Sopenharmony_ci# define lock_max_prmode(_l)		(0)
32138c2ecf20Sopenharmony_ci# define lock_max_exmode(_l)		(0)
32148c2ecf20Sopenharmony_ci# define lock_refresh(_l)		(0)
32158c2ecf20Sopenharmony_ci# define lock_last_prmode(_l)		(0ULL)
32168c2ecf20Sopenharmony_ci# define lock_last_exmode(_l)		(0ULL)
32178c2ecf20Sopenharmony_ci# define lock_wait(_l)			(0ULL)
32188c2ecf20Sopenharmony_ci#endif
32198c2ecf20Sopenharmony_ci	/* The following seq_print was added in version 2 of this output */
32208c2ecf20Sopenharmony_ci	seq_printf(m, "%u\t"
32218c2ecf20Sopenharmony_ci		   "%u\t"
32228c2ecf20Sopenharmony_ci		   "%u\t"
32238c2ecf20Sopenharmony_ci		   "%u\t"
32248c2ecf20Sopenharmony_ci		   "%llu\t"
32258c2ecf20Sopenharmony_ci		   "%llu\t"
32268c2ecf20Sopenharmony_ci		   "%u\t"
32278c2ecf20Sopenharmony_ci		   "%u\t"
32288c2ecf20Sopenharmony_ci		   "%u\t"
32298c2ecf20Sopenharmony_ci		   "%llu\t"
32308c2ecf20Sopenharmony_ci		   "%llu\t"
32318c2ecf20Sopenharmony_ci		   "%llu\t",
32328c2ecf20Sopenharmony_ci		   lock_num_prmode(lockres),
32338c2ecf20Sopenharmony_ci		   lock_num_exmode(lockres),
32348c2ecf20Sopenharmony_ci		   lock_num_prmode_failed(lockres),
32358c2ecf20Sopenharmony_ci		   lock_num_exmode_failed(lockres),
32368c2ecf20Sopenharmony_ci		   lock_total_prmode(lockres),
32378c2ecf20Sopenharmony_ci		   lock_total_exmode(lockres),
32388c2ecf20Sopenharmony_ci		   lock_max_prmode(lockres),
32398c2ecf20Sopenharmony_ci		   lock_max_exmode(lockres),
32408c2ecf20Sopenharmony_ci		   lock_refresh(lockres),
32418c2ecf20Sopenharmony_ci		   lock_last_prmode(lockres),
32428c2ecf20Sopenharmony_ci		   lock_last_exmode(lockres),
32438c2ecf20Sopenharmony_ci		   lock_wait(lockres));
32448c2ecf20Sopenharmony_ci
32458c2ecf20Sopenharmony_ci	/* End the line */
32468c2ecf20Sopenharmony_ci	seq_printf(m, "\n");
32478c2ecf20Sopenharmony_ci	return 0;
32488c2ecf20Sopenharmony_ci}
32498c2ecf20Sopenharmony_ci
32508c2ecf20Sopenharmony_cistatic const struct seq_operations ocfs2_dlm_seq_ops = {
32518c2ecf20Sopenharmony_ci	.start =	ocfs2_dlm_seq_start,
32528c2ecf20Sopenharmony_ci	.stop =		ocfs2_dlm_seq_stop,
32538c2ecf20Sopenharmony_ci	.next =		ocfs2_dlm_seq_next,
32548c2ecf20Sopenharmony_ci	.show =		ocfs2_dlm_seq_show,
32558c2ecf20Sopenharmony_ci};
32568c2ecf20Sopenharmony_ci
32578c2ecf20Sopenharmony_cistatic int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
32588c2ecf20Sopenharmony_ci{
32598c2ecf20Sopenharmony_ci	struct seq_file *seq = file->private_data;
32608c2ecf20Sopenharmony_ci	struct ocfs2_dlm_seq_priv *priv = seq->private;
32618c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *res = &priv->p_iter_res;
32628c2ecf20Sopenharmony_ci
32638c2ecf20Sopenharmony_ci	ocfs2_remove_lockres_tracking(res);
32648c2ecf20Sopenharmony_ci	ocfs2_put_dlm_debug(priv->p_dlm_debug);
32658c2ecf20Sopenharmony_ci	return seq_release_private(inode, file);
32668c2ecf20Sopenharmony_ci}
32678c2ecf20Sopenharmony_ci
32688c2ecf20Sopenharmony_cistatic int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
32698c2ecf20Sopenharmony_ci{
32708c2ecf20Sopenharmony_ci	struct ocfs2_dlm_seq_priv *priv;
32718c2ecf20Sopenharmony_ci	struct ocfs2_super *osb;
32728c2ecf20Sopenharmony_ci
32738c2ecf20Sopenharmony_ci	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
32748c2ecf20Sopenharmony_ci	if (!priv) {
32758c2ecf20Sopenharmony_ci		mlog_errno(-ENOMEM);
32768c2ecf20Sopenharmony_ci		return -ENOMEM;
32778c2ecf20Sopenharmony_ci	}
32788c2ecf20Sopenharmony_ci
32798c2ecf20Sopenharmony_ci	osb = inode->i_private;
32808c2ecf20Sopenharmony_ci	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
32818c2ecf20Sopenharmony_ci	priv->p_dlm_debug = osb->osb_dlm_debug;
32828c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
32838c2ecf20Sopenharmony_ci
32848c2ecf20Sopenharmony_ci	ocfs2_add_lockres_tracking(&priv->p_iter_res,
32858c2ecf20Sopenharmony_ci				   priv->p_dlm_debug);
32868c2ecf20Sopenharmony_ci
32878c2ecf20Sopenharmony_ci	return 0;
32888c2ecf20Sopenharmony_ci}
32898c2ecf20Sopenharmony_ci
32908c2ecf20Sopenharmony_cistatic const struct file_operations ocfs2_dlm_debug_fops = {
32918c2ecf20Sopenharmony_ci	.open =		ocfs2_dlm_debug_open,
32928c2ecf20Sopenharmony_ci	.release =	ocfs2_dlm_debug_release,
32938c2ecf20Sopenharmony_ci	.read =		seq_read,
32948c2ecf20Sopenharmony_ci	.llseek =	seq_lseek,
32958c2ecf20Sopenharmony_ci};
32968c2ecf20Sopenharmony_ci
32978c2ecf20Sopenharmony_cistatic void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
32988c2ecf20Sopenharmony_ci{
32998c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
33008c2ecf20Sopenharmony_ci
33018c2ecf20Sopenharmony_ci	debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
33028c2ecf20Sopenharmony_ci			    osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
33038c2ecf20Sopenharmony_ci
33048c2ecf20Sopenharmony_ci	debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
33058c2ecf20Sopenharmony_ci			   &dlm_debug->d_filter_secs);
33068c2ecf20Sopenharmony_ci	ocfs2_get_dlm_debug(dlm_debug);
33078c2ecf20Sopenharmony_ci}
33088c2ecf20Sopenharmony_ci
33098c2ecf20Sopenharmony_cistatic void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
33108c2ecf20Sopenharmony_ci{
33118c2ecf20Sopenharmony_ci	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
33128c2ecf20Sopenharmony_ci
33138c2ecf20Sopenharmony_ci	if (dlm_debug)
33148c2ecf20Sopenharmony_ci		ocfs2_put_dlm_debug(dlm_debug);
33158c2ecf20Sopenharmony_ci}
33168c2ecf20Sopenharmony_ci
33178c2ecf20Sopenharmony_ciint ocfs2_dlm_init(struct ocfs2_super *osb)
33188c2ecf20Sopenharmony_ci{
33198c2ecf20Sopenharmony_ci	int status = 0;
33208c2ecf20Sopenharmony_ci	struct ocfs2_cluster_connection *conn = NULL;
33218c2ecf20Sopenharmony_ci
33228c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb)) {
33238c2ecf20Sopenharmony_ci		osb->node_num = 0;
33248c2ecf20Sopenharmony_ci		goto local;
33258c2ecf20Sopenharmony_ci	}
33268c2ecf20Sopenharmony_ci
33278c2ecf20Sopenharmony_ci	ocfs2_dlm_init_debug(osb);
33288c2ecf20Sopenharmony_ci
33298c2ecf20Sopenharmony_ci	/* launch downconvert thread */
33308c2ecf20Sopenharmony_ci	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
33318c2ecf20Sopenharmony_ci			osb->uuid_str);
33328c2ecf20Sopenharmony_ci	if (IS_ERR(osb->dc_task)) {
33338c2ecf20Sopenharmony_ci		status = PTR_ERR(osb->dc_task);
33348c2ecf20Sopenharmony_ci		osb->dc_task = NULL;
33358c2ecf20Sopenharmony_ci		mlog_errno(status);
33368c2ecf20Sopenharmony_ci		goto bail;
33378c2ecf20Sopenharmony_ci	}
33388c2ecf20Sopenharmony_ci
33398c2ecf20Sopenharmony_ci	/* for now, uuid == domain */
33408c2ecf20Sopenharmony_ci	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
33418c2ecf20Sopenharmony_ci				       osb->osb_cluster_name,
33428c2ecf20Sopenharmony_ci				       strlen(osb->osb_cluster_name),
33438c2ecf20Sopenharmony_ci				       osb->uuid_str,
33448c2ecf20Sopenharmony_ci				       strlen(osb->uuid_str),
33458c2ecf20Sopenharmony_ci				       &lproto, ocfs2_do_node_down, osb,
33468c2ecf20Sopenharmony_ci				       &conn);
33478c2ecf20Sopenharmony_ci	if (status) {
33488c2ecf20Sopenharmony_ci		mlog_errno(status);
33498c2ecf20Sopenharmony_ci		goto bail;
33508c2ecf20Sopenharmony_ci	}
33518c2ecf20Sopenharmony_ci
33528c2ecf20Sopenharmony_ci	status = ocfs2_cluster_this_node(conn, &osb->node_num);
33538c2ecf20Sopenharmony_ci	if (status < 0) {
33548c2ecf20Sopenharmony_ci		mlog_errno(status);
33558c2ecf20Sopenharmony_ci		mlog(ML_ERROR,
33568c2ecf20Sopenharmony_ci		     "could not find this host's node number\n");
33578c2ecf20Sopenharmony_ci		ocfs2_cluster_disconnect(conn, 0);
33588c2ecf20Sopenharmony_ci		goto bail;
33598c2ecf20Sopenharmony_ci	}
33608c2ecf20Sopenharmony_ci
33618c2ecf20Sopenharmony_cilocal:
33628c2ecf20Sopenharmony_ci	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
33638c2ecf20Sopenharmony_ci	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
33648c2ecf20Sopenharmony_ci	ocfs2_nfs_sync_lock_init(osb);
33658c2ecf20Sopenharmony_ci	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
33668c2ecf20Sopenharmony_ci
33678c2ecf20Sopenharmony_ci	osb->cconn = conn;
33688c2ecf20Sopenharmony_cibail:
33698c2ecf20Sopenharmony_ci	if (status < 0) {
33708c2ecf20Sopenharmony_ci		ocfs2_dlm_shutdown_debug(osb);
33718c2ecf20Sopenharmony_ci		if (osb->dc_task)
33728c2ecf20Sopenharmony_ci			kthread_stop(osb->dc_task);
33738c2ecf20Sopenharmony_ci	}
33748c2ecf20Sopenharmony_ci
33758c2ecf20Sopenharmony_ci	return status;
33768c2ecf20Sopenharmony_ci}
33778c2ecf20Sopenharmony_ci
33788c2ecf20Sopenharmony_civoid ocfs2_dlm_shutdown(struct ocfs2_super *osb,
33798c2ecf20Sopenharmony_ci			int hangup_pending)
33808c2ecf20Sopenharmony_ci{
33818c2ecf20Sopenharmony_ci	ocfs2_drop_osb_locks(osb);
33828c2ecf20Sopenharmony_ci
33838c2ecf20Sopenharmony_ci	/*
33848c2ecf20Sopenharmony_ci	 * Now that we have dropped all locks and ocfs2_dismount_volume()
33858c2ecf20Sopenharmony_ci	 * has disabled recovery, the DLM won't be talking to us.  It's
33868c2ecf20Sopenharmony_ci	 * safe to tear things down before disconnecting the cluster.
33878c2ecf20Sopenharmony_ci	 */
33888c2ecf20Sopenharmony_ci
33898c2ecf20Sopenharmony_ci	if (osb->dc_task) {
33908c2ecf20Sopenharmony_ci		kthread_stop(osb->dc_task);
33918c2ecf20Sopenharmony_ci		osb->dc_task = NULL;
33928c2ecf20Sopenharmony_ci	}
33938c2ecf20Sopenharmony_ci
33948c2ecf20Sopenharmony_ci	ocfs2_lock_res_free(&osb->osb_super_lockres);
33958c2ecf20Sopenharmony_ci	ocfs2_lock_res_free(&osb->osb_rename_lockres);
33968c2ecf20Sopenharmony_ci	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
33978c2ecf20Sopenharmony_ci	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
33988c2ecf20Sopenharmony_ci
33998c2ecf20Sopenharmony_ci	if (osb->cconn) {
34008c2ecf20Sopenharmony_ci		ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
34018c2ecf20Sopenharmony_ci		osb->cconn = NULL;
34028c2ecf20Sopenharmony_ci
34038c2ecf20Sopenharmony_ci		ocfs2_dlm_shutdown_debug(osb);
34048c2ecf20Sopenharmony_ci	}
34058c2ecf20Sopenharmony_ci}
34068c2ecf20Sopenharmony_ci
34078c2ecf20Sopenharmony_cistatic int ocfs2_drop_lock(struct ocfs2_super *osb,
34088c2ecf20Sopenharmony_ci			   struct ocfs2_lock_res *lockres)
34098c2ecf20Sopenharmony_ci{
34108c2ecf20Sopenharmony_ci	int ret;
34118c2ecf20Sopenharmony_ci	unsigned long flags;
34128c2ecf20Sopenharmony_ci	u32 lkm_flags = 0;
34138c2ecf20Sopenharmony_ci
34148c2ecf20Sopenharmony_ci	/* We didn't get anywhere near actually using this lockres. */
34158c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
34168c2ecf20Sopenharmony_ci		goto out;
34178c2ecf20Sopenharmony_ci
34188c2ecf20Sopenharmony_ci	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
34198c2ecf20Sopenharmony_ci		lkm_flags |= DLM_LKF_VALBLK;
34208c2ecf20Sopenharmony_ci
34218c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
34228c2ecf20Sopenharmony_ci
34238c2ecf20Sopenharmony_ci	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
34248c2ecf20Sopenharmony_ci			"lockres %s, flags 0x%lx\n",
34258c2ecf20Sopenharmony_ci			lockres->l_name, lockres->l_flags);
34268c2ecf20Sopenharmony_ci
34278c2ecf20Sopenharmony_ci	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
34288c2ecf20Sopenharmony_ci		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
34298c2ecf20Sopenharmony_ci		     "%u, unlock_action = %u\n",
34308c2ecf20Sopenharmony_ci		     lockres->l_name, lockres->l_flags, lockres->l_action,
34318c2ecf20Sopenharmony_ci		     lockres->l_unlock_action);
34328c2ecf20Sopenharmony_ci
34338c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
34348c2ecf20Sopenharmony_ci
34358c2ecf20Sopenharmony_ci		/* XXX: Today we just wait on any busy
34368c2ecf20Sopenharmony_ci		 * locks... Perhaps we need to cancel converts in the
34378c2ecf20Sopenharmony_ci		 * future? */
34388c2ecf20Sopenharmony_ci		ocfs2_wait_on_busy_lock(lockres);
34398c2ecf20Sopenharmony_ci
34408c2ecf20Sopenharmony_ci		spin_lock_irqsave(&lockres->l_lock, flags);
34418c2ecf20Sopenharmony_ci	}
34428c2ecf20Sopenharmony_ci
34438c2ecf20Sopenharmony_ci	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
34448c2ecf20Sopenharmony_ci		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
34458c2ecf20Sopenharmony_ci		    lockres->l_level == DLM_LOCK_EX &&
34468c2ecf20Sopenharmony_ci		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
34478c2ecf20Sopenharmony_ci			lockres->l_ops->set_lvb(lockres);
34488c2ecf20Sopenharmony_ci	}
34498c2ecf20Sopenharmony_ci
34508c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_BUSY)
34518c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
34528c2ecf20Sopenharmony_ci		     lockres->l_name);
34538c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
34548c2ecf20Sopenharmony_ci		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
34558c2ecf20Sopenharmony_ci
34568c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
34578c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
34588c2ecf20Sopenharmony_ci		goto out;
34598c2ecf20Sopenharmony_ci	}
34608c2ecf20Sopenharmony_ci
34618c2ecf20Sopenharmony_ci	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
34628c2ecf20Sopenharmony_ci
34638c2ecf20Sopenharmony_ci	/* make sure we never get here while waiting for an ast to
34648c2ecf20Sopenharmony_ci	 * fire. */
34658c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
34668c2ecf20Sopenharmony_ci
34678c2ecf20Sopenharmony_ci	/* is this necessary? */
34688c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
34698c2ecf20Sopenharmony_ci	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
34708c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
34718c2ecf20Sopenharmony_ci
34728c2ecf20Sopenharmony_ci	mlog(0, "lock %s\n", lockres->l_name);
34738c2ecf20Sopenharmony_ci
34748c2ecf20Sopenharmony_ci	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
34758c2ecf20Sopenharmony_ci	if (ret) {
34768c2ecf20Sopenharmony_ci		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
34778c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
34788c2ecf20Sopenharmony_ci		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
34798c2ecf20Sopenharmony_ci		BUG();
34808c2ecf20Sopenharmony_ci	}
34818c2ecf20Sopenharmony_ci	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
34828c2ecf20Sopenharmony_ci	     lockres->l_name);
34838c2ecf20Sopenharmony_ci
34848c2ecf20Sopenharmony_ci	ocfs2_wait_on_busy_lock(lockres);
34858c2ecf20Sopenharmony_ciout:
34868c2ecf20Sopenharmony_ci	return 0;
34878c2ecf20Sopenharmony_ci}
34888c2ecf20Sopenharmony_ci
34898c2ecf20Sopenharmony_cistatic void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
34908c2ecf20Sopenharmony_ci				       struct ocfs2_lock_res *lockres);
34918c2ecf20Sopenharmony_ci
34928c2ecf20Sopenharmony_ci/* Mark the lockres as being dropped. It will no longer be
34938c2ecf20Sopenharmony_ci * queued if blocking, but we still may have to wait on it
34948c2ecf20Sopenharmony_ci * being dequeued from the downconvert thread before we can consider
34958c2ecf20Sopenharmony_ci * it safe to drop.
34968c2ecf20Sopenharmony_ci *
34978c2ecf20Sopenharmony_ci * You can *not* attempt to call cluster_lock on this lockres anymore. */
34988c2ecf20Sopenharmony_civoid ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
34998c2ecf20Sopenharmony_ci				struct ocfs2_lock_res *lockres)
35008c2ecf20Sopenharmony_ci{
35018c2ecf20Sopenharmony_ci	int status;
35028c2ecf20Sopenharmony_ci	struct ocfs2_mask_waiter mw;
35038c2ecf20Sopenharmony_ci	unsigned long flags, flags2;
35048c2ecf20Sopenharmony_ci
35058c2ecf20Sopenharmony_ci	ocfs2_init_mask_waiter(&mw);
35068c2ecf20Sopenharmony_ci
35078c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
35088c2ecf20Sopenharmony_ci	lockres->l_flags |= OCFS2_LOCK_FREEING;
35098c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
35108c2ecf20Sopenharmony_ci		/*
35118c2ecf20Sopenharmony_ci		 * We know the downconvert is queued but not in progress
35128c2ecf20Sopenharmony_ci		 * because we are the downconvert thread and processing
35138c2ecf20Sopenharmony_ci		 * different lock. So we can just remove the lock from the
35148c2ecf20Sopenharmony_ci		 * queue. This is not only an optimization but also a way
35158c2ecf20Sopenharmony_ci		 * to avoid the following deadlock:
35168c2ecf20Sopenharmony_ci		 *   ocfs2_dentry_post_unlock()
35178c2ecf20Sopenharmony_ci		 *     ocfs2_dentry_lock_put()
35188c2ecf20Sopenharmony_ci		 *       ocfs2_drop_dentry_lock()
35198c2ecf20Sopenharmony_ci		 *         iput()
35208c2ecf20Sopenharmony_ci		 *           ocfs2_evict_inode()
35218c2ecf20Sopenharmony_ci		 *             ocfs2_clear_inode()
35228c2ecf20Sopenharmony_ci		 *               ocfs2_mark_lockres_freeing()
35238c2ecf20Sopenharmony_ci		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
35248c2ecf20Sopenharmony_ci		 *                 since we are the downconvert thread which
35258c2ecf20Sopenharmony_ci		 *                 should clear the flag.
35268c2ecf20Sopenharmony_ci		 */
35278c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
35288c2ecf20Sopenharmony_ci		spin_lock_irqsave(&osb->dc_task_lock, flags2);
35298c2ecf20Sopenharmony_ci		list_del_init(&lockres->l_blocked_list);
35308c2ecf20Sopenharmony_ci		osb->blocked_lock_count--;
35318c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
35328c2ecf20Sopenharmony_ci		/*
35338c2ecf20Sopenharmony_ci		 * Warn if we recurse into another post_unlock call.  Strictly
35348c2ecf20Sopenharmony_ci		 * speaking it isn't a problem but we need to be careful if
35358c2ecf20Sopenharmony_ci		 * that happens (stack overflow, deadlocks, ...) so warn if
35368c2ecf20Sopenharmony_ci		 * ocfs2 grows a path for which this can happen.
35378c2ecf20Sopenharmony_ci		 */
35388c2ecf20Sopenharmony_ci		WARN_ON_ONCE(lockres->l_ops->post_unlock);
35398c2ecf20Sopenharmony_ci		/* Since the lock is freeing we don't do much in the fn below */
35408c2ecf20Sopenharmony_ci		ocfs2_process_blocked_lock(osb, lockres);
35418c2ecf20Sopenharmony_ci		return;
35428c2ecf20Sopenharmony_ci	}
35438c2ecf20Sopenharmony_ci	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
35448c2ecf20Sopenharmony_ci		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
35458c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&lockres->l_lock, flags);
35468c2ecf20Sopenharmony_ci
35478c2ecf20Sopenharmony_ci		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
35488c2ecf20Sopenharmony_ci
35498c2ecf20Sopenharmony_ci		status = ocfs2_wait_for_mask(&mw);
35508c2ecf20Sopenharmony_ci		if (status)
35518c2ecf20Sopenharmony_ci			mlog_errno(status);
35528c2ecf20Sopenharmony_ci
35538c2ecf20Sopenharmony_ci		spin_lock_irqsave(&lockres->l_lock, flags);
35548c2ecf20Sopenharmony_ci	}
35558c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
35568c2ecf20Sopenharmony_ci}
35578c2ecf20Sopenharmony_ci
/*
 * Mark @lockres as freeing and then drop it.  A failure reported by
 * ocfs2_drop_lock() is logged but not propagated to the caller.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
35688c2ecf20Sopenharmony_ci
35698c2ecf20Sopenharmony_cistatic void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
35708c2ecf20Sopenharmony_ci{
35718c2ecf20Sopenharmony_ci	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
35728c2ecf20Sopenharmony_ci	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
35738c2ecf20Sopenharmony_ci	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
35748c2ecf20Sopenharmony_ci	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
35758c2ecf20Sopenharmony_ci}
35768c2ecf20Sopenharmony_ci
35778c2ecf20Sopenharmony_ciint ocfs2_drop_inode_locks(struct inode *inode)
35788c2ecf20Sopenharmony_ci{
35798c2ecf20Sopenharmony_ci	int status, err;
35808c2ecf20Sopenharmony_ci
35818c2ecf20Sopenharmony_ci	/* No need to call ocfs2_mark_lockres_freeing here -
35828c2ecf20Sopenharmony_ci	 * ocfs2_clear_inode has done it for us. */
35838c2ecf20Sopenharmony_ci
35848c2ecf20Sopenharmony_ci	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
35858c2ecf20Sopenharmony_ci			      &OCFS2_I(inode)->ip_open_lockres);
35868c2ecf20Sopenharmony_ci	if (err < 0)
35878c2ecf20Sopenharmony_ci		mlog_errno(err);
35888c2ecf20Sopenharmony_ci
35898c2ecf20Sopenharmony_ci	status = err;
35908c2ecf20Sopenharmony_ci
35918c2ecf20Sopenharmony_ci	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
35928c2ecf20Sopenharmony_ci			      &OCFS2_I(inode)->ip_inode_lockres);
35938c2ecf20Sopenharmony_ci	if (err < 0)
35948c2ecf20Sopenharmony_ci		mlog_errno(err);
35958c2ecf20Sopenharmony_ci	if (err < 0 && !status)
35968c2ecf20Sopenharmony_ci		status = err;
35978c2ecf20Sopenharmony_ci
35988c2ecf20Sopenharmony_ci	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
35998c2ecf20Sopenharmony_ci			      &OCFS2_I(inode)->ip_rw_lockres);
36008c2ecf20Sopenharmony_ci	if (err < 0)
36018c2ecf20Sopenharmony_ci		mlog_errno(err);
36028c2ecf20Sopenharmony_ci	if (err < 0 && !status)
36038c2ecf20Sopenharmony_ci		status = err;
36048c2ecf20Sopenharmony_ci
36058c2ecf20Sopenharmony_ci	return status;
36068c2ecf20Sopenharmony_ci}
36078c2ecf20Sopenharmony_ci
36088c2ecf20Sopenharmony_cistatic unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
36098c2ecf20Sopenharmony_ci					      int new_level)
36108c2ecf20Sopenharmony_ci{
36118c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
36128c2ecf20Sopenharmony_ci
36138c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
36148c2ecf20Sopenharmony_ci
36158c2ecf20Sopenharmony_ci	if (lockres->l_level <= new_level) {
36168c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
36178c2ecf20Sopenharmony_ci		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
36188c2ecf20Sopenharmony_ci		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
36198c2ecf20Sopenharmony_ci		     new_level, list_empty(&lockres->l_blocked_list),
36208c2ecf20Sopenharmony_ci		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
36218c2ecf20Sopenharmony_ci		     lockres->l_flags, lockres->l_ro_holders,
36228c2ecf20Sopenharmony_ci		     lockres->l_ex_holders, lockres->l_action,
36238c2ecf20Sopenharmony_ci		     lockres->l_unlock_action, lockres->l_requested,
36248c2ecf20Sopenharmony_ci		     lockres->l_blocking, lockres->l_pending_gen);
36258c2ecf20Sopenharmony_ci		BUG();
36268c2ecf20Sopenharmony_ci	}
36278c2ecf20Sopenharmony_ci
36288c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
36298c2ecf20Sopenharmony_ci	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
36308c2ecf20Sopenharmony_ci
36318c2ecf20Sopenharmony_ci	lockres->l_action = OCFS2_AST_DOWNCONVERT;
36328c2ecf20Sopenharmony_ci	lockres->l_requested = new_level;
36338c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
36348c2ecf20Sopenharmony_ci	return lockres_set_pending(lockres);
36358c2ecf20Sopenharmony_ci}
36368c2ecf20Sopenharmony_ci
36378c2ecf20Sopenharmony_cistatic int ocfs2_downconvert_lock(struct ocfs2_super *osb,
36388c2ecf20Sopenharmony_ci				  struct ocfs2_lock_res *lockres,
36398c2ecf20Sopenharmony_ci				  int new_level,
36408c2ecf20Sopenharmony_ci				  int lvb,
36418c2ecf20Sopenharmony_ci				  unsigned int generation)
36428c2ecf20Sopenharmony_ci{
36438c2ecf20Sopenharmony_ci	int ret;
36448c2ecf20Sopenharmony_ci	u32 dlm_flags = DLM_LKF_CONVERT;
36458c2ecf20Sopenharmony_ci
36468c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
36478c2ecf20Sopenharmony_ci	     lockres->l_level, new_level);
36488c2ecf20Sopenharmony_ci
36498c2ecf20Sopenharmony_ci	/*
36508c2ecf20Sopenharmony_ci	 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
36518c2ecf20Sopenharmony_ci	 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
36528c2ecf20Sopenharmony_ci	 * we can recover correctly from node failure. Otherwise, we may get
36538c2ecf20Sopenharmony_ci	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
36548c2ecf20Sopenharmony_ci	 */
36558c2ecf20Sopenharmony_ci	if (ocfs2_userspace_stack(osb) &&
36568c2ecf20Sopenharmony_ci	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
36578c2ecf20Sopenharmony_ci		lvb = 1;
36588c2ecf20Sopenharmony_ci
36598c2ecf20Sopenharmony_ci	if (lvb)
36608c2ecf20Sopenharmony_ci		dlm_flags |= DLM_LKF_VALBLK;
36618c2ecf20Sopenharmony_ci
36628c2ecf20Sopenharmony_ci	ret = ocfs2_dlm_lock(osb->cconn,
36638c2ecf20Sopenharmony_ci			     new_level,
36648c2ecf20Sopenharmony_ci			     &lockres->l_lksb,
36658c2ecf20Sopenharmony_ci			     dlm_flags,
36668c2ecf20Sopenharmony_ci			     lockres->l_name,
36678c2ecf20Sopenharmony_ci			     OCFS2_LOCK_ID_MAX_LEN - 1);
36688c2ecf20Sopenharmony_ci	lockres_clear_pending(lockres, generation, osb);
36698c2ecf20Sopenharmony_ci	if (ret) {
36708c2ecf20Sopenharmony_ci		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
36718c2ecf20Sopenharmony_ci		ocfs2_recover_from_dlm_error(lockres, 1);
36728c2ecf20Sopenharmony_ci		goto bail;
36738c2ecf20Sopenharmony_ci	}
36748c2ecf20Sopenharmony_ci
36758c2ecf20Sopenharmony_ci	ret = 0;
36768c2ecf20Sopenharmony_cibail:
36778c2ecf20Sopenharmony_ci	return ret;
36788c2ecf20Sopenharmony_ci}
36798c2ecf20Sopenharmony_ci
36808c2ecf20Sopenharmony_ci/* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
36818c2ecf20Sopenharmony_cistatic int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
36828c2ecf20Sopenharmony_ci				        struct ocfs2_lock_res *lockres)
36838c2ecf20Sopenharmony_ci{
36848c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
36858c2ecf20Sopenharmony_ci
36868c2ecf20Sopenharmony_ci	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
36878c2ecf20Sopenharmony_ci		/* If we're already trying to cancel a lock conversion
36888c2ecf20Sopenharmony_ci		 * then just drop the spinlock and allow the caller to
36898c2ecf20Sopenharmony_ci		 * requeue this lock. */
36908c2ecf20Sopenharmony_ci		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
36918c2ecf20Sopenharmony_ci		return 0;
36928c2ecf20Sopenharmony_ci	}
36938c2ecf20Sopenharmony_ci
36948c2ecf20Sopenharmony_ci	/* were we in a convert when we got the bast fire? */
36958c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
36968c2ecf20Sopenharmony_ci	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
36978c2ecf20Sopenharmony_ci	/* set things up for the unlockast to know to just
36988c2ecf20Sopenharmony_ci	 * clear out the ast_action and unset busy, etc. */
36998c2ecf20Sopenharmony_ci	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
37008c2ecf20Sopenharmony_ci
37018c2ecf20Sopenharmony_ci	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
37028c2ecf20Sopenharmony_ci			"lock %s, invalid flags: 0x%lx\n",
37038c2ecf20Sopenharmony_ci			lockres->l_name, lockres->l_flags);
37048c2ecf20Sopenharmony_ci
37058c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
37068c2ecf20Sopenharmony_ci
37078c2ecf20Sopenharmony_ci	return 1;
37088c2ecf20Sopenharmony_ci}
37098c2ecf20Sopenharmony_ci
37108c2ecf20Sopenharmony_cistatic int ocfs2_cancel_convert(struct ocfs2_super *osb,
37118c2ecf20Sopenharmony_ci				struct ocfs2_lock_res *lockres)
37128c2ecf20Sopenharmony_ci{
37138c2ecf20Sopenharmony_ci	int ret;
37148c2ecf20Sopenharmony_ci
37158c2ecf20Sopenharmony_ci	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
37168c2ecf20Sopenharmony_ci			       DLM_LKF_CANCEL);
37178c2ecf20Sopenharmony_ci	if (ret) {
37188c2ecf20Sopenharmony_ci		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
37198c2ecf20Sopenharmony_ci		ocfs2_recover_from_dlm_error(lockres, 0);
37208c2ecf20Sopenharmony_ci	}
37218c2ecf20Sopenharmony_ci
37228c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
37238c2ecf20Sopenharmony_ci
37248c2ecf20Sopenharmony_ci	return ret;
37258c2ecf20Sopenharmony_ci}
37268c2ecf20Sopenharmony_ci
/*
 * Try to resolve a blocked lockres, i.e. one whose currently held level
 * conflicts with the level recorded in l_blocking.
 *
 * @osb:     passed through to the cancel and downconvert helpers
 * @lockres: lock resource to examine; l_lock is taken and released here
 * @ctl:     out-parameter.  ->requeue is set to 1 when the unblock has
 *           to be retried later and to 0 once a downconvert is being
 *           issued; it is left untouched on the remaining exits.
 *           ->unblock_action receives the result of the lock type's
 *           downconvert_worker, if it has one.
 *
 * Returns 0, or the error reported by ocfs2_cancel_convert() or
 * ocfs2_downconvert_lock().
 */
static int ocfs2_unblock_lock(struct ocfs2_super *osb,
			      struct ocfs2_lock_res *lockres,
			      struct ocfs2_unblock_ctl *ctl)
{
	unsigned long flags;
	int blocking;
	int new_level;
	int level;
	int ret = 0;
	int set_lvb = 0;
	unsigned int gen;

	spin_lock_irqsave(&lockres->l_lock, flags);

	/* Re-entered with l_lock held whenever the state changed while
	 * the downconvert worker ran without the lock. */
recheck:
	/*
	 * Is it still blocking? If not, we have no more work to do.
	 */
	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		ret = 0;
		goto leave;
	}

	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
		/* XXX
		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
		 * exists entirely for one reason - another thread has set
		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
		 *
		 * If we do ocfs2_cancel_convert() before the other thread
		 * calls dlm_lock(), our cancel will do nothing.  We will
		 * get no ast, and we will have no way of knowing the
		 * cancel failed.  Meanwhile, the other thread will call
		 * into dlm_lock() and wait...forever.
		 *
		 * Why forever?  Because another node has asked for the
		 * lock first; that's why we're here in unblock_lock().
		 *
		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
		 * set, we just requeue the unblock.  Only when the other
		 * thread has called dlm_lock() and cleared PENDING will
		 * we then cancel their request.
		 *
		 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
		 * at the same time they set OCFS2_LOCK_BUSY.  They must
		 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
		 */
		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
			     lockres->l_name);
			goto leave_requeue;
		}

		/* Busy but no longer pending: cancel the in-flight convert
		 * (unless a cancel is already under way) and come back
		 * later. */
		ctl->requeue = 1;
		ret = ocfs2_prepare_cancel_convert(osb, lockres);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		if (ret) {
			ret = ocfs2_cancel_convert(osb, lockres);
			if (ret < 0)
				mlog_errno(ret);
		}
		goto leave;
	}

	/*
	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
	 * set when the ast is received for an upconvert just before the
	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
	 * on the heels of the ast, we want to delay the downconvert just
	 * enough to allow the up requestor to do its task. Because this
	 * lock is in the blocked queue, the lock will be downconverted
	 * as soon as the requestor is done with the lock.
	 */
	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
		goto leave_requeue;

	/*
	 * How can we block and yet be at NL?  We were trying to upconvert
	 * from NL and got canceled.  The code comes back here, and now
	 * we notice and clear BLOCKING.
	 */
	if (lockres->l_level == DLM_LOCK_NL) {
		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
		lockres->l_blocking = DLM_LOCK_NL;
		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto leave;
	}

	/* if we're blocking an exclusive and we have *any* holders,
	 * then requeue. */
	if ((lockres->l_blocking == DLM_LOCK_EX)
	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
		     lockres->l_name, lockres->l_ex_holders,
		     lockres->l_ro_holders);
		goto leave_requeue;
	}

	/* If it's a PR we're blocking, then only
	 * requeue if we've got any EX holders */
	if (lockres->l_blocking == DLM_LOCK_PR &&
	    lockres->l_ex_holders) {
		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
		     lockres->l_name, lockres->l_ex_holders);
		goto leave_requeue;
	}

	/*
	 * Can we get a lock in this state if the holder counts are
	 * zero? The meta data unblock code used to check this.
	 */
	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);

	/* Give the lock type a chance to veto the downconvert for now. */
	if (lockres->l_ops->check_downconvert
	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
		     lockres->l_name);
		goto leave_requeue;
	}

	/* If we get here, then we know that there are no more
	 * incompatible holders (and anyone asking for an incompatible
	 * lock is blocked). We can now downconvert the lock */
	if (!lockres->l_ops->downconvert_worker)
		goto downconvert;

	/* Some lockres types want to do a bit of work before
	 * downconverting a lock. Allow that here. The worker function
	 * may sleep, so we save off a copy of what we're blocking as
	 * it may change while we're not holding the spin lock. */
	blocking = lockres->l_blocking;
	level = lockres->l_level;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);

	/* The worker asked us to stop here; l_lock is not held. */
	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
		     lockres->l_name);
		goto leave;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
		/* If this changed underneath us, then we can't drop
		 * it just yet. */
		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
		     "Recheck\n", lockres->l_name, blocking,
		     lockres->l_blocking, level, lockres->l_level);
		goto recheck;
	}

	/* Reached with l_lock held. */
downconvert:
	ctl->requeue = 0;

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
		if (lockres->l_level == DLM_LOCK_EX)
			set_lvb = 1;

		/*
		 * We only set the lvb if the lock has been fully
		 * refreshed - otherwise we risk setting stale
		 * data. Otherwise, there's no need to actually clear
		 * out the lvb here as its value is still valid.
		 */
		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
			lockres->l_ops->set_lvb(lockres);
	}

	gen = ocfs2_prepare_downconvert(lockres, new_level);
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
				     gen);

	/* Reached with l_lock already released. */
leave:
	if (ret)
		mlog_errno(ret);
	return ret;

	/* Reached with l_lock still held. */
leave_requeue:
	spin_unlock_irqrestore(&lockres->l_lock, flags);
	ctl->requeue = 1;

	return 0;
}
39238c2ecf20Sopenharmony_ci
39248c2ecf20Sopenharmony_cistatic int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
39258c2ecf20Sopenharmony_ci				     int blocking)
39268c2ecf20Sopenharmony_ci{
39278c2ecf20Sopenharmony_ci	struct inode *inode;
39288c2ecf20Sopenharmony_ci	struct address_space *mapping;
39298c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi;
39308c2ecf20Sopenharmony_ci
39318c2ecf20Sopenharmony_ci       	inode = ocfs2_lock_res_inode(lockres);
39328c2ecf20Sopenharmony_ci	mapping = inode->i_mapping;
39338c2ecf20Sopenharmony_ci
39348c2ecf20Sopenharmony_ci	if (S_ISDIR(inode->i_mode)) {
39358c2ecf20Sopenharmony_ci		oi = OCFS2_I(inode);
39368c2ecf20Sopenharmony_ci		oi->ip_dir_lock_gen++;
39378c2ecf20Sopenharmony_ci		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
39388c2ecf20Sopenharmony_ci		goto out_forget;
39398c2ecf20Sopenharmony_ci	}
39408c2ecf20Sopenharmony_ci
39418c2ecf20Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
39428c2ecf20Sopenharmony_ci		goto out;
39438c2ecf20Sopenharmony_ci
39448c2ecf20Sopenharmony_ci	/*
39458c2ecf20Sopenharmony_ci	 * We need this before the filemap_fdatawrite() so that it can
39468c2ecf20Sopenharmony_ci	 * transfer the dirty bit from the PTE to the
39478c2ecf20Sopenharmony_ci	 * page. Unfortunately this means that even for EX->PR
39488c2ecf20Sopenharmony_ci	 * downconverts, we'll lose our mappings and have to build
39498c2ecf20Sopenharmony_ci	 * them up again.
39508c2ecf20Sopenharmony_ci	 */
39518c2ecf20Sopenharmony_ci	unmap_mapping_range(mapping, 0, 0, 0);
39528c2ecf20Sopenharmony_ci
39538c2ecf20Sopenharmony_ci	if (filemap_fdatawrite(mapping)) {
39548c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
39558c2ecf20Sopenharmony_ci		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
39568c2ecf20Sopenharmony_ci	}
39578c2ecf20Sopenharmony_ci	sync_mapping_buffers(mapping);
39588c2ecf20Sopenharmony_ci	if (blocking == DLM_LOCK_EX) {
39598c2ecf20Sopenharmony_ci		truncate_inode_pages(mapping, 0);
39608c2ecf20Sopenharmony_ci	} else {
39618c2ecf20Sopenharmony_ci		/* We only need to wait on the I/O if we're not also
39628c2ecf20Sopenharmony_ci		 * truncating pages because truncate_inode_pages waits
39638c2ecf20Sopenharmony_ci		 * for us above. We don't truncate pages if we're
39648c2ecf20Sopenharmony_ci		 * blocking anything < EXMODE because we want to keep
39658c2ecf20Sopenharmony_ci		 * them around in that case. */
39668c2ecf20Sopenharmony_ci		filemap_fdatawait(mapping);
39678c2ecf20Sopenharmony_ci	}
39688c2ecf20Sopenharmony_ci
39698c2ecf20Sopenharmony_ciout_forget:
39708c2ecf20Sopenharmony_ci	forget_all_cached_acls(inode);
39718c2ecf20Sopenharmony_ci
39728c2ecf20Sopenharmony_ciout:
39738c2ecf20Sopenharmony_ci	return UNBLOCK_CONTINUE;
39748c2ecf20Sopenharmony_ci}
39758c2ecf20Sopenharmony_ci
39768c2ecf20Sopenharmony_cistatic int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
39778c2ecf20Sopenharmony_ci				 struct ocfs2_lock_res *lockres,
39788c2ecf20Sopenharmony_ci				 int new_level)
39798c2ecf20Sopenharmony_ci{
39808c2ecf20Sopenharmony_ci	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
39818c2ecf20Sopenharmony_ci
39828c2ecf20Sopenharmony_ci	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
39838c2ecf20Sopenharmony_ci	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
39848c2ecf20Sopenharmony_ci
39858c2ecf20Sopenharmony_ci	if (checkpointed)
39868c2ecf20Sopenharmony_ci		return 1;
39878c2ecf20Sopenharmony_ci
39888c2ecf20Sopenharmony_ci	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
39898c2ecf20Sopenharmony_ci	return 0;
39908c2ecf20Sopenharmony_ci}
39918c2ecf20Sopenharmony_ci
/*
 * check_downconvert hook: run the checkpoint test on the metadata
 * cache of the inode that owns @lockres and return its result.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
39998c2ecf20Sopenharmony_ci
/* Stuff the meta LVB for the inode that owns @lockres. */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
40068c2ecf20Sopenharmony_ci
/*
 * Performs the final reference drop on our dentry lock.  Today this
 * runs in the downconvert thread; in the future we could simplify the
 * dlmglue API and push these drops off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
40188c2ecf20Sopenharmony_ci
40198c2ecf20Sopenharmony_ci/*
40208c2ecf20Sopenharmony_ci * d_delete() matching dentries before the lock downconvert.
40218c2ecf20Sopenharmony_ci *
40228c2ecf20Sopenharmony_ci * At this point, any process waiting to destroy the
40238c2ecf20Sopenharmony_ci * dentry_lock due to last ref count is stopped by the
40248c2ecf20Sopenharmony_ci * OCFS2_LOCK_QUEUED flag.
40258c2ecf20Sopenharmony_ci *
40268c2ecf20Sopenharmony_ci * We have two potential problems
40278c2ecf20Sopenharmony_ci *
40288c2ecf20Sopenharmony_ci * 1) If we do the last reference drop on our dentry_lock (via dput)
40298c2ecf20Sopenharmony_ci *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
40308c2ecf20Sopenharmony_ci *    the downconvert to finish. Instead we take an elevated
40318c2ecf20Sopenharmony_ci *    reference and push the drop until after we've completed our
40328c2ecf20Sopenharmony_ci *    unblock processing.
40338c2ecf20Sopenharmony_ci *
40348c2ecf20Sopenharmony_ci * 2) There might be another process with a final reference,
40358c2ecf20Sopenharmony_ci *    waiting on us to finish processing. If this is the case, we
40368c2ecf20Sopenharmony_ci *    detect it and exit out - there's no more dentries anyway.
40378c2ecf20Sopenharmony_ci */
40388c2ecf20Sopenharmony_cistatic int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
40398c2ecf20Sopenharmony_ci				       int blocking)
40408c2ecf20Sopenharmony_ci{
40418c2ecf20Sopenharmony_ci	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
40428c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
40438c2ecf20Sopenharmony_ci	struct dentry *dentry;
40448c2ecf20Sopenharmony_ci	unsigned long flags;
40458c2ecf20Sopenharmony_ci	int extra_ref = 0;
40468c2ecf20Sopenharmony_ci
40478c2ecf20Sopenharmony_ci	/*
40488c2ecf20Sopenharmony_ci	 * This node is blocking another node from getting a read
40498c2ecf20Sopenharmony_ci	 * lock. This happens when we've renamed within a
40508c2ecf20Sopenharmony_ci	 * directory. We've forced the other nodes to d_delete(), but
40518c2ecf20Sopenharmony_ci	 * we never actually dropped our lock because it's still
40528c2ecf20Sopenharmony_ci	 * valid. The downconvert code will retain a PR for this node,
40538c2ecf20Sopenharmony_ci	 * so there's no further work to do.
40548c2ecf20Sopenharmony_ci	 */
40558c2ecf20Sopenharmony_ci	if (blocking == DLM_LOCK_PR)
40568c2ecf20Sopenharmony_ci		return UNBLOCK_CONTINUE;
40578c2ecf20Sopenharmony_ci
40588c2ecf20Sopenharmony_ci	/*
40598c2ecf20Sopenharmony_ci	 * Mark this inode as potentially orphaned. The code in
40608c2ecf20Sopenharmony_ci	 * ocfs2_delete_inode() will figure out whether it actually
40618c2ecf20Sopenharmony_ci	 * needs to be freed or not.
40628c2ecf20Sopenharmony_ci	 */
40638c2ecf20Sopenharmony_ci	spin_lock(&oi->ip_lock);
40648c2ecf20Sopenharmony_ci	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
40658c2ecf20Sopenharmony_ci	spin_unlock(&oi->ip_lock);
40668c2ecf20Sopenharmony_ci
40678c2ecf20Sopenharmony_ci	/*
40688c2ecf20Sopenharmony_ci	 * Yuck. We need to make sure however that the check of
40698c2ecf20Sopenharmony_ci	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
40708c2ecf20Sopenharmony_ci	 * respect to a reference decrement or the setting of that
40718c2ecf20Sopenharmony_ci	 * flag.
40728c2ecf20Sopenharmony_ci	 */
40738c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
40748c2ecf20Sopenharmony_ci	spin_lock(&dentry_attach_lock);
40758c2ecf20Sopenharmony_ci	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
40768c2ecf20Sopenharmony_ci	    && dl->dl_count) {
40778c2ecf20Sopenharmony_ci		dl->dl_count++;
40788c2ecf20Sopenharmony_ci		extra_ref = 1;
40798c2ecf20Sopenharmony_ci	}
40808c2ecf20Sopenharmony_ci	spin_unlock(&dentry_attach_lock);
40818c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
40828c2ecf20Sopenharmony_ci
40838c2ecf20Sopenharmony_ci	mlog(0, "extra_ref = %d\n", extra_ref);
40848c2ecf20Sopenharmony_ci
40858c2ecf20Sopenharmony_ci	/*
40868c2ecf20Sopenharmony_ci	 * We have a process waiting on us in ocfs2_dentry_iput(),
40878c2ecf20Sopenharmony_ci	 * which means we can't have any more outstanding
40888c2ecf20Sopenharmony_ci	 * aliases. There's no need to do any more work.
40898c2ecf20Sopenharmony_ci	 */
40908c2ecf20Sopenharmony_ci	if (!extra_ref)
40918c2ecf20Sopenharmony_ci		return UNBLOCK_CONTINUE;
40928c2ecf20Sopenharmony_ci
40938c2ecf20Sopenharmony_ci	spin_lock(&dentry_attach_lock);
40948c2ecf20Sopenharmony_ci	while (1) {
40958c2ecf20Sopenharmony_ci		dentry = ocfs2_find_local_alias(dl->dl_inode,
40968c2ecf20Sopenharmony_ci						dl->dl_parent_blkno, 1);
40978c2ecf20Sopenharmony_ci		if (!dentry)
40988c2ecf20Sopenharmony_ci			break;
40998c2ecf20Sopenharmony_ci		spin_unlock(&dentry_attach_lock);
41008c2ecf20Sopenharmony_ci
41018c2ecf20Sopenharmony_ci		if (S_ISDIR(dl->dl_inode->i_mode))
41028c2ecf20Sopenharmony_ci			shrink_dcache_parent(dentry);
41038c2ecf20Sopenharmony_ci
41048c2ecf20Sopenharmony_ci		mlog(0, "d_delete(%pd);\n", dentry);
41058c2ecf20Sopenharmony_ci
41068c2ecf20Sopenharmony_ci		/*
41078c2ecf20Sopenharmony_ci		 * The following dcache calls may do an
41088c2ecf20Sopenharmony_ci		 * iput(). Normally we don't want that from the
41098c2ecf20Sopenharmony_ci		 * downconverting thread, but in this case it's ok
41108c2ecf20Sopenharmony_ci		 * because the requesting node already has an
41118c2ecf20Sopenharmony_ci		 * exclusive lock on the inode, so it can't be queued
41128c2ecf20Sopenharmony_ci		 * for a downconvert.
41138c2ecf20Sopenharmony_ci		 */
41148c2ecf20Sopenharmony_ci		d_delete(dentry);
41158c2ecf20Sopenharmony_ci		dput(dentry);
41168c2ecf20Sopenharmony_ci
41178c2ecf20Sopenharmony_ci		spin_lock(&dentry_attach_lock);
41188c2ecf20Sopenharmony_ci	}
41198c2ecf20Sopenharmony_ci	spin_unlock(&dentry_attach_lock);
41208c2ecf20Sopenharmony_ci
41218c2ecf20Sopenharmony_ci	/*
41228c2ecf20Sopenharmony_ci	 * If we are the last holder of this dentry lock, there is no
41238c2ecf20Sopenharmony_ci	 * reason to downconvert so skip straight to the unlock.
41248c2ecf20Sopenharmony_ci	 */
41258c2ecf20Sopenharmony_ci	if (dl->dl_count == 1)
41268c2ecf20Sopenharmony_ci		return UNBLOCK_STOP_POST;
41278c2ecf20Sopenharmony_ci
41288c2ecf20Sopenharmony_ci	return UNBLOCK_CONTINUE_POST;
41298c2ecf20Sopenharmony_ci}
41308c2ecf20Sopenharmony_ci
41318c2ecf20Sopenharmony_cistatic int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
41328c2ecf20Sopenharmony_ci					    int new_level)
41338c2ecf20Sopenharmony_ci{
41348c2ecf20Sopenharmony_ci	struct ocfs2_refcount_tree *tree =
41358c2ecf20Sopenharmony_ci				ocfs2_lock_res_refcount_tree(lockres);
41368c2ecf20Sopenharmony_ci
41378c2ecf20Sopenharmony_ci	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
41388c2ecf20Sopenharmony_ci}
41398c2ecf20Sopenharmony_ci
41408c2ecf20Sopenharmony_cistatic int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
41418c2ecf20Sopenharmony_ci					 int blocking)
41428c2ecf20Sopenharmony_ci{
41438c2ecf20Sopenharmony_ci	struct ocfs2_refcount_tree *tree =
41448c2ecf20Sopenharmony_ci				ocfs2_lock_res_refcount_tree(lockres);
41458c2ecf20Sopenharmony_ci
41468c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_purge(&tree->rf_ci);
41478c2ecf20Sopenharmony_ci
41488c2ecf20Sopenharmony_ci	return UNBLOCK_CONTINUE;
41498c2ecf20Sopenharmony_ci}
41508c2ecf20Sopenharmony_ci
41518c2ecf20Sopenharmony_cistatic void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
41528c2ecf20Sopenharmony_ci{
41538c2ecf20Sopenharmony_ci	struct ocfs2_qinfo_lvb *lvb;
41548c2ecf20Sopenharmony_ci	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
41558c2ecf20Sopenharmony_ci	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
41568c2ecf20Sopenharmony_ci					    oinfo->dqi_gi.dqi_type);
41578c2ecf20Sopenharmony_ci
41588c2ecf20Sopenharmony_ci	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
41598c2ecf20Sopenharmony_ci	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
41608c2ecf20Sopenharmony_ci	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
41618c2ecf20Sopenharmony_ci	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
41628c2ecf20Sopenharmony_ci	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
41638c2ecf20Sopenharmony_ci	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
41648c2ecf20Sopenharmony_ci	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
41658c2ecf20Sopenharmony_ci	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
41668c2ecf20Sopenharmony_ci}
41678c2ecf20Sopenharmony_ci
41688c2ecf20Sopenharmony_civoid ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
41698c2ecf20Sopenharmony_ci{
41708c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
41718c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
41728c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
41738c2ecf20Sopenharmony_ci
41748c2ecf20Sopenharmony_ci	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
41758c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, level);
41768c2ecf20Sopenharmony_ci}
41778c2ecf20Sopenharmony_ci
41788c2ecf20Sopenharmony_cistatic int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
41798c2ecf20Sopenharmony_ci{
41808c2ecf20Sopenharmony_ci	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
41818c2ecf20Sopenharmony_ci					    oinfo->dqi_gi.dqi_type);
41828c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
41838c2ecf20Sopenharmony_ci	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
41848c2ecf20Sopenharmony_ci	struct buffer_head *bh = NULL;
41858c2ecf20Sopenharmony_ci	struct ocfs2_global_disk_dqinfo *gdinfo;
41868c2ecf20Sopenharmony_ci	int status = 0;
41878c2ecf20Sopenharmony_ci
41888c2ecf20Sopenharmony_ci	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
41898c2ecf20Sopenharmony_ci	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
41908c2ecf20Sopenharmony_ci		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
41918c2ecf20Sopenharmony_ci		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
41928c2ecf20Sopenharmony_ci		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
41938c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
41948c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
41958c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_free_entry =
41968c2ecf20Sopenharmony_ci					be32_to_cpu(lvb->lvb_free_entry);
41978c2ecf20Sopenharmony_ci	} else {
41988c2ecf20Sopenharmony_ci		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
41998c2ecf20Sopenharmony_ci						     oinfo->dqi_giblk, &bh);
42008c2ecf20Sopenharmony_ci		if (status) {
42018c2ecf20Sopenharmony_ci			mlog_errno(status);
42028c2ecf20Sopenharmony_ci			goto bail;
42038c2ecf20Sopenharmony_ci		}
42048c2ecf20Sopenharmony_ci		gdinfo = (struct ocfs2_global_disk_dqinfo *)
42058c2ecf20Sopenharmony_ci					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
42068c2ecf20Sopenharmony_ci		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
42078c2ecf20Sopenharmony_ci		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
42088c2ecf20Sopenharmony_ci		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
42098c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
42108c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
42118c2ecf20Sopenharmony_ci		oinfo->dqi_gi.dqi_free_entry =
42128c2ecf20Sopenharmony_ci					le32_to_cpu(gdinfo->dqi_free_entry);
42138c2ecf20Sopenharmony_ci		brelse(bh);
42148c2ecf20Sopenharmony_ci		ocfs2_track_lock_refresh(lockres);
42158c2ecf20Sopenharmony_ci	}
42168c2ecf20Sopenharmony_ci
42178c2ecf20Sopenharmony_cibail:
42188c2ecf20Sopenharmony_ci	return status;
42198c2ecf20Sopenharmony_ci}
42208c2ecf20Sopenharmony_ci
42218c2ecf20Sopenharmony_ci/* Lock quota info, this function expects at least shared lock on the quota file
42228c2ecf20Sopenharmony_ci * so that we can safely refresh quota info from disk. */
42238c2ecf20Sopenharmony_ciint ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
42248c2ecf20Sopenharmony_ci{
42258c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
42268c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
42278c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42288c2ecf20Sopenharmony_ci	int status = 0;
42298c2ecf20Sopenharmony_ci
42308c2ecf20Sopenharmony_ci	/* On RO devices, locking really isn't needed... */
42318c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb)) {
42328c2ecf20Sopenharmony_ci		if (ex)
42338c2ecf20Sopenharmony_ci			status = -EROFS;
42348c2ecf20Sopenharmony_ci		goto bail;
42358c2ecf20Sopenharmony_ci	}
42368c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
42378c2ecf20Sopenharmony_ci		goto bail;
42388c2ecf20Sopenharmony_ci
42398c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
42408c2ecf20Sopenharmony_ci	if (status < 0) {
42418c2ecf20Sopenharmony_ci		mlog_errno(status);
42428c2ecf20Sopenharmony_ci		goto bail;
42438c2ecf20Sopenharmony_ci	}
42448c2ecf20Sopenharmony_ci	if (!ocfs2_should_refresh_lock_res(lockres))
42458c2ecf20Sopenharmony_ci		goto bail;
42468c2ecf20Sopenharmony_ci	/* OK, we have the lock but we need to refresh the quota info */
42478c2ecf20Sopenharmony_ci	status = ocfs2_refresh_qinfo(oinfo);
42488c2ecf20Sopenharmony_ci	if (status)
42498c2ecf20Sopenharmony_ci		ocfs2_qinfo_unlock(oinfo, ex);
42508c2ecf20Sopenharmony_ci	ocfs2_complete_lock_res_refresh(lockres, status);
42518c2ecf20Sopenharmony_cibail:
42528c2ecf20Sopenharmony_ci	return status;
42538c2ecf20Sopenharmony_ci}
42548c2ecf20Sopenharmony_ci
42558c2ecf20Sopenharmony_ciint ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
42568c2ecf20Sopenharmony_ci{
42578c2ecf20Sopenharmony_ci	int status;
42588c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42598c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
42608c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = lockres->l_priv;
42618c2ecf20Sopenharmony_ci
42628c2ecf20Sopenharmony_ci
42638c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb))
42648c2ecf20Sopenharmony_ci		return -EROFS;
42658c2ecf20Sopenharmony_ci
42668c2ecf20Sopenharmony_ci	if (ocfs2_mount_local(osb))
42678c2ecf20Sopenharmony_ci		return 0;
42688c2ecf20Sopenharmony_ci
42698c2ecf20Sopenharmony_ci	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
42708c2ecf20Sopenharmony_ci	if (status < 0)
42718c2ecf20Sopenharmony_ci		mlog_errno(status);
42728c2ecf20Sopenharmony_ci
42738c2ecf20Sopenharmony_ci	return status;
42748c2ecf20Sopenharmony_ci}
42758c2ecf20Sopenharmony_ci
42768c2ecf20Sopenharmony_civoid ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
42778c2ecf20Sopenharmony_ci{
42788c2ecf20Sopenharmony_ci	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42798c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
42808c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = lockres->l_priv;
42818c2ecf20Sopenharmony_ci
42828c2ecf20Sopenharmony_ci	if (!ocfs2_mount_local(osb))
42838c2ecf20Sopenharmony_ci		ocfs2_cluster_unlock(osb, lockres, level);
42848c2ecf20Sopenharmony_ci}
42858c2ecf20Sopenharmony_ci
42868c2ecf20Sopenharmony_cistatic void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
42878c2ecf20Sopenharmony_ci				       struct ocfs2_lock_res *lockres)
42888c2ecf20Sopenharmony_ci{
42898c2ecf20Sopenharmony_ci	int status;
42908c2ecf20Sopenharmony_ci	struct ocfs2_unblock_ctl ctl = {0, 0,};
42918c2ecf20Sopenharmony_ci	unsigned long flags;
42928c2ecf20Sopenharmony_ci
42938c2ecf20Sopenharmony_ci	/* Our reference to the lockres in this function can be
42948c2ecf20Sopenharmony_ci	 * considered valid until we remove the OCFS2_LOCK_QUEUED
42958c2ecf20Sopenharmony_ci	 * flag. */
42968c2ecf20Sopenharmony_ci
42978c2ecf20Sopenharmony_ci	BUG_ON(!lockres);
42988c2ecf20Sopenharmony_ci	BUG_ON(!lockres->l_ops);
42998c2ecf20Sopenharmony_ci
43008c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
43018c2ecf20Sopenharmony_ci
43028c2ecf20Sopenharmony_ci	/* Detect whether a lock has been marked as going away while
43038c2ecf20Sopenharmony_ci	 * the downconvert thread was processing other things. A lock can
43048c2ecf20Sopenharmony_ci	 * still be marked with OCFS2_LOCK_FREEING after this check,
43058c2ecf20Sopenharmony_ci	 * but short circuiting here will still save us some
43068c2ecf20Sopenharmony_ci	 * performance. */
43078c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
43088c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_FREEING)
43098c2ecf20Sopenharmony_ci		goto unqueue;
43108c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
43118c2ecf20Sopenharmony_ci
43128c2ecf20Sopenharmony_ci	status = ocfs2_unblock_lock(osb, lockres, &ctl);
43138c2ecf20Sopenharmony_ci	if (status < 0)
43148c2ecf20Sopenharmony_ci		mlog_errno(status);
43158c2ecf20Sopenharmony_ci
43168c2ecf20Sopenharmony_ci	spin_lock_irqsave(&lockres->l_lock, flags);
43178c2ecf20Sopenharmony_ciunqueue:
43188c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
43198c2ecf20Sopenharmony_ci		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
43208c2ecf20Sopenharmony_ci	} else
43218c2ecf20Sopenharmony_ci		ocfs2_schedule_blocked_lock(osb, lockres);
43228c2ecf20Sopenharmony_ci
43238c2ecf20Sopenharmony_ci	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
43248c2ecf20Sopenharmony_ci	     ctl.requeue ? "yes" : "no");
43258c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&lockres->l_lock, flags);
43268c2ecf20Sopenharmony_ci
43278c2ecf20Sopenharmony_ci	if (ctl.unblock_action != UNBLOCK_CONTINUE
43288c2ecf20Sopenharmony_ci	    && lockres->l_ops->post_unlock)
43298c2ecf20Sopenharmony_ci		lockres->l_ops->post_unlock(osb, lockres);
43308c2ecf20Sopenharmony_ci}
43318c2ecf20Sopenharmony_ci
43328c2ecf20Sopenharmony_cistatic void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
43338c2ecf20Sopenharmony_ci					struct ocfs2_lock_res *lockres)
43348c2ecf20Sopenharmony_ci{
43358c2ecf20Sopenharmony_ci	unsigned long flags;
43368c2ecf20Sopenharmony_ci
43378c2ecf20Sopenharmony_ci	assert_spin_locked(&lockres->l_lock);
43388c2ecf20Sopenharmony_ci
43398c2ecf20Sopenharmony_ci	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
43408c2ecf20Sopenharmony_ci		/* Do not schedule a lock for downconvert when it's on
43418c2ecf20Sopenharmony_ci		 * the way to destruction - any nodes wanting access
43428c2ecf20Sopenharmony_ci		 * to the resource will get it soon. */
43438c2ecf20Sopenharmony_ci		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
43448c2ecf20Sopenharmony_ci		     lockres->l_name, lockres->l_flags);
43458c2ecf20Sopenharmony_ci		return;
43468c2ecf20Sopenharmony_ci	}
43478c2ecf20Sopenharmony_ci
43488c2ecf20Sopenharmony_ci	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
43498c2ecf20Sopenharmony_ci
43508c2ecf20Sopenharmony_ci	spin_lock_irqsave(&osb->dc_task_lock, flags);
43518c2ecf20Sopenharmony_ci	if (list_empty(&lockres->l_blocked_list)) {
43528c2ecf20Sopenharmony_ci		list_add_tail(&lockres->l_blocked_list,
43538c2ecf20Sopenharmony_ci			      &osb->blocked_lock_list);
43548c2ecf20Sopenharmony_ci		osb->blocked_lock_count++;
43558c2ecf20Sopenharmony_ci	}
43568c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
43578c2ecf20Sopenharmony_ci}
43588c2ecf20Sopenharmony_ci
43598c2ecf20Sopenharmony_cistatic void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
43608c2ecf20Sopenharmony_ci{
43618c2ecf20Sopenharmony_ci	unsigned long processed;
43628c2ecf20Sopenharmony_ci	unsigned long flags;
43638c2ecf20Sopenharmony_ci	struct ocfs2_lock_res *lockres;
43648c2ecf20Sopenharmony_ci
43658c2ecf20Sopenharmony_ci	spin_lock_irqsave(&osb->dc_task_lock, flags);
43668c2ecf20Sopenharmony_ci	/* grab this early so we know to try again if a state change and
43678c2ecf20Sopenharmony_ci	 * wake happens part-way through our work  */
43688c2ecf20Sopenharmony_ci	osb->dc_work_sequence = osb->dc_wake_sequence;
43698c2ecf20Sopenharmony_ci
43708c2ecf20Sopenharmony_ci	processed = osb->blocked_lock_count;
43718c2ecf20Sopenharmony_ci	/*
43728c2ecf20Sopenharmony_ci	 * blocked lock processing in this loop might call iput which can
43738c2ecf20Sopenharmony_ci	 * remove items off osb->blocked_lock_list. Downconvert up to
43748c2ecf20Sopenharmony_ci	 * 'processed' number of locks, but stop short if we had some
43758c2ecf20Sopenharmony_ci	 * removed in ocfs2_mark_lockres_freeing when downconverting.
43768c2ecf20Sopenharmony_ci	 */
43778c2ecf20Sopenharmony_ci	while (processed && !list_empty(&osb->blocked_lock_list)) {
43788c2ecf20Sopenharmony_ci		lockres = list_entry(osb->blocked_lock_list.next,
43798c2ecf20Sopenharmony_ci				     struct ocfs2_lock_res, l_blocked_list);
43808c2ecf20Sopenharmony_ci		list_del_init(&lockres->l_blocked_list);
43818c2ecf20Sopenharmony_ci		osb->blocked_lock_count--;
43828c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
43838c2ecf20Sopenharmony_ci
43848c2ecf20Sopenharmony_ci		BUG_ON(!processed);
43858c2ecf20Sopenharmony_ci		processed--;
43868c2ecf20Sopenharmony_ci
43878c2ecf20Sopenharmony_ci		ocfs2_process_blocked_lock(osb, lockres);
43888c2ecf20Sopenharmony_ci
43898c2ecf20Sopenharmony_ci		spin_lock_irqsave(&osb->dc_task_lock, flags);
43908c2ecf20Sopenharmony_ci	}
43918c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
43928c2ecf20Sopenharmony_ci}
43938c2ecf20Sopenharmony_ci
43948c2ecf20Sopenharmony_cistatic int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
43958c2ecf20Sopenharmony_ci{
43968c2ecf20Sopenharmony_ci	int empty = 0;
43978c2ecf20Sopenharmony_ci	unsigned long flags;
43988c2ecf20Sopenharmony_ci
43998c2ecf20Sopenharmony_ci	spin_lock_irqsave(&osb->dc_task_lock, flags);
44008c2ecf20Sopenharmony_ci	if (list_empty(&osb->blocked_lock_list))
44018c2ecf20Sopenharmony_ci		empty = 1;
44028c2ecf20Sopenharmony_ci
44038c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
44048c2ecf20Sopenharmony_ci	return empty;
44058c2ecf20Sopenharmony_ci}
44068c2ecf20Sopenharmony_ci
44078c2ecf20Sopenharmony_cistatic int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
44088c2ecf20Sopenharmony_ci{
44098c2ecf20Sopenharmony_ci	int should_wake = 0;
44108c2ecf20Sopenharmony_ci	unsigned long flags;
44118c2ecf20Sopenharmony_ci
44128c2ecf20Sopenharmony_ci	spin_lock_irqsave(&osb->dc_task_lock, flags);
44138c2ecf20Sopenharmony_ci	if (osb->dc_work_sequence != osb->dc_wake_sequence)
44148c2ecf20Sopenharmony_ci		should_wake = 1;
44158c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
44168c2ecf20Sopenharmony_ci
44178c2ecf20Sopenharmony_ci	return should_wake;
44188c2ecf20Sopenharmony_ci}
44198c2ecf20Sopenharmony_ci
44208c2ecf20Sopenharmony_cistatic int ocfs2_downconvert_thread(void *arg)
44218c2ecf20Sopenharmony_ci{
44228c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = arg;
44238c2ecf20Sopenharmony_ci
44248c2ecf20Sopenharmony_ci	/* only quit once we've been asked to stop and there is no more
44258c2ecf20Sopenharmony_ci	 * work available */
44268c2ecf20Sopenharmony_ci	while (!(kthread_should_stop() &&
44278c2ecf20Sopenharmony_ci		ocfs2_downconvert_thread_lists_empty(osb))) {
44288c2ecf20Sopenharmony_ci
44298c2ecf20Sopenharmony_ci		wait_event_interruptible(osb->dc_event,
44308c2ecf20Sopenharmony_ci					 ocfs2_downconvert_thread_should_wake(osb) ||
44318c2ecf20Sopenharmony_ci					 kthread_should_stop());
44328c2ecf20Sopenharmony_ci
44338c2ecf20Sopenharmony_ci		mlog(0, "downconvert_thread: awoken\n");
44348c2ecf20Sopenharmony_ci
44358c2ecf20Sopenharmony_ci		ocfs2_downconvert_thread_do_work(osb);
44368c2ecf20Sopenharmony_ci	}
44378c2ecf20Sopenharmony_ci
44388c2ecf20Sopenharmony_ci	osb->dc_task = NULL;
44398c2ecf20Sopenharmony_ci	return 0;
44408c2ecf20Sopenharmony_ci}
44418c2ecf20Sopenharmony_ci
44428c2ecf20Sopenharmony_civoid ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
44438c2ecf20Sopenharmony_ci{
44448c2ecf20Sopenharmony_ci	unsigned long flags;
44458c2ecf20Sopenharmony_ci
44468c2ecf20Sopenharmony_ci	spin_lock_irqsave(&osb->dc_task_lock, flags);
44478c2ecf20Sopenharmony_ci	/* make sure the voting thread gets a swipe at whatever changes
44488c2ecf20Sopenharmony_ci	 * the caller may have made to the voting state */
44498c2ecf20Sopenharmony_ci	osb->dc_wake_sequence++;
44508c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
44518c2ecf20Sopenharmony_ci	wake_up(&osb->dc_event);
44528c2ecf20Sopenharmony_ci}
4453