162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/ceph/ceph_debug.h>
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#include <linux/fs.h>
562306a36Sopenharmony_ci#include <linux/kernel.h>
662306a36Sopenharmony_ci#include <linux/sched/signal.h>
762306a36Sopenharmony_ci#include <linux/slab.h>
862306a36Sopenharmony_ci#include <linux/vmalloc.h>
962306a36Sopenharmony_ci#include <linux/wait.h>
1062306a36Sopenharmony_ci#include <linux/writeback.h>
1162306a36Sopenharmony_ci#include <linux/iversion.h>
1262306a36Sopenharmony_ci#include <linux/filelock.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include "super.h"
1562306a36Sopenharmony_ci#include "mds_client.h"
1662306a36Sopenharmony_ci#include "cache.h"
1762306a36Sopenharmony_ci#include "crypto.h"
1862306a36Sopenharmony_ci#include <linux/ceph/decode.h>
1962306a36Sopenharmony_ci#include <linux/ceph/messenger.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci/*
2262306a36Sopenharmony_ci * Capability management
2362306a36Sopenharmony_ci *
2462306a36Sopenharmony_ci * The Ceph metadata servers control client access to inode metadata
2562306a36Sopenharmony_ci * and file data by issuing capabilities, granting clients permission
 * to read and/or write both inode fields and file data to OSDs
2762306a36Sopenharmony_ci * (storage nodes).  Each capability consists of a set of bits
2862306a36Sopenharmony_ci * indicating which operations are allowed.
2962306a36Sopenharmony_ci *
3062306a36Sopenharmony_ci * If the client holds a *_SHARED cap, the client has a coherent value
3162306a36Sopenharmony_ci * that can be safely read from the cached inode.
3262306a36Sopenharmony_ci *
3362306a36Sopenharmony_ci * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the
3462306a36Sopenharmony_ci * client is allowed to change inode attributes (e.g., file size,
3562306a36Sopenharmony_ci * mtime), note its dirty state in the ceph_cap, and asynchronously
3662306a36Sopenharmony_ci * flush that metadata change to the MDS.
3762306a36Sopenharmony_ci *
3862306a36Sopenharmony_ci * In the event of a conflicting operation (perhaps by another
3962306a36Sopenharmony_ci * client), the MDS will revoke the conflicting client capabilities.
4062306a36Sopenharmony_ci *
4162306a36Sopenharmony_ci * In order for a client to cache an inode, it must hold a capability
4262306a36Sopenharmony_ci * with at least one MDS server.  When inodes are released, release
4362306a36Sopenharmony_ci * notifications are batched and periodically sent en masse to the MDS
4462306a36Sopenharmony_ci * cluster to release server state.
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_ci
/*
 * Forward declarations of file-local (static) helpers, so they can be
 * referenced before their definitions appear.
 */
static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc);
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid);
5262306a36Sopenharmony_ci
/*
 * Generate readable cap strings for debugging output.
 *
 * ceph_cap_string() formats into one of MAX_CAP_STR static buffers of
 * 40 bytes each.  last_cap_str is the index of the next buffer to hand
 * out; it is advanced (and wrapped back to 0) under cap_str_lock.
 */
#define MAX_CAP_STR 20
static char cap_str[MAX_CAP_STR][40];
static DEFINE_SPINLOCK(cap_str_lock);
static int last_cap_str;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_cistatic char *gcap_string(char *s, int c)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	if (c & CEPH_CAP_GSHARED)
6462306a36Sopenharmony_ci		*s++ = 's';
6562306a36Sopenharmony_ci	if (c & CEPH_CAP_GEXCL)
6662306a36Sopenharmony_ci		*s++ = 'x';
6762306a36Sopenharmony_ci	if (c & CEPH_CAP_GCACHE)
6862306a36Sopenharmony_ci		*s++ = 'c';
6962306a36Sopenharmony_ci	if (c & CEPH_CAP_GRD)
7062306a36Sopenharmony_ci		*s++ = 'r';
7162306a36Sopenharmony_ci	if (c & CEPH_CAP_GWR)
7262306a36Sopenharmony_ci		*s++ = 'w';
7362306a36Sopenharmony_ci	if (c & CEPH_CAP_GBUFFER)
7462306a36Sopenharmony_ci		*s++ = 'b';
7562306a36Sopenharmony_ci	if (c & CEPH_CAP_GWREXTEND)
7662306a36Sopenharmony_ci		*s++ = 'a';
7762306a36Sopenharmony_ci	if (c & CEPH_CAP_GLAZYIO)
7862306a36Sopenharmony_ci		*s++ = 'l';
7962306a36Sopenharmony_ci	return s;
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ciconst char *ceph_cap_string(int caps)
8362306a36Sopenharmony_ci{
8462306a36Sopenharmony_ci	int i;
8562306a36Sopenharmony_ci	char *s;
8662306a36Sopenharmony_ci	int c;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	spin_lock(&cap_str_lock);
8962306a36Sopenharmony_ci	i = last_cap_str++;
9062306a36Sopenharmony_ci	if (last_cap_str == MAX_CAP_STR)
9162306a36Sopenharmony_ci		last_cap_str = 0;
9262306a36Sopenharmony_ci	spin_unlock(&cap_str_lock);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	s = cap_str[i];
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	if (caps & CEPH_CAP_PIN)
9762306a36Sopenharmony_ci		*s++ = 'p';
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	c = (caps >> CEPH_CAP_SAUTH) & 3;
10062306a36Sopenharmony_ci	if (c) {
10162306a36Sopenharmony_ci		*s++ = 'A';
10262306a36Sopenharmony_ci		s = gcap_string(s, c);
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	c = (caps >> CEPH_CAP_SLINK) & 3;
10662306a36Sopenharmony_ci	if (c) {
10762306a36Sopenharmony_ci		*s++ = 'L';
10862306a36Sopenharmony_ci		s = gcap_string(s, c);
10962306a36Sopenharmony_ci	}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	c = (caps >> CEPH_CAP_SXATTR) & 3;
11262306a36Sopenharmony_ci	if (c) {
11362306a36Sopenharmony_ci		*s++ = 'X';
11462306a36Sopenharmony_ci		s = gcap_string(s, c);
11562306a36Sopenharmony_ci	}
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	c = caps >> CEPH_CAP_SFILE;
11862306a36Sopenharmony_ci	if (c) {
11962306a36Sopenharmony_ci		*s++ = 'F';
12062306a36Sopenharmony_ci		s = gcap_string(s, c);
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	if (s == cap_str[i])
12462306a36Sopenharmony_ci		*s++ = '-';
12562306a36Sopenharmony_ci	*s = 0;
12662306a36Sopenharmony_ci	return cap_str[i];
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_civoid ceph_caps_init(struct ceph_mds_client *mdsc)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	INIT_LIST_HEAD(&mdsc->caps_list);
13262306a36Sopenharmony_ci	spin_lock_init(&mdsc->caps_list_lock);
13362306a36Sopenharmony_ci}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_civoid ceph_caps_finalize(struct ceph_mds_client *mdsc)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	struct ceph_cap *cap;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
14062306a36Sopenharmony_ci	while (!list_empty(&mdsc->caps_list)) {
14162306a36Sopenharmony_ci		cap = list_first_entry(&mdsc->caps_list,
14262306a36Sopenharmony_ci				       struct ceph_cap, caps_item);
14362306a36Sopenharmony_ci		list_del(&cap->caps_item);
14462306a36Sopenharmony_ci		kmem_cache_free(ceph_cap_cachep, cap);
14562306a36Sopenharmony_ci	}
14662306a36Sopenharmony_ci	mdsc->caps_total_count = 0;
14762306a36Sopenharmony_ci	mdsc->caps_avail_count = 0;
14862306a36Sopenharmony_ci	mdsc->caps_use_count = 0;
14962306a36Sopenharmony_ci	mdsc->caps_reserve_count = 0;
15062306a36Sopenharmony_ci	mdsc->caps_min_count = 0;
15162306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_civoid ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
15562306a36Sopenharmony_ci			      struct ceph_mount_options *fsopt)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
15862306a36Sopenharmony_ci	mdsc->caps_min_count = fsopt->max_readdir;
15962306a36Sopenharmony_ci	if (mdsc->caps_min_count < 1024)
16062306a36Sopenharmony_ci		mdsc->caps_min_count = 1024;
16162306a36Sopenharmony_ci	mdsc->caps_use_max = fsopt->caps_max;
16262306a36Sopenharmony_ci	if (mdsc->caps_use_max > 0 &&
16362306a36Sopenharmony_ci	    mdsc->caps_use_max < mdsc->caps_min_count)
16462306a36Sopenharmony_ci		mdsc->caps_use_max = mdsc->caps_min_count;
16562306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
16662306a36Sopenharmony_ci}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_cistatic void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	struct ceph_cap *cap;
17162306a36Sopenharmony_ci	int i;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (nr_caps) {
17462306a36Sopenharmony_ci		BUG_ON(mdsc->caps_reserve_count < nr_caps);
17562306a36Sopenharmony_ci		mdsc->caps_reserve_count -= nr_caps;
17662306a36Sopenharmony_ci		if (mdsc->caps_avail_count >=
17762306a36Sopenharmony_ci		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
17862306a36Sopenharmony_ci			mdsc->caps_total_count -= nr_caps;
17962306a36Sopenharmony_ci			for (i = 0; i < nr_caps; i++) {
18062306a36Sopenharmony_ci				cap = list_first_entry(&mdsc->caps_list,
18162306a36Sopenharmony_ci					struct ceph_cap, caps_item);
18262306a36Sopenharmony_ci				list_del(&cap->caps_item);
18362306a36Sopenharmony_ci				kmem_cache_free(ceph_cap_cachep, cap);
18462306a36Sopenharmony_ci			}
18562306a36Sopenharmony_ci		} else {
18662306a36Sopenharmony_ci			mdsc->caps_avail_count += nr_caps;
18762306a36Sopenharmony_ci		}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci		dout("%s: caps %d = %d used + %d resv + %d avail\n",
19062306a36Sopenharmony_ci		     __func__,
19162306a36Sopenharmony_ci		     mdsc->caps_total_count, mdsc->caps_use_count,
19262306a36Sopenharmony_ci		     mdsc->caps_reserve_count, mdsc->caps_avail_count);
19362306a36Sopenharmony_ci		BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
19462306a36Sopenharmony_ci						 mdsc->caps_reserve_count +
19562306a36Sopenharmony_ci						 mdsc->caps_avail_count);
19662306a36Sopenharmony_ci	}
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
/*
 * Reserve @need caps from the pool of @mdsc on behalf of @ctx.
 *
 * Caps are gathered in up to three ways: from the caps already counted
 * as available, by allocating new ones, and - once, after the first
 * allocation failure - by trimming every MDS session and then taking
 * whatever has become available.
 *
 * On success returns 0 with ctx->count set to @need and ctx->used set
 * to 0.  If an allocation fails after the trimming pass has been used,
 * returns -ENOMEM and hands everything gathered so far back through
 * __ceph_unreserve_caps(); @ctx is left untouched in that case.
 *
 * Called under mdsc->mutex.  The mutex is dropped and re-taken around
 * each per-session trim.
 */
int ceph_reserve_caps(struct ceph_mds_client *mdsc,
		      struct ceph_cap_reservation *ctx, int need)
{
	int i, j;
	struct ceph_cap *cap;
	int have;		/* taken from the available pool */
	int alloc = 0;		/* freshly allocated, parked on newcaps */
	int max_caps;
	int err = 0;
	bool trimmed = false;	/* session trimming is attempted only once */
	struct ceph_mds_session *s;
	LIST_HEAD(newcaps);

	dout("reserve caps ctx=%p need=%d\n", ctx, need);

	/* first reserve any caps that are already allocated */
	spin_lock(&mdsc->caps_list_lock);
	if (mdsc->caps_avail_count >= need)
		have = need;
	else
		have = mdsc->caps_avail_count;
	mdsc->caps_avail_count -= have;
	mdsc->caps_reserve_count += have;
	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);

	/* i counts caps secured so far; it only advances on success */
	for (i = have; i < need; ) {
		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
		if (cap) {
			list_add(&cap->caps_item, &newcaps);
			alloc++;
			i++;
			continue;
		}

		/* allocation failed: try once to free caps up by trimming */
		if (!trimmed) {
			for (j = 0; j < mdsc->max_sessions; j++) {
				s = __ceph_lookup_mds_session(mdsc, j);
				if (!s)
					continue;
				/* mdsc->mutex is released before s_mutex is taken */
				mutex_unlock(&mdsc->mutex);

				mutex_lock(&s->s_mutex);
				/* target: session's cap count minus what we still lack */
				max_caps = s->s_nr_caps - (need - i);
				ceph_trim_caps(mdsc, s, max_caps);
				mutex_unlock(&s->s_mutex);

				ceph_put_mds_session(s);
				mutex_lock(&mdsc->mutex);
			}
			trimmed = true;

			/* grab whatever is available now, up to the shortfall */
			spin_lock(&mdsc->caps_list_lock);
			if (mdsc->caps_avail_count) {
				int more_have;
				if (mdsc->caps_avail_count >= need - i)
					more_have = need - i;
				else
					more_have = mdsc->caps_avail_count;

				i += more_have;
				have += more_have;
				mdsc->caps_avail_count -= more_have;
				mdsc->caps_reserve_count += more_have;

			}
			spin_unlock(&mdsc->caps_list_lock);

			continue;
		}

		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
			ctx, need, have + alloc);
		err = -ENOMEM;
		break;
	}

	if (!err) {
		BUG_ON(have + alloc != need);
		ctx->count = need;
		ctx->used = 0;
	}

	/*
	 * Account for the new allocations as reserved and move them onto
	 * the pool list.  This happens on failure too, so that the
	 * unreserve below can return both the pooled and the new caps.
	 */
	spin_lock(&mdsc->caps_list_lock);
	mdsc->caps_total_count += alloc;
	mdsc->caps_reserve_count += alloc;
	list_splice(&newcaps, &mdsc->caps_list);

	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);

	if (err)
		__ceph_unreserve_caps(mdsc, have + alloc);

	spin_unlock(&mdsc->caps_list_lock);

	/* debug output only; counters are read here without the lock */
	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
	return err;
}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_civoid ceph_unreserve_caps(struct ceph_mds_client *mdsc,
30862306a36Sopenharmony_ci			 struct ceph_cap_reservation *ctx)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	bool reclaim = false;
31162306a36Sopenharmony_ci	if (!ctx->count)
31262306a36Sopenharmony_ci		return;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
31562306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
31662306a36Sopenharmony_ci	__ceph_unreserve_caps(mdsc, ctx->count);
31762306a36Sopenharmony_ci	ctx->count = 0;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	if (mdsc->caps_use_max > 0 &&
32062306a36Sopenharmony_ci	    mdsc->caps_use_count > mdsc->caps_use_max)
32162306a36Sopenharmony_ci		reclaim = true;
32262306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	if (reclaim)
32562306a36Sopenharmony_ci		ceph_reclaim_caps_nr(mdsc, ctx->used);
32662306a36Sopenharmony_ci}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_cistruct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
32962306a36Sopenharmony_ci			      struct ceph_cap_reservation *ctx)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	struct ceph_cap *cap = NULL;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	/* temporary, until we do something about cap import/export */
33462306a36Sopenharmony_ci	if (!ctx) {
33562306a36Sopenharmony_ci		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
33662306a36Sopenharmony_ci		if (cap) {
33762306a36Sopenharmony_ci			spin_lock(&mdsc->caps_list_lock);
33862306a36Sopenharmony_ci			mdsc->caps_use_count++;
33962306a36Sopenharmony_ci			mdsc->caps_total_count++;
34062306a36Sopenharmony_ci			spin_unlock(&mdsc->caps_list_lock);
34162306a36Sopenharmony_ci		} else {
34262306a36Sopenharmony_ci			spin_lock(&mdsc->caps_list_lock);
34362306a36Sopenharmony_ci			if (mdsc->caps_avail_count) {
34462306a36Sopenharmony_ci				BUG_ON(list_empty(&mdsc->caps_list));
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci				mdsc->caps_avail_count--;
34762306a36Sopenharmony_ci				mdsc->caps_use_count++;
34862306a36Sopenharmony_ci				cap = list_first_entry(&mdsc->caps_list,
34962306a36Sopenharmony_ci						struct ceph_cap, caps_item);
35062306a36Sopenharmony_ci				list_del(&cap->caps_item);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci				BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
35362306a36Sopenharmony_ci				       mdsc->caps_reserve_count + mdsc->caps_avail_count);
35462306a36Sopenharmony_ci			}
35562306a36Sopenharmony_ci			spin_unlock(&mdsc->caps_list_lock);
35662306a36Sopenharmony_ci		}
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci		return cap;
35962306a36Sopenharmony_ci	}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
36262306a36Sopenharmony_ci	dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
36362306a36Sopenharmony_ci	     ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
36462306a36Sopenharmony_ci	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
36562306a36Sopenharmony_ci	BUG_ON(!ctx->count);
36662306a36Sopenharmony_ci	BUG_ON(ctx->count > mdsc->caps_reserve_count);
36762306a36Sopenharmony_ci	BUG_ON(list_empty(&mdsc->caps_list));
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	ctx->count--;
37062306a36Sopenharmony_ci	ctx->used++;
37162306a36Sopenharmony_ci	mdsc->caps_reserve_count--;
37262306a36Sopenharmony_ci	mdsc->caps_use_count++;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
37562306a36Sopenharmony_ci	list_del(&cap->caps_item);
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
37862306a36Sopenharmony_ci	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
37962306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
38062306a36Sopenharmony_ci	return cap;
38162306a36Sopenharmony_ci}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_civoid ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
38662306a36Sopenharmony_ci	dout("put_cap %p %d = %d used + %d resv + %d avail\n",
38762306a36Sopenharmony_ci	     cap, mdsc->caps_total_count, mdsc->caps_use_count,
38862306a36Sopenharmony_ci	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
38962306a36Sopenharmony_ci	mdsc->caps_use_count--;
39062306a36Sopenharmony_ci	/*
39162306a36Sopenharmony_ci	 * Keep some preallocated caps around (ceph_min_count), to
39262306a36Sopenharmony_ci	 * avoid lots of free/alloc churn.
39362306a36Sopenharmony_ci	 */
39462306a36Sopenharmony_ci	if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
39562306a36Sopenharmony_ci				      mdsc->caps_min_count) {
39662306a36Sopenharmony_ci		mdsc->caps_total_count--;
39762306a36Sopenharmony_ci		kmem_cache_free(ceph_cap_cachep, cap);
39862306a36Sopenharmony_ci	} else {
39962306a36Sopenharmony_ci		mdsc->caps_avail_count++;
40062306a36Sopenharmony_ci		list_add(&cap->caps_item, &mdsc->caps_list);
40162306a36Sopenharmony_ci	}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
40462306a36Sopenharmony_ci	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
40562306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
40662306a36Sopenharmony_ci}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_civoid ceph_reservation_status(struct ceph_fs_client *fsc,
40962306a36Sopenharmony_ci			     int *total, int *avail, int *used, int *reserved,
41062306a36Sopenharmony_ci			     int *min)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = fsc->mdsc;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	spin_lock(&mdsc->caps_list_lock);
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci	if (total)
41762306a36Sopenharmony_ci		*total = mdsc->caps_total_count;
41862306a36Sopenharmony_ci	if (avail)
41962306a36Sopenharmony_ci		*avail = mdsc->caps_avail_count;
42062306a36Sopenharmony_ci	if (used)
42162306a36Sopenharmony_ci		*used = mdsc->caps_use_count;
42262306a36Sopenharmony_ci	if (reserved)
42362306a36Sopenharmony_ci		*reserved = mdsc->caps_reserve_count;
42462306a36Sopenharmony_ci	if (min)
42562306a36Sopenharmony_ci		*min = mdsc->caps_min_count;
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	spin_unlock(&mdsc->caps_list_lock);
42862306a36Sopenharmony_ci}
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci/*
43162306a36Sopenharmony_ci * Find ceph_cap for given mds, if any.
43262306a36Sopenharmony_ci *
43362306a36Sopenharmony_ci * Called with i_ceph_lock held.
43462306a36Sopenharmony_ci */
43562306a36Sopenharmony_cistruct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
43662306a36Sopenharmony_ci{
43762306a36Sopenharmony_ci	struct ceph_cap *cap;
43862306a36Sopenharmony_ci	struct rb_node *n = ci->i_caps.rb_node;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	while (n) {
44162306a36Sopenharmony_ci		cap = rb_entry(n, struct ceph_cap, ci_node);
44262306a36Sopenharmony_ci		if (mds < cap->mds)
44362306a36Sopenharmony_ci			n = n->rb_left;
44462306a36Sopenharmony_ci		else if (mds > cap->mds)
44562306a36Sopenharmony_ci			n = n->rb_right;
44662306a36Sopenharmony_ci		else
44762306a36Sopenharmony_ci			return cap;
44862306a36Sopenharmony_ci	}
44962306a36Sopenharmony_ci	return NULL;
45062306a36Sopenharmony_ci}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_cistruct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
45362306a36Sopenharmony_ci{
45462306a36Sopenharmony_ci	struct ceph_cap *cap;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
45762306a36Sopenharmony_ci	cap = __get_cap_for_mds(ci, mds);
45862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
45962306a36Sopenharmony_ci	return cap;
46062306a36Sopenharmony_ci}
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci/*
46362306a36Sopenharmony_ci * Called under i_ceph_lock.
46462306a36Sopenharmony_ci */
46562306a36Sopenharmony_cistatic void __insert_cap_node(struct ceph_inode_info *ci,
46662306a36Sopenharmony_ci			      struct ceph_cap *new)
46762306a36Sopenharmony_ci{
46862306a36Sopenharmony_ci	struct rb_node **p = &ci->i_caps.rb_node;
46962306a36Sopenharmony_ci	struct rb_node *parent = NULL;
47062306a36Sopenharmony_ci	struct ceph_cap *cap = NULL;
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	while (*p) {
47362306a36Sopenharmony_ci		parent = *p;
47462306a36Sopenharmony_ci		cap = rb_entry(parent, struct ceph_cap, ci_node);
47562306a36Sopenharmony_ci		if (new->mds < cap->mds)
47662306a36Sopenharmony_ci			p = &(*p)->rb_left;
47762306a36Sopenharmony_ci		else if (new->mds > cap->mds)
47862306a36Sopenharmony_ci			p = &(*p)->rb_right;
47962306a36Sopenharmony_ci		else
48062306a36Sopenharmony_ci			BUG();
48162306a36Sopenharmony_ci	}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	rb_link_node(&new->ci_node, parent, p);
48462306a36Sopenharmony_ci	rb_insert_color(&new->ci_node, &ci->i_caps);
48562306a36Sopenharmony_ci}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci/*
48862306a36Sopenharmony_ci * (re)set cap hold timeouts, which control the delayed release
48962306a36Sopenharmony_ci * of unused caps back to the MDS.  Should be called on cap use.
49062306a36Sopenharmony_ci */
49162306a36Sopenharmony_cistatic void __cap_set_timeouts(struct ceph_mds_client *mdsc,
49262306a36Sopenharmony_ci			       struct ceph_inode_info *ci)
49362306a36Sopenharmony_ci{
49462306a36Sopenharmony_ci	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
49562306a36Sopenharmony_ci	ci->i_hold_caps_max = round_jiffies(jiffies +
49662306a36Sopenharmony_ci					    opt->caps_wanted_delay_max * HZ);
49762306a36Sopenharmony_ci	dout("__cap_set_timeouts %p %lu\n", &ci->netfs.inode,
49862306a36Sopenharmony_ci	     ci->i_hold_caps_max - jiffies);
49962306a36Sopenharmony_ci}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci/*
50262306a36Sopenharmony_ci * (Re)queue cap at the end of the delayed cap release list.
50362306a36Sopenharmony_ci *
50462306a36Sopenharmony_ci * If I_FLUSH is set, leave the inode at the front of the list.
50562306a36Sopenharmony_ci *
50662306a36Sopenharmony_ci * Caller holds i_ceph_lock
50762306a36Sopenharmony_ci *    -> we take mdsc->cap_delay_lock
50862306a36Sopenharmony_ci */
50962306a36Sopenharmony_cistatic void __cap_delay_requeue(struct ceph_mds_client *mdsc,
51062306a36Sopenharmony_ci				struct ceph_inode_info *ci)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->netfs.inode,
51362306a36Sopenharmony_ci	     ci->i_ceph_flags, ci->i_hold_caps_max);
51462306a36Sopenharmony_ci	if (!mdsc->stopping) {
51562306a36Sopenharmony_ci		spin_lock(&mdsc->cap_delay_lock);
51662306a36Sopenharmony_ci		if (!list_empty(&ci->i_cap_delay_list)) {
51762306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_FLUSH)
51862306a36Sopenharmony_ci				goto no_change;
51962306a36Sopenharmony_ci			list_del_init(&ci->i_cap_delay_list);
52062306a36Sopenharmony_ci		}
52162306a36Sopenharmony_ci		__cap_set_timeouts(mdsc, ci);
52262306a36Sopenharmony_ci		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
52362306a36Sopenharmony_cino_change:
52462306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_delay_lock);
52562306a36Sopenharmony_ci	}
52662306a36Sopenharmony_ci}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci/*
52962306a36Sopenharmony_ci * Queue an inode for immediate writeback.  Mark inode with I_FLUSH,
53062306a36Sopenharmony_ci * indicating we should send a cap message to flush dirty metadata
53162306a36Sopenharmony_ci * asap, and move to the front of the delayed cap list.
53262306a36Sopenharmony_ci */
53362306a36Sopenharmony_cistatic void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
53462306a36Sopenharmony_ci				      struct ceph_inode_info *ci)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	dout("__cap_delay_requeue_front %p\n", &ci->netfs.inode);
53762306a36Sopenharmony_ci	spin_lock(&mdsc->cap_delay_lock);
53862306a36Sopenharmony_ci	ci->i_ceph_flags |= CEPH_I_FLUSH;
53962306a36Sopenharmony_ci	if (!list_empty(&ci->i_cap_delay_list))
54062306a36Sopenharmony_ci		list_del_init(&ci->i_cap_delay_list);
54162306a36Sopenharmony_ci	list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
54262306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_delay_lock);
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci/*
54662306a36Sopenharmony_ci * Cancel delayed work on cap.
54762306a36Sopenharmony_ci *
54862306a36Sopenharmony_ci * Caller must hold i_ceph_lock.
54962306a36Sopenharmony_ci */
55062306a36Sopenharmony_cistatic void __cap_delay_cancel(struct ceph_mds_client *mdsc,
55162306a36Sopenharmony_ci			       struct ceph_inode_info *ci)
55262306a36Sopenharmony_ci{
55362306a36Sopenharmony_ci	dout("__cap_delay_cancel %p\n", &ci->netfs.inode);
55462306a36Sopenharmony_ci	if (list_empty(&ci->i_cap_delay_list))
55562306a36Sopenharmony_ci		return;
55662306a36Sopenharmony_ci	spin_lock(&mdsc->cap_delay_lock);
55762306a36Sopenharmony_ci	list_del_init(&ci->i_cap_delay_list);
55862306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_delay_lock);
55962306a36Sopenharmony_ci}
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci/* Common issue checks for add_cap, handle_cap_grant. */
56262306a36Sopenharmony_cistatic void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
56362306a36Sopenharmony_ci			      unsigned issued)
56462306a36Sopenharmony_ci{
56562306a36Sopenharmony_ci	unsigned had = __ceph_caps_issued(ci, NULL);
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	/*
57062306a36Sopenharmony_ci	 * Each time we receive FILE_CACHE anew, we increment
57162306a36Sopenharmony_ci	 * i_rdcache_gen.
57262306a36Sopenharmony_ci	 */
57362306a36Sopenharmony_ci	if (S_ISREG(ci->netfs.inode.i_mode) &&
57462306a36Sopenharmony_ci	    (issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
57562306a36Sopenharmony_ci	    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
57662306a36Sopenharmony_ci		ci->i_rdcache_gen++;
57762306a36Sopenharmony_ci	}
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	/*
58062306a36Sopenharmony_ci	 * If FILE_SHARED is newly issued, mark dir not complete. We don't
58162306a36Sopenharmony_ci	 * know what happened to this directory while we didn't have the cap.
58262306a36Sopenharmony_ci	 * If FILE_SHARED is being revoked, also mark dir not complete. It
58362306a36Sopenharmony_ci	 * stops on-going cached readdir.
58462306a36Sopenharmony_ci	 */
58562306a36Sopenharmony_ci	if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
58662306a36Sopenharmony_ci		if (issued & CEPH_CAP_FILE_SHARED)
58762306a36Sopenharmony_ci			atomic_inc(&ci->i_shared_gen);
58862306a36Sopenharmony_ci		if (S_ISDIR(ci->netfs.inode.i_mode)) {
58962306a36Sopenharmony_ci			dout(" marking %p NOT complete\n", &ci->netfs.inode);
59062306a36Sopenharmony_ci			__ceph_dir_clear_complete(ci);
59162306a36Sopenharmony_ci		}
59262306a36Sopenharmony_ci	}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	/* Wipe saved layout if we're losing DIR_CREATE caps */
59562306a36Sopenharmony_ci	if (S_ISDIR(ci->netfs.inode.i_mode) && (had & CEPH_CAP_DIR_CREATE) &&
59662306a36Sopenharmony_ci		!(issued & CEPH_CAP_DIR_CREATE)) {
59762306a36Sopenharmony_ci	     ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
59862306a36Sopenharmony_ci	     memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
59962306a36Sopenharmony_ci	}
60062306a36Sopenharmony_ci}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci/**
60362306a36Sopenharmony_ci * change_auth_cap_ses - move inode to appropriate lists when auth caps change
60462306a36Sopenharmony_ci * @ci: inode to be moved
60562306a36Sopenharmony_ci * @session: new auth caps session
60662306a36Sopenharmony_ci */
60762306a36Sopenharmony_civoid change_auth_cap_ses(struct ceph_inode_info *ci,
60862306a36Sopenharmony_ci			 struct ceph_mds_session *session)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	if (list_empty(&ci->i_dirty_item) && list_empty(&ci->i_flushing_item))
61362306a36Sopenharmony_ci		return;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	spin_lock(&session->s_mdsc->cap_dirty_lock);
61662306a36Sopenharmony_ci	if (!list_empty(&ci->i_dirty_item))
61762306a36Sopenharmony_ci		list_move(&ci->i_dirty_item, &session->s_cap_dirty);
61862306a36Sopenharmony_ci	if (!list_empty(&ci->i_flushing_item))
61962306a36Sopenharmony_ci		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
62062306a36Sopenharmony_ci	spin_unlock(&session->s_mdsc->cap_dirty_lock);
62162306a36Sopenharmony_ci}
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci/*
62462306a36Sopenharmony_ci * Add a capability under the given MDS session.
62562306a36Sopenharmony_ci *
62662306a36Sopenharmony_ci * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock
62762306a36Sopenharmony_ci *
62862306a36Sopenharmony_ci * @fmode is the open file mode, if we are opening a file, otherwise
62962306a36Sopenharmony_ci * it is < 0.  (This is so we can atomically add the cap and add an
63062306a36Sopenharmony_ci * open file reference to it.)
63162306a36Sopenharmony_ci */
63262306a36Sopenharmony_civoid ceph_add_cap(struct inode *inode,
63362306a36Sopenharmony_ci		  struct ceph_mds_session *session, u64 cap_id,
63462306a36Sopenharmony_ci		  unsigned issued, unsigned wanted,
63562306a36Sopenharmony_ci		  unsigned seq, unsigned mseq, u64 realmino, int flags,
63662306a36Sopenharmony_ci		  struct ceph_cap **new_cap)
63762306a36Sopenharmony_ci{
63862306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
63962306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
64062306a36Sopenharmony_ci	struct ceph_cap *cap;
64162306a36Sopenharmony_ci	int mds = session->s_mds;
64262306a36Sopenharmony_ci	int actual_wanted;
64362306a36Sopenharmony_ci	u32 gen;
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
64862306a36Sopenharmony_ci	     session->s_mds, cap_id, ceph_cap_string(issued), seq);
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	gen = atomic_read(&session->s_cap_gen);
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	cap = __get_cap_for_mds(ci, mds);
65362306a36Sopenharmony_ci	if (!cap) {
65462306a36Sopenharmony_ci		cap = *new_cap;
65562306a36Sopenharmony_ci		*new_cap = NULL;
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci		cap->issued = 0;
65862306a36Sopenharmony_ci		cap->implemented = 0;
65962306a36Sopenharmony_ci		cap->mds = mds;
66062306a36Sopenharmony_ci		cap->mds_wanted = 0;
66162306a36Sopenharmony_ci		cap->mseq = 0;
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci		cap->ci = ci;
66462306a36Sopenharmony_ci		__insert_cap_node(ci, cap);
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci		/* add to session cap list */
66762306a36Sopenharmony_ci		cap->session = session;
66862306a36Sopenharmony_ci		spin_lock(&session->s_cap_lock);
66962306a36Sopenharmony_ci		list_add_tail(&cap->session_caps, &session->s_caps);
67062306a36Sopenharmony_ci		session->s_nr_caps++;
67162306a36Sopenharmony_ci		atomic64_inc(&mdsc->metric.total_caps);
67262306a36Sopenharmony_ci		spin_unlock(&session->s_cap_lock);
67362306a36Sopenharmony_ci	} else {
67462306a36Sopenharmony_ci		spin_lock(&session->s_cap_lock);
67562306a36Sopenharmony_ci		list_move_tail(&cap->session_caps, &session->s_caps);
67662306a36Sopenharmony_ci		spin_unlock(&session->s_cap_lock);
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci		if (cap->cap_gen < gen)
67962306a36Sopenharmony_ci			cap->issued = cap->implemented = CEPH_CAP_PIN;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci		/*
68262306a36Sopenharmony_ci		 * auth mds of the inode changed. we received the cap export
68362306a36Sopenharmony_ci		 * message, but still haven't received the cap import message.
68462306a36Sopenharmony_ci		 * handle_cap_export() updated the new auth MDS' cap.
68562306a36Sopenharmony_ci		 *
68662306a36Sopenharmony_ci		 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
68762306a36Sopenharmony_ci		 * a message that was send before the cap import message. So
68862306a36Sopenharmony_ci		 * don't remove caps.
68962306a36Sopenharmony_ci		 */
69062306a36Sopenharmony_ci		if (ceph_seq_cmp(seq, cap->seq) <= 0) {
69162306a36Sopenharmony_ci			WARN_ON(cap != ci->i_auth_cap);
69262306a36Sopenharmony_ci			WARN_ON(cap->cap_id != cap_id);
69362306a36Sopenharmony_ci			seq = cap->seq;
69462306a36Sopenharmony_ci			mseq = cap->mseq;
69562306a36Sopenharmony_ci			issued |= cap->issued;
69662306a36Sopenharmony_ci			flags |= CEPH_CAP_FLAG_AUTH;
69762306a36Sopenharmony_ci		}
69862306a36Sopenharmony_ci	}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci	if (!ci->i_snap_realm ||
70162306a36Sopenharmony_ci	    ((flags & CEPH_CAP_FLAG_AUTH) &&
70262306a36Sopenharmony_ci	     realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) {
70362306a36Sopenharmony_ci		/*
70462306a36Sopenharmony_ci		 * add this inode to the appropriate snap realm
70562306a36Sopenharmony_ci		 */
70662306a36Sopenharmony_ci		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
70762306a36Sopenharmony_ci							       realmino);
70862306a36Sopenharmony_ci		if (realm)
70962306a36Sopenharmony_ci			ceph_change_snap_realm(inode, realm);
71062306a36Sopenharmony_ci		else
71162306a36Sopenharmony_ci			WARN(1, "%s: couldn't find snap realm 0x%llx (ino 0x%llx oldrealm 0x%llx)\n",
71262306a36Sopenharmony_ci			     __func__, realmino, ci->i_vino.ino,
71362306a36Sopenharmony_ci			     ci->i_snap_realm ? ci->i_snap_realm->ino : 0);
71462306a36Sopenharmony_ci	}
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	__check_cap_issue(ci, cap, issued);
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	/*
71962306a36Sopenharmony_ci	 * If we are issued caps we don't want, or the mds' wanted
72062306a36Sopenharmony_ci	 * value appears to be off, queue a check so we'll release
72162306a36Sopenharmony_ci	 * later and/or update the mds wanted value.
72262306a36Sopenharmony_ci	 */
72362306a36Sopenharmony_ci	actual_wanted = __ceph_caps_wanted(ci);
72462306a36Sopenharmony_ci	if ((wanted & ~actual_wanted) ||
72562306a36Sopenharmony_ci	    (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) {
72662306a36Sopenharmony_ci		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
72762306a36Sopenharmony_ci		     ceph_cap_string(issued), ceph_cap_string(wanted),
72862306a36Sopenharmony_ci		     ceph_cap_string(actual_wanted));
72962306a36Sopenharmony_ci		__cap_delay_requeue(mdsc, ci);
73062306a36Sopenharmony_ci	}
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	if (flags & CEPH_CAP_FLAG_AUTH) {
73362306a36Sopenharmony_ci		if (!ci->i_auth_cap ||
73462306a36Sopenharmony_ci		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
73562306a36Sopenharmony_ci			if (ci->i_auth_cap &&
73662306a36Sopenharmony_ci			    ci->i_auth_cap->session != cap->session)
73762306a36Sopenharmony_ci				change_auth_cap_ses(ci, cap->session);
73862306a36Sopenharmony_ci			ci->i_auth_cap = cap;
73962306a36Sopenharmony_ci			cap->mds_wanted = wanted;
74062306a36Sopenharmony_ci		}
74162306a36Sopenharmony_ci	} else {
74262306a36Sopenharmony_ci		WARN_ON(ci->i_auth_cap == cap);
74362306a36Sopenharmony_ci	}
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
74662306a36Sopenharmony_ci	     inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
74762306a36Sopenharmony_ci	     ceph_cap_string(issued|cap->issued), seq, mds);
74862306a36Sopenharmony_ci	cap->cap_id = cap_id;
74962306a36Sopenharmony_ci	cap->issued = issued;
75062306a36Sopenharmony_ci	cap->implemented |= issued;
75162306a36Sopenharmony_ci	if (ceph_seq_cmp(mseq, cap->mseq) > 0)
75262306a36Sopenharmony_ci		cap->mds_wanted = wanted;
75362306a36Sopenharmony_ci	else
75462306a36Sopenharmony_ci		cap->mds_wanted |= wanted;
75562306a36Sopenharmony_ci	cap->seq = seq;
75662306a36Sopenharmony_ci	cap->issue_seq = seq;
75762306a36Sopenharmony_ci	cap->mseq = mseq;
75862306a36Sopenharmony_ci	cap->cap_gen = gen;
75962306a36Sopenharmony_ci	wake_up_all(&ci->i_cap_wq);
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci/*
76362306a36Sopenharmony_ci * Return true if cap has not timed out and belongs to the current
76462306a36Sopenharmony_ci * generation of the MDS session (i.e. has not gone 'stale' due to
76562306a36Sopenharmony_ci * us losing touch with the mds).
76662306a36Sopenharmony_ci */
76762306a36Sopenharmony_cistatic int __cap_is_valid(struct ceph_cap *cap)
76862306a36Sopenharmony_ci{
76962306a36Sopenharmony_ci	unsigned long ttl;
77062306a36Sopenharmony_ci	u32 gen;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	gen = atomic_read(&cap->session->s_cap_gen);
77362306a36Sopenharmony_ci	ttl = cap->session->s_cap_ttl;
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
77662306a36Sopenharmony_ci		dout("__cap_is_valid %p cap %p issued %s "
77762306a36Sopenharmony_ci		     "but STALE (gen %u vs %u)\n", &cap->ci->netfs.inode,
77862306a36Sopenharmony_ci		     cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
77962306a36Sopenharmony_ci		return 0;
78062306a36Sopenharmony_ci	}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	return 1;
78362306a36Sopenharmony_ci}
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci/*
78662306a36Sopenharmony_ci * Return set of valid cap bits issued to us.  Note that caps time
78762306a36Sopenharmony_ci * out, and may be invalidated in bulk if the client session times out
78862306a36Sopenharmony_ci * and session->s_cap_gen is bumped.
78962306a36Sopenharmony_ci */
79062306a36Sopenharmony_ciint __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	int have = ci->i_snap_caps;
79362306a36Sopenharmony_ci	struct ceph_cap *cap;
79462306a36Sopenharmony_ci	struct rb_node *p;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	if (implemented)
79762306a36Sopenharmony_ci		*implemented = 0;
79862306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
79962306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
80062306a36Sopenharmony_ci		if (!__cap_is_valid(cap))
80162306a36Sopenharmony_ci			continue;
80262306a36Sopenharmony_ci		dout("__ceph_caps_issued %p cap %p issued %s\n",
80362306a36Sopenharmony_ci		     &ci->netfs.inode, cap, ceph_cap_string(cap->issued));
80462306a36Sopenharmony_ci		have |= cap->issued;
80562306a36Sopenharmony_ci		if (implemented)
80662306a36Sopenharmony_ci			*implemented |= cap->implemented;
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci	/*
80962306a36Sopenharmony_ci	 * exclude caps issued by non-auth MDS, but are been revoking
81062306a36Sopenharmony_ci	 * by the auth MDS. The non-auth MDS should be revoking/exporting
81162306a36Sopenharmony_ci	 * these caps, but the message is delayed.
81262306a36Sopenharmony_ci	 */
81362306a36Sopenharmony_ci	if (ci->i_auth_cap) {
81462306a36Sopenharmony_ci		cap = ci->i_auth_cap;
81562306a36Sopenharmony_ci		have &= ~cap->implemented | cap->issued;
81662306a36Sopenharmony_ci	}
81762306a36Sopenharmony_ci	return have;
81862306a36Sopenharmony_ci}
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci/*
82162306a36Sopenharmony_ci * Get cap bits issued by caps other than @ocap
82262306a36Sopenharmony_ci */
82362306a36Sopenharmony_ciint __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
82462306a36Sopenharmony_ci{
82562306a36Sopenharmony_ci	int have = ci->i_snap_caps;
82662306a36Sopenharmony_ci	struct ceph_cap *cap;
82762306a36Sopenharmony_ci	struct rb_node *p;
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
83062306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
83162306a36Sopenharmony_ci		if (cap == ocap)
83262306a36Sopenharmony_ci			continue;
83362306a36Sopenharmony_ci		if (!__cap_is_valid(cap))
83462306a36Sopenharmony_ci			continue;
83562306a36Sopenharmony_ci		have |= cap->issued;
83662306a36Sopenharmony_ci	}
83762306a36Sopenharmony_ci	return have;
83862306a36Sopenharmony_ci}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci/*
84162306a36Sopenharmony_ci * Move a cap to the end of the LRU (oldest caps at list head, newest
84262306a36Sopenharmony_ci * at list tail).
84362306a36Sopenharmony_ci */
84462306a36Sopenharmony_cistatic void __touch_cap(struct ceph_cap *cap)
84562306a36Sopenharmony_ci{
84662306a36Sopenharmony_ci	struct ceph_mds_session *s = cap->session;
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	spin_lock(&s->s_cap_lock);
84962306a36Sopenharmony_ci	if (!s->s_cap_iterator) {
85062306a36Sopenharmony_ci		dout("__touch_cap %p cap %p mds%d\n", &cap->ci->netfs.inode, cap,
85162306a36Sopenharmony_ci		     s->s_mds);
85262306a36Sopenharmony_ci		list_move_tail(&cap->session_caps, &s->s_caps);
85362306a36Sopenharmony_ci	} else {
85462306a36Sopenharmony_ci		dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
85562306a36Sopenharmony_ci		     &cap->ci->netfs.inode, cap, s->s_mds);
85662306a36Sopenharmony_ci	}
85762306a36Sopenharmony_ci	spin_unlock(&s->s_cap_lock);
85862306a36Sopenharmony_ci}
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci/*
86162306a36Sopenharmony_ci * Check if we hold the given mask.  If so, move the cap(s) to the
86262306a36Sopenharmony_ci * front of their respective LRUs.  (This is the preferred way for
86362306a36Sopenharmony_ci * callers to check for caps they want.)
86462306a36Sopenharmony_ci */
86562306a36Sopenharmony_ciint __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
86662306a36Sopenharmony_ci{
86762306a36Sopenharmony_ci	struct ceph_cap *cap;
86862306a36Sopenharmony_ci	struct rb_node *p;
86962306a36Sopenharmony_ci	int have = ci->i_snap_caps;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if ((have & mask) == mask) {
87262306a36Sopenharmony_ci		dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s"
87362306a36Sopenharmony_ci		     " (mask %s)\n", ceph_ino(&ci->netfs.inode),
87462306a36Sopenharmony_ci		     ceph_cap_string(have),
87562306a36Sopenharmony_ci		     ceph_cap_string(mask));
87662306a36Sopenharmony_ci		return 1;
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
88062306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
88162306a36Sopenharmony_ci		if (!__cap_is_valid(cap))
88262306a36Sopenharmony_ci			continue;
88362306a36Sopenharmony_ci		if ((cap->issued & mask) == mask) {
88462306a36Sopenharmony_ci			dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s"
88562306a36Sopenharmony_ci			     " (mask %s)\n", ceph_ino(&ci->netfs.inode), cap,
88662306a36Sopenharmony_ci			     ceph_cap_string(cap->issued),
88762306a36Sopenharmony_ci			     ceph_cap_string(mask));
88862306a36Sopenharmony_ci			if (touch)
88962306a36Sopenharmony_ci				__touch_cap(cap);
89062306a36Sopenharmony_ci			return 1;
89162306a36Sopenharmony_ci		}
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci		/* does a combination of caps satisfy mask? */
89462306a36Sopenharmony_ci		have |= cap->issued;
89562306a36Sopenharmony_ci		if ((have & mask) == mask) {
89662306a36Sopenharmony_ci			dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s"
89762306a36Sopenharmony_ci			     " (mask %s)\n", ceph_ino(&ci->netfs.inode),
89862306a36Sopenharmony_ci			     ceph_cap_string(cap->issued),
89962306a36Sopenharmony_ci			     ceph_cap_string(mask));
90062306a36Sopenharmony_ci			if (touch) {
90162306a36Sopenharmony_ci				struct rb_node *q;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci				/* touch this + preceding caps */
90462306a36Sopenharmony_ci				__touch_cap(cap);
90562306a36Sopenharmony_ci				for (q = rb_first(&ci->i_caps); q != p;
90662306a36Sopenharmony_ci				     q = rb_next(q)) {
90762306a36Sopenharmony_ci					cap = rb_entry(q, struct ceph_cap,
90862306a36Sopenharmony_ci						       ci_node);
90962306a36Sopenharmony_ci					if (!__cap_is_valid(cap))
91062306a36Sopenharmony_ci						continue;
91162306a36Sopenharmony_ci					if (cap->issued & mask)
91262306a36Sopenharmony_ci						__touch_cap(cap);
91362306a36Sopenharmony_ci				}
91462306a36Sopenharmony_ci			}
91562306a36Sopenharmony_ci			return 1;
91662306a36Sopenharmony_ci		}
91762306a36Sopenharmony_ci	}
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	return 0;
92062306a36Sopenharmony_ci}
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ciint __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
92362306a36Sopenharmony_ci				   int touch)
92462306a36Sopenharmony_ci{
92562306a36Sopenharmony_ci	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
92662306a36Sopenharmony_ci	int r;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	r = __ceph_caps_issued_mask(ci, mask, touch);
92962306a36Sopenharmony_ci	if (r)
93062306a36Sopenharmony_ci		ceph_update_cap_hit(&fsc->mdsc->metric);
93162306a36Sopenharmony_ci	else
93262306a36Sopenharmony_ci		ceph_update_cap_mis(&fsc->mdsc->metric);
93362306a36Sopenharmony_ci	return r;
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci/*
93762306a36Sopenharmony_ci * Return true if mask caps are currently being revoked by an MDS.
93862306a36Sopenharmony_ci */
93962306a36Sopenharmony_ciint __ceph_caps_revoking_other(struct ceph_inode_info *ci,
94062306a36Sopenharmony_ci			       struct ceph_cap *ocap, int mask)
94162306a36Sopenharmony_ci{
94262306a36Sopenharmony_ci	struct ceph_cap *cap;
94362306a36Sopenharmony_ci	struct rb_node *p;
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
94662306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
94762306a36Sopenharmony_ci		if (cap != ocap &&
94862306a36Sopenharmony_ci		    (cap->implemented & ~cap->issued & mask))
94962306a36Sopenharmony_ci			return 1;
95062306a36Sopenharmony_ci	}
95162306a36Sopenharmony_ci	return 0;
95262306a36Sopenharmony_ci}
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ciint ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
95562306a36Sopenharmony_ci{
95662306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
95762306a36Sopenharmony_ci	int ret;
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
96062306a36Sopenharmony_ci	ret = __ceph_caps_revoking_other(ci, NULL, mask);
96162306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
96262306a36Sopenharmony_ci	dout("ceph_caps_revoking %p %s = %d\n", inode,
96362306a36Sopenharmony_ci	     ceph_cap_string(mask), ret);
96462306a36Sopenharmony_ci	return ret;
96562306a36Sopenharmony_ci}
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ciint __ceph_caps_used(struct ceph_inode_info *ci)
96862306a36Sopenharmony_ci{
96962306a36Sopenharmony_ci	int used = 0;
97062306a36Sopenharmony_ci	if (ci->i_pin_ref)
97162306a36Sopenharmony_ci		used |= CEPH_CAP_PIN;
97262306a36Sopenharmony_ci	if (ci->i_rd_ref)
97362306a36Sopenharmony_ci		used |= CEPH_CAP_FILE_RD;
97462306a36Sopenharmony_ci	if (ci->i_rdcache_ref ||
97562306a36Sopenharmony_ci	    (S_ISREG(ci->netfs.inode.i_mode) &&
97662306a36Sopenharmony_ci	     ci->netfs.inode.i_data.nrpages))
97762306a36Sopenharmony_ci		used |= CEPH_CAP_FILE_CACHE;
97862306a36Sopenharmony_ci	if (ci->i_wr_ref)
97962306a36Sopenharmony_ci		used |= CEPH_CAP_FILE_WR;
98062306a36Sopenharmony_ci	if (ci->i_wb_ref || ci->i_wrbuffer_ref)
98162306a36Sopenharmony_ci		used |= CEPH_CAP_FILE_BUFFER;
98262306a36Sopenharmony_ci	if (ci->i_fx_ref)
98362306a36Sopenharmony_ci		used |= CEPH_CAP_FILE_EXCL;
98462306a36Sopenharmony_ci	return used;
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci#define FMODE_WAIT_BIAS 1000
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci/*
99062306a36Sopenharmony_ci * wanted, by virtue of open file modes
99162306a36Sopenharmony_ci */
99262306a36Sopenharmony_ciint __ceph_caps_file_wanted(struct ceph_inode_info *ci)
99362306a36Sopenharmony_ci{
99462306a36Sopenharmony_ci	const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN);
99562306a36Sopenharmony_ci	const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD);
99662306a36Sopenharmony_ci	const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
99762306a36Sopenharmony_ci	const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
99862306a36Sopenharmony_ci	struct ceph_mount_options *opt =
99962306a36Sopenharmony_ci		ceph_inode_to_client(&ci->netfs.inode)->mount_options;
100062306a36Sopenharmony_ci	unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
100162306a36Sopenharmony_ci	unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	if (S_ISDIR(ci->netfs.inode.i_mode)) {
100462306a36Sopenharmony_ci		int want = 0;
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci		/* use used_cutoff here, to keep dir's wanted caps longer */
100762306a36Sopenharmony_ci		if (ci->i_nr_by_mode[RD_SHIFT] > 0 ||
100862306a36Sopenharmony_ci		    time_after(ci->i_last_rd, used_cutoff))
100962306a36Sopenharmony_ci			want |= CEPH_CAP_ANY_SHARED;
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci		if (ci->i_nr_by_mode[WR_SHIFT] > 0 ||
101262306a36Sopenharmony_ci		    time_after(ci->i_last_wr, used_cutoff)) {
101362306a36Sopenharmony_ci			want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
101462306a36Sopenharmony_ci			if (opt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
101562306a36Sopenharmony_ci				want |= CEPH_CAP_ANY_DIR_OPS;
101662306a36Sopenharmony_ci		}
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci		if (want || ci->i_nr_by_mode[PIN_SHIFT] > 0)
101962306a36Sopenharmony_ci			want |= CEPH_CAP_PIN;
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci		return want;
102262306a36Sopenharmony_ci	} else {
102362306a36Sopenharmony_ci		int bits = 0;
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci		if (ci->i_nr_by_mode[RD_SHIFT] > 0) {
102662306a36Sopenharmony_ci			if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS ||
102762306a36Sopenharmony_ci			    time_after(ci->i_last_rd, used_cutoff))
102862306a36Sopenharmony_ci				bits |= 1 << RD_SHIFT;
102962306a36Sopenharmony_ci		} else if (time_after(ci->i_last_rd, idle_cutoff)) {
103062306a36Sopenharmony_ci			bits |= 1 << RD_SHIFT;
103162306a36Sopenharmony_ci		}
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci		if (ci->i_nr_by_mode[WR_SHIFT] > 0) {
103462306a36Sopenharmony_ci			if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS ||
103562306a36Sopenharmony_ci			    time_after(ci->i_last_wr, used_cutoff))
103662306a36Sopenharmony_ci				bits |= 1 << WR_SHIFT;
103762306a36Sopenharmony_ci		} else if (time_after(ci->i_last_wr, idle_cutoff)) {
103862306a36Sopenharmony_ci			bits |= 1 << WR_SHIFT;
103962306a36Sopenharmony_ci		}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci		/* check lazyio only when read/write is wanted */
104262306a36Sopenharmony_ci		if ((bits & (CEPH_FILE_MODE_RDWR << 1)) &&
104362306a36Sopenharmony_ci		    ci->i_nr_by_mode[LAZY_SHIFT] > 0)
104462306a36Sopenharmony_ci			bits |= 1 << LAZY_SHIFT;
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci		return bits ? ceph_caps_for_mode(bits >> 1) : 0;
104762306a36Sopenharmony_ci	}
104862306a36Sopenharmony_ci}
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci/*
105162306a36Sopenharmony_ci * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
105262306a36Sopenharmony_ci */
105362306a36Sopenharmony_ciint __ceph_caps_wanted(struct ceph_inode_info *ci)
105462306a36Sopenharmony_ci{
105562306a36Sopenharmony_ci	int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
105662306a36Sopenharmony_ci	if (S_ISDIR(ci->netfs.inode.i_mode)) {
105762306a36Sopenharmony_ci		/* we want EXCL if holding caps of dir ops */
105862306a36Sopenharmony_ci		if (w & CEPH_CAP_ANY_DIR_OPS)
105962306a36Sopenharmony_ci			w |= CEPH_CAP_FILE_EXCL;
106062306a36Sopenharmony_ci	} else {
106162306a36Sopenharmony_ci		/* we want EXCL if dirty data */
106262306a36Sopenharmony_ci		if (w & CEPH_CAP_FILE_BUFFER)
106362306a36Sopenharmony_ci			w |= CEPH_CAP_FILE_EXCL;
106462306a36Sopenharmony_ci	}
106562306a36Sopenharmony_ci	return w;
106662306a36Sopenharmony_ci}
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci/*
106962306a36Sopenharmony_ci * Return caps we have registered with the MDS(s) as 'wanted'.
107062306a36Sopenharmony_ci */
107162306a36Sopenharmony_ciint __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
107262306a36Sopenharmony_ci{
107362306a36Sopenharmony_ci	struct ceph_cap *cap;
107462306a36Sopenharmony_ci	struct rb_node *p;
107562306a36Sopenharmony_ci	int mds_wanted = 0;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
107862306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
107962306a36Sopenharmony_ci		if (check && !__cap_is_valid(cap))
108062306a36Sopenharmony_ci			continue;
108162306a36Sopenharmony_ci		if (cap == ci->i_auth_cap)
108262306a36Sopenharmony_ci			mds_wanted |= cap->mds_wanted;
108362306a36Sopenharmony_ci		else
108462306a36Sopenharmony_ci			mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
108562306a36Sopenharmony_ci	}
108662306a36Sopenharmony_ci	return mds_wanted;
108762306a36Sopenharmony_ci}
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ciint ceph_is_any_caps(struct inode *inode)
109062306a36Sopenharmony_ci{
109162306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
109262306a36Sopenharmony_ci	int ret;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
109562306a36Sopenharmony_ci	ret = __ceph_is_any_real_caps(ci);
109662306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	return ret;
109962306a36Sopenharmony_ci}
110062306a36Sopenharmony_ci
110162306a36Sopenharmony_ci/*
110262306a36Sopenharmony_ci * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
110362306a36Sopenharmony_ci *
110462306a36Sopenharmony_ci * caller should hold i_ceph_lock.
110562306a36Sopenharmony_ci * caller will not hold session s_mutex if called from destroy_inode.
110662306a36Sopenharmony_ci */
110762306a36Sopenharmony_civoid __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
110862306a36Sopenharmony_ci{
110962306a36Sopenharmony_ci	struct ceph_mds_session *session = cap->session;
111062306a36Sopenharmony_ci	struct ceph_inode_info *ci = cap->ci;
111162306a36Sopenharmony_ci	struct ceph_mds_client *mdsc;
111262306a36Sopenharmony_ci	int removed = 0;
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	/* 'ci' being NULL means the remove have already occurred */
111562306a36Sopenharmony_ci	if (!ci) {
111662306a36Sopenharmony_ci		dout("%s: cap inode is NULL\n", __func__);
111762306a36Sopenharmony_ci		return;
111862306a36Sopenharmony_ci	}
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
112162306a36Sopenharmony_ci
112262306a36Sopenharmony_ci	dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode);
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_ci	mdsc = ceph_inode_to_client(&ci->netfs.inode)->mdsc;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	/* remove from inode's cap rbtree, and clear auth cap */
112762306a36Sopenharmony_ci	rb_erase(&cap->ci_node, &ci->i_caps);
112862306a36Sopenharmony_ci	if (ci->i_auth_cap == cap)
112962306a36Sopenharmony_ci		ci->i_auth_cap = NULL;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	/* remove from session list */
113262306a36Sopenharmony_ci	spin_lock(&session->s_cap_lock);
113362306a36Sopenharmony_ci	if (session->s_cap_iterator == cap) {
113462306a36Sopenharmony_ci		/* not yet, we are iterating over this very cap */
113562306a36Sopenharmony_ci		dout("__ceph_remove_cap  delaying %p removal from session %p\n",
113662306a36Sopenharmony_ci		     cap, cap->session);
113762306a36Sopenharmony_ci	} else {
113862306a36Sopenharmony_ci		list_del_init(&cap->session_caps);
113962306a36Sopenharmony_ci		session->s_nr_caps--;
114062306a36Sopenharmony_ci		atomic64_dec(&mdsc->metric.total_caps);
114162306a36Sopenharmony_ci		cap->session = NULL;
114262306a36Sopenharmony_ci		removed = 1;
114362306a36Sopenharmony_ci	}
114462306a36Sopenharmony_ci	/* protect backpointer with s_cap_lock: see iterate_session_caps */
114562306a36Sopenharmony_ci	cap->ci = NULL;
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	/*
114862306a36Sopenharmony_ci	 * s_cap_reconnect is protected by s_cap_lock. no one changes
114962306a36Sopenharmony_ci	 * s_cap_gen while session is in the reconnect state.
115062306a36Sopenharmony_ci	 */
115162306a36Sopenharmony_ci	if (queue_release &&
115262306a36Sopenharmony_ci	    (!session->s_cap_reconnect ||
115362306a36Sopenharmony_ci	     cap->cap_gen == atomic_read(&session->s_cap_gen))) {
115462306a36Sopenharmony_ci		cap->queue_release = 1;
115562306a36Sopenharmony_ci		if (removed) {
115662306a36Sopenharmony_ci			__ceph_queue_cap_release(session, cap);
115762306a36Sopenharmony_ci			removed = 0;
115862306a36Sopenharmony_ci		}
115962306a36Sopenharmony_ci	} else {
116062306a36Sopenharmony_ci		cap->queue_release = 0;
116162306a36Sopenharmony_ci	}
116262306a36Sopenharmony_ci	cap->cap_ino = ci->i_vino.ino;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	spin_unlock(&session->s_cap_lock);
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	if (removed)
116762306a36Sopenharmony_ci		ceph_put_cap(mdsc, cap);
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	if (!__ceph_is_any_real_caps(ci)) {
117062306a36Sopenharmony_ci		/* when reconnect denied, we remove session caps forcibly,
117162306a36Sopenharmony_ci		 * i_wr_ref can be non-zero. If there are ongoing write,
117262306a36Sopenharmony_ci		 * keep i_snap_realm.
117362306a36Sopenharmony_ci		 */
117462306a36Sopenharmony_ci		if (ci->i_wr_ref == 0 && ci->i_snap_realm)
117562306a36Sopenharmony_ci			ceph_change_snap_realm(&ci->netfs.inode, NULL);
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci		__cap_delay_cancel(mdsc, ci);
117862306a36Sopenharmony_ci	}
117962306a36Sopenharmony_ci}
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_civoid ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
118262306a36Sopenharmony_ci{
118362306a36Sopenharmony_ci	struct ceph_inode_info *ci = cap->ci;
118462306a36Sopenharmony_ci	struct ceph_fs_client *fsc;
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci	/* 'ci' being NULL means the remove have already occurred */
118762306a36Sopenharmony_ci	if (!ci) {
118862306a36Sopenharmony_ci		dout("%s: cap inode is NULL\n", __func__);
118962306a36Sopenharmony_ci		return;
119062306a36Sopenharmony_ci	}
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	fsc = ceph_inode_to_client(&ci->netfs.inode);
119562306a36Sopenharmony_ci	WARN_ON_ONCE(ci->i_auth_cap == cap &&
119662306a36Sopenharmony_ci		     !list_empty(&ci->i_dirty_item) &&
119762306a36Sopenharmony_ci		     !fsc->blocklisted &&
119862306a36Sopenharmony_ci		     !ceph_inode_is_shutdown(&ci->netfs.inode));
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci	__ceph_remove_cap(cap, queue_release);
120162306a36Sopenharmony_ci}
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_cistruct cap_msg_args {
120462306a36Sopenharmony_ci	struct ceph_mds_session	*session;
120562306a36Sopenharmony_ci	u64			ino, cid, follows;
120662306a36Sopenharmony_ci	u64			flush_tid, oldest_flush_tid, size, max_size;
120762306a36Sopenharmony_ci	u64			xattr_version;
120862306a36Sopenharmony_ci	u64			change_attr;
120962306a36Sopenharmony_ci	struct ceph_buffer	*xattr_buf;
121062306a36Sopenharmony_ci	struct ceph_buffer	*old_xattr_buf;
121162306a36Sopenharmony_ci	struct timespec64	atime, mtime, ctime, btime;
121262306a36Sopenharmony_ci	int			op, caps, wanted, dirty;
121362306a36Sopenharmony_ci	u32			seq, issue_seq, mseq, time_warp_seq;
121462306a36Sopenharmony_ci	u32			flags;
121562306a36Sopenharmony_ci	kuid_t			uid;
121662306a36Sopenharmony_ci	kgid_t			gid;
121762306a36Sopenharmony_ci	umode_t			mode;
121862306a36Sopenharmony_ci	bool			inline_data;
121962306a36Sopenharmony_ci	bool			wake;
122062306a36Sopenharmony_ci	bool			encrypted;
122162306a36Sopenharmony_ci	u32			fscrypt_auth_len;
122262306a36Sopenharmony_ci	u8			fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
122362306a36Sopenharmony_ci};
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci/* Marshal up the cap msg to the MDS */
122662306a36Sopenharmony_cistatic void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
122762306a36Sopenharmony_ci{
122862306a36Sopenharmony_ci	struct ceph_mds_caps *fc;
122962306a36Sopenharmony_ci	void *p;
123062306a36Sopenharmony_ci	struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n",
123362306a36Sopenharmony_ci	     __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino,
123462306a36Sopenharmony_ci	     ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted),
123562306a36Sopenharmony_ci	     ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq,
123662306a36Sopenharmony_ci	     arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows,
123762306a36Sopenharmony_ci	     arg->size, arg->max_size, arg->xattr_version,
123862306a36Sopenharmony_ci	     arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci	msg->hdr.version = cpu_to_le16(12);
124162306a36Sopenharmony_ci	msg->hdr.tid = cpu_to_le64(arg->flush_tid);
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	fc = msg->front.iov_base;
124462306a36Sopenharmony_ci	memset(fc, 0, sizeof(*fc));
124562306a36Sopenharmony_ci
124662306a36Sopenharmony_ci	fc->cap_id = cpu_to_le64(arg->cid);
124762306a36Sopenharmony_ci	fc->op = cpu_to_le32(arg->op);
124862306a36Sopenharmony_ci	fc->seq = cpu_to_le32(arg->seq);
124962306a36Sopenharmony_ci	fc->issue_seq = cpu_to_le32(arg->issue_seq);
125062306a36Sopenharmony_ci	fc->migrate_seq = cpu_to_le32(arg->mseq);
125162306a36Sopenharmony_ci	fc->caps = cpu_to_le32(arg->caps);
125262306a36Sopenharmony_ci	fc->wanted = cpu_to_le32(arg->wanted);
125362306a36Sopenharmony_ci	fc->dirty = cpu_to_le32(arg->dirty);
125462306a36Sopenharmony_ci	fc->ino = cpu_to_le64(arg->ino);
125562306a36Sopenharmony_ci	fc->snap_follows = cpu_to_le64(arg->follows);
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
125862306a36Sopenharmony_ci	if (arg->encrypted)
125962306a36Sopenharmony_ci		fc->size = cpu_to_le64(round_up(arg->size,
126062306a36Sopenharmony_ci						CEPH_FSCRYPT_BLOCK_SIZE));
126162306a36Sopenharmony_ci	else
126262306a36Sopenharmony_ci#endif
126362306a36Sopenharmony_ci		fc->size = cpu_to_le64(arg->size);
126462306a36Sopenharmony_ci	fc->max_size = cpu_to_le64(arg->max_size);
126562306a36Sopenharmony_ci	ceph_encode_timespec64(&fc->mtime, &arg->mtime);
126662306a36Sopenharmony_ci	ceph_encode_timespec64(&fc->atime, &arg->atime);
126762306a36Sopenharmony_ci	ceph_encode_timespec64(&fc->ctime, &arg->ctime);
126862306a36Sopenharmony_ci	fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
127162306a36Sopenharmony_ci	fc->gid = cpu_to_le32(from_kgid(&init_user_ns, arg->gid));
127262306a36Sopenharmony_ci	fc->mode = cpu_to_le32(arg->mode);
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_ci	fc->xattr_version = cpu_to_le64(arg->xattr_version);
127562306a36Sopenharmony_ci	if (arg->xattr_buf) {
127662306a36Sopenharmony_ci		msg->middle = ceph_buffer_get(arg->xattr_buf);
127762306a36Sopenharmony_ci		fc->xattr_len = cpu_to_le32(arg->xattr_buf->vec.iov_len);
127862306a36Sopenharmony_ci		msg->hdr.middle_len = cpu_to_le32(arg->xattr_buf->vec.iov_len);
127962306a36Sopenharmony_ci	}
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	p = fc + 1;
128262306a36Sopenharmony_ci	/* flock buffer size (version 2) */
128362306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
128462306a36Sopenharmony_ci	/* inline version (version 4) */
128562306a36Sopenharmony_ci	ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
128662306a36Sopenharmony_ci	/* inline data size */
128762306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
128862306a36Sopenharmony_ci	/*
128962306a36Sopenharmony_ci	 * osd_epoch_barrier (version 5)
129062306a36Sopenharmony_ci	 * The epoch_barrier is protected osdc->lock, so READ_ONCE here in
129162306a36Sopenharmony_ci	 * case it was recently changed
129262306a36Sopenharmony_ci	 */
129362306a36Sopenharmony_ci	ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
129462306a36Sopenharmony_ci	/* oldest_flush_tid (version 6) */
129562306a36Sopenharmony_ci	ceph_encode_64(&p, arg->oldest_flush_tid);
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	/*
129862306a36Sopenharmony_ci	 * caller_uid/caller_gid (version 7)
129962306a36Sopenharmony_ci	 *
130062306a36Sopenharmony_ci	 * Currently, we don't properly track which caller dirtied the caps
130162306a36Sopenharmony_ci	 * last, and force a flush of them when there is a conflict. For now,
130262306a36Sopenharmony_ci	 * just set this to 0:0, to emulate how the MDS has worked up to now.
130362306a36Sopenharmony_ci	 */
130462306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
130562306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_ci	/* pool namespace (version 8) (mds always ignores this) */
130862306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci	/* btime and change_attr (version 9) */
131162306a36Sopenharmony_ci	ceph_encode_timespec64(p, &arg->btime);
131262306a36Sopenharmony_ci	p += sizeof(struct ceph_timespec);
131362306a36Sopenharmony_ci	ceph_encode_64(&p, arg->change_attr);
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	/* Advisory flags (version 10) */
131662306a36Sopenharmony_ci	ceph_encode_32(&p, arg->flags);
131762306a36Sopenharmony_ci
131862306a36Sopenharmony_ci	/* dirstats (version 11) - these are r/o on the client */
131962306a36Sopenharmony_ci	ceph_encode_64(&p, 0);
132062306a36Sopenharmony_ci	ceph_encode_64(&p, 0);
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
132362306a36Sopenharmony_ci	/*
132462306a36Sopenharmony_ci	 * fscrypt_auth and fscrypt_file (version 12)
132562306a36Sopenharmony_ci	 *
132662306a36Sopenharmony_ci	 * fscrypt_auth holds the crypto context (if any). fscrypt_file
132762306a36Sopenharmony_ci	 * tracks the real i_size as an __le64 field (and we use a rounded-up
132862306a36Sopenharmony_ci	 * i_size in the traditional size field).
132962306a36Sopenharmony_ci	 */
133062306a36Sopenharmony_ci	ceph_encode_32(&p, arg->fscrypt_auth_len);
133162306a36Sopenharmony_ci	ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
133262306a36Sopenharmony_ci	ceph_encode_32(&p, sizeof(__le64));
133362306a36Sopenharmony_ci	ceph_encode_64(&p, arg->size);
133462306a36Sopenharmony_ci#else /* CONFIG_FS_ENCRYPTION */
133562306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
133662306a36Sopenharmony_ci	ceph_encode_32(&p, 0);
133762306a36Sopenharmony_ci#endif /* CONFIG_FS_ENCRYPTION */
133862306a36Sopenharmony_ci}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci/*
134162306a36Sopenharmony_ci * Queue cap releases when an inode is dropped from our cache.
134262306a36Sopenharmony_ci */
134362306a36Sopenharmony_civoid __ceph_remove_caps(struct ceph_inode_info *ci)
134462306a36Sopenharmony_ci{
134562306a36Sopenharmony_ci	struct rb_node *p;
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci	/* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
134862306a36Sopenharmony_ci	 * may call __ceph_caps_issued_mask() on a freeing inode. */
134962306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
135062306a36Sopenharmony_ci	p = rb_first(&ci->i_caps);
135162306a36Sopenharmony_ci	while (p) {
135262306a36Sopenharmony_ci		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
135362306a36Sopenharmony_ci		p = rb_next(p);
135462306a36Sopenharmony_ci		ceph_remove_cap(cap, true);
135562306a36Sopenharmony_ci	}
135662306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
135762306a36Sopenharmony_ci}
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci/*
136062306a36Sopenharmony_ci * Prepare to send a cap message to an MDS. Update the cap state, and populate
136162306a36Sopenharmony_ci * the arg struct with the parameters that will need to be sent. This should
136262306a36Sopenharmony_ci * be done under the i_ceph_lock to guard against changes to cap state.
136362306a36Sopenharmony_ci *
136462306a36Sopenharmony_ci * Make note of max_size reported/requested from mds, revoked caps
136562306a36Sopenharmony_ci * that have now been implemented.
136662306a36Sopenharmony_ci */
136762306a36Sopenharmony_cistatic void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
136862306a36Sopenharmony_ci		       int op, int flags, int used, int want, int retain,
136962306a36Sopenharmony_ci		       int flushing, u64 flush_tid, u64 oldest_flush_tid)
137062306a36Sopenharmony_ci{
137162306a36Sopenharmony_ci	struct ceph_inode_info *ci = cap->ci;
137262306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
137362306a36Sopenharmony_ci	int held, revoking;
137462306a36Sopenharmony_ci
137562306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_ci	held = cap->issued | cap->implemented;
137862306a36Sopenharmony_ci	revoking = cap->implemented & ~cap->issued;
137962306a36Sopenharmony_ci	retain &= ~revoking;
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci	dout("%s %p cap %p session %p %s -> %s (revoking %s)\n",
138262306a36Sopenharmony_ci	     __func__, inode, cap, cap->session,
138362306a36Sopenharmony_ci	     ceph_cap_string(held), ceph_cap_string(held & retain),
138462306a36Sopenharmony_ci	     ceph_cap_string(revoking));
138562306a36Sopenharmony_ci	BUG_ON((retain & CEPH_CAP_PIN) == 0);
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci	ci->i_ceph_flags &= ~CEPH_I_FLUSH;
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	cap->issued &= retain;  /* drop bits we don't want */
139062306a36Sopenharmony_ci	/*
139162306a36Sopenharmony_ci	 * Wake up any waiters on wanted -> needed transition. This is due to
139262306a36Sopenharmony_ci	 * the weird transition from buffered to sync IO... we need to flush
139362306a36Sopenharmony_ci	 * dirty pages _before_ allowing sync writes to avoid reordering.
139462306a36Sopenharmony_ci	 */
139562306a36Sopenharmony_ci	arg->wake = cap->implemented & ~cap->issued;
139662306a36Sopenharmony_ci	cap->implemented &= cap->issued | used;
139762306a36Sopenharmony_ci	cap->mds_wanted = want;
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_ci	arg->session = cap->session;
140062306a36Sopenharmony_ci	arg->ino = ceph_vino(inode).ino;
140162306a36Sopenharmony_ci	arg->cid = cap->cap_id;
140262306a36Sopenharmony_ci	arg->follows = flushing ? ci->i_head_snapc->seq : 0;
140362306a36Sopenharmony_ci	arg->flush_tid = flush_tid;
140462306a36Sopenharmony_ci	arg->oldest_flush_tid = oldest_flush_tid;
140562306a36Sopenharmony_ci	arg->size = i_size_read(inode);
140662306a36Sopenharmony_ci	ci->i_reported_size = arg->size;
140762306a36Sopenharmony_ci	arg->max_size = ci->i_wanted_max_size;
140862306a36Sopenharmony_ci	if (cap == ci->i_auth_cap) {
140962306a36Sopenharmony_ci		if (want & CEPH_CAP_ANY_FILE_WR)
141062306a36Sopenharmony_ci			ci->i_requested_max_size = arg->max_size;
141162306a36Sopenharmony_ci		else
141262306a36Sopenharmony_ci			ci->i_requested_max_size = 0;
141362306a36Sopenharmony_ci	}
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ci	if (flushing & CEPH_CAP_XATTR_EXCL) {
141662306a36Sopenharmony_ci		arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
141762306a36Sopenharmony_ci		arg->xattr_version = ci->i_xattrs.version;
141862306a36Sopenharmony_ci		arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
141962306a36Sopenharmony_ci	} else {
142062306a36Sopenharmony_ci		arg->xattr_buf = NULL;
142162306a36Sopenharmony_ci		arg->old_xattr_buf = NULL;
142262306a36Sopenharmony_ci	}
142362306a36Sopenharmony_ci
142462306a36Sopenharmony_ci	arg->mtime = inode->i_mtime;
142562306a36Sopenharmony_ci	arg->atime = inode->i_atime;
142662306a36Sopenharmony_ci	arg->ctime = inode_get_ctime(inode);
142762306a36Sopenharmony_ci	arg->btime = ci->i_btime;
142862306a36Sopenharmony_ci	arg->change_attr = inode_peek_iversion_raw(inode);
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ci	arg->op = op;
143162306a36Sopenharmony_ci	arg->caps = cap->implemented;
143262306a36Sopenharmony_ci	arg->wanted = want;
143362306a36Sopenharmony_ci	arg->dirty = flushing;
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci	arg->seq = cap->seq;
143662306a36Sopenharmony_ci	arg->issue_seq = cap->issue_seq;
143762306a36Sopenharmony_ci	arg->mseq = cap->mseq;
143862306a36Sopenharmony_ci	arg->time_warp_seq = ci->i_time_warp_seq;
143962306a36Sopenharmony_ci
144062306a36Sopenharmony_ci	arg->uid = inode->i_uid;
144162306a36Sopenharmony_ci	arg->gid = inode->i_gid;
144262306a36Sopenharmony_ci	arg->mode = inode->i_mode;
144362306a36Sopenharmony_ci
144462306a36Sopenharmony_ci	arg->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
144562306a36Sopenharmony_ci	if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) &&
144662306a36Sopenharmony_ci	    !list_empty(&ci->i_cap_snaps)) {
144762306a36Sopenharmony_ci		struct ceph_cap_snap *capsnap;
144862306a36Sopenharmony_ci		list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) {
144962306a36Sopenharmony_ci			if (capsnap->cap_flush.tid)
145062306a36Sopenharmony_ci				break;
145162306a36Sopenharmony_ci			if (capsnap->need_flush) {
145262306a36Sopenharmony_ci				flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
145362306a36Sopenharmony_ci				break;
145462306a36Sopenharmony_ci			}
145562306a36Sopenharmony_ci		}
145662306a36Sopenharmony_ci	}
145762306a36Sopenharmony_ci	arg->flags = flags;
145862306a36Sopenharmony_ci	arg->encrypted = IS_ENCRYPTED(inode);
145962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
146062306a36Sopenharmony_ci	if (ci->fscrypt_auth_len &&
146162306a36Sopenharmony_ci	    WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
146262306a36Sopenharmony_ci		/* Don't set this if it's too big */
146362306a36Sopenharmony_ci		arg->fscrypt_auth_len = 0;
146462306a36Sopenharmony_ci	} else {
146562306a36Sopenharmony_ci		arg->fscrypt_auth_len = ci->fscrypt_auth_len;
146662306a36Sopenharmony_ci		memcpy(arg->fscrypt_auth, ci->fscrypt_auth,
146762306a36Sopenharmony_ci		       min_t(size_t, ci->fscrypt_auth_len,
146862306a36Sopenharmony_ci			     sizeof(arg->fscrypt_auth)));
146962306a36Sopenharmony_ci	}
147062306a36Sopenharmony_ci#endif /* CONFIG_FS_ENCRYPTION */
147162306a36Sopenharmony_ci}
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
147462306a36Sopenharmony_ci#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
147562306a36Sopenharmony_ci		      4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8)
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_cistatic inline int cap_msg_size(struct cap_msg_args *arg)
147862306a36Sopenharmony_ci{
147962306a36Sopenharmony_ci	return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len;
148062306a36Sopenharmony_ci}
148162306a36Sopenharmony_ci#else
148262306a36Sopenharmony_ci#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
148362306a36Sopenharmony_ci		      4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4)
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_cistatic inline int cap_msg_size(struct cap_msg_args *arg)
148662306a36Sopenharmony_ci{
148762306a36Sopenharmony_ci	return CAP_MSG_FIXED_FIELDS;
148862306a36Sopenharmony_ci}
148962306a36Sopenharmony_ci#endif /* CONFIG_FS_ENCRYPTION */
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci/*
149262306a36Sopenharmony_ci * Send a cap msg on the given inode.
149362306a36Sopenharmony_ci *
149462306a36Sopenharmony_ci * Caller should hold snap_rwsem (read), s_mutex.
149562306a36Sopenharmony_ci */
149662306a36Sopenharmony_cistatic void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
149762306a36Sopenharmony_ci{
149862306a36Sopenharmony_ci	struct ceph_msg *msg;
149962306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
150062306a36Sopenharmony_ci
150162306a36Sopenharmony_ci	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS,
150262306a36Sopenharmony_ci			   false);
150362306a36Sopenharmony_ci	if (!msg) {
150462306a36Sopenharmony_ci		pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
150562306a36Sopenharmony_ci		       ceph_vinop(inode), ceph_cap_string(arg->dirty),
150662306a36Sopenharmony_ci		       arg->flush_tid);
150762306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
150862306a36Sopenharmony_ci		__cap_delay_requeue(arg->session->s_mdsc, ci);
150962306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
151062306a36Sopenharmony_ci		return;
151162306a36Sopenharmony_ci	}
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	encode_cap_msg(msg, arg);
151462306a36Sopenharmony_ci	ceph_con_send(&arg->session->s_con, msg);
151562306a36Sopenharmony_ci	ceph_buffer_put(arg->old_xattr_buf);
151662306a36Sopenharmony_ci	ceph_buffer_put(arg->xattr_buf);
151762306a36Sopenharmony_ci	if (arg->wake)
151862306a36Sopenharmony_ci		wake_up_all(&ci->i_cap_wq);
151962306a36Sopenharmony_ci}
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_cistatic inline int __send_flush_snap(struct inode *inode,
152262306a36Sopenharmony_ci				    struct ceph_mds_session *session,
152362306a36Sopenharmony_ci				    struct ceph_cap_snap *capsnap,
152462306a36Sopenharmony_ci				    u32 mseq, u64 oldest_flush_tid)
152562306a36Sopenharmony_ci{
152662306a36Sopenharmony_ci	struct cap_msg_args	arg;
152762306a36Sopenharmony_ci	struct ceph_msg		*msg;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	arg.session = session;
153062306a36Sopenharmony_ci	arg.ino = ceph_vino(inode).ino;
153162306a36Sopenharmony_ci	arg.cid = 0;
153262306a36Sopenharmony_ci	arg.follows = capsnap->follows;
153362306a36Sopenharmony_ci	arg.flush_tid = capsnap->cap_flush.tid;
153462306a36Sopenharmony_ci	arg.oldest_flush_tid = oldest_flush_tid;
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ci	arg.size = capsnap->size;
153762306a36Sopenharmony_ci	arg.max_size = 0;
153862306a36Sopenharmony_ci	arg.xattr_version = capsnap->xattr_version;
153962306a36Sopenharmony_ci	arg.xattr_buf = capsnap->xattr_blob;
154062306a36Sopenharmony_ci	arg.old_xattr_buf = NULL;
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	arg.atime = capsnap->atime;
154362306a36Sopenharmony_ci	arg.mtime = capsnap->mtime;
154462306a36Sopenharmony_ci	arg.ctime = capsnap->ctime;
154562306a36Sopenharmony_ci	arg.btime = capsnap->btime;
154662306a36Sopenharmony_ci	arg.change_attr = capsnap->change_attr;
154762306a36Sopenharmony_ci
154862306a36Sopenharmony_ci	arg.op = CEPH_CAP_OP_FLUSHSNAP;
154962306a36Sopenharmony_ci	arg.caps = capsnap->issued;
155062306a36Sopenharmony_ci	arg.wanted = 0;
155162306a36Sopenharmony_ci	arg.dirty = capsnap->dirty;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	arg.seq = 0;
155462306a36Sopenharmony_ci	arg.issue_seq = 0;
155562306a36Sopenharmony_ci	arg.mseq = mseq;
155662306a36Sopenharmony_ci	arg.time_warp_seq = capsnap->time_warp_seq;
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	arg.uid = capsnap->uid;
155962306a36Sopenharmony_ci	arg.gid = capsnap->gid;
156062306a36Sopenharmony_ci	arg.mode = capsnap->mode;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	arg.inline_data = capsnap->inline_data;
156362306a36Sopenharmony_ci	arg.flags = 0;
156462306a36Sopenharmony_ci	arg.wake = false;
156562306a36Sopenharmony_ci	arg.encrypted = IS_ENCRYPTED(inode);
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci	/* No fscrypt_auth changes from a capsnap.*/
156862306a36Sopenharmony_ci	arg.fscrypt_auth_len = 0;
156962306a36Sopenharmony_ci
157062306a36Sopenharmony_ci	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
157162306a36Sopenharmony_ci			   GFP_NOFS, false);
157262306a36Sopenharmony_ci	if (!msg)
157362306a36Sopenharmony_ci		return -ENOMEM;
157462306a36Sopenharmony_ci
157562306a36Sopenharmony_ci	encode_cap_msg(msg, &arg);
157662306a36Sopenharmony_ci	ceph_con_send(&arg.session->s_con, msg);
157762306a36Sopenharmony_ci	return 0;
157862306a36Sopenharmony_ci}
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci/*
158162306a36Sopenharmony_ci * When a snapshot is taken, clients accumulate dirty metadata on
158262306a36Sopenharmony_ci * inodes with capabilities in ceph_cap_snaps to describe the file
158362306a36Sopenharmony_ci * state at the time the snapshot was taken.  This must be flushed
158462306a36Sopenharmony_ci * asynchronously back to the MDS once sync writes complete and dirty
158562306a36Sopenharmony_ci * data is written out.
158662306a36Sopenharmony_ci *
158762306a36Sopenharmony_ci * Called under i_ceph_lock.
158862306a36Sopenharmony_ci */
158962306a36Sopenharmony_cistatic void __ceph_flush_snaps(struct ceph_inode_info *ci,
159062306a36Sopenharmony_ci			       struct ceph_mds_session *session)
159162306a36Sopenharmony_ci		__releases(ci->i_ceph_lock)
159262306a36Sopenharmony_ci		__acquires(ci->i_ceph_lock)
159362306a36Sopenharmony_ci{
159462306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
159562306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = session->s_mdsc;
159662306a36Sopenharmony_ci	struct ceph_cap_snap *capsnap;
159762306a36Sopenharmony_ci	u64 oldest_flush_tid = 0;
159862306a36Sopenharmony_ci	u64 first_tid = 1, last_tid = 0;
159962306a36Sopenharmony_ci
160062306a36Sopenharmony_ci	dout("__flush_snaps %p session %p\n", inode, session);
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
160362306a36Sopenharmony_ci		/*
160462306a36Sopenharmony_ci		 * we need to wait for sync writes to complete and for dirty
160562306a36Sopenharmony_ci		 * pages to be written out.
160662306a36Sopenharmony_ci		 */
160762306a36Sopenharmony_ci		if (capsnap->dirty_pages || capsnap->writing)
160862306a36Sopenharmony_ci			break;
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_ci		/* should be removed by ceph_try_drop_cap_snap() */
161162306a36Sopenharmony_ci		BUG_ON(!capsnap->need_flush);
161262306a36Sopenharmony_ci
161362306a36Sopenharmony_ci		/* only flush each capsnap once */
161462306a36Sopenharmony_ci		if (capsnap->cap_flush.tid > 0) {
161562306a36Sopenharmony_ci			dout(" already flushed %p, skipping\n", capsnap);
161662306a36Sopenharmony_ci			continue;
161762306a36Sopenharmony_ci		}
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci		spin_lock(&mdsc->cap_dirty_lock);
162062306a36Sopenharmony_ci		capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid;
162162306a36Sopenharmony_ci		list_add_tail(&capsnap->cap_flush.g_list,
162262306a36Sopenharmony_ci			      &mdsc->cap_flush_list);
162362306a36Sopenharmony_ci		if (oldest_flush_tid == 0)
162462306a36Sopenharmony_ci			oldest_flush_tid = __get_oldest_flush_tid(mdsc);
162562306a36Sopenharmony_ci		if (list_empty(&ci->i_flushing_item)) {
162662306a36Sopenharmony_ci			list_add_tail(&ci->i_flushing_item,
162762306a36Sopenharmony_ci				      &session->s_cap_flushing);
162862306a36Sopenharmony_ci		}
162962306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_dirty_lock);
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_ci		list_add_tail(&capsnap->cap_flush.i_list,
163262306a36Sopenharmony_ci			      &ci->i_cap_flush_list);
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_ci		if (first_tid == 1)
163562306a36Sopenharmony_ci			first_tid = capsnap->cap_flush.tid;
163662306a36Sopenharmony_ci		last_tid = capsnap->cap_flush.tid;
163762306a36Sopenharmony_ci	}
163862306a36Sopenharmony_ci
163962306a36Sopenharmony_ci	ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci	while (first_tid <= last_tid) {
164262306a36Sopenharmony_ci		struct ceph_cap *cap = ci->i_auth_cap;
164362306a36Sopenharmony_ci		struct ceph_cap_flush *cf = NULL, *iter;
164462306a36Sopenharmony_ci		int ret;
164562306a36Sopenharmony_ci
164662306a36Sopenharmony_ci		if (!(cap && cap->session == session)) {
164762306a36Sopenharmony_ci			dout("__flush_snaps %p auth cap %p not mds%d, "
164862306a36Sopenharmony_ci			     "stop\n", inode, cap, session->s_mds);
164962306a36Sopenharmony_ci			break;
165062306a36Sopenharmony_ci		}
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci		ret = -ENOENT;
165362306a36Sopenharmony_ci		list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) {
165462306a36Sopenharmony_ci			if (iter->tid >= first_tid) {
165562306a36Sopenharmony_ci				cf = iter;
165662306a36Sopenharmony_ci				ret = 0;
165762306a36Sopenharmony_ci				break;
165862306a36Sopenharmony_ci			}
165962306a36Sopenharmony_ci		}
166062306a36Sopenharmony_ci		if (ret < 0)
166162306a36Sopenharmony_ci			break;
166262306a36Sopenharmony_ci
166362306a36Sopenharmony_ci		first_tid = cf->tid + 1;
166462306a36Sopenharmony_ci
166562306a36Sopenharmony_ci		capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
166662306a36Sopenharmony_ci		refcount_inc(&capsnap->nref);
166762306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci		dout("__flush_snaps %p capsnap %p tid %llu %s\n",
167062306a36Sopenharmony_ci		     inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty));
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci		ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
167362306a36Sopenharmony_ci					oldest_flush_tid);
167462306a36Sopenharmony_ci		if (ret < 0) {
167562306a36Sopenharmony_ci			pr_err("__flush_snaps: error sending cap flushsnap, "
167662306a36Sopenharmony_ci			       "ino (%llx.%llx) tid %llu follows %llu\n",
167762306a36Sopenharmony_ci				ceph_vinop(inode), cf->tid, capsnap->follows);
167862306a36Sopenharmony_ci		}
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci		ceph_put_cap_snap(capsnap);
168162306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
168262306a36Sopenharmony_ci	}
168362306a36Sopenharmony_ci}
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_civoid ceph_flush_snaps(struct ceph_inode_info *ci,
168662306a36Sopenharmony_ci		      struct ceph_mds_session **psession)
168762306a36Sopenharmony_ci{
168862306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
168962306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
169062306a36Sopenharmony_ci	struct ceph_mds_session *session = NULL;
169162306a36Sopenharmony_ci	bool need_put = false;
169262306a36Sopenharmony_ci	int mds;
169362306a36Sopenharmony_ci
169462306a36Sopenharmony_ci	dout("ceph_flush_snaps %p\n", inode);
169562306a36Sopenharmony_ci	if (psession)
169662306a36Sopenharmony_ci		session = *psession;
169762306a36Sopenharmony_ciretry:
169862306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
169962306a36Sopenharmony_ci	if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
170062306a36Sopenharmony_ci		dout(" no capsnap needs flush, doing nothing\n");
170162306a36Sopenharmony_ci		goto out;
170262306a36Sopenharmony_ci	}
170362306a36Sopenharmony_ci	if (!ci->i_auth_cap) {
170462306a36Sopenharmony_ci		dout(" no auth cap (migrating?), doing nothing\n");
170562306a36Sopenharmony_ci		goto out;
170662306a36Sopenharmony_ci	}
170762306a36Sopenharmony_ci
170862306a36Sopenharmony_ci	mds = ci->i_auth_cap->session->s_mds;
170962306a36Sopenharmony_ci	if (session && session->s_mds != mds) {
171062306a36Sopenharmony_ci		dout(" oops, wrong session %p mutex\n", session);
171162306a36Sopenharmony_ci		ceph_put_mds_session(session);
171262306a36Sopenharmony_ci		session = NULL;
171362306a36Sopenharmony_ci	}
171462306a36Sopenharmony_ci	if (!session) {
171562306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
171662306a36Sopenharmony_ci		mutex_lock(&mdsc->mutex);
171762306a36Sopenharmony_ci		session = __ceph_lookup_mds_session(mdsc, mds);
171862306a36Sopenharmony_ci		mutex_unlock(&mdsc->mutex);
171962306a36Sopenharmony_ci		goto retry;
172062306a36Sopenharmony_ci	}
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci	// make sure flushsnap messages are sent in proper order.
172362306a36Sopenharmony_ci	if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
172462306a36Sopenharmony_ci		__kick_flushing_caps(mdsc, session, ci, 0);
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci	__ceph_flush_snaps(ci, session);
172762306a36Sopenharmony_ciout:
172862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
172962306a36Sopenharmony_ci
173062306a36Sopenharmony_ci	if (psession)
173162306a36Sopenharmony_ci		*psession = session;
173262306a36Sopenharmony_ci	else
173362306a36Sopenharmony_ci		ceph_put_mds_session(session);
173462306a36Sopenharmony_ci	/* we flushed them all; remove this inode from the queue */
173562306a36Sopenharmony_ci	spin_lock(&mdsc->snap_flush_lock);
173662306a36Sopenharmony_ci	if (!list_empty(&ci->i_snap_flush_item))
173762306a36Sopenharmony_ci		need_put = true;
173862306a36Sopenharmony_ci	list_del_init(&ci->i_snap_flush_item);
173962306a36Sopenharmony_ci	spin_unlock(&mdsc->snap_flush_lock);
174062306a36Sopenharmony_ci
174162306a36Sopenharmony_ci	if (need_put)
174262306a36Sopenharmony_ci		iput(inode);
174362306a36Sopenharmony_ci}
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_ci/*
174662306a36Sopenharmony_ci * Mark caps dirty.  If inode is newly dirty, return the dirty flags.
174762306a36Sopenharmony_ci * Caller is then responsible for calling __mark_inode_dirty with the
174862306a36Sopenharmony_ci * returned flags value.
174962306a36Sopenharmony_ci */
175062306a36Sopenharmony_ciint __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
175162306a36Sopenharmony_ci			   struct ceph_cap_flush **pcf)
175262306a36Sopenharmony_ci{
175362306a36Sopenharmony_ci	struct ceph_mds_client *mdsc =
175462306a36Sopenharmony_ci		ceph_sb_to_client(ci->netfs.inode.i_sb)->mdsc;
175562306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
175662306a36Sopenharmony_ci	int was = ci->i_dirty_caps;
175762306a36Sopenharmony_ci	int dirty = 0;
175862306a36Sopenharmony_ci
175962306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci	if (!ci->i_auth_cap) {
176262306a36Sopenharmony_ci		pr_warn("__mark_dirty_caps %p %llx mask %s, "
176362306a36Sopenharmony_ci			"but no auth cap (session was closed?)\n",
176462306a36Sopenharmony_ci			inode, ceph_ino(inode), ceph_cap_string(mask));
176562306a36Sopenharmony_ci		return 0;
176662306a36Sopenharmony_ci	}
176762306a36Sopenharmony_ci
176862306a36Sopenharmony_ci	dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->netfs.inode,
176962306a36Sopenharmony_ci	     ceph_cap_string(mask), ceph_cap_string(was),
177062306a36Sopenharmony_ci	     ceph_cap_string(was | mask));
177162306a36Sopenharmony_ci	ci->i_dirty_caps |= mask;
177262306a36Sopenharmony_ci	if (was == 0) {
177362306a36Sopenharmony_ci		struct ceph_mds_session *session = ci->i_auth_cap->session;
177462306a36Sopenharmony_ci
177562306a36Sopenharmony_ci		WARN_ON_ONCE(ci->i_prealloc_cap_flush);
177662306a36Sopenharmony_ci		swap(ci->i_prealloc_cap_flush, *pcf);
177762306a36Sopenharmony_ci
177862306a36Sopenharmony_ci		if (!ci->i_head_snapc) {
177962306a36Sopenharmony_ci			WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
178062306a36Sopenharmony_ci			ci->i_head_snapc = ceph_get_snap_context(
178162306a36Sopenharmony_ci				ci->i_snap_realm->cached_context);
178262306a36Sopenharmony_ci		}
178362306a36Sopenharmony_ci		dout(" inode %p now dirty snapc %p auth cap %p\n",
178462306a36Sopenharmony_ci		     &ci->netfs.inode, ci->i_head_snapc, ci->i_auth_cap);
178562306a36Sopenharmony_ci		BUG_ON(!list_empty(&ci->i_dirty_item));
178662306a36Sopenharmony_ci		spin_lock(&mdsc->cap_dirty_lock);
178762306a36Sopenharmony_ci		list_add(&ci->i_dirty_item, &session->s_cap_dirty);
178862306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_dirty_lock);
178962306a36Sopenharmony_ci		if (ci->i_flushing_caps == 0) {
179062306a36Sopenharmony_ci			ihold(inode);
179162306a36Sopenharmony_ci			dirty |= I_DIRTY_SYNC;
179262306a36Sopenharmony_ci		}
179362306a36Sopenharmony_ci	} else {
179462306a36Sopenharmony_ci		WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
179562306a36Sopenharmony_ci	}
179662306a36Sopenharmony_ci	BUG_ON(list_empty(&ci->i_dirty_item));
179762306a36Sopenharmony_ci	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
179862306a36Sopenharmony_ci	    (mask & CEPH_CAP_FILE_BUFFER))
179962306a36Sopenharmony_ci		dirty |= I_DIRTY_DATASYNC;
180062306a36Sopenharmony_ci	__cap_delay_requeue(mdsc, ci);
180162306a36Sopenharmony_ci	return dirty;
180262306a36Sopenharmony_ci}
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_cistruct ceph_cap_flush *ceph_alloc_cap_flush(void)
180562306a36Sopenharmony_ci{
180662306a36Sopenharmony_ci	struct ceph_cap_flush *cf;
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
180962306a36Sopenharmony_ci	if (!cf)
181062306a36Sopenharmony_ci		return NULL;
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci	cf->is_capsnap = false;
181362306a36Sopenharmony_ci	return cf;
181462306a36Sopenharmony_ci}
181562306a36Sopenharmony_ci
181662306a36Sopenharmony_civoid ceph_free_cap_flush(struct ceph_cap_flush *cf)
181762306a36Sopenharmony_ci{
181862306a36Sopenharmony_ci	if (cf)
181962306a36Sopenharmony_ci		kmem_cache_free(ceph_cap_flush_cachep, cf);
182062306a36Sopenharmony_ci}
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_cistatic u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
182362306a36Sopenharmony_ci{
182462306a36Sopenharmony_ci	if (!list_empty(&mdsc->cap_flush_list)) {
182562306a36Sopenharmony_ci		struct ceph_cap_flush *cf =
182662306a36Sopenharmony_ci			list_first_entry(&mdsc->cap_flush_list,
182762306a36Sopenharmony_ci					 struct ceph_cap_flush, g_list);
182862306a36Sopenharmony_ci		return cf->tid;
182962306a36Sopenharmony_ci	}
183062306a36Sopenharmony_ci	return 0;
183162306a36Sopenharmony_ci}
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci/*
183462306a36Sopenharmony_ci * Remove cap_flush from the mdsc's or inode's flushing cap list.
183562306a36Sopenharmony_ci * Return true if caller needs to wake up flush waiters.
183662306a36Sopenharmony_ci */
183762306a36Sopenharmony_cistatic bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
183862306a36Sopenharmony_ci					 struct ceph_cap_flush *cf)
183962306a36Sopenharmony_ci{
184062306a36Sopenharmony_ci	struct ceph_cap_flush *prev;
184162306a36Sopenharmony_ci	bool wake = cf->wake;
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_ci	if (wake && cf->g_list.prev != &mdsc->cap_flush_list) {
184462306a36Sopenharmony_ci		prev = list_prev_entry(cf, g_list);
184562306a36Sopenharmony_ci		prev->wake = true;
184662306a36Sopenharmony_ci		wake = false;
184762306a36Sopenharmony_ci	}
184862306a36Sopenharmony_ci	list_del_init(&cf->g_list);
184962306a36Sopenharmony_ci	return wake;
185062306a36Sopenharmony_ci}
185162306a36Sopenharmony_ci
185262306a36Sopenharmony_cistatic bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
185362306a36Sopenharmony_ci				       struct ceph_cap_flush *cf)
185462306a36Sopenharmony_ci{
185562306a36Sopenharmony_ci	struct ceph_cap_flush *prev;
185662306a36Sopenharmony_ci	bool wake = cf->wake;
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci	if (wake && cf->i_list.prev != &ci->i_cap_flush_list) {
185962306a36Sopenharmony_ci		prev = list_prev_entry(cf, i_list);
186062306a36Sopenharmony_ci		prev->wake = true;
186162306a36Sopenharmony_ci		wake = false;
186262306a36Sopenharmony_ci	}
186362306a36Sopenharmony_ci	list_del_init(&cf->i_list);
186462306a36Sopenharmony_ci	return wake;
186562306a36Sopenharmony_ci}
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci/*
186862306a36Sopenharmony_ci * Add dirty inode to the flushing list.  Assigned a seq number so we
186962306a36Sopenharmony_ci * can wait for caps to flush without starving.
187062306a36Sopenharmony_ci *
187162306a36Sopenharmony_ci * Called under i_ceph_lock. Returns the flush tid.
187262306a36Sopenharmony_ci */
187362306a36Sopenharmony_cistatic u64 __mark_caps_flushing(struct inode *inode,
187462306a36Sopenharmony_ci				struct ceph_mds_session *session, bool wake,
187562306a36Sopenharmony_ci				u64 *oldest_flush_tid)
187662306a36Sopenharmony_ci{
187762306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
187862306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
187962306a36Sopenharmony_ci	struct ceph_cap_flush *cf = NULL;
188062306a36Sopenharmony_ci	int flushing;
188162306a36Sopenharmony_ci
188262306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
188362306a36Sopenharmony_ci	BUG_ON(ci->i_dirty_caps == 0);
188462306a36Sopenharmony_ci	BUG_ON(list_empty(&ci->i_dirty_item));
188562306a36Sopenharmony_ci	BUG_ON(!ci->i_prealloc_cap_flush);
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci	flushing = ci->i_dirty_caps;
188862306a36Sopenharmony_ci	dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
188962306a36Sopenharmony_ci	     ceph_cap_string(flushing),
189062306a36Sopenharmony_ci	     ceph_cap_string(ci->i_flushing_caps),
189162306a36Sopenharmony_ci	     ceph_cap_string(ci->i_flushing_caps | flushing));
189262306a36Sopenharmony_ci	ci->i_flushing_caps |= flushing;
189362306a36Sopenharmony_ci	ci->i_dirty_caps = 0;
189462306a36Sopenharmony_ci	dout(" inode %p now !dirty\n", inode);
189562306a36Sopenharmony_ci
189662306a36Sopenharmony_ci	swap(cf, ci->i_prealloc_cap_flush);
189762306a36Sopenharmony_ci	cf->caps = flushing;
189862306a36Sopenharmony_ci	cf->wake = wake;
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
190162306a36Sopenharmony_ci	list_del_init(&ci->i_dirty_item);
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci	cf->tid = ++mdsc->last_cap_flush_tid;
190462306a36Sopenharmony_ci	list_add_tail(&cf->g_list, &mdsc->cap_flush_list);
190562306a36Sopenharmony_ci	*oldest_flush_tid = __get_oldest_flush_tid(mdsc);
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci	if (list_empty(&ci->i_flushing_item)) {
190862306a36Sopenharmony_ci		list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
190962306a36Sopenharmony_ci		mdsc->num_cap_flushing++;
191062306a36Sopenharmony_ci	}
191162306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
191262306a36Sopenharmony_ci
191362306a36Sopenharmony_ci	list_add_tail(&cf->i_list, &ci->i_cap_flush_list);
191462306a36Sopenharmony_ci
191562306a36Sopenharmony_ci	return cf->tid;
191662306a36Sopenharmony_ci}
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci/*
191962306a36Sopenharmony_ci * try to invalidate mapping pages without blocking.
192062306a36Sopenharmony_ci */
192162306a36Sopenharmony_cistatic int try_nonblocking_invalidate(struct inode *inode)
192262306a36Sopenharmony_ci	__releases(ci->i_ceph_lock)
192362306a36Sopenharmony_ci	__acquires(ci->i_ceph_lock)
192462306a36Sopenharmony_ci{
192562306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
192662306a36Sopenharmony_ci	u32 invalidating_gen = ci->i_rdcache_gen;
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
192962306a36Sopenharmony_ci	ceph_fscache_invalidate(inode, false);
193062306a36Sopenharmony_ci	invalidate_mapping_pages(&inode->i_data, 0, -1);
193162306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
193262306a36Sopenharmony_ci
193362306a36Sopenharmony_ci	if (inode->i_data.nrpages == 0 &&
193462306a36Sopenharmony_ci	    invalidating_gen == ci->i_rdcache_gen) {
193562306a36Sopenharmony_ci		/* success. */
193662306a36Sopenharmony_ci		dout("try_nonblocking_invalidate %p success\n", inode);
193762306a36Sopenharmony_ci		/* save any racing async invalidate some trouble */
193862306a36Sopenharmony_ci		ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
193962306a36Sopenharmony_ci		return 0;
194062306a36Sopenharmony_ci	}
194162306a36Sopenharmony_ci	dout("try_nonblocking_invalidate %p failed\n", inode);
194262306a36Sopenharmony_ci	return -1;
194362306a36Sopenharmony_ci}
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_cibool __ceph_should_report_size(struct ceph_inode_info *ci)
194662306a36Sopenharmony_ci{
194762306a36Sopenharmony_ci	loff_t size = i_size_read(&ci->netfs.inode);
194862306a36Sopenharmony_ci	/* mds will adjust max size according to the reported size */
194962306a36Sopenharmony_ci	if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
195062306a36Sopenharmony_ci		return false;
195162306a36Sopenharmony_ci	if (size >= ci->i_max_size)
195262306a36Sopenharmony_ci		return true;
195362306a36Sopenharmony_ci	/* half of previous max_size increment has been used */
195462306a36Sopenharmony_ci	if (ci->i_max_size > ci->i_reported_size &&
195562306a36Sopenharmony_ci	    (size << 1) >= ci->i_max_size + ci->i_reported_size)
195662306a36Sopenharmony_ci		return true;
195762306a36Sopenharmony_ci	return false;
195862306a36Sopenharmony_ci}
195962306a36Sopenharmony_ci
196062306a36Sopenharmony_ci/*
196162306a36Sopenharmony_ci * Swiss army knife function to examine currently used and wanted
196262306a36Sopenharmony_ci * versus held caps.  Release, flush, ack revoked caps to mds as
196362306a36Sopenharmony_ci * appropriate.
196462306a36Sopenharmony_ci *
196562306a36Sopenharmony_ci *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
196662306a36Sopenharmony_ci *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
196762306a36Sopenharmony_ci *    further delay.
 *  CHECK_CAPS_NOINVAL - do not attempt the page cache invalidation below,
 *    unless the mds client is stopping.
 *
 * Takes i_ceph_lock itself and drops it around every cap message that is
 * sent; after each message the whole evaluation is restarted.  If the
 * inode is still waiting for an async create reply, nothing is sent and
 * CEPH_I_ASYNC_CHECK_CAPS is set on the inode instead.  A writeback and/or
 * an invalidate may be queued once the lock has been released.
196862306a36Sopenharmony_ci */
196962306a36Sopenharmony_civoid ceph_check_caps(struct ceph_inode_info *ci, int flags)
197062306a36Sopenharmony_ci{
197162306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
197262306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
197362306a36Sopenharmony_ci	struct ceph_cap *cap;
197462306a36Sopenharmony_ci	u64 flush_tid, oldest_flush_tid;
197562306a36Sopenharmony_ci	int file_wanted, used, cap_used;
197662306a36Sopenharmony_ci	int issued, implemented, want, retain, revoking, flushing = 0;
197762306a36Sopenharmony_ci	int mds = -1;   /* keep track of how far we've gone through i_caps list
197862306a36Sopenharmony_ci			   to avoid an infinite loop on retry */
197962306a36Sopenharmony_ci	struct rb_node *p;
198062306a36Sopenharmony_ci	bool queue_invalidate = false;
198162306a36Sopenharmony_ci	bool tried_invalidate = false;
198262306a36Sopenharmony_ci	bool queue_writeback = false;
	/* session reference held across iterations; dropped at 'ack' and on exit */
198362306a36Sopenharmony_ci	struct ceph_mds_session *session = NULL;
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
198662306a36Sopenharmony_ci	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
		/* leave a marker on the inode that a cap check was requested */
198762306a36Sopenharmony_ci		ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS;
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci		/* Don't send messages until we get async create reply */
199062306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
199162306a36Sopenharmony_ci		return;
199262306a36Sopenharmony_ci	}
199362306a36Sopenharmony_ci
	/* CEPH_I_FLUSH on the inode has the same effect as CHECK_CAPS_FLUSH */
199462306a36Sopenharmony_ci	if (ci->i_ceph_flags & CEPH_I_FLUSH)
199562306a36Sopenharmony_ci		flags |= CHECK_CAPS_FLUSH;
199662306a36Sopenharmony_ciretry:
199762306a36Sopenharmony_ci	/* Caps wanted by virtue of active open files. */
199862306a36Sopenharmony_ci	file_wanted = __ceph_caps_file_wanted(ci);
199962306a36Sopenharmony_ci
200062306a36Sopenharmony_ci	/* Caps which have active references against them */
200162306a36Sopenharmony_ci	used = __ceph_caps_used(ci);
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci	/*
200462306a36Sopenharmony_ci	 * "issued" represents the current caps that the MDS wants us to have.
200562306a36Sopenharmony_ci	 * "implemented" is the set that we have been granted, and includes the
200662306a36Sopenharmony_ci	 * ones that have not yet been returned to the MDS (the "revoking" set,
200762306a36Sopenharmony_ci	 * usually because they have outstanding references).
200862306a36Sopenharmony_ci	 */
200962306a36Sopenharmony_ci	issued = __ceph_caps_issued(ci, &implemented);
201062306a36Sopenharmony_ci	revoking = implemented & ~issued;
201162306a36Sopenharmony_ci
201262306a36Sopenharmony_ci	want = file_wanted;
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci	/* The ones we currently want to retain (may be adjusted below) */
201562306a36Sopenharmony_ci	retain = file_wanted | used | CEPH_CAP_PIN;
201662306a36Sopenharmony_ci	if (!mdsc->stopping && inode->i_nlink > 0) {
201762306a36Sopenharmony_ci		if (file_wanted) {
201862306a36Sopenharmony_ci			retain |= CEPH_CAP_ANY;       /* be greedy */
201962306a36Sopenharmony_ci		} else if (S_ISDIR(inode->i_mode) &&
202062306a36Sopenharmony_ci			   (issued & CEPH_CAP_FILE_SHARED) &&
202162306a36Sopenharmony_ci			   __ceph_dir_is_complete(ci)) {
202262306a36Sopenharmony_ci			/*
202362306a36Sopenharmony_ci			 * If a directory is complete, we want to keep
202462306a36Sopenharmony_ci			 * the exclusive cap. So that MDS does not end up
202562306a36Sopenharmony_ci			 * revoking the shared cap on every create/unlink
202662306a36Sopenharmony_ci			 * operation.
202762306a36Sopenharmony_ci			 */
202862306a36Sopenharmony_ci			if (IS_RDONLY(inode)) {
202962306a36Sopenharmony_ci				want = CEPH_CAP_ANY_SHARED;
203062306a36Sopenharmony_ci			} else {
203162306a36Sopenharmony_ci				want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
203262306a36Sopenharmony_ci			}
203362306a36Sopenharmony_ci			retain |= want;
203462306a36Sopenharmony_ci		} else {
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci			retain |= CEPH_CAP_ANY_SHARED;
203762306a36Sopenharmony_ci			/*
203862306a36Sopenharmony_ci			 * keep RD only if we didn't have the file open RW,
203962306a36Sopenharmony_ci			 * because then the mds would revoke it anyway to
204062306a36Sopenharmony_ci			 * journal max_size=0.
204162306a36Sopenharmony_ci			 */
204262306a36Sopenharmony_ci			if (ci->i_max_size == 0)
204362306a36Sopenharmony_ci				retain |= CEPH_CAP_ANY_RD;
204462306a36Sopenharmony_ci		}
204562306a36Sopenharmony_ci	}
204662306a36Sopenharmony_ci
204762306a36Sopenharmony_ci	dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
204862306a36Sopenharmony_ci	     " issued %s revoking %s retain %s %s%s%s\n", ceph_vinop(inode),
204962306a36Sopenharmony_ci	     ceph_cap_string(file_wanted),
205062306a36Sopenharmony_ci	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
205162306a36Sopenharmony_ci	     ceph_cap_string(ci->i_flushing_caps),
205262306a36Sopenharmony_ci	     ceph_cap_string(issued), ceph_cap_string(revoking),
205362306a36Sopenharmony_ci	     ceph_cap_string(retain),
205462306a36Sopenharmony_ci	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
205562306a36Sopenharmony_ci	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
205662306a36Sopenharmony_ci	     (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
205762306a36Sopenharmony_ci
205862306a36Sopenharmony_ci	/*
205962306a36Sopenharmony_ci	 * If we no longer need to hold onto our old caps, and we may
206062306a36Sopenharmony_ci	 * have cached pages, but don't want them, then try to invalidate.
206162306a36Sopenharmony_ci	 * If we fail, it's because pages are locked.... try again later.
	 *
	 * This is attempted at most once per call (tried_invalidate).
206262306a36Sopenharmony_ci	 */
206362306a36Sopenharmony_ci	if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) &&
206462306a36Sopenharmony_ci	    S_ISREG(inode->i_mode) &&
206562306a36Sopenharmony_ci	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
206662306a36Sopenharmony_ci	    inode->i_data.nrpages &&		/* have cached pages */
206762306a36Sopenharmony_ci	    (revoking & (CEPH_CAP_FILE_CACHE|
206862306a36Sopenharmony_ci			 CEPH_CAP_FILE_LAZYIO)) && /*  or revoking cache */
206962306a36Sopenharmony_ci	    !tried_invalidate) {
207062306a36Sopenharmony_ci		dout("check_caps trying to invalidate on %llx.%llx\n",
207162306a36Sopenharmony_ci		     ceph_vinop(inode));
207262306a36Sopenharmony_ci		if (try_nonblocking_invalidate(inode) < 0) {
207362306a36Sopenharmony_ci			dout("check_caps queuing invalidate\n");
207462306a36Sopenharmony_ci			queue_invalidate = true;
207562306a36Sopenharmony_ci			ci->i_rdcache_revoking = ci->i_rdcache_gen;
207662306a36Sopenharmony_ci		}
207762306a36Sopenharmony_ci		tried_invalidate = true;
		/*
		 * try_nonblocking_invalidate() dropped and retook
		 * i_ceph_lock, so recompute everything from scratch.
		 */
207862306a36Sopenharmony_ci		goto retry;
207962306a36Sopenharmony_ci	}
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
208262306a36Sopenharmony_ci		int mflags = 0;
208362306a36Sopenharmony_ci		struct cap_msg_args arg;
208462306a36Sopenharmony_ci
208562306a36Sopenharmony_ci		cap = rb_entry(p, struct ceph_cap, ci_node);
208662306a36Sopenharmony_ci
		/*
		 * 'mds' holds the rank of the last cap a message was sent
		 * for; caps at or below that rank are skipped after a retry.
		 * NOTE(review): presumably i_caps is ordered by mds rank,
		 * which is what makes this a progress guarantee - confirm.
		 */
208762306a36Sopenharmony_ci		/* avoid looping forever */
208862306a36Sopenharmony_ci		if (mds >= cap->mds ||
208962306a36Sopenharmony_ci		    ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap))
209062306a36Sopenharmony_ci			continue;
209162306a36Sopenharmony_ci
209262306a36Sopenharmony_ci		/*
209362306a36Sopenharmony_ci		 * If we have an auth cap, we don't need to consider any
209462306a36Sopenharmony_ci		 * overlapping caps as used.
209562306a36Sopenharmony_ci		 */
209662306a36Sopenharmony_ci		cap_used = used;
209762306a36Sopenharmony_ci		if (ci->i_auth_cap && cap != ci->i_auth_cap)
209862306a36Sopenharmony_ci			cap_used &= ~ci->i_auth_cap->issued;
209962306a36Sopenharmony_ci
		/* from here on 'revoking' is specific to this cap */
210062306a36Sopenharmony_ci		revoking = cap->implemented & ~cap->issued;
210162306a36Sopenharmony_ci		dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
210262306a36Sopenharmony_ci		     cap->mds, cap, ceph_cap_string(cap_used),
210362306a36Sopenharmony_ci		     ceph_cap_string(cap->issued),
210462306a36Sopenharmony_ci		     ceph_cap_string(cap->implemented),
210562306a36Sopenharmony_ci		     ceph_cap_string(revoking));
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_ci		if (cap == ci->i_auth_cap &&
210862306a36Sopenharmony_ci		    (cap->issued & CEPH_CAP_FILE_WR)) {
210962306a36Sopenharmony_ci			/* request larger max_size from MDS? */
211062306a36Sopenharmony_ci			if (ci->i_wanted_max_size > ci->i_max_size &&
211162306a36Sopenharmony_ci			    ci->i_wanted_max_size > ci->i_requested_max_size) {
211262306a36Sopenharmony_ci				dout("requesting new max_size\n");
211362306a36Sopenharmony_ci				goto ack;
211462306a36Sopenharmony_ci			}
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_ci			/* approaching file_max? */
211762306a36Sopenharmony_ci			if (__ceph_should_report_size(ci)) {
211862306a36Sopenharmony_ci				dout("i_size approaching max_size\n");
211962306a36Sopenharmony_ci				goto ack;
212062306a36Sopenharmony_ci			}
212162306a36Sopenharmony_ci		}
212262306a36Sopenharmony_ci		/* flush anything dirty? */
212362306a36Sopenharmony_ci		if (cap == ci->i_auth_cap) {
212462306a36Sopenharmony_ci			if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) {
212562306a36Sopenharmony_ci				dout("flushing dirty caps\n");
212662306a36Sopenharmony_ci				goto ack;
212762306a36Sopenharmony_ci			}
212862306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) {
212962306a36Sopenharmony_ci				dout("flushing snap caps\n");
213062306a36Sopenharmony_ci				goto ack;
213162306a36Sopenharmony_ci			}
213262306a36Sopenharmony_ci		}
213362306a36Sopenharmony_ci
213462306a36Sopenharmony_ci		/* completed revocation? going down and there are no caps? */
213562306a36Sopenharmony_ci		if (revoking) {
213662306a36Sopenharmony_ci			if ((revoking & cap_used) == 0) {
213762306a36Sopenharmony_ci				dout("completed revocation of %s\n",
213862306a36Sopenharmony_ci				      ceph_cap_string(cap->implemented & ~cap->issued));
213962306a36Sopenharmony_ci				goto ack;
214062306a36Sopenharmony_ci			}
214162306a36Sopenharmony_ci
214262306a36Sopenharmony_ci			/*
214362306a36Sopenharmony_ci			 * If the "i_wrbuffer_ref" was increased by mmap or generic
214462306a36Sopenharmony_ci			 * cache write just before the ceph_check_caps() is called,
214562306a36Sopenharmony_ci			 * the Fb capability revoking will fail this time. Then we
214662306a36Sopenharmony_ci			 * must wait for the BDI's delayed work to flush the dirty
214762306a36Sopenharmony_ci			 * pages and to release the "i_wrbuffer_ref", which will cost
214862306a36Sopenharmony_ci			 * at most 5 seconds. That means the MDS needs to wait at
214962306a36Sopenharmony_ci			 * most 5 seconds to finish the Fb capability's revocation.
215062306a36Sopenharmony_ci			 *
215162306a36Sopenharmony_ci			 * Let's queue a writeback for it.
215262306a36Sopenharmony_ci			 */
215362306a36Sopenharmony_ci			if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
215462306a36Sopenharmony_ci			    (revoking & CEPH_CAP_FILE_BUFFER))
215562306a36Sopenharmony_ci				queue_writeback = true;
215662306a36Sopenharmony_ci		}
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci		/* want more caps from mds? */
215962306a36Sopenharmony_ci		if (want & ~cap->mds_wanted) {
216062306a36Sopenharmony_ci			if (want & ~(cap->mds_wanted | cap->issued))
216162306a36Sopenharmony_ci				goto ack;
216262306a36Sopenharmony_ci			if (!__cap_is_valid(cap))
216362306a36Sopenharmony_ci				goto ack;
216462306a36Sopenharmony_ci		}
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ci		/* things we might delay */
216762306a36Sopenharmony_ci		if ((cap->issued & ~retain) == 0)
216862306a36Sopenharmony_ci			continue;     /* nope, all good */
216962306a36Sopenharmony_ci
		/*
		 * A message is going to be sent for this cap: swap the session
		 * reference we hold for one on this cap's session.
		 */
217062306a36Sopenharmony_ciack:
217162306a36Sopenharmony_ci		ceph_put_mds_session(session);
217262306a36Sopenharmony_ci		session = ceph_get_mds_session(cap->session);
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci		/* kick flushing and flush snaps before sending normal
217562306a36Sopenharmony_ci		 * cap message */
217662306a36Sopenharmony_ci		if (cap == ci->i_auth_cap &&
217762306a36Sopenharmony_ci		    (ci->i_ceph_flags &
217862306a36Sopenharmony_ci		     (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) {
217962306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
218062306a36Sopenharmony_ci				__kick_flushing_caps(mdsc, session, ci, 0);
218162306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
218262306a36Sopenharmony_ci				__ceph_flush_snaps(ci, session);
218362306a36Sopenharmony_ci
218462306a36Sopenharmony_ci			goto retry;
218562306a36Sopenharmony_ci		}
218662306a36Sopenharmony_ci
		/* dirty caps are only ever flushed through the auth cap */
218762306a36Sopenharmony_ci		if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
218862306a36Sopenharmony_ci			flushing = ci->i_dirty_caps;
218962306a36Sopenharmony_ci			flush_tid = __mark_caps_flushing(inode, session, false,
219062306a36Sopenharmony_ci							 &oldest_flush_tid);
			/*
			 * Mark the message SYNC when a flush was requested and
			 * the session has no other dirty inodes queued.
			 */
219162306a36Sopenharmony_ci			if (flags & CHECK_CAPS_FLUSH &&
219262306a36Sopenharmony_ci			    list_empty(&session->s_cap_dirty))
219362306a36Sopenharmony_ci				mflags |= CEPH_CLIENT_CAPS_SYNC;
219462306a36Sopenharmony_ci		} else {
219562306a36Sopenharmony_ci			flushing = 0;
219662306a36Sopenharmony_ci			flush_tid = 0;
219762306a36Sopenharmony_ci			spin_lock(&mdsc->cap_dirty_lock);
219862306a36Sopenharmony_ci			oldest_flush_tid = __get_oldest_flush_tid(mdsc);
219962306a36Sopenharmony_ci			spin_unlock(&mdsc->cap_dirty_lock);
220062306a36Sopenharmony_ci		}
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci		mds = cap->mds;  /* remember mds, so we don't repeat */
220362306a36Sopenharmony_ci
220462306a36Sopenharmony_ci		__prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
220562306a36Sopenharmony_ci			   want, retain, flushing, flush_tid, oldest_flush_tid);
220662306a36Sopenharmony_ci
		/* the message is built under i_ceph_lock but sent without it */
220762306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
220862306a36Sopenharmony_ci		__send_cap(&arg, ci);
220962306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci		goto retry; /* retake i_ceph_lock and restart our cap scan. */
221262306a36Sopenharmony_ci	}
221362306a36Sopenharmony_ci
221462306a36Sopenharmony_ci	/* periodically re-calculate caps wanted by open files */
221562306a36Sopenharmony_ci	if (__ceph_is_any_real_caps(ci) &&
221662306a36Sopenharmony_ci	    list_empty(&ci->i_cap_delay_list) &&
221762306a36Sopenharmony_ci	    (file_wanted & ~CEPH_CAP_PIN) &&
221862306a36Sopenharmony_ci	    !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
221962306a36Sopenharmony_ci		__cap_delay_requeue(mdsc, ci);
222062306a36Sopenharmony_ci	}
222162306a36Sopenharmony_ci
222262306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
222362306a36Sopenharmony_ci
	/* deferred work that is only started once i_ceph_lock is released */
222462306a36Sopenharmony_ci	ceph_put_mds_session(session);
222562306a36Sopenharmony_ci	if (queue_writeback)
222662306a36Sopenharmony_ci		ceph_queue_writeback(inode);
222762306a36Sopenharmony_ci	if (queue_invalidate)
222862306a36Sopenharmony_ci		ceph_queue_invalidate(inode);
222962306a36Sopenharmony_ci}
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci/*
223262306a36Sopenharmony_ci * Try to flush dirty caps back to the auth mds.
223362306a36Sopenharmony_ci */
223462306a36Sopenharmony_cistatic int try_flush_caps(struct inode *inode, u64 *ptid)
223562306a36Sopenharmony_ci{
223662306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
223762306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
223862306a36Sopenharmony_ci	int flushing = 0;
223962306a36Sopenharmony_ci	u64 flush_tid = 0, oldest_flush_tid = 0;
224062306a36Sopenharmony_ci
224162306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
224262306a36Sopenharmony_ciretry_locked:
224362306a36Sopenharmony_ci	if (ci->i_dirty_caps && ci->i_auth_cap) {
224462306a36Sopenharmony_ci		struct ceph_cap *cap = ci->i_auth_cap;
224562306a36Sopenharmony_ci		struct cap_msg_args arg;
224662306a36Sopenharmony_ci		struct ceph_mds_session *session = cap->session;
224762306a36Sopenharmony_ci
224862306a36Sopenharmony_ci		if (session->s_state < CEPH_MDS_SESSION_OPEN) {
224962306a36Sopenharmony_ci			spin_unlock(&ci->i_ceph_lock);
225062306a36Sopenharmony_ci			goto out;
225162306a36Sopenharmony_ci		}
225262306a36Sopenharmony_ci
225362306a36Sopenharmony_ci		if (ci->i_ceph_flags &
225462306a36Sopenharmony_ci		    (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) {
225562306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
225662306a36Sopenharmony_ci				__kick_flushing_caps(mdsc, session, ci, 0);
225762306a36Sopenharmony_ci			if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
225862306a36Sopenharmony_ci				__ceph_flush_snaps(ci, session);
225962306a36Sopenharmony_ci			goto retry_locked;
226062306a36Sopenharmony_ci		}
226162306a36Sopenharmony_ci
226262306a36Sopenharmony_ci		flushing = ci->i_dirty_caps;
226362306a36Sopenharmony_ci		flush_tid = __mark_caps_flushing(inode, session, true,
226462306a36Sopenharmony_ci						 &oldest_flush_tid);
226562306a36Sopenharmony_ci
226662306a36Sopenharmony_ci		__prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
226762306a36Sopenharmony_ci			   __ceph_caps_used(ci), __ceph_caps_wanted(ci),
226862306a36Sopenharmony_ci			   (cap->issued | cap->implemented),
226962306a36Sopenharmony_ci			   flushing, flush_tid, oldest_flush_tid);
227062306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ci		__send_cap(&arg, ci);
227362306a36Sopenharmony_ci	} else {
227462306a36Sopenharmony_ci		if (!list_empty(&ci->i_cap_flush_list)) {
227562306a36Sopenharmony_ci			struct ceph_cap_flush *cf =
227662306a36Sopenharmony_ci				list_last_entry(&ci->i_cap_flush_list,
227762306a36Sopenharmony_ci						struct ceph_cap_flush, i_list);
227862306a36Sopenharmony_ci			cf->wake = true;
227962306a36Sopenharmony_ci			flush_tid = cf->tid;
228062306a36Sopenharmony_ci		}
228162306a36Sopenharmony_ci		flushing = ci->i_flushing_caps;
228262306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
228362306a36Sopenharmony_ci	}
228462306a36Sopenharmony_ciout:
228562306a36Sopenharmony_ci	*ptid = flush_tid;
228662306a36Sopenharmony_ci	return flushing;
228762306a36Sopenharmony_ci}
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci/*
229062306a36Sopenharmony_ci * Return true if we've flushed caps through the given flush_tid.
229162306a36Sopenharmony_ci */
229262306a36Sopenharmony_cistatic int caps_are_flushed(struct inode *inode, u64 flush_tid)
229362306a36Sopenharmony_ci{
229462306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
229562306a36Sopenharmony_ci	int ret = 1;
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
229862306a36Sopenharmony_ci	if (!list_empty(&ci->i_cap_flush_list)) {
229962306a36Sopenharmony_ci		struct ceph_cap_flush * cf =
230062306a36Sopenharmony_ci			list_first_entry(&ci->i_cap_flush_list,
230162306a36Sopenharmony_ci					 struct ceph_cap_flush, i_list);
230262306a36Sopenharmony_ci		if (cf->tid <= flush_tid)
230362306a36Sopenharmony_ci			ret = 0;
230462306a36Sopenharmony_ci	}
230562306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
230662306a36Sopenharmony_ci	return ret;
230762306a36Sopenharmony_ci}
230862306a36Sopenharmony_ci
230962306a36Sopenharmony_ci/*
231062306a36Sopenharmony_ci * flush the mdlog and wait for any unsafe requests to complete.
231162306a36Sopenharmony_ci */
231262306a36Sopenharmony_cistatic int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
231362306a36Sopenharmony_ci{
231462306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
231562306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
231662306a36Sopenharmony_ci	struct ceph_mds_request *req1 = NULL, *req2 = NULL;
231762306a36Sopenharmony_ci	int ret, err = 0;
231862306a36Sopenharmony_ci
231962306a36Sopenharmony_ci	spin_lock(&ci->i_unsafe_lock);
232062306a36Sopenharmony_ci	if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
232162306a36Sopenharmony_ci		req1 = list_last_entry(&ci->i_unsafe_dirops,
232262306a36Sopenharmony_ci					struct ceph_mds_request,
232362306a36Sopenharmony_ci					r_unsafe_dir_item);
232462306a36Sopenharmony_ci		ceph_mdsc_get_request(req1);
232562306a36Sopenharmony_ci	}
232662306a36Sopenharmony_ci	if (!list_empty(&ci->i_unsafe_iops)) {
232762306a36Sopenharmony_ci		req2 = list_last_entry(&ci->i_unsafe_iops,
232862306a36Sopenharmony_ci					struct ceph_mds_request,
232962306a36Sopenharmony_ci					r_unsafe_target_item);
233062306a36Sopenharmony_ci		ceph_mdsc_get_request(req2);
233162306a36Sopenharmony_ci	}
233262306a36Sopenharmony_ci	spin_unlock(&ci->i_unsafe_lock);
233362306a36Sopenharmony_ci
233462306a36Sopenharmony_ci	/*
233562306a36Sopenharmony_ci	 * Trigger to flush the journal logs in all the relevant MDSes
233662306a36Sopenharmony_ci	 * manually, or in the worst case we must wait at most 5 seconds
233762306a36Sopenharmony_ci	 * to wait the journal logs to be flushed by the MDSes periodically.
233862306a36Sopenharmony_ci	 */
233962306a36Sopenharmony_ci	if (req1 || req2) {
234062306a36Sopenharmony_ci		struct ceph_mds_request *req;
234162306a36Sopenharmony_ci		struct ceph_mds_session **sessions;
234262306a36Sopenharmony_ci		struct ceph_mds_session *s;
234362306a36Sopenharmony_ci		unsigned int max_sessions;
234462306a36Sopenharmony_ci		int i;
234562306a36Sopenharmony_ci
234662306a36Sopenharmony_ci		mutex_lock(&mdsc->mutex);
234762306a36Sopenharmony_ci		max_sessions = mdsc->max_sessions;
234862306a36Sopenharmony_ci
234962306a36Sopenharmony_ci		sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
235062306a36Sopenharmony_ci		if (!sessions) {
235162306a36Sopenharmony_ci			mutex_unlock(&mdsc->mutex);
235262306a36Sopenharmony_ci			err = -ENOMEM;
235362306a36Sopenharmony_ci			goto out;
235462306a36Sopenharmony_ci		}
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci		spin_lock(&ci->i_unsafe_lock);
235762306a36Sopenharmony_ci		if (req1) {
235862306a36Sopenharmony_ci			list_for_each_entry(req, &ci->i_unsafe_dirops,
235962306a36Sopenharmony_ci					    r_unsafe_dir_item) {
236062306a36Sopenharmony_ci				s = req->r_session;
236162306a36Sopenharmony_ci				if (!s)
236262306a36Sopenharmony_ci					continue;
236362306a36Sopenharmony_ci				if (!sessions[s->s_mds]) {
236462306a36Sopenharmony_ci					s = ceph_get_mds_session(s);
236562306a36Sopenharmony_ci					sessions[s->s_mds] = s;
236662306a36Sopenharmony_ci				}
236762306a36Sopenharmony_ci			}
236862306a36Sopenharmony_ci		}
236962306a36Sopenharmony_ci		if (req2) {
237062306a36Sopenharmony_ci			list_for_each_entry(req, &ci->i_unsafe_iops,
237162306a36Sopenharmony_ci					    r_unsafe_target_item) {
237262306a36Sopenharmony_ci				s = req->r_session;
237362306a36Sopenharmony_ci				if (!s)
237462306a36Sopenharmony_ci					continue;
237562306a36Sopenharmony_ci				if (!sessions[s->s_mds]) {
237662306a36Sopenharmony_ci					s = ceph_get_mds_session(s);
237762306a36Sopenharmony_ci					sessions[s->s_mds] = s;
237862306a36Sopenharmony_ci				}
237962306a36Sopenharmony_ci			}
238062306a36Sopenharmony_ci		}
238162306a36Sopenharmony_ci		spin_unlock(&ci->i_unsafe_lock);
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci		/* the auth MDS */
238462306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
238562306a36Sopenharmony_ci		if (ci->i_auth_cap) {
238662306a36Sopenharmony_ci			s = ci->i_auth_cap->session;
238762306a36Sopenharmony_ci			if (!sessions[s->s_mds])
238862306a36Sopenharmony_ci				sessions[s->s_mds] = ceph_get_mds_session(s);
238962306a36Sopenharmony_ci		}
239062306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
239162306a36Sopenharmony_ci		mutex_unlock(&mdsc->mutex);
239262306a36Sopenharmony_ci
239362306a36Sopenharmony_ci		/* send flush mdlog request to MDSes */
239462306a36Sopenharmony_ci		for (i = 0; i < max_sessions; i++) {
239562306a36Sopenharmony_ci			s = sessions[i];
239662306a36Sopenharmony_ci			if (s) {
239762306a36Sopenharmony_ci				send_flush_mdlog(s);
239862306a36Sopenharmony_ci				ceph_put_mds_session(s);
239962306a36Sopenharmony_ci			}
240062306a36Sopenharmony_ci		}
240162306a36Sopenharmony_ci		kfree(sessions);
240262306a36Sopenharmony_ci	}
240362306a36Sopenharmony_ci
240462306a36Sopenharmony_ci	dout("%s %p wait on tid %llu %llu\n", __func__,
240562306a36Sopenharmony_ci	     inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
240662306a36Sopenharmony_ci	if (req1) {
240762306a36Sopenharmony_ci		ret = !wait_for_completion_timeout(&req1->r_safe_completion,
240862306a36Sopenharmony_ci					ceph_timeout_jiffies(req1->r_timeout));
240962306a36Sopenharmony_ci		if (ret)
241062306a36Sopenharmony_ci			err = -EIO;
241162306a36Sopenharmony_ci	}
241262306a36Sopenharmony_ci	if (req2) {
241362306a36Sopenharmony_ci		ret = !wait_for_completion_timeout(&req2->r_safe_completion,
241462306a36Sopenharmony_ci					ceph_timeout_jiffies(req2->r_timeout));
241562306a36Sopenharmony_ci		if (ret)
241662306a36Sopenharmony_ci			err = -EIO;
241762306a36Sopenharmony_ci	}
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ciout:
242062306a36Sopenharmony_ci	if (req1)
242162306a36Sopenharmony_ci		ceph_mdsc_put_request(req1);
242262306a36Sopenharmony_ci	if (req2)
242362306a36Sopenharmony_ci		ceph_mdsc_put_request(req2);
242462306a36Sopenharmony_ci	return err;
242562306a36Sopenharmony_ci}
242662306a36Sopenharmony_ci
242762306a36Sopenharmony_ciint ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
242862306a36Sopenharmony_ci{
242962306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
243062306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
243162306a36Sopenharmony_ci	u64 flush_tid;
243262306a36Sopenharmony_ci	int ret, err;
243362306a36Sopenharmony_ci	int dirty;
243462306a36Sopenharmony_ci
243562306a36Sopenharmony_ci	dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
243662306a36Sopenharmony_ci
243762306a36Sopenharmony_ci	ret = file_write_and_wait_range(file, start, end);
243862306a36Sopenharmony_ci	if (datasync)
243962306a36Sopenharmony_ci		goto out;
244062306a36Sopenharmony_ci
244162306a36Sopenharmony_ci	ret = ceph_wait_on_async_create(inode);
244262306a36Sopenharmony_ci	if (ret)
244362306a36Sopenharmony_ci		goto out;
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_ci	dirty = try_flush_caps(inode, &flush_tid);
244662306a36Sopenharmony_ci	dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci	err = flush_mdlog_and_wait_inode_unsafe_requests(inode);
244962306a36Sopenharmony_ci
245062306a36Sopenharmony_ci	/*
245162306a36Sopenharmony_ci	 * only wait on non-file metadata writeback (the mds
245262306a36Sopenharmony_ci	 * can recover size and mtime, so we don't need to
245362306a36Sopenharmony_ci	 * wait for that)
245462306a36Sopenharmony_ci	 */
245562306a36Sopenharmony_ci	if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
245662306a36Sopenharmony_ci		err = wait_event_interruptible(ci->i_cap_wq,
245762306a36Sopenharmony_ci					caps_are_flushed(inode, flush_tid));
245862306a36Sopenharmony_ci	}
245962306a36Sopenharmony_ci
246062306a36Sopenharmony_ci	if (err < 0)
246162306a36Sopenharmony_ci		ret = err;
246262306a36Sopenharmony_ci
246362306a36Sopenharmony_ci	err = file_check_and_advance_wb_err(file);
246462306a36Sopenharmony_ci	if (err < 0)
246562306a36Sopenharmony_ci		ret = err;
246662306a36Sopenharmony_ciout:
246762306a36Sopenharmony_ci	dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
246862306a36Sopenharmony_ci	return ret;
246962306a36Sopenharmony_ci}
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci/*
247262306a36Sopenharmony_ci * Flush any dirty caps back to the mds.  If we aren't asked to wait,
247362306a36Sopenharmony_ci * queue inode for flush but don't do so immediately, because we can
247462306a36Sopenharmony_ci * get by with fewer MDS messages if we wait for data writeback to
247562306a36Sopenharmony_ci * complete first.
247662306a36Sopenharmony_ci */
247762306a36Sopenharmony_ciint ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
247862306a36Sopenharmony_ci{
247962306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
248062306a36Sopenharmony_ci	u64 flush_tid;
248162306a36Sopenharmony_ci	int err = 0;
248262306a36Sopenharmony_ci	int dirty;
248362306a36Sopenharmony_ci	int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync);
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci	dout("write_inode %p wait=%d\n", inode, wait);
248662306a36Sopenharmony_ci	ceph_fscache_unpin_writeback(inode, wbc);
248762306a36Sopenharmony_ci	if (wait) {
248862306a36Sopenharmony_ci		err = ceph_wait_on_async_create(inode);
248962306a36Sopenharmony_ci		if (err)
249062306a36Sopenharmony_ci			return err;
249162306a36Sopenharmony_ci		dirty = try_flush_caps(inode, &flush_tid);
249262306a36Sopenharmony_ci		if (dirty)
249362306a36Sopenharmony_ci			err = wait_event_interruptible(ci->i_cap_wq,
249462306a36Sopenharmony_ci				       caps_are_flushed(inode, flush_tid));
249562306a36Sopenharmony_ci	} else {
249662306a36Sopenharmony_ci		struct ceph_mds_client *mdsc =
249762306a36Sopenharmony_ci			ceph_sb_to_client(inode->i_sb)->mdsc;
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
250062306a36Sopenharmony_ci		if (__ceph_caps_dirty(ci))
250162306a36Sopenharmony_ci			__cap_delay_requeue_front(mdsc, ci);
250262306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
250362306a36Sopenharmony_ci	}
250462306a36Sopenharmony_ci	return err;
250562306a36Sopenharmony_ci}
250662306a36Sopenharmony_ci
/*
 * Re-send the cap flushes queued on ci->i_cap_flush_list to @session.
 *
 * @mdsc:             mds client (not referenced in this function body)
 * @session:          session that must own the inode's auth cap
 * @ci:               inode whose pending flushes are re-sent
 * @oldest_flush_tid: passed through to every message that is sent
 *
 * Must be called with ci->i_ceph_lock held.  The lock is dropped while
 * each message is sent and re-taken afterwards, so it is held again on
 * return.  Does nothing while CEPH_I_ASYNC_CREATE is set.
 */
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid)
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
{
	struct inode *inode = &ci->netfs.inode;
	struct ceph_cap *cap;
	struct ceph_cap_flush *cf;
	int ret;
	u64 first_tid = 0;
	u64 last_snap_flush = 0;

	/* Don't do anything until create reply comes in */
	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE)
		return;

	ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;

	/* remember the tid of the last capsnap flush in the list, if any */
	list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
		if (cf->is_capsnap) {
			last_snap_flush = cf->tid;
			break;
		}
	}

	list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
		/*
		 * Skip anything at or below a tid we already handled
		 * (presumably a guard against list changes while the lock
		 * was dropped below -- TODO confirm).
		 */
		if (cf->tid < first_tid)
			continue;

		/* re-read the auth cap: the lock was dropped last iteration */
		cap = ci->i_auth_cap;
		if (!(cap && cap->session == session)) {
			pr_err("%p auth cap %p not mds%d ???\n",
			       inode, cap, session->s_mds);
			break;
		}

		first_tid = cf->tid + 1;

		if (!cf->is_capsnap) {
			struct cap_msg_args arg;

			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
			/*
			 * Build the message while still locked; flushes older
			 * than the last capsnap flush are flagged as having a
			 * capsnap pending.
			 */
			__prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH,
					 (cf->tid < last_snap_flush ?
					  CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
					  __ceph_caps_used(ci),
					  __ceph_caps_wanted(ci),
					  (cap->issued | cap->implemented),
					  cf->caps, cf->tid, oldest_flush_tid);
			spin_unlock(&ci->i_ceph_lock);
			__send_cap(&arg, ci);
		} else {
			struct ceph_cap_snap *capsnap =
					container_of(cf, struct ceph_cap_snap,
						    cap_flush);
			dout("kick_flushing_caps %p capsnap %p tid %llu %s\n",
			     inode, capsnap, cf->tid,
			     ceph_cap_string(capsnap->dirty));

			/* hold a ref on the capsnap across the unlocked send */
			refcount_inc(&capsnap->nref);
			spin_unlock(&ci->i_ceph_lock);

			/* NOTE(review): cap->mseq is read after the unlock */
			ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
						oldest_flush_tid);
			if (ret < 0) {
				pr_err("kick_flushing_caps: error sending "
					"cap flushsnap, ino (%llx.%llx) "
					"tid %llu follows %llu\n",
					ceph_vinop(inode), cf->tid,
					capsnap->follows);
			}

			ceph_put_cap_snap(capsnap);
		}

		/* both branches unlocked; re-take the lock for the next entry */
		spin_lock(&ci->i_ceph_lock);
	}
}
258862306a36Sopenharmony_ci
258962306a36Sopenharmony_civoid ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
259062306a36Sopenharmony_ci				   struct ceph_mds_session *session)
259162306a36Sopenharmony_ci{
259262306a36Sopenharmony_ci	struct ceph_inode_info *ci;
259362306a36Sopenharmony_ci	struct ceph_cap *cap;
259462306a36Sopenharmony_ci	u64 oldest_flush_tid;
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci	dout("early_kick_flushing_caps mds%d\n", session->s_mds);
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
259962306a36Sopenharmony_ci	oldest_flush_tid = __get_oldest_flush_tid(mdsc);
260062306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
260162306a36Sopenharmony_ci
260262306a36Sopenharmony_ci	list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
260362306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
260462306a36Sopenharmony_ci		cap = ci->i_auth_cap;
260562306a36Sopenharmony_ci		if (!(cap && cap->session == session)) {
260662306a36Sopenharmony_ci			pr_err("%p auth cap %p not mds%d ???\n",
260762306a36Sopenharmony_ci				&ci->netfs.inode, cap, session->s_mds);
260862306a36Sopenharmony_ci			spin_unlock(&ci->i_ceph_lock);
260962306a36Sopenharmony_ci			continue;
261062306a36Sopenharmony_ci		}
261162306a36Sopenharmony_ci
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci		/*
261462306a36Sopenharmony_ci		 * if flushing caps were revoked, we re-send the cap flush
261562306a36Sopenharmony_ci		 * in client reconnect stage. This guarantees MDS * processes
261662306a36Sopenharmony_ci		 * the cap flush message before issuing the flushing caps to
261762306a36Sopenharmony_ci		 * other client.
261862306a36Sopenharmony_ci		 */
261962306a36Sopenharmony_ci		if ((cap->issued & ci->i_flushing_caps) !=
262062306a36Sopenharmony_ci		    ci->i_flushing_caps) {
262162306a36Sopenharmony_ci			/* encode_caps_cb() also will reset these sequence
262262306a36Sopenharmony_ci			 * numbers. make sure sequence numbers in cap flush
262362306a36Sopenharmony_ci			 * message match later reconnect message */
262462306a36Sopenharmony_ci			cap->seq = 0;
262562306a36Sopenharmony_ci			cap->issue_seq = 0;
262662306a36Sopenharmony_ci			cap->mseq = 0;
262762306a36Sopenharmony_ci			__kick_flushing_caps(mdsc, session, ci,
262862306a36Sopenharmony_ci					     oldest_flush_tid);
262962306a36Sopenharmony_ci		} else {
263062306a36Sopenharmony_ci			ci->i_ceph_flags |= CEPH_I_KICK_FLUSH;
263162306a36Sopenharmony_ci		}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
263462306a36Sopenharmony_ci	}
263562306a36Sopenharmony_ci}
263662306a36Sopenharmony_ci
263762306a36Sopenharmony_civoid ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
263862306a36Sopenharmony_ci			     struct ceph_mds_session *session)
263962306a36Sopenharmony_ci{
264062306a36Sopenharmony_ci	struct ceph_inode_info *ci;
264162306a36Sopenharmony_ci	struct ceph_cap *cap;
264262306a36Sopenharmony_ci	u64 oldest_flush_tid;
264362306a36Sopenharmony_ci
264462306a36Sopenharmony_ci	lockdep_assert_held(&session->s_mutex);
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci	dout("kick_flushing_caps mds%d\n", session->s_mds);
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
264962306a36Sopenharmony_ci	oldest_flush_tid = __get_oldest_flush_tid(mdsc);
265062306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
265362306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
265462306a36Sopenharmony_ci		cap = ci->i_auth_cap;
265562306a36Sopenharmony_ci		if (!(cap && cap->session == session)) {
265662306a36Sopenharmony_ci			pr_err("%p auth cap %p not mds%d ???\n",
265762306a36Sopenharmony_ci				&ci->netfs.inode, cap, session->s_mds);
265862306a36Sopenharmony_ci			spin_unlock(&ci->i_ceph_lock);
265962306a36Sopenharmony_ci			continue;
266062306a36Sopenharmony_ci		}
266162306a36Sopenharmony_ci		if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
266262306a36Sopenharmony_ci			__kick_flushing_caps(mdsc, session, ci,
266362306a36Sopenharmony_ci					     oldest_flush_tid);
266462306a36Sopenharmony_ci		}
266562306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
266662306a36Sopenharmony_ci	}
266762306a36Sopenharmony_ci}
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_civoid ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
267062306a36Sopenharmony_ci				   struct ceph_inode_info *ci)
267162306a36Sopenharmony_ci{
267262306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = session->s_mdsc;
267362306a36Sopenharmony_ci	struct ceph_cap *cap = ci->i_auth_cap;
267462306a36Sopenharmony_ci
267562306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
267662306a36Sopenharmony_ci
267762306a36Sopenharmony_ci	dout("%s %p flushing %s\n", __func__, &ci->netfs.inode,
267862306a36Sopenharmony_ci	     ceph_cap_string(ci->i_flushing_caps));
267962306a36Sopenharmony_ci
268062306a36Sopenharmony_ci	if (!list_empty(&ci->i_cap_flush_list)) {
268162306a36Sopenharmony_ci		u64 oldest_flush_tid;
268262306a36Sopenharmony_ci		spin_lock(&mdsc->cap_dirty_lock);
268362306a36Sopenharmony_ci		list_move_tail(&ci->i_flushing_item,
268462306a36Sopenharmony_ci			       &cap->session->s_cap_flushing);
268562306a36Sopenharmony_ci		oldest_flush_tid = __get_oldest_flush_tid(mdsc);
268662306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_dirty_lock);
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci		__kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
268962306a36Sopenharmony_ci	}
269062306a36Sopenharmony_ci}
269162306a36Sopenharmony_ci
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci/*
269462306a36Sopenharmony_ci * Take references to capabilities we hold, so that we don't release
269562306a36Sopenharmony_ci * them to the MDS prematurely.
269662306a36Sopenharmony_ci */
269762306a36Sopenharmony_civoid ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
269862306a36Sopenharmony_ci			    bool snap_rwsem_locked)
269962306a36Sopenharmony_ci{
270062306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
270162306a36Sopenharmony_ci
270262306a36Sopenharmony_ci	if (got & CEPH_CAP_PIN)
270362306a36Sopenharmony_ci		ci->i_pin_ref++;
270462306a36Sopenharmony_ci	if (got & CEPH_CAP_FILE_RD)
270562306a36Sopenharmony_ci		ci->i_rd_ref++;
270662306a36Sopenharmony_ci	if (got & CEPH_CAP_FILE_CACHE)
270762306a36Sopenharmony_ci		ci->i_rdcache_ref++;
270862306a36Sopenharmony_ci	if (got & CEPH_CAP_FILE_EXCL)
270962306a36Sopenharmony_ci		ci->i_fx_ref++;
271062306a36Sopenharmony_ci	if (got & CEPH_CAP_FILE_WR) {
271162306a36Sopenharmony_ci		if (ci->i_wr_ref == 0 && !ci->i_head_snapc) {
271262306a36Sopenharmony_ci			BUG_ON(!snap_rwsem_locked);
271362306a36Sopenharmony_ci			ci->i_head_snapc = ceph_get_snap_context(
271462306a36Sopenharmony_ci					ci->i_snap_realm->cached_context);
271562306a36Sopenharmony_ci		}
271662306a36Sopenharmony_ci		ci->i_wr_ref++;
271762306a36Sopenharmony_ci	}
271862306a36Sopenharmony_ci	if (got & CEPH_CAP_FILE_BUFFER) {
271962306a36Sopenharmony_ci		if (ci->i_wb_ref == 0)
272062306a36Sopenharmony_ci			ihold(&ci->netfs.inode);
272162306a36Sopenharmony_ci		ci->i_wb_ref++;
272262306a36Sopenharmony_ci		dout("%s %p wb %d -> %d (?)\n", __func__,
272362306a36Sopenharmony_ci		     &ci->netfs.inode, ci->i_wb_ref-1, ci->i_wb_ref);
272462306a36Sopenharmony_ci	}
272562306a36Sopenharmony_ci}
272662306a36Sopenharmony_ci
272762306a36Sopenharmony_ci/*
272862306a36Sopenharmony_ci * Try to grab cap references.  Specify those refs we @want, and the
272962306a36Sopenharmony_ci * minimal set we @need.  Also include the larger offset we are writing
273062306a36Sopenharmony_ci * to (when applicable), and check against max_size here as well.
273162306a36Sopenharmony_ci * Note that caller is responsible for ensuring max_size increases are
273262306a36Sopenharmony_ci * requested from the MDS.
273362306a36Sopenharmony_ci *
 * Returns 0 if the caps could not be acquired (yet), 1 on success,
 * or a negative error code. There are 3 special error codes:
273662306a36Sopenharmony_ci *  -EAGAIN:  need to sleep but non-blocking is specified
273762306a36Sopenharmony_ci *  -EFBIG:   ask caller to call check_max_size() and try again.
273862306a36Sopenharmony_ci *  -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
273962306a36Sopenharmony_ci */
/* Extra bits carried in the flags argument of try_get_cap_refs(). */
enum {
	/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
	/* return -EAGAIN instead of blocking in down_read(snap_rwsem) */
	NON_BLOCKING	= (1 << 8),
	/* fail with -EIO when CEPH_I_ERROR_FILELOCK is set on the inode */
	CHECK_FILELOCK	= (1 << 9),
};
274562306a36Sopenharmony_ci
/*
 * Make a single attempt to take references on the @need caps of @inode,
 * plus the @want caps when all of them are currently held.
 *
 * @inode:  inode to take cap references on
 * @need:   caps that must be held for the attempt to succeed
 * @want:   additional caps taken opportunistically
 * @endoff: when FILE_WR is both held and needed and endoff >= 0, it is
 *          checked against i_max_size
 * @flags:  passed to __ceph_touch_fmode(); may also carry NON_BLOCKING
 *          and CHECK_FILELOCK
 * @got:    set to the caps referenced when 1 is returned
 *
 * Returns 1 when references were taken, 0 when the caller has to wait,
 * or a negative error: -EIO (file lock error), -EFBIG / -EUCLEAN
 * (endoff beyond max_size; or, for -EUCLEAN, @need not covered by what
 * the MDS thinks we want), -EAGAIN (would block with NON_BLOCKING),
 * -EROFS (auth session is read-only) or -ESTALE (inode is shut down).
 *
 * NOTE(review): *got is printed by the final dout() on every path,
 * including those that never assign it, so callers should initialise it.
 */
static int try_get_cap_refs(struct inode *inode, int need, int want,
			    loff_t endoff, int flags, int *got)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
	int ret = 0;
	int have, implemented;
	bool snap_rwsem_locked = false;

	dout("get_cap_refs %p need %s want %s\n", inode,
	     ceph_cap_string(need), ceph_cap_string(want));

again:
	/* re-entered with snap_rwsem held after blocking in down_read() */
	spin_lock(&ci->i_ceph_lock);

	if ((flags & CHECK_FILELOCK) &&
	    (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
		dout("try_get_cap_refs %p error filelock\n", inode);
		ret = -EIO;
		goto out_unlock;
	}

	/* finish pending truncate */
	while (ci->i_truncate_pending) {
		/* both locks are released around the truncate call */
		spin_unlock(&ci->i_ceph_lock);
		if (snap_rwsem_locked) {
			up_read(&mdsc->snap_rwsem);
			snap_rwsem_locked = false;
		}
		__ceph_do_pending_vmtruncate(inode);
		spin_lock(&ci->i_ceph_lock);
	}

	have = __ceph_caps_issued(ci, &implemented);

	if (have & need & CEPH_CAP_FILE_WR) {
		if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
			     inode, endoff, ci->i_max_size);
			/*
			 * ret stays 0 (caller waits) when endoff is within
			 * i_requested_max_size; otherwise ask the caller to
			 * act, depending on whether we have an auth cap.
			 */
			if (endoff > ci->i_requested_max_size)
				ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN;
			goto out_unlock;
		}
		/*
		 * If a sync write is in progress, we must wait, so that we
		 * can get a final snapshot value for size+mtime.
		 */
		if (__ceph_have_pending_cap_snap(ci)) {
			dout("get_cap_refs %p cap_snap_pending\n", inode);
			goto out_unlock;
		}
	}

	if ((have & need) == need) {
		/*
		 * Look at (implemented & ~have & not) so that we keep waiting
		 * on transition from wanted -> needed caps.  This is needed
		 * for WRBUFFER|WR -> WR to avoid a new WR sync write from
		 * going before a prior buffered writeback happens.
		 *
		 * For RDCACHE|RD -> RD, there is no need to wait and we can
		 * just exclude the revoking caps and force to sync read.
		 */
		int not = want & ~(have & need);
		int revoking = implemented & ~have;
		int exclude = revoking & not;
		dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
		     inode, ceph_cap_string(have), ceph_cap_string(not),
		     ceph_cap_string(revoking));
		/* i.e. proceed unless FILE_BUFFER is among the excluded caps */
		if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) {
			/*
			 * snap_rwsem is needed when FILE_WR is required and
			 * no head snap context is set yet.
			 */
			if (!snap_rwsem_locked &&
			    !ci->i_head_snapc &&
			    (need & CEPH_CAP_FILE_WR)) {
				/* try without sleeping: i_ceph_lock is held */
				if (!down_read_trylock(&mdsc->snap_rwsem)) {
					/*
					 * we can not call down_read() when
					 * task isn't in TASK_RUNNING state
					 */
					if (flags & NON_BLOCKING) {
						ret = -EAGAIN;
						goto out_unlock;
					}

					/* drop the spinlock, block, start over */
					spin_unlock(&ci->i_ceph_lock);
					down_read(&mdsc->snap_rwsem);
					snap_rwsem_locked = true;
					goto again;
				}
				snap_rwsem_locked = true;
			}
			/* take the wanted caps only if all of them are held */
			if ((have & want) == want)
				*got = need | (want & ~exclude);
			else
				*got = need;
			ceph_take_cap_refs(ci, *got, true);
			ret = 1;
		}
	} else {
		int session_readonly = false;
		int mds_wanted;
		/* write/exclusive requests cannot succeed on a readonly session */
		if (ci->i_auth_cap &&
		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
			struct ceph_mds_session *s = ci->i_auth_cap->session;
			spin_lock(&s->s_cap_lock);
			session_readonly = s->s_readonly;
			spin_unlock(&s->s_cap_lock);
		}
		if (session_readonly) {
			dout("get_cap_refs %p need %s but mds%d readonly\n",
			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
			ret = -EROFS;
			goto out_unlock;
		}

		if (ceph_inode_is_shutdown(inode)) {
			dout("get_cap_refs %p inode is shutdown\n", inode);
			ret = -ESTALE;
			goto out_unlock;
		}
		/* some needed caps are not in the mds_wanted set */
		mds_wanted = __ceph_caps_mds_wanted(ci, false);
		if (need & ~mds_wanted) {
			dout("get_cap_refs %p need %s > mds_wanted %s\n",
			     inode, ceph_cap_string(need),
			     ceph_cap_string(mds_wanted));
			ret = -EUCLEAN;
			goto out_unlock;
		}

		/* ret is still 0 here: the caller has to wait */
		dout("get_cap_refs %p have %s need %s\n", inode,
		     ceph_cap_string(have), ceph_cap_string(need));
	}
out_unlock:

	/* reached with i_ceph_lock held on every path */
	__ceph_touch_fmode(ci, mdsc, flags);

	spin_unlock(&ci->i_ceph_lock);
	if (snap_rwsem_locked)
		up_read(&mdsc->snap_rwsem);

	/* 0 counts as a cap miss, 1 as a cap hit; errors count as neither */
	if (!ret)
		ceph_update_cap_mis(&mdsc->metric);
	else if (ret == 1)
		ceph_update_cap_hit(&mdsc->metric);

	dout("get_cap_refs %p ret %d got %s\n", inode,
	     ret, ceph_cap_string(*got));
	return ret;
}
289462306a36Sopenharmony_ci
289562306a36Sopenharmony_ci/*
289662306a36Sopenharmony_ci * Check the offset we are writing up to against our current
289762306a36Sopenharmony_ci * max_size.  If necessary, tell the MDS we want to write to
289862306a36Sopenharmony_ci * a larger offset.
289962306a36Sopenharmony_ci */
290062306a36Sopenharmony_cistatic void check_max_size(struct inode *inode, loff_t endoff)
290162306a36Sopenharmony_ci{
290262306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
290362306a36Sopenharmony_ci	int check = 0;
290462306a36Sopenharmony_ci
290562306a36Sopenharmony_ci	/* do we need to explicitly request a larger max_size? */
290662306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
290762306a36Sopenharmony_ci	if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) {
290862306a36Sopenharmony_ci		dout("write %p at large endoff %llu, req max_size\n",
290962306a36Sopenharmony_ci		     inode, endoff);
291062306a36Sopenharmony_ci		ci->i_wanted_max_size = endoff;
291162306a36Sopenharmony_ci	}
291262306a36Sopenharmony_ci	/* duplicate ceph_check_caps()'s logic */
291362306a36Sopenharmony_ci	if (ci->i_auth_cap &&
291462306a36Sopenharmony_ci	    (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) &&
291562306a36Sopenharmony_ci	    ci->i_wanted_max_size > ci->i_max_size &&
291662306a36Sopenharmony_ci	    ci->i_wanted_max_size > ci->i_requested_max_size)
291762306a36Sopenharmony_ci		check = 1;
291862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
291962306a36Sopenharmony_ci	if (check)
292062306a36Sopenharmony_ci		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY);
292162306a36Sopenharmony_ci}
292262306a36Sopenharmony_ci
292362306a36Sopenharmony_cistatic inline int get_used_fmode(int caps)
292462306a36Sopenharmony_ci{
292562306a36Sopenharmony_ci	int fmode = 0;
292662306a36Sopenharmony_ci	if (caps & CEPH_CAP_FILE_RD)
292762306a36Sopenharmony_ci		fmode |= CEPH_FILE_MODE_RD;
292862306a36Sopenharmony_ci	if (caps & CEPH_CAP_FILE_WR)
292962306a36Sopenharmony_ci		fmode |= CEPH_FILE_MODE_WR;
293062306a36Sopenharmony_ci	return fmode;
293162306a36Sopenharmony_ci}
293262306a36Sopenharmony_ci
293362306a36Sopenharmony_ciint ceph_try_get_caps(struct inode *inode, int need, int want,
293462306a36Sopenharmony_ci		      bool nonblock, int *got)
293562306a36Sopenharmony_ci{
293662306a36Sopenharmony_ci	int ret, flags;
293762306a36Sopenharmony_ci
293862306a36Sopenharmony_ci	BUG_ON(need & ~CEPH_CAP_FILE_RD);
293962306a36Sopenharmony_ci	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
294062306a36Sopenharmony_ci			CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
294162306a36Sopenharmony_ci			CEPH_CAP_ANY_DIR_OPS));
294262306a36Sopenharmony_ci	if (need) {
294362306a36Sopenharmony_ci		ret = ceph_pool_perm_check(inode, need);
294462306a36Sopenharmony_ci		if (ret < 0)
294562306a36Sopenharmony_ci			return ret;
294662306a36Sopenharmony_ci	}
294762306a36Sopenharmony_ci
294862306a36Sopenharmony_ci	flags = get_used_fmode(need | want);
294962306a36Sopenharmony_ci	if (nonblock)
295062306a36Sopenharmony_ci		flags |= NON_BLOCKING;
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_ci	ret = try_get_cap_refs(inode, need, want, 0, flags, got);
295362306a36Sopenharmony_ci	/* three special error codes */
295462306a36Sopenharmony_ci	if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN)
295562306a36Sopenharmony_ci		ret = 0;
295662306a36Sopenharmony_ci	return ret;
295762306a36Sopenharmony_ci}
295862306a36Sopenharmony_ci
/*
 * Wait for caps, and take cap references.  If we can't get a WR cap
 * due to a small max_size, make sure we check_max_size (and possibly
 * ask the mds) so we don't get hung up indefinitely.
 *
 * @fi may be NULL.  When it is given and was opened for write, the call
 * fails with -EBADF if fi->filp_gen no longer matches fsc->filp_gen;
 * this is checked up front and again after each attempt.
 *
 * On success the references that were taken are stored in *@got and 0
 * is returned.  On failure a negative error is returned (-ERESTARTSYS
 * if a signal interrupted the wait) and *@got is not written.
 */
int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
		    int want, loff_t endoff, int *got)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	int ret, _got, flags;

	ret = ceph_pool_perm_check(inode, need);
	if (ret < 0)
		return ret;

	if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
	    fi->filp_gen != READ_ONCE(fsc->filp_gen))
		return -EBADF;

	flags = get_used_fmode(need | want);

	while (true) {
		/*
		 * Each pass starts from the bits in CEPH_FILE_MODE_MASK only;
		 * CHECK_FILELOCK is re-evaluated every time around.
		 */
		flags &= CEPH_FILE_MODE_MASK;
		if (vfs_inode_has_locks(inode))
			flags |= CHECK_FILELOCK;
		_got = 0;
		ret = try_get_cap_refs(inode, need, want, endoff,
				       flags, &_got);
		WARN_ON_ONCE(ret == -EAGAIN);
		if (!ret) {
			/*
			 * Neither an error nor success: record ourselves on
			 * mdsc->cap_wait_list and sleep on i_cap_wq until an
			 * attempt returns non-zero or a signal arrives.
			 */
			struct ceph_mds_client *mdsc = fsc->mdsc;
			struct cap_wait cw;
			DEFINE_WAIT_FUNC(wait, woken_wake_function);

			cw.ino = ceph_ino(inode);
			cw.tgid = current->tgid;
			cw.need = need;
			cw.want = want;

			spin_lock(&mdsc->caps_list_lock);
			list_add(&cw.list, &mdsc->cap_wait_list);
			spin_unlock(&mdsc->caps_list_lock);

			/* make sure used fmode not timeout */
			ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
			add_wait_queue(&ci->i_cap_wq, &wait);

			/* attempts made from inside the wait loop use NON_BLOCKING */
			flags |= NON_BLOCKING;
			while (!(ret = try_get_cap_refs(inode, need, want,
							endoff, flags, &_got))) {
				if (signal_pending(current)) {
					ret = -ERESTARTSYS;
					break;
				}
				wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
			}

			remove_wait_queue(&ci->i_cap_wq, &wait);
			ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);

			spin_lock(&mdsc->caps_list_lock);
			list_del(&cw.list);
			spin_unlock(&mdsc->caps_list_lock);

			/* -EAGAIN from the non-blocking attempt: restart the outer loop */
			if (ret == -EAGAIN)
				continue;
		}

		/*
		 * Re-check the file generation after the attempt; give back
		 * any references we did obtain before failing.
		 */
		if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
		    fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
			if (ret >= 0 && _got)
				ceph_put_cap_refs(ci, _got);
			return -EBADF;
		}

		if (ret < 0) {
			/* both retryable errors first wait for a pending async create */
			if (ret == -EFBIG || ret == -EUCLEAN) {
				int ret2 = ceph_wait_on_async_create(inode);
				if (ret2 < 0)
					return ret2;
			}
			if (ret == -EFBIG) {
				check_max_size(inode, endoff);
				continue;
			}
			if (ret == -EUCLEAN) {
				/* session was killed, try renew caps */
				ret = ceph_renew_caps(inode, flags);
				if (ret == 0)
					continue;
			}
			return ret;
		}

		/*
		 * Regular file with inline data, non-zero size, and we got
		 * FILE_CACHE or FILE_LAZYIO: page 0 must be uptodate before
		 * the refs are handed out.
		 */
		if (S_ISREG(ci->netfs.inode.i_mode) &&
		    ceph_has_inline_data(ci) &&
		    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
		    i_size_read(inode) > 0) {
			struct page *page =
				find_get_page(inode->i_mapping, 0);
			if (page) {
				bool uptodate = PageUptodate(page);

				put_page(page);
				if (uptodate)
					break;
			}
			/*
			 * drop cap refs first because getattr while
			 * holding cap refs can cause deadlock.
			 */
			ceph_put_cap_refs(ci, _got);
			_got = 0;

			/*
			 * getattr request will bring inline data into
			 * page cache
			 */
			ret = __ceph_do_getattr(inode, NULL,
						CEPH_STAT_CAP_INLINE_DATA,
						true);
			if (ret < 0)
				return ret;
			/* refs were dropped above, so take them again */
			continue;
		}
		break;
	}
	*got = _got;
	return 0;
}
309062306a36Sopenharmony_ci
309162306a36Sopenharmony_ciint ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
309262306a36Sopenharmony_ci		  int *got)
309362306a36Sopenharmony_ci{
309462306a36Sopenharmony_ci	struct ceph_file_info *fi = filp->private_data;
309562306a36Sopenharmony_ci	struct inode *inode = file_inode(filp);
309662306a36Sopenharmony_ci
309762306a36Sopenharmony_ci	return __ceph_get_caps(inode, fi, need, want, endoff, got);
309862306a36Sopenharmony_ci}
309962306a36Sopenharmony_ci
310062306a36Sopenharmony_ci/*
310162306a36Sopenharmony_ci * Take cap refs.  Caller must already know we hold at least one ref
310262306a36Sopenharmony_ci * on the caps in question or we don't know this is safe.
310362306a36Sopenharmony_ci */
310462306a36Sopenharmony_civoid ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
310562306a36Sopenharmony_ci{
310662306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
310762306a36Sopenharmony_ci	ceph_take_cap_refs(ci, caps, false);
310862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
310962306a36Sopenharmony_ci}
311062306a36Sopenharmony_ci
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_ci/*
311362306a36Sopenharmony_ci * drop cap_snap that is not associated with any snapshot.
311462306a36Sopenharmony_ci * we don't need to send FLUSHSNAP message for it.
311562306a36Sopenharmony_ci */
311662306a36Sopenharmony_cistatic int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
311762306a36Sopenharmony_ci				  struct ceph_cap_snap *capsnap)
311862306a36Sopenharmony_ci{
311962306a36Sopenharmony_ci	if (!capsnap->need_flush &&
312062306a36Sopenharmony_ci	    !capsnap->writing && !capsnap->dirty_pages) {
312162306a36Sopenharmony_ci		dout("dropping cap_snap %p follows %llu\n",
312262306a36Sopenharmony_ci		     capsnap, capsnap->follows);
312362306a36Sopenharmony_ci		BUG_ON(capsnap->cap_flush.tid > 0);
312462306a36Sopenharmony_ci		ceph_put_snap_context(capsnap->context);
312562306a36Sopenharmony_ci		if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps))
312662306a36Sopenharmony_ci			ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_ci		list_del(&capsnap->ci_item);
312962306a36Sopenharmony_ci		ceph_put_cap_snap(capsnap);
313062306a36Sopenharmony_ci		return 1;
313162306a36Sopenharmony_ci	}
313262306a36Sopenharmony_ci	return 0;
313362306a36Sopenharmony_ci}
313462306a36Sopenharmony_ci
/* Selects the follow-up work __ceph_put_cap_refs() does after unlocking. */
enum put_cap_refs_mode {
	/* call ceph_check_caps() / ceph_flush_snaps() directly */
	PUT_CAP_REFS_SYNC = 0,
	/* neither check caps nor flush snaps */
	PUT_CAP_REFS_NO_CHECK,
	/* use ceph_queue_check_caps() / ceph_queue_flush_snaps() instead */
	PUT_CAP_REFS_ASYNC,
};
314062306a36Sopenharmony_ci
/*
 * Release cap refs.
 *
 * If we released the last ref on any given cap, call ceph_check_caps
 * to release (or schedule a release).
 *
 * If we are releasing a WR cap (from a sync write), finalize any affected
 * cap_snap, and wake up any waiters.
 *
 * @had is the mask of caps whose per-cap reference counters are
 * decremented.  @mode selects the follow-up once i_ceph_lock has been
 * dropped: PUT_CAP_REFS_SYNC calls ceph_check_caps()/ceph_flush_snaps()
 * directly, PUT_CAP_REFS_ASYNC queues them, and any other mode does
 * neither.
 */
static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
				enum put_cap_refs_mode mode)
{
	struct inode *inode = &ci->netfs.inode;
	/*
	 * last: number of counters that dropped to zero;
	 * put:  number of inode references to iput() after unlocking.
	 */
	int last = 0, put = 0, flushsnaps = 0, wake = 0;
	bool check_flushsnaps = false;

	spin_lock(&ci->i_ceph_lock);
	/* the pin ref never counts towards "last" */
	if (had & CEPH_CAP_PIN)
		--ci->i_pin_ref;
	if (had & CEPH_CAP_FILE_RD)
		if (--ci->i_rd_ref == 0)
			last++;
	if (had & CEPH_CAP_FILE_CACHE)
		if (--ci->i_rdcache_ref == 0)
			last++;
	if (had & CEPH_CAP_FILE_EXCL)
		if (--ci->i_fx_ref == 0)
			last++;
	if (had & CEPH_CAP_FILE_BUFFER) {
		if (--ci->i_wb_ref == 0) {
			last++;
			/* put the ref held by ceph_take_cap_refs() */
			put++;
			check_flushsnaps = true;
		}
		dout("put_cap_refs %p wb %d -> %d (?)\n",
		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
	}
	if (had & CEPH_CAP_FILE_WR) {
		if (--ci->i_wr_ref == 0) {
			/*
			 * The Fb caps are always taken and released
			 * together with the Fw caps.
			 */
			WARN_ON_ONCE(ci->i_wb_ref);

			last++;
			check_flushsnaps = true;
			/* nothing references the head snap context any more */
			if (ci->i_wrbuffer_ref_head == 0 &&
			    ci->i_dirty_caps == 0 &&
			    ci->i_flushing_caps == 0) {
				BUG_ON(!ci->i_head_snapc);
				ceph_put_snap_context(ci->i_head_snapc);
				ci->i_head_snapc = NULL;
			}
			/* see comment in __ceph_remove_cap() */
			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
				ceph_change_snap_realm(inode, NULL);
		}
	}
	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
		/* the pending cap_snap is the last entry on i_cap_snaps */
		struct ceph_cap_snap *capsnap =
			list_last_entry(&ci->i_cap_snaps,
					struct ceph_cap_snap,
					ci_item);

		capsnap->writing = 0;
		/* capsnap must not be touched again if the drop succeeds */
		if (ceph_try_drop_cap_snap(ci, capsnap))
			/* put the ref held by ceph_queue_cap_snap() */
			put++;
		else if (__ceph_finish_cap_snap(ci, capsnap))
			flushsnaps = 1;
		wake = 1;
	}
	spin_unlock(&ci->i_ceph_lock);

	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
	     last ? " last" : "", put ? " put" : "");

	/* follow-up work happens only after i_ceph_lock has been released */
	switch (mode) {
	case PUT_CAP_REFS_SYNC:
		if (last)
			ceph_check_caps(ci, 0);
		else if (flushsnaps)
			ceph_flush_snaps(ci, NULL);
		break;
	case PUT_CAP_REFS_ASYNC:
		if (last)
			ceph_queue_check_caps(inode);
		else if (flushsnaps)
			ceph_queue_flush_snaps(inode);
		break;
	default:
		break;
	}
	if (wake)
		wake_up_all(&ci->i_cap_wq);
	while (put-- > 0)
		iput(inode);
}
324162306a36Sopenharmony_ci
/*
 * Release the cap refs in @had using PUT_CAP_REFS_SYNC: any resulting
 * cap check or snap flush is called directly.
 */
void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
{
	__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_SYNC);
}
324662306a36Sopenharmony_ci
/*
 * Release the cap refs in @had using PUT_CAP_REFS_ASYNC: any resulting
 * cap check or snap flush is queued rather than called directly.
 */
void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
{
	__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
}
325162306a36Sopenharmony_ci
/*
 * Release the cap refs in @had using PUT_CAP_REFS_NO_CHECK: no cap
 * check or snap flush is triggered afterwards.
 */
void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
{
	__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
}
325662306a36Sopenharmony_ci
325762306a36Sopenharmony_ci/*
325862306a36Sopenharmony_ci * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
325962306a36Sopenharmony_ci * context.  Adjust per-snap dirty page accounting as appropriate.
326062306a36Sopenharmony_ci * Once all dirty data for a cap_snap is flushed, flush snapped file
326162306a36Sopenharmony_ci * metadata back to the MDS.  If we dropped the last ref, call
326262306a36Sopenharmony_ci * ceph_check_caps.
326362306a36Sopenharmony_ci */
326462306a36Sopenharmony_civoid ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
326562306a36Sopenharmony_ci				struct ceph_snap_context *snapc)
326662306a36Sopenharmony_ci{
326762306a36Sopenharmony_ci	struct inode *inode = &ci->netfs.inode;
326862306a36Sopenharmony_ci	struct ceph_cap_snap *capsnap = NULL, *iter;
326962306a36Sopenharmony_ci	int put = 0;
327062306a36Sopenharmony_ci	bool last = false;
327162306a36Sopenharmony_ci	bool flush_snaps = false;
327262306a36Sopenharmony_ci	bool complete_capsnap = false;
327362306a36Sopenharmony_ci
327462306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
327562306a36Sopenharmony_ci	ci->i_wrbuffer_ref -= nr;
327662306a36Sopenharmony_ci	if (ci->i_wrbuffer_ref == 0) {
327762306a36Sopenharmony_ci		last = true;
327862306a36Sopenharmony_ci		put++;
327962306a36Sopenharmony_ci	}
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_ci	if (ci->i_head_snapc == snapc) {
328262306a36Sopenharmony_ci		ci->i_wrbuffer_ref_head -= nr;
328362306a36Sopenharmony_ci		if (ci->i_wrbuffer_ref_head == 0 &&
328462306a36Sopenharmony_ci		    ci->i_wr_ref == 0 &&
328562306a36Sopenharmony_ci		    ci->i_dirty_caps == 0 &&
328662306a36Sopenharmony_ci		    ci->i_flushing_caps == 0) {
328762306a36Sopenharmony_ci			BUG_ON(!ci->i_head_snapc);
328862306a36Sopenharmony_ci			ceph_put_snap_context(ci->i_head_snapc);
328962306a36Sopenharmony_ci			ci->i_head_snapc = NULL;
329062306a36Sopenharmony_ci		}
329162306a36Sopenharmony_ci		dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n",
329262306a36Sopenharmony_ci		     inode,
329362306a36Sopenharmony_ci		     ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr,
329462306a36Sopenharmony_ci		     ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
329562306a36Sopenharmony_ci		     last ? " LAST" : "");
329662306a36Sopenharmony_ci	} else {
329762306a36Sopenharmony_ci		list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
329862306a36Sopenharmony_ci			if (iter->context == snapc) {
329962306a36Sopenharmony_ci				capsnap = iter;
330062306a36Sopenharmony_ci				break;
330162306a36Sopenharmony_ci			}
330262306a36Sopenharmony_ci		}
330362306a36Sopenharmony_ci
330462306a36Sopenharmony_ci		if (!capsnap) {
330562306a36Sopenharmony_ci			/*
330662306a36Sopenharmony_ci			 * The capsnap should already be removed when removing
330762306a36Sopenharmony_ci			 * auth cap in the case of a forced unmount.
330862306a36Sopenharmony_ci			 */
330962306a36Sopenharmony_ci			WARN_ON_ONCE(ci->i_auth_cap);
331062306a36Sopenharmony_ci			goto unlock;
331162306a36Sopenharmony_ci		}
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci		capsnap->dirty_pages -= nr;
331462306a36Sopenharmony_ci		if (capsnap->dirty_pages == 0) {
331562306a36Sopenharmony_ci			complete_capsnap = true;
331662306a36Sopenharmony_ci			if (!capsnap->writing) {
331762306a36Sopenharmony_ci				if (ceph_try_drop_cap_snap(ci, capsnap)) {
331862306a36Sopenharmony_ci					put++;
331962306a36Sopenharmony_ci				} else {
332062306a36Sopenharmony_ci					ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
332162306a36Sopenharmony_ci					flush_snaps = true;
332262306a36Sopenharmony_ci				}
332362306a36Sopenharmony_ci			}
332462306a36Sopenharmony_ci		}
332562306a36Sopenharmony_ci		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
332662306a36Sopenharmony_ci		     " snap %lld %d/%d -> %d/%d %s%s\n",
332762306a36Sopenharmony_ci		     inode, capsnap, capsnap->context->seq,
332862306a36Sopenharmony_ci		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
332962306a36Sopenharmony_ci		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
333062306a36Sopenharmony_ci		     last ? " (wrbuffer last)" : "",
333162306a36Sopenharmony_ci		     complete_capsnap ? " (complete capsnap)" : "");
333262306a36Sopenharmony_ci	}
333362306a36Sopenharmony_ci
333462306a36Sopenharmony_ciunlock:
333562306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
333662306a36Sopenharmony_ci
333762306a36Sopenharmony_ci	if (last) {
333862306a36Sopenharmony_ci		ceph_check_caps(ci, 0);
333962306a36Sopenharmony_ci	} else if (flush_snaps) {
334062306a36Sopenharmony_ci		ceph_flush_snaps(ci, NULL);
334162306a36Sopenharmony_ci	}
334262306a36Sopenharmony_ci	if (complete_capsnap)
334362306a36Sopenharmony_ci		wake_up_all(&ci->i_cap_wq);
334462306a36Sopenharmony_ci	while (put-- > 0) {
334562306a36Sopenharmony_ci		iput(inode);
334662306a36Sopenharmony_ci	}
334762306a36Sopenharmony_ci}
334862306a36Sopenharmony_ci
334962306a36Sopenharmony_ci/*
335062306a36Sopenharmony_ci * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
335162306a36Sopenharmony_ci */
335262306a36Sopenharmony_cistatic void invalidate_aliases(struct inode *inode)
335362306a36Sopenharmony_ci{
335462306a36Sopenharmony_ci	struct dentry *dn, *prev = NULL;
335562306a36Sopenharmony_ci
335662306a36Sopenharmony_ci	dout("invalidate_aliases inode %p\n", inode);
335762306a36Sopenharmony_ci	d_prune_aliases(inode);
335862306a36Sopenharmony_ci	/*
335962306a36Sopenharmony_ci	 * For non-directory inode, d_find_alias() only returns
336062306a36Sopenharmony_ci	 * hashed dentry. After calling d_invalidate(), the
336162306a36Sopenharmony_ci	 * dentry becomes unhashed.
336262306a36Sopenharmony_ci	 *
336362306a36Sopenharmony_ci	 * For directory inode, d_find_alias() can return
336462306a36Sopenharmony_ci	 * unhashed dentry. But directory inode should have
336562306a36Sopenharmony_ci	 * one alias at most.
336662306a36Sopenharmony_ci	 */
336762306a36Sopenharmony_ci	while ((dn = d_find_alias(inode))) {
336862306a36Sopenharmony_ci		if (dn == prev) {
336962306a36Sopenharmony_ci			dput(dn);
337062306a36Sopenharmony_ci			break;
337162306a36Sopenharmony_ci		}
337262306a36Sopenharmony_ci		d_invalidate(dn);
337362306a36Sopenharmony_ci		if (prev)
337462306a36Sopenharmony_ci			dput(prev);
337562306a36Sopenharmony_ci		prev = dn;
337662306a36Sopenharmony_ci	}
337762306a36Sopenharmony_ci	if (prev)
337862306a36Sopenharmony_ci		dput(prev);
337962306a36Sopenharmony_ci}
338062306a36Sopenharmony_ci
/*
 * Additional per-message values passed to handle_cap_grant() next to
 * the raw struct ceph_mds_caps.  Unlike the grant fields, these are
 * consumed there without byte-order conversion.
 */
struct cap_extra_info {
	/*
	 * In: namespace to install in ci->i_layout.pool_ns.
	 * Out: handle_cap_grant() stores the previous namespace pointer
	 * back here when it updates the layout.
	 */
	struct ceph_string *pool_ns;
	/* inline data */
	u64 inline_version;
	void *inline_data;
	u32 inline_len;
	/* dirstat */
	/* nfiles/nsubdirs are only applied when dirstat_valid is true */
	bool dirstat_valid;
	u64 nfiles;
	u64 nsubdirs;
	/* passed to inode_set_max_iversion_raw() */
	u64 change_attr;
	/* currently issued */
	/* attributes guarded by an issued *_EXCL cap are not overwritten */
	int issued;
	/* copied to ci->i_btime together with mode/uid/gid */
	struct timespec64 btime;
	/* compared with ci->fscrypt_auth; a mismatch only logs a warning */
	u8 *fscrypt_auth;
	u32 fscrypt_auth_len;
	/* replaces grant->size on encrypted inodes when that size is non-zero */
	u64 fscrypt_file_size;
};
339962306a36Sopenharmony_ci
340062306a36Sopenharmony_ci/*
340162306a36Sopenharmony_ci * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
340262306a36Sopenharmony_ci * actually be a revocation if it specifies a smaller cap set.)
340362306a36Sopenharmony_ci *
340462306a36Sopenharmony_ci * caller holds s_mutex and i_ceph_lock, we drop both.
340562306a36Sopenharmony_ci */
340662306a36Sopenharmony_cistatic void handle_cap_grant(struct inode *inode,
340762306a36Sopenharmony_ci			     struct ceph_mds_session *session,
340862306a36Sopenharmony_ci			     struct ceph_cap *cap,
340962306a36Sopenharmony_ci			     struct ceph_mds_caps *grant,
341062306a36Sopenharmony_ci			     struct ceph_buffer *xattr_buf,
341162306a36Sopenharmony_ci			     struct cap_extra_info *extra_info)
341262306a36Sopenharmony_ci	__releases(ci->i_ceph_lock)
341362306a36Sopenharmony_ci	__releases(session->s_mdsc->snap_rwsem)
341462306a36Sopenharmony_ci{
341562306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
341662306a36Sopenharmony_ci	int seq = le32_to_cpu(grant->seq);
341762306a36Sopenharmony_ci	int newcaps = le32_to_cpu(grant->caps);
341862306a36Sopenharmony_ci	int used, wanted, dirty;
341962306a36Sopenharmony_ci	u64 size = le64_to_cpu(grant->size);
342062306a36Sopenharmony_ci	u64 max_size = le64_to_cpu(grant->max_size);
342162306a36Sopenharmony_ci	unsigned char check_caps = 0;
342262306a36Sopenharmony_ci	bool was_stale = cap->cap_gen < atomic_read(&session->s_cap_gen);
342362306a36Sopenharmony_ci	bool wake = false;
342462306a36Sopenharmony_ci	bool writeback = false;
342562306a36Sopenharmony_ci	bool queue_trunc = false;
342662306a36Sopenharmony_ci	bool queue_invalidate = false;
342762306a36Sopenharmony_ci	bool deleted_inode = false;
342862306a36Sopenharmony_ci	bool fill_inline = false;
342962306a36Sopenharmony_ci
343062306a36Sopenharmony_ci	/*
343162306a36Sopenharmony_ci	 * If there is at least one crypto block then we'll trust
343262306a36Sopenharmony_ci	 * fscrypt_file_size. If the real length of the file is 0, then
343362306a36Sopenharmony_ci	 * ignore it (it has probably been truncated down to 0 by the MDS).
343462306a36Sopenharmony_ci	 */
343562306a36Sopenharmony_ci	if (IS_ENCRYPTED(inode) && size)
343662306a36Sopenharmony_ci		size = extra_info->fscrypt_file_size;
343762306a36Sopenharmony_ci
343862306a36Sopenharmony_ci	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
343962306a36Sopenharmony_ci	     inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
344062306a36Sopenharmony_ci	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
344162306a36Sopenharmony_ci		i_size_read(inode));
344262306a36Sopenharmony_ci
344362306a36Sopenharmony_ci
344462306a36Sopenharmony_ci	/*
344562306a36Sopenharmony_ci	 * If CACHE is being revoked, and we have no dirty buffers,
344662306a36Sopenharmony_ci	 * try to invalidate (once).  (If there are dirty buffers, we
344762306a36Sopenharmony_ci	 * will invalidate _after_ writeback.)
344862306a36Sopenharmony_ci	 */
344962306a36Sopenharmony_ci	if (S_ISREG(inode->i_mode) && /* don't invalidate readdir cache */
345062306a36Sopenharmony_ci	    ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
345162306a36Sopenharmony_ci	    (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
345262306a36Sopenharmony_ci	    !(ci->i_wrbuffer_ref || ci->i_wb_ref)) {
345362306a36Sopenharmony_ci		if (try_nonblocking_invalidate(inode)) {
345462306a36Sopenharmony_ci			/* there were locked pages.. invalidate later
345562306a36Sopenharmony_ci			   in a separate thread. */
345662306a36Sopenharmony_ci			if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
345762306a36Sopenharmony_ci				queue_invalidate = true;
345862306a36Sopenharmony_ci				ci->i_rdcache_revoking = ci->i_rdcache_gen;
345962306a36Sopenharmony_ci			}
346062306a36Sopenharmony_ci		}
346162306a36Sopenharmony_ci	}
346262306a36Sopenharmony_ci
346362306a36Sopenharmony_ci	if (was_stale)
346462306a36Sopenharmony_ci		cap->issued = cap->implemented = CEPH_CAP_PIN;
346562306a36Sopenharmony_ci
346662306a36Sopenharmony_ci	/*
346762306a36Sopenharmony_ci	 * auth mds of the inode changed. we received the cap export message,
346862306a36Sopenharmony_ci	 * but still haven't received the cap import message. handle_cap_export
346962306a36Sopenharmony_ci	 * updated the new auth MDS' cap.
347062306a36Sopenharmony_ci	 *
347162306a36Sopenharmony_ci	 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
347262306a36Sopenharmony_ci	 * that was sent before the cap import message. So don't remove caps.
347362306a36Sopenharmony_ci	 */
347462306a36Sopenharmony_ci	if (ceph_seq_cmp(seq, cap->seq) <= 0) {
347562306a36Sopenharmony_ci		WARN_ON(cap != ci->i_auth_cap);
347662306a36Sopenharmony_ci		WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
347762306a36Sopenharmony_ci		seq = cap->seq;
347862306a36Sopenharmony_ci		newcaps |= cap->issued;
347962306a36Sopenharmony_ci	}
348062306a36Sopenharmony_ci
348162306a36Sopenharmony_ci	/* side effects now are allowed */
348262306a36Sopenharmony_ci	cap->cap_gen = atomic_read(&session->s_cap_gen);
348362306a36Sopenharmony_ci	cap->seq = seq;
348462306a36Sopenharmony_ci
348562306a36Sopenharmony_ci	__check_cap_issue(ci, cap, newcaps);
348662306a36Sopenharmony_ci
348762306a36Sopenharmony_ci	inode_set_max_iversion_raw(inode, extra_info->change_attr);
348862306a36Sopenharmony_ci
348962306a36Sopenharmony_ci	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
349062306a36Sopenharmony_ci	    (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) {
349162306a36Sopenharmony_ci		umode_t mode = le32_to_cpu(grant->mode);
349262306a36Sopenharmony_ci
349362306a36Sopenharmony_ci		if (inode_wrong_type(inode, mode))
349462306a36Sopenharmony_ci			pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n",
349562306a36Sopenharmony_ci				     ceph_vinop(inode), inode->i_mode, mode);
349662306a36Sopenharmony_ci		else
349762306a36Sopenharmony_ci			inode->i_mode = mode;
349862306a36Sopenharmony_ci		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
349962306a36Sopenharmony_ci		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
350062306a36Sopenharmony_ci		ci->i_btime = extra_info->btime;
350162306a36Sopenharmony_ci		dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
350262306a36Sopenharmony_ci		     from_kuid(&init_user_ns, inode->i_uid),
350362306a36Sopenharmony_ci		     from_kgid(&init_user_ns, inode->i_gid));
350462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
350562306a36Sopenharmony_ci		if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len ||
350662306a36Sopenharmony_ci		    memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth,
350762306a36Sopenharmony_ci			   ci->fscrypt_auth_len))
350862306a36Sopenharmony_ci			pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n",
350962306a36Sopenharmony_ci				__func__, ci->fscrypt_auth_len,
351062306a36Sopenharmony_ci				extra_info->fscrypt_auth_len);
351162306a36Sopenharmony_ci#endif
351262306a36Sopenharmony_ci	}
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ci	if ((newcaps & CEPH_CAP_LINK_SHARED) &&
351562306a36Sopenharmony_ci	    (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
351662306a36Sopenharmony_ci		set_nlink(inode, le32_to_cpu(grant->nlink));
351762306a36Sopenharmony_ci		if (inode->i_nlink == 0)
351862306a36Sopenharmony_ci			deleted_inode = true;
351962306a36Sopenharmony_ci	}
352062306a36Sopenharmony_ci
352162306a36Sopenharmony_ci	if ((extra_info->issued & CEPH_CAP_XATTR_EXCL) == 0 &&
352262306a36Sopenharmony_ci	    grant->xattr_len) {
352362306a36Sopenharmony_ci		int len = le32_to_cpu(grant->xattr_len);
352462306a36Sopenharmony_ci		u64 version = le64_to_cpu(grant->xattr_version);
352562306a36Sopenharmony_ci
352662306a36Sopenharmony_ci		if (version > ci->i_xattrs.version) {
352762306a36Sopenharmony_ci			dout(" got new xattrs v%llu on %p len %d\n",
352862306a36Sopenharmony_ci			     version, inode, len);
352962306a36Sopenharmony_ci			if (ci->i_xattrs.blob)
353062306a36Sopenharmony_ci				ceph_buffer_put(ci->i_xattrs.blob);
353162306a36Sopenharmony_ci			ci->i_xattrs.blob = ceph_buffer_get(xattr_buf);
353262306a36Sopenharmony_ci			ci->i_xattrs.version = version;
353362306a36Sopenharmony_ci			ceph_forget_all_cached_acls(inode);
353462306a36Sopenharmony_ci			ceph_security_invalidate_secctx(inode);
353562306a36Sopenharmony_ci		}
353662306a36Sopenharmony_ci	}
353762306a36Sopenharmony_ci
353862306a36Sopenharmony_ci	if (newcaps & CEPH_CAP_ANY_RD) {
353962306a36Sopenharmony_ci		struct timespec64 mtime, atime, ctime;
354062306a36Sopenharmony_ci		/* ctime/mtime/atime? */
354162306a36Sopenharmony_ci		ceph_decode_timespec64(&mtime, &grant->mtime);
354262306a36Sopenharmony_ci		ceph_decode_timespec64(&atime, &grant->atime);
354362306a36Sopenharmony_ci		ceph_decode_timespec64(&ctime, &grant->ctime);
354462306a36Sopenharmony_ci		ceph_fill_file_time(inode, extra_info->issued,
354562306a36Sopenharmony_ci				    le32_to_cpu(grant->time_warp_seq),
354662306a36Sopenharmony_ci				    &ctime, &mtime, &atime);
354762306a36Sopenharmony_ci	}
354862306a36Sopenharmony_ci
354962306a36Sopenharmony_ci	if ((newcaps & CEPH_CAP_FILE_SHARED) && extra_info->dirstat_valid) {
355062306a36Sopenharmony_ci		ci->i_files = extra_info->nfiles;
355162306a36Sopenharmony_ci		ci->i_subdirs = extra_info->nsubdirs;
355262306a36Sopenharmony_ci	}
355362306a36Sopenharmony_ci
355462306a36Sopenharmony_ci	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
355562306a36Sopenharmony_ci		/* file layout may have changed */
355662306a36Sopenharmony_ci		s64 old_pool = ci->i_layout.pool_id;
355762306a36Sopenharmony_ci		struct ceph_string *old_ns;
355862306a36Sopenharmony_ci
355962306a36Sopenharmony_ci		ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout);
356062306a36Sopenharmony_ci		old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
356162306a36Sopenharmony_ci					lockdep_is_held(&ci->i_ceph_lock));
356262306a36Sopenharmony_ci		rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns);
356362306a36Sopenharmony_ci
356462306a36Sopenharmony_ci		if (ci->i_layout.pool_id != old_pool ||
356562306a36Sopenharmony_ci		    extra_info->pool_ns != old_ns)
356662306a36Sopenharmony_ci			ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
356762306a36Sopenharmony_ci
356862306a36Sopenharmony_ci		extra_info->pool_ns = old_ns;
356962306a36Sopenharmony_ci
357062306a36Sopenharmony_ci		/* size/truncate_seq? */
357162306a36Sopenharmony_ci		queue_trunc = ceph_fill_file_size(inode, extra_info->issued,
357262306a36Sopenharmony_ci					le32_to_cpu(grant->truncate_seq),
357362306a36Sopenharmony_ci					le64_to_cpu(grant->truncate_size),
357462306a36Sopenharmony_ci					size);
357562306a36Sopenharmony_ci	}
357662306a36Sopenharmony_ci
357762306a36Sopenharmony_ci	if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
357862306a36Sopenharmony_ci		if (max_size != ci->i_max_size) {
357962306a36Sopenharmony_ci			dout("max_size %lld -> %llu\n",
358062306a36Sopenharmony_ci			     ci->i_max_size, max_size);
358162306a36Sopenharmony_ci			ci->i_max_size = max_size;
358262306a36Sopenharmony_ci			if (max_size >= ci->i_wanted_max_size) {
358362306a36Sopenharmony_ci				ci->i_wanted_max_size = 0;  /* reset */
358462306a36Sopenharmony_ci				ci->i_requested_max_size = 0;
358562306a36Sopenharmony_ci			}
358662306a36Sopenharmony_ci			wake = true;
358762306a36Sopenharmony_ci		}
358862306a36Sopenharmony_ci	}
358962306a36Sopenharmony_ci
359062306a36Sopenharmony_ci	/* check cap bits */
359162306a36Sopenharmony_ci	wanted = __ceph_caps_wanted(ci);
359262306a36Sopenharmony_ci	used = __ceph_caps_used(ci);
359362306a36Sopenharmony_ci	dirty = __ceph_caps_dirty(ci);
359462306a36Sopenharmony_ci	dout(" my wanted = %s, used = %s, dirty %s\n",
359562306a36Sopenharmony_ci	     ceph_cap_string(wanted),
359662306a36Sopenharmony_ci	     ceph_cap_string(used),
359762306a36Sopenharmony_ci	     ceph_cap_string(dirty));
359862306a36Sopenharmony_ci
359962306a36Sopenharmony_ci	if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
360062306a36Sopenharmony_ci	    (wanted & ~(cap->mds_wanted | newcaps))) {
360162306a36Sopenharmony_ci		/*
360262306a36Sopenharmony_ci		 * If mds is importing cap, prior cap messages that update
360362306a36Sopenharmony_ci		 * 'wanted' may get dropped by mds (migrate seq mismatch).
360462306a36Sopenharmony_ci		 *
360562306a36Sopenharmony_ci		 * We don't send cap message to update 'wanted' if what we
360662306a36Sopenharmony_ci		 * want are already issued. If mds revokes caps, cap message
360762306a36Sopenharmony_ci		 * that releases caps also tells mds what we want. But if
360862306a36Sopenharmony_ci		 * caps got revoked by mds forcedly (session stale). We may
360962306a36Sopenharmony_ci		 * haven't told mds what we want.
361062306a36Sopenharmony_ci		 */
361162306a36Sopenharmony_ci		check_caps = 1;
361262306a36Sopenharmony_ci	}
361362306a36Sopenharmony_ci
361462306a36Sopenharmony_ci	/* revocation, grant, or no-op? */
361562306a36Sopenharmony_ci	if (cap->issued & ~newcaps) {
361662306a36Sopenharmony_ci		int revoking = cap->issued & ~newcaps;
361762306a36Sopenharmony_ci
361862306a36Sopenharmony_ci		dout("revocation: %s -> %s (revoking %s)\n",
361962306a36Sopenharmony_ci		     ceph_cap_string(cap->issued),
362062306a36Sopenharmony_ci		     ceph_cap_string(newcaps),
362162306a36Sopenharmony_ci		     ceph_cap_string(revoking));
362262306a36Sopenharmony_ci		if (S_ISREG(inode->i_mode) &&
362362306a36Sopenharmony_ci		    (revoking & used & CEPH_CAP_FILE_BUFFER))
362462306a36Sopenharmony_ci			writeback = true;  /* initiate writeback; will delay ack */
362562306a36Sopenharmony_ci		else if (queue_invalidate &&
362662306a36Sopenharmony_ci			 revoking == CEPH_CAP_FILE_CACHE &&
362762306a36Sopenharmony_ci			 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
362862306a36Sopenharmony_ci			; /* do nothing yet, invalidation will be queued */
362962306a36Sopenharmony_ci		else if (cap == ci->i_auth_cap)
363062306a36Sopenharmony_ci			check_caps = 1; /* check auth cap only */
363162306a36Sopenharmony_ci		else
363262306a36Sopenharmony_ci			check_caps = 2; /* check all caps */
363362306a36Sopenharmony_ci		/* If there is new caps, try to wake up the waiters */
363462306a36Sopenharmony_ci		if (~cap->issued & newcaps)
363562306a36Sopenharmony_ci			wake = true;
363662306a36Sopenharmony_ci		cap->issued = newcaps;
363762306a36Sopenharmony_ci		cap->implemented |= newcaps;
363862306a36Sopenharmony_ci	} else if (cap->issued == newcaps) {
363962306a36Sopenharmony_ci		dout("caps unchanged: %s -> %s\n",
364062306a36Sopenharmony_ci		     ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
364162306a36Sopenharmony_ci	} else {
364262306a36Sopenharmony_ci		dout("grant: %s -> %s\n", ceph_cap_string(cap->issued),
364362306a36Sopenharmony_ci		     ceph_cap_string(newcaps));
364462306a36Sopenharmony_ci		/* non-auth MDS is revoking the newly grant caps ? */
364562306a36Sopenharmony_ci		if (cap == ci->i_auth_cap &&
364662306a36Sopenharmony_ci		    __ceph_caps_revoking_other(ci, cap, newcaps))
364762306a36Sopenharmony_ci		    check_caps = 2;
364862306a36Sopenharmony_ci
364962306a36Sopenharmony_ci		cap->issued = newcaps;
365062306a36Sopenharmony_ci		cap->implemented |= newcaps; /* add bits only, to
365162306a36Sopenharmony_ci					      * avoid stepping on a
365262306a36Sopenharmony_ci					      * pending revocation */
365362306a36Sopenharmony_ci		wake = true;
365462306a36Sopenharmony_ci	}
365562306a36Sopenharmony_ci	BUG_ON(cap->issued & ~cap->implemented);
365662306a36Sopenharmony_ci
365762306a36Sopenharmony_ci	/* don't let check_caps skip sending a response to MDS for revoke msgs */
365862306a36Sopenharmony_ci	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
365962306a36Sopenharmony_ci		cap->mds_wanted = 0;
366062306a36Sopenharmony_ci		if (cap == ci->i_auth_cap)
366162306a36Sopenharmony_ci			check_caps = 1; /* check auth cap only */
366262306a36Sopenharmony_ci		else
366362306a36Sopenharmony_ci			check_caps = 2; /* check all caps */
366462306a36Sopenharmony_ci	}
366562306a36Sopenharmony_ci
366662306a36Sopenharmony_ci	if (extra_info->inline_version > 0 &&
366762306a36Sopenharmony_ci	    extra_info->inline_version >= ci->i_inline_version) {
366862306a36Sopenharmony_ci		ci->i_inline_version = extra_info->inline_version;
366962306a36Sopenharmony_ci		if (ci->i_inline_version != CEPH_INLINE_NONE &&
367062306a36Sopenharmony_ci		    (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
367162306a36Sopenharmony_ci			fill_inline = true;
367262306a36Sopenharmony_ci	}
367362306a36Sopenharmony_ci
367462306a36Sopenharmony_ci	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
367562306a36Sopenharmony_ci		if (ci->i_auth_cap == cap) {
367662306a36Sopenharmony_ci			if (newcaps & ~extra_info->issued)
367762306a36Sopenharmony_ci				wake = true;
367862306a36Sopenharmony_ci
367962306a36Sopenharmony_ci			if (ci->i_requested_max_size > max_size ||
368062306a36Sopenharmony_ci			    !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
368162306a36Sopenharmony_ci				/* re-request max_size if necessary */
368262306a36Sopenharmony_ci				ci->i_requested_max_size = 0;
368362306a36Sopenharmony_ci				wake = true;
368462306a36Sopenharmony_ci			}
368562306a36Sopenharmony_ci
368662306a36Sopenharmony_ci			ceph_kick_flushing_inode_caps(session, ci);
368762306a36Sopenharmony_ci		}
368862306a36Sopenharmony_ci		up_read(&session->s_mdsc->snap_rwsem);
368962306a36Sopenharmony_ci	}
369062306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
369162306a36Sopenharmony_ci
369262306a36Sopenharmony_ci	if (fill_inline)
369362306a36Sopenharmony_ci		ceph_fill_inline_data(inode, NULL, extra_info->inline_data,
369462306a36Sopenharmony_ci				      extra_info->inline_len);
369562306a36Sopenharmony_ci
369662306a36Sopenharmony_ci	if (queue_trunc)
369762306a36Sopenharmony_ci		ceph_queue_vmtruncate(inode);
369862306a36Sopenharmony_ci
369962306a36Sopenharmony_ci	if (writeback)
370062306a36Sopenharmony_ci		/*
370162306a36Sopenharmony_ci		 * queue inode for writeback: we can't actually call
370262306a36Sopenharmony_ci		 * filemap_write_and_wait, etc. from message handler
370362306a36Sopenharmony_ci		 * context.
370462306a36Sopenharmony_ci		 */
370562306a36Sopenharmony_ci		ceph_queue_writeback(inode);
370662306a36Sopenharmony_ci	if (queue_invalidate)
370762306a36Sopenharmony_ci		ceph_queue_invalidate(inode);
370862306a36Sopenharmony_ci	if (deleted_inode)
370962306a36Sopenharmony_ci		invalidate_aliases(inode);
371062306a36Sopenharmony_ci	if (wake)
371162306a36Sopenharmony_ci		wake_up_all(&ci->i_cap_wq);
371262306a36Sopenharmony_ci
371362306a36Sopenharmony_ci	mutex_unlock(&session->s_mutex);
371462306a36Sopenharmony_ci	if (check_caps == 1)
371562306a36Sopenharmony_ci		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
371662306a36Sopenharmony_ci	else if (check_caps == 2)
371762306a36Sopenharmony_ci		ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
371862306a36Sopenharmony_ci}
371962306a36Sopenharmony_ci
372062306a36Sopenharmony_ci/*
372162306a36Sopenharmony_ci * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
372262306a36Sopenharmony_ci * MDS has been safely committed.
372362306a36Sopenharmony_ci */
372462306a36Sopenharmony_cistatic void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
372562306a36Sopenharmony_ci				 struct ceph_mds_caps *m,
372662306a36Sopenharmony_ci				 struct ceph_mds_session *session,
372762306a36Sopenharmony_ci				 struct ceph_cap *cap)
372862306a36Sopenharmony_ci	__releases(ci->i_ceph_lock)
372962306a36Sopenharmony_ci{
373062306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
373162306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
373262306a36Sopenharmony_ci	struct ceph_cap_flush *cf, *tmp_cf;
373362306a36Sopenharmony_ci	LIST_HEAD(to_remove);
373462306a36Sopenharmony_ci	unsigned seq = le32_to_cpu(m->seq);
373562306a36Sopenharmony_ci	int dirty = le32_to_cpu(m->dirty);
373662306a36Sopenharmony_ci	int cleaned = 0;
373762306a36Sopenharmony_ci	bool drop = false;
373862306a36Sopenharmony_ci	bool wake_ci = false;
373962306a36Sopenharmony_ci	bool wake_mdsc = false;
374062306a36Sopenharmony_ci
374162306a36Sopenharmony_ci	list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
374262306a36Sopenharmony_ci		/* Is this the one that was flushed? */
374362306a36Sopenharmony_ci		if (cf->tid == flush_tid)
374462306a36Sopenharmony_ci			cleaned = cf->caps;
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci		/* Is this a capsnap? */
374762306a36Sopenharmony_ci		if (cf->is_capsnap)
374862306a36Sopenharmony_ci			continue;
374962306a36Sopenharmony_ci
375062306a36Sopenharmony_ci		if (cf->tid <= flush_tid) {
375162306a36Sopenharmony_ci			/*
375262306a36Sopenharmony_ci			 * An earlier or current tid. The FLUSH_ACK should
375362306a36Sopenharmony_ci			 * represent a superset of this flush's caps.
375462306a36Sopenharmony_ci			 */
375562306a36Sopenharmony_ci			wake_ci |= __detach_cap_flush_from_ci(ci, cf);
375662306a36Sopenharmony_ci			list_add_tail(&cf->i_list, &to_remove);
375762306a36Sopenharmony_ci		} else {
375862306a36Sopenharmony_ci			/*
375962306a36Sopenharmony_ci			 * This is a later one. Any caps in it are still dirty
376062306a36Sopenharmony_ci			 * so don't count them as cleaned.
376162306a36Sopenharmony_ci			 */
376262306a36Sopenharmony_ci			cleaned &= ~cf->caps;
376362306a36Sopenharmony_ci			if (!cleaned)
376462306a36Sopenharmony_ci				break;
376562306a36Sopenharmony_ci		}
376662306a36Sopenharmony_ci	}
376762306a36Sopenharmony_ci
376862306a36Sopenharmony_ci	dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
376962306a36Sopenharmony_ci	     " flushing %s -> %s\n",
377062306a36Sopenharmony_ci	     inode, session->s_mds, seq, ceph_cap_string(dirty),
377162306a36Sopenharmony_ci	     ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
377262306a36Sopenharmony_ci	     ceph_cap_string(ci->i_flushing_caps & ~cleaned));
377362306a36Sopenharmony_ci
377462306a36Sopenharmony_ci	if (list_empty(&to_remove) && !cleaned)
377562306a36Sopenharmony_ci		goto out;
377662306a36Sopenharmony_ci
377762306a36Sopenharmony_ci	ci->i_flushing_caps &= ~cleaned;
377862306a36Sopenharmony_ci
377962306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
378062306a36Sopenharmony_ci
378162306a36Sopenharmony_ci	list_for_each_entry(cf, &to_remove, i_list)
378262306a36Sopenharmony_ci		wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, cf);
378362306a36Sopenharmony_ci
378462306a36Sopenharmony_ci	if (ci->i_flushing_caps == 0) {
378562306a36Sopenharmony_ci		if (list_empty(&ci->i_cap_flush_list)) {
378662306a36Sopenharmony_ci			list_del_init(&ci->i_flushing_item);
378762306a36Sopenharmony_ci			if (!list_empty(&session->s_cap_flushing)) {
378862306a36Sopenharmony_ci				dout(" mds%d still flushing cap on %p\n",
378962306a36Sopenharmony_ci				     session->s_mds,
379062306a36Sopenharmony_ci				     &list_first_entry(&session->s_cap_flushing,
379162306a36Sopenharmony_ci						struct ceph_inode_info,
379262306a36Sopenharmony_ci						i_flushing_item)->netfs.inode);
379362306a36Sopenharmony_ci			}
379462306a36Sopenharmony_ci		}
379562306a36Sopenharmony_ci		mdsc->num_cap_flushing--;
379662306a36Sopenharmony_ci		dout(" inode %p now !flushing\n", inode);
379762306a36Sopenharmony_ci
379862306a36Sopenharmony_ci		if (ci->i_dirty_caps == 0) {
379962306a36Sopenharmony_ci			dout(" inode %p now clean\n", inode);
380062306a36Sopenharmony_ci			BUG_ON(!list_empty(&ci->i_dirty_item));
380162306a36Sopenharmony_ci			drop = true;
380262306a36Sopenharmony_ci			if (ci->i_wr_ref == 0 &&
380362306a36Sopenharmony_ci			    ci->i_wrbuffer_ref_head == 0) {
380462306a36Sopenharmony_ci				BUG_ON(!ci->i_head_snapc);
380562306a36Sopenharmony_ci				ceph_put_snap_context(ci->i_head_snapc);
380662306a36Sopenharmony_ci				ci->i_head_snapc = NULL;
380762306a36Sopenharmony_ci			}
380862306a36Sopenharmony_ci		} else {
380962306a36Sopenharmony_ci			BUG_ON(list_empty(&ci->i_dirty_item));
381062306a36Sopenharmony_ci		}
381162306a36Sopenharmony_ci	}
381262306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
381362306a36Sopenharmony_ci
381462306a36Sopenharmony_ciout:
381562306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
381662306a36Sopenharmony_ci
381762306a36Sopenharmony_ci	while (!list_empty(&to_remove)) {
381862306a36Sopenharmony_ci		cf = list_first_entry(&to_remove,
381962306a36Sopenharmony_ci				      struct ceph_cap_flush, i_list);
382062306a36Sopenharmony_ci		list_del_init(&cf->i_list);
382162306a36Sopenharmony_ci		if (!cf->is_capsnap)
382262306a36Sopenharmony_ci			ceph_free_cap_flush(cf);
382362306a36Sopenharmony_ci	}
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci	if (wake_ci)
382662306a36Sopenharmony_ci		wake_up_all(&ci->i_cap_wq);
382762306a36Sopenharmony_ci	if (wake_mdsc)
382862306a36Sopenharmony_ci		wake_up_all(&mdsc->cap_flushing_wq);
382962306a36Sopenharmony_ci	if (drop)
383062306a36Sopenharmony_ci		iput(inode);
383162306a36Sopenharmony_ci}
383262306a36Sopenharmony_ci
383362306a36Sopenharmony_civoid __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
383462306a36Sopenharmony_ci			   bool *wake_ci, bool *wake_mdsc)
383562306a36Sopenharmony_ci{
383662306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
383762306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
383862306a36Sopenharmony_ci	bool ret;
383962306a36Sopenharmony_ci
384062306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
384162306a36Sopenharmony_ci
384262306a36Sopenharmony_ci	dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
384362306a36Sopenharmony_ci
384462306a36Sopenharmony_ci	list_del_init(&capsnap->ci_item);
384562306a36Sopenharmony_ci	ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
384662306a36Sopenharmony_ci	if (wake_ci)
384762306a36Sopenharmony_ci		*wake_ci = ret;
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
385062306a36Sopenharmony_ci	if (list_empty(&ci->i_cap_flush_list))
385162306a36Sopenharmony_ci		list_del_init(&ci->i_flushing_item);
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
385462306a36Sopenharmony_ci	if (wake_mdsc)
385562306a36Sopenharmony_ci		*wake_mdsc = ret;
385662306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
385762306a36Sopenharmony_ci}
385862306a36Sopenharmony_ci
385962306a36Sopenharmony_civoid ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
386062306a36Sopenharmony_ci			 bool *wake_ci, bool *wake_mdsc)
386162306a36Sopenharmony_ci{
386262306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
386562306a36Sopenharmony_ci
386662306a36Sopenharmony_ci	WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
386762306a36Sopenharmony_ci	__ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
386862306a36Sopenharmony_ci}
386962306a36Sopenharmony_ci
387062306a36Sopenharmony_ci/*
387162306a36Sopenharmony_ci * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
387262306a36Sopenharmony_ci * throw away our cap_snap.
387362306a36Sopenharmony_ci *
387462306a36Sopenharmony_ci * Caller hold s_mutex.
387562306a36Sopenharmony_ci */
387662306a36Sopenharmony_cistatic void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
387762306a36Sopenharmony_ci				     struct ceph_mds_caps *m,
387862306a36Sopenharmony_ci				     struct ceph_mds_session *session)
387962306a36Sopenharmony_ci{
388062306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
388162306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
388262306a36Sopenharmony_ci	u64 follows = le64_to_cpu(m->snap_follows);
388362306a36Sopenharmony_ci	struct ceph_cap_snap *capsnap = NULL, *iter;
388462306a36Sopenharmony_ci	bool wake_ci = false;
388562306a36Sopenharmony_ci	bool wake_mdsc = false;
388662306a36Sopenharmony_ci
388762306a36Sopenharmony_ci	dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
388862306a36Sopenharmony_ci	     inode, ci, session->s_mds, follows);
388962306a36Sopenharmony_ci
389062306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
389162306a36Sopenharmony_ci	list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
389262306a36Sopenharmony_ci		if (iter->follows == follows) {
389362306a36Sopenharmony_ci			if (iter->cap_flush.tid != flush_tid) {
389462306a36Sopenharmony_ci				dout(" cap_snap %p follows %lld tid %lld !="
389562306a36Sopenharmony_ci				     " %lld\n", iter, follows,
389662306a36Sopenharmony_ci				     flush_tid, iter->cap_flush.tid);
389762306a36Sopenharmony_ci				break;
389862306a36Sopenharmony_ci			}
389962306a36Sopenharmony_ci			capsnap = iter;
390062306a36Sopenharmony_ci			break;
390162306a36Sopenharmony_ci		} else {
390262306a36Sopenharmony_ci			dout(" skipping cap_snap %p follows %lld\n",
390362306a36Sopenharmony_ci			     iter, iter->follows);
390462306a36Sopenharmony_ci		}
390562306a36Sopenharmony_ci	}
390662306a36Sopenharmony_ci	if (capsnap)
390762306a36Sopenharmony_ci		ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
390862306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
390962306a36Sopenharmony_ci
391062306a36Sopenharmony_ci	if (capsnap) {
391162306a36Sopenharmony_ci		ceph_put_snap_context(capsnap->context);
391262306a36Sopenharmony_ci		ceph_put_cap_snap(capsnap);
391362306a36Sopenharmony_ci		if (wake_ci)
391462306a36Sopenharmony_ci			wake_up_all(&ci->i_cap_wq);
391562306a36Sopenharmony_ci		if (wake_mdsc)
391662306a36Sopenharmony_ci			wake_up_all(&mdsc->cap_flushing_wq);
391762306a36Sopenharmony_ci		iput(inode);
391862306a36Sopenharmony_ci	}
391962306a36Sopenharmony_ci}
392062306a36Sopenharmony_ci
392162306a36Sopenharmony_ci/*
392262306a36Sopenharmony_ci * Handle TRUNC from MDS, indicating file truncation.
392362306a36Sopenharmony_ci *
392462306a36Sopenharmony_ci * caller hold s_mutex.
392562306a36Sopenharmony_ci */
392662306a36Sopenharmony_cistatic bool handle_cap_trunc(struct inode *inode,
392762306a36Sopenharmony_ci			     struct ceph_mds_caps *trunc,
392862306a36Sopenharmony_ci			     struct ceph_mds_session *session,
392962306a36Sopenharmony_ci			     struct cap_extra_info *extra_info)
393062306a36Sopenharmony_ci{
393162306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
393262306a36Sopenharmony_ci	int mds = session->s_mds;
393362306a36Sopenharmony_ci	int seq = le32_to_cpu(trunc->seq);
393462306a36Sopenharmony_ci	u32 truncate_seq = le32_to_cpu(trunc->truncate_seq);
393562306a36Sopenharmony_ci	u64 truncate_size = le64_to_cpu(trunc->truncate_size);
393662306a36Sopenharmony_ci	u64 size = le64_to_cpu(trunc->size);
393762306a36Sopenharmony_ci	int implemented = 0;
393862306a36Sopenharmony_ci	int dirty = __ceph_caps_dirty(ci);
393962306a36Sopenharmony_ci	int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
394062306a36Sopenharmony_ci	bool queue_trunc = false;
394162306a36Sopenharmony_ci
394262306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
394362306a36Sopenharmony_ci
394462306a36Sopenharmony_ci	issued |= implemented | dirty;
394562306a36Sopenharmony_ci
394662306a36Sopenharmony_ci	/*
394762306a36Sopenharmony_ci	 * If there is at least one crypto block then we'll trust
394862306a36Sopenharmony_ci	 * fscrypt_file_size. If the real length of the file is 0, then
394962306a36Sopenharmony_ci	 * ignore it (it has probably been truncated down to 0 by the MDS).
395062306a36Sopenharmony_ci	 */
395162306a36Sopenharmony_ci	if (IS_ENCRYPTED(inode) && size)
395262306a36Sopenharmony_ci		size = extra_info->fscrypt_file_size;
395362306a36Sopenharmony_ci
395462306a36Sopenharmony_ci	dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n",
395562306a36Sopenharmony_ci	     __func__, inode, mds, seq, truncate_size, truncate_seq);
395662306a36Sopenharmony_ci	queue_trunc = ceph_fill_file_size(inode, issued,
395762306a36Sopenharmony_ci					  truncate_seq, truncate_size, size);
395862306a36Sopenharmony_ci	return queue_trunc;
395962306a36Sopenharmony_ci}
396062306a36Sopenharmony_ci
396162306a36Sopenharmony_ci/*
396262306a36Sopenharmony_ci * Handle EXPORT from MDS.  Cap is being migrated _from_ this mds to a
396362306a36Sopenharmony_ci * different one.  If we are the most recent migration we've seen (as
396462306a36Sopenharmony_ci * indicated by mseq), make note of the migrating cap bits for the
396562306a36Sopenharmony_ci * duration (until we see the corresponding IMPORT).
396662306a36Sopenharmony_ci *
396762306a36Sopenharmony_ci * caller holds s_mutex
396862306a36Sopenharmony_ci */
396962306a36Sopenharmony_cistatic void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
397062306a36Sopenharmony_ci			      struct ceph_mds_cap_peer *ph,
397162306a36Sopenharmony_ci			      struct ceph_mds_session *session)
397262306a36Sopenharmony_ci{
397362306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
397462306a36Sopenharmony_ci	struct ceph_mds_session *tsession = NULL;
397562306a36Sopenharmony_ci	struct ceph_cap *cap, *tcap, *new_cap = NULL;
397662306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
397762306a36Sopenharmony_ci	u64 t_cap_id;
397862306a36Sopenharmony_ci	unsigned mseq = le32_to_cpu(ex->migrate_seq);
397962306a36Sopenharmony_ci	unsigned t_seq, t_mseq;
398062306a36Sopenharmony_ci	int target, issued;
398162306a36Sopenharmony_ci	int mds = session->s_mds;
398262306a36Sopenharmony_ci
398362306a36Sopenharmony_ci	if (ph) {
398462306a36Sopenharmony_ci		t_cap_id = le64_to_cpu(ph->cap_id);
398562306a36Sopenharmony_ci		t_seq = le32_to_cpu(ph->seq);
398662306a36Sopenharmony_ci		t_mseq = le32_to_cpu(ph->mseq);
398762306a36Sopenharmony_ci		target = le32_to_cpu(ph->mds);
398862306a36Sopenharmony_ci	} else {
398962306a36Sopenharmony_ci		t_cap_id = t_seq = t_mseq = 0;
399062306a36Sopenharmony_ci		target = -1;
399162306a36Sopenharmony_ci	}
399262306a36Sopenharmony_ci
399362306a36Sopenharmony_ci	dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
399462306a36Sopenharmony_ci	     inode, ci, mds, mseq, target);
399562306a36Sopenharmony_ciretry:
399662306a36Sopenharmony_ci	down_read(&mdsc->snap_rwsem);
399762306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
399862306a36Sopenharmony_ci	cap = __get_cap_for_mds(ci, mds);
399962306a36Sopenharmony_ci	if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
400062306a36Sopenharmony_ci		goto out_unlock;
400162306a36Sopenharmony_ci
400262306a36Sopenharmony_ci	if (target < 0) {
400362306a36Sopenharmony_ci		ceph_remove_cap(cap, false);
400462306a36Sopenharmony_ci		goto out_unlock;
400562306a36Sopenharmony_ci	}
400662306a36Sopenharmony_ci
400762306a36Sopenharmony_ci	/*
400862306a36Sopenharmony_ci	 * now we know we haven't received the cap import message yet
400962306a36Sopenharmony_ci	 * because the exported cap still exist.
401062306a36Sopenharmony_ci	 */
401162306a36Sopenharmony_ci
401262306a36Sopenharmony_ci	issued = cap->issued;
401362306a36Sopenharmony_ci	if (issued != cap->implemented)
401462306a36Sopenharmony_ci		pr_err_ratelimited("handle_cap_export: issued != implemented: "
401562306a36Sopenharmony_ci				"ino (%llx.%llx) mds%d seq %d mseq %d "
401662306a36Sopenharmony_ci				"issued %s implemented %s\n",
401762306a36Sopenharmony_ci				ceph_vinop(inode), mds, cap->seq, cap->mseq,
401862306a36Sopenharmony_ci				ceph_cap_string(issued),
401962306a36Sopenharmony_ci				ceph_cap_string(cap->implemented));
402062306a36Sopenharmony_ci
402162306a36Sopenharmony_ci
402262306a36Sopenharmony_ci	tcap = __get_cap_for_mds(ci, target);
402362306a36Sopenharmony_ci	if (tcap) {
402462306a36Sopenharmony_ci		/* already have caps from the target */
402562306a36Sopenharmony_ci		if (tcap->cap_id == t_cap_id &&
402662306a36Sopenharmony_ci		    ceph_seq_cmp(tcap->seq, t_seq) < 0) {
402762306a36Sopenharmony_ci			dout(" updating import cap %p mds%d\n", tcap, target);
402862306a36Sopenharmony_ci			tcap->cap_id = t_cap_id;
402962306a36Sopenharmony_ci			tcap->seq = t_seq - 1;
403062306a36Sopenharmony_ci			tcap->issue_seq = t_seq - 1;
403162306a36Sopenharmony_ci			tcap->issued |= issued;
403262306a36Sopenharmony_ci			tcap->implemented |= issued;
403362306a36Sopenharmony_ci			if (cap == ci->i_auth_cap) {
403462306a36Sopenharmony_ci				ci->i_auth_cap = tcap;
403562306a36Sopenharmony_ci				change_auth_cap_ses(ci, tcap->session);
403662306a36Sopenharmony_ci			}
403762306a36Sopenharmony_ci		}
403862306a36Sopenharmony_ci		ceph_remove_cap(cap, false);
403962306a36Sopenharmony_ci		goto out_unlock;
404062306a36Sopenharmony_ci	} else if (tsession) {
404162306a36Sopenharmony_ci		/* add placeholder for the export tagert */
404262306a36Sopenharmony_ci		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
404362306a36Sopenharmony_ci		tcap = new_cap;
404462306a36Sopenharmony_ci		ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
404562306a36Sopenharmony_ci			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
404662306a36Sopenharmony_ci
404762306a36Sopenharmony_ci		if (!list_empty(&ci->i_cap_flush_list) &&
404862306a36Sopenharmony_ci		    ci->i_auth_cap == tcap) {
404962306a36Sopenharmony_ci			spin_lock(&mdsc->cap_dirty_lock);
405062306a36Sopenharmony_ci			list_move_tail(&ci->i_flushing_item,
405162306a36Sopenharmony_ci				       &tcap->session->s_cap_flushing);
405262306a36Sopenharmony_ci			spin_unlock(&mdsc->cap_dirty_lock);
405362306a36Sopenharmony_ci		}
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci		ceph_remove_cap(cap, false);
405662306a36Sopenharmony_ci		goto out_unlock;
405762306a36Sopenharmony_ci	}
405862306a36Sopenharmony_ci
405962306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
406062306a36Sopenharmony_ci	up_read(&mdsc->snap_rwsem);
406162306a36Sopenharmony_ci	mutex_unlock(&session->s_mutex);
406262306a36Sopenharmony_ci
406362306a36Sopenharmony_ci	/* open target session */
406462306a36Sopenharmony_ci	tsession = ceph_mdsc_open_export_target_session(mdsc, target);
406562306a36Sopenharmony_ci	if (!IS_ERR(tsession)) {
406662306a36Sopenharmony_ci		if (mds > target) {
406762306a36Sopenharmony_ci			mutex_lock(&session->s_mutex);
406862306a36Sopenharmony_ci			mutex_lock_nested(&tsession->s_mutex,
406962306a36Sopenharmony_ci					  SINGLE_DEPTH_NESTING);
407062306a36Sopenharmony_ci		} else {
407162306a36Sopenharmony_ci			mutex_lock(&tsession->s_mutex);
407262306a36Sopenharmony_ci			mutex_lock_nested(&session->s_mutex,
407362306a36Sopenharmony_ci					  SINGLE_DEPTH_NESTING);
407462306a36Sopenharmony_ci		}
407562306a36Sopenharmony_ci		new_cap = ceph_get_cap(mdsc, NULL);
407662306a36Sopenharmony_ci	} else {
407762306a36Sopenharmony_ci		WARN_ON(1);
407862306a36Sopenharmony_ci		tsession = NULL;
407962306a36Sopenharmony_ci		target = -1;
408062306a36Sopenharmony_ci		mutex_lock(&session->s_mutex);
408162306a36Sopenharmony_ci	}
408262306a36Sopenharmony_ci	goto retry;
408362306a36Sopenharmony_ci
408462306a36Sopenharmony_ciout_unlock:
408562306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
408662306a36Sopenharmony_ci	up_read(&mdsc->snap_rwsem);
408762306a36Sopenharmony_ci	mutex_unlock(&session->s_mutex);
408862306a36Sopenharmony_ci	if (tsession) {
408962306a36Sopenharmony_ci		mutex_unlock(&tsession->s_mutex);
409062306a36Sopenharmony_ci		ceph_put_mds_session(tsession);
409162306a36Sopenharmony_ci	}
409262306a36Sopenharmony_ci	if (new_cap)
409362306a36Sopenharmony_ci		ceph_put_cap(mdsc, new_cap);
409462306a36Sopenharmony_ci}
409562306a36Sopenharmony_ci
409662306a36Sopenharmony_ci/*
409762306a36Sopenharmony_ci * Handle cap IMPORT.
409862306a36Sopenharmony_ci *
409962306a36Sopenharmony_ci * caller holds s_mutex. acquires i_ceph_lock
410062306a36Sopenharmony_ci */
410162306a36Sopenharmony_cistatic void handle_cap_import(struct ceph_mds_client *mdsc,
410262306a36Sopenharmony_ci			      struct inode *inode, struct ceph_mds_caps *im,
410362306a36Sopenharmony_ci			      struct ceph_mds_cap_peer *ph,
410462306a36Sopenharmony_ci			      struct ceph_mds_session *session,
410562306a36Sopenharmony_ci			      struct ceph_cap **target_cap, int *old_issued)
410662306a36Sopenharmony_ci{
410762306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
410862306a36Sopenharmony_ci	struct ceph_cap *cap, *ocap, *new_cap = NULL;
410962306a36Sopenharmony_ci	int mds = session->s_mds;
411062306a36Sopenharmony_ci	int issued;
411162306a36Sopenharmony_ci	unsigned caps = le32_to_cpu(im->caps);
411262306a36Sopenharmony_ci	unsigned wanted = le32_to_cpu(im->wanted);
411362306a36Sopenharmony_ci	unsigned seq = le32_to_cpu(im->seq);
411462306a36Sopenharmony_ci	unsigned mseq = le32_to_cpu(im->migrate_seq);
411562306a36Sopenharmony_ci	u64 realmino = le64_to_cpu(im->realm);
411662306a36Sopenharmony_ci	u64 cap_id = le64_to_cpu(im->cap_id);
411762306a36Sopenharmony_ci	u64 p_cap_id;
411862306a36Sopenharmony_ci	int peer;
411962306a36Sopenharmony_ci
412062306a36Sopenharmony_ci	if (ph) {
412162306a36Sopenharmony_ci		p_cap_id = le64_to_cpu(ph->cap_id);
412262306a36Sopenharmony_ci		peer = le32_to_cpu(ph->mds);
412362306a36Sopenharmony_ci	} else {
412462306a36Sopenharmony_ci		p_cap_id = 0;
412562306a36Sopenharmony_ci		peer = -1;
412662306a36Sopenharmony_ci	}
412762306a36Sopenharmony_ci
412862306a36Sopenharmony_ci	dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
412962306a36Sopenharmony_ci	     inode, ci, mds, mseq, peer);
413062306a36Sopenharmony_ciretry:
413162306a36Sopenharmony_ci	cap = __get_cap_for_mds(ci, mds);
413262306a36Sopenharmony_ci	if (!cap) {
413362306a36Sopenharmony_ci		if (!new_cap) {
413462306a36Sopenharmony_ci			spin_unlock(&ci->i_ceph_lock);
413562306a36Sopenharmony_ci			new_cap = ceph_get_cap(mdsc, NULL);
413662306a36Sopenharmony_ci			spin_lock(&ci->i_ceph_lock);
413762306a36Sopenharmony_ci			goto retry;
413862306a36Sopenharmony_ci		}
413962306a36Sopenharmony_ci		cap = new_cap;
414062306a36Sopenharmony_ci	} else {
414162306a36Sopenharmony_ci		if (new_cap) {
414262306a36Sopenharmony_ci			ceph_put_cap(mdsc, new_cap);
414362306a36Sopenharmony_ci			new_cap = NULL;
414462306a36Sopenharmony_ci		}
414562306a36Sopenharmony_ci	}
414662306a36Sopenharmony_ci
414762306a36Sopenharmony_ci	__ceph_caps_issued(ci, &issued);
414862306a36Sopenharmony_ci	issued |= __ceph_caps_dirty(ci);
414962306a36Sopenharmony_ci
415062306a36Sopenharmony_ci	ceph_add_cap(inode, session, cap_id, caps, wanted, seq, mseq,
415162306a36Sopenharmony_ci		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
415262306a36Sopenharmony_ci
415362306a36Sopenharmony_ci	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
415462306a36Sopenharmony_ci	if (ocap && ocap->cap_id == p_cap_id) {
415562306a36Sopenharmony_ci		dout(" remove export cap %p mds%d flags %d\n",
415662306a36Sopenharmony_ci		     ocap, peer, ph->flags);
415762306a36Sopenharmony_ci		if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
415862306a36Sopenharmony_ci		    (ocap->seq != le32_to_cpu(ph->seq) ||
415962306a36Sopenharmony_ci		     ocap->mseq != le32_to_cpu(ph->mseq))) {
416062306a36Sopenharmony_ci			pr_err_ratelimited("handle_cap_import: "
416162306a36Sopenharmony_ci					"mismatched seq/mseq: ino (%llx.%llx) "
416262306a36Sopenharmony_ci					"mds%d seq %d mseq %d importer mds%d "
416362306a36Sopenharmony_ci					"has peer seq %d mseq %d\n",
416462306a36Sopenharmony_ci					ceph_vinop(inode), peer, ocap->seq,
416562306a36Sopenharmony_ci					ocap->mseq, mds, le32_to_cpu(ph->seq),
416662306a36Sopenharmony_ci					le32_to_cpu(ph->mseq));
416762306a36Sopenharmony_ci		}
416862306a36Sopenharmony_ci		ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
416962306a36Sopenharmony_ci	}
417062306a36Sopenharmony_ci
417162306a36Sopenharmony_ci	*old_issued = issued;
417262306a36Sopenharmony_ci	*target_cap = cap;
417362306a36Sopenharmony_ci}
417462306a36Sopenharmony_ci
417562306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION
417662306a36Sopenharmony_cistatic int parse_fscrypt_fields(void **p, void *end,
417762306a36Sopenharmony_ci				struct cap_extra_info *extra)
417862306a36Sopenharmony_ci{
417962306a36Sopenharmony_ci	u32 len;
418062306a36Sopenharmony_ci
418162306a36Sopenharmony_ci	ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad);
418262306a36Sopenharmony_ci	if (extra->fscrypt_auth_len) {
418362306a36Sopenharmony_ci		ceph_decode_need(p, end, extra->fscrypt_auth_len, bad);
418462306a36Sopenharmony_ci		extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len,
418562306a36Sopenharmony_ci					      GFP_KERNEL);
418662306a36Sopenharmony_ci		if (!extra->fscrypt_auth)
418762306a36Sopenharmony_ci			return -ENOMEM;
418862306a36Sopenharmony_ci		ceph_decode_copy_safe(p, end, extra->fscrypt_auth,
418962306a36Sopenharmony_ci					extra->fscrypt_auth_len, bad);
419062306a36Sopenharmony_ci	}
419162306a36Sopenharmony_ci
419262306a36Sopenharmony_ci	ceph_decode_32_safe(p, end, len, bad);
419362306a36Sopenharmony_ci	if (len >= sizeof(u64)) {
419462306a36Sopenharmony_ci		ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad);
419562306a36Sopenharmony_ci		len -= sizeof(u64);
419662306a36Sopenharmony_ci	}
419762306a36Sopenharmony_ci	ceph_decode_skip_n(p, end, len, bad);
419862306a36Sopenharmony_ci	return 0;
419962306a36Sopenharmony_cibad:
420062306a36Sopenharmony_ci	return -EIO;
420162306a36Sopenharmony_ci}
420262306a36Sopenharmony_ci#else
420362306a36Sopenharmony_cistatic int parse_fscrypt_fields(void **p, void *end,
420462306a36Sopenharmony_ci				struct cap_extra_info *extra)
420562306a36Sopenharmony_ci{
420662306a36Sopenharmony_ci	u32 len;
420762306a36Sopenharmony_ci
420862306a36Sopenharmony_ci	/* Don't care about these fields unless we're encryption-capable */
420962306a36Sopenharmony_ci	ceph_decode_32_safe(p, end, len, bad);
421062306a36Sopenharmony_ci	if (len)
421162306a36Sopenharmony_ci		ceph_decode_skip_n(p, end, len, bad);
421262306a36Sopenharmony_ci	ceph_decode_32_safe(p, end, len, bad);
421362306a36Sopenharmony_ci	if (len)
421462306a36Sopenharmony_ci		ceph_decode_skip_n(p, end, len, bad);
421562306a36Sopenharmony_ci	return 0;
421662306a36Sopenharmony_cibad:
421762306a36Sopenharmony_ci	return -EIO;
421862306a36Sopenharmony_ci}
421962306a36Sopenharmony_ci#endif
422062306a36Sopenharmony_ci
422162306a36Sopenharmony_ci/*
422262306a36Sopenharmony_ci * Handle a caps message from the MDS.
422362306a36Sopenharmony_ci *
422462306a36Sopenharmony_ci * Identify the appropriate session, inode, and call the right handler
422562306a36Sopenharmony_ci * based on the cap op.
422662306a36Sopenharmony_ci */
422762306a36Sopenharmony_civoid ceph_handle_caps(struct ceph_mds_session *session,
422862306a36Sopenharmony_ci		      struct ceph_msg *msg)
422962306a36Sopenharmony_ci{
423062306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = session->s_mdsc;
423162306a36Sopenharmony_ci	struct inode *inode;
423262306a36Sopenharmony_ci	struct ceph_inode_info *ci;
423362306a36Sopenharmony_ci	struct ceph_cap *cap;
423462306a36Sopenharmony_ci	struct ceph_mds_caps *h;
423562306a36Sopenharmony_ci	struct ceph_mds_cap_peer *peer = NULL;
423662306a36Sopenharmony_ci	struct ceph_snap_realm *realm = NULL;
423762306a36Sopenharmony_ci	int op;
423862306a36Sopenharmony_ci	int msg_version = le16_to_cpu(msg->hdr.version);
423962306a36Sopenharmony_ci	u32 seq, mseq;
424062306a36Sopenharmony_ci	struct ceph_vino vino;
424162306a36Sopenharmony_ci	void *snaptrace;
424262306a36Sopenharmony_ci	size_t snaptrace_len;
424362306a36Sopenharmony_ci	void *p, *end;
424462306a36Sopenharmony_ci	struct cap_extra_info extra_info = {};
424562306a36Sopenharmony_ci	bool queue_trunc;
424662306a36Sopenharmony_ci	bool close_sessions = false;
424762306a36Sopenharmony_ci	bool do_cap_release = false;
424862306a36Sopenharmony_ci
424962306a36Sopenharmony_ci	dout("handle_caps from mds%d\n", session->s_mds);
425062306a36Sopenharmony_ci
425162306a36Sopenharmony_ci	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
425262306a36Sopenharmony_ci		return;
425362306a36Sopenharmony_ci
425462306a36Sopenharmony_ci	/* decode */
425562306a36Sopenharmony_ci	end = msg->front.iov_base + msg->front.iov_len;
425662306a36Sopenharmony_ci	if (msg->front.iov_len < sizeof(*h))
425762306a36Sopenharmony_ci		goto bad;
425862306a36Sopenharmony_ci	h = msg->front.iov_base;
425962306a36Sopenharmony_ci	op = le32_to_cpu(h->op);
426062306a36Sopenharmony_ci	vino.ino = le64_to_cpu(h->ino);
426162306a36Sopenharmony_ci	vino.snap = CEPH_NOSNAP;
426262306a36Sopenharmony_ci	seq = le32_to_cpu(h->seq);
426362306a36Sopenharmony_ci	mseq = le32_to_cpu(h->migrate_seq);
426462306a36Sopenharmony_ci
426562306a36Sopenharmony_ci	snaptrace = h + 1;
426662306a36Sopenharmony_ci	snaptrace_len = le32_to_cpu(h->snap_trace_len);
426762306a36Sopenharmony_ci	p = snaptrace + snaptrace_len;
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_ci	if (msg_version >= 2) {
427062306a36Sopenharmony_ci		u32 flock_len;
427162306a36Sopenharmony_ci		ceph_decode_32_safe(&p, end, flock_len, bad);
427262306a36Sopenharmony_ci		if (p + flock_len > end)
427362306a36Sopenharmony_ci			goto bad;
427462306a36Sopenharmony_ci		p += flock_len;
427562306a36Sopenharmony_ci	}
427662306a36Sopenharmony_ci
427762306a36Sopenharmony_ci	if (msg_version >= 3) {
427862306a36Sopenharmony_ci		if (op == CEPH_CAP_OP_IMPORT) {
427962306a36Sopenharmony_ci			if (p + sizeof(*peer) > end)
428062306a36Sopenharmony_ci				goto bad;
428162306a36Sopenharmony_ci			peer = p;
428262306a36Sopenharmony_ci			p += sizeof(*peer);
428362306a36Sopenharmony_ci		} else if (op == CEPH_CAP_OP_EXPORT) {
428462306a36Sopenharmony_ci			/* recorded in unused fields */
428562306a36Sopenharmony_ci			peer = (void *)&h->size;
428662306a36Sopenharmony_ci		}
428762306a36Sopenharmony_ci	}
428862306a36Sopenharmony_ci
428962306a36Sopenharmony_ci	if (msg_version >= 4) {
429062306a36Sopenharmony_ci		ceph_decode_64_safe(&p, end, extra_info.inline_version, bad);
429162306a36Sopenharmony_ci		ceph_decode_32_safe(&p, end, extra_info.inline_len, bad);
429262306a36Sopenharmony_ci		if (p + extra_info.inline_len > end)
429362306a36Sopenharmony_ci			goto bad;
429462306a36Sopenharmony_ci		extra_info.inline_data = p;
429562306a36Sopenharmony_ci		p += extra_info.inline_len;
429662306a36Sopenharmony_ci	}
429762306a36Sopenharmony_ci
429862306a36Sopenharmony_ci	if (msg_version >= 5) {
429962306a36Sopenharmony_ci		struct ceph_osd_client	*osdc = &mdsc->fsc->client->osdc;
430062306a36Sopenharmony_ci		u32			epoch_barrier;
430162306a36Sopenharmony_ci
430262306a36Sopenharmony_ci		ceph_decode_32_safe(&p, end, epoch_barrier, bad);
430362306a36Sopenharmony_ci		ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
430462306a36Sopenharmony_ci	}
430562306a36Sopenharmony_ci
430662306a36Sopenharmony_ci	if (msg_version >= 8) {
430762306a36Sopenharmony_ci		u32 pool_ns_len;
430862306a36Sopenharmony_ci
430962306a36Sopenharmony_ci		/* version >= 6 */
431062306a36Sopenharmony_ci		ceph_decode_skip_64(&p, end, bad);	// flush_tid
431162306a36Sopenharmony_ci		/* version >= 7 */
431262306a36Sopenharmony_ci		ceph_decode_skip_32(&p, end, bad);	// caller_uid
431362306a36Sopenharmony_ci		ceph_decode_skip_32(&p, end, bad);	// caller_gid
431462306a36Sopenharmony_ci		/* version >= 8 */
431562306a36Sopenharmony_ci		ceph_decode_32_safe(&p, end, pool_ns_len, bad);
431662306a36Sopenharmony_ci		if (pool_ns_len > 0) {
431762306a36Sopenharmony_ci			ceph_decode_need(&p, end, pool_ns_len, bad);
431862306a36Sopenharmony_ci			extra_info.pool_ns =
431962306a36Sopenharmony_ci				ceph_find_or_create_string(p, pool_ns_len);
432062306a36Sopenharmony_ci			p += pool_ns_len;
432162306a36Sopenharmony_ci		}
432262306a36Sopenharmony_ci	}
432362306a36Sopenharmony_ci
432462306a36Sopenharmony_ci	if (msg_version >= 9) {
432562306a36Sopenharmony_ci		struct ceph_timespec *btime;
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci		if (p + sizeof(*btime) > end)
432862306a36Sopenharmony_ci			goto bad;
432962306a36Sopenharmony_ci		btime = p;
433062306a36Sopenharmony_ci		ceph_decode_timespec64(&extra_info.btime, btime);
433162306a36Sopenharmony_ci		p += sizeof(*btime);
433262306a36Sopenharmony_ci		ceph_decode_64_safe(&p, end, extra_info.change_attr, bad);
433362306a36Sopenharmony_ci	}
433462306a36Sopenharmony_ci
433562306a36Sopenharmony_ci	if (msg_version >= 11) {
433662306a36Sopenharmony_ci		/* version >= 10 */
433762306a36Sopenharmony_ci		ceph_decode_skip_32(&p, end, bad); // flags
433862306a36Sopenharmony_ci		/* version >= 11 */
433962306a36Sopenharmony_ci		extra_info.dirstat_valid = true;
434062306a36Sopenharmony_ci		ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
434162306a36Sopenharmony_ci		ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
434262306a36Sopenharmony_ci	}
434362306a36Sopenharmony_ci
434462306a36Sopenharmony_ci	if (msg_version >= 12) {
434562306a36Sopenharmony_ci		if (parse_fscrypt_fields(&p, end, &extra_info))
434662306a36Sopenharmony_ci			goto bad;
434762306a36Sopenharmony_ci	}
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci	/* lookup ino */
435062306a36Sopenharmony_ci	inode = ceph_find_inode(mdsc->fsc->sb, vino);
435162306a36Sopenharmony_ci	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
435262306a36Sopenharmony_ci	     vino.snap, inode);
435362306a36Sopenharmony_ci
435462306a36Sopenharmony_ci	mutex_lock(&session->s_mutex);
435562306a36Sopenharmony_ci	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
435662306a36Sopenharmony_ci	     (unsigned)seq);
435762306a36Sopenharmony_ci
435862306a36Sopenharmony_ci	if (!inode) {
435962306a36Sopenharmony_ci		dout(" i don't have ino %llx\n", vino.ino);
436062306a36Sopenharmony_ci
436162306a36Sopenharmony_ci		switch (op) {
436262306a36Sopenharmony_ci		case CEPH_CAP_OP_IMPORT:
436362306a36Sopenharmony_ci		case CEPH_CAP_OP_REVOKE:
436462306a36Sopenharmony_ci		case CEPH_CAP_OP_GRANT:
436562306a36Sopenharmony_ci			do_cap_release = true;
436662306a36Sopenharmony_ci			break;
436762306a36Sopenharmony_ci		default:
436862306a36Sopenharmony_ci			break;
436962306a36Sopenharmony_ci		}
437062306a36Sopenharmony_ci		goto flush_cap_releases;
437162306a36Sopenharmony_ci	}
437262306a36Sopenharmony_ci	ci = ceph_inode(inode);
437362306a36Sopenharmony_ci
437462306a36Sopenharmony_ci	/* these will work even if we don't have a cap yet */
437562306a36Sopenharmony_ci	switch (op) {
437662306a36Sopenharmony_ci	case CEPH_CAP_OP_FLUSHSNAP_ACK:
437762306a36Sopenharmony_ci		handle_cap_flushsnap_ack(inode, le64_to_cpu(msg->hdr.tid),
437862306a36Sopenharmony_ci					 h, session);
437962306a36Sopenharmony_ci		goto done;
438062306a36Sopenharmony_ci
438162306a36Sopenharmony_ci	case CEPH_CAP_OP_EXPORT:
438262306a36Sopenharmony_ci		handle_cap_export(inode, h, peer, session);
438362306a36Sopenharmony_ci		goto done_unlocked;
438462306a36Sopenharmony_ci
438562306a36Sopenharmony_ci	case CEPH_CAP_OP_IMPORT:
438662306a36Sopenharmony_ci		realm = NULL;
438762306a36Sopenharmony_ci		if (snaptrace_len) {
438862306a36Sopenharmony_ci			down_write(&mdsc->snap_rwsem);
438962306a36Sopenharmony_ci			if (ceph_update_snap_trace(mdsc, snaptrace,
439062306a36Sopenharmony_ci						   snaptrace + snaptrace_len,
439162306a36Sopenharmony_ci						   false, &realm)) {
439262306a36Sopenharmony_ci				up_write(&mdsc->snap_rwsem);
439362306a36Sopenharmony_ci				close_sessions = true;
439462306a36Sopenharmony_ci				goto done;
439562306a36Sopenharmony_ci			}
439662306a36Sopenharmony_ci			downgrade_write(&mdsc->snap_rwsem);
439762306a36Sopenharmony_ci		} else {
439862306a36Sopenharmony_ci			down_read(&mdsc->snap_rwsem);
439962306a36Sopenharmony_ci		}
440062306a36Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
440162306a36Sopenharmony_ci		handle_cap_import(mdsc, inode, h, peer, session,
440262306a36Sopenharmony_ci				  &cap, &extra_info.issued);
440362306a36Sopenharmony_ci		handle_cap_grant(inode, session, cap,
440462306a36Sopenharmony_ci				 h, msg->middle, &extra_info);
440562306a36Sopenharmony_ci		if (realm)
440662306a36Sopenharmony_ci			ceph_put_snap_realm(mdsc, realm);
440762306a36Sopenharmony_ci		goto done_unlocked;
440862306a36Sopenharmony_ci	}
440962306a36Sopenharmony_ci
441062306a36Sopenharmony_ci	/* the rest require a cap */
441162306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
441262306a36Sopenharmony_ci	cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
441362306a36Sopenharmony_ci	if (!cap) {
441462306a36Sopenharmony_ci		dout(" no cap on %p ino %llx.%llx from mds%d\n",
441562306a36Sopenharmony_ci		     inode, ceph_ino(inode), ceph_snap(inode),
441662306a36Sopenharmony_ci		     session->s_mds);
441762306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
441862306a36Sopenharmony_ci		switch (op) {
441962306a36Sopenharmony_ci		case CEPH_CAP_OP_REVOKE:
442062306a36Sopenharmony_ci		case CEPH_CAP_OP_GRANT:
442162306a36Sopenharmony_ci			do_cap_release = true;
442262306a36Sopenharmony_ci			break;
442362306a36Sopenharmony_ci		default:
442462306a36Sopenharmony_ci			break;
442562306a36Sopenharmony_ci		}
442662306a36Sopenharmony_ci		goto flush_cap_releases;
442762306a36Sopenharmony_ci	}
442862306a36Sopenharmony_ci
442962306a36Sopenharmony_ci	/* note that each of these drops i_ceph_lock for us */
443062306a36Sopenharmony_ci	switch (op) {
443162306a36Sopenharmony_ci	case CEPH_CAP_OP_REVOKE:
443262306a36Sopenharmony_ci	case CEPH_CAP_OP_GRANT:
443362306a36Sopenharmony_ci		__ceph_caps_issued(ci, &extra_info.issued);
443462306a36Sopenharmony_ci		extra_info.issued |= __ceph_caps_dirty(ci);
443562306a36Sopenharmony_ci		handle_cap_grant(inode, session, cap,
443662306a36Sopenharmony_ci				 h, msg->middle, &extra_info);
443762306a36Sopenharmony_ci		goto done_unlocked;
443862306a36Sopenharmony_ci
443962306a36Sopenharmony_ci	case CEPH_CAP_OP_FLUSH_ACK:
444062306a36Sopenharmony_ci		handle_cap_flush_ack(inode, le64_to_cpu(msg->hdr.tid),
444162306a36Sopenharmony_ci				     h, session, cap);
444262306a36Sopenharmony_ci		break;
444362306a36Sopenharmony_ci
444462306a36Sopenharmony_ci	case CEPH_CAP_OP_TRUNC:
444562306a36Sopenharmony_ci		queue_trunc = handle_cap_trunc(inode, h, session,
444662306a36Sopenharmony_ci						&extra_info);
444762306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
444862306a36Sopenharmony_ci		if (queue_trunc)
444962306a36Sopenharmony_ci			ceph_queue_vmtruncate(inode);
445062306a36Sopenharmony_ci		break;
445162306a36Sopenharmony_ci
445262306a36Sopenharmony_ci	default:
445362306a36Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
445462306a36Sopenharmony_ci		pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
445562306a36Sopenharmony_ci		       ceph_cap_op_name(op));
445662306a36Sopenharmony_ci	}
445762306a36Sopenharmony_ci
445862306a36Sopenharmony_cidone:
445962306a36Sopenharmony_ci	mutex_unlock(&session->s_mutex);
446062306a36Sopenharmony_cidone_unlocked:
446162306a36Sopenharmony_ci	iput(inode);
446262306a36Sopenharmony_ciout:
446362306a36Sopenharmony_ci	ceph_dec_mds_stopping_blocker(mdsc);
446462306a36Sopenharmony_ci
446562306a36Sopenharmony_ci	ceph_put_string(extra_info.pool_ns);
446662306a36Sopenharmony_ci
446762306a36Sopenharmony_ci	/* Defer closing the sessions after s_mutex lock being released */
446862306a36Sopenharmony_ci	if (close_sessions)
446962306a36Sopenharmony_ci		ceph_mdsc_close_sessions(mdsc);
447062306a36Sopenharmony_ci
447162306a36Sopenharmony_ci	kfree(extra_info.fscrypt_auth);
447262306a36Sopenharmony_ci	return;
447362306a36Sopenharmony_ci
447462306a36Sopenharmony_ciflush_cap_releases:
447562306a36Sopenharmony_ci	/*
447662306a36Sopenharmony_ci	 * send any cap release message to try to move things
447762306a36Sopenharmony_ci	 * along for the mds (who clearly thinks we still have this
447862306a36Sopenharmony_ci	 * cap).
447962306a36Sopenharmony_ci	 */
448062306a36Sopenharmony_ci	if (do_cap_release) {
448162306a36Sopenharmony_ci		cap = ceph_get_cap(mdsc, NULL);
448262306a36Sopenharmony_ci		cap->cap_ino = vino.ino;
448362306a36Sopenharmony_ci		cap->queue_release = 1;
448462306a36Sopenharmony_ci		cap->cap_id = le64_to_cpu(h->cap_id);
448562306a36Sopenharmony_ci		cap->mseq = mseq;
448662306a36Sopenharmony_ci		cap->seq = seq;
448762306a36Sopenharmony_ci		cap->issue_seq = seq;
448862306a36Sopenharmony_ci		spin_lock(&session->s_cap_lock);
448962306a36Sopenharmony_ci		__ceph_queue_cap_release(session, cap);
449062306a36Sopenharmony_ci		spin_unlock(&session->s_cap_lock);
449162306a36Sopenharmony_ci	}
449262306a36Sopenharmony_ci	ceph_flush_cap_releases(mdsc, session);
449362306a36Sopenharmony_ci	goto done;
449462306a36Sopenharmony_ci
449562306a36Sopenharmony_cibad:
449662306a36Sopenharmony_ci	pr_err("ceph_handle_caps: corrupt message\n");
449762306a36Sopenharmony_ci	ceph_msg_dump(msg);
449862306a36Sopenharmony_ci	goto out;
449962306a36Sopenharmony_ci}
450062306a36Sopenharmony_ci
450162306a36Sopenharmony_ci/*
450262306a36Sopenharmony_ci * Delayed work handler to process end of delayed cap release LRU list.
450362306a36Sopenharmony_ci *
450462306a36Sopenharmony_ci * If new caps are added to the list while processing it, these won't get
450562306a36Sopenharmony_ci * processed in this run.  In this case, the ci->i_hold_caps_max will be
450662306a36Sopenharmony_ci * returned so that the work can be scheduled accordingly.
450762306a36Sopenharmony_ci */
450862306a36Sopenharmony_ciunsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
450962306a36Sopenharmony_ci{
451062306a36Sopenharmony_ci	struct inode *inode;
451162306a36Sopenharmony_ci	struct ceph_inode_info *ci;
451262306a36Sopenharmony_ci	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
451362306a36Sopenharmony_ci	unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
451462306a36Sopenharmony_ci	unsigned long loop_start = jiffies;
451562306a36Sopenharmony_ci	unsigned long delay = 0;
451662306a36Sopenharmony_ci
451762306a36Sopenharmony_ci	dout("check_delayed_caps\n");
451862306a36Sopenharmony_ci	spin_lock(&mdsc->cap_delay_lock);
451962306a36Sopenharmony_ci	while (!list_empty(&mdsc->cap_delay_list)) {
452062306a36Sopenharmony_ci		ci = list_first_entry(&mdsc->cap_delay_list,
452162306a36Sopenharmony_ci				      struct ceph_inode_info,
452262306a36Sopenharmony_ci				      i_cap_delay_list);
452362306a36Sopenharmony_ci		if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
452462306a36Sopenharmony_ci			dout("%s caps added recently.  Exiting loop", __func__);
452562306a36Sopenharmony_ci			delay = ci->i_hold_caps_max;
452662306a36Sopenharmony_ci			break;
452762306a36Sopenharmony_ci		}
452862306a36Sopenharmony_ci		if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
452962306a36Sopenharmony_ci		    time_before(jiffies, ci->i_hold_caps_max))
453062306a36Sopenharmony_ci			break;
453162306a36Sopenharmony_ci		list_del_init(&ci->i_cap_delay_list);
453262306a36Sopenharmony_ci
453362306a36Sopenharmony_ci		inode = igrab(&ci->netfs.inode);
453462306a36Sopenharmony_ci		if (inode) {
453562306a36Sopenharmony_ci			spin_unlock(&mdsc->cap_delay_lock);
453662306a36Sopenharmony_ci			dout("check_delayed_caps on %p\n", inode);
453762306a36Sopenharmony_ci			ceph_check_caps(ci, 0);
453862306a36Sopenharmony_ci			iput(inode);
453962306a36Sopenharmony_ci			spin_lock(&mdsc->cap_delay_lock);
454062306a36Sopenharmony_ci		}
454162306a36Sopenharmony_ci	}
454262306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_delay_lock);
454362306a36Sopenharmony_ci
454462306a36Sopenharmony_ci	return delay;
454562306a36Sopenharmony_ci}
454662306a36Sopenharmony_ci
454762306a36Sopenharmony_ci/*
454862306a36Sopenharmony_ci * Flush all dirty caps to the mds
454962306a36Sopenharmony_ci */
455062306a36Sopenharmony_cistatic void flush_dirty_session_caps(struct ceph_mds_session *s)
455162306a36Sopenharmony_ci{
455262306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = s->s_mdsc;
455362306a36Sopenharmony_ci	struct ceph_inode_info *ci;
455462306a36Sopenharmony_ci	struct inode *inode;
455562306a36Sopenharmony_ci
455662306a36Sopenharmony_ci	dout("flush_dirty_caps\n");
455762306a36Sopenharmony_ci	spin_lock(&mdsc->cap_dirty_lock);
455862306a36Sopenharmony_ci	while (!list_empty(&s->s_cap_dirty)) {
455962306a36Sopenharmony_ci		ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info,
456062306a36Sopenharmony_ci				      i_dirty_item);
456162306a36Sopenharmony_ci		inode = &ci->netfs.inode;
456262306a36Sopenharmony_ci		ihold(inode);
456362306a36Sopenharmony_ci		dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
456462306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_dirty_lock);
456562306a36Sopenharmony_ci		ceph_wait_on_async_create(inode);
456662306a36Sopenharmony_ci		ceph_check_caps(ci, CHECK_CAPS_FLUSH);
456762306a36Sopenharmony_ci		iput(inode);
456862306a36Sopenharmony_ci		spin_lock(&mdsc->cap_dirty_lock);
456962306a36Sopenharmony_ci	}
457062306a36Sopenharmony_ci	spin_unlock(&mdsc->cap_dirty_lock);
457162306a36Sopenharmony_ci	dout("flush_dirty_caps done\n");
457262306a36Sopenharmony_ci}
457362306a36Sopenharmony_ci
457462306a36Sopenharmony_civoid ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
457562306a36Sopenharmony_ci{
457662306a36Sopenharmony_ci	ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
457762306a36Sopenharmony_ci}
457862306a36Sopenharmony_ci
457962306a36Sopenharmony_civoid __ceph_touch_fmode(struct ceph_inode_info *ci,
458062306a36Sopenharmony_ci			struct ceph_mds_client *mdsc, int fmode)
458162306a36Sopenharmony_ci{
458262306a36Sopenharmony_ci	unsigned long now = jiffies;
458362306a36Sopenharmony_ci	if (fmode & CEPH_FILE_MODE_RD)
458462306a36Sopenharmony_ci		ci->i_last_rd = now;
458562306a36Sopenharmony_ci	if (fmode & CEPH_FILE_MODE_WR)
458662306a36Sopenharmony_ci		ci->i_last_wr = now;
458762306a36Sopenharmony_ci	/* queue periodic check */
458862306a36Sopenharmony_ci	if (fmode &&
458962306a36Sopenharmony_ci	    __ceph_is_any_real_caps(ci) &&
459062306a36Sopenharmony_ci	    list_empty(&ci->i_cap_delay_list))
459162306a36Sopenharmony_ci		__cap_delay_requeue(mdsc, ci);
459262306a36Sopenharmony_ci}
459362306a36Sopenharmony_ci
459462306a36Sopenharmony_civoid ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
459562306a36Sopenharmony_ci{
459662306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb);
459762306a36Sopenharmony_ci	int bits = (fmode << 1) | 1;
459862306a36Sopenharmony_ci	bool already_opened = false;
459962306a36Sopenharmony_ci	int i;
460062306a36Sopenharmony_ci
460162306a36Sopenharmony_ci	if (count == 1)
460262306a36Sopenharmony_ci		atomic64_inc(&mdsc->metric.opened_files);
460362306a36Sopenharmony_ci
460462306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
460562306a36Sopenharmony_ci	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
460662306a36Sopenharmony_ci		/*
460762306a36Sopenharmony_ci		 * If any of the mode ref is larger than 0,
460862306a36Sopenharmony_ci		 * that means it has been already opened by
460962306a36Sopenharmony_ci		 * others. Just skip checking the PIN ref.
461062306a36Sopenharmony_ci		 */
461162306a36Sopenharmony_ci		if (i && ci->i_nr_by_mode[i])
461262306a36Sopenharmony_ci			already_opened = true;
461362306a36Sopenharmony_ci
461462306a36Sopenharmony_ci		if (bits & (1 << i))
461562306a36Sopenharmony_ci			ci->i_nr_by_mode[i] += count;
461662306a36Sopenharmony_ci	}
461762306a36Sopenharmony_ci
461862306a36Sopenharmony_ci	if (!already_opened)
461962306a36Sopenharmony_ci		percpu_counter_inc(&mdsc->metric.opened_inodes);
462062306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
462162306a36Sopenharmony_ci}
462262306a36Sopenharmony_ci
462362306a36Sopenharmony_ci/*
462462306a36Sopenharmony_ci * Drop open file reference.  If we were the last open file,
462562306a36Sopenharmony_ci * we may need to release capabilities to the MDS (or schedule
462662306a36Sopenharmony_ci * their delayed release).
462762306a36Sopenharmony_ci */
462862306a36Sopenharmony_civoid ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
462962306a36Sopenharmony_ci{
463062306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb);
463162306a36Sopenharmony_ci	int bits = (fmode << 1) | 1;
463262306a36Sopenharmony_ci	bool is_closed = true;
463362306a36Sopenharmony_ci	int i;
463462306a36Sopenharmony_ci
463562306a36Sopenharmony_ci	if (count == 1)
463662306a36Sopenharmony_ci		atomic64_dec(&mdsc->metric.opened_files);
463762306a36Sopenharmony_ci
463862306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
463962306a36Sopenharmony_ci	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
464062306a36Sopenharmony_ci		if (bits & (1 << i)) {
464162306a36Sopenharmony_ci			BUG_ON(ci->i_nr_by_mode[i] < count);
464262306a36Sopenharmony_ci			ci->i_nr_by_mode[i] -= count;
464362306a36Sopenharmony_ci		}
464462306a36Sopenharmony_ci
464562306a36Sopenharmony_ci		/*
464662306a36Sopenharmony_ci		 * If any of the mode ref is not 0 after
464762306a36Sopenharmony_ci		 * decreased, that means it is still opened
464862306a36Sopenharmony_ci		 * by others. Just skip checking the PIN ref.
464962306a36Sopenharmony_ci		 */
465062306a36Sopenharmony_ci		if (i && ci->i_nr_by_mode[i])
465162306a36Sopenharmony_ci			is_closed = false;
465262306a36Sopenharmony_ci	}
465362306a36Sopenharmony_ci
465462306a36Sopenharmony_ci	if (is_closed)
465562306a36Sopenharmony_ci		percpu_counter_dec(&mdsc->metric.opened_inodes);
465662306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
465762306a36Sopenharmony_ci}
465862306a36Sopenharmony_ci
465962306a36Sopenharmony_ci/*
466062306a36Sopenharmony_ci * For a soon-to-be unlinked file, drop the LINK caps. If it
466162306a36Sopenharmony_ci * looks like the link count will hit 0, drop any other caps (other
466262306a36Sopenharmony_ci * than PIN) we don't specifically want (due to the file still being
466362306a36Sopenharmony_ci * open).
466462306a36Sopenharmony_ci */
466562306a36Sopenharmony_ciint ceph_drop_caps_for_unlink(struct inode *inode)
466662306a36Sopenharmony_ci{
466762306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
466862306a36Sopenharmony_ci	int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
466962306a36Sopenharmony_ci
467062306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
467162306a36Sopenharmony_ci	if (inode->i_nlink == 1) {
467262306a36Sopenharmony_ci		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
467362306a36Sopenharmony_ci
467462306a36Sopenharmony_ci		if (__ceph_caps_dirty(ci)) {
467562306a36Sopenharmony_ci			struct ceph_mds_client *mdsc =
467662306a36Sopenharmony_ci				ceph_inode_to_client(inode)->mdsc;
467762306a36Sopenharmony_ci			__cap_delay_requeue_front(mdsc, ci);
467862306a36Sopenharmony_ci		}
467962306a36Sopenharmony_ci	}
468062306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
468162306a36Sopenharmony_ci	return drop;
468262306a36Sopenharmony_ci}
468362306a36Sopenharmony_ci
468462306a36Sopenharmony_ci/*
468562306a36Sopenharmony_ci * Helpers for embedding cap and dentry lease releases into mds
468662306a36Sopenharmony_ci * requests.
468762306a36Sopenharmony_ci *
468862306a36Sopenharmony_ci * @force is used by dentry_release (below) to force inclusion of a
468962306a36Sopenharmony_ci * record for the directory inode, even when there aren't any caps to
469062306a36Sopenharmony_ci * drop.
469162306a36Sopenharmony_ci */
469262306a36Sopenharmony_ciint ceph_encode_inode_release(void **p, struct inode *inode,
469362306a36Sopenharmony_ci			      int mds, int drop, int unless, int force)
469462306a36Sopenharmony_ci{
469562306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
469662306a36Sopenharmony_ci	struct ceph_cap *cap;
469762306a36Sopenharmony_ci	struct ceph_mds_request_release *rel = *p;
469862306a36Sopenharmony_ci	int used, dirty;
469962306a36Sopenharmony_ci	int ret = 0;
470062306a36Sopenharmony_ci
470162306a36Sopenharmony_ci	spin_lock(&ci->i_ceph_lock);
470262306a36Sopenharmony_ci	used = __ceph_caps_used(ci);
470362306a36Sopenharmony_ci	dirty = __ceph_caps_dirty(ci);
470462306a36Sopenharmony_ci
470562306a36Sopenharmony_ci	dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
470662306a36Sopenharmony_ci	     inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
470762306a36Sopenharmony_ci	     ceph_cap_string(unless));
470862306a36Sopenharmony_ci
470962306a36Sopenharmony_ci	/* only drop unused, clean caps */
471062306a36Sopenharmony_ci	drop &= ~(used | dirty);
471162306a36Sopenharmony_ci
471262306a36Sopenharmony_ci	cap = __get_cap_for_mds(ci, mds);
471362306a36Sopenharmony_ci	if (cap && __cap_is_valid(cap)) {
471462306a36Sopenharmony_ci		unless &= cap->issued;
471562306a36Sopenharmony_ci		if (unless) {
471662306a36Sopenharmony_ci			if (unless & CEPH_CAP_AUTH_EXCL)
471762306a36Sopenharmony_ci				drop &= ~CEPH_CAP_AUTH_SHARED;
471862306a36Sopenharmony_ci			if (unless & CEPH_CAP_LINK_EXCL)
471962306a36Sopenharmony_ci				drop &= ~CEPH_CAP_LINK_SHARED;
472062306a36Sopenharmony_ci			if (unless & CEPH_CAP_XATTR_EXCL)
472162306a36Sopenharmony_ci				drop &= ~CEPH_CAP_XATTR_SHARED;
472262306a36Sopenharmony_ci			if (unless & CEPH_CAP_FILE_EXCL)
472362306a36Sopenharmony_ci				drop &= ~CEPH_CAP_FILE_SHARED;
472462306a36Sopenharmony_ci		}
472562306a36Sopenharmony_ci
472662306a36Sopenharmony_ci		if (force || (cap->issued & drop)) {
472762306a36Sopenharmony_ci			if (cap->issued & drop) {
472862306a36Sopenharmony_ci				int wanted = __ceph_caps_wanted(ci);
472962306a36Sopenharmony_ci				dout("encode_inode_release %p cap %p "
473062306a36Sopenharmony_ci				     "%s -> %s, wanted %s -> %s\n", inode, cap,
473162306a36Sopenharmony_ci				     ceph_cap_string(cap->issued),
473262306a36Sopenharmony_ci				     ceph_cap_string(cap->issued & ~drop),
473362306a36Sopenharmony_ci				     ceph_cap_string(cap->mds_wanted),
473462306a36Sopenharmony_ci				     ceph_cap_string(wanted));
473562306a36Sopenharmony_ci
473662306a36Sopenharmony_ci				cap->issued &= ~drop;
473762306a36Sopenharmony_ci				cap->implemented &= ~drop;
473862306a36Sopenharmony_ci				cap->mds_wanted = wanted;
473962306a36Sopenharmony_ci				if (cap == ci->i_auth_cap &&
474062306a36Sopenharmony_ci				    !(wanted & CEPH_CAP_ANY_FILE_WR))
474162306a36Sopenharmony_ci					ci->i_requested_max_size = 0;
474262306a36Sopenharmony_ci			} else {
474362306a36Sopenharmony_ci				dout("encode_inode_release %p cap %p %s"
474462306a36Sopenharmony_ci				     " (force)\n", inode, cap,
474562306a36Sopenharmony_ci				     ceph_cap_string(cap->issued));
474662306a36Sopenharmony_ci			}
474762306a36Sopenharmony_ci
474862306a36Sopenharmony_ci			rel->ino = cpu_to_le64(ceph_ino(inode));
474962306a36Sopenharmony_ci			rel->cap_id = cpu_to_le64(cap->cap_id);
475062306a36Sopenharmony_ci			rel->seq = cpu_to_le32(cap->seq);
475162306a36Sopenharmony_ci			rel->issue_seq = cpu_to_le32(cap->issue_seq);
475262306a36Sopenharmony_ci			rel->mseq = cpu_to_le32(cap->mseq);
475362306a36Sopenharmony_ci			rel->caps = cpu_to_le32(cap->implemented);
475462306a36Sopenharmony_ci			rel->wanted = cpu_to_le32(cap->mds_wanted);
475562306a36Sopenharmony_ci			rel->dname_len = 0;
475662306a36Sopenharmony_ci			rel->dname_seq = 0;
475762306a36Sopenharmony_ci			*p += sizeof(*rel);
475862306a36Sopenharmony_ci			ret = 1;
475962306a36Sopenharmony_ci		} else {
476062306a36Sopenharmony_ci			dout("encode_inode_release %p cap %p %s (noop)\n",
476162306a36Sopenharmony_ci			     inode, cap, ceph_cap_string(cap->issued));
476262306a36Sopenharmony_ci		}
476362306a36Sopenharmony_ci	}
476462306a36Sopenharmony_ci	spin_unlock(&ci->i_ceph_lock);
476562306a36Sopenharmony_ci	return ret;
476662306a36Sopenharmony_ci}
476762306a36Sopenharmony_ci
476862306a36Sopenharmony_ci/**
476962306a36Sopenharmony_ci * ceph_encode_dentry_release - encode a dentry release into an outgoing request
477062306a36Sopenharmony_ci * @p: outgoing request buffer
477162306a36Sopenharmony_ci * @dentry: dentry to release
477262306a36Sopenharmony_ci * @dir: dir to release it from
477362306a36Sopenharmony_ci * @mds: mds that we're speaking to
477462306a36Sopenharmony_ci * @drop: caps being dropped
477562306a36Sopenharmony_ci * @unless: unless we have these caps
477662306a36Sopenharmony_ci *
477762306a36Sopenharmony_ci * Encode a dentry release into an outgoing request buffer. Returns 1 if the
477862306a36Sopenharmony_ci * thing was released, or a negative error code otherwise.
477962306a36Sopenharmony_ci */
478062306a36Sopenharmony_ciint ceph_encode_dentry_release(void **p, struct dentry *dentry,
478162306a36Sopenharmony_ci			       struct inode *dir,
478262306a36Sopenharmony_ci			       int mds, int drop, int unless)
478362306a36Sopenharmony_ci{
478462306a36Sopenharmony_ci	struct ceph_mds_request_release *rel = *p;
478562306a36Sopenharmony_ci	struct ceph_dentry_info *di = ceph_dentry(dentry);
478662306a36Sopenharmony_ci	int force = 0;
478762306a36Sopenharmony_ci	int ret;
478862306a36Sopenharmony_ci
478962306a36Sopenharmony_ci	/* This shouldn't happen */
479062306a36Sopenharmony_ci	BUG_ON(!dir);
479162306a36Sopenharmony_ci
479262306a36Sopenharmony_ci	/*
479362306a36Sopenharmony_ci	 * force an record for the directory caps if we have a dentry lease.
479462306a36Sopenharmony_ci	 * this is racy (can't take i_ceph_lock and d_lock together), but it
479562306a36Sopenharmony_ci	 * doesn't have to be perfect; the mds will revoke anything we don't
479662306a36Sopenharmony_ci	 * release.
479762306a36Sopenharmony_ci	 */
479862306a36Sopenharmony_ci	spin_lock(&dentry->d_lock);
479962306a36Sopenharmony_ci	if (di->lease_session && di->lease_session->s_mds == mds)
480062306a36Sopenharmony_ci		force = 1;
480162306a36Sopenharmony_ci	spin_unlock(&dentry->d_lock);
480262306a36Sopenharmony_ci
480362306a36Sopenharmony_ci	ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
480462306a36Sopenharmony_ci
480562306a36Sopenharmony_ci	spin_lock(&dentry->d_lock);
480662306a36Sopenharmony_ci	if (ret && di->lease_session && di->lease_session->s_mds == mds) {
480762306a36Sopenharmony_ci		dout("encode_dentry_release %p mds%d seq %d\n",
480862306a36Sopenharmony_ci		     dentry, mds, (int)di->lease_seq);
480962306a36Sopenharmony_ci		rel->dname_seq = cpu_to_le32(di->lease_seq);
481062306a36Sopenharmony_ci		__ceph_mdsc_drop_dentry_lease(dentry);
481162306a36Sopenharmony_ci		spin_unlock(&dentry->d_lock);
481262306a36Sopenharmony_ci		if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
481362306a36Sopenharmony_ci			int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
481462306a36Sopenharmony_ci
481562306a36Sopenharmony_ci			if (ret2 < 0)
481662306a36Sopenharmony_ci				return ret2;
481762306a36Sopenharmony_ci
481862306a36Sopenharmony_ci			rel->dname_len = cpu_to_le32(ret2);
481962306a36Sopenharmony_ci			*p += ret2;
482062306a36Sopenharmony_ci		} else {
482162306a36Sopenharmony_ci			rel->dname_len = cpu_to_le32(dentry->d_name.len);
482262306a36Sopenharmony_ci			memcpy(*p, dentry->d_name.name, dentry->d_name.len);
482362306a36Sopenharmony_ci			*p += dentry->d_name.len;
482462306a36Sopenharmony_ci		}
482562306a36Sopenharmony_ci	} else {
482662306a36Sopenharmony_ci		spin_unlock(&dentry->d_lock);
482762306a36Sopenharmony_ci	}
482862306a36Sopenharmony_ci	return ret;
482962306a36Sopenharmony_ci}
483062306a36Sopenharmony_ci
483162306a36Sopenharmony_cistatic int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
483262306a36Sopenharmony_ci{
483362306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
483462306a36Sopenharmony_ci	struct ceph_cap_snap *capsnap;
483562306a36Sopenharmony_ci	int capsnap_release = 0;
483662306a36Sopenharmony_ci
483762306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
483862306a36Sopenharmony_ci
483962306a36Sopenharmony_ci	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
484062306a36Sopenharmony_ci
484162306a36Sopenharmony_ci	while (!list_empty(&ci->i_cap_snaps)) {
484262306a36Sopenharmony_ci		capsnap = list_first_entry(&ci->i_cap_snaps,
484362306a36Sopenharmony_ci					   struct ceph_cap_snap, ci_item);
484462306a36Sopenharmony_ci		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
484562306a36Sopenharmony_ci		ceph_put_snap_context(capsnap->context);
484662306a36Sopenharmony_ci		ceph_put_cap_snap(capsnap);
484762306a36Sopenharmony_ci		capsnap_release++;
484862306a36Sopenharmony_ci	}
484962306a36Sopenharmony_ci	wake_up_all(&ci->i_cap_wq);
485062306a36Sopenharmony_ci	wake_up_all(&mdsc->cap_flushing_wq);
485162306a36Sopenharmony_ci	return capsnap_release;
485262306a36Sopenharmony_ci}
485362306a36Sopenharmony_ci
485462306a36Sopenharmony_ciint ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
485562306a36Sopenharmony_ci{
485662306a36Sopenharmony_ci	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
485762306a36Sopenharmony_ci	struct ceph_mds_client *mdsc = fsc->mdsc;
485862306a36Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
485962306a36Sopenharmony_ci	bool is_auth;
486062306a36Sopenharmony_ci	bool dirty_dropped = false;
486162306a36Sopenharmony_ci	int iputs = 0;
486262306a36Sopenharmony_ci
486362306a36Sopenharmony_ci	lockdep_assert_held(&ci->i_ceph_lock);
486462306a36Sopenharmony_ci
486562306a36Sopenharmony_ci	dout("removing cap %p, ci is %p, inode is %p\n",
486662306a36Sopenharmony_ci	     cap, ci, &ci->netfs.inode);
486762306a36Sopenharmony_ci
486862306a36Sopenharmony_ci	is_auth = (cap == ci->i_auth_cap);
486962306a36Sopenharmony_ci	__ceph_remove_cap(cap, false);
487062306a36Sopenharmony_ci	if (is_auth) {
487162306a36Sopenharmony_ci		struct ceph_cap_flush *cf;
487262306a36Sopenharmony_ci
487362306a36Sopenharmony_ci		if (ceph_inode_is_shutdown(inode)) {
487462306a36Sopenharmony_ci			if (inode->i_data.nrpages > 0)
487562306a36Sopenharmony_ci				*invalidate = true;
487662306a36Sopenharmony_ci			if (ci->i_wrbuffer_ref > 0)
487762306a36Sopenharmony_ci				mapping_set_error(&inode->i_data, -EIO);
487862306a36Sopenharmony_ci		}
487962306a36Sopenharmony_ci
488062306a36Sopenharmony_ci		spin_lock(&mdsc->cap_dirty_lock);
488162306a36Sopenharmony_ci
488262306a36Sopenharmony_ci		/* trash all of the cap flushes for this inode */
488362306a36Sopenharmony_ci		while (!list_empty(&ci->i_cap_flush_list)) {
488462306a36Sopenharmony_ci			cf = list_first_entry(&ci->i_cap_flush_list,
488562306a36Sopenharmony_ci					      struct ceph_cap_flush, i_list);
488662306a36Sopenharmony_ci			list_del_init(&cf->g_list);
488762306a36Sopenharmony_ci			list_del_init(&cf->i_list);
488862306a36Sopenharmony_ci			if (!cf->is_capsnap)
488962306a36Sopenharmony_ci				ceph_free_cap_flush(cf);
489062306a36Sopenharmony_ci		}
489162306a36Sopenharmony_ci
489262306a36Sopenharmony_ci		if (!list_empty(&ci->i_dirty_item)) {
489362306a36Sopenharmony_ci			pr_warn_ratelimited(
489462306a36Sopenharmony_ci				" dropping dirty %s state for %p %lld\n",
489562306a36Sopenharmony_ci				ceph_cap_string(ci->i_dirty_caps),
489662306a36Sopenharmony_ci				inode, ceph_ino(inode));
489762306a36Sopenharmony_ci			ci->i_dirty_caps = 0;
489862306a36Sopenharmony_ci			list_del_init(&ci->i_dirty_item);
489962306a36Sopenharmony_ci			dirty_dropped = true;
490062306a36Sopenharmony_ci		}
490162306a36Sopenharmony_ci		if (!list_empty(&ci->i_flushing_item)) {
490262306a36Sopenharmony_ci			pr_warn_ratelimited(
490362306a36Sopenharmony_ci				" dropping dirty+flushing %s state for %p %lld\n",
490462306a36Sopenharmony_ci				ceph_cap_string(ci->i_flushing_caps),
490562306a36Sopenharmony_ci				inode, ceph_ino(inode));
490662306a36Sopenharmony_ci			ci->i_flushing_caps = 0;
490762306a36Sopenharmony_ci			list_del_init(&ci->i_flushing_item);
490862306a36Sopenharmony_ci			mdsc->num_cap_flushing--;
490962306a36Sopenharmony_ci			dirty_dropped = true;
491062306a36Sopenharmony_ci		}
491162306a36Sopenharmony_ci		spin_unlock(&mdsc->cap_dirty_lock);
491262306a36Sopenharmony_ci
491362306a36Sopenharmony_ci		if (dirty_dropped) {
491462306a36Sopenharmony_ci			mapping_set_error(inode->i_mapping, -EIO);
491562306a36Sopenharmony_ci
491662306a36Sopenharmony_ci			if (ci->i_wrbuffer_ref_head == 0 &&
491762306a36Sopenharmony_ci			    ci->i_wr_ref == 0 &&
491862306a36Sopenharmony_ci			    ci->i_dirty_caps == 0 &&
491962306a36Sopenharmony_ci			    ci->i_flushing_caps == 0) {
492062306a36Sopenharmony_ci				ceph_put_snap_context(ci->i_head_snapc);
492162306a36Sopenharmony_ci				ci->i_head_snapc = NULL;
492262306a36Sopenharmony_ci			}
492362306a36Sopenharmony_ci		}
492462306a36Sopenharmony_ci
492562306a36Sopenharmony_ci		if (atomic_read(&ci->i_filelock_ref) > 0) {
492662306a36Sopenharmony_ci			/* make further file lock syscall return -EIO */
492762306a36Sopenharmony_ci			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
492862306a36Sopenharmony_ci			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
492962306a36Sopenharmony_ci					    inode, ceph_ino(inode));
493062306a36Sopenharmony_ci		}
493162306a36Sopenharmony_ci
493262306a36Sopenharmony_ci		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
493362306a36Sopenharmony_ci			cf = ci->i_prealloc_cap_flush;
493462306a36Sopenharmony_ci			ci->i_prealloc_cap_flush = NULL;
493562306a36Sopenharmony_ci			if (!cf->is_capsnap)
493662306a36Sopenharmony_ci				ceph_free_cap_flush(cf);
493762306a36Sopenharmony_ci		}
493862306a36Sopenharmony_ci
493962306a36Sopenharmony_ci		if (!list_empty(&ci->i_cap_snaps))
494062306a36Sopenharmony_ci			iputs = remove_capsnaps(mdsc, inode);
494162306a36Sopenharmony_ci	}
494262306a36Sopenharmony_ci	if (dirty_dropped)
494362306a36Sopenharmony_ci		++iputs;
494462306a36Sopenharmony_ci	return iputs;
494562306a36Sopenharmony_ci}
4946