/*
*  Copyright (c) 2001 The Regents of the University of Michigan.
*  All rights reserved.
*
*  Kendrick Smith <kmsmith@umich.edu>
*  Andy Adamson <kandros@umich.edu>
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions
*  are met:
*
*  1. Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*  2. Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in the
*     documentation and/or other materials provided with the distribution.
*  3. Neither the name of the University nor the names of its
*     contributors may be used to endorse or promote products derived
*     from this software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
#include <linux/string_helpers.h>
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
#include "current_stateid.h"

#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY                NFSDDBG_PROC

#define all_ones {{~0,~0},~0}
static const stateid_t one_stateid = {
	.si_generation = ~0,
	.si_opaque = all_ones,
};
static const stateid_t zero_stateid = {
	/* all fields zero */
};
static const stateid_t currentstateid = {
	.si_generation = 1,
};
static const stateid_t close_stateid = {
	.si_generation = 0xffffffffU,
};

static u64 current_sessionid = 1;

#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
#define CLOSE_STATEID(stateid)  (!memcmp((stateid), &close_stateid, sizeof(stateid_t)))
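
/*
 * The all-zero and all-ones stateids above correspond to the "special"
 * anonymous and READ-bypass stateids of the NFSv4 protocols (see e.g.
 * RFC 5661, section 8.2.3).  currentstateid matches the NFSv4.1
 * "current stateid" special value (seqid 1, all else zero), and
 * close_stateid is the value returned for a stateid that has just been
 * closed and is no longer valid.
 */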

/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);

/* Locking: */

/*
 * Currently used for the del_recall_lru and file hash table.  In an
 * effort to decrease the scope of the client_mutex, this spinlock may
 * eventually cover more:
 */
static DEFINE_SPINLOCK(state_lock);

enum nfsd4_st_mutex_lock_subclass {
	OPEN_STATEID_MUTEX = 0,
	LOCK_STATEID_MUTEX = 1,
};

/*
 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
 * the refcount on the open stateid to drop.
 */
static DECLARE_WAIT_QUEUE_HEAD(close_wq);

/*
 * A waitqueue where a writer to clients/#/ctl destroying a client can
 * wait for cl_rpc_users to drop to 0 and then for the client to be
 * unhashed.
 */
static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);

static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab;
static struct kmem_cache *stateid_slab;
static struct kmem_cache *deleg_slab;
static struct kmem_cache *odstate_slab;

static void free_session(struct nfsd4_session *);

static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;

static bool is_session_dead(struct nfsd4_session *ses)
{
	return ses->se_flags & NFS4_SESSION_DEAD;
}

static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
	if (atomic_read(&ses->se_ref) > ref_held_by_me)
		return nfserr_jukebox;
	ses->se_flags |= NFS4_SESSION_DEAD;
	return nfs_ok;
}

static bool is_client_expired(struct nfs4_client *clp)
{
	return clp->cl_time == 0;
}

static __be32 get_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (is_client_expired(clp))
		return nfserr_expired;
	atomic_inc(&clp->cl_rpc_users);
	return nfs_ok;
}

/* must be called under the client_lock */
static inline void
renew_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (is_client_expired(clp)) {
		WARN_ON(1);
		printk("%s: client (clientid %08x/%08x) already expired\n",
			__func__,
			clp->cl_clientid.cl_boot,
			clp->cl_clientid.cl_id);
		return;
	}

	list_move_tail(&clp->cl_lru, &nn->client_lru);
	clp->cl_time = ktime_get_boottime_seconds();
}

static void put_client_renew_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (!atomic_dec_and_test(&clp->cl_rpc_users))
		return;
	if (!is_client_expired(clp))
		renew_client_locked(clp);
	else
		wake_up_all(&expiry_wq);
}

static void put_client_renew(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
		return;
	if (!is_client_expired(clp))
		renew_client_locked(clp);
	else
		wake_up_all(&expiry_wq);
	spin_unlock(&nn->client_lock);
}

static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
{
	__be32 status;

	if (is_session_dead(ses))
		return nfserr_badsession;
	status = get_client_locked(ses->se_client);
	if (status)
		return status;
	atomic_inc(&ses->se_ref);
	return nfs_ok;
}

static void nfsd4_put_session_locked(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
		free_session(ses);
	put_client_renew_locked(clp);
}

static void nfsd4_put_session(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	nfsd4_put_session_locked(ses);
	spin_unlock(&nn->client_lock);
}

static struct nfsd4_blocked_lock *
find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
			struct nfsd_net *nn)
{
	struct nfsd4_blocked_lock *cur, *found = NULL;

	spin_lock(&nn->blocked_locks_lock);
	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
		if (fh_match(fh, &cur->nbl_fh)) {
			list_del_init(&cur->nbl_list);
			list_del_init(&cur->nbl_lru);
			found = cur;
			break;
		}
	}
	spin_unlock(&nn->blocked_locks_lock);
	if (found)
		locks_delete_block(&found->nbl_lock);
	return found;
}

static struct nfsd4_blocked_lock *
find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
			struct nfsd_net *nn)
{
	struct nfsd4_blocked_lock *nbl;

	nbl = find_blocked_lock(lo, fh, nn);
	if (!nbl) {
		nbl = kmalloc(sizeof(*nbl), GFP_KERNEL);
		if (nbl) {
			INIT_LIST_HEAD(&nbl->nbl_list);
			INIT_LIST_HEAD(&nbl->nbl_lru);
			fh_copy_shallow(&nbl->nbl_fh, fh);
			locks_init_lock(&nbl->nbl_lock);
			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
					&nfsd4_cb_notify_lock_ops,
					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
		}
	}
	return nbl;
}

static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
	locks_delete_block(&nbl->nbl_lock);
	locks_release_private(&nbl->nbl_lock);
	kfree(nbl);
}

static void
remove_blocked_locks(struct nfs4_lockowner *lo)
{
	struct nfs4_client *clp = lo->lo_owner.so_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct nfsd4_blocked_lock *nbl;
	LIST_HEAD(reaplist);

	/* Dequeue all blocked locks */
	spin_lock(&nn->blocked_locks_lock);
	while (!list_empty(&lo->lo_blocked)) {
		nbl = list_first_entry(&lo->lo_blocked,
					struct nfsd4_blocked_lock,
					nbl_list);
		list_del_init(&nbl->nbl_list);
		list_move(&nbl->nbl_lru, &reaplist);
	}
	spin_unlock(&nn->blocked_locks_lock);

	/* Now free them */
	while (!list_empty(&reaplist)) {
		nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
					nbl_lru);
		list_del_init(&nbl->nbl_lru);
		free_blocked_lock(nbl);
	}
}

static void
nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
{
	struct nfsd4_blocked_lock	*nbl = container_of(cb,
						struct nfsd4_blocked_lock, nbl_cb);
	locks_delete_block(&nbl->nbl_lock);
}

static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
	/*
	 * Since this is just an optimization, we don't try very hard if it
	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
	 * just quit trying on anything else.
	 */
	switch (task->tk_status) {
	case -NFS4ERR_DELAY:
		rpc_delay(task, 1 * HZ);
		return 0;
	default:
		return 1;
	}
}

static void
nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
{
	struct nfsd4_blocked_lock	*nbl = container_of(cb,
						struct nfsd4_blocked_lock, nbl_cb);

	free_blocked_lock(nbl);
}

static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
	.prepare	= nfsd4_cb_notify_lock_prepare,
	.done		= nfsd4_cb_notify_lock_done,
	.release	= nfsd4_cb_notify_lock_release,
};

static inline struct nfs4_stateowner *
nfs4_get_stateowner(struct nfs4_stateowner *sop)
{
	atomic_inc(&sop->so_count);
	return sop;
}

static int
same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
{
	return (sop->so_owner.len == owner->len) &&
		0 == memcmp(sop->so_owner.data, owner->data, owner->len);
}

static struct nfs4_openowner *
find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
			struct nfs4_client *clp)
{
	struct nfs4_stateowner *so;

	lockdep_assert_held(&clp->cl_lock);

	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
			    so_strhash) {
		if (!so->so_is_open_owner)
			continue;
		if (same_owner_str(so, &open->op_owner))
			return openowner(nfs4_get_stateowner(so));
	}
	return NULL;
}

static struct nfs4_openowner *
find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
			struct nfs4_client *clp)
{
	struct nfs4_openowner *oo;

	spin_lock(&clp->cl_lock);
	oo = find_openstateowner_str_locked(hashval, open, clp);
	spin_unlock(&clp->cl_lock);
	return oo;
}

static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
	unsigned char *cptr = (unsigned char *) ptr;

	u32 x = 0;
	while (nbytes--) {
		x *= 37;
		x += *cptr++;
	}
	return x;
}
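
/*
 * opaque_hashval() is a simple base-37 polynomial hash.  For example,
 * hashing the two-byte owner string {0x01, 0x02} yields
 * (0 * 37 + 0x01) * 37 + 0x02 = 0x27; callers then mask the result
 * down to the size of their hash table.
 */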

static void nfsd4_free_file_rcu(struct rcu_head *rcu)
{
	struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);

	kmem_cache_free(file_slab, fp);
}

void
put_nfs4_file(struct nfs4_file *fi)
{
	might_lock(&state_lock);

	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
		hlist_del_rcu(&fi->fi_hash);
		spin_unlock(&state_lock);
		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
	}
}

static struct nfsd_file *
__nfs4_get_fd(struct nfs4_file *f, int oflag)
{
	if (f->fi_fds[oflag])
		return nfsd_file_get(f->fi_fds[oflag]);
	return NULL;
}

static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
	struct nfsd_file *ret;

	lockdep_assert_held(&f->fi_lock);

	ret = __nfs4_get_fd(f, O_WRONLY);
	if (!ret)
		ret = __nfs4_get_fd(f, O_RDWR);
	return ret;
}

static struct nfsd_file *
find_writeable_file(struct nfs4_file *f)
{
	struct nfsd_file *ret;

	spin_lock(&f->fi_lock);
	ret = find_writeable_file_locked(f);
	spin_unlock(&f->fi_lock);

	return ret;
}

static struct nfsd_file *
find_readable_file_locked(struct nfs4_file *f)
{
	struct nfsd_file *ret;

	lockdep_assert_held(&f->fi_lock);

	ret = __nfs4_get_fd(f, O_RDONLY);
	if (!ret)
		ret = __nfs4_get_fd(f, O_RDWR);
	return ret;
}

static struct nfsd_file *
find_readable_file(struct nfs4_file *f)
{
	struct nfsd_file *ret;

	spin_lock(&f->fi_lock);
	ret = find_readable_file_locked(f);
	spin_unlock(&f->fi_lock);

	return ret;
}

struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
	struct nfsd_file *ret;

	if (!f)
		return NULL;
	spin_lock(&f->fi_lock);
	ret = __nfs4_get_fd(f, O_RDWR);
	if (!ret) {
		ret = __nfs4_get_fd(f, O_WRONLY);
		if (!ret)
			ret = __nfs4_get_fd(f, O_RDONLY);
	}
	spin_unlock(&f->fi_lock);
	return ret;
}

static struct nfsd_file *find_any_file_locked(struct nfs4_file *f)
{
	lockdep_assert_held(&f->fi_lock);

	if (f->fi_fds[O_RDWR])
		return f->fi_fds[O_RDWR];
	if (f->fi_fds[O_WRONLY])
		return f->fi_fds[O_WRONLY];
	if (f->fi_fds[O_RDONLY])
		return f->fi_fds[O_RDONLY];
	return NULL;
}

static struct nfsd_file *find_deleg_file_locked(struct nfs4_file *f)
{
	lockdep_assert_held(&f->fi_lock);

	if (f->fi_deleg_file)
		return f->fi_deleg_file;
	return NULL;
}

static atomic_long_t num_delegations;
unsigned long max_delegations;

/*
 * Open owner state (share locks)
 */

/* hash tables for lock and open owners */
#define OWNER_HASH_BITS              8
#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)

static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
{
	unsigned int ret;

	ret = opaque_hashval(ownername->data, ownername->len);
	return ret & OWNER_HASH_MASK;
}

/* hash table for nfs4_file */
#define FILE_HASH_BITS                   8
#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)

static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
{
	return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
}

static unsigned int file_hashval(struct knfsd_fh *fh)
{
	return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
}

static struct hlist_head file_hashtbl[FILE_HASH_SIZE];

static void
__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
	lockdep_assert_held(&fp->fi_lock);

	if (access & NFS4_SHARE_ACCESS_WRITE)
		atomic_inc(&fp->fi_access[O_WRONLY]);
	if (access & NFS4_SHARE_ACCESS_READ)
		atomic_inc(&fp->fi_access[O_RDONLY]);
}

static __be32
nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
	lockdep_assert_held(&fp->fi_lock);

	/* Does this access mode make sense? */
	if (access & ~NFS4_SHARE_ACCESS_BOTH)
		return nfserr_inval;

	/* Does it conflict with a deny mode already set? */
	if ((access & fp->fi_share_deny) != 0)
		return nfserr_share_denied;

	__nfs4_file_get_access(fp, access);
	return nfs_ok;
}

static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
{
	/* Common case is that there is no deny mode. */
	if (deny) {
		/* Does this deny mode make sense? */
		if (deny & ~NFS4_SHARE_DENY_BOTH)
			return nfserr_inval;

		if ((deny & NFS4_SHARE_DENY_READ) &&
		    atomic_read(&fp->fi_access[O_RDONLY]))
			return nfserr_share_denied;

		if ((deny & NFS4_SHARE_DENY_WRITE) &&
		    atomic_read(&fp->fi_access[O_WRONLY]))
			return nfserr_share_denied;
	}
	return nfs_ok;
}
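
/*
 * For example: if some client already has the file open for reading
 * (fi_access[O_RDONLY] is nonzero), an OPEN requesting
 * NFS4_SHARE_DENY_READ fails here with nfserr_share_denied, while one
 * requesting only NFS4_SHARE_DENY_WRITE would still succeed.
 */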

static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
	might_lock(&fp->fi_lock);

	if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
		struct nfsd_file *f1 = NULL;
		struct nfsd_file *f2 = NULL;

		swap(f1, fp->fi_fds[oflag]);
		if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
			swap(f2, fp->fi_fds[O_RDWR]);
		spin_unlock(&fp->fi_lock);
		if (f1)
			nfsd_file_put(f1);
		if (f2)
			nfsd_file_put(f2);
	}
}

static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
{
	WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);

	if (access & NFS4_SHARE_ACCESS_WRITE)
		__nfs4_file_put_access(fp, O_WRONLY);
	if (access & NFS4_SHARE_ACCESS_READ)
		__nfs4_file_put_access(fp, O_RDONLY);
}

/*
 * Allocate a new open/delegation state counter. This is needed for
 * pNFS for proper return on close semantics.
 *
 * Note that we only allocate it for pNFS-enabled exports, otherwise
 * all pointers to struct nfs4_clnt_odstate are always NULL.
 */
static struct nfs4_clnt_odstate *
alloc_clnt_odstate(struct nfs4_client *clp)
{
	struct nfs4_clnt_odstate *co;

	co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
	if (co) {
		co->co_client = clp;
		refcount_set(&co->co_odcount, 1);
	}
	return co;
}

static void
hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
{
	struct nfs4_file *fp = co->co_file;

	lockdep_assert_held(&fp->fi_lock);
	list_add(&co->co_perfile, &fp->fi_clnt_odstate);
}

static inline void
get_clnt_odstate(struct nfs4_clnt_odstate *co)
{
	if (co)
		refcount_inc(&co->co_odcount);
}

static void
put_clnt_odstate(struct nfs4_clnt_odstate *co)
{
	struct nfs4_file *fp;

	if (!co)
		return;

	fp = co->co_file;
	if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
		list_del(&co->co_perfile);
		spin_unlock(&fp->fi_lock);

		nfsd4_return_all_file_layouts(co->co_client, fp);
		kmem_cache_free(odstate_slab, co);
	}
}

static struct nfs4_clnt_odstate *
find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
{
	struct nfs4_clnt_odstate *co;
	struct nfs4_client *cl;

	if (!new)
		return NULL;

	cl = new->co_client;

	spin_lock(&fp->fi_lock);
	list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
		if (co->co_client == cl) {
			get_clnt_odstate(co);
			goto out;
		}
	}
	co = new;
	co->co_file = fp;
	hash_clnt_odstate_locked(new);
out:
	spin_unlock(&fp->fi_lock);
	return co;
}

struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
				  void (*sc_free)(struct nfs4_stid *))
{
	struct nfs4_stid *stid;
	int new_id;

	stid = kmem_cache_zalloc(slab, GFP_KERNEL);
	if (!stid)
		return NULL;

	idr_preload(GFP_KERNEL);
	spin_lock(&cl->cl_lock);
	/* Reserving 0 for start of file in nfsdfs "states" file: */
	new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
	spin_unlock(&cl->cl_lock);
	idr_preload_end();
	if (new_id < 0)
		goto out_free;

	stid->sc_free = sc_free;
	stid->sc_client = cl;
	stid->sc_stateid.si_opaque.so_id = new_id;
	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
	/* Will be incremented before return to client: */
	refcount_set(&stid->sc_count, 1);
	spin_lock_init(&stid->sc_lock);
	INIT_LIST_HEAD(&stid->sc_cp_list);

	/*
	 * It shouldn't be a problem to reuse an opaque stateid value.
	 * I don't think it is for 4.1.  But with 4.0 I worry that, for
	 * example, a stray write retransmission could be accepted by
	 * the server when it should have been rejected.  Therefore,
	 * adopt a trick from the sctp code to attempt to maximize the
	 * amount of time until an id is reused, by ensuring they always
	 * "increase" (mod INT_MAX):
	 */
	return stid;
out_free:
	kmem_cache_free(slab, stid);
	return NULL;
}
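
/*
 * The 12-byte si_opaque ("other") field of a stateid minted here is
 * thus the server boot time and client id (so_clid) plus the
 * per-client IDR index (so_id); si_generation is filled in later by
 * the state-type-specific code.
 */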

/*
 * Create a unique stateid_t to represent each COPY.
 */
static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
			      unsigned char sc_type)
{
	int new_id;

	stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
	stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
	stid->sc_type = sc_type;

	idr_preload(GFP_KERNEL);
	spin_lock(&nn->s2s_cp_lock);
	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
	stid->stid.si_opaque.so_id = new_id;
	stid->stid.si_generation = 1;
	spin_unlock(&nn->s2s_cp_lock);
	idr_preload_end();
	if (new_id < 0)
		return 0;
	return 1;
}
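
/*
 * Note the return convention: nfs4_init_cp_state() returns 1 on success
 * and 0 on failure, not the usual 0/-errno, so callers simply treat the
 * result as a boolean.
 */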

int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
{
	return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
}

struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
						     struct nfs4_stid *p_stid)
{
	struct nfs4_cpntf_state *cps;

	cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
	if (!cps)
		return NULL;
	cps->cpntf_time = ktime_get_boottime_seconds();
	refcount_set(&cps->cp_stateid.sc_count, 1);
	if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
		goto out_free;
	spin_lock(&nn->s2s_cp_lock);
	list_add(&cps->cp_list, &p_stid->sc_cp_list);
	spin_unlock(&nn->s2s_cp_lock);
	return cps;
out_free:
	kfree(cps);
	return NULL;
}

void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
	struct nfsd_net *nn;

	WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
	nn = net_generic(copy->cp_clp->net, nfsd_net_id);
	spin_lock(&nn->s2s_cp_lock);
	idr_remove(&nn->s2s_cp_stateids,
		   copy->cp_stateid.stid.si_opaque.so_id);
	spin_unlock(&nn->s2s_cp_lock);
}

static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
{
	struct nfs4_cpntf_state *cps;
	struct nfsd_net *nn;

	nn = net_generic(net, nfsd_net_id);
	spin_lock(&nn->s2s_cp_lock);
	while (!list_empty(&stid->sc_cp_list)) {
		cps = list_first_entry(&stid->sc_cp_list,
				       struct nfs4_cpntf_state, cp_list);
		_free_cpntf_state_locked(nn, cps);
	}
	spin_unlock(&nn->s2s_cp_lock);
}

static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
	struct nfs4_stid *stid;

	stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid);
	if (!stid)
		return NULL;

	return openlockstateid(stid);
}

static void nfs4_free_deleg(struct nfs4_stid *stid)
{
	WARN_ON(!list_empty(&stid->sc_cp_list));
	kmem_cache_free(deleg_slab, stid);
	atomic_long_dec(&num_delegations);
}
/*
 * When we recall a delegation, we should be careful not to hand it
 * out again straight away.
 * To ensure this we keep a pair of bloom filters ('new' and 'old')
 * in which the filehandles of recalled delegations are "stored".
 * If a filehandle appears in either filter, a delegation is blocked.
 * When a delegation is recalled, the filehandle is stored in the "new"
 * filter.
 * Every 30 seconds we swap the filters and clear the "new" one,
 * unless both are empty of course.
 *
 * Each filter is 256 bits.  We hash the filehandle to a 32-bit value
 * and use each of the low 3 bytes as a separate bit index into the
 * filter.
 *
 * 'blocked_delegations_lock', which is always taken in block_delegations(),
 * is used to manage concurrent access.  Testing does not need the lock
 * except when swapping the two filters.
 */
static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
	int	entries, old_entries;
	time64_t swap_time;
	int	new; /* index into 'set' */
	DECLARE_BITMAP(set[2], 256);
} blocked_delegations;

static int delegation_blocked(struct knfsd_fh *fh)
{
	u32 hash;
	struct bloom_pair *bd = &blocked_delegations;

	if (bd->entries == 0)
		return 0;
	if (ktime_get_seconds() - bd->swap_time > 30) {
		spin_lock(&blocked_delegations_lock);
		if (ktime_get_seconds() - bd->swap_time > 30) {
			bd->entries -= bd->old_entries;
			bd->old_entries = bd->entries;
			memset(bd->set[bd->new], 0,
			       sizeof(bd->set[0]));
			bd->new = 1 - bd->new;
			bd->swap_time = ktime_get_seconds();
		}
		spin_unlock(&blocked_delegations_lock);
	}
	hash = jhash(&fh->fh_base, fh->fh_size, 0);
	if (test_bit(hash&255, bd->set[0]) &&
	    test_bit((hash>>8)&255, bd->set[0]) &&
	    test_bit((hash>>16)&255, bd->set[0]))
		return 1;

	if (test_bit(hash&255, bd->set[1]) &&
	    test_bit((hash>>8)&255, bd->set[1]) &&
	    test_bit((hash>>16)&255, bd->set[1]))
		return 1;

	return 0;
}
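
/*
 * For example, a filehandle that jhash()es to 0x00c0ffee is tested at
 * bit positions 0xee, 0xff and 0xc0 of each filter; a delegation is
 * blocked only if all three bits are set in the same filter, so the
 * filters can yield false positives but never false negatives.
 */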

static void block_delegations(struct knfsd_fh *fh)
{
	u32 hash;
	struct bloom_pair *bd = &blocked_delegations;

	hash = jhash(&fh->fh_base, fh->fh_size, 0);

	spin_lock(&blocked_delegations_lock);
	__set_bit(hash&255, bd->set[bd->new]);
	__set_bit((hash>>8)&255, bd->set[bd->new]);
	__set_bit((hash>>16)&255, bd->set[bd->new]);
	if (bd->entries == 0)
		bd->swap_time = ktime_get_seconds();
	bd->entries += 1;
	spin_unlock(&blocked_delegations_lock);
}

static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
		 struct svc_fh *current_fh,
		 struct nfs4_clnt_odstate *odstate)
{
	struct nfs4_delegation *dp;
	long n;

	dprintk("NFSD alloc_init_deleg\n");
	n = atomic_long_inc_return(&num_delegations);
	if (n < 0 || n > max_delegations)
		goto out_dec;
	if (delegation_blocked(&current_fh->fh_handle))
		goto out_dec;
	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
	if (dp == NULL)
		goto out_dec;

	/*
	 * delegation seqid's are never incremented.  The 4.1 special
	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
	 * 0 anyway just for consistency and use 1:
	 */
	dp->dl_stid.sc_stateid.si_generation = 1;
	INIT_LIST_HEAD(&dp->dl_perfile);
	INIT_LIST_HEAD(&dp->dl_perclnt);
	INIT_LIST_HEAD(&dp->dl_recall_lru);
	dp->dl_clnt_odstate = odstate;
	get_clnt_odstate(odstate);
	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
	dp->dl_retries = 1;
	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
		      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
	get_nfs4_file(fp);
	dp->dl_stid.sc_file = fp;
	return dp;
out_dec:
	atomic_long_dec(&num_delegations);
	return NULL;
}

void
nfs4_put_stid(struct nfs4_stid *s)
{
	struct nfs4_file *fp = s->sc_file;
	struct nfs4_client *clp = s->sc_client;

	might_lock(&clp->cl_lock);

	if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
		wake_up_all(&close_wq);
		return;
	}
	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
	nfs4_free_cpntf_statelist(clp->net, s);
	spin_unlock(&clp->cl_lock);
	s->sc_free(s);
	if (fp)
		put_nfs4_file(fp);
}

void
nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
{
	stateid_t *src = &stid->sc_stateid;

	spin_lock(&stid->sc_lock);
	if (unlikely(++src->si_generation == 0))
		src->si_generation = 1;
	memcpy(dst, src, sizeof(*dst));
	spin_unlock(&stid->sc_lock);
}

static void put_deleg_file(struct nfs4_file *fp)
{
	struct nfsd_file *nf = NULL;

	spin_lock(&fp->fi_lock);
	if (--fp->fi_delegees == 0)
		swap(nf, fp->fi_deleg_file);
	spin_unlock(&fp->fi_lock);

	if (nf)
		nfsd_file_put(nf);
}

static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
	struct nfs4_file *fp = dp->dl_stid.sc_file;
	struct nfsd_file *nf = fp->fi_deleg_file;

	WARN_ON_ONCE(!fp->fi_delegees);

	vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
	put_deleg_file(fp);
}

static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
{
	put_clnt_odstate(dp->dl_clnt_odstate);
	nfs4_unlock_deleg_lease(dp);
	nfs4_put_stid(&dp->dl_stid);
}

void nfs4_unhash_stid(struct nfs4_stid *s)
{
	s->sc_type = 0;
}
/**
 * nfs4_delegation_exists - Discover if this delegation already exists
 * @clp:     a pointer to the nfs4_client we're granting a delegation to
 * @fp:      a pointer to the nfs4_file we're granting a delegation on
 *
 * Return:
 *      true iff an existing delegation is found, otherwise false
 */

static bool
nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp)
{
	struct nfs4_delegation *searchdp = NULL;
	struct nfs4_client *searchclp = NULL;

	lockdep_assert_held(&state_lock);
	lockdep_assert_held(&fp->fi_lock);

	list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
		searchclp = searchdp->dl_stid.sc_client;
		if (clp == searchclp) {
			return true;
		}
	}
	return false;
}
/**
 * hash_delegation_locked - Add a delegation to the appropriate lists
 * @dp:     a pointer to the nfs4_delegation we are adding.
 * @fp:     a pointer to the nfs4_file we're granting a delegation on
 *
 * Return:
 *      On success: 0 if the delegation was successfully hashed.
 *
 *      On error: -EAGAIN if one was previously granted to this
 *                 nfs4_client for this nfs4_file. Delegation is not hashed.
 *
 */

static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
	struct nfs4_client *clp = dp->dl_stid.sc_client;

	lockdep_assert_held(&state_lock);
	lockdep_assert_held(&fp->fi_lock);

	if (nfs4_delegation_exists(clp, fp))
		return -EAGAIN;
	refcount_inc(&dp->dl_stid.sc_count);
	dp->dl_stid.sc_type = NFS4_DELEG_STID;
	list_add(&dp->dl_perfile, &fp->fi_delegations);
	list_add(&dp->dl_perclnt, &clp->cl_delegations);
	return 0;
}

static bool delegation_hashed(struct nfs4_delegation *dp)
{
	return !(list_empty(&dp->dl_perfile));
}

static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
	struct nfs4_file *fp = dp->dl_stid.sc_file;

	lockdep_assert_held(&state_lock);

	if (!delegation_hashed(dp))
		return false;

	dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
	/* Ensure that deleg break won't try to requeue it */
	++dp->dl_time;
	spin_lock(&fp->fi_lock);
	list_del_init(&dp->dl_perclnt);
	list_del_init(&dp->dl_recall_lru);
	list_del_init(&dp->dl_perfile);
	spin_unlock(&fp->fi_lock);
	return true;
}

static void destroy_delegation(struct nfs4_delegation *dp)
{
	bool unhashed;

	spin_lock(&state_lock);
	unhashed = unhash_delegation_locked(dp);
	spin_unlock(&state_lock);
	if (unhashed)
		destroy_unhashed_deleg(dp);
}

static void revoke_delegation(struct nfs4_delegation *dp)
{
	struct nfs4_client *clp = dp->dl_stid.sc_client;

	WARN_ON(!list_empty(&dp->dl_recall_lru));

	if (clp->cl_minorversion) {
		spin_lock(&clp->cl_lock);
		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
		refcount_inc(&dp->dl_stid.sc_count);
		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
		spin_unlock(&clp->cl_lock);
	}
	destroy_unhashed_deleg(dp);
}

/*
 * SETCLIENTID state
 */

static unsigned int clientid_hashval(u32 id)
{
	return id & CLIENT_HASH_MASK;
}

static unsigned int clientstr_hashval(struct xdr_netobj name)
{
	return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
}

/*
 * We store the NONE, READ, WRITE, and BOTH bits separately in the
 * st_{access,deny}_bmap field of the stateid, in order to track not
 * only what share bits are currently in force, but also what
 * combinations of share bits previous opens have used.  This allows us
 * to enforce the recommendation of rfc 3530 14.2.19 that the server
 * return an error if the client attempts to downgrade to a combination
 * of share bits not explicable by closing some of its previous opens.
 *
 * XXX: This enforcement is actually incomplete, since we don't keep
 * track of access/deny bit combinations; so, e.g., we allow:
 *
 *	OPEN allow read, deny write
 *	OPEN allow both, deny none
 *	DOWNGRADE allow read, deny none
 *
 * which we should reject.
 */
static unsigned int
bmap_to_share_mode(unsigned long bmap)
{
	int i;
	unsigned int access = 0;

	for (i = 1; i < 4; i++) {
		if (test_bit(i, &bmap))
			access |= i;
	}
	return access;
}
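
/*
 * For example, a stateid whose st_access_bmap has bits 1 (READ) and 3
 * (BOTH) set maps to share mode 1 | 3 == NFS4_SHARE_ACCESS_BOTH, since
 * the share-access constants are themselves the values 1, 2 and 3.
 */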

/* set share access for a given stateid */
static inline void
set_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
	stp->st_access_bmap |= mask;
}

/* clear share access for a given stateid */
static inline void
clear_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
	stp->st_access_bmap &= ~mask;
}

/* test whether a given stateid has access */
static inline bool
test_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	return (bool)(stp->st_access_bmap & mask);
}

/* set share deny for a given stateid */
static inline void
set_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
	stp->st_deny_bmap |= mask;
}

/* clear share deny for a given stateid */
static inline void
clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
	stp->st_deny_bmap &= ~mask;
}

/* test whether a given stateid is denying specific access */
static inline bool
test_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	return (bool)(stp->st_deny_bmap & mask);
}

static int nfs4_access_to_omode(u32 access)
{
	switch (access & NFS4_SHARE_ACCESS_BOTH) {
	case NFS4_SHARE_ACCESS_READ:
		return O_RDONLY;
	case NFS4_SHARE_ACCESS_WRITE:
		return O_WRONLY;
	case NFS4_SHARE_ACCESS_BOTH:
		return O_RDWR;
	}
	WARN_ON_ONCE(1);
	return O_RDONLY;
}

/*
 * A stateid that had a deny mode associated with it is being released
 * or downgraded. Recalculate the deny mode on the file.
 */
static void
recalculate_deny_mode(struct nfs4_file *fp)
{
	struct nfs4_ol_stateid *stp;

	spin_lock(&fp->fi_lock);
	fp->fi_share_deny = 0;
	list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
		fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
	spin_unlock(&fp->fi_lock);
}

static void
reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	int i;
	bool change = false;

	for (i = 1; i < 4; i++) {
		if ((i & deny) != i) {
			change = true;
			clear_deny(i, stp);
		}
	}

	/* Recalculate per-file deny mode if there was a change */
	if (change)
		recalculate_deny_mode(stp->st_stid.sc_file);
}

/* release all access and file references for a given stateid */
static void
release_all_access(struct nfs4_ol_stateid *stp)
{
	int i;
	struct nfs4_file *fp = stp->st_stid.sc_file;

	if (fp && stp->st_deny_bmap != 0)
		recalculate_deny_mode(fp);

	for (i = 1; i < 4; i++) {
		if (test_access(i, stp))
			nfs4_file_put_access(stp->st_stid.sc_file, i);
		clear_access(i, stp);
	}
}

static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop)
{
	kfree(sop->so_owner.data);
	sop->so_ops->so_free(sop);
}

static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
{
	struct nfs4_client *clp = sop->so_client;

	might_lock(&clp->cl_lock);

	if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
		return;
	sop->so_ops->so_unhash(sop);
	spin_unlock(&clp->cl_lock);
	nfs4_free_stateowner(sop);
}

static bool
nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp)
{
	return list_empty(&stp->st_perfile);
}

static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
{
	struct nfs4_file *fp = stp->st_stid.sc_file;

	lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);

	if (list_empty(&stp->st_perfile))
		return false;

	spin_lock(&fp->fi_lock);
	list_del_init(&stp->st_perfile);
	spin_unlock(&fp->fi_lock);
	list_del(&stp->st_perstateowner);
	return true;
}

static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
{
	struct nfs4_ol_stateid *stp = openlockstateid(stid);

	put_clnt_odstate(stp->st_clnt_odstate);
	release_all_access(stp);
	if (stp->st_stateowner)
		nfs4_put_stateowner(stp->st_stateowner);
	WARN_ON(!list_empty(&stid->sc_cp_list));
	kmem_cache_free(stateid_slab, stid);
}

static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
	struct nfs4_ol_stateid *stp = openlockstateid(stid);
	struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
	struct nfsd_file *nf;

	nf = find_any_file(stp->st_stid.sc_file);
	if (nf) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, (fl_owner_t)lo);
		nfsd_file_put(nf);
	}
	nfs4_free_ol_stateid(stid);
}

/*
 * Put the persistent reference to an already unhashed generic stateid, while
 * holding the cl_lock. If it's the last reference, then put it onto the
 * reaplist for later destruction.
 */
static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
				       struct list_head *reaplist)
{
	struct nfs4_stid *s = &stp->st_stid;
	struct nfs4_client *clp = s->sc_client;

	lockdep_assert_held(&clp->cl_lock);

	WARN_ON_ONCE(!list_empty(&stp->st_locks));

	if (!refcount_dec_and_test(&s->sc_count)) {
		wake_up_all(&close_wq);
		return;
	}

	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
	list_add(&stp->st_locks, reaplist);
}

static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
{
	lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);

	if (!unhash_ol_stateid(stp))
		return false;
	list_del_init(&stp->st_locks);
	nfs4_unhash_stid(&stp->st_stid);
	return true;
}

static void release_lock_stateid(struct nfs4_ol_stateid *stp)
{
	struct nfs4_client *clp = stp->st_stid.sc_client;
	bool unhashed;

	spin_lock(&clp->cl_lock);
	unhashed = unhash_lock_stateid(stp);
	spin_unlock(&clp->cl_lock);
	if (unhashed)
		nfs4_put_stid(&stp->st_stid);
}

static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
{
	struct nfs4_client *clp = lo->lo_owner.so_client;

	lockdep_assert_held(&clp->cl_lock);

	list_del_init(&lo->lo_owner.so_strhash);
}

/*
 * Free a list of generic stateids that were collected earlier after being
 * fully unhashed.
 */
static void
free_ol_stateid_reaplist(struct list_head *reaplist)
{
	struct nfs4_ol_stateid *stp;
	struct nfs4_file *fp;

	might_sleep();

	while (!list_empty(reaplist)) {
		stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
				       st_locks);
		list_del(&stp->st_locks);
		fp = stp->st_stid.sc_file;
		stp->st_stid.sc_free(&stp->st_stid);
		if (fp)
			put_nfs4_file(fp);
	}
}

static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
				       struct list_head *reaplist)
{
	struct nfs4_ol_stateid *stp;

	lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock);

	while (!list_empty(&open_stp->st_locks)) {
		stp = list_entry(open_stp->st_locks.next,
				struct nfs4_ol_stateid, st_locks);
		WARN_ON(!unhash_lock_stateid(stp));
		put_ol_stateid_locked(stp, reaplist);
	}
}

static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
				struct list_head *reaplist)
{
	lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);

	if (!unhash_ol_stateid(stp))
		return false;
	release_open_stateid_locks(stp, reaplist);
	return true;
}

static void release_open_stateid(struct nfs4_ol_stateid *stp)
{
	LIST_HEAD(reaplist);

	spin_lock(&stp->st_stid.sc_client->cl_lock);
	if (unhash_open_stateid(stp, &reaplist))
		put_ol_stateid_locked(stp, &reaplist);
	spin_unlock(&stp->st_stid.sc_client->cl_lock);
	free_ol_stateid_reaplist(&reaplist);
}

static void unhash_openowner_locked(struct nfs4_openowner *oo)
{
	struct nfs4_client *clp = oo->oo_owner.so_client;

	lockdep_assert_held(&clp->cl_lock);

	list_del_init(&oo->oo_owner.so_strhash);
	list_del_init(&oo->oo_perclient);
}

static void release_last_closed_stateid(struct nfs4_openowner *oo)
{
	struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
					  nfsd_net_id);
	struct nfs4_ol_stateid *s;

	spin_lock(&nn->client_lock);
	s = oo->oo_last_closed_stid;
	if (s) {
		list_del_init(&oo->oo_close_lru);
		oo->oo_last_closed_stid = NULL;
	}
	spin_unlock(&nn->client_lock);
	if (s)
		nfs4_put_stid(&s->st_stid);
}

static void release_openowner(struct nfs4_openowner *oo)
{
	struct nfs4_ol_stateid *stp;
	struct nfs4_client *clp = oo->oo_owner.so_client;
	struct list_head reaplist;

	INIT_LIST_HEAD(&reaplist);

	spin_lock(&clp->cl_lock);
	unhash_openowner_locked(oo);
	while (!list_empty(&oo->oo_owner.so_stateids)) {
		stp = list_first_entry(&oo->oo_owner.so_stateids,
				struct nfs4_ol_stateid, st_perstateowner);
		if (unhash_open_stateid(stp, &reaplist))
			put_ol_stateid_locked(stp, &reaplist);
	}
	spin_unlock(&clp->cl_lock);
	free_ol_stateid_reaplist(&reaplist);
	release_last_closed_stateid(oo);
	nfs4_put_stateowner(&oo->oo_owner);
}

static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
	struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;

	return sid->sequence % SESSION_HASH_SIZE;
}

#ifdef CONFIG_SUNRPC_DEBUG
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
	u32 *ptr = (u32 *)(&sessionid->data[0]);
	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
}
#else
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
}
#endif

/*
 * Bump the seqid on cstate->replay_owner, and clear replay_owner if it
 * won't be used for replay.
 */
void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
{
	struct nfs4_stateowner *so = cstate->replay_owner;

	if (nfserr == nfserr_replay_me)
		return;

	if (!seqid_mutating_err(ntohl(nfserr))) {
		nfsd4_cstate_clear_replay(cstate);
		return;
	}
	if (!so)
		return;
	if (so->so_is_open_owner)
		release_last_closed_stateid(openowner(so));
	so->so_seqid++;
	return;
}

static void
gen_sessionid(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd4_sessionid *sid;

	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
	sid->clientid = clp->cl_clientid;
	sid->sequence = current_sessionid++;
	sid->reserved = 0;
}

/*
 * The protocol defines ca_maxresponsesize_cached to include the size of
 * the rpc header, but all we need to cache is the data starting after
 * the end of the initial SEQUENCE operation--the rest we regenerate
 * each time.  Therefore we can advertise a ca_maxresponsesize_cached
 * value that is the number of bytes in our cache plus a few additional
 * bytes.  In order to stay on the safe side, and not promise more than
 * we can cache, those additional bytes must be the minimum possible: 24
 * bytes of rpc header (xid through accept state, with AUTH_NULL
 * verifier), 12 for the compound header (with zero-length tag), and 44
 * for the SEQUENCE op response:
 */
#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
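
/*
 * NFSD_MIN_HDR_SEQ_SZ therefore works out to 80 bytes; slot_bytes()
 * below subtracts it from the requested maxresp_cached to size each
 * slot's reply cache.
 */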

static void
free_session_slots(struct nfsd4_session *ses)
{
	int i;

	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
		free_svc_cred(&ses->se_slots[i]->sl_cred);
		kfree(ses->se_slots[i]);
	}
}

/*
 * We don't actually need to cache the rpc and session headers, so we
 * can allocate a little less for each slot:
 */
static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
{
	u32 size;

	if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
		size = 0;
	else
		size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
	return size + sizeof(struct nfsd4_slot);
}

/*
 * XXX: If we run out of reserved DRC memory we could (up to a point)
 * re-negotiate active sessions and reduce their slot usage to make
 * room for new connections. For now we just fail the create session.
 */
static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
{
	u32 slotsize = slot_bytes(ca);
	u32 num = ca->maxreqs;
	unsigned long avail, total_avail;
	unsigned int scale_factor;

	spin_lock(&nfsd_drc_lock);
	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
	else
		/* We have handed out more space than we chose in
		 * set_max_drc() to allow.  That isn't really a
		 * problem as long as that doesn't make us think we
		 * have lots more due to integer overflow.
		 */
		total_avail = 0;
	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
	/*
	 * Never use more than a fraction of the remaining memory,
	 * unless it's the only way to give this client a slot.
	 * The chosen fraction is either 1/8 or 1/number of threads,
	 * whichever is smaller.  This ensures there are adequate
	 * slots to support multiple clients per thread.
	 * Give the client one slot even if that would require
	 * over-allocation--it is better than failure.
	 */
	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);

	avail = clamp_t(unsigned long, avail, slotsize,
			total_avail/scale_factor);
	num = min_t(int, num, avail / slotsize);
	num = max_t(int, num, 1);
	nfsd_drc_mem_used += num * slotsize;
	spin_unlock(&nfsd_drc_lock);

	return num;
}
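
/*
 * Illustrative numbers: with, say, 8MB of DRC memory remaining and 16
 * nfsd threads, scale_factor is 16, so avail is clamped to at most
 * 512KB; at a slotsize of 2KB that allows this session at most 256
 * slots, and never fewer than one.
 */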

static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
{
	int slotsize = slot_bytes(ca);

	spin_lock(&nfsd_drc_lock);
	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
	spin_unlock(&nfsd_drc_lock);
}

static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
					   struct nfsd4_channel_attrs *battrs)
{
	int numslots = fattrs->maxreqs;
	int slotsize = slot_bytes(fattrs);
	struct nfsd4_session *new;
	int mem, i;

	BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
			+ sizeof(struct nfsd4_session) > PAGE_SIZE);
	mem = numslots * sizeof(struct nfsd4_slot *);

	new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
	if (!new)
		return NULL;
	/* allocate each struct nfsd4_slot and data cache in one piece */
	for (i = 0; i < numslots; i++) {
		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
		if (!new->se_slots[i])
			goto out_free;
	}

	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));

	return new;
out_free:
	while (i--)
		kfree(new->se_slots[i]);
	kfree(new);
	return NULL;
}

static void free_conn(struct nfsd4_conn *c)
{
	svc_xprt_put(c->cn_xprt);
	kfree(c);
}

static void nfsd4_conn_lost(struct svc_xpt_user *u)
{
	struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
	struct nfs4_client *clp = c->cn_session->se_client;

	spin_lock(&clp->cl_lock);
	if (!list_empty(&c->cn_persession)) {
		list_del(&c->cn_persession);
		free_conn(c);
	}
	nfsd4_probe_callback(clp);
	spin_unlock(&clp->cl_lock);
}

static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
{
	struct nfsd4_conn *conn;

	conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
	if (!conn)
		return NULL;
	svc_xprt_get(rqstp->rq_xprt);
	conn->cn_xprt = rqstp->rq_xprt;
	conn->cn_flags = flags;
	INIT_LIST_HEAD(&conn->cn_xpt_user.list);
	return conn;
}

static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
{
	conn->cn_session = ses;
	list_add(&conn->cn_persession, &ses->se_conns);
}

static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;

	spin_lock(&clp->cl_lock);
	__nfsd4_hash_conn(conn, ses);
	spin_unlock(&clp->cl_lock);
}

static int nfsd4_register_conn(struct nfsd4_conn *conn)
{
	conn->cn_xpt_user.callback = nfsd4_conn_lost;
	return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
}

static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses)
{
	int ret;

	nfsd4_hash_conn(conn, ses);
	ret = nfsd4_register_conn(conn);
	if (ret)
		/* oops; xprt is already down: */
		nfsd4_conn_lost(&conn->cn_xpt_user);
	/* We may have gained or lost a callback channel: */
	nfsd4_probe_callback_sync(ses->se_client);
}

static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
{
	u32 dir = NFS4_CDFC4_FORE;

	if (cses->flags & SESSION4_BACK_CHAN)
		dir |= NFS4_CDFC4_BACK;
	return alloc_conn(rqstp, dir);
}

/* must be called under client_lock */
static void nfsd4_del_conns(struct nfsd4_session *s)
{
	struct nfs4_client *clp = s->se_client;
	struct nfsd4_conn *c;

	spin_lock(&clp->cl_lock);
	while (!list_empty(&s->se_conns)) {
		c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
		list_del_init(&c->cn_persession);
		spin_unlock(&clp->cl_lock);

		unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
		free_conn(c);

		spin_lock(&clp->cl_lock);
	}
	spin_unlock(&clp->cl_lock);
}

static void __free_session(struct nfsd4_session *ses)
{
	free_session_slots(ses);
	kfree(ses);
}

static void free_session(struct nfsd4_session *ses)
{
	nfsd4_del_conns(ses);
	nfsd4_put_drc_mem(&ses->se_fchannel);
	__free_session(ses);
}

static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
	int idx;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	new->se_client = clp;
	gen_sessionid(new);

	INIT_LIST_HEAD(&new->se_conns);

	new->se_cb_seq_nr = 1;
	new->se_flags = cses->flags;
	new->se_cb_prog = cses->callback_prog;
	new->se_cb_sec = cses->cb_sec;
	atomic_set(&new->se_ref, 0);
	idx = hash_sessionid(&new->se_sessionid);
	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
	spin_lock(&clp->cl_lock);
	list_add(&new->se_perclnt, &clp->cl_sessions);
	spin_unlock(&clp->cl_lock);

	{
		struct sockaddr *sa = svc_addr(rqstp);
		/*
		 * This is a little silly; with sessions there's no real
		 * use for the callback address.  Use the peer address
		 * as a reasonable default for now, but consider fixing
		 * the rpc client not to require an address in the
		 * future:
		 */
		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
	}
}

/* caller must hold client_lock */
static struct nfsd4_session *
__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
{
	struct nfsd4_session *elem;
	int idx;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	dump_sessionid(__func__, sessionid);
	idx = hash_sessionid(sessionid);
	/* Search in the appropriate list */
	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
		if (!memcmp(elem->se_sessionid.data, sessionid->data,
			    NFS4_MAX_SESSIONID_LEN)) {
			return elem;
		}
	}

	dprintk("%s: session not found\n", __func__);
	return NULL;
}

static struct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
		__be32 *ret)
{
	struct nfsd4_session *session;
	__be32 status = nfserr_badsession;

	session = __find_in_sessionid_hashtbl(sessionid, net);
	if (!session)
		goto out;
	status = nfsd4_get_session_locked(session);
	if (status)
		session = NULL;
out:
	*ret = status;
	return session;
}

/* caller must hold client_lock */
static void
unhash_session(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	list_del(&ses->se_hash);
	spin_lock(&ses->se_client->cl_lock);
	list_del(&ses->se_perclnt);
	spin_unlock(&ses->se_client->cl_lock);
}

/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
static int
STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
{
	/*
	 * We're assuming the clid was not given out from a boot
	 * precisely 2^32 (about 136 years) before this one.  That seems
	 * a safe assumption:
	 */
	if (clid->cl_boot == (u32)nn->boot_time)
		return 0;
	trace_nfsd_clid_stale(clid);
	return 1;
}
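
/*
 * (2^32 seconds is roughly 4.3 billion seconds, which is where the
 * "about 136 years" figure above comes from.)
 */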

/*
 * XXX Should we use a slab cache?
 * This type of memory management is somewhat inefficient, but we use it
 * anyway since SETCLIENTID is not a common operation.
 */
static struct nfs4_client *alloc_client(struct xdr_netobj name)
{
	struct nfs4_client *clp;
	int i;

	clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
	if (clp == NULL)
		return NULL;
	xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
	if (clp->cl_name.data == NULL)
		goto err_no_name;
	clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
						 sizeof(struct list_head),
						 GFP_KERNEL);
	if (!clp->cl_ownerstr_hashtbl)
		goto err_no_hashtbl;
	for (i = 0; i < OWNER_HASH_SIZE; i++)
		INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
	INIT_LIST_HEAD(&clp->cl_sessions);
	idr_init(&clp->cl_stateids);
	atomic_set(&clp->cl_rpc_users, 0);
	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
	INIT_LIST_HEAD(&clp->cl_idhash);
	INIT_LIST_HEAD(&clp->cl_openowners);
	INIT_LIST_HEAD(&clp->cl_delegations);
	INIT_LIST_HEAD(&clp->cl_lru);
	INIT_LIST_HEAD(&clp->cl_revoked);
#ifdef CONFIG_NFSD_PNFS
	INIT_LIST_HEAD(&clp->cl_lo_states);
#endif
	INIT_LIST_HEAD(&clp->async_copies);
	spin_lock_init(&clp->async_lock);
	spin_lock_init(&clp->cl_lock);
	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
	return clp;
err_no_hashtbl:
	kfree(clp->cl_name.data);
err_no_name:
	kmem_cache_free(client_slab, clp);
	return NULL;
}

static void __free_client(struct kref *k)
{
	struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
	struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);

	free_svc_cred(&clp->cl_cred);
	kfree(clp->cl_ownerstr_hashtbl);
	kfree(clp->cl_name.data);
	kfree(clp->cl_nii_domain.data);
	kfree(clp->cl_nii_name.data);
	idr_destroy(&clp->cl_stateids);
	kmem_cache_free(client_slab, clp);
}

static void drop_client(struct nfs4_client *clp)
{
	kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
}

static void
free_client(struct nfs4_client *clp)
{
	while (!list_empty(&clp->cl_sessions)) {
		struct nfsd4_session *ses;
		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
				se_perclnt);
		list_del(&ses->se_perclnt);
		WARN_ON_ONCE(atomic_read(&ses->se_ref));
		free_session(ses);
	}
	rpc_destroy_wait_queue(&clp->cl_cb_waitq);
	if (clp->cl_nfsd_dentry) {
		nfsd_client_rmdir(clp->cl_nfsd_dentry);
		clp->cl_nfsd_dentry = NULL;
		wake_up_all(&expiry_wq);
	}
	drop_client(clp);
}

/* must be called under the client_lock */
static void
unhash_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
	struct nfsd4_session *ses;

	lockdep_assert_held(&nn->client_lock);

	/* Mark the client as expired! */
	clp->cl_time = 0;
	/* Make it invisible */
	if (!list_empty(&clp->cl_idhash)) {
		list_del_init(&clp->cl_idhash);
		if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
			rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
		else
			rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
	}
	list_del_init(&clp->cl_lru);
	spin_lock(&clp->cl_lock);
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
		list_del_init(&ses->se_hash);
	spin_unlock(&clp->cl_lock);
}

static void
unhash_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	unhash_client_locked(clp);
	spin_unlock(&nn->client_lock);
}

static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
	if (atomic_read(&clp->cl_rpc_users))
2082		return nfserr_jukebox;
2083	unhash_client_locked(clp);
2084	return nfs_ok;
2085}
2086
2087static void
2088__destroy_client(struct nfs4_client *clp)
2089{
2090	int i;
2091	struct nfs4_openowner *oo;
2092	struct nfs4_delegation *dp;
2093	struct list_head reaplist;
2094
2095	INIT_LIST_HEAD(&reaplist);
2096	spin_lock(&state_lock);
2097	while (!list_empty(&clp->cl_delegations)) {
2098		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
2099		WARN_ON(!unhash_delegation_locked(dp));
2100		list_add(&dp->dl_recall_lru, &reaplist);
2101	}
2102	spin_unlock(&state_lock);
2103	while (!list_empty(&reaplist)) {
2104		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
2105		list_del_init(&dp->dl_recall_lru);
2106		destroy_unhashed_deleg(dp);
2107	}
2108	while (!list_empty(&clp->cl_revoked)) {
2109		dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
2110		list_del_init(&dp->dl_recall_lru);
2111		nfs4_put_stid(&dp->dl_stid);
2112	}
2113	while (!list_empty(&clp->cl_openowners)) {
2114		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
2115		nfs4_get_stateowner(&oo->oo_owner);
2116		release_openowner(oo);
2117	}
2118	for (i = 0; i < OWNER_HASH_SIZE; i++) {
2119		struct nfs4_stateowner *so, *tmp;
2120
2121		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
2122					 so_strhash) {
2123			/* Should be no openowners at this point */
2124			WARN_ON_ONCE(so->so_is_open_owner);
2125			remove_blocked_locks(lockowner(so));
2126		}
2127	}
2128	nfsd4_return_all_client_layouts(clp);
2129	nfsd4_shutdown_copy(clp);
2130	nfsd4_shutdown_callback(clp);
2131	if (clp->cl_cb_conn.cb_xprt)
2132		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
2133	free_client(clp);
2134	wake_up_all(&expiry_wq);
2135}
2136
2137static void
2138destroy_client(struct nfs4_client *clp)
2139{
2140	unhash_client(clp);
2141	__destroy_client(clp);
2142}
2143
2144static void inc_reclaim_complete(struct nfs4_client *clp)
2145{
2146	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2147
2148	if (!nn->track_reclaim_completes)
2149		return;
2150	if (!nfsd4_find_reclaim_client(clp->cl_name, nn))
2151		return;
2152	if (atomic_inc_return(&nn->nr_reclaim_complete) ==
2153			nn->reclaim_str_hashtbl_size) {
2154		printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
2155				clp->net->ns.inum);
2156		nfsd4_end_grace(nn);
2157	}
2158}
2159
2160static void expire_client(struct nfs4_client *clp)
2161{
2162	unhash_client(clp);
2163	nfsd4_client_record_remove(clp);
2164	__destroy_client(clp);
2165}
2166
2167static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
2168{
2169	memcpy(target->cl_verifier.data, source->data,
2170			sizeof(target->cl_verifier.data));
2171}
2172
2173static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
2174{
2175	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
2176	target->cl_clientid.cl_id = source->cl_clientid.cl_id;
2177}
2178
2179static int copy_cred(struct svc_cred *target, struct svc_cred *source)
2180{
2181	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
2182	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
2183								GFP_KERNEL);
2184	target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
2185	if ((source->cr_principal && !target->cr_principal) ||
2186	    (source->cr_raw_principal && !target->cr_raw_principal) ||
2187	    (source->cr_targ_princ && !target->cr_targ_princ))
2188		return -ENOMEM;
2189
2190	target->cr_flavor = source->cr_flavor;
2191	target->cr_uid = source->cr_uid;
2192	target->cr_gid = source->cr_gid;
2193	target->cr_group_info = source->cr_group_info;
2194	get_group_info(target->cr_group_info);
2195	target->cr_gss_mech = source->cr_gss_mech;
2196	if (source->cr_gss_mech)
2197		gss_mech_get(source->cr_gss_mech);
2198	return 0;
2199}
2200
2201static int
2202compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
2203{
2204	if (o1->len < o2->len)
2205		return -1;
2206	if (o1->len > o2->len)
2207		return 1;
2208	return memcmp(o1->data, o2->data, o1->len);
2209}
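
/*
 * For illustration: this yields a total order by length first, then
 * bytewise content, so e.g. a 3-byte name "abc" sorts before a 4-byte
 * name "aaaa" even though it compares greater lexicographically.  Any
 * consistent total order suffices here; it only has to keep the
 * client-name rb-trees searchable.
 */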
2210
2211static int
2212same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
2213{
2214	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
2215}
2216
2217static int
2218same_clid(clientid_t *cl1, clientid_t *cl2)
2219{
2220	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
2221}
2222
2223static bool groups_equal(struct group_info *g1, struct group_info *g2)
2224{
2225	int i;
2226
2227	if (g1->ngroups != g2->ngroups)
2228		return false;
2229	for (i = 0; i < g1->ngroups; i++)
2230		if (!gid_eq(g1->gid[i], g2->gid[i]))
2231			return false;
2232	return true;
2233}
2234
2235/*
2236 * RFC 3530 language requires clid_inuse be returned when the
2237 * "principal" associated with a requests differs from that previously
2238 * used.  We use uid, gid's, and gss principal string as our best
2239 * approximation.  We also don't want to allow non-gss use of a client
2240 * established using gss: in theory cr_principal should catch that
2241 * change, but in practice cr_principal can be null even in the gss case
2242 * since gssd doesn't always pass down a principal string.
2243 */
2244static bool is_gss_cred(struct svc_cred *cr)
2245{
2246	/* Is cr_flavor one of the gss "pseudoflavors"? */
2247	return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
2248}
2249
2250
2251static bool
2252same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
2253{
2254	if ((is_gss_cred(cr1) != is_gss_cred(cr2))
2255		|| (!uid_eq(cr1->cr_uid, cr2->cr_uid))
2256		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
2257		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
2258		return false;
2259	/* XXX: check that cr_targ_princ fields match? */
2260	if (cr1->cr_principal == cr2->cr_principal)
2261		return true;
2262	if (!cr1->cr_principal || !cr2->cr_principal)
2263		return false;
2264	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
2265}
2266
2267static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
2268{
2269	struct svc_cred *cr = &rqstp->rq_cred;
2270	u32 service;
2271
2272	if (!cr->cr_gss_mech)
2273		return false;
2274	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
2275	return service == RPC_GSS_SVC_INTEGRITY ||
2276	       service == RPC_GSS_SVC_PRIVACY;
2277}
2278
2279bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
2280{
2281	struct svc_cred *cr = &rqstp->rq_cred;
2282
2283	if (!cl->cl_mach_cred)
2284		return true;
2285	if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech)
2286		return false;
2287	if (!svc_rqst_integrity_protected(rqstp))
2288		return false;
2289	if (cl->cl_cred.cr_raw_principal)
2290		return 0 == strcmp(cl->cl_cred.cr_raw_principal,
2291						cr->cr_raw_principal);
2292	if (!cr->cr_principal)
2293		return false;
2294	return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
2295}
2296
2297static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
2298{
2299	__be32 verf[2];
2300
2301	/*
2302	 * This is opaque to the client, so no need to byte-swap.  Use
2303	 * __force to keep sparse happy
2304	 */
2305	verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
2306	verf[1] = (__force __be32)nn->clverifier_counter++;
2307	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
2308}
2309
2310static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
2311{
2312	clp->cl_clientid.cl_boot = (u32)nn->boot_time;
2313	clp->cl_clientid.cl_id = nn->clientid_counter++;
2314	gen_confirm(clp, nn);
2315}
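
/*
 * For illustration (hypothetical values): the 5th client of a server
 * booted at time 0x63a1b2c4 gets clientid { cl_boot = 0x63a1b2c4,
 * cl_id = 5 }, plus a confirm verifier built from the current
 * wall-clock seconds and a per-net counter, so both change across
 * server reboots.
 */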
2316
2317static struct nfs4_stid *
2318find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
2319{
2320	struct nfs4_stid *ret;
2321
2322	ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id);
2323	if (!ret || !ret->sc_type)
2324		return NULL;
2325	return ret;
2326}
2327
2328static struct nfs4_stid *
2329find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
2330{
2331	struct nfs4_stid *s;
2332
2333	spin_lock(&cl->cl_lock);
2334	s = find_stateid_locked(cl, t);
2335	if (s != NULL) {
2336		if (typemask & s->sc_type)
2337			refcount_inc(&s->sc_count);
2338		else
2339			s = NULL;
2340	}
2341	spin_unlock(&cl->cl_lock);
2342	return s;
2343}
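
/*
 * For illustration: since sc_type is matched against a bitmask, a
 * caller that accepts either an open or a lock stateid can pass a
 * combined mask (hypothetical call):
 *
 *	s = find_stateid_by_type(cl, t, NFS4_OPEN_STID | NFS4_LOCK_STID);
 */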
2344
2345static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
2346{
2347	struct nfsdfs_client *nc;
2348	nc = get_nfsdfs_client(inode);
2349	if (!nc)
2350		return NULL;
2351	return container_of(nc, struct nfs4_client, cl_nfsdfs);
2352}
2353
2354static void seq_quote_mem(struct seq_file *m, char *data, int len)
2355{
2356	seq_printf(m, "\"");
2357	seq_escape_mem_ascii(m, data, len);
2358	seq_printf(m, "\"");
2359}
2360
2361static int client_info_show(struct seq_file *m, void *v)
2362{
2363	struct inode *inode = m->private;
2364	struct nfs4_client *clp;
2365	u64 clid;
2366
2367	clp = get_nfsdfs_clp(inode);
2368	if (!clp)
2369		return -ENXIO;
2370	memcpy(&clid, &clp->cl_clientid, sizeof(clid));
2371	seq_printf(m, "clientid: 0x%llx\n", clid);
2372	seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
2373	seq_printf(m, "name: ");
2374	seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
2375	seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
2376	if (clp->cl_nii_domain.data) {
2377		seq_printf(m, "Implementation domain: ");
2378		seq_quote_mem(m, clp->cl_nii_domain.data,
2379					clp->cl_nii_domain.len);
2380		seq_printf(m, "\nImplementation name: ");
2381		seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
2382		seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
2383			clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
2384	}
2385	drop_client(clp);
2386
2387	return 0;
2388}
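
/*
 * Example "info" output, with hypothetical values (the Implementation
 * lines appear only when the client supplied them at EXCHANGE_ID time):
 *
 *	clientid: 0x63a1b2c400000005
 *	address: "192.0.2.24:756"
 *	name: "Linux NFSv4.2 example.host"
 *	minor version: 2
 */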
2389
2390static int client_info_open(struct inode *inode, struct file *file)
2391{
2392	return single_open(file, client_info_show, inode);
2393}
2394
2395static const struct file_operations client_info_fops = {
2396	.open		= client_info_open,
2397	.read		= seq_read,
2398	.llseek		= seq_lseek,
2399	.release	= single_release,
2400};
2401
2402static void *states_start(struct seq_file *s, loff_t *pos)
2403	__acquires(&clp->cl_lock)
2404{
2405	struct nfs4_client *clp = s->private;
2406	unsigned long id = *pos;
2407	void *ret;
2408
2409	spin_lock(&clp->cl_lock);
2410	ret = idr_get_next_ul(&clp->cl_stateids, &id);
2411	*pos = id;
2412	return ret;
2413}
2414
2415static void *states_next(struct seq_file *s, void *v, loff_t *pos)
2416{
2417	struct nfs4_client *clp = s->private;
2418	unsigned long id = *pos;
2419	void *ret;
2420
2422	id++;
2423	ret = idr_get_next_ul(&clp->cl_stateids, &id);
2424	*pos = id;
2425	return ret;
2426}
2427
2428static void states_stop(struct seq_file *s, void *v)
2429	__releases(&clp->cl_lock)
2430{
2431	struct nfs4_client *clp = s->private;
2432
2433	spin_unlock(&clp->cl_lock);
2434}
2435
2436static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
2437{
2438	seq_printf(s, "filename: \"%pD2\"", f->nf_file);
2439}
2440
2441static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
2442{
2443	struct inode *inode = f->nf_inode;
2444
2445	seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
2446					 MAJOR(inode->i_sb->s_dev),
2447					 MINOR(inode->i_sb->s_dev),
2448					 inode->i_ino);
2449}
2450
2451static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
2452{
2453	seq_printf(s, "owner: ");
2454	seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
2455}
2456
2457static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid)
2458{
2459	seq_printf(s, "0x%.8x", stid->si_generation);
2460	seq_printf(s, "%12phN", &stid->si_opaque);
2461}
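
/*
 * I.e. a stateid renders as the 4-byte generation in hex followed by
 * the 12 opaque bytes (the clientid plus a per-client counter), e.g.
 * (hypothetical) 0x00000002aabbccddeeff001122334455.
 */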
2462
2463static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
2464{
2465	struct nfs4_ol_stateid *ols;
2466	struct nfs4_file *nf;
2467	struct nfsd_file *file;
2468	struct nfs4_stateowner *oo;
2469	unsigned int access, deny;
2470
2471	if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
2472		return 0; /* XXX: or SEQ_SKIP? */
2473	ols = openlockstateid(st);
2474	oo = ols->st_stateowner;
2475	nf = st->sc_file;
2476
2477	spin_lock(&nf->fi_lock);
2478	file = find_any_file_locked(nf);
2479	if (!file)
2480		goto out;
2481
2482	seq_printf(s, "- ");
2483	nfs4_show_stateid(s, &st->sc_stateid);
2484	seq_printf(s, ": { type: open, ");
2485
2486	access = bmap_to_share_mode(ols->st_access_bmap);
2487	deny   = bmap_to_share_mode(ols->st_deny_bmap);
2488
2489	seq_printf(s, "access: %s%s, ",
2490		access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
2491		access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
2492	seq_printf(s, "deny: %s%s, ",
2493		deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
2494		deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
2495
2496	nfs4_show_superblock(s, file);
2497	seq_printf(s, ", ");
2498	nfs4_show_fname(s, file);
2499	seq_printf(s, ", ");
2500	nfs4_show_owner(s, oo);
2501	seq_printf(s, " }\n");
2502out:
2503	spin_unlock(&nf->fi_lock);
2504	return 0;
2505}
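
/*
 * A "states" line for an open stateid then looks like this (one line
 * per stateid; hypothetical values, wrapped here for readability):
 *
 * - 0x00000001aabbccddeeff001122334455: { type: open, access: rw,
 *   deny: --, superblock: "fd:10:13649", filename: "exports/file",
 *   owner: "example-owner" }
 */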
2506
2507static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
2508{
2509	struct nfs4_ol_stateid *ols;
2510	struct nfs4_file *nf;
2511	struct nfsd_file *file;
2512	struct nfs4_stateowner *oo;
2513
2514	ols = openlockstateid(st);
2515	oo = ols->st_stateowner;
2516	nf = st->sc_file;
2517	spin_lock(&nf->fi_lock);
2518	file = find_any_file_locked(nf);
2519	if (!file)
2520		goto out;
2521
2522	seq_printf(s, "- ");
2523	nfs4_show_stateid(s, &st->sc_stateid);
2524	seq_printf(s, ": { type: lock, ");
2525
2526	/*
2527	 * Note: a lock stateid isn't really the same thing as a lock,
2528	 * it's the locking state held by one owner on a file, and there
2529	 * may be multiple (or no) lock ranges associated with it.
2530	 * (The same is true of open stateids.)
2531	 */
2532
2533	nfs4_show_superblock(s, file);
2534	/* XXX: open stateid? */
2535	seq_printf(s, ", ");
2536	nfs4_show_fname(s, file);
2537	seq_printf(s, ", ");
2538	nfs4_show_owner(s, oo);
2539	seq_printf(s, " }\n");
2540out:
2541	spin_unlock(&nf->fi_lock);
2542	return 0;
2543}
2544
2545static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
2546{
2547	struct nfs4_delegation *ds;
2548	struct nfs4_file *nf;
2549	struct nfsd_file *file;
2550
2551	ds = delegstateid(st);
2552	nf = st->sc_file;
2553	spin_lock(&nf->fi_lock);
2554	file = find_deleg_file_locked(nf);
2555	if (!file)
2556		goto out;
2557
2558	seq_printf(s, "- ");
2559	nfs4_show_stateid(s, &st->sc_stateid);
2560	seq_printf(s, ": { type: deleg, ");
2561
2562	/* Kinda dead code as long as we only support read delegs: */
2563	seq_printf(s, "access: %s, ",
2564		ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
2565
2566	/* XXX: lease time, whether it's being recalled. */
2567
2568	nfs4_show_superblock(s, file);
2569	seq_printf(s, ", ");
2570	nfs4_show_fname(s, file);
2571	seq_printf(s, " }\n");
2572out:
2573	spin_unlock(&nf->fi_lock);
2574	return 0;
2575}
2576
2577static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
2578{
2579	struct nfs4_layout_stateid *ls;
2580	struct nfsd_file *file;
2581
2582	ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
2583	file = ls->ls_file;
2584
2585	seq_printf(s, "- ");
2586	nfs4_show_stateid(s, &st->sc_stateid);
2587	seq_printf(s, ": { type: layout, ");
2588
2589	/* XXX: What else would be useful? */
2590
2591	nfs4_show_superblock(s, file);
2592	seq_printf(s, ", ");
2593	nfs4_show_fname(s, file);
2594	seq_printf(s, " }\n");
2595
2596	return 0;
2597}
2598
2599static int states_show(struct seq_file *s, void *v)
2600{
2601	struct nfs4_stid *st = v;
2602
2603	switch (st->sc_type) {
2604	case NFS4_OPEN_STID:
2605		return nfs4_show_open(s, st);
2606	case NFS4_LOCK_STID:
2607		return nfs4_show_lock(s, st);
2608	case NFS4_DELEG_STID:
2609		return nfs4_show_deleg(s, st);
2610	case NFS4_LAYOUT_STID:
2611		return nfs4_show_layout(s, st);
2612	default:
2613		return 0; /* XXX: or SEQ_SKIP? */
2614	}
2615	/* XXX: copy stateids? */
2616}
2617
2618static struct seq_operations states_seq_ops = {
2619	.start = states_start,
2620	.next = states_next,
2621	.stop = states_stop,
2622	.show = states_show
2623};
2624
2625static int client_states_open(struct inode *inode, struct file *file)
2626{
2627	struct seq_file *s;
2628	struct nfs4_client *clp;
2629	int ret;
2630
2631	clp = get_nfsdfs_clp(inode);
2632	if (!clp)
2633		return -ENXIO;
2634
2635	ret = seq_open(file, &states_seq_ops);
2636	if (ret)
2637		return ret;
2638	s = file->private_data;
2639	s->private = clp;
2640	return 0;
2641}
2642
2643static int client_opens_release(struct inode *inode, struct file *file)
2644{
2645	struct seq_file *m = file->private_data;
2646	struct nfs4_client *clp = m->private;
2647
2648	/* XXX: alternatively, we could get/drop in seq start/stop */
2649	drop_client(clp);
2650	return seq_release(inode, file);
2651}
2652
2653static const struct file_operations client_states_fops = {
2654	.open		= client_states_open,
2655	.read		= seq_read,
2656	.llseek		= seq_lseek,
2657	.release	= client_opens_release,
2658};
2659
2660/*
2661 * Normally we refuse to destroy clients that are in use, but here the
2662 * administrator is telling us to just do it.  We also want to wait
2663 * so the caller has a guarantee that the client's locks are gone by
2664 * the time the write returns:
2665 */
2666static void force_expire_client(struct nfs4_client *clp)
2667{
2668	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2669	bool already_expired;
2670
2671	spin_lock(&nn->client_lock);
2672	clp->cl_time = 0;
2673	spin_unlock(&nn->client_lock);
2674
2675	wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
2676	spin_lock(&nn->client_lock);
2677	already_expired = list_empty(&clp->cl_lru);
2678	if (!already_expired)
2679		unhash_client_locked(clp);
2680	spin_unlock(&nn->client_lock);
2681
2682	if (!already_expired)
2683		expire_client(clp);
2684	else
2685		wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
2686}
2687
2688static ssize_t client_ctl_write(struct file *file, const char __user *buf,
2689				   size_t size, loff_t *pos)
2690{
2691	char *data;
2692	struct nfs4_client *clp;
2693
2694	data = simple_transaction_get(file, buf, size);
2695	if (IS_ERR(data))
2696		return PTR_ERR(data);
2697	if (size != 7 || 0 != memcmp(data, "expire\n", 7))
2698		return -EINVAL;
2699	clp = get_nfsdfs_clp(file_inode(file));
2700	if (!clp)
2701		return -ENXIO;
2702	force_expire_client(clp);
2703	drop_client(clp);
2704	return 7;
2705}
2706
2707static const struct file_operations client_ctl_fops = {
2708	.write		= client_ctl_write,
2709	.release	= simple_transaction_release,
2710};
2711
2712static const struct tree_descr client_files[] = {
2713	[0] = {"info", &client_info_fops, S_IRUSR},
2714	[1] = {"states", &client_states_fops, S_IRUSR},
2715	[2] = {"ctl", &client_ctl_fops, S_IWUSR},
2716	[3] = {""},
2717};
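
/*
 * These appear as a small per-client directory in the nfsd filesystem
 * (conventionally mounted at /proc/fs/nfsd), named after the client's
 * id, e.g. clients/17/{info,states,ctl}.  Only the exact 7-byte string
 * "expire\n" is accepted by "ctl", so a hypothetical client 17 can be
 * forcibly expired with:
 *
 *	echo expire > /proc/fs/nfsd/clients/17/ctl
 */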
2718
2719static struct nfs4_client *create_client(struct xdr_netobj name,
2720		struct svc_rqst *rqstp, nfs4_verifier *verf)
2721{
2722	struct nfs4_client *clp;
2723	struct sockaddr *sa = svc_addr(rqstp);
2724	int ret;
2725	struct net *net = SVC_NET(rqstp);
2726	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2727
2728	clp = alloc_client(name);
2729	if (clp == NULL)
2730		return NULL;
2731
2732	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
2733	if (ret) {
2734		free_client(clp);
2735		return NULL;
2736	}
2737	gen_clid(clp, nn);
2738	kref_init(&clp->cl_nfsdfs.cl_ref);
2739	nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
2740	clp->cl_time = ktime_get_boottime_seconds();
2741	clear_bit(0, &clp->cl_cb_slot_busy);
2742	copy_verf(clp, verf);
2743	memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
2744	clp->cl_cb_session = NULL;
2745	clp->net = net;
2746	clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
2747			clp->cl_clientid.cl_id - nn->clientid_base,
2748			client_files);
2749	if (!clp->cl_nfsd_dentry) {
2750		free_client(clp);
2751		return NULL;
2752	}
2753	return clp;
2754}
2755
2756static void
2757add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
2758{
2759	struct rb_node **new = &(root->rb_node), *parent = NULL;
2760	struct nfs4_client *clp;
2761
2762	while (*new) {
2763		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
2764		parent = *new;
2765
2766		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
2767			new = &((*new)->rb_left);
2768		else
2769			new = &((*new)->rb_right);
2770	}
2771
2772	rb_link_node(&new_clp->cl_namenode, parent, new);
2773	rb_insert_color(&new_clp->cl_namenode, root);
2774}
2775
2776static struct nfs4_client *
2777find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
2778{
2779	int cmp;
2780	struct rb_node *node = root->rb_node;
2781	struct nfs4_client *clp;
2782
2783	while (node) {
2784		clp = rb_entry(node, struct nfs4_client, cl_namenode);
2785		cmp = compare_blob(&clp->cl_name, name);
2786		if (cmp > 0)
2787			node = node->rb_left;
2788		else if (cmp < 0)
2789			node = node->rb_right;
2790		else
2791			return clp;
2792	}
2793	return NULL;
2794}
2795
2796static void
2797add_to_unconfirmed(struct nfs4_client *clp)
2798{
2799	unsigned int idhashval;
2800	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2801
2802	lockdep_assert_held(&nn->client_lock);
2803
2804	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
2805	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
2806	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
2807	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
2808	renew_client_locked(clp);
2809}
2810
2811static void
2812move_to_confirmed(struct nfs4_client *clp)
2813{
2814	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
2815	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
2816
2817	lockdep_assert_held(&nn->client_lock);
2818
2819	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
2820	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
2821	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
2822	add_clp_to_name_tree(clp, &nn->conf_name_tree);
2823	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
2824	renew_client_locked(clp);
2825}
2826
2827static struct nfs4_client *
2828find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
2829{
2830	struct nfs4_client *clp;
2831	unsigned int idhashval = clientid_hashval(clid->cl_id);
2832
2833	list_for_each_entry(clp, &tbl[idhashval], cl_idhash) {
2834		if (same_clid(&clp->cl_clientid, clid)) {
2835			if ((bool)clp->cl_minorversion != sessions)
2836				return NULL;
2837			renew_client_locked(clp);
2838			return clp;
2839		}
2840	}
2841	return NULL;
2842}
2843
2844static struct nfs4_client *
2845find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
2846{
2847	struct list_head *tbl = nn->conf_id_hashtbl;
2848
2849	lockdep_assert_held(&nn->client_lock);
2850	return find_client_in_id_table(tbl, clid, sessions);
2851}
2852
2853static struct nfs4_client *
2854find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
2855{
2856	struct list_head *tbl = nn->unconf_id_hashtbl;
2857
2858	lockdep_assert_held(&nn->client_lock);
2859	return find_client_in_id_table(tbl, clid, sessions);
2860}
2861
2862static bool clp_used_exchangeid(struct nfs4_client *clp)
2863{
2864	return clp->cl_exchange_flags != 0;
2865}
2866
2867static struct nfs4_client *
2868find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
2869{
2870	lockdep_assert_held(&nn->client_lock);
2871	return find_clp_in_name_tree(name, &nn->conf_name_tree);
2872}
2873
2874static struct nfs4_client *
2875find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
2876{
2877	lockdep_assert_held(&nn->client_lock);
2878	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
2879}
2880
2881static void
2882gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
2883{
2884	struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
2885	struct sockaddr	*sa = svc_addr(rqstp);
2886	u32 scopeid = rpc_get_scope_id(sa);
2887	unsigned short expected_family;
2888
2889	/* Currently, we only support tcp and tcp6 for the callback channel */
2890	if (se->se_callback_netid_len == 3 &&
2891	    !memcmp(se->se_callback_netid_val, "tcp", 3))
2892		expected_family = AF_INET;
2893	else if (se->se_callback_netid_len == 4 &&
2894		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
2895		expected_family = AF_INET6;
2896	else
2897		goto out_err;
2898
2899	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
2900					    se->se_callback_addr_len,
2901					    (struct sockaddr *)&conn->cb_addr,
2902					    sizeof(conn->cb_addr));
2903
2904	if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
2905		goto out_err;
2906
2907	if (conn->cb_addr.ss_family == AF_INET6)
2908		((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
2909
2910	conn->cb_prog = se->se_callback_prog;
2911	conn->cb_ident = se->se_callback_ident;
2912	memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
2913	trace_nfsd_cb_args(clp, conn);
2914	return;
2915out_err:
2916	conn->cb_addr.ss_family = AF_UNSPEC;
2917	conn->cb_addrlen = 0;
2918	trace_nfsd_cb_nodelegs(clp);
2919	return;
2920}
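
/*
 * For illustration: the callback address arrives in universal address
 * form (RFC 5665), where the port is split into two decimal octets
 * appended to the IP address.  A hypothetical netid "tcp" with uaddr
 * "192.0.2.1.3.232" therefore parses to 192.0.2.1, port
 * 3 * 256 + 232 = 1000.
 */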
2921
2922/*
2923 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
2924 */
2925static void
2926nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
2927{
2928	struct xdr_buf *buf = resp->xdr.buf;
2929	struct nfsd4_slot *slot = resp->cstate.slot;
2930	unsigned int base;
2931
2932	dprintk("--> %s slot %p\n", __func__, slot);
2933
2934	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
2935	slot->sl_opcnt = resp->opcnt;
2936	slot->sl_status = resp->cstate.status;
2937	free_svc_cred(&slot->sl_cred);
2938	copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
2939
2940	if (!nfsd4_cache_this(resp)) {
2941		slot->sl_flags &= ~NFSD4_SLOT_CACHED;
2942		return;
2943	}
2944	slot->sl_flags |= NFSD4_SLOT_CACHED;
2945
2946	base = resp->cstate.data_offset;
2947	slot->sl_datalen = buf->len - base;
2948	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
2949		WARN(1, "%s: sessions DRC could not cache compound\n",
2950		     __func__);
2951	return;
2952}
2953
2954/*
2955 * Encode the replay sequence operation from the slot values.
2956 * If cachethis is FALSE, encode the uncached-reply error on the next
2957 * operation, which sets resp->p and increments resp->opcnt for
2958 * nfs4svc_encode_compoundres.
2959 *
2960 */
2961static __be32
2962nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
2963			  struct nfsd4_compoundres *resp)
2964{
2965	struct nfsd4_op *op;
2966	struct nfsd4_slot *slot = resp->cstate.slot;
2967
2968	/* Encode the replayed sequence operation */
2969	op = &args->ops[resp->opcnt - 1];
2970	nfsd4_encode_operation(resp, op);
2971
2972	if (slot->sl_flags & NFSD4_SLOT_CACHED)
2973		return op->status;
2974	if (args->opcnt == 1) {
2975		/*
2976		 * The original operation wasn't a solo sequence--we
2977		 * always cache those--so this retry must not match the
2978		 * original:
2979		 */
2980		op->status = nfserr_seq_false_retry;
2981	} else {
2982		op = &args->ops[resp->opcnt++];
2983		op->status = nfserr_retry_uncached_rep;
2984		nfsd4_encode_operation(resp, op);
2985	}
2986	return op->status;
2987}
2988
2989/*
2990 * The sequence operation is not cached because we can use the slot and
2991 * session values.
2992 */
2993static __be32
2994nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
2995			 struct nfsd4_sequence *seq)
2996{
2997	struct nfsd4_slot *slot = resp->cstate.slot;
2998	struct xdr_stream *xdr = &resp->xdr;
2999	__be32 *p;
3000	__be32 status;
3001
3002	dprintk("--> %s slot %p\n", __func__, slot);
3003
3004	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
3005	if (status)
3006		return status;
3007
3008	p = xdr_reserve_space(xdr, slot->sl_datalen);
3009	if (!p) {
3010		WARN_ON_ONCE(1);
3011		return nfserr_serverfault;
3012	}
3013	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
3014	xdr_commit_encode(xdr);
3015
3016	resp->opcnt = slot->sl_opcnt;
3017	return slot->sl_status;
3018}
3019
3020/*
3021 * Set the exchange_id flags returned by the server.
3022 */
3023static void
3024nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
3025{
3026#ifdef CONFIG_NFSD_PNFS
3027	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
3028#else
3029	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
3030#endif
3031
3032	/* Referrals are supported, Migration is not. */
3033	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
3034
3035	/* set the wire flags to return to client. */
3036	clid->flags = new->cl_exchange_flags;
3037}
3038
3039static bool client_has_openowners(struct nfs4_client *clp)
3040{
3041	struct nfs4_openowner *oo;
3042
3043	list_for_each_entry(oo, &clp->cl_openowners, oo_perclient) {
3044		if (!list_empty(&oo->oo_owner.so_stateids))
3045			return true;
3046	}
3047	return false;
3048}
3049
3050static bool client_has_state(struct nfs4_client *clp)
3051{
3052	return client_has_openowners(clp)
3053#ifdef CONFIG_NFSD_PNFS
3054		|| !list_empty(&clp->cl_lo_states)
3055#endif
3056		|| !list_empty(&clp->cl_delegations)
3057		|| !list_empty(&clp->cl_sessions)
3058		|| !list_empty(&clp->async_copies);
3059}
3060
3061static __be32 copy_impl_id(struct nfs4_client *clp,
3062				struct nfsd4_exchange_id *exid)
3063{
3064	if (!exid->nii_domain.data)
3065		return 0;
3066	xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
3067	if (!clp->cl_nii_domain.data)
3068		return nfserr_jukebox;
3069	xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
3070	if (!clp->cl_nii_name.data)
3071		return nfserr_jukebox;
3072	clp->cl_nii_time = exid->nii_time;
3073	return 0;
3074}
3075
3076__be32
3077nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3078		union nfsd4_op_u *u)
3079{
3080	struct nfsd4_exchange_id *exid = &u->exchange_id;
3081	struct nfs4_client *conf, *new;
3082	struct nfs4_client *unconf = NULL;
3083	__be32 status;
3084	char			addr_str[INET6_ADDRSTRLEN];
3085	nfs4_verifier		verf = exid->verifier;
3086	struct sockaddr		*sa = svc_addr(rqstp);
3087	bool	update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
3088	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3089
3090	rpc_ntop(sa, addr_str, sizeof(addr_str));
3091	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
3092		"ip_addr=%s flags %x, spa_how %d\n",
3093		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
3094		addr_str, exid->flags, exid->spa_how);
3095
3096	if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
3097		return nfserr_inval;
3098
3099	new = create_client(exid->clname, rqstp, &verf);
3100	if (new == NULL)
3101		return nfserr_jukebox;
3102	status = copy_impl_id(new, exid);
3103	if (status)
3104		goto out_nolock;
3105
3106	switch (exid->spa_how) {
3107	case SP4_MACH_CRED:
3108		exid->spo_must_enforce[0] = 0;
3109		exid->spo_must_enforce[1] = (
3110			1 << (OP_BIND_CONN_TO_SESSION - 32) |
3111			1 << (OP_EXCHANGE_ID - 32) |
3112			1 << (OP_CREATE_SESSION - 32) |
3113			1 << (OP_DESTROY_SESSION - 32) |
3114			1 << (OP_DESTROY_CLIENTID - 32));
3115
3116		exid->spo_must_allow[0] &= (1 << (OP_CLOSE) |
3117					1 << (OP_OPEN_DOWNGRADE) |
3118					1 << (OP_LOCKU) |
3119					1 << (OP_DELEGRETURN));
3120
3121		exid->spo_must_allow[1] &= (
3122					1 << (OP_TEST_STATEID - 32) |
3123					1 << (OP_FREE_STATEID - 32));
3124		if (!svc_rqst_integrity_protected(rqstp)) {
3125			status = nfserr_inval;
3126			goto out_nolock;
3127		}
3128		/*
3129		 * Sometimes userspace doesn't give us a principal,
3130		 * which is really a bug.  Anyway, we can't enforce
3131		 * MACH_CRED in that case, so better to give up now:
3132		 */
3133		if (!new->cl_cred.cr_principal &&
3134					!new->cl_cred.cr_raw_principal) {
3135			status = nfserr_serverfault;
3136			goto out_nolock;
3137		}
3138		new->cl_mach_cred = true;
		break;
3139	case SP4_NONE:
3140		break;
3141	default:				/* checked by xdr code */
3142		WARN_ON_ONCE(1);
3143		fallthrough;
3144	case SP4_SSV:
3145		status = nfserr_encr_alg_unsupp;
3146		goto out_nolock;
3147	}
3148
3149	/* Cases below refer to rfc 5661 section 18.35.4: */
3150	spin_lock(&nn->client_lock);
3151	conf = find_confirmed_client_by_name(&exid->clname, nn);
3152	if (conf) {
3153		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
3154		bool verfs_match = same_verf(&verf, &conf->cl_verifier);
3155
3156		if (update) {
3157			if (!clp_used_exchangeid(conf)) { /* buggy client */
3158				status = nfserr_inval;
3159				goto out;
3160			}
3161			if (!nfsd4_mach_creds_match(conf, rqstp)) {
3162				status = nfserr_wrong_cred;
3163				goto out;
3164			}
3165			if (!creds_match) { /* case 9 */
3166				status = nfserr_perm;
3167				goto out;
3168			}
3169			if (!verfs_match) { /* case 8 */
3170				status = nfserr_not_same;
3171				goto out;
3172			}
3173			/* case 6 */
3174			exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
3175			goto out_copy;
3176		}
3177		if (!creds_match) { /* case 3 */
3178			if (client_has_state(conf)) {
3179				status = nfserr_clid_inuse;
3180				goto out;
3181			}
3182			goto out_new;
3183		}
3184		if (verfs_match) { /* case 2 */
3185			conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
3186			goto out_copy;
3187		}
3188		/* case 5, client reboot */
3189		conf = NULL;
3190		goto out_new;
3191	}
3192
3193	if (update) { /* case 7 */
3194		status = nfserr_noent;
3195		goto out;
3196	}
3197
3198	unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
3199	if (unconf) /* case 4, possible retry or client restart */
3200		unhash_client_locked(unconf);
3201
3202	/* case 1 (normal case) */
3203out_new:
3204	if (conf) {
3205		status = mark_client_expired_locked(conf);
3206		if (status)
3207			goto out;
3208	}
3209	new->cl_minorversion = cstate->minorversion;
3210	new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
3211	new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
3212
3213	add_to_unconfirmed(new);
3214	swap(new, conf);
3215out_copy:
3216	exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
3217	exid->clientid.cl_id = conf->cl_clientid.cl_id;
3218
3219	exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
3220	nfsd4_set_ex_flags(conf, exid);
3221
3222	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
3223		conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
3224	status = nfs_ok;
3225
3226out:
3227	spin_unlock(&nn->client_lock);
3228out_nolock:
3229	if (new)
3230		expire_client(new);
3231	if (unconf)
3232		expire_client(unconf);
3233	return status;
3234}
3235
3236static __be32
3237check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
3238{
3239	dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
3240		slot_seqid);
3241
3242	/* The slot is in use, and no response has been sent. */
3243	if (slot_inuse) {
3244		if (seqid == slot_seqid)
3245			return nfserr_jukebox;
3246		else
3247			return nfserr_seq_misordered;
3248	}
3249	/* Note unsigned 32-bit arithmetic handles wraparound: */
3250	if (likely(seqid == slot_seqid + 1))
3251		return nfs_ok;
3252	if (seqid == slot_seqid)
3253		return nfserr_replay_cache;
3254	return nfserr_seq_misordered;
3255}
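
/*
 * The wraparound note above means, for example, that with
 * slot_seqid == 0xffffffff a request carrying seqid == 0 is accepted
 * as the in-order successor (0xffffffff + 1 wraps to 0), while a
 * repeat of seqid == 0xffffffff is answered from the replay cache.
 */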
3256
3257/*
3258 * Cache the create session result into the create session single DRC
3259 * slot cache by saving the xdr structure. sl_seqid has been set.
3260 * Do this for solo or embedded create session operations.
3261 */
3262static void
3263nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
3264			   struct nfsd4_clid_slot *slot, __be32 nfserr)
3265{
3266	slot->sl_status = nfserr;
3267	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
3268}
3269
3270static __be32
3271nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
3272			    struct nfsd4_clid_slot *slot)
3273{
3274	memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
3275	return slot->sl_status;
3276}
3277
3278#define NFSD_MIN_REQ_HDR_SEQ_SZ	((\
3279			2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \
3280			1 +	/* MIN tag is zero-length: just the length word */ \
3281			3 +	/* version, opcount, opcode */ \
3282			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
3283				/* seqid, slotID, slotID, cache */ \
3284			4 ) * sizeof(__be32))
3285
3286#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\
3287			2 +	/* verifier: AUTH_NULL, length 0 */\
3288			1 +	/* status */ \
3289			1 +	/* MIN tag is zero-length: just the length word */ \
3290			3 +	/* opcount, opcode, opstatus*/ \
3291			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
3292				/* seqid, slotID, slotID, slotID, status */ \
3293			5 ) * sizeof(__be32))
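
/*
 * Worked out, assuming the usual 16-byte NFSv4.1 session id
 * (XDR_QUADLEN(16) == 4): both minima come to
 * (4 + 1 + 3 + 4 + 4) * 4 = 64 bytes for the request and
 * (2 + 1 + 1 + 3 + 4 + 5) * 4 = 64 bytes for the reply.
 */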
3294
3295static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
3296{
3297	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
3298
3299	if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
3300		return nfserr_toosmall;
3301	if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
3302		return nfserr_toosmall;
3303	ca->headerpadsz = 0;
3304	ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
3305	ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
3306	ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
3307	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
3308			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
3309	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
3310	/*
3311	 * Note that decreasing the slot size below the client's request
3312	 * may make it difficult for the client to function correctly,
3313	 * whereas decreasing the number of slots will (just?) affect
3314	 * performance.  When short on memory we therefore prefer to
3315	 * decrease the number of slots rather than their size.  Clients
3316	 * that request larger slots than they need will get poor results.
3317	 * Note that we always allow at least one slot, because our
3318	 * accounting is soft and provides no guarantees either way.
3319	 */
3320	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
3321
3322	return nfs_ok;
3323}
3324
3325/*
3326 * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
3327 * These are based on similar macros in linux/sunrpc/msg_prot.h .
3328 */
3329#define RPC_MAX_HEADER_WITH_AUTH_SYS \
3330	(RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
3331
3332#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
3333	(RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))
3334
3335#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
3336				 RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
3337#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
3338				 RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
3339				 sizeof(__be32))
3340
3341static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
3342{
3343	ca->headerpadsz = 0;
3344
3345	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
3346		return nfserr_toosmall;
3347	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
3348		return nfserr_toosmall;
3349	ca->maxresp_cached = 0;
3350	if (ca->maxops < 2)
3351		return nfserr_toosmall;
3352
3353	return nfs_ok;
3354}
3355
3356static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
3357{
3358	switch (cbs->flavor) {
3359	case RPC_AUTH_NULL:
3360	case RPC_AUTH_UNIX:
3361		return nfs_ok;
3362	default:
3363		/*
3364		 * GSS case: the spec doesn't allow us to return this
3365		 * error.  But it also doesn't allow us not to support
3366		 * GSS.
3367		 * I'd rather this fail hard than return some error the
3368		 * client might think it can already handle:
3369		 */
3370		return nfserr_encr_alg_unsupp;
3371	}
3372}
3373
3374__be32
3375nfsd4_create_session(struct svc_rqst *rqstp,
3376		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
3377{
3378	struct nfsd4_create_session *cr_ses = &u->create_session;
3379	struct sockaddr *sa = svc_addr(rqstp);
3380	struct nfs4_client *conf, *unconf;
3381	struct nfs4_client *old = NULL;
3382	struct nfsd4_session *new;
3383	struct nfsd4_conn *conn;
3384	struct nfsd4_clid_slot *cs_slot = NULL;
3385	__be32 status = 0;
3386	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3387
3388	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
3389		return nfserr_inval;
3390	status = nfsd4_check_cb_sec(&cr_ses->cb_sec);
3391	if (status)
3392		return status;
3393	status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
3394	if (status)
3395		return status;
3396	status = check_backchannel_attrs(&cr_ses->back_channel);
3397	if (status)
3398		goto out_release_drc_mem;
3399	status = nfserr_jukebox;
3400	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
3401	if (!new)
3402		goto out_release_drc_mem;
3403	conn = alloc_conn_from_crses(rqstp, cr_ses);
3404	if (!conn)
3405		goto out_free_session;
3406
3407	spin_lock(&nn->client_lock);
3408	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
3409	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
3410	WARN_ON_ONCE(conf && unconf);
3411
3412	if (conf) {
3413		status = nfserr_wrong_cred;
3414		if (!nfsd4_mach_creds_match(conf, rqstp))
3415			goto out_free_conn;
3416		cs_slot = &conf->cl_cs_slot;
3417		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
3418		if (status) {
3419			if (status == nfserr_replay_cache)
3420				status = nfsd4_replay_create_session(cr_ses, cs_slot);
3421			goto out_free_conn;
3422		}
3423	} else if (unconf) {
3424		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
3425		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
3426			status = nfserr_clid_inuse;
3427			goto out_free_conn;
3428		}
3429		status = nfserr_wrong_cred;
3430		if (!nfsd4_mach_creds_match(unconf, rqstp))
3431			goto out_free_conn;
3432		cs_slot = &unconf->cl_cs_slot;
3433		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
3434		if (status) {
3435			/* an unconfirmed replay returns misordered */
3436			status = nfserr_seq_misordered;
3437			goto out_free_conn;
3438		}
3439		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
3440		if (old) {
3441			status = mark_client_expired_locked(old);
3442			if (status) {
3443				old = NULL;
3444				goto out_free_conn;
3445			}
3446		}
3447		move_to_confirmed(unconf);
3448		conf = unconf;
3449	} else {
3450		status = nfserr_stale_clientid;
3451		goto out_free_conn;
3452	}
3453	status = nfs_ok;
3454	/* Persistent sessions are not supported */
3455	cr_ses->flags &= ~SESSION4_PERSIST;
3456	/* Upshifting from TCP to RDMA is not supported */
3457	cr_ses->flags &= ~SESSION4_RDMA;
3458
3459	init_session(rqstp, new, conf, cr_ses);
3460	nfsd4_get_session_locked(new);
3461
3462	memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
3463	       NFS4_MAX_SESSIONID_LEN);
3464	cs_slot->sl_seqid++;
3465	cr_ses->seqid = cs_slot->sl_seqid;
3466
3467	/* cache solo and embedded create sessions under the client_lock */
3468	nfsd4_cache_create_session(cr_ses, cs_slot, status);
3469	spin_unlock(&nn->client_lock);
3470	/* init connection and backchannel */
3471	nfsd4_init_conn(rqstp, conn, new);
3472	nfsd4_put_session(new);
3473	if (old)
3474		expire_client(old);
3475	return status;
3476out_free_conn:
3477	spin_unlock(&nn->client_lock);
3478	free_conn(conn);
3479	if (old)
3480		expire_client(old);
3481out_free_session:
3482	__free_session(new);
3483out_release_drc_mem:
3484	nfsd4_put_drc_mem(&cr_ses->fore_channel);
3485	return status;
3486}
3487
3488static __be32 nfsd4_map_bcts_dir(u32 *dir)
3489{
3490	switch (*dir) {
3491	case NFS4_CDFC4_FORE:
3492	case NFS4_CDFC4_BACK:
3493		return nfs_ok;
3494	case NFS4_CDFC4_FORE_OR_BOTH:
3495	case NFS4_CDFC4_BACK_OR_BOTH:
3496		*dir = NFS4_CDFC4_BOTH;
3497		return nfs_ok;
3498	}
3499	return nfserr_inval;
3500}
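
/*
 * I.e. the requested-to-bound mapping is:
 *
 *	NFS4_CDFC4_FORE          -> FORE
 *	NFS4_CDFC4_BACK          -> BACK
 *	NFS4_CDFC4_FORE_OR_BOTH  -> BOTH
 *	NFS4_CDFC4_BACK_OR_BOTH  -> BOTH
 *
 * and anything else is rejected with NFS4ERR_INVAL.
 */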
3501
3502__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
3503		struct nfsd4_compound_state *cstate,
3504		union nfsd4_op_u *u)
3505{
3506	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
3507	struct nfsd4_session *session = cstate->session;
3508	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3509	__be32 status;
3510
3511	status = nfsd4_check_cb_sec(&bc->bc_cb_sec);
3512	if (status)
3513		return status;
3514	spin_lock(&nn->client_lock);
3515	session->se_cb_prog = bc->bc_cb_program;
3516	session->se_cb_sec = bc->bc_cb_sec;
3517	spin_unlock(&nn->client_lock);
3518
3519	nfsd4_probe_callback(session->se_client);
3520
3521	return nfs_ok;
3522}
3523
3524static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
3525{
3526	struct nfsd4_conn *c;
3527
3528	list_for_each_entry(c, &s->se_conns, cn_persession) {
3529		if (c->cn_xprt == xpt) {
3530			return c;
3531		}
3532	}
3533	return NULL;
3534}
3535
3536static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst,
3537		struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn)
3538{
3539	struct nfs4_client *clp = session->se_client;
3540	struct svc_xprt *xpt = rqst->rq_xprt;
3541	struct nfsd4_conn *c;
3542	__be32 status;
3543
3544	/* Following the last paragraph of RFC 5661 Section 18.34.3: */
3545	spin_lock(&clp->cl_lock);
3546	c = __nfsd4_find_conn(xpt, session);
3547	if (!c)
3548		status = nfserr_noent;
3549	else if (req == c->cn_flags)
3550		status = nfs_ok;
3551	else if (req == NFS4_CDFC4_FORE_OR_BOTH &&
3552				c->cn_flags != NFS4_CDFC4_BACK)
3553		status = nfs_ok;
3554	else if (req == NFS4_CDFC4_BACK_OR_BOTH &&
3555				c->cn_flags != NFS4_CDFC4_FORE)
3556		status = nfs_ok;
3557	else
3558		status = nfserr_inval;
3559	spin_unlock(&clp->cl_lock);
3560	if (status == nfs_ok && conn)
3561		*conn = c;
3562	return status;
3563}
3564
3565__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
3566		     struct nfsd4_compound_state *cstate,
3567		     union nfsd4_op_u *u)
3568{
3569	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
3570	__be32 status;
3571	struct nfsd4_conn *conn;
3572	struct nfsd4_session *session;
3573	struct net *net = SVC_NET(rqstp);
3574	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
3575
3576	if (!nfsd4_last_compound_op(rqstp))
3577		return nfserr_not_only_op;
3578	spin_lock(&nn->client_lock);
3579	session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
3580	spin_unlock(&nn->client_lock);
3581	if (!session)
3582		goto out_no_session;
3583	status = nfserr_wrong_cred;
3584	if (!nfsd4_mach_creds_match(session->se_client, rqstp))
3585		goto out;
3586	status = nfsd4_match_existing_connection(rqstp, session,
3587			bcts->dir, &conn);
3588	if (status == nfs_ok) {
3589		if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH ||
3590				bcts->dir == NFS4_CDFC4_BACK)
3591			conn->cn_flags |= NFS4_CDFC4_BACK;
3592		nfsd4_probe_callback(session->se_client);
3593		goto out;
3594	}
3595	if (status == nfserr_inval)
3596		goto out;
3597	status = nfsd4_map_bcts_dir(&bcts->dir);
3598	if (status)
3599		goto out;
3600	conn = alloc_conn(rqstp, bcts->dir);
3601	status = nfserr_jukebox;
3602	if (!conn)
3603		goto out;
3604	nfsd4_init_conn(rqstp, conn, session);
3605	status = nfs_ok;
3606out:
3607	nfsd4_put_session(session);
3608out_no_session:
3609	return status;
3610}
3611
3612static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid)
3613{
3614	if (!cstate->session)
3615		return false;
3616	return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid));
3617}
3618
3619__be32
3620nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
3621		union nfsd4_op_u *u)
3622{
3623	struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid;
3624	struct nfsd4_session *ses;
3625	__be32 status;
3626	int ref_held_by_me = 0;
3627	struct net *net = SVC_NET(r);
3628	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
3629
3630	status = nfserr_not_only_op;
3631	if (nfsd4_compound_in_session(cstate, sessionid)) {
3632		if (!nfsd4_last_compound_op(r))
3633			goto out;
3634		ref_held_by_me++;
3635	}
3636	dump_sessionid(__func__, sessionid);
3637	spin_lock(&nn->client_lock);
3638	ses = find_in_sessionid_hashtbl(sessionid, net, &status);
3639	if (!ses)
3640		goto out_client_lock;
3641	status = nfserr_wrong_cred;
3642	if (!nfsd4_mach_creds_match(ses->se_client, r))
3643		goto out_put_session;
3644	status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
3645	if (status)
3646		goto out_put_session;
3647	unhash_session(ses);
3648	spin_unlock(&nn->client_lock);
3649
3650	nfsd4_probe_callback_sync(ses->se_client);
3651
3652	spin_lock(&nn->client_lock);
3653	status = nfs_ok;
3654out_put_session:
3655	nfsd4_put_session_locked(ses);
3656out_client_lock:
3657	spin_unlock(&nn->client_lock);
3658out:
3659	return status;
3660}
3661
3662static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
3663{
3664	struct nfs4_client *clp = ses->se_client;
3665	struct nfsd4_conn *c;
3666	__be32 status = nfs_ok;
3667	int ret;
3668
3669	spin_lock(&clp->cl_lock);
3670	c = __nfsd4_find_conn(new->cn_xprt, ses);
3671	if (c)
3672		goto out_free;
3673	status = nfserr_conn_not_bound_to_session;
3674	if (clp->cl_mach_cred)
3675		goto out_free;
3676	__nfsd4_hash_conn(new, ses);
3677	spin_unlock(&clp->cl_lock);
3678	ret = nfsd4_register_conn(new);
3679	if (ret)
3680		/* oops; xprt is already down: */
3681		nfsd4_conn_lost(&new->cn_xpt_user);
3682	return nfs_ok;
3683out_free:
3684	spin_unlock(&clp->cl_lock);
3685	free_conn(new);
3686	return status;
3687}
3688
3689static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
3690{
3691	struct nfsd4_compoundargs *args = rqstp->rq_argp;
3692
3693	return args->opcnt > session->se_fchannel.maxops;
3694}
3695
3696static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
3697				  struct nfsd4_session *session)
3698{
3699	struct xdr_buf *xb = &rqstp->rq_arg;
3700
3701	return xb->len > session->se_fchannel.maxreq_sz;
3702}
3703
3704static bool replay_matches_cache(struct svc_rqst *rqstp,
3705		 struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
3706{
3707	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
3708
3709	if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
3710	    (bool)seq->cachethis)
3711		return false;
3712	/*
3713	 * If there's an error then the reply can have fewer ops than
3714	 * the call.
3715	 */
3716	if (slot->sl_opcnt < argp->opcnt && !slot->sl_status)
3717		return false;
3718	/*
3719	 * But if we cached a reply with *more* ops than the call you're
3720	 * sending us now, then this new call is clearly not really a
3721	 * replay of the old one:
3722	 */
3723	if (slot->sl_opcnt > argp->opcnt)
3724		return false;
3725	/* This is the only check explicitly called for by the spec: */
3726	if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
3727		return false;
3728	/*
3729	 * There may be more comparisons we could actually do, but the
3730	 * spec doesn't require us to catch every case where the calls
3731	 * don't match (that would require caching the call as well as
3732	 * the reply), so we don't bother.
3733	 */
3734	return true;
3735}
3736
3737__be32
3738nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3739		union nfsd4_op_u *u)
3740{
3741	struct nfsd4_sequence *seq = &u->sequence;
3742	struct nfsd4_compoundres *resp = rqstp->rq_resp;
3743	struct xdr_stream *xdr = &resp->xdr;
3744	struct nfsd4_session *session;
3745	struct nfs4_client *clp;
3746	struct nfsd4_slot *slot;
3747	struct nfsd4_conn *conn;
3748	__be32 status;
3749	int buflen;
3750	struct net *net = SVC_NET(rqstp);
3751	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
3752
3753	if (resp->opcnt != 1)
3754		return nfserr_sequence_pos;
3755
3756	/*
3757	 * Will be either used or freed by nfsd4_sequence_check_conn
3758	 * below.
3759	 */
3760	conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
3761	if (!conn)
3762		return nfserr_jukebox;
3763
3764	spin_lock(&nn->client_lock);
3765	session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
3766	if (!session)
3767		goto out_no_session;
3768	clp = session->se_client;
3769
3770	status = nfserr_too_many_ops;
3771	if (nfsd4_session_too_many_ops(rqstp, session))
3772		goto out_put_session;
3773
3774	status = nfserr_req_too_big;
3775	if (nfsd4_request_too_big(rqstp, session))
3776		goto out_put_session;
3777
3778	status = nfserr_badslot;
3779	if (seq->slotid >= session->se_fchannel.maxreqs)
3780		goto out_put_session;
3781
3782	slot = session->se_slots[seq->slotid];
3783	dprintk("%s: slotid %d\n", __func__, seq->slotid);
3784
3785	/* We do not negotiate the number of slots yet, so set maxslots
3786	 * to the session maxreqs, which is then used to encode both
3787	 * sr_highest_slotid and sr_target_slotid. */
3788	seq->maxslots = session->se_fchannel.maxreqs;
3789
3790	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
3791					slot->sl_flags & NFSD4_SLOT_INUSE);
3792	if (status == nfserr_replay_cache) {
3793		status = nfserr_seq_misordered;
3794		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
3795			goto out_put_session;
3796		status = nfserr_seq_false_retry;
3797		if (!replay_matches_cache(rqstp, seq, slot))
3798			goto out_put_session;
3799		cstate->slot = slot;
3800		cstate->session = session;
3801		cstate->clp = clp;
3802		/* Return the cached reply status and set cstate->status
3803		 * for nfsd4_proc_compound processing */
3804		status = nfsd4_replay_cache_entry(resp, seq);
3805		cstate->status = nfserr_replay_cache;
3806		goto out;
3807	}
3808	if (status)
3809		goto out_put_session;
3810
3811	status = nfsd4_sequence_check_conn(conn, session);
3812	conn = NULL;
3813	if (status)
3814		goto out_put_session;
3815
3816	buflen = (seq->cachethis) ?
3817			session->se_fchannel.maxresp_cached :
3818			session->se_fchannel.maxresp_sz;
3819	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
3820				    nfserr_rep_too_big;
3821	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
3822		goto out_put_session;
3823	svc_reserve(rqstp, buflen);
3824
3825	status = nfs_ok;
3826	/* Success! bump slot seqid */
3827	slot->sl_seqid = seq->seqid;
3828	slot->sl_flags |= NFSD4_SLOT_INUSE;
3829	if (seq->cachethis)
3830		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
3831	else
3832		slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
3833
3834	cstate->slot = slot;
3835	cstate->session = session;
3836	cstate->clp = clp;
3837
3838out:
3839	switch (clp->cl_cb_state) {
3840	case NFSD4_CB_DOWN:
3841		seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
3842		break;
3843	case NFSD4_CB_FAULT:
3844		seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
3845		break;
3846	default:
3847		seq->status_flags = 0;
3848	}
3849	if (!list_empty(&clp->cl_revoked))
3850		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
3851out_no_session:
3852	if (conn)
3853		free_conn(conn);
3854	spin_unlock(&nn->client_lock);
3855	return status;
3856out_put_session:
3857	nfsd4_put_session_locked(session);
3858	goto out_no_session;
3859}
3860
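/*
 * Called once the rest of the compound has been processed: cache the
 * reply in the session slot (unless this was itself a replay), release
 * the slot, and drop the session reference taken by nfsd4_sequence();
 * for v4.0 (no session), drop the client reference taken at lookup.
 */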
3861void
3862nfsd4_sequence_done(struct nfsd4_compoundres *resp)
3863{
3864	struct nfsd4_compound_state *cs = &resp->cstate;
3865
3866	if (nfsd4_has_session(cs)) {
3867		if (cs->status != nfserr_replay_cache) {
3868			nfsd4_store_cache_entry(resp);
3869			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
3870		}
3871		/* Drop session reference that was taken in nfsd4_sequence() */
3872		nfsd4_put_session(cs->session);
3873	} else if (cs->clp)
3874		put_client_renew(cs->clp);
3875}
3876
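/*
 * DESTROY_CLIENTID: expire the named client, refusing with
 * CLIENTID_BUSY if a confirmed client still holds state and with
 * WRONG_CRED if the request doesn't match the client's machine creds.
 */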
3877__be32
3878nfsd4_destroy_clientid(struct svc_rqst *rqstp,
3879		struct nfsd4_compound_state *cstate,
3880		union nfsd4_op_u *u)
3881{
3882	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
3883	struct nfs4_client *conf, *unconf;
3884	struct nfs4_client *clp = NULL;
3885	__be32 status = 0;
3886	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3887
3888	spin_lock(&nn->client_lock);
3889	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
3890	conf = find_confirmed_client(&dc->clientid, true, nn);
3891	WARN_ON_ONCE(conf && unconf);
3892
3893	if (conf) {
3894		if (client_has_state(conf)) {
3895			status = nfserr_clientid_busy;
3896			goto out;
3897		}
3898		status = mark_client_expired_locked(conf);
3899		if (status)
3900			goto out;
3901		clp = conf;
3902	} else if (unconf)
3903		clp = unconf;
3904	else {
3905		status = nfserr_stale_clientid;
3906		goto out;
3907	}
3908	if (!nfsd4_mach_creds_match(clp, rqstp)) {
3909		clp = NULL;
3910		status = nfserr_wrong_cred;
3911		goto out;
3912	}
3913	unhash_client_locked(clp);
3914out:
3915	spin_unlock(&nn->client_lock);
3916	if (clp)
3917		expire_client(clp);
3918	return status;
3919}
3920
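/*
 * RECLAIM_COMPLETE: record that this client has finished reclaiming
 * state after a server restart, so the grace period can end once all
 * known clients have done the same.
 */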
3921__be32
3922nfsd4_reclaim_complete(struct svc_rqst *rqstp,
3923		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
3924{
3925	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
3926	__be32 status = 0;
3927
3928	if (rc->rca_one_fs) {
3929		if (!cstate->current_fh.fh_dentry)
3930			return nfserr_nofilehandle;
3931		/*
3932		 * We don't take advantage of the rca_one_fs case.
3933		 * That's OK, it's optional, we can safely ignore it.
3934		 */
3935		return nfs_ok;
3936	}
3937
3938	status = nfserr_complete_already;
3939	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
3940			     &cstate->session->se_client->cl_flags))
3941		goto out;
3942
3943	status = nfserr_stale_clientid;
3944	if (is_client_expired(cstate->session->se_client))
3945		/*
3946		 * The following error isn't really legal.
3947		 * But we only get here if the client just explicitly
3948		 * destroyed itself.  Surely it no longer cares what
3949		 * error it gets back on an operation for the dead
3950		 * client.
3951		 */
3952		goto out;
3953
3954	status = nfs_ok;
3955	nfsd4_client_record_create(cstate->session->se_client);
3956	inc_reclaim_complete(cstate->session->se_client);
3957out:
3958	return status;
3959}
3960
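/*
 * SETCLIENTID: stage a new, unconfirmed client record for this name
 * and verifier; it takes effect only when the client follows up with
 * SETCLIENTID_CONFIRM.
 */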
3961__be32
3962nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3963		  union nfsd4_op_u *u)
3964{
3965	struct nfsd4_setclientid *setclid = &u->setclientid;
3966	struct xdr_netobj 	clname = setclid->se_name;
3967	nfs4_verifier		clverifier = setclid->se_verf;
3968	struct nfs4_client	*conf, *new;
3969	struct nfs4_client	*unconf = NULL;
3970	__be32 			status;
3971	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3972
3973	new = create_client(clname, rqstp, &clverifier);
3974	if (new == NULL)
3975		return nfserr_jukebox;
3976	/* Cases below refer to rfc 3530 section 14.2.33: */
3977	spin_lock(&nn->client_lock);
3978	conf = find_confirmed_client_by_name(&clname, nn);
3979	if (conf && client_has_state(conf)) {
3980		/* case 0: */
3981		status = nfserr_clid_inuse;
3982		if (clp_used_exchangeid(conf))
3983			goto out;
3984		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
3985			trace_nfsd_clid_inuse_err(conf);
3986			goto out;
3987		}
3988	}
3989	unconf = find_unconfirmed_client_by_name(&clname, nn);
3990	if (unconf)
3991		unhash_client_locked(unconf);
3992	/* We need to handle only case 1: probable callback update */
3993	if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
3994		copy_clid(new, conf);
3995		gen_confirm(new, nn);
3996	}
3997	new->cl_minorversion = 0;
3998	gen_callback(new, setclid, rqstp);
3999	add_to_unconfirmed(new);
4000	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
4001	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
4002	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
4003	new = NULL;
4004	status = nfs_ok;
4005out:
4006	spin_unlock(&nn->client_lock);
4007	if (new)
4008		free_client(new);
4009	if (unconf)
4010		expire_client(unconf);
4011	return status;
4012}
4013
4014
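/*
 * SETCLIENTID_CONFIRM: match the confirm verifier against the
 * confirmed and unconfirmed clients for this clientid to distinguish
 * a retransmit, a callback update, a new or rebooted client, and a
 * stale clientid (the numbered cases below).
 */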
4015__be32
4016nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
4017			struct nfsd4_compound_state *cstate,
4018			union nfsd4_op_u *u)
4019{
4020	struct nfsd4_setclientid_confirm *setclientid_confirm =
4021			&u->setclientid_confirm;
4022	struct nfs4_client *conf, *unconf;
4023	struct nfs4_client *old = NULL;
4024	nfs4_verifier confirm = setclientid_confirm->sc_confirm;
4025	clientid_t * clid = &setclientid_confirm->sc_clientid;
4026	__be32 status;
4027	struct nfsd_net	*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4028
4029	if (STALE_CLIENTID(clid, nn))
4030		return nfserr_stale_clientid;
4031
4032	spin_lock(&nn->client_lock);
4033	conf = find_confirmed_client(clid, false, nn);
4034	unconf = find_unconfirmed_client(clid, false, nn);
4035	/*
4036	 * We try hard to give out unique clientids, so if we get an
4037	 * attempt to confirm the same clientid with a different cred,
4038	 * the client may be buggy; this should never happen.
4039	 *
4040	 * Nevertheless, RFC 7530 recommends INUSE for this case:
4041	 */
4042	status = nfserr_clid_inuse;
4043	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
4044		goto out;
4045	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
4046		goto out;
4047	/* cases below refer to rfc 3530 section 14.2.34: */
4048	if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
4049		if (conf && same_verf(&confirm, &conf->cl_confirm)) {
4050			/* case 2: probable retransmit */
4051			status = nfs_ok;
4052		} else /* case 4: client hasn't noticed we rebooted yet? */
4053			status = nfserr_stale_clientid;
4054		goto out;
4055	}
4056	status = nfs_ok;
4057	if (conf) { /* case 1: callback update */
4058		old = unconf;
4059		unhash_client_locked(old);
4060		nfsd4_change_callback(conf, &unconf->cl_cb_conn);
4061	} else { /* case 3: normal case; new or rebooted client */
4062		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
4063		if (old) {
4064			status = nfserr_clid_inuse;
4065			if (client_has_state(old)
4066					&& !same_creds(&unconf->cl_cred,
4067							&old->cl_cred)) {
4068				old = NULL;
4069				goto out;
4070			}
4071			status = mark_client_expired_locked(old);
4072			if (status) {
4073				old = NULL;
4074				goto out;
4075			}
4076		}
4077		move_to_confirmed(unconf);
4078		conf = unconf;
4079	}
4080	get_client_locked(conf);
4081	spin_unlock(&nn->client_lock);
4082	nfsd4_probe_callback(conf);
4083	spin_lock(&nn->client_lock);
4084	put_client_renew_locked(conf);
4085out:
4086	spin_unlock(&nn->client_lock);
4087	if (old)
4088		expire_client(old);
4089	return status;
4090}
4091
4092static struct nfs4_file *nfsd4_alloc_file(void)
4093{
4094	return kmem_cache_alloc(file_slab, GFP_KERNEL);
4095}
4096
4097/* OPEN Share state helper functions */
4098static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
4099				struct nfs4_file *fp)
4100{
4101	lockdep_assert_held(&state_lock);
4102
4103	refcount_set(&fp->fi_ref, 1);
4104	spin_lock_init(&fp->fi_lock);
4105	INIT_LIST_HEAD(&fp->fi_stateids);
4106	INIT_LIST_HEAD(&fp->fi_delegations);
4107	INIT_LIST_HEAD(&fp->fi_clnt_odstate);
4108	fh_copy_shallow(&fp->fi_fhandle, fh);
4109	fp->fi_deleg_file = NULL;
4110	fp->fi_had_conflict = false;
4111	fp->fi_share_deny = 0;
4112	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
4113	memset(fp->fi_access, 0, sizeof(fp->fi_access));
4114#ifdef CONFIG_NFSD_PNFS
4115	INIT_LIST_HEAD(&fp->fi_lo_states);
4116	atomic_set(&fp->fi_lo_recalls, 0);
4117#endif
4118	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
4119}
4120
4121void
4122nfsd4_free_slabs(void)
4123{
4124	kmem_cache_destroy(client_slab);
4125	kmem_cache_destroy(openowner_slab);
4126	kmem_cache_destroy(lockowner_slab);
4127	kmem_cache_destroy(file_slab);
4128	kmem_cache_destroy(stateid_slab);
4129	kmem_cache_destroy(deleg_slab);
4130	kmem_cache_destroy(odstate_slab);
4131}
4132
4133int
4134nfsd4_init_slabs(void)
4135{
4136	client_slab = kmem_cache_create("nfsd4_clients",
4137			sizeof(struct nfs4_client), 0, 0, NULL);
4138	if (client_slab == NULL)
4139		goto out;
4140	openowner_slab = kmem_cache_create("nfsd4_openowners",
4141			sizeof(struct nfs4_openowner), 0, 0, NULL);
4142	if (openowner_slab == NULL)
4143		goto out_free_client_slab;
4144	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
4145			sizeof(struct nfs4_lockowner), 0, 0, NULL);
4146	if (lockowner_slab == NULL)
4147		goto out_free_openowner_slab;
4148	file_slab = kmem_cache_create("nfsd4_files",
4149			sizeof(struct nfs4_file), 0, 0, NULL);
4150	if (file_slab == NULL)
4151		goto out_free_lockowner_slab;
4152	stateid_slab = kmem_cache_create("nfsd4_stateids",
4153			sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
4154	if (stateid_slab == NULL)
4155		goto out_free_file_slab;
4156	deleg_slab = kmem_cache_create("nfsd4_delegations",
4157			sizeof(struct nfs4_delegation), 0, 0, NULL);
4158	if (deleg_slab == NULL)
4159		goto out_free_stateid_slab;
4160	odstate_slab = kmem_cache_create("nfsd4_odstate",
4161			sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
4162	if (odstate_slab == NULL)
4163		goto out_free_deleg_slab;
4164	return 0;
4165
4166out_free_deleg_slab:
4167	kmem_cache_destroy(deleg_slab);
4168out_free_stateid_slab:
4169	kmem_cache_destroy(stateid_slab);
4170out_free_file_slab:
4171	kmem_cache_destroy(file_slab);
4172out_free_lockowner_slab:
4173	kmem_cache_destroy(lockowner_slab);
4174out_free_openowner_slab:
4175	kmem_cache_destroy(openowner_slab);
4176out_free_client_slab:
4177	kmem_cache_destroy(client_slab);
4178out:
4179	return -ENOMEM;
4180}
4181
4182static void init_nfs4_replay(struct nfs4_replay *rp)
4183{
4184	rp->rp_status = nfserr_serverfault;
4185	rp->rp_buflen = 0;
4186	rp->rp_buf = rp->rp_ibuf;
4187	mutex_init(&rp->rp_mutex);
4188}
4189
4190static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
4191		struct nfs4_stateowner *so)
4192{
4193	if (!nfsd4_has_session(cstate)) {
4194		mutex_lock(&so->so_replay.rp_mutex);
4195		cstate->replay_owner = nfs4_get_stateowner(so);
4196	}
4197}
4198
4199void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
4200{
4201	struct nfs4_stateowner *so = cstate->replay_owner;
4202
4203	if (so != NULL) {
4204		cstate->replay_owner = NULL;
4205		mutex_unlock(&so->so_replay.rp_mutex);
4206		nfs4_put_stateowner(so);
4207	}
4208}
4209
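/*
 * Common allocation for open and lock owners: duplicate the
 * on-the-wire owner name, take an initial reference, and initialize
 * replay state.
 */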
4210static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
4211{
4212	struct nfs4_stateowner *sop;
4213
4214	sop = kmem_cache_alloc(slab, GFP_KERNEL);
4215	if (!sop)
4216		return NULL;
4217
4218	xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
4219	if (!sop->so_owner.data) {
4220		kmem_cache_free(slab, sop);
4221		return NULL;
4222	}
4223
4224	INIT_LIST_HEAD(&sop->so_stateids);
4225	sop->so_client = clp;
4226	init_nfs4_replay(&sop->so_replay);
4227	atomic_set(&sop->so_count, 1);
4228	return sop;
4229}
4230
4231static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
4232{
4233	lockdep_assert_held(&clp->cl_lock);
4234
4235	list_add(&oo->oo_owner.so_strhash,
4236		 &clp->cl_ownerstr_hashtbl[strhashval]);
4237	list_add(&oo->oo_perclient, &clp->cl_openowners);
4238}
4239
4240static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
4241{
4242	unhash_openowner_locked(openowner(so));
4243}
4244
4245static void nfs4_free_openowner(struct nfs4_stateowner *so)
4246{
4247	struct nfs4_openowner *oo = openowner(so);
4248
4249	kmem_cache_free(openowner_slab, oo);
4250}
4251
4252static const struct nfs4_stateowner_operations openowner_ops = {
4253	.so_unhash =	nfs4_unhash_openowner,
4254	.so_free =	nfs4_free_openowner,
4255};
4256
4257static struct nfs4_ol_stateid *
4258nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
4259{
4260	struct nfs4_ol_stateid *local, *ret = NULL;
4261	struct nfs4_openowner *oo = open->op_openowner;
4262
4263	lockdep_assert_held(&fp->fi_lock);
4264
4265	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
4266		/* ignore lock owners */
4267		if (local->st_stateowner->so_is_open_owner == 0)
4268			continue;
4269		if (local->st_stateowner != &oo->oo_owner)
4270			continue;
4271		if (local->st_stid.sc_type == NFS4_OPEN_STID) {
4272			ret = local;
4273			refcount_inc(&ret->st_stid.sc_count);
4274			break;
4275		}
4276	}
4277	return ret;
4278}
4279
4280static __be32
4281nfsd4_verify_open_stid(struct nfs4_stid *s)
4282{
4283	__be32 ret = nfs_ok;
4284
4285	switch (s->sc_type) {
4286	default:
4287		break;
4288	case 0:
4289	case NFS4_CLOSED_STID:
4290	case NFS4_CLOSED_DELEG_STID:
4291		ret = nfserr_bad_stateid;
4292		break;
4293	case NFS4_REVOKED_DELEG_STID:
4294		ret = nfserr_deleg_revoked;
4295	}
4296	return ret;
4297}
4298
4299/* Lock the stateid st_mutex, and deal with races with CLOSE */
4300static __be32
4301nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
4302{
4303	__be32 ret;
4304
4305	mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
4306	ret = nfsd4_verify_open_stid(&stp->st_stid);
4307	if (ret != nfs_ok)
4308		mutex_unlock(&stp->st_mutex);
4309	return ret;
4310}
4311
4312static struct nfs4_ol_stateid *
4313nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
4314{
4315	struct nfs4_ol_stateid *stp;
4316	for (;;) {
4317		spin_lock(&fp->fi_lock);
4318		stp = nfsd4_find_existing_open(fp, open);
4319		spin_unlock(&fp->fi_lock);
4320		if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
4321			break;
4322		nfs4_put_stid(&stp->st_stid);
4323	}
4324	return stp;
4325}
4326
4327static struct nfs4_openowner *
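/*
 * Allocate and hash a new openowner, unless another thread raced us
 * and inserted one for the same owner string first; in that case free
 * ours and return the existing one.
 */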
4328alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
4329			   struct nfsd4_compound_state *cstate)
4330{
4331	struct nfs4_client *clp = cstate->clp;
4332	struct nfs4_openowner *oo, *ret;
4333
4334	oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
4335	if (!oo)
4336		return NULL;
4337	oo->oo_owner.so_ops = &openowner_ops;
4338	oo->oo_owner.so_is_open_owner = 1;
4339	oo->oo_owner.so_seqid = open->op_seqid;
4340	oo->oo_flags = 0;
4341	if (nfsd4_has_session(cstate))
4342		oo->oo_flags |= NFS4_OO_CONFIRMED;
4343	oo->oo_time = 0;
4344	oo->oo_last_closed_stid = NULL;
4345	INIT_LIST_HEAD(&oo->oo_close_lru);
4346	spin_lock(&clp->cl_lock);
4347	ret = find_openstateowner_str_locked(strhashval, open, clp);
4348	if (ret == NULL) {
4349		hash_openowner(oo, clp, strhashval);
4350		ret = oo;
4351	} else
4352		nfs4_free_stateowner(&oo->oo_owner);
4353
4354	spin_unlock(&clp->cl_lock);
4355	return ret;
4356}
4357
4358static struct nfs4_ol_stateid *
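/*
 * Turn open->op_stp into a hashed OPEN stateid for this file, or
 * return an existing stateid if one already exists for this
 * openowner/file pair. The returned stateid is locked; see the CLOSE
 * race handling below.
 */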
4359init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
4360{
4361
4362	struct nfs4_openowner *oo = open->op_openowner;
4363	struct nfs4_ol_stateid *retstp = NULL;
4364	struct nfs4_ol_stateid *stp;
4365
4366	stp = open->op_stp;
4367	/* Init and lock st_mutex outside the spinlocks, since a mutex can sleep */
4368	mutex_init(&stp->st_mutex);
4369	mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
4370
4371retry:
4372	spin_lock(&oo->oo_owner.so_client->cl_lock);
4373	spin_lock(&fp->fi_lock);
4374
4375	retstp = nfsd4_find_existing_open(fp, open);
4376	if (retstp)
4377		goto out_unlock;
4378
4379	open->op_stp = NULL;
4380	refcount_inc(&stp->st_stid.sc_count);
4381	stp->st_stid.sc_type = NFS4_OPEN_STID;
4382	INIT_LIST_HEAD(&stp->st_locks);
4383	stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
4384	get_nfs4_file(fp);
4385	stp->st_stid.sc_file = fp;
4386	stp->st_access_bmap = 0;
4387	stp->st_deny_bmap = 0;
4388	stp->st_openstp = NULL;
4389	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
4390	list_add(&stp->st_perfile, &fp->fi_stateids);
4391
4392out_unlock:
4393	spin_unlock(&fp->fi_lock);
4394	spin_unlock(&oo->oo_owner.so_client->cl_lock);
4395	if (retstp) {
4396		/* Handle races with CLOSE */
4397		if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
4398			nfs4_put_stid(&retstp->st_stid);
4399			goto retry;
4400		}
4401		/* To keep mutex tracking happy */
4402		mutex_unlock(&stp->st_mutex);
4403		stp = retstp;
4404	}
4405	return stp;
4406}
4407
4408/*
4409 * In the 4.0 case we need to keep the owners around a little while to handle
4410 * CLOSE replay. We still do need to release any file access that is held by
4411 * them before returning however.
4412 */
4413static void
4414move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
4415{
4416	struct nfs4_ol_stateid *last;
4417	struct nfs4_openowner *oo = openowner(s->st_stateowner);
4418	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
4419						nfsd_net_id);
4420
4421	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
4422
4423	/*
4424	 * We know that we hold one reference via nfsd4_close, and another
4425	 * "persistent" reference for the client. If the refcount is higher
4426	 * than 2, then there are still calls in progress that are using this
4427	 * stateid. We can't put the sc_file reference until they are finished.
4428	 * Wait for the refcount to drop to 2. Since it has been unhashed,
4429	 * there should be no danger of the refcount going back up again at
4430	 * this point.
4431	 */
4432	wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
4433
4434	release_all_access(s);
4435	if (s->st_stid.sc_file) {
4436		put_nfs4_file(s->st_stid.sc_file);
4437		s->st_stid.sc_file = NULL;
4438	}
4439
4440	spin_lock(&nn->client_lock);
4441	last = oo->oo_last_closed_stid;
4442	oo->oo_last_closed_stid = s;
4443	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
4444	oo->oo_time = ktime_get_boottime_seconds();
4445	spin_unlock(&nn->client_lock);
4446	if (last)
4447		nfs4_put_stid(&last->st_stid);
4448}
4449
4450/* search file_hashtbl[] for file */
4451static struct nfs4_file *
4452find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
4453{
4454	struct nfs4_file *fp;
4455
4456	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
4457				lockdep_is_held(&state_lock)) {
4458		if (fh_match(&fp->fi_fhandle, fh)) {
4459			if (refcount_inc_not_zero(&fp->fi_ref))
4460				return fp;
4461		}
4462	}
4463	return NULL;
4464}
4465
4466struct nfs4_file *
4467find_file(struct knfsd_fh *fh)
4468{
4469	struct nfs4_file *fp;
4470	unsigned int hashval = file_hashval(fh);
4471
4472	rcu_read_lock();
4473	fp = find_file_locked(fh, hashval);
4474	rcu_read_unlock();
4475	return fp;
4476}
4477
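/*
 * Look the file up under RCU first; only on a miss do we retry under
 * state_lock and insert "new" if it is still absent.
 */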
4478static struct nfs4_file *
4479find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
4480{
4481	struct nfs4_file *fp;
4482	unsigned int hashval = file_hashval(fh);
4483
4484	rcu_read_lock();
4485	fp = find_file_locked(fh, hashval);
4486	rcu_read_unlock();
4487	if (fp)
4488		return fp;
4489
4490	spin_lock(&state_lock);
4491	fp = find_file_locked(fh, hashval);
4492	if (likely(fp == NULL)) {
4493		nfsd4_init_file(fh, hashval, new);
4494		fp = new;
4495	}
4496	spin_unlock(&state_lock);
4497
4498	return fp;
4499}
4500
4501/*
4502	 * Called to check for deny conflicts when a READ arrives with the
4503	 * all-zero stateid, or a WRITE with the all-zero or all-ones stateid
4504 */
4505static __be32
4506nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
4507{
4508	struct nfs4_file *fp;
4509	__be32 ret = nfs_ok;
4510
4511	fp = find_file(&current_fh->fh_handle);
4512	if (!fp)
4513		return ret;
4514	/* Check for conflicting share reservations */
4515	spin_lock(&fp->fi_lock);
4516	if (fp->fi_share_deny & deny_type)
4517		ret = nfserr_locked;
4518	spin_unlock(&fp->fi_lock);
4519	put_nfs4_file(fp);
4520	return ret;
4521}
4522
4523static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
4524{
4525	struct nfs4_delegation *dp = cb_to_delegation(cb);
4526	struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
4527					  nfsd_net_id);
4528
4529	block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
4530
4531	/*
4532	 * We can't do this in nfsd_break_deleg_cb because it is
4533	 * already holding inode->i_lock.
4534	 *
4535	 * If the dl_time != 0, then we know that it has already been
4536	 * queued for a lease break. Don't queue it again.
4537	 */
4538	spin_lock(&state_lock);
4539	if (delegation_hashed(dp) && dp->dl_time == 0) {
4540		dp->dl_time = ktime_get_boottime_seconds();
4541		list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
4542	}
4543	spin_unlock(&state_lock);
4544}
4545
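/*
 * Decide whether a CB_RECALL RPC is finished: return 1 to stop (done
 * or giving up), or 0 to have it retried after a short delay.
 */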
4546static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
4547		struct rpc_task *task)
4548{
4549	struct nfs4_delegation *dp = cb_to_delegation(cb);
4550
4551	if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
4552	    dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
4553		return 1;
4554
4555	switch (task->tk_status) {
4556	case 0:
4557		return 1;
4558	case -NFS4ERR_DELAY:
4559		rpc_delay(task, 2 * HZ);
4560		return 0;
4561	case -EBADHANDLE:
4562	case -NFS4ERR_BAD_STATEID:
4563		/*
4564		 * Race: client probably got cb_recall before open reply
4565		 * granting delegation.
4566		 */
4567		if (dp->dl_retries--) {
4568			rpc_delay(task, 2 * HZ);
4569			return 0;
4570		}
4571		fallthrough;
4572	default:
4573		return 1;
4574	}
4575}
4576
4577static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
4578{
4579	struct nfs4_delegation *dp = cb_to_delegation(cb);
4580
4581	nfs4_put_stid(&dp->dl_stid);
4582}
4583
4584static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
4585	.prepare	= nfsd4_cb_recall_prepare,
4586	.done		= nfsd4_cb_recall_done,
4587	.release	= nfsd4_cb_recall_release,
4588};
4589
4590static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
4591{
4592	/*
4593	 * We're assuming the state code never drops its reference
4594	 * without first removing the lease.  Since we're in this lease
4595	 * callback (and since the lease code is serialized by the
4596	 * i_lock) we know the server hasn't removed the lease yet, and
4597	 * we know it's safe to take a reference.
4598	 */
4599	refcount_inc(&dp->dl_stid.sc_count);
4600	nfsd4_run_cb(&dp->dl_recall);
4601}
4602
4603/* Called from break_lease() with i_lock held. */
4604static bool
4605nfsd_break_deleg_cb(struct file_lock *fl)
4606{
4607	bool ret = false;
4608	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
4609	struct nfs4_file *fp = dp->dl_stid.sc_file;
4610
4611	trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid);
4612
4613	/*
4614	 * We don't want the locks code to time out the lease for us;
4615	 * we'll remove it ourselves if a delegation isn't returned
4616	 * in time:
4617	 */
4618	fl->fl_break_time = 0;
4619
4620	spin_lock(&fp->fi_lock);
4621	fp->fi_had_conflict = true;
4622	nfsd_break_one_deleg(dp);
4623	spin_unlock(&fp->fi_lock);
4624	return ret;
4625}
4626
4627/**
4628 * nfsd_breaker_owns_lease - Check if lease conflict was resolved
4629 * @fl: Lock state to check
4630 *
4631 * Return values:
4632 *   %true: Lease conflict was resolved
4633 *   %false: Lease conflict was not resolved.
4634 */
4635static bool nfsd_breaker_owns_lease(struct file_lock *fl)
4636{
4637	struct nfs4_delegation *dl = fl->fl_owner;
4638	struct svc_rqst *rqst;
4639	struct nfs4_client *clp;
4640
4641	if (!i_am_nfsd())
4642		return false;
4643	rqst = kthread_data(current);
4644	/* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
4645	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
4646		return false;
4647	clp = *(rqst->rq_lease_breaker);
4648	return dl->dl_stid.sc_client == clp;
4649}
4650
4651static int
4652nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
4653		     struct list_head *dispose)
4654{
4655	if (arg & F_UNLCK)
4656		return lease_modify(onlist, arg, dispose);
4657	else
4658		return -EAGAIN;
4659}
4660
4661static const struct lock_manager_operations nfsd_lease_mng_ops = {
4662	.lm_breaker_owns_lease = nfsd_breaker_owns_lease,
4663	.lm_break = nfsd_break_deleg_cb,
4664	.lm_change = nfsd_change_deleg_cb,
4665};
4666
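/*
 * NFSv4.0 seqid check: the previous seqid means a replay, the expected
 * seqid is OK, anything else is BAD_SEQID. Sessions (v4.1+) make the
 * check unnecessary.
 */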
4667static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
4668{
4669	if (nfsd4_has_session(cstate))
4670		return nfs_ok;
4671	if (seqid == so->so_seqid - 1)
4672		return nfserr_replay_me;
4673	if (seqid == so->so_seqid)
4674		return nfs_ok;
4675	return nfserr_bad_seqid;
4676}
4677
4678static __be32 lookup_clientid(clientid_t *clid,
4679		struct nfsd4_compound_state *cstate,
4680		struct nfsd_net *nn,
4681		bool sessions)
4682{
4683	struct nfs4_client *found;
4684
4685	if (cstate->clp) {
4686		found = cstate->clp;
4687		if (!same_clid(&found->cl_clientid, clid))
4688			return nfserr_stale_clientid;
4689		return nfs_ok;
4690	}
4691
4692	if (STALE_CLIENTID(clid, nn))
4693		return nfserr_stale_clientid;
4694
4695	/*
4696	 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
4697	 * cached already then we know this is for v4.0 and "sessions"
4698	 * will be false.
4699	 */
4700	WARN_ON_ONCE(cstate->session);
4701	spin_lock(&nn->client_lock);
4702	found = find_confirmed_client(clid, sessions, nn);
4703	if (!found) {
4704		spin_unlock(&nn->client_lock);
4705		return nfserr_expired;
4706	}
4707	atomic_inc(&found->cl_rpc_users);
4708	spin_unlock(&nn->client_lock);
4709
4710	/* Cache the nfs4_client in cstate! */
4711	cstate->clp = found;
4712	return nfs_ok;
4713}
4714
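/*
 * First half of OPEN processing: look up the client and openowner
 * (allocating a new owner if necessary), check the seqid for v4.0,
 * and preallocate structures the second half may need.
 */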
4715__be32
4716nfsd4_process_open1(struct nfsd4_compound_state *cstate,
4717		    struct nfsd4_open *open, struct nfsd_net *nn)
4718{
4719	clientid_t *clientid = &open->op_clientid;
4720	struct nfs4_client *clp = NULL;
4721	unsigned int strhashval;
4722	struct nfs4_openowner *oo = NULL;
4723	__be32 status;
4724
4725	if (STALE_CLIENTID(&open->op_clientid, nn))
4726		return nfserr_stale_clientid;
4727	/*
4728	 * In case we need it later, after we've already created the
4729	 * file and don't want to risk a further failure:
4730	 */
4731	open->op_file = nfsd4_alloc_file();
4732	if (open->op_file == NULL)
4733		return nfserr_jukebox;
4734
4735	status = lookup_clientid(clientid, cstate, nn, false);
4736	if (status)
4737		return status;
4738	clp = cstate->clp;
4739
4740	strhashval = ownerstr_hashval(&open->op_owner);
4741	oo = find_openstateowner_str(strhashval, open, clp);
4742	open->op_openowner = oo;
4743	if (!oo) {
4744		goto new_owner;
4745	}
4746	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
4747		/* Replace unconfirmed owners without checking for replay. */
4748		release_openowner(oo);
4749		open->op_openowner = NULL;
4750		goto new_owner;
4751	}
4752	status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
4753	if (status)
4754		return status;
4755	goto alloc_stateid;
4756new_owner:
4757	oo = alloc_init_open_stateowner(strhashval, open, cstate);
4758	if (oo == NULL)
4759		return nfserr_jukebox;
4760	open->op_openowner = oo;
4761alloc_stateid:
4762	open->op_stp = nfs4_alloc_open_stateid(clp);
4763	if (!open->op_stp)
4764		return nfserr_jukebox;
4765
4766	if (nfsd4_has_session(cstate) &&
4767	    (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) {
4768		open->op_odstate = alloc_clnt_odstate(clp);
4769		if (!open->op_odstate)
4770			return nfserr_jukebox;
4771	}
4772
4773	return nfs_ok;
4774}
4775
4776static inline __be32
4777nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
4778{
4779	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
4780		return nfserr_openmode;
4781	else
4782		return nfs_ok;
4783}
4784
4785static int share_access_to_flags(u32 share_access)
4786{
4787	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
4788}
4789
4790static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
4791{
4792	struct nfs4_stid *ret;
4793
4794	ret = find_stateid_by_type(cl, s,
4795				NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
4796	if (!ret)
4797		return NULL;
4798	return delegstateid(ret);
4799}
4800
4801static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
4802{
4803	return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
4804	       open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
4805}
4806
4807static __be32
4808nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
4809		struct nfs4_delegation **dp)
4810{
4811	int flags;
4812	__be32 status = nfserr_bad_stateid;
4813	struct nfs4_delegation *deleg;
4814
4815	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
4816	if (deleg == NULL)
4817		goto out;
4818	if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
4819		nfs4_put_stid(&deleg->dl_stid);
4820		if (cl->cl_minorversion)
4821			status = nfserr_deleg_revoked;
4822		goto out;
4823	}
4824	flags = share_access_to_flags(open->op_share_access);
4825	status = nfs4_check_delegmode(deleg, flags);
4826	if (status) {
4827		nfs4_put_stid(&deleg->dl_stid);
4828		goto out;
4829	}
4830	*dp = deleg;
4831out:
4832	if (!nfsd4_is_deleg_cur(open))
4833		return nfs_ok;
4834	if (status)
4835		return status;
4836	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
4837	return nfs_ok;
4838}
4839
4840static inline int nfs4_access_to_access(u32 nfs4_access)
4841{
4842	int flags = 0;
4843
4844	if (nfs4_access & NFS4_SHARE_ACCESS_READ)
4845		flags |= NFSD_MAY_READ;
4846	if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
4847		flags |= NFSD_MAY_WRITE;
4848	return flags;
4849}
4850
4851static inline __be32
4852nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
4853		struct nfsd4_open *open)
4854{
4855	struct iattr iattr = {
4856		.ia_valid = ATTR_SIZE,
4857		.ia_size = 0,
4858	};
4859	if (!open->op_truncate)
4860		return 0;
4861	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
4862		return nfserr_inval;
4863	return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
4864}
4865
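/*
 * Check for share deny conflicts, record the access and deny modes in
 * the stateid, and make sure the nfs4_file has a struct nfsd_file open
 * for the requested access mode.
 */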
4866static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
4867		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
4868		struct nfsd4_open *open)
4869{
4870	struct nfsd_file *nf = NULL;
4871	__be32 status;
4872	int oflag = nfs4_access_to_omode(open->op_share_access);
4873	int access = nfs4_access_to_access(open->op_share_access);
4874	unsigned char old_access_bmap, old_deny_bmap;
4875
4876	spin_lock(&fp->fi_lock);
4877
4878	/*
4879	 * Are we trying to set a deny mode that would conflict with
4880	 * current access?
4881	 */
4882	status = nfs4_file_check_deny(fp, open->op_share_deny);
4883	if (status != nfs_ok) {
4884		spin_unlock(&fp->fi_lock);
4885		goto out;
4886	}
4887
4888	/* set access to the file */
4889	status = nfs4_file_get_access(fp, open->op_share_access);
4890	if (status != nfs_ok) {
4891		spin_unlock(&fp->fi_lock);
4892		goto out;
4893	}
4894
4895	/* Set access bits in stateid */
4896	old_access_bmap = stp->st_access_bmap;
4897	set_access(open->op_share_access, stp);
4898
4899	/* Set new deny mask */
4900	old_deny_bmap = stp->st_deny_bmap;
4901	set_deny(open->op_share_deny, stp);
4902	fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
4903
4904	if (!fp->fi_fds[oflag]) {
4905		spin_unlock(&fp->fi_lock);
4906		status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
4907		if (status)
4908			goto out_put_access;
4909		spin_lock(&fp->fi_lock);
4910		if (!fp->fi_fds[oflag]) {
4911			fp->fi_fds[oflag] = nf;
4912			nf = NULL;
4913		}
4914	}
4915	spin_unlock(&fp->fi_lock);
4916	if (nf)
4917		nfsd_file_put(nf);
4918
4919	status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
4920								access));
4921	if (status)
4922		goto out_put_access;
4923
4924	status = nfsd4_truncate(rqstp, cur_fh, open);
4925	if (status)
4926		goto out_put_access;
4927out:
4928	return status;
4929out_put_access:
4930	stp->st_access_bmap = old_access_bmap;
4931	nfs4_file_put_access(fp, open->op_share_access);
4932	reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
4933	goto out;
4934}
4935
4936static __be32
4937nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
4938{
4939	__be32 status;
4940	unsigned char old_deny_bmap = stp->st_deny_bmap;
4941
4942	if (!test_access(open->op_share_access, stp))
4943		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
4944
4945	/* test and set deny mode */
4946	spin_lock(&fp->fi_lock);
4947	status = nfs4_file_check_deny(fp, open->op_share_deny);
4948	if (status == nfs_ok) {
4949		set_deny(open->op_share_deny, stp);
4950		fp->fi_share_deny |=
4951				(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
4952	}
4953	spin_unlock(&fp->fi_lock);
4954
4955	if (status != nfs_ok)
4956		return status;
4957
4958	status = nfsd4_truncate(rqstp, cur_fh, open);
4959	if (status != nfs_ok)
4960		reset_union_bmap_deny(old_deny_bmap, stp);
4961	return status;
4962}
4963
4964	/* Should we give out recallable state? */
4965static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
4966{
4967	if (clp->cl_cb_state == NFSD4_CB_UP)
4968		return true;
4969	/*
4970	 * In the sessions case, since we don't have to establish a
4971	 * separate connection for callbacks, we assume it's OK
4972	 * until we hear otherwise:
4973	 */
4974	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
4975}
4976
4977static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
4978						int flag)
4979{
4980	struct file_lock *fl;
4981
4982	fl = locks_alloc_lock();
4983	if (!fl)
4984		return NULL;
4985	fl->fl_lmops = &nfsd_lease_mng_ops;
4986	fl->fl_flags = FL_DELEG;
4987	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ ? F_RDLCK : F_WRLCK;
4988	fl->fl_end = OFFSET_MAX;
4989	fl->fl_owner = (fl_owner_t)dp;
4990	fl->fl_pid = current->tgid;
4991	fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
4992	return fl;
4993}
4994
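/*
 * Try to set up a read delegation: become a delegee on the nfs4_file,
 * allocate the delegation stateid, install an FL_DELEG lease, and
 * then recheck for conflicts before hashing the delegation.
 */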
4995static struct nfs4_delegation *
4996nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
4997		    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
4998{
4999	int status = 0;
5000	struct nfs4_delegation *dp;
5001	struct nfsd_file *nf;
5002	struct file_lock *fl;
5003
5004	/*
5005	 * The fi_had_conflict and nfs4_delegation_exists checks
5006	 * here are just optimizations; we'll need to recheck them at
5007	 * the end:
5008	 */
5009	if (fp->fi_had_conflict)
5010		return ERR_PTR(-EAGAIN);
5011
5012	nf = find_readable_file(fp);
5013	if (!nf) {
5014		/* We should always have a readable file here */
5015		WARN_ON_ONCE(1);
5016		return ERR_PTR(-EBADF);
5017	}
5018	spin_lock(&state_lock);
5019	spin_lock(&fp->fi_lock);
5020	if (nfs4_delegation_exists(clp, fp))
5021		status = -EAGAIN;
5022	else if (!fp->fi_deleg_file) {
5023		fp->fi_deleg_file = nf;
5024		/* increment early to prevent fi_deleg_file from being
5025		 * cleared */
5026		fp->fi_delegees = 1;
5027		nf = NULL;
5028	} else
5029		fp->fi_delegees++;
5030	spin_unlock(&fp->fi_lock);
5031	spin_unlock(&state_lock);
5032	if (nf)
5033		nfsd_file_put(nf);
5034	if (status)
5035		return ERR_PTR(status);
5036
5037	status = -ENOMEM;
5038	dp = alloc_init_deleg(clp, fp, fh, odstate);
5039	if (!dp)
5040		goto out_delegees;
5041
5042	fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
5043	if (!fl)
5044		goto out_clnt_odstate;
5045
5046	status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
5047	if (fl)
5048		locks_free_lock(fl);
5049	if (status)
5050		goto out_clnt_odstate;
5051
5052	spin_lock(&state_lock);
5053	spin_lock(&fp->fi_lock);
5054	if (fp->fi_had_conflict)
5055		status = -EAGAIN;
5056	else
5057		status = hash_delegation_locked(dp, fp);
5058	spin_unlock(&fp->fi_lock);
5059	spin_unlock(&state_lock);
5060
5061	if (status)
5062		goto out_unlock;
5063
5064	return dp;
5065out_unlock:
5066	vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
5067out_clnt_odstate:
5068	put_clnt_odstate(dp->dl_clnt_odstate);
5069	nfs4_put_stid(&dp->dl_stid);
5070out_delegees:
5071	put_deleg_file(fp);
5072	return ERR_PTR(status);
5073}
5074
5075static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
5076{
5077	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
5078	if (status == -EAGAIN)
5079		open->op_why_no_deleg = WND4_CONTENTION;
5080	else {
5081		open->op_why_no_deleg = WND4_RESOURCE;
5082		switch (open->op_deleg_want) {
5083		case NFS4_SHARE_WANT_READ_DELEG:
5084		case NFS4_SHARE_WANT_WRITE_DELEG:
5085		case NFS4_SHARE_WANT_ANY_DELEG:
5086			break;
5087		case NFS4_SHARE_WANT_CANCEL:
5088			open->op_why_no_deleg = WND4_CANCELLED;
5089			break;
5090		case NFS4_SHARE_WANT_NO_DELEG:
5091			WARN_ON_ONCE(1);
5092		}
5093	}
5094}
5095
5096/*
5097 * Attempt to hand out a delegation.
5098 *
5099 * Note we don't support write delegations, and won't until the vfs has
5100 * proper support for them.
5101 */
5102static void
5103nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
5104			struct nfs4_ol_stateid *stp)
5105{
5106	struct nfs4_delegation *dp;
5107	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
5108	struct nfs4_client *clp = stp->st_stid.sc_client;
5109	int cb_up;
5110	int status = 0;
5111
5112	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
5113	open->op_recall = 0;
5114	switch (open->op_claim_type) {
5115		case NFS4_OPEN_CLAIM_PREVIOUS:
5116			if (!cb_up)
5117				open->op_recall = 1;
5118			if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
5119				goto out_no_deleg;
5120			break;
5121		case NFS4_OPEN_CLAIM_NULL:
5122		case NFS4_OPEN_CLAIM_FH:
5123			/*
5124			 * Let's not give out any delegations till everyone's
5125			 * had the chance to reclaim theirs, *and* until
5126			 * NLM locks have all been reclaimed:
5127			 */
5128			if (locks_in_grace(clp->net))
5129				goto out_no_deleg;
5130			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
5131				goto out_no_deleg;
5132			/*
5133			 * Also, if the file was opened for write or
5134			 * create, there's a good chance the client's
5135			 * about to write to it, resulting in an
5136			 * immediate recall (since we don't support
5137			 * write delegations):
5138			 */
5139			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
5140				goto out_no_deleg;
5141			if (open->op_create == NFS4_OPEN_CREATE)
5142				goto out_no_deleg;
5143			break;
5144		default:
5145			goto out_no_deleg;
5146	}
5147	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
5148	if (IS_ERR(dp))
5149		goto out_no_deleg;
5150
5151	memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
5152
5153	trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
5154	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
5155	nfs4_put_stid(&dp->dl_stid);
5156	return;
5157out_no_deleg:
5158	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
5159	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
5160	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
5161		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
5162		open->op_recall = 1;
5163	}
5164
5165	/* 4.1 client asking for a delegation? */
5166	if (open->op_deleg_want)
5167		nfsd4_open_deleg_none_ext(open, status);
5168	return;
5169}
5170
5171static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
5172					struct nfs4_delegation *dp)
5173{
5174	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
5175	    dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
5176		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
5177		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
5178	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
5179		   dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
5180		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
5181		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
5182	}
5183	/* Otherwise the client must be confused wanting a delegation
5184	 * it already has, so we don't return
5185	 * NFS4_OPEN_DELEGATE_NONE_EXT with a reason.
5186	 */
5187}
5188
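/*
 * Second half of OPEN processing: find or create the nfs4_file and
 * open stateid, perform (or upgrade) the VFS open, and possibly hand
 * out a delegation.
 */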
5189__be32
5190nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
5191{
5192	struct nfsd4_compoundres *resp = rqstp->rq_resp;
5193	struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
5194	struct nfs4_file *fp = NULL;
5195	struct nfs4_ol_stateid *stp = NULL;
5196	struct nfs4_delegation *dp = NULL;
5197	__be32 status;
5198	bool new_stp = false;
5199
5200	/*
5201	 * Look up the file; if found, look up the stateid, check the open
5202	 * request, and check for delegations in the process of being
5203	 * recalled. If not found, create the nfs4_file struct.
5204	 */
5205	fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
5206	if (fp != open->op_file) {
5207		status = nfs4_check_deleg(cl, open, &dp);
5208		if (status)
5209			goto out;
5210		stp = nfsd4_find_and_lock_existing_open(fp, open);
5211	} else {
5212		open->op_file = NULL;
5213		status = nfserr_bad_stateid;
5214		if (nfsd4_is_deleg_cur(open))
5215			goto out;
5216	}
5217
5218	if (!stp) {
5219		stp = init_open_stateid(fp, open);
5220		if (!open->op_stp)
5221			new_stp = true;
5222	}
5223
5224	/*
5225	 * OPEN the file, or upgrade an existing OPEN.
5226	 * If truncate fails, the OPEN fails.
5227	 *
5228	 * stp is already locked.
5229	 */
5230	if (!new_stp) {
5231		/* Stateid was found, this is an OPEN upgrade */
5232		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
5233		if (status) {
5234			mutex_unlock(&stp->st_mutex);
5235			goto out;
5236		}
5237	} else {
5238		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
5239		if (status) {
5240			stp->st_stid.sc_type = NFS4_CLOSED_STID;
5241			release_open_stateid(stp);
5242			mutex_unlock(&stp->st_mutex);
5243			goto out;
5244		}
5245
5246		stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
5247							open->op_odstate);
5248		if (stp->st_clnt_odstate == open->op_odstate)
5249			open->op_odstate = NULL;
5250	}
5251
5252	nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
5253	mutex_unlock(&stp->st_mutex);
5254
5255	if (nfsd4_has_session(&resp->cstate)) {
5256		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
5257			open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
5258			open->op_why_no_deleg = WND4_NOT_WANTED;
5259			goto nodeleg;
5260		}
5261	}
5262
5263	/*
5264	 * Attempt to hand out a delegation. No error return, because the
5265	 * OPEN succeeds even if we fail.
5266	 */
5267	nfs4_open_delegation(current_fh, open, stp);
5268nodeleg:
5269	status = nfs_ok;
5270	trace_nfsd_open(&stp->st_stid.sc_stateid);
5271out:
5272	/* 4.1 client trying to upgrade/downgrade delegation? */
5273	if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
5274	    open->op_deleg_want)
5275		nfsd4_deleg_xgrade_none_ext(open, dp);
5276
5277	if (fp)
5278		put_nfs4_file(fp);
5279	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
5280		open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
5281	/*
5282	 * To finish the open response, we just need to set the rflags.
5283	 */
5284	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
5285	if (nfsd4_has_session(&resp->cstate))
5286		open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
5287	else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
5288		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
5289
5290	if (dp)
5291		nfs4_put_stid(&dp->dl_stid);
5292	if (stp)
5293		nfs4_put_stid(&stp->st_stid);
5294
5295	return status;
5296}
5297
5298void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
5299			      struct nfsd4_open *open)
5300{
5301	if (open->op_openowner) {
5302		struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
5303
5304		nfsd4_cstate_assign_replay(cstate, so);
5305		nfs4_put_stateowner(so);
5306	}
5307	if (open->op_file)
5308		kmem_cache_free(file_slab, open->op_file);
5309	if (open->op_stp)
5310		nfs4_put_stid(&open->op_stp->st_stid);
5311	if (open->op_odstate)
5312		kmem_cache_free(odstate_slab, open->op_odstate);
5313}
5314
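/*
 * RENEW (v4.0 only): renew the client's lease, but report
 * CB_PATH_DOWN if the client holds delegations and its callback
 * channel isn't up.
 */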
5315__be32
5316nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5317	    union nfsd4_op_u *u)
5318{
5319	clientid_t *clid = &u->renew;
5320	struct nfs4_client *clp;
5321	__be32 status;
5322	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5323
5324	trace_nfsd_clid_renew(clid);
5325	status = lookup_clientid(clid, cstate, nn, false);
5326	if (status)
5327		goto out;
5328	clp = cstate->clp;
5329	status = nfserr_cb_path_down;
5330	if (!list_empty(&clp->cl_delegations)
5331			&& clp->cl_cb_state != NFSD4_CB_UP)
5332		goto out;
5333	status = nfs_ok;
5334out:
5335	return status;
5336}
5337
5338void
5339nfsd4_end_grace(struct nfsd_net *nn)
5340{
5341	/* do nothing if grace period already ended */
5342	if (nn->grace_ended)
5343		return;
5344
5345	trace_nfsd_grace_complete(nn);
5346	nn->grace_ended = true;
5347	/*
5348	 * If the server goes down again right now, an NFSv4
5349	 * client will still be allowed to reclaim after it comes back up,
5350	 * even if it hasn't yet had a chance to reclaim state this time.
5351	 */
5353	nfsd4_record_grace_done(nn);
5354	/*
5355	 * At this point, NFSv4 clients can still reclaim.  But if the
5356	 * server crashes, any that have not yet reclaimed will be out
5357	 * of luck on the next boot.
5358	 *
5359	 * (NFSv4.1+ clients are considered to have reclaimed once they
5360	 * call RECLAIM_COMPLETE.  NFSv4.0 clients are considered to
5361	 * have reclaimed after their first OPEN.)
5362	 */
5363	locks_end_grace(&nn->nfsd4_manager);
5364	/*
5365	 * At this point, and once lockd and/or any other containers
5366	 * exit their grace period, further reclaims will fail and
5367	 * regular locking can resume.
5368	 */
5369}
5370
5371/*
5372 * If we've waited a lease period but there are still clients trying to
5373 * reclaim, wait a little longer to give them a chance to finish.
5374 */
5375static bool clients_still_reclaiming(struct nfsd_net *nn)
5376{
5377	time64_t double_grace_period_end = nn->boot_time +
5378					   2 * nn->nfsd4_lease;
5379
5380	if (nn->track_reclaim_completes &&
5381			atomic_read(&nn->nr_reclaim_complete) ==
5382			nn->reclaim_str_hashtbl_size)
5383		return false;
5384	if (!nn->somebody_reclaimed)
5385		return false;
5386	nn->somebody_reclaimed = false;
5387	/*
5388	 * If we've given them *two* lease times to reclaim, and they're
5389	 * still not done, give up:
5390	 */
5391	if (ktime_get_boottime_seconds() > double_grace_period_end)
5392		return false;
5393	return true;
5394}
5395
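/*
 * Reap expired state: copy-notify stateids, clients, delegations on
 * the recall LRU, openowners on the close LRU, and timed-out blocked
 * locks. Returns the number of seconds until the laundromat should
 * run again.
 */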
5396static time64_t
5397nfs4_laundromat(struct nfsd_net *nn)
5398{
5399	struct nfs4_client *clp;
5400	struct nfs4_openowner *oo;
5401	struct nfs4_delegation *dp;
5402	struct nfs4_ol_stateid *stp;
5403	struct nfsd4_blocked_lock *nbl;
5404	struct list_head *pos, *next, reaplist;
5405	time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
5406	time64_t t, new_timeo = nn->nfsd4_lease;
5407	struct nfs4_cpntf_state *cps;
5408	copy_stateid_t *cps_t;
5409	int i;
5410
5411	if (clients_still_reclaiming(nn)) {
5412		new_timeo = 0;
5413		goto out;
5414	}
5415	nfsd4_end_grace(nn);
5416	INIT_LIST_HEAD(&reaplist);
5417
5418	spin_lock(&nn->s2s_cp_lock);
5419	idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
5420		cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
5421		if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
5422				cps->cpntf_time < cutoff)
5423			_free_cpntf_state_locked(nn, cps);
5424	}
5425	spin_unlock(&nn->s2s_cp_lock);
5426
5427	spin_lock(&nn->client_lock);
5428	list_for_each_safe(pos, next, &nn->client_lru) {
5429		clp = list_entry(pos, struct nfs4_client, cl_lru);
5430		if (clp->cl_time > cutoff) {
5431			t = clp->cl_time - cutoff;
5432			new_timeo = min(new_timeo, t);
5433			break;
5434		}
5435		if (mark_client_expired_locked(clp)) {
5436			trace_nfsd_clid_expired(&clp->cl_clientid);
5437			continue;
5438		}
5439		list_add(&clp->cl_lru, &reaplist);
5440	}
5441	spin_unlock(&nn->client_lock);
5442	list_for_each_safe(pos, next, &reaplist) {
5443		clp = list_entry(pos, struct nfs4_client, cl_lru);
5444		trace_nfsd_clid_purged(&clp->cl_clientid);
5445		list_del_init(&clp->cl_lru);
5446		expire_client(clp);
5447	}
5448	spin_lock(&state_lock);
5449	list_for_each_safe(pos, next, &nn->del_recall_lru) {
5450		dp = list_entry(pos, struct nfs4_delegation, dl_recall_lru);
5451		if (dp->dl_time > cutoff) {
5452			t = dp->dl_time - cutoff;
5453			new_timeo = min(new_timeo, t);
5454			break;
5455		}
5456		WARN_ON(!unhash_delegation_locked(dp));
5457		list_add(&dp->dl_recall_lru, &reaplist);
5458	}
5459	spin_unlock(&state_lock);
5460	while (!list_empty(&reaplist)) {
5461		dp = list_first_entry(&reaplist, struct nfs4_delegation,
5462					dl_recall_lru);
5463		list_del_init(&dp->dl_recall_lru);
5464		revoke_delegation(dp);
5465	}
5466
5467	spin_lock(&nn->client_lock);
5468	while (!list_empty(&nn->close_lru)) {
5469		oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
5470					oo_close_lru);
5471		if (oo->oo_time > cutoff) {
5472			t = oo->oo_time - cutoff;
5473			new_timeo = min(new_timeo, t);
5474			break;
5475		}
5476		list_del_init(&oo->oo_close_lru);
5477		stp = oo->oo_last_closed_stid;
5478		oo->oo_last_closed_stid = NULL;
5479		spin_unlock(&nn->client_lock);
5480		nfs4_put_stid(&stp->st_stid);
5481		spin_lock(&nn->client_lock);
5482	}
5483	spin_unlock(&nn->client_lock);
5484
5485	/*
5486	 * It's possible for a client to try to acquire an already held lock
5487	 * that is being held for a long time, and then lose interest in it.
5488	 * So, we clean out any un-revisited request after a lease period
5489	 * under the assumption that the client is no longer interested.
5490	 *
5491	 * RFC5661, sec. 9.6 states that the client must not rely on getting
5492	 * notifications and must continue to poll for locks, even when the
5493	 * server supports them. Thus this shouldn't lead to clients blocking
5494	 * indefinitely once the lock does become free.
5495	 */
5496	BUG_ON(!list_empty(&reaplist));
5497	spin_lock(&nn->blocked_locks_lock);
5498	while (!list_empty(&nn->blocked_locks_lru)) {
5499		nbl = list_first_entry(&nn->blocked_locks_lru,
5500					struct nfsd4_blocked_lock, nbl_lru);
5501		if (nbl->nbl_time > cutoff) {
5502			t = nbl->nbl_time - cutoff;
5503			new_timeo = min(new_timeo, t);
5504			break;
5505		}
5506		list_move(&nbl->nbl_lru, &reaplist);
5507		list_del_init(&nbl->nbl_list);
5508	}
5509	spin_unlock(&nn->blocked_locks_lock);
5510
5511	while (!list_empty(&reaplist)) {
5512		nbl = list_first_entry(&reaplist,
5513					struct nfsd4_blocked_lock, nbl_lru);
5514		list_del_init(&nbl->nbl_lru);
5515		free_blocked_lock(nbl);
5516	}
5517out:
5518	new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
5519	return new_timeo;
5520}
5521
5522static struct workqueue_struct *laundry_wq;
5523static void laundromat_main(struct work_struct *);
5524
5525static void
5526laundromat_main(struct work_struct *laundry)
5527{
5528	time64_t t;
5529	struct delayed_work *dwork = to_delayed_work(laundry);
5530	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
5531					   laundromat_work);
5532
5533	t = nfs4_laundromat(nn);
5534	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
5535}
5536
5537static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
5538{
5539	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
5540		return nfserr_bad_stateid;
5541	return nfs_ok;
5542}
5543
5544static inline int
5545access_permit_read(struct nfs4_ol_stateid *stp)
5546{
5547	return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
5548		test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
5549		test_access(NFS4_SHARE_ACCESS_WRITE, stp);
5550}
5551
5552static inline int
5553access_permit_write(struct nfs4_ol_stateid *stp)
5554{
5555	return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
5556		test_access(NFS4_SHARE_ACCESS_BOTH, stp);
5557}
5558
5559static
5560__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
5561{
5562	__be32 status = nfserr_openmode;
5563
5564	/* For lock stateids, we test the parent open, not the lock: */
5565	if (stp->st_openstp)
5566		stp = stp->st_openstp;
5567	if ((flags & WR_STATE) && !access_permit_write(stp))
5568		goto out;
5569	if ((flags & RD_STATE) && !access_permit_read(stp))
5570		goto out;
5571	status = nfs_ok;
5572out:
5573	return status;
5574}
5575
5576static inline __be32
5577check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags)
5578{
5579	if (ONE_STATEID(stateid) && (flags & RD_STATE))
5580		return nfs_ok;
5581	else if (opens_in_grace(net)) {
5582		/* The answer in the remaining cases depends on the existence
5583		 * of conflicting state, so we must wait out the grace period. */
5584		return nfserr_grace;
5585	} else if (flags & WR_STATE)
5586		return nfs4_share_conflict(current_fh,
5587				NFS4_SHARE_DENY_WRITE);
5588	else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
5589		return nfs4_share_conflict(current_fh,
5590				NFS4_SHARE_DENY_READ);
5591}
5592
5593/*
5594 * Allow READ/WRITE during grace period on recovered state only for files
5595 * that are not able to provide mandatory locking.
5596 */
5597static inline int
5598grace_disallows_io(struct net *net, struct inode *inode)
5599{
5600	return opens_in_grace(net) && mandatory_lock(inode);
5601}
5602
5603static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
5604{
5605	/*
5606	 * When sessions are used the stateid generation number is ignored
5607	 * when it is zero.
5608	 */
5609	if (has_session && in->si_generation == 0)
5610		return nfs_ok;
5611
5612	if (in->si_generation == ref->si_generation)
5613		return nfs_ok;
5614
5615	/* If the client sends us a stateid from the future, it's buggy: */
5616	if (nfsd4_stateid_generation_after(in, ref))
5617		return nfserr_bad_stateid;
5618	/*
5619	 * However, we could see a stateid from the past, even from a
5620	 * non-buggy client.  For example, if the client sends a lock
5621	 * while some IO is outstanding, the lock may bump si_generation
5622	 * while the IO is still in flight.  The client could avoid that
5623	 * situation by waiting for responses on all the IO requests,
5624	 * but better performance may result in retrying IO that
5625	 * receives an old_stateid error if requests are rarely
5626	 * reordered in flight:
5627	 */
5628	return nfserr_old_stateid;
5629}
5630
5631static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_stid *s, bool has_session)
5632{
5633	__be32 ret;
5634
5635	spin_lock(&s->sc_lock);
5636	ret = nfsd4_verify_open_stid(s);
5637	if (ret == nfs_ok)
5638		ret = check_stateid_generation(in, &s->sc_stateid, has_session);
5639	spin_unlock(&s->sc_lock);
5640	return ret;
5641}
5642
5643static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols)
5644{
5645	if (ols->st_stateowner->so_is_open_owner &&
5646	    !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
5647		return nfserr_bad_stateid;
5648	return nfs_ok;
5649}
5650
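/*
 * Classify a stateid presented by the client: verify its generation,
 * then map the stid type to nfs_ok, deleg_revoked, or bad_stateid.
 */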
5651static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
5652{
5653	struct nfs4_stid *s;
5654	__be32 status = nfserr_bad_stateid;
5655
5656	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
5657		CLOSE_STATEID(stateid))
5658		return status;
5659	spin_lock(&cl->cl_lock);
5660	s = find_stateid_locked(cl, stateid);
5661	if (!s)
5662		goto out_unlock;
5663	status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
5664	if (status)
5665		goto out_unlock;
5666	switch (s->sc_type) {
5667	case NFS4_DELEG_STID:
5668		status = nfs_ok;
5669		break;
5670	case NFS4_REVOKED_DELEG_STID:
5671		status = nfserr_deleg_revoked;
5672		break;
5673	case NFS4_OPEN_STID:
5674	case NFS4_LOCK_STID:
5675		status = nfsd4_check_openowner_confirmed(openlockstateid(s));
5676		break;
5677	default:
5678		printk(KERN_WARNING "unknown stateid type %x\n", s->sc_type);
5679		fallthrough;
5680	case NFS4_CLOSED_STID:
5681	case NFS4_CLOSED_DELEG_STID:
5682		status = nfserr_bad_stateid;
5683	}
5684out_unlock:
5685	spin_unlock(&cl->cl_lock);
5686	return status;
5687}
5688
5689__be32
5690nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
5691		     stateid_t *stateid, unsigned char typemask,
5692		     struct nfs4_stid **s, struct nfsd_net *nn)
5693{
5694	__be32 status;
5695	bool return_revoked = false;
5696
5697	/*
5698	 * Only return revoked delegations if explicitly asked;
5699	 * otherwise report revoked or bad_stateid status.
5700	 */
5701	if (typemask & NFS4_REVOKED_DELEG_STID)
5702		return_revoked = true;
5703	else if (typemask & NFS4_DELEG_STID)
5704		typemask |= NFS4_REVOKED_DELEG_STID;
5705
5706	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
5707		CLOSE_STATEID(stateid))
5708		return nfserr_bad_stateid;
5709	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn,
5710				 false);
5711	if (status == nfserr_stale_clientid) {
5712		if (cstate->session)
5713			return nfserr_bad_stateid;
5714		return nfserr_stale_stateid;
5715	}
5716	if (status)
5717		return status;
5718	*s = find_stateid_by_type(cstate->clp, stateid, typemask);
5719	if (!*s)
5720		return nfserr_bad_stateid;
5721	if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
5722		nfs4_put_stid(*s);
5723		if (cstate->minorversion)
5724			return nfserr_deleg_revoked;
5725		return nfserr_bad_stateid;
5726	}
5727	return nfs_ok;
5728}
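
/*
 * Typical caller pattern (a sketch; DELEGRETURN below does exactly
 * this):
 *
 *	status = nfsd4_lookup_stateid(cstate, stateid,
 *				      NFS4_DELEG_STID, &s, nn);
 *	if (status)
 *		return status;
 *	...use s...
 *	nfs4_put_stid(s);
 *
 * On success the caller owns a reference on *s and must drop it with
 * nfs4_put_stid() when finished.
 */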
5729
5730static struct nfsd_file *
5731nfs4_find_file(struct nfs4_stid *s, int flags)
5732{
5733	if (!s)
5734		return NULL;
5735
5736	switch (s->sc_type) {
5737	case NFS4_DELEG_STID:
5738		if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
5739			return NULL;
5740		return nfsd_file_get(s->sc_file->fi_deleg_file);
5741	case NFS4_OPEN_STID:
5742	case NFS4_LOCK_STID:
5743		if (flags & RD_STATE)
5744			return find_readable_file(s->sc_file);
5745		else
5746			return find_writeable_file(s->sc_file);
5747	}
5748
5749	return NULL;
5750}
5751
5752static __be32
5753nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
5754{
5755	__be32 status;
5756
5757	status = nfsd4_check_openowner_confirmed(ols);
5758	if (status)
5759		return status;
5760	return nfs4_check_openmode(ols, flags);
5761}
5762
5763static __be32
5764nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
5765		struct nfsd_file **nfp, int flags)
5766{
5767	int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
5768	struct nfsd_file *nf;
5769	__be32 status;
5770
5771	nf = nfs4_find_file(s, flags);
5772	if (nf) {
5773		status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
5774				acc | NFSD_MAY_OWNER_OVERRIDE);
5775		if (status) {
5776			nfsd_file_put(nf);
5777			goto out;
5778		}
5779	} else {
5780		status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
5781		if (status)
5782			return status;
5783	}
5784	*nfp = nf;
5785out:
5786	return status;
5787}

5788static void
5789_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
5790{
5791	WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
5792	if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
5793		return;
5794	list_del(&cps->cp_list);
5795	idr_remove(&nn->s2s_cp_stateids,
5796		   cps->cp_stateid.stid.si_opaque.so_id);
5797	kfree(cps);
5798}

5799/*
5800 * A READ belonging to an inter-server-to-server COPY carries a
5801 * copy stateid.  Look up the copy-notify stateid in the idr
5802 * structure and take a reference on it.
5803 */
5804__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
5805			  struct nfs4_client *clp,
5806			  struct nfs4_cpntf_state **cps)
5807{
5808	copy_stateid_t *cps_t;
5809	struct nfs4_cpntf_state *state = NULL;
5810
5811	if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id)
5812		return nfserr_bad_stateid;
5813	spin_lock(&nn->s2s_cp_lock);
5814	cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
5815	if (cps_t) {
5816		state = container_of(cps_t, struct nfs4_cpntf_state,
5817				     cp_stateid);
5818		if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
5819			state = NULL;
5820			goto unlock;
5821		}
5822		if (!clp)
5823			refcount_inc(&state->cp_stateid.sc_count);
5824		else
5825			_free_cpntf_state_locked(nn, state);
5826	}
5827unlock:
5828	spin_unlock(&nn->s2s_cp_lock);
5829	if (!state)
5830		return nfserr_bad_stateid;
5831	if (!clp)
5832		*cps = state;
5833	return 0;
5834}
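
/*
 * Note the dual role of @clp above: the lookup path (find_cpntf_state
 * below) passes a NULL client and gets back a referenced copy-notify
 * state in *cps, while the client-teardown path passes the client and
 * instead drops the stateid's reference, freeing it once unused.
 */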
5835
5836static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
5837			       struct nfs4_stid **stid)
5838{
5839	__be32 status;
5840	struct nfs4_cpntf_state *cps = NULL;
5841	struct nfsd4_compound_state cstate;
5842
5843	status = manage_cpntf_state(nn, st, NULL, &cps);
5844	if (status)
5845		return status;
5846
5847	cps->cpntf_time = ktime_get_boottime_seconds();
5848	memset(&cstate, 0, sizeof(cstate));
5849	status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true);
5850	if (status)
5851		goto out;
5852	status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid,
5853				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
5854				stid, nn);
5855	put_client_renew(cstate.clp);
5856out:
5857	nfs4_put_cpntf_state(nn, cps);
5858	return status;
5859}
5860
5861void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
5862{
5863	spin_lock(&nn->s2s_cp_lock);
5864	_free_cpntf_state_locked(nn, cps);
5865	spin_unlock(&nn->s2s_cp_lock);
5866}
5867
5868/*
5869 * Checks for stateid operations
5870 */
5871__be32
5872nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
5873		struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
5874		stateid_t *stateid, int flags, struct nfsd_file **nfp,
5875		struct nfs4_stid **cstid)
5876{
5877	struct inode *ino = d_inode(fhp->fh_dentry);
5878	struct net *net = SVC_NET(rqstp);
5879	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5880	struct nfs4_stid *s = NULL;
5881	__be32 status;
5882
5883	if (nfp)
5884		*nfp = NULL;
5885
5886	if (grace_disallows_io(net, ino))
5887		return nfserr_grace;
5888
5889	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
5890		status = check_special_stateids(net, fhp, stateid, flags);
5891		goto done;
5892	}
5893
5894	status = nfsd4_lookup_stateid(cstate, stateid,
5895				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
5896				&s, nn);
5897	if (status == nfserr_bad_stateid)
5898		status = find_cpntf_state(nn, stateid, &s);
5899	if (status)
5900		return status;
5901	status = nfsd4_stid_check_stateid_generation(stateid, s,
5902			nfsd4_has_session(cstate));
5903	if (status)
5904		goto out;
5905
5906	switch (s->sc_type) {
5907	case NFS4_DELEG_STID:
5908		status = nfs4_check_delegmode(delegstateid(s), flags);
5909		break;
5910	case NFS4_OPEN_STID:
5911	case NFS4_LOCK_STID:
5912		status = nfs4_check_olstateid(openlockstateid(s), flags);
5913		break;
5914	default:
5915		status = nfserr_bad_stateid;
5916		break;
5917	}
5918	if (status)
5919		goto out;
5920	status = nfs4_check_fh(fhp, s);
5921
5922done:
5923	if (status == nfs_ok && nfp)
5924		status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
5925out:
5926	if (s) {
5927		if (!status && cstid)
5928			*cstid = s;
5929		else
5930			nfs4_put_stid(s);
5931	}
5932	return status;
5933}
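
/*
 * Illustrative call from the READ path (a sketch of the nfs4proc.c
 * caller; the rd_* names are from that caller, not defined here):
 *
 *	status = nfs4_preprocess_stateid_op(rqstp, cstate,
 *			&cstate->current_fh, &read->rd_stateid,
 *			RD_STATE, &read->rd_nf, NULL);
 *
 * On success the file to read is returned in rd_nf with a reference
 * held; passing a non-NULL cstid additionally hands back the
 * referenced stateid itself.
 */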
5934
5935/*
5936 * Test if the stateid is valid
5937 */
5938__be32
5939nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5940		   union nfsd4_op_u *u)
5941{
5942	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
5943	struct nfsd4_test_stateid_id *stateid;
5944	struct nfs4_client *cl = cstate->session->se_client;
5945
5946	list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
5947		stateid->ts_id_status =
5948			nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
5949
5950	return nfs_ok;
5951}
5952
5953static __be32
5954nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
5955{
5956	struct nfs4_ol_stateid *stp = openlockstateid(s);
5957	__be32 ret;
5958
5959	ret = nfsd4_lock_ol_stateid(stp);
5960	if (ret)
5961		goto out_put_stid;
5962
5963	ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
5964	if (ret)
5965		goto out;
5966
5967	ret = nfserr_locks_held;
5968	if (check_for_locks(stp->st_stid.sc_file,
5969			    lockowner(stp->st_stateowner)))
5970		goto out;
5971
5972	release_lock_stateid(stp);
5973	ret = nfs_ok;
5974
5975out:
5976	mutex_unlock(&stp->st_mutex);
5977out_put_stid:
5978	nfs4_put_stid(s);
5979	return ret;
5980}
5981
5982__be32
5983nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
5984		   union nfsd4_op_u *u)
5985{
5986	struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
5987	stateid_t *stateid = &free_stateid->fr_stateid;
5988	struct nfs4_stid *s;
5989	struct nfs4_delegation *dp;
5990	struct nfs4_client *cl = cstate->session->se_client;
5991	__be32 ret = nfserr_bad_stateid;
5992
5993	spin_lock(&cl->cl_lock);
5994	s = find_stateid_locked(cl, stateid);
5995	if (!s)
5996		goto out_unlock;
5997	spin_lock(&s->sc_lock);
5998	switch (s->sc_type) {
5999	case NFS4_DELEG_STID:
6000		ret = nfserr_locks_held;
6001		break;
6002	case NFS4_OPEN_STID:
6003		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
6004		if (ret)
6005			break;
6006		ret = nfserr_locks_held;
6007		break;
6008	case NFS4_LOCK_STID:
6009		spin_unlock(&s->sc_lock);
6010		refcount_inc(&s->sc_count);
6011		spin_unlock(&cl->cl_lock);
6012		ret = nfsd4_free_lock_stateid(stateid, s);
6013		goto out;
6014	case NFS4_REVOKED_DELEG_STID:
6015		spin_unlock(&s->sc_lock);
6016		dp = delegstateid(s);
6017		list_del_init(&dp->dl_recall_lru);
6018		spin_unlock(&cl->cl_lock);
6019		nfs4_put_stid(s);
6020		ret = nfs_ok;
6021		goto out;
6022	/* Default falls through and returns nfserr_bad_stateid */
6023	}
6024	spin_unlock(&s->sc_lock);
6025out_unlock:
6026	spin_unlock(&cl->cl_lock);
6027out:
6028	return ret;
6029}
6030
6031static inline int
6032setlkflg(int type)
6033{
6034	return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
6035		RD_STATE : WR_STATE;
6036}
6037
6038static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp)
6039{
6040	struct svc_fh *current_fh = &cstate->current_fh;
6041	struct nfs4_stateowner *sop = stp->st_stateowner;
6042	__be32 status;
6043
6044	status = nfsd4_check_seqid(cstate, sop, seqid);
6045	if (status)
6046		return status;
6047	status = nfsd4_lock_ol_stateid(stp);
6048	if (status != nfs_ok)
6049		return status;
6050	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
6051	if (status == nfs_ok)
6052		status = nfs4_check_fh(current_fh, &stp->st_stid);
6053	if (status != nfs_ok)
6054		mutex_unlock(&stp->st_mutex);
6055	return status;
6056}
6057
6058/*
6059 * Checks for sequence id mutating operations.
6060 */
6061static __be32
6062nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
6063			 stateid_t *stateid, char typemask,
6064			 struct nfs4_ol_stateid **stpp,
6065			 struct nfsd_net *nn)
6066{
6067	__be32 status;
6068	struct nfs4_stid *s;
6069	struct nfs4_ol_stateid *stp = NULL;
6070
6071	trace_nfsd_preprocess(seqid, stateid);
6072
6073	*stpp = NULL;
6074	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
6075	if (status)
6076		return status;
6077	stp = openlockstateid(s);
6078	nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
6079
6080	status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
6081	if (!status)
6082		*stpp = stp;
6083	else
6084		nfs4_put_stid(&stp->st_stid);
6085	return status;
6086}
6087
6088static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
6089						 stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn)
6090{
6091	__be32 status;
6092	struct nfs4_openowner *oo;
6093	struct nfs4_ol_stateid *stp;
6094
6095	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
6096						NFS4_OPEN_STID, &stp, nn);
6097	if (status)
6098		return status;
6099	oo = openowner(stp->st_stateowner);
6100	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
6101		mutex_unlock(&stp->st_mutex);
6102		nfs4_put_stid(&stp->st_stid);
6103		return nfserr_bad_stateid;
6104	}
6105	*stpp = stp;
6106	return nfs_ok;
6107}
6108
6109__be32
6110nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6111		   union nfsd4_op_u *u)
6112{
6113	struct nfsd4_open_confirm *oc = &u->open_confirm;
6114	__be32 status;
6115	struct nfs4_openowner *oo;
6116	struct nfs4_ol_stateid *stp;
6117	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
6118
6119	dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
6120			cstate->current_fh.fh_dentry);
6121
6122	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
6123	if (status)
6124		return status;
6125
6126	status = nfs4_preprocess_seqid_op(cstate,
6127					oc->oc_seqid, &oc->oc_req_stateid,
6128					NFS4_OPEN_STID, &stp, nn);
6129	if (status)
6130		goto out;
6131	oo = openowner(stp->st_stateowner);
6132	status = nfserr_bad_stateid;
6133	if (oo->oo_flags & NFS4_OO_CONFIRMED) {
6134		mutex_unlock(&stp->st_mutex);
6135		goto put_stateid;
6136	}
6137	oo->oo_flags |= NFS4_OO_CONFIRMED;
6138	nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
6139	mutex_unlock(&stp->st_mutex);
6140	trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid);
6141	nfsd4_client_record_create(oo->oo_owner.so_client);
6142	status = nfs_ok;
6143put_stateid:
6144	nfs4_put_stid(&stp->st_stid);
6145out:
6146	nfsd4_bump_seqid(cstate, status);
6147	return status;
6148}
6149
6150static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access)
6151{
6152	if (!test_access(access, stp))
6153		return;
6154	nfs4_file_put_access(stp->st_stid.sc_file, access);
6155	clear_access(access, stp);
6156}
6157
6158static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access)
6159{
6160	switch (to_access) {
6161	case NFS4_SHARE_ACCESS_READ:
6162		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE);
6163		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
6164		break;
6165	case NFS4_SHARE_ACCESS_WRITE:
6166		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ);
6167		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
6168		break;
6169	case NFS4_SHARE_ACCESS_BOTH:
6170		break;
6171	default:
6172		WARN_ON_ONCE(1);
6173	}
6174}
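
/*
 * Example: downgrading an OPEN stateid from NFS4_SHARE_ACCESS_BOTH to
 * NFS4_SHARE_ACCESS_READ sheds the WRITE and BOTH access bits (and
 * their file references), leaving only READ access; a "downgrade" to
 * BOTH is a no-op, since there is nothing broader to shed.
 */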
6175
6176__be32
6177nfsd4_open_downgrade(struct svc_rqst *rqstp,
6178		     struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
6179{
6180	struct nfsd4_open_downgrade *od = &u->open_downgrade;
6181	__be32 status;
6182	struct nfs4_ol_stateid *stp;
6183	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
6184
6185	dprintk("NFSD: nfsd4_open_downgrade on file %pd\n",
6186			cstate->current_fh.fh_dentry);
6187
6188	/* We don't yet support WANT bits: */
6189	if (od->od_deleg_want)
6190		dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
6191			od->od_deleg_want);
6192
6193	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
6194					&od->od_stateid, &stp, nn);
6195	if (status)
6196		goto out;
6197	status = nfserr_inval;
6198	if (!test_access(od->od_share_access, stp)) {
6199		dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
6200			stp->st_access_bmap, od->od_share_access);
6201		goto put_stateid;
6202	}
6203	if (!test_deny(od->od_share_deny, stp)) {
6204		dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
6205			stp->st_deny_bmap, od->od_share_deny);
6206		goto put_stateid;
6207	}
6208	nfs4_stateid_downgrade(stp, od->od_share_access);
6209	reset_union_bmap_deny(od->od_share_deny, stp);
6210	nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
6211	status = nfs_ok;
6212put_stateid:
6213	mutex_unlock(&stp->st_mutex);
6214	nfs4_put_stid(&stp->st_stid);
6215out:
6216	nfsd4_bump_seqid(cstate, status);
6217	return status;
6218}
6219
6220static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
6221{
6222	struct nfs4_client *clp = s->st_stid.sc_client;
6223	bool unhashed;
6224	LIST_HEAD(reaplist);
6225	struct nfs4_ol_stateid *stp;
6226
6227	spin_lock(&clp->cl_lock);
6228	unhashed = unhash_open_stateid(s, &reaplist);
6229
6230	if (clp->cl_minorversion) {
6231		if (unhashed)
6232			put_ol_stateid_locked(s, &reaplist);
6233		spin_unlock(&clp->cl_lock);
6234		list_for_each_entry(stp, &reaplist, st_locks)
6235			nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
6236		free_ol_stateid_reaplist(&reaplist);
6237	} else {
6238		spin_unlock(&clp->cl_lock);
6239		free_ol_stateid_reaplist(&reaplist);
6240		if (unhashed)
6241			move_to_close_lru(s, clp->net);
6242	}
6243}
6244
6245/*
6246 * nfs4_unlock_state() called after encode
6247 */
6248__be32
6249nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6250		union nfsd4_op_u *u)
6251{
6252	struct nfsd4_close *close = &u->close;
6253	__be32 status;
6254	struct nfs4_ol_stateid *stp;
6255	struct net *net = SVC_NET(rqstp);
6256	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6257
6258	dprintk("NFSD: nfsd4_close on file %pd\n",
6259			cstate->current_fh.fh_dentry);
6260
6261	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
6262					&close->cl_stateid,
6263					NFS4_OPEN_STID|NFS4_CLOSED_STID,
6264					&stp, nn);
6265	nfsd4_bump_seqid(cstate, status);
6266	if (status)
6267		goto out;
6268
6269	stp->st_stid.sc_type = NFS4_CLOSED_STID;
6270
6271	/*
6272	 * Technically we don't _really_ have to increment or copy it, since
6273	 * it should just be gone after this operation and we clobber the
6274	 * copied value below, but we continue to do so here just to ensure
6275	 * that racing ops see that there was a state change.
6276	 */
6277	nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
6278
6279	nfsd4_close_open_stateid(stp);
6280	mutex_unlock(&stp->st_mutex);
6281
6282	/* v4.1+ suggests that we send a special stateid in here, since the
6283	 * clients should just ignore this anyway. Since this is not useful
6284	 * for v4.0 clients either, we set it to the special close_stateid
6285	 * universally.
6286	 *
6287	 * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
6288	 */
6289	memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));
6290
6291	/* put reference from nfs4_preprocess_seqid_op */
6292	nfs4_put_stid(&stp->st_stid);
6293out:
6294	return status;
6295}
6296
6297__be32
6298nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6299		  union nfsd4_op_u *u)
6300{
6301	struct nfsd4_delegreturn *dr = &u->delegreturn;
6302	struct nfs4_delegation *dp;
6303	stateid_t *stateid = &dr->dr_stateid;
6304	struct nfs4_stid *s;
6305	__be32 status;
6306	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
6307
6308	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
	if (status)
6309		return status;
6310
6311	status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
6312	if (status)
6313		goto out;
6314	dp = delegstateid(s);
6315	status = nfsd4_stid_check_stateid_generation(stateid, &dp->dl_stid, nfsd4_has_session(cstate));
6316	if (status)
6317		goto put_stateid;
6318
6319	destroy_delegation(dp);
6320put_stateid:
6321	nfs4_put_stid(&dp->dl_stid);
6322out:
6323	return status;
6324}
6325
6326static inline u64
6327end_offset(u64 start, u64 len)
6328{
6329	u64 end;
6330
6331	end = start + len;
6332	return end >= start ? end: NFS4_MAX_UINT64;
6333}
6334
6335/* last octet in a range */
6336static inline u64
6337last_byte_offset(u64 start, u64 len)
6338{
6339	u64 end;
6340
6341	WARN_ON_ONCE(!len);
6342	end = start + len;
6343	return end > start ? end - 1: NFS4_MAX_UINT64;
6344}
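
/*
 * Worked examples: last_byte_offset(100, 10) is 109, i.e. the range
 * covers bytes 100..109 inclusive.  With the special "lock to EOF"
 * length NFS4_MAX_UINT64 and a nonzero start, start + len wraps, the
 * end > start test fails, and the result saturates to
 * NFS4_MAX_UINT64.
 */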
6345
6346/*
6347 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
6348 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
6349 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
6350 * locking, this prevents us from being completely protocol-compliant.  The
6351 * real solution to this problem is to start using unsigned file offsets in
6352 * the VFS, but this is a very deep change!
6353 */
6354static inline void
6355nfs4_transform_lock_offset(struct file_lock *lock)
6356{
6357	if (lock->fl_start < 0)
6358		lock->fl_start = OFFSET_MAX;
6359	if (lock->fl_end < 0)
6360		lock->fl_end = OFFSET_MAX;
6361}
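
/*
 * Example of the clamp above: a LOCK starting at byte 2^63 arrives as
 * an fl_start that is negative when viewed as a signed loff_t, so it
 * is pinned to OFFSET_MAX (2^63 - 1), the largest offset the VFS can
 * represent.
 */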
6362
6363static fl_owner_t
6364nfsd4_fl_get_owner(fl_owner_t owner)
6365{
6366	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
6367
6368	nfs4_get_stateowner(&lo->lo_owner);
6369	return owner;
6370}
6371
6372static void
6373nfsd4_fl_put_owner(fl_owner_t owner)
6374{
6375	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
6376
6377	if (lo)
6378		nfs4_put_stateowner(&lo->lo_owner);
6379}
6380
6381static void
6382nfsd4_lm_notify(struct file_lock *fl)
6383{
6384	struct nfs4_lockowner		*lo = (struct nfs4_lockowner *)fl->fl_owner;
6385	struct net			*net = lo->lo_owner.so_client->net;
6386	struct nfsd_net			*nn = net_generic(net, nfsd_net_id);
6387	struct nfsd4_blocked_lock	*nbl = container_of(fl,
6388						struct nfsd4_blocked_lock, nbl_lock);
6389	bool queue = false;
6390
6391	/* An empty list means that something else is going to be using it */
6392	spin_lock(&nn->blocked_locks_lock);
6393	if (!list_empty(&nbl->nbl_list)) {
6394		list_del_init(&nbl->nbl_list);
6395		list_del_init(&nbl->nbl_lru);
6396		queue = true;
6397	}
6398	spin_unlock(&nn->blocked_locks_lock);
6399
6400	if (queue)
6401		nfsd4_run_cb(&nbl->nbl_cb);
6402}
6403
6404static const struct lock_manager_operations nfsd_posix_mng_ops  = {
6405	.lm_notify = nfsd4_lm_notify,
6406	.lm_get_owner = nfsd4_fl_get_owner,
6407	.lm_put_owner = nfsd4_fl_put_owner,
6408};
6409
6410static inline void
6411nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
6412{
6413	struct nfs4_lockowner *lo;
6414
6415	if (fl->fl_lmops == &nfsd_posix_mng_ops) {
6416		lo = (struct nfs4_lockowner *) fl->fl_owner;
6417		xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
6418						GFP_KERNEL);
6419		if (!deny->ld_owner.data)
6420			/* We just don't care that much */
6421			goto nevermind;
6422		deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
6423	} else {
6424nevermind:
6425		deny->ld_owner.len = 0;
6426		deny->ld_owner.data = NULL;
6427		deny->ld_clientid.cl_boot = 0;
6428		deny->ld_clientid.cl_id = 0;
6429	}
6430	deny->ld_start = fl->fl_start;
6431	deny->ld_length = NFS4_MAX_UINT64;
6432	if (fl->fl_end != NFS4_MAX_UINT64)
6433		deny->ld_length = fl->fl_end - fl->fl_start + 1;
6434	deny->ld_type = NFS4_READ_LT;
6435	if (fl->fl_type != F_RDLCK)
6436		deny->ld_type = NFS4_WRITE_LT;
6437}
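
/*
 * Example: a conflicting F_WRLCK over bytes 100..109 is reported back
 * as ld_start = 100, ld_length = 10, ld_type = NFS4_WRITE_LT, while a
 * conflicting lock whose fl_end is NFS4_MAX_UINT64 keeps the special
 * "to end of file" length of NFS4_MAX_UINT64.
 */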
6438
6439static struct nfs4_lockowner *
6440find_lockowner_str_locked(struct nfs4_client *clp, struct xdr_netobj *owner)
6441{
6442	unsigned int strhashval = ownerstr_hashval(owner);
6443	struct nfs4_stateowner *so;
6444
6445	lockdep_assert_held(&clp->cl_lock);
6446
6447	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
6448			    so_strhash) {
6449		if (so->so_is_open_owner)
6450			continue;
6451		if (same_owner_str(so, owner))
6452			return lockowner(nfs4_get_stateowner(so));
6453	}
6454	return NULL;
6455}
6456
6457static struct nfs4_lockowner *
6458find_lockowner_str(struct nfs4_client *clp, struct xdr_netobj *owner)
6459{
6460	struct nfs4_lockowner *lo;
6461
6462	spin_lock(&clp->cl_lock);
6463	lo = find_lockowner_str_locked(clp, owner);
6464	spin_unlock(&clp->cl_lock);
6465	return lo;
6466}
6467
6468static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
6469{
6470	unhash_lockowner_locked(lockowner(sop));
6471}
6472
6473static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
6474{
6475	struct nfs4_lockowner *lo = lockowner(sop);
6476
6477	kmem_cache_free(lockowner_slab, lo);
6478}
6479
6480static const struct nfs4_stateowner_operations lockowner_ops = {
6481	.so_unhash =	nfs4_unhash_lockowner,
6482	.so_free =	nfs4_free_lockowner,
6483};
6484
6485/*
6486 * Alloc a lock owner structure.
6487 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
6488 * occurred.
6489 *
6490 * strhashval = ownerstr_hashval
6491 */
6492static struct nfs4_lockowner *
6493alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
6494			   struct nfs4_ol_stateid *open_stp,
6495			   struct nfsd4_lock *lock)
6496{
6497	struct nfs4_lockowner *lo, *ret;
6498
6499	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
6500	if (!lo)
6501		return NULL;
6502	INIT_LIST_HEAD(&lo->lo_blocked);
6503	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
6504	lo->lo_owner.so_is_open_owner = 0;
6505	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
6506	lo->lo_owner.so_ops = &lockowner_ops;
6507	spin_lock(&clp->cl_lock);
6508	ret = find_lockowner_str_locked(clp, &lock->lk_new_owner);
6509	if (ret == NULL) {
6510		list_add(&lo->lo_owner.so_strhash,
6511			 &clp->cl_ownerstr_hashtbl[strhashval]);
6512		ret = lo;
6513	} else
6514		nfs4_free_stateowner(&lo->lo_owner);
6515
6516	spin_unlock(&clp->cl_lock);
6517	return ret;
6518}
6519
6520static struct nfs4_ol_stateid *
6521find_lock_stateid(const struct nfs4_lockowner *lo,
6522		  const struct nfs4_ol_stateid *ost)
6523{
6524	struct nfs4_ol_stateid *lst;
6525
6526	lockdep_assert_held(&ost->st_stid.sc_client->cl_lock);
6527
6528	/* If ost is not hashed, ost->st_locks will not be valid */
6529	if (!nfs4_ol_stateid_unhashed(ost))
6530		list_for_each_entry(lst, &ost->st_locks, st_locks) {
6531			if (lst->st_stateowner == &lo->lo_owner) {
6532				refcount_inc(&lst->st_stid.sc_count);
6533				return lst;
6534			}
6535		}
6536	return NULL;
6537}
6538
6539static struct nfs4_ol_stateid *
6540init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
6541		  struct nfs4_file *fp, struct inode *inode,
6542		  struct nfs4_ol_stateid *open_stp)
6543{
6544	struct nfs4_client *clp = lo->lo_owner.so_client;
6545	struct nfs4_ol_stateid *retstp;
6546
6547	mutex_init(&stp->st_mutex);
6548	mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
6549retry:
6550	spin_lock(&clp->cl_lock);
6551	if (nfs4_ol_stateid_unhashed(open_stp))
6552		goto out_close;
6553	retstp = find_lock_stateid(lo, open_stp);
6554	if (retstp)
6555		goto out_found;
6556	refcount_inc(&stp->st_stid.sc_count);
6557	stp->st_stid.sc_type = NFS4_LOCK_STID;
6558	stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
6559	get_nfs4_file(fp);
6560	stp->st_stid.sc_file = fp;
6561	stp->st_access_bmap = 0;
6562	stp->st_deny_bmap = open_stp->st_deny_bmap;
6563	stp->st_openstp = open_stp;
6564	spin_lock(&fp->fi_lock);
6565	list_add(&stp->st_locks, &open_stp->st_locks);
6566	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
6567	list_add(&stp->st_perfile, &fp->fi_stateids);
6568	spin_unlock(&fp->fi_lock);
6569	spin_unlock(&clp->cl_lock);
6570	return stp;
6571out_found:
6572	spin_unlock(&clp->cl_lock);
6573	if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
6574		nfs4_put_stid(&retstp->st_stid);
6575		goto retry;
6576	}
6577	/* To keep mutex tracking happy */
6578	mutex_unlock(&stp->st_mutex);
6579	return retstp;
6580out_close:
6581	spin_unlock(&clp->cl_lock);
6582	mutex_unlock(&stp->st_mutex);
6583	return NULL;
6584}
6585
6586static struct nfs4_ol_stateid *
6587find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
6588			    struct inode *inode, struct nfs4_ol_stateid *ost,
6589			    bool *new)
6590{
6591	struct nfs4_stid *ns = NULL;
6592	struct nfs4_ol_stateid *lst;
6593	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
6594	struct nfs4_client *clp = oo->oo_owner.so_client;
6595
6596	*new = false;
6597	spin_lock(&clp->cl_lock);
6598	lst = find_lock_stateid(lo, ost);
6599	spin_unlock(&clp->cl_lock);
6600	if (lst != NULL) {
6601		if (nfsd4_lock_ol_stateid(lst) == nfs_ok)
6602			goto out;
6603		nfs4_put_stid(&lst->st_stid);
6604	}
6605	ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
6606	if (ns == NULL)
6607		return NULL;
6608
6609	lst = init_lock_stateid(openlockstateid(ns), lo, fi, inode, ost);
6610	if (lst == openlockstateid(ns))
6611		*new = true;
6612	else
6613		nfs4_put_stid(ns);
6614out:
6615	return lst;
6616}
6617
6618static int
6619check_lock_length(u64 offset, u64 length)
6620{
6621	return ((length == 0) || ((length != NFS4_MAX_UINT64) &&
6622		(length > ~offset)));
6623}
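
/*
 * Examples: a zero length is always invalid, and the special length
 * NFS4_MAX_UINT64 ("to end of file") is always accepted.  Otherwise
 * the range must not wrap: offset = 2^64 - 5 with length = 10 fails
 * the length > ~offset test (10 > 4), so the LOCK gets nfserr_inval.
 */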
6624
6625static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
6626{
6627	struct nfs4_file *fp = lock_stp->st_stid.sc_file;
6628
6629	lockdep_assert_held(&fp->fi_lock);
6630
6631	if (test_access(access, lock_stp))
6632		return;
6633	__nfs4_file_get_access(fp, access);
6634	set_access(access, lock_stp);
6635}
6636
6637static __be32
6638lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
6639			    struct nfs4_ol_stateid *ost,
6640			    struct nfsd4_lock *lock,
6641			    struct nfs4_ol_stateid **plst, bool *new)
6642{
6643	__be32 status;
6644	struct nfs4_file *fi = ost->st_stid.sc_file;
6645	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
6646	struct nfs4_client *cl = oo->oo_owner.so_client;
6647	struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
6648	struct nfs4_lockowner *lo;
6649	struct nfs4_ol_stateid *lst;
6650	unsigned int strhashval;
6651
6652	lo = find_lockowner_str(cl, &lock->lk_new_owner);
6653	if (!lo) {
6654		strhashval = ownerstr_hashval(&lock->lk_new_owner);
6655		lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
6656		if (lo == NULL)
6657			return nfserr_jukebox;
6658	} else {
6659		/* with an existing lockowner, seqids must be the same */
6660		status = nfserr_bad_seqid;
6661		if (!cstate->minorversion &&
6662		    lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
6663			goto out;
6664	}
6665
6666	lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
6667	if (lst == NULL) {
6668		status = nfserr_jukebox;
6669		goto out;
6670	}
6671
6672	status = nfs_ok;
6673	*plst = lst;
6674out:
6675	nfs4_put_stateowner(&lo->lo_owner);
6676	return status;
6677}
6678
6679/*
6680 *  LOCK operation
6681 */
6682__be32
6683nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6684	   union nfsd4_op_u *u)
6685{
6686	struct nfsd4_lock *lock = &u->lock;
6687	struct nfs4_openowner *open_sop = NULL;
6688	struct nfs4_lockowner *lock_sop = NULL;
6689	struct nfs4_ol_stateid *lock_stp = NULL;
6690	struct nfs4_ol_stateid *open_stp = NULL;
6691	struct nfs4_file *fp;
6692	struct nfsd_file *nf = NULL;
6693	struct nfsd4_blocked_lock *nbl = NULL;
6694	struct file_lock *file_lock = NULL;
6695	struct file_lock *conflock = NULL;
6696	__be32 status = 0;
6697	int lkflg;
6698	int err;
6699	bool new = false;
6700	unsigned char fl_type;
6701	unsigned int fl_flags = FL_POSIX;
6702	struct net *net = SVC_NET(rqstp);
6703	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
6704
6705	dprintk("NFSD: nfsd4_lock: start=%lld length=%lld\n",
6706		(long long) lock->lk_offset,
6707		(long long) lock->lk_length);
6708
6709	if (check_lock_length(lock->lk_offset, lock->lk_length))
6710		return nfserr_inval;
6711
6712	status = fh_verify(rqstp, &cstate->current_fh,
			   S_IFREG, NFSD_MAY_LOCK);
6713	if (status) {
6714		dprintk("NFSD: nfsd4_lock: permission denied!\n");
6715		return status;
6716	}
6717
6718	if (lock->lk_is_new) {
6719		if (nfsd4_has_session(cstate))
6720			/* See RFC 5661 section 18.10.3: given clientid is ignored: */
6721			memcpy(&lock->lk_new_clientid,
6722				&cstate->session->se_client->cl_clientid,
6723				sizeof(clientid_t));
6724
6725		status = nfserr_stale_clientid;
6726		if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
6727			goto out;
6728
6729		/* validate and update open stateid and open seqid */
6730		status = nfs4_preprocess_confirmed_seqid_op(cstate,
6731					lock->lk_new_open_seqid,
6732					&lock->lk_new_open_stateid,
6733					&open_stp, nn);
6734		if (status)
6735			goto out;
6736		mutex_unlock(&open_stp->st_mutex);
6737		open_sop = openowner(open_stp->st_stateowner);
6738		status = nfserr_bad_stateid;
6739		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
6740						&lock->lk_new_clientid))
6741			goto out;
6742		status = lookup_or_create_lock_state(cstate, open_stp, lock,
6743							&lock_stp, &new);
6744	} else {
6745		status = nfs4_preprocess_seqid_op(cstate,
6746				       lock->lk_old_lock_seqid,
6747				       &lock->lk_old_lock_stateid,
6748				       NFS4_LOCK_STID, &lock_stp, nn);
6749	}
6750	if (status)
6751		goto out;
6752	lock_sop = lockowner(lock_stp->st_stateowner);
6753
6754	lkflg = setlkflg(lock->lk_type);
6755	status = nfs4_check_openmode(lock_stp, lkflg);
6756	if (status)
6757		goto out;
6758
6759	status = nfserr_grace;
6760	if (locks_in_grace(net) && !lock->lk_reclaim)
6761		goto out;
6762	status = nfserr_no_grace;
6763	if (!locks_in_grace(net) && lock->lk_reclaim)
6764		goto out;
6765
6766	fp = lock_stp->st_stid.sc_file;
6767	switch (lock->lk_type) {
6768		case NFS4_READW_LT:
6769			if (nfsd4_has_session(cstate))
6770				fl_flags |= FL_SLEEP;
6771			fallthrough;
6772		case NFS4_READ_LT:
6773			spin_lock(&fp->fi_lock);
6774			nf = find_readable_file_locked(fp);
6775			if (nf)
6776				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
6777			spin_unlock(&fp->fi_lock);
6778			fl_type = F_RDLCK;
6779			break;
6780		case NFS4_WRITEW_LT:
6781			if (nfsd4_has_session(cstate))
6782				fl_flags |= FL_SLEEP;
6783			fallthrough;
6784		case NFS4_WRITE_LT:
6785			spin_lock(&fp->fi_lock);
6786			nf = find_writeable_file_locked(fp);
6787			if (nf)
6788				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
6789			spin_unlock(&fp->fi_lock);
6790			fl_type = F_WRLCK;
6791			break;
6792		default:
6793			status = nfserr_inval;
6794			goto out;
6795	}
6796
6797	if (!nf) {
6798		status = nfserr_openmode;
6799		goto out;
6800	}
6801
6802	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
6803	if (!nbl) {
6804		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
6805		status = nfserr_jukebox;
6806		goto out;
6807	}
6808
6809	file_lock = &nbl->nbl_lock;
6810	file_lock->fl_type = fl_type;
6811	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
6812	file_lock->fl_pid = current->tgid;
6813	file_lock->fl_file = nf->nf_file;
6814	file_lock->fl_flags = fl_flags;
6815	file_lock->fl_lmops = &nfsd_posix_mng_ops;
6816	file_lock->fl_start = lock->lk_offset;
6817	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
6818	nfs4_transform_lock_offset(file_lock);
6819
6820	conflock = locks_alloc_lock();
6821	if (!conflock) {
6822		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
6823		status = nfserr_jukebox;
6824		goto out;
6825	}
6826
6827	if (fl_flags & FL_SLEEP) {
6828		nbl->nbl_time = ktime_get_boottime_seconds();
6829		spin_lock(&nn->blocked_locks_lock);
6830		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
6831		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
6832		spin_unlock(&nn->blocked_locks_lock);
6833	}
6834
6835	err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
6836	switch (err) {
6837	case 0: /* success! */
6838		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
6839		status = 0;
6840		if (lock->lk_reclaim)
6841			nn->somebody_reclaimed = true;
6842		break;
6843	case FILE_LOCK_DEFERRED:
6844		nbl = NULL;
6845		fallthrough;
6846	case -EAGAIN:		/* conflock holds conflicting lock */
6847		status = nfserr_denied;
6848		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
6849		nfs4_set_lock_denied(conflock, &lock->lk_denied);
6850		break;
6851	case -EDEADLK:
6852		status = nfserr_deadlock;
6853		break;
6854	default:
6855		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
6856		status = nfserrno(err);
6857		break;
6858	}
6859out:
6860	if (nbl) {
6861		/* dequeue it if we queued it before */
6862		if (fl_flags & FL_SLEEP) {
6863			spin_lock(&nn->blocked_locks_lock);
6864			list_del_init(&nbl->nbl_list);
6865			list_del_init(&nbl->nbl_lru);
6866			spin_unlock(&nn->blocked_locks_lock);
6867		}
6868		free_blocked_lock(nbl);
6869	}
6870	if (nf)
6871		nfsd_file_put(nf);
6872	if (lock_stp) {
6873		/* Bump seqid manually if the 4.0 replay owner is openowner */
6874		if (cstate->replay_owner &&
6875		    cstate->replay_owner != &lock_sop->lo_owner &&
6876		    seqid_mutating_err(ntohl(status)))
6877			lock_sop->lo_owner.so_seqid++;
6878
6879		/*
6880		 * If this is a new, never-before-used stateid, and we are
6881		 * returning an error, then just go ahead and release it.
6882		 */
6883		if (status && new)
6884			release_lock_stateid(lock_stp);
6885
6886		mutex_unlock(&lock_stp->st_mutex);
6887
6888		nfs4_put_stid(&lock_stp->st_stid);
6889	}
6890	if (open_stp)
6891		nfs4_put_stid(&open_stp->st_stid);
6892	nfsd4_bump_seqid(cstate, status);
6893	if (conflock)
6894		locks_free_lock(conflock);
6895	return status;
6896}
6897
6898/*
6899 * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
6900 * so we do a temporary open here just to get an open file to pass to
6901 * vfs_test_lock.
6902 */
6903static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
6904{
6905	struct nfsd_file *nf;
6906	__be32 err;
6907
6908	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
6909	if (err)
6910		return err;
6911	fh_lock(fhp); /* to block new leases till after test_lock: */
6912	err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode,
6913							NFSD_MAY_READ));
6914	if (err)
6915		goto out;
6916	lock->fl_file = nf->nf_file;
6917	err = nfserrno(vfs_test_lock(nf->nf_file, lock));
6918	lock->fl_file = NULL;
6919out:
6920	fh_unlock(fhp);
6921	nfsd_file_put(nf);
6922	return err;
6923}
6924
6925/*
6926 * LOCKT operation
6927 */
6928__be32
6929nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
6930	    union nfsd4_op_u *u)
6931{
6932	struct nfsd4_lockt *lockt = &u->lockt;
6933	struct file_lock *file_lock = NULL;
6934	struct nfs4_lockowner *lo = NULL;
6935	__be32 status;
6936	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
6937
6938	if (locks_in_grace(SVC_NET(rqstp)))
6939		return nfserr_grace;
6940
6941	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
6942		return nfserr_inval;
6943
6944	if (!nfsd4_has_session(cstate)) {
6945		status = lookup_clientid(&lockt->lt_clientid, cstate, nn,
6946					 false);
6947		if (status)
6948			goto out;
6949	}
6950
6951	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
	if (status)
6952		goto out;
6953
6954	file_lock = locks_alloc_lock();
6955	if (!file_lock) {
6956		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
6957		status = nfserr_jukebox;
6958		goto out;
6959	}
6960
6961	switch (lockt->lt_type) {
6962		case NFS4_READ_LT:
6963		case NFS4_READW_LT:
6964			file_lock->fl_type = F_RDLCK;
6965			break;
6966		case NFS4_WRITE_LT:
6967		case NFS4_WRITEW_LT:
6968			file_lock->fl_type = F_WRLCK;
6969			break;
6970		default:
6971			dprintk("NFSD: nfs4_lockt: bad lock type!\n");
6972			status = nfserr_inval;
6973			goto out;
6974	}
6975
6976	lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
6977	if (lo)
6978		file_lock->fl_owner = (fl_owner_t)lo;
6979	file_lock->fl_pid = current->tgid;
6980	file_lock->fl_flags = FL_POSIX;
6981
6982	file_lock->fl_start = lockt->lt_offset;
6983	file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
6984
6985	nfs4_transform_lock_offset(file_lock);
6986
6987	status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock);
6988	if (status)
6989		goto out;
6990
6991	if (file_lock->fl_type != F_UNLCK) {
6992		status = nfserr_denied;
6993		nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
6994	}
6995out:
6996	if (lo)
6997		nfs4_put_stateowner(&lo->lo_owner);
6998	if (file_lock)
6999		locks_free_lock(file_lock);
7000	return status;
7001}
7002
7003__be32
7004nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
7005	    union nfsd4_op_u *u)
7006{
7007	struct nfsd4_locku *locku = &u->locku;
7008	struct nfs4_ol_stateid *stp;
7009	struct nfsd_file *nf = NULL;
7010	struct file_lock *file_lock = NULL;
7011	__be32 status;
7012	int err;
7013	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
7014
7015	dprintk("NFSD: nfsd4_locku: start=%lld length=%lld\n",
7016		(long long) locku->lu_offset,
7017		(long long) locku->lu_length);
7018
7019	if (check_lock_length(locku->lu_offset, locku->lu_length))
7020		return nfserr_inval;
7021
7022	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
7023					&locku->lu_stateid, NFS4_LOCK_STID,
7024					&stp, nn);
7025	if (status)
7026		goto out;
7027	nf = find_any_file(stp->st_stid.sc_file);
7028	if (!nf) {
7029		status = nfserr_lock_range;
7030		goto put_stateid;
7031	}
7032	file_lock = locks_alloc_lock();
7033	if (!file_lock) {
7034		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
7035		status = nfserr_jukebox;
7036		goto put_file;
7037	}
7038
7039	file_lock->fl_type = F_UNLCK;
7040	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
7041	file_lock->fl_pid = current->tgid;
7042	file_lock->fl_file = nf->nf_file;
7043	file_lock->fl_flags = FL_POSIX;
7044	file_lock->fl_lmops = &nfsd_posix_mng_ops;
7045	file_lock->fl_start = locku->lu_offset;
7046
7047	file_lock->fl_end = last_byte_offset(locku->lu_offset,
7048						locku->lu_length);
7049	nfs4_transform_lock_offset(file_lock);
7050
7051	err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
7052	if (err) {
7053		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
7054		goto out_nfserr;
7055	}
7056	nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
7057put_file:
7058	nfsd_file_put(nf);
7059put_stateid:
7060	mutex_unlock(&stp->st_mutex);
7061	nfs4_put_stid(&stp->st_stid);
7062out:
7063	nfsd4_bump_seqid(cstate, status);
7064	if (file_lock)
7065		locks_free_lock(file_lock);
7066	return status;
7067
7068out_nfserr:
7069	status = nfserrno(err);
7070	goto put_file;
7071}
7072
7073/*
7074 * returns
7075 * 	true:  locks held by lockowner
7076 * 	false: no locks held by lockowner
7077 */
7078static bool
7079check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
7080{
7081	struct file_lock *fl;
7082	int status = false;
7083	struct nfsd_file *nf = find_any_file(fp);
7084	struct inode *inode;
7085	struct file_lock_context *flctx;
7086
7087	if (!nf) {
7088		/* Any valid lock stateid should have some sort of access */
7089		WARN_ON_ONCE(1);
7090		return status;
7091	}
7092
7093	inode = locks_inode(nf->nf_file);
7094	flctx = inode->i_flctx;
7095
7096	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
7097		spin_lock(&flctx->flc_lock);
7098		list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
7099			if (fl->fl_owner == (fl_owner_t)lowner) {
7100				status = true;
7101				break;
7102			}
7103		}
7104		spin_unlock(&flctx->flc_lock);
7105	}
7106	nfsd_file_put(nf);
7107	return status;
7108}
7109
7110__be32
7111nfsd4_release_lockowner(struct svc_rqst *rqstp,
7112			struct nfsd4_compound_state *cstate,
7113			union nfsd4_op_u *u)
7114{
7115	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
7116	clientid_t *clid = &rlockowner->rl_clientid;
7117	struct nfs4_stateowner *sop;
7118	struct nfs4_lockowner *lo = NULL;
7119	struct nfs4_ol_stateid *stp;
7120	struct xdr_netobj *owner = &rlockowner->rl_owner;
7121	unsigned int hashval = ownerstr_hashval(owner);
7122	__be32 status;
7123	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
7124	struct nfs4_client *clp;
7125	LIST_HEAD(reaplist);
7126
7127	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
7128		clid->cl_boot, clid->cl_id);
7129
7130	status = lookup_clientid(clid, cstate, nn, false);
7131	if (status)
7132		return status;
7133
7134	clp = cstate->clp;
7135	/* Find the matching lock stateowner */
7136	spin_lock(&clp->cl_lock);
7137	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
7138			    so_strhash) {
7139
7140		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
7141			continue;
7142
7143		if (atomic_read(&sop->so_count) != 1) {
7144			spin_unlock(&clp->cl_lock);
7145			return nfserr_locks_held;
7146		}
7147
7148		lo = lockowner(sop);
7149		nfs4_get_stateowner(sop);
7150		break;
7151	}
7152	if (!lo) {
7153		spin_unlock(&clp->cl_lock);
7154		return status;
7155	}
7156
7157	unhash_lockowner_locked(lo);
7158	while (!list_empty(&lo->lo_owner.so_stateids)) {
7159		stp = list_first_entry(&lo->lo_owner.so_stateids,
7160				       struct nfs4_ol_stateid,
7161				       st_perstateowner);
7162		WARN_ON(!unhash_lock_stateid(stp));
7163		put_ol_stateid_locked(stp, &reaplist);
7164	}
7165	spin_unlock(&clp->cl_lock);
7166	free_ol_stateid_reaplist(&reaplist);
7167	remove_blocked_locks(lo);
7168	nfs4_put_stateowner(&lo->lo_owner);
7169
7170	return status;
7171}
7172
7173static inline struct nfs4_client_reclaim *
7174alloc_reclaim(void)
7175{
7176	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
7177}
7178
7179bool
7180nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
7181{
7182	struct nfs4_client_reclaim *crp;
7183
7184	crp = nfsd4_find_reclaim_client(name, nn);
7185	return (crp && crp->cr_clp);
7186}
7187
7188/*
7189 * failure => all reclaim bets are off; the client ends up with nfserr_no_grace...
7190 *
7191 * The caller is responsible for freeing name.data if NULL is returned (it
7192 * will be freed in nfs4_remove_reclaim_record in the normal case).
7193 */
7194struct nfs4_client_reclaim *
7195nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
7196		struct nfsd_net *nn)
7197{
7198	unsigned int strhashval;
7199	struct nfs4_client_reclaim *crp;
7200
7201	crp = alloc_reclaim();
7202	if (crp) {
7203		strhashval = clientstr_hashval(name);
7204		INIT_LIST_HEAD(&crp->cr_strhash);
7205		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
7206		crp->cr_name.data = name.data;
7207		crp->cr_name.len = name.len;
7208		crp->cr_princhash.data = princhash.data;
7209		crp->cr_princhash.len = princhash.len;
7210		crp->cr_clp = NULL;
7211		nn->reclaim_str_hashtbl_size++;
7212	}
7213	return crp;
7214}
7215
7216void
7217nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
7218{
7219	list_del(&crp->cr_strhash);
7220	kfree(crp->cr_name.data);
7221	kfree(crp->cr_princhash.data);
7222	kfree(crp);
7223	nn->reclaim_str_hashtbl_size--;
7224}
7225
7226void
7227nfs4_release_reclaim(struct nfsd_net *nn)
7228{
7229	struct nfs4_client_reclaim *crp = NULL;
7230	int i;
7231
7232	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
7233		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
7234			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
7235			                struct nfs4_client_reclaim, cr_strhash);
7236			nfs4_remove_reclaim_record(crp, nn);
7237		}
7238	}
7239	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
7240}
7241
7242/*
7243 * Called from OPEN, CLAIM_PREVIOUS with a new clientid.
 */
7244struct nfs4_client_reclaim *
7245nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
7246{
7247	unsigned int strhashval;
7248	struct nfs4_client_reclaim *crp = NULL;
7249
7250	strhashval = clientstr_hashval(name);
7251	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
7252		if (compare_blob(&crp->cr_name, &name) == 0) {
7253			return crp;
7254		}
7255	}
7256	return NULL;
7257}
7258
7259/*
7260 * Called from OPEN. Look for clientid in reclaim list.
7261 */
7262__be32
7263nfs4_check_open_reclaim(clientid_t *clid,
7264		struct nfsd4_compound_state *cstate,
7265		struct nfsd_net *nn)
7266{
7267	__be32 status;
7268
7269	/* find clientid in conf_id_hashtbl */
7270	status = lookup_clientid(clid, cstate, nn, false);
7271	if (status)
7272		return nfserr_reclaim_bad;
7273
7274	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
7275		return nfserr_no_grace;
7276
7277	if (nfsd4_client_record_check(cstate->clp))
7278		return nfserr_reclaim_bad;
7279
7280	return nfs_ok;
7281}
7282
7283/*
7284 * Since the lifetime of a delegation isn't limited to that of an open, a
7285 * client may quite reasonably hang on to a delegation as long as it has
7286 * the inode cached.  This becomes an obvious problem the first time a
7287 * client's inode cache approaches the size of the server's total memory.
7288 *
7289 * For now we avoid this problem by imposing a hard limit on the number
7290 * of delegations, which varies according to the server's memory size.
7291 */
7292static void
7293set_max_delegations(void)
7294{
7295	/*
7296	 * Allow at most 4 delegations per megabyte of RAM.  Quick
7297	 * estimates suggest that in the worst case (where every delegation
7298	 * is for a different inode), a delegation could take about 1.5K,
7299	 * giving a worst case usage of about 6% of memory.
7300	 */
7301	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
7302}
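
/*
 * Worked example: with 4K pages PAGE_SHIFT is 12, so the shift above
 * is 20 - 2 - 12 = 6 and max_delegations is free-pages / 64.  One
 * megabyte holds 256 such pages, and 256 / 64 = 4 delegations per MB,
 * matching the comment above.
 */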
7303
7304static int nfs4_state_create_net(struct net *net)
7305{
7306	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
7307	int i;
7308
7309	nn->conf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
7310					    sizeof(struct list_head),
7311					    GFP_KERNEL);
7312	if (!nn->conf_id_hashtbl)
7313		goto err;
7314	nn->unconf_id_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
7315					      sizeof(struct list_head),
7316					      GFP_KERNEL);
7317	if (!nn->unconf_id_hashtbl)
7318		goto err_unconf_id;
7319	nn->sessionid_hashtbl = kmalloc_array(SESSION_HASH_SIZE,
7320					      sizeof(struct list_head),
7321					      GFP_KERNEL);
7322	if (!nn->sessionid_hashtbl)
7323		goto err_sessionid;
7324
7325	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
7326		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
7327		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
7328	}
7329	for (i = 0; i < SESSION_HASH_SIZE; i++)
7330		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
7331	nn->conf_name_tree = RB_ROOT;
7332	nn->unconf_name_tree = RB_ROOT;
7333	nn->boot_time = ktime_get_real_seconds();
7334	nn->grace_ended = false;
7335	nn->nfsd4_manager.block_opens = true;
7336	INIT_LIST_HEAD(&nn->nfsd4_manager.list);
7337	INIT_LIST_HEAD(&nn->client_lru);
7338	INIT_LIST_HEAD(&nn->close_lru);
7339	INIT_LIST_HEAD(&nn->del_recall_lru);
7340	spin_lock_init(&nn->client_lock);
7341	spin_lock_init(&nn->s2s_cp_lock);
7342	idr_init(&nn->s2s_cp_stateids);
7343
7344	spin_lock_init(&nn->blocked_locks_lock);
7345	INIT_LIST_HEAD(&nn->blocked_locks_lru);
7346
7347	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
7348	get_net(net);
7349
7350	return 0;
7351
7352err_sessionid:
7353	kfree(nn->unconf_id_hashtbl);
7354err_unconf_id:
7355	kfree(nn->conf_id_hashtbl);
7356err:
7357	return -ENOMEM;
7358}
7359
7360static void
7361nfs4_state_destroy_net(struct net *net)
7362{
7363	int i;
7364	struct nfs4_client *clp = NULL;
7365	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
7366
7367	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
7368		while (!list_empty(&nn->conf_id_hashtbl[i])) {
7369			clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
7370			destroy_client(clp);
7371		}
7372	}
7373
7374	WARN_ON(!list_empty(&nn->blocked_locks_lru));
7375
7376	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
7377		while (!list_empty(&nn->unconf_id_hashtbl[i])) {
7378			clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
7379			destroy_client(clp);
7380		}
7381	}
7382
7383	kfree(nn->sessionid_hashtbl);
7384	kfree(nn->unconf_id_hashtbl);
7385	kfree(nn->conf_id_hashtbl);
7386	put_net(net);
7387}
7388
7389int
7390nfs4_state_start_net(struct net *net)
7391{
7392	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
7393	int ret;
7394
7395	ret = nfs4_state_create_net(net);
7396	if (ret)
7397		return ret;
7398	locks_start_grace(net, &nn->nfsd4_manager);
7399	nfsd4_client_tracking_init(net);
7400	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
7401		goto skip_grace;
7402	printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
7403	       nn->nfsd4_grace, net->ns.inum);
7404	trace_nfsd_grace_start(nn);
7405	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
7406	return 0;
7407
7408skip_grace:
7409	printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n",
7410			net->ns.inum);
7411	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ);
7412	nfsd4_end_grace(nn);
7413	return 0;
7414}
7415
7416/* initialization to perform when the nfsd service is started: */
7417
7418int
7419nfs4_state_start(void)
7420{
7421	int ret;
7422
7423	laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
7424	if (laundry_wq == NULL) {
7425		ret = -ENOMEM;
7426		goto out;
7427	}
7428	ret = nfsd4_create_callback_queue();
7429	if (ret)
7430		goto out_free_laundry;
7431
7432	set_max_delegations();
7433	return 0;
7434
7435out_free_laundry:
7436	destroy_workqueue(laundry_wq);
7437out:
7438	return ret;
7439}
7440
7441void
7442nfs4_state_shutdown_net(struct net *net)
7443{
7444	struct nfs4_delegation *dp = NULL;
7445	struct list_head *pos, *next, reaplist;
7446	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
7447
7448	cancel_delayed_work_sync(&nn->laundromat_work);
7449	locks_end_grace(&nn->nfsd4_manager);
7450
7451	INIT_LIST_HEAD(&reaplist);
7452	spin_lock(&state_lock);
7453	list_for_each_safe(pos, next, &nn->del_recall_lru) {
7454		dp = list_entry(pos, struct nfs4_delegation, dl_recall_lru);
7455		WARN_ON(!unhash_delegation_locked(dp));
7456		list_add(&dp->dl_recall_lru, &reaplist);
7457	}
7458	spin_unlock(&state_lock);
7459	list_for_each_safe(pos, next, &reaplist) {
7460		dp = list_entry(pos, struct nfs4_delegation, dl_recall_lru);
7461		list_del_init(&dp->dl_recall_lru);
7462		destroy_unhashed_deleg(dp);
7463	}
7464
7465	nfsd4_client_tracking_exit(net);
7466	nfs4_state_destroy_net(net);
7467}
7468
7469void
7470nfs4_state_shutdown(void)
7471{
7472	destroy_workqueue(laundry_wq);
7473	nfsd4_destroy_callback_queue();
7474}
7475
7476static void
7477get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
7478{
7479	if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) &&
7480	    CURRENT_STATEID(stateid))
7481		memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
7482}
7483
7484static void
7485put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
7486{
7487	if (cstate->minorversion) {
7488		memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
7489		SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
7490	}
7491}
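
/*
 * Together, get_stateid() and put_stateid() implement the "current
 * stateid" of RFC 5661 section 16.2.3.1.2: a stateid-producing op
 * such as OPEN saves its result via put_stateid(), and a later op in
 * the same compound that presents the special current-stateid marker
 * (si_generation of one, everything else zero) has the saved value
 * substituted by get_stateid().  For example,
 *
 *	COMPOUND { SEQUENCE; PUTFH; OPEN; READ(<current stateid>) }
 *
 * lets a client read without first waiting to see the OPEN reply.
 */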
7492
7493void
7494clear_current_stateid(struct nfsd4_compound_state *cstate)
7495{
7496	CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
7497}
7498
7499/*
7500 * functions to set current state id
7501 */
7502void
7503nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate,
7504		union nfsd4_op_u *u)
7505{
7506	put_stateid(cstate, &u->open_downgrade.od_stateid);
7507}
7508
7509void
7510nfsd4_set_openstateid(struct nfsd4_compound_state *cstate,
7511		union nfsd4_op_u *u)
7512{
7513	put_stateid(cstate, &u->open.op_stateid);
7514}
7515
7516void
7517nfsd4_set_closestateid(struct nfsd4_compound_state *cstate,
7518		union nfsd4_op_u *u)
7519{
7520	put_stateid(cstate, &u->close.cl_stateid);
7521}
7522
7523void
7524nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
7525		union nfsd4_op_u *u)
7526{
7527	put_stateid(cstate, &u->lock.lk_resp_stateid);
7528}
7529
7530/*
7531 * functions to consume current state id
7532 */
7533
7534void
7535nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate,
7536		union nfsd4_op_u *u)
7537{
7538	get_stateid(cstate, &u->open_downgrade.od_stateid);
7539}
7540
7541void
7542nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate,
7543		union nfsd4_op_u *u)
7544{
7545	get_stateid(cstate, &u->delegreturn.dr_stateid);
7546}
7547
7548void
7549nfsd4_get_freestateid(struct nfsd4_compound_state *cstate,
7550		union nfsd4_op_u *u)
7551{
7552	get_stateid(cstate, &u->free_stateid.fr_stateid);
7553}
7554
7555void
7556nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate,
7557		union nfsd4_op_u *u)
7558{
7559	get_stateid(cstate, &u->setattr.sa_stateid);
7560}
7561
7562void
7563nfsd4_get_closestateid(struct nfsd4_compound_state *cstate,
7564		union nfsd4_op_u *u)
7565{
7566	get_stateid(cstate, &u->close.cl_stateid);
7567}
7568
7569void
7570nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate,
7571		union nfsd4_op_u *u)
7572{
7573	get_stateid(cstate, &u->locku.lu_stateid);
7574}
7575
7576void
7577nfsd4_get_readstateid(struct nfsd4_compound_state *cstate,
7578		union nfsd4_op_u *u)
7579{
7580	get_stateid(cstate, &u->read.rd_stateid);
7581}
7582
7583void
7584nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
7585		union nfsd4_op_u *u)
7586{
7587	get_stateid(cstate, &u->write.wr_stateid);
7588}
7589