162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci *  pNFS functions to call and manage layout drivers.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci *  Copyright (c) 2002 [year of first publication]
562306a36Sopenharmony_ci *  The Regents of the University of Michigan
662306a36Sopenharmony_ci *  All Rights Reserved
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci *  Dean Hildebrand <dhildebz@umich.edu>
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci *  Permission is granted to use, copy, create derivative works, and
1162306a36Sopenharmony_ci *  redistribute this software and such derivative works for any purpose,
1262306a36Sopenharmony_ci *  so long as the name of the University of Michigan is not used in
1362306a36Sopenharmony_ci *  any advertising or publicity pertaining to the use or distribution
1462306a36Sopenharmony_ci *  of this software without specific, written prior authorization. If
1562306a36Sopenharmony_ci *  the above copyright notice or any other identification of the
1662306a36Sopenharmony_ci *  University of Michigan is included in any copy of any portion of
1762306a36Sopenharmony_ci *  this software, then the disclaimer below must also be included.
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci *  This software is provided as is, without representation or warranty
2062306a36Sopenharmony_ci *  of any kind either express or implied, including without limitation
2162306a36Sopenharmony_ci *  the implied warranties of merchantability, fitness for a particular
2262306a36Sopenharmony_ci *  purpose, or noninfringement.  The Regents of the University of
2362306a36Sopenharmony_ci *  Michigan shall not be liable for any damages, including special,
2462306a36Sopenharmony_ci *  indirect, incidental, or consequential damages, with respect to any
2562306a36Sopenharmony_ci *  claim arising out of or in connection with the use of the software,
2662306a36Sopenharmony_ci *  even if it has been or is hereafter advised of the possibility of
2762306a36Sopenharmony_ci *  such damages.
2862306a36Sopenharmony_ci */
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include <linux/nfs_fs.h>
3162306a36Sopenharmony_ci#include <linux/nfs_page.h>
3262306a36Sopenharmony_ci#include <linux/module.h>
3362306a36Sopenharmony_ci#include <linux/sort.h>
3462306a36Sopenharmony_ci#include "internal.h"
3562306a36Sopenharmony_ci#include "pnfs.h"
3662306a36Sopenharmony_ci#include "iostat.h"
3762306a36Sopenharmony_ci#include "nfs4trace.h"
3862306a36Sopenharmony_ci#include "delegation.h"
3962306a36Sopenharmony_ci#include "nfs42.h"
4062306a36Sopenharmony_ci#include "nfs4_fs.h"
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci#define NFSDBG_FACILITY		NFSDBG_PNFS
4362306a36Sopenharmony_ci#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci/* Locking:
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * pnfs_spinlock:
4862306a36Sopenharmony_ci *      protects pnfs_modules_tbl.
4962306a36Sopenharmony_ci */
5062306a36Sopenharmony_cistatic DEFINE_SPINLOCK(pnfs_spinlock);
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci/*
5362306a36Sopenharmony_ci * pnfs_modules_tbl holds all pnfs modules
5462306a36Sopenharmony_ci */
5562306a36Sopenharmony_cistatic LIST_HEAD(pnfs_modules_tbl);
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
5862306a36Sopenharmony_cistatic void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
5962306a36Sopenharmony_ci		struct list_head *free_me,
6062306a36Sopenharmony_ci		const struct pnfs_layout_range *range,
6162306a36Sopenharmony_ci		u32 seq);
6262306a36Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
6362306a36Sopenharmony_ci		                struct list_head *tmp_list);
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci/* Return the registered pnfs layout driver module matching given id */
6662306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type *
6762306a36Sopenharmony_cifind_pnfs_driver_locked(u32 id)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *local;
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
7262306a36Sopenharmony_ci		if (local->id == id)
7362306a36Sopenharmony_ci			goto out;
7462306a36Sopenharmony_ci	local = NULL;
7562306a36Sopenharmony_ciout:
7662306a36Sopenharmony_ci	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
7762306a36Sopenharmony_ci	return local;
7862306a36Sopenharmony_ci}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type *
8162306a36Sopenharmony_cifind_pnfs_driver(u32 id)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *local;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	spin_lock(&pnfs_spinlock);
8662306a36Sopenharmony_ci	local = find_pnfs_driver_locked(id);
8762306a36Sopenharmony_ci	if (local != NULL && !try_module_get(local->owner)) {
8862306a36Sopenharmony_ci		dprintk("%s: Could not grab reference on module\n", __func__);
8962306a36Sopenharmony_ci		local = NULL;
9062306a36Sopenharmony_ci	}
9162306a36Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
9262306a36Sopenharmony_ci	return local;
9362306a36Sopenharmony_ci}
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ciconst struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
9662306a36Sopenharmony_ci{
9762306a36Sopenharmony_ci	return find_pnfs_driver(id);
9862306a36Sopenharmony_ci}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_civoid pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
10162306a36Sopenharmony_ci{
10262306a36Sopenharmony_ci	if (ld)
10362306a36Sopenharmony_ci		module_put(ld->owner);
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_civoid
10762306a36Sopenharmony_ciunset_pnfs_layoutdriver(struct nfs_server *nfss)
10862306a36Sopenharmony_ci{
10962306a36Sopenharmony_ci	if (nfss->pnfs_curr_ld) {
11062306a36Sopenharmony_ci		if (nfss->pnfs_curr_ld->clear_layoutdriver)
11162306a36Sopenharmony_ci			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
11262306a36Sopenharmony_ci		/* Decrement the MDS count. Purge the deviceid cache if zero */
11362306a36Sopenharmony_ci		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
11462306a36Sopenharmony_ci			nfs4_deviceid_purge_client(nfss->nfs_client);
11562306a36Sopenharmony_ci		module_put(nfss->pnfs_curr_ld->owner);
11662306a36Sopenharmony_ci	}
11762306a36Sopenharmony_ci	nfss->pnfs_curr_ld = NULL;
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci/*
12162306a36Sopenharmony_ci * When the server sends a list of layout types, we choose one in the order
12262306a36Sopenharmony_ci * given in the list below.
12362306a36Sopenharmony_ci *
12462306a36Sopenharmony_ci * FIXME: should this list be configurable in some fashion? module param?
12562306a36Sopenharmony_ci * 	  mount option? something else?
12662306a36Sopenharmony_ci */
12762306a36Sopenharmony_cistatic const u32 ld_prefs[] = {
12862306a36Sopenharmony_ci	LAYOUT_SCSI,
12962306a36Sopenharmony_ci	LAYOUT_BLOCK_VOLUME,
13062306a36Sopenharmony_ci	LAYOUT_OSD2_OBJECTS,
13162306a36Sopenharmony_ci	LAYOUT_FLEX_FILES,
13262306a36Sopenharmony_ci	LAYOUT_NFSV4_1_FILES,
13362306a36Sopenharmony_ci	0
13462306a36Sopenharmony_ci};
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_cistatic int
13762306a36Sopenharmony_cild_cmp(const void *e1, const void *e2)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	u32 ld1 = *((u32 *)e1);
14062306a36Sopenharmony_ci	u32 ld2 = *((u32 *)e2);
14162306a36Sopenharmony_ci	int i;
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	for (i = 0; ld_prefs[i] != 0; i++) {
14462306a36Sopenharmony_ci		if (ld1 == ld_prefs[i])
14562306a36Sopenharmony_ci			return -1;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci		if (ld2 == ld_prefs[i])
14862306a36Sopenharmony_ci			return 1;
14962306a36Sopenharmony_ci	}
15062306a36Sopenharmony_ci	return 0;
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci/*
15462306a36Sopenharmony_ci * Try to set the server's pnfs module to the pnfs layout type specified by id.
15562306a36Sopenharmony_ci * Currently only one pNFS layout driver per filesystem is supported.
15662306a36Sopenharmony_ci *
15762306a36Sopenharmony_ci * @ids array of layout types supported by MDS.
15862306a36Sopenharmony_ci */
15962306a36Sopenharmony_civoid
16062306a36Sopenharmony_ciset_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
16162306a36Sopenharmony_ci		      struct nfs_fsinfo *fsinfo)
16262306a36Sopenharmony_ci{
16362306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld_type = NULL;
16462306a36Sopenharmony_ci	u32 id;
16562306a36Sopenharmony_ci	int i;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	if (fsinfo->nlayouttypes == 0)
16862306a36Sopenharmony_ci		goto out_no_driver;
16962306a36Sopenharmony_ci	if (!(server->nfs_client->cl_exchange_flags &
17062306a36Sopenharmony_ci		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
17162306a36Sopenharmony_ci		printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
17262306a36Sopenharmony_ci			__func__, server->nfs_client->cl_exchange_flags);
17362306a36Sopenharmony_ci		goto out_no_driver;
17462306a36Sopenharmony_ci	}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	sort(fsinfo->layouttype, fsinfo->nlayouttypes,
17762306a36Sopenharmony_ci		sizeof(*fsinfo->layouttype), ld_cmp, NULL);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	for (i = 0; i < fsinfo->nlayouttypes; i++) {
18062306a36Sopenharmony_ci		id = fsinfo->layouttype[i];
18162306a36Sopenharmony_ci		ld_type = find_pnfs_driver(id);
18262306a36Sopenharmony_ci		if (!ld_type) {
18362306a36Sopenharmony_ci			request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
18462306a36Sopenharmony_ci					id);
18562306a36Sopenharmony_ci			ld_type = find_pnfs_driver(id);
18662306a36Sopenharmony_ci		}
18762306a36Sopenharmony_ci		if (ld_type)
18862306a36Sopenharmony_ci			break;
18962306a36Sopenharmony_ci	}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	if (!ld_type) {
19262306a36Sopenharmony_ci		dprintk("%s: No pNFS module found!\n", __func__);
19362306a36Sopenharmony_ci		goto out_no_driver;
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	server->pnfs_curr_ld = ld_type;
19762306a36Sopenharmony_ci	if (ld_type->set_layoutdriver
19862306a36Sopenharmony_ci	    && ld_type->set_layoutdriver(server, mntfh)) {
19962306a36Sopenharmony_ci		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
20062306a36Sopenharmony_ci			"driver %u.\n", __func__, id);
20162306a36Sopenharmony_ci		module_put(ld_type->owner);
20262306a36Sopenharmony_ci		goto out_no_driver;
20362306a36Sopenharmony_ci	}
20462306a36Sopenharmony_ci	/* Bump the MDS count */
20562306a36Sopenharmony_ci	atomic_inc(&server->nfs_client->cl_mds_count);
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	dprintk("%s: pNFS module for %u set\n", __func__, id);
20862306a36Sopenharmony_ci	return;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ciout_no_driver:
21162306a36Sopenharmony_ci	dprintk("%s: Using NFSv4 I/O\n", __func__);
21262306a36Sopenharmony_ci	server->pnfs_curr_ld = NULL;
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ciint
21662306a36Sopenharmony_cipnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	int status = -EINVAL;
21962306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *tmp;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (ld_type->id == 0) {
22262306a36Sopenharmony_ci		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
22362306a36Sopenharmony_ci		return status;
22462306a36Sopenharmony_ci	}
22562306a36Sopenharmony_ci	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
22662306a36Sopenharmony_ci		printk(KERN_ERR "NFS: %s Layout driver must provide "
22762306a36Sopenharmony_ci		       "alloc_lseg and free_lseg.\n", __func__);
22862306a36Sopenharmony_ci		return status;
22962306a36Sopenharmony_ci	}
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	spin_lock(&pnfs_spinlock);
23262306a36Sopenharmony_ci	tmp = find_pnfs_driver_locked(ld_type->id);
23362306a36Sopenharmony_ci	if (!tmp) {
23462306a36Sopenharmony_ci		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
23562306a36Sopenharmony_ci		status = 0;
23662306a36Sopenharmony_ci		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
23762306a36Sopenharmony_ci			ld_type->name);
23862306a36Sopenharmony_ci	} else {
23962306a36Sopenharmony_ci		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
24062306a36Sopenharmony_ci			__func__, ld_type->id);
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	return status;
24562306a36Sopenharmony_ci}
24662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_civoid
24962306a36Sopenharmony_cipnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
25262306a36Sopenharmony_ci	spin_lock(&pnfs_spinlock);
25362306a36Sopenharmony_ci	list_del(&ld_type->pnfs_tblid);
25462306a36Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
25562306a36Sopenharmony_ci}
25662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci/*
25962306a36Sopenharmony_ci * pNFS client layout cache
26062306a36Sopenharmony_ci */
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci/* Need to hold i_lock if caller does not already hold reference */
26362306a36Sopenharmony_civoid
26462306a36Sopenharmony_cipnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	refcount_inc(&lo->plh_refcount);
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic struct pnfs_layout_hdr *
27062306a36Sopenharmony_cipnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
27362306a36Sopenharmony_ci	return ld->alloc_layout_hdr(ino, gfp_flags);
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic void
27762306a36Sopenharmony_cipnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
28062306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
28362306a36Sopenharmony_ci		struct nfs_client *clp = server->nfs_client;
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci		spin_lock(&clp->cl_lock);
28662306a36Sopenharmony_ci		list_del_rcu(&lo->plh_layouts);
28762306a36Sopenharmony_ci		spin_unlock(&clp->cl_lock);
28862306a36Sopenharmony_ci	}
28962306a36Sopenharmony_ci	put_cred(lo->plh_lc_cred);
29062306a36Sopenharmony_ci	return ld->free_layout_hdr(lo);
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_cistatic void
29462306a36Sopenharmony_cipnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
29762306a36Sopenharmony_ci	dprintk("%s: freeing layout cache %p\n", __func__, lo);
29862306a36Sopenharmony_ci	nfsi->layout = NULL;
29962306a36Sopenharmony_ci	/* Reset MDS Threshold I/O counters */
30062306a36Sopenharmony_ci	nfsi->write_io = 0;
30162306a36Sopenharmony_ci	nfsi->read_io = 0;
30262306a36Sopenharmony_ci}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_civoid
30562306a36Sopenharmony_cipnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	struct inode *inode;
30862306a36Sopenharmony_ci	unsigned long i_state;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	if (!lo)
31162306a36Sopenharmony_ci		return;
31262306a36Sopenharmony_ci	inode = lo->plh_inode;
31362306a36Sopenharmony_ci	pnfs_layoutreturn_before_put_layout_hdr(lo);
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
31662306a36Sopenharmony_ci		if (!list_empty(&lo->plh_segs))
31762306a36Sopenharmony_ci			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
31862306a36Sopenharmony_ci		pnfs_detach_layout_hdr(lo);
31962306a36Sopenharmony_ci		i_state = inode->i_state;
32062306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
32162306a36Sopenharmony_ci		pnfs_free_layout_hdr(lo);
32262306a36Sopenharmony_ci		/* Notify pnfs_destroy_layout_final() that we're done */
32362306a36Sopenharmony_ci		if (i_state & (I_FREEING | I_CLEAR))
32462306a36Sopenharmony_ci			wake_up_var(lo);
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_cistatic struct inode *
32962306a36Sopenharmony_cipnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	struct inode *inode = igrab(lo->plh_inode);
33262306a36Sopenharmony_ci	if (inode)
33362306a36Sopenharmony_ci		return inode;
33462306a36Sopenharmony_ci	set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
33562306a36Sopenharmony_ci	return NULL;
33662306a36Sopenharmony_ci}
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci/*
33962306a36Sopenharmony_ci * Compare 2 layout stateid sequence ids, to see which is newer,
34062306a36Sopenharmony_ci * taking into account wraparound issues.
34162306a36Sopenharmony_ci */
34262306a36Sopenharmony_cistatic bool pnfs_seqid_is_newer(u32 s1, u32 s2)
34362306a36Sopenharmony_ci{
34462306a36Sopenharmony_ci	return (s32)(s1 - s2) > 0;
34562306a36Sopenharmony_ci}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_cistatic void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
34862306a36Sopenharmony_ci{
34962306a36Sopenharmony_ci	if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier)
35062306a36Sopenharmony_ci		lo->plh_barrier = newseq;
35162306a36Sopenharmony_ci}
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_cistatic void
35462306a36Sopenharmony_cipnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
35562306a36Sopenharmony_ci			 u32 seq)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
35862306a36Sopenharmony_ci		iomode = IOMODE_ANY;
35962306a36Sopenharmony_ci	lo->plh_return_iomode = iomode;
36062306a36Sopenharmony_ci	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
36162306a36Sopenharmony_ci	/*
36262306a36Sopenharmony_ci	 * We must set lo->plh_return_seq to avoid livelocks with
36362306a36Sopenharmony_ci	 * pnfs_layout_need_return()
36462306a36Sopenharmony_ci	 */
36562306a36Sopenharmony_ci	if (seq == 0)
36662306a36Sopenharmony_ci		seq = be32_to_cpu(lo->plh_stateid.seqid);
36762306a36Sopenharmony_ci	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
36862306a36Sopenharmony_ci		lo->plh_return_seq = seq;
36962306a36Sopenharmony_ci	pnfs_barrier_update(lo, seq);
37062306a36Sopenharmony_ci}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_cistatic void
37362306a36Sopenharmony_cipnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg;
37662306a36Sopenharmony_ci	lo->plh_return_iomode = 0;
37762306a36Sopenharmony_ci	lo->plh_return_seq = 0;
37862306a36Sopenharmony_ci	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
37962306a36Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
38062306a36Sopenharmony_ci		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
38162306a36Sopenharmony_ci			continue;
38262306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
38362306a36Sopenharmony_ci	}
38462306a36Sopenharmony_ci}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_cistatic void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
38762306a36Sopenharmony_ci{
38862306a36Sopenharmony_ci	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
38962306a36Sopenharmony_ci	clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
39062306a36Sopenharmony_ci	smp_mb__after_atomic();
39162306a36Sopenharmony_ci	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
39262306a36Sopenharmony_ci	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_cistatic void
39662306a36Sopenharmony_cipnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
39762306a36Sopenharmony_ci		struct list_head *free_me)
39862306a36Sopenharmony_ci{
39962306a36Sopenharmony_ci	clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
40062306a36Sopenharmony_ci	clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
40162306a36Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
40262306a36Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
40362306a36Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
40462306a36Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
40562306a36Sopenharmony_ci}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci/*
40862306a36Sopenharmony_ci * Update the seqid of a layout stateid after receiving
40962306a36Sopenharmony_ci * NFS4ERR_OLD_STATEID
41062306a36Sopenharmony_ci */
41162306a36Sopenharmony_cibool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
41262306a36Sopenharmony_ci		struct pnfs_layout_range *dst_range,
41362306a36Sopenharmony_ci		struct inode *inode)
41462306a36Sopenharmony_ci{
41562306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
41662306a36Sopenharmony_ci	struct pnfs_layout_range range = {
41762306a36Sopenharmony_ci		.iomode = IOMODE_ANY,
41862306a36Sopenharmony_ci		.offset = 0,
41962306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
42062306a36Sopenharmony_ci	};
42162306a36Sopenharmony_ci	bool ret = false;
42262306a36Sopenharmony_ci	LIST_HEAD(head);
42362306a36Sopenharmony_ci	int err;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
42662306a36Sopenharmony_ci	lo = NFS_I(inode)->layout;
42762306a36Sopenharmony_ci	if (lo &&  pnfs_layout_is_valid(lo) &&
42862306a36Sopenharmony_ci	    nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
42962306a36Sopenharmony_ci		/* Is our call using the most recent seqid? If so, bump it */
43062306a36Sopenharmony_ci		if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
43162306a36Sopenharmony_ci			nfs4_stateid_seqid_inc(dst);
43262306a36Sopenharmony_ci			ret = true;
43362306a36Sopenharmony_ci			goto out;
43462306a36Sopenharmony_ci		}
43562306a36Sopenharmony_ci		/* Try to update the seqid to the most recent */
43662306a36Sopenharmony_ci		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
43762306a36Sopenharmony_ci		if (err != -EBUSY) {
43862306a36Sopenharmony_ci			dst->seqid = lo->plh_stateid.seqid;
43962306a36Sopenharmony_ci			*dst_range = range;
44062306a36Sopenharmony_ci			ret = true;
44162306a36Sopenharmony_ci		}
44262306a36Sopenharmony_ci	}
44362306a36Sopenharmony_ciout:
44462306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
44562306a36Sopenharmony_ci	pnfs_free_lseg_list(&head);
44662306a36Sopenharmony_ci	return ret;
44762306a36Sopenharmony_ci}
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci/*
45062306a36Sopenharmony_ci * Mark a pnfs_layout_hdr and all associated layout segments as invalid
45162306a36Sopenharmony_ci *
45262306a36Sopenharmony_ci * In order to continue using the pnfs_layout_hdr, a full recovery
45362306a36Sopenharmony_ci * is required.
45462306a36Sopenharmony_ci * Note that caller must hold inode->i_lock.
45562306a36Sopenharmony_ci */
45662306a36Sopenharmony_ciint
45762306a36Sopenharmony_cipnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
45862306a36Sopenharmony_ci		struct list_head *lseg_list)
45962306a36Sopenharmony_ci{
46062306a36Sopenharmony_ci	struct pnfs_layout_range range = {
46162306a36Sopenharmony_ci		.iomode = IOMODE_ANY,
46262306a36Sopenharmony_ci		.offset = 0,
46362306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
46462306a36Sopenharmony_ci	};
46562306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
46862306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
46962306a36Sopenharmony_ci		pnfs_clear_lseg_state(lseg, lseg_list);
47062306a36Sopenharmony_ci	pnfs_clear_layoutreturn_info(lo);
47162306a36Sopenharmony_ci	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
47262306a36Sopenharmony_ci	set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
47362306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
47462306a36Sopenharmony_ci	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
47562306a36Sopenharmony_ci		pnfs_clear_layoutreturn_waitbit(lo);
47662306a36Sopenharmony_ci	return !list_empty(&lo->plh_segs);
47762306a36Sopenharmony_ci}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_cistatic int
48062306a36Sopenharmony_cipnfs_iomode_to_fail_bit(u32 iomode)
48162306a36Sopenharmony_ci{
48262306a36Sopenharmony_ci	return iomode == IOMODE_RW ?
48362306a36Sopenharmony_ci		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
48462306a36Sopenharmony_ci}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_cistatic void
48762306a36Sopenharmony_cipnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
48862306a36Sopenharmony_ci{
48962306a36Sopenharmony_ci	lo->plh_retry_timestamp = jiffies;
49062306a36Sopenharmony_ci	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
49162306a36Sopenharmony_ci		refcount_inc(&lo->plh_refcount);
49262306a36Sopenharmony_ci}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_cistatic void
49562306a36Sopenharmony_cipnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
49662306a36Sopenharmony_ci{
49762306a36Sopenharmony_ci	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
49862306a36Sopenharmony_ci		refcount_dec(&lo->plh_refcount);
49962306a36Sopenharmony_ci}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_cistatic void
50262306a36Sopenharmony_cipnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	struct inode *inode = lo->plh_inode;
50562306a36Sopenharmony_ci	struct pnfs_layout_range range = {
50662306a36Sopenharmony_ci		.iomode = iomode,
50762306a36Sopenharmony_ci		.offset = 0,
50862306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
50962306a36Sopenharmony_ci	};
51062306a36Sopenharmony_ci	LIST_HEAD(head);
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
51362306a36Sopenharmony_ci	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
51462306a36Sopenharmony_ci	pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
51562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
51662306a36Sopenharmony_ci	pnfs_free_lseg_list(&head);
51762306a36Sopenharmony_ci	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
51862306a36Sopenharmony_ci			iomode == IOMODE_RW ?  "RW" : "READ");
51962306a36Sopenharmony_ci}
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_cistatic bool
52262306a36Sopenharmony_cipnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
52362306a36Sopenharmony_ci{
52462306a36Sopenharmony_ci	unsigned long start, end;
52562306a36Sopenharmony_ci	int fail_bit = pnfs_iomode_to_fail_bit(iomode);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	if (test_bit(fail_bit, &lo->plh_flags) == 0)
52862306a36Sopenharmony_ci		return false;
52962306a36Sopenharmony_ci	end = jiffies;
53062306a36Sopenharmony_ci	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
53162306a36Sopenharmony_ci	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
53262306a36Sopenharmony_ci		/* It is time to retry the failed layoutgets */
53362306a36Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, fail_bit);
53462306a36Sopenharmony_ci		return false;
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci	return true;
53762306a36Sopenharmony_ci}
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_cistatic void
54062306a36Sopenharmony_cipnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
54162306a36Sopenharmony_ci		const struct pnfs_layout_range *range,
54262306a36Sopenharmony_ci		const nfs4_stateid *stateid)
54362306a36Sopenharmony_ci{
54462306a36Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_list);
54562306a36Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_lc_list);
54662306a36Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_commits);
54762306a36Sopenharmony_ci	refcount_set(&lseg->pls_refcount, 1);
54862306a36Sopenharmony_ci	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
54962306a36Sopenharmony_ci	lseg->pls_layout = lo;
55062306a36Sopenharmony_ci	lseg->pls_range = *range;
55162306a36Sopenharmony_ci	lseg->pls_seq = be32_to_cpu(stateid->seqid);
55262306a36Sopenharmony_ci}
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_cistatic void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	if (lseg != NULL) {
55762306a36Sopenharmony_ci		struct inode *inode = lseg->pls_layout->plh_inode;
55862306a36Sopenharmony_ci		NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
55962306a36Sopenharmony_ci	}
56062306a36Sopenharmony_ci}
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_cistatic void
56362306a36Sopenharmony_cipnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
56462306a36Sopenharmony_ci		struct pnfs_layout_segment *lseg)
56562306a36Sopenharmony_ci{
56662306a36Sopenharmony_ci	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
56762306a36Sopenharmony_ci	list_del_init(&lseg->pls_list);
56862306a36Sopenharmony_ci	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
56962306a36Sopenharmony_ci	refcount_dec(&lo->plh_refcount);
57062306a36Sopenharmony_ci	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
57162306a36Sopenharmony_ci		return;
57262306a36Sopenharmony_ci	if (list_empty(&lo->plh_segs) &&
57362306a36Sopenharmony_ci	    !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
57462306a36Sopenharmony_ci	    !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
57562306a36Sopenharmony_ci		if (atomic_read(&lo->plh_outstanding) == 0)
57662306a36Sopenharmony_ci			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
57762306a36Sopenharmony_ci		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
57862306a36Sopenharmony_ci	}
57962306a36Sopenharmony_ci}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_cistatic bool
58262306a36Sopenharmony_cipnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
58362306a36Sopenharmony_ci		struct pnfs_layout_segment *lseg)
58462306a36Sopenharmony_ci{
58562306a36Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
58662306a36Sopenharmony_ci	    pnfs_layout_is_valid(lo)) {
58762306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
58862306a36Sopenharmony_ci		list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
58962306a36Sopenharmony_ci		return true;
59062306a36Sopenharmony_ci	}
59162306a36Sopenharmony_ci	return false;
59262306a36Sopenharmony_ci}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_civoid
59562306a36Sopenharmony_cipnfs_put_lseg(struct pnfs_layout_segment *lseg)
59662306a36Sopenharmony_ci{
59762306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
59862306a36Sopenharmony_ci	struct inode *inode;
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (!lseg)
60162306a36Sopenharmony_ci		return;
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
60462306a36Sopenharmony_ci		refcount_read(&lseg->pls_refcount),
60562306a36Sopenharmony_ci		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci	lo = lseg->pls_layout;
60862306a36Sopenharmony_ci	inode = lo->plh_inode;
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
61162306a36Sopenharmony_ci		pnfs_get_layout_hdr(lo);
61262306a36Sopenharmony_ci		pnfs_layout_remove_lseg(lo, lseg);
61362306a36Sopenharmony_ci		if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
61462306a36Sopenharmony_ci			lseg = NULL;
61562306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
61662306a36Sopenharmony_ci		pnfs_free_lseg(lseg);
61762306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
61862306a36Sopenharmony_ci	}
61962306a36Sopenharmony_ci}
62062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_put_lseg);
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci/*
62362306a36Sopenharmony_ci * is l2 fully contained in l1?
62462306a36Sopenharmony_ci *   start1                             end1
62562306a36Sopenharmony_ci *   [----------------------------------)
62662306a36Sopenharmony_ci *           start2           end2
62762306a36Sopenharmony_ci *           [----------------)
62862306a36Sopenharmony_ci */
62962306a36Sopenharmony_cistatic bool
63062306a36Sopenharmony_cipnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
63162306a36Sopenharmony_ci		 const struct pnfs_layout_range *l2)
63262306a36Sopenharmony_ci{
63362306a36Sopenharmony_ci	u64 start1 = l1->offset;
63462306a36Sopenharmony_ci	u64 end1 = pnfs_end_offset(start1, l1->length);
63562306a36Sopenharmony_ci	u64 start2 = l2->offset;
63662306a36Sopenharmony_ci	u64 end2 = pnfs_end_offset(start2, l2->length);
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci	return (start1 <= start2) && (end1 >= end2);
63962306a36Sopenharmony_ci}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
64262306a36Sopenharmony_ci		struct list_head *tmp_list)
64362306a36Sopenharmony_ci{
64462306a36Sopenharmony_ci	if (!refcount_dec_and_test(&lseg->pls_refcount))
64562306a36Sopenharmony_ci		return false;
64662306a36Sopenharmony_ci	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
64762306a36Sopenharmony_ci	list_add(&lseg->pls_list, tmp_list);
64862306a36Sopenharmony_ci	return true;
64962306a36Sopenharmony_ci}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci/* Returns 1 if lseg is removed from list, 0 otherwise */
65262306a36Sopenharmony_cistatic int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
65362306a36Sopenharmony_ci			     struct list_head *tmp_list)
65462306a36Sopenharmony_ci{
65562306a36Sopenharmony_ci	int rv = 0;
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
65862306a36Sopenharmony_ci		/* Remove the reference keeping the lseg in the
65962306a36Sopenharmony_ci		 * list.  It will now be removed when all
66062306a36Sopenharmony_ci		 * outstanding io is finished.
66162306a36Sopenharmony_ci		 */
66262306a36Sopenharmony_ci		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
66362306a36Sopenharmony_ci			refcount_read(&lseg->pls_refcount));
66462306a36Sopenharmony_ci		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
66562306a36Sopenharmony_ci			rv = 1;
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci	return rv;
66862306a36Sopenharmony_ci}
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_cistatic bool
67162306a36Sopenharmony_cipnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
67262306a36Sopenharmony_ci		 const struct pnfs_layout_range *recall_range)
67362306a36Sopenharmony_ci{
67462306a36Sopenharmony_ci	return (recall_range->iomode == IOMODE_ANY ||
67562306a36Sopenharmony_ci		lseg_range->iomode == recall_range->iomode) &&
67662306a36Sopenharmony_ci	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
67762306a36Sopenharmony_ci}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cistatic bool
68062306a36Sopenharmony_cipnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
68162306a36Sopenharmony_ci		const struct pnfs_layout_range *recall_range,
68262306a36Sopenharmony_ci		u32 seq)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
68562306a36Sopenharmony_ci		return false;
68662306a36Sopenharmony_ci	if (recall_range == NULL)
68762306a36Sopenharmony_ci		return true;
68862306a36Sopenharmony_ci	return pnfs_should_free_range(&lseg->pls_range, recall_range);
68962306a36Sopenharmony_ci}
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci/**
69262306a36Sopenharmony_ci * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
69362306a36Sopenharmony_ci * @lo: layout header containing the lsegs
69462306a36Sopenharmony_ci * @tmp_list: list head where doomed lsegs should go
69562306a36Sopenharmony_ci * @recall_range: optional recall range argument to match (may be NULL)
69662306a36Sopenharmony_ci * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
69762306a36Sopenharmony_ci *
69862306a36Sopenharmony_ci * Walk the list of lsegs in the layout header, and tear down any that should
69962306a36Sopenharmony_ci * be destroyed. If "recall_range" is specified then the segment must match
70062306a36Sopenharmony_ci * that range. If "seq" is non-zero, then only match segments that were handed
70162306a36Sopenharmony_ci * out at or before that sequence.
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci * Returns number of matching invalid lsegs remaining in list after scanning
70462306a36Sopenharmony_ci * it and purging them.
70562306a36Sopenharmony_ci */
70662306a36Sopenharmony_ciint
70762306a36Sopenharmony_cipnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
70862306a36Sopenharmony_ci			    struct list_head *tmp_list,
70962306a36Sopenharmony_ci			    const struct pnfs_layout_range *recall_range,
71062306a36Sopenharmony_ci			    u32 seq)
71162306a36Sopenharmony_ci{
71262306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
71362306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
71462306a36Sopenharmony_ci	int remaining = 0;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	dprintk("%s:Begin lo %p\n", __func__, lo);
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	if (list_empty(&lo->plh_segs))
71962306a36Sopenharmony_ci		return 0;
72062306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
72162306a36Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
72262306a36Sopenharmony_ci			dprintk("%s: freeing lseg %p iomode %d seq %u "
72362306a36Sopenharmony_ci				"offset %llu length %llu\n", __func__,
72462306a36Sopenharmony_ci				lseg, lseg->pls_range.iomode, lseg->pls_seq,
72562306a36Sopenharmony_ci				lseg->pls_range.offset, lseg->pls_range.length);
72662306a36Sopenharmony_ci			if (mark_lseg_invalid(lseg, tmp_list))
72762306a36Sopenharmony_ci				continue;
72862306a36Sopenharmony_ci			remaining++;
72962306a36Sopenharmony_ci			pnfs_lseg_cancel_io(server, lseg);
73062306a36Sopenharmony_ci		}
73162306a36Sopenharmony_ci	dprintk("%s:Return %i\n", __func__, remaining);
73262306a36Sopenharmony_ci	return remaining;
73362306a36Sopenharmony_ci}
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_cistatic void
73662306a36Sopenharmony_cipnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
73762306a36Sopenharmony_ci		struct list_head *free_me,
73862306a36Sopenharmony_ci		const struct pnfs_layout_range *range,
73962306a36Sopenharmony_ci		u32 seq)
74062306a36Sopenharmony_ci{
74162306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
74462306a36Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, range, seq))
74562306a36Sopenharmony_ci			list_move_tail(&lseg->pls_list, free_me);
74662306a36Sopenharmony_ci	}
74762306a36Sopenharmony_ci}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci/* note free_me must contain lsegs from a single layout_hdr */
75062306a36Sopenharmony_civoid
75162306a36Sopenharmony_cipnfs_free_lseg_list(struct list_head *free_me)
75262306a36Sopenharmony_ci{
75362306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	if (list_empty(free_me))
75662306a36Sopenharmony_ci		return;
75762306a36Sopenharmony_ci
75862306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
75962306a36Sopenharmony_ci		list_del(&lseg->pls_list);
76062306a36Sopenharmony_ci		pnfs_free_lseg(lseg);
76162306a36Sopenharmony_ci	}
76262306a36Sopenharmony_ci}
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_cistatic struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
76762306a36Sopenharmony_ci	LIST_HEAD(tmp_list);
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	spin_lock(&nfsi->vfs_inode.i_lock);
77062306a36Sopenharmony_ci	lo = nfsi->layout;
77162306a36Sopenharmony_ci	if (lo) {
77262306a36Sopenharmony_ci		pnfs_get_layout_hdr(lo);
77362306a36Sopenharmony_ci		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
77462306a36Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
77562306a36Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
77662306a36Sopenharmony_ci		spin_unlock(&nfsi->vfs_inode.i_lock);
77762306a36Sopenharmony_ci		pnfs_free_lseg_list(&tmp_list);
77862306a36Sopenharmony_ci		nfs_commit_inode(&nfsi->vfs_inode, 0);
77962306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
78062306a36Sopenharmony_ci	} else
78162306a36Sopenharmony_ci		spin_unlock(&nfsi->vfs_inode.i_lock);
78262306a36Sopenharmony_ci	return lo;
78362306a36Sopenharmony_ci}
78462306a36Sopenharmony_ci
/*
 * Invalidate the layout attached to @nfsi (if any) without waiting for the
 * layout header to be detached from the inode.
 */
void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	/* The returned header pointer is not needed here */
	(void)__pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_cistatic bool pnfs_layout_removed(struct nfs_inode *nfsi,
79262306a36Sopenharmony_ci				struct pnfs_layout_hdr *lo)
79362306a36Sopenharmony_ci{
79462306a36Sopenharmony_ci	bool ret;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	spin_lock(&nfsi->vfs_inode.i_lock);
79762306a36Sopenharmony_ci	ret = nfsi->layout != lo;
79862306a36Sopenharmony_ci	spin_unlock(&nfsi->vfs_inode.i_lock);
79962306a36Sopenharmony_ci	return ret;
80062306a36Sopenharmony_ci}
80162306a36Sopenharmony_ci
/*
 * Invalidate the layout attached to @nfsi and, if there was one, sleep
 * until nfsi->layout no longer refers to it.
 */
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);

	if (!lo)
		return;

	/* lo serves as the wait key and comparison value only */
	wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
}
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_cistatic bool
81162306a36Sopenharmony_cipnfs_layout_add_bulk_destroy_list(struct inode *inode,
81262306a36Sopenharmony_ci		struct list_head *layout_list)
81362306a36Sopenharmony_ci{
81462306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
81562306a36Sopenharmony_ci	bool ret = false;
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
81862306a36Sopenharmony_ci	lo = NFS_I(inode)->layout;
81962306a36Sopenharmony_ci	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
82062306a36Sopenharmony_ci		pnfs_get_layout_hdr(lo);
82162306a36Sopenharmony_ci		list_add(&lo->plh_bulk_destroy, layout_list);
82262306a36Sopenharmony_ci		ret = true;
82362306a36Sopenharmony_ci	}
82462306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
82562306a36Sopenharmony_ci	return ret;
82662306a36Sopenharmony_ci}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci/* Caller must hold rcu_read_lock and clp->cl_lock */
82962306a36Sopenharmony_cistatic int
83062306a36Sopenharmony_cipnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
83162306a36Sopenharmony_ci		struct nfs_server *server,
83262306a36Sopenharmony_ci		struct list_head *layout_list)
83362306a36Sopenharmony_ci	__must_hold(&clp->cl_lock)
83462306a36Sopenharmony_ci	__must_hold(RCU)
83562306a36Sopenharmony_ci{
83662306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo, *next;
83762306a36Sopenharmony_ci	struct inode *inode;
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
84062306a36Sopenharmony_ci		if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
84162306a36Sopenharmony_ci		    test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
84262306a36Sopenharmony_ci		    !list_empty(&lo->plh_bulk_destroy))
84362306a36Sopenharmony_ci			continue;
84462306a36Sopenharmony_ci		/* If the sb is being destroyed, just bail */
84562306a36Sopenharmony_ci		if (!nfs_sb_active(server->super))
84662306a36Sopenharmony_ci			break;
84762306a36Sopenharmony_ci		inode = pnfs_grab_inode_layout_hdr(lo);
84862306a36Sopenharmony_ci		if (inode != NULL) {
84962306a36Sopenharmony_ci			if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
85062306a36Sopenharmony_ci				list_del_rcu(&lo->plh_layouts);
85162306a36Sopenharmony_ci			if (pnfs_layout_add_bulk_destroy_list(inode,
85262306a36Sopenharmony_ci						layout_list))
85362306a36Sopenharmony_ci				continue;
85462306a36Sopenharmony_ci			rcu_read_unlock();
85562306a36Sopenharmony_ci			spin_unlock(&clp->cl_lock);
85662306a36Sopenharmony_ci			iput(inode);
85762306a36Sopenharmony_ci		} else {
85862306a36Sopenharmony_ci			rcu_read_unlock();
85962306a36Sopenharmony_ci			spin_unlock(&clp->cl_lock);
86062306a36Sopenharmony_ci		}
86162306a36Sopenharmony_ci		nfs_sb_deactive(server->super);
86262306a36Sopenharmony_ci		spin_lock(&clp->cl_lock);
86362306a36Sopenharmony_ci		rcu_read_lock();
86462306a36Sopenharmony_ci		return -EAGAIN;
86562306a36Sopenharmony_ci	}
86662306a36Sopenharmony_ci	return 0;
86762306a36Sopenharmony_ci}
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_cistatic int
87062306a36Sopenharmony_cipnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
87162306a36Sopenharmony_ci		bool is_bulk_recall)
87262306a36Sopenharmony_ci{
87362306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
87462306a36Sopenharmony_ci	struct inode *inode;
87562306a36Sopenharmony_ci	LIST_HEAD(lseg_list);
87662306a36Sopenharmony_ci	int ret = 0;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	while (!list_empty(layout_list)) {
87962306a36Sopenharmony_ci		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
88062306a36Sopenharmony_ci				plh_bulk_destroy);
88162306a36Sopenharmony_ci		dprintk("%s freeing layout for inode %lu\n", __func__,
88262306a36Sopenharmony_ci			lo->plh_inode->i_ino);
88362306a36Sopenharmony_ci		inode = lo->plh_inode;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci		pnfs_layoutcommit_inode(inode, false);
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
88862306a36Sopenharmony_ci		list_del_init(&lo->plh_bulk_destroy);
88962306a36Sopenharmony_ci		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
89062306a36Sopenharmony_ci			if (is_bulk_recall)
89162306a36Sopenharmony_ci				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
89262306a36Sopenharmony_ci			ret = -EAGAIN;
89362306a36Sopenharmony_ci		}
89462306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
89562306a36Sopenharmony_ci		pnfs_free_lseg_list(&lseg_list);
89662306a36Sopenharmony_ci		/* Free all lsegs that are attached to commit buckets */
89762306a36Sopenharmony_ci		nfs_commit_inode(inode, 0);
89862306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
89962306a36Sopenharmony_ci		nfs_iput_and_deactive(inode);
90062306a36Sopenharmony_ci	}
90162306a36Sopenharmony_ci	return ret;
90262306a36Sopenharmony_ci}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ciint
90562306a36Sopenharmony_cipnfs_destroy_layouts_byfsid(struct nfs_client *clp,
90662306a36Sopenharmony_ci		struct nfs_fsid *fsid,
90762306a36Sopenharmony_ci		bool is_recall)
90862306a36Sopenharmony_ci{
90962306a36Sopenharmony_ci	struct nfs_server *server;
91062306a36Sopenharmony_ci	LIST_HEAD(layout_list);
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	spin_lock(&clp->cl_lock);
91362306a36Sopenharmony_ci	rcu_read_lock();
91462306a36Sopenharmony_cirestart:
91562306a36Sopenharmony_ci	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
91662306a36Sopenharmony_ci		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
91762306a36Sopenharmony_ci			continue;
91862306a36Sopenharmony_ci		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
91962306a36Sopenharmony_ci				server,
92062306a36Sopenharmony_ci				&layout_list) != 0)
92162306a36Sopenharmony_ci			goto restart;
92262306a36Sopenharmony_ci	}
92362306a36Sopenharmony_ci	rcu_read_unlock();
92462306a36Sopenharmony_ci	spin_unlock(&clp->cl_lock);
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	if (list_empty(&layout_list))
92762306a36Sopenharmony_ci		return 0;
92862306a36Sopenharmony_ci	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
92962306a36Sopenharmony_ci}
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ciint
93262306a36Sopenharmony_cipnfs_destroy_layouts_byclid(struct nfs_client *clp,
93362306a36Sopenharmony_ci		bool is_recall)
93462306a36Sopenharmony_ci{
93562306a36Sopenharmony_ci	struct nfs_server *server;
93662306a36Sopenharmony_ci	LIST_HEAD(layout_list);
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	spin_lock(&clp->cl_lock);
93962306a36Sopenharmony_ci	rcu_read_lock();
94062306a36Sopenharmony_cirestart:
94162306a36Sopenharmony_ci	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
94262306a36Sopenharmony_ci		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
94362306a36Sopenharmony_ci					server,
94462306a36Sopenharmony_ci					&layout_list) != 0)
94562306a36Sopenharmony_ci			goto restart;
94662306a36Sopenharmony_ci	}
94762306a36Sopenharmony_ci	rcu_read_unlock();
94862306a36Sopenharmony_ci	spin_unlock(&clp->cl_lock);
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	if (list_empty(&layout_list))
95162306a36Sopenharmony_ci		return 0;
95262306a36Sopenharmony_ci	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
95362306a36Sopenharmony_ci}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci/*
95662306a36Sopenharmony_ci * Called by the state manager to remove all layouts established under an
95762306a36Sopenharmony_ci * expired lease.
95862306a36Sopenharmony_ci */
95962306a36Sopenharmony_civoid
96062306a36Sopenharmony_cipnfs_destroy_all_layouts(struct nfs_client *clp)
96162306a36Sopenharmony_ci{
96262306a36Sopenharmony_ci	nfs4_deviceid_mark_client_invalid(clp);
96362306a36Sopenharmony_ci	nfs4_deviceid_purge_client(clp);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	pnfs_destroy_layouts_byclid(clp, false);
96662306a36Sopenharmony_ci}
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_cistatic void
96962306a36Sopenharmony_cipnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
97062306a36Sopenharmony_ci{
97162306a36Sopenharmony_ci	const struct cred *old;
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) {
97462306a36Sopenharmony_ci		old = xchg(&lo->plh_lc_cred, get_cred(cred));
97562306a36Sopenharmony_ci		put_cred(old);
97662306a36Sopenharmony_ci	}
97762306a36Sopenharmony_ci}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci/* update lo->plh_stateid with new if is more recent */
98062306a36Sopenharmony_civoid
98162306a36Sopenharmony_cipnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
98262306a36Sopenharmony_ci			const struct cred *cred, bool update_barrier)
98362306a36Sopenharmony_ci{
98462306a36Sopenharmony_ci	u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
98562306a36Sopenharmony_ci	u32 newseq = be32_to_cpu(new->seqid);
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	if (!pnfs_layout_is_valid(lo)) {
98862306a36Sopenharmony_ci		pnfs_set_layout_cred(lo, cred);
98962306a36Sopenharmony_ci		nfs4_stateid_copy(&lo->plh_stateid, new);
99062306a36Sopenharmony_ci		lo->plh_barrier = newseq;
99162306a36Sopenharmony_ci		pnfs_clear_layoutreturn_info(lo);
99262306a36Sopenharmony_ci		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
99362306a36Sopenharmony_ci		return;
99462306a36Sopenharmony_ci	}
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	if (pnfs_seqid_is_newer(newseq, oldseq))
99762306a36Sopenharmony_ci		nfs4_stateid_copy(&lo->plh_stateid, new);
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	if (update_barrier) {
100062306a36Sopenharmony_ci		pnfs_barrier_update(lo, newseq);
100162306a36Sopenharmony_ci		return;
100262306a36Sopenharmony_ci	}
100362306a36Sopenharmony_ci	/*
100462306a36Sopenharmony_ci	 * Because of wraparound, we want to keep the barrier
100562306a36Sopenharmony_ci	 * "close" to the current seqids. We really only want to
100662306a36Sopenharmony_ci	 * get here from a layoutget call.
100762306a36Sopenharmony_ci	 */
100862306a36Sopenharmony_ci	if (atomic_read(&lo->plh_outstanding) == 1)
100962306a36Sopenharmony_ci		 pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
101062306a36Sopenharmony_ci}
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_cistatic bool
101362306a36Sopenharmony_cipnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
101462306a36Sopenharmony_ci		const nfs4_stateid *stateid)
101562306a36Sopenharmony_ci{
101662306a36Sopenharmony_ci	u32 seqid = be32_to_cpu(stateid->seqid);
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
101962306a36Sopenharmony_ci}
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci/* lget is set to 1 if called from inside send_layoutget call chain */
102262306a36Sopenharmony_cistatic bool
102362306a36Sopenharmony_cipnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
102462306a36Sopenharmony_ci{
102562306a36Sopenharmony_ci	return lo->plh_block_lgets ||
102662306a36Sopenharmony_ci		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
102762306a36Sopenharmony_ci}
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_cistatic struct nfs_server *
103062306a36Sopenharmony_cipnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
103162306a36Sopenharmony_ci{
103262306a36Sopenharmony_ci	struct nfs_server *server;
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	if (inode) {
103562306a36Sopenharmony_ci		server = NFS_SERVER(inode);
103662306a36Sopenharmony_ci	} else {
103762306a36Sopenharmony_ci		struct dentry *parent_dir = dget_parent(ctx->dentry);
103862306a36Sopenharmony_ci		server = NFS_SERVER(parent_dir->d_inode);
103962306a36Sopenharmony_ci		dput(parent_dir);
104062306a36Sopenharmony_ci	}
104162306a36Sopenharmony_ci	return server;
104262306a36Sopenharmony_ci}
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_cistatic void nfs4_free_pages(struct page **pages, size_t size)
104562306a36Sopenharmony_ci{
104662306a36Sopenharmony_ci	int i;
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci	if (!pages)
104962306a36Sopenharmony_ci		return;
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	for (i = 0; i < size; i++) {
105262306a36Sopenharmony_ci		if (!pages[i])
105362306a36Sopenharmony_ci			break;
105462306a36Sopenharmony_ci		__free_page(pages[i]);
105562306a36Sopenharmony_ci	}
105662306a36Sopenharmony_ci	kfree(pages);
105762306a36Sopenharmony_ci}
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_cistatic struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
106062306a36Sopenharmony_ci{
106162306a36Sopenharmony_ci	struct page **pages;
106262306a36Sopenharmony_ci	int i;
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
106562306a36Sopenharmony_ci	if (!pages) {
106662306a36Sopenharmony_ci		dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
106762306a36Sopenharmony_ci		return NULL;
106862306a36Sopenharmony_ci	}
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_ci	for (i = 0; i < size; i++) {
107162306a36Sopenharmony_ci		pages[i] = alloc_page(gfp_flags);
107262306a36Sopenharmony_ci		if (!pages[i]) {
107362306a36Sopenharmony_ci			dprintk("%s: failed to allocate page\n", __func__);
107462306a36Sopenharmony_ci			nfs4_free_pages(pages, i);
107562306a36Sopenharmony_ci			return NULL;
107662306a36Sopenharmony_ci		}
107762306a36Sopenharmony_ci	}
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci	return pages;
108062306a36Sopenharmony_ci}
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_cistatic struct nfs4_layoutget *
108362306a36Sopenharmony_cipnfs_alloc_init_layoutget_args(struct inode *ino,
108462306a36Sopenharmony_ci	   struct nfs_open_context *ctx,
108562306a36Sopenharmony_ci	   const nfs4_stateid *stateid,
108662306a36Sopenharmony_ci	   const struct pnfs_layout_range *range,
108762306a36Sopenharmony_ci	   gfp_t gfp_flags)
108862306a36Sopenharmony_ci{
108962306a36Sopenharmony_ci	struct nfs_server *server = pnfs_find_server(ino, ctx);
109062306a36Sopenharmony_ci	size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
109162306a36Sopenharmony_ci	size_t max_pages = max_response_pages(server);
109262306a36Sopenharmony_ci	struct nfs4_layoutget *lgp;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	dprintk("--> %s\n", __func__);
109562306a36Sopenharmony_ci
109662306a36Sopenharmony_ci	lgp = kzalloc(sizeof(*lgp), gfp_flags);
109762306a36Sopenharmony_ci	if (lgp == NULL)
109862306a36Sopenharmony_ci		return NULL;
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci	if (max_reply_sz) {
110162306a36Sopenharmony_ci		size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
110262306a36Sopenharmony_ci		if (npages < max_pages)
110362306a36Sopenharmony_ci			max_pages = npages;
110462306a36Sopenharmony_ci	}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
110762306a36Sopenharmony_ci	if (!lgp->args.layout.pages) {
110862306a36Sopenharmony_ci		kfree(lgp);
110962306a36Sopenharmony_ci		return NULL;
111062306a36Sopenharmony_ci	}
111162306a36Sopenharmony_ci	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
111262306a36Sopenharmony_ci	lgp->res.layoutp = &lgp->args.layout;
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	/* Don't confuse uninitialised result and success */
111562306a36Sopenharmony_ci	lgp->res.status = -NFS4ERR_DELAY;
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	lgp->args.minlength = PAGE_SIZE;
111862306a36Sopenharmony_ci	if (lgp->args.minlength > range->length)
111962306a36Sopenharmony_ci		lgp->args.minlength = range->length;
112062306a36Sopenharmony_ci	if (ino) {
112162306a36Sopenharmony_ci		loff_t i_size = i_size_read(ino);
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci		if (range->iomode == IOMODE_READ) {
112462306a36Sopenharmony_ci			if (range->offset >= i_size)
112562306a36Sopenharmony_ci				lgp->args.minlength = 0;
112662306a36Sopenharmony_ci			else if (i_size - range->offset < lgp->args.minlength)
112762306a36Sopenharmony_ci				lgp->args.minlength = i_size - range->offset;
112862306a36Sopenharmony_ci		}
112962306a36Sopenharmony_ci	}
113062306a36Sopenharmony_ci	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
113162306a36Sopenharmony_ci	pnfs_copy_range(&lgp->args.range, range);
113262306a36Sopenharmony_ci	lgp->args.type = server->pnfs_curr_ld->id;
113362306a36Sopenharmony_ci	lgp->args.inode = ino;
113462306a36Sopenharmony_ci	lgp->args.ctx = get_nfs_open_context(ctx);
113562306a36Sopenharmony_ci	nfs4_stateid_copy(&lgp->args.stateid, stateid);
113662306a36Sopenharmony_ci	lgp->gfp_flags = gfp_flags;
113762306a36Sopenharmony_ci	lgp->cred = ctx->cred;
113862306a36Sopenharmony_ci	return lgp;
113962306a36Sopenharmony_ci}
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_civoid pnfs_layoutget_free(struct nfs4_layoutget *lgp)
114262306a36Sopenharmony_ci{
114362306a36Sopenharmony_ci	size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_ci	nfs4_free_pages(lgp->args.layout.pages, max_pages);
114662306a36Sopenharmony_ci	pnfs_put_layout_hdr(lgp->lo);
114762306a36Sopenharmony_ci	put_nfs_open_context(lgp->args.ctx);
114862306a36Sopenharmony_ci	kfree(lgp);
114962306a36Sopenharmony_ci}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_cistatic void pnfs_clear_layoutcommit(struct inode *inode,
115262306a36Sopenharmony_ci		struct list_head *head)
115362306a36Sopenharmony_ci{
115462306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
115562306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_ci	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
115862306a36Sopenharmony_ci		return;
115962306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
116062306a36Sopenharmony_ci		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
116162306a36Sopenharmony_ci			continue;
116262306a36Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, head);
116362306a36Sopenharmony_ci	}
116462306a36Sopenharmony_ci}
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_civoid pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
116762306a36Sopenharmony_ci		const nfs4_stateid *arg_stateid,
116862306a36Sopenharmony_ci		const struct pnfs_layout_range *range,
116962306a36Sopenharmony_ci		const nfs4_stateid *stateid)
117062306a36Sopenharmony_ci{
117162306a36Sopenharmony_ci	struct inode *inode = lo->plh_inode;
117262306a36Sopenharmony_ci	LIST_HEAD(freeme);
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
117562306a36Sopenharmony_ci	if (!pnfs_layout_is_valid(lo) ||
117662306a36Sopenharmony_ci	    !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
117762306a36Sopenharmony_ci		goto out_unlock;
117862306a36Sopenharmony_ci	if (stateid) {
117962306a36Sopenharmony_ci		u32 seq = be32_to_cpu(arg_stateid->seqid);
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci		pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
118262306a36Sopenharmony_ci		pnfs_free_returned_lsegs(lo, &freeme, range, seq);
118362306a36Sopenharmony_ci		pnfs_set_layout_stateid(lo, stateid, NULL, true);
118462306a36Sopenharmony_ci	} else
118562306a36Sopenharmony_ci		pnfs_mark_layout_stateid_invalid(lo, &freeme);
118662306a36Sopenharmony_ciout_unlock:
118762306a36Sopenharmony_ci	pnfs_clear_layoutreturn_waitbit(lo);
118862306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
118962306a36Sopenharmony_ci	pnfs_free_lseg_list(&freeme);
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci}
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_cistatic bool
119462306a36Sopenharmony_cipnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
119562306a36Sopenharmony_ci		nfs4_stateid *stateid,
119662306a36Sopenharmony_ci		const struct cred **cred,
119762306a36Sopenharmony_ci		enum pnfs_iomode *iomode)
119862306a36Sopenharmony_ci{
119962306a36Sopenharmony_ci	/* Serialise LAYOUTGET/LAYOUTRETURN */
120062306a36Sopenharmony_ci	if (atomic_read(&lo->plh_outstanding) != 0)
120162306a36Sopenharmony_ci		return false;
120262306a36Sopenharmony_ci	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
120362306a36Sopenharmony_ci		return false;
120462306a36Sopenharmony_ci	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
120562306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
120662306a36Sopenharmony_ci	nfs4_stateid_copy(stateid, &lo->plh_stateid);
120762306a36Sopenharmony_ci	*cred = get_cred(lo->plh_lc_cred);
120862306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
120962306a36Sopenharmony_ci		if (lo->plh_return_seq != 0)
121062306a36Sopenharmony_ci			stateid->seqid = cpu_to_be32(lo->plh_return_seq);
121162306a36Sopenharmony_ci		if (iomode != NULL)
121262306a36Sopenharmony_ci			*iomode = lo->plh_return_iomode;
121362306a36Sopenharmony_ci		pnfs_clear_layoutreturn_info(lo);
121462306a36Sopenharmony_ci	} else if (iomode != NULL)
121562306a36Sopenharmony_ci		*iomode = IOMODE_ANY;
121662306a36Sopenharmony_ci	pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid));
121762306a36Sopenharmony_ci	return true;
121862306a36Sopenharmony_ci}
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_cistatic void
122162306a36Sopenharmony_cipnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
122262306a36Sopenharmony_ci		struct pnfs_layout_hdr *lo,
122362306a36Sopenharmony_ci		const nfs4_stateid *stateid,
122462306a36Sopenharmony_ci		enum pnfs_iomode iomode)
122562306a36Sopenharmony_ci{
122662306a36Sopenharmony_ci	struct inode *inode = lo->plh_inode;
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci	args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
122962306a36Sopenharmony_ci	args->inode = inode;
123062306a36Sopenharmony_ci	args->range.iomode = iomode;
123162306a36Sopenharmony_ci	args->range.offset = 0;
123262306a36Sopenharmony_ci	args->range.length = NFS4_MAX_UINT64;
123362306a36Sopenharmony_ci	args->layout = lo;
123462306a36Sopenharmony_ci	nfs4_stateid_copy(&args->stateid, stateid);
123562306a36Sopenharmony_ci}
123662306a36Sopenharmony_ci
123762306a36Sopenharmony_cistatic int
123862306a36Sopenharmony_cipnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
123962306a36Sopenharmony_ci		       const nfs4_stateid *stateid,
124062306a36Sopenharmony_ci		       const struct cred **pcred,
124162306a36Sopenharmony_ci		       enum pnfs_iomode iomode,
124262306a36Sopenharmony_ci		       bool sync)
124362306a36Sopenharmony_ci{
124462306a36Sopenharmony_ci	struct inode *ino = lo->plh_inode;
124562306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
124662306a36Sopenharmony_ci	struct nfs4_layoutreturn *lrp;
124762306a36Sopenharmony_ci	const struct cred *cred = *pcred;
124862306a36Sopenharmony_ci	int status = 0;
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_ci	*pcred = NULL;
125162306a36Sopenharmony_ci	lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask());
125262306a36Sopenharmony_ci	if (unlikely(lrp == NULL)) {
125362306a36Sopenharmony_ci		status = -ENOMEM;
125462306a36Sopenharmony_ci		spin_lock(&ino->i_lock);
125562306a36Sopenharmony_ci		pnfs_clear_layoutreturn_waitbit(lo);
125662306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
125762306a36Sopenharmony_ci		put_cred(cred);
125862306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
125962306a36Sopenharmony_ci		goto out;
126062306a36Sopenharmony_ci	}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci	pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
126362306a36Sopenharmony_ci	lrp->args.ld_private = &lrp->ld_private;
126462306a36Sopenharmony_ci	lrp->clp = NFS_SERVER(ino)->nfs_client;
126562306a36Sopenharmony_ci	lrp->cred = cred;
126662306a36Sopenharmony_ci	if (ld->prepare_layoutreturn)
126762306a36Sopenharmony_ci		ld->prepare_layoutreturn(&lrp->args);
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	status = nfs4_proc_layoutreturn(lrp, sync);
127062306a36Sopenharmony_ciout:
127162306a36Sopenharmony_ci	dprintk("<-- %s status: %d\n", __func__, status);
127262306a36Sopenharmony_ci	return status;
127362306a36Sopenharmony_ci}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_cistatic bool
127662306a36Sopenharmony_cipnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
127762306a36Sopenharmony_ci				enum pnfs_iomode iomode,
127862306a36Sopenharmony_ci				u32 seq)
127962306a36Sopenharmony_ci{
128062306a36Sopenharmony_ci	struct pnfs_layout_range recall_range = {
128162306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
128262306a36Sopenharmony_ci		.iomode = iomode,
128362306a36Sopenharmony_ci	};
128462306a36Sopenharmony_ci	return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
128562306a36Sopenharmony_ci					       &recall_range, seq) != -EBUSY;
128662306a36Sopenharmony_ci}
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci/* Return true if layoutreturn is needed */
128962306a36Sopenharmony_cistatic bool
129062306a36Sopenharmony_cipnfs_layout_need_return(struct pnfs_layout_hdr *lo)
129162306a36Sopenharmony_ci{
129262306a36Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
129362306a36Sopenharmony_ci		return false;
129462306a36Sopenharmony_ci	return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
129562306a36Sopenharmony_ci					       lo->plh_return_seq);
129662306a36Sopenharmony_ci}
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
129962306a36Sopenharmony_ci{
130062306a36Sopenharmony_ci	struct inode *inode= lo->plh_inode;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
130362306a36Sopenharmony_ci		return;
130462306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
130562306a36Sopenharmony_ci	if (pnfs_layout_need_return(lo)) {
130662306a36Sopenharmony_ci		const struct cred *cred;
130762306a36Sopenharmony_ci		nfs4_stateid stateid;
130862306a36Sopenharmony_ci		enum pnfs_iomode iomode;
130962306a36Sopenharmony_ci		bool send;
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci		send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
131262306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
131362306a36Sopenharmony_ci		if (send) {
131462306a36Sopenharmony_ci			/* Send an async layoutreturn so we dont deadlock */
131562306a36Sopenharmony_ci			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
131662306a36Sopenharmony_ci		}
131762306a36Sopenharmony_ci	} else
131862306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
131962306a36Sopenharmony_ci}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci/*
132262306a36Sopenharmony_ci * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
132362306a36Sopenharmony_ci * when the layout segment list is empty.
132462306a36Sopenharmony_ci *
132562306a36Sopenharmony_ci * Note that a pnfs_layout_hdr can exist with an empty layout segment
132662306a36Sopenharmony_ci * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
132762306a36Sopenharmony_ci * deviceid is marked invalid.
132862306a36Sopenharmony_ci */
132962306a36Sopenharmony_ciint
133062306a36Sopenharmony_ci_pnfs_return_layout(struct inode *ino)
133162306a36Sopenharmony_ci{
133262306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo = NULL;
133362306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
133462306a36Sopenharmony_ci	struct pnfs_layout_range range = {
133562306a36Sopenharmony_ci		.iomode		= IOMODE_ANY,
133662306a36Sopenharmony_ci		.offset		= 0,
133762306a36Sopenharmony_ci		.length		= NFS4_MAX_UINT64,
133862306a36Sopenharmony_ci	};
133962306a36Sopenharmony_ci	LIST_HEAD(tmp_list);
134062306a36Sopenharmony_ci	const struct cred *cred;
134162306a36Sopenharmony_ci	nfs4_stateid stateid;
134262306a36Sopenharmony_ci	int status = 0;
134362306a36Sopenharmony_ci	bool send, valid_layout;
134462306a36Sopenharmony_ci
134562306a36Sopenharmony_ci	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
134862306a36Sopenharmony_ci	lo = nfsi->layout;
134962306a36Sopenharmony_ci	if (!lo) {
135062306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
135162306a36Sopenharmony_ci		dprintk("NFS: %s no layout to return\n", __func__);
135262306a36Sopenharmony_ci		goto out;
135362306a36Sopenharmony_ci	}
135462306a36Sopenharmony_ci	/* Reference matched in nfs4_layoutreturn_release */
135562306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
135662306a36Sopenharmony_ci	/* Is there an outstanding layoutreturn ? */
135762306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
135862306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
135962306a36Sopenharmony_ci		if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
136062306a36Sopenharmony_ci					TASK_UNINTERRUPTIBLE))
136162306a36Sopenharmony_ci			goto out_put_layout_hdr;
136262306a36Sopenharmony_ci		spin_lock(&ino->i_lock);
136362306a36Sopenharmony_ci	}
136462306a36Sopenharmony_ci	valid_layout = pnfs_layout_is_valid(lo);
136562306a36Sopenharmony_ci	pnfs_clear_layoutcommit(ino, &tmp_list);
136662306a36Sopenharmony_ci	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
136762306a36Sopenharmony_ci
136862306a36Sopenharmony_ci	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
136962306a36Sopenharmony_ci		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	/* Don't send a LAYOUTRETURN if list was initially empty */
137262306a36Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
137362306a36Sopenharmony_ci			!valid_layout) {
137462306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
137562306a36Sopenharmony_ci		dprintk("NFS: %s no layout segments to return\n", __func__);
137662306a36Sopenharmony_ci		goto out_wait_layoutreturn;
137762306a36Sopenharmony_ci	}
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
138062306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
138162306a36Sopenharmony_ci	if (send)
138262306a36Sopenharmony_ci		status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
138362306a36Sopenharmony_ciout_wait_layoutreturn:
138462306a36Sopenharmony_ci	wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
138562306a36Sopenharmony_ciout_put_layout_hdr:
138662306a36Sopenharmony_ci	pnfs_free_lseg_list(&tmp_list);
138762306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
138862306a36Sopenharmony_ciout:
138962306a36Sopenharmony_ci	dprintk("<-- %s status: %d\n", __func__, status);
139062306a36Sopenharmony_ci	return status;
139162306a36Sopenharmony_ci}
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ciint
139462306a36Sopenharmony_cipnfs_commit_and_return_layout(struct inode *inode)
139562306a36Sopenharmony_ci{
139662306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
139762306a36Sopenharmony_ci	int ret;
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
140062306a36Sopenharmony_ci	lo = NFS_I(inode)->layout;
140162306a36Sopenharmony_ci	if (lo == NULL) {
140262306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
140362306a36Sopenharmony_ci		return 0;
140462306a36Sopenharmony_ci	}
140562306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
140662306a36Sopenharmony_ci	/* Block new layoutgets and read/write to ds */
140762306a36Sopenharmony_ci	lo->plh_block_lgets++;
140862306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
140962306a36Sopenharmony_ci	filemap_fdatawait(inode->i_mapping);
141062306a36Sopenharmony_ci	ret = pnfs_layoutcommit_inode(inode, true);
141162306a36Sopenharmony_ci	if (ret == 0)
141262306a36Sopenharmony_ci		ret = _pnfs_return_layout(inode);
141362306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
141462306a36Sopenharmony_ci	lo->plh_block_lgets--;
141562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
141662306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
141762306a36Sopenharmony_ci	return ret;
141862306a36Sopenharmony_ci}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_cibool pnfs_roc(struct inode *ino,
142162306a36Sopenharmony_ci		struct nfs4_layoutreturn_args *args,
142262306a36Sopenharmony_ci		struct nfs4_layoutreturn_res *res,
142362306a36Sopenharmony_ci		const struct cred *cred)
142462306a36Sopenharmony_ci{
142562306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
142662306a36Sopenharmony_ci	struct nfs_open_context *ctx;
142762306a36Sopenharmony_ci	struct nfs4_state *state;
142862306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
142962306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
143062306a36Sopenharmony_ci	const struct cred *lc_cred;
143162306a36Sopenharmony_ci	nfs4_stateid stateid;
143262306a36Sopenharmony_ci	enum pnfs_iomode iomode = 0;
143362306a36Sopenharmony_ci	bool layoutreturn = false, roc = false;
143462306a36Sopenharmony_ci	bool skip_read = false;
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci	if (!nfs_have_layout(ino))
143762306a36Sopenharmony_ci		return false;
143862306a36Sopenharmony_ciretry:
143962306a36Sopenharmony_ci	rcu_read_lock();
144062306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
144162306a36Sopenharmony_ci	lo = nfsi->layout;
144262306a36Sopenharmony_ci	if (!lo || !pnfs_layout_is_valid(lo) ||
144362306a36Sopenharmony_ci	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
144462306a36Sopenharmony_ci		lo = NULL;
144562306a36Sopenharmony_ci		goto out_noroc;
144662306a36Sopenharmony_ci	}
144762306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
144862306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
144962306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
145062306a36Sopenharmony_ci		rcu_read_unlock();
145162306a36Sopenharmony_ci		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
145262306a36Sopenharmony_ci				TASK_UNINTERRUPTIBLE);
145362306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
145462306a36Sopenharmony_ci		goto retry;
145562306a36Sopenharmony_ci	}
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci	/* no roc if we hold a delegation */
145862306a36Sopenharmony_ci	if (nfs4_check_delegation(ino, FMODE_READ)) {
145962306a36Sopenharmony_ci		if (nfs4_check_delegation(ino, FMODE_WRITE))
146062306a36Sopenharmony_ci			goto out_noroc;
146162306a36Sopenharmony_ci		skip_read = true;
146262306a36Sopenharmony_ci	}
146362306a36Sopenharmony_ci
146462306a36Sopenharmony_ci	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
146562306a36Sopenharmony_ci		state = ctx->state;
146662306a36Sopenharmony_ci		if (state == NULL)
146762306a36Sopenharmony_ci			continue;
146862306a36Sopenharmony_ci		/* Don't return layout if there is open file state */
146962306a36Sopenharmony_ci		if (state->state & FMODE_WRITE)
147062306a36Sopenharmony_ci			goto out_noroc;
147162306a36Sopenharmony_ci		if (state->state & FMODE_READ)
147262306a36Sopenharmony_ci			skip_read = true;
147362306a36Sopenharmony_ci	}
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci
147662306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
147762306a36Sopenharmony_ci		if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
147862306a36Sopenharmony_ci			continue;
147962306a36Sopenharmony_ci		/* If we are sending layoutreturn, invalidate all valid lsegs */
148062306a36Sopenharmony_ci		if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
148162306a36Sopenharmony_ci			continue;
148262306a36Sopenharmony_ci		/*
148362306a36Sopenharmony_ci		 * Note: mark lseg for return so pnfs_layout_remove_lseg
148462306a36Sopenharmony_ci		 * doesn't invalidate the layout for us.
148562306a36Sopenharmony_ci		 */
148662306a36Sopenharmony_ci		set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
148762306a36Sopenharmony_ci		if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
148862306a36Sopenharmony_ci			continue;
148962306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
149062306a36Sopenharmony_ci	}
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
149362306a36Sopenharmony_ci		goto out_noroc;
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ci	/* ROC in two conditions:
149662306a36Sopenharmony_ci	 * 1. there are ROC lsegs
149762306a36Sopenharmony_ci	 * 2. we don't send layoutreturn
149862306a36Sopenharmony_ci	 */
149962306a36Sopenharmony_ci	/* lo ref dropped in pnfs_roc_release() */
150062306a36Sopenharmony_ci	layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode);
150162306a36Sopenharmony_ci	/* If the creds don't match, we can't compound the layoutreturn */
150262306a36Sopenharmony_ci	if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0)
150362306a36Sopenharmony_ci		goto out_noroc;
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	roc = layoutreturn;
150662306a36Sopenharmony_ci	pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
150762306a36Sopenharmony_ci	res->lrs_present = 0;
150862306a36Sopenharmony_ci	layoutreturn = false;
150962306a36Sopenharmony_ci	put_cred(lc_cred);
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_ciout_noroc:
151262306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
151362306a36Sopenharmony_ci	rcu_read_unlock();
151462306a36Sopenharmony_ci	pnfs_layoutcommit_inode(ino, true);
151562306a36Sopenharmony_ci	if (roc) {
151662306a36Sopenharmony_ci		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
151762306a36Sopenharmony_ci		if (ld->prepare_layoutreturn)
151862306a36Sopenharmony_ci			ld->prepare_layoutreturn(args);
151962306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
152062306a36Sopenharmony_ci		return true;
152162306a36Sopenharmony_ci	}
152262306a36Sopenharmony_ci	if (layoutreturn)
152362306a36Sopenharmony_ci		pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
152462306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
152562306a36Sopenharmony_ci	return false;
152662306a36Sopenharmony_ci}
152762306a36Sopenharmony_ci
152862306a36Sopenharmony_ciint pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
152962306a36Sopenharmony_ci		  struct nfs4_layoutreturn_res **respp, int *ret)
153062306a36Sopenharmony_ci{
153162306a36Sopenharmony_ci	struct nfs4_layoutreturn_args *arg = *argpp;
153262306a36Sopenharmony_ci	int retval = -EAGAIN;
153362306a36Sopenharmony_ci
153462306a36Sopenharmony_ci	if (!arg)
153562306a36Sopenharmony_ci		return 0;
153662306a36Sopenharmony_ci	/* Handle Layoutreturn errors */
153762306a36Sopenharmony_ci	switch (*ret) {
153862306a36Sopenharmony_ci	case 0:
153962306a36Sopenharmony_ci		retval = 0;
154062306a36Sopenharmony_ci		break;
154162306a36Sopenharmony_ci	case -NFS4ERR_NOMATCHING_LAYOUT:
154262306a36Sopenharmony_ci		/* Was there an RPC level error? If not, retry */
154362306a36Sopenharmony_ci		if (task->tk_rpc_status == 0)
154462306a36Sopenharmony_ci			break;
154562306a36Sopenharmony_ci		/* If the call was not sent, let caller handle it */
154662306a36Sopenharmony_ci		if (!RPC_WAS_SENT(task))
154762306a36Sopenharmony_ci			return 0;
154862306a36Sopenharmony_ci		/*
154962306a36Sopenharmony_ci		 * Otherwise, assume the call succeeded and
155062306a36Sopenharmony_ci		 * that we need to release the layout
155162306a36Sopenharmony_ci		 */
155262306a36Sopenharmony_ci		*ret = 0;
155362306a36Sopenharmony_ci		(*respp)->lrs_present = 0;
155462306a36Sopenharmony_ci		retval = 0;
155562306a36Sopenharmony_ci		break;
155662306a36Sopenharmony_ci	case -NFS4ERR_DELAY:
155762306a36Sopenharmony_ci		/* Let the caller handle the retry */
155862306a36Sopenharmony_ci		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
155962306a36Sopenharmony_ci		return 0;
156062306a36Sopenharmony_ci	case -NFS4ERR_OLD_STATEID:
156162306a36Sopenharmony_ci		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
156262306a36Sopenharmony_ci						     &arg->range, arg->inode))
156362306a36Sopenharmony_ci			break;
156462306a36Sopenharmony_ci		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
156562306a36Sopenharmony_ci		return -EAGAIN;
156662306a36Sopenharmony_ci	}
156762306a36Sopenharmony_ci	*argpp = NULL;
156862306a36Sopenharmony_ci	*respp = NULL;
156962306a36Sopenharmony_ci	return retval;
157062306a36Sopenharmony_ci}
157162306a36Sopenharmony_ci
157262306a36Sopenharmony_civoid pnfs_roc_release(struct nfs4_layoutreturn_args *args,
157362306a36Sopenharmony_ci		struct nfs4_layoutreturn_res *res,
157462306a36Sopenharmony_ci		int ret)
157562306a36Sopenharmony_ci{
157662306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo = args->layout;
157762306a36Sopenharmony_ci	struct inode *inode = args->inode;
157862306a36Sopenharmony_ci	const nfs4_stateid *res_stateid = NULL;
157962306a36Sopenharmony_ci	struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_ci	switch (ret) {
158262306a36Sopenharmony_ci	case -NFS4ERR_NOMATCHING_LAYOUT:
158362306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
158462306a36Sopenharmony_ci		if (pnfs_layout_is_valid(lo) &&
158562306a36Sopenharmony_ci		    nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
158662306a36Sopenharmony_ci			pnfs_set_plh_return_info(lo, args->range.iomode, 0);
158762306a36Sopenharmony_ci		pnfs_clear_layoutreturn_waitbit(lo);
158862306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
158962306a36Sopenharmony_ci		break;
159062306a36Sopenharmony_ci	case 0:
159162306a36Sopenharmony_ci		if (res->lrs_present)
159262306a36Sopenharmony_ci			res_stateid = &res->stateid;
159362306a36Sopenharmony_ci		fallthrough;
159462306a36Sopenharmony_ci	default:
159562306a36Sopenharmony_ci		pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range,
159662306a36Sopenharmony_ci					     res_stateid);
159762306a36Sopenharmony_ci	}
159862306a36Sopenharmony_ci	trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret);
159962306a36Sopenharmony_ci	if (ld_private && ld_private->ops && ld_private->ops->free)
160062306a36Sopenharmony_ci		ld_private->ops->free(ld_private);
160162306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
160262306a36Sopenharmony_ci}
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_cibool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
160562306a36Sopenharmony_ci{
160662306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
160762306a36Sopenharmony_ci        struct pnfs_layout_hdr *lo;
160862306a36Sopenharmony_ci        bool sleep = false;
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_ci	/* we might not have grabbed lo reference. so need to check under
161162306a36Sopenharmony_ci	 * i_lock */
161262306a36Sopenharmony_ci        spin_lock(&ino->i_lock);
161362306a36Sopenharmony_ci        lo = nfsi->layout;
161462306a36Sopenharmony_ci        if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
161562306a36Sopenharmony_ci                rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
161662306a36Sopenharmony_ci                sleep = true;
161762306a36Sopenharmony_ci	}
161862306a36Sopenharmony_ci        spin_unlock(&ino->i_lock);
161962306a36Sopenharmony_ci        return sleep;
162062306a36Sopenharmony_ci}
162162306a36Sopenharmony_ci
162262306a36Sopenharmony_ci/*
162362306a36Sopenharmony_ci * Compare two layout segments for sorting into layout cache.
162462306a36Sopenharmony_ci * We want to preferentially return RW over RO layouts, so ensure those
162562306a36Sopenharmony_ci * are seen first.
162662306a36Sopenharmony_ci */
162762306a36Sopenharmony_cistatic s64
162862306a36Sopenharmony_cipnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
162962306a36Sopenharmony_ci	   const struct pnfs_layout_range *l2)
163062306a36Sopenharmony_ci{
163162306a36Sopenharmony_ci	s64 d;
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	/* high offset > low offset */
163462306a36Sopenharmony_ci	d = l1->offset - l2->offset;
163562306a36Sopenharmony_ci	if (d)
163662306a36Sopenharmony_ci		return d;
163762306a36Sopenharmony_ci
163862306a36Sopenharmony_ci	/* short length > long length */
163962306a36Sopenharmony_ci	d = l2->length - l1->length;
164062306a36Sopenharmony_ci	if (d)
164162306a36Sopenharmony_ci		return d;
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	/* read > read/write */
164462306a36Sopenharmony_ci	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
164562306a36Sopenharmony_ci}
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_cistatic bool
164862306a36Sopenharmony_cipnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
164962306a36Sopenharmony_ci		const struct pnfs_layout_range *l2)
165062306a36Sopenharmony_ci{
165162306a36Sopenharmony_ci	return pnfs_lseg_range_cmp(l1, l2) > 0;
165262306a36Sopenharmony_ci}
165362306a36Sopenharmony_ci
/*
 * Merge predicate that never merges: it ignores both segments and always
 * returns false.  Passed as the do_merge callback by
 * pnfs_layout_insert_lseg() when the layout driver has no add_lseg hook.
 */
static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_segment *old)
{
	return false;
}
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_civoid
166262306a36Sopenharmony_cipnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
166362306a36Sopenharmony_ci		   struct pnfs_layout_segment *lseg,
166462306a36Sopenharmony_ci		   bool (*is_after)(const struct pnfs_layout_range *,
166562306a36Sopenharmony_ci			   const struct pnfs_layout_range *),
166662306a36Sopenharmony_ci		   bool (*do_merge)(struct pnfs_layout_segment *,
166762306a36Sopenharmony_ci			   struct pnfs_layout_segment *),
166862306a36Sopenharmony_ci		   struct list_head *free_me)
166962306a36Sopenharmony_ci{
167062306a36Sopenharmony_ci	struct pnfs_layout_segment *lp, *tmp;
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci	dprintk("%s:Begin\n", __func__);
167362306a36Sopenharmony_ci
167462306a36Sopenharmony_ci	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
167562306a36Sopenharmony_ci		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
167662306a36Sopenharmony_ci			continue;
167762306a36Sopenharmony_ci		if (do_merge(lseg, lp)) {
167862306a36Sopenharmony_ci			mark_lseg_invalid(lp, free_me);
167962306a36Sopenharmony_ci			continue;
168062306a36Sopenharmony_ci		}
168162306a36Sopenharmony_ci		if (is_after(&lseg->pls_range, &lp->pls_range))
168262306a36Sopenharmony_ci			continue;
168362306a36Sopenharmony_ci		list_add_tail(&lseg->pls_list, &lp->pls_list);
168462306a36Sopenharmony_ci		dprintk("%s: inserted lseg %p "
168562306a36Sopenharmony_ci			"iomode %d offset %llu length %llu before "
168662306a36Sopenharmony_ci			"lp %p iomode %d offset %llu length %llu\n",
168762306a36Sopenharmony_ci			__func__, lseg, lseg->pls_range.iomode,
168862306a36Sopenharmony_ci			lseg->pls_range.offset, lseg->pls_range.length,
168962306a36Sopenharmony_ci			lp, lp->pls_range.iomode, lp->pls_range.offset,
169062306a36Sopenharmony_ci			lp->pls_range.length);
169162306a36Sopenharmony_ci		goto out;
169262306a36Sopenharmony_ci	}
169362306a36Sopenharmony_ci	list_add_tail(&lseg->pls_list, &lo->plh_segs);
169462306a36Sopenharmony_ci	dprintk("%s: inserted lseg %p "
169562306a36Sopenharmony_ci		"iomode %d offset %llu length %llu at tail\n",
169662306a36Sopenharmony_ci		__func__, lseg, lseg->pls_range.iomode,
169762306a36Sopenharmony_ci		lseg->pls_range.offset, lseg->pls_range.length);
169862306a36Sopenharmony_ciout:
169962306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
170062306a36Sopenharmony_ci
170162306a36Sopenharmony_ci	dprintk("%s:Return\n", __func__);
170262306a36Sopenharmony_ci}
170362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_cistatic void
170662306a36Sopenharmony_cipnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
170762306a36Sopenharmony_ci		   struct pnfs_layout_segment *lseg,
170862306a36Sopenharmony_ci		   struct list_head *free_me)
170962306a36Sopenharmony_ci{
171062306a36Sopenharmony_ci	struct inode *inode = lo->plh_inode;
171162306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	if (ld->add_lseg != NULL)
171462306a36Sopenharmony_ci		ld->add_lseg(lo, lseg, free_me);
171562306a36Sopenharmony_ci	else
171662306a36Sopenharmony_ci		pnfs_generic_layout_insert_lseg(lo, lseg,
171762306a36Sopenharmony_ci				pnfs_lseg_range_is_after,
171862306a36Sopenharmony_ci				pnfs_lseg_no_merge,
171962306a36Sopenharmony_ci				free_me);
172062306a36Sopenharmony_ci}
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_cistatic struct pnfs_layout_hdr *
172362306a36Sopenharmony_cialloc_init_layout_hdr(struct inode *ino,
172462306a36Sopenharmony_ci		      struct nfs_open_context *ctx,
172562306a36Sopenharmony_ci		      gfp_t gfp_flags)
172662306a36Sopenharmony_ci{
172762306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ci	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
173062306a36Sopenharmony_ci	if (!lo)
173162306a36Sopenharmony_ci		return NULL;
173262306a36Sopenharmony_ci	refcount_set(&lo->plh_refcount, 1);
173362306a36Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_layouts);
173462306a36Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_segs);
173562306a36Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_return_segs);
173662306a36Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
173762306a36Sopenharmony_ci	lo->plh_inode = ino;
173862306a36Sopenharmony_ci	lo->plh_lc_cred = get_cred(ctx->cred);
173962306a36Sopenharmony_ci	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
174062306a36Sopenharmony_ci	return lo;
174162306a36Sopenharmony_ci}
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_cistatic struct pnfs_layout_hdr *
174462306a36Sopenharmony_cipnfs_find_alloc_layout(struct inode *ino,
174562306a36Sopenharmony_ci		       struct nfs_open_context *ctx,
174662306a36Sopenharmony_ci		       gfp_t gfp_flags)
174762306a36Sopenharmony_ci	__releases(&ino->i_lock)
174862306a36Sopenharmony_ci	__acquires(&ino->i_lock)
174962306a36Sopenharmony_ci{
175062306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
175162306a36Sopenharmony_ci	struct pnfs_layout_hdr *new = NULL;
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
175462306a36Sopenharmony_ci
175562306a36Sopenharmony_ci	if (nfsi->layout != NULL)
175662306a36Sopenharmony_ci		goto out_existing;
175762306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
175862306a36Sopenharmony_ci	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
175962306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
176262306a36Sopenharmony_ci		nfsi->layout = new;
176362306a36Sopenharmony_ci		return new;
176462306a36Sopenharmony_ci	} else if (new != NULL)
176562306a36Sopenharmony_ci		pnfs_free_layout_hdr(new);
176662306a36Sopenharmony_ciout_existing:
176762306a36Sopenharmony_ci	pnfs_get_layout_hdr(nfsi->layout);
176862306a36Sopenharmony_ci	return nfsi->layout;
176962306a36Sopenharmony_ci}
177062306a36Sopenharmony_ci
177162306a36Sopenharmony_ci/*
177262306a36Sopenharmony_ci * iomode matching rules:
177362306a36Sopenharmony_ci * iomode	lseg	strict match
177462306a36Sopenharmony_ci *                      iomode
177562306a36Sopenharmony_ci * -----	-----	------ -----
177662306a36Sopenharmony_ci * ANY		READ	N/A    true
177762306a36Sopenharmony_ci * ANY		RW	N/A    true
177862306a36Sopenharmony_ci * RW		READ	N/A    false
177962306a36Sopenharmony_ci * RW		RW	N/A    true
178062306a36Sopenharmony_ci * READ		READ	N/A    true
178162306a36Sopenharmony_ci * READ		RW	true   false
178262306a36Sopenharmony_ci * READ		RW	false  true
178362306a36Sopenharmony_ci */
178462306a36Sopenharmony_cistatic bool
178562306a36Sopenharmony_cipnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
178662306a36Sopenharmony_ci		 const struct pnfs_layout_range *range,
178762306a36Sopenharmony_ci		 bool strict_iomode)
178862306a36Sopenharmony_ci{
178962306a36Sopenharmony_ci	struct pnfs_layout_range range1;
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci	if ((range->iomode == IOMODE_RW &&
179262306a36Sopenharmony_ci	     ls_range->iomode != IOMODE_RW) ||
179362306a36Sopenharmony_ci	    (range->iomode != ls_range->iomode &&
179462306a36Sopenharmony_ci	     strict_iomode) ||
179562306a36Sopenharmony_ci	    !pnfs_lseg_range_intersecting(ls_range, range))
179662306a36Sopenharmony_ci		return false;
179762306a36Sopenharmony_ci
179862306a36Sopenharmony_ci	/* range1 covers only the first byte in the range */
179962306a36Sopenharmony_ci	range1 = *range;
180062306a36Sopenharmony_ci	range1.length = 1;
180162306a36Sopenharmony_ci	return pnfs_lseg_range_contained(ls_range, &range1);
180262306a36Sopenharmony_ci}
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_ci/*
180562306a36Sopenharmony_ci * lookup range in layout
180662306a36Sopenharmony_ci */
180762306a36Sopenharmony_cistatic struct pnfs_layout_segment *
180862306a36Sopenharmony_cipnfs_find_lseg(struct pnfs_layout_hdr *lo,
180962306a36Sopenharmony_ci		struct pnfs_layout_range *range,
181062306a36Sopenharmony_ci		bool strict_iomode)
181162306a36Sopenharmony_ci{
181262306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *ret = NULL;
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci	dprintk("%s:Begin\n", __func__);
181562306a36Sopenharmony_ci
181662306a36Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
181762306a36Sopenharmony_ci		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
181862306a36Sopenharmony_ci		    pnfs_lseg_range_match(&lseg->pls_range, range,
181962306a36Sopenharmony_ci					  strict_iomode)) {
182062306a36Sopenharmony_ci			ret = pnfs_get_lseg(lseg);
182162306a36Sopenharmony_ci			break;
182262306a36Sopenharmony_ci		}
182362306a36Sopenharmony_ci	}
182462306a36Sopenharmony_ci
182562306a36Sopenharmony_ci	dprintk("%s:Return lseg %p ref %d\n",
182662306a36Sopenharmony_ci		__func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
182762306a36Sopenharmony_ci	return ret;
182862306a36Sopenharmony_ci}
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci/*
183162306a36Sopenharmony_ci * Use mdsthreshold hints set at each OPEN to determine if I/O should go
183262306a36Sopenharmony_ci * to the MDS or over pNFS
183362306a36Sopenharmony_ci *
183462306a36Sopenharmony_ci * The nfs_inode read_io and write_io fields are cumulative counters reset
183562306a36Sopenharmony_ci * when there are no layout segments. Note that in pnfs_update_layout iomode
183662306a36Sopenharmony_ci * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
183762306a36Sopenharmony_ci * WRITE request.
183862306a36Sopenharmony_ci *
183962306a36Sopenharmony_ci * A return of true means use MDS I/O.
184062306a36Sopenharmony_ci *
184162306a36Sopenharmony_ci * From rfc 5661:
184262306a36Sopenharmony_ci * If a file's size is smaller than the file size threshold, data accesses
184362306a36Sopenharmony_ci * SHOULD be sent to the metadata server.  If an I/O request has a length that
184462306a36Sopenharmony_ci * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
184562306a36Sopenharmony_ci * server.  If both file size and I/O size are provided, the client SHOULD
184662306a36Sopenharmony_ci * reach or exceed  both thresholds before sending its read or write
184762306a36Sopenharmony_ci * requests to the data server.
184862306a36Sopenharmony_ci */
184962306a36Sopenharmony_cistatic bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
185062306a36Sopenharmony_ci				     struct inode *ino, int iomode)
185162306a36Sopenharmony_ci{
185262306a36Sopenharmony_ci	struct nfs4_threshold *t = ctx->mdsthreshold;
185362306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
185462306a36Sopenharmony_ci	loff_t fsize = i_size_read(ino);
185562306a36Sopenharmony_ci	bool size = false, size_set = false, io = false, io_set = false, ret = false;
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci	if (t == NULL)
185862306a36Sopenharmony_ci		return ret;
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
186162306a36Sopenharmony_ci		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_ci	switch (iomode) {
186462306a36Sopenharmony_ci	case IOMODE_READ:
186562306a36Sopenharmony_ci		if (t->bm & THRESHOLD_RD) {
186662306a36Sopenharmony_ci			dprintk("%s fsize %llu\n", __func__, fsize);
186762306a36Sopenharmony_ci			size_set = true;
186862306a36Sopenharmony_ci			if (fsize < t->rd_sz)
186962306a36Sopenharmony_ci				size = true;
187062306a36Sopenharmony_ci		}
187162306a36Sopenharmony_ci		if (t->bm & THRESHOLD_RD_IO) {
187262306a36Sopenharmony_ci			dprintk("%s nfsi->read_io %llu\n", __func__,
187362306a36Sopenharmony_ci				nfsi->read_io);
187462306a36Sopenharmony_ci			io_set = true;
187562306a36Sopenharmony_ci			if (nfsi->read_io < t->rd_io_sz)
187662306a36Sopenharmony_ci				io = true;
187762306a36Sopenharmony_ci		}
187862306a36Sopenharmony_ci		break;
187962306a36Sopenharmony_ci	case IOMODE_RW:
188062306a36Sopenharmony_ci		if (t->bm & THRESHOLD_WR) {
188162306a36Sopenharmony_ci			dprintk("%s fsize %llu\n", __func__, fsize);
188262306a36Sopenharmony_ci			size_set = true;
188362306a36Sopenharmony_ci			if (fsize < t->wr_sz)
188462306a36Sopenharmony_ci				size = true;
188562306a36Sopenharmony_ci		}
188662306a36Sopenharmony_ci		if (t->bm & THRESHOLD_WR_IO) {
188762306a36Sopenharmony_ci			dprintk("%s nfsi->write_io %llu\n", __func__,
188862306a36Sopenharmony_ci				nfsi->write_io);
188962306a36Sopenharmony_ci			io_set = true;
189062306a36Sopenharmony_ci			if (nfsi->write_io < t->wr_io_sz)
189162306a36Sopenharmony_ci				io = true;
189262306a36Sopenharmony_ci		}
189362306a36Sopenharmony_ci		break;
189462306a36Sopenharmony_ci	}
189562306a36Sopenharmony_ci	if (size_set && io_set) {
189662306a36Sopenharmony_ci		if (size && io)
189762306a36Sopenharmony_ci			ret = true;
189862306a36Sopenharmony_ci	} else if (size || io)
189962306a36Sopenharmony_ci		ret = true;
190062306a36Sopenharmony_ci
190162306a36Sopenharmony_ci	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
190262306a36Sopenharmony_ci	return ret;
190362306a36Sopenharmony_ci}
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_cistatic int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
190662306a36Sopenharmony_ci{
190762306a36Sopenharmony_ci	/*
190862306a36Sopenharmony_ci	 * send layoutcommit as it can hold up layoutreturn due to lseg
190962306a36Sopenharmony_ci	 * reference
191062306a36Sopenharmony_ci	 */
191162306a36Sopenharmony_ci	pnfs_layoutcommit_inode(lo->plh_inode, false);
191262306a36Sopenharmony_ci	return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
191362306a36Sopenharmony_ci				   nfs_wait_bit_killable,
191462306a36Sopenharmony_ci				   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
191562306a36Sopenharmony_ci}
191662306a36Sopenharmony_ci
191762306a36Sopenharmony_cistatic void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
191862306a36Sopenharmony_ci{
191962306a36Sopenharmony_ci	atomic_inc(&lo->plh_outstanding);
192062306a36Sopenharmony_ci}
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_cistatic void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
192362306a36Sopenharmony_ci{
192462306a36Sopenharmony_ci	if (atomic_dec_and_test(&lo->plh_outstanding) &&
192562306a36Sopenharmony_ci	    test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
192662306a36Sopenharmony_ci		wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
192762306a36Sopenharmony_ci}
192862306a36Sopenharmony_ci
192962306a36Sopenharmony_cistatic bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
193062306a36Sopenharmony_ci{
193162306a36Sopenharmony_ci	return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
193262306a36Sopenharmony_ci}
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_cistatic void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
193562306a36Sopenharmony_ci{
193662306a36Sopenharmony_ci	unsigned long *bitlock = &lo->plh_flags;
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_ci	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
193962306a36Sopenharmony_ci	smp_mb__after_atomic();
194062306a36Sopenharmony_ci	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
194162306a36Sopenharmony_ci}
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_cistatic void _add_to_server_list(struct pnfs_layout_hdr *lo,
194462306a36Sopenharmony_ci				struct nfs_server *server)
194562306a36Sopenharmony_ci{
194662306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
194762306a36Sopenharmony_ci		struct nfs_client *clp = server->nfs_client;
194862306a36Sopenharmony_ci
194962306a36Sopenharmony_ci		/* The lo must be on the clp list if there is any
195062306a36Sopenharmony_ci		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
195162306a36Sopenharmony_ci		 */
195262306a36Sopenharmony_ci		spin_lock(&clp->cl_lock);
195362306a36Sopenharmony_ci		list_add_tail_rcu(&lo->plh_layouts, &server->layouts);
195462306a36Sopenharmony_ci		spin_unlock(&clp->cl_lock);
195562306a36Sopenharmony_ci	}
195662306a36Sopenharmony_ci}
195762306a36Sopenharmony_ci
195862306a36Sopenharmony_ci/*
195962306a36Sopenharmony_ci * Layout segment is retreived from the server if not cached.
196062306a36Sopenharmony_ci * The appropriate layout segment is referenced and returned to the caller.
196162306a36Sopenharmony_ci */
196262306a36Sopenharmony_cistruct pnfs_layout_segment *
196362306a36Sopenharmony_cipnfs_update_layout(struct inode *ino,
196462306a36Sopenharmony_ci		   struct nfs_open_context *ctx,
196562306a36Sopenharmony_ci		   loff_t pos,
196662306a36Sopenharmony_ci		   u64 count,
196762306a36Sopenharmony_ci		   enum pnfs_iomode iomode,
196862306a36Sopenharmony_ci		   bool strict_iomode,
196962306a36Sopenharmony_ci		   gfp_t gfp_flags)
197062306a36Sopenharmony_ci{
197162306a36Sopenharmony_ci	struct pnfs_layout_range arg = {
197262306a36Sopenharmony_ci		.iomode = iomode,
197362306a36Sopenharmony_ci		.offset = pos,
197462306a36Sopenharmony_ci		.length = count,
197562306a36Sopenharmony_ci	};
197662306a36Sopenharmony_ci	unsigned pg_offset;
197762306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(ino);
197862306a36Sopenharmony_ci	struct nfs_client *clp = server->nfs_client;
197962306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo = NULL;
198062306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg = NULL;
198162306a36Sopenharmony_ci	struct nfs4_layoutget *lgp;
198262306a36Sopenharmony_ci	nfs4_stateid stateid;
198362306a36Sopenharmony_ci	long timeout = 0;
198462306a36Sopenharmony_ci	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
198562306a36Sopenharmony_ci	bool first;
198662306a36Sopenharmony_ci
198762306a36Sopenharmony_ci	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
198862306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
198962306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_NO_PNFS);
199062306a36Sopenharmony_ci		goto out;
199162306a36Sopenharmony_ci	}
199262306a36Sopenharmony_ci
199362306a36Sopenharmony_ci	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
199462306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
199562306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
199662306a36Sopenharmony_ci		goto out;
199762306a36Sopenharmony_ci	}
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_cilookup_again:
200062306a36Sopenharmony_ci	lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
200162306a36Sopenharmony_ci	if (IS_ERR(lseg))
200262306a36Sopenharmony_ci		goto out;
200362306a36Sopenharmony_ci	first = false;
200462306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
200562306a36Sopenharmony_ci	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
200662306a36Sopenharmony_ci	if (lo == NULL) {
200762306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
200862306a36Sopenharmony_ci		lseg = ERR_PTR(-ENOMEM);
200962306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
201062306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_NOMEM);
201162306a36Sopenharmony_ci		goto out;
201262306a36Sopenharmony_ci	}
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci	/* Do we even need to bother with this? */
201562306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
201662306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
201762306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
201862306a36Sopenharmony_ci		dprintk("%s matches recall, use MDS\n", __func__);
201962306a36Sopenharmony_ci		goto out_unlock;
202062306a36Sopenharmony_ci	}
202162306a36Sopenharmony_ci
202262306a36Sopenharmony_ci	/* if LAYOUTGET already failed once we don't try again */
202362306a36Sopenharmony_ci	if (pnfs_layout_io_test_failed(lo, iomode)) {
202462306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
202562306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
202662306a36Sopenharmony_ci		goto out_unlock;
202762306a36Sopenharmony_ci	}
202862306a36Sopenharmony_ci
202962306a36Sopenharmony_ci	/*
203062306a36Sopenharmony_ci	 * If the layout segment list is empty, but there are outstanding
203162306a36Sopenharmony_ci	 * layoutget calls, then they might be subject to a layoutrecall.
203262306a36Sopenharmony_ci	 */
203362306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
203462306a36Sopenharmony_ci	    atomic_read(&lo->plh_outstanding) != 0) {
203562306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
203662306a36Sopenharmony_ci		lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
203762306a36Sopenharmony_ci					   TASK_KILLABLE));
203862306a36Sopenharmony_ci		if (IS_ERR(lseg))
203962306a36Sopenharmony_ci			goto out_put_layout_hdr;
204062306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
204162306a36Sopenharmony_ci		goto lookup_again;
204262306a36Sopenharmony_ci	}
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_ci	/*
204562306a36Sopenharmony_ci	 * Because we free lsegs when sending LAYOUTRETURN, we need to wait
204662306a36Sopenharmony_ci	 * for LAYOUTRETURN.
204762306a36Sopenharmony_ci	 */
204862306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
204962306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
205062306a36Sopenharmony_ci		dprintk("%s wait for layoutreturn\n", __func__);
205162306a36Sopenharmony_ci		lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo));
205262306a36Sopenharmony_ci		if (!IS_ERR(lseg)) {
205362306a36Sopenharmony_ci			pnfs_put_layout_hdr(lo);
205462306a36Sopenharmony_ci			dprintk("%s retrying\n", __func__);
205562306a36Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
205662306a36Sopenharmony_ci						 lseg,
205762306a36Sopenharmony_ci						 PNFS_UPDATE_LAYOUT_RETRY);
205862306a36Sopenharmony_ci			goto lookup_again;
205962306a36Sopenharmony_ci		}
206062306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
206162306a36Sopenharmony_ci					 PNFS_UPDATE_LAYOUT_RETURN);
206262306a36Sopenharmony_ci		goto out_put_layout_hdr;
206362306a36Sopenharmony_ci	}
206462306a36Sopenharmony_ci
206562306a36Sopenharmony_ci	lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
206662306a36Sopenharmony_ci	if (lseg) {
206762306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
206862306a36Sopenharmony_ci				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
206962306a36Sopenharmony_ci		goto out_unlock;
207062306a36Sopenharmony_ci	}
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_ci	/*
207362306a36Sopenharmony_ci	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
207462306a36Sopenharmony_ci	 * stateid, or it has been invalidated, then we must use the open
207562306a36Sopenharmony_ci	 * stateid.
207662306a36Sopenharmony_ci	 */
207762306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
207862306a36Sopenharmony_ci		int status;
207962306a36Sopenharmony_ci
208062306a36Sopenharmony_ci		/*
208162306a36Sopenharmony_ci		 * The first layoutget for the file. Need to serialize per
208262306a36Sopenharmony_ci		 * RFC 5661 Errata 3208.
208362306a36Sopenharmony_ci		 */
208462306a36Sopenharmony_ci		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
208562306a36Sopenharmony_ci				     &lo->plh_flags)) {
208662306a36Sopenharmony_ci			spin_unlock(&ino->i_lock);
208762306a36Sopenharmony_ci			lseg = ERR_PTR(wait_on_bit(&lo->plh_flags,
208862306a36Sopenharmony_ci						NFS_LAYOUT_FIRST_LAYOUTGET,
208962306a36Sopenharmony_ci						TASK_KILLABLE));
209062306a36Sopenharmony_ci			if (IS_ERR(lseg))
209162306a36Sopenharmony_ci				goto out_put_layout_hdr;
209262306a36Sopenharmony_ci			pnfs_put_layout_hdr(lo);
209362306a36Sopenharmony_ci			dprintk("%s retrying\n", __func__);
209462306a36Sopenharmony_ci			goto lookup_again;
209562306a36Sopenharmony_ci		}
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci		spin_unlock(&ino->i_lock);
209862306a36Sopenharmony_ci		first = true;
209962306a36Sopenharmony_ci		status = nfs4_select_rw_stateid(ctx->state,
210062306a36Sopenharmony_ci					iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
210162306a36Sopenharmony_ci					NULL, &stateid, NULL);
210262306a36Sopenharmony_ci		if (status != 0) {
210362306a36Sopenharmony_ci			lseg = ERR_PTR(status);
210462306a36Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count,
210562306a36Sopenharmony_ci					iomode, lo, lseg,
210662306a36Sopenharmony_ci					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
210762306a36Sopenharmony_ci			nfs4_schedule_stateid_recovery(server, ctx->state);
210862306a36Sopenharmony_ci			pnfs_clear_first_layoutget(lo);
210962306a36Sopenharmony_ci			pnfs_put_layout_hdr(lo);
211062306a36Sopenharmony_ci			goto lookup_again;
211162306a36Sopenharmony_ci		}
211262306a36Sopenharmony_ci		spin_lock(&ino->i_lock);
211362306a36Sopenharmony_ci	} else {
211462306a36Sopenharmony_ci		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
211562306a36Sopenharmony_ci	}
211662306a36Sopenharmony_ci
211762306a36Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo)) {
211862306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
211962306a36Sopenharmony_ci				PNFS_UPDATE_LAYOUT_BLOCKED);
212062306a36Sopenharmony_ci		goto out_unlock;
212162306a36Sopenharmony_ci	}
212262306a36Sopenharmony_ci	nfs_layoutget_begin(lo);
212362306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
212462306a36Sopenharmony_ci
212562306a36Sopenharmony_ci	_add_to_server_list(lo, server);
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_ci	pg_offset = arg.offset & ~PAGE_MASK;
212862306a36Sopenharmony_ci	if (pg_offset) {
212962306a36Sopenharmony_ci		arg.offset -= pg_offset;
213062306a36Sopenharmony_ci		arg.length += pg_offset;
213162306a36Sopenharmony_ci	}
213262306a36Sopenharmony_ci	if (arg.length != NFS4_MAX_UINT64)
213362306a36Sopenharmony_ci		arg.length = PAGE_ALIGN(arg.length);
213462306a36Sopenharmony_ci
213562306a36Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
213662306a36Sopenharmony_ci	if (!lgp) {
213762306a36Sopenharmony_ci		lseg = ERR_PTR(-ENOMEM);
213862306a36Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
213962306a36Sopenharmony_ci					 PNFS_UPDATE_LAYOUT_NOMEM);
214062306a36Sopenharmony_ci		nfs_layoutget_end(lo);
214162306a36Sopenharmony_ci		goto out_put_layout_hdr;
214262306a36Sopenharmony_ci	}
214362306a36Sopenharmony_ci
214462306a36Sopenharmony_ci	lgp->lo = lo;
214562306a36Sopenharmony_ci	pnfs_get_layout_hdr(lo);
214662306a36Sopenharmony_ci
214762306a36Sopenharmony_ci	lseg = nfs4_proc_layoutget(lgp, &timeout);
214862306a36Sopenharmony_ci	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
214962306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
215062306a36Sopenharmony_ci	nfs_layoutget_end(lo);
215162306a36Sopenharmony_ci	if (IS_ERR(lseg)) {
215262306a36Sopenharmony_ci		switch(PTR_ERR(lseg)) {
215362306a36Sopenharmony_ci		case -EBUSY:
215462306a36Sopenharmony_ci			if (time_after(jiffies, giveup))
215562306a36Sopenharmony_ci				lseg = NULL;
215662306a36Sopenharmony_ci			break;
215762306a36Sopenharmony_ci		case -ERECALLCONFLICT:
215862306a36Sopenharmony_ci		case -EAGAIN:
215962306a36Sopenharmony_ci			break;
216062306a36Sopenharmony_ci		case -ENODATA:
216162306a36Sopenharmony_ci			/* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
216262306a36Sopenharmony_ci			pnfs_layout_set_fail_bit(
216362306a36Sopenharmony_ci				lo, pnfs_iomode_to_fail_bit(iomode));
216462306a36Sopenharmony_ci			lseg = NULL;
216562306a36Sopenharmony_ci			goto out_put_layout_hdr;
216662306a36Sopenharmony_ci		default:
216762306a36Sopenharmony_ci			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
216862306a36Sopenharmony_ci				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
216962306a36Sopenharmony_ci				lseg = NULL;
217062306a36Sopenharmony_ci			}
217162306a36Sopenharmony_ci			goto out_put_layout_hdr;
217262306a36Sopenharmony_ci		}
217362306a36Sopenharmony_ci		if (lseg) {
217462306a36Sopenharmony_ci			if (first)
217562306a36Sopenharmony_ci				pnfs_clear_first_layoutget(lo);
217662306a36Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count,
217762306a36Sopenharmony_ci				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
217862306a36Sopenharmony_ci			pnfs_put_layout_hdr(lo);
217962306a36Sopenharmony_ci			goto lookup_again;
218062306a36Sopenharmony_ci		}
218162306a36Sopenharmony_ci	} else {
218262306a36Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
218362306a36Sopenharmony_ci	}
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_ciout_put_layout_hdr:
218662306a36Sopenharmony_ci	if (first)
218762306a36Sopenharmony_ci		pnfs_clear_first_layoutget(lo);
218862306a36Sopenharmony_ci	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
218962306a36Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_EXIT);
219062306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
219162306a36Sopenharmony_ciout:
219262306a36Sopenharmony_ci	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
219362306a36Sopenharmony_ci			"(%s, offset: %llu, length: %llu)\n",
219462306a36Sopenharmony_ci			__func__, ino->i_sb->s_id,
219562306a36Sopenharmony_ci			(unsigned long long)NFS_FILEID(ino),
219662306a36Sopenharmony_ci			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
219762306a36Sopenharmony_ci			iomode==IOMODE_RW ?  "read/write" : "read-only",
219862306a36Sopenharmony_ci			(unsigned long long)pos,
219962306a36Sopenharmony_ci			(unsigned long long)count);
220062306a36Sopenharmony_ci	return lseg;
220162306a36Sopenharmony_ciout_unlock:
220262306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
220362306a36Sopenharmony_ci	goto out_put_layout_hdr;
220462306a36Sopenharmony_ci}
220562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_update_layout);
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_cistatic bool
220862306a36Sopenharmony_cipnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
220962306a36Sopenharmony_ci{
221062306a36Sopenharmony_ci	switch (range->iomode) {
221162306a36Sopenharmony_ci	case IOMODE_READ:
221262306a36Sopenharmony_ci	case IOMODE_RW:
221362306a36Sopenharmony_ci		break;
221462306a36Sopenharmony_ci	default:
221562306a36Sopenharmony_ci		return false;
221662306a36Sopenharmony_ci	}
221762306a36Sopenharmony_ci	if (range->offset == NFS4_MAX_UINT64)
221862306a36Sopenharmony_ci		return false;
221962306a36Sopenharmony_ci	if (range->length == 0)
222062306a36Sopenharmony_ci		return false;
222162306a36Sopenharmony_ci	if (range->length != NFS4_MAX_UINT64 &&
222262306a36Sopenharmony_ci	    range->length > NFS4_MAX_UINT64 - range->offset)
222362306a36Sopenharmony_ci		return false;
222462306a36Sopenharmony_ci	return true;
222562306a36Sopenharmony_ci}
222662306a36Sopenharmony_ci
222762306a36Sopenharmony_cistatic struct pnfs_layout_hdr *
222862306a36Sopenharmony_ci_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
222962306a36Sopenharmony_ci{
223062306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
223162306a36Sopenharmony_ci
223262306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
223362306a36Sopenharmony_ci	lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask());
223462306a36Sopenharmony_ci	if (!lo)
223562306a36Sopenharmony_ci		goto out_unlock;
223662306a36Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
223762306a36Sopenharmony_ci		goto out_unlock;
223862306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
223962306a36Sopenharmony_ci		goto out_unlock;
224062306a36Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo))
224162306a36Sopenharmony_ci		goto out_unlock;
224262306a36Sopenharmony_ci	if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
224362306a36Sopenharmony_ci		goto out_unlock;
224462306a36Sopenharmony_ci	nfs_layoutget_begin(lo);
224562306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
224662306a36Sopenharmony_ci	_add_to_server_list(lo, NFS_SERVER(ino));
224762306a36Sopenharmony_ci	return lo;
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ciout_unlock:
225062306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
225162306a36Sopenharmony_ci	pnfs_put_layout_hdr(lo);
225262306a36Sopenharmony_ci	return NULL;
225362306a36Sopenharmony_ci}
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_cistatic void _lgopen_prepare_attached(struct nfs4_opendata *data,
225662306a36Sopenharmony_ci				     struct nfs_open_context *ctx)
225762306a36Sopenharmony_ci{
225862306a36Sopenharmony_ci	struct inode *ino = data->dentry->d_inode;
225962306a36Sopenharmony_ci	struct pnfs_layout_range rng = {
226062306a36Sopenharmony_ci		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
226162306a36Sopenharmony_ci			  IOMODE_RW: IOMODE_READ,
226262306a36Sopenharmony_ci		.offset = 0,
226362306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
226462306a36Sopenharmony_ci	};
226562306a36Sopenharmony_ci	struct nfs4_layoutget *lgp;
226662306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci	/* Heuristic: don't send layoutget if we have cached data */
226962306a36Sopenharmony_ci	if (rng.iomode == IOMODE_READ &&
227062306a36Sopenharmony_ci	   (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
227162306a36Sopenharmony_ci		return;
227262306a36Sopenharmony_ci
227362306a36Sopenharmony_ci	lo = _pnfs_grab_empty_layout(ino, ctx);
227462306a36Sopenharmony_ci	if (!lo)
227562306a36Sopenharmony_ci		return;
227662306a36Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
227762306a36Sopenharmony_ci					     nfs_io_gfp_mask());
227862306a36Sopenharmony_ci	if (!lgp) {
227962306a36Sopenharmony_ci		pnfs_clear_first_layoutget(lo);
228062306a36Sopenharmony_ci		nfs_layoutget_end(lo);
228162306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
228262306a36Sopenharmony_ci		return;
228362306a36Sopenharmony_ci	}
228462306a36Sopenharmony_ci	lgp->lo = lo;
228562306a36Sopenharmony_ci	data->lgp = lgp;
228662306a36Sopenharmony_ci	data->o_arg.lg_args = &lgp->args;
228762306a36Sopenharmony_ci	data->o_res.lg_res = &lgp->res;
228862306a36Sopenharmony_ci}
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_cistatic void _lgopen_prepare_floating(struct nfs4_opendata *data,
229162306a36Sopenharmony_ci				     struct nfs_open_context *ctx)
229262306a36Sopenharmony_ci{
229362306a36Sopenharmony_ci	struct inode *ino = data->dentry->d_inode;
229462306a36Sopenharmony_ci	struct pnfs_layout_range rng = {
229562306a36Sopenharmony_ci		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
229662306a36Sopenharmony_ci			  IOMODE_RW: IOMODE_READ,
229762306a36Sopenharmony_ci		.offset = 0,
229862306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
229962306a36Sopenharmony_ci	};
230062306a36Sopenharmony_ci	struct nfs4_layoutget *lgp;
230162306a36Sopenharmony_ci
230262306a36Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
230362306a36Sopenharmony_ci					     nfs_io_gfp_mask());
230462306a36Sopenharmony_ci	if (!lgp)
230562306a36Sopenharmony_ci		return;
230662306a36Sopenharmony_ci	data->lgp = lgp;
230762306a36Sopenharmony_ci	data->o_arg.lg_args = &lgp->args;
230862306a36Sopenharmony_ci	data->o_res.lg_res = &lgp->res;
230962306a36Sopenharmony_ci}
231062306a36Sopenharmony_ci
231162306a36Sopenharmony_civoid pnfs_lgopen_prepare(struct nfs4_opendata *data,
231262306a36Sopenharmony_ci			 struct nfs_open_context *ctx)
231362306a36Sopenharmony_ci{
231462306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
231562306a36Sopenharmony_ci
231662306a36Sopenharmony_ci	if (!(pnfs_enabled_sb(server) &&
231762306a36Sopenharmony_ci	      server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
231862306a36Sopenharmony_ci		return;
231962306a36Sopenharmony_ci	/* Could check on max_ops, but currently hardcoded high enough */
232062306a36Sopenharmony_ci	if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
232162306a36Sopenharmony_ci		return;
232262306a36Sopenharmony_ci	if (data->lgp)
232362306a36Sopenharmony_ci		return;
232462306a36Sopenharmony_ci	if (data->state)
232562306a36Sopenharmony_ci		_lgopen_prepare_attached(data, ctx);
232662306a36Sopenharmony_ci	else
232762306a36Sopenharmony_ci		_lgopen_prepare_floating(data, ctx);
232862306a36Sopenharmony_ci}
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_civoid pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
233162306a36Sopenharmony_ci		       struct nfs_open_context *ctx)
233262306a36Sopenharmony_ci{
233362306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
233462306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg;
233562306a36Sopenharmony_ci	struct nfs_server *srv = NFS_SERVER(ino);
233662306a36Sopenharmony_ci	u32 iomode;
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_ci	if (!lgp)
233962306a36Sopenharmony_ci		return;
234062306a36Sopenharmony_ci	dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
234162306a36Sopenharmony_ci	if (lgp->res.status) {
234262306a36Sopenharmony_ci		switch (lgp->res.status) {
234362306a36Sopenharmony_ci		default:
234462306a36Sopenharmony_ci			break;
234562306a36Sopenharmony_ci		/*
234662306a36Sopenharmony_ci		 * Halt lgopen attempts if the server doesn't recognise
234762306a36Sopenharmony_ci		 * the "current stateid" value, the layout type, or the
234862306a36Sopenharmony_ci		 * layoutget operation as being valid.
234962306a36Sopenharmony_ci		 * Also if it complains about too many ops in the compound
235062306a36Sopenharmony_ci		 * or of the request/reply being too big.
235162306a36Sopenharmony_ci		 */
235262306a36Sopenharmony_ci		case -NFS4ERR_BAD_STATEID:
235362306a36Sopenharmony_ci		case -NFS4ERR_NOTSUPP:
235462306a36Sopenharmony_ci		case -NFS4ERR_REP_TOO_BIG:
235562306a36Sopenharmony_ci		case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
235662306a36Sopenharmony_ci		case -NFS4ERR_REQ_TOO_BIG:
235762306a36Sopenharmony_ci		case -NFS4ERR_TOO_MANY_OPS:
235862306a36Sopenharmony_ci		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
235962306a36Sopenharmony_ci			srv->caps &= ~NFS_CAP_LGOPEN;
236062306a36Sopenharmony_ci		}
236162306a36Sopenharmony_ci		return;
236262306a36Sopenharmony_ci	}
236362306a36Sopenharmony_ci	if (!lgp->lo) {
236462306a36Sopenharmony_ci		lo = _pnfs_grab_empty_layout(ino, ctx);
236562306a36Sopenharmony_ci		if (!lo)
236662306a36Sopenharmony_ci			return;
236762306a36Sopenharmony_ci		lgp->lo = lo;
236862306a36Sopenharmony_ci	} else
236962306a36Sopenharmony_ci		lo = lgp->lo;
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_ci	lseg = pnfs_layout_process(lgp);
237262306a36Sopenharmony_ci	if (!IS_ERR(lseg)) {
237362306a36Sopenharmony_ci		iomode = lgp->args.range.iomode;
237462306a36Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
237562306a36Sopenharmony_ci		pnfs_put_lseg(lseg);
237662306a36Sopenharmony_ci	}
237762306a36Sopenharmony_ci}
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_civoid nfs4_lgopen_release(struct nfs4_layoutget *lgp)
238062306a36Sopenharmony_ci{
238162306a36Sopenharmony_ci	if (lgp != NULL) {
238262306a36Sopenharmony_ci		if (lgp->lo) {
238362306a36Sopenharmony_ci			pnfs_clear_first_layoutget(lgp->lo);
238462306a36Sopenharmony_ci			nfs_layoutget_end(lgp->lo);
238562306a36Sopenharmony_ci		}
238662306a36Sopenharmony_ci		pnfs_layoutget_free(lgp);
238762306a36Sopenharmony_ci	}
238862306a36Sopenharmony_ci}
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_cistruct pnfs_layout_segment *
239162306a36Sopenharmony_cipnfs_layout_process(struct nfs4_layoutget *lgp)
239262306a36Sopenharmony_ci{
239362306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo = lgp->lo;
239462306a36Sopenharmony_ci	struct nfs4_layoutget_res *res = &lgp->res;
239562306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg;
239662306a36Sopenharmony_ci	struct inode *ino = lo->plh_inode;
239762306a36Sopenharmony_ci	LIST_HEAD(free_me);
239862306a36Sopenharmony_ci
239962306a36Sopenharmony_ci	if (!pnfs_sanity_check_layout_range(&res->range))
240062306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	/* Inject layout blob into I/O device driver */
240362306a36Sopenharmony_ci	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
240462306a36Sopenharmony_ci	if (IS_ERR_OR_NULL(lseg)) {
240562306a36Sopenharmony_ci		if (!lseg)
240662306a36Sopenharmony_ci			lseg = ERR_PTR(-ENOMEM);
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci		dprintk("%s: Could not allocate layout: error %ld\n",
240962306a36Sopenharmony_ci		       __func__, PTR_ERR(lseg));
241062306a36Sopenharmony_ci		return lseg;
241162306a36Sopenharmony_ci	}
241262306a36Sopenharmony_ci
241362306a36Sopenharmony_ci	pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
241462306a36Sopenharmony_ci
241562306a36Sopenharmony_ci	spin_lock(&ino->i_lock);
241662306a36Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo)) {
241762306a36Sopenharmony_ci		dprintk("%s forget reply due to state\n", __func__);
241862306a36Sopenharmony_ci		goto out_forget;
241962306a36Sopenharmony_ci	}
242062306a36Sopenharmony_ci
242162306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
242262306a36Sopenharmony_ci	    !pnfs_is_first_layoutget(lo))
242362306a36Sopenharmony_ci		goto out_forget;
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci	if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
242662306a36Sopenharmony_ci		/* existing state ID, make sure the sequence number matches. */
242762306a36Sopenharmony_ci		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
242862306a36Sopenharmony_ci			if (!pnfs_layout_is_valid(lo))
242962306a36Sopenharmony_ci				lo->plh_barrier = 0;
243062306a36Sopenharmony_ci			dprintk("%s forget reply due to sequence\n", __func__);
243162306a36Sopenharmony_ci			goto out_forget;
243262306a36Sopenharmony_ci		}
243362306a36Sopenharmony_ci		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false);
243462306a36Sopenharmony_ci	} else if (pnfs_layout_is_valid(lo)) {
243562306a36Sopenharmony_ci		/*
243662306a36Sopenharmony_ci		 * We got an entirely new state ID.  Mark all segments for the
243762306a36Sopenharmony_ci		 * inode invalid, and retry the layoutget
243862306a36Sopenharmony_ci		 */
243962306a36Sopenharmony_ci		struct pnfs_layout_range range = {
244062306a36Sopenharmony_ci			.iomode = IOMODE_ANY,
244162306a36Sopenharmony_ci			.length = NFS4_MAX_UINT64,
244262306a36Sopenharmony_ci		};
244362306a36Sopenharmony_ci		pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
244462306a36Sopenharmony_ci		goto out_forget;
244562306a36Sopenharmony_ci	} else {
244662306a36Sopenharmony_ci		/* We have a completely new layout */
244762306a36Sopenharmony_ci		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
244862306a36Sopenharmony_ci	}
244962306a36Sopenharmony_ci
245062306a36Sopenharmony_ci	pnfs_get_lseg(lseg);
245162306a36Sopenharmony_ci	pnfs_layout_insert_lseg(lo, lseg, &free_me);
245262306a36Sopenharmony_ci
245362306a36Sopenharmony_ci
245462306a36Sopenharmony_ci	if (res->return_on_close)
245562306a36Sopenharmony_ci		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
245662306a36Sopenharmony_ci
245762306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
245862306a36Sopenharmony_ci	pnfs_free_lseg_list(&free_me);
245962306a36Sopenharmony_ci	return lseg;
246062306a36Sopenharmony_ci
246162306a36Sopenharmony_ciout_forget:
246262306a36Sopenharmony_ci	spin_unlock(&ino->i_lock);
246362306a36Sopenharmony_ci	lseg->pls_layout = lo;
246462306a36Sopenharmony_ci	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
246562306a36Sopenharmony_ci	return ERR_PTR(-EAGAIN);
246662306a36Sopenharmony_ci}
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci/**
246962306a36Sopenharmony_ci * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
247062306a36Sopenharmony_ci * @lo: pointer to layout header
247162306a36Sopenharmony_ci * @tmp_list: list header to be used with pnfs_free_lseg_list()
247262306a36Sopenharmony_ci * @return_range: describe layout segment ranges to be returned
247362306a36Sopenharmony_ci * @seq: stateid seqid to match
247462306a36Sopenharmony_ci *
247562306a36Sopenharmony_ci * This function is mainly intended for use by layoutrecall. It attempts
247662306a36Sopenharmony_ci * to free the layout segment immediately, or else to mark it for return
247762306a36Sopenharmony_ci * as soon as its reference count drops to zero.
247862306a36Sopenharmony_ci *
247962306a36Sopenharmony_ci * Returns
248062306a36Sopenharmony_ci * - 0: a layoutreturn needs to be scheduled.
248162306a36Sopenharmony_ci * - EBUSY: there are layout segment that are still in use.
248262306a36Sopenharmony_ci * - ENOENT: there are no layout segments that need to be returned.
248362306a36Sopenharmony_ci */
248462306a36Sopenharmony_ciint
248562306a36Sopenharmony_cipnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
248662306a36Sopenharmony_ci				struct list_head *tmp_list,
248762306a36Sopenharmony_ci				const struct pnfs_layout_range *return_range,
248862306a36Sopenharmony_ci				u32 seq)
248962306a36Sopenharmony_ci{
249062306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
249162306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
249262306a36Sopenharmony_ci	int remaining = 0;
249362306a36Sopenharmony_ci
249462306a36Sopenharmony_ci	dprintk("%s:Begin lo %p\n", __func__, lo);
249562306a36Sopenharmony_ci
249662306a36Sopenharmony_ci	assert_spin_locked(&lo->plh_inode->i_lock);
249762306a36Sopenharmony_ci
249862306a36Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
249962306a36Sopenharmony_ci		tmp_list = &lo->plh_return_segs;
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
250262306a36Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
250362306a36Sopenharmony_ci			dprintk("%s: marking lseg %p iomode %d "
250462306a36Sopenharmony_ci				"offset %llu length %llu\n", __func__,
250562306a36Sopenharmony_ci				lseg, lseg->pls_range.iomode,
250662306a36Sopenharmony_ci				lseg->pls_range.offset,
250762306a36Sopenharmony_ci				lseg->pls_range.length);
250862306a36Sopenharmony_ci			if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
250962306a36Sopenharmony_ci				tmp_list = &lo->plh_return_segs;
251062306a36Sopenharmony_ci			if (mark_lseg_invalid(lseg, tmp_list))
251162306a36Sopenharmony_ci				continue;
251262306a36Sopenharmony_ci			remaining++;
251362306a36Sopenharmony_ci			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
251462306a36Sopenharmony_ci			pnfs_lseg_cancel_io(server, lseg);
251562306a36Sopenharmony_ci		}
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_ci	if (remaining) {
251862306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
251962306a36Sopenharmony_ci		return -EBUSY;
252062306a36Sopenharmony_ci	}
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci	if (!list_empty(&lo->plh_return_segs)) {
252362306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
252462306a36Sopenharmony_ci		return 0;
252562306a36Sopenharmony_ci	}
252662306a36Sopenharmony_ci
252762306a36Sopenharmony_ci	return -ENOENT;
252862306a36Sopenharmony_ci}
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_cistatic void
253162306a36Sopenharmony_cipnfs_mark_layout_for_return(struct inode *inode,
253262306a36Sopenharmony_ci			    const struct pnfs_layout_range *range)
253362306a36Sopenharmony_ci{
253462306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
253562306a36Sopenharmony_ci	bool return_now = false;
253662306a36Sopenharmony_ci
253762306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
253862306a36Sopenharmony_ci	lo = NFS_I(inode)->layout;
253962306a36Sopenharmony_ci	if (!pnfs_layout_is_valid(lo)) {
254062306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
254162306a36Sopenharmony_ci		return;
254262306a36Sopenharmony_ci	}
254362306a36Sopenharmony_ci	pnfs_set_plh_return_info(lo, range->iomode, 0);
254462306a36Sopenharmony_ci	/*
254562306a36Sopenharmony_ci	 * mark all matching lsegs so that we are sure to have no live
254662306a36Sopenharmony_ci	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
254762306a36Sopenharmony_ci	 * for how it works.
254862306a36Sopenharmony_ci	 */
254962306a36Sopenharmony_ci	if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
255062306a36Sopenharmony_ci		const struct cred *cred;
255162306a36Sopenharmony_ci		nfs4_stateid stateid;
255262306a36Sopenharmony_ci		enum pnfs_iomode iomode;
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_ci		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
255562306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
255662306a36Sopenharmony_ci		if (return_now)
255762306a36Sopenharmony_ci			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
255862306a36Sopenharmony_ci	} else {
255962306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
256062306a36Sopenharmony_ci		nfs_commit_inode(inode, 0);
256162306a36Sopenharmony_ci	}
256262306a36Sopenharmony_ci}
256362306a36Sopenharmony_ci
256462306a36Sopenharmony_civoid pnfs_error_mark_layout_for_return(struct inode *inode,
256562306a36Sopenharmony_ci				       struct pnfs_layout_segment *lseg)
256662306a36Sopenharmony_ci{
256762306a36Sopenharmony_ci	struct pnfs_layout_range range = {
256862306a36Sopenharmony_ci		.iomode = lseg->pls_range.iomode,
256962306a36Sopenharmony_ci		.offset = 0,
257062306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
257162306a36Sopenharmony_ci	};
257262306a36Sopenharmony_ci
257362306a36Sopenharmony_ci	pnfs_mark_layout_for_return(inode, &range);
257462306a36Sopenharmony_ci}
257562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_cistatic bool
257862306a36Sopenharmony_cipnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
257962306a36Sopenharmony_ci{
258062306a36Sopenharmony_ci	return pnfs_layout_is_valid(lo) &&
258162306a36Sopenharmony_ci		!test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
258262306a36Sopenharmony_ci		!test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
258362306a36Sopenharmony_ci}
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_cistatic struct pnfs_layout_segment *
258662306a36Sopenharmony_cipnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
258762306a36Sopenharmony_ci		     const struct pnfs_layout_range *range,
258862306a36Sopenharmony_ci		     enum pnfs_iomode iomode)
258962306a36Sopenharmony_ci{
259062306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg;
259162306a36Sopenharmony_ci
259262306a36Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
259362306a36Sopenharmony_ci		if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
259462306a36Sopenharmony_ci			continue;
259562306a36Sopenharmony_ci		if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
259662306a36Sopenharmony_ci			continue;
259762306a36Sopenharmony_ci		if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
259862306a36Sopenharmony_ci			continue;
259962306a36Sopenharmony_ci		if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
260062306a36Sopenharmony_ci			return lseg;
260162306a36Sopenharmony_ci	}
260262306a36Sopenharmony_ci	return NULL;
260362306a36Sopenharmony_ci}
260462306a36Sopenharmony_ci
260562306a36Sopenharmony_ci/* Find open file states whose mode matches that of the range */
260662306a36Sopenharmony_cistatic bool
260762306a36Sopenharmony_cipnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
260862306a36Sopenharmony_ci				 const struct pnfs_layout_range *range)
260962306a36Sopenharmony_ci{
261062306a36Sopenharmony_ci	struct list_head *head;
261162306a36Sopenharmony_ci	struct nfs_open_context *ctx;
261262306a36Sopenharmony_ci	fmode_t mode = 0;
261362306a36Sopenharmony_ci
261462306a36Sopenharmony_ci	if (!pnfs_layout_can_be_returned(lo) ||
261562306a36Sopenharmony_ci	    !pnfs_find_first_lseg(lo, range, range->iomode))
261662306a36Sopenharmony_ci		return false;
261762306a36Sopenharmony_ci
261862306a36Sopenharmony_ci	head = &NFS_I(lo->plh_inode)->open_files;
261962306a36Sopenharmony_ci	list_for_each_entry_rcu(ctx, head, list) {
262062306a36Sopenharmony_ci		if (ctx->state)
262162306a36Sopenharmony_ci			mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
262262306a36Sopenharmony_ci	}
262362306a36Sopenharmony_ci
262462306a36Sopenharmony_ci	switch (range->iomode) {
262562306a36Sopenharmony_ci	default:
262662306a36Sopenharmony_ci		break;
262762306a36Sopenharmony_ci	case IOMODE_READ:
262862306a36Sopenharmony_ci		mode &= ~FMODE_WRITE;
262962306a36Sopenharmony_ci		break;
263062306a36Sopenharmony_ci	case IOMODE_RW:
263162306a36Sopenharmony_ci		if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
263262306a36Sopenharmony_ci			mode &= ~FMODE_READ;
263362306a36Sopenharmony_ci	}
263462306a36Sopenharmony_ci	return mode == 0;
263562306a36Sopenharmony_ci}
263662306a36Sopenharmony_ci
263762306a36Sopenharmony_cistatic int pnfs_layout_return_unused_byserver(struct nfs_server *server,
263862306a36Sopenharmony_ci					      void *data)
263962306a36Sopenharmony_ci{
264062306a36Sopenharmony_ci	const struct pnfs_layout_range *range = data;
264162306a36Sopenharmony_ci	const struct cred *cred;
264262306a36Sopenharmony_ci	struct pnfs_layout_hdr *lo;
264362306a36Sopenharmony_ci	struct inode *inode;
264462306a36Sopenharmony_ci	nfs4_stateid stateid;
264562306a36Sopenharmony_ci	enum pnfs_iomode iomode;
264662306a36Sopenharmony_ci
264762306a36Sopenharmony_cirestart:
264862306a36Sopenharmony_ci	rcu_read_lock();
264962306a36Sopenharmony_ci	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
265062306a36Sopenharmony_ci		inode = lo->plh_inode;
265162306a36Sopenharmony_ci		if (!inode || !pnfs_layout_can_be_returned(lo) ||
265262306a36Sopenharmony_ci		    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
265362306a36Sopenharmony_ci			continue;
265462306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
265562306a36Sopenharmony_ci		if (!lo->plh_inode ||
265662306a36Sopenharmony_ci		    !pnfs_should_return_unused_layout(lo, range)) {
265762306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
265862306a36Sopenharmony_ci			continue;
265962306a36Sopenharmony_ci		}
266062306a36Sopenharmony_ci		pnfs_get_layout_hdr(lo);
266162306a36Sopenharmony_ci		pnfs_set_plh_return_info(lo, range->iomode, 0);
266262306a36Sopenharmony_ci		if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
266362306a36Sopenharmony_ci						    range, 0) != 0 ||
266462306a36Sopenharmony_ci		    !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
266562306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
266662306a36Sopenharmony_ci			rcu_read_unlock();
266762306a36Sopenharmony_ci			pnfs_put_layout_hdr(lo);
266862306a36Sopenharmony_ci			cond_resched();
266962306a36Sopenharmony_ci			goto restart;
267062306a36Sopenharmony_ci		}
267162306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
267262306a36Sopenharmony_ci		rcu_read_unlock();
267362306a36Sopenharmony_ci		pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
267462306a36Sopenharmony_ci		pnfs_put_layout_hdr(lo);
267562306a36Sopenharmony_ci		cond_resched();
267662306a36Sopenharmony_ci		goto restart;
267762306a36Sopenharmony_ci	}
267862306a36Sopenharmony_ci	rcu_read_unlock();
267962306a36Sopenharmony_ci	return 0;
268062306a36Sopenharmony_ci}
268162306a36Sopenharmony_ci
268262306a36Sopenharmony_civoid
268362306a36Sopenharmony_cipnfs_layout_return_unused_byclid(struct nfs_client *clp,
268462306a36Sopenharmony_ci				 enum pnfs_iomode iomode)
268562306a36Sopenharmony_ci{
268662306a36Sopenharmony_ci	struct pnfs_layout_range range = {
268762306a36Sopenharmony_ci		.iomode = iomode,
268862306a36Sopenharmony_ci		.offset = 0,
268962306a36Sopenharmony_ci		.length = NFS4_MAX_UINT64,
269062306a36Sopenharmony_ci	};
269162306a36Sopenharmony_ci
269262306a36Sopenharmony_ci	nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
269362306a36Sopenharmony_ci			&range);
269462306a36Sopenharmony_ci}
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_civoid
269762306a36Sopenharmony_cipnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
269862306a36Sopenharmony_ci{
269962306a36Sopenharmony_ci	if (pgio->pg_lseg == NULL ||
270062306a36Sopenharmony_ci	    test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
270162306a36Sopenharmony_ci		return;
270262306a36Sopenharmony_ci	pnfs_put_lseg(pgio->pg_lseg);
270362306a36Sopenharmony_ci	pgio->pg_lseg = NULL;
270462306a36Sopenharmony_ci}
270562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci/*
270862306a36Sopenharmony_ci * Check for any intersection between the request and the pgio->pg_lseg,
270962306a36Sopenharmony_ci * and if none, put this pgio->pg_lseg away.
271062306a36Sopenharmony_ci */
271162306a36Sopenharmony_civoid
271262306a36Sopenharmony_cipnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
271362306a36Sopenharmony_ci{
271462306a36Sopenharmony_ci	if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
271562306a36Sopenharmony_ci		pnfs_put_lseg(pgio->pg_lseg);
271662306a36Sopenharmony_ci		pgio->pg_lseg = NULL;
271762306a36Sopenharmony_ci	}
271862306a36Sopenharmony_ci}
271962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_civoid
272262306a36Sopenharmony_cipnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
272362306a36Sopenharmony_ci{
272462306a36Sopenharmony_ci	u64 rd_size;
272562306a36Sopenharmony_ci
272662306a36Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
272762306a36Sopenharmony_ci	pnfs_generic_pg_check_range(pgio, req);
272862306a36Sopenharmony_ci	if (pgio->pg_lseg == NULL) {
272962306a36Sopenharmony_ci		if (pgio->pg_dreq == NULL)
273062306a36Sopenharmony_ci			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
273162306a36Sopenharmony_ci		else
273262306a36Sopenharmony_ci			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
273362306a36Sopenharmony_ci						      req_offset(req));
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci		pgio->pg_lseg =
273662306a36Sopenharmony_ci			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
273762306a36Sopenharmony_ci					   req_offset(req), rd_size,
273862306a36Sopenharmony_ci					   IOMODE_READ, false,
273962306a36Sopenharmony_ci					   nfs_io_gfp_mask());
274062306a36Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
274162306a36Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
274262306a36Sopenharmony_ci			pgio->pg_lseg = NULL;
274362306a36Sopenharmony_ci			return;
274462306a36Sopenharmony_ci		}
274562306a36Sopenharmony_ci	}
274662306a36Sopenharmony_ci	/* If no lseg, fall back to read through mds */
274762306a36Sopenharmony_ci	if (pgio->pg_lseg == NULL)
274862306a36Sopenharmony_ci		nfs_pageio_reset_read_mds(pgio);
274962306a36Sopenharmony_ci
275062306a36Sopenharmony_ci}
275162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
275262306a36Sopenharmony_ci
275362306a36Sopenharmony_civoid
275462306a36Sopenharmony_cipnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
275562306a36Sopenharmony_ci			   struct nfs_page *req, u64 wb_size)
275662306a36Sopenharmony_ci{
275762306a36Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
275862306a36Sopenharmony_ci	pnfs_generic_pg_check_range(pgio, req);
275962306a36Sopenharmony_ci	if (pgio->pg_lseg == NULL) {
276062306a36Sopenharmony_ci		pgio->pg_lseg =
276162306a36Sopenharmony_ci			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
276262306a36Sopenharmony_ci					   req_offset(req), wb_size, IOMODE_RW,
276362306a36Sopenharmony_ci					   false, nfs_io_gfp_mask());
276462306a36Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
276562306a36Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
276662306a36Sopenharmony_ci			pgio->pg_lseg = NULL;
276762306a36Sopenharmony_ci			return;
276862306a36Sopenharmony_ci		}
276962306a36Sopenharmony_ci	}
277062306a36Sopenharmony_ci	/* If no lseg, fall back to write through mds */
277162306a36Sopenharmony_ci	if (pgio->pg_lseg == NULL)
277262306a36Sopenharmony_ci		nfs_pageio_reset_write_mds(pgio);
277362306a36Sopenharmony_ci}
277462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
277562306a36Sopenharmony_ci
277662306a36Sopenharmony_civoid
277762306a36Sopenharmony_cipnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
277862306a36Sopenharmony_ci{
277962306a36Sopenharmony_ci	if (desc->pg_lseg) {
278062306a36Sopenharmony_ci		pnfs_put_lseg(desc->pg_lseg);
278162306a36Sopenharmony_ci		desc->pg_lseg = NULL;
278262306a36Sopenharmony_ci	}
278362306a36Sopenharmony_ci}
278462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
278562306a36Sopenharmony_ci
278662306a36Sopenharmony_ci/*
278762306a36Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
278862306a36Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced.
278962306a36Sopenharmony_ci */
279062306a36Sopenharmony_cisize_t
279162306a36Sopenharmony_cipnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
279262306a36Sopenharmony_ci		     struct nfs_page *prev, struct nfs_page *req)
279362306a36Sopenharmony_ci{
279462306a36Sopenharmony_ci	unsigned int size;
279562306a36Sopenharmony_ci	u64 seg_end, req_start, seg_left;
279662306a36Sopenharmony_ci
279762306a36Sopenharmony_ci	size = nfs_generic_pg_test(pgio, prev, req);
279862306a36Sopenharmony_ci	if (!size)
279962306a36Sopenharmony_ci		return 0;
280062306a36Sopenharmony_ci
280162306a36Sopenharmony_ci	/*
280262306a36Sopenharmony_ci	 * 'size' contains the number of bytes left in the current page (up
280362306a36Sopenharmony_ci	 * to the original size asked for in @req->wb_bytes).
280462306a36Sopenharmony_ci	 *
280562306a36Sopenharmony_ci	 * Calculate how many bytes are left in the layout segment
280662306a36Sopenharmony_ci	 * and if there are less bytes than 'size', return that instead.
280762306a36Sopenharmony_ci	 *
280862306a36Sopenharmony_ci	 * Please also note that 'end_offset' is actually the offset of the
280962306a36Sopenharmony_ci	 * first byte that lies outside the pnfs_layout_range. FIXME?
281062306a36Sopenharmony_ci	 *
281162306a36Sopenharmony_ci	 */
281262306a36Sopenharmony_ci	if (pgio->pg_lseg) {
281362306a36Sopenharmony_ci		seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
281462306a36Sopenharmony_ci				     pgio->pg_lseg->pls_range.length);
281562306a36Sopenharmony_ci		req_start = req_offset(req);
281662306a36Sopenharmony_ci
281762306a36Sopenharmony_ci		/* start of request is past the last byte of this segment */
281862306a36Sopenharmony_ci		if (req_start >= seg_end)
281962306a36Sopenharmony_ci			return 0;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci		/* adjust 'size' iff there are fewer bytes left in the
282262306a36Sopenharmony_ci		 * segment than what nfs_generic_pg_test returned */
282362306a36Sopenharmony_ci		seg_left = seg_end - req_start;
282462306a36Sopenharmony_ci		if (seg_left < size)
282562306a36Sopenharmony_ci			size = (unsigned int)seg_left;
282662306a36Sopenharmony_ci	}
282762306a36Sopenharmony_ci
282862306a36Sopenharmony_ci	return size;
282962306a36Sopenharmony_ci}
283062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ciint pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
283362306a36Sopenharmony_ci{
283462306a36Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
283562306a36Sopenharmony_ci
283662306a36Sopenharmony_ci	/* Resend all requests through the MDS */
283762306a36Sopenharmony_ci	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
283862306a36Sopenharmony_ci			      hdr->completion_ops);
283962306a36Sopenharmony_ci	return nfs_pageio_resend(&pgio, hdr);
284062306a36Sopenharmony_ci}
284162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_cistatic void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
284462306a36Sopenharmony_ci{
284562306a36Sopenharmony_ci
284662306a36Sopenharmony_ci	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
284762306a36Sopenharmony_ci	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
284862306a36Sopenharmony_ci	    PNFS_LAYOUTRET_ON_ERROR) {
284962306a36Sopenharmony_ci		pnfs_return_layout(hdr->inode);
285062306a36Sopenharmony_ci	}
285162306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
285262306a36Sopenharmony_ci		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
285362306a36Sopenharmony_ci}
285462306a36Sopenharmony_ci
285562306a36Sopenharmony_ci/*
285662306a36Sopenharmony_ci * Called by non rpc-based layout drivers
285762306a36Sopenharmony_ci */
285862306a36Sopenharmony_civoid pnfs_ld_write_done(struct nfs_pgio_header *hdr)
285962306a36Sopenharmony_ci{
286062306a36Sopenharmony_ci	if (likely(!hdr->pnfs_error)) {
286162306a36Sopenharmony_ci		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
286262306a36Sopenharmony_ci				hdr->mds_offset + hdr->res.count);
286362306a36Sopenharmony_ci		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
286462306a36Sopenharmony_ci	}
286562306a36Sopenharmony_ci	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
286662306a36Sopenharmony_ci	if (unlikely(hdr->pnfs_error))
286762306a36Sopenharmony_ci		pnfs_ld_handle_write_error(hdr);
286862306a36Sopenharmony_ci	hdr->mds_ops->rpc_release(hdr);
286962306a36Sopenharmony_ci}
287062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_write_done);
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_cistatic void
287362306a36Sopenharmony_cipnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
287462306a36Sopenharmony_ci		struct nfs_pgio_header *hdr)
287562306a36Sopenharmony_ci{
287662306a36Sopenharmony_ci	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
287762306a36Sopenharmony_ci
287862306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
287962306a36Sopenharmony_ci		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
288062306a36Sopenharmony_ci		nfs_pageio_reset_write_mds(desc);
288162306a36Sopenharmony_ci		mirror->pg_recoalesce = 1;
288262306a36Sopenharmony_ci	}
288362306a36Sopenharmony_ci	hdr->completion_ops->completion(hdr);
288462306a36Sopenharmony_ci}
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_cistatic enum pnfs_try_status
288762306a36Sopenharmony_cipnfs_try_to_write_data(struct nfs_pgio_header *hdr,
288862306a36Sopenharmony_ci			const struct rpc_call_ops *call_ops,
288962306a36Sopenharmony_ci			struct pnfs_layout_segment *lseg,
289062306a36Sopenharmony_ci			int how)
289162306a36Sopenharmony_ci{
289262306a36Sopenharmony_ci	struct inode *inode = hdr->inode;
289362306a36Sopenharmony_ci	enum pnfs_try_status trypnfs;
289462306a36Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(inode);
289562306a36Sopenharmony_ci
289662306a36Sopenharmony_ci	hdr->mds_ops = call_ops;
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
289962306a36Sopenharmony_ci		inode->i_ino, hdr->args.count, hdr->args.offset, how);
290062306a36Sopenharmony_ci	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
290162306a36Sopenharmony_ci	if (trypnfs != PNFS_NOT_ATTEMPTED)
290262306a36Sopenharmony_ci		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
290362306a36Sopenharmony_ci	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
290462306a36Sopenharmony_ci	return trypnfs;
290562306a36Sopenharmony_ci}
290662306a36Sopenharmony_ci
290762306a36Sopenharmony_cistatic void
290862306a36Sopenharmony_cipnfs_do_write(struct nfs_pageio_descriptor *desc,
290962306a36Sopenharmony_ci	      struct nfs_pgio_header *hdr, int how)
291062306a36Sopenharmony_ci{
291162306a36Sopenharmony_ci	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
291262306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg = desc->pg_lseg;
291362306a36Sopenharmony_ci	enum pnfs_try_status trypnfs;
291462306a36Sopenharmony_ci
291562306a36Sopenharmony_ci	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
291662306a36Sopenharmony_ci	switch (trypnfs) {
291762306a36Sopenharmony_ci	case PNFS_NOT_ATTEMPTED:
291862306a36Sopenharmony_ci		pnfs_write_through_mds(desc, hdr);
291962306a36Sopenharmony_ci		break;
292062306a36Sopenharmony_ci	case PNFS_ATTEMPTED:
292162306a36Sopenharmony_ci		break;
292262306a36Sopenharmony_ci	case PNFS_TRY_AGAIN:
292362306a36Sopenharmony_ci		/* cleanup hdr and prepare to redo pnfs */
292462306a36Sopenharmony_ci		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
292562306a36Sopenharmony_ci			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
292662306a36Sopenharmony_ci			list_splice_init(&hdr->pages, &mirror->pg_list);
292762306a36Sopenharmony_ci			mirror->pg_recoalesce = 1;
292862306a36Sopenharmony_ci		}
292962306a36Sopenharmony_ci		hdr->mds_ops->rpc_release(hdr);
293062306a36Sopenharmony_ci	}
293162306a36Sopenharmony_ci}
293262306a36Sopenharmony_ci
293362306a36Sopenharmony_cistatic void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
293462306a36Sopenharmony_ci{
293562306a36Sopenharmony_ci	pnfs_put_lseg(hdr->lseg);
293662306a36Sopenharmony_ci	nfs_pgio_header_free(hdr);
293762306a36Sopenharmony_ci}
293862306a36Sopenharmony_ci
293962306a36Sopenharmony_ciint
294062306a36Sopenharmony_cipnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
294162306a36Sopenharmony_ci{
294262306a36Sopenharmony_ci	struct nfs_pgio_header *hdr;
294362306a36Sopenharmony_ci	int ret;
294462306a36Sopenharmony_ci
294562306a36Sopenharmony_ci	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
294662306a36Sopenharmony_ci	if (!hdr) {
294762306a36Sopenharmony_ci		desc->pg_error = -ENOMEM;
294862306a36Sopenharmony_ci		return desc->pg_error;
294962306a36Sopenharmony_ci	}
295062306a36Sopenharmony_ci	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_ci	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
295362306a36Sopenharmony_ci	ret = nfs_generic_pgio(desc, hdr);
295462306a36Sopenharmony_ci	if (!ret)
295562306a36Sopenharmony_ci		pnfs_do_write(desc, hdr, desc->pg_ioflags);
295662306a36Sopenharmony_ci
295762306a36Sopenharmony_ci	return ret;
295862306a36Sopenharmony_ci}
295962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
296062306a36Sopenharmony_ci
296162306a36Sopenharmony_ciint pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
296262306a36Sopenharmony_ci{
296362306a36Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
296462306a36Sopenharmony_ci
296562306a36Sopenharmony_ci	/* Resend all requests through the MDS */
296662306a36Sopenharmony_ci	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
296762306a36Sopenharmony_ci	return nfs_pageio_resend(&pgio, hdr);
296862306a36Sopenharmony_ci}
296962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_cistatic void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
297262306a36Sopenharmony_ci{
297362306a36Sopenharmony_ci	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
297462306a36Sopenharmony_ci	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
297562306a36Sopenharmony_ci	    PNFS_LAYOUTRET_ON_ERROR) {
297662306a36Sopenharmony_ci		pnfs_return_layout(hdr->inode);
297762306a36Sopenharmony_ci	}
297862306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
297962306a36Sopenharmony_ci		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
298062306a36Sopenharmony_ci}
298162306a36Sopenharmony_ci
298262306a36Sopenharmony_ci/*
298362306a36Sopenharmony_ci * Called by non rpc-based layout drivers
298462306a36Sopenharmony_ci */
298562306a36Sopenharmony_civoid pnfs_ld_read_done(struct nfs_pgio_header *hdr)
298662306a36Sopenharmony_ci{
298762306a36Sopenharmony_ci	if (likely(!hdr->pnfs_error))
298862306a36Sopenharmony_ci		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
298962306a36Sopenharmony_ci	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
299062306a36Sopenharmony_ci	if (unlikely(hdr->pnfs_error))
299162306a36Sopenharmony_ci		pnfs_ld_handle_read_error(hdr);
299262306a36Sopenharmony_ci	hdr->mds_ops->rpc_release(hdr);
299362306a36Sopenharmony_ci}
299462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_read_done);
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_cistatic void
299762306a36Sopenharmony_cipnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
299862306a36Sopenharmony_ci		struct nfs_pgio_header *hdr)
299962306a36Sopenharmony_ci{
300062306a36Sopenharmony_ci	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
300162306a36Sopenharmony_ci
300262306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
300362306a36Sopenharmony_ci		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
300462306a36Sopenharmony_ci		nfs_pageio_reset_read_mds(desc);
300562306a36Sopenharmony_ci		mirror->pg_recoalesce = 1;
300662306a36Sopenharmony_ci	}
300762306a36Sopenharmony_ci	hdr->completion_ops->completion(hdr);
300862306a36Sopenharmony_ci}
300962306a36Sopenharmony_ci
301062306a36Sopenharmony_ci/*
301162306a36Sopenharmony_ci * Call the appropriate parallel I/O subsystem read function.
301262306a36Sopenharmony_ci */
301362306a36Sopenharmony_cistatic enum pnfs_try_status
301462306a36Sopenharmony_cipnfs_try_to_read_data(struct nfs_pgio_header *hdr,
301562306a36Sopenharmony_ci		       const struct rpc_call_ops *call_ops,
301662306a36Sopenharmony_ci		       struct pnfs_layout_segment *lseg)
301762306a36Sopenharmony_ci{
301862306a36Sopenharmony_ci	struct inode *inode = hdr->inode;
301962306a36Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(inode);
302062306a36Sopenharmony_ci	enum pnfs_try_status trypnfs;
302162306a36Sopenharmony_ci
302262306a36Sopenharmony_ci	hdr->mds_ops = call_ops;
302362306a36Sopenharmony_ci
302462306a36Sopenharmony_ci	dprintk("%s: Reading ino:%lu %u@%llu\n",
302562306a36Sopenharmony_ci		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
302862306a36Sopenharmony_ci	if (trypnfs != PNFS_NOT_ATTEMPTED)
302962306a36Sopenharmony_ci		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
303062306a36Sopenharmony_ci	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
303162306a36Sopenharmony_ci	return trypnfs;
303262306a36Sopenharmony_ci}
303362306a36Sopenharmony_ci
303462306a36Sopenharmony_ci/* Resend all requests through pnfs. */
303562306a36Sopenharmony_civoid pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
303662306a36Sopenharmony_ci			   unsigned int mirror_idx)
303762306a36Sopenharmony_ci{
303862306a36Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
303962306a36Sopenharmony_ci
304062306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
304162306a36Sopenharmony_ci		/* Prevent deadlocks with layoutreturn! */
304262306a36Sopenharmony_ci		pnfs_put_lseg(hdr->lseg);
304362306a36Sopenharmony_ci		hdr->lseg = NULL;
304462306a36Sopenharmony_ci
304562306a36Sopenharmony_ci		nfs_pageio_init_read(&pgio, hdr->inode, false,
304662306a36Sopenharmony_ci					hdr->completion_ops);
304762306a36Sopenharmony_ci		pgio.pg_mirror_idx = mirror_idx;
304862306a36Sopenharmony_ci		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
304962306a36Sopenharmony_ci	}
305062306a36Sopenharmony_ci}
305162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
305262306a36Sopenharmony_ci
305362306a36Sopenharmony_cistatic void
305462306a36Sopenharmony_cipnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
305562306a36Sopenharmony_ci{
305662306a36Sopenharmony_ci	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
305762306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg = desc->pg_lseg;
305862306a36Sopenharmony_ci	enum pnfs_try_status trypnfs;
305962306a36Sopenharmony_ci
306062306a36Sopenharmony_ci	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
306162306a36Sopenharmony_ci	switch (trypnfs) {
306262306a36Sopenharmony_ci	case PNFS_NOT_ATTEMPTED:
306362306a36Sopenharmony_ci		pnfs_read_through_mds(desc, hdr);
306462306a36Sopenharmony_ci		break;
306562306a36Sopenharmony_ci	case PNFS_ATTEMPTED:
306662306a36Sopenharmony_ci		break;
306762306a36Sopenharmony_ci	case PNFS_TRY_AGAIN:
306862306a36Sopenharmony_ci		/* cleanup hdr and prepare to redo pnfs */
306962306a36Sopenharmony_ci		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
307062306a36Sopenharmony_ci			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
307162306a36Sopenharmony_ci			list_splice_init(&hdr->pages, &mirror->pg_list);
307262306a36Sopenharmony_ci			mirror->pg_recoalesce = 1;
307362306a36Sopenharmony_ci		}
307462306a36Sopenharmony_ci		hdr->mds_ops->rpc_release(hdr);
307562306a36Sopenharmony_ci	}
307662306a36Sopenharmony_ci}
307762306a36Sopenharmony_ci
307862306a36Sopenharmony_cistatic void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
307962306a36Sopenharmony_ci{
308062306a36Sopenharmony_ci	pnfs_put_lseg(hdr->lseg);
308162306a36Sopenharmony_ci	nfs_pgio_header_free(hdr);
308262306a36Sopenharmony_ci}
308362306a36Sopenharmony_ci
308462306a36Sopenharmony_ciint
308562306a36Sopenharmony_cipnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
308662306a36Sopenharmony_ci{
308762306a36Sopenharmony_ci	struct nfs_pgio_header *hdr;
308862306a36Sopenharmony_ci	int ret;
308962306a36Sopenharmony_ci
309062306a36Sopenharmony_ci	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
309162306a36Sopenharmony_ci	if (!hdr) {
309262306a36Sopenharmony_ci		desc->pg_error = -ENOMEM;
309362306a36Sopenharmony_ci		return desc->pg_error;
309462306a36Sopenharmony_ci	}
309562306a36Sopenharmony_ci	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
309662306a36Sopenharmony_ci	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
309762306a36Sopenharmony_ci	ret = nfs_generic_pgio(desc, hdr);
309862306a36Sopenharmony_ci	if (!ret)
309962306a36Sopenharmony_ci		pnfs_do_read(desc, hdr);
310062306a36Sopenharmony_ci	return ret;
310162306a36Sopenharmony_ci}
310262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
310362306a36Sopenharmony_ci
310462306a36Sopenharmony_cistatic void pnfs_clear_layoutcommitting(struct inode *inode)
310562306a36Sopenharmony_ci{
310662306a36Sopenharmony_ci	unsigned long *bitlock = &NFS_I(inode)->flags;
310762306a36Sopenharmony_ci
310862306a36Sopenharmony_ci	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
310962306a36Sopenharmony_ci	smp_mb__after_atomic();
311062306a36Sopenharmony_ci	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
311162306a36Sopenharmony_ci}
311262306a36Sopenharmony_ci
311362306a36Sopenharmony_ci/*
311462306a36Sopenharmony_ci * There can be multiple RW segments.
311562306a36Sopenharmony_ci */
311662306a36Sopenharmony_cistatic void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
311762306a36Sopenharmony_ci{
311862306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg;
311962306a36Sopenharmony_ci
312062306a36Sopenharmony_ci	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
312162306a36Sopenharmony_ci		if (lseg->pls_range.iomode == IOMODE_RW &&
312262306a36Sopenharmony_ci		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
312362306a36Sopenharmony_ci			list_add(&lseg->pls_lc_list, listp);
312462306a36Sopenharmony_ci	}
312562306a36Sopenharmony_ci}
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_cistatic void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
312862306a36Sopenharmony_ci{
312962306a36Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci	/* Matched by references in pnfs_set_layoutcommit */
313262306a36Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
313362306a36Sopenharmony_ci		list_del_init(&lseg->pls_lc_list);
313462306a36Sopenharmony_ci		pnfs_put_lseg(lseg);
313562306a36Sopenharmony_ci	}
313662306a36Sopenharmony_ci
313762306a36Sopenharmony_ci	pnfs_clear_layoutcommitting(inode);
313862306a36Sopenharmony_ci}
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_civoid pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
314162306a36Sopenharmony_ci{
314262306a36Sopenharmony_ci	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
314362306a36Sopenharmony_ci}
314462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
314562306a36Sopenharmony_ci
314662306a36Sopenharmony_civoid
314762306a36Sopenharmony_cipnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
314862306a36Sopenharmony_ci		loff_t end_pos)
314962306a36Sopenharmony_ci{
315062306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
315162306a36Sopenharmony_ci	bool mark_as_dirty = false;
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
315462306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
315562306a36Sopenharmony_ci		nfsi->layout->plh_lwb = end_pos;
315662306a36Sopenharmony_ci		mark_as_dirty = true;
315762306a36Sopenharmony_ci		dprintk("%s: Set layoutcommit for inode %lu ",
315862306a36Sopenharmony_ci			__func__, inode->i_ino);
315962306a36Sopenharmony_ci	} else if (end_pos > nfsi->layout->plh_lwb)
316062306a36Sopenharmony_ci		nfsi->layout->plh_lwb = end_pos;
316162306a36Sopenharmony_ci	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
316262306a36Sopenharmony_ci		/* references matched in nfs4_layoutcommit_release */
316362306a36Sopenharmony_ci		pnfs_get_lseg(lseg);
316462306a36Sopenharmony_ci	}
316562306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
316662306a36Sopenharmony_ci	dprintk("%s: lseg %p end_pos %llu\n",
316762306a36Sopenharmony_ci		__func__, lseg, nfsi->layout->plh_lwb);
316862306a36Sopenharmony_ci
316962306a36Sopenharmony_ci	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
317062306a36Sopenharmony_ci	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
317162306a36Sopenharmony_ci	if (mark_as_dirty)
317262306a36Sopenharmony_ci		mark_inode_dirty_sync(inode);
317362306a36Sopenharmony_ci}
317462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
317562306a36Sopenharmony_ci
317662306a36Sopenharmony_civoid pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
317762306a36Sopenharmony_ci{
317862306a36Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
317962306a36Sopenharmony_ci
318062306a36Sopenharmony_ci	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
318162306a36Sopenharmony_ci		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
318262306a36Sopenharmony_ci	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
318362306a36Sopenharmony_ci}
318462306a36Sopenharmony_ci
318562306a36Sopenharmony_ci/*
318662306a36Sopenharmony_ci * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
318762306a36Sopenharmony_ci * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
318862306a36Sopenharmony_ci * data to disk to allow the server to recover the data if it crashes.
318962306a36Sopenharmony_ci * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
319062306a36Sopenharmony_ci * is off, and a COMMIT is sent to a data server, or
319162306a36Sopenharmony_ci * if WRITEs to a data server return NFS_DATA_SYNC.
319262306a36Sopenharmony_ci */
319362306a36Sopenharmony_ciint
319462306a36Sopenharmony_cipnfs_layoutcommit_inode(struct inode *inode, bool sync)
319562306a36Sopenharmony_ci{
319662306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
319762306a36Sopenharmony_ci	struct nfs4_layoutcommit_data *data;
319862306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
319962306a36Sopenharmony_ci	loff_t end_pos;
320062306a36Sopenharmony_ci	int status;
320162306a36Sopenharmony_ci
320262306a36Sopenharmony_ci	if (!pnfs_layoutcommit_outstanding(inode))
320362306a36Sopenharmony_ci		return 0;
320462306a36Sopenharmony_ci
320562306a36Sopenharmony_ci	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
320662306a36Sopenharmony_ci
320762306a36Sopenharmony_ci	status = -EAGAIN;
320862306a36Sopenharmony_ci	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
320962306a36Sopenharmony_ci		if (!sync)
321062306a36Sopenharmony_ci			goto out;
321162306a36Sopenharmony_ci		status = wait_on_bit_lock_action(&nfsi->flags,
321262306a36Sopenharmony_ci				NFS_INO_LAYOUTCOMMITTING,
321362306a36Sopenharmony_ci				nfs_wait_bit_killable,
321462306a36Sopenharmony_ci				TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
321562306a36Sopenharmony_ci		if (status)
321662306a36Sopenharmony_ci			goto out;
321762306a36Sopenharmony_ci	}
321862306a36Sopenharmony_ci
321962306a36Sopenharmony_ci	status = -ENOMEM;
322062306a36Sopenharmony_ci	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
322162306a36Sopenharmony_ci	data = kzalloc(sizeof(*data), nfs_io_gfp_mask());
322262306a36Sopenharmony_ci	if (!data)
322362306a36Sopenharmony_ci		goto clear_layoutcommitting;
322462306a36Sopenharmony_ci
322562306a36Sopenharmony_ci	status = 0;
322662306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
322762306a36Sopenharmony_ci	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
322862306a36Sopenharmony_ci		goto out_unlock;
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_ci	INIT_LIST_HEAD(&data->lseg_list);
323162306a36Sopenharmony_ci	pnfs_list_write_lseg(inode, &data->lseg_list);
323262306a36Sopenharmony_ci
323362306a36Sopenharmony_ci	end_pos = nfsi->layout->plh_lwb;
323462306a36Sopenharmony_ci
323562306a36Sopenharmony_ci	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
323662306a36Sopenharmony_ci	data->cred = get_cred(nfsi->layout->plh_lc_cred);
323762306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
323862306a36Sopenharmony_ci
323962306a36Sopenharmony_ci	data->args.inode = inode;
324062306a36Sopenharmony_ci	nfs_fattr_init(&data->fattr);
324162306a36Sopenharmony_ci	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
324262306a36Sopenharmony_ci	data->res.fattr = &data->fattr;
324362306a36Sopenharmony_ci	if (end_pos != 0)
324462306a36Sopenharmony_ci		data->args.lastbytewritten = end_pos - 1;
324562306a36Sopenharmony_ci	else
324662306a36Sopenharmony_ci		data->args.lastbytewritten = U64_MAX;
324762306a36Sopenharmony_ci	data->res.server = NFS_SERVER(inode);
324862306a36Sopenharmony_ci
324962306a36Sopenharmony_ci	if (ld->prepare_layoutcommit) {
325062306a36Sopenharmony_ci		status = ld->prepare_layoutcommit(&data->args);
325162306a36Sopenharmony_ci		if (status) {
325262306a36Sopenharmony_ci			put_cred(data->cred);
325362306a36Sopenharmony_ci			spin_lock(&inode->i_lock);
325462306a36Sopenharmony_ci			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
325562306a36Sopenharmony_ci			if (end_pos > nfsi->layout->plh_lwb)
325662306a36Sopenharmony_ci				nfsi->layout->plh_lwb = end_pos;
325762306a36Sopenharmony_ci			goto out_unlock;
325862306a36Sopenharmony_ci		}
325962306a36Sopenharmony_ci	}
326062306a36Sopenharmony_ci
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_ci	status = nfs4_proc_layoutcommit(data, sync);
326362306a36Sopenharmony_ciout:
326462306a36Sopenharmony_ci	if (status)
326562306a36Sopenharmony_ci		mark_inode_dirty_sync(inode);
326662306a36Sopenharmony_ci	dprintk("<-- %s status %d\n", __func__, status);
326762306a36Sopenharmony_ci	return status;
326862306a36Sopenharmony_ciout_unlock:
326962306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
327062306a36Sopenharmony_ci	kfree(data);
327162306a36Sopenharmony_ciclear_layoutcommitting:
327262306a36Sopenharmony_ci	pnfs_clear_layoutcommitting(inode);
327362306a36Sopenharmony_ci	goto out;
327462306a36Sopenharmony_ci}
327562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ciint
327862306a36Sopenharmony_cipnfs_generic_sync(struct inode *inode, bool datasync)
327962306a36Sopenharmony_ci{
328062306a36Sopenharmony_ci	return pnfs_layoutcommit_inode(inode, true);
328162306a36Sopenharmony_ci}
328262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_sync);
328362306a36Sopenharmony_ci
328462306a36Sopenharmony_cistruct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
328562306a36Sopenharmony_ci{
328662306a36Sopenharmony_ci	struct nfs4_threshold *thp;
328762306a36Sopenharmony_ci
328862306a36Sopenharmony_ci	thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask());
328962306a36Sopenharmony_ci	if (!thp) {
329062306a36Sopenharmony_ci		dprintk("%s mdsthreshold allocation failed\n", __func__);
329162306a36Sopenharmony_ci		return NULL;
329262306a36Sopenharmony_ci	}
329362306a36Sopenharmony_ci	return thp;
329462306a36Sopenharmony_ci}
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_NFS_V4_2)
329762306a36Sopenharmony_ciint
329862306a36Sopenharmony_cipnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
329962306a36Sopenharmony_ci{
330062306a36Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
330162306a36Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(inode);
330262306a36Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
330362306a36Sopenharmony_ci	struct nfs42_layoutstat_data *data;
330462306a36Sopenharmony_ci	struct pnfs_layout_hdr *hdr;
330562306a36Sopenharmony_ci	int status = 0;
330662306a36Sopenharmony_ci
330762306a36Sopenharmony_ci	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
330862306a36Sopenharmony_ci		goto out;
330962306a36Sopenharmony_ci
331062306a36Sopenharmony_ci	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
331162306a36Sopenharmony_ci		goto out;
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
331462306a36Sopenharmony_ci		goto out;
331562306a36Sopenharmony_ci
331662306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
331762306a36Sopenharmony_ci	if (!NFS_I(inode)->layout) {
331862306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
331962306a36Sopenharmony_ci		goto out_clear_layoutstats;
332062306a36Sopenharmony_ci	}
332162306a36Sopenharmony_ci	hdr = NFS_I(inode)->layout;
332262306a36Sopenharmony_ci	pnfs_get_layout_hdr(hdr);
332362306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
332462306a36Sopenharmony_ci
332562306a36Sopenharmony_ci	data = kzalloc(sizeof(*data), gfp_flags);
332662306a36Sopenharmony_ci	if (!data) {
332762306a36Sopenharmony_ci		status = -ENOMEM;
332862306a36Sopenharmony_ci		goto out_put;
332962306a36Sopenharmony_ci	}
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci	data->args.fh = NFS_FH(inode);
333262306a36Sopenharmony_ci	data->args.inode = inode;
333362306a36Sopenharmony_ci	status = ld->prepare_layoutstats(&data->args);
333462306a36Sopenharmony_ci	if (status)
333562306a36Sopenharmony_ci		goto out_free;
333662306a36Sopenharmony_ci
333762306a36Sopenharmony_ci	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
333862306a36Sopenharmony_ci
333962306a36Sopenharmony_ciout:
334062306a36Sopenharmony_ci	dprintk("%s returns %d\n", __func__, status);
334162306a36Sopenharmony_ci	return status;
334262306a36Sopenharmony_ci
334362306a36Sopenharmony_ciout_free:
334462306a36Sopenharmony_ci	kfree(data);
334562306a36Sopenharmony_ciout_put:
334662306a36Sopenharmony_ci	pnfs_put_layout_hdr(hdr);
334762306a36Sopenharmony_ciout_clear_layoutstats:
334862306a36Sopenharmony_ci	smp_mb__before_atomic();
334962306a36Sopenharmony_ci	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
335062306a36Sopenharmony_ci	smp_mb__after_atomic();
335162306a36Sopenharmony_ci	goto out;
335262306a36Sopenharmony_ci}
335362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
335462306a36Sopenharmony_ci#endif
335562306a36Sopenharmony_ci
335662306a36Sopenharmony_ciunsigned int layoutstats_timer;
335762306a36Sopenharmony_cimodule_param(layoutstats_timer, uint, 0644);
335862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(layoutstats_timer);
3359