18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci *  pNFS functions to call and manage layout drivers.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci *  Copyright (c) 2002 [year of first publication]
58c2ecf20Sopenharmony_ci *  The Regents of the University of Michigan
68c2ecf20Sopenharmony_ci *  All Rights Reserved
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci *  Dean Hildebrand <dhildebz@umich.edu>
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *  Permission is granted to use, copy, create derivative works, and
118c2ecf20Sopenharmony_ci *  redistribute this software and such derivative works for any purpose,
128c2ecf20Sopenharmony_ci *  so long as the name of the University of Michigan is not used in
138c2ecf20Sopenharmony_ci *  any advertising or publicity pertaining to the use or distribution
148c2ecf20Sopenharmony_ci *  of this software without specific, written prior authorization. If
158c2ecf20Sopenharmony_ci *  the above copyright notice or any other identification of the
168c2ecf20Sopenharmony_ci *  University of Michigan is included in any copy of any portion of
178c2ecf20Sopenharmony_ci *  this software, then the disclaimer below must also be included.
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci *  This software is provided as is, without representation or warranty
208c2ecf20Sopenharmony_ci *  of any kind either express or implied, including without limitation
218c2ecf20Sopenharmony_ci *  the implied warranties of merchantability, fitness for a particular
228c2ecf20Sopenharmony_ci *  purpose, or noninfringement.  The Regents of the University of
238c2ecf20Sopenharmony_ci *  Michigan shall not be liable for any damages, including special,
248c2ecf20Sopenharmony_ci *  indirect, incidental, or consequential damages, with respect to any
258c2ecf20Sopenharmony_ci *  claim arising out of or in connection with the use of the software,
268c2ecf20Sopenharmony_ci *  even if it has been or is hereafter advised of the possibility of
278c2ecf20Sopenharmony_ci *  such damages.
288c2ecf20Sopenharmony_ci */
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#include <linux/nfs_fs.h>
318c2ecf20Sopenharmony_ci#include <linux/nfs_page.h>
328c2ecf20Sopenharmony_ci#include <linux/module.h>
338c2ecf20Sopenharmony_ci#include <linux/sort.h>
348c2ecf20Sopenharmony_ci#include "internal.h"
358c2ecf20Sopenharmony_ci#include "pnfs.h"
368c2ecf20Sopenharmony_ci#include "iostat.h"
378c2ecf20Sopenharmony_ci#include "nfs4trace.h"
388c2ecf20Sopenharmony_ci#include "delegation.h"
398c2ecf20Sopenharmony_ci#include "nfs42.h"
408c2ecf20Sopenharmony_ci#include "nfs4_fs.h"
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci#define NFSDBG_FACILITY		NFSDBG_PNFS
438c2ecf20Sopenharmony_ci#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/* Locking:
468c2ecf20Sopenharmony_ci *
478c2ecf20Sopenharmony_ci * pnfs_spinlock:
488c2ecf20Sopenharmony_ci *      protects pnfs_modules_tbl.
498c2ecf20Sopenharmony_ci */
508c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(pnfs_spinlock);
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci/*
538c2ecf20Sopenharmony_ci * pnfs_modules_tbl holds all pnfs modules
548c2ecf20Sopenharmony_ci */
558c2ecf20Sopenharmony_cistatic LIST_HEAD(pnfs_modules_tbl);
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
588c2ecf20Sopenharmony_cistatic void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
598c2ecf20Sopenharmony_ci		struct list_head *free_me,
608c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *range,
618c2ecf20Sopenharmony_ci		u32 seq);
628c2ecf20Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
638c2ecf20Sopenharmony_ci		                struct list_head *tmp_list);
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci/* Return the registered pnfs layout driver module matching given id */
668c2ecf20Sopenharmony_cistatic struct pnfs_layoutdriver_type *
678c2ecf20Sopenharmony_cifind_pnfs_driver_locked(u32 id)
688c2ecf20Sopenharmony_ci{
698c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *local;
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
728c2ecf20Sopenharmony_ci		if (local->id == id)
738c2ecf20Sopenharmony_ci			goto out;
748c2ecf20Sopenharmony_ci	local = NULL;
758c2ecf20Sopenharmony_ciout:
768c2ecf20Sopenharmony_ci	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
778c2ecf20Sopenharmony_ci	return local;
788c2ecf20Sopenharmony_ci}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_cistatic struct pnfs_layoutdriver_type *
818c2ecf20Sopenharmony_cifind_pnfs_driver(u32 id)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *local;
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	spin_lock(&pnfs_spinlock);
868c2ecf20Sopenharmony_ci	local = find_pnfs_driver_locked(id);
878c2ecf20Sopenharmony_ci	if (local != NULL && !try_module_get(local->owner)) {
888c2ecf20Sopenharmony_ci		dprintk("%s: Could not grab reference on module\n", __func__);
898c2ecf20Sopenharmony_ci		local = NULL;
908c2ecf20Sopenharmony_ci	}
918c2ecf20Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
928c2ecf20Sopenharmony_ci	return local;
938c2ecf20Sopenharmony_ci}
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ciconst struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
968c2ecf20Sopenharmony_ci{
978c2ecf20Sopenharmony_ci	return find_pnfs_driver(id);
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_civoid pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
1018c2ecf20Sopenharmony_ci{
1028c2ecf20Sopenharmony_ci	if (ld)
1038c2ecf20Sopenharmony_ci		module_put(ld->owner);
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_civoid
1078c2ecf20Sopenharmony_ciunset_pnfs_layoutdriver(struct nfs_server *nfss)
1088c2ecf20Sopenharmony_ci{
1098c2ecf20Sopenharmony_ci	if (nfss->pnfs_curr_ld) {
1108c2ecf20Sopenharmony_ci		if (nfss->pnfs_curr_ld->clear_layoutdriver)
1118c2ecf20Sopenharmony_ci			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
1128c2ecf20Sopenharmony_ci		/* Decrement the MDS count. Purge the deviceid cache if zero */
1138c2ecf20Sopenharmony_ci		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
1148c2ecf20Sopenharmony_ci			nfs4_deviceid_purge_client(nfss->nfs_client);
1158c2ecf20Sopenharmony_ci		module_put(nfss->pnfs_curr_ld->owner);
1168c2ecf20Sopenharmony_ci	}
1178c2ecf20Sopenharmony_ci	nfss->pnfs_curr_ld = NULL;
1188c2ecf20Sopenharmony_ci}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci/*
1218c2ecf20Sopenharmony_ci * When the server sends a list of layout types, we choose one in the order
1228c2ecf20Sopenharmony_ci * given in the list below.
1238c2ecf20Sopenharmony_ci *
1248c2ecf20Sopenharmony_ci * FIXME: should this list be configurable in some fashion? module param?
1258c2ecf20Sopenharmony_ci * 	  mount option? something else?
1268c2ecf20Sopenharmony_ci */
1278c2ecf20Sopenharmony_cistatic const u32 ld_prefs[] = {
1288c2ecf20Sopenharmony_ci	LAYOUT_SCSI,
1298c2ecf20Sopenharmony_ci	LAYOUT_BLOCK_VOLUME,
1308c2ecf20Sopenharmony_ci	LAYOUT_OSD2_OBJECTS,
1318c2ecf20Sopenharmony_ci	LAYOUT_FLEX_FILES,
1328c2ecf20Sopenharmony_ci	LAYOUT_NFSV4_1_FILES,
1338c2ecf20Sopenharmony_ci	0
1348c2ecf20Sopenharmony_ci};
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_cistatic int
1378c2ecf20Sopenharmony_cild_cmp(const void *e1, const void *e2)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	u32 ld1 = *((u32 *)e1);
1408c2ecf20Sopenharmony_ci	u32 ld2 = *((u32 *)e2);
1418c2ecf20Sopenharmony_ci	int i;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	for (i = 0; ld_prefs[i] != 0; i++) {
1448c2ecf20Sopenharmony_ci		if (ld1 == ld_prefs[i])
1458c2ecf20Sopenharmony_ci			return -1;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci		if (ld2 == ld_prefs[i])
1488c2ecf20Sopenharmony_ci			return 1;
1498c2ecf20Sopenharmony_ci	}
1508c2ecf20Sopenharmony_ci	return 0;
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci/*
1548c2ecf20Sopenharmony_ci * Try to set the server's pnfs module to the pnfs layout type specified by id.
1558c2ecf20Sopenharmony_ci * Currently only one pNFS layout driver per filesystem is supported.
1568c2ecf20Sopenharmony_ci *
1578c2ecf20Sopenharmony_ci * @ids array of layout types supported by MDS.
1588c2ecf20Sopenharmony_ci */
1598c2ecf20Sopenharmony_civoid
1608c2ecf20Sopenharmony_ciset_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
1618c2ecf20Sopenharmony_ci		      struct nfs_fsinfo *fsinfo)
1628c2ecf20Sopenharmony_ci{
1638c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld_type = NULL;
1648c2ecf20Sopenharmony_ci	u32 id;
1658c2ecf20Sopenharmony_ci	int i;
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci	if (fsinfo->nlayouttypes == 0)
1688c2ecf20Sopenharmony_ci		goto out_no_driver;
1698c2ecf20Sopenharmony_ci	if (!(server->nfs_client->cl_exchange_flags &
1708c2ecf20Sopenharmony_ci		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
1718c2ecf20Sopenharmony_ci		printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
1728c2ecf20Sopenharmony_ci			__func__, server->nfs_client->cl_exchange_flags);
1738c2ecf20Sopenharmony_ci		goto out_no_driver;
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	sort(fsinfo->layouttype, fsinfo->nlayouttypes,
1778c2ecf20Sopenharmony_ci		sizeof(*fsinfo->layouttype), ld_cmp, NULL);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	for (i = 0; i < fsinfo->nlayouttypes; i++) {
1808c2ecf20Sopenharmony_ci		id = fsinfo->layouttype[i];
1818c2ecf20Sopenharmony_ci		ld_type = find_pnfs_driver(id);
1828c2ecf20Sopenharmony_ci		if (!ld_type) {
1838c2ecf20Sopenharmony_ci			request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
1848c2ecf20Sopenharmony_ci					id);
1858c2ecf20Sopenharmony_ci			ld_type = find_pnfs_driver(id);
1868c2ecf20Sopenharmony_ci		}
1878c2ecf20Sopenharmony_ci		if (ld_type)
1888c2ecf20Sopenharmony_ci			break;
1898c2ecf20Sopenharmony_ci	}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	if (!ld_type) {
1928c2ecf20Sopenharmony_ci		dprintk("%s: No pNFS module found!\n", __func__);
1938c2ecf20Sopenharmony_ci		goto out_no_driver;
1948c2ecf20Sopenharmony_ci	}
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	server->pnfs_curr_ld = ld_type;
1978c2ecf20Sopenharmony_ci	if (ld_type->set_layoutdriver
1988c2ecf20Sopenharmony_ci	    && ld_type->set_layoutdriver(server, mntfh)) {
1998c2ecf20Sopenharmony_ci		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
2008c2ecf20Sopenharmony_ci			"driver %u.\n", __func__, id);
2018c2ecf20Sopenharmony_ci		module_put(ld_type->owner);
2028c2ecf20Sopenharmony_ci		goto out_no_driver;
2038c2ecf20Sopenharmony_ci	}
2048c2ecf20Sopenharmony_ci	/* Bump the MDS count */
2058c2ecf20Sopenharmony_ci	atomic_inc(&server->nfs_client->cl_mds_count);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	dprintk("%s: pNFS module for %u set\n", __func__, id);
2088c2ecf20Sopenharmony_ci	return;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ciout_no_driver:
2118c2ecf20Sopenharmony_ci	dprintk("%s: Using NFSv4 I/O\n", __func__);
2128c2ecf20Sopenharmony_ci	server->pnfs_curr_ld = NULL;
2138c2ecf20Sopenharmony_ci}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ciint
2168c2ecf20Sopenharmony_cipnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
2178c2ecf20Sopenharmony_ci{
2188c2ecf20Sopenharmony_ci	int status = -EINVAL;
2198c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *tmp;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	if (ld_type->id == 0) {
2228c2ecf20Sopenharmony_ci		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
2238c2ecf20Sopenharmony_ci		return status;
2248c2ecf20Sopenharmony_ci	}
2258c2ecf20Sopenharmony_ci	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
2268c2ecf20Sopenharmony_ci		printk(KERN_ERR "NFS: %s Layout driver must provide "
2278c2ecf20Sopenharmony_ci		       "alloc_lseg and free_lseg.\n", __func__);
2288c2ecf20Sopenharmony_ci		return status;
2298c2ecf20Sopenharmony_ci	}
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	spin_lock(&pnfs_spinlock);
2328c2ecf20Sopenharmony_ci	tmp = find_pnfs_driver_locked(ld_type->id);
2338c2ecf20Sopenharmony_ci	if (!tmp) {
2348c2ecf20Sopenharmony_ci		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
2358c2ecf20Sopenharmony_ci		status = 0;
2368c2ecf20Sopenharmony_ci		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
2378c2ecf20Sopenharmony_ci			ld_type->name);
2388c2ecf20Sopenharmony_ci	} else {
2398c2ecf20Sopenharmony_ci		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
2408c2ecf20Sopenharmony_ci			__func__, ld_type->id);
2418c2ecf20Sopenharmony_ci	}
2428c2ecf20Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	return status;
2458c2ecf20Sopenharmony_ci}
2468c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_civoid
2498c2ecf20Sopenharmony_cipnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
2528c2ecf20Sopenharmony_ci	spin_lock(&pnfs_spinlock);
2538c2ecf20Sopenharmony_ci	list_del(&ld_type->pnfs_tblid);
2548c2ecf20Sopenharmony_ci	spin_unlock(&pnfs_spinlock);
2558c2ecf20Sopenharmony_ci}
2568c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci/*
2598c2ecf20Sopenharmony_ci * pNFS client layout cache
2608c2ecf20Sopenharmony_ci */
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci/* Need to hold i_lock if caller does not already hold reference */
2638c2ecf20Sopenharmony_civoid
2648c2ecf20Sopenharmony_cipnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
2658c2ecf20Sopenharmony_ci{
2668c2ecf20Sopenharmony_ci	refcount_inc(&lo->plh_refcount);
2678c2ecf20Sopenharmony_ci}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *
2708c2ecf20Sopenharmony_cipnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
2718c2ecf20Sopenharmony_ci{
2728c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
2738c2ecf20Sopenharmony_ci	return ld->alloc_layout_hdr(ino, gfp_flags);
2748c2ecf20Sopenharmony_ci}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_cistatic void
2778c2ecf20Sopenharmony_cipnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
2788c2ecf20Sopenharmony_ci{
2798c2ecf20Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
2808c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
2838c2ecf20Sopenharmony_ci		struct nfs_client *clp = server->nfs_client;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci		spin_lock(&clp->cl_lock);
2868c2ecf20Sopenharmony_ci		list_del_rcu(&lo->plh_layouts);
2878c2ecf20Sopenharmony_ci		spin_unlock(&clp->cl_lock);
2888c2ecf20Sopenharmony_ci	}
2898c2ecf20Sopenharmony_ci	put_cred(lo->plh_lc_cred);
2908c2ecf20Sopenharmony_ci	return ld->free_layout_hdr(lo);
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_cistatic void
2948c2ecf20Sopenharmony_cipnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
2958c2ecf20Sopenharmony_ci{
2968c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
2978c2ecf20Sopenharmony_ci	dprintk("%s: freeing layout cache %p\n", __func__, lo);
2988c2ecf20Sopenharmony_ci	nfsi->layout = NULL;
2998c2ecf20Sopenharmony_ci	/* Reset MDS Threshold I/O counters */
3008c2ecf20Sopenharmony_ci	nfsi->write_io = 0;
3018c2ecf20Sopenharmony_ci	nfsi->read_io = 0;
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_civoid
3058c2ecf20Sopenharmony_cipnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
3068c2ecf20Sopenharmony_ci{
3078c2ecf20Sopenharmony_ci	struct inode *inode;
3088c2ecf20Sopenharmony_ci	unsigned long i_state;
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	if (!lo)
3118c2ecf20Sopenharmony_ci		return;
3128c2ecf20Sopenharmony_ci	inode = lo->plh_inode;
3138c2ecf20Sopenharmony_ci	pnfs_layoutreturn_before_put_layout_hdr(lo);
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
3168c2ecf20Sopenharmony_ci		if (!list_empty(&lo->plh_segs))
3178c2ecf20Sopenharmony_ci			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
3188c2ecf20Sopenharmony_ci		pnfs_detach_layout_hdr(lo);
3198c2ecf20Sopenharmony_ci		i_state = inode->i_state;
3208c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
3218c2ecf20Sopenharmony_ci		pnfs_free_layout_hdr(lo);
3228c2ecf20Sopenharmony_ci		/* Notify pnfs_destroy_layout_final() that we're done */
3238c2ecf20Sopenharmony_ci		if (i_state & (I_FREEING | I_CLEAR))
3248c2ecf20Sopenharmony_ci			wake_up_var(lo);
3258c2ecf20Sopenharmony_ci	}
3268c2ecf20Sopenharmony_ci}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_cistatic struct inode *
3298c2ecf20Sopenharmony_cipnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
3308c2ecf20Sopenharmony_ci{
3318c2ecf20Sopenharmony_ci	struct inode *inode = igrab(lo->plh_inode);
3328c2ecf20Sopenharmony_ci	if (inode)
3338c2ecf20Sopenharmony_ci		return inode;
3348c2ecf20Sopenharmony_ci	set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
3358c2ecf20Sopenharmony_ci	return NULL;
3368c2ecf20Sopenharmony_ci}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci/*
3398c2ecf20Sopenharmony_ci * Compare 2 layout stateid sequence ids, to see which is newer,
3408c2ecf20Sopenharmony_ci * taking into account wraparound issues.
3418c2ecf20Sopenharmony_ci */
3428c2ecf20Sopenharmony_cistatic bool pnfs_seqid_is_newer(u32 s1, u32 s2)
3438c2ecf20Sopenharmony_ci{
3448c2ecf20Sopenharmony_ci	return (s32)(s1 - s2) > 0;
3458c2ecf20Sopenharmony_ci}
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_cistatic void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
3488c2ecf20Sopenharmony_ci{
3498c2ecf20Sopenharmony_ci	if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier)
3508c2ecf20Sopenharmony_ci		lo->plh_barrier = newseq;
3518c2ecf20Sopenharmony_ci}
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_cistatic void
3548c2ecf20Sopenharmony_cipnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
3558c2ecf20Sopenharmony_ci			 u32 seq)
3568c2ecf20Sopenharmony_ci{
3578c2ecf20Sopenharmony_ci	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
3588c2ecf20Sopenharmony_ci		iomode = IOMODE_ANY;
3598c2ecf20Sopenharmony_ci	lo->plh_return_iomode = iomode;
3608c2ecf20Sopenharmony_ci	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
3618c2ecf20Sopenharmony_ci	/*
3628c2ecf20Sopenharmony_ci	 * We must set lo->plh_return_seq to avoid livelocks with
3638c2ecf20Sopenharmony_ci	 * pnfs_layout_need_return()
3648c2ecf20Sopenharmony_ci	 */
3658c2ecf20Sopenharmony_ci	if (seq == 0)
3668c2ecf20Sopenharmony_ci		seq = be32_to_cpu(lo->plh_stateid.seqid);
3678c2ecf20Sopenharmony_ci	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
3688c2ecf20Sopenharmony_ci		lo->plh_return_seq = seq;
3698c2ecf20Sopenharmony_ci	pnfs_barrier_update(lo, seq);
3708c2ecf20Sopenharmony_ci}
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_cistatic void
3738c2ecf20Sopenharmony_cipnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
3748c2ecf20Sopenharmony_ci{
3758c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
3768c2ecf20Sopenharmony_ci	lo->plh_return_iomode = 0;
3778c2ecf20Sopenharmony_ci	lo->plh_return_seq = 0;
3788c2ecf20Sopenharmony_ci	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
3798c2ecf20Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
3808c2ecf20Sopenharmony_ci		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
3818c2ecf20Sopenharmony_ci			continue;
3828c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
3838c2ecf20Sopenharmony_ci	}
3848c2ecf20Sopenharmony_ci}
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_cistatic void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
3878c2ecf20Sopenharmony_ci{
3888c2ecf20Sopenharmony_ci	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
3898c2ecf20Sopenharmony_ci	clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
3908c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
3918c2ecf20Sopenharmony_ci	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
3928c2ecf20Sopenharmony_ci	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
3938c2ecf20Sopenharmony_ci}
3948c2ecf20Sopenharmony_ci
3958c2ecf20Sopenharmony_cistatic void
3968c2ecf20Sopenharmony_cipnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
3978c2ecf20Sopenharmony_ci		struct list_head *free_me)
3988c2ecf20Sopenharmony_ci{
3998c2ecf20Sopenharmony_ci	clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
4008c2ecf20Sopenharmony_ci	clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
4018c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
4028c2ecf20Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
4038c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
4048c2ecf20Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
4058c2ecf20Sopenharmony_ci}
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci/*
4088c2ecf20Sopenharmony_ci * Update the seqid of a layout stateid after receiving
4098c2ecf20Sopenharmony_ci * NFS4ERR_OLD_STATEID
4108c2ecf20Sopenharmony_ci */
4118c2ecf20Sopenharmony_cibool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
4128c2ecf20Sopenharmony_ci		struct pnfs_layout_range *dst_range,
4138c2ecf20Sopenharmony_ci		struct inode *inode)
4148c2ecf20Sopenharmony_ci{
4158c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
4168c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
4178c2ecf20Sopenharmony_ci		.iomode = IOMODE_ANY,
4188c2ecf20Sopenharmony_ci		.offset = 0,
4198c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
4208c2ecf20Sopenharmony_ci	};
4218c2ecf20Sopenharmony_ci	bool ret = false;
4228c2ecf20Sopenharmony_ci	LIST_HEAD(head);
4238c2ecf20Sopenharmony_ci	int err;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
4268c2ecf20Sopenharmony_ci	lo = NFS_I(inode)->layout;
4278c2ecf20Sopenharmony_ci	if (lo &&  pnfs_layout_is_valid(lo) &&
4288c2ecf20Sopenharmony_ci	    nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
4298c2ecf20Sopenharmony_ci		/* Is our call using the most recent seqid? If so, bump it */
4308c2ecf20Sopenharmony_ci		if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
4318c2ecf20Sopenharmony_ci			nfs4_stateid_seqid_inc(dst);
4328c2ecf20Sopenharmony_ci			ret = true;
4338c2ecf20Sopenharmony_ci			goto out;
4348c2ecf20Sopenharmony_ci		}
4358c2ecf20Sopenharmony_ci		/* Try to update the seqid to the most recent */
4368c2ecf20Sopenharmony_ci		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
4378c2ecf20Sopenharmony_ci		if (err != -EBUSY) {
4388c2ecf20Sopenharmony_ci			dst->seqid = lo->plh_stateid.seqid;
4398c2ecf20Sopenharmony_ci			*dst_range = range;
4408c2ecf20Sopenharmony_ci			ret = true;
4418c2ecf20Sopenharmony_ci		}
4428c2ecf20Sopenharmony_ci	}
4438c2ecf20Sopenharmony_ciout:
4448c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
4458c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&head);
4468c2ecf20Sopenharmony_ci	return ret;
4478c2ecf20Sopenharmony_ci}
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci/*
4508c2ecf20Sopenharmony_ci * Mark a pnfs_layout_hdr and all associated layout segments as invalid
4518c2ecf20Sopenharmony_ci *
4528c2ecf20Sopenharmony_ci * In order to continue using the pnfs_layout_hdr, a full recovery
4538c2ecf20Sopenharmony_ci * is required.
4548c2ecf20Sopenharmony_ci * Note that caller must hold inode->i_lock.
4558c2ecf20Sopenharmony_ci */
4568c2ecf20Sopenharmony_ciint
4578c2ecf20Sopenharmony_cipnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
4588c2ecf20Sopenharmony_ci		struct list_head *lseg_list)
4598c2ecf20Sopenharmony_ci{
4608c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
4618c2ecf20Sopenharmony_ci		.iomode = IOMODE_ANY,
4628c2ecf20Sopenharmony_ci		.offset = 0,
4638c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
4648c2ecf20Sopenharmony_ci	};
4658c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
4688c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
4698c2ecf20Sopenharmony_ci		pnfs_clear_lseg_state(lseg, lseg_list);
4708c2ecf20Sopenharmony_ci	pnfs_clear_layoutreturn_info(lo);
4718c2ecf20Sopenharmony_ci	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
4728c2ecf20Sopenharmony_ci	set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
4738c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
4748c2ecf20Sopenharmony_ci	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
4758c2ecf20Sopenharmony_ci		pnfs_clear_layoutreturn_waitbit(lo);
4768c2ecf20Sopenharmony_ci	return !list_empty(&lo->plh_segs);
4778c2ecf20Sopenharmony_ci}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_cistatic int
4808c2ecf20Sopenharmony_cipnfs_iomode_to_fail_bit(u32 iomode)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	return iomode == IOMODE_RW ?
4838c2ecf20Sopenharmony_ci		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
4848c2ecf20Sopenharmony_ci}
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_cistatic void
4878c2ecf20Sopenharmony_cipnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
4888c2ecf20Sopenharmony_ci{
4898c2ecf20Sopenharmony_ci	lo->plh_retry_timestamp = jiffies;
4908c2ecf20Sopenharmony_ci	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
4918c2ecf20Sopenharmony_ci		refcount_inc(&lo->plh_refcount);
4928c2ecf20Sopenharmony_ci}
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_cistatic void
4958c2ecf20Sopenharmony_cipnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
4968c2ecf20Sopenharmony_ci{
4978c2ecf20Sopenharmony_ci	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
4988c2ecf20Sopenharmony_ci		refcount_dec(&lo->plh_refcount);
4998c2ecf20Sopenharmony_ci}
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_cistatic void
5028c2ecf20Sopenharmony_cipnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
5038c2ecf20Sopenharmony_ci{
5048c2ecf20Sopenharmony_ci	struct inode *inode = lo->plh_inode;
5058c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
5068c2ecf20Sopenharmony_ci		.iomode = iomode,
5078c2ecf20Sopenharmony_ci		.offset = 0,
5088c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
5098c2ecf20Sopenharmony_ci	};
5108c2ecf20Sopenharmony_ci	LIST_HEAD(head);
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
5138c2ecf20Sopenharmony_ci	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
5148c2ecf20Sopenharmony_ci	pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
5158c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
5168c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&head);
5178c2ecf20Sopenharmony_ci	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
5188c2ecf20Sopenharmony_ci			iomode == IOMODE_RW ?  "RW" : "READ");
5198c2ecf20Sopenharmony_ci}
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_cistatic bool
5228c2ecf20Sopenharmony_cipnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
5238c2ecf20Sopenharmony_ci{
5248c2ecf20Sopenharmony_ci	unsigned long start, end;
5258c2ecf20Sopenharmony_ci	int fail_bit = pnfs_iomode_to_fail_bit(iomode);
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	if (test_bit(fail_bit, &lo->plh_flags) == 0)
5288c2ecf20Sopenharmony_ci		return false;
5298c2ecf20Sopenharmony_ci	end = jiffies;
5308c2ecf20Sopenharmony_ci	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
5318c2ecf20Sopenharmony_ci	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
5328c2ecf20Sopenharmony_ci		/* It is time to retry the failed layoutgets */
5338c2ecf20Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, fail_bit);
5348c2ecf20Sopenharmony_ci		return false;
5358c2ecf20Sopenharmony_ci	}
5368c2ecf20Sopenharmony_ci	return true;
5378c2ecf20Sopenharmony_ci}
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_cistatic void
5408c2ecf20Sopenharmony_cipnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
5418c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *range,
5428c2ecf20Sopenharmony_ci		const nfs4_stateid *stateid)
5438c2ecf20Sopenharmony_ci{
5448c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_list);
5458c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_lc_list);
5468c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lseg->pls_commits);
5478c2ecf20Sopenharmony_ci	refcount_set(&lseg->pls_refcount, 1);
5488c2ecf20Sopenharmony_ci	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
5498c2ecf20Sopenharmony_ci	lseg->pls_layout = lo;
5508c2ecf20Sopenharmony_ci	lseg->pls_range = *range;
5518c2ecf20Sopenharmony_ci	lseg->pls_seq = be32_to_cpu(stateid->seqid);
5528c2ecf20Sopenharmony_ci}
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_cistatic void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
5558c2ecf20Sopenharmony_ci{
5568c2ecf20Sopenharmony_ci	if (lseg != NULL) {
5578c2ecf20Sopenharmony_ci		struct inode *inode = lseg->pls_layout->plh_inode;
5588c2ecf20Sopenharmony_ci		NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
5598c2ecf20Sopenharmony_ci	}
5608c2ecf20Sopenharmony_ci}
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_cistatic void
5638c2ecf20Sopenharmony_cipnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
5648c2ecf20Sopenharmony_ci		struct pnfs_layout_segment *lseg)
5658c2ecf20Sopenharmony_ci{
5668c2ecf20Sopenharmony_ci	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
5678c2ecf20Sopenharmony_ci	list_del_init(&lseg->pls_list);
5688c2ecf20Sopenharmony_ci	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
5698c2ecf20Sopenharmony_ci	refcount_dec(&lo->plh_refcount);
5708c2ecf20Sopenharmony_ci	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
5718c2ecf20Sopenharmony_ci		return;
5728c2ecf20Sopenharmony_ci	if (list_empty(&lo->plh_segs) &&
5738c2ecf20Sopenharmony_ci	    !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
5748c2ecf20Sopenharmony_ci	    !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
5758c2ecf20Sopenharmony_ci		if (atomic_read(&lo->plh_outstanding) == 0)
5768c2ecf20Sopenharmony_ci			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
5778c2ecf20Sopenharmony_ci		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
5788c2ecf20Sopenharmony_ci	}
5798c2ecf20Sopenharmony_ci}
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_cistatic bool
5828c2ecf20Sopenharmony_cipnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
5838c2ecf20Sopenharmony_ci		struct pnfs_layout_segment *lseg)
5848c2ecf20Sopenharmony_ci{
5858c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
5868c2ecf20Sopenharmony_ci	    pnfs_layout_is_valid(lo)) {
5878c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
5888c2ecf20Sopenharmony_ci		list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
5898c2ecf20Sopenharmony_ci		return true;
5908c2ecf20Sopenharmony_ci	}
5918c2ecf20Sopenharmony_ci	return false;
5928c2ecf20Sopenharmony_ci}
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_civoid
5958c2ecf20Sopenharmony_cipnfs_put_lseg(struct pnfs_layout_segment *lseg)
5968c2ecf20Sopenharmony_ci{
5978c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
5988c2ecf20Sopenharmony_ci	struct inode *inode;
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci	if (!lseg)
6018c2ecf20Sopenharmony_ci		return;
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_ci	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
6048c2ecf20Sopenharmony_ci		refcount_read(&lseg->pls_refcount),
6058c2ecf20Sopenharmony_ci		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	lo = lseg->pls_layout;
6088c2ecf20Sopenharmony_ci	inode = lo->plh_inode;
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
6118c2ecf20Sopenharmony_ci		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
6128c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
6138c2ecf20Sopenharmony_ci			return;
6148c2ecf20Sopenharmony_ci		}
6158c2ecf20Sopenharmony_ci		pnfs_get_layout_hdr(lo);
6168c2ecf20Sopenharmony_ci		pnfs_layout_remove_lseg(lo, lseg);
6178c2ecf20Sopenharmony_ci		if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
6188c2ecf20Sopenharmony_ci			lseg = NULL;
6198c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
6208c2ecf20Sopenharmony_ci		pnfs_free_lseg(lseg);
6218c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
6228c2ecf20Sopenharmony_ci	}
6238c2ecf20Sopenharmony_ci}
6248c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_put_lseg);
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci/*
6278c2ecf20Sopenharmony_ci * is l2 fully contained in l1?
6288c2ecf20Sopenharmony_ci *   start1                             end1
6298c2ecf20Sopenharmony_ci *   [----------------------------------)
6308c2ecf20Sopenharmony_ci *           start2           end2
6318c2ecf20Sopenharmony_ci *           [----------------)
6328c2ecf20Sopenharmony_ci */
6338c2ecf20Sopenharmony_cistatic bool
6348c2ecf20Sopenharmony_cipnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
6358c2ecf20Sopenharmony_ci		 const struct pnfs_layout_range *l2)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	u64 start1 = l1->offset;
6388c2ecf20Sopenharmony_ci	u64 end1 = pnfs_end_offset(start1, l1->length);
6398c2ecf20Sopenharmony_ci	u64 start2 = l2->offset;
6408c2ecf20Sopenharmony_ci	u64 end2 = pnfs_end_offset(start2, l2->length);
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	return (start1 <= start2) && (end1 >= end2);
6438c2ecf20Sopenharmony_ci}
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
6468c2ecf20Sopenharmony_ci		struct list_head *tmp_list)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	if (!refcount_dec_and_test(&lseg->pls_refcount))
6498c2ecf20Sopenharmony_ci		return false;
6508c2ecf20Sopenharmony_ci	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
6518c2ecf20Sopenharmony_ci	list_add(&lseg->pls_list, tmp_list);
6528c2ecf20Sopenharmony_ci	return true;
6538c2ecf20Sopenharmony_ci}
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci/* Returns 1 if lseg is removed from list, 0 otherwise */
6568c2ecf20Sopenharmony_cistatic int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
6578c2ecf20Sopenharmony_ci			     struct list_head *tmp_list)
6588c2ecf20Sopenharmony_ci{
6598c2ecf20Sopenharmony_ci	int rv = 0;
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
6628c2ecf20Sopenharmony_ci		/* Remove the reference keeping the lseg in the
6638c2ecf20Sopenharmony_ci		 * list.  It will now be removed when all
6648c2ecf20Sopenharmony_ci		 * outstanding io is finished.
6658c2ecf20Sopenharmony_ci		 */
6668c2ecf20Sopenharmony_ci		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
6678c2ecf20Sopenharmony_ci			refcount_read(&lseg->pls_refcount));
6688c2ecf20Sopenharmony_ci		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
6698c2ecf20Sopenharmony_ci			rv = 1;
6708c2ecf20Sopenharmony_ci	}
6718c2ecf20Sopenharmony_ci	return rv;
6728c2ecf20Sopenharmony_ci}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_cistatic bool
6758c2ecf20Sopenharmony_cipnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
6768c2ecf20Sopenharmony_ci		 const struct pnfs_layout_range *recall_range)
6778c2ecf20Sopenharmony_ci{
6788c2ecf20Sopenharmony_ci	return (recall_range->iomode == IOMODE_ANY ||
6798c2ecf20Sopenharmony_ci		lseg_range->iomode == recall_range->iomode) &&
6808c2ecf20Sopenharmony_ci	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
6818c2ecf20Sopenharmony_ci}
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_cistatic bool
6848c2ecf20Sopenharmony_cipnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
6858c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *recall_range,
6868c2ecf20Sopenharmony_ci		u32 seq)
6878c2ecf20Sopenharmony_ci{
6888c2ecf20Sopenharmony_ci	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
6898c2ecf20Sopenharmony_ci		return false;
6908c2ecf20Sopenharmony_ci	if (recall_range == NULL)
6918c2ecf20Sopenharmony_ci		return true;
6928c2ecf20Sopenharmony_ci	return pnfs_should_free_range(&lseg->pls_range, recall_range);
6938c2ecf20Sopenharmony_ci}
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci/**
6968c2ecf20Sopenharmony_ci * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
6978c2ecf20Sopenharmony_ci * @lo: layout header containing the lsegs
6988c2ecf20Sopenharmony_ci * @tmp_list: list head where doomed lsegs should go
6998c2ecf20Sopenharmony_ci * @recall_range: optional recall range argument to match (may be NULL)
7008c2ecf20Sopenharmony_ci * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
7018c2ecf20Sopenharmony_ci *
7028c2ecf20Sopenharmony_ci * Walk the list of lsegs in the layout header, and tear down any that should
7038c2ecf20Sopenharmony_ci * be destroyed. If "recall_range" is specified then the segment must match
7048c2ecf20Sopenharmony_ci * that range. If "seq" is non-zero, then only match segments that were handed
7058c2ecf20Sopenharmony_ci * out at or before that sequence.
7068c2ecf20Sopenharmony_ci *
7078c2ecf20Sopenharmony_ci * Returns number of matching invalid lsegs remaining in list after scanning
7088c2ecf20Sopenharmony_ci * it and purging them.
7098c2ecf20Sopenharmony_ci */
7108c2ecf20Sopenharmony_ciint
7118c2ecf20Sopenharmony_cipnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
7128c2ecf20Sopenharmony_ci			    struct list_head *tmp_list,
7138c2ecf20Sopenharmony_ci			    const struct pnfs_layout_range *recall_range,
7148c2ecf20Sopenharmony_ci			    u32 seq)
7158c2ecf20Sopenharmony_ci{
7168c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
7178c2ecf20Sopenharmony_ci	int remaining = 0;
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci	dprintk("%s:Begin lo %p\n", __func__, lo);
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	if (list_empty(&lo->plh_segs))
7228c2ecf20Sopenharmony_ci		return 0;
7238c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
7248c2ecf20Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
7258c2ecf20Sopenharmony_ci			dprintk("%s: freeing lseg %p iomode %d seq %u "
7268c2ecf20Sopenharmony_ci				"offset %llu length %llu\n", __func__,
7278c2ecf20Sopenharmony_ci				lseg, lseg->pls_range.iomode, lseg->pls_seq,
7288c2ecf20Sopenharmony_ci				lseg->pls_range.offset, lseg->pls_range.length);
7298c2ecf20Sopenharmony_ci			if (!mark_lseg_invalid(lseg, tmp_list))
7308c2ecf20Sopenharmony_ci				remaining++;
7318c2ecf20Sopenharmony_ci		}
7328c2ecf20Sopenharmony_ci	dprintk("%s:Return %i\n", __func__, remaining);
7338c2ecf20Sopenharmony_ci	return remaining;
7348c2ecf20Sopenharmony_ci}
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_cistatic void
7378c2ecf20Sopenharmony_cipnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
7388c2ecf20Sopenharmony_ci		struct list_head *free_me,
7398c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *range,
7408c2ecf20Sopenharmony_ci		u32 seq)
7418c2ecf20Sopenharmony_ci{
7428c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
7458c2ecf20Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, range, seq))
7468c2ecf20Sopenharmony_ci			list_move_tail(&lseg->pls_list, free_me);
7478c2ecf20Sopenharmony_ci	}
7488c2ecf20Sopenharmony_ci}
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci/* note free_me must contain lsegs from a single layout_hdr */
7518c2ecf20Sopenharmony_civoid
7528c2ecf20Sopenharmony_cipnfs_free_lseg_list(struct list_head *free_me)
7538c2ecf20Sopenharmony_ci{
7548c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_ci	if (list_empty(free_me))
7578c2ecf20Sopenharmony_ci		return;
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
7608c2ecf20Sopenharmony_ci		list_del(&lseg->pls_list);
7618c2ecf20Sopenharmony_ci		pnfs_free_lseg(lseg);
7628c2ecf20Sopenharmony_ci	}
7638c2ecf20Sopenharmony_ci}
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
7668c2ecf20Sopenharmony_ci{
7678c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
7688c2ecf20Sopenharmony_ci	LIST_HEAD(tmp_list);
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	spin_lock(&nfsi->vfs_inode.i_lock);
7718c2ecf20Sopenharmony_ci	lo = nfsi->layout;
7728c2ecf20Sopenharmony_ci	if (lo) {
7738c2ecf20Sopenharmony_ci		pnfs_get_layout_hdr(lo);
7748c2ecf20Sopenharmony_ci		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
7758c2ecf20Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
7768c2ecf20Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
7778c2ecf20Sopenharmony_ci		spin_unlock(&nfsi->vfs_inode.i_lock);
7788c2ecf20Sopenharmony_ci		pnfs_free_lseg_list(&tmp_list);
7798c2ecf20Sopenharmony_ci		nfs_commit_inode(&nfsi->vfs_inode, 0);
7808c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
7818c2ecf20Sopenharmony_ci	} else
7828c2ecf20Sopenharmony_ci		spin_unlock(&nfsi->vfs_inode.i_lock);
7838c2ecf20Sopenharmony_ci	return lo;
7848c2ecf20Sopenharmony_ci}
7858c2ecf20Sopenharmony_ci
/*
 * Exported entry point around __pnfs_destroy_layout(); the layout header
 * pointer returned by the helper is discarded.
 */
void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	__pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_cistatic bool pnfs_layout_removed(struct nfs_inode *nfsi,
7938c2ecf20Sopenharmony_ci				struct pnfs_layout_hdr *lo)
7948c2ecf20Sopenharmony_ci{
7958c2ecf20Sopenharmony_ci	bool ret;
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	spin_lock(&nfsi->vfs_inode.i_lock);
7988c2ecf20Sopenharmony_ci	ret = nfsi->layout != lo;
7998c2ecf20Sopenharmony_ci	spin_unlock(&nfsi->vfs_inode.i_lock);
8008c2ecf20Sopenharmony_ci	return ret;
8018c2ecf20Sopenharmony_ci}
8028c2ecf20Sopenharmony_ci
/*
 * Destroy the layout of @nfsi and, if there was one, wait until
 * nfsi->layout no longer refers to it.
 */
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;

	lo = __pnfs_destroy_layout(nfsi);
	if (!lo)
		return;
	/* lo serves as the wait key and as a value to compare against */
	wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
}
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_cistatic bool
8128c2ecf20Sopenharmony_cipnfs_layout_add_bulk_destroy_list(struct inode *inode,
8138c2ecf20Sopenharmony_ci		struct list_head *layout_list)
8148c2ecf20Sopenharmony_ci{
8158c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
8168c2ecf20Sopenharmony_ci	bool ret = false;
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
8198c2ecf20Sopenharmony_ci	lo = NFS_I(inode)->layout;
8208c2ecf20Sopenharmony_ci	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
8218c2ecf20Sopenharmony_ci		pnfs_get_layout_hdr(lo);
8228c2ecf20Sopenharmony_ci		list_add(&lo->plh_bulk_destroy, layout_list);
8238c2ecf20Sopenharmony_ci		ret = true;
8248c2ecf20Sopenharmony_ci	}
8258c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
8268c2ecf20Sopenharmony_ci	return ret;
8278c2ecf20Sopenharmony_ci}
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci/* Caller must hold rcu_read_lock and clp->cl_lock */
8308c2ecf20Sopenharmony_cistatic int
8318c2ecf20Sopenharmony_cipnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
8328c2ecf20Sopenharmony_ci		struct nfs_server *server,
8338c2ecf20Sopenharmony_ci		struct list_head *layout_list)
8348c2ecf20Sopenharmony_ci	__must_hold(&clp->cl_lock)
8358c2ecf20Sopenharmony_ci	__must_hold(RCU)
8368c2ecf20Sopenharmony_ci{
8378c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo, *next;
8388c2ecf20Sopenharmony_ci	struct inode *inode;
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
8418c2ecf20Sopenharmony_ci		if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
8428c2ecf20Sopenharmony_ci		    test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
8438c2ecf20Sopenharmony_ci		    !list_empty(&lo->plh_bulk_destroy))
8448c2ecf20Sopenharmony_ci			continue;
8458c2ecf20Sopenharmony_ci		/* If the sb is being destroyed, just bail */
8468c2ecf20Sopenharmony_ci		if (!nfs_sb_active(server->super))
8478c2ecf20Sopenharmony_ci			break;
8488c2ecf20Sopenharmony_ci		inode = pnfs_grab_inode_layout_hdr(lo);
8498c2ecf20Sopenharmony_ci		if (inode != NULL) {
8508c2ecf20Sopenharmony_ci			if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
8518c2ecf20Sopenharmony_ci				list_del_rcu(&lo->plh_layouts);
8528c2ecf20Sopenharmony_ci			if (pnfs_layout_add_bulk_destroy_list(inode,
8538c2ecf20Sopenharmony_ci						layout_list))
8548c2ecf20Sopenharmony_ci				continue;
8558c2ecf20Sopenharmony_ci			rcu_read_unlock();
8568c2ecf20Sopenharmony_ci			spin_unlock(&clp->cl_lock);
8578c2ecf20Sopenharmony_ci			iput(inode);
8588c2ecf20Sopenharmony_ci		} else {
8598c2ecf20Sopenharmony_ci			rcu_read_unlock();
8608c2ecf20Sopenharmony_ci			spin_unlock(&clp->cl_lock);
8618c2ecf20Sopenharmony_ci		}
8628c2ecf20Sopenharmony_ci		nfs_sb_deactive(server->super);
8638c2ecf20Sopenharmony_ci		spin_lock(&clp->cl_lock);
8648c2ecf20Sopenharmony_ci		rcu_read_lock();
8658c2ecf20Sopenharmony_ci		return -EAGAIN;
8668c2ecf20Sopenharmony_ci	}
8678c2ecf20Sopenharmony_ci	return 0;
8688c2ecf20Sopenharmony_ci}
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_cistatic int
8718c2ecf20Sopenharmony_cipnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
8728c2ecf20Sopenharmony_ci		bool is_bulk_recall)
8738c2ecf20Sopenharmony_ci{
8748c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
8758c2ecf20Sopenharmony_ci	struct inode *inode;
8768c2ecf20Sopenharmony_ci	LIST_HEAD(lseg_list);
8778c2ecf20Sopenharmony_ci	int ret = 0;
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	while (!list_empty(layout_list)) {
8808c2ecf20Sopenharmony_ci		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
8818c2ecf20Sopenharmony_ci				plh_bulk_destroy);
8828c2ecf20Sopenharmony_ci		dprintk("%s freeing layout for inode %lu\n", __func__,
8838c2ecf20Sopenharmony_ci			lo->plh_inode->i_ino);
8848c2ecf20Sopenharmony_ci		inode = lo->plh_inode;
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci		pnfs_layoutcommit_inode(inode, false);
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
8898c2ecf20Sopenharmony_ci		list_del_init(&lo->plh_bulk_destroy);
8908c2ecf20Sopenharmony_ci		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
8918c2ecf20Sopenharmony_ci			if (is_bulk_recall)
8928c2ecf20Sopenharmony_ci				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
8938c2ecf20Sopenharmony_ci			ret = -EAGAIN;
8948c2ecf20Sopenharmony_ci		}
8958c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
8968c2ecf20Sopenharmony_ci		pnfs_free_lseg_list(&lseg_list);
8978c2ecf20Sopenharmony_ci		/* Free all lsegs that are attached to commit buckets */
8988c2ecf20Sopenharmony_ci		nfs_commit_inode(inode, 0);
8998c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
9008c2ecf20Sopenharmony_ci		nfs_iput_and_deactive(inode);
9018c2ecf20Sopenharmony_ci	}
9028c2ecf20Sopenharmony_ci	return ret;
9038c2ecf20Sopenharmony_ci}
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ciint
9068c2ecf20Sopenharmony_cipnfs_destroy_layouts_byfsid(struct nfs_client *clp,
9078c2ecf20Sopenharmony_ci		struct nfs_fsid *fsid,
9088c2ecf20Sopenharmony_ci		bool is_recall)
9098c2ecf20Sopenharmony_ci{
9108c2ecf20Sopenharmony_ci	struct nfs_server *server;
9118c2ecf20Sopenharmony_ci	LIST_HEAD(layout_list);
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci	spin_lock(&clp->cl_lock);
9148c2ecf20Sopenharmony_ci	rcu_read_lock();
9158c2ecf20Sopenharmony_cirestart:
9168c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
9178c2ecf20Sopenharmony_ci		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
9188c2ecf20Sopenharmony_ci			continue;
9198c2ecf20Sopenharmony_ci		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
9208c2ecf20Sopenharmony_ci				server,
9218c2ecf20Sopenharmony_ci				&layout_list) != 0)
9228c2ecf20Sopenharmony_ci			goto restart;
9238c2ecf20Sopenharmony_ci	}
9248c2ecf20Sopenharmony_ci	rcu_read_unlock();
9258c2ecf20Sopenharmony_ci	spin_unlock(&clp->cl_lock);
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci	if (list_empty(&layout_list))
9288c2ecf20Sopenharmony_ci		return 0;
9298c2ecf20Sopenharmony_ci	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
9308c2ecf20Sopenharmony_ci}
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ciint
9338c2ecf20Sopenharmony_cipnfs_destroy_layouts_byclid(struct nfs_client *clp,
9348c2ecf20Sopenharmony_ci		bool is_recall)
9358c2ecf20Sopenharmony_ci{
9368c2ecf20Sopenharmony_ci	struct nfs_server *server;
9378c2ecf20Sopenharmony_ci	LIST_HEAD(layout_list);
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	spin_lock(&clp->cl_lock);
9408c2ecf20Sopenharmony_ci	rcu_read_lock();
9418c2ecf20Sopenharmony_cirestart:
9428c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
9438c2ecf20Sopenharmony_ci		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
9448c2ecf20Sopenharmony_ci					server,
9458c2ecf20Sopenharmony_ci					&layout_list) != 0)
9468c2ecf20Sopenharmony_ci			goto restart;
9478c2ecf20Sopenharmony_ci	}
9488c2ecf20Sopenharmony_ci	rcu_read_unlock();
9498c2ecf20Sopenharmony_ci	spin_unlock(&clp->cl_lock);
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_ci	if (list_empty(&layout_list))
9528c2ecf20Sopenharmony_ci		return 0;
9538c2ecf20Sopenharmony_ci	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
9548c2ecf20Sopenharmony_ci}
9558c2ecf20Sopenharmony_ci
/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 *
 * Calls nfs4_deviceid_mark_client_invalid() and nfs4_deviceid_purge_client()
 * on @clp first, then destroys the client's layouts through
 * pnfs_destroy_layouts_byclid() with is_recall set to false. The return
 * value of that last call is ignored.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_cistatic void
9708c2ecf20Sopenharmony_cipnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
9718c2ecf20Sopenharmony_ci{
9728c2ecf20Sopenharmony_ci	const struct cred *old;
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) {
9758c2ecf20Sopenharmony_ci		old = xchg(&lo->plh_lc_cred, get_cred(cred));
9768c2ecf20Sopenharmony_ci		put_cred(old);
9778c2ecf20Sopenharmony_ci	}
9788c2ecf20Sopenharmony_ci}
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci/* update lo->plh_stateid with new if is more recent */
9818c2ecf20Sopenharmony_civoid
9828c2ecf20Sopenharmony_cipnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
9838c2ecf20Sopenharmony_ci			const struct cred *cred, bool update_barrier)
9848c2ecf20Sopenharmony_ci{
9858c2ecf20Sopenharmony_ci	u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
9868c2ecf20Sopenharmony_ci	u32 newseq = be32_to_cpu(new->seqid);
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_ci	if (!pnfs_layout_is_valid(lo)) {
9898c2ecf20Sopenharmony_ci		pnfs_set_layout_cred(lo, cred);
9908c2ecf20Sopenharmony_ci		nfs4_stateid_copy(&lo->plh_stateid, new);
9918c2ecf20Sopenharmony_ci		lo->plh_barrier = newseq;
9928c2ecf20Sopenharmony_ci		pnfs_clear_layoutreturn_info(lo);
9938c2ecf20Sopenharmony_ci		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
9948c2ecf20Sopenharmony_ci		return;
9958c2ecf20Sopenharmony_ci	}
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	if (pnfs_seqid_is_newer(newseq, oldseq))
9988c2ecf20Sopenharmony_ci		nfs4_stateid_copy(&lo->plh_stateid, new);
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_ci	if (update_barrier) {
10018c2ecf20Sopenharmony_ci		pnfs_barrier_update(lo, newseq);
10028c2ecf20Sopenharmony_ci		return;
10038c2ecf20Sopenharmony_ci	}
10048c2ecf20Sopenharmony_ci	/*
10058c2ecf20Sopenharmony_ci	 * Because of wraparound, we want to keep the barrier
10068c2ecf20Sopenharmony_ci	 * "close" to the current seqids. We really only want to
10078c2ecf20Sopenharmony_ci	 * get here from a layoutget call.
10088c2ecf20Sopenharmony_ci	 */
10098c2ecf20Sopenharmony_ci	if (atomic_read(&lo->plh_outstanding) == 1)
10108c2ecf20Sopenharmony_ci		 pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_cistatic bool
10148c2ecf20Sopenharmony_cipnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
10158c2ecf20Sopenharmony_ci		const nfs4_stateid *stateid)
10168c2ecf20Sopenharmony_ci{
10178c2ecf20Sopenharmony_ci	u32 seqid = be32_to_cpu(stateid->seqid);
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci	return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
10208c2ecf20Sopenharmony_ci}
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci/* lget is set to 1 if called from inside send_layoutget call chain */
10238c2ecf20Sopenharmony_cistatic bool
10248c2ecf20Sopenharmony_cipnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
10258c2ecf20Sopenharmony_ci{
10268c2ecf20Sopenharmony_ci	return lo->plh_block_lgets ||
10278c2ecf20Sopenharmony_ci		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
10288c2ecf20Sopenharmony_ci}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_cistatic struct nfs_server *
10318c2ecf20Sopenharmony_cipnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
10328c2ecf20Sopenharmony_ci{
10338c2ecf20Sopenharmony_ci	struct nfs_server *server;
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_ci	if (inode) {
10368c2ecf20Sopenharmony_ci		server = NFS_SERVER(inode);
10378c2ecf20Sopenharmony_ci	} else {
10388c2ecf20Sopenharmony_ci		struct dentry *parent_dir = dget_parent(ctx->dentry);
10398c2ecf20Sopenharmony_ci		server = NFS_SERVER(parent_dir->d_inode);
10408c2ecf20Sopenharmony_ci		dput(parent_dir);
10418c2ecf20Sopenharmony_ci	}
10428c2ecf20Sopenharmony_ci	return server;
10438c2ecf20Sopenharmony_ci}
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_cistatic void nfs4_free_pages(struct page **pages, size_t size)
10468c2ecf20Sopenharmony_ci{
10478c2ecf20Sopenharmony_ci	int i;
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	if (!pages)
10508c2ecf20Sopenharmony_ci		return;
10518c2ecf20Sopenharmony_ci
10528c2ecf20Sopenharmony_ci	for (i = 0; i < size; i++) {
10538c2ecf20Sopenharmony_ci		if (!pages[i])
10548c2ecf20Sopenharmony_ci			break;
10558c2ecf20Sopenharmony_ci		__free_page(pages[i]);
10568c2ecf20Sopenharmony_ci	}
10578c2ecf20Sopenharmony_ci	kfree(pages);
10588c2ecf20Sopenharmony_ci}
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_cistatic struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
10618c2ecf20Sopenharmony_ci{
10628c2ecf20Sopenharmony_ci	struct page **pages;
10638c2ecf20Sopenharmony_ci	int i;
10648c2ecf20Sopenharmony_ci
10658c2ecf20Sopenharmony_ci	pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
10668c2ecf20Sopenharmony_ci	if (!pages) {
10678c2ecf20Sopenharmony_ci		dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
10688c2ecf20Sopenharmony_ci		return NULL;
10698c2ecf20Sopenharmony_ci	}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	for (i = 0; i < size; i++) {
10728c2ecf20Sopenharmony_ci		pages[i] = alloc_page(gfp_flags);
10738c2ecf20Sopenharmony_ci		if (!pages[i]) {
10748c2ecf20Sopenharmony_ci			dprintk("%s: failed to allocate page\n", __func__);
10758c2ecf20Sopenharmony_ci			nfs4_free_pages(pages, i);
10768c2ecf20Sopenharmony_ci			return NULL;
10778c2ecf20Sopenharmony_ci		}
10788c2ecf20Sopenharmony_ci	}
10798c2ecf20Sopenharmony_ci
10808c2ecf20Sopenharmony_ci	return pages;
10818c2ecf20Sopenharmony_ci}
10828c2ecf20Sopenharmony_ci
10838c2ecf20Sopenharmony_cistatic struct nfs4_layoutget *
10848c2ecf20Sopenharmony_cipnfs_alloc_init_layoutget_args(struct inode *ino,
10858c2ecf20Sopenharmony_ci	   struct nfs_open_context *ctx,
10868c2ecf20Sopenharmony_ci	   const nfs4_stateid *stateid,
10878c2ecf20Sopenharmony_ci	   const struct pnfs_layout_range *range,
10888c2ecf20Sopenharmony_ci	   gfp_t gfp_flags)
10898c2ecf20Sopenharmony_ci{
10908c2ecf20Sopenharmony_ci	struct nfs_server *server = pnfs_find_server(ino, ctx);
10918c2ecf20Sopenharmony_ci	size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
10928c2ecf20Sopenharmony_ci	size_t max_pages = max_response_pages(server);
10938c2ecf20Sopenharmony_ci	struct nfs4_layoutget *lgp;
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_ci	dprintk("--> %s\n", __func__);
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	lgp = kzalloc(sizeof(*lgp), gfp_flags);
10988c2ecf20Sopenharmony_ci	if (lgp == NULL)
10998c2ecf20Sopenharmony_ci		return NULL;
11008c2ecf20Sopenharmony_ci
11018c2ecf20Sopenharmony_ci	if (max_reply_sz) {
11028c2ecf20Sopenharmony_ci		size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
11038c2ecf20Sopenharmony_ci		if (npages < max_pages)
11048c2ecf20Sopenharmony_ci			max_pages = npages;
11058c2ecf20Sopenharmony_ci	}
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_ci	lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
11088c2ecf20Sopenharmony_ci	if (!lgp->args.layout.pages) {
11098c2ecf20Sopenharmony_ci		kfree(lgp);
11108c2ecf20Sopenharmony_ci		return NULL;
11118c2ecf20Sopenharmony_ci	}
11128c2ecf20Sopenharmony_ci	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
11138c2ecf20Sopenharmony_ci	lgp->res.layoutp = &lgp->args.layout;
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci	/* Don't confuse uninitialised result and success */
11168c2ecf20Sopenharmony_ci	lgp->res.status = -NFS4ERR_DELAY;
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ci	lgp->args.minlength = PAGE_SIZE;
11198c2ecf20Sopenharmony_ci	if (lgp->args.minlength > range->length)
11208c2ecf20Sopenharmony_ci		lgp->args.minlength = range->length;
11218c2ecf20Sopenharmony_ci	if (ino) {
11228c2ecf20Sopenharmony_ci		loff_t i_size = i_size_read(ino);
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci		if (range->iomode == IOMODE_READ) {
11258c2ecf20Sopenharmony_ci			if (range->offset >= i_size)
11268c2ecf20Sopenharmony_ci				lgp->args.minlength = 0;
11278c2ecf20Sopenharmony_ci			else if (i_size - range->offset < lgp->args.minlength)
11288c2ecf20Sopenharmony_ci				lgp->args.minlength = i_size - range->offset;
11298c2ecf20Sopenharmony_ci		}
11308c2ecf20Sopenharmony_ci	}
11318c2ecf20Sopenharmony_ci	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
11328c2ecf20Sopenharmony_ci	pnfs_copy_range(&lgp->args.range, range);
11338c2ecf20Sopenharmony_ci	lgp->args.type = server->pnfs_curr_ld->id;
11348c2ecf20Sopenharmony_ci	lgp->args.inode = ino;
11358c2ecf20Sopenharmony_ci	lgp->args.ctx = get_nfs_open_context(ctx);
11368c2ecf20Sopenharmony_ci	nfs4_stateid_copy(&lgp->args.stateid, stateid);
11378c2ecf20Sopenharmony_ci	lgp->gfp_flags = gfp_flags;
11388c2ecf20Sopenharmony_ci	lgp->cred = ctx->cred;
11398c2ecf20Sopenharmony_ci	return lgp;
11408c2ecf20Sopenharmony_ci}
11418c2ecf20Sopenharmony_ci
11428c2ecf20Sopenharmony_civoid pnfs_layoutget_free(struct nfs4_layoutget *lgp)
11438c2ecf20Sopenharmony_ci{
11448c2ecf20Sopenharmony_ci	size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
11458c2ecf20Sopenharmony_ci
11468c2ecf20Sopenharmony_ci	nfs4_free_pages(lgp->args.layout.pages, max_pages);
11478c2ecf20Sopenharmony_ci	if (lgp->args.inode)
11488c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
11498c2ecf20Sopenharmony_ci	put_nfs_open_context(lgp->args.ctx);
11508c2ecf20Sopenharmony_ci	kfree(lgp);
11518c2ecf20Sopenharmony_ci}
11528c2ecf20Sopenharmony_ci
11538c2ecf20Sopenharmony_cistatic void pnfs_clear_layoutcommit(struct inode *inode,
11548c2ecf20Sopenharmony_ci		struct list_head *head)
11558c2ecf20Sopenharmony_ci{
11568c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
11578c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
11588c2ecf20Sopenharmony_ci
11598c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
11608c2ecf20Sopenharmony_ci		return;
11618c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
11628c2ecf20Sopenharmony_ci		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
11638c2ecf20Sopenharmony_ci			continue;
11648c2ecf20Sopenharmony_ci		pnfs_lseg_dec_and_remove_zero(lseg, head);
11658c2ecf20Sopenharmony_ci	}
11668c2ecf20Sopenharmony_ci}
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_civoid pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
11698c2ecf20Sopenharmony_ci		const nfs4_stateid *arg_stateid,
11708c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *range,
11718c2ecf20Sopenharmony_ci		const nfs4_stateid *stateid)
11728c2ecf20Sopenharmony_ci{
11738c2ecf20Sopenharmony_ci	struct inode *inode = lo->plh_inode;
11748c2ecf20Sopenharmony_ci	LIST_HEAD(freeme);
11758c2ecf20Sopenharmony_ci
11768c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
11778c2ecf20Sopenharmony_ci	if (!pnfs_layout_is_valid(lo) || !arg_stateid ||
11788c2ecf20Sopenharmony_ci	    !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
11798c2ecf20Sopenharmony_ci		goto out_unlock;
11808c2ecf20Sopenharmony_ci	if (stateid) {
11818c2ecf20Sopenharmony_ci		u32 seq = be32_to_cpu(arg_stateid->seqid);
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci		pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
11848c2ecf20Sopenharmony_ci		pnfs_free_returned_lsegs(lo, &freeme, range, seq);
11858c2ecf20Sopenharmony_ci		pnfs_set_layout_stateid(lo, stateid, NULL, true);
11868c2ecf20Sopenharmony_ci	} else
11878c2ecf20Sopenharmony_ci		pnfs_mark_layout_stateid_invalid(lo, &freeme);
11888c2ecf20Sopenharmony_ciout_unlock:
11898c2ecf20Sopenharmony_ci	pnfs_clear_layoutreturn_waitbit(lo);
11908c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
11918c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&freeme);
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci}
11948c2ecf20Sopenharmony_ci
11958c2ecf20Sopenharmony_cistatic bool
11968c2ecf20Sopenharmony_cipnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
11978c2ecf20Sopenharmony_ci		nfs4_stateid *stateid,
11988c2ecf20Sopenharmony_ci		const struct cred **cred,
11998c2ecf20Sopenharmony_ci		enum pnfs_iomode *iomode)
12008c2ecf20Sopenharmony_ci{
12018c2ecf20Sopenharmony_ci	/* Serialise LAYOUTGET/LAYOUTRETURN */
12028c2ecf20Sopenharmony_ci	if (atomic_read(&lo->plh_outstanding) != 0)
12038c2ecf20Sopenharmony_ci		return false;
12048c2ecf20Sopenharmony_ci	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
12058c2ecf20Sopenharmony_ci		return false;
12068c2ecf20Sopenharmony_ci	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
12078c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(lo);
12088c2ecf20Sopenharmony_ci	nfs4_stateid_copy(stateid, &lo->plh_stateid);
12098c2ecf20Sopenharmony_ci	*cred = get_cred(lo->plh_lc_cred);
12108c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
12118c2ecf20Sopenharmony_ci		if (lo->plh_return_seq != 0)
12128c2ecf20Sopenharmony_ci			stateid->seqid = cpu_to_be32(lo->plh_return_seq);
12138c2ecf20Sopenharmony_ci		if (iomode != NULL)
12148c2ecf20Sopenharmony_ci			*iomode = lo->plh_return_iomode;
12158c2ecf20Sopenharmony_ci		pnfs_clear_layoutreturn_info(lo);
12168c2ecf20Sopenharmony_ci	} else if (iomode != NULL)
12178c2ecf20Sopenharmony_ci		*iomode = IOMODE_ANY;
12188c2ecf20Sopenharmony_ci	pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid));
12198c2ecf20Sopenharmony_ci	return true;
12208c2ecf20Sopenharmony_ci}
12218c2ecf20Sopenharmony_ci
12228c2ecf20Sopenharmony_cistatic void
12238c2ecf20Sopenharmony_cipnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
12248c2ecf20Sopenharmony_ci		struct pnfs_layout_hdr *lo,
12258c2ecf20Sopenharmony_ci		const nfs4_stateid *stateid,
12268c2ecf20Sopenharmony_ci		enum pnfs_iomode iomode)
12278c2ecf20Sopenharmony_ci{
12288c2ecf20Sopenharmony_ci	struct inode *inode = lo->plh_inode;
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ci	args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
12318c2ecf20Sopenharmony_ci	args->inode = inode;
12328c2ecf20Sopenharmony_ci	args->range.iomode = iomode;
12338c2ecf20Sopenharmony_ci	args->range.offset = 0;
12348c2ecf20Sopenharmony_ci	args->range.length = NFS4_MAX_UINT64;
12358c2ecf20Sopenharmony_ci	args->layout = lo;
12368c2ecf20Sopenharmony_ci	nfs4_stateid_copy(&args->stateid, stateid);
12378c2ecf20Sopenharmony_ci}
12388c2ecf20Sopenharmony_ci
12398c2ecf20Sopenharmony_cistatic int
12408c2ecf20Sopenharmony_cipnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
12418c2ecf20Sopenharmony_ci		       const nfs4_stateid *stateid,
12428c2ecf20Sopenharmony_ci		       const struct cred **pcred,
12438c2ecf20Sopenharmony_ci		       enum pnfs_iomode iomode,
12448c2ecf20Sopenharmony_ci		       bool sync)
12458c2ecf20Sopenharmony_ci{
12468c2ecf20Sopenharmony_ci	struct inode *ino = lo->plh_inode;
12478c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
12488c2ecf20Sopenharmony_ci	struct nfs4_layoutreturn *lrp;
12498c2ecf20Sopenharmony_ci	const struct cred *cred = *pcred;
12508c2ecf20Sopenharmony_ci	int status = 0;
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci	*pcred = NULL;
12538c2ecf20Sopenharmony_ci	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
12548c2ecf20Sopenharmony_ci	if (unlikely(lrp == NULL)) {
12558c2ecf20Sopenharmony_ci		status = -ENOMEM;
12568c2ecf20Sopenharmony_ci		spin_lock(&ino->i_lock);
12578c2ecf20Sopenharmony_ci		pnfs_clear_layoutreturn_waitbit(lo);
12588c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
12598c2ecf20Sopenharmony_ci		put_cred(cred);
12608c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
12618c2ecf20Sopenharmony_ci		goto out;
12628c2ecf20Sopenharmony_ci	}
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci	pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
12658c2ecf20Sopenharmony_ci	lrp->args.ld_private = &lrp->ld_private;
12668c2ecf20Sopenharmony_ci	lrp->clp = NFS_SERVER(ino)->nfs_client;
12678c2ecf20Sopenharmony_ci	lrp->cred = cred;
12688c2ecf20Sopenharmony_ci	if (ld->prepare_layoutreturn)
12698c2ecf20Sopenharmony_ci		ld->prepare_layoutreturn(&lrp->args);
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ci	status = nfs4_proc_layoutreturn(lrp, sync);
12728c2ecf20Sopenharmony_ciout:
12738c2ecf20Sopenharmony_ci	dprintk("<-- %s status: %d\n", __func__, status);
12748c2ecf20Sopenharmony_ci	return status;
12758c2ecf20Sopenharmony_ci}
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_cistatic bool
12788c2ecf20Sopenharmony_cipnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
12798c2ecf20Sopenharmony_ci				enum pnfs_iomode iomode,
12808c2ecf20Sopenharmony_ci				u32 seq)
12818c2ecf20Sopenharmony_ci{
12828c2ecf20Sopenharmony_ci	struct pnfs_layout_range recall_range = {
12838c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
12848c2ecf20Sopenharmony_ci		.iomode = iomode,
12858c2ecf20Sopenharmony_ci	};
12868c2ecf20Sopenharmony_ci	return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
12878c2ecf20Sopenharmony_ci					       &recall_range, seq) != -EBUSY;
12888c2ecf20Sopenharmony_ci}
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_ci/* Return true if layoutreturn is needed */
12918c2ecf20Sopenharmony_cistatic bool
12928c2ecf20Sopenharmony_cipnfs_layout_need_return(struct pnfs_layout_hdr *lo)
12938c2ecf20Sopenharmony_ci{
12948c2ecf20Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
12958c2ecf20Sopenharmony_ci		return false;
12968c2ecf20Sopenharmony_ci	return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
12978c2ecf20Sopenharmony_ci					       lo->plh_return_seq);
12988c2ecf20Sopenharmony_ci}
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
13018c2ecf20Sopenharmony_ci{
13028c2ecf20Sopenharmony_ci	struct inode *inode= lo->plh_inode;
13038c2ecf20Sopenharmony_ci
13048c2ecf20Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
13058c2ecf20Sopenharmony_ci		return;
13068c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
13078c2ecf20Sopenharmony_ci	if (pnfs_layout_need_return(lo)) {
13088c2ecf20Sopenharmony_ci		const struct cred *cred;
13098c2ecf20Sopenharmony_ci		nfs4_stateid stateid;
13108c2ecf20Sopenharmony_ci		enum pnfs_iomode iomode;
13118c2ecf20Sopenharmony_ci		bool send;
13128c2ecf20Sopenharmony_ci
13138c2ecf20Sopenharmony_ci		send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
13148c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
13158c2ecf20Sopenharmony_ci		if (send) {
13168c2ecf20Sopenharmony_ci			/* Send an async layoutreturn so we dont deadlock */
13178c2ecf20Sopenharmony_ci			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
13188c2ecf20Sopenharmony_ci		}
13198c2ecf20Sopenharmony_ci	} else
13208c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
13218c2ecf20Sopenharmony_ci}
13228c2ecf20Sopenharmony_ci
13238c2ecf20Sopenharmony_ci/*
13248c2ecf20Sopenharmony_ci * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
13258c2ecf20Sopenharmony_ci * when the layout segment list is empty.
13268c2ecf20Sopenharmony_ci *
13278c2ecf20Sopenharmony_ci * Note that a pnfs_layout_hdr can exist with an empty layout segment
13288c2ecf20Sopenharmony_ci * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
13298c2ecf20Sopenharmony_ci * deviceid is marked invalid.
13308c2ecf20Sopenharmony_ci */
13318c2ecf20Sopenharmony_ciint
13328c2ecf20Sopenharmony_ci_pnfs_return_layout(struct inode *ino)
13338c2ecf20Sopenharmony_ci{
13348c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo = NULL;
13358c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
13368c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
13378c2ecf20Sopenharmony_ci		.iomode		= IOMODE_ANY,
13388c2ecf20Sopenharmony_ci		.offset		= 0,
13398c2ecf20Sopenharmony_ci		.length		= NFS4_MAX_UINT64,
13408c2ecf20Sopenharmony_ci	};
13418c2ecf20Sopenharmony_ci	LIST_HEAD(tmp_list);
13428c2ecf20Sopenharmony_ci	const struct cred *cred;
13438c2ecf20Sopenharmony_ci	nfs4_stateid stateid;
13448c2ecf20Sopenharmony_ci	int status = 0;
13458c2ecf20Sopenharmony_ci	bool send, valid_layout;
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
13488c2ecf20Sopenharmony_ci
13498c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
13508c2ecf20Sopenharmony_ci	lo = nfsi->layout;
13518c2ecf20Sopenharmony_ci	if (!lo) {
13528c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
13538c2ecf20Sopenharmony_ci		dprintk("NFS: %s no layout to return\n", __func__);
13548c2ecf20Sopenharmony_ci		goto out;
13558c2ecf20Sopenharmony_ci	}
13568c2ecf20Sopenharmony_ci	/* Reference matched in nfs4_layoutreturn_release */
13578c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(lo);
13588c2ecf20Sopenharmony_ci	/* Is there an outstanding layoutreturn ? */
13598c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
13608c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
13618c2ecf20Sopenharmony_ci		if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
13628c2ecf20Sopenharmony_ci					TASK_UNINTERRUPTIBLE))
13638c2ecf20Sopenharmony_ci			goto out_put_layout_hdr;
13648c2ecf20Sopenharmony_ci		spin_lock(&ino->i_lock);
13658c2ecf20Sopenharmony_ci	}
13668c2ecf20Sopenharmony_ci	valid_layout = pnfs_layout_is_valid(lo);
13678c2ecf20Sopenharmony_ci	pnfs_clear_layoutcommit(ino, &tmp_list);
13688c2ecf20Sopenharmony_ci	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
13698c2ecf20Sopenharmony_ci
13708c2ecf20Sopenharmony_ci	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
13718c2ecf20Sopenharmony_ci		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci	/* Don't send a LAYOUTRETURN if list was initially empty */
13748c2ecf20Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
13758c2ecf20Sopenharmony_ci			!valid_layout) {
13768c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
13778c2ecf20Sopenharmony_ci		dprintk("NFS: %s no layout segments to return\n", __func__);
13788c2ecf20Sopenharmony_ci		goto out_wait_layoutreturn;
13798c2ecf20Sopenharmony_ci	}
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
13828c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
13838c2ecf20Sopenharmony_ci	if (send)
13848c2ecf20Sopenharmony_ci		status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
13858c2ecf20Sopenharmony_ciout_wait_layoutreturn:
13868c2ecf20Sopenharmony_ci	wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
13878c2ecf20Sopenharmony_ciout_put_layout_hdr:
13888c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&tmp_list);
13898c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
13908c2ecf20Sopenharmony_ciout:
13918c2ecf20Sopenharmony_ci	dprintk("<-- %s status: %d\n", __func__, status);
13928c2ecf20Sopenharmony_ci	return status;
13938c2ecf20Sopenharmony_ci}
13948c2ecf20Sopenharmony_ci
13958c2ecf20Sopenharmony_ciint
13968c2ecf20Sopenharmony_cipnfs_commit_and_return_layout(struct inode *inode)
13978c2ecf20Sopenharmony_ci{
13988c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
13998c2ecf20Sopenharmony_ci	int ret;
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
14028c2ecf20Sopenharmony_ci	lo = NFS_I(inode)->layout;
14038c2ecf20Sopenharmony_ci	if (lo == NULL) {
14048c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
14058c2ecf20Sopenharmony_ci		return 0;
14068c2ecf20Sopenharmony_ci	}
14078c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(lo);
14088c2ecf20Sopenharmony_ci	/* Block new layoutgets and read/write to ds */
14098c2ecf20Sopenharmony_ci	lo->plh_block_lgets++;
14108c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
14118c2ecf20Sopenharmony_ci	filemap_fdatawait(inode->i_mapping);
14128c2ecf20Sopenharmony_ci	ret = pnfs_layoutcommit_inode(inode, true);
14138c2ecf20Sopenharmony_ci	if (ret == 0)
14148c2ecf20Sopenharmony_ci		ret = _pnfs_return_layout(inode);
14158c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
14168c2ecf20Sopenharmony_ci	lo->plh_block_lgets--;
14178c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
14188c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
14198c2ecf20Sopenharmony_ci	return ret;
14208c2ecf20Sopenharmony_ci}
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_cibool pnfs_roc(struct inode *ino,
14238c2ecf20Sopenharmony_ci		struct nfs4_layoutreturn_args *args,
14248c2ecf20Sopenharmony_ci		struct nfs4_layoutreturn_res *res,
14258c2ecf20Sopenharmony_ci		const struct cred *cred)
14268c2ecf20Sopenharmony_ci{
14278c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
14288c2ecf20Sopenharmony_ci	struct nfs_open_context *ctx;
14298c2ecf20Sopenharmony_ci	struct nfs4_state *state;
14308c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
14318c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
14328c2ecf20Sopenharmony_ci	const struct cred *lc_cred;
14338c2ecf20Sopenharmony_ci	nfs4_stateid stateid;
14348c2ecf20Sopenharmony_ci	enum pnfs_iomode iomode = 0;
14358c2ecf20Sopenharmony_ci	bool layoutreturn = false, roc = false;
14368c2ecf20Sopenharmony_ci	bool skip_read = false;
14378c2ecf20Sopenharmony_ci
14388c2ecf20Sopenharmony_ci	if (!nfs_have_layout(ino))
14398c2ecf20Sopenharmony_ci		return false;
14408c2ecf20Sopenharmony_ciretry:
14418c2ecf20Sopenharmony_ci	rcu_read_lock();
14428c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
14438c2ecf20Sopenharmony_ci	lo = nfsi->layout;
14448c2ecf20Sopenharmony_ci	if (!lo || !pnfs_layout_is_valid(lo) ||
14458c2ecf20Sopenharmony_ci	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
14468c2ecf20Sopenharmony_ci		lo = NULL;
14478c2ecf20Sopenharmony_ci		goto out_noroc;
14488c2ecf20Sopenharmony_ci	}
14498c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(lo);
14508c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
14518c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
14528c2ecf20Sopenharmony_ci		rcu_read_unlock();
14538c2ecf20Sopenharmony_ci		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
14548c2ecf20Sopenharmony_ci				TASK_UNINTERRUPTIBLE);
14558c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
14568c2ecf20Sopenharmony_ci		goto retry;
14578c2ecf20Sopenharmony_ci	}
14588c2ecf20Sopenharmony_ci
14598c2ecf20Sopenharmony_ci	/* no roc if we hold a delegation */
14608c2ecf20Sopenharmony_ci	if (nfs4_check_delegation(ino, FMODE_READ)) {
14618c2ecf20Sopenharmony_ci		if (nfs4_check_delegation(ino, FMODE_WRITE))
14628c2ecf20Sopenharmony_ci			goto out_noroc;
14638c2ecf20Sopenharmony_ci		skip_read = true;
14648c2ecf20Sopenharmony_ci	}
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
14678c2ecf20Sopenharmony_ci		state = ctx->state;
14688c2ecf20Sopenharmony_ci		if (state == NULL)
14698c2ecf20Sopenharmony_ci			continue;
14708c2ecf20Sopenharmony_ci		/* Don't return layout if there is open file state */
14718c2ecf20Sopenharmony_ci		if (state->state & FMODE_WRITE)
14728c2ecf20Sopenharmony_ci			goto out_noroc;
14738c2ecf20Sopenharmony_ci		if (state->state & FMODE_READ)
14748c2ecf20Sopenharmony_ci			skip_read = true;
14758c2ecf20Sopenharmony_ci	}
14768c2ecf20Sopenharmony_ci
14778c2ecf20Sopenharmony_ci
14788c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
14798c2ecf20Sopenharmony_ci		if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
14808c2ecf20Sopenharmony_ci			continue;
14818c2ecf20Sopenharmony_ci		/* If we are sending layoutreturn, invalidate all valid lsegs */
14828c2ecf20Sopenharmony_ci		if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
14838c2ecf20Sopenharmony_ci			continue;
14848c2ecf20Sopenharmony_ci		/*
14858c2ecf20Sopenharmony_ci		 * Note: mark lseg for return so pnfs_layout_remove_lseg
14868c2ecf20Sopenharmony_ci		 * doesn't invalidate the layout for us.
14878c2ecf20Sopenharmony_ci		 */
14888c2ecf20Sopenharmony_ci		set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
14898c2ecf20Sopenharmony_ci		if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
14908c2ecf20Sopenharmony_ci			continue;
14918c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
14928c2ecf20Sopenharmony_ci	}
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
14958c2ecf20Sopenharmony_ci		goto out_noroc;
14968c2ecf20Sopenharmony_ci
14978c2ecf20Sopenharmony_ci	/* ROC in two conditions:
14988c2ecf20Sopenharmony_ci	 * 1. there are ROC lsegs
14998c2ecf20Sopenharmony_ci	 * 2. we don't send layoutreturn
15008c2ecf20Sopenharmony_ci	 */
15018c2ecf20Sopenharmony_ci	/* lo ref dropped in pnfs_roc_release() */
15028c2ecf20Sopenharmony_ci	layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode);
15038c2ecf20Sopenharmony_ci	/* If the creds don't match, we can't compound the layoutreturn */
15048c2ecf20Sopenharmony_ci	if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0)
15058c2ecf20Sopenharmony_ci		goto out_noroc;
15068c2ecf20Sopenharmony_ci
15078c2ecf20Sopenharmony_ci	roc = layoutreturn;
15088c2ecf20Sopenharmony_ci	pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
15098c2ecf20Sopenharmony_ci	res->lrs_present = 0;
15108c2ecf20Sopenharmony_ci	layoutreturn = false;
15118c2ecf20Sopenharmony_ci	put_cred(lc_cred);
15128c2ecf20Sopenharmony_ci
15138c2ecf20Sopenharmony_ciout_noroc:
15148c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
15158c2ecf20Sopenharmony_ci	rcu_read_unlock();
15168c2ecf20Sopenharmony_ci	pnfs_layoutcommit_inode(ino, true);
15178c2ecf20Sopenharmony_ci	if (roc) {
15188c2ecf20Sopenharmony_ci		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
15198c2ecf20Sopenharmony_ci		if (ld->prepare_layoutreturn)
15208c2ecf20Sopenharmony_ci			ld->prepare_layoutreturn(args);
15218c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
15228c2ecf20Sopenharmony_ci		return true;
15238c2ecf20Sopenharmony_ci	}
15248c2ecf20Sopenharmony_ci	if (layoutreturn)
15258c2ecf20Sopenharmony_ci		pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
15268c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
15278c2ecf20Sopenharmony_ci	return false;
15288c2ecf20Sopenharmony_ci}
15298c2ecf20Sopenharmony_ci
15308c2ecf20Sopenharmony_ciint pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
15318c2ecf20Sopenharmony_ci		  struct nfs4_layoutreturn_res **respp, int *ret)
15328c2ecf20Sopenharmony_ci{
15338c2ecf20Sopenharmony_ci	struct nfs4_layoutreturn_args *arg = *argpp;
15348c2ecf20Sopenharmony_ci	int retval = -EAGAIN;
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci	if (!arg)
15378c2ecf20Sopenharmony_ci		return 0;
15388c2ecf20Sopenharmony_ci	/* Handle Layoutreturn errors */
15398c2ecf20Sopenharmony_ci	switch (*ret) {
15408c2ecf20Sopenharmony_ci	case 0:
15418c2ecf20Sopenharmony_ci		retval = 0;
15428c2ecf20Sopenharmony_ci		break;
15438c2ecf20Sopenharmony_ci	case -NFS4ERR_NOMATCHING_LAYOUT:
15448c2ecf20Sopenharmony_ci		/* Was there an RPC level error? If not, retry */
15458c2ecf20Sopenharmony_ci		if (task->tk_rpc_status == 0)
15468c2ecf20Sopenharmony_ci			break;
15478c2ecf20Sopenharmony_ci		/* If the call was not sent, let caller handle it */
15488c2ecf20Sopenharmony_ci		if (!RPC_WAS_SENT(task))
15498c2ecf20Sopenharmony_ci			return 0;
15508c2ecf20Sopenharmony_ci		/*
15518c2ecf20Sopenharmony_ci		 * Otherwise, assume the call succeeded and
15528c2ecf20Sopenharmony_ci		 * that we need to release the layout
15538c2ecf20Sopenharmony_ci		 */
15548c2ecf20Sopenharmony_ci		*ret = 0;
15558c2ecf20Sopenharmony_ci		(*respp)->lrs_present = 0;
15568c2ecf20Sopenharmony_ci		retval = 0;
15578c2ecf20Sopenharmony_ci		break;
15588c2ecf20Sopenharmony_ci	case -NFS4ERR_DELAY:
15598c2ecf20Sopenharmony_ci		/* Let the caller handle the retry */
15608c2ecf20Sopenharmony_ci		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
15618c2ecf20Sopenharmony_ci		return 0;
15628c2ecf20Sopenharmony_ci	case -NFS4ERR_OLD_STATEID:
15638c2ecf20Sopenharmony_ci		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
15648c2ecf20Sopenharmony_ci						     &arg->range, arg->inode))
15658c2ecf20Sopenharmony_ci			break;
15668c2ecf20Sopenharmony_ci		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
15678c2ecf20Sopenharmony_ci		return -EAGAIN;
15688c2ecf20Sopenharmony_ci	}
15698c2ecf20Sopenharmony_ci	*argpp = NULL;
15708c2ecf20Sopenharmony_ci	*respp = NULL;
15718c2ecf20Sopenharmony_ci	return retval;
15728c2ecf20Sopenharmony_ci}
15738c2ecf20Sopenharmony_ci
15748c2ecf20Sopenharmony_civoid pnfs_roc_release(struct nfs4_layoutreturn_args *args,
15758c2ecf20Sopenharmony_ci		struct nfs4_layoutreturn_res *res,
15768c2ecf20Sopenharmony_ci		int ret)
15778c2ecf20Sopenharmony_ci{
15788c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo = args->layout;
15798c2ecf20Sopenharmony_ci	struct inode *inode = args->inode;
15808c2ecf20Sopenharmony_ci	const nfs4_stateid *arg_stateid = NULL;
15818c2ecf20Sopenharmony_ci	const nfs4_stateid *res_stateid = NULL;
15828c2ecf20Sopenharmony_ci	struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_ci	switch (ret) {
15858c2ecf20Sopenharmony_ci	case -NFS4ERR_NOMATCHING_LAYOUT:
15868c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
15878c2ecf20Sopenharmony_ci		if (pnfs_layout_is_valid(lo) &&
15888c2ecf20Sopenharmony_ci		    nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
15898c2ecf20Sopenharmony_ci			pnfs_set_plh_return_info(lo, args->range.iomode, 0);
15908c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
15918c2ecf20Sopenharmony_ci		break;
15928c2ecf20Sopenharmony_ci	case 0:
15938c2ecf20Sopenharmony_ci		if (res->lrs_present)
15948c2ecf20Sopenharmony_ci			res_stateid = &res->stateid;
15958c2ecf20Sopenharmony_ci		fallthrough;
15968c2ecf20Sopenharmony_ci	default:
15978c2ecf20Sopenharmony_ci		arg_stateid = &args->stateid;
15988c2ecf20Sopenharmony_ci	}
15998c2ecf20Sopenharmony_ci	trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret);
16008c2ecf20Sopenharmony_ci	pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
16018c2ecf20Sopenharmony_ci			res_stateid);
16028c2ecf20Sopenharmony_ci	if (ld_private && ld_private->ops && ld_private->ops->free)
16038c2ecf20Sopenharmony_ci		ld_private->ops->free(ld_private);
16048c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
16058c2ecf20Sopenharmony_ci}
16068c2ecf20Sopenharmony_ci
16078c2ecf20Sopenharmony_cibool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
16088c2ecf20Sopenharmony_ci{
16098c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
16108c2ecf20Sopenharmony_ci        struct pnfs_layout_hdr *lo;
16118c2ecf20Sopenharmony_ci        bool sleep = false;
16128c2ecf20Sopenharmony_ci
16138c2ecf20Sopenharmony_ci	/* we might not have grabbed lo reference. so need to check under
16148c2ecf20Sopenharmony_ci	 * i_lock */
16158c2ecf20Sopenharmony_ci        spin_lock(&ino->i_lock);
16168c2ecf20Sopenharmony_ci        lo = nfsi->layout;
16178c2ecf20Sopenharmony_ci        if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
16188c2ecf20Sopenharmony_ci                rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
16198c2ecf20Sopenharmony_ci                sleep = true;
16208c2ecf20Sopenharmony_ci	}
16218c2ecf20Sopenharmony_ci        spin_unlock(&ino->i_lock);
16228c2ecf20Sopenharmony_ci        return sleep;
16238c2ecf20Sopenharmony_ci}
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_ci/*
16268c2ecf20Sopenharmony_ci * Compare two layout segments for sorting into layout cache.
16278c2ecf20Sopenharmony_ci * We want to preferentially return RW over RO layouts, so ensure those
16288c2ecf20Sopenharmony_ci * are seen first.
16298c2ecf20Sopenharmony_ci */
16308c2ecf20Sopenharmony_cistatic s64
16318c2ecf20Sopenharmony_cipnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
16328c2ecf20Sopenharmony_ci	   const struct pnfs_layout_range *l2)
16338c2ecf20Sopenharmony_ci{
16348c2ecf20Sopenharmony_ci	s64 d;
16358c2ecf20Sopenharmony_ci
16368c2ecf20Sopenharmony_ci	/* high offset > low offset */
16378c2ecf20Sopenharmony_ci	d = l1->offset - l2->offset;
16388c2ecf20Sopenharmony_ci	if (d)
16398c2ecf20Sopenharmony_ci		return d;
16408c2ecf20Sopenharmony_ci
16418c2ecf20Sopenharmony_ci	/* short length > long length */
16428c2ecf20Sopenharmony_ci	d = l2->length - l1->length;
16438c2ecf20Sopenharmony_ci	if (d)
16448c2ecf20Sopenharmony_ci		return d;
16458c2ecf20Sopenharmony_ci
16468c2ecf20Sopenharmony_ci	/* read > read/write */
16478c2ecf20Sopenharmony_ci	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
16488c2ecf20Sopenharmony_ci}
16498c2ecf20Sopenharmony_ci
16508c2ecf20Sopenharmony_cistatic bool
16518c2ecf20Sopenharmony_cipnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
16528c2ecf20Sopenharmony_ci		const struct pnfs_layout_range *l2)
16538c2ecf20Sopenharmony_ci{
16548c2ecf20Sopenharmony_ci	return pnfs_lseg_range_cmp(l1, l2) > 0;
16558c2ecf20Sopenharmony_ci}
16568c2ecf20Sopenharmony_ci
16578c2ecf20Sopenharmony_cistatic bool
16588c2ecf20Sopenharmony_cipnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
16598c2ecf20Sopenharmony_ci		struct pnfs_layout_segment *old)
16608c2ecf20Sopenharmony_ci{
16618c2ecf20Sopenharmony_ci	return false;
16628c2ecf20Sopenharmony_ci}
16638c2ecf20Sopenharmony_ci
16648c2ecf20Sopenharmony_civoid
16658c2ecf20Sopenharmony_cipnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
16668c2ecf20Sopenharmony_ci		   struct pnfs_layout_segment *lseg,
16678c2ecf20Sopenharmony_ci		   bool (*is_after)(const struct pnfs_layout_range *,
16688c2ecf20Sopenharmony_ci			   const struct pnfs_layout_range *),
16698c2ecf20Sopenharmony_ci		   bool (*do_merge)(struct pnfs_layout_segment *,
16708c2ecf20Sopenharmony_ci			   struct pnfs_layout_segment *),
16718c2ecf20Sopenharmony_ci		   struct list_head *free_me)
16728c2ecf20Sopenharmony_ci{
16738c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lp, *tmp;
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci	dprintk("%s:Begin\n", __func__);
16768c2ecf20Sopenharmony_ci
16778c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
16788c2ecf20Sopenharmony_ci		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
16798c2ecf20Sopenharmony_ci			continue;
16808c2ecf20Sopenharmony_ci		if (do_merge(lseg, lp)) {
16818c2ecf20Sopenharmony_ci			mark_lseg_invalid(lp, free_me);
16828c2ecf20Sopenharmony_ci			continue;
16838c2ecf20Sopenharmony_ci		}
16848c2ecf20Sopenharmony_ci		if (is_after(&lseg->pls_range, &lp->pls_range))
16858c2ecf20Sopenharmony_ci			continue;
16868c2ecf20Sopenharmony_ci		list_add_tail(&lseg->pls_list, &lp->pls_list);
16878c2ecf20Sopenharmony_ci		dprintk("%s: inserted lseg %p "
16888c2ecf20Sopenharmony_ci			"iomode %d offset %llu length %llu before "
16898c2ecf20Sopenharmony_ci			"lp %p iomode %d offset %llu length %llu\n",
16908c2ecf20Sopenharmony_ci			__func__, lseg, lseg->pls_range.iomode,
16918c2ecf20Sopenharmony_ci			lseg->pls_range.offset, lseg->pls_range.length,
16928c2ecf20Sopenharmony_ci			lp, lp->pls_range.iomode, lp->pls_range.offset,
16938c2ecf20Sopenharmony_ci			lp->pls_range.length);
16948c2ecf20Sopenharmony_ci		goto out;
16958c2ecf20Sopenharmony_ci	}
16968c2ecf20Sopenharmony_ci	list_add_tail(&lseg->pls_list, &lo->plh_segs);
16978c2ecf20Sopenharmony_ci	dprintk("%s: inserted lseg %p "
16988c2ecf20Sopenharmony_ci		"iomode %d offset %llu length %llu at tail\n",
16998c2ecf20Sopenharmony_ci		__func__, lseg, lseg->pls_range.iomode,
17008c2ecf20Sopenharmony_ci		lseg->pls_range.offset, lseg->pls_range.length);
17018c2ecf20Sopenharmony_ciout:
17028c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(lo);
17038c2ecf20Sopenharmony_ci
17048c2ecf20Sopenharmony_ci	dprintk("%s:Return\n", __func__);
17058c2ecf20Sopenharmony_ci}
17068c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);
17078c2ecf20Sopenharmony_ci
17088c2ecf20Sopenharmony_cistatic void
17098c2ecf20Sopenharmony_cipnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
17108c2ecf20Sopenharmony_ci		   struct pnfs_layout_segment *lseg,
17118c2ecf20Sopenharmony_ci		   struct list_head *free_me)
17128c2ecf20Sopenharmony_ci{
17138c2ecf20Sopenharmony_ci	struct inode *inode = lo->plh_inode;
17148c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
17158c2ecf20Sopenharmony_ci
17168c2ecf20Sopenharmony_ci	if (ld->add_lseg != NULL)
17178c2ecf20Sopenharmony_ci		ld->add_lseg(lo, lseg, free_me);
17188c2ecf20Sopenharmony_ci	else
17198c2ecf20Sopenharmony_ci		pnfs_generic_layout_insert_lseg(lo, lseg,
17208c2ecf20Sopenharmony_ci				pnfs_lseg_range_is_after,
17218c2ecf20Sopenharmony_ci				pnfs_lseg_no_merge,
17228c2ecf20Sopenharmony_ci				free_me);
17238c2ecf20Sopenharmony_ci}
17248c2ecf20Sopenharmony_ci
17258c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *
17268c2ecf20Sopenharmony_cialloc_init_layout_hdr(struct inode *ino,
17278c2ecf20Sopenharmony_ci		      struct nfs_open_context *ctx,
17288c2ecf20Sopenharmony_ci		      gfp_t gfp_flags)
17298c2ecf20Sopenharmony_ci{
17308c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
17338c2ecf20Sopenharmony_ci	if (!lo)
17348c2ecf20Sopenharmony_ci		return NULL;
17358c2ecf20Sopenharmony_ci	refcount_set(&lo->plh_refcount, 1);
17368c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_layouts);
17378c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_segs);
17388c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_return_segs);
17398c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
17408c2ecf20Sopenharmony_ci	lo->plh_inode = ino;
17418c2ecf20Sopenharmony_ci	lo->plh_lc_cred = get_cred(ctx->cred);
17428c2ecf20Sopenharmony_ci	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
17438c2ecf20Sopenharmony_ci	return lo;
17448c2ecf20Sopenharmony_ci}
17458c2ecf20Sopenharmony_ci
17468c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *
17478c2ecf20Sopenharmony_cipnfs_find_alloc_layout(struct inode *ino,
17488c2ecf20Sopenharmony_ci		       struct nfs_open_context *ctx,
17498c2ecf20Sopenharmony_ci		       gfp_t gfp_flags)
17508c2ecf20Sopenharmony_ci	__releases(&ino->i_lock)
17518c2ecf20Sopenharmony_ci	__acquires(&ino->i_lock)
17528c2ecf20Sopenharmony_ci{
17538c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
17548c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *new = NULL;
17558c2ecf20Sopenharmony_ci
17568c2ecf20Sopenharmony_ci	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
17578c2ecf20Sopenharmony_ci
17588c2ecf20Sopenharmony_ci	if (nfsi->layout != NULL)
17598c2ecf20Sopenharmony_ci		goto out_existing;
17608c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
17618c2ecf20Sopenharmony_ci	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
17628c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
17638c2ecf20Sopenharmony_ci
17648c2ecf20Sopenharmony_ci	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
17658c2ecf20Sopenharmony_ci		nfsi->layout = new;
17668c2ecf20Sopenharmony_ci		return new;
17678c2ecf20Sopenharmony_ci	} else if (new != NULL)
17688c2ecf20Sopenharmony_ci		pnfs_free_layout_hdr(new);
17698c2ecf20Sopenharmony_ciout_existing:
17708c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(nfsi->layout);
17718c2ecf20Sopenharmony_ci	return nfsi->layout;
17728c2ecf20Sopenharmony_ci}
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci/*
17758c2ecf20Sopenharmony_ci * iomode matching rules:
17768c2ecf20Sopenharmony_ci * iomode	lseg	strict match
17778c2ecf20Sopenharmony_ci *                      iomode
17788c2ecf20Sopenharmony_ci * -----	-----	------ -----
17798c2ecf20Sopenharmony_ci * ANY		READ	N/A    true
17808c2ecf20Sopenharmony_ci * ANY		RW	N/A    true
17818c2ecf20Sopenharmony_ci * RW		READ	N/A    false
17828c2ecf20Sopenharmony_ci * RW		RW	N/A    true
17838c2ecf20Sopenharmony_ci * READ		READ	N/A    true
17848c2ecf20Sopenharmony_ci * READ		RW	true   false
17858c2ecf20Sopenharmony_ci * READ		RW	false  true
17868c2ecf20Sopenharmony_ci */
17878c2ecf20Sopenharmony_cistatic bool
17888c2ecf20Sopenharmony_cipnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
17898c2ecf20Sopenharmony_ci		 const struct pnfs_layout_range *range,
17908c2ecf20Sopenharmony_ci		 bool strict_iomode)
17918c2ecf20Sopenharmony_ci{
17928c2ecf20Sopenharmony_ci	struct pnfs_layout_range range1;
17938c2ecf20Sopenharmony_ci
17948c2ecf20Sopenharmony_ci	if ((range->iomode == IOMODE_RW &&
17958c2ecf20Sopenharmony_ci	     ls_range->iomode != IOMODE_RW) ||
17968c2ecf20Sopenharmony_ci	    (range->iomode != ls_range->iomode &&
17978c2ecf20Sopenharmony_ci	     strict_iomode) ||
17988c2ecf20Sopenharmony_ci	    !pnfs_lseg_range_intersecting(ls_range, range))
17998c2ecf20Sopenharmony_ci		return false;
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	/* range1 covers only the first byte in the range */
18028c2ecf20Sopenharmony_ci	range1 = *range;
18038c2ecf20Sopenharmony_ci	range1.length = 1;
18048c2ecf20Sopenharmony_ci	return pnfs_lseg_range_contained(ls_range, &range1);
18058c2ecf20Sopenharmony_ci}
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci/*
18088c2ecf20Sopenharmony_ci * lookup range in layout
18098c2ecf20Sopenharmony_ci */
18108c2ecf20Sopenharmony_cistatic struct pnfs_layout_segment *
18118c2ecf20Sopenharmony_cipnfs_find_lseg(struct pnfs_layout_hdr *lo,
18128c2ecf20Sopenharmony_ci		struct pnfs_layout_range *range,
18138c2ecf20Sopenharmony_ci		bool strict_iomode)
18148c2ecf20Sopenharmony_ci{
18158c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *ret = NULL;
18168c2ecf20Sopenharmony_ci
18178c2ecf20Sopenharmony_ci	dprintk("%s:Begin\n", __func__);
18188c2ecf20Sopenharmony_ci
18198c2ecf20Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
18208c2ecf20Sopenharmony_ci		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
18218c2ecf20Sopenharmony_ci		    pnfs_lseg_range_match(&lseg->pls_range, range,
18228c2ecf20Sopenharmony_ci					  strict_iomode)) {
18238c2ecf20Sopenharmony_ci			ret = pnfs_get_lseg(lseg);
18248c2ecf20Sopenharmony_ci			break;
18258c2ecf20Sopenharmony_ci		}
18268c2ecf20Sopenharmony_ci	}
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci	dprintk("%s:Return lseg %p ref %d\n",
18298c2ecf20Sopenharmony_ci		__func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
18308c2ecf20Sopenharmony_ci	return ret;
18318c2ecf20Sopenharmony_ci}
18328c2ecf20Sopenharmony_ci
18338c2ecf20Sopenharmony_ci/*
18348c2ecf20Sopenharmony_ci * Use mdsthreshold hints set at each OPEN to determine if I/O should go
18358c2ecf20Sopenharmony_ci * to the MDS or over pNFS
18368c2ecf20Sopenharmony_ci *
18378c2ecf20Sopenharmony_ci * The nfs_inode read_io and write_io fields are cumulative counters reset
18388c2ecf20Sopenharmony_ci * when there are no layout segments. Note that in pnfs_update_layout iomode
18398c2ecf20Sopenharmony_ci * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
18408c2ecf20Sopenharmony_ci * WRITE request.
18418c2ecf20Sopenharmony_ci *
18428c2ecf20Sopenharmony_ci * A return of true means use MDS I/O.
18438c2ecf20Sopenharmony_ci *
18448c2ecf20Sopenharmony_ci * From rfc 5661:
18458c2ecf20Sopenharmony_ci * If a file's size is smaller than the file size threshold, data accesses
18468c2ecf20Sopenharmony_ci * SHOULD be sent to the metadata server.  If an I/O request has a length that
18478c2ecf20Sopenharmony_ci * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
18488c2ecf20Sopenharmony_ci * server.  If both file size and I/O size are provided, the client SHOULD
18498c2ecf20Sopenharmony_ci * reach or exceed  both thresholds before sending its read or write
18508c2ecf20Sopenharmony_ci * requests to the data server.
18518c2ecf20Sopenharmony_ci */
18528c2ecf20Sopenharmony_cistatic bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
18538c2ecf20Sopenharmony_ci				     struct inode *ino, int iomode)
18548c2ecf20Sopenharmony_ci{
18558c2ecf20Sopenharmony_ci	struct nfs4_threshold *t = ctx->mdsthreshold;
18568c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(ino);
18578c2ecf20Sopenharmony_ci	loff_t fsize = i_size_read(ino);
18588c2ecf20Sopenharmony_ci	bool size = false, size_set = false, io = false, io_set = false, ret = false;
18598c2ecf20Sopenharmony_ci
18608c2ecf20Sopenharmony_ci	if (t == NULL)
18618c2ecf20Sopenharmony_ci		return ret;
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
18648c2ecf20Sopenharmony_ci		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
18658c2ecf20Sopenharmony_ci
18668c2ecf20Sopenharmony_ci	switch (iomode) {
18678c2ecf20Sopenharmony_ci	case IOMODE_READ:
18688c2ecf20Sopenharmony_ci		if (t->bm & THRESHOLD_RD) {
18698c2ecf20Sopenharmony_ci			dprintk("%s fsize %llu\n", __func__, fsize);
18708c2ecf20Sopenharmony_ci			size_set = true;
18718c2ecf20Sopenharmony_ci			if (fsize < t->rd_sz)
18728c2ecf20Sopenharmony_ci				size = true;
18738c2ecf20Sopenharmony_ci		}
18748c2ecf20Sopenharmony_ci		if (t->bm & THRESHOLD_RD_IO) {
18758c2ecf20Sopenharmony_ci			dprintk("%s nfsi->read_io %llu\n", __func__,
18768c2ecf20Sopenharmony_ci				nfsi->read_io);
18778c2ecf20Sopenharmony_ci			io_set = true;
18788c2ecf20Sopenharmony_ci			if (nfsi->read_io < t->rd_io_sz)
18798c2ecf20Sopenharmony_ci				io = true;
18808c2ecf20Sopenharmony_ci		}
18818c2ecf20Sopenharmony_ci		break;
18828c2ecf20Sopenharmony_ci	case IOMODE_RW:
18838c2ecf20Sopenharmony_ci		if (t->bm & THRESHOLD_WR) {
18848c2ecf20Sopenharmony_ci			dprintk("%s fsize %llu\n", __func__, fsize);
18858c2ecf20Sopenharmony_ci			size_set = true;
18868c2ecf20Sopenharmony_ci			if (fsize < t->wr_sz)
18878c2ecf20Sopenharmony_ci				size = true;
18888c2ecf20Sopenharmony_ci		}
18898c2ecf20Sopenharmony_ci		if (t->bm & THRESHOLD_WR_IO) {
18908c2ecf20Sopenharmony_ci			dprintk("%s nfsi->write_io %llu\n", __func__,
18918c2ecf20Sopenharmony_ci				nfsi->write_io);
18928c2ecf20Sopenharmony_ci			io_set = true;
18938c2ecf20Sopenharmony_ci			if (nfsi->write_io < t->wr_io_sz)
18948c2ecf20Sopenharmony_ci				io = true;
18958c2ecf20Sopenharmony_ci		}
18968c2ecf20Sopenharmony_ci		break;
18978c2ecf20Sopenharmony_ci	}
18988c2ecf20Sopenharmony_ci	if (size_set && io_set) {
18998c2ecf20Sopenharmony_ci		if (size && io)
19008c2ecf20Sopenharmony_ci			ret = true;
19018c2ecf20Sopenharmony_ci	} else if (size || io)
19028c2ecf20Sopenharmony_ci		ret = true;
19038c2ecf20Sopenharmony_ci
19048c2ecf20Sopenharmony_ci	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
19058c2ecf20Sopenharmony_ci	return ret;
19068c2ecf20Sopenharmony_ci}
19078c2ecf20Sopenharmony_ci
19088c2ecf20Sopenharmony_cistatic int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
19098c2ecf20Sopenharmony_ci{
19108c2ecf20Sopenharmony_ci	/*
19118c2ecf20Sopenharmony_ci	 * send layoutcommit as it can hold up layoutreturn due to lseg
19128c2ecf20Sopenharmony_ci	 * reference
19138c2ecf20Sopenharmony_ci	 */
19148c2ecf20Sopenharmony_ci	pnfs_layoutcommit_inode(lo->plh_inode, false);
19158c2ecf20Sopenharmony_ci	return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
19168c2ecf20Sopenharmony_ci				   nfs_wait_bit_killable,
19178c2ecf20Sopenharmony_ci				   TASK_KILLABLE);
19188c2ecf20Sopenharmony_ci}
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_cistatic void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
19218c2ecf20Sopenharmony_ci{
19228c2ecf20Sopenharmony_ci	atomic_inc(&lo->plh_outstanding);
19238c2ecf20Sopenharmony_ci}
19248c2ecf20Sopenharmony_ci
19258c2ecf20Sopenharmony_cistatic void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
19268c2ecf20Sopenharmony_ci{
19278c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&lo->plh_outstanding) &&
19288c2ecf20Sopenharmony_ci	    test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
19298c2ecf20Sopenharmony_ci		wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
19308c2ecf20Sopenharmony_ci}
19318c2ecf20Sopenharmony_ci
19328c2ecf20Sopenharmony_cistatic bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
19338c2ecf20Sopenharmony_ci{
19348c2ecf20Sopenharmony_ci	return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
19358c2ecf20Sopenharmony_ci}
19368c2ecf20Sopenharmony_ci
19378c2ecf20Sopenharmony_cistatic void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
19388c2ecf20Sopenharmony_ci{
19398c2ecf20Sopenharmony_ci	unsigned long *bitlock = &lo->plh_flags;
19408c2ecf20Sopenharmony_ci
19418c2ecf20Sopenharmony_ci	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
19428c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
19438c2ecf20Sopenharmony_ci	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
19448c2ecf20Sopenharmony_ci}
19458c2ecf20Sopenharmony_ci
19468c2ecf20Sopenharmony_cistatic void _add_to_server_list(struct pnfs_layout_hdr *lo,
19478c2ecf20Sopenharmony_ci				struct nfs_server *server)
19488c2ecf20Sopenharmony_ci{
19498c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
19508c2ecf20Sopenharmony_ci		struct nfs_client *clp = server->nfs_client;
19518c2ecf20Sopenharmony_ci
19528c2ecf20Sopenharmony_ci		/* The lo must be on the clp list if there is any
19538c2ecf20Sopenharmony_ci		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
19548c2ecf20Sopenharmony_ci		 */
19558c2ecf20Sopenharmony_ci		spin_lock(&clp->cl_lock);
19568c2ecf20Sopenharmony_ci		list_add_tail_rcu(&lo->plh_layouts, &server->layouts);
19578c2ecf20Sopenharmony_ci		spin_unlock(&clp->cl_lock);
19588c2ecf20Sopenharmony_ci	}
19598c2ecf20Sopenharmony_ci}
19608c2ecf20Sopenharmony_ci
19618c2ecf20Sopenharmony_ci/*
19628c2ecf20Sopenharmony_ci * Layout segment is retreived from the server if not cached.
19638c2ecf20Sopenharmony_ci * The appropriate layout segment is referenced and returned to the caller.
19648c2ecf20Sopenharmony_ci */
19658c2ecf20Sopenharmony_cistruct pnfs_layout_segment *
19668c2ecf20Sopenharmony_cipnfs_update_layout(struct inode *ino,
19678c2ecf20Sopenharmony_ci		   struct nfs_open_context *ctx,
19688c2ecf20Sopenharmony_ci		   loff_t pos,
19698c2ecf20Sopenharmony_ci		   u64 count,
19708c2ecf20Sopenharmony_ci		   enum pnfs_iomode iomode,
19718c2ecf20Sopenharmony_ci		   bool strict_iomode,
19728c2ecf20Sopenharmony_ci		   gfp_t gfp_flags)
19738c2ecf20Sopenharmony_ci{
19748c2ecf20Sopenharmony_ci	struct pnfs_layout_range arg = {
19758c2ecf20Sopenharmony_ci		.iomode = iomode,
19768c2ecf20Sopenharmony_ci		.offset = pos,
19778c2ecf20Sopenharmony_ci		.length = count,
19788c2ecf20Sopenharmony_ci	};
19798c2ecf20Sopenharmony_ci	unsigned pg_offset;
19808c2ecf20Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(ino);
19818c2ecf20Sopenharmony_ci	struct nfs_client *clp = server->nfs_client;
19828c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo = NULL;
19838c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = NULL;
19848c2ecf20Sopenharmony_ci	struct nfs4_layoutget *lgp;
19858c2ecf20Sopenharmony_ci	nfs4_stateid stateid;
19868c2ecf20Sopenharmony_ci	long timeout = 0;
19878c2ecf20Sopenharmony_ci	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
19888c2ecf20Sopenharmony_ci	bool first;
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
19918c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19928c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_NO_PNFS);
19938c2ecf20Sopenharmony_ci		goto out;
19948c2ecf20Sopenharmony_ci	}
19958c2ecf20Sopenharmony_ci
19968c2ecf20Sopenharmony_ci	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
19978c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19988c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
19998c2ecf20Sopenharmony_ci		goto out;
20008c2ecf20Sopenharmony_ci	}
20018c2ecf20Sopenharmony_ci
20028c2ecf20Sopenharmony_cilookup_again:
20038c2ecf20Sopenharmony_ci	lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
20048c2ecf20Sopenharmony_ci	if (IS_ERR(lseg))
20058c2ecf20Sopenharmony_ci		goto out;
20068c2ecf20Sopenharmony_ci	first = false;
20078c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
20088c2ecf20Sopenharmony_ci	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
20098c2ecf20Sopenharmony_ci	if (lo == NULL) {
20108c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
20118c2ecf20Sopenharmony_ci		lseg = ERR_PTR(-ENOMEM);
20128c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20138c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_NOMEM);
20148c2ecf20Sopenharmony_ci		goto out;
20158c2ecf20Sopenharmony_ci	}
20168c2ecf20Sopenharmony_ci
20178c2ecf20Sopenharmony_ci	/* Do we even need to bother with this? */
20188c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
20198c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20208c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
20218c2ecf20Sopenharmony_ci		dprintk("%s matches recall, use MDS\n", __func__);
20228c2ecf20Sopenharmony_ci		goto out_unlock;
20238c2ecf20Sopenharmony_ci	}
20248c2ecf20Sopenharmony_ci
20258c2ecf20Sopenharmony_ci	/* if LAYOUTGET already failed once we don't try again */
20268c2ecf20Sopenharmony_ci	if (pnfs_layout_io_test_failed(lo, iomode)) {
20278c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20288c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
20298c2ecf20Sopenharmony_ci		goto out_unlock;
20308c2ecf20Sopenharmony_ci	}
20318c2ecf20Sopenharmony_ci
20328c2ecf20Sopenharmony_ci	/*
20338c2ecf20Sopenharmony_ci	 * If the layout segment list is empty, but there are outstanding
20348c2ecf20Sopenharmony_ci	 * layoutget calls, then they might be subject to a layoutrecall.
20358c2ecf20Sopenharmony_ci	 */
20368c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
20378c2ecf20Sopenharmony_ci	    atomic_read(&lo->plh_outstanding) != 0) {
20388c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
20398c2ecf20Sopenharmony_ci		lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
20408c2ecf20Sopenharmony_ci					   TASK_KILLABLE));
20418c2ecf20Sopenharmony_ci		if (IS_ERR(lseg))
20428c2ecf20Sopenharmony_ci			goto out_put_layout_hdr;
20438c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
20448c2ecf20Sopenharmony_ci		goto lookup_again;
20458c2ecf20Sopenharmony_ci	}
20468c2ecf20Sopenharmony_ci
20478c2ecf20Sopenharmony_ci	/*
20488c2ecf20Sopenharmony_ci	 * Because we free lsegs when sending LAYOUTRETURN, we need to wait
20498c2ecf20Sopenharmony_ci	 * for LAYOUTRETURN.
20508c2ecf20Sopenharmony_ci	 */
20518c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
20528c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
20538c2ecf20Sopenharmony_ci		dprintk("%s wait for layoutreturn\n", __func__);
20548c2ecf20Sopenharmony_ci		lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo));
20558c2ecf20Sopenharmony_ci		if (!IS_ERR(lseg)) {
20568c2ecf20Sopenharmony_ci			pnfs_put_layout_hdr(lo);
20578c2ecf20Sopenharmony_ci			dprintk("%s retrying\n", __func__);
20588c2ecf20Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
20598c2ecf20Sopenharmony_ci						 lseg,
20608c2ecf20Sopenharmony_ci						 PNFS_UPDATE_LAYOUT_RETRY);
20618c2ecf20Sopenharmony_ci			goto lookup_again;
20628c2ecf20Sopenharmony_ci		}
20638c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20648c2ecf20Sopenharmony_ci					 PNFS_UPDATE_LAYOUT_RETURN);
20658c2ecf20Sopenharmony_ci		goto out_put_layout_hdr;
20668c2ecf20Sopenharmony_ci	}
20678c2ecf20Sopenharmony_ci
20688c2ecf20Sopenharmony_ci	lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
20698c2ecf20Sopenharmony_ci	if (lseg) {
20708c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20718c2ecf20Sopenharmony_ci				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
20728c2ecf20Sopenharmony_ci		goto out_unlock;
20738c2ecf20Sopenharmony_ci	}
20748c2ecf20Sopenharmony_ci
20758c2ecf20Sopenharmony_ci	/*
20768c2ecf20Sopenharmony_ci	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
20778c2ecf20Sopenharmony_ci	 * stateid, or it has been invalidated, then we must use the open
20788c2ecf20Sopenharmony_ci	 * stateid.
20798c2ecf20Sopenharmony_ci	 */
20808c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
20818c2ecf20Sopenharmony_ci		int status;
20828c2ecf20Sopenharmony_ci
20838c2ecf20Sopenharmony_ci		/*
20848c2ecf20Sopenharmony_ci		 * The first layoutget for the file. Need to serialize per
20858c2ecf20Sopenharmony_ci		 * RFC 5661 Errata 3208.
20868c2ecf20Sopenharmony_ci		 */
20878c2ecf20Sopenharmony_ci		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
20888c2ecf20Sopenharmony_ci				     &lo->plh_flags)) {
20898c2ecf20Sopenharmony_ci			spin_unlock(&ino->i_lock);
20908c2ecf20Sopenharmony_ci			lseg = ERR_PTR(wait_on_bit(&lo->plh_flags,
20918c2ecf20Sopenharmony_ci						NFS_LAYOUT_FIRST_LAYOUTGET,
20928c2ecf20Sopenharmony_ci						TASK_KILLABLE));
20938c2ecf20Sopenharmony_ci			if (IS_ERR(lseg))
20948c2ecf20Sopenharmony_ci				goto out_put_layout_hdr;
20958c2ecf20Sopenharmony_ci			pnfs_put_layout_hdr(lo);
20968c2ecf20Sopenharmony_ci			dprintk("%s retrying\n", __func__);
20978c2ecf20Sopenharmony_ci			goto lookup_again;
20988c2ecf20Sopenharmony_ci		}
20998c2ecf20Sopenharmony_ci
21008c2ecf20Sopenharmony_ci		spin_unlock(&ino->i_lock);
21018c2ecf20Sopenharmony_ci		first = true;
21028c2ecf20Sopenharmony_ci		status = nfs4_select_rw_stateid(ctx->state,
21038c2ecf20Sopenharmony_ci					iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
21048c2ecf20Sopenharmony_ci					NULL, &stateid, NULL);
21058c2ecf20Sopenharmony_ci		if (status != 0) {
21068c2ecf20Sopenharmony_ci			lseg = ERR_PTR(status);
21078c2ecf20Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count,
21088c2ecf20Sopenharmony_ci					iomode, lo, lseg,
21098c2ecf20Sopenharmony_ci					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
21108c2ecf20Sopenharmony_ci			nfs4_schedule_stateid_recovery(server, ctx->state);
21118c2ecf20Sopenharmony_ci			pnfs_clear_first_layoutget(lo);
21128c2ecf20Sopenharmony_ci			pnfs_put_layout_hdr(lo);
21138c2ecf20Sopenharmony_ci			goto lookup_again;
21148c2ecf20Sopenharmony_ci		}
21158c2ecf20Sopenharmony_ci		spin_lock(&ino->i_lock);
21168c2ecf20Sopenharmony_ci	} else {
21178c2ecf20Sopenharmony_ci		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
21188c2ecf20Sopenharmony_ci	}
21198c2ecf20Sopenharmony_ci
21208c2ecf20Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo)) {
21218c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
21228c2ecf20Sopenharmony_ci				PNFS_UPDATE_LAYOUT_BLOCKED);
21238c2ecf20Sopenharmony_ci		goto out_unlock;
21248c2ecf20Sopenharmony_ci	}
21258c2ecf20Sopenharmony_ci	nfs_layoutget_begin(lo);
21268c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
21278c2ecf20Sopenharmony_ci
21288c2ecf20Sopenharmony_ci	_add_to_server_list(lo, server);
21298c2ecf20Sopenharmony_ci
21308c2ecf20Sopenharmony_ci	pg_offset = arg.offset & ~PAGE_MASK;
21318c2ecf20Sopenharmony_ci	if (pg_offset) {
21328c2ecf20Sopenharmony_ci		arg.offset -= pg_offset;
21338c2ecf20Sopenharmony_ci		arg.length += pg_offset;
21348c2ecf20Sopenharmony_ci	}
21358c2ecf20Sopenharmony_ci	if (arg.length != NFS4_MAX_UINT64)
21368c2ecf20Sopenharmony_ci		arg.length = PAGE_ALIGN(arg.length);
21378c2ecf20Sopenharmony_ci
21388c2ecf20Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
21398c2ecf20Sopenharmony_ci	if (!lgp) {
21408c2ecf20Sopenharmony_ci		lseg = ERR_PTR(-ENOMEM);
21418c2ecf20Sopenharmony_ci		trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
21428c2ecf20Sopenharmony_ci					 PNFS_UPDATE_LAYOUT_NOMEM);
21438c2ecf20Sopenharmony_ci		nfs_layoutget_end(lo);
21448c2ecf20Sopenharmony_ci		goto out_put_layout_hdr;
21458c2ecf20Sopenharmony_ci	}
21468c2ecf20Sopenharmony_ci
21478c2ecf20Sopenharmony_ci	lseg = nfs4_proc_layoutget(lgp, &timeout);
21488c2ecf20Sopenharmony_ci	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
21498c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
21508c2ecf20Sopenharmony_ci	nfs_layoutget_end(lo);
21518c2ecf20Sopenharmony_ci	if (IS_ERR(lseg)) {
21528c2ecf20Sopenharmony_ci		switch(PTR_ERR(lseg)) {
21538c2ecf20Sopenharmony_ci		case -EBUSY:
21548c2ecf20Sopenharmony_ci			if (time_after(jiffies, giveup))
21558c2ecf20Sopenharmony_ci				lseg = NULL;
21568c2ecf20Sopenharmony_ci			break;
21578c2ecf20Sopenharmony_ci		case -ERECALLCONFLICT:
21588c2ecf20Sopenharmony_ci		case -EAGAIN:
21598c2ecf20Sopenharmony_ci			break;
21608c2ecf20Sopenharmony_ci		case -ENODATA:
21618c2ecf20Sopenharmony_ci			/* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
21628c2ecf20Sopenharmony_ci			pnfs_layout_set_fail_bit(
21638c2ecf20Sopenharmony_ci				lo, pnfs_iomode_to_fail_bit(iomode));
21648c2ecf20Sopenharmony_ci			lseg = NULL;
21658c2ecf20Sopenharmony_ci			goto out_put_layout_hdr;
21668c2ecf20Sopenharmony_ci		default:
21678c2ecf20Sopenharmony_ci			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
21688c2ecf20Sopenharmony_ci				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
21698c2ecf20Sopenharmony_ci				lseg = NULL;
21708c2ecf20Sopenharmony_ci			}
21718c2ecf20Sopenharmony_ci			goto out_put_layout_hdr;
21728c2ecf20Sopenharmony_ci		}
21738c2ecf20Sopenharmony_ci		if (lseg) {
21748c2ecf20Sopenharmony_ci			if (first)
21758c2ecf20Sopenharmony_ci				pnfs_clear_first_layoutget(lo);
21768c2ecf20Sopenharmony_ci			trace_pnfs_update_layout(ino, pos, count,
21778c2ecf20Sopenharmony_ci				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
21788c2ecf20Sopenharmony_ci			pnfs_put_layout_hdr(lo);
21798c2ecf20Sopenharmony_ci			goto lookup_again;
21808c2ecf20Sopenharmony_ci		}
21818c2ecf20Sopenharmony_ci	} else {
21828c2ecf20Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
21838c2ecf20Sopenharmony_ci	}
21848c2ecf20Sopenharmony_ci
21858c2ecf20Sopenharmony_ciout_put_layout_hdr:
21868c2ecf20Sopenharmony_ci	if (first)
21878c2ecf20Sopenharmony_ci		pnfs_clear_first_layoutget(lo);
21888c2ecf20Sopenharmony_ci	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
21898c2ecf20Sopenharmony_ci				 PNFS_UPDATE_LAYOUT_EXIT);
21908c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
21918c2ecf20Sopenharmony_ciout:
21928c2ecf20Sopenharmony_ci	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
21938c2ecf20Sopenharmony_ci			"(%s, offset: %llu, length: %llu)\n",
21948c2ecf20Sopenharmony_ci			__func__, ino->i_sb->s_id,
21958c2ecf20Sopenharmony_ci			(unsigned long long)NFS_FILEID(ino),
21968c2ecf20Sopenharmony_ci			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
21978c2ecf20Sopenharmony_ci			iomode==IOMODE_RW ?  "read/write" : "read-only",
21988c2ecf20Sopenharmony_ci			(unsigned long long)pos,
21998c2ecf20Sopenharmony_ci			(unsigned long long)count);
22008c2ecf20Sopenharmony_ci	return lseg;
22018c2ecf20Sopenharmony_ciout_unlock:
22028c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
22038c2ecf20Sopenharmony_ci	goto out_put_layout_hdr;
22048c2ecf20Sopenharmony_ci}
22058c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_update_layout);
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_cistatic bool
22088c2ecf20Sopenharmony_cipnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
22098c2ecf20Sopenharmony_ci{
22108c2ecf20Sopenharmony_ci	switch (range->iomode) {
22118c2ecf20Sopenharmony_ci	case IOMODE_READ:
22128c2ecf20Sopenharmony_ci	case IOMODE_RW:
22138c2ecf20Sopenharmony_ci		break;
22148c2ecf20Sopenharmony_ci	default:
22158c2ecf20Sopenharmony_ci		return false;
22168c2ecf20Sopenharmony_ci	}
22178c2ecf20Sopenharmony_ci	if (range->offset == NFS4_MAX_UINT64)
22188c2ecf20Sopenharmony_ci		return false;
22198c2ecf20Sopenharmony_ci	if (range->length == 0)
22208c2ecf20Sopenharmony_ci		return false;
22218c2ecf20Sopenharmony_ci	if (range->length != NFS4_MAX_UINT64 &&
22228c2ecf20Sopenharmony_ci	    range->length > NFS4_MAX_UINT64 - range->offset)
22238c2ecf20Sopenharmony_ci		return false;
22248c2ecf20Sopenharmony_ci	return true;
22258c2ecf20Sopenharmony_ci}
22268c2ecf20Sopenharmony_ci
22278c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *
22288c2ecf20Sopenharmony_ci_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
22298c2ecf20Sopenharmony_ci{
22308c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
22338c2ecf20Sopenharmony_ci	lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL);
22348c2ecf20Sopenharmony_ci	if (!lo)
22358c2ecf20Sopenharmony_ci		goto out_unlock;
22368c2ecf20Sopenharmony_ci	if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
22378c2ecf20Sopenharmony_ci		goto out_unlock;
22388c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
22398c2ecf20Sopenharmony_ci		goto out_unlock;
22408c2ecf20Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo))
22418c2ecf20Sopenharmony_ci		goto out_unlock;
22428c2ecf20Sopenharmony_ci	if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
22438c2ecf20Sopenharmony_ci		goto out_unlock;
22448c2ecf20Sopenharmony_ci	nfs_layoutget_begin(lo);
22458c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
22468c2ecf20Sopenharmony_ci	_add_to_server_list(lo, NFS_SERVER(ino));
22478c2ecf20Sopenharmony_ci	return lo;
22488c2ecf20Sopenharmony_ci
22498c2ecf20Sopenharmony_ciout_unlock:
22508c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
22518c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(lo);
22528c2ecf20Sopenharmony_ci	return NULL;
22538c2ecf20Sopenharmony_ci}
22548c2ecf20Sopenharmony_ci
22558c2ecf20Sopenharmony_cistatic void _lgopen_prepare_attached(struct nfs4_opendata *data,
22568c2ecf20Sopenharmony_ci				     struct nfs_open_context *ctx)
22578c2ecf20Sopenharmony_ci{
22588c2ecf20Sopenharmony_ci	struct inode *ino = data->dentry->d_inode;
22598c2ecf20Sopenharmony_ci	struct pnfs_layout_range rng = {
22608c2ecf20Sopenharmony_ci		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
22618c2ecf20Sopenharmony_ci			  IOMODE_RW: IOMODE_READ,
22628c2ecf20Sopenharmony_ci		.offset = 0,
22638c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
22648c2ecf20Sopenharmony_ci	};
22658c2ecf20Sopenharmony_ci	struct nfs4_layoutget *lgp;
22668c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
22678c2ecf20Sopenharmony_ci
22688c2ecf20Sopenharmony_ci	/* Heuristic: don't send layoutget if we have cached data */
22698c2ecf20Sopenharmony_ci	if (rng.iomode == IOMODE_READ &&
22708c2ecf20Sopenharmony_ci	   (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
22718c2ecf20Sopenharmony_ci		return;
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_ci	lo = _pnfs_grab_empty_layout(ino, ctx);
22748c2ecf20Sopenharmony_ci	if (!lo)
22758c2ecf20Sopenharmony_ci		return;
22768c2ecf20Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
22778c2ecf20Sopenharmony_ci					     &rng, GFP_KERNEL);
22788c2ecf20Sopenharmony_ci	if (!lgp) {
22798c2ecf20Sopenharmony_ci		pnfs_clear_first_layoutget(lo);
22808c2ecf20Sopenharmony_ci		nfs_layoutget_end(lo);
22818c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
22828c2ecf20Sopenharmony_ci		return;
22838c2ecf20Sopenharmony_ci	}
22848c2ecf20Sopenharmony_ci	data->lgp = lgp;
22858c2ecf20Sopenharmony_ci	data->o_arg.lg_args = &lgp->args;
22868c2ecf20Sopenharmony_ci	data->o_res.lg_res = &lgp->res;
22878c2ecf20Sopenharmony_ci}
22888c2ecf20Sopenharmony_ci
22898c2ecf20Sopenharmony_cistatic void _lgopen_prepare_floating(struct nfs4_opendata *data,
22908c2ecf20Sopenharmony_ci				     struct nfs_open_context *ctx)
22918c2ecf20Sopenharmony_ci{
22928c2ecf20Sopenharmony_ci	struct pnfs_layout_range rng = {
22938c2ecf20Sopenharmony_ci		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
22948c2ecf20Sopenharmony_ci			  IOMODE_RW: IOMODE_READ,
22958c2ecf20Sopenharmony_ci		.offset = 0,
22968c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
22978c2ecf20Sopenharmony_ci	};
22988c2ecf20Sopenharmony_ci	struct nfs4_layoutget *lgp;
22998c2ecf20Sopenharmony_ci
23008c2ecf20Sopenharmony_ci	lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid,
23018c2ecf20Sopenharmony_ci					     &rng, GFP_KERNEL);
23028c2ecf20Sopenharmony_ci	if (!lgp)
23038c2ecf20Sopenharmony_ci		return;
23048c2ecf20Sopenharmony_ci	data->lgp = lgp;
23058c2ecf20Sopenharmony_ci	data->o_arg.lg_args = &lgp->args;
23068c2ecf20Sopenharmony_ci	data->o_res.lg_res = &lgp->res;
23078c2ecf20Sopenharmony_ci}
23088c2ecf20Sopenharmony_ci
23098c2ecf20Sopenharmony_civoid pnfs_lgopen_prepare(struct nfs4_opendata *data,
23108c2ecf20Sopenharmony_ci			 struct nfs_open_context *ctx)
23118c2ecf20Sopenharmony_ci{
23128c2ecf20Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
23138c2ecf20Sopenharmony_ci
23148c2ecf20Sopenharmony_ci	if (!(pnfs_enabled_sb(server) &&
23158c2ecf20Sopenharmony_ci	      server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
23168c2ecf20Sopenharmony_ci		return;
23178c2ecf20Sopenharmony_ci	/* Could check on max_ops, but currently hardcoded high enough */
23188c2ecf20Sopenharmony_ci	if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
23198c2ecf20Sopenharmony_ci		return;
23208c2ecf20Sopenharmony_ci	if (data->state)
23218c2ecf20Sopenharmony_ci		_lgopen_prepare_attached(data, ctx);
23228c2ecf20Sopenharmony_ci	else
23238c2ecf20Sopenharmony_ci		_lgopen_prepare_floating(data, ctx);
23248c2ecf20Sopenharmony_ci}
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_civoid pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
23278c2ecf20Sopenharmony_ci		       struct nfs_open_context *ctx)
23288c2ecf20Sopenharmony_ci{
23298c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
23308c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
23318c2ecf20Sopenharmony_ci	struct nfs_server *srv = NFS_SERVER(ino);
23328c2ecf20Sopenharmony_ci	u32 iomode;
23338c2ecf20Sopenharmony_ci
23348c2ecf20Sopenharmony_ci	if (!lgp)
23358c2ecf20Sopenharmony_ci		return;
23368c2ecf20Sopenharmony_ci	dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
23378c2ecf20Sopenharmony_ci	if (lgp->res.status) {
23388c2ecf20Sopenharmony_ci		switch (lgp->res.status) {
23398c2ecf20Sopenharmony_ci		default:
23408c2ecf20Sopenharmony_ci			break;
23418c2ecf20Sopenharmony_ci		/*
23428c2ecf20Sopenharmony_ci		 * Halt lgopen attempts if the server doesn't recognise
23438c2ecf20Sopenharmony_ci		 * the "current stateid" value, the layout type, or the
23448c2ecf20Sopenharmony_ci		 * layoutget operation as being valid.
23458c2ecf20Sopenharmony_ci		 * Also if it complains about too many ops in the compound
23468c2ecf20Sopenharmony_ci		 * or of the request/reply being too big.
23478c2ecf20Sopenharmony_ci		 */
23488c2ecf20Sopenharmony_ci		case -NFS4ERR_BAD_STATEID:
23498c2ecf20Sopenharmony_ci		case -NFS4ERR_NOTSUPP:
23508c2ecf20Sopenharmony_ci		case -NFS4ERR_REP_TOO_BIG:
23518c2ecf20Sopenharmony_ci		case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
23528c2ecf20Sopenharmony_ci		case -NFS4ERR_REQ_TOO_BIG:
23538c2ecf20Sopenharmony_ci		case -NFS4ERR_TOO_MANY_OPS:
23548c2ecf20Sopenharmony_ci		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
23558c2ecf20Sopenharmony_ci			srv->caps &= ~NFS_CAP_LGOPEN;
23568c2ecf20Sopenharmony_ci		}
23578c2ecf20Sopenharmony_ci		return;
23588c2ecf20Sopenharmony_ci	}
23598c2ecf20Sopenharmony_ci	if (!lgp->args.inode) {
23608c2ecf20Sopenharmony_ci		lo = _pnfs_grab_empty_layout(ino, ctx);
23618c2ecf20Sopenharmony_ci		if (!lo)
23628c2ecf20Sopenharmony_ci			return;
23638c2ecf20Sopenharmony_ci		lgp->args.inode = ino;
23648c2ecf20Sopenharmony_ci	} else
23658c2ecf20Sopenharmony_ci		lo = NFS_I(lgp->args.inode)->layout;
23668c2ecf20Sopenharmony_ci
23678c2ecf20Sopenharmony_ci	lseg = pnfs_layout_process(lgp);
23688c2ecf20Sopenharmony_ci	if (!IS_ERR(lseg)) {
23698c2ecf20Sopenharmony_ci		iomode = lgp->args.range.iomode;
23708c2ecf20Sopenharmony_ci		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
23718c2ecf20Sopenharmony_ci		pnfs_put_lseg(lseg);
23728c2ecf20Sopenharmony_ci	}
23738c2ecf20Sopenharmony_ci}
23748c2ecf20Sopenharmony_ci
23758c2ecf20Sopenharmony_civoid nfs4_lgopen_release(struct nfs4_layoutget *lgp)
23768c2ecf20Sopenharmony_ci{
23778c2ecf20Sopenharmony_ci	if (lgp != NULL) {
23788c2ecf20Sopenharmony_ci		struct inode *inode = lgp->args.inode;
23798c2ecf20Sopenharmony_ci		if (inode) {
23808c2ecf20Sopenharmony_ci			struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
23818c2ecf20Sopenharmony_ci			pnfs_clear_first_layoutget(lo);
23828c2ecf20Sopenharmony_ci			nfs_layoutget_end(lo);
23838c2ecf20Sopenharmony_ci		}
23848c2ecf20Sopenharmony_ci		pnfs_layoutget_free(lgp);
23858c2ecf20Sopenharmony_ci	}
23868c2ecf20Sopenharmony_ci}
23878c2ecf20Sopenharmony_ci
23888c2ecf20Sopenharmony_cistruct pnfs_layout_segment *
23898c2ecf20Sopenharmony_cipnfs_layout_process(struct nfs4_layoutget *lgp)
23908c2ecf20Sopenharmony_ci{
23918c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
23928c2ecf20Sopenharmony_ci	struct nfs4_layoutget_res *res = &lgp->res;
23938c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
23948c2ecf20Sopenharmony_ci	struct inode *ino = lo->plh_inode;
23958c2ecf20Sopenharmony_ci	LIST_HEAD(free_me);
23968c2ecf20Sopenharmony_ci
23978c2ecf20Sopenharmony_ci	if (!pnfs_sanity_check_layout_range(&res->range))
23988c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
23998c2ecf20Sopenharmony_ci
24008c2ecf20Sopenharmony_ci	/* Inject layout blob into I/O device driver */
24018c2ecf20Sopenharmony_ci	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
24028c2ecf20Sopenharmony_ci	if (IS_ERR_OR_NULL(lseg)) {
24038c2ecf20Sopenharmony_ci		if (!lseg)
24048c2ecf20Sopenharmony_ci			lseg = ERR_PTR(-ENOMEM);
24058c2ecf20Sopenharmony_ci
24068c2ecf20Sopenharmony_ci		dprintk("%s: Could not allocate layout: error %ld\n",
24078c2ecf20Sopenharmony_ci		       __func__, PTR_ERR(lseg));
24088c2ecf20Sopenharmony_ci		return lseg;
24098c2ecf20Sopenharmony_ci	}
24108c2ecf20Sopenharmony_ci
24118c2ecf20Sopenharmony_ci	pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
24128c2ecf20Sopenharmony_ci
24138c2ecf20Sopenharmony_ci	spin_lock(&ino->i_lock);
24148c2ecf20Sopenharmony_ci	if (pnfs_layoutgets_blocked(lo)) {
24158c2ecf20Sopenharmony_ci		dprintk("%s forget reply due to state\n", __func__);
24168c2ecf20Sopenharmony_ci		goto out_forget;
24178c2ecf20Sopenharmony_ci	}
24188c2ecf20Sopenharmony_ci
24198c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
24208c2ecf20Sopenharmony_ci	    !pnfs_is_first_layoutget(lo))
24218c2ecf20Sopenharmony_ci		goto out_forget;
24228c2ecf20Sopenharmony_ci
24238c2ecf20Sopenharmony_ci	if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
24248c2ecf20Sopenharmony_ci		/* existing state ID, make sure the sequence number matches. */
24258c2ecf20Sopenharmony_ci		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
24268c2ecf20Sopenharmony_ci			if (!pnfs_layout_is_valid(lo))
24278c2ecf20Sopenharmony_ci				lo->plh_barrier = 0;
24288c2ecf20Sopenharmony_ci			dprintk("%s forget reply due to sequence\n", __func__);
24298c2ecf20Sopenharmony_ci			goto out_forget;
24308c2ecf20Sopenharmony_ci		}
24318c2ecf20Sopenharmony_ci		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false);
24328c2ecf20Sopenharmony_ci	} else if (pnfs_layout_is_valid(lo)) {
24338c2ecf20Sopenharmony_ci		/*
24348c2ecf20Sopenharmony_ci		 * We got an entirely new state ID.  Mark all segments for the
24358c2ecf20Sopenharmony_ci		 * inode invalid, and retry the layoutget
24368c2ecf20Sopenharmony_ci		 */
24378c2ecf20Sopenharmony_ci		struct pnfs_layout_range range = {
24388c2ecf20Sopenharmony_ci			.iomode = IOMODE_ANY,
24398c2ecf20Sopenharmony_ci			.length = NFS4_MAX_UINT64,
24408c2ecf20Sopenharmony_ci		};
24418c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
24428c2ecf20Sopenharmony_ci		pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
24438c2ecf20Sopenharmony_ci						&range, 0);
24448c2ecf20Sopenharmony_ci		goto out_forget;
24458c2ecf20Sopenharmony_ci	} else {
24468c2ecf20Sopenharmony_ci		/* We have a completely new layout */
24478c2ecf20Sopenharmony_ci		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
24488c2ecf20Sopenharmony_ci	}
24498c2ecf20Sopenharmony_ci
24508c2ecf20Sopenharmony_ci	pnfs_get_lseg(lseg);
24518c2ecf20Sopenharmony_ci	pnfs_layout_insert_lseg(lo, lseg, &free_me);
24528c2ecf20Sopenharmony_ci
24538c2ecf20Sopenharmony_ci
24548c2ecf20Sopenharmony_ci	if (res->return_on_close)
24558c2ecf20Sopenharmony_ci		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
24568c2ecf20Sopenharmony_ci
24578c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
24588c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&free_me);
24598c2ecf20Sopenharmony_ci	return lseg;
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_ciout_forget:
24628c2ecf20Sopenharmony_ci	spin_unlock(&ino->i_lock);
24638c2ecf20Sopenharmony_ci	lseg->pls_layout = lo;
24648c2ecf20Sopenharmony_ci	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
24658c2ecf20Sopenharmony_ci	pnfs_free_lseg_list(&free_me);
24668c2ecf20Sopenharmony_ci	return ERR_PTR(-EAGAIN);
24678c2ecf20Sopenharmony_ci}
24688c2ecf20Sopenharmony_ci
24698c2ecf20Sopenharmony_ci/**
24708c2ecf20Sopenharmony_ci * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
24718c2ecf20Sopenharmony_ci * @lo: pointer to layout header
24728c2ecf20Sopenharmony_ci * @tmp_list: list header to be used with pnfs_free_lseg_list()
24738c2ecf20Sopenharmony_ci * @return_range: describe layout segment ranges to be returned
24748c2ecf20Sopenharmony_ci * @seq: stateid seqid to match
24758c2ecf20Sopenharmony_ci *
24768c2ecf20Sopenharmony_ci * This function is mainly intended for use by layoutrecall. It attempts
24778c2ecf20Sopenharmony_ci * to free the layout segment immediately, or else to mark it for return
24788c2ecf20Sopenharmony_ci * as soon as its reference count drops to zero.
24798c2ecf20Sopenharmony_ci *
24808c2ecf20Sopenharmony_ci * Returns
24818c2ecf20Sopenharmony_ci * - 0: a layoutreturn needs to be scheduled.
24828c2ecf20Sopenharmony_ci * - EBUSY: there are layout segment that are still in use.
24838c2ecf20Sopenharmony_ci * - ENOENT: there are no layout segments that need to be returned.
24848c2ecf20Sopenharmony_ci */
24858c2ecf20Sopenharmony_ciint
24868c2ecf20Sopenharmony_cipnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
24878c2ecf20Sopenharmony_ci				struct list_head *tmp_list,
24888c2ecf20Sopenharmony_ci				const struct pnfs_layout_range *return_range,
24898c2ecf20Sopenharmony_ci				u32 seq)
24908c2ecf20Sopenharmony_ci{
24918c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *next;
24928c2ecf20Sopenharmony_ci	int remaining = 0;
24938c2ecf20Sopenharmony_ci
24948c2ecf20Sopenharmony_ci	dprintk("%s:Begin lo %p\n", __func__, lo);
24958c2ecf20Sopenharmony_ci
24968c2ecf20Sopenharmony_ci	assert_spin_locked(&lo->plh_inode->i_lock);
24978c2ecf20Sopenharmony_ci
24988c2ecf20Sopenharmony_ci	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
24998c2ecf20Sopenharmony_ci		tmp_list = &lo->plh_return_segs;
25008c2ecf20Sopenharmony_ci
25018c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
25028c2ecf20Sopenharmony_ci		if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
25038c2ecf20Sopenharmony_ci			dprintk("%s: marking lseg %p iomode %d "
25048c2ecf20Sopenharmony_ci				"offset %llu length %llu\n", __func__,
25058c2ecf20Sopenharmony_ci				lseg, lseg->pls_range.iomode,
25068c2ecf20Sopenharmony_ci				lseg->pls_range.offset,
25078c2ecf20Sopenharmony_ci				lseg->pls_range.length);
25088c2ecf20Sopenharmony_ci			if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
25098c2ecf20Sopenharmony_ci				tmp_list = &lo->plh_return_segs;
25108c2ecf20Sopenharmony_ci			if (mark_lseg_invalid(lseg, tmp_list))
25118c2ecf20Sopenharmony_ci				continue;
25128c2ecf20Sopenharmony_ci			remaining++;
25138c2ecf20Sopenharmony_ci			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
25148c2ecf20Sopenharmony_ci		}
25158c2ecf20Sopenharmony_ci
25168c2ecf20Sopenharmony_ci	if (remaining) {
25178c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
25188c2ecf20Sopenharmony_ci		return -EBUSY;
25198c2ecf20Sopenharmony_ci	}
25208c2ecf20Sopenharmony_ci
25218c2ecf20Sopenharmony_ci	if (!list_empty(&lo->plh_return_segs)) {
25228c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
25238c2ecf20Sopenharmony_ci		return 0;
25248c2ecf20Sopenharmony_ci	}
25258c2ecf20Sopenharmony_ci
25268c2ecf20Sopenharmony_ci	return -ENOENT;
25278c2ecf20Sopenharmony_ci}
25288c2ecf20Sopenharmony_ci
25298c2ecf20Sopenharmony_cistatic void
25308c2ecf20Sopenharmony_cipnfs_mark_layout_for_return(struct inode *inode,
25318c2ecf20Sopenharmony_ci			    const struct pnfs_layout_range *range)
25328c2ecf20Sopenharmony_ci{
25338c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
25348c2ecf20Sopenharmony_ci	bool return_now = false;
25358c2ecf20Sopenharmony_ci
25368c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
25378c2ecf20Sopenharmony_ci	lo = NFS_I(inode)->layout;
25388c2ecf20Sopenharmony_ci	if (!pnfs_layout_is_valid(lo)) {
25398c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
25408c2ecf20Sopenharmony_ci		return;
25418c2ecf20Sopenharmony_ci	}
25428c2ecf20Sopenharmony_ci	pnfs_set_plh_return_info(lo, range->iomode, 0);
25438c2ecf20Sopenharmony_ci	/*
25448c2ecf20Sopenharmony_ci	 * mark all matching lsegs so that we are sure to have no live
25458c2ecf20Sopenharmony_ci	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
25468c2ecf20Sopenharmony_ci	 * for how it works.
25478c2ecf20Sopenharmony_ci	 */
25488c2ecf20Sopenharmony_ci	if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
25498c2ecf20Sopenharmony_ci		const struct cred *cred;
25508c2ecf20Sopenharmony_ci		nfs4_stateid stateid;
25518c2ecf20Sopenharmony_ci		enum pnfs_iomode iomode;
25528c2ecf20Sopenharmony_ci
25538c2ecf20Sopenharmony_ci		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
25548c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
25558c2ecf20Sopenharmony_ci		if (return_now)
25568c2ecf20Sopenharmony_ci			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
25578c2ecf20Sopenharmony_ci	} else {
25588c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
25598c2ecf20Sopenharmony_ci		nfs_commit_inode(inode, 0);
25608c2ecf20Sopenharmony_ci	}
25618c2ecf20Sopenharmony_ci}
25628c2ecf20Sopenharmony_ci
25638c2ecf20Sopenharmony_civoid pnfs_error_mark_layout_for_return(struct inode *inode,
25648c2ecf20Sopenharmony_ci				       struct pnfs_layout_segment *lseg)
25658c2ecf20Sopenharmony_ci{
25668c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
25678c2ecf20Sopenharmony_ci		.iomode = lseg->pls_range.iomode,
25688c2ecf20Sopenharmony_ci		.offset = 0,
25698c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
25708c2ecf20Sopenharmony_ci	};
25718c2ecf20Sopenharmony_ci
25728c2ecf20Sopenharmony_ci	pnfs_mark_layout_for_return(inode, &range);
25738c2ecf20Sopenharmony_ci}
25748c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
25758c2ecf20Sopenharmony_ci
25768c2ecf20Sopenharmony_cistatic bool
25778c2ecf20Sopenharmony_cipnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
25788c2ecf20Sopenharmony_ci{
25798c2ecf20Sopenharmony_ci	return pnfs_layout_is_valid(lo) &&
25808c2ecf20Sopenharmony_ci		!test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
25818c2ecf20Sopenharmony_ci		!test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
25828c2ecf20Sopenharmony_ci}
25838c2ecf20Sopenharmony_ci
25848c2ecf20Sopenharmony_cistatic struct pnfs_layout_segment *
25858c2ecf20Sopenharmony_cipnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
25868c2ecf20Sopenharmony_ci		     const struct pnfs_layout_range *range,
25878c2ecf20Sopenharmony_ci		     enum pnfs_iomode iomode)
25888c2ecf20Sopenharmony_ci{
25898c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
25908c2ecf20Sopenharmony_ci
25918c2ecf20Sopenharmony_ci	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
25928c2ecf20Sopenharmony_ci		if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
25938c2ecf20Sopenharmony_ci			continue;
25948c2ecf20Sopenharmony_ci		if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
25958c2ecf20Sopenharmony_ci			continue;
25968c2ecf20Sopenharmony_ci		if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
25978c2ecf20Sopenharmony_ci			continue;
25988c2ecf20Sopenharmony_ci		if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
25998c2ecf20Sopenharmony_ci			return lseg;
26008c2ecf20Sopenharmony_ci	}
26018c2ecf20Sopenharmony_ci	return NULL;
26028c2ecf20Sopenharmony_ci}
26038c2ecf20Sopenharmony_ci
26048c2ecf20Sopenharmony_ci/* Find open file states whose mode matches that of the range */
26058c2ecf20Sopenharmony_cistatic bool
26068c2ecf20Sopenharmony_cipnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
26078c2ecf20Sopenharmony_ci				 const struct pnfs_layout_range *range)
26088c2ecf20Sopenharmony_ci{
26098c2ecf20Sopenharmony_ci	struct list_head *head;
26108c2ecf20Sopenharmony_ci	struct nfs_open_context *ctx;
26118c2ecf20Sopenharmony_ci	fmode_t mode = 0;
26128c2ecf20Sopenharmony_ci
26138c2ecf20Sopenharmony_ci	if (!pnfs_layout_can_be_returned(lo) ||
26148c2ecf20Sopenharmony_ci	    !pnfs_find_first_lseg(lo, range, range->iomode))
26158c2ecf20Sopenharmony_ci		return false;
26168c2ecf20Sopenharmony_ci
26178c2ecf20Sopenharmony_ci	head = &NFS_I(lo->plh_inode)->open_files;
26188c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(ctx, head, list) {
26198c2ecf20Sopenharmony_ci		if (ctx->state)
26208c2ecf20Sopenharmony_ci			mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
26218c2ecf20Sopenharmony_ci	}
26228c2ecf20Sopenharmony_ci
26238c2ecf20Sopenharmony_ci	switch (range->iomode) {
26248c2ecf20Sopenharmony_ci	default:
26258c2ecf20Sopenharmony_ci		break;
26268c2ecf20Sopenharmony_ci	case IOMODE_READ:
26278c2ecf20Sopenharmony_ci		mode &= ~FMODE_WRITE;
26288c2ecf20Sopenharmony_ci		break;
26298c2ecf20Sopenharmony_ci	case IOMODE_RW:
26308c2ecf20Sopenharmony_ci		if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
26318c2ecf20Sopenharmony_ci			mode &= ~FMODE_READ;
26328c2ecf20Sopenharmony_ci	}
26338c2ecf20Sopenharmony_ci	return mode == 0;
26348c2ecf20Sopenharmony_ci}
26358c2ecf20Sopenharmony_ci
26368c2ecf20Sopenharmony_cistatic int pnfs_layout_return_unused_byserver(struct nfs_server *server,
26378c2ecf20Sopenharmony_ci					      void *data)
26388c2ecf20Sopenharmony_ci{
26398c2ecf20Sopenharmony_ci	const struct pnfs_layout_range *range = data;
26408c2ecf20Sopenharmony_ci	const struct cred *cred;
26418c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
26428c2ecf20Sopenharmony_ci	struct inode *inode;
26438c2ecf20Sopenharmony_ci	nfs4_stateid stateid;
26448c2ecf20Sopenharmony_ci	enum pnfs_iomode iomode;
26458c2ecf20Sopenharmony_ci
26468c2ecf20Sopenharmony_cirestart:
26478c2ecf20Sopenharmony_ci	rcu_read_lock();
26488c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
26498c2ecf20Sopenharmony_ci		inode = lo->plh_inode;
26508c2ecf20Sopenharmony_ci		if (!inode || !pnfs_layout_can_be_returned(lo) ||
26518c2ecf20Sopenharmony_ci		    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
26528c2ecf20Sopenharmony_ci			continue;
26538c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
26548c2ecf20Sopenharmony_ci		if (!lo->plh_inode ||
26558c2ecf20Sopenharmony_ci		    !pnfs_should_return_unused_layout(lo, range)) {
26568c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
26578c2ecf20Sopenharmony_ci			continue;
26588c2ecf20Sopenharmony_ci		}
26598c2ecf20Sopenharmony_ci		pnfs_get_layout_hdr(lo);
26608c2ecf20Sopenharmony_ci		pnfs_set_plh_return_info(lo, range->iomode, 0);
26618c2ecf20Sopenharmony_ci		if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
26628c2ecf20Sopenharmony_ci						    range, 0) != 0 ||
26638c2ecf20Sopenharmony_ci		    !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
26648c2ecf20Sopenharmony_ci			spin_unlock(&inode->i_lock);
26658c2ecf20Sopenharmony_ci			rcu_read_unlock();
26668c2ecf20Sopenharmony_ci			pnfs_put_layout_hdr(lo);
26678c2ecf20Sopenharmony_ci			cond_resched();
26688c2ecf20Sopenharmony_ci			goto restart;
26698c2ecf20Sopenharmony_ci		}
26708c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
26718c2ecf20Sopenharmony_ci		rcu_read_unlock();
26728c2ecf20Sopenharmony_ci		pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
26738c2ecf20Sopenharmony_ci		pnfs_put_layout_hdr(lo);
26748c2ecf20Sopenharmony_ci		cond_resched();
26758c2ecf20Sopenharmony_ci		goto restart;
26768c2ecf20Sopenharmony_ci	}
26778c2ecf20Sopenharmony_ci	rcu_read_unlock();
26788c2ecf20Sopenharmony_ci	return 0;
26798c2ecf20Sopenharmony_ci}
26808c2ecf20Sopenharmony_ci
26818c2ecf20Sopenharmony_civoid
26828c2ecf20Sopenharmony_cipnfs_layout_return_unused_byclid(struct nfs_client *clp,
26838c2ecf20Sopenharmony_ci				 enum pnfs_iomode iomode)
26848c2ecf20Sopenharmony_ci{
26858c2ecf20Sopenharmony_ci	struct pnfs_layout_range range = {
26868c2ecf20Sopenharmony_ci		.iomode = iomode,
26878c2ecf20Sopenharmony_ci		.offset = 0,
26888c2ecf20Sopenharmony_ci		.length = NFS4_MAX_UINT64,
26898c2ecf20Sopenharmony_ci	};
26908c2ecf20Sopenharmony_ci
26918c2ecf20Sopenharmony_ci	nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
26928c2ecf20Sopenharmony_ci			&range);
26938c2ecf20Sopenharmony_ci}
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_civoid
26968c2ecf20Sopenharmony_cipnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
26978c2ecf20Sopenharmony_ci{
26988c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL ||
26998c2ecf20Sopenharmony_ci	    test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
27008c2ecf20Sopenharmony_ci		return;
27018c2ecf20Sopenharmony_ci	pnfs_put_lseg(pgio->pg_lseg);
27028c2ecf20Sopenharmony_ci	pgio->pg_lseg = NULL;
27038c2ecf20Sopenharmony_ci}
27048c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
27058c2ecf20Sopenharmony_ci
27068c2ecf20Sopenharmony_ci/*
27078c2ecf20Sopenharmony_ci * Check for any intersection between the request and the pgio->pg_lseg,
27088c2ecf20Sopenharmony_ci * and if none, put this pgio->pg_lseg away.
27098c2ecf20Sopenharmony_ci */
27108c2ecf20Sopenharmony_civoid
27118c2ecf20Sopenharmony_cipnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
27128c2ecf20Sopenharmony_ci{
27138c2ecf20Sopenharmony_ci	if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
27148c2ecf20Sopenharmony_ci		pnfs_put_lseg(pgio->pg_lseg);
27158c2ecf20Sopenharmony_ci		pgio->pg_lseg = NULL;
27168c2ecf20Sopenharmony_ci	}
27178c2ecf20Sopenharmony_ci}
27188c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_civoid
27218c2ecf20Sopenharmony_cipnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
27228c2ecf20Sopenharmony_ci{
27238c2ecf20Sopenharmony_ci	u64 rd_size = req->wb_bytes;
27248c2ecf20Sopenharmony_ci
27258c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
27268c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_range(pgio, req);
27278c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL) {
27288c2ecf20Sopenharmony_ci		if (pgio->pg_dreq == NULL)
27298c2ecf20Sopenharmony_ci			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
27308c2ecf20Sopenharmony_ci		else
27318c2ecf20Sopenharmony_ci			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
27328c2ecf20Sopenharmony_ci
27338c2ecf20Sopenharmony_ci		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
27348c2ecf20Sopenharmony_ci						   nfs_req_openctx(req),
27358c2ecf20Sopenharmony_ci						   req_offset(req),
27368c2ecf20Sopenharmony_ci						   rd_size,
27378c2ecf20Sopenharmony_ci						   IOMODE_READ,
27388c2ecf20Sopenharmony_ci						   false,
27398c2ecf20Sopenharmony_ci						   GFP_KERNEL);
27408c2ecf20Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
27418c2ecf20Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
27428c2ecf20Sopenharmony_ci			pgio->pg_lseg = NULL;
27438c2ecf20Sopenharmony_ci			return;
27448c2ecf20Sopenharmony_ci		}
27458c2ecf20Sopenharmony_ci	}
27468c2ecf20Sopenharmony_ci	/* If no lseg, fall back to read through mds */
27478c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL)
27488c2ecf20Sopenharmony_ci		nfs_pageio_reset_read_mds(pgio);
27498c2ecf20Sopenharmony_ci
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
27528c2ecf20Sopenharmony_ci
27538c2ecf20Sopenharmony_civoid
27548c2ecf20Sopenharmony_cipnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
27558c2ecf20Sopenharmony_ci			   struct nfs_page *req, u64 wb_size)
27568c2ecf20Sopenharmony_ci{
27578c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
27588c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_range(pgio, req);
27598c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL) {
27608c2ecf20Sopenharmony_ci		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
27618c2ecf20Sopenharmony_ci						   nfs_req_openctx(req),
27628c2ecf20Sopenharmony_ci						   req_offset(req),
27638c2ecf20Sopenharmony_ci						   wb_size,
27648c2ecf20Sopenharmony_ci						   IOMODE_RW,
27658c2ecf20Sopenharmony_ci						   false,
27668c2ecf20Sopenharmony_ci						   GFP_KERNEL);
27678c2ecf20Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
27688c2ecf20Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
27698c2ecf20Sopenharmony_ci			pgio->pg_lseg = NULL;
27708c2ecf20Sopenharmony_ci			return;
27718c2ecf20Sopenharmony_ci		}
27728c2ecf20Sopenharmony_ci	}
27738c2ecf20Sopenharmony_ci	/* If no lseg, fall back to write through mds */
27748c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL)
27758c2ecf20Sopenharmony_ci		nfs_pageio_reset_write_mds(pgio);
27768c2ecf20Sopenharmony_ci}
27778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
27788c2ecf20Sopenharmony_ci
27798c2ecf20Sopenharmony_civoid
27808c2ecf20Sopenharmony_cipnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
27818c2ecf20Sopenharmony_ci{
27828c2ecf20Sopenharmony_ci	if (desc->pg_lseg) {
27838c2ecf20Sopenharmony_ci		pnfs_put_lseg(desc->pg_lseg);
27848c2ecf20Sopenharmony_ci		desc->pg_lseg = NULL;
27858c2ecf20Sopenharmony_ci	}
27868c2ecf20Sopenharmony_ci}
27878c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
27888c2ecf20Sopenharmony_ci
27898c2ecf20Sopenharmony_ci/*
27908c2ecf20Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
27918c2ecf20Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced.
27928c2ecf20Sopenharmony_ci */
27938c2ecf20Sopenharmony_cisize_t
27948c2ecf20Sopenharmony_cipnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
27958c2ecf20Sopenharmony_ci		     struct nfs_page *prev, struct nfs_page *req)
27968c2ecf20Sopenharmony_ci{
27978c2ecf20Sopenharmony_ci	unsigned int size;
27988c2ecf20Sopenharmony_ci	u64 seg_end, req_start, seg_left;
27998c2ecf20Sopenharmony_ci
28008c2ecf20Sopenharmony_ci	size = nfs_generic_pg_test(pgio, prev, req);
28018c2ecf20Sopenharmony_ci	if (!size)
28028c2ecf20Sopenharmony_ci		return 0;
28038c2ecf20Sopenharmony_ci
28048c2ecf20Sopenharmony_ci	/*
28058c2ecf20Sopenharmony_ci	 * 'size' contains the number of bytes left in the current page (up
28068c2ecf20Sopenharmony_ci	 * to the original size asked for in @req->wb_bytes).
28078c2ecf20Sopenharmony_ci	 *
28088c2ecf20Sopenharmony_ci	 * Calculate how many bytes are left in the layout segment
28098c2ecf20Sopenharmony_ci	 * and if there are less bytes than 'size', return that instead.
28108c2ecf20Sopenharmony_ci	 *
28118c2ecf20Sopenharmony_ci	 * Please also note that 'end_offset' is actually the offset of the
28128c2ecf20Sopenharmony_ci	 * first byte that lies outside the pnfs_layout_range. FIXME?
28138c2ecf20Sopenharmony_ci	 *
28148c2ecf20Sopenharmony_ci	 */
28158c2ecf20Sopenharmony_ci	if (pgio->pg_lseg) {
28168c2ecf20Sopenharmony_ci		seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
28178c2ecf20Sopenharmony_ci				     pgio->pg_lseg->pls_range.length);
28188c2ecf20Sopenharmony_ci		req_start = req_offset(req);
28198c2ecf20Sopenharmony_ci
28208c2ecf20Sopenharmony_ci		/* start of request is past the last byte of this segment */
28218c2ecf20Sopenharmony_ci		if (req_start >= seg_end)
28228c2ecf20Sopenharmony_ci			return 0;
28238c2ecf20Sopenharmony_ci
28248c2ecf20Sopenharmony_ci		/* adjust 'size' iff there are fewer bytes left in the
28258c2ecf20Sopenharmony_ci		 * segment than what nfs_generic_pg_test returned */
28268c2ecf20Sopenharmony_ci		seg_left = seg_end - req_start;
28278c2ecf20Sopenharmony_ci		if (seg_left < size)
28288c2ecf20Sopenharmony_ci			size = (unsigned int)seg_left;
28298c2ecf20Sopenharmony_ci	}
28308c2ecf20Sopenharmony_ci
28318c2ecf20Sopenharmony_ci	return size;
28328c2ecf20Sopenharmony_ci}
28338c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
28348c2ecf20Sopenharmony_ci
28358c2ecf20Sopenharmony_ciint pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
28368c2ecf20Sopenharmony_ci{
28378c2ecf20Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
28388c2ecf20Sopenharmony_ci
28398c2ecf20Sopenharmony_ci	/* Resend all requests through the MDS */
28408c2ecf20Sopenharmony_ci	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
28418c2ecf20Sopenharmony_ci			      hdr->completion_ops);
28428c2ecf20Sopenharmony_ci	set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
28438c2ecf20Sopenharmony_ci	return nfs_pageio_resend(&pgio, hdr);
28448c2ecf20Sopenharmony_ci}
28458c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
28468c2ecf20Sopenharmony_ci
28478c2ecf20Sopenharmony_cistatic void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
28488c2ecf20Sopenharmony_ci{
28498c2ecf20Sopenharmony_ci
28508c2ecf20Sopenharmony_ci	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
28518c2ecf20Sopenharmony_ci	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
28528c2ecf20Sopenharmony_ci	    PNFS_LAYOUTRET_ON_ERROR) {
28538c2ecf20Sopenharmony_ci		pnfs_return_layout(hdr->inode);
28548c2ecf20Sopenharmony_ci	}
28558c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
28568c2ecf20Sopenharmony_ci		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
28578c2ecf20Sopenharmony_ci}
28588c2ecf20Sopenharmony_ci
28598c2ecf20Sopenharmony_ci/*
28608c2ecf20Sopenharmony_ci * Called by non rpc-based layout drivers
28618c2ecf20Sopenharmony_ci */
28628c2ecf20Sopenharmony_civoid pnfs_ld_write_done(struct nfs_pgio_header *hdr)
28638c2ecf20Sopenharmony_ci{
28648c2ecf20Sopenharmony_ci	if (likely(!hdr->pnfs_error)) {
28658c2ecf20Sopenharmony_ci		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
28668c2ecf20Sopenharmony_ci				hdr->mds_offset + hdr->res.count);
28678c2ecf20Sopenharmony_ci		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
28688c2ecf20Sopenharmony_ci	}
28698c2ecf20Sopenharmony_ci	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
28708c2ecf20Sopenharmony_ci	if (unlikely(hdr->pnfs_error))
28718c2ecf20Sopenharmony_ci		pnfs_ld_handle_write_error(hdr);
28728c2ecf20Sopenharmony_ci	hdr->mds_ops->rpc_release(hdr);
28738c2ecf20Sopenharmony_ci}
28748c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_write_done);
28758c2ecf20Sopenharmony_ci
28768c2ecf20Sopenharmony_cistatic void
28778c2ecf20Sopenharmony_cipnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
28788c2ecf20Sopenharmony_ci		struct nfs_pgio_header *hdr)
28798c2ecf20Sopenharmony_ci{
28808c2ecf20Sopenharmony_ci	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
28838c2ecf20Sopenharmony_ci		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
28848c2ecf20Sopenharmony_ci		nfs_pageio_reset_write_mds(desc);
28858c2ecf20Sopenharmony_ci		mirror->pg_recoalesce = 1;
28868c2ecf20Sopenharmony_ci	}
28878c2ecf20Sopenharmony_ci	hdr->completion_ops->completion(hdr);
28888c2ecf20Sopenharmony_ci}
28898c2ecf20Sopenharmony_ci
28908c2ecf20Sopenharmony_cistatic enum pnfs_try_status
28918c2ecf20Sopenharmony_cipnfs_try_to_write_data(struct nfs_pgio_header *hdr,
28928c2ecf20Sopenharmony_ci			const struct rpc_call_ops *call_ops,
28938c2ecf20Sopenharmony_ci			struct pnfs_layout_segment *lseg,
28948c2ecf20Sopenharmony_ci			int how)
28958c2ecf20Sopenharmony_ci{
28968c2ecf20Sopenharmony_ci	struct inode *inode = hdr->inode;
28978c2ecf20Sopenharmony_ci	enum pnfs_try_status trypnfs;
28988c2ecf20Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(inode);
28998c2ecf20Sopenharmony_ci
29008c2ecf20Sopenharmony_ci	hdr->mds_ops = call_ops;
29018c2ecf20Sopenharmony_ci
29028c2ecf20Sopenharmony_ci	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
29038c2ecf20Sopenharmony_ci		inode->i_ino, hdr->args.count, hdr->args.offset, how);
29048c2ecf20Sopenharmony_ci	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
29058c2ecf20Sopenharmony_ci	if (trypnfs != PNFS_NOT_ATTEMPTED)
29068c2ecf20Sopenharmony_ci		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
29078c2ecf20Sopenharmony_ci	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
29088c2ecf20Sopenharmony_ci	return trypnfs;
29098c2ecf20Sopenharmony_ci}
29108c2ecf20Sopenharmony_ci
29118c2ecf20Sopenharmony_cistatic void
29128c2ecf20Sopenharmony_cipnfs_do_write(struct nfs_pageio_descriptor *desc,
29138c2ecf20Sopenharmony_ci	      struct nfs_pgio_header *hdr, int how)
29148c2ecf20Sopenharmony_ci{
29158c2ecf20Sopenharmony_ci	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
29168c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = desc->pg_lseg;
29178c2ecf20Sopenharmony_ci	enum pnfs_try_status trypnfs;
29188c2ecf20Sopenharmony_ci
29198c2ecf20Sopenharmony_ci	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
29208c2ecf20Sopenharmony_ci	switch (trypnfs) {
29218c2ecf20Sopenharmony_ci	case PNFS_NOT_ATTEMPTED:
29228c2ecf20Sopenharmony_ci		pnfs_write_through_mds(desc, hdr);
29238c2ecf20Sopenharmony_ci	case PNFS_ATTEMPTED:
29248c2ecf20Sopenharmony_ci		break;
29258c2ecf20Sopenharmony_ci	case PNFS_TRY_AGAIN:
29268c2ecf20Sopenharmony_ci		/* cleanup hdr and prepare to redo pnfs */
29278c2ecf20Sopenharmony_ci		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
29288c2ecf20Sopenharmony_ci			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
29298c2ecf20Sopenharmony_ci			list_splice_init(&hdr->pages, &mirror->pg_list);
29308c2ecf20Sopenharmony_ci			mirror->pg_recoalesce = 1;
29318c2ecf20Sopenharmony_ci		}
29328c2ecf20Sopenharmony_ci		hdr->mds_ops->rpc_release(hdr);
29338c2ecf20Sopenharmony_ci	}
29348c2ecf20Sopenharmony_ci}
29358c2ecf20Sopenharmony_ci
29368c2ecf20Sopenharmony_cistatic void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
29378c2ecf20Sopenharmony_ci{
29388c2ecf20Sopenharmony_ci	pnfs_put_lseg(hdr->lseg);
29398c2ecf20Sopenharmony_ci	nfs_pgio_header_free(hdr);
29408c2ecf20Sopenharmony_ci}
29418c2ecf20Sopenharmony_ci
29428c2ecf20Sopenharmony_ciint
29438c2ecf20Sopenharmony_cipnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
29448c2ecf20Sopenharmony_ci{
29458c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr;
29468c2ecf20Sopenharmony_ci	int ret;
29478c2ecf20Sopenharmony_ci
29488c2ecf20Sopenharmony_ci	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
29498c2ecf20Sopenharmony_ci	if (!hdr) {
29508c2ecf20Sopenharmony_ci		desc->pg_error = -ENOMEM;
29518c2ecf20Sopenharmony_ci		return desc->pg_error;
29528c2ecf20Sopenharmony_ci	}
29538c2ecf20Sopenharmony_ci	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
29548c2ecf20Sopenharmony_ci
29558c2ecf20Sopenharmony_ci	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
29568c2ecf20Sopenharmony_ci	ret = nfs_generic_pgio(desc, hdr);
29578c2ecf20Sopenharmony_ci	if (!ret)
29588c2ecf20Sopenharmony_ci		pnfs_do_write(desc, hdr, desc->pg_ioflags);
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	return ret;
29618c2ecf20Sopenharmony_ci}
29628c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
29638c2ecf20Sopenharmony_ci
29648c2ecf20Sopenharmony_ciint pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
29658c2ecf20Sopenharmony_ci{
29668c2ecf20Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
29678c2ecf20Sopenharmony_ci
29688c2ecf20Sopenharmony_ci	/* Resend all requests through the MDS */
29698c2ecf20Sopenharmony_ci	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
29708c2ecf20Sopenharmony_ci	return nfs_pageio_resend(&pgio, hdr);
29718c2ecf20Sopenharmony_ci}
29728c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
29738c2ecf20Sopenharmony_ci
29748c2ecf20Sopenharmony_cistatic void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
29758c2ecf20Sopenharmony_ci{
29768c2ecf20Sopenharmony_ci	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
29778c2ecf20Sopenharmony_ci	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
29788c2ecf20Sopenharmony_ci	    PNFS_LAYOUTRET_ON_ERROR) {
29798c2ecf20Sopenharmony_ci		pnfs_return_layout(hdr->inode);
29808c2ecf20Sopenharmony_ci	}
29818c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
29828c2ecf20Sopenharmony_ci		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
29838c2ecf20Sopenharmony_ci}
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_ci/*
29868c2ecf20Sopenharmony_ci * Called by non rpc-based layout drivers
29878c2ecf20Sopenharmony_ci */
29888c2ecf20Sopenharmony_civoid pnfs_ld_read_done(struct nfs_pgio_header *hdr)
29898c2ecf20Sopenharmony_ci{
29908c2ecf20Sopenharmony_ci	if (likely(!hdr->pnfs_error))
29918c2ecf20Sopenharmony_ci		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
29928c2ecf20Sopenharmony_ci	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
29938c2ecf20Sopenharmony_ci	if (unlikely(hdr->pnfs_error))
29948c2ecf20Sopenharmony_ci		pnfs_ld_handle_read_error(hdr);
29958c2ecf20Sopenharmony_ci	hdr->mds_ops->rpc_release(hdr);
29968c2ecf20Sopenharmony_ci}
29978c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_read_done);
29988c2ecf20Sopenharmony_ci
29998c2ecf20Sopenharmony_cistatic void
30008c2ecf20Sopenharmony_cipnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
30018c2ecf20Sopenharmony_ci		struct nfs_pgio_header *hdr)
30028c2ecf20Sopenharmony_ci{
30038c2ecf20Sopenharmony_ci	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
30068c2ecf20Sopenharmony_ci		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
30078c2ecf20Sopenharmony_ci		nfs_pageio_reset_read_mds(desc);
30088c2ecf20Sopenharmony_ci		mirror->pg_recoalesce = 1;
30098c2ecf20Sopenharmony_ci	}
30108c2ecf20Sopenharmony_ci	hdr->completion_ops->completion(hdr);
30118c2ecf20Sopenharmony_ci}
30128c2ecf20Sopenharmony_ci
30138c2ecf20Sopenharmony_ci/*
30148c2ecf20Sopenharmony_ci * Call the appropriate parallel I/O subsystem read function.
30158c2ecf20Sopenharmony_ci */
30168c2ecf20Sopenharmony_cistatic enum pnfs_try_status
30178c2ecf20Sopenharmony_cipnfs_try_to_read_data(struct nfs_pgio_header *hdr,
30188c2ecf20Sopenharmony_ci		       const struct rpc_call_ops *call_ops,
30198c2ecf20Sopenharmony_ci		       struct pnfs_layout_segment *lseg)
30208c2ecf20Sopenharmony_ci{
30218c2ecf20Sopenharmony_ci	struct inode *inode = hdr->inode;
30228c2ecf20Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(inode);
30238c2ecf20Sopenharmony_ci	enum pnfs_try_status trypnfs;
30248c2ecf20Sopenharmony_ci
30258c2ecf20Sopenharmony_ci	hdr->mds_ops = call_ops;
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_ci	dprintk("%s: Reading ino:%lu %u@%llu\n",
30288c2ecf20Sopenharmony_ci		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_ci	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
30318c2ecf20Sopenharmony_ci	if (trypnfs != PNFS_NOT_ATTEMPTED)
30328c2ecf20Sopenharmony_ci		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
30338c2ecf20Sopenharmony_ci	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
30348c2ecf20Sopenharmony_ci	return trypnfs;
30358c2ecf20Sopenharmony_ci}
30368c2ecf20Sopenharmony_ci
30378c2ecf20Sopenharmony_ci/* Resend all requests through pnfs. */
30388c2ecf20Sopenharmony_civoid pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
30398c2ecf20Sopenharmony_ci			   unsigned int mirror_idx)
30408c2ecf20Sopenharmony_ci{
30418c2ecf20Sopenharmony_ci	struct nfs_pageio_descriptor pgio;
30428c2ecf20Sopenharmony_ci
30438c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
30448c2ecf20Sopenharmony_ci		/* Prevent deadlocks with layoutreturn! */
30458c2ecf20Sopenharmony_ci		pnfs_put_lseg(hdr->lseg);
30468c2ecf20Sopenharmony_ci		hdr->lseg = NULL;
30478c2ecf20Sopenharmony_ci
30488c2ecf20Sopenharmony_ci		nfs_pageio_init_read(&pgio, hdr->inode, false,
30498c2ecf20Sopenharmony_ci					hdr->completion_ops);
30508c2ecf20Sopenharmony_ci		pgio.pg_mirror_idx = mirror_idx;
30518c2ecf20Sopenharmony_ci		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
30528c2ecf20Sopenharmony_ci	}
30538c2ecf20Sopenharmony_ci}
30548c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
30558c2ecf20Sopenharmony_ci
30568c2ecf20Sopenharmony_cistatic void
30578c2ecf20Sopenharmony_cipnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
30588c2ecf20Sopenharmony_ci{
30598c2ecf20Sopenharmony_ci	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
30608c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = desc->pg_lseg;
30618c2ecf20Sopenharmony_ci	enum pnfs_try_status trypnfs;
30628c2ecf20Sopenharmony_ci
30638c2ecf20Sopenharmony_ci	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
30648c2ecf20Sopenharmony_ci	switch (trypnfs) {
30658c2ecf20Sopenharmony_ci	case PNFS_NOT_ATTEMPTED:
30668c2ecf20Sopenharmony_ci		pnfs_read_through_mds(desc, hdr);
30678c2ecf20Sopenharmony_ci	case PNFS_ATTEMPTED:
30688c2ecf20Sopenharmony_ci		break;
30698c2ecf20Sopenharmony_ci	case PNFS_TRY_AGAIN:
30708c2ecf20Sopenharmony_ci		/* cleanup hdr and prepare to redo pnfs */
30718c2ecf20Sopenharmony_ci		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
30728c2ecf20Sopenharmony_ci			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
30738c2ecf20Sopenharmony_ci			list_splice_init(&hdr->pages, &mirror->pg_list);
30748c2ecf20Sopenharmony_ci			mirror->pg_recoalesce = 1;
30758c2ecf20Sopenharmony_ci		}
30768c2ecf20Sopenharmony_ci		hdr->mds_ops->rpc_release(hdr);
30778c2ecf20Sopenharmony_ci	}
30788c2ecf20Sopenharmony_ci}
30798c2ecf20Sopenharmony_ci
30808c2ecf20Sopenharmony_cistatic void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
30818c2ecf20Sopenharmony_ci{
30828c2ecf20Sopenharmony_ci	pnfs_put_lseg(hdr->lseg);
30838c2ecf20Sopenharmony_ci	nfs_pgio_header_free(hdr);
30848c2ecf20Sopenharmony_ci}
30858c2ecf20Sopenharmony_ci
30868c2ecf20Sopenharmony_ciint
30878c2ecf20Sopenharmony_cipnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
30888c2ecf20Sopenharmony_ci{
30898c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr;
30908c2ecf20Sopenharmony_ci	int ret;
30918c2ecf20Sopenharmony_ci
30928c2ecf20Sopenharmony_ci	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
30938c2ecf20Sopenharmony_ci	if (!hdr) {
30948c2ecf20Sopenharmony_ci		desc->pg_error = -ENOMEM;
30958c2ecf20Sopenharmony_ci		return desc->pg_error;
30968c2ecf20Sopenharmony_ci	}
30978c2ecf20Sopenharmony_ci	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
30988c2ecf20Sopenharmony_ci	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
30998c2ecf20Sopenharmony_ci	ret = nfs_generic_pgio(desc, hdr);
31008c2ecf20Sopenharmony_ci	if (!ret)
31018c2ecf20Sopenharmony_ci		pnfs_do_read(desc, hdr);
31028c2ecf20Sopenharmony_ci	return ret;
31038c2ecf20Sopenharmony_ci}
31048c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
31058c2ecf20Sopenharmony_ci
31068c2ecf20Sopenharmony_cistatic void pnfs_clear_layoutcommitting(struct inode *inode)
31078c2ecf20Sopenharmony_ci{
31088c2ecf20Sopenharmony_ci	unsigned long *bitlock = &NFS_I(inode)->flags;
31098c2ecf20Sopenharmony_ci
31108c2ecf20Sopenharmony_ci	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
31118c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
31128c2ecf20Sopenharmony_ci	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
31138c2ecf20Sopenharmony_ci}
31148c2ecf20Sopenharmony_ci
31158c2ecf20Sopenharmony_ci/*
31168c2ecf20Sopenharmony_ci * There can be multiple RW segments.
31178c2ecf20Sopenharmony_ci */
31188c2ecf20Sopenharmony_cistatic void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
31198c2ecf20Sopenharmony_ci{
31208c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg;
31218c2ecf20Sopenharmony_ci
31228c2ecf20Sopenharmony_ci	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
31238c2ecf20Sopenharmony_ci		if (lseg->pls_range.iomode == IOMODE_RW &&
31248c2ecf20Sopenharmony_ci		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
31258c2ecf20Sopenharmony_ci			list_add(&lseg->pls_lc_list, listp);
31268c2ecf20Sopenharmony_ci	}
31278c2ecf20Sopenharmony_ci}
31288c2ecf20Sopenharmony_ci
31298c2ecf20Sopenharmony_cistatic void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
31308c2ecf20Sopenharmony_ci{
31318c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg, *tmp;
31328c2ecf20Sopenharmony_ci
31338c2ecf20Sopenharmony_ci	/* Matched by references in pnfs_set_layoutcommit */
31348c2ecf20Sopenharmony_ci	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
31358c2ecf20Sopenharmony_ci		list_del_init(&lseg->pls_lc_list);
31368c2ecf20Sopenharmony_ci		pnfs_put_lseg(lseg);
31378c2ecf20Sopenharmony_ci	}
31388c2ecf20Sopenharmony_ci
31398c2ecf20Sopenharmony_ci	pnfs_clear_layoutcommitting(inode);
31408c2ecf20Sopenharmony_ci}
31418c2ecf20Sopenharmony_ci
31428c2ecf20Sopenharmony_civoid pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
31438c2ecf20Sopenharmony_ci{
31448c2ecf20Sopenharmony_ci	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
31458c2ecf20Sopenharmony_ci}
31468c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
31478c2ecf20Sopenharmony_ci
31488c2ecf20Sopenharmony_civoid
31498c2ecf20Sopenharmony_cipnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
31508c2ecf20Sopenharmony_ci		loff_t end_pos)
31518c2ecf20Sopenharmony_ci{
31528c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
31538c2ecf20Sopenharmony_ci	bool mark_as_dirty = false;
31548c2ecf20Sopenharmony_ci
31558c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
31568c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
31578c2ecf20Sopenharmony_ci		nfsi->layout->plh_lwb = end_pos;
31588c2ecf20Sopenharmony_ci		mark_as_dirty = true;
31598c2ecf20Sopenharmony_ci		dprintk("%s: Set layoutcommit for inode %lu ",
31608c2ecf20Sopenharmony_ci			__func__, inode->i_ino);
31618c2ecf20Sopenharmony_ci	} else if (end_pos > nfsi->layout->plh_lwb)
31628c2ecf20Sopenharmony_ci		nfsi->layout->plh_lwb = end_pos;
31638c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
31648c2ecf20Sopenharmony_ci		/* references matched in nfs4_layoutcommit_release */
31658c2ecf20Sopenharmony_ci		pnfs_get_lseg(lseg);
31668c2ecf20Sopenharmony_ci	}
31678c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
31688c2ecf20Sopenharmony_ci	dprintk("%s: lseg %p end_pos %llu\n",
31698c2ecf20Sopenharmony_ci		__func__, lseg, nfsi->layout->plh_lwb);
31708c2ecf20Sopenharmony_ci
31718c2ecf20Sopenharmony_ci	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
31728c2ecf20Sopenharmony_ci	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
31738c2ecf20Sopenharmony_ci	if (mark_as_dirty)
31748c2ecf20Sopenharmony_ci		mark_inode_dirty_sync(inode);
31758c2ecf20Sopenharmony_ci}
31768c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
31778c2ecf20Sopenharmony_ci
31788c2ecf20Sopenharmony_civoid pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
31798c2ecf20Sopenharmony_ci{
31808c2ecf20Sopenharmony_ci	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
31818c2ecf20Sopenharmony_ci
31828c2ecf20Sopenharmony_ci	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
31838c2ecf20Sopenharmony_ci		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
31848c2ecf20Sopenharmony_ci	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
31858c2ecf20Sopenharmony_ci}
31868c2ecf20Sopenharmony_ci
31878c2ecf20Sopenharmony_ci/*
31888c2ecf20Sopenharmony_ci * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
31898c2ecf20Sopenharmony_ci * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
31908c2ecf20Sopenharmony_ci * data to disk to allow the server to recover the data if it crashes.
31918c2ecf20Sopenharmony_ci * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
31928c2ecf20Sopenharmony_ci * is off, and a COMMIT is sent to a data server, or
31938c2ecf20Sopenharmony_ci * if WRITEs to a data server return NFS_DATA_SYNC.
31948c2ecf20Sopenharmony_ci */
31958c2ecf20Sopenharmony_ciint
31968c2ecf20Sopenharmony_cipnfs_layoutcommit_inode(struct inode *inode, bool sync)
31978c2ecf20Sopenharmony_ci{
31988c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
31998c2ecf20Sopenharmony_ci	struct nfs4_layoutcommit_data *data;
32008c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
32018c2ecf20Sopenharmony_ci	loff_t end_pos;
32028c2ecf20Sopenharmony_ci	int status;
32038c2ecf20Sopenharmony_ci
32048c2ecf20Sopenharmony_ci	if (!pnfs_layoutcommit_outstanding(inode))
32058c2ecf20Sopenharmony_ci		return 0;
32068c2ecf20Sopenharmony_ci
32078c2ecf20Sopenharmony_ci	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
32088c2ecf20Sopenharmony_ci
32098c2ecf20Sopenharmony_ci	status = -EAGAIN;
32108c2ecf20Sopenharmony_ci	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
32118c2ecf20Sopenharmony_ci		if (!sync)
32128c2ecf20Sopenharmony_ci			goto out;
32138c2ecf20Sopenharmony_ci		status = wait_on_bit_lock_action(&nfsi->flags,
32148c2ecf20Sopenharmony_ci				NFS_INO_LAYOUTCOMMITTING,
32158c2ecf20Sopenharmony_ci				nfs_wait_bit_killable,
32168c2ecf20Sopenharmony_ci				TASK_KILLABLE);
32178c2ecf20Sopenharmony_ci		if (status)
32188c2ecf20Sopenharmony_ci			goto out;
32198c2ecf20Sopenharmony_ci	}
32208c2ecf20Sopenharmony_ci
32218c2ecf20Sopenharmony_ci	status = -ENOMEM;
32228c2ecf20Sopenharmony_ci	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
32238c2ecf20Sopenharmony_ci	data = kzalloc(sizeof(*data), GFP_NOFS);
32248c2ecf20Sopenharmony_ci	if (!data)
32258c2ecf20Sopenharmony_ci		goto clear_layoutcommitting;
32268c2ecf20Sopenharmony_ci
32278c2ecf20Sopenharmony_ci	status = 0;
32288c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
32298c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
32308c2ecf20Sopenharmony_ci		goto out_unlock;
32318c2ecf20Sopenharmony_ci
32328c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&data->lseg_list);
32338c2ecf20Sopenharmony_ci	pnfs_list_write_lseg(inode, &data->lseg_list);
32348c2ecf20Sopenharmony_ci
32358c2ecf20Sopenharmony_ci	end_pos = nfsi->layout->plh_lwb;
32368c2ecf20Sopenharmony_ci
32378c2ecf20Sopenharmony_ci	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
32388c2ecf20Sopenharmony_ci	data->cred = get_cred(nfsi->layout->plh_lc_cred);
32398c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
32408c2ecf20Sopenharmony_ci
32418c2ecf20Sopenharmony_ci	data->args.inode = inode;
32428c2ecf20Sopenharmony_ci	nfs_fattr_init(&data->fattr);
32438c2ecf20Sopenharmony_ci	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
32448c2ecf20Sopenharmony_ci	data->res.fattr = &data->fattr;
32458c2ecf20Sopenharmony_ci	if (end_pos != 0)
32468c2ecf20Sopenharmony_ci		data->args.lastbytewritten = end_pos - 1;
32478c2ecf20Sopenharmony_ci	else
32488c2ecf20Sopenharmony_ci		data->args.lastbytewritten = U64_MAX;
32498c2ecf20Sopenharmony_ci	data->res.server = NFS_SERVER(inode);
32508c2ecf20Sopenharmony_ci
32518c2ecf20Sopenharmony_ci	if (ld->prepare_layoutcommit) {
32528c2ecf20Sopenharmony_ci		status = ld->prepare_layoutcommit(&data->args);
32538c2ecf20Sopenharmony_ci		if (status) {
32548c2ecf20Sopenharmony_ci			put_cred(data->cred);
32558c2ecf20Sopenharmony_ci			spin_lock(&inode->i_lock);
32568c2ecf20Sopenharmony_ci			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
32578c2ecf20Sopenharmony_ci			if (end_pos > nfsi->layout->plh_lwb)
32588c2ecf20Sopenharmony_ci				nfsi->layout->plh_lwb = end_pos;
32598c2ecf20Sopenharmony_ci			goto out_unlock;
32608c2ecf20Sopenharmony_ci		}
32618c2ecf20Sopenharmony_ci	}
32628c2ecf20Sopenharmony_ci
32638c2ecf20Sopenharmony_ci
32648c2ecf20Sopenharmony_ci	status = nfs4_proc_layoutcommit(data, sync);
32658c2ecf20Sopenharmony_ciout:
32668c2ecf20Sopenharmony_ci	if (status)
32678c2ecf20Sopenharmony_ci		mark_inode_dirty_sync(inode);
32688c2ecf20Sopenharmony_ci	dprintk("<-- %s status %d\n", __func__, status);
32698c2ecf20Sopenharmony_ci	return status;
32708c2ecf20Sopenharmony_ciout_unlock:
32718c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
32728c2ecf20Sopenharmony_ci	kfree(data);
32738c2ecf20Sopenharmony_ciclear_layoutcommitting:
32748c2ecf20Sopenharmony_ci	pnfs_clear_layoutcommitting(inode);
32758c2ecf20Sopenharmony_ci	goto out;
32768c2ecf20Sopenharmony_ci}
32778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
32788c2ecf20Sopenharmony_ci
32798c2ecf20Sopenharmony_ciint
32808c2ecf20Sopenharmony_cipnfs_generic_sync(struct inode *inode, bool datasync)
32818c2ecf20Sopenharmony_ci{
32828c2ecf20Sopenharmony_ci	return pnfs_layoutcommit_inode(inode, true);
32838c2ecf20Sopenharmony_ci}
32848c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_sync);
32858c2ecf20Sopenharmony_ci
32868c2ecf20Sopenharmony_cistruct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
32878c2ecf20Sopenharmony_ci{
32888c2ecf20Sopenharmony_ci	struct nfs4_threshold *thp;
32898c2ecf20Sopenharmony_ci
32908c2ecf20Sopenharmony_ci	thp = kzalloc(sizeof(*thp), GFP_NOFS);
32918c2ecf20Sopenharmony_ci	if (!thp) {
32928c2ecf20Sopenharmony_ci		dprintk("%s mdsthreshold allocation failed\n", __func__);
32938c2ecf20Sopenharmony_ci		return NULL;
32948c2ecf20Sopenharmony_ci	}
32958c2ecf20Sopenharmony_ci	return thp;
32968c2ecf20Sopenharmony_ci}
32978c2ecf20Sopenharmony_ci
32988c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_NFS_V4_2)
32998c2ecf20Sopenharmony_ciint
33008c2ecf20Sopenharmony_cipnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
33018c2ecf20Sopenharmony_ci{
33028c2ecf20Sopenharmony_ci	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
33038c2ecf20Sopenharmony_ci	struct nfs_server *server = NFS_SERVER(inode);
33048c2ecf20Sopenharmony_ci	struct nfs_inode *nfsi = NFS_I(inode);
33058c2ecf20Sopenharmony_ci	struct nfs42_layoutstat_data *data;
33068c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *hdr;
33078c2ecf20Sopenharmony_ci	int status = 0;
33088c2ecf20Sopenharmony_ci
33098c2ecf20Sopenharmony_ci	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
33108c2ecf20Sopenharmony_ci		goto out;
33118c2ecf20Sopenharmony_ci
33128c2ecf20Sopenharmony_ci	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
33138c2ecf20Sopenharmony_ci		goto out;
33148c2ecf20Sopenharmony_ci
33158c2ecf20Sopenharmony_ci	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
33168c2ecf20Sopenharmony_ci		goto out;
33178c2ecf20Sopenharmony_ci
33188c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
33198c2ecf20Sopenharmony_ci	if (!NFS_I(inode)->layout) {
33208c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
33218c2ecf20Sopenharmony_ci		goto out_clear_layoutstats;
33228c2ecf20Sopenharmony_ci	}
33238c2ecf20Sopenharmony_ci	hdr = NFS_I(inode)->layout;
33248c2ecf20Sopenharmony_ci	pnfs_get_layout_hdr(hdr);
33258c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
33268c2ecf20Sopenharmony_ci
33278c2ecf20Sopenharmony_ci	data = kzalloc(sizeof(*data), gfp_flags);
33288c2ecf20Sopenharmony_ci	if (!data) {
33298c2ecf20Sopenharmony_ci		status = -ENOMEM;
33308c2ecf20Sopenharmony_ci		goto out_put;
33318c2ecf20Sopenharmony_ci	}
33328c2ecf20Sopenharmony_ci
33338c2ecf20Sopenharmony_ci	data->args.fh = NFS_FH(inode);
33348c2ecf20Sopenharmony_ci	data->args.inode = inode;
33358c2ecf20Sopenharmony_ci	status = ld->prepare_layoutstats(&data->args);
33368c2ecf20Sopenharmony_ci	if (status)
33378c2ecf20Sopenharmony_ci		goto out_free;
33388c2ecf20Sopenharmony_ci
33398c2ecf20Sopenharmony_ci	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
33408c2ecf20Sopenharmony_ci
33418c2ecf20Sopenharmony_ciout:
33428c2ecf20Sopenharmony_ci	dprintk("%s returns %d\n", __func__, status);
33438c2ecf20Sopenharmony_ci	return status;
33448c2ecf20Sopenharmony_ci
33458c2ecf20Sopenharmony_ciout_free:
33468c2ecf20Sopenharmony_ci	kfree(data);
33478c2ecf20Sopenharmony_ciout_put:
33488c2ecf20Sopenharmony_ci	pnfs_put_layout_hdr(hdr);
33498c2ecf20Sopenharmony_ciout_clear_layoutstats:
33508c2ecf20Sopenharmony_ci	smp_mb__before_atomic();
33518c2ecf20Sopenharmony_ci	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
33528c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
33538c2ecf20Sopenharmony_ci	goto out;
33548c2ecf20Sopenharmony_ci}
33558c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
33568c2ecf20Sopenharmony_ci#endif
33578c2ecf20Sopenharmony_ci
/*
 * Module parameter "layoutstats_timer": an unsigned integer, exposed with
 * mode 0644 and exported (GPL-only) to other modules.
 * NOTE(review): presumably the interval between layoutstats reports -
 * units and consumers are not visible here; confirm against the users.
 */
unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);
3361