162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * pNFS functions to call and manage layout drivers. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (c) 2002 [year of first publication] 562306a36Sopenharmony_ci * The Regents of the University of Michigan 662306a36Sopenharmony_ci * All Rights Reserved 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Dean Hildebrand <dhildebz@umich.edu> 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Permission is granted to use, copy, create derivative works, and 1162306a36Sopenharmony_ci * redistribute this software and such derivative works for any purpose, 1262306a36Sopenharmony_ci * so long as the name of the University of Michigan is not used in 1362306a36Sopenharmony_ci * any advertising or publicity pertaining to the use or distribution 1462306a36Sopenharmony_ci * of this software without specific, written prior authorization. If 1562306a36Sopenharmony_ci * the above copyright notice or any other identification of the 1662306a36Sopenharmony_ci * University of Michigan is included in any copy of any portion of 1762306a36Sopenharmony_ci * this software, then the disclaimer below must also be included. 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * This software is provided as is, without representation or warranty 2062306a36Sopenharmony_ci * of any kind either express or implied, including without limitation 2162306a36Sopenharmony_ci * the implied warranties of merchantability, fitness for a particular 2262306a36Sopenharmony_ci * purpose, or noninfringement. The Regents of the University of 2362306a36Sopenharmony_ci * Michigan shall not be liable for any damages, including special, 2462306a36Sopenharmony_ci * indirect, incidental, or consequential damages, with respect to any 2562306a36Sopenharmony_ci * claim arising out of or in connection with the use of the software, 2662306a36Sopenharmony_ci * even if it has been or is hereafter advised of the possibility of 2762306a36Sopenharmony_ci * such damages. 2862306a36Sopenharmony_ci */ 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#include <linux/nfs_fs.h> 3162306a36Sopenharmony_ci#include <linux/nfs_page.h> 3262306a36Sopenharmony_ci#include <linux/module.h> 3362306a36Sopenharmony_ci#include <linux/sort.h> 3462306a36Sopenharmony_ci#include "internal.h" 3562306a36Sopenharmony_ci#include "pnfs.h" 3662306a36Sopenharmony_ci#include "iostat.h" 3762306a36Sopenharmony_ci#include "nfs4trace.h" 3862306a36Sopenharmony_ci#include "delegation.h" 3962306a36Sopenharmony_ci#include "nfs42.h" 4062306a36Sopenharmony_ci#include "nfs4_fs.h" 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#define NFSDBG_FACILITY NFSDBG_PNFS 4362306a36Sopenharmony_ci#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci/* Locking: 4662306a36Sopenharmony_ci * 4762306a36Sopenharmony_ci * pnfs_spinlock: 4862306a36Sopenharmony_ci * protects pnfs_modules_tbl. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_cistatic DEFINE_SPINLOCK(pnfs_spinlock); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci/* 5362306a36Sopenharmony_ci * pnfs_modules_tbl holds all pnfs modules 5462306a36Sopenharmony_ci */ 5562306a36Sopenharmony_cistatic LIST_HEAD(pnfs_modules_tbl); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); 5862306a36Sopenharmony_cistatic void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, 5962306a36Sopenharmony_ci struct list_head *free_me, 6062306a36Sopenharmony_ci const struct pnfs_layout_range *range, 6162306a36Sopenharmony_ci u32 seq); 6262306a36Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, 6362306a36Sopenharmony_ci struct list_head *tmp_list); 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci/* Return the registered pnfs layout driver module matching given id */ 6662306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type * 6762306a36Sopenharmony_cifind_pnfs_driver_locked(u32 id) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci struct pnfs_layoutdriver_type *local; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) 7262306a36Sopenharmony_ci if (local->id == id) 7362306a36Sopenharmony_ci goto out; 7462306a36Sopenharmony_ci local = NULL; 7562306a36Sopenharmony_ciout: 7662306a36Sopenharmony_ci dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); 7762306a36Sopenharmony_ci return local; 7862306a36Sopenharmony_ci} 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistatic struct pnfs_layoutdriver_type * 8162306a36Sopenharmony_cifind_pnfs_driver(u32 id) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci struct pnfs_layoutdriver_type *local; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci spin_lock(&pnfs_spinlock); 8662306a36Sopenharmony_ci local = find_pnfs_driver_locked(id); 8762306a36Sopenharmony_ci if (local != NULL && !try_module_get(local->owner)) { 8862306a36Sopenharmony_ci dprintk("%s: Could not grab reference on module\n", __func__); 8962306a36Sopenharmony_ci local = NULL; 9062306a36Sopenharmony_ci } 9162306a36Sopenharmony_ci spin_unlock(&pnfs_spinlock); 9262306a36Sopenharmony_ci return local; 9362306a36Sopenharmony_ci} 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ciconst struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) 9662306a36Sopenharmony_ci{ 9762306a36Sopenharmony_ci return find_pnfs_driver(id); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_civoid pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) 10162306a36Sopenharmony_ci{ 10262306a36Sopenharmony_ci if (ld) 10362306a36Sopenharmony_ci module_put(ld->owner); 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_civoid 10762306a36Sopenharmony_ciunset_pnfs_layoutdriver(struct nfs_server *nfss) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci if (nfss->pnfs_curr_ld) { 11062306a36Sopenharmony_ci if (nfss->pnfs_curr_ld->clear_layoutdriver) 11162306a36Sopenharmony_ci nfss->pnfs_curr_ld->clear_layoutdriver(nfss); 11262306a36Sopenharmony_ci /* Decrement the MDS count. Purge the deviceid cache if zero */ 11362306a36Sopenharmony_ci if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) 11462306a36Sopenharmony_ci nfs4_deviceid_purge_client(nfss->nfs_client); 11562306a36Sopenharmony_ci module_put(nfss->pnfs_curr_ld->owner); 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci nfss->pnfs_curr_ld = NULL; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* 12162306a36Sopenharmony_ci * When the server sends a list of layout types, we choose one in the order 12262306a36Sopenharmony_ci * given in the list below. 12362306a36Sopenharmony_ci * 12462306a36Sopenharmony_ci * FIXME: should this list be configurable in some fashion? module param? 12562306a36Sopenharmony_ci * mount option? something else? 12662306a36Sopenharmony_ci */ 12762306a36Sopenharmony_cistatic const u32 ld_prefs[] = { 12862306a36Sopenharmony_ci LAYOUT_SCSI, 12962306a36Sopenharmony_ci LAYOUT_BLOCK_VOLUME, 13062306a36Sopenharmony_ci LAYOUT_OSD2_OBJECTS, 13162306a36Sopenharmony_ci LAYOUT_FLEX_FILES, 13262306a36Sopenharmony_ci LAYOUT_NFSV4_1_FILES, 13362306a36Sopenharmony_ci 0 13462306a36Sopenharmony_ci}; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_cistatic int 13762306a36Sopenharmony_cild_cmp(const void *e1, const void *e2) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci u32 ld1 = *((u32 *)e1); 14062306a36Sopenharmony_ci u32 ld2 = *((u32 *)e2); 14162306a36Sopenharmony_ci int i; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci for (i = 0; ld_prefs[i] != 0; i++) { 14462306a36Sopenharmony_ci if (ld1 == ld_prefs[i]) 14562306a36Sopenharmony_ci return -1; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (ld2 == ld_prefs[i]) 14862306a36Sopenharmony_ci return 1; 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci return 0; 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci/* 15462306a36Sopenharmony_ci * Try to set the server's pnfs module to the pnfs layout type specified by id. 15562306a36Sopenharmony_ci * Currently only one pNFS layout driver per filesystem is supported. 15662306a36Sopenharmony_ci * 15762306a36Sopenharmony_ci * @ids array of layout types supported by MDS. 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_civoid 16062306a36Sopenharmony_ciset_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, 16162306a36Sopenharmony_ci struct nfs_fsinfo *fsinfo) 16262306a36Sopenharmony_ci{ 16362306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld_type = NULL; 16462306a36Sopenharmony_ci u32 id; 16562306a36Sopenharmony_ci int i; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci if (fsinfo->nlayouttypes == 0) 16862306a36Sopenharmony_ci goto out_no_driver; 16962306a36Sopenharmony_ci if (!(server->nfs_client->cl_exchange_flags & 17062306a36Sopenharmony_ci (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { 17162306a36Sopenharmony_ci printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n", 17262306a36Sopenharmony_ci __func__, server->nfs_client->cl_exchange_flags); 17362306a36Sopenharmony_ci goto out_no_driver; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci sort(fsinfo->layouttype, fsinfo->nlayouttypes, 17762306a36Sopenharmony_ci sizeof(*fsinfo->layouttype), ld_cmp, NULL); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci for (i = 0; i < fsinfo->nlayouttypes; i++) { 18062306a36Sopenharmony_ci id = fsinfo->layouttype[i]; 18162306a36Sopenharmony_ci ld_type = find_pnfs_driver(id); 18262306a36Sopenharmony_ci if (!ld_type) { 18362306a36Sopenharmony_ci request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, 18462306a36Sopenharmony_ci id); 18562306a36Sopenharmony_ci ld_type = find_pnfs_driver(id); 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci if (ld_type) 18862306a36Sopenharmony_ci break; 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (!ld_type) { 19262306a36Sopenharmony_ci dprintk("%s: No pNFS module found!\n", __func__); 19362306a36Sopenharmony_ci goto out_no_driver; 19462306a36Sopenharmony_ci } 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci server->pnfs_curr_ld = ld_type; 19762306a36Sopenharmony_ci if (ld_type->set_layoutdriver 19862306a36Sopenharmony_ci && ld_type->set_layoutdriver(server, mntfh)) { 19962306a36Sopenharmony_ci printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " 20062306a36Sopenharmony_ci "driver %u.\n", __func__, id); 20162306a36Sopenharmony_ci module_put(ld_type->owner); 20262306a36Sopenharmony_ci goto out_no_driver; 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci /* Bump the MDS count */ 20562306a36Sopenharmony_ci atomic_inc(&server->nfs_client->cl_mds_count); 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci dprintk("%s: pNFS module for %u set\n", __func__, id); 20862306a36Sopenharmony_ci return; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ciout_no_driver: 21162306a36Sopenharmony_ci dprintk("%s: Using NFSv4 I/O\n", __func__); 21262306a36Sopenharmony_ci server->pnfs_curr_ld = NULL; 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ciint 21662306a36Sopenharmony_cipnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci int status = -EINVAL; 21962306a36Sopenharmony_ci struct pnfs_layoutdriver_type *tmp; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci if (ld_type->id == 0) { 22262306a36Sopenharmony_ci printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); 22362306a36Sopenharmony_ci return status; 22462306a36Sopenharmony_ci } 22562306a36Sopenharmony_ci if (!ld_type->alloc_lseg || !ld_type->free_lseg) { 22662306a36Sopenharmony_ci printk(KERN_ERR "NFS: %s Layout driver must provide " 22762306a36Sopenharmony_ci "alloc_lseg and free_lseg.\n", __func__); 22862306a36Sopenharmony_ci return status; 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci spin_lock(&pnfs_spinlock); 23262306a36Sopenharmony_ci tmp = find_pnfs_driver_locked(ld_type->id); 23362306a36Sopenharmony_ci if (!tmp) { 23462306a36Sopenharmony_ci list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); 23562306a36Sopenharmony_ci status = 0; 23662306a36Sopenharmony_ci dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, 23762306a36Sopenharmony_ci ld_type->name); 23862306a36Sopenharmony_ci } else { 23962306a36Sopenharmony_ci printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", 24062306a36Sopenharmony_ci __func__, ld_type->id); 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci spin_unlock(&pnfs_spinlock); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci return status; 24562306a36Sopenharmony_ci} 24662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_civoid 24962306a36Sopenharmony_cipnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); 25262306a36Sopenharmony_ci spin_lock(&pnfs_spinlock); 25362306a36Sopenharmony_ci list_del(&ld_type->pnfs_tblid); 25462306a36Sopenharmony_ci spin_unlock(&pnfs_spinlock); 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci/* 25962306a36Sopenharmony_ci * pNFS client layout cache 26062306a36Sopenharmony_ci */ 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci/* Need to hold i_lock if caller does not already hold reference */ 26362306a36Sopenharmony_civoid 26462306a36Sopenharmony_cipnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci refcount_inc(&lo->plh_refcount); 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic struct pnfs_layout_hdr * 27062306a36Sopenharmony_cipnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 27362306a36Sopenharmony_ci return ld->alloc_layout_hdr(ino, gfp_flags); 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic void 27762306a36Sopenharmony_cipnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(lo->plh_inode); 28062306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) { 28362306a36Sopenharmony_ci struct nfs_client *clp = server->nfs_client; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci spin_lock(&clp->cl_lock); 28662306a36Sopenharmony_ci list_del_rcu(&lo->plh_layouts); 28762306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci put_cred(lo->plh_lc_cred); 29062306a36Sopenharmony_ci return ld->free_layout_hdr(lo); 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_cistatic void 29462306a36Sopenharmony_cipnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(lo->plh_inode); 29762306a36Sopenharmony_ci dprintk("%s: freeing layout cache %p\n", __func__, lo); 29862306a36Sopenharmony_ci nfsi->layout = NULL; 29962306a36Sopenharmony_ci /* Reset MDS Threshold I/O counters */ 30062306a36Sopenharmony_ci nfsi->write_io = 0; 30162306a36Sopenharmony_ci nfsi->read_io = 0; 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_civoid 30562306a36Sopenharmony_cipnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci struct inode *inode; 30862306a36Sopenharmony_ci unsigned long i_state; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci if (!lo) 31162306a36Sopenharmony_ci return; 31262306a36Sopenharmony_ci inode = lo->plh_inode; 31362306a36Sopenharmony_ci pnfs_layoutreturn_before_put_layout_hdr(lo); 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 31662306a36Sopenharmony_ci if (!list_empty(&lo->plh_segs)) 31762306a36Sopenharmony_ci WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 31862306a36Sopenharmony_ci pnfs_detach_layout_hdr(lo); 31962306a36Sopenharmony_ci i_state = inode->i_state; 32062306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 32162306a36Sopenharmony_ci pnfs_free_layout_hdr(lo); 32262306a36Sopenharmony_ci /* Notify pnfs_destroy_layout_final() that we're done */ 32362306a36Sopenharmony_ci if (i_state & (I_FREEING | I_CLEAR)) 32462306a36Sopenharmony_ci wake_up_var(lo); 32562306a36Sopenharmony_ci } 32662306a36Sopenharmony_ci} 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_cistatic struct inode * 32962306a36Sopenharmony_cipnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci struct inode *inode = igrab(lo->plh_inode); 33262306a36Sopenharmony_ci if (inode) 33362306a36Sopenharmony_ci return inode; 33462306a36Sopenharmony_ci set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); 33562306a36Sopenharmony_ci return NULL; 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci/* 33962306a36Sopenharmony_ci * Compare 2 layout stateid sequence ids, to see which is newer, 34062306a36Sopenharmony_ci * taking into account wraparound issues. 34162306a36Sopenharmony_ci */ 34262306a36Sopenharmony_cistatic bool pnfs_seqid_is_newer(u32 s1, u32 s2) 34362306a36Sopenharmony_ci{ 34462306a36Sopenharmony_ci return (s32)(s1 - s2) > 0; 34562306a36Sopenharmony_ci} 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_cistatic void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) 34862306a36Sopenharmony_ci{ 34962306a36Sopenharmony_ci if (pnfs_seqid_is_newer(newseq, lo->plh_barrier) || !lo->plh_barrier) 35062306a36Sopenharmony_ci lo->plh_barrier = newseq; 35162306a36Sopenharmony_ci} 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_cistatic void 35462306a36Sopenharmony_cipnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, 35562306a36Sopenharmony_ci u32 seq) 35662306a36Sopenharmony_ci{ 35762306a36Sopenharmony_ci if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) 35862306a36Sopenharmony_ci iomode = IOMODE_ANY; 35962306a36Sopenharmony_ci lo->plh_return_iomode = iomode; 36062306a36Sopenharmony_ci set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 36162306a36Sopenharmony_ci /* 36262306a36Sopenharmony_ci * We must set lo->plh_return_seq to avoid livelocks with 36362306a36Sopenharmony_ci * pnfs_layout_need_return() 36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci if (seq == 0) 36662306a36Sopenharmony_ci seq = be32_to_cpu(lo->plh_stateid.seqid); 36762306a36Sopenharmony_ci if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq)) 36862306a36Sopenharmony_ci lo->plh_return_seq = seq; 36962306a36Sopenharmony_ci pnfs_barrier_update(lo, seq); 37062306a36Sopenharmony_ci} 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_cistatic void 37362306a36Sopenharmony_cipnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 37662306a36Sopenharmony_ci lo->plh_return_iomode = 0; 37762306a36Sopenharmony_ci lo->plh_return_seq = 0; 37862306a36Sopenharmony_ci clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 37962306a36Sopenharmony_ci list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 38062306a36Sopenharmony_ci if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 38162306a36Sopenharmony_ci continue; 38262306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci} 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_cistatic void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); 38962306a36Sopenharmony_ci clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); 39062306a36Sopenharmony_ci smp_mb__after_atomic(); 39162306a36Sopenharmony_ci wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); 39262306a36Sopenharmony_ci rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_cistatic void 39662306a36Sopenharmony_cipnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, 39762306a36Sopenharmony_ci struct list_head *free_me) 39862306a36Sopenharmony_ci{ 39962306a36Sopenharmony_ci clear_bit(NFS_LSEG_ROC, &lseg->pls_flags); 40062306a36Sopenharmony_ci clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 40162306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 40262306a36Sopenharmony_ci pnfs_lseg_dec_and_remove_zero(lseg, free_me); 40362306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 40462306a36Sopenharmony_ci pnfs_lseg_dec_and_remove_zero(lseg, free_me); 40562306a36Sopenharmony_ci} 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci/* 40862306a36Sopenharmony_ci * Update the seqid of a layout stateid after receiving 40962306a36Sopenharmony_ci * NFS4ERR_OLD_STATEID 41062306a36Sopenharmony_ci */ 41162306a36Sopenharmony_cibool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst, 41262306a36Sopenharmony_ci struct pnfs_layout_range *dst_range, 41362306a36Sopenharmony_ci struct inode *inode) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 41662306a36Sopenharmony_ci struct pnfs_layout_range range = { 41762306a36Sopenharmony_ci .iomode = IOMODE_ANY, 41862306a36Sopenharmony_ci .offset = 0, 41962306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 42062306a36Sopenharmony_ci }; 42162306a36Sopenharmony_ci bool ret = false; 42262306a36Sopenharmony_ci LIST_HEAD(head); 42362306a36Sopenharmony_ci int err; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 42662306a36Sopenharmony_ci lo = NFS_I(inode)->layout; 42762306a36Sopenharmony_ci if (lo && pnfs_layout_is_valid(lo) && 42862306a36Sopenharmony_ci nfs4_stateid_match_other(dst, &lo->plh_stateid)) { 42962306a36Sopenharmony_ci /* Is our call using the most recent seqid? If so, bump it */ 43062306a36Sopenharmony_ci if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) { 43162306a36Sopenharmony_ci nfs4_stateid_seqid_inc(dst); 43262306a36Sopenharmony_ci ret = true; 43362306a36Sopenharmony_ci goto out; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci /* Try to update the seqid to the most recent */ 43662306a36Sopenharmony_ci err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0); 43762306a36Sopenharmony_ci if (err != -EBUSY) { 43862306a36Sopenharmony_ci dst->seqid = lo->plh_stateid.seqid; 43962306a36Sopenharmony_ci *dst_range = range; 44062306a36Sopenharmony_ci ret = true; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci } 44362306a36Sopenharmony_ciout: 44462306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 44562306a36Sopenharmony_ci pnfs_free_lseg_list(&head); 44662306a36Sopenharmony_ci return ret; 44762306a36Sopenharmony_ci} 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci/* 45062306a36Sopenharmony_ci * Mark a pnfs_layout_hdr and all associated layout segments as invalid 45162306a36Sopenharmony_ci * 45262306a36Sopenharmony_ci * In order to continue using the pnfs_layout_hdr, a full recovery 45362306a36Sopenharmony_ci * is required. 45462306a36Sopenharmony_ci * Note that caller must hold inode->i_lock. 45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_ciint 45762306a36Sopenharmony_cipnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, 45862306a36Sopenharmony_ci struct list_head *lseg_list) 45962306a36Sopenharmony_ci{ 46062306a36Sopenharmony_ci struct pnfs_layout_range range = { 46162306a36Sopenharmony_ci .iomode = IOMODE_ANY, 46262306a36Sopenharmony_ci .offset = 0, 46362306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 46462306a36Sopenharmony_ci }; 46562306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *next; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 46862306a36Sopenharmony_ci list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 46962306a36Sopenharmony_ci pnfs_clear_lseg_state(lseg, lseg_list); 47062306a36Sopenharmony_ci pnfs_clear_layoutreturn_info(lo); 47162306a36Sopenharmony_ci pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); 47262306a36Sopenharmony_ci set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); 47362306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && 47462306a36Sopenharmony_ci !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) 47562306a36Sopenharmony_ci pnfs_clear_layoutreturn_waitbit(lo); 47662306a36Sopenharmony_ci return !list_empty(&lo->plh_segs); 47762306a36Sopenharmony_ci} 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_cistatic int 48062306a36Sopenharmony_cipnfs_iomode_to_fail_bit(u32 iomode) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci return iomode == IOMODE_RW ? 48362306a36Sopenharmony_ci NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 48462306a36Sopenharmony_ci} 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_cistatic void 48762306a36Sopenharmony_cipnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci lo->plh_retry_timestamp = jiffies; 49062306a36Sopenharmony_ci if (!test_and_set_bit(fail_bit, &lo->plh_flags)) 49162306a36Sopenharmony_ci refcount_inc(&lo->plh_refcount); 49262306a36Sopenharmony_ci} 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_cistatic void 49562306a36Sopenharmony_cipnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci if (test_and_clear_bit(fail_bit, &lo->plh_flags)) 49862306a36Sopenharmony_ci refcount_dec(&lo->plh_refcount); 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_cistatic void 50262306a36Sopenharmony_cipnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) 50362306a36Sopenharmony_ci{ 50462306a36Sopenharmony_ci struct inode *inode = lo->plh_inode; 50562306a36Sopenharmony_ci struct pnfs_layout_range range = { 50662306a36Sopenharmony_ci .iomode = iomode, 50762306a36Sopenharmony_ci .offset = 0, 50862306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 50962306a36Sopenharmony_ci }; 51062306a36Sopenharmony_ci LIST_HEAD(head); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci spin_lock(&inode->i_lock); 51362306a36Sopenharmony_ci pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 51462306a36Sopenharmony_ci pnfs_mark_matching_lsegs_return(lo, &head, &range, 0); 51562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 51662306a36Sopenharmony_ci pnfs_free_lseg_list(&head); 51762306a36Sopenharmony_ci dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, 51862306a36Sopenharmony_ci iomode == IOMODE_RW ? "RW" : "READ"); 51962306a36Sopenharmony_ci} 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_cistatic bool 52262306a36Sopenharmony_cipnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode) 52362306a36Sopenharmony_ci{ 52462306a36Sopenharmony_ci unsigned long start, end; 52562306a36Sopenharmony_ci int fail_bit = pnfs_iomode_to_fail_bit(iomode); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci if (test_bit(fail_bit, &lo->plh_flags) == 0) 52862306a36Sopenharmony_ci return false; 52962306a36Sopenharmony_ci end = jiffies; 53062306a36Sopenharmony_ci start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT; 53162306a36Sopenharmony_ci if (!time_in_range(lo->plh_retry_timestamp, start, end)) { 53262306a36Sopenharmony_ci /* It is time to retry the failed layoutgets */ 53362306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, fail_bit); 53462306a36Sopenharmony_ci return false; 53562306a36Sopenharmony_ci } 53662306a36Sopenharmony_ci return true; 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_cistatic void 54062306a36Sopenharmony_cipnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, 54162306a36Sopenharmony_ci const struct pnfs_layout_range *range, 54262306a36Sopenharmony_ci const nfs4_stateid *stateid) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci INIT_LIST_HEAD(&lseg->pls_list); 54562306a36Sopenharmony_ci INIT_LIST_HEAD(&lseg->pls_lc_list); 54662306a36Sopenharmony_ci INIT_LIST_HEAD(&lseg->pls_commits); 54762306a36Sopenharmony_ci refcount_set(&lseg->pls_refcount, 1); 54862306a36Sopenharmony_ci set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 54962306a36Sopenharmony_ci lseg->pls_layout = lo; 55062306a36Sopenharmony_ci lseg->pls_range = *range; 55162306a36Sopenharmony_ci lseg->pls_seq = be32_to_cpu(stateid->seqid); 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_cistatic void pnfs_free_lseg(struct pnfs_layout_segment *lseg) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci if (lseg != NULL) { 55762306a36Sopenharmony_ci struct inode *inode = lseg->pls_layout->plh_inode; 55862306a36Sopenharmony_ci NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg); 55962306a36Sopenharmony_ci } 56062306a36Sopenharmony_ci} 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_cistatic void 56362306a36Sopenharmony_cipnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, 56462306a36Sopenharmony_ci struct pnfs_layout_segment *lseg) 56562306a36Sopenharmony_ci{ 56662306a36Sopenharmony_ci WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 56762306a36Sopenharmony_ci list_del_init(&lseg->pls_list); 56862306a36Sopenharmony_ci /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ 56962306a36Sopenharmony_ci refcount_dec(&lo->plh_refcount); 57062306a36Sopenharmony_ci if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 57162306a36Sopenharmony_ci return; 57262306a36Sopenharmony_ci if (list_empty(&lo->plh_segs) && 57362306a36Sopenharmony_ci !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && 57462306a36Sopenharmony_ci !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 57562306a36Sopenharmony_ci if (atomic_read(&lo->plh_outstanding) == 0) 57662306a36Sopenharmony_ci set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 57762306a36Sopenharmony_ci clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 57862306a36Sopenharmony_ci } 57962306a36Sopenharmony_ci} 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_cistatic bool 58262306a36Sopenharmony_cipnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo, 58362306a36Sopenharmony_ci struct pnfs_layout_segment *lseg) 58462306a36Sopenharmony_ci{ 58562306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && 58662306a36Sopenharmony_ci pnfs_layout_is_valid(lo)) { 58762306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); 58862306a36Sopenharmony_ci list_move_tail(&lseg->pls_list, &lo->plh_return_segs); 58962306a36Sopenharmony_ci return true; 59062306a36Sopenharmony_ci } 59162306a36Sopenharmony_ci return false; 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_civoid 59562306a36Sopenharmony_cipnfs_put_lseg(struct pnfs_layout_segment *lseg) 59662306a36Sopenharmony_ci{ 59762306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 59862306a36Sopenharmony_ci struct inode *inode; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (!lseg) 60162306a36Sopenharmony_ci return; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 60462306a36Sopenharmony_ci refcount_read(&lseg->pls_refcount), 60562306a36Sopenharmony_ci test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci lo = lseg->pls_layout; 60862306a36Sopenharmony_ci inode = lo->plh_inode; 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 61162306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 61262306a36Sopenharmony_ci pnfs_layout_remove_lseg(lo, lseg); 61362306a36Sopenharmony_ci if (pnfs_cache_lseg_for_layoutreturn(lo, lseg)) 61462306a36Sopenharmony_ci lseg = NULL; 61562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 61662306a36Sopenharmony_ci pnfs_free_lseg(lseg); 61762306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_put_lseg); 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci/* 62362306a36Sopenharmony_ci * is l2 fully contained in l1? 62462306a36Sopenharmony_ci * start1 end1 62562306a36Sopenharmony_ci * [----------------------------------) 62662306a36Sopenharmony_ci * start2 end2 62762306a36Sopenharmony_ci * [----------------) 62862306a36Sopenharmony_ci */ 62962306a36Sopenharmony_cistatic bool 63062306a36Sopenharmony_cipnfs_lseg_range_contained(const struct pnfs_layout_range *l1, 63162306a36Sopenharmony_ci const struct pnfs_layout_range *l2) 63262306a36Sopenharmony_ci{ 63362306a36Sopenharmony_ci u64 start1 = l1->offset; 63462306a36Sopenharmony_ci u64 end1 = pnfs_end_offset(start1, l1->length); 63562306a36Sopenharmony_ci u64 start2 = l2->offset; 63662306a36Sopenharmony_ci u64 end2 = pnfs_end_offset(start2, l2->length); 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci return (start1 <= start2) && (end1 >= end2); 63962306a36Sopenharmony_ci} 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_cistatic bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, 64262306a36Sopenharmony_ci struct list_head *tmp_list) 64362306a36Sopenharmony_ci{ 64462306a36Sopenharmony_ci if (!refcount_dec_and_test(&lseg->pls_refcount)) 64562306a36Sopenharmony_ci return false; 64662306a36Sopenharmony_ci pnfs_layout_remove_lseg(lseg->pls_layout, lseg); 64762306a36Sopenharmony_ci list_add(&lseg->pls_list, tmp_list); 64862306a36Sopenharmony_ci return true; 64962306a36Sopenharmony_ci} 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci/* Returns 1 if lseg is removed from list, 0 otherwise */ 65262306a36Sopenharmony_cistatic int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 65362306a36Sopenharmony_ci struct list_head *tmp_list) 65462306a36Sopenharmony_ci{ 65562306a36Sopenharmony_ci int rv = 0; 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 65862306a36Sopenharmony_ci /* Remove the reference keeping the lseg in the 65962306a36Sopenharmony_ci * list. It will now be removed when all 66062306a36Sopenharmony_ci * outstanding io is finished. 66162306a36Sopenharmony_ci */ 66262306a36Sopenharmony_ci dprintk("%s: lseg %p ref %d\n", __func__, lseg, 66362306a36Sopenharmony_ci refcount_read(&lseg->pls_refcount)); 66462306a36Sopenharmony_ci if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) 66562306a36Sopenharmony_ci rv = 1; 66662306a36Sopenharmony_ci } 66762306a36Sopenharmony_ci return rv; 66862306a36Sopenharmony_ci} 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_cistatic bool 67162306a36Sopenharmony_cipnfs_should_free_range(const struct pnfs_layout_range *lseg_range, 67262306a36Sopenharmony_ci const struct pnfs_layout_range *recall_range) 67362306a36Sopenharmony_ci{ 67462306a36Sopenharmony_ci return (recall_range->iomode == IOMODE_ANY || 67562306a36Sopenharmony_ci lseg_range->iomode == recall_range->iomode) && 67662306a36Sopenharmony_ci pnfs_lseg_range_intersecting(lseg_range, recall_range); 67762306a36Sopenharmony_ci} 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_cistatic bool 68062306a36Sopenharmony_cipnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg, 68162306a36Sopenharmony_ci const struct pnfs_layout_range *recall_range, 68262306a36Sopenharmony_ci u32 seq) 68362306a36Sopenharmony_ci{ 68462306a36Sopenharmony_ci if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq)) 68562306a36Sopenharmony_ci return false; 68662306a36Sopenharmony_ci if (recall_range == NULL) 68762306a36Sopenharmony_ci return true; 68862306a36Sopenharmony_ci return pnfs_should_free_range(&lseg->pls_range, recall_range); 68962306a36Sopenharmony_ci} 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci/** 69262306a36Sopenharmony_ci * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later 69362306a36Sopenharmony_ci * @lo: layout header containing the lsegs 69462306a36Sopenharmony_ci * @tmp_list: list head where doomed lsegs should go 69562306a36Sopenharmony_ci * @recall_range: optional recall range argument to match (may be NULL) 69662306a36Sopenharmony_ci * @seq: only invalidate lsegs obtained prior to this sequence (may be 0) 69762306a36Sopenharmony_ci * 69862306a36Sopenharmony_ci * Walk the list of lsegs in the layout header, and tear down any that should 69962306a36Sopenharmony_ci * be destroyed. If "recall_range" is specified then the segment must match 70062306a36Sopenharmony_ci * that range. If "seq" is non-zero, then only match segments that were handed 70162306a36Sopenharmony_ci * out at or before that sequence. 70262306a36Sopenharmony_ci * 70362306a36Sopenharmony_ci * Returns number of matching invalid lsegs remaining in list after scanning 70462306a36Sopenharmony_ci * it and purging them. 70562306a36Sopenharmony_ci */ 70662306a36Sopenharmony_ciint 70762306a36Sopenharmony_cipnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 70862306a36Sopenharmony_ci struct list_head *tmp_list, 70962306a36Sopenharmony_ci const struct pnfs_layout_range *recall_range, 71062306a36Sopenharmony_ci u32 seq) 71162306a36Sopenharmony_ci{ 71262306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *next; 71362306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(lo->plh_inode); 71462306a36Sopenharmony_ci int remaining = 0; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci dprintk("%s:Begin lo %p\n", __func__, lo); 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci if (list_empty(&lo->plh_segs)) 71962306a36Sopenharmony_ci return 0; 72062306a36Sopenharmony_ci list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 72162306a36Sopenharmony_ci if (pnfs_match_lseg_recall(lseg, recall_range, seq)) { 72262306a36Sopenharmony_ci dprintk("%s: freeing lseg %p iomode %d seq %u " 72362306a36Sopenharmony_ci "offset %llu length %llu\n", __func__, 72462306a36Sopenharmony_ci lseg, lseg->pls_range.iomode, lseg->pls_seq, 72562306a36Sopenharmony_ci lseg->pls_range.offset, lseg->pls_range.length); 72662306a36Sopenharmony_ci if (mark_lseg_invalid(lseg, tmp_list)) 72762306a36Sopenharmony_ci continue; 72862306a36Sopenharmony_ci remaining++; 72962306a36Sopenharmony_ci pnfs_lseg_cancel_io(server, lseg); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci dprintk("%s:Return %i\n", __func__, remaining); 73262306a36Sopenharmony_ci return remaining; 73362306a36Sopenharmony_ci} 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_cistatic void 73662306a36Sopenharmony_cipnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, 73762306a36Sopenharmony_ci struct list_head *free_me, 73862306a36Sopenharmony_ci const struct pnfs_layout_range *range, 73962306a36Sopenharmony_ci u32 seq) 74062306a36Sopenharmony_ci{ 74162306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *next; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) { 74462306a36Sopenharmony_ci if (pnfs_match_lseg_recall(lseg, range, seq)) 74562306a36Sopenharmony_ci list_move_tail(&lseg->pls_list, free_me); 74662306a36Sopenharmony_ci } 74762306a36Sopenharmony_ci} 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci/* note free_me must contain lsegs from a single layout_hdr */ 75062306a36Sopenharmony_civoid 75162306a36Sopenharmony_cipnfs_free_lseg_list(struct list_head *free_me) 75262306a36Sopenharmony_ci{ 75362306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *tmp; 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci if (list_empty(free_me)) 75662306a36Sopenharmony_ci return; 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 75962306a36Sopenharmony_ci list_del(&lseg->pls_list); 76062306a36Sopenharmony_ci pnfs_free_lseg(lseg); 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci} 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_cistatic struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) 76562306a36Sopenharmony_ci{ 76662306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 76762306a36Sopenharmony_ci LIST_HEAD(tmp_list); 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci spin_lock(&nfsi->vfs_inode.i_lock); 77062306a36Sopenharmony_ci lo = nfsi->layout; 77162306a36Sopenharmony_ci if (lo) { 77262306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 77362306a36Sopenharmony_ci pnfs_mark_layout_stateid_invalid(lo, &tmp_list); 77462306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 77562306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 77662306a36Sopenharmony_ci spin_unlock(&nfsi->vfs_inode.i_lock); 77762306a36Sopenharmony_ci pnfs_free_lseg_list(&tmp_list); 77862306a36Sopenharmony_ci nfs_commit_inode(&nfsi->vfs_inode, 0); 77962306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 78062306a36Sopenharmony_ci } else 78162306a36Sopenharmony_ci spin_unlock(&nfsi->vfs_inode.i_lock); 78262306a36Sopenharmony_ci return lo; 78362306a36Sopenharmony_ci} 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_civoid pnfs_destroy_layout(struct nfs_inode *nfsi) 78662306a36Sopenharmony_ci{ 78762306a36Sopenharmony_ci __pnfs_destroy_layout(nfsi); 78862306a36Sopenharmony_ci} 78962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_destroy_layout); 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_cistatic bool pnfs_layout_removed(struct nfs_inode *nfsi, 79262306a36Sopenharmony_ci struct pnfs_layout_hdr *lo) 79362306a36Sopenharmony_ci{ 79462306a36Sopenharmony_ci bool ret; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci spin_lock(&nfsi->vfs_inode.i_lock); 79762306a36Sopenharmony_ci ret = nfsi->layout != lo; 79862306a36Sopenharmony_ci spin_unlock(&nfsi->vfs_inode.i_lock); 79962306a36Sopenharmony_ci return ret; 80062306a36Sopenharmony_ci} 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_civoid pnfs_destroy_layout_final(struct nfs_inode *nfsi) 80362306a36Sopenharmony_ci{ 80462306a36Sopenharmony_ci struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci if (lo) 80762306a36Sopenharmony_ci wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); 80862306a36Sopenharmony_ci} 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_cistatic bool 81162306a36Sopenharmony_cipnfs_layout_add_bulk_destroy_list(struct inode *inode, 81262306a36Sopenharmony_ci struct list_head *layout_list) 81362306a36Sopenharmony_ci{ 81462306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 81562306a36Sopenharmony_ci bool ret = false; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci spin_lock(&inode->i_lock); 81862306a36Sopenharmony_ci lo = NFS_I(inode)->layout; 81962306a36Sopenharmony_ci if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { 82062306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 82162306a36Sopenharmony_ci list_add(&lo->plh_bulk_destroy, layout_list); 82262306a36Sopenharmony_ci ret = true; 82362306a36Sopenharmony_ci } 82462306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 82562306a36Sopenharmony_ci return ret; 82662306a36Sopenharmony_ci} 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci/* Caller must hold rcu_read_lock and clp->cl_lock */ 82962306a36Sopenharmony_cistatic int 83062306a36Sopenharmony_cipnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, 83162306a36Sopenharmony_ci struct nfs_server *server, 83262306a36Sopenharmony_ci struct list_head *layout_list) 83362306a36Sopenharmony_ci __must_hold(&clp->cl_lock) 83462306a36Sopenharmony_ci __must_hold(RCU) 83562306a36Sopenharmony_ci{ 83662306a36Sopenharmony_ci struct pnfs_layout_hdr *lo, *next; 83762306a36Sopenharmony_ci struct inode *inode; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { 84062306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || 84162306a36Sopenharmony_ci test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || 84262306a36Sopenharmony_ci !list_empty(&lo->plh_bulk_destroy)) 84362306a36Sopenharmony_ci continue; 84462306a36Sopenharmony_ci /* If the sb is being destroyed, just bail */ 84562306a36Sopenharmony_ci if (!nfs_sb_active(server->super)) 84662306a36Sopenharmony_ci break; 84762306a36Sopenharmony_ci inode = pnfs_grab_inode_layout_hdr(lo); 84862306a36Sopenharmony_ci if (inode != NULL) { 84962306a36Sopenharmony_ci if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) 85062306a36Sopenharmony_ci list_del_rcu(&lo->plh_layouts); 85162306a36Sopenharmony_ci if (pnfs_layout_add_bulk_destroy_list(inode, 85262306a36Sopenharmony_ci layout_list)) 85362306a36Sopenharmony_ci continue; 85462306a36Sopenharmony_ci rcu_read_unlock(); 85562306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 85662306a36Sopenharmony_ci iput(inode); 85762306a36Sopenharmony_ci } else { 85862306a36Sopenharmony_ci rcu_read_unlock(); 85962306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 86062306a36Sopenharmony_ci } 86162306a36Sopenharmony_ci nfs_sb_deactive(server->super); 86262306a36Sopenharmony_ci spin_lock(&clp->cl_lock); 86362306a36Sopenharmony_ci rcu_read_lock(); 86462306a36Sopenharmony_ci return -EAGAIN; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci return 0; 86762306a36Sopenharmony_ci} 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_cistatic int 87062306a36Sopenharmony_cipnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, 87162306a36Sopenharmony_ci bool is_bulk_recall) 87262306a36Sopenharmony_ci{ 87362306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 87462306a36Sopenharmony_ci struct inode *inode; 87562306a36Sopenharmony_ci LIST_HEAD(lseg_list); 87662306a36Sopenharmony_ci int ret = 0; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci while (!list_empty(layout_list)) { 87962306a36Sopenharmony_ci lo = list_entry(layout_list->next, struct pnfs_layout_hdr, 88062306a36Sopenharmony_ci plh_bulk_destroy); 88162306a36Sopenharmony_ci dprintk("%s freeing layout for inode %lu\n", __func__, 88262306a36Sopenharmony_ci lo->plh_inode->i_ino); 88362306a36Sopenharmony_ci inode = lo->plh_inode; 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci pnfs_layoutcommit_inode(inode, false); 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci spin_lock(&inode->i_lock); 88862306a36Sopenharmony_ci list_del_init(&lo->plh_bulk_destroy); 88962306a36Sopenharmony_ci if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { 89062306a36Sopenharmony_ci if (is_bulk_recall) 89162306a36Sopenharmony_ci set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 89262306a36Sopenharmony_ci ret = -EAGAIN; 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 89562306a36Sopenharmony_ci pnfs_free_lseg_list(&lseg_list); 89662306a36Sopenharmony_ci /* Free all lsegs that are attached to commit buckets */ 89762306a36Sopenharmony_ci nfs_commit_inode(inode, 0); 89862306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 89962306a36Sopenharmony_ci nfs_iput_and_deactive(inode); 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci return ret; 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ciint 90562306a36Sopenharmony_cipnfs_destroy_layouts_byfsid(struct nfs_client *clp, 90662306a36Sopenharmony_ci struct nfs_fsid *fsid, 90762306a36Sopenharmony_ci bool is_recall) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci struct nfs_server *server; 91062306a36Sopenharmony_ci LIST_HEAD(layout_list); 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci spin_lock(&clp->cl_lock); 91362306a36Sopenharmony_ci rcu_read_lock(); 91462306a36Sopenharmony_cirestart: 91562306a36Sopenharmony_ci list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 91662306a36Sopenharmony_ci if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) 91762306a36Sopenharmony_ci continue; 91862306a36Sopenharmony_ci if (pnfs_layout_bulk_destroy_byserver_locked(clp, 91962306a36Sopenharmony_ci server, 92062306a36Sopenharmony_ci &layout_list) != 0) 92162306a36Sopenharmony_ci goto restart; 92262306a36Sopenharmony_ci } 92362306a36Sopenharmony_ci rcu_read_unlock(); 92462306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci if (list_empty(&layout_list)) 92762306a36Sopenharmony_ci return 0; 92862306a36Sopenharmony_ci return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 92962306a36Sopenharmony_ci} 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ciint 93262306a36Sopenharmony_cipnfs_destroy_layouts_byclid(struct nfs_client *clp, 93362306a36Sopenharmony_ci bool is_recall) 93462306a36Sopenharmony_ci{ 93562306a36Sopenharmony_ci struct nfs_server *server; 93662306a36Sopenharmony_ci LIST_HEAD(layout_list); 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci spin_lock(&clp->cl_lock); 93962306a36Sopenharmony_ci rcu_read_lock(); 94062306a36Sopenharmony_cirestart: 94162306a36Sopenharmony_ci list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 94262306a36Sopenharmony_ci if (pnfs_layout_bulk_destroy_byserver_locked(clp, 94362306a36Sopenharmony_ci server, 94462306a36Sopenharmony_ci &layout_list) != 0) 94562306a36Sopenharmony_ci goto restart; 94662306a36Sopenharmony_ci } 94762306a36Sopenharmony_ci rcu_read_unlock(); 94862306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci if (list_empty(&layout_list)) 95162306a36Sopenharmony_ci return 0; 95262306a36Sopenharmony_ci return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); 95362306a36Sopenharmony_ci} 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci/* 95662306a36Sopenharmony_ci * Called by the state manager to remove all layouts established under an 95762306a36Sopenharmony_ci * expired lease. 95862306a36Sopenharmony_ci */ 95962306a36Sopenharmony_civoid 96062306a36Sopenharmony_cipnfs_destroy_all_layouts(struct nfs_client *clp) 96162306a36Sopenharmony_ci{ 96262306a36Sopenharmony_ci nfs4_deviceid_mark_client_invalid(clp); 96362306a36Sopenharmony_ci nfs4_deviceid_purge_client(clp); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci pnfs_destroy_layouts_byclid(clp, false); 96662306a36Sopenharmony_ci} 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_cistatic void 96962306a36Sopenharmony_cipnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred) 97062306a36Sopenharmony_ci{ 97162306a36Sopenharmony_ci const struct cred *old; 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) { 97462306a36Sopenharmony_ci old = xchg(&lo->plh_lc_cred, get_cred(cred)); 97562306a36Sopenharmony_ci put_cred(old); 97662306a36Sopenharmony_ci } 97762306a36Sopenharmony_ci} 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci/* update lo->plh_stateid with new if is more recent */ 98062306a36Sopenharmony_civoid 98162306a36Sopenharmony_cipnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, 98262306a36Sopenharmony_ci const struct cred *cred, bool update_barrier) 98362306a36Sopenharmony_ci{ 98462306a36Sopenharmony_ci u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid); 98562306a36Sopenharmony_ci u32 newseq = be32_to_cpu(new->seqid); 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci if (!pnfs_layout_is_valid(lo)) { 98862306a36Sopenharmony_ci pnfs_set_layout_cred(lo, cred); 98962306a36Sopenharmony_ci nfs4_stateid_copy(&lo->plh_stateid, new); 99062306a36Sopenharmony_ci lo->plh_barrier = newseq; 99162306a36Sopenharmony_ci pnfs_clear_layoutreturn_info(lo); 99262306a36Sopenharmony_ci clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 99362306a36Sopenharmony_ci return; 99462306a36Sopenharmony_ci } 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci if (pnfs_seqid_is_newer(newseq, oldseq)) 99762306a36Sopenharmony_ci nfs4_stateid_copy(&lo->plh_stateid, new); 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (update_barrier) { 100062306a36Sopenharmony_ci pnfs_barrier_update(lo, newseq); 100162306a36Sopenharmony_ci return; 100262306a36Sopenharmony_ci } 100362306a36Sopenharmony_ci /* 100462306a36Sopenharmony_ci * Because of wraparound, we want to keep the barrier 100562306a36Sopenharmony_ci * "close" to the current seqids. We really only want to 100662306a36Sopenharmony_ci * get here from a layoutget call. 100762306a36Sopenharmony_ci */ 100862306a36Sopenharmony_ci if (atomic_read(&lo->plh_outstanding) == 1) 100962306a36Sopenharmony_ci pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid)); 101062306a36Sopenharmony_ci} 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_cistatic bool 101362306a36Sopenharmony_cipnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, 101462306a36Sopenharmony_ci const nfs4_stateid *stateid) 101562306a36Sopenharmony_ci{ 101662306a36Sopenharmony_ci u32 seqid = be32_to_cpu(stateid->seqid); 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid); 101962306a36Sopenharmony_ci} 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci/* lget is set to 1 if called from inside send_layoutget call chain */ 102262306a36Sopenharmony_cistatic bool 102362306a36Sopenharmony_cipnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) 102462306a36Sopenharmony_ci{ 102562306a36Sopenharmony_ci return lo->plh_block_lgets || 102662306a36Sopenharmony_ci test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 102762306a36Sopenharmony_ci} 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_cistatic struct nfs_server * 103062306a36Sopenharmony_cipnfs_find_server(struct inode *inode, struct nfs_open_context *ctx) 103162306a36Sopenharmony_ci{ 103262306a36Sopenharmony_ci struct nfs_server *server; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci if (inode) { 103562306a36Sopenharmony_ci server = NFS_SERVER(inode); 103662306a36Sopenharmony_ci } else { 103762306a36Sopenharmony_ci struct dentry *parent_dir = dget_parent(ctx->dentry); 103862306a36Sopenharmony_ci server = NFS_SERVER(parent_dir->d_inode); 103962306a36Sopenharmony_ci dput(parent_dir); 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci return server; 104262306a36Sopenharmony_ci} 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_cistatic void nfs4_free_pages(struct page **pages, size_t size) 104562306a36Sopenharmony_ci{ 104662306a36Sopenharmony_ci int i; 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci if (!pages) 104962306a36Sopenharmony_ci return; 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci for (i = 0; i < size; i++) { 105262306a36Sopenharmony_ci if (!pages[i]) 105362306a36Sopenharmony_ci break; 105462306a36Sopenharmony_ci __free_page(pages[i]); 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci kfree(pages); 105762306a36Sopenharmony_ci} 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_cistatic struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) 106062306a36Sopenharmony_ci{ 106162306a36Sopenharmony_ci struct page **pages; 106262306a36Sopenharmony_ci int i; 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci pages = kmalloc_array(size, sizeof(struct page *), gfp_flags); 106562306a36Sopenharmony_ci if (!pages) { 106662306a36Sopenharmony_ci dprintk("%s: can't alloc array of %zu pages\n", __func__, size); 106762306a36Sopenharmony_ci return NULL; 106862306a36Sopenharmony_ci } 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci for (i = 0; i < size; i++) { 107162306a36Sopenharmony_ci pages[i] = alloc_page(gfp_flags); 107262306a36Sopenharmony_ci if (!pages[i]) { 107362306a36Sopenharmony_ci dprintk("%s: failed to allocate page\n", __func__); 107462306a36Sopenharmony_ci nfs4_free_pages(pages, i); 107562306a36Sopenharmony_ci return NULL; 107662306a36Sopenharmony_ci } 107762306a36Sopenharmony_ci } 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci return pages; 108062306a36Sopenharmony_ci} 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_cistatic struct nfs4_layoutget * 108362306a36Sopenharmony_cipnfs_alloc_init_layoutget_args(struct inode *ino, 108462306a36Sopenharmony_ci struct nfs_open_context *ctx, 108562306a36Sopenharmony_ci const nfs4_stateid *stateid, 108662306a36Sopenharmony_ci const struct pnfs_layout_range *range, 108762306a36Sopenharmony_ci gfp_t gfp_flags) 108862306a36Sopenharmony_ci{ 108962306a36Sopenharmony_ci struct nfs_server *server = pnfs_find_server(ino, ctx); 109062306a36Sopenharmony_ci size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response; 109162306a36Sopenharmony_ci size_t max_pages = max_response_pages(server); 109262306a36Sopenharmony_ci struct nfs4_layoutget *lgp; 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci dprintk("--> %s\n", __func__); 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci lgp = kzalloc(sizeof(*lgp), gfp_flags); 109762306a36Sopenharmony_ci if (lgp == NULL) 109862306a36Sopenharmony_ci return NULL; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci if (max_reply_sz) { 110162306a36Sopenharmony_ci size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT; 110262306a36Sopenharmony_ci if (npages < max_pages) 110362306a36Sopenharmony_ci max_pages = npages; 110462306a36Sopenharmony_ci } 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); 110762306a36Sopenharmony_ci if (!lgp->args.layout.pages) { 110862306a36Sopenharmony_ci kfree(lgp); 110962306a36Sopenharmony_ci return NULL; 111062306a36Sopenharmony_ci } 111162306a36Sopenharmony_ci lgp->args.layout.pglen = max_pages * PAGE_SIZE; 111262306a36Sopenharmony_ci lgp->res.layoutp = &lgp->args.layout; 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci /* Don't confuse uninitialised result and success */ 111562306a36Sopenharmony_ci lgp->res.status = -NFS4ERR_DELAY; 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci lgp->args.minlength = PAGE_SIZE; 111862306a36Sopenharmony_ci if (lgp->args.minlength > range->length) 111962306a36Sopenharmony_ci lgp->args.minlength = range->length; 112062306a36Sopenharmony_ci if (ino) { 112162306a36Sopenharmony_ci loff_t i_size = i_size_read(ino); 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci if (range->iomode == IOMODE_READ) { 112462306a36Sopenharmony_ci if (range->offset >= i_size) 112562306a36Sopenharmony_ci lgp->args.minlength = 0; 112662306a36Sopenharmony_ci else if (i_size - range->offset < lgp->args.minlength) 112762306a36Sopenharmony_ci lgp->args.minlength = i_size - range->offset; 112862306a36Sopenharmony_ci } 112962306a36Sopenharmony_ci } 113062306a36Sopenharmony_ci lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 113162306a36Sopenharmony_ci pnfs_copy_range(&lgp->args.range, range); 113262306a36Sopenharmony_ci lgp->args.type = server->pnfs_curr_ld->id; 113362306a36Sopenharmony_ci lgp->args.inode = ino; 113462306a36Sopenharmony_ci lgp->args.ctx = get_nfs_open_context(ctx); 113562306a36Sopenharmony_ci nfs4_stateid_copy(&lgp->args.stateid, stateid); 113662306a36Sopenharmony_ci lgp->gfp_flags = gfp_flags; 113762306a36Sopenharmony_ci lgp->cred = ctx->cred; 113862306a36Sopenharmony_ci return lgp; 113962306a36Sopenharmony_ci} 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_civoid pnfs_layoutget_free(struct nfs4_layoutget *lgp) 114262306a36Sopenharmony_ci{ 114362306a36Sopenharmony_ci size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE; 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci nfs4_free_pages(lgp->args.layout.pages, max_pages); 114662306a36Sopenharmony_ci pnfs_put_layout_hdr(lgp->lo); 114762306a36Sopenharmony_ci put_nfs_open_context(lgp->args.ctx); 114862306a36Sopenharmony_ci kfree(lgp); 114962306a36Sopenharmony_ci} 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_cistatic void pnfs_clear_layoutcommit(struct inode *inode, 115262306a36Sopenharmony_ci struct list_head *head) 115362306a36Sopenharmony_ci{ 115462306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(inode); 115562306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *tmp; 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 115862306a36Sopenharmony_ci return; 115962306a36Sopenharmony_ci list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { 116062306a36Sopenharmony_ci if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 116162306a36Sopenharmony_ci continue; 116262306a36Sopenharmony_ci pnfs_lseg_dec_and_remove_zero(lseg, head); 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci} 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_civoid pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, 116762306a36Sopenharmony_ci const nfs4_stateid *arg_stateid, 116862306a36Sopenharmony_ci const struct pnfs_layout_range *range, 116962306a36Sopenharmony_ci const nfs4_stateid *stateid) 117062306a36Sopenharmony_ci{ 117162306a36Sopenharmony_ci struct inode *inode = lo->plh_inode; 117262306a36Sopenharmony_ci LIST_HEAD(freeme); 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci spin_lock(&inode->i_lock); 117562306a36Sopenharmony_ci if (!pnfs_layout_is_valid(lo) || 117662306a36Sopenharmony_ci !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) 117762306a36Sopenharmony_ci goto out_unlock; 117862306a36Sopenharmony_ci if (stateid) { 117962306a36Sopenharmony_ci u32 seq = be32_to_cpu(arg_stateid->seqid); 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); 118262306a36Sopenharmony_ci pnfs_free_returned_lsegs(lo, &freeme, range, seq); 118362306a36Sopenharmony_ci pnfs_set_layout_stateid(lo, stateid, NULL, true); 118462306a36Sopenharmony_ci } else 118562306a36Sopenharmony_ci pnfs_mark_layout_stateid_invalid(lo, &freeme); 118662306a36Sopenharmony_ciout_unlock: 118762306a36Sopenharmony_ci pnfs_clear_layoutreturn_waitbit(lo); 118862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 118962306a36Sopenharmony_ci pnfs_free_lseg_list(&freeme); 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci} 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_cistatic bool 119462306a36Sopenharmony_cipnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, 119562306a36Sopenharmony_ci nfs4_stateid *stateid, 119662306a36Sopenharmony_ci const struct cred **cred, 119762306a36Sopenharmony_ci enum pnfs_iomode *iomode) 119862306a36Sopenharmony_ci{ 119962306a36Sopenharmony_ci /* Serialise LAYOUTGET/LAYOUTRETURN */ 120062306a36Sopenharmony_ci if (atomic_read(&lo->plh_outstanding) != 0) 120162306a36Sopenharmony_ci return false; 120262306a36Sopenharmony_ci if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) 120362306a36Sopenharmony_ci return false; 120462306a36Sopenharmony_ci set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); 120562306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 120662306a36Sopenharmony_ci nfs4_stateid_copy(stateid, &lo->plh_stateid); 120762306a36Sopenharmony_ci *cred = get_cred(lo->plh_lc_cred); 120862306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { 120962306a36Sopenharmony_ci if (lo->plh_return_seq != 0) 121062306a36Sopenharmony_ci stateid->seqid = cpu_to_be32(lo->plh_return_seq); 121162306a36Sopenharmony_ci if (iomode != NULL) 121262306a36Sopenharmony_ci *iomode = lo->plh_return_iomode; 121362306a36Sopenharmony_ci pnfs_clear_layoutreturn_info(lo); 121462306a36Sopenharmony_ci } else if (iomode != NULL) 121562306a36Sopenharmony_ci *iomode = IOMODE_ANY; 121662306a36Sopenharmony_ci pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid)); 121762306a36Sopenharmony_ci return true; 121862306a36Sopenharmony_ci} 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_cistatic void 122162306a36Sopenharmony_cipnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args, 122262306a36Sopenharmony_ci struct pnfs_layout_hdr *lo, 122362306a36Sopenharmony_ci const nfs4_stateid *stateid, 122462306a36Sopenharmony_ci enum pnfs_iomode iomode) 122562306a36Sopenharmony_ci{ 122662306a36Sopenharmony_ci struct inode *inode = lo->plh_inode; 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id; 122962306a36Sopenharmony_ci args->inode = inode; 123062306a36Sopenharmony_ci args->range.iomode = iomode; 123162306a36Sopenharmony_ci args->range.offset = 0; 123262306a36Sopenharmony_ci args->range.length = NFS4_MAX_UINT64; 123362306a36Sopenharmony_ci args->layout = lo; 123462306a36Sopenharmony_ci nfs4_stateid_copy(&args->stateid, stateid); 123562306a36Sopenharmony_ci} 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_cistatic int 123862306a36Sopenharmony_cipnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, 123962306a36Sopenharmony_ci const nfs4_stateid *stateid, 124062306a36Sopenharmony_ci const struct cred **pcred, 124162306a36Sopenharmony_ci enum pnfs_iomode iomode, 124262306a36Sopenharmony_ci bool sync) 124362306a36Sopenharmony_ci{ 124462306a36Sopenharmony_ci struct inode *ino = lo->plh_inode; 124562306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 124662306a36Sopenharmony_ci struct nfs4_layoutreturn *lrp; 124762306a36Sopenharmony_ci const struct cred *cred = *pcred; 124862306a36Sopenharmony_ci int status = 0; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci *pcred = NULL; 125162306a36Sopenharmony_ci lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask()); 125262306a36Sopenharmony_ci if (unlikely(lrp == NULL)) { 125362306a36Sopenharmony_ci status = -ENOMEM; 125462306a36Sopenharmony_ci spin_lock(&ino->i_lock); 125562306a36Sopenharmony_ci pnfs_clear_layoutreturn_waitbit(lo); 125662306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 125762306a36Sopenharmony_ci put_cred(cred); 125862306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 125962306a36Sopenharmony_ci goto out; 126062306a36Sopenharmony_ci } 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); 126362306a36Sopenharmony_ci lrp->args.ld_private = &lrp->ld_private; 126462306a36Sopenharmony_ci lrp->clp = NFS_SERVER(ino)->nfs_client; 126562306a36Sopenharmony_ci lrp->cred = cred; 126662306a36Sopenharmony_ci if (ld->prepare_layoutreturn) 126762306a36Sopenharmony_ci ld->prepare_layoutreturn(&lrp->args); 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci status = nfs4_proc_layoutreturn(lrp, sync); 127062306a36Sopenharmony_ciout: 127162306a36Sopenharmony_ci dprintk("<-- %s status: %d\n", __func__, status); 127262306a36Sopenharmony_ci return status; 127362306a36Sopenharmony_ci} 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_cistatic bool 127662306a36Sopenharmony_cipnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, 127762306a36Sopenharmony_ci enum pnfs_iomode iomode, 127862306a36Sopenharmony_ci u32 seq) 127962306a36Sopenharmony_ci{ 128062306a36Sopenharmony_ci struct pnfs_layout_range recall_range = { 128162306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 128262306a36Sopenharmony_ci .iomode = iomode, 128362306a36Sopenharmony_ci }; 128462306a36Sopenharmony_ci return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, 128562306a36Sopenharmony_ci &recall_range, seq) != -EBUSY; 128662306a36Sopenharmony_ci} 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci/* Return true if layoutreturn is needed */ 128962306a36Sopenharmony_cistatic bool 129062306a36Sopenharmony_cipnfs_layout_need_return(struct pnfs_layout_hdr *lo) 129162306a36Sopenharmony_ci{ 129262306a36Sopenharmony_ci if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 129362306a36Sopenharmony_ci return false; 129462306a36Sopenharmony_ci return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, 129562306a36Sopenharmony_ci lo->plh_return_seq); 129662306a36Sopenharmony_ci} 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_cistatic void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) 129962306a36Sopenharmony_ci{ 130062306a36Sopenharmony_ci struct inode *inode= lo->plh_inode; 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 130362306a36Sopenharmony_ci return; 130462306a36Sopenharmony_ci spin_lock(&inode->i_lock); 130562306a36Sopenharmony_ci if (pnfs_layout_need_return(lo)) { 130662306a36Sopenharmony_ci const struct cred *cred; 130762306a36Sopenharmony_ci nfs4_stateid stateid; 130862306a36Sopenharmony_ci enum pnfs_iomode iomode; 130962306a36Sopenharmony_ci bool send; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); 131262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 131362306a36Sopenharmony_ci if (send) { 131462306a36Sopenharmony_ci /* Send an async layoutreturn so we dont deadlock */ 131562306a36Sopenharmony_ci pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); 131662306a36Sopenharmony_ci } 131762306a36Sopenharmony_ci } else 131862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci/* 132262306a36Sopenharmony_ci * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 132362306a36Sopenharmony_ci * when the layout segment list is empty. 132462306a36Sopenharmony_ci * 132562306a36Sopenharmony_ci * Note that a pnfs_layout_hdr can exist with an empty layout segment 132662306a36Sopenharmony_ci * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the 132762306a36Sopenharmony_ci * deviceid is marked invalid. 132862306a36Sopenharmony_ci */ 132962306a36Sopenharmony_ciint 133062306a36Sopenharmony_ci_pnfs_return_layout(struct inode *ino) 133162306a36Sopenharmony_ci{ 133262306a36Sopenharmony_ci struct pnfs_layout_hdr *lo = NULL; 133362306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(ino); 133462306a36Sopenharmony_ci struct pnfs_layout_range range = { 133562306a36Sopenharmony_ci .iomode = IOMODE_ANY, 133662306a36Sopenharmony_ci .offset = 0, 133762306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 133862306a36Sopenharmony_ci }; 133962306a36Sopenharmony_ci LIST_HEAD(tmp_list); 134062306a36Sopenharmony_ci const struct cred *cred; 134162306a36Sopenharmony_ci nfs4_stateid stateid; 134262306a36Sopenharmony_ci int status = 0; 134362306a36Sopenharmony_ci bool send, valid_layout; 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci spin_lock(&ino->i_lock); 134862306a36Sopenharmony_ci lo = nfsi->layout; 134962306a36Sopenharmony_ci if (!lo) { 135062306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 135162306a36Sopenharmony_ci dprintk("NFS: %s no layout to return\n", __func__); 135262306a36Sopenharmony_ci goto out; 135362306a36Sopenharmony_ci } 135462306a36Sopenharmony_ci /* Reference matched in nfs4_layoutreturn_release */ 135562306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 135662306a36Sopenharmony_ci /* Is there an outstanding layoutreturn ? */ 135762306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { 135862306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 135962306a36Sopenharmony_ci if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, 136062306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE)) 136162306a36Sopenharmony_ci goto out_put_layout_hdr; 136262306a36Sopenharmony_ci spin_lock(&ino->i_lock); 136362306a36Sopenharmony_ci } 136462306a36Sopenharmony_ci valid_layout = pnfs_layout_is_valid(lo); 136562306a36Sopenharmony_ci pnfs_clear_layoutcommit(ino, &tmp_list); 136662306a36Sopenharmony_ci pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0); 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) 136962306a36Sopenharmony_ci NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci /* Don't send a LAYOUTRETURN if list was initially empty */ 137262306a36Sopenharmony_ci if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || 137362306a36Sopenharmony_ci !valid_layout) { 137462306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 137562306a36Sopenharmony_ci dprintk("NFS: %s no layout segments to return\n", __func__); 137662306a36Sopenharmony_ci goto out_wait_layoutreturn; 137762306a36Sopenharmony_ci } 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ci send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL); 138062306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 138162306a36Sopenharmony_ci if (send) 138262306a36Sopenharmony_ci status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true); 138362306a36Sopenharmony_ciout_wait_layoutreturn: 138462306a36Sopenharmony_ci wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); 138562306a36Sopenharmony_ciout_put_layout_hdr: 138662306a36Sopenharmony_ci pnfs_free_lseg_list(&tmp_list); 138762306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 138862306a36Sopenharmony_ciout: 138962306a36Sopenharmony_ci dprintk("<-- %s status: %d\n", __func__, status); 139062306a36Sopenharmony_ci return status; 139162306a36Sopenharmony_ci} 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ciint 139462306a36Sopenharmony_cipnfs_commit_and_return_layout(struct inode *inode) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 139762306a36Sopenharmony_ci int ret; 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_ci spin_lock(&inode->i_lock); 140062306a36Sopenharmony_ci lo = NFS_I(inode)->layout; 140162306a36Sopenharmony_ci if (lo == NULL) { 140262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 140362306a36Sopenharmony_ci return 0; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 140662306a36Sopenharmony_ci /* Block new layoutgets and read/write to ds */ 140762306a36Sopenharmony_ci lo->plh_block_lgets++; 140862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 140962306a36Sopenharmony_ci filemap_fdatawait(inode->i_mapping); 141062306a36Sopenharmony_ci ret = pnfs_layoutcommit_inode(inode, true); 141162306a36Sopenharmony_ci if (ret == 0) 141262306a36Sopenharmony_ci ret = _pnfs_return_layout(inode); 141362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 141462306a36Sopenharmony_ci lo->plh_block_lgets--; 141562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 141662306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 141762306a36Sopenharmony_ci return ret; 141862306a36Sopenharmony_ci} 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_cibool pnfs_roc(struct inode *ino, 142162306a36Sopenharmony_ci struct nfs4_layoutreturn_args *args, 142262306a36Sopenharmony_ci struct nfs4_layoutreturn_res *res, 142362306a36Sopenharmony_ci const struct cred *cred) 142462306a36Sopenharmony_ci{ 142562306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(ino); 142662306a36Sopenharmony_ci struct nfs_open_context *ctx; 142762306a36Sopenharmony_ci struct nfs4_state *state; 142862306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 142962306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *next; 143062306a36Sopenharmony_ci const struct cred *lc_cred; 143162306a36Sopenharmony_ci nfs4_stateid stateid; 143262306a36Sopenharmony_ci enum pnfs_iomode iomode = 0; 143362306a36Sopenharmony_ci bool layoutreturn = false, roc = false; 143462306a36Sopenharmony_ci bool skip_read = false; 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci if (!nfs_have_layout(ino)) 143762306a36Sopenharmony_ci return false; 143862306a36Sopenharmony_ciretry: 143962306a36Sopenharmony_ci rcu_read_lock(); 144062306a36Sopenharmony_ci spin_lock(&ino->i_lock); 144162306a36Sopenharmony_ci lo = nfsi->layout; 144262306a36Sopenharmony_ci if (!lo || !pnfs_layout_is_valid(lo) || 144362306a36Sopenharmony_ci test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 144462306a36Sopenharmony_ci lo = NULL; 144562306a36Sopenharmony_ci goto out_noroc; 144662306a36Sopenharmony_ci } 144762306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 144862306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { 144962306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 145062306a36Sopenharmony_ci rcu_read_unlock(); 145162306a36Sopenharmony_ci wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, 145262306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 145362306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 145462306a36Sopenharmony_ci goto retry; 145562306a36Sopenharmony_ci } 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci /* no roc if we hold a delegation */ 145862306a36Sopenharmony_ci if (nfs4_check_delegation(ino, FMODE_READ)) { 145962306a36Sopenharmony_ci if (nfs4_check_delegation(ino, FMODE_WRITE)) 146062306a36Sopenharmony_ci goto out_noroc; 146162306a36Sopenharmony_ci skip_read = true; 146262306a36Sopenharmony_ci } 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { 146562306a36Sopenharmony_ci state = ctx->state; 146662306a36Sopenharmony_ci if (state == NULL) 146762306a36Sopenharmony_ci continue; 146862306a36Sopenharmony_ci /* Don't return layout if there is open file state */ 146962306a36Sopenharmony_ci if (state->state & FMODE_WRITE) 147062306a36Sopenharmony_ci goto out_noroc; 147162306a36Sopenharmony_ci if (state->state & FMODE_READ) 147262306a36Sopenharmony_ci skip_read = true; 147362306a36Sopenharmony_ci } 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { 147762306a36Sopenharmony_ci if (skip_read && lseg->pls_range.iomode == IOMODE_READ) 147862306a36Sopenharmony_ci continue; 147962306a36Sopenharmony_ci /* If we are sending layoutreturn, invalidate all valid lsegs */ 148062306a36Sopenharmony_ci if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) 148162306a36Sopenharmony_ci continue; 148262306a36Sopenharmony_ci /* 148362306a36Sopenharmony_ci * Note: mark lseg for return so pnfs_layout_remove_lseg 148462306a36Sopenharmony_ci * doesn't invalidate the layout for us. 148562306a36Sopenharmony_ci */ 148662306a36Sopenharmony_ci set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 148762306a36Sopenharmony_ci if (!mark_lseg_invalid(lseg, &lo->plh_return_segs)) 148862306a36Sopenharmony_ci continue; 148962306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci 149262306a36Sopenharmony_ci if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 149362306a36Sopenharmony_ci goto out_noroc; 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci /* ROC in two conditions: 149662306a36Sopenharmony_ci * 1. there are ROC lsegs 149762306a36Sopenharmony_ci * 2. we don't send layoutreturn 149862306a36Sopenharmony_ci */ 149962306a36Sopenharmony_ci /* lo ref dropped in pnfs_roc_release() */ 150062306a36Sopenharmony_ci layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode); 150162306a36Sopenharmony_ci /* If the creds don't match, we can't compound the layoutreturn */ 150262306a36Sopenharmony_ci if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0) 150362306a36Sopenharmony_ci goto out_noroc; 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci roc = layoutreturn; 150662306a36Sopenharmony_ci pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); 150762306a36Sopenharmony_ci res->lrs_present = 0; 150862306a36Sopenharmony_ci layoutreturn = false; 150962306a36Sopenharmony_ci put_cred(lc_cred); 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ciout_noroc: 151262306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 151362306a36Sopenharmony_ci rcu_read_unlock(); 151462306a36Sopenharmony_ci pnfs_layoutcommit_inode(ino, true); 151562306a36Sopenharmony_ci if (roc) { 151662306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 151762306a36Sopenharmony_ci if (ld->prepare_layoutreturn) 151862306a36Sopenharmony_ci ld->prepare_layoutreturn(args); 151962306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 152062306a36Sopenharmony_ci return true; 152162306a36Sopenharmony_ci } 152262306a36Sopenharmony_ci if (layoutreturn) 152362306a36Sopenharmony_ci pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true); 152462306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 152562306a36Sopenharmony_ci return false; 152662306a36Sopenharmony_ci} 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ciint pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, 152962306a36Sopenharmony_ci struct nfs4_layoutreturn_res **respp, int *ret) 153062306a36Sopenharmony_ci{ 153162306a36Sopenharmony_ci struct nfs4_layoutreturn_args *arg = *argpp; 153262306a36Sopenharmony_ci int retval = -EAGAIN; 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci if (!arg) 153562306a36Sopenharmony_ci return 0; 153662306a36Sopenharmony_ci /* Handle Layoutreturn errors */ 153762306a36Sopenharmony_ci switch (*ret) { 153862306a36Sopenharmony_ci case 0: 153962306a36Sopenharmony_ci retval = 0; 154062306a36Sopenharmony_ci break; 154162306a36Sopenharmony_ci case -NFS4ERR_NOMATCHING_LAYOUT: 154262306a36Sopenharmony_ci /* Was there an RPC level error? If not, retry */ 154362306a36Sopenharmony_ci if (task->tk_rpc_status == 0) 154462306a36Sopenharmony_ci break; 154562306a36Sopenharmony_ci /* If the call was not sent, let caller handle it */ 154662306a36Sopenharmony_ci if (!RPC_WAS_SENT(task)) 154762306a36Sopenharmony_ci return 0; 154862306a36Sopenharmony_ci /* 154962306a36Sopenharmony_ci * Otherwise, assume the call succeeded and 155062306a36Sopenharmony_ci * that we need to release the layout 155162306a36Sopenharmony_ci */ 155262306a36Sopenharmony_ci *ret = 0; 155362306a36Sopenharmony_ci (*respp)->lrs_present = 0; 155462306a36Sopenharmony_ci retval = 0; 155562306a36Sopenharmony_ci break; 155662306a36Sopenharmony_ci case -NFS4ERR_DELAY: 155762306a36Sopenharmony_ci /* Let the caller handle the retry */ 155862306a36Sopenharmony_ci *ret = -NFS4ERR_NOMATCHING_LAYOUT; 155962306a36Sopenharmony_ci return 0; 156062306a36Sopenharmony_ci case -NFS4ERR_OLD_STATEID: 156162306a36Sopenharmony_ci if (!nfs4_layout_refresh_old_stateid(&arg->stateid, 156262306a36Sopenharmony_ci &arg->range, arg->inode)) 156362306a36Sopenharmony_ci break; 156462306a36Sopenharmony_ci *ret = -NFS4ERR_NOMATCHING_LAYOUT; 156562306a36Sopenharmony_ci return -EAGAIN; 156662306a36Sopenharmony_ci } 156762306a36Sopenharmony_ci *argpp = NULL; 156862306a36Sopenharmony_ci *respp = NULL; 156962306a36Sopenharmony_ci return retval; 157062306a36Sopenharmony_ci} 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_civoid pnfs_roc_release(struct nfs4_layoutreturn_args *args, 157362306a36Sopenharmony_ci struct nfs4_layoutreturn_res *res, 157462306a36Sopenharmony_ci int ret) 157562306a36Sopenharmony_ci{ 157662306a36Sopenharmony_ci struct pnfs_layout_hdr *lo = args->layout; 157762306a36Sopenharmony_ci struct inode *inode = args->inode; 157862306a36Sopenharmony_ci const nfs4_stateid *res_stateid = NULL; 157962306a36Sopenharmony_ci struct nfs4_xdr_opaque_data *ld_private = args->ld_private; 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci switch (ret) { 158262306a36Sopenharmony_ci case -NFS4ERR_NOMATCHING_LAYOUT: 158362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 158462306a36Sopenharmony_ci if (pnfs_layout_is_valid(lo) && 158562306a36Sopenharmony_ci nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) 158662306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, args->range.iomode, 0); 158762306a36Sopenharmony_ci pnfs_clear_layoutreturn_waitbit(lo); 158862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 158962306a36Sopenharmony_ci break; 159062306a36Sopenharmony_ci case 0: 159162306a36Sopenharmony_ci if (res->lrs_present) 159262306a36Sopenharmony_ci res_stateid = &res->stateid; 159362306a36Sopenharmony_ci fallthrough; 159462306a36Sopenharmony_ci default: 159562306a36Sopenharmony_ci pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range, 159662306a36Sopenharmony_ci res_stateid); 159762306a36Sopenharmony_ci } 159862306a36Sopenharmony_ci trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); 159962306a36Sopenharmony_ci if (ld_private && ld_private->ops && ld_private->ops->free) 160062306a36Sopenharmony_ci ld_private->ops->free(ld_private); 160162306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 160262306a36Sopenharmony_ci} 160362306a36Sopenharmony_ci 160462306a36Sopenharmony_cibool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) 160562306a36Sopenharmony_ci{ 160662306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(ino); 160762306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 160862306a36Sopenharmony_ci bool sleep = false; 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci /* we might not have grabbed lo reference. so need to check under 161162306a36Sopenharmony_ci * i_lock */ 161262306a36Sopenharmony_ci spin_lock(&ino->i_lock); 161362306a36Sopenharmony_ci lo = nfsi->layout; 161462306a36Sopenharmony_ci if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 161562306a36Sopenharmony_ci rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); 161662306a36Sopenharmony_ci sleep = true; 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 161962306a36Sopenharmony_ci return sleep; 162062306a36Sopenharmony_ci} 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci/* 162362306a36Sopenharmony_ci * Compare two layout segments for sorting into layout cache. 162462306a36Sopenharmony_ci * We want to preferentially return RW over RO layouts, so ensure those 162562306a36Sopenharmony_ci * are seen first. 162662306a36Sopenharmony_ci */ 162762306a36Sopenharmony_cistatic s64 162862306a36Sopenharmony_cipnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, 162962306a36Sopenharmony_ci const struct pnfs_layout_range *l2) 163062306a36Sopenharmony_ci{ 163162306a36Sopenharmony_ci s64 d; 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci /* high offset > low offset */ 163462306a36Sopenharmony_ci d = l1->offset - l2->offset; 163562306a36Sopenharmony_ci if (d) 163662306a36Sopenharmony_ci return d; 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci /* short length > long length */ 163962306a36Sopenharmony_ci d = l2->length - l1->length; 164062306a36Sopenharmony_ci if (d) 164162306a36Sopenharmony_ci return d; 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci /* read > read/write */ 164462306a36Sopenharmony_ci return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); 164562306a36Sopenharmony_ci} 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_cistatic bool 164862306a36Sopenharmony_cipnfs_lseg_range_is_after(const struct pnfs_layout_range *l1, 164962306a36Sopenharmony_ci const struct pnfs_layout_range *l2) 165062306a36Sopenharmony_ci{ 165162306a36Sopenharmony_ci return pnfs_lseg_range_cmp(l1, l2) > 0; 165262306a36Sopenharmony_ci} 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_cistatic bool 165562306a36Sopenharmony_cipnfs_lseg_no_merge(struct pnfs_layout_segment *lseg, 165662306a36Sopenharmony_ci struct pnfs_layout_segment *old) 165762306a36Sopenharmony_ci{ 165862306a36Sopenharmony_ci return false; 165962306a36Sopenharmony_ci} 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_civoid 166262306a36Sopenharmony_cipnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, 166362306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, 166462306a36Sopenharmony_ci bool (*is_after)(const struct pnfs_layout_range *, 166562306a36Sopenharmony_ci const struct pnfs_layout_range *), 166662306a36Sopenharmony_ci bool (*do_merge)(struct pnfs_layout_segment *, 166762306a36Sopenharmony_ci struct pnfs_layout_segment *), 166862306a36Sopenharmony_ci struct list_head *free_me) 166962306a36Sopenharmony_ci{ 167062306a36Sopenharmony_ci struct pnfs_layout_segment *lp, *tmp; 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci dprintk("%s:Begin\n", __func__); 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) { 167562306a36Sopenharmony_ci if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0) 167662306a36Sopenharmony_ci continue; 167762306a36Sopenharmony_ci if (do_merge(lseg, lp)) { 167862306a36Sopenharmony_ci mark_lseg_invalid(lp, free_me); 167962306a36Sopenharmony_ci continue; 168062306a36Sopenharmony_ci } 168162306a36Sopenharmony_ci if (is_after(&lseg->pls_range, &lp->pls_range)) 168262306a36Sopenharmony_ci continue; 168362306a36Sopenharmony_ci list_add_tail(&lseg->pls_list, &lp->pls_list); 168462306a36Sopenharmony_ci dprintk("%s: inserted lseg %p " 168562306a36Sopenharmony_ci "iomode %d offset %llu length %llu before " 168662306a36Sopenharmony_ci "lp %p iomode %d offset %llu length %llu\n", 168762306a36Sopenharmony_ci __func__, lseg, lseg->pls_range.iomode, 168862306a36Sopenharmony_ci lseg->pls_range.offset, lseg->pls_range.length, 168962306a36Sopenharmony_ci lp, lp->pls_range.iomode, lp->pls_range.offset, 169062306a36Sopenharmony_ci lp->pls_range.length); 169162306a36Sopenharmony_ci goto out; 169262306a36Sopenharmony_ci } 169362306a36Sopenharmony_ci list_add_tail(&lseg->pls_list, &lo->plh_segs); 169462306a36Sopenharmony_ci dprintk("%s: inserted lseg %p " 169562306a36Sopenharmony_ci "iomode %d offset %llu length %llu at tail\n", 169662306a36Sopenharmony_ci __func__, lseg, lseg->pls_range.iomode, 169762306a36Sopenharmony_ci lseg->pls_range.offset, lseg->pls_range.length); 169862306a36Sopenharmony_ciout: 169962306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci dprintk("%s:Return\n", __func__); 170262306a36Sopenharmony_ci} 170362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg); 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_cistatic void 170662306a36Sopenharmony_cipnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, 170762306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, 170862306a36Sopenharmony_ci struct list_head *free_me) 170962306a36Sopenharmony_ci{ 171062306a36Sopenharmony_ci struct inode *inode = lo->plh_inode; 171162306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci if (ld->add_lseg != NULL) 171462306a36Sopenharmony_ci ld->add_lseg(lo, lseg, free_me); 171562306a36Sopenharmony_ci else 171662306a36Sopenharmony_ci pnfs_generic_layout_insert_lseg(lo, lseg, 171762306a36Sopenharmony_ci pnfs_lseg_range_is_after, 171862306a36Sopenharmony_ci pnfs_lseg_no_merge, 171962306a36Sopenharmony_ci free_me); 172062306a36Sopenharmony_ci} 172162306a36Sopenharmony_ci 172262306a36Sopenharmony_cistatic struct pnfs_layout_hdr * 172362306a36Sopenharmony_cialloc_init_layout_hdr(struct inode *ino, 172462306a36Sopenharmony_ci struct nfs_open_context *ctx, 172562306a36Sopenharmony_ci gfp_t gfp_flags) 172662306a36Sopenharmony_ci{ 172762306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci lo = pnfs_alloc_layout_hdr(ino, gfp_flags); 173062306a36Sopenharmony_ci if (!lo) 173162306a36Sopenharmony_ci return NULL; 173262306a36Sopenharmony_ci refcount_set(&lo->plh_refcount, 1); 173362306a36Sopenharmony_ci INIT_LIST_HEAD(&lo->plh_layouts); 173462306a36Sopenharmony_ci INIT_LIST_HEAD(&lo->plh_segs); 173562306a36Sopenharmony_ci INIT_LIST_HEAD(&lo->plh_return_segs); 173662306a36Sopenharmony_ci INIT_LIST_HEAD(&lo->plh_bulk_destroy); 173762306a36Sopenharmony_ci lo->plh_inode = ino; 173862306a36Sopenharmony_ci lo->plh_lc_cred = get_cred(ctx->cred); 173962306a36Sopenharmony_ci lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; 174062306a36Sopenharmony_ci return lo; 174162306a36Sopenharmony_ci} 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_cistatic struct pnfs_layout_hdr * 174462306a36Sopenharmony_cipnfs_find_alloc_layout(struct inode *ino, 174562306a36Sopenharmony_ci struct nfs_open_context *ctx, 174662306a36Sopenharmony_ci gfp_t gfp_flags) 174762306a36Sopenharmony_ci __releases(&ino->i_lock) 174862306a36Sopenharmony_ci __acquires(&ino->i_lock) 174962306a36Sopenharmony_ci{ 175062306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(ino); 175162306a36Sopenharmony_ci struct pnfs_layout_hdr *new = NULL; 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 175462306a36Sopenharmony_ci 175562306a36Sopenharmony_ci if (nfsi->layout != NULL) 175662306a36Sopenharmony_ci goto out_existing; 175762306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 175862306a36Sopenharmony_ci new = alloc_init_layout_hdr(ino, ctx, gfp_flags); 175962306a36Sopenharmony_ci spin_lock(&ino->i_lock); 176062306a36Sopenharmony_ci 176162306a36Sopenharmony_ci if (likely(nfsi->layout == NULL)) { /* Won the race? */ 176262306a36Sopenharmony_ci nfsi->layout = new; 176362306a36Sopenharmony_ci return new; 176462306a36Sopenharmony_ci } else if (new != NULL) 176562306a36Sopenharmony_ci pnfs_free_layout_hdr(new); 176662306a36Sopenharmony_ciout_existing: 176762306a36Sopenharmony_ci pnfs_get_layout_hdr(nfsi->layout); 176862306a36Sopenharmony_ci return nfsi->layout; 176962306a36Sopenharmony_ci} 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_ci/* 177262306a36Sopenharmony_ci * iomode matching rules: 177362306a36Sopenharmony_ci * iomode lseg strict match 177462306a36Sopenharmony_ci * iomode 177562306a36Sopenharmony_ci * ----- ----- ------ ----- 177662306a36Sopenharmony_ci * ANY READ N/A true 177762306a36Sopenharmony_ci * ANY RW N/A true 177862306a36Sopenharmony_ci * RW READ N/A false 177962306a36Sopenharmony_ci * RW RW N/A true 178062306a36Sopenharmony_ci * READ READ N/A true 178162306a36Sopenharmony_ci * READ RW true false 178262306a36Sopenharmony_ci * READ RW false true 178362306a36Sopenharmony_ci */ 178462306a36Sopenharmony_cistatic bool 178562306a36Sopenharmony_cipnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, 178662306a36Sopenharmony_ci const struct pnfs_layout_range *range, 178762306a36Sopenharmony_ci bool strict_iomode) 178862306a36Sopenharmony_ci{ 178962306a36Sopenharmony_ci struct pnfs_layout_range range1; 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci if ((range->iomode == IOMODE_RW && 179262306a36Sopenharmony_ci ls_range->iomode != IOMODE_RW) || 179362306a36Sopenharmony_ci (range->iomode != ls_range->iomode && 179462306a36Sopenharmony_ci strict_iomode) || 179562306a36Sopenharmony_ci !pnfs_lseg_range_intersecting(ls_range, range)) 179662306a36Sopenharmony_ci return false; 179762306a36Sopenharmony_ci 179862306a36Sopenharmony_ci /* range1 covers only the first byte in the range */ 179962306a36Sopenharmony_ci range1 = *range; 180062306a36Sopenharmony_ci range1.length = 1; 180162306a36Sopenharmony_ci return pnfs_lseg_range_contained(ls_range, &range1); 180262306a36Sopenharmony_ci} 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci/* 180562306a36Sopenharmony_ci * lookup range in layout 180662306a36Sopenharmony_ci */ 180762306a36Sopenharmony_cistatic struct pnfs_layout_segment * 180862306a36Sopenharmony_cipnfs_find_lseg(struct pnfs_layout_hdr *lo, 180962306a36Sopenharmony_ci struct pnfs_layout_range *range, 181062306a36Sopenharmony_ci bool strict_iomode) 181162306a36Sopenharmony_ci{ 181262306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *ret = NULL; 181362306a36Sopenharmony_ci 181462306a36Sopenharmony_ci dprintk("%s:Begin\n", __func__); 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 181762306a36Sopenharmony_ci if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 181862306a36Sopenharmony_ci pnfs_lseg_range_match(&lseg->pls_range, range, 181962306a36Sopenharmony_ci strict_iomode)) { 182062306a36Sopenharmony_ci ret = pnfs_get_lseg(lseg); 182162306a36Sopenharmony_ci break; 182262306a36Sopenharmony_ci } 182362306a36Sopenharmony_ci } 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci dprintk("%s:Return lseg %p ref %d\n", 182662306a36Sopenharmony_ci __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0); 182762306a36Sopenharmony_ci return ret; 182862306a36Sopenharmony_ci} 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci/* 183162306a36Sopenharmony_ci * Use mdsthreshold hints set at each OPEN to determine if I/O should go 183262306a36Sopenharmony_ci * to the MDS or over pNFS 183362306a36Sopenharmony_ci * 183462306a36Sopenharmony_ci * The nfs_inode read_io and write_io fields are cumulative counters reset 183562306a36Sopenharmony_ci * when there are no layout segments. Note that in pnfs_update_layout iomode 183662306a36Sopenharmony_ci * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a 183762306a36Sopenharmony_ci * WRITE request. 183862306a36Sopenharmony_ci * 183962306a36Sopenharmony_ci * A return of true means use MDS I/O. 184062306a36Sopenharmony_ci * 184162306a36Sopenharmony_ci * From rfc 5661: 184262306a36Sopenharmony_ci * If a file's size is smaller than the file size threshold, data accesses 184362306a36Sopenharmony_ci * SHOULD be sent to the metadata server. If an I/O request has a length that 184462306a36Sopenharmony_ci * is below the I/O size threshold, the I/O SHOULD be sent to the metadata 184562306a36Sopenharmony_ci * server. If both file size and I/O size are provided, the client SHOULD 184662306a36Sopenharmony_ci * reach or exceed both thresholds before sending its read or write 184762306a36Sopenharmony_ci * requests to the data server. 184862306a36Sopenharmony_ci */ 184962306a36Sopenharmony_cistatic bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, 185062306a36Sopenharmony_ci struct inode *ino, int iomode) 185162306a36Sopenharmony_ci{ 185262306a36Sopenharmony_ci struct nfs4_threshold *t = ctx->mdsthreshold; 185362306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(ino); 185462306a36Sopenharmony_ci loff_t fsize = i_size_read(ino); 185562306a36Sopenharmony_ci bool size = false, size_set = false, io = false, io_set = false, ret = false; 185662306a36Sopenharmony_ci 185762306a36Sopenharmony_ci if (t == NULL) 185862306a36Sopenharmony_ci return ret; 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", 186162306a36Sopenharmony_ci __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ci switch (iomode) { 186462306a36Sopenharmony_ci case IOMODE_READ: 186562306a36Sopenharmony_ci if (t->bm & THRESHOLD_RD) { 186662306a36Sopenharmony_ci dprintk("%s fsize %llu\n", __func__, fsize); 186762306a36Sopenharmony_ci size_set = true; 186862306a36Sopenharmony_ci if (fsize < t->rd_sz) 186962306a36Sopenharmony_ci size = true; 187062306a36Sopenharmony_ci } 187162306a36Sopenharmony_ci if (t->bm & THRESHOLD_RD_IO) { 187262306a36Sopenharmony_ci dprintk("%s nfsi->read_io %llu\n", __func__, 187362306a36Sopenharmony_ci nfsi->read_io); 187462306a36Sopenharmony_ci io_set = true; 187562306a36Sopenharmony_ci if (nfsi->read_io < t->rd_io_sz) 187662306a36Sopenharmony_ci io = true; 187762306a36Sopenharmony_ci } 187862306a36Sopenharmony_ci break; 187962306a36Sopenharmony_ci case IOMODE_RW: 188062306a36Sopenharmony_ci if (t->bm & THRESHOLD_WR) { 188162306a36Sopenharmony_ci dprintk("%s fsize %llu\n", __func__, fsize); 188262306a36Sopenharmony_ci size_set = true; 188362306a36Sopenharmony_ci if (fsize < t->wr_sz) 188462306a36Sopenharmony_ci size = true; 188562306a36Sopenharmony_ci } 188662306a36Sopenharmony_ci if (t->bm & THRESHOLD_WR_IO) { 188762306a36Sopenharmony_ci dprintk("%s nfsi->write_io %llu\n", __func__, 188862306a36Sopenharmony_ci nfsi->write_io); 188962306a36Sopenharmony_ci io_set = true; 189062306a36Sopenharmony_ci if (nfsi->write_io < t->wr_io_sz) 189162306a36Sopenharmony_ci io = true; 189262306a36Sopenharmony_ci } 189362306a36Sopenharmony_ci break; 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci if (size_set && io_set) { 189662306a36Sopenharmony_ci if (size && io) 189762306a36Sopenharmony_ci ret = true; 189862306a36Sopenharmony_ci } else if (size || io) 189962306a36Sopenharmony_ci ret = true; 190062306a36Sopenharmony_ci 190162306a36Sopenharmony_ci dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); 190262306a36Sopenharmony_ci return ret; 190362306a36Sopenharmony_ci} 190462306a36Sopenharmony_ci 190562306a36Sopenharmony_cistatic int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 190662306a36Sopenharmony_ci{ 190762306a36Sopenharmony_ci /* 190862306a36Sopenharmony_ci * send layoutcommit as it can hold up layoutreturn due to lseg 190962306a36Sopenharmony_ci * reference 191062306a36Sopenharmony_ci */ 191162306a36Sopenharmony_ci pnfs_layoutcommit_inode(lo->plh_inode, false); 191262306a36Sopenharmony_ci return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 191362306a36Sopenharmony_ci nfs_wait_bit_killable, 191462306a36Sopenharmony_ci TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); 191562306a36Sopenharmony_ci} 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_cistatic void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) 191862306a36Sopenharmony_ci{ 191962306a36Sopenharmony_ci atomic_inc(&lo->plh_outstanding); 192062306a36Sopenharmony_ci} 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_cistatic void nfs_layoutget_end(struct pnfs_layout_hdr *lo) 192362306a36Sopenharmony_ci{ 192462306a36Sopenharmony_ci if (atomic_dec_and_test(&lo->plh_outstanding) && 192562306a36Sopenharmony_ci test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) 192662306a36Sopenharmony_ci wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); 192762306a36Sopenharmony_ci} 192862306a36Sopenharmony_ci 192962306a36Sopenharmony_cistatic bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) 193062306a36Sopenharmony_ci{ 193162306a36Sopenharmony_ci return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags); 193262306a36Sopenharmony_ci} 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_cistatic void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) 193562306a36Sopenharmony_ci{ 193662306a36Sopenharmony_ci unsigned long *bitlock = &lo->plh_flags; 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_ci clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); 193962306a36Sopenharmony_ci smp_mb__after_atomic(); 194062306a36Sopenharmony_ci wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); 194162306a36Sopenharmony_ci} 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_cistatic void _add_to_server_list(struct pnfs_layout_hdr *lo, 194462306a36Sopenharmony_ci struct nfs_server *server) 194562306a36Sopenharmony_ci{ 194662306a36Sopenharmony_ci if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) { 194762306a36Sopenharmony_ci struct nfs_client *clp = server->nfs_client; 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci /* The lo must be on the clp list if there is any 195062306a36Sopenharmony_ci * chance of a CB_LAYOUTRECALL(FILE) coming in. 195162306a36Sopenharmony_ci */ 195262306a36Sopenharmony_ci spin_lock(&clp->cl_lock); 195362306a36Sopenharmony_ci list_add_tail_rcu(&lo->plh_layouts, &server->layouts); 195462306a36Sopenharmony_ci spin_unlock(&clp->cl_lock); 195562306a36Sopenharmony_ci } 195662306a36Sopenharmony_ci} 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_ci/* 195962306a36Sopenharmony_ci * Layout segment is retreived from the server if not cached. 196062306a36Sopenharmony_ci * The appropriate layout segment is referenced and returned to the caller. 196162306a36Sopenharmony_ci */ 196262306a36Sopenharmony_cistruct pnfs_layout_segment * 196362306a36Sopenharmony_cipnfs_update_layout(struct inode *ino, 196462306a36Sopenharmony_ci struct nfs_open_context *ctx, 196562306a36Sopenharmony_ci loff_t pos, 196662306a36Sopenharmony_ci u64 count, 196762306a36Sopenharmony_ci enum pnfs_iomode iomode, 196862306a36Sopenharmony_ci bool strict_iomode, 196962306a36Sopenharmony_ci gfp_t gfp_flags) 197062306a36Sopenharmony_ci{ 197162306a36Sopenharmony_ci struct pnfs_layout_range arg = { 197262306a36Sopenharmony_ci .iomode = iomode, 197362306a36Sopenharmony_ci .offset = pos, 197462306a36Sopenharmony_ci .length = count, 197562306a36Sopenharmony_ci }; 197662306a36Sopenharmony_ci unsigned pg_offset; 197762306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(ino); 197862306a36Sopenharmony_ci struct nfs_client *clp = server->nfs_client; 197962306a36Sopenharmony_ci struct pnfs_layout_hdr *lo = NULL; 198062306a36Sopenharmony_ci struct pnfs_layout_segment *lseg = NULL; 198162306a36Sopenharmony_ci struct nfs4_layoutget *lgp; 198262306a36Sopenharmony_ci nfs4_stateid stateid; 198362306a36Sopenharmony_ci long timeout = 0; 198462306a36Sopenharmony_ci unsigned long giveup = jiffies + (clp->cl_lease_time << 1); 198562306a36Sopenharmony_ci bool first; 198662306a36Sopenharmony_ci 198762306a36Sopenharmony_ci if (!pnfs_enabled_sb(NFS_SERVER(ino))) { 198862306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 198962306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_NO_PNFS); 199062306a36Sopenharmony_ci goto out; 199162306a36Sopenharmony_ci } 199262306a36Sopenharmony_ci 199362306a36Sopenharmony_ci if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { 199462306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 199562306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_MDSTHRESH); 199662306a36Sopenharmony_ci goto out; 199762306a36Sopenharmony_ci } 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_cilookup_again: 200062306a36Sopenharmony_ci lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp)); 200162306a36Sopenharmony_ci if (IS_ERR(lseg)) 200262306a36Sopenharmony_ci goto out; 200362306a36Sopenharmony_ci first = false; 200462306a36Sopenharmony_ci spin_lock(&ino->i_lock); 200562306a36Sopenharmony_ci lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 200662306a36Sopenharmony_ci if (lo == NULL) { 200762306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 200862306a36Sopenharmony_ci lseg = ERR_PTR(-ENOMEM); 200962306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 201062306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_NOMEM); 201162306a36Sopenharmony_ci goto out; 201262306a36Sopenharmony_ci } 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci /* Do we even need to bother with this? */ 201562306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { 201662306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 201762306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_BULK_RECALL); 201862306a36Sopenharmony_ci dprintk("%s matches recall, use MDS\n", __func__); 201962306a36Sopenharmony_ci goto out_unlock; 202062306a36Sopenharmony_ci } 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci /* if LAYOUTGET already failed once we don't try again */ 202362306a36Sopenharmony_ci if (pnfs_layout_io_test_failed(lo, iomode)) { 202462306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 202562306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); 202662306a36Sopenharmony_ci goto out_unlock; 202762306a36Sopenharmony_ci } 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci /* 203062306a36Sopenharmony_ci * If the layout segment list is empty, but there are outstanding 203162306a36Sopenharmony_ci * layoutget calls, then they might be subject to a layoutrecall. 203262306a36Sopenharmony_ci */ 203362306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && 203462306a36Sopenharmony_ci atomic_read(&lo->plh_outstanding) != 0) { 203562306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 203662306a36Sopenharmony_ci lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, 203762306a36Sopenharmony_ci TASK_KILLABLE)); 203862306a36Sopenharmony_ci if (IS_ERR(lseg)) 203962306a36Sopenharmony_ci goto out_put_layout_hdr; 204062306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 204162306a36Sopenharmony_ci goto lookup_again; 204262306a36Sopenharmony_ci } 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci /* 204562306a36Sopenharmony_ci * Because we free lsegs when sending LAYOUTRETURN, we need to wait 204662306a36Sopenharmony_ci * for LAYOUTRETURN. 204762306a36Sopenharmony_ci */ 204862306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { 204962306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 205062306a36Sopenharmony_ci dprintk("%s wait for layoutreturn\n", __func__); 205162306a36Sopenharmony_ci lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); 205262306a36Sopenharmony_ci if (!IS_ERR(lseg)) { 205362306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 205462306a36Sopenharmony_ci dprintk("%s retrying\n", __func__); 205562306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, 205662306a36Sopenharmony_ci lseg, 205762306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_RETRY); 205862306a36Sopenharmony_ci goto lookup_again; 205962306a36Sopenharmony_ci } 206062306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 206162306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_RETURN); 206262306a36Sopenharmony_ci goto out_put_layout_hdr; 206362306a36Sopenharmony_ci } 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci lseg = pnfs_find_lseg(lo, &arg, strict_iomode); 206662306a36Sopenharmony_ci if (lseg) { 206762306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 206862306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_FOUND_CACHED); 206962306a36Sopenharmony_ci goto out_unlock; 207062306a36Sopenharmony_ci } 207162306a36Sopenharmony_ci 207262306a36Sopenharmony_ci /* 207362306a36Sopenharmony_ci * Choose a stateid for the LAYOUTGET. If we don't have a layout 207462306a36Sopenharmony_ci * stateid, or it has been invalidated, then we must use the open 207562306a36Sopenharmony_ci * stateid. 207662306a36Sopenharmony_ci */ 207762306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { 207862306a36Sopenharmony_ci int status; 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_ci /* 208162306a36Sopenharmony_ci * The first layoutget for the file. Need to serialize per 208262306a36Sopenharmony_ci * RFC 5661 Errata 3208. 208362306a36Sopenharmony_ci */ 208462306a36Sopenharmony_ci if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, 208562306a36Sopenharmony_ci &lo->plh_flags)) { 208662306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 208762306a36Sopenharmony_ci lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, 208862306a36Sopenharmony_ci NFS_LAYOUT_FIRST_LAYOUTGET, 208962306a36Sopenharmony_ci TASK_KILLABLE)); 209062306a36Sopenharmony_ci if (IS_ERR(lseg)) 209162306a36Sopenharmony_ci goto out_put_layout_hdr; 209262306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 209362306a36Sopenharmony_ci dprintk("%s retrying\n", __func__); 209462306a36Sopenharmony_ci goto lookup_again; 209562306a36Sopenharmony_ci } 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 209862306a36Sopenharmony_ci first = true; 209962306a36Sopenharmony_ci status = nfs4_select_rw_stateid(ctx->state, 210062306a36Sopenharmony_ci iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, 210162306a36Sopenharmony_ci NULL, &stateid, NULL); 210262306a36Sopenharmony_ci if (status != 0) { 210362306a36Sopenharmony_ci lseg = ERR_PTR(status); 210462306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, 210562306a36Sopenharmony_ci iomode, lo, lseg, 210662306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_INVALID_OPEN); 210762306a36Sopenharmony_ci nfs4_schedule_stateid_recovery(server, ctx->state); 210862306a36Sopenharmony_ci pnfs_clear_first_layoutget(lo); 210962306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 211062306a36Sopenharmony_ci goto lookup_again; 211162306a36Sopenharmony_ci } 211262306a36Sopenharmony_ci spin_lock(&ino->i_lock); 211362306a36Sopenharmony_ci } else { 211462306a36Sopenharmony_ci nfs4_stateid_copy(&stateid, &lo->plh_stateid); 211562306a36Sopenharmony_ci } 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci if (pnfs_layoutgets_blocked(lo)) { 211862306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 211962306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_BLOCKED); 212062306a36Sopenharmony_ci goto out_unlock; 212162306a36Sopenharmony_ci } 212262306a36Sopenharmony_ci nfs_layoutget_begin(lo); 212362306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 212462306a36Sopenharmony_ci 212562306a36Sopenharmony_ci _add_to_server_list(lo, server); 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_ci pg_offset = arg.offset & ~PAGE_MASK; 212862306a36Sopenharmony_ci if (pg_offset) { 212962306a36Sopenharmony_ci arg.offset -= pg_offset; 213062306a36Sopenharmony_ci arg.length += pg_offset; 213162306a36Sopenharmony_ci } 213262306a36Sopenharmony_ci if (arg.length != NFS4_MAX_UINT64) 213362306a36Sopenharmony_ci arg.length = PAGE_ALIGN(arg.length); 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ci lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); 213662306a36Sopenharmony_ci if (!lgp) { 213762306a36Sopenharmony_ci lseg = ERR_PTR(-ENOMEM); 213862306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, 213962306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_NOMEM); 214062306a36Sopenharmony_ci nfs_layoutget_end(lo); 214162306a36Sopenharmony_ci goto out_put_layout_hdr; 214262306a36Sopenharmony_ci } 214362306a36Sopenharmony_ci 214462306a36Sopenharmony_ci lgp->lo = lo; 214562306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_ci lseg = nfs4_proc_layoutget(lgp, &timeout); 214862306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 214962306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 215062306a36Sopenharmony_ci nfs_layoutget_end(lo); 215162306a36Sopenharmony_ci if (IS_ERR(lseg)) { 215262306a36Sopenharmony_ci switch(PTR_ERR(lseg)) { 215362306a36Sopenharmony_ci case -EBUSY: 215462306a36Sopenharmony_ci if (time_after(jiffies, giveup)) 215562306a36Sopenharmony_ci lseg = NULL; 215662306a36Sopenharmony_ci break; 215762306a36Sopenharmony_ci case -ERECALLCONFLICT: 215862306a36Sopenharmony_ci case -EAGAIN: 215962306a36Sopenharmony_ci break; 216062306a36Sopenharmony_ci case -ENODATA: 216162306a36Sopenharmony_ci /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ 216262306a36Sopenharmony_ci pnfs_layout_set_fail_bit( 216362306a36Sopenharmony_ci lo, pnfs_iomode_to_fail_bit(iomode)); 216462306a36Sopenharmony_ci lseg = NULL; 216562306a36Sopenharmony_ci goto out_put_layout_hdr; 216662306a36Sopenharmony_ci default: 216762306a36Sopenharmony_ci if (!nfs_error_is_fatal(PTR_ERR(lseg))) { 216862306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 216962306a36Sopenharmony_ci lseg = NULL; 217062306a36Sopenharmony_ci } 217162306a36Sopenharmony_ci goto out_put_layout_hdr; 217262306a36Sopenharmony_ci } 217362306a36Sopenharmony_ci if (lseg) { 217462306a36Sopenharmony_ci if (first) 217562306a36Sopenharmony_ci pnfs_clear_first_layoutget(lo); 217662306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, 217762306a36Sopenharmony_ci iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); 217862306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 217962306a36Sopenharmony_ci goto lookup_again; 218062306a36Sopenharmony_ci } 218162306a36Sopenharmony_ci } else { 218262306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 218362306a36Sopenharmony_ci } 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ciout_put_layout_hdr: 218662306a36Sopenharmony_ci if (first) 218762306a36Sopenharmony_ci pnfs_clear_first_layoutget(lo); 218862306a36Sopenharmony_ci trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, 218962306a36Sopenharmony_ci PNFS_UPDATE_LAYOUT_EXIT); 219062306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 219162306a36Sopenharmony_ciout: 219262306a36Sopenharmony_ci dprintk("%s: inode %s/%llu pNFS layout segment %s for " 219362306a36Sopenharmony_ci "(%s, offset: %llu, length: %llu)\n", 219462306a36Sopenharmony_ci __func__, ino->i_sb->s_id, 219562306a36Sopenharmony_ci (unsigned long long)NFS_FILEID(ino), 219662306a36Sopenharmony_ci IS_ERR_OR_NULL(lseg) ? "not found" : "found", 219762306a36Sopenharmony_ci iomode==IOMODE_RW ? "read/write" : "read-only", 219862306a36Sopenharmony_ci (unsigned long long)pos, 219962306a36Sopenharmony_ci (unsigned long long)count); 220062306a36Sopenharmony_ci return lseg; 220162306a36Sopenharmony_ciout_unlock: 220262306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 220362306a36Sopenharmony_ci goto out_put_layout_hdr; 220462306a36Sopenharmony_ci} 220562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_update_layout); 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_cistatic bool 220862306a36Sopenharmony_cipnfs_sanity_check_layout_range(struct pnfs_layout_range *range) 220962306a36Sopenharmony_ci{ 221062306a36Sopenharmony_ci switch (range->iomode) { 221162306a36Sopenharmony_ci case IOMODE_READ: 221262306a36Sopenharmony_ci case IOMODE_RW: 221362306a36Sopenharmony_ci break; 221462306a36Sopenharmony_ci default: 221562306a36Sopenharmony_ci return false; 221662306a36Sopenharmony_ci } 221762306a36Sopenharmony_ci if (range->offset == NFS4_MAX_UINT64) 221862306a36Sopenharmony_ci return false; 221962306a36Sopenharmony_ci if (range->length == 0) 222062306a36Sopenharmony_ci return false; 222162306a36Sopenharmony_ci if (range->length != NFS4_MAX_UINT64 && 222262306a36Sopenharmony_ci range->length > NFS4_MAX_UINT64 - range->offset) 222362306a36Sopenharmony_ci return false; 222462306a36Sopenharmony_ci return true; 222562306a36Sopenharmony_ci} 222662306a36Sopenharmony_ci 222762306a36Sopenharmony_cistatic struct pnfs_layout_hdr * 222862306a36Sopenharmony_ci_pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx) 222962306a36Sopenharmony_ci{ 223062306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci spin_lock(&ino->i_lock); 223362306a36Sopenharmony_ci lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask()); 223462306a36Sopenharmony_ci if (!lo) 223562306a36Sopenharmony_ci goto out_unlock; 223662306a36Sopenharmony_ci if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) 223762306a36Sopenharmony_ci goto out_unlock; 223862306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 223962306a36Sopenharmony_ci goto out_unlock; 224062306a36Sopenharmony_ci if (pnfs_layoutgets_blocked(lo)) 224162306a36Sopenharmony_ci goto out_unlock; 224262306a36Sopenharmony_ci if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags)) 224362306a36Sopenharmony_ci goto out_unlock; 224462306a36Sopenharmony_ci nfs_layoutget_begin(lo); 224562306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 224662306a36Sopenharmony_ci _add_to_server_list(lo, NFS_SERVER(ino)); 224762306a36Sopenharmony_ci return lo; 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ciout_unlock: 225062306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 225162306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 225262306a36Sopenharmony_ci return NULL; 225362306a36Sopenharmony_ci} 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_cistatic void _lgopen_prepare_attached(struct nfs4_opendata *data, 225662306a36Sopenharmony_ci struct nfs_open_context *ctx) 225762306a36Sopenharmony_ci{ 225862306a36Sopenharmony_ci struct inode *ino = data->dentry->d_inode; 225962306a36Sopenharmony_ci struct pnfs_layout_range rng = { 226062306a36Sopenharmony_ci .iomode = (data->o_arg.fmode & FMODE_WRITE) ? 226162306a36Sopenharmony_ci IOMODE_RW: IOMODE_READ, 226262306a36Sopenharmony_ci .offset = 0, 226362306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 226462306a36Sopenharmony_ci }; 226562306a36Sopenharmony_ci struct nfs4_layoutget *lgp; 226662306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 226762306a36Sopenharmony_ci 226862306a36Sopenharmony_ci /* Heuristic: don't send layoutget if we have cached data */ 226962306a36Sopenharmony_ci if (rng.iomode == IOMODE_READ && 227062306a36Sopenharmony_ci (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0)) 227162306a36Sopenharmony_ci return; 227262306a36Sopenharmony_ci 227362306a36Sopenharmony_ci lo = _pnfs_grab_empty_layout(ino, ctx); 227462306a36Sopenharmony_ci if (!lo) 227562306a36Sopenharmony_ci return; 227662306a36Sopenharmony_ci lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, &rng, 227762306a36Sopenharmony_ci nfs_io_gfp_mask()); 227862306a36Sopenharmony_ci if (!lgp) { 227962306a36Sopenharmony_ci pnfs_clear_first_layoutget(lo); 228062306a36Sopenharmony_ci nfs_layoutget_end(lo); 228162306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 228262306a36Sopenharmony_ci return; 228362306a36Sopenharmony_ci } 228462306a36Sopenharmony_ci lgp->lo = lo; 228562306a36Sopenharmony_ci data->lgp = lgp; 228662306a36Sopenharmony_ci data->o_arg.lg_args = &lgp->args; 228762306a36Sopenharmony_ci data->o_res.lg_res = &lgp->res; 228862306a36Sopenharmony_ci} 228962306a36Sopenharmony_ci 229062306a36Sopenharmony_cistatic void _lgopen_prepare_floating(struct nfs4_opendata *data, 229162306a36Sopenharmony_ci struct nfs_open_context *ctx) 229262306a36Sopenharmony_ci{ 229362306a36Sopenharmony_ci struct inode *ino = data->dentry->d_inode; 229462306a36Sopenharmony_ci struct pnfs_layout_range rng = { 229562306a36Sopenharmony_ci .iomode = (data->o_arg.fmode & FMODE_WRITE) ? 229662306a36Sopenharmony_ci IOMODE_RW: IOMODE_READ, 229762306a36Sopenharmony_ci .offset = 0, 229862306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 229962306a36Sopenharmony_ci }; 230062306a36Sopenharmony_ci struct nfs4_layoutget *lgp; 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, &rng, 230362306a36Sopenharmony_ci nfs_io_gfp_mask()); 230462306a36Sopenharmony_ci if (!lgp) 230562306a36Sopenharmony_ci return; 230662306a36Sopenharmony_ci data->lgp = lgp; 230762306a36Sopenharmony_ci data->o_arg.lg_args = &lgp->args; 230862306a36Sopenharmony_ci data->o_res.lg_res = &lgp->res; 230962306a36Sopenharmony_ci} 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_civoid pnfs_lgopen_prepare(struct nfs4_opendata *data, 231262306a36Sopenharmony_ci struct nfs_open_context *ctx) 231362306a36Sopenharmony_ci{ 231462306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(data->dir->d_inode); 231562306a36Sopenharmony_ci 231662306a36Sopenharmony_ci if (!(pnfs_enabled_sb(server) && 231762306a36Sopenharmony_ci server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) 231862306a36Sopenharmony_ci return; 231962306a36Sopenharmony_ci /* Could check on max_ops, but currently hardcoded high enough */ 232062306a36Sopenharmony_ci if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) 232162306a36Sopenharmony_ci return; 232262306a36Sopenharmony_ci if (data->lgp) 232362306a36Sopenharmony_ci return; 232462306a36Sopenharmony_ci if (data->state) 232562306a36Sopenharmony_ci _lgopen_prepare_attached(data, ctx); 232662306a36Sopenharmony_ci else 232762306a36Sopenharmony_ci _lgopen_prepare_floating(data, ctx); 232862306a36Sopenharmony_ci} 232962306a36Sopenharmony_ci 233062306a36Sopenharmony_civoid pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, 233162306a36Sopenharmony_ci struct nfs_open_context *ctx) 233262306a36Sopenharmony_ci{ 233362306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 233462306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 233562306a36Sopenharmony_ci struct nfs_server *srv = NFS_SERVER(ino); 233662306a36Sopenharmony_ci u32 iomode; 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_ci if (!lgp) 233962306a36Sopenharmony_ci return; 234062306a36Sopenharmony_ci dprintk("%s: entered with status %i\n", __func__, lgp->res.status); 234162306a36Sopenharmony_ci if (lgp->res.status) { 234262306a36Sopenharmony_ci switch (lgp->res.status) { 234362306a36Sopenharmony_ci default: 234462306a36Sopenharmony_ci break; 234562306a36Sopenharmony_ci /* 234662306a36Sopenharmony_ci * Halt lgopen attempts if the server doesn't recognise 234762306a36Sopenharmony_ci * the "current stateid" value, the layout type, or the 234862306a36Sopenharmony_ci * layoutget operation as being valid. 234962306a36Sopenharmony_ci * Also if it complains about too many ops in the compound 235062306a36Sopenharmony_ci * or of the request/reply being too big. 235162306a36Sopenharmony_ci */ 235262306a36Sopenharmony_ci case -NFS4ERR_BAD_STATEID: 235362306a36Sopenharmony_ci case -NFS4ERR_NOTSUPP: 235462306a36Sopenharmony_ci case -NFS4ERR_REP_TOO_BIG: 235562306a36Sopenharmony_ci case -NFS4ERR_REP_TOO_BIG_TO_CACHE: 235662306a36Sopenharmony_ci case -NFS4ERR_REQ_TOO_BIG: 235762306a36Sopenharmony_ci case -NFS4ERR_TOO_MANY_OPS: 235862306a36Sopenharmony_ci case -NFS4ERR_UNKNOWN_LAYOUTTYPE: 235962306a36Sopenharmony_ci srv->caps &= ~NFS_CAP_LGOPEN; 236062306a36Sopenharmony_ci } 236162306a36Sopenharmony_ci return; 236262306a36Sopenharmony_ci } 236362306a36Sopenharmony_ci if (!lgp->lo) { 236462306a36Sopenharmony_ci lo = _pnfs_grab_empty_layout(ino, ctx); 236562306a36Sopenharmony_ci if (!lo) 236662306a36Sopenharmony_ci return; 236762306a36Sopenharmony_ci lgp->lo = lo; 236862306a36Sopenharmony_ci } else 236962306a36Sopenharmony_ci lo = lgp->lo; 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci lseg = pnfs_layout_process(lgp); 237262306a36Sopenharmony_ci if (!IS_ERR(lseg)) { 237362306a36Sopenharmony_ci iomode = lgp->args.range.iomode; 237462306a36Sopenharmony_ci pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); 237562306a36Sopenharmony_ci pnfs_put_lseg(lseg); 237662306a36Sopenharmony_ci } 237762306a36Sopenharmony_ci} 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_civoid nfs4_lgopen_release(struct nfs4_layoutget *lgp) 238062306a36Sopenharmony_ci{ 238162306a36Sopenharmony_ci if (lgp != NULL) { 238262306a36Sopenharmony_ci if (lgp->lo) { 238362306a36Sopenharmony_ci pnfs_clear_first_layoutget(lgp->lo); 238462306a36Sopenharmony_ci nfs_layoutget_end(lgp->lo); 238562306a36Sopenharmony_ci } 238662306a36Sopenharmony_ci pnfs_layoutget_free(lgp); 238762306a36Sopenharmony_ci } 238862306a36Sopenharmony_ci} 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_cistruct pnfs_layout_segment * 239162306a36Sopenharmony_cipnfs_layout_process(struct nfs4_layoutget *lgp) 239262306a36Sopenharmony_ci{ 239362306a36Sopenharmony_ci struct pnfs_layout_hdr *lo = lgp->lo; 239462306a36Sopenharmony_ci struct nfs4_layoutget_res *res = &lgp->res; 239562306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 239662306a36Sopenharmony_ci struct inode *ino = lo->plh_inode; 239762306a36Sopenharmony_ci LIST_HEAD(free_me); 239862306a36Sopenharmony_ci 239962306a36Sopenharmony_ci if (!pnfs_sanity_check_layout_range(&res->range)) 240062306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci /* Inject layout blob into I/O device driver */ 240362306a36Sopenharmony_ci lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); 240462306a36Sopenharmony_ci if (IS_ERR_OR_NULL(lseg)) { 240562306a36Sopenharmony_ci if (!lseg) 240662306a36Sopenharmony_ci lseg = ERR_PTR(-ENOMEM); 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci dprintk("%s: Could not allocate layout: error %ld\n", 240962306a36Sopenharmony_ci __func__, PTR_ERR(lseg)); 241062306a36Sopenharmony_ci return lseg; 241162306a36Sopenharmony_ci } 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_ci pnfs_init_lseg(lo, lseg, &res->range, &res->stateid); 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci spin_lock(&ino->i_lock); 241662306a36Sopenharmony_ci if (pnfs_layoutgets_blocked(lo)) { 241762306a36Sopenharmony_ci dprintk("%s forget reply due to state\n", __func__); 241862306a36Sopenharmony_ci goto out_forget; 241962306a36Sopenharmony_ci } 242062306a36Sopenharmony_ci 242162306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && 242262306a36Sopenharmony_ci !pnfs_is_first_layoutget(lo)) 242362306a36Sopenharmony_ci goto out_forget; 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { 242662306a36Sopenharmony_ci /* existing state ID, make sure the sequence number matches. */ 242762306a36Sopenharmony_ci if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { 242862306a36Sopenharmony_ci if (!pnfs_layout_is_valid(lo)) 242962306a36Sopenharmony_ci lo->plh_barrier = 0; 243062306a36Sopenharmony_ci dprintk("%s forget reply due to sequence\n", __func__); 243162306a36Sopenharmony_ci goto out_forget; 243262306a36Sopenharmony_ci } 243362306a36Sopenharmony_ci pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false); 243462306a36Sopenharmony_ci } else if (pnfs_layout_is_valid(lo)) { 243562306a36Sopenharmony_ci /* 243662306a36Sopenharmony_ci * We got an entirely new state ID. Mark all segments for the 243762306a36Sopenharmony_ci * inode invalid, and retry the layoutget 243862306a36Sopenharmony_ci */ 243962306a36Sopenharmony_ci struct pnfs_layout_range range = { 244062306a36Sopenharmony_ci .iomode = IOMODE_ANY, 244162306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 244262306a36Sopenharmony_ci }; 244362306a36Sopenharmony_ci pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0); 244462306a36Sopenharmony_ci goto out_forget; 244562306a36Sopenharmony_ci } else { 244662306a36Sopenharmony_ci /* We have a completely new layout */ 244762306a36Sopenharmony_ci pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci pnfs_get_lseg(lseg); 245162306a36Sopenharmony_ci pnfs_layout_insert_lseg(lo, lseg, &free_me); 245262306a36Sopenharmony_ci 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci if (res->return_on_close) 245562306a36Sopenharmony_ci set_bit(NFS_LSEG_ROC, &lseg->pls_flags); 245662306a36Sopenharmony_ci 245762306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 245862306a36Sopenharmony_ci pnfs_free_lseg_list(&free_me); 245962306a36Sopenharmony_ci return lseg; 246062306a36Sopenharmony_ci 246162306a36Sopenharmony_ciout_forget: 246262306a36Sopenharmony_ci spin_unlock(&ino->i_lock); 246362306a36Sopenharmony_ci lseg->pls_layout = lo; 246462306a36Sopenharmony_ci NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 246562306a36Sopenharmony_ci return ERR_PTR(-EAGAIN); 246662306a36Sopenharmony_ci} 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci/** 246962306a36Sopenharmony_ci * pnfs_mark_matching_lsegs_return - Free or return matching layout segments 247062306a36Sopenharmony_ci * @lo: pointer to layout header 247162306a36Sopenharmony_ci * @tmp_list: list header to be used with pnfs_free_lseg_list() 247262306a36Sopenharmony_ci * @return_range: describe layout segment ranges to be returned 247362306a36Sopenharmony_ci * @seq: stateid seqid to match 247462306a36Sopenharmony_ci * 247562306a36Sopenharmony_ci * This function is mainly intended for use by layoutrecall. It attempts 247662306a36Sopenharmony_ci * to free the layout segment immediately, or else to mark it for return 247762306a36Sopenharmony_ci * as soon as its reference count drops to zero. 247862306a36Sopenharmony_ci * 247962306a36Sopenharmony_ci * Returns 248062306a36Sopenharmony_ci * - 0: a layoutreturn needs to be scheduled. 248162306a36Sopenharmony_ci * - EBUSY: there are layout segment that are still in use. 248262306a36Sopenharmony_ci * - ENOENT: there are no layout segments that need to be returned. 248362306a36Sopenharmony_ci */ 248462306a36Sopenharmony_ciint 248562306a36Sopenharmony_cipnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 248662306a36Sopenharmony_ci struct list_head *tmp_list, 248762306a36Sopenharmony_ci const struct pnfs_layout_range *return_range, 248862306a36Sopenharmony_ci u32 seq) 248962306a36Sopenharmony_ci{ 249062306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *next; 249162306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(lo->plh_inode); 249262306a36Sopenharmony_ci int remaining = 0; 249362306a36Sopenharmony_ci 249462306a36Sopenharmony_ci dprintk("%s:Begin lo %p\n", __func__, lo); 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci assert_spin_locked(&lo->plh_inode->i_lock); 249762306a36Sopenharmony_ci 249862306a36Sopenharmony_ci if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 249962306a36Sopenharmony_ci tmp_list = &lo->plh_return_segs; 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 250262306a36Sopenharmony_ci if (pnfs_match_lseg_recall(lseg, return_range, seq)) { 250362306a36Sopenharmony_ci dprintk("%s: marking lseg %p iomode %d " 250462306a36Sopenharmony_ci "offset %llu length %llu\n", __func__, 250562306a36Sopenharmony_ci lseg, lseg->pls_range.iomode, 250662306a36Sopenharmony_ci lseg->pls_range.offset, 250762306a36Sopenharmony_ci lseg->pls_range.length); 250862306a36Sopenharmony_ci if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 250962306a36Sopenharmony_ci tmp_list = &lo->plh_return_segs; 251062306a36Sopenharmony_ci if (mark_lseg_invalid(lseg, tmp_list)) 251162306a36Sopenharmony_ci continue; 251262306a36Sopenharmony_ci remaining++; 251362306a36Sopenharmony_ci set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 251462306a36Sopenharmony_ci pnfs_lseg_cancel_io(server, lseg); 251562306a36Sopenharmony_ci } 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci if (remaining) { 251862306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, return_range->iomode, seq); 251962306a36Sopenharmony_ci return -EBUSY; 252062306a36Sopenharmony_ci } 252162306a36Sopenharmony_ci 252262306a36Sopenharmony_ci if (!list_empty(&lo->plh_return_segs)) { 252362306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, return_range->iomode, seq); 252462306a36Sopenharmony_ci return 0; 252562306a36Sopenharmony_ci } 252662306a36Sopenharmony_ci 252762306a36Sopenharmony_ci return -ENOENT; 252862306a36Sopenharmony_ci} 252962306a36Sopenharmony_ci 253062306a36Sopenharmony_cistatic void 253162306a36Sopenharmony_cipnfs_mark_layout_for_return(struct inode *inode, 253262306a36Sopenharmony_ci const struct pnfs_layout_range *range) 253362306a36Sopenharmony_ci{ 253462306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 253562306a36Sopenharmony_ci bool return_now = false; 253662306a36Sopenharmony_ci 253762306a36Sopenharmony_ci spin_lock(&inode->i_lock); 253862306a36Sopenharmony_ci lo = NFS_I(inode)->layout; 253962306a36Sopenharmony_ci if (!pnfs_layout_is_valid(lo)) { 254062306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 254162306a36Sopenharmony_ci return; 254262306a36Sopenharmony_ci } 254362306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, range->iomode, 0); 254462306a36Sopenharmony_ci /* 254562306a36Sopenharmony_ci * mark all matching lsegs so that we are sure to have no live 254662306a36Sopenharmony_ci * segments at hand when sending layoutreturn. See pnfs_put_lseg() 254762306a36Sopenharmony_ci * for how it works. 254862306a36Sopenharmony_ci */ 254962306a36Sopenharmony_ci if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) { 255062306a36Sopenharmony_ci const struct cred *cred; 255162306a36Sopenharmony_ci nfs4_stateid stateid; 255262306a36Sopenharmony_ci enum pnfs_iomode iomode; 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode); 255562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 255662306a36Sopenharmony_ci if (return_now) 255762306a36Sopenharmony_ci pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); 255862306a36Sopenharmony_ci } else { 255962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 256062306a36Sopenharmony_ci nfs_commit_inode(inode, 0); 256162306a36Sopenharmony_ci } 256262306a36Sopenharmony_ci} 256362306a36Sopenharmony_ci 256462306a36Sopenharmony_civoid pnfs_error_mark_layout_for_return(struct inode *inode, 256562306a36Sopenharmony_ci struct pnfs_layout_segment *lseg) 256662306a36Sopenharmony_ci{ 256762306a36Sopenharmony_ci struct pnfs_layout_range range = { 256862306a36Sopenharmony_ci .iomode = lseg->pls_range.iomode, 256962306a36Sopenharmony_ci .offset = 0, 257062306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 257162306a36Sopenharmony_ci }; 257262306a36Sopenharmony_ci 257362306a36Sopenharmony_ci pnfs_mark_layout_for_return(inode, &range); 257462306a36Sopenharmony_ci} 257562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_cistatic bool 257862306a36Sopenharmony_cipnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo) 257962306a36Sopenharmony_ci{ 258062306a36Sopenharmony_ci return pnfs_layout_is_valid(lo) && 258162306a36Sopenharmony_ci !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) && 258262306a36Sopenharmony_ci !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); 258362306a36Sopenharmony_ci} 258462306a36Sopenharmony_ci 258562306a36Sopenharmony_cistatic struct pnfs_layout_segment * 258662306a36Sopenharmony_cipnfs_find_first_lseg(struct pnfs_layout_hdr *lo, 258762306a36Sopenharmony_ci const struct pnfs_layout_range *range, 258862306a36Sopenharmony_ci enum pnfs_iomode iomode) 258962306a36Sopenharmony_ci{ 259062306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 259162306a36Sopenharmony_ci 259262306a36Sopenharmony_ci list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 259362306a36Sopenharmony_ci if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) 259462306a36Sopenharmony_ci continue; 259562306a36Sopenharmony_ci if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) 259662306a36Sopenharmony_ci continue; 259762306a36Sopenharmony_ci if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY) 259862306a36Sopenharmony_ci continue; 259962306a36Sopenharmony_ci if (pnfs_lseg_range_intersecting(&lseg->pls_range, range)) 260062306a36Sopenharmony_ci return lseg; 260162306a36Sopenharmony_ci } 260262306a36Sopenharmony_ci return NULL; 260362306a36Sopenharmony_ci} 260462306a36Sopenharmony_ci 260562306a36Sopenharmony_ci/* Find open file states whose mode matches that of the range */ 260662306a36Sopenharmony_cistatic bool 260762306a36Sopenharmony_cipnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo, 260862306a36Sopenharmony_ci const struct pnfs_layout_range *range) 260962306a36Sopenharmony_ci{ 261062306a36Sopenharmony_ci struct list_head *head; 261162306a36Sopenharmony_ci struct nfs_open_context *ctx; 261262306a36Sopenharmony_ci fmode_t mode = 0; 261362306a36Sopenharmony_ci 261462306a36Sopenharmony_ci if (!pnfs_layout_can_be_returned(lo) || 261562306a36Sopenharmony_ci !pnfs_find_first_lseg(lo, range, range->iomode)) 261662306a36Sopenharmony_ci return false; 261762306a36Sopenharmony_ci 261862306a36Sopenharmony_ci head = &NFS_I(lo->plh_inode)->open_files; 261962306a36Sopenharmony_ci list_for_each_entry_rcu(ctx, head, list) { 262062306a36Sopenharmony_ci if (ctx->state) 262162306a36Sopenharmony_ci mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE); 262262306a36Sopenharmony_ci } 262362306a36Sopenharmony_ci 262462306a36Sopenharmony_ci switch (range->iomode) { 262562306a36Sopenharmony_ci default: 262662306a36Sopenharmony_ci break; 262762306a36Sopenharmony_ci case IOMODE_READ: 262862306a36Sopenharmony_ci mode &= ~FMODE_WRITE; 262962306a36Sopenharmony_ci break; 263062306a36Sopenharmony_ci case IOMODE_RW: 263162306a36Sopenharmony_ci if (pnfs_find_first_lseg(lo, range, IOMODE_READ)) 263262306a36Sopenharmony_ci mode &= ~FMODE_READ; 263362306a36Sopenharmony_ci } 263462306a36Sopenharmony_ci return mode == 0; 263562306a36Sopenharmony_ci} 263662306a36Sopenharmony_ci 263762306a36Sopenharmony_cistatic int pnfs_layout_return_unused_byserver(struct nfs_server *server, 263862306a36Sopenharmony_ci void *data) 263962306a36Sopenharmony_ci{ 264062306a36Sopenharmony_ci const struct pnfs_layout_range *range = data; 264162306a36Sopenharmony_ci const struct cred *cred; 264262306a36Sopenharmony_ci struct pnfs_layout_hdr *lo; 264362306a36Sopenharmony_ci struct inode *inode; 264462306a36Sopenharmony_ci nfs4_stateid stateid; 264562306a36Sopenharmony_ci enum pnfs_iomode iomode; 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_cirestart: 264862306a36Sopenharmony_ci rcu_read_lock(); 264962306a36Sopenharmony_ci list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) { 265062306a36Sopenharmony_ci inode = lo->plh_inode; 265162306a36Sopenharmony_ci if (!inode || !pnfs_layout_can_be_returned(lo) || 265262306a36Sopenharmony_ci test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) 265362306a36Sopenharmony_ci continue; 265462306a36Sopenharmony_ci spin_lock(&inode->i_lock); 265562306a36Sopenharmony_ci if (!lo->plh_inode || 265662306a36Sopenharmony_ci !pnfs_should_return_unused_layout(lo, range)) { 265762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 265862306a36Sopenharmony_ci continue; 265962306a36Sopenharmony_ci } 266062306a36Sopenharmony_ci pnfs_get_layout_hdr(lo); 266162306a36Sopenharmony_ci pnfs_set_plh_return_info(lo, range->iomode, 0); 266262306a36Sopenharmony_ci if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, 266362306a36Sopenharmony_ci range, 0) != 0 || 266462306a36Sopenharmony_ci !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) { 266562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 266662306a36Sopenharmony_ci rcu_read_unlock(); 266762306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 266862306a36Sopenharmony_ci cond_resched(); 266962306a36Sopenharmony_ci goto restart; 267062306a36Sopenharmony_ci } 267162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 267262306a36Sopenharmony_ci rcu_read_unlock(); 267362306a36Sopenharmony_ci pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false); 267462306a36Sopenharmony_ci pnfs_put_layout_hdr(lo); 267562306a36Sopenharmony_ci cond_resched(); 267662306a36Sopenharmony_ci goto restart; 267762306a36Sopenharmony_ci } 267862306a36Sopenharmony_ci rcu_read_unlock(); 267962306a36Sopenharmony_ci return 0; 268062306a36Sopenharmony_ci} 268162306a36Sopenharmony_ci 268262306a36Sopenharmony_civoid 268362306a36Sopenharmony_cipnfs_layout_return_unused_byclid(struct nfs_client *clp, 268462306a36Sopenharmony_ci enum pnfs_iomode iomode) 268562306a36Sopenharmony_ci{ 268662306a36Sopenharmony_ci struct pnfs_layout_range range = { 268762306a36Sopenharmony_ci .iomode = iomode, 268862306a36Sopenharmony_ci .offset = 0, 268962306a36Sopenharmony_ci .length = NFS4_MAX_UINT64, 269062306a36Sopenharmony_ci }; 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver, 269362306a36Sopenharmony_ci &range); 269462306a36Sopenharmony_ci} 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_civoid 269762306a36Sopenharmony_cipnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) 269862306a36Sopenharmony_ci{ 269962306a36Sopenharmony_ci if (pgio->pg_lseg == NULL || 270062306a36Sopenharmony_ci test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) 270162306a36Sopenharmony_ci return; 270262306a36Sopenharmony_ci pnfs_put_lseg(pgio->pg_lseg); 270362306a36Sopenharmony_ci pgio->pg_lseg = NULL; 270462306a36Sopenharmony_ci} 270562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci/* 270862306a36Sopenharmony_ci * Check for any intersection between the request and the pgio->pg_lseg, 270962306a36Sopenharmony_ci * and if none, put this pgio->pg_lseg away. 271062306a36Sopenharmony_ci */ 271162306a36Sopenharmony_civoid 271262306a36Sopenharmony_cipnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 271362306a36Sopenharmony_ci{ 271462306a36Sopenharmony_ci if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { 271562306a36Sopenharmony_ci pnfs_put_lseg(pgio->pg_lseg); 271662306a36Sopenharmony_ci pgio->pg_lseg = NULL; 271762306a36Sopenharmony_ci } 271862306a36Sopenharmony_ci} 271962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range); 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_civoid 272262306a36Sopenharmony_cipnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 272362306a36Sopenharmony_ci{ 272462306a36Sopenharmony_ci u64 rd_size; 272562306a36Sopenharmony_ci 272662306a36Sopenharmony_ci pnfs_generic_pg_check_layout(pgio); 272762306a36Sopenharmony_ci pnfs_generic_pg_check_range(pgio, req); 272862306a36Sopenharmony_ci if (pgio->pg_lseg == NULL) { 272962306a36Sopenharmony_ci if (pgio->pg_dreq == NULL) 273062306a36Sopenharmony_ci rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 273162306a36Sopenharmony_ci else 273262306a36Sopenharmony_ci rd_size = nfs_dreq_bytes_left(pgio->pg_dreq, 273362306a36Sopenharmony_ci req_offset(req)); 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci pgio->pg_lseg = 273662306a36Sopenharmony_ci pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 273762306a36Sopenharmony_ci req_offset(req), rd_size, 273862306a36Sopenharmony_ci IOMODE_READ, false, 273962306a36Sopenharmony_ci nfs_io_gfp_mask()); 274062306a36Sopenharmony_ci if (IS_ERR(pgio->pg_lseg)) { 274162306a36Sopenharmony_ci pgio->pg_error = PTR_ERR(pgio->pg_lseg); 274262306a36Sopenharmony_ci pgio->pg_lseg = NULL; 274362306a36Sopenharmony_ci return; 274462306a36Sopenharmony_ci } 274562306a36Sopenharmony_ci } 274662306a36Sopenharmony_ci /* If no lseg, fall back to read through mds */ 274762306a36Sopenharmony_ci if (pgio->pg_lseg == NULL) 274862306a36Sopenharmony_ci nfs_pageio_reset_read_mds(pgio); 274962306a36Sopenharmony_ci 275062306a36Sopenharmony_ci} 275162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_civoid 275462306a36Sopenharmony_cipnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 275562306a36Sopenharmony_ci struct nfs_page *req, u64 wb_size) 275662306a36Sopenharmony_ci{ 275762306a36Sopenharmony_ci pnfs_generic_pg_check_layout(pgio); 275862306a36Sopenharmony_ci pnfs_generic_pg_check_range(pgio, req); 275962306a36Sopenharmony_ci if (pgio->pg_lseg == NULL) { 276062306a36Sopenharmony_ci pgio->pg_lseg = 276162306a36Sopenharmony_ci pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), 276262306a36Sopenharmony_ci req_offset(req), wb_size, IOMODE_RW, 276362306a36Sopenharmony_ci false, nfs_io_gfp_mask()); 276462306a36Sopenharmony_ci if (IS_ERR(pgio->pg_lseg)) { 276562306a36Sopenharmony_ci pgio->pg_error = PTR_ERR(pgio->pg_lseg); 276662306a36Sopenharmony_ci pgio->pg_lseg = NULL; 276762306a36Sopenharmony_ci return; 276862306a36Sopenharmony_ci } 276962306a36Sopenharmony_ci } 277062306a36Sopenharmony_ci /* If no lseg, fall back to write through mds */ 277162306a36Sopenharmony_ci if (pgio->pg_lseg == NULL) 277262306a36Sopenharmony_ci nfs_pageio_reset_write_mds(pgio); 277362306a36Sopenharmony_ci} 277462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); 277562306a36Sopenharmony_ci 277662306a36Sopenharmony_civoid 277762306a36Sopenharmony_cipnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) 277862306a36Sopenharmony_ci{ 277962306a36Sopenharmony_ci if (desc->pg_lseg) { 278062306a36Sopenharmony_ci pnfs_put_lseg(desc->pg_lseg); 278162306a36Sopenharmony_ci desc->pg_lseg = NULL; 278262306a36Sopenharmony_ci } 278362306a36Sopenharmony_ci} 278462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_ci/* 278762306a36Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 278862306a36Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced. 278962306a36Sopenharmony_ci */ 279062306a36Sopenharmony_cisize_t 279162306a36Sopenharmony_cipnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, 279262306a36Sopenharmony_ci struct nfs_page *prev, struct nfs_page *req) 279362306a36Sopenharmony_ci{ 279462306a36Sopenharmony_ci unsigned int size; 279562306a36Sopenharmony_ci u64 seg_end, req_start, seg_left; 279662306a36Sopenharmony_ci 279762306a36Sopenharmony_ci size = nfs_generic_pg_test(pgio, prev, req); 279862306a36Sopenharmony_ci if (!size) 279962306a36Sopenharmony_ci return 0; 280062306a36Sopenharmony_ci 280162306a36Sopenharmony_ci /* 280262306a36Sopenharmony_ci * 'size' contains the number of bytes left in the current page (up 280362306a36Sopenharmony_ci * to the original size asked for in @req->wb_bytes). 280462306a36Sopenharmony_ci * 280562306a36Sopenharmony_ci * Calculate how many bytes are left in the layout segment 280662306a36Sopenharmony_ci * and if there are less bytes than 'size', return that instead. 280762306a36Sopenharmony_ci * 280862306a36Sopenharmony_ci * Please also note that 'end_offset' is actually the offset of the 280962306a36Sopenharmony_ci * first byte that lies outside the pnfs_layout_range. FIXME? 281062306a36Sopenharmony_ci * 281162306a36Sopenharmony_ci */ 281262306a36Sopenharmony_ci if (pgio->pg_lseg) { 281362306a36Sopenharmony_ci seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, 281462306a36Sopenharmony_ci pgio->pg_lseg->pls_range.length); 281562306a36Sopenharmony_ci req_start = req_offset(req); 281662306a36Sopenharmony_ci 281762306a36Sopenharmony_ci /* start of request is past the last byte of this segment */ 281862306a36Sopenharmony_ci if (req_start >= seg_end) 281962306a36Sopenharmony_ci return 0; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci /* adjust 'size' iff there are fewer bytes left in the 282262306a36Sopenharmony_ci * segment than what nfs_generic_pg_test returned */ 282362306a36Sopenharmony_ci seg_left = seg_end - req_start; 282462306a36Sopenharmony_ci if (seg_left < size) 282562306a36Sopenharmony_ci size = (unsigned int)seg_left; 282662306a36Sopenharmony_ci } 282762306a36Sopenharmony_ci 282862306a36Sopenharmony_ci return size; 282962306a36Sopenharmony_ci} 283062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_ciint pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) 283362306a36Sopenharmony_ci{ 283462306a36Sopenharmony_ci struct nfs_pageio_descriptor pgio; 283562306a36Sopenharmony_ci 283662306a36Sopenharmony_ci /* Resend all requests through the MDS */ 283762306a36Sopenharmony_ci nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, 283862306a36Sopenharmony_ci hdr->completion_ops); 283962306a36Sopenharmony_ci return nfs_pageio_resend(&pgio, hdr); 284062306a36Sopenharmony_ci} 284162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_cistatic void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) 284462306a36Sopenharmony_ci{ 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ci dprintk("pnfs write error = %d\n", hdr->pnfs_error); 284762306a36Sopenharmony_ci if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 284862306a36Sopenharmony_ci PNFS_LAYOUTRET_ON_ERROR) { 284962306a36Sopenharmony_ci pnfs_return_layout(hdr->inode); 285062306a36Sopenharmony_ci } 285162306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 285262306a36Sopenharmony_ci hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); 285362306a36Sopenharmony_ci} 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_ci/* 285662306a36Sopenharmony_ci * Called by non rpc-based layout drivers 285762306a36Sopenharmony_ci */ 285862306a36Sopenharmony_civoid pnfs_ld_write_done(struct nfs_pgio_header *hdr) 285962306a36Sopenharmony_ci{ 286062306a36Sopenharmony_ci if (likely(!hdr->pnfs_error)) { 286162306a36Sopenharmony_ci pnfs_set_layoutcommit(hdr->inode, hdr->lseg, 286262306a36Sopenharmony_ci hdr->mds_offset + hdr->res.count); 286362306a36Sopenharmony_ci hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 286462306a36Sopenharmony_ci } 286562306a36Sopenharmony_ci trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); 286662306a36Sopenharmony_ci if (unlikely(hdr->pnfs_error)) 286762306a36Sopenharmony_ci pnfs_ld_handle_write_error(hdr); 286862306a36Sopenharmony_ci hdr->mds_ops->rpc_release(hdr); 286962306a36Sopenharmony_ci} 287062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_write_done); 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_cistatic void 287362306a36Sopenharmony_cipnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 287462306a36Sopenharmony_ci struct nfs_pgio_header *hdr) 287562306a36Sopenharmony_ci{ 287662306a36Sopenharmony_ci struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 287762306a36Sopenharmony_ci 287862306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 287962306a36Sopenharmony_ci list_splice_tail_init(&hdr->pages, &mirror->pg_list); 288062306a36Sopenharmony_ci nfs_pageio_reset_write_mds(desc); 288162306a36Sopenharmony_ci mirror->pg_recoalesce = 1; 288262306a36Sopenharmony_ci } 288362306a36Sopenharmony_ci hdr->completion_ops->completion(hdr); 288462306a36Sopenharmony_ci} 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_cistatic enum pnfs_try_status 288762306a36Sopenharmony_cipnfs_try_to_write_data(struct nfs_pgio_header *hdr, 288862306a36Sopenharmony_ci const struct rpc_call_ops *call_ops, 288962306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, 289062306a36Sopenharmony_ci int how) 289162306a36Sopenharmony_ci{ 289262306a36Sopenharmony_ci struct inode *inode = hdr->inode; 289362306a36Sopenharmony_ci enum pnfs_try_status trypnfs; 289462306a36Sopenharmony_ci struct nfs_server *nfss = NFS_SERVER(inode); 289562306a36Sopenharmony_ci 289662306a36Sopenharmony_ci hdr->mds_ops = call_ops; 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_ci dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 289962306a36Sopenharmony_ci inode->i_ino, hdr->args.count, hdr->args.offset, how); 290062306a36Sopenharmony_ci trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); 290162306a36Sopenharmony_ci if (trypnfs != PNFS_NOT_ATTEMPTED) 290262306a36Sopenharmony_ci nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 290362306a36Sopenharmony_ci dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 290462306a36Sopenharmony_ci return trypnfs; 290562306a36Sopenharmony_ci} 290662306a36Sopenharmony_ci 290762306a36Sopenharmony_cistatic void 290862306a36Sopenharmony_cipnfs_do_write(struct nfs_pageio_descriptor *desc, 290962306a36Sopenharmony_ci struct nfs_pgio_header *hdr, int how) 291062306a36Sopenharmony_ci{ 291162306a36Sopenharmony_ci const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 291262306a36Sopenharmony_ci struct pnfs_layout_segment *lseg = desc->pg_lseg; 291362306a36Sopenharmony_ci enum pnfs_try_status trypnfs; 291462306a36Sopenharmony_ci 291562306a36Sopenharmony_ci trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); 291662306a36Sopenharmony_ci switch (trypnfs) { 291762306a36Sopenharmony_ci case PNFS_NOT_ATTEMPTED: 291862306a36Sopenharmony_ci pnfs_write_through_mds(desc, hdr); 291962306a36Sopenharmony_ci break; 292062306a36Sopenharmony_ci case PNFS_ATTEMPTED: 292162306a36Sopenharmony_ci break; 292262306a36Sopenharmony_ci case PNFS_TRY_AGAIN: 292362306a36Sopenharmony_ci /* cleanup hdr and prepare to redo pnfs */ 292462306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 292562306a36Sopenharmony_ci struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 292662306a36Sopenharmony_ci list_splice_init(&hdr->pages, &mirror->pg_list); 292762306a36Sopenharmony_ci mirror->pg_recoalesce = 1; 292862306a36Sopenharmony_ci } 292962306a36Sopenharmony_ci hdr->mds_ops->rpc_release(hdr); 293062306a36Sopenharmony_ci } 293162306a36Sopenharmony_ci} 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_cistatic void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 293462306a36Sopenharmony_ci{ 293562306a36Sopenharmony_ci pnfs_put_lseg(hdr->lseg); 293662306a36Sopenharmony_ci nfs_pgio_header_free(hdr); 293762306a36Sopenharmony_ci} 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ciint 294062306a36Sopenharmony_cipnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 294162306a36Sopenharmony_ci{ 294262306a36Sopenharmony_ci struct nfs_pgio_header *hdr; 294362306a36Sopenharmony_ci int ret; 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 294662306a36Sopenharmony_ci if (!hdr) { 294762306a36Sopenharmony_ci desc->pg_error = -ENOMEM; 294862306a36Sopenharmony_ci return desc->pg_error; 294962306a36Sopenharmony_ci } 295062306a36Sopenharmony_ci nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 295162306a36Sopenharmony_ci 295262306a36Sopenharmony_ci hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 295362306a36Sopenharmony_ci ret = nfs_generic_pgio(desc, hdr); 295462306a36Sopenharmony_ci if (!ret) 295562306a36Sopenharmony_ci pnfs_do_write(desc, hdr, desc->pg_ioflags); 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci return ret; 295862306a36Sopenharmony_ci} 295962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 296062306a36Sopenharmony_ci 296162306a36Sopenharmony_ciint pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) 296262306a36Sopenharmony_ci{ 296362306a36Sopenharmony_ci struct nfs_pageio_descriptor pgio; 296462306a36Sopenharmony_ci 296562306a36Sopenharmony_ci /* Resend all requests through the MDS */ 296662306a36Sopenharmony_ci nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); 296762306a36Sopenharmony_ci return nfs_pageio_resend(&pgio, hdr); 296862306a36Sopenharmony_ci} 296962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_cistatic void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) 297262306a36Sopenharmony_ci{ 297362306a36Sopenharmony_ci dprintk("pnfs read error = %d\n", hdr->pnfs_error); 297462306a36Sopenharmony_ci if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 297562306a36Sopenharmony_ci PNFS_LAYOUTRET_ON_ERROR) { 297662306a36Sopenharmony_ci pnfs_return_layout(hdr->inode); 297762306a36Sopenharmony_ci } 297862306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 297962306a36Sopenharmony_ci hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); 298062306a36Sopenharmony_ci} 298162306a36Sopenharmony_ci 298262306a36Sopenharmony_ci/* 298362306a36Sopenharmony_ci * Called by non rpc-based layout drivers 298462306a36Sopenharmony_ci */ 298562306a36Sopenharmony_civoid pnfs_ld_read_done(struct nfs_pgio_header *hdr) 298662306a36Sopenharmony_ci{ 298762306a36Sopenharmony_ci if (likely(!hdr->pnfs_error)) 298862306a36Sopenharmony_ci hdr->mds_ops->rpc_call_done(&hdr->task, hdr); 298962306a36Sopenharmony_ci trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); 299062306a36Sopenharmony_ci if (unlikely(hdr->pnfs_error)) 299162306a36Sopenharmony_ci pnfs_ld_handle_read_error(hdr); 299262306a36Sopenharmony_ci hdr->mds_ops->rpc_release(hdr); 299362306a36Sopenharmony_ci} 299462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_ld_read_done); 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_cistatic void 299762306a36Sopenharmony_cipnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 299862306a36Sopenharmony_ci struct nfs_pgio_header *hdr) 299962306a36Sopenharmony_ci{ 300062306a36Sopenharmony_ci struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 300162306a36Sopenharmony_ci 300262306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 300362306a36Sopenharmony_ci list_splice_tail_init(&hdr->pages, &mirror->pg_list); 300462306a36Sopenharmony_ci nfs_pageio_reset_read_mds(desc); 300562306a36Sopenharmony_ci mirror->pg_recoalesce = 1; 300662306a36Sopenharmony_ci } 300762306a36Sopenharmony_ci hdr->completion_ops->completion(hdr); 300862306a36Sopenharmony_ci} 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci/* 301162306a36Sopenharmony_ci * Call the appropriate parallel I/O subsystem read function. 301262306a36Sopenharmony_ci */ 301362306a36Sopenharmony_cistatic enum pnfs_try_status 301462306a36Sopenharmony_cipnfs_try_to_read_data(struct nfs_pgio_header *hdr, 301562306a36Sopenharmony_ci const struct rpc_call_ops *call_ops, 301662306a36Sopenharmony_ci struct pnfs_layout_segment *lseg) 301762306a36Sopenharmony_ci{ 301862306a36Sopenharmony_ci struct inode *inode = hdr->inode; 301962306a36Sopenharmony_ci struct nfs_server *nfss = NFS_SERVER(inode); 302062306a36Sopenharmony_ci enum pnfs_try_status trypnfs; 302162306a36Sopenharmony_ci 302262306a36Sopenharmony_ci hdr->mds_ops = call_ops; 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci dprintk("%s: Reading ino:%lu %u@%llu\n", 302562306a36Sopenharmony_ci __func__, inode->i_ino, hdr->args.count, hdr->args.offset); 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); 302862306a36Sopenharmony_ci if (trypnfs != PNFS_NOT_ATTEMPTED) 302962306a36Sopenharmony_ci nfs_inc_stats(inode, NFSIOS_PNFS_READ); 303062306a36Sopenharmony_ci dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 303162306a36Sopenharmony_ci return trypnfs; 303262306a36Sopenharmony_ci} 303362306a36Sopenharmony_ci 303462306a36Sopenharmony_ci/* Resend all requests through pnfs. */ 303562306a36Sopenharmony_civoid pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr, 303662306a36Sopenharmony_ci unsigned int mirror_idx) 303762306a36Sopenharmony_ci{ 303862306a36Sopenharmony_ci struct nfs_pageio_descriptor pgio; 303962306a36Sopenharmony_ci 304062306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 304162306a36Sopenharmony_ci /* Prevent deadlocks with layoutreturn! */ 304262306a36Sopenharmony_ci pnfs_put_lseg(hdr->lseg); 304362306a36Sopenharmony_ci hdr->lseg = NULL; 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci nfs_pageio_init_read(&pgio, hdr->inode, false, 304662306a36Sopenharmony_ci hdr->completion_ops); 304762306a36Sopenharmony_ci pgio.pg_mirror_idx = mirror_idx; 304862306a36Sopenharmony_ci hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); 304962306a36Sopenharmony_ci } 305062306a36Sopenharmony_ci} 305162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); 305262306a36Sopenharmony_ci 305362306a36Sopenharmony_cistatic void 305462306a36Sopenharmony_cipnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 305562306a36Sopenharmony_ci{ 305662306a36Sopenharmony_ci const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 305762306a36Sopenharmony_ci struct pnfs_layout_segment *lseg = desc->pg_lseg; 305862306a36Sopenharmony_ci enum pnfs_try_status trypnfs; 305962306a36Sopenharmony_ci 306062306a36Sopenharmony_ci trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); 306162306a36Sopenharmony_ci switch (trypnfs) { 306262306a36Sopenharmony_ci case PNFS_NOT_ATTEMPTED: 306362306a36Sopenharmony_ci pnfs_read_through_mds(desc, hdr); 306462306a36Sopenharmony_ci break; 306562306a36Sopenharmony_ci case PNFS_ATTEMPTED: 306662306a36Sopenharmony_ci break; 306762306a36Sopenharmony_ci case PNFS_TRY_AGAIN: 306862306a36Sopenharmony_ci /* cleanup hdr and prepare to redo pnfs */ 306962306a36Sopenharmony_ci if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 307062306a36Sopenharmony_ci struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); 307162306a36Sopenharmony_ci list_splice_init(&hdr->pages, &mirror->pg_list); 307262306a36Sopenharmony_ci mirror->pg_recoalesce = 1; 307362306a36Sopenharmony_ci } 307462306a36Sopenharmony_ci hdr->mds_ops->rpc_release(hdr); 307562306a36Sopenharmony_ci } 307662306a36Sopenharmony_ci} 307762306a36Sopenharmony_ci 307862306a36Sopenharmony_cistatic void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 307962306a36Sopenharmony_ci{ 308062306a36Sopenharmony_ci pnfs_put_lseg(hdr->lseg); 308162306a36Sopenharmony_ci nfs_pgio_header_free(hdr); 308262306a36Sopenharmony_ci} 308362306a36Sopenharmony_ci 308462306a36Sopenharmony_ciint 308562306a36Sopenharmony_cipnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 308662306a36Sopenharmony_ci{ 308762306a36Sopenharmony_ci struct nfs_pgio_header *hdr; 308862306a36Sopenharmony_ci int ret; 308962306a36Sopenharmony_ci 309062306a36Sopenharmony_ci hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 309162306a36Sopenharmony_ci if (!hdr) { 309262306a36Sopenharmony_ci desc->pg_error = -ENOMEM; 309362306a36Sopenharmony_ci return desc->pg_error; 309462306a36Sopenharmony_ci } 309562306a36Sopenharmony_ci nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 309662306a36Sopenharmony_ci hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 309762306a36Sopenharmony_ci ret = nfs_generic_pgio(desc, hdr); 309862306a36Sopenharmony_ci if (!ret) 309962306a36Sopenharmony_ci pnfs_do_read(desc, hdr); 310062306a36Sopenharmony_ci return ret; 310162306a36Sopenharmony_ci} 310262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 310362306a36Sopenharmony_ci 310462306a36Sopenharmony_cistatic void pnfs_clear_layoutcommitting(struct inode *inode) 310562306a36Sopenharmony_ci{ 310662306a36Sopenharmony_ci unsigned long *bitlock = &NFS_I(inode)->flags; 310762306a36Sopenharmony_ci 310862306a36Sopenharmony_ci clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); 310962306a36Sopenharmony_ci smp_mb__after_atomic(); 311062306a36Sopenharmony_ci wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); 311162306a36Sopenharmony_ci} 311262306a36Sopenharmony_ci 311362306a36Sopenharmony_ci/* 311462306a36Sopenharmony_ci * There can be multiple RW segments. 311562306a36Sopenharmony_ci */ 311662306a36Sopenharmony_cistatic void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) 311762306a36Sopenharmony_ci{ 311862306a36Sopenharmony_ci struct pnfs_layout_segment *lseg; 311962306a36Sopenharmony_ci 312062306a36Sopenharmony_ci list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 312162306a36Sopenharmony_ci if (lseg->pls_range.iomode == IOMODE_RW && 312262306a36Sopenharmony_ci test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 312362306a36Sopenharmony_ci list_add(&lseg->pls_lc_list, listp); 312462306a36Sopenharmony_ci } 312562306a36Sopenharmony_ci} 312662306a36Sopenharmony_ci 312762306a36Sopenharmony_cistatic void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) 312862306a36Sopenharmony_ci{ 312962306a36Sopenharmony_ci struct pnfs_layout_segment *lseg, *tmp; 313062306a36Sopenharmony_ci 313162306a36Sopenharmony_ci /* Matched by references in pnfs_set_layoutcommit */ 313262306a36Sopenharmony_ci list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { 313362306a36Sopenharmony_ci list_del_init(&lseg->pls_lc_list); 313462306a36Sopenharmony_ci pnfs_put_lseg(lseg); 313562306a36Sopenharmony_ci } 313662306a36Sopenharmony_ci 313762306a36Sopenharmony_ci pnfs_clear_layoutcommitting(inode); 313862306a36Sopenharmony_ci} 313962306a36Sopenharmony_ci 314062306a36Sopenharmony_civoid pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 314162306a36Sopenharmony_ci{ 314262306a36Sopenharmony_ci pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 314362306a36Sopenharmony_ci} 314462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_civoid 314762306a36Sopenharmony_cipnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg, 314862306a36Sopenharmony_ci loff_t end_pos) 314962306a36Sopenharmony_ci{ 315062306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(inode); 315162306a36Sopenharmony_ci bool mark_as_dirty = false; 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 315462306a36Sopenharmony_ci if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 315562306a36Sopenharmony_ci nfsi->layout->plh_lwb = end_pos; 315662306a36Sopenharmony_ci mark_as_dirty = true; 315762306a36Sopenharmony_ci dprintk("%s: Set layoutcommit for inode %lu ", 315862306a36Sopenharmony_ci __func__, inode->i_ino); 315962306a36Sopenharmony_ci } else if (end_pos > nfsi->layout->plh_lwb) 316062306a36Sopenharmony_ci nfsi->layout->plh_lwb = end_pos; 316162306a36Sopenharmony_ci if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) { 316262306a36Sopenharmony_ci /* references matched in nfs4_layoutcommit_release */ 316362306a36Sopenharmony_ci pnfs_get_lseg(lseg); 316462306a36Sopenharmony_ci } 316562306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 316662306a36Sopenharmony_ci dprintk("%s: lseg %p end_pos %llu\n", 316762306a36Sopenharmony_ci __func__, lseg, nfsi->layout->plh_lwb); 316862306a36Sopenharmony_ci 316962306a36Sopenharmony_ci /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 317062306a36Sopenharmony_ci * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 317162306a36Sopenharmony_ci if (mark_as_dirty) 317262306a36Sopenharmony_ci mark_inode_dirty_sync(inode); 317362306a36Sopenharmony_ci} 317462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 317562306a36Sopenharmony_ci 317662306a36Sopenharmony_civoid pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) 317762306a36Sopenharmony_ci{ 317862306a36Sopenharmony_ci struct nfs_server *nfss = NFS_SERVER(data->args.inode); 317962306a36Sopenharmony_ci 318062306a36Sopenharmony_ci if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 318162306a36Sopenharmony_ci nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 318262306a36Sopenharmony_ci pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); 318362306a36Sopenharmony_ci} 318462306a36Sopenharmony_ci 318562306a36Sopenharmony_ci/* 318662306a36Sopenharmony_ci * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and 318762306a36Sopenharmony_ci * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough 318862306a36Sopenharmony_ci * data to disk to allow the server to recover the data if it crashes. 318962306a36Sopenharmony_ci * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag 319062306a36Sopenharmony_ci * is off, and a COMMIT is sent to a data server, or 319162306a36Sopenharmony_ci * if WRITEs to a data server return NFS_DATA_SYNC. 319262306a36Sopenharmony_ci */ 319362306a36Sopenharmony_ciint 319462306a36Sopenharmony_cipnfs_layoutcommit_inode(struct inode *inode, bool sync) 319562306a36Sopenharmony_ci{ 319662306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 319762306a36Sopenharmony_ci struct nfs4_layoutcommit_data *data; 319862306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(inode); 319962306a36Sopenharmony_ci loff_t end_pos; 320062306a36Sopenharmony_ci int status; 320162306a36Sopenharmony_ci 320262306a36Sopenharmony_ci if (!pnfs_layoutcommit_outstanding(inode)) 320362306a36Sopenharmony_ci return 0; 320462306a36Sopenharmony_ci 320562306a36Sopenharmony_ci dprintk("--> %s inode %lu\n", __func__, inode->i_ino); 320662306a36Sopenharmony_ci 320762306a36Sopenharmony_ci status = -EAGAIN; 320862306a36Sopenharmony_ci if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { 320962306a36Sopenharmony_ci if (!sync) 321062306a36Sopenharmony_ci goto out; 321162306a36Sopenharmony_ci status = wait_on_bit_lock_action(&nfsi->flags, 321262306a36Sopenharmony_ci NFS_INO_LAYOUTCOMMITTING, 321362306a36Sopenharmony_ci nfs_wait_bit_killable, 321462306a36Sopenharmony_ci TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); 321562306a36Sopenharmony_ci if (status) 321662306a36Sopenharmony_ci goto out; 321762306a36Sopenharmony_ci } 321862306a36Sopenharmony_ci 321962306a36Sopenharmony_ci status = -ENOMEM; 322062306a36Sopenharmony_ci /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ 322162306a36Sopenharmony_ci data = kzalloc(sizeof(*data), nfs_io_gfp_mask()); 322262306a36Sopenharmony_ci if (!data) 322362306a36Sopenharmony_ci goto clear_layoutcommitting; 322462306a36Sopenharmony_ci 322562306a36Sopenharmony_ci status = 0; 322662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 322762306a36Sopenharmony_ci if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) 322862306a36Sopenharmony_ci goto out_unlock; 322962306a36Sopenharmony_ci 323062306a36Sopenharmony_ci INIT_LIST_HEAD(&data->lseg_list); 323162306a36Sopenharmony_ci pnfs_list_write_lseg(inode, &data->lseg_list); 323262306a36Sopenharmony_ci 323362306a36Sopenharmony_ci end_pos = nfsi->layout->plh_lwb; 323462306a36Sopenharmony_ci 323562306a36Sopenharmony_ci nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); 323662306a36Sopenharmony_ci data->cred = get_cred(nfsi->layout->plh_lc_cred); 323762306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 323862306a36Sopenharmony_ci 323962306a36Sopenharmony_ci data->args.inode = inode; 324062306a36Sopenharmony_ci nfs_fattr_init(&data->fattr); 324162306a36Sopenharmony_ci data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; 324262306a36Sopenharmony_ci data->res.fattr = &data->fattr; 324362306a36Sopenharmony_ci if (end_pos != 0) 324462306a36Sopenharmony_ci data->args.lastbytewritten = end_pos - 1; 324562306a36Sopenharmony_ci else 324662306a36Sopenharmony_ci data->args.lastbytewritten = U64_MAX; 324762306a36Sopenharmony_ci data->res.server = NFS_SERVER(inode); 324862306a36Sopenharmony_ci 324962306a36Sopenharmony_ci if (ld->prepare_layoutcommit) { 325062306a36Sopenharmony_ci status = ld->prepare_layoutcommit(&data->args); 325162306a36Sopenharmony_ci if (status) { 325262306a36Sopenharmony_ci put_cred(data->cred); 325362306a36Sopenharmony_ci spin_lock(&inode->i_lock); 325462306a36Sopenharmony_ci set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); 325562306a36Sopenharmony_ci if (end_pos > nfsi->layout->plh_lwb) 325662306a36Sopenharmony_ci nfsi->layout->plh_lwb = end_pos; 325762306a36Sopenharmony_ci goto out_unlock; 325862306a36Sopenharmony_ci } 325962306a36Sopenharmony_ci } 326062306a36Sopenharmony_ci 326162306a36Sopenharmony_ci 326262306a36Sopenharmony_ci status = nfs4_proc_layoutcommit(data, sync); 326362306a36Sopenharmony_ciout: 326462306a36Sopenharmony_ci if (status) 326562306a36Sopenharmony_ci mark_inode_dirty_sync(inode); 326662306a36Sopenharmony_ci dprintk("<-- %s status %d\n", __func__, status); 326762306a36Sopenharmony_ci return status; 326862306a36Sopenharmony_ciout_unlock: 326962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 327062306a36Sopenharmony_ci kfree(data); 327162306a36Sopenharmony_ciclear_layoutcommitting: 327262306a36Sopenharmony_ci pnfs_clear_layoutcommitting(inode); 327362306a36Sopenharmony_ci goto out; 327462306a36Sopenharmony_ci} 327562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ciint 327862306a36Sopenharmony_cipnfs_generic_sync(struct inode *inode, bool datasync) 327962306a36Sopenharmony_ci{ 328062306a36Sopenharmony_ci return pnfs_layoutcommit_inode(inode, true); 328162306a36Sopenharmony_ci} 328262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_generic_sync); 328362306a36Sopenharmony_ci 328462306a36Sopenharmony_cistruct nfs4_threshold *pnfs_mdsthreshold_alloc(void) 328562306a36Sopenharmony_ci{ 328662306a36Sopenharmony_ci struct nfs4_threshold *thp; 328762306a36Sopenharmony_ci 328862306a36Sopenharmony_ci thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask()); 328962306a36Sopenharmony_ci if (!thp) { 329062306a36Sopenharmony_ci dprintk("%s mdsthreshold allocation failed\n", __func__); 329162306a36Sopenharmony_ci return NULL; 329262306a36Sopenharmony_ci } 329362306a36Sopenharmony_ci return thp; 329462306a36Sopenharmony_ci} 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_NFS_V4_2) 329762306a36Sopenharmony_ciint 329862306a36Sopenharmony_cipnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags) 329962306a36Sopenharmony_ci{ 330062306a36Sopenharmony_ci struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; 330162306a36Sopenharmony_ci struct nfs_server *server = NFS_SERVER(inode); 330262306a36Sopenharmony_ci struct nfs_inode *nfsi = NFS_I(inode); 330362306a36Sopenharmony_ci struct nfs42_layoutstat_data *data; 330462306a36Sopenharmony_ci struct pnfs_layout_hdr *hdr; 330562306a36Sopenharmony_ci int status = 0; 330662306a36Sopenharmony_ci 330762306a36Sopenharmony_ci if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) 330862306a36Sopenharmony_ci goto out; 330962306a36Sopenharmony_ci 331062306a36Sopenharmony_ci if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) 331162306a36Sopenharmony_ci goto out; 331262306a36Sopenharmony_ci 331362306a36Sopenharmony_ci if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) 331462306a36Sopenharmony_ci goto out; 331562306a36Sopenharmony_ci 331662306a36Sopenharmony_ci spin_lock(&inode->i_lock); 331762306a36Sopenharmony_ci if (!NFS_I(inode)->layout) { 331862306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 331962306a36Sopenharmony_ci goto out_clear_layoutstats; 332062306a36Sopenharmony_ci } 332162306a36Sopenharmony_ci hdr = NFS_I(inode)->layout; 332262306a36Sopenharmony_ci pnfs_get_layout_hdr(hdr); 332362306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 332462306a36Sopenharmony_ci 332562306a36Sopenharmony_ci data = kzalloc(sizeof(*data), gfp_flags); 332662306a36Sopenharmony_ci if (!data) { 332762306a36Sopenharmony_ci status = -ENOMEM; 332862306a36Sopenharmony_ci goto out_put; 332962306a36Sopenharmony_ci } 333062306a36Sopenharmony_ci 333162306a36Sopenharmony_ci data->args.fh = NFS_FH(inode); 333262306a36Sopenharmony_ci data->args.inode = inode; 333362306a36Sopenharmony_ci status = ld->prepare_layoutstats(&data->args); 333462306a36Sopenharmony_ci if (status) 333562306a36Sopenharmony_ci goto out_free; 333662306a36Sopenharmony_ci 333762306a36Sopenharmony_ci status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); 333862306a36Sopenharmony_ci 333962306a36Sopenharmony_ciout: 334062306a36Sopenharmony_ci dprintk("%s returns %d\n", __func__, status); 334162306a36Sopenharmony_ci return status; 334262306a36Sopenharmony_ci 334362306a36Sopenharmony_ciout_free: 334462306a36Sopenharmony_ci kfree(data); 334562306a36Sopenharmony_ciout_put: 334662306a36Sopenharmony_ci pnfs_put_layout_hdr(hdr); 334762306a36Sopenharmony_ciout_clear_layoutstats: 334862306a36Sopenharmony_ci smp_mb__before_atomic(); 334962306a36Sopenharmony_ci clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); 335062306a36Sopenharmony_ci smp_mb__after_atomic(); 335162306a36Sopenharmony_ci goto out; 335262306a36Sopenharmony_ci} 335362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(pnfs_report_layoutstat); 335462306a36Sopenharmony_ci#endif 335562306a36Sopenharmony_ci 335662306a36Sopenharmony_ciunsigned int layoutstats_timer; 335762306a36Sopenharmony_cimodule_param(layoutstats_timer, uint, 0644); 335862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(layoutstats_timer); 3359