162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Device operations for the pnfs nfs4 file layout driver. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Tao Peng <bergwolf@primarydata.com> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/nfs_fs.h> 1162306a36Sopenharmony_ci#include <linux/vmalloc.h> 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/sunrpc/addr.h> 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include "../internal.h" 1662306a36Sopenharmony_ci#include "../nfs4session.h" 1762306a36Sopenharmony_ci#include "flexfilelayout.h" 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#define NFSDBG_FACILITY NFSDBG_PNFS_LD 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; 2262306a36Sopenharmony_cistatic unsigned int dataserver_retrans; 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_civoid nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci if (!IS_ERR_OR_NULL(mirror_ds)) 2962306a36Sopenharmony_ci nfs4_put_deviceid_node(&mirror_ds->id_node); 3062306a36Sopenharmony_ci} 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_civoid nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 3362306a36Sopenharmony_ci{ 3462306a36Sopenharmony_ci nfs4_print_deviceid(&mirror_ds->id_node.deviceid); 3562306a36Sopenharmony_ci nfs4_pnfs_ds_put(mirror_ds->ds); 3662306a36Sopenharmony_ci kfree(mirror_ds->ds_versions); 3762306a36Sopenharmony_ci kfree_rcu(mirror_ds, id_node.rcu); 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci/* Decode opaque device data and construct new_ds using it */ 4162306a36Sopenharmony_cistruct nfs4_ff_layout_ds * 4262306a36Sopenharmony_cinfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 4362306a36Sopenharmony_ci gfp_t gfp_flags) 4462306a36Sopenharmony_ci{ 4562306a36Sopenharmony_ci struct xdr_stream stream; 4662306a36Sopenharmony_ci struct xdr_buf buf; 4762306a36Sopenharmony_ci struct page *scratch; 4862306a36Sopenharmony_ci struct list_head dsaddrs; 4962306a36Sopenharmony_ci struct nfs4_pnfs_ds_addr *da; 5062306a36Sopenharmony_ci struct nfs4_ff_layout_ds *new_ds = NULL; 5162306a36Sopenharmony_ci struct nfs4_ff_ds_version *ds_versions = NULL; 5262306a36Sopenharmony_ci u32 mp_count; 5362306a36Sopenharmony_ci u32 version_count; 5462306a36Sopenharmony_ci __be32 *p; 5562306a36Sopenharmony_ci int i, ret = -ENOMEM; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci /* set up xdr stream */ 5862306a36Sopenharmony_ci scratch = alloc_page(gfp_flags); 5962306a36Sopenharmony_ci if (!scratch) 6062306a36Sopenharmony_ci goto out_err; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); 6362306a36Sopenharmony_ci if (!new_ds) 6462306a36Sopenharmony_ci goto out_scratch; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci nfs4_init_deviceid_node(&new_ds->id_node, 6762306a36Sopenharmony_ci server, 6862306a36Sopenharmony_ci &pdev->dev_id); 6962306a36Sopenharmony_ci INIT_LIST_HEAD(&dsaddrs); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 7262306a36Sopenharmony_ci xdr_set_scratch_page(&stream, scratch); 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci /* multipath count */ 7562306a36Sopenharmony_ci p = xdr_inline_decode(&stream, 4); 7662306a36Sopenharmony_ci if (unlikely(!p)) 7762306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 7862306a36Sopenharmony_ci mp_count = be32_to_cpup(p); 7962306a36Sopenharmony_ci dprintk("%s: multipath ds count %d\n", __func__, mp_count); 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci for (i = 0; i < mp_count; i++) { 8262306a36Sopenharmony_ci /* multipath ds */ 8362306a36Sopenharmony_ci da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, 8462306a36Sopenharmony_ci &stream, gfp_flags); 8562306a36Sopenharmony_ci if (da) 8662306a36Sopenharmony_ci list_add_tail(&da->da_node, &dsaddrs); 8762306a36Sopenharmony_ci } 8862306a36Sopenharmony_ci if (list_empty(&dsaddrs)) { 8962306a36Sopenharmony_ci dprintk("%s: no suitable DS addresses found\n", 9062306a36Sopenharmony_ci __func__); 9162306a36Sopenharmony_ci ret = -ENOMEDIUM; 9262306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 9362306a36Sopenharmony_ci } 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci /* version count */ 9662306a36Sopenharmony_ci p = xdr_inline_decode(&stream, 4); 9762306a36Sopenharmony_ci if (unlikely(!p)) 9862306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 9962306a36Sopenharmony_ci version_count = be32_to_cpup(p); 10062306a36Sopenharmony_ci dprintk("%s: version count %d\n", __func__, version_count); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci ds_versions = kcalloc(version_count, 10362306a36Sopenharmony_ci sizeof(struct nfs4_ff_ds_version), 10462306a36Sopenharmony_ci gfp_flags); 10562306a36Sopenharmony_ci if (!ds_versions) 10662306a36Sopenharmony_ci goto out_scratch; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci for (i = 0; i < version_count; i++) { 10962306a36Sopenharmony_ci /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + 11062306a36Sopenharmony_ci * tightly_coupled(4) */ 11162306a36Sopenharmony_ci p = xdr_inline_decode(&stream, 20); 11262306a36Sopenharmony_ci if (unlikely(!p)) 11362306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 11462306a36Sopenharmony_ci ds_versions[i].version = be32_to_cpup(p++); 11562306a36Sopenharmony_ci ds_versions[i].minor_version = be32_to_cpup(p++); 11662306a36Sopenharmony_ci ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++), 11762306a36Sopenharmony_ci server->nfs_client->cl_proto); 11862306a36Sopenharmony_ci ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++), 11962306a36Sopenharmony_ci server->nfs_client->cl_proto); 12062306a36Sopenharmony_ci ds_versions[i].tightly_coupled = be32_to_cpup(p); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) 12362306a36Sopenharmony_ci ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; 12462306a36Sopenharmony_ci if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) 12562306a36Sopenharmony_ci ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci /* 12862306a36Sopenharmony_ci * check for valid major/minor combination. 12962306a36Sopenharmony_ci * currently we support dataserver which talk: 13062306a36Sopenharmony_ci * v3, v4.0, v4.1, v4.2 13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_ci if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) || 13362306a36Sopenharmony_ci (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) { 13462306a36Sopenharmony_ci dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, 13562306a36Sopenharmony_ci i, ds_versions[i].version, 13662306a36Sopenharmony_ci ds_versions[i].minor_version); 13762306a36Sopenharmony_ci ret = -EPROTONOSUPPORT; 13862306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 13962306a36Sopenharmony_ci } 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", 14262306a36Sopenharmony_ci __func__, i, ds_versions[i].version, 14362306a36Sopenharmony_ci ds_versions[i].minor_version, 14462306a36Sopenharmony_ci ds_versions[i].rsize, 14562306a36Sopenharmony_ci ds_versions[i].wsize, 14662306a36Sopenharmony_ci ds_versions[i].tightly_coupled); 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci new_ds->ds_versions = ds_versions; 15062306a36Sopenharmony_ci new_ds->ds_versions_cnt = version_count; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 15362306a36Sopenharmony_ci if (!new_ds->ds) 15462306a36Sopenharmony_ci goto out_err_drain_dsaddrs; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci /* If DS was already in cache, free ds addrs */ 15762306a36Sopenharmony_ci while (!list_empty(&dsaddrs)) { 15862306a36Sopenharmony_ci da = list_first_entry(&dsaddrs, 15962306a36Sopenharmony_ci struct nfs4_pnfs_ds_addr, 16062306a36Sopenharmony_ci da_node); 16162306a36Sopenharmony_ci list_del_init(&da->da_node); 16262306a36Sopenharmony_ci kfree(da->da_remotestr); 16362306a36Sopenharmony_ci kfree(da); 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci __free_page(scratch); 16762306a36Sopenharmony_ci return new_ds; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ciout_err_drain_dsaddrs: 17062306a36Sopenharmony_ci while (!list_empty(&dsaddrs)) { 17162306a36Sopenharmony_ci da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 17262306a36Sopenharmony_ci da_node); 17362306a36Sopenharmony_ci list_del_init(&da->da_node); 17462306a36Sopenharmony_ci kfree(da->da_remotestr); 17562306a36Sopenharmony_ci kfree(da); 17662306a36Sopenharmony_ci } 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci kfree(ds_versions); 17962306a36Sopenharmony_ciout_scratch: 18062306a36Sopenharmony_ci __free_page(scratch); 18162306a36Sopenharmony_ciout_err: 18262306a36Sopenharmony_ci kfree(new_ds); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci dprintk("%s ERROR: returning %d\n", __func__, ret); 18562306a36Sopenharmony_ci return NULL; 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_cistatic void extend_ds_error(struct nfs4_ff_layout_ds_err *err, 18962306a36Sopenharmony_ci u64 offset, u64 length) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci u64 end; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci end = max_t(u64, pnfs_end_offset(err->offset, err->length), 19462306a36Sopenharmony_ci pnfs_end_offset(offset, length)); 19562306a36Sopenharmony_ci err->offset = min_t(u64, err->offset, offset); 19662306a36Sopenharmony_ci err->length = end - err->offset; 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_cistatic int 20062306a36Sopenharmony_ciff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, 20162306a36Sopenharmony_ci const struct nfs4_ff_layout_ds_err *e2) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci int ret; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci if (e1->opnum != e2->opnum) 20662306a36Sopenharmony_ci return e1->opnum < e2->opnum ? -1 : 1; 20762306a36Sopenharmony_ci if (e1->status != e2->status) 20862306a36Sopenharmony_ci return e1->status < e2->status ? -1 : 1; 20962306a36Sopenharmony_ci ret = memcmp(e1->stateid.data, e2->stateid.data, 21062306a36Sopenharmony_ci sizeof(e1->stateid.data)); 21162306a36Sopenharmony_ci if (ret != 0) 21262306a36Sopenharmony_ci return ret; 21362306a36Sopenharmony_ci ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); 21462306a36Sopenharmony_ci if (ret != 0) 21562306a36Sopenharmony_ci return ret; 21662306a36Sopenharmony_ci if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) 21762306a36Sopenharmony_ci return -1; 21862306a36Sopenharmony_ci if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) 21962306a36Sopenharmony_ci return 1; 22062306a36Sopenharmony_ci /* If ranges overlap or are contiguous, they are the same */ 22162306a36Sopenharmony_ci return 0; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic void 22562306a36Sopenharmony_ciff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, 22662306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *dserr) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *err, *tmp; 22962306a36Sopenharmony_ci struct list_head *head = &flo->error_list; 23062306a36Sopenharmony_ci int match; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci /* Do insertion sort w/ merges */ 23362306a36Sopenharmony_ci list_for_each_entry_safe(err, tmp, &flo->error_list, list) { 23462306a36Sopenharmony_ci match = ff_ds_error_match(err, dserr); 23562306a36Sopenharmony_ci if (match < 0) 23662306a36Sopenharmony_ci continue; 23762306a36Sopenharmony_ci if (match > 0) { 23862306a36Sopenharmony_ci /* Add entry "dserr" _before_ entry "err" */ 23962306a36Sopenharmony_ci head = &err->list; 24062306a36Sopenharmony_ci break; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci /* Entries match, so merge "err" into "dserr" */ 24362306a36Sopenharmony_ci extend_ds_error(dserr, err->offset, err->length); 24462306a36Sopenharmony_ci list_replace(&err->list, &dserr->list); 24562306a36Sopenharmony_ci kfree(err); 24662306a36Sopenharmony_ci return; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci list_add_tail(&dserr->list, head); 25062306a36Sopenharmony_ci} 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ciint ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 25362306a36Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror, u64 offset, 25462306a36Sopenharmony_ci u64 length, int status, enum nfs_opnum4 opnum, 25562306a36Sopenharmony_ci gfp_t gfp_flags) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *dserr; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci if (status == 0) 26062306a36Sopenharmony_ci return 0; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci if (IS_ERR_OR_NULL(mirror->mirror_ds)) 26362306a36Sopenharmony_ci return -EINVAL; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci dserr = kmalloc(sizeof(*dserr), gfp_flags); 26662306a36Sopenharmony_ci if (!dserr) 26762306a36Sopenharmony_ci return -ENOMEM; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci INIT_LIST_HEAD(&dserr->list); 27062306a36Sopenharmony_ci dserr->offset = offset; 27162306a36Sopenharmony_ci dserr->length = length; 27262306a36Sopenharmony_ci dserr->status = status; 27362306a36Sopenharmony_ci dserr->opnum = opnum; 27462306a36Sopenharmony_ci nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); 27562306a36Sopenharmony_ci memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, 27662306a36Sopenharmony_ci NFS4_DEVICEID4_SIZE); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci spin_lock(&flo->generic_hdr.plh_inode->i_lock); 27962306a36Sopenharmony_ci ff_layout_add_ds_error_locked(flo, dserr); 28062306a36Sopenharmony_ci spin_unlock(&flo->generic_hdr.plh_inode->i_lock); 28162306a36Sopenharmony_ci return 0; 28262306a36Sopenharmony_ci} 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_cistatic const struct cred * 28562306a36Sopenharmony_ciff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci const struct cred *cred, __rcu **pcred; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (iomode == IOMODE_READ) 29062306a36Sopenharmony_ci pcred = &mirror->ro_cred; 29162306a36Sopenharmony_ci else 29262306a36Sopenharmony_ci pcred = &mirror->rw_cred; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci rcu_read_lock(); 29562306a36Sopenharmony_ci do { 29662306a36Sopenharmony_ci cred = rcu_dereference(*pcred); 29762306a36Sopenharmony_ci if (!cred) 29862306a36Sopenharmony_ci break; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci cred = get_cred_rcu(cred); 30162306a36Sopenharmony_ci } while(!cred); 30262306a36Sopenharmony_ci rcu_read_unlock(); 30362306a36Sopenharmony_ci return cred; 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_cistruct nfs_fh * 30762306a36Sopenharmony_cinfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror) 30862306a36Sopenharmony_ci{ 30962306a36Sopenharmony_ci /* FIXME: For now assume there is only 1 version available for the DS */ 31062306a36Sopenharmony_ci return &mirror->fh_versions[0]; 31162306a36Sopenharmony_ci} 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_civoid 31462306a36Sopenharmony_cinfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, 31562306a36Sopenharmony_ci nfs4_stateid *stateid) 31662306a36Sopenharmony_ci{ 31762306a36Sopenharmony_ci if (nfs4_ff_layout_ds_version(mirror) == 4) 31862306a36Sopenharmony_ci nfs4_stateid_copy(stateid, &mirror->stateid); 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_cistatic bool 32262306a36Sopenharmony_ciff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo, 32362306a36Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror) 32462306a36Sopenharmony_ci{ 32562306a36Sopenharmony_ci if (mirror == NULL) 32662306a36Sopenharmony_ci goto outerr; 32762306a36Sopenharmony_ci if (mirror->mirror_ds == NULL) { 32862306a36Sopenharmony_ci struct nfs4_deviceid_node *node; 32962306a36Sopenharmony_ci struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), 33262306a36Sopenharmony_ci &mirror->devid, lo->plh_lc_cred, 33362306a36Sopenharmony_ci GFP_KERNEL); 33462306a36Sopenharmony_ci if (node) 33562306a36Sopenharmony_ci mirror_ds = FF_LAYOUT_MIRROR_DS(node); 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* check for race with another call to this function */ 33862306a36Sopenharmony_ci if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && 33962306a36Sopenharmony_ci mirror_ds != ERR_PTR(-ENODEV)) 34062306a36Sopenharmony_ci nfs4_put_deviceid_node(node); 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci if (IS_ERR(mirror->mirror_ds)) 34462306a36Sopenharmony_ci goto outerr; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci return true; 34762306a36Sopenharmony_ciouterr: 34862306a36Sopenharmony_ci return false; 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci/** 35262306a36Sopenharmony_ci * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call 35362306a36Sopenharmony_ci * @lseg: the layout segment we're operating on 35462306a36Sopenharmony_ci * @mirror: layout mirror describing the DS to use 35562306a36Sopenharmony_ci * @fail_return: return layout on connect failure? 35662306a36Sopenharmony_ci * 35762306a36Sopenharmony_ci * Try to prepare a DS connection to accept an RPC call. This involves 35862306a36Sopenharmony_ci * selecting a mirror to use and connecting the client to it if it's not 35962306a36Sopenharmony_ci * already connected. 36062306a36Sopenharmony_ci * 36162306a36Sopenharmony_ci * Since we only need a single functioning mirror to satisfy a read, we don't 36262306a36Sopenharmony_ci * want to return the layout if there is one. For writes though, any down 36362306a36Sopenharmony_ci * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish 36462306a36Sopenharmony_ci * between the two cases. 36562306a36Sopenharmony_ci * 36662306a36Sopenharmony_ci * Returns a pointer to a connected DS object on success or NULL on failure. 36762306a36Sopenharmony_ci */ 36862306a36Sopenharmony_cistruct nfs4_pnfs_ds * 36962306a36Sopenharmony_cinfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, 37062306a36Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror, 37162306a36Sopenharmony_ci bool fail_return) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci struct nfs4_pnfs_ds *ds = NULL; 37462306a36Sopenharmony_ci struct inode *ino = lseg->pls_layout->plh_inode; 37562306a36Sopenharmony_ci struct nfs_server *s = NFS_SERVER(ino); 37662306a36Sopenharmony_ci unsigned int max_payload; 37762306a36Sopenharmony_ci int status; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) 38062306a36Sopenharmony_ci goto noconnect; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci ds = mirror->mirror_ds->ds; 38362306a36Sopenharmony_ci if (READ_ONCE(ds->ds_clp)) 38462306a36Sopenharmony_ci goto out; 38562306a36Sopenharmony_ci /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ 38662306a36Sopenharmony_ci smp_rmb(); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci /* FIXME: For now we assume the server sent only one version of NFS 38962306a36Sopenharmony_ci * to use for the DS. 39062306a36Sopenharmony_ci */ 39162306a36Sopenharmony_ci status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node, 39262306a36Sopenharmony_ci dataserver_timeo, dataserver_retrans, 39362306a36Sopenharmony_ci mirror->mirror_ds->ds_versions[0].version, 39462306a36Sopenharmony_ci mirror->mirror_ds->ds_versions[0].minor_version); 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci /* connect success, check rsize/wsize limit */ 39762306a36Sopenharmony_ci if (!status) { 39862306a36Sopenharmony_ci max_payload = 39962306a36Sopenharmony_ci nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), 40062306a36Sopenharmony_ci NULL); 40162306a36Sopenharmony_ci if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) 40262306a36Sopenharmony_ci mirror->mirror_ds->ds_versions[0].rsize = max_payload; 40362306a36Sopenharmony_ci if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) 40462306a36Sopenharmony_ci mirror->mirror_ds->ds_versions[0].wsize = max_payload; 40562306a36Sopenharmony_ci goto out; 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_cinoconnect: 40862306a36Sopenharmony_ci ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 40962306a36Sopenharmony_ci mirror, lseg->pls_range.offset, 41062306a36Sopenharmony_ci lseg->pls_range.length, NFS4ERR_NXIO, 41162306a36Sopenharmony_ci OP_ILLEGAL, GFP_NOIO); 41262306a36Sopenharmony_ci ff_layout_send_layouterror(lseg); 41362306a36Sopenharmony_ci if (fail_return || !ff_layout_has_available_ds(lseg)) 41462306a36Sopenharmony_ci pnfs_error_mark_layout_for_return(ino, lseg); 41562306a36Sopenharmony_ci ds = NULL; 41662306a36Sopenharmony_ciout: 41762306a36Sopenharmony_ci return ds; 41862306a36Sopenharmony_ci} 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ciconst struct cred * 42162306a36Sopenharmony_ciff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, 42262306a36Sopenharmony_ci const struct pnfs_layout_range *range, 42362306a36Sopenharmony_ci const struct cred *mdscred) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci const struct cred *cred; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { 42862306a36Sopenharmony_ci cred = ff_layout_get_mirror_cred(mirror, range->iomode); 42962306a36Sopenharmony_ci if (!cred) 43062306a36Sopenharmony_ci cred = get_cred(mdscred); 43162306a36Sopenharmony_ci } else { 43262306a36Sopenharmony_ci cred = get_cred(mdscred); 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci return cred; 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci/** 43862306a36Sopenharmony_ci * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client 43962306a36Sopenharmony_ci * @mirror: pointer to the mirror 44062306a36Sopenharmony_ci * @ds_clp: nfs_client for the DS 44162306a36Sopenharmony_ci * @inode: pointer to inode 44262306a36Sopenharmony_ci * 44362306a36Sopenharmony_ci * Find or create a DS rpc client with th MDS server rpc client auth flavor 44462306a36Sopenharmony_ci * in the nfs_client cl_ds_clients list. 44562306a36Sopenharmony_ci */ 44662306a36Sopenharmony_cistruct rpc_clnt * 44762306a36Sopenharmony_cinfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, 44862306a36Sopenharmony_ci struct nfs_client *ds_clp, struct inode *inode) 44962306a36Sopenharmony_ci{ 45062306a36Sopenharmony_ci switch (mirror->mirror_ds->ds_versions[0].version) { 45162306a36Sopenharmony_ci case 3: 45262306a36Sopenharmony_ci /* For NFSv3 DS, flavor is set when creating DS connections */ 45362306a36Sopenharmony_ci return ds_clp->cl_rpcclient; 45462306a36Sopenharmony_ci case 4: 45562306a36Sopenharmony_ci return nfs4_find_or_create_ds_client(ds_clp, inode); 45662306a36Sopenharmony_ci default: 45762306a36Sopenharmony_ci BUG(); 45862306a36Sopenharmony_ci } 45962306a36Sopenharmony_ci} 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_civoid ff_layout_free_ds_ioerr(struct list_head *head) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *err; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci while (!list_empty(head)) { 46662306a36Sopenharmony_ci err = list_first_entry(head, 46762306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err, 46862306a36Sopenharmony_ci list); 46962306a36Sopenharmony_ci list_del(&err->list); 47062306a36Sopenharmony_ci kfree(err); 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci/* called with inode i_lock held */ 47562306a36Sopenharmony_ciint ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *err; 47862306a36Sopenharmony_ci __be32 *p; 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci list_for_each_entry(err, head, list) { 48162306a36Sopenharmony_ci /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) 48262306a36Sopenharmony_ci * + array length + deviceid(NFS4_DEVICEID4_SIZE) 48362306a36Sopenharmony_ci * + status(4) + opnum(4) 48462306a36Sopenharmony_ci */ 48562306a36Sopenharmony_ci p = xdr_reserve_space(xdr, 48662306a36Sopenharmony_ci 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); 48762306a36Sopenharmony_ci if (unlikely(!p)) 48862306a36Sopenharmony_ci return -ENOBUFS; 48962306a36Sopenharmony_ci p = xdr_encode_hyper(p, err->offset); 49062306a36Sopenharmony_ci p = xdr_encode_hyper(p, err->length); 49162306a36Sopenharmony_ci p = xdr_encode_opaque_fixed(p, &err->stateid, 49262306a36Sopenharmony_ci NFS4_STATEID_SIZE); 49362306a36Sopenharmony_ci /* Encode 1 error */ 49462306a36Sopenharmony_ci *p++ = cpu_to_be32(1); 49562306a36Sopenharmony_ci p = xdr_encode_opaque_fixed(p, &err->deviceid, 49662306a36Sopenharmony_ci NFS4_DEVICEID4_SIZE); 49762306a36Sopenharmony_ci *p++ = cpu_to_be32(err->status); 49862306a36Sopenharmony_ci *p++ = cpu_to_be32(err->opnum); 49962306a36Sopenharmony_ci dprintk("%s: offset %llu length %llu status %d op %d\n", 50062306a36Sopenharmony_ci __func__, err->offset, err->length, err->status, 50162306a36Sopenharmony_ci err->opnum); 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci return 0; 50562306a36Sopenharmony_ci} 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_cistatic 50862306a36Sopenharmony_ciunsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, 50962306a36Sopenharmony_ci const struct pnfs_layout_range *range, 51062306a36Sopenharmony_ci struct list_head *head, 51162306a36Sopenharmony_ci unsigned int maxnum) 51262306a36Sopenharmony_ci{ 51362306a36Sopenharmony_ci struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); 51462306a36Sopenharmony_ci struct inode *inode = lo->plh_inode; 51562306a36Sopenharmony_ci struct nfs4_ff_layout_ds_err *err, *n; 51662306a36Sopenharmony_ci unsigned int ret = 0; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci spin_lock(&inode->i_lock); 51962306a36Sopenharmony_ci list_for_each_entry_safe(err, n, &flo->error_list, list) { 52062306a36Sopenharmony_ci if (!pnfs_is_range_intersecting(err->offset, 52162306a36Sopenharmony_ci pnfs_end_offset(err->offset, err->length), 52262306a36Sopenharmony_ci range->offset, 52362306a36Sopenharmony_ci pnfs_end_offset(range->offset, range->length))) 52462306a36Sopenharmony_ci continue; 52562306a36Sopenharmony_ci if (!maxnum) 52662306a36Sopenharmony_ci break; 52762306a36Sopenharmony_ci list_move(&err->list, head); 52862306a36Sopenharmony_ci maxnum--; 52962306a36Sopenharmony_ci ret++; 53062306a36Sopenharmony_ci } 53162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 53262306a36Sopenharmony_ci return ret; 53362306a36Sopenharmony_ci} 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ciunsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, 53662306a36Sopenharmony_ci const struct pnfs_layout_range *range, 53762306a36Sopenharmony_ci struct list_head *head, 53862306a36Sopenharmony_ci unsigned int maxnum) 53962306a36Sopenharmony_ci{ 54062306a36Sopenharmony_ci unsigned int ret; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); 54362306a36Sopenharmony_ci /* If we're over the max, discard all remaining entries */ 54462306a36Sopenharmony_ci if (ret == maxnum) { 54562306a36Sopenharmony_ci LIST_HEAD(discard); 54662306a36Sopenharmony_ci do_layout_fetch_ds_ioerr(lo, range, &discard, -1); 54762306a36Sopenharmony_ci ff_layout_free_ds_ioerr(&discard); 54862306a36Sopenharmony_ci } 54962306a36Sopenharmony_ci return ret; 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_cistatic bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) 55362306a36Sopenharmony_ci{ 55462306a36Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror; 55562306a36Sopenharmony_ci struct nfs4_deviceid_node *devid; 55662306a36Sopenharmony_ci u32 idx; 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 55962306a36Sopenharmony_ci mirror = FF_LAYOUT_COMP(lseg, idx); 56062306a36Sopenharmony_ci if (mirror) { 56162306a36Sopenharmony_ci if (!mirror->mirror_ds) 56262306a36Sopenharmony_ci return true; 56362306a36Sopenharmony_ci if (IS_ERR(mirror->mirror_ds)) 56462306a36Sopenharmony_ci continue; 56562306a36Sopenharmony_ci devid = &mirror->mirror_ds->id_node; 56662306a36Sopenharmony_ci if (!nfs4_test_deviceid_unavailable(devid)) 56762306a36Sopenharmony_ci return true; 56862306a36Sopenharmony_ci } 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci return false; 57262306a36Sopenharmony_ci} 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_cistatic bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) 57562306a36Sopenharmony_ci{ 57662306a36Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror; 57762306a36Sopenharmony_ci struct nfs4_deviceid_node *devid; 57862306a36Sopenharmony_ci u32 idx; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 58162306a36Sopenharmony_ci mirror = FF_LAYOUT_COMP(lseg, idx); 58262306a36Sopenharmony_ci if (!mirror || IS_ERR(mirror->mirror_ds)) 58362306a36Sopenharmony_ci return false; 58462306a36Sopenharmony_ci if (!mirror->mirror_ds) 58562306a36Sopenharmony_ci continue; 58662306a36Sopenharmony_ci devid = &mirror->mirror_ds->id_node; 58762306a36Sopenharmony_ci if (nfs4_test_deviceid_unavailable(devid)) 58862306a36Sopenharmony_ci return false; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; 59262306a36Sopenharmony_ci} 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_cistatic bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) 59562306a36Sopenharmony_ci{ 59662306a36Sopenharmony_ci if (lseg->pls_range.iomode == IOMODE_READ) 59762306a36Sopenharmony_ci return ff_read_layout_has_available_ds(lseg); 59862306a36Sopenharmony_ci /* Note: RW layout needs all mirrors available */ 59962306a36Sopenharmony_ci return ff_rw_layout_has_available_ds(lseg); 60062306a36Sopenharmony_ci} 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_cibool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg) 60362306a36Sopenharmony_ci{ 60462306a36Sopenharmony_ci return ff_layout_no_fallback_to_mds(lseg) || 60562306a36Sopenharmony_ci ff_layout_has_available_ds(lseg); 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cibool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci return lseg->pls_range.iomode == IOMODE_RW && 61162306a36Sopenharmony_ci ff_layout_no_read_on_rw(lseg); 61262306a36Sopenharmony_ci} 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_cimodule_param(dataserver_retrans, uint, 0644); 61562306a36Sopenharmony_ciMODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " 61662306a36Sopenharmony_ci "retries a request before it attempts further " 61762306a36Sopenharmony_ci " recovery action."); 61862306a36Sopenharmony_cimodule_param(dataserver_timeo, uint, 0644); 61962306a36Sopenharmony_ciMODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " 62062306a36Sopenharmony_ci "NFSv4.1 client waits for a response from a " 62162306a36Sopenharmony_ci " data server before it retries an NFS request."); 622