18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Device operations for the pnfs nfs4 file layout driver. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (c) 2014, Primary Data, Inc. All rights reserved. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Tao Peng <bergwolf@primarydata.com> 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/nfs_fs.h> 118c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 128c2ecf20Sopenharmony_ci#include <linux/module.h> 138c2ecf20Sopenharmony_ci#include <linux/sunrpc/addr.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include "../internal.h" 168c2ecf20Sopenharmony_ci#include "../nfs4session.h" 178c2ecf20Sopenharmony_ci#include "flexfilelayout.h" 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#define NFSDBG_FACILITY NFSDBG_PNFS_LD 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_cistatic unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; 228c2ecf20Sopenharmony_cistatic unsigned int dataserver_retrans; 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_cistatic bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_civoid nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 278c2ecf20Sopenharmony_ci{ 288c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(mirror_ds)) 298c2ecf20Sopenharmony_ci nfs4_put_deviceid_node(&mirror_ds->id_node); 308c2ecf20Sopenharmony_ci} 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_civoid nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) 338c2ecf20Sopenharmony_ci{ 348c2ecf20Sopenharmony_ci nfs4_print_deviceid(&mirror_ds->id_node.deviceid); 358c2ecf20Sopenharmony_ci nfs4_pnfs_ds_put(mirror_ds->ds); 368c2ecf20Sopenharmony_ci kfree(mirror_ds->ds_versions); 378c2ecf20Sopenharmony_ci kfree_rcu(mirror_ds, id_node.rcu); 388c2ecf20Sopenharmony_ci} 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci/* Decode opaque device data and construct new_ds using it */ 418c2ecf20Sopenharmony_cistruct nfs4_ff_layout_ds * 428c2ecf20Sopenharmony_cinfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 438c2ecf20Sopenharmony_ci gfp_t gfp_flags) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci struct xdr_stream stream; 468c2ecf20Sopenharmony_ci struct xdr_buf buf; 478c2ecf20Sopenharmony_ci struct page *scratch; 488c2ecf20Sopenharmony_ci struct list_head dsaddrs; 498c2ecf20Sopenharmony_ci struct nfs4_pnfs_ds_addr *da; 508c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds *new_ds = NULL; 518c2ecf20Sopenharmony_ci struct nfs4_ff_ds_version *ds_versions = NULL; 528c2ecf20Sopenharmony_ci u32 mp_count; 538c2ecf20Sopenharmony_ci u32 version_count; 548c2ecf20Sopenharmony_ci __be32 *p; 558c2ecf20Sopenharmony_ci int i, ret = -ENOMEM; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci /* set up xdr stream */ 588c2ecf20Sopenharmony_ci scratch = alloc_page(gfp_flags); 598c2ecf20Sopenharmony_ci if (!scratch) 608c2ecf20Sopenharmony_ci goto out_err; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); 638c2ecf20Sopenharmony_ci if (!new_ds) 648c2ecf20Sopenharmony_ci goto out_scratch; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci nfs4_init_deviceid_node(&new_ds->id_node, 678c2ecf20Sopenharmony_ci server, 688c2ecf20Sopenharmony_ci &pdev->dev_id); 698c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dsaddrs); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); 728c2ecf20Sopenharmony_ci xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci /* multipath count */ 758c2ecf20Sopenharmony_ci p = xdr_inline_decode(&stream, 4); 768c2ecf20Sopenharmony_ci if (unlikely(!p)) 778c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 788c2ecf20Sopenharmony_ci mp_count = be32_to_cpup(p); 798c2ecf20Sopenharmony_ci dprintk("%s: multipath ds count %d\n", __func__, mp_count); 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci for (i = 0; i < mp_count; i++) { 828c2ecf20Sopenharmony_ci /* multipath ds */ 838c2ecf20Sopenharmony_ci da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, 848c2ecf20Sopenharmony_ci &stream, gfp_flags); 858c2ecf20Sopenharmony_ci if (da) 868c2ecf20Sopenharmony_ci list_add_tail(&da->da_node, &dsaddrs); 878c2ecf20Sopenharmony_ci } 888c2ecf20Sopenharmony_ci if (list_empty(&dsaddrs)) { 898c2ecf20Sopenharmony_ci dprintk("%s: no suitable DS addresses found\n", 908c2ecf20Sopenharmony_ci __func__); 918c2ecf20Sopenharmony_ci ret = -ENOMEDIUM; 928c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 938c2ecf20Sopenharmony_ci } 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci /* version count */ 968c2ecf20Sopenharmony_ci p = xdr_inline_decode(&stream, 4); 978c2ecf20Sopenharmony_ci if (unlikely(!p)) 988c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 998c2ecf20Sopenharmony_ci version_count = be32_to_cpup(p); 1008c2ecf20Sopenharmony_ci dprintk("%s: version count %d\n", __func__, version_count); 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci ds_versions = kcalloc(version_count, 1038c2ecf20Sopenharmony_ci sizeof(struct nfs4_ff_ds_version), 1048c2ecf20Sopenharmony_ci gfp_flags); 1058c2ecf20Sopenharmony_ci if (!ds_versions) 1068c2ecf20Sopenharmony_ci goto out_scratch; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci for (i = 0; i < version_count; i++) { 1098c2ecf20Sopenharmony_ci /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + 1108c2ecf20Sopenharmony_ci * tightly_coupled(4) */ 1118c2ecf20Sopenharmony_ci p = xdr_inline_decode(&stream, 20); 1128c2ecf20Sopenharmony_ci if (unlikely(!p)) 1138c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 1148c2ecf20Sopenharmony_ci ds_versions[i].version = be32_to_cpup(p++); 1158c2ecf20Sopenharmony_ci ds_versions[i].minor_version = be32_to_cpup(p++); 1168c2ecf20Sopenharmony_ci ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); 1178c2ecf20Sopenharmony_ci ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); 1188c2ecf20Sopenharmony_ci ds_versions[i].tightly_coupled = be32_to_cpup(p); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) 1218c2ecf20Sopenharmony_ci ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; 1228c2ecf20Sopenharmony_ci if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) 1238c2ecf20Sopenharmony_ci ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci /* 1268c2ecf20Sopenharmony_ci * check for valid major/minor combination. 1278c2ecf20Sopenharmony_ci * currently we support dataserver which talk: 1288c2ecf20Sopenharmony_ci * v3, v4.0, v4.1, v4.2 1298c2ecf20Sopenharmony_ci */ 1308c2ecf20Sopenharmony_ci if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) || 1318c2ecf20Sopenharmony_ci (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) { 1328c2ecf20Sopenharmony_ci dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, 1338c2ecf20Sopenharmony_ci i, ds_versions[i].version, 1348c2ecf20Sopenharmony_ci ds_versions[i].minor_version); 1358c2ecf20Sopenharmony_ci ret = -EPROTONOSUPPORT; 1368c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 1378c2ecf20Sopenharmony_ci } 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", 1408c2ecf20Sopenharmony_ci __func__, i, ds_versions[i].version, 1418c2ecf20Sopenharmony_ci ds_versions[i].minor_version, 1428c2ecf20Sopenharmony_ci ds_versions[i].rsize, 1438c2ecf20Sopenharmony_ci ds_versions[i].wsize, 1448c2ecf20Sopenharmony_ci ds_versions[i].tightly_coupled); 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci new_ds->ds_versions = ds_versions; 1488c2ecf20Sopenharmony_ci new_ds->ds_versions_cnt = version_count; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); 1518c2ecf20Sopenharmony_ci if (!new_ds->ds) 1528c2ecf20Sopenharmony_ci goto out_err_drain_dsaddrs; 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci /* If DS was already in cache, free ds addrs */ 1558c2ecf20Sopenharmony_ci while (!list_empty(&dsaddrs)) { 1568c2ecf20Sopenharmony_ci da = list_first_entry(&dsaddrs, 1578c2ecf20Sopenharmony_ci struct nfs4_pnfs_ds_addr, 1588c2ecf20Sopenharmony_ci da_node); 1598c2ecf20Sopenharmony_ci list_del_init(&da->da_node); 1608c2ecf20Sopenharmony_ci kfree(da->da_remotestr); 1618c2ecf20Sopenharmony_ci kfree(da); 1628c2ecf20Sopenharmony_ci } 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci __free_page(scratch); 1658c2ecf20Sopenharmony_ci return new_ds; 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ciout_err_drain_dsaddrs: 1688c2ecf20Sopenharmony_ci while (!list_empty(&dsaddrs)) { 1698c2ecf20Sopenharmony_ci da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, 1708c2ecf20Sopenharmony_ci da_node); 1718c2ecf20Sopenharmony_ci list_del_init(&da->da_node); 1728c2ecf20Sopenharmony_ci kfree(da->da_remotestr); 1738c2ecf20Sopenharmony_ci kfree(da); 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci kfree(ds_versions); 1778c2ecf20Sopenharmony_ciout_scratch: 1788c2ecf20Sopenharmony_ci __free_page(scratch); 1798c2ecf20Sopenharmony_ciout_err: 1808c2ecf20Sopenharmony_ci kfree(new_ds); 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci dprintk("%s ERROR: returning %d\n", __func__, ret); 1838c2ecf20Sopenharmony_ci return NULL; 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_cistatic void extend_ds_error(struct nfs4_ff_layout_ds_err *err, 1878c2ecf20Sopenharmony_ci u64 offset, u64 length) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci u64 end; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci end = max_t(u64, pnfs_end_offset(err->offset, err->length), 1928c2ecf20Sopenharmony_ci pnfs_end_offset(offset, length)); 1938c2ecf20Sopenharmony_ci err->offset = min_t(u64, err->offset, offset); 1948c2ecf20Sopenharmony_ci err->length = end - err->offset; 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_cistatic int 1988c2ecf20Sopenharmony_ciff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, 1998c2ecf20Sopenharmony_ci const struct nfs4_ff_layout_ds_err *e2) 2008c2ecf20Sopenharmony_ci{ 2018c2ecf20Sopenharmony_ci int ret; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci if (e1->opnum != e2->opnum) 2048c2ecf20Sopenharmony_ci return e1->opnum < e2->opnum ? -1 : 1; 2058c2ecf20Sopenharmony_ci if (e1->status != e2->status) 2068c2ecf20Sopenharmony_ci return e1->status < e2->status ? -1 : 1; 2078c2ecf20Sopenharmony_ci ret = memcmp(e1->stateid.data, e2->stateid.data, 2088c2ecf20Sopenharmony_ci sizeof(e1->stateid.data)); 2098c2ecf20Sopenharmony_ci if (ret != 0) 2108c2ecf20Sopenharmony_ci return ret; 2118c2ecf20Sopenharmony_ci ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); 2128c2ecf20Sopenharmony_ci if (ret != 0) 2138c2ecf20Sopenharmony_ci return ret; 2148c2ecf20Sopenharmony_ci if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) 2158c2ecf20Sopenharmony_ci return -1; 2168c2ecf20Sopenharmony_ci if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) 2178c2ecf20Sopenharmony_ci return 1; 2188c2ecf20Sopenharmony_ci /* If ranges overlap or are contiguous, they are the same */ 2198c2ecf20Sopenharmony_ci return 0; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistatic void 2238c2ecf20Sopenharmony_ciff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, 2248c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *dserr) 2258c2ecf20Sopenharmony_ci{ 2268c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *err, *tmp; 2278c2ecf20Sopenharmony_ci struct list_head *head = &flo->error_list; 2288c2ecf20Sopenharmony_ci int match; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci /* Do insertion sort w/ merges */ 2318c2ecf20Sopenharmony_ci list_for_each_entry_safe(err, tmp, &flo->error_list, list) { 2328c2ecf20Sopenharmony_ci match = ff_ds_error_match(err, dserr); 2338c2ecf20Sopenharmony_ci if (match < 0) 2348c2ecf20Sopenharmony_ci continue; 2358c2ecf20Sopenharmony_ci if (match > 0) { 2368c2ecf20Sopenharmony_ci /* Add entry "dserr" _before_ entry "err" */ 2378c2ecf20Sopenharmony_ci head = &err->list; 2388c2ecf20Sopenharmony_ci break; 2398c2ecf20Sopenharmony_ci } 2408c2ecf20Sopenharmony_ci /* Entries match, so merge "err" into "dserr" */ 2418c2ecf20Sopenharmony_ci extend_ds_error(dserr, err->offset, err->length); 2428c2ecf20Sopenharmony_ci list_replace(&err->list, &dserr->list); 2438c2ecf20Sopenharmony_ci kfree(err); 2448c2ecf20Sopenharmony_ci return; 2458c2ecf20Sopenharmony_ci } 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci list_add_tail(&dserr->list, head); 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ciint ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, 2518c2ecf20Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror, u64 offset, 2528c2ecf20Sopenharmony_ci u64 length, int status, enum nfs_opnum4 opnum, 2538c2ecf20Sopenharmony_ci gfp_t gfp_flags) 2548c2ecf20Sopenharmony_ci{ 2558c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *dserr; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci if (status == 0) 2588c2ecf20Sopenharmony_ci return 0; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(mirror->mirror_ds)) 2618c2ecf20Sopenharmony_ci return -EINVAL; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci dserr = kmalloc(sizeof(*dserr), gfp_flags); 2648c2ecf20Sopenharmony_ci if (!dserr) 2658c2ecf20Sopenharmony_ci return -ENOMEM; 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dserr->list); 2688c2ecf20Sopenharmony_ci dserr->offset = offset; 2698c2ecf20Sopenharmony_ci dserr->length = length; 2708c2ecf20Sopenharmony_ci dserr->status = status; 2718c2ecf20Sopenharmony_ci dserr->opnum = opnum; 2728c2ecf20Sopenharmony_ci nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); 2738c2ecf20Sopenharmony_ci memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, 2748c2ecf20Sopenharmony_ci NFS4_DEVICEID4_SIZE); 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci spin_lock(&flo->generic_hdr.plh_inode->i_lock); 2778c2ecf20Sopenharmony_ci ff_layout_add_ds_error_locked(flo, dserr); 2788c2ecf20Sopenharmony_ci spin_unlock(&flo->generic_hdr.plh_inode->i_lock); 2798c2ecf20Sopenharmony_ci return 0; 2808c2ecf20Sopenharmony_ci} 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_cistatic const struct cred * 2838c2ecf20Sopenharmony_ciff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) 2848c2ecf20Sopenharmony_ci{ 2858c2ecf20Sopenharmony_ci const struct cred *cred, __rcu **pcred; 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci if (iomode == IOMODE_READ) 2888c2ecf20Sopenharmony_ci pcred = &mirror->ro_cred; 2898c2ecf20Sopenharmony_ci else 2908c2ecf20Sopenharmony_ci pcred = &mirror->rw_cred; 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci rcu_read_lock(); 2938c2ecf20Sopenharmony_ci do { 2948c2ecf20Sopenharmony_ci cred = rcu_dereference(*pcred); 2958c2ecf20Sopenharmony_ci if (!cred) 2968c2ecf20Sopenharmony_ci break; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci cred = get_cred_rcu(cred); 2998c2ecf20Sopenharmony_ci } while(!cred); 3008c2ecf20Sopenharmony_ci rcu_read_unlock(); 3018c2ecf20Sopenharmony_ci return cred; 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_cistruct nfs_fh * 3058c2ecf20Sopenharmony_cinfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror) 3068c2ecf20Sopenharmony_ci{ 3078c2ecf20Sopenharmony_ci /* FIXME: For now assume there is only 1 version available for the DS */ 3088c2ecf20Sopenharmony_ci return &mirror->fh_versions[0]; 3098c2ecf20Sopenharmony_ci} 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_civoid 3128c2ecf20Sopenharmony_cinfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror, 3138c2ecf20Sopenharmony_ci nfs4_stateid *stateid) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci if (nfs4_ff_layout_ds_version(mirror) == 4) 3168c2ecf20Sopenharmony_ci nfs4_stateid_copy(stateid, &mirror->stateid); 3178c2ecf20Sopenharmony_ci} 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_cistatic bool 3208c2ecf20Sopenharmony_ciff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo, 3218c2ecf20Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror) 3228c2ecf20Sopenharmony_ci{ 3238c2ecf20Sopenharmony_ci if (mirror == NULL) 3248c2ecf20Sopenharmony_ci goto outerr; 3258c2ecf20Sopenharmony_ci if (mirror->mirror_ds == NULL) { 3268c2ecf20Sopenharmony_ci struct nfs4_deviceid_node *node; 3278c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), 3308c2ecf20Sopenharmony_ci &mirror->devid, lo->plh_lc_cred, 3318c2ecf20Sopenharmony_ci GFP_KERNEL); 3328c2ecf20Sopenharmony_ci if (node) 3338c2ecf20Sopenharmony_ci mirror_ds = FF_LAYOUT_MIRROR_DS(node); 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci /* check for race with another call to this function */ 3368c2ecf20Sopenharmony_ci if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && 3378c2ecf20Sopenharmony_ci mirror_ds != ERR_PTR(-ENODEV)) 3388c2ecf20Sopenharmony_ci nfs4_put_deviceid_node(node); 3398c2ecf20Sopenharmony_ci } 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci if (IS_ERR(mirror->mirror_ds)) 3428c2ecf20Sopenharmony_ci goto outerr; 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci return true; 3458c2ecf20Sopenharmony_ciouterr: 3468c2ecf20Sopenharmony_ci return false; 3478c2ecf20Sopenharmony_ci} 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci/** 3508c2ecf20Sopenharmony_ci * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call 3518c2ecf20Sopenharmony_ci * @lseg: the layout segment we're operating on 3528c2ecf20Sopenharmony_ci * @mirror: layout mirror describing the DS to use 3538c2ecf20Sopenharmony_ci * @fail_return: return layout on connect failure? 3548c2ecf20Sopenharmony_ci * 3558c2ecf20Sopenharmony_ci * Try to prepare a DS connection to accept an RPC call. This involves 3568c2ecf20Sopenharmony_ci * selecting a mirror to use and connecting the client to it if it's not 3578c2ecf20Sopenharmony_ci * already connected. 3588c2ecf20Sopenharmony_ci * 3598c2ecf20Sopenharmony_ci * Since we only need a single functioning mirror to satisfy a read, we don't 3608c2ecf20Sopenharmony_ci * want to return the layout if there is one. For writes though, any down 3618c2ecf20Sopenharmony_ci * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish 3628c2ecf20Sopenharmony_ci * between the two cases. 3638c2ecf20Sopenharmony_ci * 3648c2ecf20Sopenharmony_ci * Returns a pointer to a connected DS object on success or NULL on failure. 3658c2ecf20Sopenharmony_ci */ 3668c2ecf20Sopenharmony_cistruct nfs4_pnfs_ds * 3678c2ecf20Sopenharmony_cinfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, 3688c2ecf20Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror, 3698c2ecf20Sopenharmony_ci bool fail_return) 3708c2ecf20Sopenharmony_ci{ 3718c2ecf20Sopenharmony_ci struct nfs4_pnfs_ds *ds = NULL; 3728c2ecf20Sopenharmony_ci struct inode *ino = lseg->pls_layout->plh_inode; 3738c2ecf20Sopenharmony_ci struct nfs_server *s = NFS_SERVER(ino); 3748c2ecf20Sopenharmony_ci unsigned int max_payload; 3758c2ecf20Sopenharmony_ci int status; 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) 3788c2ecf20Sopenharmony_ci goto noconnect; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci ds = mirror->mirror_ds->ds; 3818c2ecf20Sopenharmony_ci if (READ_ONCE(ds->ds_clp)) 3828c2ecf20Sopenharmony_ci goto out; 3838c2ecf20Sopenharmony_ci /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ 3848c2ecf20Sopenharmony_ci smp_rmb(); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* FIXME: For now we assume the server sent only one version of NFS 3878c2ecf20Sopenharmony_ci * to use for the DS. 3888c2ecf20Sopenharmony_ci */ 3898c2ecf20Sopenharmony_ci status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node, 3908c2ecf20Sopenharmony_ci dataserver_timeo, dataserver_retrans, 3918c2ecf20Sopenharmony_ci mirror->mirror_ds->ds_versions[0].version, 3928c2ecf20Sopenharmony_ci mirror->mirror_ds->ds_versions[0].minor_version); 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci /* connect success, check rsize/wsize limit */ 3958c2ecf20Sopenharmony_ci if (!status) { 3968c2ecf20Sopenharmony_ci max_payload = 3978c2ecf20Sopenharmony_ci nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), 3988c2ecf20Sopenharmony_ci NULL); 3998c2ecf20Sopenharmony_ci if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) 4008c2ecf20Sopenharmony_ci mirror->mirror_ds->ds_versions[0].rsize = max_payload; 4018c2ecf20Sopenharmony_ci if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) 4028c2ecf20Sopenharmony_ci mirror->mirror_ds->ds_versions[0].wsize = max_payload; 4038c2ecf20Sopenharmony_ci goto out; 4048c2ecf20Sopenharmony_ci } 4058c2ecf20Sopenharmony_cinoconnect: 4068c2ecf20Sopenharmony_ci ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 4078c2ecf20Sopenharmony_ci mirror, lseg->pls_range.offset, 4088c2ecf20Sopenharmony_ci lseg->pls_range.length, NFS4ERR_NXIO, 4098c2ecf20Sopenharmony_ci OP_ILLEGAL, GFP_NOIO); 4108c2ecf20Sopenharmony_ci ff_layout_send_layouterror(lseg); 4118c2ecf20Sopenharmony_ci if (fail_return || !ff_layout_has_available_ds(lseg)) 4128c2ecf20Sopenharmony_ci pnfs_error_mark_layout_for_return(ino, lseg); 4138c2ecf20Sopenharmony_ci ds = NULL; 4148c2ecf20Sopenharmony_ciout: 4158c2ecf20Sopenharmony_ci return ds; 4168c2ecf20Sopenharmony_ci} 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ciconst struct cred * 4198c2ecf20Sopenharmony_ciff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror, 4208c2ecf20Sopenharmony_ci const struct pnfs_layout_range *range, 4218c2ecf20Sopenharmony_ci const struct cred *mdscred) 4228c2ecf20Sopenharmony_ci{ 4238c2ecf20Sopenharmony_ci const struct cred *cred; 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { 4268c2ecf20Sopenharmony_ci cred = ff_layout_get_mirror_cred(mirror, range->iomode); 4278c2ecf20Sopenharmony_ci if (!cred) 4288c2ecf20Sopenharmony_ci cred = get_cred(mdscred); 4298c2ecf20Sopenharmony_ci } else { 4308c2ecf20Sopenharmony_ci cred = get_cred(mdscred); 4318c2ecf20Sopenharmony_ci } 4328c2ecf20Sopenharmony_ci return cred; 4338c2ecf20Sopenharmony_ci} 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci/** 4368c2ecf20Sopenharmony_ci * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client 4378c2ecf20Sopenharmony_ci * @mirror: pointer to the mirror 4388c2ecf20Sopenharmony_ci * @ds_clp: nfs_client for the DS 4398c2ecf20Sopenharmony_ci * @inode: pointer to inode 4408c2ecf20Sopenharmony_ci * 4418c2ecf20Sopenharmony_ci * Find or create a DS rpc client with th MDS server rpc client auth flavor 4428c2ecf20Sopenharmony_ci * in the nfs_client cl_ds_clients list. 4438c2ecf20Sopenharmony_ci */ 4448c2ecf20Sopenharmony_cistruct rpc_clnt * 4458c2ecf20Sopenharmony_cinfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror, 4468c2ecf20Sopenharmony_ci struct nfs_client *ds_clp, struct inode *inode) 4478c2ecf20Sopenharmony_ci{ 4488c2ecf20Sopenharmony_ci switch (mirror->mirror_ds->ds_versions[0].version) { 4498c2ecf20Sopenharmony_ci case 3: 4508c2ecf20Sopenharmony_ci /* For NFSv3 DS, flavor is set when creating DS connections */ 4518c2ecf20Sopenharmony_ci return ds_clp->cl_rpcclient; 4528c2ecf20Sopenharmony_ci case 4: 4538c2ecf20Sopenharmony_ci return nfs4_find_or_create_ds_client(ds_clp, inode); 4548c2ecf20Sopenharmony_ci default: 4558c2ecf20Sopenharmony_ci BUG(); 4568c2ecf20Sopenharmony_ci } 4578c2ecf20Sopenharmony_ci} 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_civoid ff_layout_free_ds_ioerr(struct list_head *head) 4608c2ecf20Sopenharmony_ci{ 4618c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *err; 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci while (!list_empty(head)) { 4648c2ecf20Sopenharmony_ci err = list_first_entry(head, 4658c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err, 4668c2ecf20Sopenharmony_ci list); 4678c2ecf20Sopenharmony_ci list_del(&err->list); 4688c2ecf20Sopenharmony_ci kfree(err); 4698c2ecf20Sopenharmony_ci } 4708c2ecf20Sopenharmony_ci} 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci/* called with inode i_lock held */ 4738c2ecf20Sopenharmony_ciint ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) 4748c2ecf20Sopenharmony_ci{ 4758c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *err; 4768c2ecf20Sopenharmony_ci __be32 *p; 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci list_for_each_entry(err, head, list) { 4798c2ecf20Sopenharmony_ci /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) 4808c2ecf20Sopenharmony_ci * + array length + deviceid(NFS4_DEVICEID4_SIZE) 4818c2ecf20Sopenharmony_ci * + status(4) + opnum(4) 4828c2ecf20Sopenharmony_ci */ 4838c2ecf20Sopenharmony_ci p = xdr_reserve_space(xdr, 4848c2ecf20Sopenharmony_ci 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); 4858c2ecf20Sopenharmony_ci if (unlikely(!p)) 4868c2ecf20Sopenharmony_ci return -ENOBUFS; 4878c2ecf20Sopenharmony_ci p = xdr_encode_hyper(p, err->offset); 4888c2ecf20Sopenharmony_ci p = xdr_encode_hyper(p, err->length); 4898c2ecf20Sopenharmony_ci p = xdr_encode_opaque_fixed(p, &err->stateid, 4908c2ecf20Sopenharmony_ci NFS4_STATEID_SIZE); 4918c2ecf20Sopenharmony_ci /* Encode 1 error */ 4928c2ecf20Sopenharmony_ci *p++ = cpu_to_be32(1); 4938c2ecf20Sopenharmony_ci p = xdr_encode_opaque_fixed(p, &err->deviceid, 4948c2ecf20Sopenharmony_ci NFS4_DEVICEID4_SIZE); 4958c2ecf20Sopenharmony_ci *p++ = cpu_to_be32(err->status); 4968c2ecf20Sopenharmony_ci *p++ = cpu_to_be32(err->opnum); 4978c2ecf20Sopenharmony_ci dprintk("%s: offset %llu length %llu status %d op %d\n", 4988c2ecf20Sopenharmony_ci __func__, err->offset, err->length, err->status, 4998c2ecf20Sopenharmony_ci err->opnum); 5008c2ecf20Sopenharmony_ci } 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci return 0; 5038c2ecf20Sopenharmony_ci} 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_cistatic 5068c2ecf20Sopenharmony_ciunsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, 5078c2ecf20Sopenharmony_ci const struct pnfs_layout_range *range, 5088c2ecf20Sopenharmony_ci struct list_head *head, 5098c2ecf20Sopenharmony_ci unsigned int maxnum) 5108c2ecf20Sopenharmony_ci{ 5118c2ecf20Sopenharmony_ci struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); 5128c2ecf20Sopenharmony_ci struct inode *inode = lo->plh_inode; 5138c2ecf20Sopenharmony_ci struct nfs4_ff_layout_ds_err *err, *n; 5148c2ecf20Sopenharmony_ci unsigned int ret = 0; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 5178c2ecf20Sopenharmony_ci list_for_each_entry_safe(err, n, &flo->error_list, list) { 5188c2ecf20Sopenharmony_ci if (!pnfs_is_range_intersecting(err->offset, 5198c2ecf20Sopenharmony_ci pnfs_end_offset(err->offset, err->length), 5208c2ecf20Sopenharmony_ci range->offset, 5218c2ecf20Sopenharmony_ci pnfs_end_offset(range->offset, range->length))) 5228c2ecf20Sopenharmony_ci continue; 5238c2ecf20Sopenharmony_ci if (!maxnum) 5248c2ecf20Sopenharmony_ci break; 5258c2ecf20Sopenharmony_ci list_move(&err->list, head); 5268c2ecf20Sopenharmony_ci maxnum--; 5278c2ecf20Sopenharmony_ci ret++; 5288c2ecf20Sopenharmony_ci } 5298c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 5308c2ecf20Sopenharmony_ci return ret; 5318c2ecf20Sopenharmony_ci} 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ciunsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, 5348c2ecf20Sopenharmony_ci const struct pnfs_layout_range *range, 5358c2ecf20Sopenharmony_ci struct list_head *head, 5368c2ecf20Sopenharmony_ci unsigned int maxnum) 5378c2ecf20Sopenharmony_ci{ 5388c2ecf20Sopenharmony_ci unsigned int ret; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); 5418c2ecf20Sopenharmony_ci /* If we're over the max, discard all remaining entries */ 5428c2ecf20Sopenharmony_ci if (ret == maxnum) { 5438c2ecf20Sopenharmony_ci LIST_HEAD(discard); 5448c2ecf20Sopenharmony_ci do_layout_fetch_ds_ioerr(lo, range, &discard, -1); 5458c2ecf20Sopenharmony_ci ff_layout_free_ds_ioerr(&discard); 5468c2ecf20Sopenharmony_ci } 5478c2ecf20Sopenharmony_ci return ret; 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_cistatic bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) 5518c2ecf20Sopenharmony_ci{ 5528c2ecf20Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror; 5538c2ecf20Sopenharmony_ci struct nfs4_deviceid_node *devid; 5548c2ecf20Sopenharmony_ci u32 idx; 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 5578c2ecf20Sopenharmony_ci mirror = FF_LAYOUT_COMP(lseg, idx); 5588c2ecf20Sopenharmony_ci if (mirror) { 5598c2ecf20Sopenharmony_ci if (!mirror->mirror_ds) 5608c2ecf20Sopenharmony_ci return true; 5618c2ecf20Sopenharmony_ci if (IS_ERR(mirror->mirror_ds)) 5628c2ecf20Sopenharmony_ci continue; 5638c2ecf20Sopenharmony_ci devid = &mirror->mirror_ds->id_node; 5648c2ecf20Sopenharmony_ci if (!nfs4_test_deviceid_unavailable(devid)) 5658c2ecf20Sopenharmony_ci return true; 5668c2ecf20Sopenharmony_ci } 5678c2ecf20Sopenharmony_ci } 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci return false; 5708c2ecf20Sopenharmony_ci} 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_cistatic bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) 5738c2ecf20Sopenharmony_ci{ 5748c2ecf20Sopenharmony_ci struct nfs4_ff_layout_mirror *mirror; 5758c2ecf20Sopenharmony_ci struct nfs4_deviceid_node *devid; 5768c2ecf20Sopenharmony_ci u32 idx; 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { 5798c2ecf20Sopenharmony_ci mirror = FF_LAYOUT_COMP(lseg, idx); 5808c2ecf20Sopenharmony_ci if (!mirror || IS_ERR(mirror->mirror_ds)) 5818c2ecf20Sopenharmony_ci return false; 5828c2ecf20Sopenharmony_ci if (!mirror->mirror_ds) 5838c2ecf20Sopenharmony_ci continue; 5848c2ecf20Sopenharmony_ci devid = &mirror->mirror_ds->id_node; 5858c2ecf20Sopenharmony_ci if (nfs4_test_deviceid_unavailable(devid)) 5868c2ecf20Sopenharmony_ci return false; 5878c2ecf20Sopenharmony_ci } 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; 5908c2ecf20Sopenharmony_ci} 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_cistatic bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) 5938c2ecf20Sopenharmony_ci{ 5948c2ecf20Sopenharmony_ci if (lseg->pls_range.iomode == IOMODE_READ) 5958c2ecf20Sopenharmony_ci return ff_read_layout_has_available_ds(lseg); 5968c2ecf20Sopenharmony_ci /* Note: RW layout needs all mirrors available */ 5978c2ecf20Sopenharmony_ci return ff_rw_layout_has_available_ds(lseg); 5988c2ecf20Sopenharmony_ci} 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_cibool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg) 6018c2ecf20Sopenharmony_ci{ 6028c2ecf20Sopenharmony_ci return ff_layout_no_fallback_to_mds(lseg) || 6038c2ecf20Sopenharmony_ci ff_layout_has_available_ds(lseg); 6048c2ecf20Sopenharmony_ci} 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_cibool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg) 6078c2ecf20Sopenharmony_ci{ 6088c2ecf20Sopenharmony_ci return lseg->pls_range.iomode == IOMODE_RW && 6098c2ecf20Sopenharmony_ci ff_layout_no_read_on_rw(lseg); 6108c2ecf20Sopenharmony_ci} 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_cimodule_param(dataserver_retrans, uint, 0644); 6138c2ecf20Sopenharmony_ciMODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " 6148c2ecf20Sopenharmony_ci "retries a request before it attempts further " 6158c2ecf20Sopenharmony_ci " recovery action."); 6168c2ecf20Sopenharmony_cimodule_param(dataserver_timeo, uint, 0644); 6178c2ecf20Sopenharmony_ciMODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " 6188c2ecf20Sopenharmony_ci "NFSv4.1 client waits for a response from a " 6198c2ecf20Sopenharmony_ci " data server before it retries an NFS request."); 620