18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci *  Module for the pnfs nfs4 file layout driver.
38c2ecf20Sopenharmony_ci *  Defines all I/O and Policy interface operations, plus code
48c2ecf20Sopenharmony_ci *  to register itself with the pNFS client.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci *  Copyright (c) 2002
78c2ecf20Sopenharmony_ci *  The Regents of the University of Michigan
88c2ecf20Sopenharmony_ci *  All Rights Reserved
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *  Dean Hildebrand <dhildebz@umich.edu>
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci *  Permission is granted to use, copy, create derivative works, and
138c2ecf20Sopenharmony_ci *  redistribute this software and such derivative works for any purpose,
148c2ecf20Sopenharmony_ci *  so long as the name of the University of Michigan is not used in
158c2ecf20Sopenharmony_ci *  any advertising or publicity pertaining to the use or distribution
168c2ecf20Sopenharmony_ci *  of this software without specific, written prior authorization. If
178c2ecf20Sopenharmony_ci *  the above copyright notice or any other identification of the
188c2ecf20Sopenharmony_ci *  University of Michigan is included in any copy of any portion of
198c2ecf20Sopenharmony_ci *  this software, then the disclaimer below must also be included.
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci *  This software is provided as is, without representation or warranty
228c2ecf20Sopenharmony_ci *  of any kind either express or implied, including without limitation
238c2ecf20Sopenharmony_ci *  the implied warranties of merchantability, fitness for a particular
248c2ecf20Sopenharmony_ci *  purpose, or noninfringement.  The Regents of the University of
258c2ecf20Sopenharmony_ci *  Michigan shall not be liable for any damages, including special,
268c2ecf20Sopenharmony_ci *  indirect, incidental, or consequential damages, with respect to any
278c2ecf20Sopenharmony_ci *  claim arising out of or in connection with the use of the software,
288c2ecf20Sopenharmony_ci *  even if it has been or is hereafter advised of the possibility of
298c2ecf20Sopenharmony_ci *  such damages.
308c2ecf20Sopenharmony_ci */
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include <linux/nfs_fs.h>
338c2ecf20Sopenharmony_ci#include <linux/nfs_page.h>
348c2ecf20Sopenharmony_ci#include <linux/module.h>
358c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci#include <linux/sunrpc/metrics.h>
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci#include "../nfs4session.h"
408c2ecf20Sopenharmony_ci#include "../internal.h"
418c2ecf20Sopenharmony_ci#include "../delegation.h"
428c2ecf20Sopenharmony_ci#include "filelayout.h"
438c2ecf20Sopenharmony_ci#include "../nfs4trace.h"
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
488c2ecf20Sopenharmony_ciMODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
498c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("The NFSv4 file layout driver");
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci#define FILELAYOUT_POLL_RETRY_MAX     (15*HZ)
528c2ecf20Sopenharmony_cistatic const struct pnfs_commit_ops filelayout_commit_ops;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic loff_t
558c2ecf20Sopenharmony_cifilelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
568c2ecf20Sopenharmony_ci			    loff_t offset)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
598c2ecf20Sopenharmony_ci	u64 stripe_no;
608c2ecf20Sopenharmony_ci	u32 rem;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	offset -= flseg->pattern_offset;
638c2ecf20Sopenharmony_ci	stripe_no = div_u64(offset, stripe_width);
648c2ecf20Sopenharmony_ci	div_u64_rem(offset, flseg->stripe_unit, &rem);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	return stripe_no * flseg->stripe_unit + rem;
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci/* This function is used by the layout driver to calculate the
708c2ecf20Sopenharmony_ci * offset of the file on the dserver based on whether the
718c2ecf20Sopenharmony_ci * layout type is STRIPE_DENSE or STRIPE_SPARSE
728c2ecf20Sopenharmony_ci */
738c2ecf20Sopenharmony_cistatic loff_t
748c2ecf20Sopenharmony_cifilelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	switch (flseg->stripe_type) {
798c2ecf20Sopenharmony_ci	case STRIPE_SPARSE:
808c2ecf20Sopenharmony_ci		return offset;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	case STRIPE_DENSE:
838c2ecf20Sopenharmony_ci		return filelayout_get_dense_offset(flseg, offset);
848c2ecf20Sopenharmony_ci	}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	BUG();
878c2ecf20Sopenharmony_ci}
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_cistatic void filelayout_reset_write(struct nfs_pgio_header *hdr)
908c2ecf20Sopenharmony_ci{
918c2ecf20Sopenharmony_ci	struct rpc_task *task = &hdr->task;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
948c2ecf20Sopenharmony_ci		dprintk("%s Reset task %5u for i/o through MDS "
958c2ecf20Sopenharmony_ci			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
968c2ecf20Sopenharmony_ci			hdr->task.tk_pid,
978c2ecf20Sopenharmony_ci			hdr->inode->i_sb->s_id,
988c2ecf20Sopenharmony_ci			(unsigned long long)NFS_FILEID(hdr->inode),
998c2ecf20Sopenharmony_ci			hdr->args.count,
1008c2ecf20Sopenharmony_ci			(unsigned long long)hdr->args.offset);
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
1038c2ecf20Sopenharmony_ci	}
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_cistatic void filelayout_reset_read(struct nfs_pgio_header *hdr)
1078c2ecf20Sopenharmony_ci{
1088c2ecf20Sopenharmony_ci	struct rpc_task *task = &hdr->task;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1118c2ecf20Sopenharmony_ci		dprintk("%s Reset task %5u for i/o through MDS "
1128c2ecf20Sopenharmony_ci			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
1138c2ecf20Sopenharmony_ci			hdr->task.tk_pid,
1148c2ecf20Sopenharmony_ci			hdr->inode->i_sb->s_id,
1158c2ecf20Sopenharmony_ci			(unsigned long long)NFS_FILEID(hdr->inode),
1168c2ecf20Sopenharmony_ci			hdr->args.count,
1178c2ecf20Sopenharmony_ci			(unsigned long long)hdr->args.offset);
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
1208c2ecf20Sopenharmony_ci	}
1218c2ecf20Sopenharmony_ci}
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_cistatic int filelayout_async_handle_error(struct rpc_task *task,
1248c2ecf20Sopenharmony_ci					 struct nfs4_state *state,
1258c2ecf20Sopenharmony_ci					 struct nfs_client *clp,
1268c2ecf20Sopenharmony_ci					 struct pnfs_layout_segment *lseg)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo = lseg->pls_layout;
1298c2ecf20Sopenharmony_ci	struct inode *inode = lo->plh_inode;
1308c2ecf20Sopenharmony_ci	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
1318c2ecf20Sopenharmony_ci	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	if (task->tk_status >= 0)
1348c2ecf20Sopenharmony_ci		return 0;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	switch (task->tk_status) {
1378c2ecf20Sopenharmony_ci	/* DS session errors */
1388c2ecf20Sopenharmony_ci	case -NFS4ERR_BADSESSION:
1398c2ecf20Sopenharmony_ci	case -NFS4ERR_BADSLOT:
1408c2ecf20Sopenharmony_ci	case -NFS4ERR_BAD_HIGH_SLOT:
1418c2ecf20Sopenharmony_ci	case -NFS4ERR_DEADSESSION:
1428c2ecf20Sopenharmony_ci	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1438c2ecf20Sopenharmony_ci	case -NFS4ERR_SEQ_FALSE_RETRY:
1448c2ecf20Sopenharmony_ci	case -NFS4ERR_SEQ_MISORDERED:
1458c2ecf20Sopenharmony_ci		dprintk("%s ERROR %d, Reset session. Exchangeid "
1468c2ecf20Sopenharmony_ci			"flags 0x%x\n", __func__, task->tk_status,
1478c2ecf20Sopenharmony_ci			clp->cl_exchange_flags);
1488c2ecf20Sopenharmony_ci		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
1498c2ecf20Sopenharmony_ci		break;
1508c2ecf20Sopenharmony_ci	case -NFS4ERR_DELAY:
1518c2ecf20Sopenharmony_ci	case -NFS4ERR_GRACE:
1528c2ecf20Sopenharmony_ci		rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
1538c2ecf20Sopenharmony_ci		break;
1548c2ecf20Sopenharmony_ci	case -NFS4ERR_RETRY_UNCACHED_REP:
1558c2ecf20Sopenharmony_ci		break;
1568c2ecf20Sopenharmony_ci	/* Invalidate Layout errors */
1578c2ecf20Sopenharmony_ci	case -NFS4ERR_ACCESS:
1588c2ecf20Sopenharmony_ci	case -NFS4ERR_PNFS_NO_LAYOUT:
1598c2ecf20Sopenharmony_ci	case -ESTALE:           /* mapped NFS4ERR_STALE */
1608c2ecf20Sopenharmony_ci	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
1618c2ecf20Sopenharmony_ci	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
1628c2ecf20Sopenharmony_ci	case -NFS4ERR_FHEXPIRED:
1638c2ecf20Sopenharmony_ci	case -NFS4ERR_WRONG_TYPE:
1648c2ecf20Sopenharmony_ci		dprintk("%s Invalid layout error %d\n", __func__,
1658c2ecf20Sopenharmony_ci			task->tk_status);
1668c2ecf20Sopenharmony_ci		/*
1678c2ecf20Sopenharmony_ci		 * Destroy layout so new i/o will get a new layout.
1688c2ecf20Sopenharmony_ci		 * Layout will not be destroyed until all current lseg
1698c2ecf20Sopenharmony_ci		 * references are put. Mark layout as invalid to resend failed
1708c2ecf20Sopenharmony_ci		 * i/o and all i/o waiting on the slot table to the MDS until
1718c2ecf20Sopenharmony_ci		 * layout is destroyed and a new valid layout is obtained.
1728c2ecf20Sopenharmony_ci		 */
1738c2ecf20Sopenharmony_ci		pnfs_destroy_layout(NFS_I(inode));
1748c2ecf20Sopenharmony_ci		rpc_wake_up(&tbl->slot_tbl_waitq);
1758c2ecf20Sopenharmony_ci		goto reset;
1768c2ecf20Sopenharmony_ci	/* RPC connection errors */
1778c2ecf20Sopenharmony_ci	case -ECONNREFUSED:
1788c2ecf20Sopenharmony_ci	case -EHOSTDOWN:
1798c2ecf20Sopenharmony_ci	case -EHOSTUNREACH:
1808c2ecf20Sopenharmony_ci	case -ENETUNREACH:
1818c2ecf20Sopenharmony_ci	case -EIO:
1828c2ecf20Sopenharmony_ci	case -ETIMEDOUT:
1838c2ecf20Sopenharmony_ci	case -EPIPE:
1848c2ecf20Sopenharmony_ci		dprintk("%s DS connection error %d\n", __func__,
1858c2ecf20Sopenharmony_ci			task->tk_status);
1868c2ecf20Sopenharmony_ci		nfs4_mark_deviceid_unavailable(devid);
1878c2ecf20Sopenharmony_ci		pnfs_error_mark_layout_for_return(inode, lseg);
1888c2ecf20Sopenharmony_ci		pnfs_set_lo_fail(lseg);
1898c2ecf20Sopenharmony_ci		rpc_wake_up(&tbl->slot_tbl_waitq);
1908c2ecf20Sopenharmony_ci		fallthrough;
1918c2ecf20Sopenharmony_ci	default:
1928c2ecf20Sopenharmony_cireset:
1938c2ecf20Sopenharmony_ci		dprintk("%s Retry through MDS. Error %d\n", __func__,
1948c2ecf20Sopenharmony_ci			task->tk_status);
1958c2ecf20Sopenharmony_ci		return -NFS4ERR_RESET_TO_MDS;
1968c2ecf20Sopenharmony_ci	}
1978c2ecf20Sopenharmony_ci	task->tk_status = 0;
1988c2ecf20Sopenharmony_ci	return -EAGAIN;
1998c2ecf20Sopenharmony_ci}
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci/* NFS_PROTO call done callback routines */
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_cistatic int filelayout_read_done_cb(struct rpc_task *task,
2048c2ecf20Sopenharmony_ci				struct nfs_pgio_header *hdr)
2058c2ecf20Sopenharmony_ci{
2068c2ecf20Sopenharmony_ci	int err;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	trace_nfs4_pnfs_read(hdr, task->tk_status);
2098c2ecf20Sopenharmony_ci	err = filelayout_async_handle_error(task, hdr->args.context->state,
2108c2ecf20Sopenharmony_ci					    hdr->ds_clp, hdr->lseg);
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	switch (err) {
2138c2ecf20Sopenharmony_ci	case -NFS4ERR_RESET_TO_MDS:
2148c2ecf20Sopenharmony_ci		filelayout_reset_read(hdr);
2158c2ecf20Sopenharmony_ci		return task->tk_status;
2168c2ecf20Sopenharmony_ci	case -EAGAIN:
2178c2ecf20Sopenharmony_ci		rpc_restart_call_prepare(task);
2188c2ecf20Sopenharmony_ci		return -EAGAIN;
2198c2ecf20Sopenharmony_ci	}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	return 0;
2228c2ecf20Sopenharmony_ci}
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci/*
2258c2ecf20Sopenharmony_ci * We reference the rpc_cred of the first WRITE that triggers the need for
2268c2ecf20Sopenharmony_ci * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
2278c2ecf20Sopenharmony_ci * rfc5661 is not clear about which credential should be used.
2288c2ecf20Sopenharmony_ci */
2298c2ecf20Sopenharmony_cistatic void
2308c2ecf20Sopenharmony_cifilelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	loff_t end_offs = 0;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
2358c2ecf20Sopenharmony_ci	    hdr->res.verf->committed == NFS_FILE_SYNC)
2368c2ecf20Sopenharmony_ci		return;
2378c2ecf20Sopenharmony_ci	if (hdr->res.verf->committed == NFS_DATA_SYNC)
2388c2ecf20Sopenharmony_ci		end_offs = hdr->mds_offset + (loff_t)hdr->res.count;
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	/* Note: if the write is unstable, don't set end_offs until commit */
2418c2ecf20Sopenharmony_ci	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);
2428c2ecf20Sopenharmony_ci	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
2438c2ecf20Sopenharmony_ci		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
2448c2ecf20Sopenharmony_ci}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_cibool
2478c2ecf20Sopenharmony_cifilelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
2488c2ecf20Sopenharmony_ci{
2498c2ecf20Sopenharmony_ci	return filelayout_test_devid_invalid(node) ||
2508c2ecf20Sopenharmony_ci		nfs4_test_deviceid_unavailable(node);
2518c2ecf20Sopenharmony_ci}
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_cistatic bool
2548c2ecf20Sopenharmony_cifilelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg);
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	return filelayout_test_devid_unavailable(node);
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci/*
2628c2ecf20Sopenharmony_ci * Call ops for the async read/write cases
2638c2ecf20Sopenharmony_ci * In the case of dense layouts, the offset needs to be reset to its
2648c2ecf20Sopenharmony_ci * original value.
2658c2ecf20Sopenharmony_ci */
2668c2ecf20Sopenharmony_cistatic void filelayout_read_prepare(struct rpc_task *task, void *data)
2678c2ecf20Sopenharmony_ci{
2688c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
2718c2ecf20Sopenharmony_ci		rpc_exit(task, -EIO);
2728c2ecf20Sopenharmony_ci		return;
2738c2ecf20Sopenharmony_ci	}
2748c2ecf20Sopenharmony_ci	if (filelayout_reset_to_mds(hdr->lseg)) {
2758c2ecf20Sopenharmony_ci		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
2768c2ecf20Sopenharmony_ci		filelayout_reset_read(hdr);
2778c2ecf20Sopenharmony_ci		rpc_exit(task, 0);
2788c2ecf20Sopenharmony_ci		return;
2798c2ecf20Sopenharmony_ci	}
2808c2ecf20Sopenharmony_ci	hdr->pgio_done_cb = filelayout_read_done_cb;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	if (nfs4_setup_sequence(hdr->ds_clp,
2838c2ecf20Sopenharmony_ci			&hdr->args.seq_args,
2848c2ecf20Sopenharmony_ci			&hdr->res.seq_res,
2858c2ecf20Sopenharmony_ci			task))
2868c2ecf20Sopenharmony_ci		return;
2878c2ecf20Sopenharmony_ci	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
2888c2ecf20Sopenharmony_ci			hdr->args.lock_context, FMODE_READ) == -EIO)
2898c2ecf20Sopenharmony_ci		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_cistatic void filelayout_read_call_done(struct rpc_task *task, void *data)
2938c2ecf20Sopenharmony_ci{
2948c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
2998c2ecf20Sopenharmony_ci	    task->tk_status == 0) {
3008c2ecf20Sopenharmony_ci		nfs41_sequence_done(task, &hdr->res.seq_res);
3018c2ecf20Sopenharmony_ci		return;
3028c2ecf20Sopenharmony_ci	}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	/* Note this may cause RPC to be resent */
3058c2ecf20Sopenharmony_ci	hdr->mds_ops->rpc_call_done(task, data);
3068c2ecf20Sopenharmony_ci}
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_cistatic void filelayout_read_count_stats(struct rpc_task *task, void *data)
3098c2ecf20Sopenharmony_ci{
3108c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
3138c2ecf20Sopenharmony_ci}
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_cistatic int filelayout_write_done_cb(struct rpc_task *task,
3168c2ecf20Sopenharmony_ci				struct nfs_pgio_header *hdr)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	int err;
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci	trace_nfs4_pnfs_write(hdr, task->tk_status);
3218c2ecf20Sopenharmony_ci	err = filelayout_async_handle_error(task, hdr->args.context->state,
3228c2ecf20Sopenharmony_ci					    hdr->ds_clp, hdr->lseg);
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	switch (err) {
3258c2ecf20Sopenharmony_ci	case -NFS4ERR_RESET_TO_MDS:
3268c2ecf20Sopenharmony_ci		filelayout_reset_write(hdr);
3278c2ecf20Sopenharmony_ci		return task->tk_status;
3288c2ecf20Sopenharmony_ci	case -EAGAIN:
3298c2ecf20Sopenharmony_ci		rpc_restart_call_prepare(task);
3308c2ecf20Sopenharmony_ci		return -EAGAIN;
3318c2ecf20Sopenharmony_ci	}
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_ci	filelayout_set_layoutcommit(hdr);
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	/* zero out the fattr */
3368c2ecf20Sopenharmony_ci	hdr->fattr.valid = 0;
3378c2ecf20Sopenharmony_ci	if (task->tk_status >= 0)
3388c2ecf20Sopenharmony_ci		nfs_writeback_update_inode(hdr);
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	return 0;
3418c2ecf20Sopenharmony_ci}
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_cistatic int filelayout_commit_done_cb(struct rpc_task *task,
3448c2ecf20Sopenharmony_ci				     struct nfs_commit_data *data)
3458c2ecf20Sopenharmony_ci{
3468c2ecf20Sopenharmony_ci	int err;
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
3498c2ecf20Sopenharmony_ci	err = filelayout_async_handle_error(task, NULL, data->ds_clp,
3508c2ecf20Sopenharmony_ci					    data->lseg);
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	switch (err) {
3538c2ecf20Sopenharmony_ci	case -NFS4ERR_RESET_TO_MDS:
3548c2ecf20Sopenharmony_ci		pnfs_generic_prepare_to_resend_writes(data);
3558c2ecf20Sopenharmony_ci		return -EAGAIN;
3568c2ecf20Sopenharmony_ci	case -EAGAIN:
3578c2ecf20Sopenharmony_ci		rpc_restart_call_prepare(task);
3588c2ecf20Sopenharmony_ci		return -EAGAIN;
3598c2ecf20Sopenharmony_ci	}
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	return 0;
3648c2ecf20Sopenharmony_ci}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_cistatic void filelayout_write_prepare(struct rpc_task *task, void *data)
3678c2ecf20Sopenharmony_ci{
3688c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
3718c2ecf20Sopenharmony_ci		rpc_exit(task, -EIO);
3728c2ecf20Sopenharmony_ci		return;
3738c2ecf20Sopenharmony_ci	}
3748c2ecf20Sopenharmony_ci	if (filelayout_reset_to_mds(hdr->lseg)) {
3758c2ecf20Sopenharmony_ci		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
3768c2ecf20Sopenharmony_ci		filelayout_reset_write(hdr);
3778c2ecf20Sopenharmony_ci		rpc_exit(task, 0);
3788c2ecf20Sopenharmony_ci		return;
3798c2ecf20Sopenharmony_ci	}
3808c2ecf20Sopenharmony_ci	if (nfs4_setup_sequence(hdr->ds_clp,
3818c2ecf20Sopenharmony_ci			&hdr->args.seq_args,
3828c2ecf20Sopenharmony_ci			&hdr->res.seq_res,
3838c2ecf20Sopenharmony_ci			task))
3848c2ecf20Sopenharmony_ci		return;
3858c2ecf20Sopenharmony_ci	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
3868c2ecf20Sopenharmony_ci			hdr->args.lock_context, FMODE_WRITE) == -EIO)
3878c2ecf20Sopenharmony_ci		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
3888c2ecf20Sopenharmony_ci}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_cistatic void filelayout_write_call_done(struct rpc_task *task, void *data)
3918c2ecf20Sopenharmony_ci{
3928c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
3958c2ecf20Sopenharmony_ci	    task->tk_status == 0) {
3968c2ecf20Sopenharmony_ci		nfs41_sequence_done(task, &hdr->res.seq_res);
3978c2ecf20Sopenharmony_ci		return;
3988c2ecf20Sopenharmony_ci	}
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	/* Note this may cause RPC to be resent */
4018c2ecf20Sopenharmony_ci	hdr->mds_ops->rpc_call_done(task, data);
4028c2ecf20Sopenharmony_ci}
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_cistatic void filelayout_write_count_stats(struct rpc_task *task, void *data)
4058c2ecf20Sopenharmony_ci{
4068c2ecf20Sopenharmony_ci	struct nfs_pgio_header *hdr = data;
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
4098c2ecf20Sopenharmony_ci}
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_cistatic void filelayout_commit_prepare(struct rpc_task *task, void *data)
4128c2ecf20Sopenharmony_ci{
4138c2ecf20Sopenharmony_ci	struct nfs_commit_data *wdata = data;
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci	nfs4_setup_sequence(wdata->ds_clp,
4168c2ecf20Sopenharmony_ci			&wdata->args.seq_args,
4178c2ecf20Sopenharmony_ci			&wdata->res.seq_res,
4188c2ecf20Sopenharmony_ci			task);
4198c2ecf20Sopenharmony_ci}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_cistatic void filelayout_commit_count_stats(struct rpc_task *task, void *data)
4228c2ecf20Sopenharmony_ci{
4238c2ecf20Sopenharmony_ci	struct nfs_commit_data *cdata = data;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
4268c2ecf20Sopenharmony_ci}
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_cistatic const struct rpc_call_ops filelayout_read_call_ops = {
4298c2ecf20Sopenharmony_ci	.rpc_call_prepare = filelayout_read_prepare,
4308c2ecf20Sopenharmony_ci	.rpc_call_done = filelayout_read_call_done,
4318c2ecf20Sopenharmony_ci	.rpc_count_stats = filelayout_read_count_stats,
4328c2ecf20Sopenharmony_ci	.rpc_release = pnfs_generic_rw_release,
4338c2ecf20Sopenharmony_ci};
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_cistatic const struct rpc_call_ops filelayout_write_call_ops = {
4368c2ecf20Sopenharmony_ci	.rpc_call_prepare = filelayout_write_prepare,
4378c2ecf20Sopenharmony_ci	.rpc_call_done = filelayout_write_call_done,
4388c2ecf20Sopenharmony_ci	.rpc_count_stats = filelayout_write_count_stats,
4398c2ecf20Sopenharmony_ci	.rpc_release = pnfs_generic_rw_release,
4408c2ecf20Sopenharmony_ci};
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_cistatic const struct rpc_call_ops filelayout_commit_call_ops = {
4438c2ecf20Sopenharmony_ci	.rpc_call_prepare = filelayout_commit_prepare,
4448c2ecf20Sopenharmony_ci	.rpc_call_done = pnfs_generic_write_commit_done,
4458c2ecf20Sopenharmony_ci	.rpc_count_stats = filelayout_commit_count_stats,
4468c2ecf20Sopenharmony_ci	.rpc_release = pnfs_generic_commit_release,
4478c2ecf20Sopenharmony_ci};
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_cistatic enum pnfs_try_status
4508c2ecf20Sopenharmony_cifilelayout_read_pagelist(struct nfs_pgio_header *hdr)
4518c2ecf20Sopenharmony_ci{
4528c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = hdr->lseg;
4538c2ecf20Sopenharmony_ci	struct nfs4_pnfs_ds *ds;
4548c2ecf20Sopenharmony_ci	struct rpc_clnt *ds_clnt;
4558c2ecf20Sopenharmony_ci	loff_t offset = hdr->args.offset;
4568c2ecf20Sopenharmony_ci	u32 j, idx;
4578c2ecf20Sopenharmony_ci	struct nfs_fh *fh;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
4608c2ecf20Sopenharmony_ci		__func__, hdr->inode->i_ino,
4618c2ecf20Sopenharmony_ci		hdr->args.pgbase, (size_t)hdr->args.count, offset);
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci	/* Retrieve the correct rpc_client for the byte range */
4648c2ecf20Sopenharmony_ci	j = nfs4_fl_calc_j_index(lseg, offset);
4658c2ecf20Sopenharmony_ci	idx = nfs4_fl_calc_ds_index(lseg, j);
4668c2ecf20Sopenharmony_ci	ds = nfs4_fl_prepare_ds(lseg, idx);
4678c2ecf20Sopenharmony_ci	if (!ds)
4688c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
4718c2ecf20Sopenharmony_ci	if (IS_ERR(ds_clnt))
4728c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	dprintk("%s USE DS: %s cl_count %d\n", __func__,
4758c2ecf20Sopenharmony_ci		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	/* No multipath support. Use first DS */
4788c2ecf20Sopenharmony_ci	refcount_inc(&ds->ds_clp->cl_count);
4798c2ecf20Sopenharmony_ci	hdr->ds_clp = ds->ds_clp;
4808c2ecf20Sopenharmony_ci	hdr->ds_commit_idx = idx;
4818c2ecf20Sopenharmony_ci	fh = nfs4_fl_select_ds_fh(lseg, j);
4828c2ecf20Sopenharmony_ci	if (fh)
4838c2ecf20Sopenharmony_ci		hdr->args.fh = fh;
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
4868c2ecf20Sopenharmony_ci	hdr->mds_offset = offset;
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	/* Perform an asynchronous read to ds */
4898c2ecf20Sopenharmony_ci	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
4908c2ecf20Sopenharmony_ci			  NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
4918c2ecf20Sopenharmony_ci			  0, RPC_TASK_SOFTCONN);
4928c2ecf20Sopenharmony_ci	return PNFS_ATTEMPTED;
4938c2ecf20Sopenharmony_ci}
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci/* Perform async writes. */
4968c2ecf20Sopenharmony_cistatic enum pnfs_try_status
4978c2ecf20Sopenharmony_cifilelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
4988c2ecf20Sopenharmony_ci{
4998c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = hdr->lseg;
5008c2ecf20Sopenharmony_ci	struct nfs4_pnfs_ds *ds;
5018c2ecf20Sopenharmony_ci	struct rpc_clnt *ds_clnt;
5028c2ecf20Sopenharmony_ci	loff_t offset = hdr->args.offset;
5038c2ecf20Sopenharmony_ci	u32 j, idx;
5048c2ecf20Sopenharmony_ci	struct nfs_fh *fh;
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci	/* Retrieve the correct rpc_client for the byte range */
5078c2ecf20Sopenharmony_ci	j = nfs4_fl_calc_j_index(lseg, offset);
5088c2ecf20Sopenharmony_ci	idx = nfs4_fl_calc_ds_index(lseg, j);
5098c2ecf20Sopenharmony_ci	ds = nfs4_fl_prepare_ds(lseg, idx);
5108c2ecf20Sopenharmony_ci	if (!ds)
5118c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
5148c2ecf20Sopenharmony_ci	if (IS_ERR(ds_clnt))
5158c2ecf20Sopenharmony_ci		return PNFS_NOT_ATTEMPTED;
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
5188c2ecf20Sopenharmony_ci		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
5198c2ecf20Sopenharmony_ci		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	hdr->pgio_done_cb = filelayout_write_done_cb;
5228c2ecf20Sopenharmony_ci	refcount_inc(&ds->ds_clp->cl_count);
5238c2ecf20Sopenharmony_ci	hdr->ds_clp = ds->ds_clp;
5248c2ecf20Sopenharmony_ci	hdr->ds_commit_idx = idx;
5258c2ecf20Sopenharmony_ci	fh = nfs4_fl_select_ds_fh(lseg, j);
5268c2ecf20Sopenharmony_ci	if (fh)
5278c2ecf20Sopenharmony_ci		hdr->args.fh = fh;
5288c2ecf20Sopenharmony_ci	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci	/* Perform an asynchronous write */
5318c2ecf20Sopenharmony_ci	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
5328c2ecf20Sopenharmony_ci			  NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
5338c2ecf20Sopenharmony_ci			  sync, RPC_TASK_SOFTCONN);
5348c2ecf20Sopenharmony_ci	return PNFS_ATTEMPTED;
5358c2ecf20Sopenharmony_ci}
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_cistatic int
5388c2ecf20Sopenharmony_cifilelayout_check_deviceid(struct pnfs_layout_hdr *lo,
5398c2ecf20Sopenharmony_ci			  struct nfs4_filelayout_segment *fl,
5408c2ecf20Sopenharmony_ci			  gfp_t gfp_flags)
5418c2ecf20Sopenharmony_ci{
5428c2ecf20Sopenharmony_ci	struct nfs4_deviceid_node *d;
5438c2ecf20Sopenharmony_ci	struct nfs4_file_layout_dsaddr *dsaddr;
5448c2ecf20Sopenharmony_ci	int status = -EINVAL;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	/* Is the deviceid already set? If so, we're good. */
5478c2ecf20Sopenharmony_ci	if (fl->dsaddr != NULL)
5488c2ecf20Sopenharmony_ci		return 0;
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ci	/* find and reference the deviceid */
5518c2ecf20Sopenharmony_ci	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
5528c2ecf20Sopenharmony_ci			lo->plh_lc_cred, gfp_flags);
5538c2ecf20Sopenharmony_ci	if (d == NULL)
5548c2ecf20Sopenharmony_ci		goto out;
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
5578c2ecf20Sopenharmony_ci	/* Found deviceid is unavailable */
5588c2ecf20Sopenharmony_ci	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
5598c2ecf20Sopenharmony_ci		goto out_put;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	if (fl->first_stripe_index >= dsaddr->stripe_count) {
5628c2ecf20Sopenharmony_ci		dprintk("%s Bad first_stripe_index %u\n",
5638c2ecf20Sopenharmony_ci				__func__, fl->first_stripe_index);
5648c2ecf20Sopenharmony_ci		goto out_put;
5658c2ecf20Sopenharmony_ci	}
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	if ((fl->stripe_type == STRIPE_SPARSE &&
5688c2ecf20Sopenharmony_ci	    fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
5698c2ecf20Sopenharmony_ci	    (fl->stripe_type == STRIPE_DENSE &&
5708c2ecf20Sopenharmony_ci	    fl->num_fh != dsaddr->stripe_count)) {
5718c2ecf20Sopenharmony_ci		dprintk("%s num_fh %u not valid for given packing\n",
5728c2ecf20Sopenharmony_ci			__func__, fl->num_fh);
5738c2ecf20Sopenharmony_ci		goto out_put;
5748c2ecf20Sopenharmony_ci	}
5758c2ecf20Sopenharmony_ci	status = 0;
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	/*
5788c2ecf20Sopenharmony_ci	 * Atomic compare and xchange to ensure we don't scribble
5798c2ecf20Sopenharmony_ci	 * over a non-NULL pointer.
5808c2ecf20Sopenharmony_ci	 */
5818c2ecf20Sopenharmony_ci	if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL)
5828c2ecf20Sopenharmony_ci		goto out_put;
5838c2ecf20Sopenharmony_ciout:
5848c2ecf20Sopenharmony_ci	return status;
5858c2ecf20Sopenharmony_ciout_put:
5868c2ecf20Sopenharmony_ci	nfs4_fl_put_deviceid(dsaddr);
5878c2ecf20Sopenharmony_ci	goto out;
5888c2ecf20Sopenharmony_ci}
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci/*
5918c2ecf20Sopenharmony_ci * filelayout_check_layout()
5928c2ecf20Sopenharmony_ci *
5938c2ecf20Sopenharmony_ci * Make sure layout segment parameters are sane WRT the device.
5948c2ecf20Sopenharmony_ci * At this point no generic layer initialization of the lseg has occurred,
5958c2ecf20Sopenharmony_ci * and nothing has been added to the layout_hdr cache.
5968c2ecf20Sopenharmony_ci *
5978c2ecf20Sopenharmony_ci */
5988c2ecf20Sopenharmony_cistatic int
5998c2ecf20Sopenharmony_cifilelayout_check_layout(struct pnfs_layout_hdr *lo,
6008c2ecf20Sopenharmony_ci			struct nfs4_filelayout_segment *fl,
6018c2ecf20Sopenharmony_ci			struct nfs4_layoutget_res *lgr,
6028c2ecf20Sopenharmony_ci			gfp_t gfp_flags)
6038c2ecf20Sopenharmony_ci{
6048c2ecf20Sopenharmony_ci	int status = -EINVAL;
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci	dprintk("--> %s\n", __func__);
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	/* FIXME: remove this check when layout segment support is added */
6098c2ecf20Sopenharmony_ci	if (lgr->range.offset != 0 ||
6108c2ecf20Sopenharmony_ci	    lgr->range.length != NFS4_MAX_UINT64) {
6118c2ecf20Sopenharmony_ci		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
6128c2ecf20Sopenharmony_ci			__func__);
6138c2ecf20Sopenharmony_ci		goto out;
6148c2ecf20Sopenharmony_ci	}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci	if (fl->pattern_offset > lgr->range.offset) {
6178c2ecf20Sopenharmony_ci		dprintk("%s pattern_offset %lld too large\n",
6188c2ecf20Sopenharmony_ci				__func__, fl->pattern_offset);
6198c2ecf20Sopenharmony_ci		goto out;
6208c2ecf20Sopenharmony_ci	}
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	if (!fl->stripe_unit) {
6238c2ecf20Sopenharmony_ci		dprintk("%s Invalid stripe unit (%u)\n",
6248c2ecf20Sopenharmony_ci			__func__, fl->stripe_unit);
6258c2ecf20Sopenharmony_ci		goto out;
6268c2ecf20Sopenharmony_ci	}
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci	status = 0;
6298c2ecf20Sopenharmony_ciout:
6308c2ecf20Sopenharmony_ci	dprintk("--> %s returns %d\n", __func__, status);
6318c2ecf20Sopenharmony_ci	return status;
6328c2ecf20Sopenharmony_ci}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_cistatic void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
6358c2ecf20Sopenharmony_ci{
6368c2ecf20Sopenharmony_ci	int i;
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	if (fl->fh_array) {
6398c2ecf20Sopenharmony_ci		for (i = 0; i < fl->num_fh; i++) {
6408c2ecf20Sopenharmony_ci			if (!fl->fh_array[i])
6418c2ecf20Sopenharmony_ci				break;
6428c2ecf20Sopenharmony_ci			kfree(fl->fh_array[i]);
6438c2ecf20Sopenharmony_ci		}
6448c2ecf20Sopenharmony_ci		kfree(fl->fh_array);
6458c2ecf20Sopenharmony_ci	}
6468c2ecf20Sopenharmony_ci	kfree(fl);
6478c2ecf20Sopenharmony_ci}
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_cistatic int
6508c2ecf20Sopenharmony_cifilelayout_decode_layout(struct pnfs_layout_hdr *flo,
6518c2ecf20Sopenharmony_ci			 struct nfs4_filelayout_segment *fl,
6528c2ecf20Sopenharmony_ci			 struct nfs4_layoutget_res *lgr,
6538c2ecf20Sopenharmony_ci			 gfp_t gfp_flags)
6548c2ecf20Sopenharmony_ci{
6558c2ecf20Sopenharmony_ci	struct xdr_stream stream;
6568c2ecf20Sopenharmony_ci	struct xdr_buf buf;
6578c2ecf20Sopenharmony_ci	struct page *scratch;
6588c2ecf20Sopenharmony_ci	__be32 *p;
6598c2ecf20Sopenharmony_ci	uint32_t nfl_util;
6608c2ecf20Sopenharmony_ci	int i;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	dprintk("%s: set_layout_map Begin\n", __func__);
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci	scratch = alloc_page(gfp_flags);
6658c2ecf20Sopenharmony_ci	if (!scratch)
6668c2ecf20Sopenharmony_ci		return -ENOMEM;
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
6698c2ecf20Sopenharmony_ci	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
6728c2ecf20Sopenharmony_ci	 * num_fh (4) */
6738c2ecf20Sopenharmony_ci	p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
6748c2ecf20Sopenharmony_ci	if (unlikely(!p))
6758c2ecf20Sopenharmony_ci		goto out_err;
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	memcpy(&fl->deviceid, p, sizeof(fl->deviceid));
6788c2ecf20Sopenharmony_ci	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
6798c2ecf20Sopenharmony_ci	nfs4_print_deviceid(&fl->deviceid);
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci	nfl_util = be32_to_cpup(p++);
6828c2ecf20Sopenharmony_ci	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
6838c2ecf20Sopenharmony_ci		fl->commit_through_mds = 1;
6848c2ecf20Sopenharmony_ci	if (nfl_util & NFL4_UFLG_DENSE)
6858c2ecf20Sopenharmony_ci		fl->stripe_type = STRIPE_DENSE;
6868c2ecf20Sopenharmony_ci	else
6878c2ecf20Sopenharmony_ci		fl->stripe_type = STRIPE_SPARSE;
6888c2ecf20Sopenharmony_ci	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	fl->first_stripe_index = be32_to_cpup(p++);
6918c2ecf20Sopenharmony_ci	p = xdr_decode_hyper(p, &fl->pattern_offset);
6928c2ecf20Sopenharmony_ci	fl->num_fh = be32_to_cpup(p++);
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
6958c2ecf20Sopenharmony_ci		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
6968c2ecf20Sopenharmony_ci		fl->pattern_offset);
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
6998c2ecf20Sopenharmony_ci	 * Futher checking is done in filelayout_check_layout */
7008c2ecf20Sopenharmony_ci	if (fl->num_fh >
7018c2ecf20Sopenharmony_ci	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
7028c2ecf20Sopenharmony_ci		goto out_err;
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	if (fl->num_fh > 0) {
7058c2ecf20Sopenharmony_ci		fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
7068c2ecf20Sopenharmony_ci				       gfp_flags);
7078c2ecf20Sopenharmony_ci		if (!fl->fh_array)
7088c2ecf20Sopenharmony_ci			goto out_err;
7098c2ecf20Sopenharmony_ci	}
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	for (i = 0; i < fl->num_fh; i++) {
7128c2ecf20Sopenharmony_ci		/* Do we want to use a mempool here? */
7138c2ecf20Sopenharmony_ci		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
7148c2ecf20Sopenharmony_ci		if (!fl->fh_array[i])
7158c2ecf20Sopenharmony_ci			goto out_err;
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_ci		p = xdr_inline_decode(&stream, 4);
7188c2ecf20Sopenharmony_ci		if (unlikely(!p))
7198c2ecf20Sopenharmony_ci			goto out_err;
7208c2ecf20Sopenharmony_ci		fl->fh_array[i]->size = be32_to_cpup(p++);
7218c2ecf20Sopenharmony_ci		if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
7228c2ecf20Sopenharmony_ci			printk(KERN_ERR "NFS: Too big fh %d received %d\n",
7238c2ecf20Sopenharmony_ci			       i, fl->fh_array[i]->size);
7248c2ecf20Sopenharmony_ci			goto out_err;
7258c2ecf20Sopenharmony_ci		}
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci		p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
7288c2ecf20Sopenharmony_ci		if (unlikely(!p))
7298c2ecf20Sopenharmony_ci			goto out_err;
7308c2ecf20Sopenharmony_ci		memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
7318c2ecf20Sopenharmony_ci		dprintk("DEBUG: %s: fh len %d\n", __func__,
7328c2ecf20Sopenharmony_ci			fl->fh_array[i]->size);
7338c2ecf20Sopenharmony_ci	}
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	__free_page(scratch);
7368c2ecf20Sopenharmony_ci	return 0;
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ciout_err:
7398c2ecf20Sopenharmony_ci	__free_page(scratch);
7408c2ecf20Sopenharmony_ci	return -EIO;
7418c2ecf20Sopenharmony_ci}
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_cistatic void
7448c2ecf20Sopenharmony_cifilelayout_free_lseg(struct pnfs_layout_segment *lseg)
7458c2ecf20Sopenharmony_ci{
7468c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_ci	dprintk("--> %s\n", __func__);
7498c2ecf20Sopenharmony_ci	if (fl->dsaddr != NULL)
7508c2ecf20Sopenharmony_ci		nfs4_fl_put_deviceid(fl->dsaddr);
7518c2ecf20Sopenharmony_ci	/* This assumes a single RW lseg */
7528c2ecf20Sopenharmony_ci	if (lseg->pls_range.iomode == IOMODE_RW) {
7538c2ecf20Sopenharmony_ci		struct nfs4_filelayout *flo;
7548c2ecf20Sopenharmony_ci		struct inode *inode;
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_ci		flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
7578c2ecf20Sopenharmony_ci		inode = flo->generic_hdr.plh_inode;
7588c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
7598c2ecf20Sopenharmony_ci		pnfs_generic_ds_cinfo_release_lseg(&flo->commit_info, lseg);
7608c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
7618c2ecf20Sopenharmony_ci	}
7628c2ecf20Sopenharmony_ci	_filelayout_free_lseg(fl);
7638c2ecf20Sopenharmony_ci}
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_cistatic struct pnfs_layout_segment *
7668c2ecf20Sopenharmony_cifilelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
7678c2ecf20Sopenharmony_ci		      struct nfs4_layoutget_res *lgr,
7688c2ecf20Sopenharmony_ci		      gfp_t gfp_flags)
7698c2ecf20Sopenharmony_ci{
7708c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *fl;
7718c2ecf20Sopenharmony_ci	int rc;
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	dprintk("--> %s\n", __func__);
7748c2ecf20Sopenharmony_ci	fl = kzalloc(sizeof(*fl), gfp_flags);
7758c2ecf20Sopenharmony_ci	if (!fl)
7768c2ecf20Sopenharmony_ci		return NULL;
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags);
7798c2ecf20Sopenharmony_ci	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) {
7808c2ecf20Sopenharmony_ci		_filelayout_free_lseg(fl);
7818c2ecf20Sopenharmony_ci		return NULL;
7828c2ecf20Sopenharmony_ci	}
7838c2ecf20Sopenharmony_ci	return &fl->generic_hdr;
7848c2ecf20Sopenharmony_ci}
7858c2ecf20Sopenharmony_ci
7868c2ecf20Sopenharmony_cistatic bool
7878c2ecf20Sopenharmony_cifilelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
7888c2ecf20Sopenharmony_ci{
7898c2ecf20Sopenharmony_ci	return flseg->num_fh > 1;
7908c2ecf20Sopenharmony_ci}
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci/*
7938c2ecf20Sopenharmony_ci * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
7948c2ecf20Sopenharmony_ci *
7958c2ecf20Sopenharmony_ci * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
7968c2ecf20Sopenharmony_ci * of bytes (maximum @req->wb_bytes) that can be coalesced.
7978c2ecf20Sopenharmony_ci */
7988c2ecf20Sopenharmony_cistatic size_t
7998c2ecf20Sopenharmony_cifilelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
8008c2ecf20Sopenharmony_ci		   struct nfs_page *req)
8018c2ecf20Sopenharmony_ci{
8028c2ecf20Sopenharmony_ci	unsigned int size;
8038c2ecf20Sopenharmony_ci	u64 p_stripe, r_stripe;
8048c2ecf20Sopenharmony_ci	u32 stripe_offset;
8058c2ecf20Sopenharmony_ci	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
8068c2ecf20Sopenharmony_ci	u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	/* calls nfs_generic_pg_test */
8098c2ecf20Sopenharmony_ci	size = pnfs_generic_pg_test(pgio, prev, req);
8108c2ecf20Sopenharmony_ci	if (!size)
8118c2ecf20Sopenharmony_ci		return 0;
8128c2ecf20Sopenharmony_ci	else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg)))
8138c2ecf20Sopenharmony_ci		return size;
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	/* see if req and prev are in the same stripe */
8168c2ecf20Sopenharmony_ci	if (prev) {
8178c2ecf20Sopenharmony_ci		p_stripe = (u64)req_offset(prev) - segment_offset;
8188c2ecf20Sopenharmony_ci		r_stripe = (u64)req_offset(req) - segment_offset;
8198c2ecf20Sopenharmony_ci		do_div(p_stripe, stripe_unit);
8208c2ecf20Sopenharmony_ci		do_div(r_stripe, stripe_unit);
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci		if (p_stripe != r_stripe)
8238c2ecf20Sopenharmony_ci			return 0;
8248c2ecf20Sopenharmony_ci	}
8258c2ecf20Sopenharmony_ci
8268c2ecf20Sopenharmony_ci	/* calculate remaining bytes in the current stripe */
8278c2ecf20Sopenharmony_ci	div_u64_rem((u64)req_offset(req) - segment_offset,
8288c2ecf20Sopenharmony_ci			stripe_unit,
8298c2ecf20Sopenharmony_ci			&stripe_offset);
8308c2ecf20Sopenharmony_ci	WARN_ON_ONCE(stripe_offset > stripe_unit);
8318c2ecf20Sopenharmony_ci	if (stripe_offset >= stripe_unit)
8328c2ecf20Sopenharmony_ci		return 0;
8338c2ecf20Sopenharmony_ci	return min(stripe_unit - (unsigned int)stripe_offset, size);
8348c2ecf20Sopenharmony_ci}
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_cistatic struct pnfs_layout_segment *
8378c2ecf20Sopenharmony_cifl_pnfs_update_layout(struct inode *ino,
8388c2ecf20Sopenharmony_ci		      struct nfs_open_context *ctx,
8398c2ecf20Sopenharmony_ci		      loff_t pos,
8408c2ecf20Sopenharmony_ci		      u64 count,
8418c2ecf20Sopenharmony_ci		      enum pnfs_iomode iomode,
8428c2ecf20Sopenharmony_ci		      bool strict_iomode,
8438c2ecf20Sopenharmony_ci		      gfp_t gfp_flags)
8448c2ecf20Sopenharmony_ci{
8458c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = NULL;
8468c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *lo;
8478c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *fl;
8488c2ecf20Sopenharmony_ci	int status;
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
8518c2ecf20Sopenharmony_ci				  gfp_flags);
8528c2ecf20Sopenharmony_ci	if (IS_ERR_OR_NULL(lseg))
8538c2ecf20Sopenharmony_ci		goto out;
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci	lo = NFS_I(ino)->layout;
8568c2ecf20Sopenharmony_ci	fl = FILELAYOUT_LSEG(lseg);
8578c2ecf20Sopenharmony_ci
8588c2ecf20Sopenharmony_ci	status = filelayout_check_deviceid(lo, fl, gfp_flags);
8598c2ecf20Sopenharmony_ci	if (status) {
8608c2ecf20Sopenharmony_ci		pnfs_put_lseg(lseg);
8618c2ecf20Sopenharmony_ci		lseg = NULL;
8628c2ecf20Sopenharmony_ci	}
8638c2ecf20Sopenharmony_ciout:
8648c2ecf20Sopenharmony_ci	return lseg;
8658c2ecf20Sopenharmony_ci}
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_cistatic void
8688c2ecf20Sopenharmony_cifilelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
8698c2ecf20Sopenharmony_ci			struct nfs_page *req)
8708c2ecf20Sopenharmony_ci{
8718c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
8728c2ecf20Sopenharmony_ci	if (!pgio->pg_lseg) {
8738c2ecf20Sopenharmony_ci		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
8748c2ecf20Sopenharmony_ci						      nfs_req_openctx(req),
8758c2ecf20Sopenharmony_ci						      0,
8768c2ecf20Sopenharmony_ci						      NFS4_MAX_UINT64,
8778c2ecf20Sopenharmony_ci						      IOMODE_READ,
8788c2ecf20Sopenharmony_ci						      false,
8798c2ecf20Sopenharmony_ci						      GFP_KERNEL);
8808c2ecf20Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
8818c2ecf20Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
8828c2ecf20Sopenharmony_ci			pgio->pg_lseg = NULL;
8838c2ecf20Sopenharmony_ci			return;
8848c2ecf20Sopenharmony_ci		}
8858c2ecf20Sopenharmony_ci	}
8868c2ecf20Sopenharmony_ci	/* If no lseg, fall back to read through mds */
8878c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL)
8888c2ecf20Sopenharmony_ci		nfs_pageio_reset_read_mds(pgio);
8898c2ecf20Sopenharmony_ci}
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_cistatic void
8928c2ecf20Sopenharmony_cifilelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
8938c2ecf20Sopenharmony_ci			 struct nfs_page *req)
8948c2ecf20Sopenharmony_ci{
8958c2ecf20Sopenharmony_ci	pnfs_generic_pg_check_layout(pgio);
8968c2ecf20Sopenharmony_ci	if (!pgio->pg_lseg) {
8978c2ecf20Sopenharmony_ci		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
8988c2ecf20Sopenharmony_ci						      nfs_req_openctx(req),
8998c2ecf20Sopenharmony_ci						      0,
9008c2ecf20Sopenharmony_ci						      NFS4_MAX_UINT64,
9018c2ecf20Sopenharmony_ci						      IOMODE_RW,
9028c2ecf20Sopenharmony_ci						      false,
9038c2ecf20Sopenharmony_ci						      GFP_NOFS);
9048c2ecf20Sopenharmony_ci		if (IS_ERR(pgio->pg_lseg)) {
9058c2ecf20Sopenharmony_ci			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
9068c2ecf20Sopenharmony_ci			pgio->pg_lseg = NULL;
9078c2ecf20Sopenharmony_ci			return;
9088c2ecf20Sopenharmony_ci		}
9098c2ecf20Sopenharmony_ci	}
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	/* If no lseg, fall back to write through mds */
9128c2ecf20Sopenharmony_ci	if (pgio->pg_lseg == NULL)
9138c2ecf20Sopenharmony_ci		nfs_pageio_reset_write_mds(pgio);
9148c2ecf20Sopenharmony_ci}
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_cistatic const struct nfs_pageio_ops filelayout_pg_read_ops = {
9178c2ecf20Sopenharmony_ci	.pg_init = filelayout_pg_init_read,
9188c2ecf20Sopenharmony_ci	.pg_test = filelayout_pg_test,
9198c2ecf20Sopenharmony_ci	.pg_doio = pnfs_generic_pg_readpages,
9208c2ecf20Sopenharmony_ci	.pg_cleanup = pnfs_generic_pg_cleanup,
9218c2ecf20Sopenharmony_ci};
9228c2ecf20Sopenharmony_ci
9238c2ecf20Sopenharmony_cistatic const struct nfs_pageio_ops filelayout_pg_write_ops = {
9248c2ecf20Sopenharmony_ci	.pg_init = filelayout_pg_init_write,
9258c2ecf20Sopenharmony_ci	.pg_test = filelayout_pg_test,
9268c2ecf20Sopenharmony_ci	.pg_doio = pnfs_generic_pg_writepages,
9278c2ecf20Sopenharmony_ci	.pg_cleanup = pnfs_generic_pg_cleanup,
9288c2ecf20Sopenharmony_ci};
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_cistatic u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
9318c2ecf20Sopenharmony_ci{
9328c2ecf20Sopenharmony_ci	if (fl->stripe_type == STRIPE_SPARSE)
9338c2ecf20Sopenharmony_ci		return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
9348c2ecf20Sopenharmony_ci	else
9358c2ecf20Sopenharmony_ci		return j;
9368c2ecf20Sopenharmony_ci}
9378c2ecf20Sopenharmony_ci
9388c2ecf20Sopenharmony_cistatic void
9398c2ecf20Sopenharmony_cifilelayout_mark_request_commit(struct nfs_page *req,
9408c2ecf20Sopenharmony_ci			       struct pnfs_layout_segment *lseg,
9418c2ecf20Sopenharmony_ci			       struct nfs_commit_info *cinfo,
9428c2ecf20Sopenharmony_ci			       u32 ds_commit_idx)
9438c2ecf20Sopenharmony_ci
9448c2ecf20Sopenharmony_ci{
9458c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
9468c2ecf20Sopenharmony_ci	u32 i, j;
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci	if (fl->commit_through_mds) {
9498c2ecf20Sopenharmony_ci		nfs_request_add_commit_list(req, cinfo);
9508c2ecf20Sopenharmony_ci	} else {
9518c2ecf20Sopenharmony_ci		/* Note that we are calling nfs4_fl_calc_j_index on each page
9528c2ecf20Sopenharmony_ci		 * that ends up being committed to a data server.  An attractive
9538c2ecf20Sopenharmony_ci		 * alternative is to add a field to nfs_write_data and nfs_page
9548c2ecf20Sopenharmony_ci		 * to store the value calculated in filelayout_write_pagelist
9558c2ecf20Sopenharmony_ci		 * and just use that here.
9568c2ecf20Sopenharmony_ci		 */
9578c2ecf20Sopenharmony_ci		j = nfs4_fl_calc_j_index(lseg, req_offset(req));
9588c2ecf20Sopenharmony_ci		i = select_bucket_index(fl, j);
9598c2ecf20Sopenharmony_ci		pnfs_layout_mark_request_commit(req, lseg, cinfo, i);
9608c2ecf20Sopenharmony_ci	}
9618c2ecf20Sopenharmony_ci}
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_cistatic u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
9648c2ecf20Sopenharmony_ci{
9658c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci	if (flseg->stripe_type == STRIPE_SPARSE)
9688c2ecf20Sopenharmony_ci		return i;
9698c2ecf20Sopenharmony_ci	else
9708c2ecf20Sopenharmony_ci		return nfs4_fl_calc_ds_index(lseg, i);
9718c2ecf20Sopenharmony_ci}
9728c2ecf20Sopenharmony_ci
9738c2ecf20Sopenharmony_cistatic struct nfs_fh *
9748c2ecf20Sopenharmony_ciselect_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
9758c2ecf20Sopenharmony_ci{
9768c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	if (flseg->stripe_type == STRIPE_SPARSE) {
9798c2ecf20Sopenharmony_ci		if (flseg->num_fh == 1)
9808c2ecf20Sopenharmony_ci			i = 0;
9818c2ecf20Sopenharmony_ci		else if (flseg->num_fh == 0)
9828c2ecf20Sopenharmony_ci			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
9838c2ecf20Sopenharmony_ci			return NULL;
9848c2ecf20Sopenharmony_ci	}
9858c2ecf20Sopenharmony_ci	return flseg->fh_array[i];
9868c2ecf20Sopenharmony_ci}
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_cistatic int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
9898c2ecf20Sopenharmony_ci{
9908c2ecf20Sopenharmony_ci	struct pnfs_layout_segment *lseg = data->lseg;
9918c2ecf20Sopenharmony_ci	struct nfs4_pnfs_ds *ds;
9928c2ecf20Sopenharmony_ci	struct rpc_clnt *ds_clnt;
9938c2ecf20Sopenharmony_ci	u32 idx;
9948c2ecf20Sopenharmony_ci	struct nfs_fh *fh;
9958c2ecf20Sopenharmony_ci
9968c2ecf20Sopenharmony_ci	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
9978c2ecf20Sopenharmony_ci	ds = nfs4_fl_prepare_ds(lseg, idx);
9988c2ecf20Sopenharmony_ci	if (!ds)
9998c2ecf20Sopenharmony_ci		goto out_err;
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode);
10028c2ecf20Sopenharmony_ci	if (IS_ERR(ds_clnt))
10038c2ecf20Sopenharmony_ci		goto out_err;
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
10068c2ecf20Sopenharmony_ci		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
10078c2ecf20Sopenharmony_ci	data->commit_done_cb = filelayout_commit_done_cb;
10088c2ecf20Sopenharmony_ci	refcount_inc(&ds->ds_clp->cl_count);
10098c2ecf20Sopenharmony_ci	data->ds_clp = ds->ds_clp;
10108c2ecf20Sopenharmony_ci	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
10118c2ecf20Sopenharmony_ci	if (fh)
10128c2ecf20Sopenharmony_ci		data->args.fh = fh;
10138c2ecf20Sopenharmony_ci	return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
10148c2ecf20Sopenharmony_ci				   &filelayout_commit_call_ops, how,
10158c2ecf20Sopenharmony_ci				   RPC_TASK_SOFTCONN);
10168c2ecf20Sopenharmony_ciout_err:
10178c2ecf20Sopenharmony_ci	pnfs_generic_prepare_to_resend_writes(data);
10188c2ecf20Sopenharmony_ci	pnfs_generic_commit_release(data);
10198c2ecf20Sopenharmony_ci	return -EAGAIN;
10208c2ecf20Sopenharmony_ci}
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_cistatic int
10238c2ecf20Sopenharmony_cifilelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
10248c2ecf20Sopenharmony_ci			   int how, struct nfs_commit_info *cinfo)
10258c2ecf20Sopenharmony_ci{
10268c2ecf20Sopenharmony_ci	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
10278c2ecf20Sopenharmony_ci					    filelayout_initiate_commit);
10288c2ecf20Sopenharmony_ci}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_cistatic struct nfs4_deviceid_node *
10318c2ecf20Sopenharmony_cifilelayout_alloc_deviceid_node(struct nfs_server *server,
10328c2ecf20Sopenharmony_ci		struct pnfs_device *pdev, gfp_t gfp_flags)
10338c2ecf20Sopenharmony_ci{
10348c2ecf20Sopenharmony_ci	struct nfs4_file_layout_dsaddr *dsaddr;
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_ci	dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags);
10378c2ecf20Sopenharmony_ci	if (!dsaddr)
10388c2ecf20Sopenharmony_ci		return NULL;
10398c2ecf20Sopenharmony_ci	return &dsaddr->id_node;
10408c2ecf20Sopenharmony_ci}
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_cistatic void
10438c2ecf20Sopenharmony_cifilelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
10448c2ecf20Sopenharmony_ci{
10458c2ecf20Sopenharmony_ci	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
10468c2ecf20Sopenharmony_ci}
10478c2ecf20Sopenharmony_ci
10488c2ecf20Sopenharmony_cistatic struct pnfs_layout_hdr *
10498c2ecf20Sopenharmony_cifilelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
10508c2ecf20Sopenharmony_ci{
10518c2ecf20Sopenharmony_ci	struct nfs4_filelayout *flo;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	flo = kzalloc(sizeof(*flo), gfp_flags);
10548c2ecf20Sopenharmony_ci	if (flo == NULL)
10558c2ecf20Sopenharmony_ci		return NULL;
10568c2ecf20Sopenharmony_ci	pnfs_init_ds_commit_info(&flo->commit_info);
10578c2ecf20Sopenharmony_ci	flo->commit_info.ops = &filelayout_commit_ops;
10588c2ecf20Sopenharmony_ci	return &flo->generic_hdr;
10598c2ecf20Sopenharmony_ci}
10608c2ecf20Sopenharmony_ci
10618c2ecf20Sopenharmony_cistatic void
10628c2ecf20Sopenharmony_cifilelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
10638c2ecf20Sopenharmony_ci{
10648c2ecf20Sopenharmony_ci	kfree_rcu(FILELAYOUT_FROM_HDR(lo), generic_hdr.plh_rcu);
10658c2ecf20Sopenharmony_ci}
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_cistatic struct pnfs_ds_commit_info *
10688c2ecf20Sopenharmony_cifilelayout_get_ds_info(struct inode *inode)
10698c2ecf20Sopenharmony_ci{
10708c2ecf20Sopenharmony_ci	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	if (layout == NULL)
10738c2ecf20Sopenharmony_ci		return NULL;
10748c2ecf20Sopenharmony_ci	else
10758c2ecf20Sopenharmony_ci		return &FILELAYOUT_FROM_HDR(layout)->commit_info;
10768c2ecf20Sopenharmony_ci}
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_cistatic void
10798c2ecf20Sopenharmony_cifilelayout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
10808c2ecf20Sopenharmony_ci		struct pnfs_layout_segment *lseg)
10818c2ecf20Sopenharmony_ci{
10828c2ecf20Sopenharmony_ci	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
10838c2ecf20Sopenharmony_ci	struct inode *inode = lseg->pls_layout->plh_inode;
10848c2ecf20Sopenharmony_ci	struct pnfs_commit_array *array, *new;
10858c2ecf20Sopenharmony_ci	unsigned int size = (fl->stripe_type == STRIPE_SPARSE) ?
10868c2ecf20Sopenharmony_ci		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_ci	new = pnfs_alloc_commit_array(size, GFP_NOIO);
10898c2ecf20Sopenharmony_ci	if (new) {
10908c2ecf20Sopenharmony_ci		spin_lock(&inode->i_lock);
10918c2ecf20Sopenharmony_ci		array = pnfs_add_commit_array(fl_cinfo, new, lseg);
10928c2ecf20Sopenharmony_ci		spin_unlock(&inode->i_lock);
10938c2ecf20Sopenharmony_ci		if (array != new)
10948c2ecf20Sopenharmony_ci			pnfs_free_commit_array(new);
10958c2ecf20Sopenharmony_ci	}
10968c2ecf20Sopenharmony_ci}
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_cistatic void
10998c2ecf20Sopenharmony_cifilelayout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
11008c2ecf20Sopenharmony_ci		struct inode *inode)
11018c2ecf20Sopenharmony_ci{
11028c2ecf20Sopenharmony_ci	spin_lock(&inode->i_lock);
11038c2ecf20Sopenharmony_ci	pnfs_generic_ds_cinfo_destroy(fl_cinfo);
11048c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_lock);
11058c2ecf20Sopenharmony_ci}
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_cistatic const struct pnfs_commit_ops filelayout_commit_ops = {
11088c2ecf20Sopenharmony_ci	.setup_ds_info		= filelayout_setup_ds_info,
11098c2ecf20Sopenharmony_ci	.release_ds_info	= filelayout_release_ds_info,
11108c2ecf20Sopenharmony_ci	.mark_request_commit	= filelayout_mark_request_commit,
11118c2ecf20Sopenharmony_ci	.clear_request_commit	= pnfs_generic_clear_request_commit,
11128c2ecf20Sopenharmony_ci	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
11138c2ecf20Sopenharmony_ci	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
11148c2ecf20Sopenharmony_ci	.search_commit_reqs	= pnfs_generic_search_commit_reqs,
11158c2ecf20Sopenharmony_ci	.commit_pagelist	= filelayout_commit_pagelist,
11168c2ecf20Sopenharmony_ci};
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_cistatic struct pnfs_layoutdriver_type filelayout_type = {
11198c2ecf20Sopenharmony_ci	.id			= LAYOUT_NFSV4_1_FILES,
11208c2ecf20Sopenharmony_ci	.name			= "LAYOUT_NFSV4_1_FILES",
11218c2ecf20Sopenharmony_ci	.owner			= THIS_MODULE,
11228c2ecf20Sopenharmony_ci	.flags			= PNFS_LAYOUTGET_ON_OPEN,
11238c2ecf20Sopenharmony_ci	.max_layoutget_response	= 4096, /* 1 page or so... */
11248c2ecf20Sopenharmony_ci	.alloc_layout_hdr	= filelayout_alloc_layout_hdr,
11258c2ecf20Sopenharmony_ci	.free_layout_hdr	= filelayout_free_layout_hdr,
11268c2ecf20Sopenharmony_ci	.alloc_lseg		= filelayout_alloc_lseg,
11278c2ecf20Sopenharmony_ci	.free_lseg		= filelayout_free_lseg,
11288c2ecf20Sopenharmony_ci	.pg_read_ops		= &filelayout_pg_read_ops,
11298c2ecf20Sopenharmony_ci	.pg_write_ops		= &filelayout_pg_write_ops,
11308c2ecf20Sopenharmony_ci	.get_ds_info		= &filelayout_get_ds_info,
11318c2ecf20Sopenharmony_ci	.read_pagelist		= filelayout_read_pagelist,
11328c2ecf20Sopenharmony_ci	.write_pagelist		= filelayout_write_pagelist,
11338c2ecf20Sopenharmony_ci	.alloc_deviceid_node	= filelayout_alloc_deviceid_node,
11348c2ecf20Sopenharmony_ci	.free_deviceid_node	= filelayout_free_deviceid_node,
11358c2ecf20Sopenharmony_ci	.sync			= pnfs_nfs_generic_sync,
11368c2ecf20Sopenharmony_ci};
11378c2ecf20Sopenharmony_ci
11388c2ecf20Sopenharmony_cistatic int __init nfs4filelayout_init(void)
11398c2ecf20Sopenharmony_ci{
11408c2ecf20Sopenharmony_ci	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
11418c2ecf20Sopenharmony_ci	       __func__);
11428c2ecf20Sopenharmony_ci	return pnfs_register_layoutdriver(&filelayout_type);
11438c2ecf20Sopenharmony_ci}
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_cistatic void __exit nfs4filelayout_exit(void)
11468c2ecf20Sopenharmony_ci{
11478c2ecf20Sopenharmony_ci	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
11488c2ecf20Sopenharmony_ci	       __func__);
11498c2ecf20Sopenharmony_ci	pnfs_unregister_layoutdriver(&filelayout_type);
11508c2ecf20Sopenharmony_ci}
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ciMODULE_ALIAS("nfs-layouttype4-1");
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_cimodule_init(nfs4filelayout_init);
11558c2ecf20Sopenharmony_cimodule_exit(nfs4filelayout_exit);
1156