// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data.  Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols.  Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache.  A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.  The client does not
 * correct unaligned requests from applications.  All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files.  Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001	Initial implementation for 2.4  --cel
 * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003	Port to 2.5 APIs  --cel
 * 31 Mar 2004	Handle direct I/O without VFS support  --cel
 * 15 Sep 2004	Parallel async reads  --cel
 * 04 May 2005	support O_DIRECT with aio  --cel
 *
 */
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci#include <linux/errno.h>
4362306a36Sopenharmony_ci#include <linux/sched.h>
4462306a36Sopenharmony_ci#include <linux/kernel.h>
4562306a36Sopenharmony_ci#include <linux/file.h>
4662306a36Sopenharmony_ci#include <linux/pagemap.h>
4762306a36Sopenharmony_ci#include <linux/kref.h>
4862306a36Sopenharmony_ci#include <linux/slab.h>
4962306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
5062306a36Sopenharmony_ci#include <linux/module.h>
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci#include <linux/nfs_fs.h>
5362306a36Sopenharmony_ci#include <linux/nfs_page.h>
5462306a36Sopenharmony_ci#include <linux/sunrpc/clnt.h>
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci#include <linux/uaccess.h>
5762306a36Sopenharmony_ci#include <linux/atomic.h>
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci#include "internal.h"
6062306a36Sopenharmony_ci#include "iostat.h"
6162306a36Sopenharmony_ci#include "pnfs.h"
6262306a36Sopenharmony_ci#include "fscache.h"
6362306a36Sopenharmony_ci#include "nfstrace.h"
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci#define NFSDBG_FACILITY		NFSDBG_VFS
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic struct kmem_cache *nfs_direct_cachep;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
7062306a36Sopenharmony_cistatic const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
7162306a36Sopenharmony_cistatic void nfs_direct_write_complete(struct nfs_direct_req *dreq);
7262306a36Sopenharmony_cistatic void nfs_direct_write_schedule_work(struct work_struct *work);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_cistatic inline void get_dreq(struct nfs_direct_req *dreq)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	atomic_inc(&dreq->io_count);
7762306a36Sopenharmony_ci}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_cistatic inline int put_dreq(struct nfs_direct_req *dreq)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	return atomic_dec_and_test(&dreq->io_count);
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic void
8562306a36Sopenharmony_cinfs_direct_handle_truncated(struct nfs_direct_req *dreq,
8662306a36Sopenharmony_ci			    const struct nfs_pgio_header *hdr,
8762306a36Sopenharmony_ci			    ssize_t dreq_len)
8862306a36Sopenharmony_ci{
8962306a36Sopenharmony_ci	if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
9062306a36Sopenharmony_ci	      test_bit(NFS_IOHDR_EOF, &hdr->flags)))
9162306a36Sopenharmony_ci		return;
9262306a36Sopenharmony_ci	if (dreq->max_count >= dreq_len) {
9362306a36Sopenharmony_ci		dreq->max_count = dreq_len;
9462306a36Sopenharmony_ci		if (dreq->count > dreq_len)
9562306a36Sopenharmony_ci			dreq->count = dreq_len;
9662306a36Sopenharmony_ci	}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
9962306a36Sopenharmony_ci		dreq->error = hdr->error;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_cistatic void
10362306a36Sopenharmony_cinfs_direct_count_bytes(struct nfs_direct_req *dreq,
10462306a36Sopenharmony_ci		       const struct nfs_pgio_header *hdr)
10562306a36Sopenharmony_ci{
10662306a36Sopenharmony_ci	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
10762306a36Sopenharmony_ci	ssize_t dreq_len = 0;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	if (hdr_end > dreq->io_start)
11062306a36Sopenharmony_ci		dreq_len = hdr_end - dreq->io_start;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	nfs_direct_handle_truncated(dreq, hdr, dreq_len);
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	if (dreq_len > dreq->max_count)
11562306a36Sopenharmony_ci		dreq_len = dreq->max_count;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (dreq->count < dreq_len)
11862306a36Sopenharmony_ci		dreq->count = dreq_len;
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_cistatic void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
12262306a36Sopenharmony_ci					struct nfs_page *req)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	loff_t offs = req_offset(req);
12562306a36Sopenharmony_ci	size_t req_start = (size_t)(offs - dreq->io_start);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (req_start < dreq->max_count)
12862306a36Sopenharmony_ci		dreq->max_count = req_start;
12962306a36Sopenharmony_ci	if (req_start < dreq->count)
13062306a36Sopenharmony_ci		dreq->count = req_start;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci/**
13462306a36Sopenharmony_ci * nfs_swap_rw - NFS address space operation for swap I/O
13562306a36Sopenharmony_ci * @iocb: target I/O control block
13662306a36Sopenharmony_ci * @iter: I/O buffer
13762306a36Sopenharmony_ci *
13862306a36Sopenharmony_ci * Perform IO to the swap-file.  This is much like direct IO.
13962306a36Sopenharmony_ci */
14062306a36Sopenharmony_ciint nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
14162306a36Sopenharmony_ci{
14262306a36Sopenharmony_ci	ssize_t ret;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	if (iov_iter_rw(iter) == READ)
14762306a36Sopenharmony_ci		ret = nfs_file_direct_read(iocb, iter, true);
14862306a36Sopenharmony_ci	else
14962306a36Sopenharmony_ci		ret = nfs_file_direct_write(iocb, iter, true);
15062306a36Sopenharmony_ci	if (ret < 0)
15162306a36Sopenharmony_ci		return ret;
15262306a36Sopenharmony_ci	return 0;
15362306a36Sopenharmony_ci}
15462306a36Sopenharmony_ci
/* Drop one page reference on each of the first @npages entries of @pages. */
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
	unsigned int idx = 0;

	while (idx < npages) {
		put_page(pages[idx]);
		idx++;
	}
}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_civoid nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
16362306a36Sopenharmony_ci			      struct nfs_direct_req *dreq)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	cinfo->inode = dreq->inode;
16662306a36Sopenharmony_ci	cinfo->mds = &dreq->mds_cinfo;
16762306a36Sopenharmony_ci	cinfo->ds = &dreq->ds_cinfo;
16862306a36Sopenharmony_ci	cinfo->dreq = dreq;
16962306a36Sopenharmony_ci	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_cistatic inline struct nfs_direct_req *nfs_direct_req_alloc(void)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	struct nfs_direct_req *dreq;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
17762306a36Sopenharmony_ci	if (!dreq)
17862306a36Sopenharmony_ci		return NULL;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	kref_init(&dreq->kref);
18162306a36Sopenharmony_ci	kref_get(&dreq->kref);
18262306a36Sopenharmony_ci	init_completion(&dreq->completion);
18362306a36Sopenharmony_ci	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
18462306a36Sopenharmony_ci	pnfs_init_ds_commit_info(&dreq->ds_cinfo);
18562306a36Sopenharmony_ci	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
18662306a36Sopenharmony_ci	spin_lock_init(&dreq->lock);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	return dreq;
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_cistatic void nfs_direct_req_free(struct kref *kref)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode);
19662306a36Sopenharmony_ci	if (dreq->l_ctx != NULL)
19762306a36Sopenharmony_ci		nfs_put_lock_context(dreq->l_ctx);
19862306a36Sopenharmony_ci	if (dreq->ctx != NULL)
19962306a36Sopenharmony_ci		put_nfs_open_context(dreq->ctx);
20062306a36Sopenharmony_ci	kmem_cache_free(nfs_direct_cachep, dreq);
20162306a36Sopenharmony_ci}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_cistatic void nfs_direct_req_release(struct nfs_direct_req *dreq)
20462306a36Sopenharmony_ci{
20562306a36Sopenharmony_ci	kref_put(&dreq->kref, nfs_direct_req_free);
20662306a36Sopenharmony_ci}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_cissize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	loff_t start = offset - dreq->io_start;
21162306a36Sopenharmony_ci	return dreq->max_count - start;
21262306a36Sopenharmony_ci}
21362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci/*
21662306a36Sopenharmony_ci * Collects and returns the final error value/byte-count.
21762306a36Sopenharmony_ci */
21862306a36Sopenharmony_cistatic ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	ssize_t result = -EIOCBQUEUED;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	/* Async requests don't wait here */
22362306a36Sopenharmony_ci	if (dreq->iocb)
22462306a36Sopenharmony_ci		goto out;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	result = wait_for_completion_killable(&dreq->completion);
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	if (!result) {
22962306a36Sopenharmony_ci		result = dreq->count;
23062306a36Sopenharmony_ci		WARN_ON_ONCE(dreq->count < 0);
23162306a36Sopenharmony_ci	}
23262306a36Sopenharmony_ci	if (!result)
23362306a36Sopenharmony_ci		result = dreq->error;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ciout:
23662306a36Sopenharmony_ci	return (ssize_t) result;
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci/*
24062306a36Sopenharmony_ci * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
24162306a36Sopenharmony_ci * the iocb is still valid here if this is a synchronous request.
24262306a36Sopenharmony_ci */
24362306a36Sopenharmony_cistatic void nfs_direct_complete(struct nfs_direct_req *dreq)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	struct inode *inode = dreq->inode;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	inode_dio_end(inode);
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	if (dreq->iocb) {
25062306a36Sopenharmony_ci		long res = (long) dreq->error;
25162306a36Sopenharmony_ci		if (dreq->count != 0) {
25262306a36Sopenharmony_ci			res = (long) dreq->count;
25362306a36Sopenharmony_ci			WARN_ON_ONCE(dreq->count < 0);
25462306a36Sopenharmony_ci		}
25562306a36Sopenharmony_ci		dreq->iocb->ki_complete(dreq->iocb, res);
25662306a36Sopenharmony_ci	}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	complete(&dreq->completion);
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	nfs_direct_req_release(dreq);
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_cistatic void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	unsigned long bytes = 0;
26662306a36Sopenharmony_ci	struct nfs_direct_req *dreq = hdr->dreq;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	spin_lock(&dreq->lock);
26962306a36Sopenharmony_ci	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
27062306a36Sopenharmony_ci		spin_unlock(&dreq->lock);
27162306a36Sopenharmony_ci		goto out_put;
27262306a36Sopenharmony_ci	}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	nfs_direct_count_bytes(dreq, hdr);
27562306a36Sopenharmony_ci	spin_unlock(&dreq->lock);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	while (!list_empty(&hdr->pages)) {
27862306a36Sopenharmony_ci		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
27962306a36Sopenharmony_ci		struct page *page = req->wb_page;
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci		if (!PageCompound(page) && bytes < hdr->good_bytes &&
28262306a36Sopenharmony_ci		    (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
28362306a36Sopenharmony_ci			set_page_dirty(page);
28462306a36Sopenharmony_ci		bytes += req->wb_bytes;
28562306a36Sopenharmony_ci		nfs_list_remove_request(req);
28662306a36Sopenharmony_ci		nfs_release_request(req);
28762306a36Sopenharmony_ci	}
28862306a36Sopenharmony_ciout_put:
28962306a36Sopenharmony_ci	if (put_dreq(dreq))
29062306a36Sopenharmony_ci		nfs_direct_complete(dreq);
29162306a36Sopenharmony_ci	hdr->release(hdr);
29262306a36Sopenharmony_ci}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_cistatic void nfs_read_sync_pgio_error(struct list_head *head, int error)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci	struct nfs_page *req;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	while (!list_empty(head)) {
29962306a36Sopenharmony_ci		req = nfs_list_entry(head->next);
30062306a36Sopenharmony_ci		nfs_list_remove_request(req);
30162306a36Sopenharmony_ci		nfs_release_request(req);
30262306a36Sopenharmony_ci	}
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_cistatic void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	get_dreq(hdr->dreq);
30862306a36Sopenharmony_ci}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
31162306a36Sopenharmony_ci	.error_cleanup = nfs_read_sync_pgio_error,
31262306a36Sopenharmony_ci	.init_hdr = nfs_direct_pgio_init,
31362306a36Sopenharmony_ci	.completion = nfs_direct_read_completion,
31462306a36Sopenharmony_ci};
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci/*
31762306a36Sopenharmony_ci * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
31862306a36Sopenharmony_ci * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
31962306a36Sopenharmony_ci * bail and stop sending more reads.  Read length accounting is
32062306a36Sopenharmony_ci * handled automatically by nfs_direct_read_result().  Otherwise, if
32162306a36Sopenharmony_ci * no requests have been sent, just return an error.
32262306a36Sopenharmony_ci */
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_cistatic ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
32562306a36Sopenharmony_ci					      struct iov_iter *iter,
32662306a36Sopenharmony_ci					      loff_t pos)
32762306a36Sopenharmony_ci{
32862306a36Sopenharmony_ci	struct nfs_pageio_descriptor desc;
32962306a36Sopenharmony_ci	struct inode *inode = dreq->inode;
33062306a36Sopenharmony_ci	ssize_t result = -EINVAL;
33162306a36Sopenharmony_ci	size_t requested_bytes = 0;
33262306a36Sopenharmony_ci	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	nfs_pageio_init_read(&desc, dreq->inode, false,
33562306a36Sopenharmony_ci			     &nfs_direct_read_completion_ops);
33662306a36Sopenharmony_ci	get_dreq(dreq);
33762306a36Sopenharmony_ci	desc.pg_dreq = dreq;
33862306a36Sopenharmony_ci	inode_dio_begin(inode);
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	while (iov_iter_count(iter)) {
34162306a36Sopenharmony_ci		struct page **pagevec;
34262306a36Sopenharmony_ci		size_t bytes;
34362306a36Sopenharmony_ci		size_t pgbase;
34462306a36Sopenharmony_ci		unsigned npages, i;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci		result = iov_iter_get_pages_alloc2(iter, &pagevec,
34762306a36Sopenharmony_ci						  rsize, &pgbase);
34862306a36Sopenharmony_ci		if (result < 0)
34962306a36Sopenharmony_ci			break;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci		bytes = result;
35262306a36Sopenharmony_ci		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
35362306a36Sopenharmony_ci		for (i = 0; i < npages; i++) {
35462306a36Sopenharmony_ci			struct nfs_page *req;
35562306a36Sopenharmony_ci			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
35662306a36Sopenharmony_ci			/* XXX do we need to do the eof zeroing found in async_filler? */
35762306a36Sopenharmony_ci			req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
35862306a36Sopenharmony_ci							pgbase, pos, req_len);
35962306a36Sopenharmony_ci			if (IS_ERR(req)) {
36062306a36Sopenharmony_ci				result = PTR_ERR(req);
36162306a36Sopenharmony_ci				break;
36262306a36Sopenharmony_ci			}
36362306a36Sopenharmony_ci			if (!nfs_pageio_add_request(&desc, req)) {
36462306a36Sopenharmony_ci				result = desc.pg_error;
36562306a36Sopenharmony_ci				nfs_release_request(req);
36662306a36Sopenharmony_ci				break;
36762306a36Sopenharmony_ci			}
36862306a36Sopenharmony_ci			pgbase = 0;
36962306a36Sopenharmony_ci			bytes -= req_len;
37062306a36Sopenharmony_ci			requested_bytes += req_len;
37162306a36Sopenharmony_ci			pos += req_len;
37262306a36Sopenharmony_ci			dreq->bytes_left -= req_len;
37362306a36Sopenharmony_ci		}
37462306a36Sopenharmony_ci		nfs_direct_release_pages(pagevec, npages);
37562306a36Sopenharmony_ci		kvfree(pagevec);
37662306a36Sopenharmony_ci		if (result < 0)
37762306a36Sopenharmony_ci			break;
37862306a36Sopenharmony_ci	}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	nfs_pageio_complete(&desc);
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	/*
38362306a36Sopenharmony_ci	 * If no bytes were started, return the error, and let the
38462306a36Sopenharmony_ci	 * generic layer handle the completion.
38562306a36Sopenharmony_ci	 */
38662306a36Sopenharmony_ci	if (requested_bytes == 0) {
38762306a36Sopenharmony_ci		inode_dio_end(inode);
38862306a36Sopenharmony_ci		nfs_direct_req_release(dreq);
38962306a36Sopenharmony_ci		return result < 0 ? result : -EIO;
39062306a36Sopenharmony_ci	}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	if (put_dreq(dreq))
39362306a36Sopenharmony_ci		nfs_direct_complete(dreq);
39462306a36Sopenharmony_ci	return requested_bytes;
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci/**
39862306a36Sopenharmony_ci * nfs_file_direct_read - file direct read operation for NFS files
39962306a36Sopenharmony_ci * @iocb: target I/O control block
40062306a36Sopenharmony_ci * @iter: vector of user buffers into which to read data
40162306a36Sopenharmony_ci * @swap: flag indicating this is swap IO, not O_DIRECT IO
40262306a36Sopenharmony_ci *
40362306a36Sopenharmony_ci * We use this function for direct reads instead of calling
40462306a36Sopenharmony_ci * generic_file_aio_read() in order to avoid gfar's check to see if
40562306a36Sopenharmony_ci * the request starts before the end of the file.  For that check
40662306a36Sopenharmony_ci * to work, we must generate a GETATTR before each direct read, and
40762306a36Sopenharmony_ci * even then there is a window between the GETATTR and the subsequent
40862306a36Sopenharmony_ci * READ where the file size could change.  Our preference is simply
40962306a36Sopenharmony_ci * to do all reads the application wants, and the server will take
41062306a36Sopenharmony_ci * care of managing the end of file boundary.
41162306a36Sopenharmony_ci *
41262306a36Sopenharmony_ci * This function also eliminates unnecessarily updating the file's
41362306a36Sopenharmony_ci * atime locally, as the NFS server sets the file's atime, and this
41462306a36Sopenharmony_ci * client must read the updated atime from the server back into its
41562306a36Sopenharmony_ci * cache.
41662306a36Sopenharmony_ci */
41762306a36Sopenharmony_cissize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
41862306a36Sopenharmony_ci			     bool swap)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
42162306a36Sopenharmony_ci	struct address_space *mapping = file->f_mapping;
42262306a36Sopenharmony_ci	struct inode *inode = mapping->host;
42362306a36Sopenharmony_ci	struct nfs_direct_req *dreq;
42462306a36Sopenharmony_ci	struct nfs_lock_context *l_ctx;
42562306a36Sopenharmony_ci	ssize_t result, requested;
42662306a36Sopenharmony_ci	size_t count = iov_iter_count(iter);
42762306a36Sopenharmony_ci	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
43062306a36Sopenharmony_ci		file, count, (long long) iocb->ki_pos);
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	result = 0;
43362306a36Sopenharmony_ci	if (!count)
43462306a36Sopenharmony_ci		goto out;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	task_io_account_read(count);
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	result = -ENOMEM;
43962306a36Sopenharmony_ci	dreq = nfs_direct_req_alloc();
44062306a36Sopenharmony_ci	if (dreq == NULL)
44162306a36Sopenharmony_ci		goto out;
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	dreq->inode = inode;
44462306a36Sopenharmony_ci	dreq->bytes_left = dreq->max_count = count;
44562306a36Sopenharmony_ci	dreq->io_start = iocb->ki_pos;
44662306a36Sopenharmony_ci	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
44762306a36Sopenharmony_ci	l_ctx = nfs_get_lock_context(dreq->ctx);
44862306a36Sopenharmony_ci	if (IS_ERR(l_ctx)) {
44962306a36Sopenharmony_ci		result = PTR_ERR(l_ctx);
45062306a36Sopenharmony_ci		nfs_direct_req_release(dreq);
45162306a36Sopenharmony_ci		goto out_release;
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci	dreq->l_ctx = l_ctx;
45462306a36Sopenharmony_ci	if (!is_sync_kiocb(iocb))
45562306a36Sopenharmony_ci		dreq->iocb = iocb;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	if (user_backed_iter(iter))
45862306a36Sopenharmony_ci		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	if (!swap)
46162306a36Sopenharmony_ci		nfs_start_io_direct(inode);
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	NFS_I(inode)->read_io += count;
46462306a36Sopenharmony_ci	requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	if (!swap)
46762306a36Sopenharmony_ci		nfs_end_io_direct(inode);
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	if (requested > 0) {
47062306a36Sopenharmony_ci		result = nfs_direct_wait(dreq);
47162306a36Sopenharmony_ci		if (result > 0) {
47262306a36Sopenharmony_ci			requested -= result;
47362306a36Sopenharmony_ci			iocb->ki_pos += result;
47462306a36Sopenharmony_ci		}
47562306a36Sopenharmony_ci		iov_iter_revert(iter, requested);
47662306a36Sopenharmony_ci	} else {
47762306a36Sopenharmony_ci		result = requested;
47862306a36Sopenharmony_ci	}
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ciout_release:
48162306a36Sopenharmony_ci	nfs_direct_req_release(dreq);
48262306a36Sopenharmony_ciout:
48362306a36Sopenharmony_ci	return result;
48462306a36Sopenharmony_ci}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_cistatic void nfs_direct_add_page_head(struct list_head *list,
48762306a36Sopenharmony_ci				     struct nfs_page *req)
48862306a36Sopenharmony_ci{
48962306a36Sopenharmony_ci	struct nfs_page *head = req->wb_head;
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
49262306a36Sopenharmony_ci		return;
49362306a36Sopenharmony_ci	if (!list_empty(&head->wb_list)) {
49462306a36Sopenharmony_ci		nfs_unlock_request(head);
49562306a36Sopenharmony_ci		return;
49662306a36Sopenharmony_ci	}
49762306a36Sopenharmony_ci	list_add(&head->wb_list, list);
49862306a36Sopenharmony_ci	kref_get(&head->wb_kref);
49962306a36Sopenharmony_ci	kref_get(&head->wb_kref);
50062306a36Sopenharmony_ci}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_cistatic void nfs_direct_join_group(struct list_head *list,
50362306a36Sopenharmony_ci				  struct nfs_commit_info *cinfo,
50462306a36Sopenharmony_ci				  struct inode *inode)
50562306a36Sopenharmony_ci{
50662306a36Sopenharmony_ci	struct nfs_page *req, *subreq;
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci	list_for_each_entry(req, list, wb_list) {
50962306a36Sopenharmony_ci		if (req->wb_head != req) {
51062306a36Sopenharmony_ci			nfs_direct_add_page_head(&req->wb_list, req);
51162306a36Sopenharmony_ci			continue;
51262306a36Sopenharmony_ci		}
51362306a36Sopenharmony_ci		subreq = req->wb_this_page;
51462306a36Sopenharmony_ci		if (subreq == req)
51562306a36Sopenharmony_ci			continue;
51662306a36Sopenharmony_ci		do {
51762306a36Sopenharmony_ci			/*
51862306a36Sopenharmony_ci			 * Remove subrequests from this list before freeing
51962306a36Sopenharmony_ci			 * them in the call to nfs_join_page_group().
52062306a36Sopenharmony_ci			 */
52162306a36Sopenharmony_ci			if (!list_empty(&subreq->wb_list)) {
52262306a36Sopenharmony_ci				nfs_list_remove_request(subreq);
52362306a36Sopenharmony_ci				nfs_release_request(subreq);
52462306a36Sopenharmony_ci			}
52562306a36Sopenharmony_ci		} while ((subreq = subreq->wb_this_page) != req);
52662306a36Sopenharmony_ci		nfs_join_page_group(req, cinfo, inode);
52762306a36Sopenharmony_ci	}
52862306a36Sopenharmony_ci}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_cistatic void
53162306a36Sopenharmony_cinfs_direct_write_scan_commit_list(struct inode *inode,
53262306a36Sopenharmony_ci				  struct list_head *list,
53362306a36Sopenharmony_ci				  struct nfs_commit_info *cinfo)
53462306a36Sopenharmony_ci{
53562306a36Sopenharmony_ci	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
53662306a36Sopenharmony_ci	pnfs_recover_commit_reqs(list, cinfo);
53762306a36Sopenharmony_ci	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
53862306a36Sopenharmony_ci	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
53962306a36Sopenharmony_ci}
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_cistatic void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
54262306a36Sopenharmony_ci{
54362306a36Sopenharmony_ci	struct nfs_pageio_descriptor desc;
54462306a36Sopenharmony_ci	struct nfs_page *req;
54562306a36Sopenharmony_ci	LIST_HEAD(reqs);
54662306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
54962306a36Sopenharmony_ci	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
55462306a36Sopenharmony_ci	get_dreq(dreq);
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
55762306a36Sopenharmony_ci			      &nfs_direct_write_completion_ops);
55862306a36Sopenharmony_ci	desc.pg_dreq = dreq;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	while (!list_empty(&reqs)) {
56162306a36Sopenharmony_ci		req = nfs_list_entry(reqs.next);
56262306a36Sopenharmony_ci		/* Bump the transmission count */
56362306a36Sopenharmony_ci		req->wb_nio++;
56462306a36Sopenharmony_ci		if (!nfs_pageio_add_request(&desc, req)) {
56562306a36Sopenharmony_ci			spin_lock(&dreq->lock);
56662306a36Sopenharmony_ci			if (dreq->error < 0) {
56762306a36Sopenharmony_ci				desc.pg_error = dreq->error;
56862306a36Sopenharmony_ci			} else if (desc.pg_error != -EAGAIN) {
56962306a36Sopenharmony_ci				dreq->flags = 0;
57062306a36Sopenharmony_ci				if (!desc.pg_error)
57162306a36Sopenharmony_ci					desc.pg_error = -EIO;
57262306a36Sopenharmony_ci				dreq->error = desc.pg_error;
57362306a36Sopenharmony_ci			} else
57462306a36Sopenharmony_ci				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
57562306a36Sopenharmony_ci			spin_unlock(&dreq->lock);
57662306a36Sopenharmony_ci			break;
57762306a36Sopenharmony_ci		}
57862306a36Sopenharmony_ci		nfs_release_request(req);
57962306a36Sopenharmony_ci	}
58062306a36Sopenharmony_ci	nfs_pageio_complete(&desc);
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	while (!list_empty(&reqs)) {
58362306a36Sopenharmony_ci		req = nfs_list_entry(reqs.next);
58462306a36Sopenharmony_ci		nfs_list_remove_request(req);
58562306a36Sopenharmony_ci		nfs_unlock_and_release_request(req);
58662306a36Sopenharmony_ci		if (desc.pg_error == -EAGAIN) {
58762306a36Sopenharmony_ci			nfs_mark_request_commit(req, NULL, &cinfo, 0);
58862306a36Sopenharmony_ci		} else {
58962306a36Sopenharmony_ci			spin_lock(&dreq->lock);
59062306a36Sopenharmony_ci			nfs_direct_truncate_request(dreq, req);
59162306a36Sopenharmony_ci			spin_unlock(&dreq->lock);
59262306a36Sopenharmony_ci			nfs_release_request(req);
59362306a36Sopenharmony_ci		}
59462306a36Sopenharmony_ci	}
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	if (put_dreq(dreq))
59762306a36Sopenharmony_ci		nfs_direct_write_complete(dreq);
59862306a36Sopenharmony_ci}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_cistatic void nfs_direct_commit_complete(struct nfs_commit_data *data)
60162306a36Sopenharmony_ci{
60262306a36Sopenharmony_ci	const struct nfs_writeverf *verf = data->res.verf;
60362306a36Sopenharmony_ci	struct nfs_direct_req *dreq = data->dreq;
60462306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
60562306a36Sopenharmony_ci	struct nfs_page *req;
60662306a36Sopenharmony_ci	int status = data->task.tk_status;
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	trace_nfs_direct_commit_complete(dreq);
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	if (status < 0) {
61162306a36Sopenharmony_ci		/* Errors in commit are fatal */
61262306a36Sopenharmony_ci		dreq->error = status;
61362306a36Sopenharmony_ci		dreq->flags = NFS_ODIRECT_DONE;
61462306a36Sopenharmony_ci	} else {
61562306a36Sopenharmony_ci		status = dreq->error;
61662306a36Sopenharmony_ci	}
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_ci	while (!list_empty(&data->pages)) {
62162306a36Sopenharmony_ci		req = nfs_list_entry(data->pages.next);
62262306a36Sopenharmony_ci		nfs_list_remove_request(req);
62362306a36Sopenharmony_ci		if (status < 0) {
62462306a36Sopenharmony_ci			spin_lock(&dreq->lock);
62562306a36Sopenharmony_ci			nfs_direct_truncate_request(dreq, req);
62662306a36Sopenharmony_ci			spin_unlock(&dreq->lock);
62762306a36Sopenharmony_ci			nfs_release_request(req);
62862306a36Sopenharmony_ci		} else if (!nfs_write_match_verf(verf, req)) {
62962306a36Sopenharmony_ci			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
63062306a36Sopenharmony_ci			/*
63162306a36Sopenharmony_ci			 * Despite the reboot, the write was successful,
63262306a36Sopenharmony_ci			 * so reset wb_nio.
63362306a36Sopenharmony_ci			 */
63462306a36Sopenharmony_ci			req->wb_nio = 0;
63562306a36Sopenharmony_ci			nfs_mark_request_commit(req, NULL, &cinfo, 0);
63662306a36Sopenharmony_ci		} else
63762306a36Sopenharmony_ci			nfs_release_request(req);
63862306a36Sopenharmony_ci		nfs_unlock_and_release_request(req);
63962306a36Sopenharmony_ci	}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	if (nfs_commit_end(cinfo.mds))
64262306a36Sopenharmony_ci		nfs_direct_write_complete(dreq);
64362306a36Sopenharmony_ci}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_cistatic void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
64662306a36Sopenharmony_ci		struct nfs_page *req)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	struct nfs_direct_req *dreq = cinfo->dreq;
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	trace_nfs_direct_resched_write(dreq);
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	spin_lock(&dreq->lock);
65362306a36Sopenharmony_ci	if (dreq->flags != NFS_ODIRECT_DONE)
65462306a36Sopenharmony_ci		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
65562306a36Sopenharmony_ci	spin_unlock(&dreq->lock);
65662306a36Sopenharmony_ci	nfs_mark_request_commit(req, NULL, cinfo, 0);
65762306a36Sopenharmony_ci}
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_cistatic const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
66062306a36Sopenharmony_ci	.completion = nfs_direct_commit_complete,
66162306a36Sopenharmony_ci	.resched_write = nfs_direct_resched_write,
66262306a36Sopenharmony_ci};
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_cistatic void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	int res;
66762306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
66862306a36Sopenharmony_ci	LIST_HEAD(mds_list);
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
67162306a36Sopenharmony_ci	nfs_commit_begin(cinfo.mds);
67262306a36Sopenharmony_ci	nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
67362306a36Sopenharmony_ci	res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
67462306a36Sopenharmony_ci	if (res < 0) { /* res == -ENOMEM */
67562306a36Sopenharmony_ci		spin_lock(&dreq->lock);
67662306a36Sopenharmony_ci		if (dreq->flags == 0)
67762306a36Sopenharmony_ci			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
67862306a36Sopenharmony_ci		spin_unlock(&dreq->lock);
67962306a36Sopenharmony_ci	}
68062306a36Sopenharmony_ci	if (nfs_commit_end(cinfo.mds))
68162306a36Sopenharmony_ci		nfs_direct_write_complete(dreq);
68262306a36Sopenharmony_ci}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_cistatic void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
68562306a36Sopenharmony_ci{
68662306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
68762306a36Sopenharmony_ci	struct nfs_page *req;
68862306a36Sopenharmony_ci	LIST_HEAD(reqs);
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
69162306a36Sopenharmony_ci	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	while (!list_empty(&reqs)) {
69462306a36Sopenharmony_ci		req = nfs_list_entry(reqs.next);
69562306a36Sopenharmony_ci		nfs_list_remove_request(req);
69662306a36Sopenharmony_ci		nfs_direct_truncate_request(dreq, req);
69762306a36Sopenharmony_ci		nfs_release_request(req);
69862306a36Sopenharmony_ci		nfs_unlock_and_release_request(req);
69962306a36Sopenharmony_ci	}
70062306a36Sopenharmony_ci}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_cistatic void nfs_direct_write_schedule_work(struct work_struct *work)
70362306a36Sopenharmony_ci{
70462306a36Sopenharmony_ci	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
70562306a36Sopenharmony_ci	int flags = dreq->flags;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	dreq->flags = 0;
70862306a36Sopenharmony_ci	switch (flags) {
70962306a36Sopenharmony_ci		case NFS_ODIRECT_DO_COMMIT:
71062306a36Sopenharmony_ci			nfs_direct_commit_schedule(dreq);
71162306a36Sopenharmony_ci			break;
71262306a36Sopenharmony_ci		case NFS_ODIRECT_RESCHED_WRITES:
71362306a36Sopenharmony_ci			nfs_direct_write_reschedule(dreq);
71462306a36Sopenharmony_ci			break;
71562306a36Sopenharmony_ci		default:
71662306a36Sopenharmony_ci			nfs_direct_write_clear_reqs(dreq);
71762306a36Sopenharmony_ci			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
71862306a36Sopenharmony_ci			nfs_direct_complete(dreq);
71962306a36Sopenharmony_ci	}
72062306a36Sopenharmony_ci}
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_cistatic void nfs_direct_write_complete(struct nfs_direct_req *dreq)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	trace_nfs_direct_write_complete(dreq);
72562306a36Sopenharmony_ci	queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
72662306a36Sopenharmony_ci}
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_cistatic void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
72962306a36Sopenharmony_ci{
73062306a36Sopenharmony_ci	struct nfs_direct_req *dreq = hdr->dreq;
73162306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
73262306a36Sopenharmony_ci	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
73362306a36Sopenharmony_ci	int flags = NFS_ODIRECT_DONE;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	trace_nfs_direct_write_completion(dreq);
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	spin_lock(&dreq->lock);
74062306a36Sopenharmony_ci	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
74162306a36Sopenharmony_ci		spin_unlock(&dreq->lock);
74262306a36Sopenharmony_ci		goto out_put;
74362306a36Sopenharmony_ci	}
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	nfs_direct_count_bytes(dreq, hdr);
74662306a36Sopenharmony_ci	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
74762306a36Sopenharmony_ci	    !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
74862306a36Sopenharmony_ci		if (!dreq->flags)
74962306a36Sopenharmony_ci			dreq->flags = NFS_ODIRECT_DO_COMMIT;
75062306a36Sopenharmony_ci		flags = dreq->flags;
75162306a36Sopenharmony_ci	}
75262306a36Sopenharmony_ci	spin_unlock(&dreq->lock);
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	while (!list_empty(&hdr->pages)) {
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci		req = nfs_list_entry(hdr->pages.next);
75762306a36Sopenharmony_ci		nfs_list_remove_request(req);
75862306a36Sopenharmony_ci		if (flags == NFS_ODIRECT_DO_COMMIT) {
75962306a36Sopenharmony_ci			kref_get(&req->wb_kref);
76062306a36Sopenharmony_ci			memcpy(&req->wb_verf, &hdr->verf.verifier,
76162306a36Sopenharmony_ci			       sizeof(req->wb_verf));
76262306a36Sopenharmony_ci			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
76362306a36Sopenharmony_ci				hdr->ds_commit_idx);
76462306a36Sopenharmony_ci		} else if (flags == NFS_ODIRECT_RESCHED_WRITES) {
76562306a36Sopenharmony_ci			kref_get(&req->wb_kref);
76662306a36Sopenharmony_ci			nfs_mark_request_commit(req, NULL, &cinfo, 0);
76762306a36Sopenharmony_ci		}
76862306a36Sopenharmony_ci		nfs_unlock_and_release_request(req);
76962306a36Sopenharmony_ci	}
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ciout_put:
77262306a36Sopenharmony_ci	if (put_dreq(dreq))
77362306a36Sopenharmony_ci		nfs_direct_write_complete(dreq);
77462306a36Sopenharmony_ci	hdr->release(hdr);
77562306a36Sopenharmony_ci}
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_cistatic void nfs_write_sync_pgio_error(struct list_head *head, int error)
77862306a36Sopenharmony_ci{
77962306a36Sopenharmony_ci	struct nfs_page *req;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	while (!list_empty(head)) {
78262306a36Sopenharmony_ci		req = nfs_list_entry(head->next);
78362306a36Sopenharmony_ci		nfs_list_remove_request(req);
78462306a36Sopenharmony_ci		nfs_unlock_and_release_request(req);
78562306a36Sopenharmony_ci	}
78662306a36Sopenharmony_ci}
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_cistatic void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
78962306a36Sopenharmony_ci{
79062306a36Sopenharmony_ci	struct nfs_direct_req *dreq = hdr->dreq;
79162306a36Sopenharmony_ci	struct nfs_page *req;
79262306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	trace_nfs_direct_write_reschedule_io(dreq);
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	nfs_init_cinfo_from_dreq(&cinfo, dreq);
79762306a36Sopenharmony_ci	spin_lock(&dreq->lock);
79862306a36Sopenharmony_ci	if (dreq->error == 0)
79962306a36Sopenharmony_ci		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
80062306a36Sopenharmony_ci	set_bit(NFS_IOHDR_REDO, &hdr->flags);
80162306a36Sopenharmony_ci	spin_unlock(&dreq->lock);
80262306a36Sopenharmony_ci	while (!list_empty(&hdr->pages)) {
80362306a36Sopenharmony_ci		req = nfs_list_entry(hdr->pages.next);
80462306a36Sopenharmony_ci		nfs_list_remove_request(req);
80562306a36Sopenharmony_ci		nfs_unlock_request(req);
80662306a36Sopenharmony_ci		nfs_mark_request_commit(req, NULL, &cinfo, 0);
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci}
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
81162306a36Sopenharmony_ci	.error_cleanup = nfs_write_sync_pgio_error,
81262306a36Sopenharmony_ci	.init_hdr = nfs_direct_pgio_init,
81362306a36Sopenharmony_ci	.completion = nfs_direct_write_completion,
81462306a36Sopenharmony_ci	.reschedule_io = nfs_direct_write_reschedule_io,
81562306a36Sopenharmony_ci};
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci/*
81962306a36Sopenharmony_ci * NB: Return the value of the first error return code.  Subsequent
82062306a36Sopenharmony_ci *     errors after the first one are ignored.
82162306a36Sopenharmony_ci */
82262306a36Sopenharmony_ci/*
82362306a36Sopenharmony_ci * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
82462306a36Sopenharmony_ci * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
82562306a36Sopenharmony_ci * bail and stop sending more writes.  Write length accounting is
82662306a36Sopenharmony_ci * handled automatically by nfs_direct_write_result().  Otherwise, if
82762306a36Sopenharmony_ci * no requests have been sent, just return an error.
82862306a36Sopenharmony_ci */
82962306a36Sopenharmony_cistatic ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
83062306a36Sopenharmony_ci					       struct iov_iter *iter,
83162306a36Sopenharmony_ci					       loff_t pos, int ioflags)
83262306a36Sopenharmony_ci{
83362306a36Sopenharmony_ci	struct nfs_pageio_descriptor desc;
83462306a36Sopenharmony_ci	struct inode *inode = dreq->inode;
83562306a36Sopenharmony_ci	struct nfs_commit_info cinfo;
83662306a36Sopenharmony_ci	ssize_t result = 0;
83762306a36Sopenharmony_ci	size_t requested_bytes = 0;
83862306a36Sopenharmony_ci	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
83962306a36Sopenharmony_ci	bool defer = false;
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	trace_nfs_direct_write_schedule_iovec(dreq);
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci	nfs_pageio_init_write(&desc, inode, ioflags, false,
84462306a36Sopenharmony_ci			      &nfs_direct_write_completion_ops);
84562306a36Sopenharmony_ci	desc.pg_dreq = dreq;
84662306a36Sopenharmony_ci	get_dreq(dreq);
84762306a36Sopenharmony_ci	inode_dio_begin(inode);
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	NFS_I(inode)->write_io += iov_iter_count(iter);
85062306a36Sopenharmony_ci	while (iov_iter_count(iter)) {
85162306a36Sopenharmony_ci		struct page **pagevec;
85262306a36Sopenharmony_ci		size_t bytes;
85362306a36Sopenharmony_ci		size_t pgbase;
85462306a36Sopenharmony_ci		unsigned npages, i;
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci		result = iov_iter_get_pages_alloc2(iter, &pagevec,
85762306a36Sopenharmony_ci						  wsize, &pgbase);
85862306a36Sopenharmony_ci		if (result < 0)
85962306a36Sopenharmony_ci			break;
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci		bytes = result;
86262306a36Sopenharmony_ci		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
86362306a36Sopenharmony_ci		for (i = 0; i < npages; i++) {
86462306a36Sopenharmony_ci			struct nfs_page *req;
86562306a36Sopenharmony_ci			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci			req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
86862306a36Sopenharmony_ci							pgbase, pos, req_len);
86962306a36Sopenharmony_ci			if (IS_ERR(req)) {
87062306a36Sopenharmony_ci				result = PTR_ERR(req);
87162306a36Sopenharmony_ci				break;
87262306a36Sopenharmony_ci			}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci			if (desc.pg_error < 0) {
87562306a36Sopenharmony_ci				nfs_free_request(req);
87662306a36Sopenharmony_ci				result = desc.pg_error;
87762306a36Sopenharmony_ci				break;
87862306a36Sopenharmony_ci			}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci			pgbase = 0;
88162306a36Sopenharmony_ci			bytes -= req_len;
88262306a36Sopenharmony_ci			requested_bytes += req_len;
88362306a36Sopenharmony_ci			pos += req_len;
88462306a36Sopenharmony_ci			dreq->bytes_left -= req_len;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci			if (defer) {
88762306a36Sopenharmony_ci				nfs_mark_request_commit(req, NULL, &cinfo, 0);
88862306a36Sopenharmony_ci				continue;
88962306a36Sopenharmony_ci			}
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci			nfs_lock_request(req);
89262306a36Sopenharmony_ci			if (nfs_pageio_add_request(&desc, req))
89362306a36Sopenharmony_ci				continue;
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci			/* Exit on hard errors */
89662306a36Sopenharmony_ci			if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
89762306a36Sopenharmony_ci				result = desc.pg_error;
89862306a36Sopenharmony_ci				nfs_unlock_and_release_request(req);
89962306a36Sopenharmony_ci				break;
90062306a36Sopenharmony_ci			}
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci			/* If the error is soft, defer remaining requests */
90362306a36Sopenharmony_ci			nfs_init_cinfo_from_dreq(&cinfo, dreq);
90462306a36Sopenharmony_ci			spin_lock(&dreq->lock);
90562306a36Sopenharmony_ci			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
90662306a36Sopenharmony_ci			spin_unlock(&dreq->lock);
90762306a36Sopenharmony_ci			nfs_unlock_request(req);
90862306a36Sopenharmony_ci			nfs_mark_request_commit(req, NULL, &cinfo, 0);
90962306a36Sopenharmony_ci			desc.pg_error = 0;
91062306a36Sopenharmony_ci			defer = true;
91162306a36Sopenharmony_ci		}
91262306a36Sopenharmony_ci		nfs_direct_release_pages(pagevec, npages);
91362306a36Sopenharmony_ci		kvfree(pagevec);
91462306a36Sopenharmony_ci		if (result < 0)
91562306a36Sopenharmony_ci			break;
91662306a36Sopenharmony_ci	}
91762306a36Sopenharmony_ci	nfs_pageio_complete(&desc);
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	/*
92062306a36Sopenharmony_ci	 * If no bytes were started, return the error, and let the
92162306a36Sopenharmony_ci	 * generic layer handle the completion.
92262306a36Sopenharmony_ci	 */
92362306a36Sopenharmony_ci	if (requested_bytes == 0) {
92462306a36Sopenharmony_ci		inode_dio_end(inode);
92562306a36Sopenharmony_ci		nfs_direct_req_release(dreq);
92662306a36Sopenharmony_ci		return result < 0 ? result : -EIO;
92762306a36Sopenharmony_ci	}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	if (put_dreq(dreq))
93062306a36Sopenharmony_ci		nfs_direct_write_complete(dreq);
93162306a36Sopenharmony_ci	return requested_bytes;
93262306a36Sopenharmony_ci}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci/**
93562306a36Sopenharmony_ci * nfs_file_direct_write - file direct write operation for NFS files
93662306a36Sopenharmony_ci * @iocb: target I/O control block
93762306a36Sopenharmony_ci * @iter: vector of user buffers from which to write data
93862306a36Sopenharmony_ci * @swap: flag indicating this is swap IO, not O_DIRECT IO
93962306a36Sopenharmony_ci *
94062306a36Sopenharmony_ci * We use this function for direct writes instead of calling
94162306a36Sopenharmony_ci * generic_file_aio_write() in order to avoid taking the inode
94262306a36Sopenharmony_ci * semaphore and updating the i_size.  The NFS server will set
94362306a36Sopenharmony_ci * the new i_size and this client must read the updated size
94462306a36Sopenharmony_ci * back into its cache.  We let the server do generic write
94562306a36Sopenharmony_ci * parameter checking and report problems.
94662306a36Sopenharmony_ci *
94762306a36Sopenharmony_ci * We eliminate local atime updates, see direct read above.
94862306a36Sopenharmony_ci *
94962306a36Sopenharmony_ci * We avoid unnecessary page cache invalidations for normal cached
95062306a36Sopenharmony_ci * readers of this file.
95162306a36Sopenharmony_ci *
95262306a36Sopenharmony_ci * Note that O_APPEND is not supported for NFS direct writes, as there
95362306a36Sopenharmony_ci * is no atomic O_APPEND write facility in the NFS protocol.
95462306a36Sopenharmony_ci */
95562306a36Sopenharmony_cissize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
95662306a36Sopenharmony_ci			      bool swap)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	ssize_t result, requested;
95962306a36Sopenharmony_ci	size_t count;
96062306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
96162306a36Sopenharmony_ci	struct address_space *mapping = file->f_mapping;
96262306a36Sopenharmony_ci	struct inode *inode = mapping->host;
96362306a36Sopenharmony_ci	struct nfs_direct_req *dreq;
96462306a36Sopenharmony_ci	struct nfs_lock_context *l_ctx;
96562306a36Sopenharmony_ci	loff_t pos, end;
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ci	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
96862306a36Sopenharmony_ci		file, iov_iter_count(iter), (long long) iocb->ki_pos);
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	if (swap)
97162306a36Sopenharmony_ci		/* bypass generic checks */
97262306a36Sopenharmony_ci		result =  iov_iter_count(iter);
97362306a36Sopenharmony_ci	else
97462306a36Sopenharmony_ci		result = generic_write_checks(iocb, iter);
97562306a36Sopenharmony_ci	if (result <= 0)
97662306a36Sopenharmony_ci		return result;
97762306a36Sopenharmony_ci	count = result;
97862306a36Sopenharmony_ci	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	pos = iocb->ki_pos;
98162306a36Sopenharmony_ci	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	task_io_account_write(count);
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci	result = -ENOMEM;
98662306a36Sopenharmony_ci	dreq = nfs_direct_req_alloc();
98762306a36Sopenharmony_ci	if (!dreq)
98862306a36Sopenharmony_ci		goto out;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci	dreq->inode = inode;
99162306a36Sopenharmony_ci	dreq->bytes_left = dreq->max_count = count;
99262306a36Sopenharmony_ci	dreq->io_start = pos;
99362306a36Sopenharmony_ci	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
99462306a36Sopenharmony_ci	l_ctx = nfs_get_lock_context(dreq->ctx);
99562306a36Sopenharmony_ci	if (IS_ERR(l_ctx)) {
99662306a36Sopenharmony_ci		result = PTR_ERR(l_ctx);
99762306a36Sopenharmony_ci		nfs_direct_req_release(dreq);
99862306a36Sopenharmony_ci		goto out_release;
99962306a36Sopenharmony_ci	}
100062306a36Sopenharmony_ci	dreq->l_ctx = l_ctx;
100162306a36Sopenharmony_ci	if (!is_sync_kiocb(iocb))
100262306a36Sopenharmony_ci		dreq->iocb = iocb;
100362306a36Sopenharmony_ci	pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	if (swap) {
100662306a36Sopenharmony_ci		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
100762306a36Sopenharmony_ci							    FLUSH_STABLE);
100862306a36Sopenharmony_ci	} else {
100962306a36Sopenharmony_ci		nfs_start_io_direct(inode);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
101262306a36Sopenharmony_ci							    FLUSH_COND_STABLE);
101362306a36Sopenharmony_ci
101462306a36Sopenharmony_ci		if (mapping->nrpages) {
101562306a36Sopenharmony_ci			invalidate_inode_pages2_range(mapping,
101662306a36Sopenharmony_ci						      pos >> PAGE_SHIFT, end);
101762306a36Sopenharmony_ci		}
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci		nfs_end_io_direct(inode);
102062306a36Sopenharmony_ci	}
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	if (requested > 0) {
102362306a36Sopenharmony_ci		result = nfs_direct_wait(dreq);
102462306a36Sopenharmony_ci		if (result > 0) {
102562306a36Sopenharmony_ci			requested -= result;
102662306a36Sopenharmony_ci			iocb->ki_pos = pos + result;
102762306a36Sopenharmony_ci			/* XXX: should check the generic_write_sync retval */
102862306a36Sopenharmony_ci			generic_write_sync(iocb, result);
102962306a36Sopenharmony_ci		}
103062306a36Sopenharmony_ci		iov_iter_revert(iter, requested);
103162306a36Sopenharmony_ci	} else {
103262306a36Sopenharmony_ci		result = requested;
103362306a36Sopenharmony_ci	}
103462306a36Sopenharmony_ci	nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE);
103562306a36Sopenharmony_ciout_release:
103662306a36Sopenharmony_ci	nfs_direct_req_release(dreq);
103762306a36Sopenharmony_ciout:
103862306a36Sopenharmony_ci	return result;
103962306a36Sopenharmony_ci}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci/**
104262306a36Sopenharmony_ci * nfs_init_directcache - create a slab cache for nfs_direct_req structures
104362306a36Sopenharmony_ci *
104462306a36Sopenharmony_ci */
104562306a36Sopenharmony_ciint __init nfs_init_directcache(void)
104662306a36Sopenharmony_ci{
104762306a36Sopenharmony_ci	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
104862306a36Sopenharmony_ci						sizeof(struct nfs_direct_req),
104962306a36Sopenharmony_ci						0, (SLAB_RECLAIM_ACCOUNT|
105062306a36Sopenharmony_ci							SLAB_MEM_SPREAD),
105162306a36Sopenharmony_ci						NULL);
105262306a36Sopenharmony_ci	if (nfs_direct_cachep == NULL)
105362306a36Sopenharmony_ci		return -ENOMEM;
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci	return 0;
105662306a36Sopenharmony_ci}
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci/**
105962306a36Sopenharmony_ci * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
106062306a36Sopenharmony_ci *
106162306a36Sopenharmony_ci */
106262306a36Sopenharmony_civoid nfs_destroy_directcache(void)
106362306a36Sopenharmony_ci{
106462306a36Sopenharmony_ci	kmem_cache_destroy(nfs_direct_cachep);
106562306a36Sopenharmony_ci}
1066