162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/nfs/direct.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2003 by Chuck Lever <cel@netapp.com> 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * High-performance uncached I/O for the Linux NFS client 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * There are important applications whose performance or correctness 1062306a36Sopenharmony_ci * depends on uncached access to file data. Database clusters 1162306a36Sopenharmony_ci * (multiple copies of the same instance running on separate hosts) 1262306a36Sopenharmony_ci * implement their own cache coherency protocol that subsumes file 1362306a36Sopenharmony_ci * system cache protocols. Applications that process datasets 1462306a36Sopenharmony_ci * considerably larger than the client's memory do not always benefit 1562306a36Sopenharmony_ci * from a local cache. A streaming video server, for instance, has no 1662306a36Sopenharmony_ci * need to cache the contents of a file. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * When an application requests uncached I/O, all read and write requests 1962306a36Sopenharmony_ci * are made directly to the server; data stored or fetched via these 2062306a36Sopenharmony_ci * requests is not cached in the Linux page cache. The client does not 2162306a36Sopenharmony_ci * correct unaligned requests from applications. All requested bytes are 2262306a36Sopenharmony_ci * held on permanent storage before a direct write system call returns to 2362306a36Sopenharmony_ci * an application. 2462306a36Sopenharmony_ci * 2562306a36Sopenharmony_ci * Solaris implements an uncached I/O facility called directio() that 2662306a36Sopenharmony_ci * is used for backups and sequential I/O to very large files. Solaris 2762306a36Sopenharmony_ci * also supports uncaching whole NFS partitions with "-o forcedirectio," 2862306a36Sopenharmony_ci * an undocumented mount option. 2962306a36Sopenharmony_ci * 3062306a36Sopenharmony_ci * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with 3162306a36Sopenharmony_ci * help from Andrew Morton. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * 18 Dec 2001 Initial implementation for 2.4 --cel 3462306a36Sopenharmony_ci * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy 3562306a36Sopenharmony_ci * 08 Jun 2003 Port to 2.5 APIs --cel 3662306a36Sopenharmony_ci * 31 Mar 2004 Handle direct I/O without VFS support --cel 3762306a36Sopenharmony_ci * 15 Sep 2004 Parallel async reads --cel 3862306a36Sopenharmony_ci * 04 May 2005 support O_DIRECT with aio --cel 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci */ 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci#include <linux/errno.h> 4362306a36Sopenharmony_ci#include <linux/sched.h> 4462306a36Sopenharmony_ci#include <linux/kernel.h> 4562306a36Sopenharmony_ci#include <linux/file.h> 4662306a36Sopenharmony_ci#include <linux/pagemap.h> 4762306a36Sopenharmony_ci#include <linux/kref.h> 4862306a36Sopenharmony_ci#include <linux/slab.h> 4962306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h> 5062306a36Sopenharmony_ci#include <linux/module.h> 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#include <linux/nfs_fs.h> 5362306a36Sopenharmony_ci#include <linux/nfs_page.h> 5462306a36Sopenharmony_ci#include <linux/sunrpc/clnt.h> 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci#include <linux/uaccess.h> 5762306a36Sopenharmony_ci#include <linux/atomic.h> 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci#include "internal.h" 6062306a36Sopenharmony_ci#include "iostat.h" 6162306a36Sopenharmony_ci#include "pnfs.h" 6262306a36Sopenharmony_ci#include "fscache.h" 6362306a36Sopenharmony_ci#include "nfstrace.h" 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci#define NFSDBG_FACILITY NFSDBG_VFS 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic struct kmem_cache *nfs_direct_cachep; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; 7062306a36Sopenharmony_cistatic const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; 7162306a36Sopenharmony_cistatic void nfs_direct_write_complete(struct nfs_direct_req *dreq); 7262306a36Sopenharmony_cistatic void nfs_direct_write_schedule_work(struct work_struct *work); 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistatic inline void get_dreq(struct nfs_direct_req *dreq) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci atomic_inc(&dreq->io_count); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistatic inline int put_dreq(struct nfs_direct_req *dreq) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci return atomic_dec_and_test(&dreq->io_count); 8262306a36Sopenharmony_ci} 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cistatic void 8562306a36Sopenharmony_cinfs_direct_handle_truncated(struct nfs_direct_req *dreq, 8662306a36Sopenharmony_ci const struct nfs_pgio_header *hdr, 8762306a36Sopenharmony_ci ssize_t dreq_len) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || 9062306a36Sopenharmony_ci test_bit(NFS_IOHDR_EOF, &hdr->flags))) 9162306a36Sopenharmony_ci return; 9262306a36Sopenharmony_ci if (dreq->max_count >= dreq_len) { 9362306a36Sopenharmony_ci dreq->max_count = dreq_len; 9462306a36Sopenharmony_ci if (dreq->count > dreq_len) 9562306a36Sopenharmony_ci dreq->count = dreq_len; 9662306a36Sopenharmony_ci } 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) 9962306a36Sopenharmony_ci dreq->error = hdr->error; 10062306a36Sopenharmony_ci} 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_cistatic void 10362306a36Sopenharmony_cinfs_direct_count_bytes(struct nfs_direct_req *dreq, 10462306a36Sopenharmony_ci const struct nfs_pgio_header *hdr) 10562306a36Sopenharmony_ci{ 10662306a36Sopenharmony_ci loff_t hdr_end = hdr->io_start + hdr->good_bytes; 10762306a36Sopenharmony_ci ssize_t dreq_len = 0; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci if (hdr_end > dreq->io_start) 11062306a36Sopenharmony_ci dreq_len = hdr_end - dreq->io_start; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci nfs_direct_handle_truncated(dreq, hdr, dreq_len); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci if (dreq_len > dreq->max_count) 11562306a36Sopenharmony_ci dreq_len = dreq->max_count; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci if (dreq->count < dreq_len) 11862306a36Sopenharmony_ci dreq->count = dreq_len; 11962306a36Sopenharmony_ci} 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_cistatic void nfs_direct_truncate_request(struct nfs_direct_req *dreq, 12262306a36Sopenharmony_ci struct nfs_page *req) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci loff_t offs = req_offset(req); 12562306a36Sopenharmony_ci size_t req_start = (size_t)(offs - dreq->io_start); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (req_start < dreq->max_count) 12862306a36Sopenharmony_ci dreq->max_count = req_start; 12962306a36Sopenharmony_ci if (req_start < dreq->count) 13062306a36Sopenharmony_ci dreq->count = req_start; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci/** 13462306a36Sopenharmony_ci * nfs_swap_rw - NFS address space operation for swap I/O 13562306a36Sopenharmony_ci * @iocb: target I/O control block 13662306a36Sopenharmony_ci * @iter: I/O buffer 13762306a36Sopenharmony_ci * 13862306a36Sopenharmony_ci * Perform IO to the swap-file. This is much like direct IO. 13962306a36Sopenharmony_ci */ 14062306a36Sopenharmony_ciint nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) 14162306a36Sopenharmony_ci{ 14262306a36Sopenharmony_ci ssize_t ret; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci if (iov_iter_rw(iter) == READ) 14762306a36Sopenharmony_ci ret = nfs_file_direct_read(iocb, iter, true); 14862306a36Sopenharmony_ci else 14962306a36Sopenharmony_ci ret = nfs_file_direct_write(iocb, iter, true); 15062306a36Sopenharmony_ci if (ret < 0) 15162306a36Sopenharmony_ci return ret; 15262306a36Sopenharmony_ci return 0; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic void nfs_direct_release_pages(struct page **pages, unsigned int npages) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci unsigned int i; 15862306a36Sopenharmony_ci for (i = 0; i < npages; i++) 15962306a36Sopenharmony_ci put_page(pages[i]); 16062306a36Sopenharmony_ci} 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_civoid nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 16362306a36Sopenharmony_ci struct nfs_direct_req *dreq) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci cinfo->inode = dreq->inode; 16662306a36Sopenharmony_ci cinfo->mds = &dreq->mds_cinfo; 16762306a36Sopenharmony_ci cinfo->ds = &dreq->ds_cinfo; 16862306a36Sopenharmony_ci cinfo->dreq = dreq; 16962306a36Sopenharmony_ci cinfo->completion_ops = &nfs_direct_commit_completion_ops; 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_cistatic inline struct nfs_direct_req *nfs_direct_req_alloc(void) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci struct nfs_direct_req *dreq; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); 17762306a36Sopenharmony_ci if (!dreq) 17862306a36Sopenharmony_ci return NULL; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci kref_init(&dreq->kref); 18162306a36Sopenharmony_ci kref_get(&dreq->kref); 18262306a36Sopenharmony_ci init_completion(&dreq->completion); 18362306a36Sopenharmony_ci INIT_LIST_HEAD(&dreq->mds_cinfo.list); 18462306a36Sopenharmony_ci pnfs_init_ds_commit_info(&dreq->ds_cinfo); 18562306a36Sopenharmony_ci INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); 18662306a36Sopenharmony_ci spin_lock_init(&dreq->lock); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci return dreq; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic void nfs_direct_req_free(struct kref *kref) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); 19662306a36Sopenharmony_ci if (dreq->l_ctx != NULL) 19762306a36Sopenharmony_ci nfs_put_lock_context(dreq->l_ctx); 19862306a36Sopenharmony_ci if (dreq->ctx != NULL) 19962306a36Sopenharmony_ci put_nfs_open_context(dreq->ctx); 20062306a36Sopenharmony_ci kmem_cache_free(nfs_direct_cachep, dreq); 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_cistatic void nfs_direct_req_release(struct nfs_direct_req *dreq) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci kref_put(&dreq->kref, nfs_direct_req_free); 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cissize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci loff_t start = offset - dreq->io_start; 21162306a36Sopenharmony_ci return dreq->max_count - start; 21262306a36Sopenharmony_ci} 21362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci/* 21662306a36Sopenharmony_ci * Collects and returns the final error value/byte-count. 21762306a36Sopenharmony_ci */ 21862306a36Sopenharmony_cistatic ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci ssize_t result = -EIOCBQUEUED; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* Async requests don't wait here */ 22362306a36Sopenharmony_ci if (dreq->iocb) 22462306a36Sopenharmony_ci goto out; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci result = wait_for_completion_killable(&dreq->completion); 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci if (!result) { 22962306a36Sopenharmony_ci result = dreq->count; 23062306a36Sopenharmony_ci WARN_ON_ONCE(dreq->count < 0); 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci if (!result) 23362306a36Sopenharmony_ci result = dreq->error; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ciout: 23662306a36Sopenharmony_ci return (ssize_t) result; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust 24162306a36Sopenharmony_ci * the iocb is still valid here if this is a synchronous request. 24262306a36Sopenharmony_ci */ 24362306a36Sopenharmony_cistatic void nfs_direct_complete(struct nfs_direct_req *dreq) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci struct inode *inode = dreq->inode; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci inode_dio_end(inode); 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci if (dreq->iocb) { 25062306a36Sopenharmony_ci long res = (long) dreq->error; 25162306a36Sopenharmony_ci if (dreq->count != 0) { 25262306a36Sopenharmony_ci res = (long) dreq->count; 25362306a36Sopenharmony_ci WARN_ON_ONCE(dreq->count < 0); 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci dreq->iocb->ki_complete(dreq->iocb, res); 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci complete(&dreq->completion); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci nfs_direct_req_release(dreq); 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_cistatic void nfs_direct_read_completion(struct nfs_pgio_header *hdr) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci unsigned long bytes = 0; 26662306a36Sopenharmony_ci struct nfs_direct_req *dreq = hdr->dreq; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci spin_lock(&dreq->lock); 26962306a36Sopenharmony_ci if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 27062306a36Sopenharmony_ci spin_unlock(&dreq->lock); 27162306a36Sopenharmony_ci goto out_put; 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci nfs_direct_count_bytes(dreq, hdr); 27562306a36Sopenharmony_ci spin_unlock(&dreq->lock); 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci while (!list_empty(&hdr->pages)) { 27862306a36Sopenharmony_ci struct nfs_page *req = nfs_list_entry(hdr->pages.next); 27962306a36Sopenharmony_ci struct page *page = req->wb_page; 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci if (!PageCompound(page) && bytes < hdr->good_bytes && 28262306a36Sopenharmony_ci (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) 28362306a36Sopenharmony_ci set_page_dirty(page); 28462306a36Sopenharmony_ci bytes += req->wb_bytes; 28562306a36Sopenharmony_ci nfs_list_remove_request(req); 28662306a36Sopenharmony_ci nfs_release_request(req); 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ciout_put: 28962306a36Sopenharmony_ci if (put_dreq(dreq)) 29062306a36Sopenharmony_ci nfs_direct_complete(dreq); 29162306a36Sopenharmony_ci hdr->release(hdr); 29262306a36Sopenharmony_ci} 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_cistatic void nfs_read_sync_pgio_error(struct list_head *head, int error) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci struct nfs_page *req; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci while (!list_empty(head)) { 29962306a36Sopenharmony_ci req = nfs_list_entry(head->next); 30062306a36Sopenharmony_ci nfs_list_remove_request(req); 30162306a36Sopenharmony_ci nfs_release_request(req); 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci} 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_cistatic void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci get_dreq(hdr->dreq); 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { 31162306a36Sopenharmony_ci .error_cleanup = nfs_read_sync_pgio_error, 31262306a36Sopenharmony_ci .init_hdr = nfs_direct_pgio_init, 31362306a36Sopenharmony_ci .completion = nfs_direct_read_completion, 31462306a36Sopenharmony_ci}; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci/* 31762306a36Sopenharmony_ci * For each rsize'd chunk of the user's buffer, dispatch an NFS READ 31862306a36Sopenharmony_ci * operation. If nfs_readdata_alloc() or get_user_pages() fails, 31962306a36Sopenharmony_ci * bail and stop sending more reads. Read length accounting is 32062306a36Sopenharmony_ci * handled automatically by nfs_direct_read_result(). Otherwise, if 32162306a36Sopenharmony_ci * no requests have been sent, just return an error. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_cistatic ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, 32562306a36Sopenharmony_ci struct iov_iter *iter, 32662306a36Sopenharmony_ci loff_t pos) 32762306a36Sopenharmony_ci{ 32862306a36Sopenharmony_ci struct nfs_pageio_descriptor desc; 32962306a36Sopenharmony_ci struct inode *inode = dreq->inode; 33062306a36Sopenharmony_ci ssize_t result = -EINVAL; 33162306a36Sopenharmony_ci size_t requested_bytes = 0; 33262306a36Sopenharmony_ci size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci nfs_pageio_init_read(&desc, dreq->inode, false, 33562306a36Sopenharmony_ci &nfs_direct_read_completion_ops); 33662306a36Sopenharmony_ci get_dreq(dreq); 33762306a36Sopenharmony_ci desc.pg_dreq = dreq; 33862306a36Sopenharmony_ci inode_dio_begin(inode); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci while (iov_iter_count(iter)) { 34162306a36Sopenharmony_ci struct page **pagevec; 34262306a36Sopenharmony_ci size_t bytes; 34362306a36Sopenharmony_ci size_t pgbase; 34462306a36Sopenharmony_ci unsigned npages, i; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci result = iov_iter_get_pages_alloc2(iter, &pagevec, 34762306a36Sopenharmony_ci rsize, &pgbase); 34862306a36Sopenharmony_ci if (result < 0) 34962306a36Sopenharmony_ci break; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci bytes = result; 35262306a36Sopenharmony_ci npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 35362306a36Sopenharmony_ci for (i = 0; i < npages; i++) { 35462306a36Sopenharmony_ci struct nfs_page *req; 35562306a36Sopenharmony_ci unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 35662306a36Sopenharmony_ci /* XXX do we need to do the eof zeroing found in async_filler? */ 35762306a36Sopenharmony_ci req = nfs_page_create_from_page(dreq->ctx, pagevec[i], 35862306a36Sopenharmony_ci pgbase, pos, req_len); 35962306a36Sopenharmony_ci if (IS_ERR(req)) { 36062306a36Sopenharmony_ci result = PTR_ERR(req); 36162306a36Sopenharmony_ci break; 36262306a36Sopenharmony_ci } 36362306a36Sopenharmony_ci if (!nfs_pageio_add_request(&desc, req)) { 36462306a36Sopenharmony_ci result = desc.pg_error; 36562306a36Sopenharmony_ci nfs_release_request(req); 36662306a36Sopenharmony_ci break; 36762306a36Sopenharmony_ci } 36862306a36Sopenharmony_ci pgbase = 0; 36962306a36Sopenharmony_ci bytes -= req_len; 37062306a36Sopenharmony_ci requested_bytes += req_len; 37162306a36Sopenharmony_ci pos += req_len; 37262306a36Sopenharmony_ci dreq->bytes_left -= req_len; 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci nfs_direct_release_pages(pagevec, npages); 37562306a36Sopenharmony_ci kvfree(pagevec); 37662306a36Sopenharmony_ci if (result < 0) 37762306a36Sopenharmony_ci break; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci nfs_pageio_complete(&desc); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci /* 38362306a36Sopenharmony_ci * If no bytes were started, return the error, and let the 38462306a36Sopenharmony_ci * generic layer handle the completion. 38562306a36Sopenharmony_ci */ 38662306a36Sopenharmony_ci if (requested_bytes == 0) { 38762306a36Sopenharmony_ci inode_dio_end(inode); 38862306a36Sopenharmony_ci nfs_direct_req_release(dreq); 38962306a36Sopenharmony_ci return result < 0 ? result : -EIO; 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci if (put_dreq(dreq)) 39362306a36Sopenharmony_ci nfs_direct_complete(dreq); 39462306a36Sopenharmony_ci return requested_bytes; 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci/** 39862306a36Sopenharmony_ci * nfs_file_direct_read - file direct read operation for NFS files 39962306a36Sopenharmony_ci * @iocb: target I/O control block 40062306a36Sopenharmony_ci * @iter: vector of user buffers into which to read data 40162306a36Sopenharmony_ci * @swap: flag indicating this is swap IO, not O_DIRECT IO 40262306a36Sopenharmony_ci * 40362306a36Sopenharmony_ci * We use this function for direct reads instead of calling 40462306a36Sopenharmony_ci * generic_file_aio_read() in order to avoid gfar's check to see if 40562306a36Sopenharmony_ci * the request starts before the end of the file. For that check 40662306a36Sopenharmony_ci * to work, we must generate a GETATTR before each direct read, and 40762306a36Sopenharmony_ci * even then there is a window between the GETATTR and the subsequent 40862306a36Sopenharmony_ci * READ where the file size could change. Our preference is simply 40962306a36Sopenharmony_ci * to do all reads the application wants, and the server will take 41062306a36Sopenharmony_ci * care of managing the end of file boundary. 41162306a36Sopenharmony_ci * 41262306a36Sopenharmony_ci * This function also eliminates unnecessarily updating the file's 41362306a36Sopenharmony_ci * atime locally, as the NFS server sets the file's atime, and this 41462306a36Sopenharmony_ci * client must read the updated atime from the server back into its 41562306a36Sopenharmony_ci * cache. 41662306a36Sopenharmony_ci */ 41762306a36Sopenharmony_cissize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, 41862306a36Sopenharmony_ci bool swap) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 42162306a36Sopenharmony_ci struct address_space *mapping = file->f_mapping; 42262306a36Sopenharmony_ci struct inode *inode = mapping->host; 42362306a36Sopenharmony_ci struct nfs_direct_req *dreq; 42462306a36Sopenharmony_ci struct nfs_lock_context *l_ctx; 42562306a36Sopenharmony_ci ssize_t result, requested; 42662306a36Sopenharmony_ci size_t count = iov_iter_count(iter); 42762306a36Sopenharmony_ci nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", 43062306a36Sopenharmony_ci file, count, (long long) iocb->ki_pos); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci result = 0; 43362306a36Sopenharmony_ci if (!count) 43462306a36Sopenharmony_ci goto out; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci task_io_account_read(count); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci result = -ENOMEM; 43962306a36Sopenharmony_ci dreq = nfs_direct_req_alloc(); 44062306a36Sopenharmony_ci if (dreq == NULL) 44162306a36Sopenharmony_ci goto out; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci dreq->inode = inode; 44462306a36Sopenharmony_ci dreq->bytes_left = dreq->max_count = count; 44562306a36Sopenharmony_ci dreq->io_start = iocb->ki_pos; 44662306a36Sopenharmony_ci dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 44762306a36Sopenharmony_ci l_ctx = nfs_get_lock_context(dreq->ctx); 44862306a36Sopenharmony_ci if (IS_ERR(l_ctx)) { 44962306a36Sopenharmony_ci result = PTR_ERR(l_ctx); 45062306a36Sopenharmony_ci nfs_direct_req_release(dreq); 45162306a36Sopenharmony_ci goto out_release; 45262306a36Sopenharmony_ci } 45362306a36Sopenharmony_ci dreq->l_ctx = l_ctx; 45462306a36Sopenharmony_ci if (!is_sync_kiocb(iocb)) 45562306a36Sopenharmony_ci dreq->iocb = iocb; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci if (user_backed_iter(iter)) 45862306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci if (!swap) 46162306a36Sopenharmony_ci nfs_start_io_direct(inode); 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci NFS_I(inode)->read_io += count; 46462306a36Sopenharmony_ci requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci if (!swap) 46762306a36Sopenharmony_ci nfs_end_io_direct(inode); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci if (requested > 0) { 47062306a36Sopenharmony_ci result = nfs_direct_wait(dreq); 47162306a36Sopenharmony_ci if (result > 0) { 47262306a36Sopenharmony_ci requested -= result; 47362306a36Sopenharmony_ci iocb->ki_pos += result; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci iov_iter_revert(iter, requested); 47662306a36Sopenharmony_ci } else { 47762306a36Sopenharmony_ci result = requested; 47862306a36Sopenharmony_ci } 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ciout_release: 48162306a36Sopenharmony_ci nfs_direct_req_release(dreq); 48262306a36Sopenharmony_ciout: 48362306a36Sopenharmony_ci return result; 48462306a36Sopenharmony_ci} 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_cistatic void nfs_direct_add_page_head(struct list_head *list, 48762306a36Sopenharmony_ci struct nfs_page *req) 48862306a36Sopenharmony_ci{ 48962306a36Sopenharmony_ci struct nfs_page *head = req->wb_head; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci if (!list_empty(&head->wb_list) || !nfs_lock_request(head)) 49262306a36Sopenharmony_ci return; 49362306a36Sopenharmony_ci if (!list_empty(&head->wb_list)) { 49462306a36Sopenharmony_ci nfs_unlock_request(head); 49562306a36Sopenharmony_ci return; 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci list_add(&head->wb_list, list); 49862306a36Sopenharmony_ci kref_get(&head->wb_kref); 49962306a36Sopenharmony_ci kref_get(&head->wb_kref); 50062306a36Sopenharmony_ci} 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_cistatic void nfs_direct_join_group(struct list_head *list, 50362306a36Sopenharmony_ci struct nfs_commit_info *cinfo, 50462306a36Sopenharmony_ci struct inode *inode) 50562306a36Sopenharmony_ci{ 50662306a36Sopenharmony_ci struct nfs_page *req, *subreq; 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci list_for_each_entry(req, list, wb_list) { 50962306a36Sopenharmony_ci if (req->wb_head != req) { 51062306a36Sopenharmony_ci nfs_direct_add_page_head(&req->wb_list, req); 51162306a36Sopenharmony_ci continue; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci subreq = req->wb_this_page; 51462306a36Sopenharmony_ci if (subreq == req) 51562306a36Sopenharmony_ci continue; 51662306a36Sopenharmony_ci do { 51762306a36Sopenharmony_ci /* 51862306a36Sopenharmony_ci * Remove subrequests from this list before freeing 51962306a36Sopenharmony_ci * them in the call to nfs_join_page_group(). 52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_ci if (!list_empty(&subreq->wb_list)) { 52262306a36Sopenharmony_ci nfs_list_remove_request(subreq); 52362306a36Sopenharmony_ci nfs_release_request(subreq); 52462306a36Sopenharmony_ci } 52562306a36Sopenharmony_ci } while ((subreq = subreq->wb_this_page) != req); 52662306a36Sopenharmony_ci nfs_join_page_group(req, cinfo, inode); 52762306a36Sopenharmony_ci } 52862306a36Sopenharmony_ci} 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_cistatic void 53162306a36Sopenharmony_cinfs_direct_write_scan_commit_list(struct inode *inode, 53262306a36Sopenharmony_ci struct list_head *list, 53362306a36Sopenharmony_ci struct nfs_commit_info *cinfo) 53462306a36Sopenharmony_ci{ 53562306a36Sopenharmony_ci mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 53662306a36Sopenharmony_ci pnfs_recover_commit_reqs(list, cinfo); 53762306a36Sopenharmony_ci nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 53862306a36Sopenharmony_ci mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_cistatic void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci struct nfs_pageio_descriptor desc; 54462306a36Sopenharmony_ci struct nfs_page *req; 54562306a36Sopenharmony_ci LIST_HEAD(reqs); 54662306a36Sopenharmony_ci struct nfs_commit_info cinfo; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 54962306a36Sopenharmony_ci nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci nfs_direct_join_group(&reqs, &cinfo, dreq->inode); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); 55462306a36Sopenharmony_ci get_dreq(dreq); 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, 55762306a36Sopenharmony_ci &nfs_direct_write_completion_ops); 55862306a36Sopenharmony_ci desc.pg_dreq = dreq; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci while (!list_empty(&reqs)) { 56162306a36Sopenharmony_ci req = nfs_list_entry(reqs.next); 56262306a36Sopenharmony_ci /* Bump the transmission count */ 56362306a36Sopenharmony_ci req->wb_nio++; 56462306a36Sopenharmony_ci if (!nfs_pageio_add_request(&desc, req)) { 56562306a36Sopenharmony_ci spin_lock(&dreq->lock); 56662306a36Sopenharmony_ci if (dreq->error < 0) { 56762306a36Sopenharmony_ci desc.pg_error = dreq->error; 56862306a36Sopenharmony_ci } else if (desc.pg_error != -EAGAIN) { 56962306a36Sopenharmony_ci dreq->flags = 0; 57062306a36Sopenharmony_ci if (!desc.pg_error) 57162306a36Sopenharmony_ci desc.pg_error = -EIO; 57262306a36Sopenharmony_ci dreq->error = desc.pg_error; 57362306a36Sopenharmony_ci } else 57462306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 57562306a36Sopenharmony_ci spin_unlock(&dreq->lock); 57662306a36Sopenharmony_ci break; 57762306a36Sopenharmony_ci } 57862306a36Sopenharmony_ci nfs_release_request(req); 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci nfs_pageio_complete(&desc); 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci while (!list_empty(&reqs)) { 58362306a36Sopenharmony_ci req = nfs_list_entry(reqs.next); 58462306a36Sopenharmony_ci nfs_list_remove_request(req); 58562306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 58662306a36Sopenharmony_ci if (desc.pg_error == -EAGAIN) { 58762306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 58862306a36Sopenharmony_ci } else { 58962306a36Sopenharmony_ci spin_lock(&dreq->lock); 59062306a36Sopenharmony_ci nfs_direct_truncate_request(dreq, req); 59162306a36Sopenharmony_ci spin_unlock(&dreq->lock); 59262306a36Sopenharmony_ci nfs_release_request(req); 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci } 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (put_dreq(dreq)) 59762306a36Sopenharmony_ci nfs_direct_write_complete(dreq); 59862306a36Sopenharmony_ci} 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_cistatic void nfs_direct_commit_complete(struct nfs_commit_data *data) 60162306a36Sopenharmony_ci{ 60262306a36Sopenharmony_ci const struct nfs_writeverf *verf = data->res.verf; 60362306a36Sopenharmony_ci struct nfs_direct_req *dreq = data->dreq; 60462306a36Sopenharmony_ci struct nfs_commit_info cinfo; 60562306a36Sopenharmony_ci struct nfs_page *req; 60662306a36Sopenharmony_ci int status = data->task.tk_status; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci trace_nfs_direct_commit_complete(dreq); 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci if (status < 0) { 61162306a36Sopenharmony_ci /* Errors in commit are fatal */ 61262306a36Sopenharmony_ci dreq->error = status; 61362306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_DONE; 61462306a36Sopenharmony_ci } else { 61562306a36Sopenharmony_ci status = dreq->error; 61662306a36Sopenharmony_ci } 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 61962306a36Sopenharmony_ci 62062306a36Sopenharmony_ci while (!list_empty(&data->pages)) { 62162306a36Sopenharmony_ci req = nfs_list_entry(data->pages.next); 62262306a36Sopenharmony_ci nfs_list_remove_request(req); 62362306a36Sopenharmony_ci if (status < 0) { 62462306a36Sopenharmony_ci spin_lock(&dreq->lock); 62562306a36Sopenharmony_ci nfs_direct_truncate_request(dreq, req); 62662306a36Sopenharmony_ci spin_unlock(&dreq->lock); 62762306a36Sopenharmony_ci nfs_release_request(req); 62862306a36Sopenharmony_ci } else if (!nfs_write_match_verf(verf, req)) { 62962306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 63062306a36Sopenharmony_ci /* 63162306a36Sopenharmony_ci * Despite the reboot, the write was successful, 63262306a36Sopenharmony_ci * so reset wb_nio. 63362306a36Sopenharmony_ci */ 63462306a36Sopenharmony_ci req->wb_nio = 0; 63562306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 63662306a36Sopenharmony_ci } else 63762306a36Sopenharmony_ci nfs_release_request(req); 63862306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci if (nfs_commit_end(cinfo.mds)) 64262306a36Sopenharmony_ci nfs_direct_write_complete(dreq); 64362306a36Sopenharmony_ci} 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_cistatic void nfs_direct_resched_write(struct nfs_commit_info *cinfo, 64662306a36Sopenharmony_ci struct nfs_page *req) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci struct nfs_direct_req *dreq = cinfo->dreq; 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci trace_nfs_direct_resched_write(dreq); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci spin_lock(&dreq->lock); 65362306a36Sopenharmony_ci if (dreq->flags != NFS_ODIRECT_DONE) 65462306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 65562306a36Sopenharmony_ci spin_unlock(&dreq->lock); 65662306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, cinfo, 0); 65762306a36Sopenharmony_ci} 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_cistatic const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { 66062306a36Sopenharmony_ci .completion = nfs_direct_commit_complete, 66162306a36Sopenharmony_ci .resched_write = nfs_direct_resched_write, 66262306a36Sopenharmony_ci}; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_cistatic void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 66562306a36Sopenharmony_ci{ 66662306a36Sopenharmony_ci int res; 66762306a36Sopenharmony_ci struct nfs_commit_info cinfo; 66862306a36Sopenharmony_ci LIST_HEAD(mds_list); 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 67162306a36Sopenharmony_ci nfs_commit_begin(cinfo.mds); 67262306a36Sopenharmony_ci nfs_scan_commit(dreq->inode, &mds_list, &cinfo); 67362306a36Sopenharmony_ci res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); 67462306a36Sopenharmony_ci if (res < 0) { /* res == -ENOMEM */ 67562306a36Sopenharmony_ci spin_lock(&dreq->lock); 67662306a36Sopenharmony_ci if (dreq->flags == 0) 67762306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 67862306a36Sopenharmony_ci spin_unlock(&dreq->lock); 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci if (nfs_commit_end(cinfo.mds)) 68162306a36Sopenharmony_ci nfs_direct_write_complete(dreq); 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) 68562306a36Sopenharmony_ci{ 68662306a36Sopenharmony_ci struct nfs_commit_info cinfo; 68762306a36Sopenharmony_ci struct nfs_page *req; 68862306a36Sopenharmony_ci LIST_HEAD(reqs); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 69162306a36Sopenharmony_ci nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci while (!list_empty(&reqs)) { 69462306a36Sopenharmony_ci req = nfs_list_entry(reqs.next); 69562306a36Sopenharmony_ci nfs_list_remove_request(req); 69662306a36Sopenharmony_ci nfs_direct_truncate_request(dreq, req); 69762306a36Sopenharmony_ci nfs_release_request(req); 69862306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 69962306a36Sopenharmony_ci } 70062306a36Sopenharmony_ci} 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_cistatic void nfs_direct_write_schedule_work(struct work_struct *work) 70362306a36Sopenharmony_ci{ 70462306a36Sopenharmony_ci struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); 70562306a36Sopenharmony_ci int flags = dreq->flags; 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci dreq->flags = 0; 70862306a36Sopenharmony_ci switch (flags) { 70962306a36Sopenharmony_ci case NFS_ODIRECT_DO_COMMIT: 71062306a36Sopenharmony_ci nfs_direct_commit_schedule(dreq); 71162306a36Sopenharmony_ci break; 71262306a36Sopenharmony_ci case NFS_ODIRECT_RESCHED_WRITES: 71362306a36Sopenharmony_ci nfs_direct_write_reschedule(dreq); 71462306a36Sopenharmony_ci break; 71562306a36Sopenharmony_ci default: 71662306a36Sopenharmony_ci nfs_direct_write_clear_reqs(dreq); 71762306a36Sopenharmony_ci nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); 71862306a36Sopenharmony_ci nfs_direct_complete(dreq); 71962306a36Sopenharmony_ci } 72062306a36Sopenharmony_ci} 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_cistatic void nfs_direct_write_complete(struct nfs_direct_req *dreq) 72362306a36Sopenharmony_ci{ 72462306a36Sopenharmony_ci trace_nfs_direct_write_complete(dreq); 72562306a36Sopenharmony_ci queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ 72662306a36Sopenharmony_ci} 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_cistatic void nfs_direct_write_completion(struct nfs_pgio_header *hdr) 72962306a36Sopenharmony_ci{ 73062306a36Sopenharmony_ci struct nfs_direct_req *dreq = hdr->dreq; 73162306a36Sopenharmony_ci struct nfs_commit_info cinfo; 73262306a36Sopenharmony_ci struct nfs_page *req = nfs_list_entry(hdr->pages.next); 73362306a36Sopenharmony_ci int flags = NFS_ODIRECT_DONE; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci trace_nfs_direct_write_completion(dreq); 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci spin_lock(&dreq->lock); 74062306a36Sopenharmony_ci if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 74162306a36Sopenharmony_ci spin_unlock(&dreq->lock); 74262306a36Sopenharmony_ci goto out_put; 74362306a36Sopenharmony_ci } 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci nfs_direct_count_bytes(dreq, hdr); 74662306a36Sopenharmony_ci if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && 74762306a36Sopenharmony_ci !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { 74862306a36Sopenharmony_ci if (!dreq->flags) 74962306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_DO_COMMIT; 75062306a36Sopenharmony_ci flags = dreq->flags; 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci spin_unlock(&dreq->lock); 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci while (!list_empty(&hdr->pages)) { 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci req = nfs_list_entry(hdr->pages.next); 75762306a36Sopenharmony_ci nfs_list_remove_request(req); 75862306a36Sopenharmony_ci if (flags == NFS_ODIRECT_DO_COMMIT) { 75962306a36Sopenharmony_ci kref_get(&req->wb_kref); 76062306a36Sopenharmony_ci memcpy(&req->wb_verf, &hdr->verf.verifier, 76162306a36Sopenharmony_ci sizeof(req->wb_verf)); 76262306a36Sopenharmony_ci nfs_mark_request_commit(req, hdr->lseg, &cinfo, 76362306a36Sopenharmony_ci hdr->ds_commit_idx); 76462306a36Sopenharmony_ci } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { 76562306a36Sopenharmony_ci kref_get(&req->wb_kref); 76662306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 76762306a36Sopenharmony_ci } 76862306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 76962306a36Sopenharmony_ci } 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ciout_put: 77262306a36Sopenharmony_ci if (put_dreq(dreq)) 77362306a36Sopenharmony_ci nfs_direct_write_complete(dreq); 77462306a36Sopenharmony_ci hdr->release(hdr); 77562306a36Sopenharmony_ci} 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_cistatic void nfs_write_sync_pgio_error(struct list_head *head, int error) 77862306a36Sopenharmony_ci{ 77962306a36Sopenharmony_ci struct nfs_page *req; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci while (!list_empty(head)) { 78262306a36Sopenharmony_ci req = nfs_list_entry(head->next); 78362306a36Sopenharmony_ci nfs_list_remove_request(req); 78462306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 78562306a36Sopenharmony_ci } 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_cistatic void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) 78962306a36Sopenharmony_ci{ 79062306a36Sopenharmony_ci struct nfs_direct_req *dreq = hdr->dreq; 79162306a36Sopenharmony_ci struct nfs_page *req; 79262306a36Sopenharmony_ci struct nfs_commit_info cinfo; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci trace_nfs_direct_write_reschedule_io(dreq); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 79762306a36Sopenharmony_ci spin_lock(&dreq->lock); 79862306a36Sopenharmony_ci if (dreq->error == 0) 79962306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 80062306a36Sopenharmony_ci set_bit(NFS_IOHDR_REDO, &hdr->flags); 80162306a36Sopenharmony_ci spin_unlock(&dreq->lock); 80262306a36Sopenharmony_ci while (!list_empty(&hdr->pages)) { 80362306a36Sopenharmony_ci req = nfs_list_entry(hdr->pages.next); 80462306a36Sopenharmony_ci nfs_list_remove_request(req); 80562306a36Sopenharmony_ci nfs_unlock_request(req); 80662306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci} 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_cistatic const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { 81162306a36Sopenharmony_ci .error_cleanup = nfs_write_sync_pgio_error, 81262306a36Sopenharmony_ci .init_hdr = nfs_direct_pgio_init, 81362306a36Sopenharmony_ci .completion = nfs_direct_write_completion, 81462306a36Sopenharmony_ci .reschedule_io = nfs_direct_write_reschedule_io, 81562306a36Sopenharmony_ci}; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci/* 81962306a36Sopenharmony_ci * NB: Return the value of the first error return code. Subsequent 82062306a36Sopenharmony_ci * errors after the first one are ignored. 82162306a36Sopenharmony_ci */ 82262306a36Sopenharmony_ci/* 82362306a36Sopenharmony_ci * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 82462306a36Sopenharmony_ci * operation. If nfs_writedata_alloc() or get_user_pages() fails, 82562306a36Sopenharmony_ci * bail and stop sending more writes. Write length accounting is 82662306a36Sopenharmony_ci * handled automatically by nfs_direct_write_result(). Otherwise, if 82762306a36Sopenharmony_ci * no requests have been sent, just return an error. 82862306a36Sopenharmony_ci */ 82962306a36Sopenharmony_cistatic ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 83062306a36Sopenharmony_ci struct iov_iter *iter, 83162306a36Sopenharmony_ci loff_t pos, int ioflags) 83262306a36Sopenharmony_ci{ 83362306a36Sopenharmony_ci struct nfs_pageio_descriptor desc; 83462306a36Sopenharmony_ci struct inode *inode = dreq->inode; 83562306a36Sopenharmony_ci struct nfs_commit_info cinfo; 83662306a36Sopenharmony_ci ssize_t result = 0; 83762306a36Sopenharmony_ci size_t requested_bytes = 0; 83862306a36Sopenharmony_ci size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); 83962306a36Sopenharmony_ci bool defer = false; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci trace_nfs_direct_write_schedule_iovec(dreq); 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci nfs_pageio_init_write(&desc, inode, ioflags, false, 84462306a36Sopenharmony_ci &nfs_direct_write_completion_ops); 84562306a36Sopenharmony_ci desc.pg_dreq = dreq; 84662306a36Sopenharmony_ci get_dreq(dreq); 84762306a36Sopenharmony_ci inode_dio_begin(inode); 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci NFS_I(inode)->write_io += iov_iter_count(iter); 85062306a36Sopenharmony_ci while (iov_iter_count(iter)) { 85162306a36Sopenharmony_ci struct page **pagevec; 85262306a36Sopenharmony_ci size_t bytes; 85362306a36Sopenharmony_ci size_t pgbase; 85462306a36Sopenharmony_ci unsigned npages, i; 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci result = iov_iter_get_pages_alloc2(iter, &pagevec, 85762306a36Sopenharmony_ci wsize, &pgbase); 85862306a36Sopenharmony_ci if (result < 0) 85962306a36Sopenharmony_ci break; 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci bytes = result; 86262306a36Sopenharmony_ci npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 86362306a36Sopenharmony_ci for (i = 0; i < npages; i++) { 86462306a36Sopenharmony_ci struct nfs_page *req; 86562306a36Sopenharmony_ci unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci req = nfs_page_create_from_page(dreq->ctx, pagevec[i], 86862306a36Sopenharmony_ci pgbase, pos, req_len); 86962306a36Sopenharmony_ci if (IS_ERR(req)) { 87062306a36Sopenharmony_ci result = PTR_ERR(req); 87162306a36Sopenharmony_ci break; 87262306a36Sopenharmony_ci } 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci if (desc.pg_error < 0) { 87562306a36Sopenharmony_ci nfs_free_request(req); 87662306a36Sopenharmony_ci result = desc.pg_error; 87762306a36Sopenharmony_ci break; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci pgbase = 0; 88162306a36Sopenharmony_ci bytes -= req_len; 88262306a36Sopenharmony_ci requested_bytes += req_len; 88362306a36Sopenharmony_ci pos += req_len; 88462306a36Sopenharmony_ci dreq->bytes_left -= req_len; 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci if (defer) { 88762306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 88862306a36Sopenharmony_ci continue; 88962306a36Sopenharmony_ci } 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci nfs_lock_request(req); 89262306a36Sopenharmony_ci if (nfs_pageio_add_request(&desc, req)) 89362306a36Sopenharmony_ci continue; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci /* Exit on hard errors */ 89662306a36Sopenharmony_ci if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { 89762306a36Sopenharmony_ci result = desc.pg_error; 89862306a36Sopenharmony_ci nfs_unlock_and_release_request(req); 89962306a36Sopenharmony_ci break; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* If the error is soft, defer remaining requests */ 90362306a36Sopenharmony_ci nfs_init_cinfo_from_dreq(&cinfo, dreq); 90462306a36Sopenharmony_ci spin_lock(&dreq->lock); 90562306a36Sopenharmony_ci dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 90662306a36Sopenharmony_ci spin_unlock(&dreq->lock); 90762306a36Sopenharmony_ci nfs_unlock_request(req); 90862306a36Sopenharmony_ci nfs_mark_request_commit(req, NULL, &cinfo, 0); 90962306a36Sopenharmony_ci desc.pg_error = 0; 91062306a36Sopenharmony_ci defer = true; 91162306a36Sopenharmony_ci } 91262306a36Sopenharmony_ci nfs_direct_release_pages(pagevec, npages); 91362306a36Sopenharmony_ci kvfree(pagevec); 91462306a36Sopenharmony_ci if (result < 0) 91562306a36Sopenharmony_ci break; 91662306a36Sopenharmony_ci } 91762306a36Sopenharmony_ci nfs_pageio_complete(&desc); 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci /* 92062306a36Sopenharmony_ci * If no bytes were started, return the error, and let the 92162306a36Sopenharmony_ci * generic layer handle the completion. 92262306a36Sopenharmony_ci */ 92362306a36Sopenharmony_ci if (requested_bytes == 0) { 92462306a36Sopenharmony_ci inode_dio_end(inode); 92562306a36Sopenharmony_ci nfs_direct_req_release(dreq); 92662306a36Sopenharmony_ci return result < 0 ? result : -EIO; 92762306a36Sopenharmony_ci } 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci if (put_dreq(dreq)) 93062306a36Sopenharmony_ci nfs_direct_write_complete(dreq); 93162306a36Sopenharmony_ci return requested_bytes; 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci/** 93562306a36Sopenharmony_ci * nfs_file_direct_write - file direct write operation for NFS files 93662306a36Sopenharmony_ci * @iocb: target I/O control block 93762306a36Sopenharmony_ci * @iter: vector of user buffers from which to write data 93862306a36Sopenharmony_ci * @swap: flag indicating this is swap IO, not O_DIRECT IO 93962306a36Sopenharmony_ci * 94062306a36Sopenharmony_ci * We use this function for direct writes instead of calling 94162306a36Sopenharmony_ci * generic_file_aio_write() in order to avoid taking the inode 94262306a36Sopenharmony_ci * semaphore and updating the i_size. The NFS server will set 94362306a36Sopenharmony_ci * the new i_size and this client must read the updated size 94462306a36Sopenharmony_ci * back into its cache. We let the server do generic write 94562306a36Sopenharmony_ci * parameter checking and report problems. 94662306a36Sopenharmony_ci * 94762306a36Sopenharmony_ci * We eliminate local atime updates, see direct read above. 94862306a36Sopenharmony_ci * 94962306a36Sopenharmony_ci * We avoid unnecessary page cache invalidations for normal cached 95062306a36Sopenharmony_ci * readers of this file. 95162306a36Sopenharmony_ci * 95262306a36Sopenharmony_ci * Note that O_APPEND is not supported for NFS direct writes, as there 95362306a36Sopenharmony_ci * is no atomic O_APPEND write facility in the NFS protocol. 95462306a36Sopenharmony_ci */ 95562306a36Sopenharmony_cissize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 95662306a36Sopenharmony_ci bool swap) 95762306a36Sopenharmony_ci{ 95862306a36Sopenharmony_ci ssize_t result, requested; 95962306a36Sopenharmony_ci size_t count; 96062306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 96162306a36Sopenharmony_ci struct address_space *mapping = file->f_mapping; 96262306a36Sopenharmony_ci struct inode *inode = mapping->host; 96362306a36Sopenharmony_ci struct nfs_direct_req *dreq; 96462306a36Sopenharmony_ci struct nfs_lock_context *l_ctx; 96562306a36Sopenharmony_ci loff_t pos, end; 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", 96862306a36Sopenharmony_ci file, iov_iter_count(iter), (long long) iocb->ki_pos); 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci if (swap) 97162306a36Sopenharmony_ci /* bypass generic checks */ 97262306a36Sopenharmony_ci result = iov_iter_count(iter); 97362306a36Sopenharmony_ci else 97462306a36Sopenharmony_ci result = generic_write_checks(iocb, iter); 97562306a36Sopenharmony_ci if (result <= 0) 97662306a36Sopenharmony_ci return result; 97762306a36Sopenharmony_ci count = result; 97862306a36Sopenharmony_ci nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci pos = iocb->ki_pos; 98162306a36Sopenharmony_ci end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci task_io_account_write(count); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci result = -ENOMEM; 98662306a36Sopenharmony_ci dreq = nfs_direct_req_alloc(); 98762306a36Sopenharmony_ci if (!dreq) 98862306a36Sopenharmony_ci goto out; 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci dreq->inode = inode; 99162306a36Sopenharmony_ci dreq->bytes_left = dreq->max_count = count; 99262306a36Sopenharmony_ci dreq->io_start = pos; 99362306a36Sopenharmony_ci dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 99462306a36Sopenharmony_ci l_ctx = nfs_get_lock_context(dreq->ctx); 99562306a36Sopenharmony_ci if (IS_ERR(l_ctx)) { 99662306a36Sopenharmony_ci result = PTR_ERR(l_ctx); 99762306a36Sopenharmony_ci nfs_direct_req_release(dreq); 99862306a36Sopenharmony_ci goto out_release; 99962306a36Sopenharmony_ci } 100062306a36Sopenharmony_ci dreq->l_ctx = l_ctx; 100162306a36Sopenharmony_ci if (!is_sync_kiocb(iocb)) 100262306a36Sopenharmony_ci dreq->iocb = iocb; 100362306a36Sopenharmony_ci pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci if (swap) { 100662306a36Sopenharmony_ci requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 100762306a36Sopenharmony_ci FLUSH_STABLE); 100862306a36Sopenharmony_ci } else { 100962306a36Sopenharmony_ci nfs_start_io_direct(inode); 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 101262306a36Sopenharmony_ci FLUSH_COND_STABLE); 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci if (mapping->nrpages) { 101562306a36Sopenharmony_ci invalidate_inode_pages2_range(mapping, 101662306a36Sopenharmony_ci pos >> PAGE_SHIFT, end); 101762306a36Sopenharmony_ci } 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci nfs_end_io_direct(inode); 102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci if (requested > 0) { 102362306a36Sopenharmony_ci result = nfs_direct_wait(dreq); 102462306a36Sopenharmony_ci if (result > 0) { 102562306a36Sopenharmony_ci requested -= result; 102662306a36Sopenharmony_ci iocb->ki_pos = pos + result; 102762306a36Sopenharmony_ci /* XXX: should check the generic_write_sync retval */ 102862306a36Sopenharmony_ci generic_write_sync(iocb, result); 102962306a36Sopenharmony_ci } 103062306a36Sopenharmony_ci iov_iter_revert(iter, requested); 103162306a36Sopenharmony_ci } else { 103262306a36Sopenharmony_ci result = requested; 103362306a36Sopenharmony_ci } 103462306a36Sopenharmony_ci nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE); 103562306a36Sopenharmony_ciout_release: 103662306a36Sopenharmony_ci nfs_direct_req_release(dreq); 103762306a36Sopenharmony_ciout: 103862306a36Sopenharmony_ci return result; 103962306a36Sopenharmony_ci} 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci/** 104262306a36Sopenharmony_ci * nfs_init_directcache - create a slab cache for nfs_direct_req structures 104362306a36Sopenharmony_ci * 104462306a36Sopenharmony_ci */ 104562306a36Sopenharmony_ciint __init nfs_init_directcache(void) 104662306a36Sopenharmony_ci{ 104762306a36Sopenharmony_ci nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", 104862306a36Sopenharmony_ci sizeof(struct nfs_direct_req), 104962306a36Sopenharmony_ci 0, (SLAB_RECLAIM_ACCOUNT| 105062306a36Sopenharmony_ci SLAB_MEM_SPREAD), 105162306a36Sopenharmony_ci NULL); 105262306a36Sopenharmony_ci if (nfs_direct_cachep == NULL) 105362306a36Sopenharmony_ci return -ENOMEM; 105462306a36Sopenharmony_ci 105562306a36Sopenharmony_ci return 0; 105662306a36Sopenharmony_ci} 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci/** 105962306a36Sopenharmony_ci * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures 106062306a36Sopenharmony_ci * 106162306a36Sopenharmony_ci */ 106262306a36Sopenharmony_civoid nfs_destroy_directcache(void) 106362306a36Sopenharmony_ci{ 106462306a36Sopenharmony_ci kmem_cache_destroy(nfs_direct_cachep); 106562306a36Sopenharmony_ci} 1066