// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS file operations.
 */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include "protocol.h"
1462306a36Sopenharmony_ci#include "orangefs-kernel.h"
1562306a36Sopenharmony_ci#include "orangefs-bufmap.h"
1662306a36Sopenharmony_ci#include <linux/fs.h>
1762306a36Sopenharmony_ci#include <linux/filelock.h>
1862306a36Sopenharmony_ci#include <linux/pagemap.h>
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_cistatic int flush_racache(struct inode *inode)
2162306a36Sopenharmony_ci{
2262306a36Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
2362306a36Sopenharmony_ci	struct orangefs_kernel_op_s *new_op;
2462306a36Sopenharmony_ci	int ret;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	gossip_debug(GOSSIP_UTILS_DEBUG,
2762306a36Sopenharmony_ci	    "%s: %pU: Handle is %pU | fs_id %d\n", __func__,
2862306a36Sopenharmony_ci	    get_khandle_from_ino(inode), &orangefs_inode->refn.khandle,
2962306a36Sopenharmony_ci	    orangefs_inode->refn.fs_id);
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH);
3262306a36Sopenharmony_ci	if (!new_op)
3362306a36Sopenharmony_ci		return -ENOMEM;
3462306a36Sopenharmony_ci	new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	ret = service_operation(new_op, "orangefs_flush_racache",
3762306a36Sopenharmony_ci	    get_interruptible_flag(inode));
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n",
4062306a36Sopenharmony_ci	    __func__, ret);
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	op_release(new_op);
4362306a36Sopenharmony_ci	return ret;
4462306a36Sopenharmony_ci}
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/*
4762306a36Sopenharmony_ci * Post and wait for the I/O upcall to finish
4862306a36Sopenharmony_ci */
4962306a36Sopenharmony_cissize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
5062306a36Sopenharmony_ci	loff_t *offset, struct iov_iter *iter, size_t total_size,
5162306a36Sopenharmony_ci	loff_t readahead_size, struct orangefs_write_range *wr,
5262306a36Sopenharmony_ci	int *index_return, struct file *file)
5362306a36Sopenharmony_ci{
5462306a36Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
5562306a36Sopenharmony_ci	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
5662306a36Sopenharmony_ci	struct orangefs_kernel_op_s *new_op = NULL;
5762306a36Sopenharmony_ci	int buffer_index;
5862306a36Sopenharmony_ci	ssize_t ret;
5962306a36Sopenharmony_ci	size_t copy_amount;
6062306a36Sopenharmony_ci	int open_for_read;
6162306a36Sopenharmony_ci	int open_for_write;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
6462306a36Sopenharmony_ci	if (!new_op)
6562306a36Sopenharmony_ci		return -ENOMEM;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	/* synchronous I/O */
6862306a36Sopenharmony_ci	new_op->upcall.req.io.readahead_size = readahead_size;
6962306a36Sopenharmony_ci	new_op->upcall.req.io.io_type = type;
7062306a36Sopenharmony_ci	new_op->upcall.req.io.refn = orangefs_inode->refn;
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_cipopulate_shared_memory:
7362306a36Sopenharmony_ci	/* get a shared buffer index */
7462306a36Sopenharmony_ci	buffer_index = orangefs_bufmap_get();
7562306a36Sopenharmony_ci	if (buffer_index < 0) {
7662306a36Sopenharmony_ci		ret = buffer_index;
7762306a36Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
7862306a36Sopenharmony_ci			     "%s: orangefs_bufmap_get failure (%zd)\n",
7962306a36Sopenharmony_ci			     __func__, ret);
8062306a36Sopenharmony_ci		goto out;
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
8362306a36Sopenharmony_ci		     "%s(%pU): GET op %p -> buffer_index %d\n",
8462306a36Sopenharmony_ci		     __func__,
8562306a36Sopenharmony_ci		     handle,
8662306a36Sopenharmony_ci		     new_op,
8762306a36Sopenharmony_ci		     buffer_index);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	new_op->uses_shared_memory = 1;
9062306a36Sopenharmony_ci	new_op->upcall.req.io.buf_index = buffer_index;
9162306a36Sopenharmony_ci	new_op->upcall.req.io.count = total_size;
9262306a36Sopenharmony_ci	new_op->upcall.req.io.offset = *offset;
9362306a36Sopenharmony_ci	if (type == ORANGEFS_IO_WRITE && wr) {
9462306a36Sopenharmony_ci		new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid);
9562306a36Sopenharmony_ci		new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid);
9662306a36Sopenharmony_ci	}
9762306a36Sopenharmony_ci	/*
9862306a36Sopenharmony_ci	 * Orangefs has no open, and orangefs checks file permissions
9962306a36Sopenharmony_ci	 * on each file access. Posix requires that file permissions
10062306a36Sopenharmony_ci	 * be checked on open and nowhere else. Orangefs-through-the-kernel
10162306a36Sopenharmony_ci	 * needs to seem posix compliant.
10262306a36Sopenharmony_ci	 *
10362306a36Sopenharmony_ci	 * The VFS opens files, even if the filesystem provides no
10462306a36Sopenharmony_ci	 * method. We can see if a file was successfully opened for
10562306a36Sopenharmony_ci	 * read and or for write by looking at file->f_mode.
10662306a36Sopenharmony_ci	 *
10762306a36Sopenharmony_ci	 * When writes are flowing from the page cache, file is no
10862306a36Sopenharmony_ci	 * longer available. We can trust the VFS to have checked
10962306a36Sopenharmony_ci	 * file->f_mode before writing to the page cache.
11062306a36Sopenharmony_ci	 *
11162306a36Sopenharmony_ci	 * The mode of a file might change between when it is opened
11262306a36Sopenharmony_ci	 * and IO commences, or it might be created with an arbitrary mode.
11362306a36Sopenharmony_ci	 *
11462306a36Sopenharmony_ci	 * We'll make sure we don't hit EACCES during the IO stage by
11562306a36Sopenharmony_ci	 * using UID 0. Some of the time we have access without changing
11662306a36Sopenharmony_ci	 * to UID 0 - how to check?
11762306a36Sopenharmony_ci	 */
11862306a36Sopenharmony_ci	if (file) {
11962306a36Sopenharmony_ci		open_for_write = file->f_mode & FMODE_WRITE;
12062306a36Sopenharmony_ci		open_for_read = file->f_mode & FMODE_READ;
12162306a36Sopenharmony_ci	} else {
12262306a36Sopenharmony_ci		open_for_write = 1;
12362306a36Sopenharmony_ci		open_for_read = 0; /* not relevant? */
12462306a36Sopenharmony_ci	}
12562306a36Sopenharmony_ci	if ((type == ORANGEFS_IO_WRITE) && open_for_write)
12662306a36Sopenharmony_ci		new_op->upcall.uid = 0;
12762306a36Sopenharmony_ci	if ((type == ORANGEFS_IO_READ) && open_for_read)
12862306a36Sopenharmony_ci		new_op->upcall.uid = 0;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
13162306a36Sopenharmony_ci		     "%s(%pU): offset: %llu total_size: %zd\n",
13262306a36Sopenharmony_ci		     __func__,
13362306a36Sopenharmony_ci		     handle,
13462306a36Sopenharmony_ci		     llu(*offset),
13562306a36Sopenharmony_ci		     total_size);
13662306a36Sopenharmony_ci	/*
13762306a36Sopenharmony_ci	 * Stage 1: copy the buffers into client-core's address space
13862306a36Sopenharmony_ci	 */
13962306a36Sopenharmony_ci	if (type == ORANGEFS_IO_WRITE && total_size) {
14062306a36Sopenharmony_ci		ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index,
14162306a36Sopenharmony_ci		    total_size);
14262306a36Sopenharmony_ci		if (ret < 0) {
14362306a36Sopenharmony_ci			gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
14462306a36Sopenharmony_ci			    __func__, (long)ret);
14562306a36Sopenharmony_ci			goto out;
14662306a36Sopenharmony_ci		}
14762306a36Sopenharmony_ci	}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
15062306a36Sopenharmony_ci		     "%s(%pU): Calling post_io_request with tag (%llu)\n",
15162306a36Sopenharmony_ci		     __func__,
15262306a36Sopenharmony_ci		     handle,
15362306a36Sopenharmony_ci		     llu(new_op->tag));
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	/* Stage 2: Service the I/O operation */
15662306a36Sopenharmony_ci	ret = service_operation(new_op,
15762306a36Sopenharmony_ci				type == ORANGEFS_IO_WRITE ?
15862306a36Sopenharmony_ci					"file_write" :
15962306a36Sopenharmony_ci					"file_read",
16062306a36Sopenharmony_ci				get_interruptible_flag(inode));
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	/*
16362306a36Sopenharmony_ci	 * If service_operation() returns -EAGAIN #and# the operation was
16462306a36Sopenharmony_ci	 * purged from orangefs_request_list or htable_ops_in_progress, then
16562306a36Sopenharmony_ci	 * we know that the client was restarted, causing the shared memory
16662306a36Sopenharmony_ci	 * area to be wiped clean.  To restart a  write operation in this
16762306a36Sopenharmony_ci	 * case, we must re-copy the data from the user's iovec to a NEW
16862306a36Sopenharmony_ci	 * shared memory location. To restart a read operation, we must get
16962306a36Sopenharmony_ci	 * a new shared memory location.
17062306a36Sopenharmony_ci	 */
17162306a36Sopenharmony_ci	if (ret == -EAGAIN && op_state_purged(new_op)) {
17262306a36Sopenharmony_ci		orangefs_bufmap_put(buffer_index);
17362306a36Sopenharmony_ci		if (type == ORANGEFS_IO_WRITE)
17462306a36Sopenharmony_ci			iov_iter_revert(iter, total_size);
17562306a36Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
17662306a36Sopenharmony_ci			     "%s:going to repopulate_shared_memory.\n",
17762306a36Sopenharmony_ci			     __func__);
17862306a36Sopenharmony_ci		goto populate_shared_memory;
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (ret < 0) {
18262306a36Sopenharmony_ci		if (ret == -EINTR) {
18362306a36Sopenharmony_ci			/*
18462306a36Sopenharmony_ci			 * We can't return EINTR if any data was written,
18562306a36Sopenharmony_ci			 * it's not POSIX. It is minimally acceptable
18662306a36Sopenharmony_ci			 * to give a partial write, the way NFS does.
18762306a36Sopenharmony_ci			 *
18862306a36Sopenharmony_ci			 * It would be optimal to return all or nothing,
18962306a36Sopenharmony_ci			 * but if a userspace write is bigger than
19062306a36Sopenharmony_ci			 * an IO buffer, and the interrupt occurs
19162306a36Sopenharmony_ci			 * between buffer writes, that would not be
19262306a36Sopenharmony_ci			 * possible.
19362306a36Sopenharmony_ci			 */
19462306a36Sopenharmony_ci			switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
19562306a36Sopenharmony_ci			/*
19662306a36Sopenharmony_ci			 * If the op was waiting when the interrupt
19762306a36Sopenharmony_ci			 * occurred, then the client-core did not
19862306a36Sopenharmony_ci			 * trigger the write.
19962306a36Sopenharmony_ci			 */
20062306a36Sopenharmony_ci			case OP_VFS_STATE_WAITING:
20162306a36Sopenharmony_ci				if (*offset == 0)
20262306a36Sopenharmony_ci					ret = -EINTR;
20362306a36Sopenharmony_ci				else
20462306a36Sopenharmony_ci					ret = 0;
20562306a36Sopenharmony_ci				break;
20662306a36Sopenharmony_ci			/*
20762306a36Sopenharmony_ci			 * If the op was in progress when the interrupt
20862306a36Sopenharmony_ci			 * occurred, then the client-core was able to
20962306a36Sopenharmony_ci			 * trigger the write.
21062306a36Sopenharmony_ci			 */
21162306a36Sopenharmony_ci			case OP_VFS_STATE_INPROGR:
21262306a36Sopenharmony_ci				if (type == ORANGEFS_IO_READ)
21362306a36Sopenharmony_ci					ret = -EINTR;
21462306a36Sopenharmony_ci				else
21562306a36Sopenharmony_ci					ret = total_size;
21662306a36Sopenharmony_ci				break;
21762306a36Sopenharmony_ci			default:
21862306a36Sopenharmony_ci				gossip_err("%s: unexpected op state :%d:.\n",
21962306a36Sopenharmony_ci					   __func__,
22062306a36Sopenharmony_ci					   new_op->op_state);
22162306a36Sopenharmony_ci				ret = 0;
22262306a36Sopenharmony_ci				break;
22362306a36Sopenharmony_ci			}
22462306a36Sopenharmony_ci			gossip_debug(GOSSIP_FILE_DEBUG,
22562306a36Sopenharmony_ci				     "%s: got EINTR, state:%d: %p\n",
22662306a36Sopenharmony_ci				     __func__,
22762306a36Sopenharmony_ci				     new_op->op_state,
22862306a36Sopenharmony_ci				     new_op);
22962306a36Sopenharmony_ci		} else {
23062306a36Sopenharmony_ci			gossip_err("%s: error in %s handle %pU, returning %zd\n",
23162306a36Sopenharmony_ci				__func__,
23262306a36Sopenharmony_ci				type == ORANGEFS_IO_READ ?
23362306a36Sopenharmony_ci					"read from" : "write to",
23462306a36Sopenharmony_ci				handle, ret);
23562306a36Sopenharmony_ci		}
23662306a36Sopenharmony_ci		if (orangefs_cancel_op_in_progress(new_op))
23762306a36Sopenharmony_ci			return ret;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci		goto out;
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	/*
24362306a36Sopenharmony_ci	 * Stage 3: Post copy buffers from client-core's address space
24462306a36Sopenharmony_ci	 */
24562306a36Sopenharmony_ci	if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) {
24662306a36Sopenharmony_ci		/*
24762306a36Sopenharmony_ci		 * NOTE: the iovector can either contain addresses which
24862306a36Sopenharmony_ci		 *       can futher be kernel-space or user-space addresses.
24962306a36Sopenharmony_ci		 *       or it can pointers to struct page's
25062306a36Sopenharmony_ci		 */
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci		copy_amount = new_op->downcall.resp.io.amt_complete;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci		ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
25562306a36Sopenharmony_ci			copy_amount);
25662306a36Sopenharmony_ci		if (ret < 0) {
25762306a36Sopenharmony_ci			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
25862306a36Sopenharmony_ci			    __func__, (long)ret);
25962306a36Sopenharmony_ci			goto out;
26062306a36Sopenharmony_ci		}
26162306a36Sopenharmony_ci	}
26262306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
26362306a36Sopenharmony_ci	    "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
26462306a36Sopenharmony_ci	    __func__,
26562306a36Sopenharmony_ci	    handle,
26662306a36Sopenharmony_ci	    type == ORANGEFS_IO_READ ?  "read" : "written",
26762306a36Sopenharmony_ci	    (int)new_op->downcall.resp.io.amt_complete);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	ret = new_op->downcall.resp.io.amt_complete;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ciout:
27262306a36Sopenharmony_ci	if (buffer_index >= 0) {
27362306a36Sopenharmony_ci		orangefs_bufmap_put(buffer_index);
27462306a36Sopenharmony_ci		gossip_debug(GOSSIP_FILE_DEBUG,
27562306a36Sopenharmony_ci			"%s(%pU): PUT buffer_index %d\n",
27662306a36Sopenharmony_ci			__func__, handle, buffer_index);
27762306a36Sopenharmony_ci	}
27862306a36Sopenharmony_ci	op_release(new_op);
27962306a36Sopenharmony_ci	return ret;
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ciint orangefs_revalidate_mapping(struct inode *inode)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
28562306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
28662306a36Sopenharmony_ci	unsigned long *bitlock = &orangefs_inode->bitlock;
28762306a36Sopenharmony_ci	int ret;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	while (1) {
29062306a36Sopenharmony_ci		ret = wait_on_bit(bitlock, 1, TASK_KILLABLE);
29162306a36Sopenharmony_ci		if (ret)
29262306a36Sopenharmony_ci			return ret;
29362306a36Sopenharmony_ci		spin_lock(&inode->i_lock);
29462306a36Sopenharmony_ci		if (test_bit(1, bitlock)) {
29562306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
29662306a36Sopenharmony_ci			continue;
29762306a36Sopenharmony_ci		}
29862306a36Sopenharmony_ci		if (!time_before(jiffies, orangefs_inode->mapping_time))
29962306a36Sopenharmony_ci			break;
30062306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
30162306a36Sopenharmony_ci		return 0;
30262306a36Sopenharmony_ci	}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	set_bit(1, bitlock);
30562306a36Sopenharmony_ci	smp_wmb();
30662306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	unmap_mapping_range(mapping, 0, 0, 0);
30962306a36Sopenharmony_ci	ret = filemap_write_and_wait(mapping);
31062306a36Sopenharmony_ci	if (!ret)
31162306a36Sopenharmony_ci		ret = invalidate_inode_pages2(mapping);
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	orangefs_inode->mapping_time = jiffies +
31462306a36Sopenharmony_ci	    orangefs_cache_timeout_msecs*HZ/1000;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	clear_bit(1, bitlock);
31762306a36Sopenharmony_ci	smp_mb__after_atomic();
31862306a36Sopenharmony_ci	wake_up_bit(bitlock, 1);
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	return ret;
32162306a36Sopenharmony_ci}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_cistatic ssize_t orangefs_file_read_iter(struct kiocb *iocb,
32462306a36Sopenharmony_ci    struct iov_iter *iter)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	int ret;
32762306a36Sopenharmony_ci	orangefs_stats.reads++;
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	down_read(&file_inode(iocb->ki_filp)->i_rwsem);
33062306a36Sopenharmony_ci	ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp));
33162306a36Sopenharmony_ci	if (ret)
33262306a36Sopenharmony_ci		goto out;
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	ret = generic_file_read_iter(iocb, iter);
33562306a36Sopenharmony_ciout:
33662306a36Sopenharmony_ci	up_read(&file_inode(iocb->ki_filp)->i_rwsem);
33762306a36Sopenharmony_ci	return ret;
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_cistatic ssize_t orangefs_file_splice_read(struct file *in, loff_t *ppos,
34162306a36Sopenharmony_ci					 struct pipe_inode_info *pipe,
34262306a36Sopenharmony_ci					 size_t len, unsigned int flags)
34362306a36Sopenharmony_ci{
34462306a36Sopenharmony_ci	struct inode *inode = file_inode(in);
34562306a36Sopenharmony_ci	ssize_t ret;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	orangefs_stats.reads++;
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	down_read(&inode->i_rwsem);
35062306a36Sopenharmony_ci	ret = orangefs_revalidate_mapping(inode);
35162306a36Sopenharmony_ci	if (ret)
35262306a36Sopenharmony_ci		goto out;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	ret = filemap_splice_read(in, ppos, pipe, len, flags);
35562306a36Sopenharmony_ciout:
35662306a36Sopenharmony_ci	up_read(&inode->i_rwsem);
35762306a36Sopenharmony_ci	return ret;
35862306a36Sopenharmony_ci}
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_cistatic ssize_t orangefs_file_write_iter(struct kiocb *iocb,
36162306a36Sopenharmony_ci    struct iov_iter *iter)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	int ret;
36462306a36Sopenharmony_ci	orangefs_stats.writes++;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) {
36762306a36Sopenharmony_ci		ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp));
36862306a36Sopenharmony_ci		if (ret)
36962306a36Sopenharmony_ci			return ret;
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	ret = generic_file_write_iter(iocb, iter);
37362306a36Sopenharmony_ci	return ret;
37462306a36Sopenharmony_ci}
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_cistatic vm_fault_t orangefs_fault(struct vm_fault *vmf)
37762306a36Sopenharmony_ci{
37862306a36Sopenharmony_ci	struct file *file = vmf->vma->vm_file;
37962306a36Sopenharmony_ci	int ret;
38062306a36Sopenharmony_ci	ret = orangefs_inode_getattr(file->f_mapping->host,
38162306a36Sopenharmony_ci	    ORANGEFS_GETATTR_SIZE);
38262306a36Sopenharmony_ci	if (ret == -ESTALE)
38362306a36Sopenharmony_ci		ret = -EIO;
38462306a36Sopenharmony_ci	if (ret) {
38562306a36Sopenharmony_ci		gossip_err("%s: orangefs_inode_getattr failed, "
38662306a36Sopenharmony_ci		    "ret:%d:.\n", __func__, ret);
38762306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
38862306a36Sopenharmony_ci	}
38962306a36Sopenharmony_ci	return filemap_fault(vmf);
39062306a36Sopenharmony_ci}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_cistatic const struct vm_operations_struct orangefs_file_vm_ops = {
39362306a36Sopenharmony_ci	.fault = orangefs_fault,
39462306a36Sopenharmony_ci	.map_pages = filemap_map_pages,
39562306a36Sopenharmony_ci	.page_mkwrite = orangefs_page_mkwrite,
39662306a36Sopenharmony_ci};
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci/*
39962306a36Sopenharmony_ci * Memory map a region of a file.
40062306a36Sopenharmony_ci */
40162306a36Sopenharmony_cistatic int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
40262306a36Sopenharmony_ci{
40362306a36Sopenharmony_ci	int ret;
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	ret = orangefs_revalidate_mapping(file_inode(file));
40662306a36Sopenharmony_ci	if (ret)
40762306a36Sopenharmony_ci		return ret;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
41062306a36Sopenharmony_ci		     "orangefs_file_mmap: called on %pD\n", file);
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	/* set the sequential readahead hint */
41362306a36Sopenharmony_ci	vm_flags_mod(vma, VM_SEQ_READ, VM_RAND_READ);
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	file_accessed(file);
41662306a36Sopenharmony_ci	vma->vm_ops = &orangefs_file_vm_ops;
41762306a36Sopenharmony_ci	return 0;
41862306a36Sopenharmony_ci}
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci#define mapping_nrpages(idata) ((idata)->nrpages)
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci/*
42362306a36Sopenharmony_ci * Called to notify the module that there are no more references to
42462306a36Sopenharmony_ci * this file (i.e. no processes have it open).
42562306a36Sopenharmony_ci *
42662306a36Sopenharmony_ci * \note Not called when each file is closed.
42762306a36Sopenharmony_ci */
42862306a36Sopenharmony_cistatic int orangefs_file_release(struct inode *inode, struct file *file)
42962306a36Sopenharmony_ci{
43062306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
43162306a36Sopenharmony_ci		     "orangefs_file_release: called on %pD\n",
43262306a36Sopenharmony_ci		     file);
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	/*
43562306a36Sopenharmony_ci	 * remove all associated inode pages from the page cache and
43662306a36Sopenharmony_ci	 * readahead cache (if any); this forces an expensive refresh of
43762306a36Sopenharmony_ci	 * data for the next caller of mmap (or 'get_block' accesses)
43862306a36Sopenharmony_ci	 */
43962306a36Sopenharmony_ci	if (mapping_nrpages(file->f_mapping)) {
44062306a36Sopenharmony_ci		if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
44162306a36Sopenharmony_ci			gossip_debug(GOSSIP_INODE_DEBUG,
44262306a36Sopenharmony_ci			    "calling flush_racache on %pU\n",
44362306a36Sopenharmony_ci			    get_khandle_from_ino(inode));
44462306a36Sopenharmony_ci			flush_racache(inode);
44562306a36Sopenharmony_ci			gossip_debug(GOSSIP_INODE_DEBUG,
44662306a36Sopenharmony_ci			    "flush_racache finished\n");
44762306a36Sopenharmony_ci		}
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci	}
45062306a36Sopenharmony_ci	return 0;
45162306a36Sopenharmony_ci}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci/*
45462306a36Sopenharmony_ci * Push all data for a specific file onto permanent storage.
45562306a36Sopenharmony_ci */
45662306a36Sopenharmony_cistatic int orangefs_fsync(struct file *file,
45762306a36Sopenharmony_ci		       loff_t start,
45862306a36Sopenharmony_ci		       loff_t end,
45962306a36Sopenharmony_ci		       int datasync)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	int ret;
46262306a36Sopenharmony_ci	struct orangefs_inode_s *orangefs_inode =
46362306a36Sopenharmony_ci		ORANGEFS_I(file_inode(file));
46462306a36Sopenharmony_ci	struct orangefs_kernel_op_s *new_op = NULL;
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	ret = filemap_write_and_wait_range(file_inode(file)->i_mapping,
46762306a36Sopenharmony_ci	    start, end);
46862306a36Sopenharmony_ci	if (ret < 0)
46962306a36Sopenharmony_ci		return ret;
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
47262306a36Sopenharmony_ci	if (!new_op)
47362306a36Sopenharmony_ci		return -ENOMEM;
47462306a36Sopenharmony_ci	new_op->upcall.req.fsync.refn = orangefs_inode->refn;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	ret = service_operation(new_op,
47762306a36Sopenharmony_ci			"orangefs_fsync",
47862306a36Sopenharmony_ci			get_interruptible_flag(file_inode(file)));
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
48162306a36Sopenharmony_ci		     "orangefs_fsync got return value of %d\n",
48262306a36Sopenharmony_ci		     ret);
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	op_release(new_op);
48562306a36Sopenharmony_ci	return ret;
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci/*
48962306a36Sopenharmony_ci * Change the file pointer position for an instance of an open file.
49062306a36Sopenharmony_ci *
49162306a36Sopenharmony_ci * \note If .llseek is overriden, we must acquire lock as described in
49262306a36Sopenharmony_ci *       Documentation/filesystems/locking.rst.
49362306a36Sopenharmony_ci *
49462306a36Sopenharmony_ci * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
49562306a36Sopenharmony_ci * require much changes to the FS
49662306a36Sopenharmony_ci */
49762306a36Sopenharmony_cistatic loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	int ret = -EINVAL;
50062306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci	if (origin == SEEK_END) {
50362306a36Sopenharmony_ci		/*
50462306a36Sopenharmony_ci		 * revalidate the inode's file size.
50562306a36Sopenharmony_ci		 * NOTE: We are only interested in file size here,
50662306a36Sopenharmony_ci		 * so we set mask accordingly.
50762306a36Sopenharmony_ci		 */
50862306a36Sopenharmony_ci		ret = orangefs_inode_getattr(file->f_mapping->host,
50962306a36Sopenharmony_ci		    ORANGEFS_GETATTR_SIZE);
51062306a36Sopenharmony_ci		if (ret == -ESTALE)
51162306a36Sopenharmony_ci			ret = -EIO;
51262306a36Sopenharmony_ci		if (ret) {
51362306a36Sopenharmony_ci			gossip_debug(GOSSIP_FILE_DEBUG,
51462306a36Sopenharmony_ci				     "%s:%s:%d calling make bad inode\n",
51562306a36Sopenharmony_ci				     __FILE__,
51662306a36Sopenharmony_ci				     __func__,
51762306a36Sopenharmony_ci				     __LINE__);
51862306a36Sopenharmony_ci			return ret;
51962306a36Sopenharmony_ci		}
52062306a36Sopenharmony_ci	}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	gossip_debug(GOSSIP_FILE_DEBUG,
52362306a36Sopenharmony_ci		     "orangefs_file_llseek: offset is %ld | origin is %d"
52462306a36Sopenharmony_ci		     " | inode size is %lu\n",
52562306a36Sopenharmony_ci		     (long)offset,
52662306a36Sopenharmony_ci		     origin,
52762306a36Sopenharmony_ci		     (unsigned long)i_size_read(inode));
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	return generic_file_llseek(file, offset, origin);
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci/*
53362306a36Sopenharmony_ci * Support local locks (locks that only this kernel knows about)
53462306a36Sopenharmony_ci * if Orangefs was mounted -o local_lock.
53562306a36Sopenharmony_ci */
53662306a36Sopenharmony_cistatic int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
53762306a36Sopenharmony_ci{
53862306a36Sopenharmony_ci	int rc = -EINVAL;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (ORANGEFS_SB(file_inode(filp)->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
54162306a36Sopenharmony_ci		if (cmd == F_GETLK) {
54262306a36Sopenharmony_ci			rc = 0;
54362306a36Sopenharmony_ci			posix_test_lock(filp, fl);
54462306a36Sopenharmony_ci		} else {
54562306a36Sopenharmony_ci			rc = posix_lock_file(filp, fl, NULL);
54662306a36Sopenharmony_ci		}
54762306a36Sopenharmony_ci	}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	return rc;
55062306a36Sopenharmony_ci}
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_cistatic int orangefs_flush(struct file *file, fl_owner_t id)
55362306a36Sopenharmony_ci{
55462306a36Sopenharmony_ci	/*
55562306a36Sopenharmony_ci	 * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the
55662306a36Sopenharmony_ci	 * service_operation in orangefs_fsync.
55762306a36Sopenharmony_ci	 *
55862306a36Sopenharmony_ci	 * Do not send fsync to OrangeFS server on a close.  Do send fsync
55962306a36Sopenharmony_ci	 * on an explicit fsync call.  This duplicates historical OrangeFS
56062306a36Sopenharmony_ci	 * behavior.
56162306a36Sopenharmony_ci	 */
56262306a36Sopenharmony_ci	int r;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX);
56562306a36Sopenharmony_ci	if (r > 0)
56662306a36Sopenharmony_ci		return 0;
56762306a36Sopenharmony_ci	else
56862306a36Sopenharmony_ci		return r;
56962306a36Sopenharmony_ci}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci/** ORANGEFS implementation of VFS file operations */
57262306a36Sopenharmony_ciconst struct file_operations orangefs_file_operations = {
57362306a36Sopenharmony_ci	.llseek		= orangefs_file_llseek,
57462306a36Sopenharmony_ci	.read_iter	= orangefs_file_read_iter,
57562306a36Sopenharmony_ci	.write_iter	= orangefs_file_write_iter,
57662306a36Sopenharmony_ci	.lock		= orangefs_lock,
57762306a36Sopenharmony_ci	.mmap		= orangefs_file_mmap,
57862306a36Sopenharmony_ci	.open		= generic_file_open,
57962306a36Sopenharmony_ci	.splice_read    = orangefs_file_splice_read,
58062306a36Sopenharmony_ci	.splice_write   = iter_file_splice_write,
58162306a36Sopenharmony_ci	.flush		= orangefs_flush,
58262306a36Sopenharmony_ci	.release	= orangefs_file_release,
58362306a36Sopenharmony_ci	.fsync		= orangefs_fsync,
58462306a36Sopenharmony_ci};
585