18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * (C) 2001 Clemson University and The University of Chicago 48c2ecf20Sopenharmony_ci * Copyright 2018 Omnibond Systems, L.L.C. 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * See COPYING in top-level directory. 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci/* 108c2ecf20Sopenharmony_ci * Linux VFS file operations. 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include "protocol.h" 148c2ecf20Sopenharmony_ci#include "orangefs-kernel.h" 158c2ecf20Sopenharmony_ci#include "orangefs-bufmap.h" 168c2ecf20Sopenharmony_ci#include <linux/fs.h> 178c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_cistatic int flush_racache(struct inode *inode) 208c2ecf20Sopenharmony_ci{ 218c2ecf20Sopenharmony_ci struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 228c2ecf20Sopenharmony_ci struct orangefs_kernel_op_s *new_op; 238c2ecf20Sopenharmony_ci int ret; 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_UTILS_DEBUG, 268c2ecf20Sopenharmony_ci "%s: %pU: Handle is %pU | fs_id %d\n", __func__, 278c2ecf20Sopenharmony_ci get_khandle_from_ino(inode), &orangefs_inode->refn.khandle, 288c2ecf20Sopenharmony_ci orangefs_inode->refn.fs_id); 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH); 318c2ecf20Sopenharmony_ci if (!new_op) 328c2ecf20Sopenharmony_ci return -ENOMEM; 338c2ecf20Sopenharmony_ci new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn; 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci ret = service_operation(new_op, "orangefs_flush_racache", 368c2ecf20Sopenharmony_ci get_interruptible_flag(inode)); 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n", 398c2ecf20Sopenharmony_ci __func__, ret); 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci op_release(new_op); 428c2ecf20Sopenharmony_ci return ret; 438c2ecf20Sopenharmony_ci} 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/* 468c2ecf20Sopenharmony_ci * Post and wait for the I/O upcall to finish 478c2ecf20Sopenharmony_ci */ 488c2ecf20Sopenharmony_cissize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, 498c2ecf20Sopenharmony_ci loff_t *offset, struct iov_iter *iter, size_t total_size, 508c2ecf20Sopenharmony_ci loff_t readahead_size, struct orangefs_write_range *wr, 518c2ecf20Sopenharmony_ci int *index_return, struct file *file) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 548c2ecf20Sopenharmony_ci struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; 558c2ecf20Sopenharmony_ci struct orangefs_kernel_op_s *new_op = NULL; 568c2ecf20Sopenharmony_ci int buffer_index; 578c2ecf20Sopenharmony_ci ssize_t ret; 588c2ecf20Sopenharmony_ci size_t copy_amount; 598c2ecf20Sopenharmony_ci int open_for_read; 608c2ecf20Sopenharmony_ci int open_for_write; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); 638c2ecf20Sopenharmony_ci if (!new_op) 648c2ecf20Sopenharmony_ci return -ENOMEM; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci /* synchronous I/O */ 678c2ecf20Sopenharmony_ci new_op->upcall.req.io.readahead_size = readahead_size; 688c2ecf20Sopenharmony_ci new_op->upcall.req.io.io_type = type; 698c2ecf20Sopenharmony_ci new_op->upcall.req.io.refn = orangefs_inode->refn; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cipopulate_shared_memory: 728c2ecf20Sopenharmony_ci /* get a shared buffer index */ 738c2ecf20Sopenharmony_ci buffer_index = orangefs_bufmap_get(); 748c2ecf20Sopenharmony_ci if (buffer_index < 0) { 758c2ecf20Sopenharmony_ci ret = buffer_index; 768c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 778c2ecf20Sopenharmony_ci "%s: orangefs_bufmap_get failure (%zd)\n", 788c2ecf20Sopenharmony_ci __func__, ret); 798c2ecf20Sopenharmony_ci goto out; 808c2ecf20Sopenharmony_ci } 818c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 828c2ecf20Sopenharmony_ci "%s(%pU): GET op %p -> buffer_index %d\n", 838c2ecf20Sopenharmony_ci __func__, 848c2ecf20Sopenharmony_ci handle, 858c2ecf20Sopenharmony_ci new_op, 868c2ecf20Sopenharmony_ci buffer_index); 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci new_op->uses_shared_memory = 1; 898c2ecf20Sopenharmony_ci new_op->upcall.req.io.buf_index = buffer_index; 908c2ecf20Sopenharmony_ci new_op->upcall.req.io.count = total_size; 918c2ecf20Sopenharmony_ci new_op->upcall.req.io.offset = *offset; 928c2ecf20Sopenharmony_ci if (type == ORANGEFS_IO_WRITE && wr) { 938c2ecf20Sopenharmony_ci new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); 948c2ecf20Sopenharmony_ci new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); 958c2ecf20Sopenharmony_ci } 968c2ecf20Sopenharmony_ci /* 978c2ecf20Sopenharmony_ci * Orangefs has no open, and orangefs checks file permissions 988c2ecf20Sopenharmony_ci * on each file access. Posix requires that file permissions 998c2ecf20Sopenharmony_ci * be checked on open and nowhere else. Orangefs-through-the-kernel 1008c2ecf20Sopenharmony_ci * needs to seem posix compliant. 1018c2ecf20Sopenharmony_ci * 1028c2ecf20Sopenharmony_ci * The VFS opens files, even if the filesystem provides no 1038c2ecf20Sopenharmony_ci * method. We can see if a file was successfully opened for 1048c2ecf20Sopenharmony_ci * read and or for write by looking at file->f_mode. 1058c2ecf20Sopenharmony_ci * 1068c2ecf20Sopenharmony_ci * When writes are flowing from the page cache, file is no 1078c2ecf20Sopenharmony_ci * longer available. We can trust the VFS to have checked 1088c2ecf20Sopenharmony_ci * file->f_mode before writing to the page cache. 1098c2ecf20Sopenharmony_ci * 1108c2ecf20Sopenharmony_ci * The mode of a file might change between when it is opened 1118c2ecf20Sopenharmony_ci * and IO commences, or it might be created with an arbitrary mode. 1128c2ecf20Sopenharmony_ci * 1138c2ecf20Sopenharmony_ci * We'll make sure we don't hit EACCES during the IO stage by 1148c2ecf20Sopenharmony_ci * using UID 0. Some of the time we have access without changing 1158c2ecf20Sopenharmony_ci * to UID 0 - how to check? 1168c2ecf20Sopenharmony_ci */ 1178c2ecf20Sopenharmony_ci if (file) { 1188c2ecf20Sopenharmony_ci open_for_write = file->f_mode & FMODE_WRITE; 1198c2ecf20Sopenharmony_ci open_for_read = file->f_mode & FMODE_READ; 1208c2ecf20Sopenharmony_ci } else { 1218c2ecf20Sopenharmony_ci open_for_write = 1; 1228c2ecf20Sopenharmony_ci open_for_read = 0; /* not relevant? */ 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci if ((type == ORANGEFS_IO_WRITE) && open_for_write) 1258c2ecf20Sopenharmony_ci new_op->upcall.uid = 0; 1268c2ecf20Sopenharmony_ci if ((type == ORANGEFS_IO_READ) && open_for_read) 1278c2ecf20Sopenharmony_ci new_op->upcall.uid = 0; 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 1308c2ecf20Sopenharmony_ci "%s(%pU): offset: %llu total_size: %zd\n", 1318c2ecf20Sopenharmony_ci __func__, 1328c2ecf20Sopenharmony_ci handle, 1338c2ecf20Sopenharmony_ci llu(*offset), 1348c2ecf20Sopenharmony_ci total_size); 1358c2ecf20Sopenharmony_ci /* 1368c2ecf20Sopenharmony_ci * Stage 1: copy the buffers into client-core's address space 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_ci if (type == ORANGEFS_IO_WRITE && total_size) { 1398c2ecf20Sopenharmony_ci ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, 1408c2ecf20Sopenharmony_ci total_size); 1418c2ecf20Sopenharmony_ci if (ret < 0) { 1428c2ecf20Sopenharmony_ci gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", 1438c2ecf20Sopenharmony_ci __func__, (long)ret); 1448c2ecf20Sopenharmony_ci goto out; 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 1498c2ecf20Sopenharmony_ci "%s(%pU): Calling post_io_request with tag (%llu)\n", 1508c2ecf20Sopenharmony_ci __func__, 1518c2ecf20Sopenharmony_ci handle, 1528c2ecf20Sopenharmony_ci llu(new_op->tag)); 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci /* Stage 2: Service the I/O operation */ 1558c2ecf20Sopenharmony_ci ret = service_operation(new_op, 1568c2ecf20Sopenharmony_ci type == ORANGEFS_IO_WRITE ? 1578c2ecf20Sopenharmony_ci "file_write" : 1588c2ecf20Sopenharmony_ci "file_read", 1598c2ecf20Sopenharmony_ci get_interruptible_flag(inode)); 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci /* 1628c2ecf20Sopenharmony_ci * If service_operation() returns -EAGAIN #and# the operation was 1638c2ecf20Sopenharmony_ci * purged from orangefs_request_list or htable_ops_in_progress, then 1648c2ecf20Sopenharmony_ci * we know that the client was restarted, causing the shared memory 1658c2ecf20Sopenharmony_ci * area to be wiped clean. To restart a write operation in this 1668c2ecf20Sopenharmony_ci * case, we must re-copy the data from the user's iovec to a NEW 1678c2ecf20Sopenharmony_ci * shared memory location. To restart a read operation, we must get 1688c2ecf20Sopenharmony_ci * a new shared memory location. 1698c2ecf20Sopenharmony_ci */ 1708c2ecf20Sopenharmony_ci if (ret == -EAGAIN && op_state_purged(new_op)) { 1718c2ecf20Sopenharmony_ci orangefs_bufmap_put(buffer_index); 1728c2ecf20Sopenharmony_ci if (type == ORANGEFS_IO_WRITE) 1738c2ecf20Sopenharmony_ci iov_iter_revert(iter, total_size); 1748c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 1758c2ecf20Sopenharmony_ci "%s:going to repopulate_shared_memory.\n", 1768c2ecf20Sopenharmony_ci __func__); 1778c2ecf20Sopenharmony_ci goto populate_shared_memory; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci if (ret < 0) { 1818c2ecf20Sopenharmony_ci if (ret == -EINTR) { 1828c2ecf20Sopenharmony_ci /* 1838c2ecf20Sopenharmony_ci * We can't return EINTR if any data was written, 1848c2ecf20Sopenharmony_ci * it's not POSIX. It is minimally acceptable 1858c2ecf20Sopenharmony_ci * to give a partial write, the way NFS does. 1868c2ecf20Sopenharmony_ci * 1878c2ecf20Sopenharmony_ci * It would be optimal to return all or nothing, 1888c2ecf20Sopenharmony_ci * but if a userspace write is bigger than 1898c2ecf20Sopenharmony_ci * an IO buffer, and the interrupt occurs 1908c2ecf20Sopenharmony_ci * between buffer writes, that would not be 1918c2ecf20Sopenharmony_ci * possible. 1928c2ecf20Sopenharmony_ci */ 1938c2ecf20Sopenharmony_ci switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) { 1948c2ecf20Sopenharmony_ci /* 1958c2ecf20Sopenharmony_ci * If the op was waiting when the interrupt 1968c2ecf20Sopenharmony_ci * occurred, then the client-core did not 1978c2ecf20Sopenharmony_ci * trigger the write. 1988c2ecf20Sopenharmony_ci */ 1998c2ecf20Sopenharmony_ci case OP_VFS_STATE_WAITING: 2008c2ecf20Sopenharmony_ci if (*offset == 0) 2018c2ecf20Sopenharmony_ci ret = -EINTR; 2028c2ecf20Sopenharmony_ci else 2038c2ecf20Sopenharmony_ci ret = 0; 2048c2ecf20Sopenharmony_ci break; 2058c2ecf20Sopenharmony_ci /* 2068c2ecf20Sopenharmony_ci * If the op was in progress when the interrupt 2078c2ecf20Sopenharmony_ci * occurred, then the client-core was able to 2088c2ecf20Sopenharmony_ci * trigger the write. 2098c2ecf20Sopenharmony_ci */ 2108c2ecf20Sopenharmony_ci case OP_VFS_STATE_INPROGR: 2118c2ecf20Sopenharmony_ci if (type == ORANGEFS_IO_READ) 2128c2ecf20Sopenharmony_ci ret = -EINTR; 2138c2ecf20Sopenharmony_ci else 2148c2ecf20Sopenharmony_ci ret = total_size; 2158c2ecf20Sopenharmony_ci break; 2168c2ecf20Sopenharmony_ci default: 2178c2ecf20Sopenharmony_ci gossip_err("%s: unexpected op state :%d:.\n", 2188c2ecf20Sopenharmony_ci __func__, 2198c2ecf20Sopenharmony_ci new_op->op_state); 2208c2ecf20Sopenharmony_ci ret = 0; 2218c2ecf20Sopenharmony_ci break; 2228c2ecf20Sopenharmony_ci } 2238c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 2248c2ecf20Sopenharmony_ci "%s: got EINTR, state:%d: %p\n", 2258c2ecf20Sopenharmony_ci __func__, 2268c2ecf20Sopenharmony_ci new_op->op_state, 2278c2ecf20Sopenharmony_ci new_op); 2288c2ecf20Sopenharmony_ci } else { 2298c2ecf20Sopenharmony_ci gossip_err("%s: error in %s handle %pU, returning %zd\n", 2308c2ecf20Sopenharmony_ci __func__, 2318c2ecf20Sopenharmony_ci type == ORANGEFS_IO_READ ? 2328c2ecf20Sopenharmony_ci "read from" : "write to", 2338c2ecf20Sopenharmony_ci handle, ret); 2348c2ecf20Sopenharmony_ci } 2358c2ecf20Sopenharmony_ci if (orangefs_cancel_op_in_progress(new_op)) 2368c2ecf20Sopenharmony_ci return ret; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci goto out; 2398c2ecf20Sopenharmony_ci } 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci /* 2428c2ecf20Sopenharmony_ci * Stage 3: Post copy buffers from client-core's address space 2438c2ecf20Sopenharmony_ci */ 2448c2ecf20Sopenharmony_ci if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) { 2458c2ecf20Sopenharmony_ci /* 2468c2ecf20Sopenharmony_ci * NOTE: the iovector can either contain addresses which 2478c2ecf20Sopenharmony_ci * can futher be kernel-space or user-space addresses. 2488c2ecf20Sopenharmony_ci * or it can pointers to struct page's 2498c2ecf20Sopenharmony_ci */ 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci /* 2528c2ecf20Sopenharmony_ci * When reading, readahead_size will only be zero when 2538c2ecf20Sopenharmony_ci * we're doing O_DIRECT, otherwise we got here from 2548c2ecf20Sopenharmony_ci * orangefs_readpage. 2558c2ecf20Sopenharmony_ci * 2568c2ecf20Sopenharmony_ci * If we got here from orangefs_readpage we want to 2578c2ecf20Sopenharmony_ci * copy either a page or the whole file into the io 2588c2ecf20Sopenharmony_ci * vector, whichever is smaller. 2598c2ecf20Sopenharmony_ci */ 2608c2ecf20Sopenharmony_ci if (readahead_size) 2618c2ecf20Sopenharmony_ci copy_amount = 2628c2ecf20Sopenharmony_ci min(new_op->downcall.resp.io.amt_complete, 2638c2ecf20Sopenharmony_ci (__s64)PAGE_SIZE); 2648c2ecf20Sopenharmony_ci else 2658c2ecf20Sopenharmony_ci copy_amount = new_op->downcall.resp.io.amt_complete; 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, 2688c2ecf20Sopenharmony_ci copy_amount); 2698c2ecf20Sopenharmony_ci if (ret < 0) { 2708c2ecf20Sopenharmony_ci gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", 2718c2ecf20Sopenharmony_ci __func__, (long)ret); 2728c2ecf20Sopenharmony_ci goto out; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 2768c2ecf20Sopenharmony_ci "%s(%pU): Amount %s, returned by the sys-io call:%d\n", 2778c2ecf20Sopenharmony_ci __func__, 2788c2ecf20Sopenharmony_ci handle, 2798c2ecf20Sopenharmony_ci type == ORANGEFS_IO_READ ? "read" : "written", 2808c2ecf20Sopenharmony_ci (int)new_op->downcall.resp.io.amt_complete); 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci ret = new_op->downcall.resp.io.amt_complete; 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ciout: 2858c2ecf20Sopenharmony_ci if (buffer_index >= 0) { 2868c2ecf20Sopenharmony_ci if ((readahead_size) && (type == ORANGEFS_IO_READ)) { 2878c2ecf20Sopenharmony_ci /* readpage */ 2888c2ecf20Sopenharmony_ci *index_return = buffer_index; 2898c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 2908c2ecf20Sopenharmony_ci "%s: hold on to buffer_index :%d:\n", 2918c2ecf20Sopenharmony_ci __func__, buffer_index); 2928c2ecf20Sopenharmony_ci } else { 2938c2ecf20Sopenharmony_ci /* O_DIRECT */ 2948c2ecf20Sopenharmony_ci orangefs_bufmap_put(buffer_index); 2958c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 2968c2ecf20Sopenharmony_ci "%s(%pU): PUT buffer_index %d\n", 2978c2ecf20Sopenharmony_ci __func__, handle, buffer_index); 2988c2ecf20Sopenharmony_ci } 2998c2ecf20Sopenharmony_ci } 3008c2ecf20Sopenharmony_ci op_release(new_op); 3018c2ecf20Sopenharmony_ci return ret; 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ciint orangefs_revalidate_mapping(struct inode *inode) 3058c2ecf20Sopenharmony_ci{ 3068c2ecf20Sopenharmony_ci struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); 3078c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 3088c2ecf20Sopenharmony_ci unsigned long *bitlock = &orangefs_inode->bitlock; 3098c2ecf20Sopenharmony_ci int ret; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci while (1) { 3128c2ecf20Sopenharmony_ci ret = wait_on_bit(bitlock, 1, TASK_KILLABLE); 3138c2ecf20Sopenharmony_ci if (ret) 3148c2ecf20Sopenharmony_ci return ret; 3158c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 3168c2ecf20Sopenharmony_ci if (test_bit(1, bitlock)) { 3178c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 3188c2ecf20Sopenharmony_ci continue; 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci if (!time_before(jiffies, orangefs_inode->mapping_time)) 3218c2ecf20Sopenharmony_ci break; 3228c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 3238c2ecf20Sopenharmony_ci return 0; 3248c2ecf20Sopenharmony_ci } 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci set_bit(1, bitlock); 3278c2ecf20Sopenharmony_ci smp_wmb(); 3288c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci unmap_mapping_range(mapping, 0, 0, 0); 3318c2ecf20Sopenharmony_ci ret = filemap_write_and_wait(mapping); 3328c2ecf20Sopenharmony_ci if (!ret) 3338c2ecf20Sopenharmony_ci ret = invalidate_inode_pages2(mapping); 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci orangefs_inode->mapping_time = jiffies + 3368c2ecf20Sopenharmony_ci orangefs_cache_timeout_msecs*HZ/1000; 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci clear_bit(1, bitlock); 3398c2ecf20Sopenharmony_ci smp_mb__after_atomic(); 3408c2ecf20Sopenharmony_ci wake_up_bit(bitlock, 1); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci return ret; 3438c2ecf20Sopenharmony_ci} 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_cistatic ssize_t orangefs_file_read_iter(struct kiocb *iocb, 3468c2ecf20Sopenharmony_ci struct iov_iter *iter) 3478c2ecf20Sopenharmony_ci{ 3488c2ecf20Sopenharmony_ci int ret; 3498c2ecf20Sopenharmony_ci orangefs_stats.reads++; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci down_read(&file_inode(iocb->ki_filp)->i_rwsem); 3528c2ecf20Sopenharmony_ci ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); 3538c2ecf20Sopenharmony_ci if (ret) 3548c2ecf20Sopenharmony_ci goto out; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci ret = generic_file_read_iter(iocb, iter); 3578c2ecf20Sopenharmony_ciout: 3588c2ecf20Sopenharmony_ci up_read(&file_inode(iocb->ki_filp)->i_rwsem); 3598c2ecf20Sopenharmony_ci return ret; 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_cistatic ssize_t orangefs_file_write_iter(struct kiocb *iocb, 3638c2ecf20Sopenharmony_ci struct iov_iter *iter) 3648c2ecf20Sopenharmony_ci{ 3658c2ecf20Sopenharmony_ci int ret; 3668c2ecf20Sopenharmony_ci orangefs_stats.writes++; 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) { 3698c2ecf20Sopenharmony_ci ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); 3708c2ecf20Sopenharmony_ci if (ret) 3718c2ecf20Sopenharmony_ci return ret; 3728c2ecf20Sopenharmony_ci } 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci ret = generic_file_write_iter(iocb, iter); 3758c2ecf20Sopenharmony_ci return ret; 3768c2ecf20Sopenharmony_ci} 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_cistatic int orangefs_getflags(struct inode *inode, unsigned long *uval) 3798c2ecf20Sopenharmony_ci{ 3808c2ecf20Sopenharmony_ci __u64 val = 0; 3818c2ecf20Sopenharmony_ci int ret; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci ret = orangefs_inode_getxattr(inode, 3848c2ecf20Sopenharmony_ci "user.pvfs2.meta_hint", 3858c2ecf20Sopenharmony_ci &val, sizeof(val)); 3868c2ecf20Sopenharmony_ci if (ret < 0 && ret != -ENODATA) 3878c2ecf20Sopenharmony_ci return ret; 3888c2ecf20Sopenharmony_ci else if (ret == -ENODATA) 3898c2ecf20Sopenharmony_ci val = 0; 3908c2ecf20Sopenharmony_ci *uval = val; 3918c2ecf20Sopenharmony_ci return 0; 3928c2ecf20Sopenharmony_ci} 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci/* 3958c2ecf20Sopenharmony_ci * Perform a miscellaneous operation on a file. 3968c2ecf20Sopenharmony_ci */ 3978c2ecf20Sopenharmony_cistatic long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 3988c2ecf20Sopenharmony_ci{ 3998c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 4008c2ecf20Sopenharmony_ci int ret = -ENOTTY; 4018c2ecf20Sopenharmony_ci __u64 val = 0; 4028c2ecf20Sopenharmony_ci unsigned long uval; 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 4058c2ecf20Sopenharmony_ci "orangefs_ioctl: called with cmd %d\n", 4068c2ecf20Sopenharmony_ci cmd); 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci /* 4098c2ecf20Sopenharmony_ci * we understand some general ioctls on files, such as the immutable 4108c2ecf20Sopenharmony_ci * and append flags 4118c2ecf20Sopenharmony_ci */ 4128c2ecf20Sopenharmony_ci if (cmd == FS_IOC_GETFLAGS) { 4138c2ecf20Sopenharmony_ci ret = orangefs_getflags(inode, &uval); 4148c2ecf20Sopenharmony_ci if (ret) 4158c2ecf20Sopenharmony_ci return ret; 4168c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 4178c2ecf20Sopenharmony_ci "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", 4188c2ecf20Sopenharmony_ci (unsigned long long)uval); 4198c2ecf20Sopenharmony_ci return put_user(uval, (int __user *)arg); 4208c2ecf20Sopenharmony_ci } else if (cmd == FS_IOC_SETFLAGS) { 4218c2ecf20Sopenharmony_ci unsigned long old_uval; 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci ret = 0; 4248c2ecf20Sopenharmony_ci if (get_user(uval, (int __user *)arg)) 4258c2ecf20Sopenharmony_ci return -EFAULT; 4268c2ecf20Sopenharmony_ci /* 4278c2ecf20Sopenharmony_ci * ORANGEFS_MIRROR_FL is set internally when the mirroring mode 4288c2ecf20Sopenharmony_ci * is turned on for a file. The user is not allowed to turn 4298c2ecf20Sopenharmony_ci * on this bit, but the bit is present if the user first gets 4308c2ecf20Sopenharmony_ci * the flags and then updates the flags with some new 4318c2ecf20Sopenharmony_ci * settings. So, we ignore it in the following edit. bligon. 4328c2ecf20Sopenharmony_ci */ 4338c2ecf20Sopenharmony_ci if ((uval & ~ORANGEFS_MIRROR_FL) & 4348c2ecf20Sopenharmony_ci (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) { 4358c2ecf20Sopenharmony_ci gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); 4368c2ecf20Sopenharmony_ci return -EINVAL; 4378c2ecf20Sopenharmony_ci } 4388c2ecf20Sopenharmony_ci ret = orangefs_getflags(inode, &old_uval); 4398c2ecf20Sopenharmony_ci if (ret) 4408c2ecf20Sopenharmony_ci return ret; 4418c2ecf20Sopenharmony_ci ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); 4428c2ecf20Sopenharmony_ci if (ret) 4438c2ecf20Sopenharmony_ci return ret; 4448c2ecf20Sopenharmony_ci val = uval; 4458c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 4468c2ecf20Sopenharmony_ci "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", 4478c2ecf20Sopenharmony_ci (unsigned long long)val); 4488c2ecf20Sopenharmony_ci ret = orangefs_inode_setxattr(inode, 4498c2ecf20Sopenharmony_ci "user.pvfs2.meta_hint", 4508c2ecf20Sopenharmony_ci &val, sizeof(val), 0); 4518c2ecf20Sopenharmony_ci } 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci return ret; 4548c2ecf20Sopenharmony_ci} 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_cistatic vm_fault_t orangefs_fault(struct vm_fault *vmf) 4578c2ecf20Sopenharmony_ci{ 4588c2ecf20Sopenharmony_ci struct file *file = vmf->vma->vm_file; 4598c2ecf20Sopenharmony_ci int ret; 4608c2ecf20Sopenharmony_ci ret = orangefs_inode_getattr(file->f_mapping->host, 4618c2ecf20Sopenharmony_ci ORANGEFS_GETATTR_SIZE); 4628c2ecf20Sopenharmony_ci if (ret == -ESTALE) 4638c2ecf20Sopenharmony_ci ret = -EIO; 4648c2ecf20Sopenharmony_ci if (ret) { 4658c2ecf20Sopenharmony_ci gossip_err("%s: orangefs_inode_getattr failed, " 4668c2ecf20Sopenharmony_ci "ret:%d:.\n", __func__, ret); 4678c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 4688c2ecf20Sopenharmony_ci } 4698c2ecf20Sopenharmony_ci return filemap_fault(vmf); 4708c2ecf20Sopenharmony_ci} 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_cistatic const struct vm_operations_struct orangefs_file_vm_ops = { 4738c2ecf20Sopenharmony_ci .fault = orangefs_fault, 4748c2ecf20Sopenharmony_ci .map_pages = filemap_map_pages, 4758c2ecf20Sopenharmony_ci .page_mkwrite = orangefs_page_mkwrite, 4768c2ecf20Sopenharmony_ci}; 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci/* 4798c2ecf20Sopenharmony_ci * Memory map a region of a file. 4808c2ecf20Sopenharmony_ci */ 4818c2ecf20Sopenharmony_cistatic int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) 4828c2ecf20Sopenharmony_ci{ 4838c2ecf20Sopenharmony_ci int ret; 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci ret = orangefs_revalidate_mapping(file_inode(file)); 4868c2ecf20Sopenharmony_ci if (ret) 4878c2ecf20Sopenharmony_ci return ret; 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 4908c2ecf20Sopenharmony_ci "orangefs_file_mmap: called on %s\n", 4918c2ecf20Sopenharmony_ci (file ? 4928c2ecf20Sopenharmony_ci (char *)file->f_path.dentry->d_name.name : 4938c2ecf20Sopenharmony_ci (char *)"Unknown")); 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci /* set the sequential readahead hint */ 4968c2ecf20Sopenharmony_ci vma->vm_flags |= VM_SEQ_READ; 4978c2ecf20Sopenharmony_ci vma->vm_flags &= ~VM_RAND_READ; 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci file_accessed(file); 5008c2ecf20Sopenharmony_ci vma->vm_ops = &orangefs_file_vm_ops; 5018c2ecf20Sopenharmony_ci return 0; 5028c2ecf20Sopenharmony_ci} 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci#define mapping_nrpages(idata) ((idata)->nrpages) 5058c2ecf20Sopenharmony_ci 5068c2ecf20Sopenharmony_ci/* 5078c2ecf20Sopenharmony_ci * Called to notify the module that there are no more references to 5088c2ecf20Sopenharmony_ci * this file (i.e. no processes have it open). 5098c2ecf20Sopenharmony_ci * 5108c2ecf20Sopenharmony_ci * \note Not called when each file is closed. 5118c2ecf20Sopenharmony_ci */ 5128c2ecf20Sopenharmony_cistatic int orangefs_file_release(struct inode *inode, struct file *file) 5138c2ecf20Sopenharmony_ci{ 5148c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 5158c2ecf20Sopenharmony_ci "orangefs_file_release: called on %pD\n", 5168c2ecf20Sopenharmony_ci file); 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci /* 5198c2ecf20Sopenharmony_ci * remove all associated inode pages from the page cache and 5208c2ecf20Sopenharmony_ci * readahead cache (if any); this forces an expensive refresh of 5218c2ecf20Sopenharmony_ci * data for the next caller of mmap (or 'get_block' accesses) 5228c2ecf20Sopenharmony_ci */ 5238c2ecf20Sopenharmony_ci if (file_inode(file) && 5248c2ecf20Sopenharmony_ci file_inode(file)->i_mapping && 5258c2ecf20Sopenharmony_ci mapping_nrpages(&file_inode(file)->i_data)) { 5268c2ecf20Sopenharmony_ci if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) { 5278c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_INODE_DEBUG, 5288c2ecf20Sopenharmony_ci "calling flush_racache on %pU\n", 5298c2ecf20Sopenharmony_ci get_khandle_from_ino(inode)); 5308c2ecf20Sopenharmony_ci flush_racache(inode); 5318c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_INODE_DEBUG, 5328c2ecf20Sopenharmony_ci "flush_racache finished\n"); 5338c2ecf20Sopenharmony_ci } 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci return 0; 5378c2ecf20Sopenharmony_ci} 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci/* 5408c2ecf20Sopenharmony_ci * Push all data for a specific file onto permanent storage. 5418c2ecf20Sopenharmony_ci */ 5428c2ecf20Sopenharmony_cistatic int orangefs_fsync(struct file *file, 5438c2ecf20Sopenharmony_ci loff_t start, 5448c2ecf20Sopenharmony_ci loff_t end, 5458c2ecf20Sopenharmony_ci int datasync) 5468c2ecf20Sopenharmony_ci{ 5478c2ecf20Sopenharmony_ci int ret; 5488c2ecf20Sopenharmony_ci struct orangefs_inode_s *orangefs_inode = 5498c2ecf20Sopenharmony_ci ORANGEFS_I(file_inode(file)); 5508c2ecf20Sopenharmony_ci struct orangefs_kernel_op_s *new_op = NULL; 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_ci ret = filemap_write_and_wait_range(file_inode(file)->i_mapping, 5538c2ecf20Sopenharmony_ci start, end); 5548c2ecf20Sopenharmony_ci if (ret < 0) 5558c2ecf20Sopenharmony_ci return ret; 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); 5588c2ecf20Sopenharmony_ci if (!new_op) 5598c2ecf20Sopenharmony_ci return -ENOMEM; 5608c2ecf20Sopenharmony_ci new_op->upcall.req.fsync.refn = orangefs_inode->refn; 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci ret = service_operation(new_op, 5638c2ecf20Sopenharmony_ci "orangefs_fsync", 5648c2ecf20Sopenharmony_ci get_interruptible_flag(file_inode(file))); 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 5678c2ecf20Sopenharmony_ci "orangefs_fsync got return value of %d\n", 5688c2ecf20Sopenharmony_ci ret); 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci op_release(new_op); 5718c2ecf20Sopenharmony_ci return ret; 5728c2ecf20Sopenharmony_ci} 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci/* 5758c2ecf20Sopenharmony_ci * Change the file pointer position for an instance of an open file. 5768c2ecf20Sopenharmony_ci * 5778c2ecf20Sopenharmony_ci * \note If .llseek is overriden, we must acquire lock as described in 5788c2ecf20Sopenharmony_ci * Documentation/filesystems/locking.rst. 5798c2ecf20Sopenharmony_ci * 5808c2ecf20Sopenharmony_ci * Future upgrade could support SEEK_DATA and SEEK_HOLE but would 5818c2ecf20Sopenharmony_ci * require much changes to the FS 5828c2ecf20Sopenharmony_ci */ 5838c2ecf20Sopenharmony_cistatic loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) 5848c2ecf20Sopenharmony_ci{ 5858c2ecf20Sopenharmony_ci int ret = -EINVAL; 5868c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci if (origin == SEEK_END) { 5898c2ecf20Sopenharmony_ci /* 5908c2ecf20Sopenharmony_ci * revalidate the inode's file size. 5918c2ecf20Sopenharmony_ci * NOTE: We are only interested in file size here, 5928c2ecf20Sopenharmony_ci * so we set mask accordingly. 5938c2ecf20Sopenharmony_ci */ 5948c2ecf20Sopenharmony_ci ret = orangefs_inode_getattr(file->f_mapping->host, 5958c2ecf20Sopenharmony_ci ORANGEFS_GETATTR_SIZE); 5968c2ecf20Sopenharmony_ci if (ret == -ESTALE) 5978c2ecf20Sopenharmony_ci ret = -EIO; 5988c2ecf20Sopenharmony_ci if (ret) { 5998c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 6008c2ecf20Sopenharmony_ci "%s:%s:%d calling make bad inode\n", 6018c2ecf20Sopenharmony_ci __FILE__, 6028c2ecf20Sopenharmony_ci __func__, 6038c2ecf20Sopenharmony_ci __LINE__); 6048c2ecf20Sopenharmony_ci return ret; 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci gossip_debug(GOSSIP_FILE_DEBUG, 6098c2ecf20Sopenharmony_ci "orangefs_file_llseek: offset is %ld | origin is %d" 6108c2ecf20Sopenharmony_ci " | inode size is %lu\n", 6118c2ecf20Sopenharmony_ci (long)offset, 6128c2ecf20Sopenharmony_ci origin, 6138c2ecf20Sopenharmony_ci (unsigned long)i_size_read(inode)); 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci return generic_file_llseek(file, offset, origin); 6168c2ecf20Sopenharmony_ci} 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci/* 6198c2ecf20Sopenharmony_ci * Support local locks (locks that only this kernel knows about) 6208c2ecf20Sopenharmony_ci * if Orangefs was mounted -o local_lock. 6218c2ecf20Sopenharmony_ci */ 6228c2ecf20Sopenharmony_cistatic int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) 6238c2ecf20Sopenharmony_ci{ 6248c2ecf20Sopenharmony_ci int rc = -EINVAL; 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci if (ORANGEFS_SB(file_inode(filp)->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) { 6278c2ecf20Sopenharmony_ci if (cmd == F_GETLK) { 6288c2ecf20Sopenharmony_ci rc = 0; 6298c2ecf20Sopenharmony_ci posix_test_lock(filp, fl); 6308c2ecf20Sopenharmony_ci } else { 6318c2ecf20Sopenharmony_ci rc = posix_lock_file(filp, fl, NULL); 6328c2ecf20Sopenharmony_ci } 6338c2ecf20Sopenharmony_ci } 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci return rc; 6368c2ecf20Sopenharmony_ci} 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_cistatic int orangefs_flush(struct file *file, fl_owner_t id) 6398c2ecf20Sopenharmony_ci{ 6408c2ecf20Sopenharmony_ci /* 6418c2ecf20Sopenharmony_ci * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the 6428c2ecf20Sopenharmony_ci * service_operation in orangefs_fsync. 6438c2ecf20Sopenharmony_ci * 6448c2ecf20Sopenharmony_ci * Do not send fsync to OrangeFS server on a close. Do send fsync 6458c2ecf20Sopenharmony_ci * on an explicit fsync call. This duplicates historical OrangeFS 6468c2ecf20Sopenharmony_ci * behavior. 6478c2ecf20Sopenharmony_ci */ 6488c2ecf20Sopenharmony_ci int r; 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); 6518c2ecf20Sopenharmony_ci if (r > 0) 6528c2ecf20Sopenharmony_ci return 0; 6538c2ecf20Sopenharmony_ci else 6548c2ecf20Sopenharmony_ci return r; 6558c2ecf20Sopenharmony_ci} 6568c2ecf20Sopenharmony_ci 6578c2ecf20Sopenharmony_ci/** ORANGEFS implementation of VFS file operations */ 6588c2ecf20Sopenharmony_ciconst struct file_operations orangefs_file_operations = { 6598c2ecf20Sopenharmony_ci .llseek = orangefs_file_llseek, 6608c2ecf20Sopenharmony_ci .read_iter = orangefs_file_read_iter, 6618c2ecf20Sopenharmony_ci .write_iter = orangefs_file_write_iter, 6628c2ecf20Sopenharmony_ci .lock = orangefs_lock, 6638c2ecf20Sopenharmony_ci .unlocked_ioctl = orangefs_ioctl, 6648c2ecf20Sopenharmony_ci .mmap = orangefs_file_mmap, 6658c2ecf20Sopenharmony_ci .open = generic_file_open, 6668c2ecf20Sopenharmony_ci .flush = orangefs_flush, 6678c2ecf20Sopenharmony_ci .release = orangefs_file_release, 6688c2ecf20Sopenharmony_ci .fsync = orangefs_fsync, 6698c2ecf20Sopenharmony_ci}; 670