// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * mmap.c
 *
 * Code to deal with the mess that is clustered mmap.
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/fs.h>
1162306a36Sopenharmony_ci#include <linux/types.h>
1262306a36Sopenharmony_ci#include <linux/highmem.h>
1362306a36Sopenharmony_ci#include <linux/pagemap.h>
1462306a36Sopenharmony_ci#include <linux/uio.h>
1562306a36Sopenharmony_ci#include <linux/signal.h>
1662306a36Sopenharmony_ci#include <linux/rbtree.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include <cluster/masklog.h>
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include "ocfs2.h"
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include "aops.h"
2362306a36Sopenharmony_ci#include "dlmglue.h"
2462306a36Sopenharmony_ci#include "file.h"
2562306a36Sopenharmony_ci#include "inode.h"
2662306a36Sopenharmony_ci#include "mmap.h"
2762306a36Sopenharmony_ci#include "super.h"
2862306a36Sopenharmony_ci#include "ocfs2_trace.h"
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic vm_fault_t ocfs2_fault(struct vm_fault *vmf)
3262306a36Sopenharmony_ci{
3362306a36Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
3462306a36Sopenharmony_ci	sigset_t oldset;
3562306a36Sopenharmony_ci	vm_fault_t ret;
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	ocfs2_block_signals(&oldset);
3862306a36Sopenharmony_ci	ret = filemap_fault(vmf);
3962306a36Sopenharmony_ci	ocfs2_unblock_signals(&oldset);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
4262306a36Sopenharmony_ci			  vma, vmf->page, vmf->pgoff);
4362306a36Sopenharmony_ci	return ret;
4462306a36Sopenharmony_ci}
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistatic vm_fault_t __ocfs2_page_mkwrite(struct file *file,
4762306a36Sopenharmony_ci			struct buffer_head *di_bh, struct page *page)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	int err;
5062306a36Sopenharmony_ci	vm_fault_t ret = VM_FAULT_NOPAGE;
5162306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
5262306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
5362306a36Sopenharmony_ci	loff_t pos = page_offset(page);
5462306a36Sopenharmony_ci	unsigned int len = PAGE_SIZE;
5562306a36Sopenharmony_ci	pgoff_t last_index;
5662306a36Sopenharmony_ci	struct page *locked_page = NULL;
5762306a36Sopenharmony_ci	void *fsdata;
5862306a36Sopenharmony_ci	loff_t size = i_size_read(inode);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	last_index = (size - 1) >> PAGE_SHIFT;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	/*
6362306a36Sopenharmony_ci	 * There are cases that lead to the page no longer belonging to the
6462306a36Sopenharmony_ci	 * mapping.
6562306a36Sopenharmony_ci	 * 1) pagecache truncates locally due to memory pressure.
6662306a36Sopenharmony_ci	 * 2) pagecache truncates when another is taking EX lock against
6762306a36Sopenharmony_ci	 * inode lock. see ocfs2_data_convert_worker.
6862306a36Sopenharmony_ci	 *
6962306a36Sopenharmony_ci	 * The i_size check doesn't catch the case where nodes truncated and
7062306a36Sopenharmony_ci	 * then re-extended the file. We'll re-check the page mapping after
7162306a36Sopenharmony_ci	 * taking the page lock inside of ocfs2_write_begin_nolock().
7262306a36Sopenharmony_ci	 *
7362306a36Sopenharmony_ci	 * Let VM retry with these cases.
7462306a36Sopenharmony_ci	 */
7562306a36Sopenharmony_ci	if ((page->mapping != inode->i_mapping) ||
7662306a36Sopenharmony_ci	    (!PageUptodate(page)) ||
7762306a36Sopenharmony_ci	    (page_offset(page) >= size))
7862306a36Sopenharmony_ci		goto out;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	/*
8162306a36Sopenharmony_ci	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
8262306a36Sopenharmony_ci	 * advantage of the allocation code there. We pass a write
8362306a36Sopenharmony_ci	 * length of the whole page (chopped to i_size) to make sure
8462306a36Sopenharmony_ci	 * the whole thing is allocated.
8562306a36Sopenharmony_ci	 *
8662306a36Sopenharmony_ci	 * Since we know the page is up to date, we don't have to
8762306a36Sopenharmony_ci	 * worry about ocfs2_write_begin() skipping some buffer reads
8862306a36Sopenharmony_ci	 * because the "write" would invalidate their data.
8962306a36Sopenharmony_ci	 */
9062306a36Sopenharmony_ci	if (page->index == last_index)
9162306a36Sopenharmony_ci		len = ((size - 1) & ~PAGE_MASK) + 1;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
9462306a36Sopenharmony_ci				       &locked_page, &fsdata, di_bh, page);
9562306a36Sopenharmony_ci	if (err) {
9662306a36Sopenharmony_ci		if (err != -ENOSPC)
9762306a36Sopenharmony_ci			mlog_errno(err);
9862306a36Sopenharmony_ci		ret = vmf_error(err);
9962306a36Sopenharmony_ci		goto out;
10062306a36Sopenharmony_ci	}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	if (!locked_page) {
10362306a36Sopenharmony_ci		ret = VM_FAULT_NOPAGE;
10462306a36Sopenharmony_ci		goto out;
10562306a36Sopenharmony_ci	}
10662306a36Sopenharmony_ci	err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
10762306a36Sopenharmony_ci	BUG_ON(err != len);
10862306a36Sopenharmony_ci	ret = VM_FAULT_LOCKED;
10962306a36Sopenharmony_ciout:
11062306a36Sopenharmony_ci	return ret;
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_cistatic vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
11462306a36Sopenharmony_ci{
11562306a36Sopenharmony_ci	struct page *page = vmf->page;
11662306a36Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
11762306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
11862306a36Sopenharmony_ci	sigset_t oldset;
11962306a36Sopenharmony_ci	int err;
12062306a36Sopenharmony_ci	vm_fault_t ret;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	sb_start_pagefault(inode->i_sb);
12362306a36Sopenharmony_ci	ocfs2_block_signals(&oldset);
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/*
12662306a36Sopenharmony_ci	 * The cluster locks taken will block a truncate from another
12762306a36Sopenharmony_ci	 * node. Taking the data lock will also ensure that we don't
12862306a36Sopenharmony_ci	 * attempt page truncation as part of a downconvert.
12962306a36Sopenharmony_ci	 */
13062306a36Sopenharmony_ci	err = ocfs2_inode_lock(inode, &di_bh, 1);
13162306a36Sopenharmony_ci	if (err < 0) {
13262306a36Sopenharmony_ci		mlog_errno(err);
13362306a36Sopenharmony_ci		ret = vmf_error(err);
13462306a36Sopenharmony_ci		goto out;
13562306a36Sopenharmony_ci	}
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	/*
13862306a36Sopenharmony_ci	 * The alloc sem should be enough to serialize with
13962306a36Sopenharmony_ci	 * ocfs2_truncate_file() changing i_size as well as any thread
14062306a36Sopenharmony_ci	 * modifying the inode btree.
14162306a36Sopenharmony_ci	 */
14262306a36Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	brelse(di_bh);
14962306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ciout:
15262306a36Sopenharmony_ci	ocfs2_unblock_signals(&oldset);
15362306a36Sopenharmony_ci	sb_end_pagefault(inode->i_sb);
15462306a36Sopenharmony_ci	return ret;
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic const struct vm_operations_struct ocfs2_file_vm_ops = {
15862306a36Sopenharmony_ci	.fault		= ocfs2_fault,
15962306a36Sopenharmony_ci	.page_mkwrite	= ocfs2_page_mkwrite,
16062306a36Sopenharmony_ci};
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ciint ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	int ret = 0, lock_level = 0;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	ret = ocfs2_inode_lock_atime(file_inode(file),
16762306a36Sopenharmony_ci				    file->f_path.mnt, &lock_level, 1);
16862306a36Sopenharmony_ci	if (ret < 0) {
16962306a36Sopenharmony_ci		mlog_errno(ret);
17062306a36Sopenharmony_ci		goto out;
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci	ocfs2_inode_unlock(file_inode(file), lock_level);
17362306a36Sopenharmony_ciout:
17462306a36Sopenharmony_ci	vma->vm_ops = &ocfs2_file_vm_ops;
17562306a36Sopenharmony_ci	return 0;
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
178