xref: /kernel/linux/linux-5.10/fs/ocfs2/mmap.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * mmap.c
 *
 * Code to deal with the mess that is clustered mmap.
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/fs.h>
138c2ecf20Sopenharmony_ci#include <linux/types.h>
148c2ecf20Sopenharmony_ci#include <linux/highmem.h>
158c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
168c2ecf20Sopenharmony_ci#include <linux/uio.h>
178c2ecf20Sopenharmony_ci#include <linux/signal.h>
188c2ecf20Sopenharmony_ci#include <linux/rbtree.h>
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci#include <cluster/masklog.h>
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#include "ocfs2.h"
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci#include "aops.h"
258c2ecf20Sopenharmony_ci#include "dlmglue.h"
268c2ecf20Sopenharmony_ci#include "file.h"
278c2ecf20Sopenharmony_ci#include "inode.h"
288c2ecf20Sopenharmony_ci#include "mmap.h"
298c2ecf20Sopenharmony_ci#include "super.h"
308c2ecf20Sopenharmony_ci#include "ocfs2_trace.h"
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic vm_fault_t ocfs2_fault(struct vm_fault *vmf)
348c2ecf20Sopenharmony_ci{
358c2ecf20Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
368c2ecf20Sopenharmony_ci	sigset_t oldset;
378c2ecf20Sopenharmony_ci	vm_fault_t ret;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	ocfs2_block_signals(&oldset);
408c2ecf20Sopenharmony_ci	ret = filemap_fault(vmf);
418c2ecf20Sopenharmony_ci	ocfs2_unblock_signals(&oldset);
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	trace_ocfs2_fault(OCFS2_I(vma->vm_file->f_mapping->host)->ip_blkno,
448c2ecf20Sopenharmony_ci			  vma, vmf->page, vmf->pgoff);
458c2ecf20Sopenharmony_ci	return ret;
468c2ecf20Sopenharmony_ci}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistatic vm_fault_t __ocfs2_page_mkwrite(struct file *file,
498c2ecf20Sopenharmony_ci			struct buffer_head *di_bh, struct page *page)
508c2ecf20Sopenharmony_ci{
518c2ecf20Sopenharmony_ci	int err;
528c2ecf20Sopenharmony_ci	vm_fault_t ret = VM_FAULT_NOPAGE;
538c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
548c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
558c2ecf20Sopenharmony_ci	loff_t pos = page_offset(page);
568c2ecf20Sopenharmony_ci	unsigned int len = PAGE_SIZE;
578c2ecf20Sopenharmony_ci	pgoff_t last_index;
588c2ecf20Sopenharmony_ci	struct page *locked_page = NULL;
598c2ecf20Sopenharmony_ci	void *fsdata;
608c2ecf20Sopenharmony_ci	loff_t size = i_size_read(inode);
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	last_index = (size - 1) >> PAGE_SHIFT;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	/*
658c2ecf20Sopenharmony_ci	 * There are cases that lead to the page no longer belonging to the
668c2ecf20Sopenharmony_ci	 * mapping.
678c2ecf20Sopenharmony_ci	 * 1) pagecache truncates locally due to memory pressure.
688c2ecf20Sopenharmony_ci	 * 2) pagecache truncates when another is taking EX lock against
698c2ecf20Sopenharmony_ci	 * inode lock. see ocfs2_data_convert_worker.
708c2ecf20Sopenharmony_ci	 *
718c2ecf20Sopenharmony_ci	 * The i_size check doesn't catch the case where nodes truncated and
728c2ecf20Sopenharmony_ci	 * then re-extended the file. We'll re-check the page mapping after
738c2ecf20Sopenharmony_ci	 * taking the page lock inside of ocfs2_write_begin_nolock().
748c2ecf20Sopenharmony_ci	 *
758c2ecf20Sopenharmony_ci	 * Let VM retry with these cases.
768c2ecf20Sopenharmony_ci	 */
778c2ecf20Sopenharmony_ci	if ((page->mapping != inode->i_mapping) ||
788c2ecf20Sopenharmony_ci	    (!PageUptodate(page)) ||
798c2ecf20Sopenharmony_ci	    (page_offset(page) >= size))
808c2ecf20Sopenharmony_ci		goto out;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	/*
838c2ecf20Sopenharmony_ci	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
848c2ecf20Sopenharmony_ci	 * advantage of the allocation code there. We pass a write
858c2ecf20Sopenharmony_ci	 * length of the whole page (chopped to i_size) to make sure
868c2ecf20Sopenharmony_ci	 * the whole thing is allocated.
878c2ecf20Sopenharmony_ci	 *
888c2ecf20Sopenharmony_ci	 * Since we know the page is up to date, we don't have to
898c2ecf20Sopenharmony_ci	 * worry about ocfs2_write_begin() skipping some buffer reads
908c2ecf20Sopenharmony_ci	 * because the "write" would invalidate their data.
918c2ecf20Sopenharmony_ci	 */
928c2ecf20Sopenharmony_ci	if (page->index == last_index)
938c2ecf20Sopenharmony_ci		len = ((size - 1) & ~PAGE_MASK) + 1;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
968c2ecf20Sopenharmony_ci				       &locked_page, &fsdata, di_bh, page);
978c2ecf20Sopenharmony_ci	if (err) {
988c2ecf20Sopenharmony_ci		if (err != -ENOSPC)
998c2ecf20Sopenharmony_ci			mlog_errno(err);
1008c2ecf20Sopenharmony_ci		ret = vmf_error(err);
1018c2ecf20Sopenharmony_ci		goto out;
1028c2ecf20Sopenharmony_ci	}
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	if (!locked_page) {
1058c2ecf20Sopenharmony_ci		ret = VM_FAULT_NOPAGE;
1068c2ecf20Sopenharmony_ci		goto out;
1078c2ecf20Sopenharmony_ci	}
1088c2ecf20Sopenharmony_ci	err = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
1098c2ecf20Sopenharmony_ci	BUG_ON(err != len);
1108c2ecf20Sopenharmony_ci	ret = VM_FAULT_LOCKED;
1118c2ecf20Sopenharmony_ciout:
1128c2ecf20Sopenharmony_ci	return ret;
1138c2ecf20Sopenharmony_ci}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_cistatic vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
1168c2ecf20Sopenharmony_ci{
1178c2ecf20Sopenharmony_ci	struct page *page = vmf->page;
1188c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
1198c2ecf20Sopenharmony_ci	struct buffer_head *di_bh = NULL;
1208c2ecf20Sopenharmony_ci	sigset_t oldset;
1218c2ecf20Sopenharmony_ci	int err;
1228c2ecf20Sopenharmony_ci	vm_fault_t ret;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	sb_start_pagefault(inode->i_sb);
1258c2ecf20Sopenharmony_ci	ocfs2_block_signals(&oldset);
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	/*
1288c2ecf20Sopenharmony_ci	 * The cluster locks taken will block a truncate from another
1298c2ecf20Sopenharmony_ci	 * node. Taking the data lock will also ensure that we don't
1308c2ecf20Sopenharmony_ci	 * attempt page truncation as part of a downconvert.
1318c2ecf20Sopenharmony_ci	 */
1328c2ecf20Sopenharmony_ci	err = ocfs2_inode_lock(inode, &di_bh, 1);
1338c2ecf20Sopenharmony_ci	if (err < 0) {
1348c2ecf20Sopenharmony_ci		mlog_errno(err);
1358c2ecf20Sopenharmony_ci		ret = vmf_error(err);
1368c2ecf20Sopenharmony_ci		goto out;
1378c2ecf20Sopenharmony_ci	}
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	/*
1408c2ecf20Sopenharmony_ci	 * The alloc sem should be enough to serialize with
1418c2ecf20Sopenharmony_ci	 * ocfs2_truncate_file() changing i_size as well as any thread
1428c2ecf20Sopenharmony_ci	 * modifying the inode btree.
1438c2ecf20Sopenharmony_ci	 */
1448c2ecf20Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	brelse(di_bh);
1518c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ciout:
1548c2ecf20Sopenharmony_ci	ocfs2_unblock_signals(&oldset);
1558c2ecf20Sopenharmony_ci	sb_end_pagefault(inode->i_sb);
1568c2ecf20Sopenharmony_ci	return ret;
1578c2ecf20Sopenharmony_ci}
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_cistatic const struct vm_operations_struct ocfs2_file_vm_ops = {
1608c2ecf20Sopenharmony_ci	.fault		= ocfs2_fault,
1618c2ecf20Sopenharmony_ci	.page_mkwrite	= ocfs2_page_mkwrite,
1628c2ecf20Sopenharmony_ci};
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ciint ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	int ret = 0, lock_level = 0;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	ret = ocfs2_inode_lock_atime(file_inode(file),
1698c2ecf20Sopenharmony_ci				    file->f_path.mnt, &lock_level, 1);
1708c2ecf20Sopenharmony_ci	if (ret < 0) {
1718c2ecf20Sopenharmony_ci		mlog_errno(ret);
1728c2ecf20Sopenharmony_ci		goto out;
1738c2ecf20Sopenharmony_ci	}
1748c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(file_inode(file), lock_level);
1758c2ecf20Sopenharmony_ciout:
1768c2ecf20Sopenharmony_ci	vma->vm_ops = &ocfs2_file_vm_ops;
1778c2ecf20Sopenharmony_ci	return 0;
1788c2ecf20Sopenharmony_ci}
1798c2ecf20Sopenharmony_ci
180