// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016 Trond Myklebust
 * Copyright (c) 2019 Jeff Layton
 *
 * I/O and data path helper functionality.
 *
 * Heavily borrowed from equivalent code in fs/nfs/io.c
 */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h>
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/types.h>
148c2ecf20Sopenharmony_ci#include <linux/kernel.h>
158c2ecf20Sopenharmony_ci#include <linux/rwsem.h>
168c2ecf20Sopenharmony_ci#include <linux/fs.h>
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci#include "super.h"
198c2ecf20Sopenharmony_ci#include "io.h"
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci/* Call with exclusively locked inode->i_rwsem */
228c2ecf20Sopenharmony_cistatic void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
238c2ecf20Sopenharmony_ci{
248c2ecf20Sopenharmony_ci	lockdep_assert_held_write(&inode->i_rwsem);
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
278c2ecf20Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
288c2ecf20Sopenharmony_ci		ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
298c2ecf20Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
308c2ecf20Sopenharmony_ci		inode_dio_wait(inode);
318c2ecf20Sopenharmony_ci	}
328c2ecf20Sopenharmony_ci}
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci/**
358c2ecf20Sopenharmony_ci * ceph_start_io_read - declare the file is being used for buffered reads
368c2ecf20Sopenharmony_ci * @inode: file inode
378c2ecf20Sopenharmony_ci *
388c2ecf20Sopenharmony_ci * Declare that a buffered read operation is about to start, and ensure
398c2ecf20Sopenharmony_ci * that we block all direct I/O.
408c2ecf20Sopenharmony_ci * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
418c2ecf20Sopenharmony_ci * and holds a shared lock on inode->i_rwsem to ensure that the flag
428c2ecf20Sopenharmony_ci * cannot be changed.
438c2ecf20Sopenharmony_ci * In practice, this means that buffered read operations are allowed to
448c2ecf20Sopenharmony_ci * execute in parallel, thanks to the shared lock, whereas direct I/O
458c2ecf20Sopenharmony_ci * operations need to wait to grab an exclusive lock in order to set
468c2ecf20Sopenharmony_ci * CEPH_I_ODIRECT.
478c2ecf20Sopenharmony_ci * Note that buffered writes and truncates both take a write lock on
488c2ecf20Sopenharmony_ci * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
498c2ecf20Sopenharmony_ci */
508c2ecf20Sopenharmony_civoid
518c2ecf20Sopenharmony_ciceph_start_io_read(struct inode *inode)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	/* Be an optimist! */
568c2ecf20Sopenharmony_ci	down_read(&inode->i_rwsem);
578c2ecf20Sopenharmony_ci	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
588c2ecf20Sopenharmony_ci		return;
598c2ecf20Sopenharmony_ci	up_read(&inode->i_rwsem);
608c2ecf20Sopenharmony_ci	/* Slow path.... */
618c2ecf20Sopenharmony_ci	down_write(&inode->i_rwsem);
628c2ecf20Sopenharmony_ci	ceph_block_o_direct(ci, inode);
638c2ecf20Sopenharmony_ci	downgrade_write(&inode->i_rwsem);
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci/**
678c2ecf20Sopenharmony_ci * ceph_end_io_read - declare that the buffered read operation is done
688c2ecf20Sopenharmony_ci * @inode: file inode
698c2ecf20Sopenharmony_ci *
708c2ecf20Sopenharmony_ci * Declare that a buffered read operation is done, and release the shared
718c2ecf20Sopenharmony_ci * lock on inode->i_rwsem.
728c2ecf20Sopenharmony_ci */
738c2ecf20Sopenharmony_civoid
748c2ecf20Sopenharmony_ciceph_end_io_read(struct inode *inode)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	up_read(&inode->i_rwsem);
778c2ecf20Sopenharmony_ci}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci/**
808c2ecf20Sopenharmony_ci * ceph_start_io_write - declare the file is being used for buffered writes
818c2ecf20Sopenharmony_ci * @inode: file inode
828c2ecf20Sopenharmony_ci *
838c2ecf20Sopenharmony_ci * Declare that a buffered write operation is about to start, and ensure
848c2ecf20Sopenharmony_ci * that we block all direct I/O.
858c2ecf20Sopenharmony_ci */
868c2ecf20Sopenharmony_civoid
878c2ecf20Sopenharmony_ciceph_start_io_write(struct inode *inode)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	down_write(&inode->i_rwsem);
908c2ecf20Sopenharmony_ci	ceph_block_o_direct(ceph_inode(inode), inode);
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci/**
948c2ecf20Sopenharmony_ci * ceph_end_io_write - declare that the buffered write operation is done
958c2ecf20Sopenharmony_ci * @inode: file inode
968c2ecf20Sopenharmony_ci *
978c2ecf20Sopenharmony_ci * Declare that a buffered write operation is done, and release the
988c2ecf20Sopenharmony_ci * lock on inode->i_rwsem.
998c2ecf20Sopenharmony_ci */
1008c2ecf20Sopenharmony_civoid
1018c2ecf20Sopenharmony_ciceph_end_io_write(struct inode *inode)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	up_write(&inode->i_rwsem);
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci/* Call with exclusively locked inode->i_rwsem */
1078c2ecf20Sopenharmony_cistatic void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
1088c2ecf20Sopenharmony_ci{
1098c2ecf20Sopenharmony_ci	lockdep_assert_held_write(&inode->i_rwsem);
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
1128c2ecf20Sopenharmony_ci		spin_lock(&ci->i_ceph_lock);
1138c2ecf20Sopenharmony_ci		ci->i_ceph_flags |= CEPH_I_ODIRECT;
1148c2ecf20Sopenharmony_ci		spin_unlock(&ci->i_ceph_lock);
1158c2ecf20Sopenharmony_ci		/* FIXME: unmap_mapping_range? */
1168c2ecf20Sopenharmony_ci		filemap_write_and_wait(inode->i_mapping);
1178c2ecf20Sopenharmony_ci	}
1188c2ecf20Sopenharmony_ci}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci/**
1218c2ecf20Sopenharmony_ci * ceph_end_io_direct - declare the file is being used for direct i/o
1228c2ecf20Sopenharmony_ci * @inode: file inode
1238c2ecf20Sopenharmony_ci *
1248c2ecf20Sopenharmony_ci * Declare that a direct I/O operation is about to start, and ensure
1258c2ecf20Sopenharmony_ci * that we block all buffered I/O.
1268c2ecf20Sopenharmony_ci * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
1278c2ecf20Sopenharmony_ci * and holds a shared lock on inode->i_rwsem to ensure that the flag
1288c2ecf20Sopenharmony_ci * cannot be changed.
1298c2ecf20Sopenharmony_ci * In practice, this means that direct I/O operations are allowed to
1308c2ecf20Sopenharmony_ci * execute in parallel, thanks to the shared lock, whereas buffered I/O
1318c2ecf20Sopenharmony_ci * operations need to wait to grab an exclusive lock in order to clear
1328c2ecf20Sopenharmony_ci * CEPH_I_ODIRECT.
1338c2ecf20Sopenharmony_ci * Note that buffered writes and truncates both take a write lock on
1348c2ecf20Sopenharmony_ci * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
1358c2ecf20Sopenharmony_ci */
1368c2ecf20Sopenharmony_civoid
1378c2ecf20Sopenharmony_ciceph_start_io_direct(struct inode *inode)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	struct ceph_inode_info *ci = ceph_inode(inode);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	/* Be an optimist! */
1428c2ecf20Sopenharmony_ci	down_read(&inode->i_rwsem);
1438c2ecf20Sopenharmony_ci	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
1448c2ecf20Sopenharmony_ci		return;
1458c2ecf20Sopenharmony_ci	up_read(&inode->i_rwsem);
1468c2ecf20Sopenharmony_ci	/* Slow path.... */
1478c2ecf20Sopenharmony_ci	down_write(&inode->i_rwsem);
1488c2ecf20Sopenharmony_ci	ceph_block_buffered(ci, inode);
1498c2ecf20Sopenharmony_ci	downgrade_write(&inode->i_rwsem);
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci/**
1538c2ecf20Sopenharmony_ci * ceph_end_io_direct - declare that the direct i/o operation is done
1548c2ecf20Sopenharmony_ci * @inode: file inode
1558c2ecf20Sopenharmony_ci *
1568c2ecf20Sopenharmony_ci * Declare that a direct I/O operation is done, and release the shared
1578c2ecf20Sopenharmony_ci * lock on inode->i_rwsem.
1588c2ecf20Sopenharmony_ci */
1598c2ecf20Sopenharmony_civoid
1608c2ecf20Sopenharmony_ciceph_end_io_direct(struct inode *inode)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	up_read(&inode->i_rwsem);
1638c2ecf20Sopenharmony_ci}
164