162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * file.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * File open, close, extend, truncate
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/capability.h>
1162306a36Sopenharmony_ci#include <linux/fs.h>
1262306a36Sopenharmony_ci#include <linux/types.h>
1362306a36Sopenharmony_ci#include <linux/slab.h>
1462306a36Sopenharmony_ci#include <linux/highmem.h>
1562306a36Sopenharmony_ci#include <linux/pagemap.h>
1662306a36Sopenharmony_ci#include <linux/uio.h>
1762306a36Sopenharmony_ci#include <linux/sched.h>
1862306a36Sopenharmony_ci#include <linux/splice.h>
1962306a36Sopenharmony_ci#include <linux/mount.h>
2062306a36Sopenharmony_ci#include <linux/writeback.h>
2162306a36Sopenharmony_ci#include <linux/falloc.h>
2262306a36Sopenharmony_ci#include <linux/quotaops.h>
2362306a36Sopenharmony_ci#include <linux/blkdev.h>
2462306a36Sopenharmony_ci#include <linux/backing-dev.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include <cluster/masklog.h>
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci#include "ocfs2.h"
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include "alloc.h"
3162306a36Sopenharmony_ci#include "aops.h"
3262306a36Sopenharmony_ci#include "dir.h"
3362306a36Sopenharmony_ci#include "dlmglue.h"
3462306a36Sopenharmony_ci#include "extent_map.h"
3562306a36Sopenharmony_ci#include "file.h"
3662306a36Sopenharmony_ci#include "sysfile.h"
3762306a36Sopenharmony_ci#include "inode.h"
3862306a36Sopenharmony_ci#include "ioctl.h"
3962306a36Sopenharmony_ci#include "journal.h"
4062306a36Sopenharmony_ci#include "locks.h"
4162306a36Sopenharmony_ci#include "mmap.h"
4262306a36Sopenharmony_ci#include "suballoc.h"
4362306a36Sopenharmony_ci#include "super.h"
4462306a36Sopenharmony_ci#include "xattr.h"
4562306a36Sopenharmony_ci#include "acl.h"
4662306a36Sopenharmony_ci#include "quota.h"
4762306a36Sopenharmony_ci#include "refcounttree.h"
4862306a36Sopenharmony_ci#include "ocfs2_trace.h"
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci#include "buffer_head_io.h"
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_cistatic int ocfs2_init_file_private(struct inode *inode, struct file *file)
5362306a36Sopenharmony_ci{
5462306a36Sopenharmony_ci	struct ocfs2_file_private *fp;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
5762306a36Sopenharmony_ci	if (!fp)
5862306a36Sopenharmony_ci		return -ENOMEM;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	fp->fp_file = file;
6162306a36Sopenharmony_ci	mutex_init(&fp->fp_mutex);
6262306a36Sopenharmony_ci	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
6362306a36Sopenharmony_ci	file->private_data = fp;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	return 0;
6662306a36Sopenharmony_ci}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_cistatic void ocfs2_free_file_private(struct inode *inode, struct file *file)
6962306a36Sopenharmony_ci{
7062306a36Sopenharmony_ci	struct ocfs2_file_private *fp = file->private_data;
7162306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	if (fp) {
7462306a36Sopenharmony_ci		ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
7562306a36Sopenharmony_ci		ocfs2_lock_res_free(&fp->fp_flock);
7662306a36Sopenharmony_ci		kfree(fp);
7762306a36Sopenharmony_ci		file->private_data = NULL;
7862306a36Sopenharmony_ci	}
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic int ocfs2_file_open(struct inode *inode, struct file *file)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	int status;
8462306a36Sopenharmony_ci	int mode = file->f_flags;
8562306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	trace_ocfs2_file_open(inode, file, file->f_path.dentry,
8862306a36Sopenharmony_ci			      (unsigned long long)oi->ip_blkno,
8962306a36Sopenharmony_ci			      file->f_path.dentry->d_name.len,
9062306a36Sopenharmony_ci			      file->f_path.dentry->d_name.name, mode);
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITE) {
9362306a36Sopenharmony_ci		status = dquot_initialize(inode);
9462306a36Sopenharmony_ci		if (status)
9562306a36Sopenharmony_ci			goto leave;
9662306a36Sopenharmony_ci	}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	/* Check that the inode hasn't been wiped from disk by another
10162306a36Sopenharmony_ci	 * node. If it hasn't then we're safe as long as we hold the
10262306a36Sopenharmony_ci	 * spin lock until our increment of open count. */
10362306a36Sopenharmony_ci	if (oi->ip_flags & OCFS2_INODE_DELETED) {
10462306a36Sopenharmony_ci		spin_unlock(&oi->ip_lock);
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci		status = -ENOENT;
10762306a36Sopenharmony_ci		goto leave;
10862306a36Sopenharmony_ci	}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	if (mode & O_DIRECT)
11162306a36Sopenharmony_ci		oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	oi->ip_open_count++;
11462306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	status = ocfs2_init_file_private(inode, file);
11762306a36Sopenharmony_ci	if (status) {
11862306a36Sopenharmony_ci		/*
11962306a36Sopenharmony_ci		 * We want to set open count back if we're failing the
12062306a36Sopenharmony_ci		 * open.
12162306a36Sopenharmony_ci		 */
12262306a36Sopenharmony_ci		spin_lock(&oi->ip_lock);
12362306a36Sopenharmony_ci		oi->ip_open_count--;
12462306a36Sopenharmony_ci		spin_unlock(&oi->ip_lock);
12562306a36Sopenharmony_ci	}
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	file->f_mode |= FMODE_NOWAIT;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_cileave:
13062306a36Sopenharmony_ci	return status;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic int ocfs2_file_release(struct inode *inode, struct file *file)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
13862306a36Sopenharmony_ci	if (!--oi->ip_open_count)
13962306a36Sopenharmony_ci		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	trace_ocfs2_file_release(inode, file, file->f_path.dentry,
14262306a36Sopenharmony_ci				 oi->ip_blkno,
14362306a36Sopenharmony_ci				 file->f_path.dentry->d_name.len,
14462306a36Sopenharmony_ci				 file->f_path.dentry->d_name.name,
14562306a36Sopenharmony_ci				 oi->ip_open_count);
14662306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	ocfs2_free_file_private(inode, file);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	return 0;
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
/*
 * Open handler for directories: only the per-open private data is set
 * up.  Returns whatever ocfs2_init_file_private() returns.
 */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	int status = ocfs2_init_file_private(inode, file);

	return status;
}
15762306a36Sopenharmony_ci
/*
 * Release handler for directories: frees the per-open private data.
 * Always returns 0.
 */
static int ocfs2_dir_release(struct inode *inode, struct file *file)
{
	ocfs2_free_file_private(inode, file);

	return 0;
}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_cistatic int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
16562306a36Sopenharmony_ci			   int datasync)
16662306a36Sopenharmony_ci{
16762306a36Sopenharmony_ci	int err = 0;
16862306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
16962306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
17062306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
17162306a36Sopenharmony_ci	journal_t *journal = osb->journal->j_journal;
17262306a36Sopenharmony_ci	int ret;
17362306a36Sopenharmony_ci	tid_t commit_tid;
17462306a36Sopenharmony_ci	bool needs_barrier = false;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
17762306a36Sopenharmony_ci			      oi->ip_blkno,
17862306a36Sopenharmony_ci			      file->f_path.dentry->d_name.len,
17962306a36Sopenharmony_ci			      file->f_path.dentry->d_name.name,
18062306a36Sopenharmony_ci			      (unsigned long long)datasync);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
18362306a36Sopenharmony_ci		return -EROFS;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	err = file_write_and_wait_range(file, start, end);
18662306a36Sopenharmony_ci	if (err)
18762306a36Sopenharmony_ci		return err;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
19062306a36Sopenharmony_ci	if (journal->j_flags & JBD2_BARRIER &&
19162306a36Sopenharmony_ci	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
19262306a36Sopenharmony_ci		needs_barrier = true;
19362306a36Sopenharmony_ci	err = jbd2_complete_transaction(journal, commit_tid);
19462306a36Sopenharmony_ci	if (needs_barrier) {
19562306a36Sopenharmony_ci		ret = blkdev_issue_flush(inode->i_sb->s_bdev);
19662306a36Sopenharmony_ci		if (!err)
19762306a36Sopenharmony_ci			err = ret;
19862306a36Sopenharmony_ci	}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	if (err)
20162306a36Sopenharmony_ci		mlog_errno(err);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	return (err < 0) ? -EIO : 0;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ciint ocfs2_should_update_atime(struct inode *inode,
20762306a36Sopenharmony_ci			      struct vfsmount *vfsmnt)
20862306a36Sopenharmony_ci{
20962306a36Sopenharmony_ci	struct timespec64 now;
21062306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
21362306a36Sopenharmony_ci		return 0;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	if ((inode->i_flags & S_NOATIME) ||
21662306a36Sopenharmony_ci	    ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)))
21762306a36Sopenharmony_ci		return 0;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	/*
22062306a36Sopenharmony_ci	 * We can be called with no vfsmnt structure - NFSD will
22162306a36Sopenharmony_ci	 * sometimes do this.
22262306a36Sopenharmony_ci	 *
22362306a36Sopenharmony_ci	 * Note that our action here is different than touch_atime() -
22462306a36Sopenharmony_ci	 * if we can't tell whether this is a noatime mount, then we
22562306a36Sopenharmony_ci	 * don't know whether to trust the value of s_atime_quantum.
22662306a36Sopenharmony_ci	 */
22762306a36Sopenharmony_ci	if (vfsmnt == NULL)
22862306a36Sopenharmony_ci		return 0;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
23162306a36Sopenharmony_ci	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
23262306a36Sopenharmony_ci		return 0;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	if (vfsmnt->mnt_flags & MNT_RELATIME) {
23562306a36Sopenharmony_ci		struct timespec64 ctime = inode_get_ctime(inode);
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci		if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
23862306a36Sopenharmony_ci		    (timespec64_compare(&inode->i_atime, &ctime) <= 0))
23962306a36Sopenharmony_ci			return 1;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci		return 0;
24262306a36Sopenharmony_ci	}
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	now = current_time(inode);
24562306a36Sopenharmony_ci	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
24662306a36Sopenharmony_ci		return 0;
24762306a36Sopenharmony_ci	else
24862306a36Sopenharmony_ci		return 1;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ciint ocfs2_update_inode_atime(struct inode *inode,
25262306a36Sopenharmony_ci			     struct buffer_head *bh)
25362306a36Sopenharmony_ci{
25462306a36Sopenharmony_ci	int ret;
25562306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
25662306a36Sopenharmony_ci	handle_t *handle;
25762306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
26062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
26162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
26262306a36Sopenharmony_ci		mlog_errno(ret);
26362306a36Sopenharmony_ci		goto out;
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
26762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
26862306a36Sopenharmony_ci	if (ret) {
26962306a36Sopenharmony_ci		mlog_errno(ret);
27062306a36Sopenharmony_ci		goto out_commit;
27162306a36Sopenharmony_ci	}
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	/*
27462306a36Sopenharmony_ci	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
27562306a36Sopenharmony_ci	 * have i_rwsem to guard against concurrent changes to other
27662306a36Sopenharmony_ci	 * inode fields.
27762306a36Sopenharmony_ci	 */
27862306a36Sopenharmony_ci	inode->i_atime = current_time(inode);
27962306a36Sopenharmony_ci	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
28062306a36Sopenharmony_ci	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
28162306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
28262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ciout_commit:
28562306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
28662306a36Sopenharmony_ciout:
28762306a36Sopenharmony_ci	return ret;
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ciint ocfs2_set_inode_size(handle_t *handle,
29162306a36Sopenharmony_ci				struct inode *inode,
29262306a36Sopenharmony_ci				struct buffer_head *fe_bh,
29362306a36Sopenharmony_ci				u64 new_i_size)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	int status;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	i_size_write(inode, new_i_size);
29862306a36Sopenharmony_ci	inode->i_blocks = ocfs2_inode_sector_count(inode);
29962306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
30262306a36Sopenharmony_ci	if (status < 0) {
30362306a36Sopenharmony_ci		mlog_errno(status);
30462306a36Sopenharmony_ci		goto bail;
30562306a36Sopenharmony_ci	}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_cibail:
30862306a36Sopenharmony_ci	return status;
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ciint ocfs2_simple_size_update(struct inode *inode,
31262306a36Sopenharmony_ci			     struct buffer_head *di_bh,
31362306a36Sopenharmony_ci			     u64 new_i_size)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	int ret;
31662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
31762306a36Sopenharmony_ci	handle_t *handle = NULL;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
32062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
32162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
32262306a36Sopenharmony_ci		mlog_errno(ret);
32362306a36Sopenharmony_ci		goto out;
32462306a36Sopenharmony_ci	}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	ret = ocfs2_set_inode_size(handle, inode, di_bh,
32762306a36Sopenharmony_ci				   new_i_size);
32862306a36Sopenharmony_ci	if (ret < 0)
32962306a36Sopenharmony_ci		mlog_errno(ret);
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
33262306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
33362306a36Sopenharmony_ciout:
33462306a36Sopenharmony_ci	return ret;
33562306a36Sopenharmony_ci}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_cistatic int ocfs2_cow_file_pos(struct inode *inode,
33862306a36Sopenharmony_ci			      struct buffer_head *fe_bh,
33962306a36Sopenharmony_ci			      u64 offset)
34062306a36Sopenharmony_ci{
34162306a36Sopenharmony_ci	int status;
34262306a36Sopenharmony_ci	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
34362306a36Sopenharmony_ci	unsigned int num_clusters = 0;
34462306a36Sopenharmony_ci	unsigned int ext_flags = 0;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * If the new offset is aligned to the range of the cluster, there is
34862306a36Sopenharmony_ci	 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
34962306a36Sopenharmony_ci	 * CoW either.
35062306a36Sopenharmony_ci	 */
35162306a36Sopenharmony_ci	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
35262306a36Sopenharmony_ci		return 0;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	status = ocfs2_get_clusters(inode, cpos, &phys,
35562306a36Sopenharmony_ci				    &num_clusters, &ext_flags);
35662306a36Sopenharmony_ci	if (status) {
35762306a36Sopenharmony_ci		mlog_errno(status);
35862306a36Sopenharmony_ci		goto out;
35962306a36Sopenharmony_ci	}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
36262306a36Sopenharmony_ci		goto out;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ciout:
36762306a36Sopenharmony_ci	return status;
36862306a36Sopenharmony_ci}
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_cistatic int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
37162306a36Sopenharmony_ci				     struct inode *inode,
37262306a36Sopenharmony_ci				     struct buffer_head *fe_bh,
37362306a36Sopenharmony_ci				     u64 new_i_size)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	int status;
37662306a36Sopenharmony_ci	handle_t *handle;
37762306a36Sopenharmony_ci	struct ocfs2_dinode *di;
37862306a36Sopenharmony_ci	u64 cluster_bytes;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	/*
38162306a36Sopenharmony_ci	 * We need to CoW the cluster contains the offset if it is reflinked
38262306a36Sopenharmony_ci	 * since we will call ocfs2_zero_range_for_truncate later which will
38362306a36Sopenharmony_ci	 * write "0" from offset to the end of the cluster.
38462306a36Sopenharmony_ci	 */
38562306a36Sopenharmony_ci	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
38662306a36Sopenharmony_ci	if (status) {
38762306a36Sopenharmony_ci		mlog_errno(status);
38862306a36Sopenharmony_ci		return status;
38962306a36Sopenharmony_ci	}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	/* TODO: This needs to actually orphan the inode in this
39262306a36Sopenharmony_ci	 * transaction. */
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
39562306a36Sopenharmony_ci	if (IS_ERR(handle)) {
39662306a36Sopenharmony_ci		status = PTR_ERR(handle);
39762306a36Sopenharmony_ci		mlog_errno(status);
39862306a36Sopenharmony_ci		goto out;
39962306a36Sopenharmony_ci	}
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
40262306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
40362306a36Sopenharmony_ci	if (status < 0) {
40462306a36Sopenharmony_ci		mlog_errno(status);
40562306a36Sopenharmony_ci		goto out_commit;
40662306a36Sopenharmony_ci	}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	/*
40962306a36Sopenharmony_ci	 * Do this before setting i_size.
41062306a36Sopenharmony_ci	 */
41162306a36Sopenharmony_ci	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
41262306a36Sopenharmony_ci	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
41362306a36Sopenharmony_ci					       cluster_bytes);
41462306a36Sopenharmony_ci	if (status) {
41562306a36Sopenharmony_ci		mlog_errno(status);
41662306a36Sopenharmony_ci		goto out_commit;
41762306a36Sopenharmony_ci	}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	i_size_write(inode, new_i_size);
42062306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) fe_bh->b_data;
42362306a36Sopenharmony_ci	di->i_size = cpu_to_le64(new_i_size);
42462306a36Sopenharmony_ci	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
42562306a36Sopenharmony_ci	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
42662306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, fe_bh);
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ciout_commit:
43162306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
43262306a36Sopenharmony_ciout:
43362306a36Sopenharmony_ci	return status;
43462306a36Sopenharmony_ci}
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ciint ocfs2_truncate_file(struct inode *inode,
43762306a36Sopenharmony_ci			       struct buffer_head *di_bh,
43862306a36Sopenharmony_ci			       u64 new_i_size)
43962306a36Sopenharmony_ci{
44062306a36Sopenharmony_ci	int status = 0;
44162306a36Sopenharmony_ci	struct ocfs2_dinode *fe = NULL;
44262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
44562306a36Sopenharmony_ci	 * already validated it */
44662306a36Sopenharmony_ci	fe = (struct ocfs2_dinode *) di_bh->b_data;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
44962306a36Sopenharmony_ci				  (unsigned long long)le64_to_cpu(fe->i_size),
45062306a36Sopenharmony_ci				  (unsigned long long)new_i_size);
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
45362306a36Sopenharmony_ci			"Inode %llu, inode i_size = %lld != di "
45462306a36Sopenharmony_ci			"i_size = %llu, i_flags = 0x%x\n",
45562306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
45662306a36Sopenharmony_ci			i_size_read(inode),
45762306a36Sopenharmony_ci			(unsigned long long)le64_to_cpu(fe->i_size),
45862306a36Sopenharmony_ci			le32_to_cpu(fe->i_flags));
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	if (new_i_size > le64_to_cpu(fe->i_size)) {
46162306a36Sopenharmony_ci		trace_ocfs2_truncate_file_error(
46262306a36Sopenharmony_ci			(unsigned long long)le64_to_cpu(fe->i_size),
46362306a36Sopenharmony_ci			(unsigned long long)new_i_size);
46462306a36Sopenharmony_ci		status = -EINVAL;
46562306a36Sopenharmony_ci		mlog_errno(status);
46662306a36Sopenharmony_ci		goto bail;
46762306a36Sopenharmony_ci	}
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	ocfs2_resv_discard(&osb->osb_la_resmap,
47262306a36Sopenharmony_ci			   &OCFS2_I(inode)->ip_la_data_resv);
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	/*
47562306a36Sopenharmony_ci	 * The inode lock forced other nodes to sync and drop their
47662306a36Sopenharmony_ci	 * pages, which (correctly) happens even if we have a truncate
47762306a36Sopenharmony_ci	 * without allocation change - ocfs2 cluster sizes can be much
47862306a36Sopenharmony_ci	 * greater than page size, so we have to truncate them
47962306a36Sopenharmony_ci	 * anyway.
48062306a36Sopenharmony_ci	 */
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
48362306a36Sopenharmony_ci		unmap_mapping_range(inode->i_mapping,
48462306a36Sopenharmony_ci				    new_i_size + PAGE_SIZE - 1, 0, 1);
48562306a36Sopenharmony_ci		truncate_inode_pages(inode->i_mapping, new_i_size);
48662306a36Sopenharmony_ci		status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
48762306a36Sopenharmony_ci					       i_size_read(inode), 1);
48862306a36Sopenharmony_ci		if (status)
48962306a36Sopenharmony_ci			mlog_errno(status);
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci		goto bail_unlock_sem;
49262306a36Sopenharmony_ci	}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	/* alright, we're going to need to do a full blown alloc size
49562306a36Sopenharmony_ci	 * change. Orphan the inode so that recovery can complete the
49662306a36Sopenharmony_ci	 * truncate if necessary. This does the task of marking
49762306a36Sopenharmony_ci	 * i_size. */
49862306a36Sopenharmony_ci	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
49962306a36Sopenharmony_ci	if (status < 0) {
50062306a36Sopenharmony_ci		mlog_errno(status);
50162306a36Sopenharmony_ci		goto bail_unlock_sem;
50262306a36Sopenharmony_ci	}
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
50562306a36Sopenharmony_ci	truncate_inode_pages(inode->i_mapping, new_i_size);
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	status = ocfs2_commit_truncate(osb, inode, di_bh);
50862306a36Sopenharmony_ci	if (status < 0) {
50962306a36Sopenharmony_ci		mlog_errno(status);
51062306a36Sopenharmony_ci		goto bail_unlock_sem;
51162306a36Sopenharmony_ci	}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	/* TODO: orphan dir cleanup here. */
51462306a36Sopenharmony_cibail_unlock_sem:
51562306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_cibail:
51862306a36Sopenharmony_ci	if (!status && OCFS2_I(inode)->ip_clusters == 0)
51962306a36Sopenharmony_ci		status = ocfs2_try_remove_refcount_tree(inode, di_bh);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	return status;
52262306a36Sopenharmony_ci}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci/*
52562306a36Sopenharmony_ci * extend file allocation only here.
52662306a36Sopenharmony_ci * we'll update all the disk stuff, and oip->alloc_size
52762306a36Sopenharmony_ci *
52862306a36Sopenharmony_ci * expect stuff to be locked, a transaction started and enough data /
52962306a36Sopenharmony_ci * metadata reservations in the contexts.
53062306a36Sopenharmony_ci *
53162306a36Sopenharmony_ci * Will return -EAGAIN, and a reason if a restart is needed.
53262306a36Sopenharmony_ci * If passed in, *reason will always be set, even in error.
53362306a36Sopenharmony_ci */
53462306a36Sopenharmony_ciint ocfs2_add_inode_data(struct ocfs2_super *osb,
53562306a36Sopenharmony_ci			 struct inode *inode,
53662306a36Sopenharmony_ci			 u32 *logical_offset,
53762306a36Sopenharmony_ci			 u32 clusters_to_add,
53862306a36Sopenharmony_ci			 int mark_unwritten,
53962306a36Sopenharmony_ci			 struct buffer_head *fe_bh,
54062306a36Sopenharmony_ci			 handle_t *handle,
54162306a36Sopenharmony_ci			 struct ocfs2_alloc_context *data_ac,
54262306a36Sopenharmony_ci			 struct ocfs2_alloc_context *meta_ac,
54362306a36Sopenharmony_ci			 enum ocfs2_alloc_restarted *reason_ret)
54462306a36Sopenharmony_ci{
54562306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
54862306a36Sopenharmony_ci	return ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
54962306a36Sopenharmony_ci					   clusters_to_add, mark_unwritten,
55062306a36Sopenharmony_ci					   data_ac, meta_ac, reason_ret);
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_cistatic int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
55462306a36Sopenharmony_ci				   u32 clusters_to_add, int mark_unwritten)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	int status = 0;
55762306a36Sopenharmony_ci	int restart_func = 0;
55862306a36Sopenharmony_ci	int credits;
55962306a36Sopenharmony_ci	u32 prev_clusters;
56062306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
56162306a36Sopenharmony_ci	struct ocfs2_dinode *fe = NULL;
56262306a36Sopenharmony_ci	handle_t *handle = NULL;
56362306a36Sopenharmony_ci	struct ocfs2_alloc_context *data_ac = NULL;
56462306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
56562306a36Sopenharmony_ci	enum ocfs2_alloc_restarted why = RESTART_NONE;
56662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
56762306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
56862306a36Sopenharmony_ci	int did_quota = 0;
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	/*
57162306a36Sopenharmony_ci	 * Unwritten extent only exists for file systems which
57262306a36Sopenharmony_ci	 * support holes.
57362306a36Sopenharmony_ci	 */
57462306a36Sopenharmony_ci	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	status = ocfs2_read_inode_block(inode, &bh);
57762306a36Sopenharmony_ci	if (status < 0) {
57862306a36Sopenharmony_ci		mlog_errno(status);
57962306a36Sopenharmony_ci		goto leave;
58062306a36Sopenharmony_ci	}
58162306a36Sopenharmony_ci	fe = (struct ocfs2_dinode *) bh->b_data;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_cirestart_all:
58462306a36Sopenharmony_ci	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
58762306a36Sopenharmony_ci	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
58862306a36Sopenharmony_ci				       &data_ac, &meta_ac);
58962306a36Sopenharmony_ci	if (status) {
59062306a36Sopenharmony_ci		mlog_errno(status);
59162306a36Sopenharmony_ci		goto leave;
59262306a36Sopenharmony_ci	}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list);
59562306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
59662306a36Sopenharmony_ci	if (IS_ERR(handle)) {
59762306a36Sopenharmony_ci		status = PTR_ERR(handle);
59862306a36Sopenharmony_ci		handle = NULL;
59962306a36Sopenharmony_ci		mlog_errno(status);
60062306a36Sopenharmony_ci		goto leave;
60162306a36Sopenharmony_ci	}
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_cirestarted_transaction:
60462306a36Sopenharmony_ci	trace_ocfs2_extend_allocation(
60562306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
60662306a36Sopenharmony_ci		(unsigned long long)i_size_read(inode),
60762306a36Sopenharmony_ci		le32_to_cpu(fe->i_clusters), clusters_to_add,
60862306a36Sopenharmony_ci		why, restart_func);
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	status = dquot_alloc_space_nodirty(inode,
61162306a36Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
61262306a36Sopenharmony_ci	if (status)
61362306a36Sopenharmony_ci		goto leave;
61462306a36Sopenharmony_ci	did_quota = 1;
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci	/* reserve a write to the file entry early on - that we if we
61762306a36Sopenharmony_ci	 * run out of credits in the allocation path, we can still
61862306a36Sopenharmony_ci	 * update i_size. */
61962306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
62062306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
62162306a36Sopenharmony_ci	if (status < 0) {
62262306a36Sopenharmony_ci		mlog_errno(status);
62362306a36Sopenharmony_ci		goto leave;
62462306a36Sopenharmony_ci	}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	prev_clusters = OCFS2_I(inode)->ip_clusters;
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	status = ocfs2_add_inode_data(osb,
62962306a36Sopenharmony_ci				      inode,
63062306a36Sopenharmony_ci				      &logical_start,
63162306a36Sopenharmony_ci				      clusters_to_add,
63262306a36Sopenharmony_ci				      mark_unwritten,
63362306a36Sopenharmony_ci				      bh,
63462306a36Sopenharmony_ci				      handle,
63562306a36Sopenharmony_ci				      data_ac,
63662306a36Sopenharmony_ci				      meta_ac,
63762306a36Sopenharmony_ci				      &why);
63862306a36Sopenharmony_ci	if ((status < 0) && (status != -EAGAIN)) {
63962306a36Sopenharmony_ci		if (status != -ENOSPC)
64062306a36Sopenharmony_ci			mlog_errno(status);
64162306a36Sopenharmony_ci		goto leave;
64262306a36Sopenharmony_ci	}
64362306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
64462306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	spin_lock(&OCFS2_I(inode)->ip_lock);
64762306a36Sopenharmony_ci	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
64862306a36Sopenharmony_ci	spin_unlock(&OCFS2_I(inode)->ip_lock);
64962306a36Sopenharmony_ci	/* Release unused quota reservation */
65062306a36Sopenharmony_ci	dquot_free_space(inode,
65162306a36Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
65262306a36Sopenharmony_ci	did_quota = 0;
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	if (why != RESTART_NONE && clusters_to_add) {
65562306a36Sopenharmony_ci		if (why == RESTART_META) {
65662306a36Sopenharmony_ci			restart_func = 1;
65762306a36Sopenharmony_ci			status = 0;
65862306a36Sopenharmony_ci		} else {
65962306a36Sopenharmony_ci			BUG_ON(why != RESTART_TRANS);
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci			status = ocfs2_allocate_extend_trans(handle, 1);
66262306a36Sopenharmony_ci			if (status < 0) {
66362306a36Sopenharmony_ci				/* handle still has to be committed at
66462306a36Sopenharmony_ci				 * this point. */
66562306a36Sopenharmony_ci				status = -ENOMEM;
66662306a36Sopenharmony_ci				mlog_errno(status);
66762306a36Sopenharmony_ci				goto leave;
66862306a36Sopenharmony_ci			}
66962306a36Sopenharmony_ci			goto restarted_transaction;
67062306a36Sopenharmony_ci		}
67162306a36Sopenharmony_ci	}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno,
67462306a36Sopenharmony_ci	     le32_to_cpu(fe->i_clusters),
67562306a36Sopenharmony_ci	     (unsigned long long)le64_to_cpu(fe->i_size),
67662306a36Sopenharmony_ci	     OCFS2_I(inode)->ip_clusters,
67762306a36Sopenharmony_ci	     (unsigned long long)i_size_read(inode));
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cileave:
68062306a36Sopenharmony_ci	if (status < 0 && did_quota)
68162306a36Sopenharmony_ci		dquot_free_space(inode,
68262306a36Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
68362306a36Sopenharmony_ci	if (handle) {
68462306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
68562306a36Sopenharmony_ci		handle = NULL;
68662306a36Sopenharmony_ci	}
68762306a36Sopenharmony_ci	if (data_ac) {
68862306a36Sopenharmony_ci		ocfs2_free_alloc_context(data_ac);
68962306a36Sopenharmony_ci		data_ac = NULL;
69062306a36Sopenharmony_ci	}
69162306a36Sopenharmony_ci	if (meta_ac) {
69262306a36Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
69362306a36Sopenharmony_ci		meta_ac = NULL;
69462306a36Sopenharmony_ci	}
69562306a36Sopenharmony_ci	if ((!status) && restart_func) {
69662306a36Sopenharmony_ci		restart_func = 0;
69762306a36Sopenharmony_ci		goto restart_all;
69862306a36Sopenharmony_ci	}
69962306a36Sopenharmony_ci	brelse(bh);
70062306a36Sopenharmony_ci	bh = NULL;
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	return status;
70362306a36Sopenharmony_ci}
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci/*
70662306a36Sopenharmony_ci * While a write will already be ordering the data, a truncate will not.
70762306a36Sopenharmony_ci * Thus, we need to explicitly order the zeroed pages.
70862306a36Sopenharmony_ci */
70962306a36Sopenharmony_cistatic handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
71062306a36Sopenharmony_ci						      struct buffer_head *di_bh,
71162306a36Sopenharmony_ci						      loff_t start_byte,
71262306a36Sopenharmony_ci						      loff_t length)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
71562306a36Sopenharmony_ci	handle_t *handle = NULL;
71662306a36Sopenharmony_ci	int ret = 0;
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	if (!ocfs2_should_order_data(inode))
71962306a36Sopenharmony_ci		goto out;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
72262306a36Sopenharmony_ci	if (IS_ERR(handle)) {
72362306a36Sopenharmony_ci		ret = -ENOMEM;
72462306a36Sopenharmony_ci		mlog_errno(ret);
72562306a36Sopenharmony_ci		goto out;
72662306a36Sopenharmony_ci	}
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
72962306a36Sopenharmony_ci	if (ret < 0) {
73062306a36Sopenharmony_ci		mlog_errno(ret);
73162306a36Sopenharmony_ci		goto out;
73262306a36Sopenharmony_ci	}
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
73562306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
73662306a36Sopenharmony_ci	if (ret)
73762306a36Sopenharmony_ci		mlog_errno(ret);
73862306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ciout:
74162306a36Sopenharmony_ci	if (ret) {
74262306a36Sopenharmony_ci		if (!IS_ERR(handle))
74362306a36Sopenharmony_ci			ocfs2_commit_trans(osb, handle);
74462306a36Sopenharmony_ci		handle = ERR_PTR(ret);
74562306a36Sopenharmony_ci	}
74662306a36Sopenharmony_ci	return handle;
74762306a36Sopenharmony_ci}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci/* Some parts of this taken from generic_cont_expand, which turned out
75062306a36Sopenharmony_ci * to be too fragile to do exactly what we need without us having to
75162306a36Sopenharmony_ci * worry about recursive locking in ->write_begin() and ->write_end(). */
75262306a36Sopenharmony_cistatic int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
75362306a36Sopenharmony_ci				 u64 abs_to, struct buffer_head *di_bh)
75462306a36Sopenharmony_ci{
75562306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
75662306a36Sopenharmony_ci	struct page *page;
75762306a36Sopenharmony_ci	unsigned long index = abs_from >> PAGE_SHIFT;
75862306a36Sopenharmony_ci	handle_t *handle;
75962306a36Sopenharmony_ci	int ret = 0;
76062306a36Sopenharmony_ci	unsigned zero_from, zero_to, block_start, block_end;
76162306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	BUG_ON(abs_from >= abs_to);
76462306a36Sopenharmony_ci	BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
76562306a36Sopenharmony_ci	BUG_ON(abs_from & (inode->i_blkbits - 1));
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
76862306a36Sopenharmony_ci						      abs_from,
76962306a36Sopenharmony_ci						      abs_to - abs_from);
77062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
77162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
77262306a36Sopenharmony_ci		goto out;
77362306a36Sopenharmony_ci	}
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	page = find_or_create_page(mapping, index, GFP_NOFS);
77662306a36Sopenharmony_ci	if (!page) {
77762306a36Sopenharmony_ci		ret = -ENOMEM;
77862306a36Sopenharmony_ci		mlog_errno(ret);
77962306a36Sopenharmony_ci		goto out_commit_trans;
78062306a36Sopenharmony_ci	}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	/* Get the offsets within the page that we want to zero */
78362306a36Sopenharmony_ci	zero_from = abs_from & (PAGE_SIZE - 1);
78462306a36Sopenharmony_ci	zero_to = abs_to & (PAGE_SIZE - 1);
78562306a36Sopenharmony_ci	if (!zero_to)
78662306a36Sopenharmony_ci		zero_to = PAGE_SIZE;
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	trace_ocfs2_write_zero_page(
78962306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
79062306a36Sopenharmony_ci			(unsigned long long)abs_from,
79162306a36Sopenharmony_ci			(unsigned long long)abs_to,
79262306a36Sopenharmony_ci			index, zero_from, zero_to);
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	/* We know that zero_from is block aligned */
79562306a36Sopenharmony_ci	for (block_start = zero_from; block_start < zero_to;
79662306a36Sopenharmony_ci	     block_start = block_end) {
79762306a36Sopenharmony_ci		block_end = block_start + i_blocksize(inode);
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci		/*
80062306a36Sopenharmony_ci		 * block_start is block-aligned.  Bump it by one to force
80162306a36Sopenharmony_ci		 * __block_write_begin and block_commit_write to zero the
80262306a36Sopenharmony_ci		 * whole block.
80362306a36Sopenharmony_ci		 */
80462306a36Sopenharmony_ci		ret = __block_write_begin(page, block_start + 1, 0,
80562306a36Sopenharmony_ci					  ocfs2_get_block);
80662306a36Sopenharmony_ci		if (ret < 0) {
80762306a36Sopenharmony_ci			mlog_errno(ret);
80862306a36Sopenharmony_ci			goto out_unlock;
80962306a36Sopenharmony_ci		}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci		/* must not update i_size! */
81362306a36Sopenharmony_ci		block_commit_write(page, block_start + 1, block_start + 1);
81462306a36Sopenharmony_ci	}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	/*
81762306a36Sopenharmony_ci	 * fs-writeback will release the dirty pages without page lock
81862306a36Sopenharmony_ci	 * whose offset are over inode size, the release happens at
81962306a36Sopenharmony_ci	 * block_write_full_page().
82062306a36Sopenharmony_ci	 */
82162306a36Sopenharmony_ci	i_size_write(inode, abs_to);
82262306a36Sopenharmony_ci	inode->i_blocks = ocfs2_inode_sector_count(inode);
82362306a36Sopenharmony_ci	di->i_size = cpu_to_le64((u64)i_size_read(inode));
82462306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
82562306a36Sopenharmony_ci	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
82662306a36Sopenharmony_ci	di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
82762306a36Sopenharmony_ci	di->i_mtime_nsec = di->i_ctime_nsec;
82862306a36Sopenharmony_ci	if (handle) {
82962306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, di_bh);
83062306a36Sopenharmony_ci		ocfs2_update_inode_fsync_trans(handle, inode, 1);
83162306a36Sopenharmony_ci	}
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ciout_unlock:
83462306a36Sopenharmony_ci	unlock_page(page);
83562306a36Sopenharmony_ci	put_page(page);
83662306a36Sopenharmony_ciout_commit_trans:
83762306a36Sopenharmony_ci	if (handle)
83862306a36Sopenharmony_ci		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
83962306a36Sopenharmony_ciout:
84062306a36Sopenharmony_ci	return ret;
84162306a36Sopenharmony_ci}
84262306a36Sopenharmony_ci
/*
 * Find the next range to zero.  We do this in terms of bytes because
 * that's what ocfs2_zero_extend() wants, and it is dealing with the
 * pagecache.  We may return multiple extents.
 *
 * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
 * needs to be zeroed.  range_start and range_end return the next zeroing
 * range.  A subsequent call should pass the previous range_end as its
 * zero_start.  If range_end is 0, there's nothing to do.
 *
 * Unwritten extents are skipped over.  Refcounted extents are CoWd.
 *
 * The returned range is cluster aligned, so range_start may lie before
 * zero_start and range_end may lie after zero_end; the caller is expected
 * to trim it.  *range_start is left untouched when *range_end is set to 0
 * and neither output is written when an error is returned.
 *
 * Returns 0 on success or the error reported by ocfs2_get_clusters() or
 * ocfs2_refcount_cow().
 */
static int ocfs2_zero_extend_get_range(struct inode *inode,
				       struct buffer_head *di_bh,
				       u64 zero_start, u64 zero_end,
				       u64 *range_start, u64 *range_end)
{
	int rc = 0, needs_cow = 0;
	u32 p_cpos, zero_clusters = 0;
	/* First cluster to look at (rounded down) ... */
	u32 zero_cpos =
		zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	/* ... and the cluster count covering zero_end, used as exclusive limit */
	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
	unsigned int num_clusters = 0;
	unsigned int ext_flags = 0;

	/*
	 * Phase 1: walk forward until we hit the first extent that has a
	 * physical cluster and is not marked unwritten.  Everything before
	 * it is skipped.
	 */
	while (zero_cpos < last_cpos) {
		rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
					&num_clusters, &ext_flags);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}

		if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
			zero_clusters = num_clusters;
			if (ext_flags & OCFS2_EXT_REFCOUNTED)
				needs_cow = 1;
			break;
		}

		zero_cpos += num_clusters;
	}
	/* Nothing in [zero_start, zero_end) needs zeroing. */
	if (!zero_clusters) {
		*range_end = 0;
		goto out;
	}

	/*
	 * Phase 2: keep growing the range while the following extents also
	 * need zeroing; stop at the first one without a physical cluster
	 * or marked unwritten.
	 */
	while ((zero_cpos + zero_clusters) < last_cpos) {
		rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
					&p_cpos, &num_clusters,
					&ext_flags);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}

		if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
			break;
		if (ext_flags & OCFS2_EXT_REFCOUNTED)
			needs_cow = 1;
		zero_clusters += num_clusters;
	}
	/* The last extent may run past the limit; clamp the range to it. */
	if ((zero_cpos + zero_clusters) > last_cpos)
		zero_clusters = last_cpos - zero_cpos;

	/* At least one extent in the range was refcounted: CoW the range. */
	if (needs_cow) {
		rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
					zero_clusters, UINT_MAX);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}
	}

	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
					     zero_cpos + zero_clusters);

out:
	return rc;
}
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci/*
92662306a36Sopenharmony_ci * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
92762306a36Sopenharmony_ci * has made sure that the entire range needs zeroing.
92862306a36Sopenharmony_ci */
92962306a36Sopenharmony_cistatic int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
93062306a36Sopenharmony_ci				   u64 range_end, struct buffer_head *di_bh)
93162306a36Sopenharmony_ci{
93262306a36Sopenharmony_ci	int rc = 0;
93362306a36Sopenharmony_ci	u64 next_pos;
93462306a36Sopenharmony_ci	u64 zero_pos = range_start;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	trace_ocfs2_zero_extend_range(
93762306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
93862306a36Sopenharmony_ci			(unsigned long long)range_start,
93962306a36Sopenharmony_ci			(unsigned long long)range_end);
94062306a36Sopenharmony_ci	BUG_ON(range_start >= range_end);
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	while (zero_pos < range_end) {
94362306a36Sopenharmony_ci		next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE;
94462306a36Sopenharmony_ci		if (next_pos > range_end)
94562306a36Sopenharmony_ci			next_pos = range_end;
94662306a36Sopenharmony_ci		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
94762306a36Sopenharmony_ci		if (rc < 0) {
94862306a36Sopenharmony_ci			mlog_errno(rc);
94962306a36Sopenharmony_ci			break;
95062306a36Sopenharmony_ci		}
95162306a36Sopenharmony_ci		zero_pos = next_pos;
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_ci		/*
95462306a36Sopenharmony_ci		 * Very large extends have the potential to lock up
95562306a36Sopenharmony_ci		 * the cpu for extended periods of time.
95662306a36Sopenharmony_ci		 */
95762306a36Sopenharmony_ci		cond_resched();
95862306a36Sopenharmony_ci	}
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	return rc;
96162306a36Sopenharmony_ci}
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ciint ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
96462306a36Sopenharmony_ci		      loff_t zero_to_size)
96562306a36Sopenharmony_ci{
96662306a36Sopenharmony_ci	int ret = 0;
96762306a36Sopenharmony_ci	u64 zero_start, range_start = 0, range_end = 0;
96862306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
97162306a36Sopenharmony_ci	trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno,
97262306a36Sopenharmony_ci				(unsigned long long)zero_start,
97362306a36Sopenharmony_ci				(unsigned long long)i_size_read(inode));
97462306a36Sopenharmony_ci	while (zero_start < zero_to_size) {
97562306a36Sopenharmony_ci		ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
97662306a36Sopenharmony_ci						  zero_to_size,
97762306a36Sopenharmony_ci						  &range_start,
97862306a36Sopenharmony_ci						  &range_end);
97962306a36Sopenharmony_ci		if (ret) {
98062306a36Sopenharmony_ci			mlog_errno(ret);
98162306a36Sopenharmony_ci			break;
98262306a36Sopenharmony_ci		}
98362306a36Sopenharmony_ci		if (!range_end)
98462306a36Sopenharmony_ci			break;
98562306a36Sopenharmony_ci		/* Trim the ends */
98662306a36Sopenharmony_ci		if (range_start < zero_start)
98762306a36Sopenharmony_ci			range_start = zero_start;
98862306a36Sopenharmony_ci		if (range_end > zero_to_size)
98962306a36Sopenharmony_ci			range_end = zero_to_size;
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci		ret = ocfs2_zero_extend_range(inode, range_start,
99262306a36Sopenharmony_ci					      range_end, di_bh);
99362306a36Sopenharmony_ci		if (ret) {
99462306a36Sopenharmony_ci			mlog_errno(ret);
99562306a36Sopenharmony_ci			break;
99662306a36Sopenharmony_ci		}
99762306a36Sopenharmony_ci		zero_start = range_end;
99862306a36Sopenharmony_ci	}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci	return ret;
100162306a36Sopenharmony_ci}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ciint ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
100462306a36Sopenharmony_ci			  u64 new_i_size, u64 zero_to)
100562306a36Sopenharmony_ci{
100662306a36Sopenharmony_ci	int ret;
100762306a36Sopenharmony_ci	u32 clusters_to_add;
100862306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	/*
101162306a36Sopenharmony_ci	 * Only quota files call this without a bh, and they can't be
101262306a36Sopenharmony_ci	 * refcounted.
101362306a36Sopenharmony_ci	 */
101462306a36Sopenharmony_ci	BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode));
101562306a36Sopenharmony_ci	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
101862306a36Sopenharmony_ci	if (clusters_to_add < oi->ip_clusters)
101962306a36Sopenharmony_ci		clusters_to_add = 0;
102062306a36Sopenharmony_ci	else
102162306a36Sopenharmony_ci		clusters_to_add -= oi->ip_clusters;
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	if (clusters_to_add) {
102462306a36Sopenharmony_ci		ret = ocfs2_extend_allocation(inode, oi->ip_clusters,
102562306a36Sopenharmony_ci					      clusters_to_add, 0);
102662306a36Sopenharmony_ci		if (ret) {
102762306a36Sopenharmony_ci			mlog_errno(ret);
102862306a36Sopenharmony_ci			goto out;
102962306a36Sopenharmony_ci		}
103062306a36Sopenharmony_ci	}
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	/*
103362306a36Sopenharmony_ci	 * Call this even if we don't add any clusters to the tree. We
103462306a36Sopenharmony_ci	 * still need to zero the area between the old i_size and the
103562306a36Sopenharmony_ci	 * new i_size.
103662306a36Sopenharmony_ci	 */
103762306a36Sopenharmony_ci	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
103862306a36Sopenharmony_ci	if (ret < 0)
103962306a36Sopenharmony_ci		mlog_errno(ret);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ciout:
104262306a36Sopenharmony_ci	return ret;
104362306a36Sopenharmony_ci}
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_cistatic int ocfs2_extend_file(struct inode *inode,
104662306a36Sopenharmony_ci			     struct buffer_head *di_bh,
104762306a36Sopenharmony_ci			     u64 new_i_size)
104862306a36Sopenharmony_ci{
104962306a36Sopenharmony_ci	int ret = 0;
105062306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	BUG_ON(!di_bh);
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	/* setattr sometimes calls us like this. */
105562306a36Sopenharmony_ci	if (new_i_size == 0)
105662306a36Sopenharmony_ci		goto out;
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci	if (i_size_read(inode) == new_i_size)
105962306a36Sopenharmony_ci		goto out;
106062306a36Sopenharmony_ci	BUG_ON(new_i_size < i_size_read(inode));
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci	/*
106362306a36Sopenharmony_ci	 * The alloc sem blocks people in read/write from reading our
106462306a36Sopenharmony_ci	 * allocation until we're done changing it. We depend on
106562306a36Sopenharmony_ci	 * i_rwsem to block other extend/truncate calls while we're
106662306a36Sopenharmony_ci	 * here.  We even have to hold it for sparse files because there
106762306a36Sopenharmony_ci	 * might be some tail zeroing.
106862306a36Sopenharmony_ci	 */
106962306a36Sopenharmony_ci	down_write(&oi->ip_alloc_sem);
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
107262306a36Sopenharmony_ci		/*
107362306a36Sopenharmony_ci		 * We can optimize small extends by keeping the inodes
107462306a36Sopenharmony_ci		 * inline data.
107562306a36Sopenharmony_ci		 */
107662306a36Sopenharmony_ci		if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
107762306a36Sopenharmony_ci			up_write(&oi->ip_alloc_sem);
107862306a36Sopenharmony_ci			goto out_update_size;
107962306a36Sopenharmony_ci		}
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
108262306a36Sopenharmony_ci		if (ret) {
108362306a36Sopenharmony_ci			up_write(&oi->ip_alloc_sem);
108462306a36Sopenharmony_ci			mlog_errno(ret);
108562306a36Sopenharmony_ci			goto out;
108662306a36Sopenharmony_ci		}
108762306a36Sopenharmony_ci	}
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
109062306a36Sopenharmony_ci		ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
109162306a36Sopenharmony_ci	else
109262306a36Sopenharmony_ci		ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
109362306a36Sopenharmony_ci					    new_i_size);
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	up_write(&oi->ip_alloc_sem);
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ci	if (ret < 0) {
109862306a36Sopenharmony_ci		mlog_errno(ret);
109962306a36Sopenharmony_ci		goto out;
110062306a36Sopenharmony_ci	}
110162306a36Sopenharmony_ci
110262306a36Sopenharmony_ciout_update_size:
110362306a36Sopenharmony_ci	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
110462306a36Sopenharmony_ci	if (ret < 0)
110562306a36Sopenharmony_ci		mlog_errno(ret);
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_ciout:
110862306a36Sopenharmony_ci	return ret;
110962306a36Sopenharmony_ci}
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ciint ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
111262306a36Sopenharmony_ci		  struct iattr *attr)
111362306a36Sopenharmony_ci{
111462306a36Sopenharmony_ci	int status = 0, size_change;
111562306a36Sopenharmony_ci	int inode_locked = 0;
111662306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
111762306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
111862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
111962306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
112062306a36Sopenharmony_ci	handle_t *handle = NULL;
112162306a36Sopenharmony_ci	struct dquot *transfer_to[MAXQUOTAS] = { };
112262306a36Sopenharmony_ci	int qtype;
112362306a36Sopenharmony_ci	int had_lock;
112462306a36Sopenharmony_ci	struct ocfs2_lock_holder oh;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	trace_ocfs2_setattr(inode, dentry,
112762306a36Sopenharmony_ci			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
112862306a36Sopenharmony_ci			    dentry->d_name.len, dentry->d_name.name,
112962306a36Sopenharmony_ci			    attr->ia_valid, attr->ia_mode,
113062306a36Sopenharmony_ci			    from_kuid(&init_user_ns, attr->ia_uid),
113162306a36Sopenharmony_ci			    from_kgid(&init_user_ns, attr->ia_gid));
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci	/* ensuring we don't even attempt to truncate a symlink */
113462306a36Sopenharmony_ci	if (S_ISLNK(inode->i_mode))
113562306a36Sopenharmony_ci		attr->ia_valid &= ~ATTR_SIZE;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
113862306a36Sopenharmony_ci			   | ATTR_GID | ATTR_UID | ATTR_MODE)
113962306a36Sopenharmony_ci	if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
114062306a36Sopenharmony_ci		return 0;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	status = setattr_prepare(&nop_mnt_idmap, dentry, attr);
114362306a36Sopenharmony_ci	if (status)
114462306a36Sopenharmony_ci		return status;
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci	if (is_quota_modification(&nop_mnt_idmap, inode, attr)) {
114762306a36Sopenharmony_ci		status = dquot_initialize(inode);
114862306a36Sopenharmony_ci		if (status)
114962306a36Sopenharmony_ci			return status;
115062306a36Sopenharmony_ci	}
115162306a36Sopenharmony_ci	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
115262306a36Sopenharmony_ci	if (size_change) {
115362306a36Sopenharmony_ci		/*
115462306a36Sopenharmony_ci		 * Here we should wait dio to finish before inode lock
115562306a36Sopenharmony_ci		 * to avoid a deadlock between ocfs2_setattr() and
115662306a36Sopenharmony_ci		 * ocfs2_dio_end_io_write()
115762306a36Sopenharmony_ci		 */
115862306a36Sopenharmony_ci		inode_dio_wait(inode);
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci		status = ocfs2_rw_lock(inode, 1);
116162306a36Sopenharmony_ci		if (status < 0) {
116262306a36Sopenharmony_ci			mlog_errno(status);
116362306a36Sopenharmony_ci			goto bail;
116462306a36Sopenharmony_ci		}
116562306a36Sopenharmony_ci	}
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
116862306a36Sopenharmony_ci	if (had_lock < 0) {
116962306a36Sopenharmony_ci		status = had_lock;
117062306a36Sopenharmony_ci		goto bail_unlock_rw;
117162306a36Sopenharmony_ci	} else if (had_lock) {
117262306a36Sopenharmony_ci		/*
117362306a36Sopenharmony_ci		 * As far as we know, ocfs2_setattr() could only be the first
117462306a36Sopenharmony_ci		 * VFS entry point in the call chain of recursive cluster
117562306a36Sopenharmony_ci		 * locking issue.
117662306a36Sopenharmony_ci		 *
117762306a36Sopenharmony_ci		 * For instance:
117862306a36Sopenharmony_ci		 * chmod_common()
117962306a36Sopenharmony_ci		 *  notify_change()
118062306a36Sopenharmony_ci		 *   ocfs2_setattr()
118162306a36Sopenharmony_ci		 *    posix_acl_chmod()
118262306a36Sopenharmony_ci		 *     ocfs2_iop_get_acl()
118362306a36Sopenharmony_ci		 *
118462306a36Sopenharmony_ci		 * But, we're not 100% sure if it's always true, because the
118562306a36Sopenharmony_ci		 * ordering of the VFS entry points in the call chain is out
118662306a36Sopenharmony_ci		 * of our control. So, we'd better dump the stack here to
118762306a36Sopenharmony_ci		 * catch the other cases of recursive locking.
118862306a36Sopenharmony_ci		 */
118962306a36Sopenharmony_ci		mlog(ML_ERROR, "Another case of recursive locking:\n");
119062306a36Sopenharmony_ci		dump_stack();
119162306a36Sopenharmony_ci	}
119262306a36Sopenharmony_ci	inode_locked = 1;
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	if (size_change) {
119562306a36Sopenharmony_ci		status = inode_newsize_ok(inode, attr->ia_size);
119662306a36Sopenharmony_ci		if (status)
119762306a36Sopenharmony_ci			goto bail_unlock;
119862306a36Sopenharmony_ci
119962306a36Sopenharmony_ci		if (i_size_read(inode) >= attr->ia_size) {
120062306a36Sopenharmony_ci			if (ocfs2_should_order_data(inode)) {
120162306a36Sopenharmony_ci				status = ocfs2_begin_ordered_truncate(inode,
120262306a36Sopenharmony_ci								      attr->ia_size);
120362306a36Sopenharmony_ci				if (status)
120462306a36Sopenharmony_ci					goto bail_unlock;
120562306a36Sopenharmony_ci			}
120662306a36Sopenharmony_ci			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
120762306a36Sopenharmony_ci		} else
120862306a36Sopenharmony_ci			status = ocfs2_extend_file(inode, bh, attr->ia_size);
120962306a36Sopenharmony_ci		if (status < 0) {
121062306a36Sopenharmony_ci			if (status != -ENOSPC)
121162306a36Sopenharmony_ci				mlog_errno(status);
121262306a36Sopenharmony_ci			status = -ENOSPC;
121362306a36Sopenharmony_ci			goto bail_unlock;
121462306a36Sopenharmony_ci		}
121562306a36Sopenharmony_ci	}
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci	if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
121862306a36Sopenharmony_ci	    (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
121962306a36Sopenharmony_ci		/*
122062306a36Sopenharmony_ci		 * Gather pointers to quota structures so that allocation /
122162306a36Sopenharmony_ci		 * freeing of quota structures happens here and not inside
122262306a36Sopenharmony_ci		 * dquot_transfer() where we have problems with lock ordering
122362306a36Sopenharmony_ci		 */
122462306a36Sopenharmony_ci		if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
122562306a36Sopenharmony_ci		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
122662306a36Sopenharmony_ci		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
122762306a36Sopenharmony_ci			transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
122862306a36Sopenharmony_ci			if (IS_ERR(transfer_to[USRQUOTA])) {
122962306a36Sopenharmony_ci				status = PTR_ERR(transfer_to[USRQUOTA]);
123062306a36Sopenharmony_ci				transfer_to[USRQUOTA] = NULL;
123162306a36Sopenharmony_ci				goto bail_unlock;
123262306a36Sopenharmony_ci			}
123362306a36Sopenharmony_ci		}
123462306a36Sopenharmony_ci		if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
123562306a36Sopenharmony_ci		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
123662306a36Sopenharmony_ci		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
123762306a36Sopenharmony_ci			transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
123862306a36Sopenharmony_ci			if (IS_ERR(transfer_to[GRPQUOTA])) {
123962306a36Sopenharmony_ci				status = PTR_ERR(transfer_to[GRPQUOTA]);
124062306a36Sopenharmony_ci				transfer_to[GRPQUOTA] = NULL;
124162306a36Sopenharmony_ci				goto bail_unlock;
124262306a36Sopenharmony_ci			}
124362306a36Sopenharmony_ci		}
124462306a36Sopenharmony_ci		down_write(&OCFS2_I(inode)->ip_alloc_sem);
124562306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
124662306a36Sopenharmony_ci					   2 * ocfs2_quota_trans_credits(sb));
124762306a36Sopenharmony_ci		if (IS_ERR(handle)) {
124862306a36Sopenharmony_ci			status = PTR_ERR(handle);
124962306a36Sopenharmony_ci			mlog_errno(status);
125062306a36Sopenharmony_ci			goto bail_unlock_alloc;
125162306a36Sopenharmony_ci		}
125262306a36Sopenharmony_ci		status = __dquot_transfer(inode, transfer_to);
125362306a36Sopenharmony_ci		if (status < 0)
125462306a36Sopenharmony_ci			goto bail_commit;
125562306a36Sopenharmony_ci	} else {
125662306a36Sopenharmony_ci		down_write(&OCFS2_I(inode)->ip_alloc_sem);
125762306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
125862306a36Sopenharmony_ci		if (IS_ERR(handle)) {
125962306a36Sopenharmony_ci			status = PTR_ERR(handle);
126062306a36Sopenharmony_ci			mlog_errno(status);
126162306a36Sopenharmony_ci			goto bail_unlock_alloc;
126262306a36Sopenharmony_ci		}
126362306a36Sopenharmony_ci	}
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	setattr_copy(&nop_mnt_idmap, inode, attr);
126662306a36Sopenharmony_ci	mark_inode_dirty(inode);
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci	status = ocfs2_mark_inode_dirty(handle, inode, bh);
126962306a36Sopenharmony_ci	if (status < 0)
127062306a36Sopenharmony_ci		mlog_errno(status);
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_cibail_commit:
127362306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
127462306a36Sopenharmony_cibail_unlock_alloc:
127562306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
127662306a36Sopenharmony_cibail_unlock:
127762306a36Sopenharmony_ci	if (status && inode_locked) {
127862306a36Sopenharmony_ci		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
127962306a36Sopenharmony_ci		inode_locked = 0;
128062306a36Sopenharmony_ci	}
128162306a36Sopenharmony_cibail_unlock_rw:
128262306a36Sopenharmony_ci	if (size_change)
128362306a36Sopenharmony_ci		ocfs2_rw_unlock(inode, 1);
128462306a36Sopenharmony_cibail:
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci	/* Release quota pointers in case we acquired them */
128762306a36Sopenharmony_ci	for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
128862306a36Sopenharmony_ci		dqput(transfer_to[qtype]);
128962306a36Sopenharmony_ci
129062306a36Sopenharmony_ci	if (!status && attr->ia_valid & ATTR_MODE) {
129162306a36Sopenharmony_ci		status = ocfs2_acl_chmod(inode, bh);
129262306a36Sopenharmony_ci		if (status < 0)
129362306a36Sopenharmony_ci			mlog_errno(status);
129462306a36Sopenharmony_ci	}
129562306a36Sopenharmony_ci	if (inode_locked)
129662306a36Sopenharmony_ci		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	brelse(bh);
129962306a36Sopenharmony_ci	return status;
130062306a36Sopenharmony_ci}
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ciint ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
130362306a36Sopenharmony_ci		  struct kstat *stat, u32 request_mask, unsigned int flags)
130462306a36Sopenharmony_ci{
130562306a36Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
130662306a36Sopenharmony_ci	struct super_block *sb = path->dentry->d_sb;
130762306a36Sopenharmony_ci	struct ocfs2_super *osb = sb->s_fs_info;
130862306a36Sopenharmony_ci	int err;
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci	err = ocfs2_inode_revalidate(path->dentry);
131162306a36Sopenharmony_ci	if (err) {
131262306a36Sopenharmony_ci		if (err != -ENOENT)
131362306a36Sopenharmony_ci			mlog_errno(err);
131462306a36Sopenharmony_ci		goto bail;
131562306a36Sopenharmony_ci	}
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
131862306a36Sopenharmony_ci	/*
131962306a36Sopenharmony_ci	 * If there is inline data in the inode, the inode will normally not
132062306a36Sopenharmony_ci	 * have data blocks allocated (it may have an external xattr block).
132162306a36Sopenharmony_ci	 * Report at least one sector for such files, so tools like tar, rsync,
132262306a36Sopenharmony_ci	 * others don't incorrectly think the file is completely sparse.
132362306a36Sopenharmony_ci	 */
132462306a36Sopenharmony_ci	if (unlikely(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
132562306a36Sopenharmony_ci		stat->blocks += (stat->size + 511)>>9;
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci	/* We set the blksize from the cluster size for performance */
132862306a36Sopenharmony_ci	stat->blksize = osb->s_clustersize;
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_cibail:
133162306a36Sopenharmony_ci	return err;
133262306a36Sopenharmony_ci}
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ciint ocfs2_permission(struct mnt_idmap *idmap, struct inode *inode,
133562306a36Sopenharmony_ci		     int mask)
133662306a36Sopenharmony_ci{
133762306a36Sopenharmony_ci	int ret, had_lock;
133862306a36Sopenharmony_ci	struct ocfs2_lock_holder oh;
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	if (mask & MAY_NOT_BLOCK)
134162306a36Sopenharmony_ci		return -ECHILD;
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ci	had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh);
134462306a36Sopenharmony_ci	if (had_lock < 0) {
134562306a36Sopenharmony_ci		ret = had_lock;
134662306a36Sopenharmony_ci		goto out;
134762306a36Sopenharmony_ci	} else if (had_lock) {
134862306a36Sopenharmony_ci		/* See comments in ocfs2_setattr() for details.
134962306a36Sopenharmony_ci		 * The call chain of this case could be:
135062306a36Sopenharmony_ci		 * do_sys_open()
135162306a36Sopenharmony_ci		 *  may_open()
135262306a36Sopenharmony_ci		 *   inode_permission()
135362306a36Sopenharmony_ci		 *    ocfs2_permission()
135462306a36Sopenharmony_ci		 *     ocfs2_iop_get_acl()
135562306a36Sopenharmony_ci		 */
135662306a36Sopenharmony_ci		mlog(ML_ERROR, "Another case of recursive locking:\n");
135762306a36Sopenharmony_ci		dump_stack();
135862306a36Sopenharmony_ci	}
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	ret = generic_permission(&nop_mnt_idmap, inode, mask);
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
136362306a36Sopenharmony_ciout:
136462306a36Sopenharmony_ci	return ret;
136562306a36Sopenharmony_ci}
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_cistatic int __ocfs2_write_remove_suid(struct inode *inode,
136862306a36Sopenharmony_ci				     struct buffer_head *bh)
136962306a36Sopenharmony_ci{
137062306a36Sopenharmony_ci	int ret;
137162306a36Sopenharmony_ci	handle_t *handle;
137262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
137362306a36Sopenharmony_ci	struct ocfs2_dinode *di;
137462306a36Sopenharmony_ci
137562306a36Sopenharmony_ci	trace_ocfs2_write_remove_suid(
137662306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
137762306a36Sopenharmony_ci			inode->i_mode);
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
138062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
138162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
138262306a36Sopenharmony_ci		mlog_errno(ret);
138362306a36Sopenharmony_ci		goto out;
138462306a36Sopenharmony_ci	}
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
138762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
138862306a36Sopenharmony_ci	if (ret < 0) {
138962306a36Sopenharmony_ci		mlog_errno(ret);
139062306a36Sopenharmony_ci		goto out_trans;
139162306a36Sopenharmony_ci	}
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	inode->i_mode &= ~S_ISUID;
139462306a36Sopenharmony_ci	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
139562306a36Sopenharmony_ci		inode->i_mode &= ~S_ISGID;
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) bh->b_data;
139862306a36Sopenharmony_ci	di->i_mode = cpu_to_le16(inode->i_mode);
139962306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
140262306a36Sopenharmony_ci
140362306a36Sopenharmony_ciout_trans:
140462306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
140562306a36Sopenharmony_ciout:
140662306a36Sopenharmony_ci	return ret;
140762306a36Sopenharmony_ci}
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_cistatic int ocfs2_write_remove_suid(struct inode *inode)
141062306a36Sopenharmony_ci{
141162306a36Sopenharmony_ci	int ret;
141262306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
141362306a36Sopenharmony_ci
141462306a36Sopenharmony_ci	ret = ocfs2_read_inode_block(inode, &bh);
141562306a36Sopenharmony_ci	if (ret < 0) {
141662306a36Sopenharmony_ci		mlog_errno(ret);
141762306a36Sopenharmony_ci		goto out;
141862306a36Sopenharmony_ci	}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci	ret =  __ocfs2_write_remove_suid(inode, bh);
142162306a36Sopenharmony_ciout:
142262306a36Sopenharmony_ci	brelse(bh);
142362306a36Sopenharmony_ci	return ret;
142462306a36Sopenharmony_ci}
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_ci/*
142762306a36Sopenharmony_ci * Allocate enough extents to cover the region starting at byte offset
142862306a36Sopenharmony_ci * start for len bytes. Existing extents are skipped, any extents
142962306a36Sopenharmony_ci * added are marked as "unwritten".
143062306a36Sopenharmony_ci */
143162306a36Sopenharmony_cistatic int ocfs2_allocate_unwritten_extents(struct inode *inode,
143262306a36Sopenharmony_ci					    u64 start, u64 len)
143362306a36Sopenharmony_ci{
143462306a36Sopenharmony_ci	int ret;
143562306a36Sopenharmony_ci	u32 cpos, phys_cpos, clusters, alloc_size;
143662306a36Sopenharmony_ci	u64 end = start + len;
143762306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
144062306a36Sopenharmony_ci		ret = ocfs2_read_inode_block(inode, &di_bh);
144162306a36Sopenharmony_ci		if (ret) {
144262306a36Sopenharmony_ci			mlog_errno(ret);
144362306a36Sopenharmony_ci			goto out;
144462306a36Sopenharmony_ci		}
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_ci		/*
144762306a36Sopenharmony_ci		 * Nothing to do if the requested reservation range
144862306a36Sopenharmony_ci		 * fits within the inode.
144962306a36Sopenharmony_ci		 */
145062306a36Sopenharmony_ci		if (ocfs2_size_fits_inline_data(di_bh, end))
145162306a36Sopenharmony_ci			goto out;
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
145462306a36Sopenharmony_ci		if (ret) {
145562306a36Sopenharmony_ci			mlog_errno(ret);
145662306a36Sopenharmony_ci			goto out;
145762306a36Sopenharmony_ci		}
145862306a36Sopenharmony_ci	}
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci	/*
146162306a36Sopenharmony_ci	 * We consider both start and len to be inclusive.
146262306a36Sopenharmony_ci	 */
146362306a36Sopenharmony_ci	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
146462306a36Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
146562306a36Sopenharmony_ci	clusters -= cpos;
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_ci	while (clusters) {
146862306a36Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
146962306a36Sopenharmony_ci					 &alloc_size, NULL);
147062306a36Sopenharmony_ci		if (ret) {
147162306a36Sopenharmony_ci			mlog_errno(ret);
147262306a36Sopenharmony_ci			goto out;
147362306a36Sopenharmony_ci		}
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci		/*
147662306a36Sopenharmony_ci		 * Hole or existing extent len can be arbitrary, so
147762306a36Sopenharmony_ci		 * cap it to our own allocation request.
147862306a36Sopenharmony_ci		 */
147962306a36Sopenharmony_ci		if (alloc_size > clusters)
148062306a36Sopenharmony_ci			alloc_size = clusters;
148162306a36Sopenharmony_ci
148262306a36Sopenharmony_ci		if (phys_cpos) {
148362306a36Sopenharmony_ci			/*
148462306a36Sopenharmony_ci			 * We already have an allocation at this
148562306a36Sopenharmony_ci			 * region so we can safely skip it.
148662306a36Sopenharmony_ci			 */
148762306a36Sopenharmony_ci			goto next;
148862306a36Sopenharmony_ci		}
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci		ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
149162306a36Sopenharmony_ci		if (ret) {
149262306a36Sopenharmony_ci			if (ret != -ENOSPC)
149362306a36Sopenharmony_ci				mlog_errno(ret);
149462306a36Sopenharmony_ci			goto out;
149562306a36Sopenharmony_ci		}
149662306a36Sopenharmony_ci
149762306a36Sopenharmony_cinext:
149862306a36Sopenharmony_ci		cpos += alloc_size;
149962306a36Sopenharmony_ci		clusters -= alloc_size;
150062306a36Sopenharmony_ci	}
150162306a36Sopenharmony_ci
150262306a36Sopenharmony_ci	ret = 0;
150362306a36Sopenharmony_ciout:
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	brelse(di_bh);
150662306a36Sopenharmony_ci	return ret;
150762306a36Sopenharmony_ci}
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci/*
151062306a36Sopenharmony_ci * Truncate a byte range, avoiding pages within partial clusters. This
151162306a36Sopenharmony_ci * preserves those pages for the zeroing code to write to.
151262306a36Sopenharmony_ci */
151362306a36Sopenharmony_cistatic void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
151462306a36Sopenharmony_ci					 u64 byte_len)
151562306a36Sopenharmony_ci{
151662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
151762306a36Sopenharmony_ci	loff_t start, end;
151862306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
152162306a36Sopenharmony_ci	end = byte_start + byte_len;
152262306a36Sopenharmony_ci	end = end & ~(osb->s_clustersize - 1);
152362306a36Sopenharmony_ci
152462306a36Sopenharmony_ci	if (start < end) {
152562306a36Sopenharmony_ci		unmap_mapping_range(mapping, start, end - start, 0);
152662306a36Sopenharmony_ci		truncate_inode_pages_range(mapping, start, end - 1);
152762306a36Sopenharmony_ci	}
152862306a36Sopenharmony_ci}
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_ci/*
153162306a36Sopenharmony_ci * zero out partial blocks of one cluster.
153262306a36Sopenharmony_ci *
153362306a36Sopenharmony_ci * start: file offset where zero starts, will be made upper block aligned.
153462306a36Sopenharmony_ci * len: it will be trimmed to the end of current cluster if "start + len"
153562306a36Sopenharmony_ci *      is bigger than it.
153662306a36Sopenharmony_ci */
153762306a36Sopenharmony_cistatic int ocfs2_zeroout_partial_cluster(struct inode *inode,
153862306a36Sopenharmony_ci					u64 start, u64 len)
153962306a36Sopenharmony_ci{
154062306a36Sopenharmony_ci	int ret;
154162306a36Sopenharmony_ci	u64 start_block, end_block, nr_blocks;
154262306a36Sopenharmony_ci	u64 p_block, offset;
154362306a36Sopenharmony_ci	u32 cluster, p_cluster, nr_clusters;
154462306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
154562306a36Sopenharmony_ci	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
154662306a36Sopenharmony_ci
154762306a36Sopenharmony_ci	if (start + len < end)
154862306a36Sopenharmony_ci		end = start + len;
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci	start_block = ocfs2_blocks_for_bytes(sb, start);
155162306a36Sopenharmony_ci	end_block = ocfs2_blocks_for_bytes(sb, end);
155262306a36Sopenharmony_ci	nr_blocks = end_block - start_block;
155362306a36Sopenharmony_ci	if (!nr_blocks)
155462306a36Sopenharmony_ci		return 0;
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci	cluster = ocfs2_bytes_to_clusters(sb, start);
155762306a36Sopenharmony_ci	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
155862306a36Sopenharmony_ci				&nr_clusters, NULL);
155962306a36Sopenharmony_ci	if (ret)
156062306a36Sopenharmony_ci		return ret;
156162306a36Sopenharmony_ci	if (!p_cluster)
156262306a36Sopenharmony_ci		return 0;
156362306a36Sopenharmony_ci
156462306a36Sopenharmony_ci	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
156562306a36Sopenharmony_ci	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
156662306a36Sopenharmony_ci	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
156762306a36Sopenharmony_ci}
156862306a36Sopenharmony_ci
156962306a36Sopenharmony_cistatic int ocfs2_zero_partial_clusters(struct inode *inode,
157062306a36Sopenharmony_ci				       u64 start, u64 len)
157162306a36Sopenharmony_ci{
157262306a36Sopenharmony_ci	int ret = 0;
157362306a36Sopenharmony_ci	u64 tmpend = 0;
157462306a36Sopenharmony_ci	u64 end = start + len;
157562306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
157662306a36Sopenharmony_ci	unsigned int csize = osb->s_clustersize;
157762306a36Sopenharmony_ci	handle_t *handle;
157862306a36Sopenharmony_ci	loff_t isize = i_size_read(inode);
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci	/*
158162306a36Sopenharmony_ci	 * The "start" and "end" values are NOT necessarily part of
158262306a36Sopenharmony_ci	 * the range whose allocation is being deleted. Rather, this
158362306a36Sopenharmony_ci	 * is what the user passed in with the request. We must zero
158462306a36Sopenharmony_ci	 * partial clusters here. There's no need to worry about
158562306a36Sopenharmony_ci	 * physical allocation - the zeroing code knows to skip holes.
158662306a36Sopenharmony_ci	 */
158762306a36Sopenharmony_ci	trace_ocfs2_zero_partial_clusters(
158862306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
158962306a36Sopenharmony_ci		(unsigned long long)start, (unsigned long long)end);
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci	/*
159262306a36Sopenharmony_ci	 * If both edges are on a cluster boundary then there's no
159362306a36Sopenharmony_ci	 * zeroing required as the region is part of the allocation to
159462306a36Sopenharmony_ci	 * be truncated.
159562306a36Sopenharmony_ci	 */
159662306a36Sopenharmony_ci	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
159762306a36Sopenharmony_ci		goto out;
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_ci	/* No page cache for EOF blocks, issue zero out to disk. */
160062306a36Sopenharmony_ci	if (end > isize) {
160162306a36Sopenharmony_ci		/*
160262306a36Sopenharmony_ci		 * zeroout eof blocks in last cluster starting from
160362306a36Sopenharmony_ci		 * "isize" even "start" > "isize" because it is
160462306a36Sopenharmony_ci		 * complicated to zeroout just at "start" as "start"
160562306a36Sopenharmony_ci		 * may be not aligned with block size, buffer write
160662306a36Sopenharmony_ci		 * would be required to do that, but out of eof buffer
160762306a36Sopenharmony_ci		 * write is not supported.
160862306a36Sopenharmony_ci		 */
160962306a36Sopenharmony_ci		ret = ocfs2_zeroout_partial_cluster(inode, isize,
161062306a36Sopenharmony_ci					end - isize);
161162306a36Sopenharmony_ci		if (ret) {
161262306a36Sopenharmony_ci			mlog_errno(ret);
161362306a36Sopenharmony_ci			goto out;
161462306a36Sopenharmony_ci		}
161562306a36Sopenharmony_ci		if (start >= isize)
161662306a36Sopenharmony_ci			goto out;
161762306a36Sopenharmony_ci		end = isize;
161862306a36Sopenharmony_ci	}
161962306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
162062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
162162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
162262306a36Sopenharmony_ci		mlog_errno(ret);
162362306a36Sopenharmony_ci		goto out;
162462306a36Sopenharmony_ci	}
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	/*
162762306a36Sopenharmony_ci	 * If start is on a cluster boundary and end is somewhere in another
162862306a36Sopenharmony_ci	 * cluster, we have not COWed the cluster starting at start, unless
162962306a36Sopenharmony_ci	 * end is also within the same cluster. So, in this case, we skip this
163062306a36Sopenharmony_ci	 * first call to ocfs2_zero_range_for_truncate() truncate and move on
163162306a36Sopenharmony_ci	 * to the next one.
163262306a36Sopenharmony_ci	 */
163362306a36Sopenharmony_ci	if ((start & (csize - 1)) != 0) {
163462306a36Sopenharmony_ci		/*
163562306a36Sopenharmony_ci		 * We want to get the byte offset of the end of the 1st
163662306a36Sopenharmony_ci		 * cluster.
163762306a36Sopenharmony_ci		 */
163862306a36Sopenharmony_ci		tmpend = (u64)osb->s_clustersize +
163962306a36Sopenharmony_ci			(start & ~(osb->s_clustersize - 1));
164062306a36Sopenharmony_ci		if (tmpend > end)
164162306a36Sopenharmony_ci			tmpend = end;
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci		trace_ocfs2_zero_partial_clusters_range1(
164462306a36Sopenharmony_ci			(unsigned long long)start,
164562306a36Sopenharmony_ci			(unsigned long long)tmpend);
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_ci		ret = ocfs2_zero_range_for_truncate(inode, handle, start,
164862306a36Sopenharmony_ci						    tmpend);
164962306a36Sopenharmony_ci		if (ret)
165062306a36Sopenharmony_ci			mlog_errno(ret);
165162306a36Sopenharmony_ci	}
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	if (tmpend < end) {
165462306a36Sopenharmony_ci		/*
165562306a36Sopenharmony_ci		 * This may make start and end equal, but the zeroing
165662306a36Sopenharmony_ci		 * code will skip any work in that case so there's no
165762306a36Sopenharmony_ci		 * need to catch it up here.
165862306a36Sopenharmony_ci		 */
165962306a36Sopenharmony_ci		start = end & ~(osb->s_clustersize - 1);
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci		trace_ocfs2_zero_partial_clusters_range2(
166262306a36Sopenharmony_ci			(unsigned long long)start, (unsigned long long)end);
166362306a36Sopenharmony_ci
166462306a36Sopenharmony_ci		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
166562306a36Sopenharmony_ci		if (ret)
166662306a36Sopenharmony_ci			mlog_errno(ret);
166762306a36Sopenharmony_ci	}
166862306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
167162306a36Sopenharmony_ciout:
167262306a36Sopenharmony_ci	return ret;
167362306a36Sopenharmony_ci}
167462306a36Sopenharmony_ci
167562306a36Sopenharmony_cistatic int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
167662306a36Sopenharmony_ci{
167762306a36Sopenharmony_ci	int i;
167862306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_ci		rec = &el->l_recs[i];
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_ci		if (le32_to_cpu(rec->e_cpos) < pos)
168562306a36Sopenharmony_ci			break;
168662306a36Sopenharmony_ci	}
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ci	return i;
168962306a36Sopenharmony_ci}
169062306a36Sopenharmony_ci
169162306a36Sopenharmony_ci/*
169262306a36Sopenharmony_ci * Helper to calculate the punching pos and length in one run, we handle the
169362306a36Sopenharmony_ci * following three cases in order:
169462306a36Sopenharmony_ci *
169562306a36Sopenharmony_ci * - remove the entire record
169662306a36Sopenharmony_ci * - remove a partial record
169762306a36Sopenharmony_ci * - no record needs to be removed (hole-punching completed)
169862306a36Sopenharmony_ci*/
169962306a36Sopenharmony_cistatic void ocfs2_calc_trunc_pos(struct inode *inode,
170062306a36Sopenharmony_ci				 struct ocfs2_extent_list *el,
170162306a36Sopenharmony_ci				 struct ocfs2_extent_rec *rec,
170262306a36Sopenharmony_ci				 u32 trunc_start, u32 *trunc_cpos,
170362306a36Sopenharmony_ci				 u32 *trunc_len, u32 *trunc_end,
170462306a36Sopenharmony_ci				 u64 *blkno, int *done)
170562306a36Sopenharmony_ci{
170662306a36Sopenharmony_ci	int ret = 0;
170762306a36Sopenharmony_ci	u32 coff, range;
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_ci	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
171262306a36Sopenharmony_ci		/*
171362306a36Sopenharmony_ci		 * remove an entire extent record.
171462306a36Sopenharmony_ci		 */
171562306a36Sopenharmony_ci		*trunc_cpos = le32_to_cpu(rec->e_cpos);
171662306a36Sopenharmony_ci		/*
171762306a36Sopenharmony_ci		 * Skip holes if any.
171862306a36Sopenharmony_ci		 */
171962306a36Sopenharmony_ci		if (range < *trunc_end)
172062306a36Sopenharmony_ci			*trunc_end = range;
172162306a36Sopenharmony_ci		*trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
172262306a36Sopenharmony_ci		*blkno = le64_to_cpu(rec->e_blkno);
172362306a36Sopenharmony_ci		*trunc_end = le32_to_cpu(rec->e_cpos);
172462306a36Sopenharmony_ci	} else if (range > trunc_start) {
172562306a36Sopenharmony_ci		/*
172662306a36Sopenharmony_ci		 * remove a partial extent record, which means we're
172762306a36Sopenharmony_ci		 * removing the last extent record.
172862306a36Sopenharmony_ci		 */
172962306a36Sopenharmony_ci		*trunc_cpos = trunc_start;
173062306a36Sopenharmony_ci		/*
173162306a36Sopenharmony_ci		 * skip hole if any.
173262306a36Sopenharmony_ci		 */
173362306a36Sopenharmony_ci		if (range < *trunc_end)
173462306a36Sopenharmony_ci			*trunc_end = range;
173562306a36Sopenharmony_ci		*trunc_len = *trunc_end - trunc_start;
173662306a36Sopenharmony_ci		coff = trunc_start - le32_to_cpu(rec->e_cpos);
173762306a36Sopenharmony_ci		*blkno = le64_to_cpu(rec->e_blkno) +
173862306a36Sopenharmony_ci				ocfs2_clusters_to_blocks(inode->i_sb, coff);
173962306a36Sopenharmony_ci		*trunc_end = trunc_start;
174062306a36Sopenharmony_ci	} else {
174162306a36Sopenharmony_ci		/*
174262306a36Sopenharmony_ci		 * It may have two following possibilities:
174362306a36Sopenharmony_ci		 *
174462306a36Sopenharmony_ci		 * - last record has been removed
174562306a36Sopenharmony_ci		 * - trunc_start was within a hole
174662306a36Sopenharmony_ci		 *
174762306a36Sopenharmony_ci		 * both two cases mean the completion of hole punching.
174862306a36Sopenharmony_ci		 */
174962306a36Sopenharmony_ci		ret = 1;
175062306a36Sopenharmony_ci	}
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	*done = ret;
175362306a36Sopenharmony_ci}
175462306a36Sopenharmony_ci
175562306a36Sopenharmony_ciint ocfs2_remove_inode_range(struct inode *inode,
175662306a36Sopenharmony_ci			     struct buffer_head *di_bh, u64 byte_start,
175762306a36Sopenharmony_ci			     u64 byte_len)
175862306a36Sopenharmony_ci{
175962306a36Sopenharmony_ci	int ret = 0, flags = 0, done = 0, i;
176062306a36Sopenharmony_ci	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
176162306a36Sopenharmony_ci	u32 cluster_in_el;
176262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
176362306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
176462306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
176562306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
176662306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
176762306a36Sopenharmony_ci	struct ocfs2_extent_list *el = NULL;
176862306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
176962306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
177062306a36Sopenharmony_ci	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
177162306a36Sopenharmony_ci
177262306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
177362306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
177462306a36Sopenharmony_ci
177562306a36Sopenharmony_ci	trace_ocfs2_remove_inode_range(
177662306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
177762306a36Sopenharmony_ci			(unsigned long long)byte_start,
177862306a36Sopenharmony_ci			(unsigned long long)byte_len);
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci	if (byte_len == 0)
178162306a36Sopenharmony_ci		return 0;
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
178462306a36Sopenharmony_ci		ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
178562306a36Sopenharmony_ci					    byte_start + byte_len, 0);
178662306a36Sopenharmony_ci		if (ret) {
178762306a36Sopenharmony_ci			mlog_errno(ret);
178862306a36Sopenharmony_ci			goto out;
178962306a36Sopenharmony_ci		}
179062306a36Sopenharmony_ci		/*
179162306a36Sopenharmony_ci		 * There's no need to get fancy with the page cache
179262306a36Sopenharmony_ci		 * truncate of an inline-data inode. We're talking
179362306a36Sopenharmony_ci		 * about less than a page here, which will be cached
179462306a36Sopenharmony_ci		 * in the dinode buffer anyway.
179562306a36Sopenharmony_ci		 */
179662306a36Sopenharmony_ci		unmap_mapping_range(mapping, 0, 0, 0);
179762306a36Sopenharmony_ci		truncate_inode_pages(mapping, 0);
179862306a36Sopenharmony_ci		goto out;
179962306a36Sopenharmony_ci	}
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	/*
180262306a36Sopenharmony_ci	 * For reflinks, we may need to CoW 2 clusters which might be
180362306a36Sopenharmony_ci	 * partially zero'd later, if hole's start and end offset were
180462306a36Sopenharmony_ci	 * within one cluster(means is not exactly aligned to clustersize).
180562306a36Sopenharmony_ci	 */
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	if (ocfs2_is_refcount_inode(inode)) {
180862306a36Sopenharmony_ci		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
180962306a36Sopenharmony_ci		if (ret) {
181062306a36Sopenharmony_ci			mlog_errno(ret);
181162306a36Sopenharmony_ci			goto out;
181262306a36Sopenharmony_ci		}
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
181562306a36Sopenharmony_ci		if (ret) {
181662306a36Sopenharmony_ci			mlog_errno(ret);
181762306a36Sopenharmony_ci			goto out;
181862306a36Sopenharmony_ci		}
181962306a36Sopenharmony_ci	}
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
182262306a36Sopenharmony_ci	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
182362306a36Sopenharmony_ci	cluster_in_el = trunc_end;
182462306a36Sopenharmony_ci
182562306a36Sopenharmony_ci	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
182662306a36Sopenharmony_ci	if (ret) {
182762306a36Sopenharmony_ci		mlog_errno(ret);
182862306a36Sopenharmony_ci		goto out;
182962306a36Sopenharmony_ci	}
183062306a36Sopenharmony_ci
183162306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(&et);
183262306a36Sopenharmony_ci	if (!path) {
183362306a36Sopenharmony_ci		ret = -ENOMEM;
183462306a36Sopenharmony_ci		mlog_errno(ret);
183562306a36Sopenharmony_ci		goto out;
183662306a36Sopenharmony_ci	}
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	while (trunc_end > trunc_start) {
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_ci		ret = ocfs2_find_path(INODE_CACHE(inode), path,
184162306a36Sopenharmony_ci				      cluster_in_el);
184262306a36Sopenharmony_ci		if (ret) {
184362306a36Sopenharmony_ci			mlog_errno(ret);
184462306a36Sopenharmony_ci			goto out;
184562306a36Sopenharmony_ci		}
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci		el = path_leaf_el(path);
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_ci		i = ocfs2_find_rec(el, trunc_end);
185062306a36Sopenharmony_ci		/*
185162306a36Sopenharmony_ci		 * Need to go to previous extent block.
185262306a36Sopenharmony_ci		 */
185362306a36Sopenharmony_ci		if (i < 0) {
185462306a36Sopenharmony_ci			if (path->p_tree_depth == 0)
185562306a36Sopenharmony_ci				break;
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci			ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
185862306a36Sopenharmony_ci							    path,
185962306a36Sopenharmony_ci							    &cluster_in_el);
186062306a36Sopenharmony_ci			if (ret) {
186162306a36Sopenharmony_ci				mlog_errno(ret);
186262306a36Sopenharmony_ci				goto out;
186362306a36Sopenharmony_ci			}
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci			/*
186662306a36Sopenharmony_ci			 * We've reached the leftmost extent block,
186762306a36Sopenharmony_ci			 * it's safe to leave.
186862306a36Sopenharmony_ci			 */
186962306a36Sopenharmony_ci			if (cluster_in_el == 0)
187062306a36Sopenharmony_ci				break;
187162306a36Sopenharmony_ci
187262306a36Sopenharmony_ci			/*
187362306a36Sopenharmony_ci			 * The 'pos' searched for previous extent block is
187462306a36Sopenharmony_ci			 * always one cluster less than actual trunc_end.
187562306a36Sopenharmony_ci			 */
187662306a36Sopenharmony_ci			trunc_end = cluster_in_el + 1;
187762306a36Sopenharmony_ci
187862306a36Sopenharmony_ci			ocfs2_reinit_path(path, 1);
187962306a36Sopenharmony_ci
188062306a36Sopenharmony_ci			continue;
188162306a36Sopenharmony_ci
188262306a36Sopenharmony_ci		} else
188362306a36Sopenharmony_ci			rec = &el->l_recs[i];
188462306a36Sopenharmony_ci
188562306a36Sopenharmony_ci		ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
188662306a36Sopenharmony_ci				     &trunc_len, &trunc_end, &blkno, &done);
188762306a36Sopenharmony_ci		if (done)
188862306a36Sopenharmony_ci			break;
188962306a36Sopenharmony_ci
189062306a36Sopenharmony_ci		flags = rec->e_flags;
189162306a36Sopenharmony_ci		phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
189262306a36Sopenharmony_ci
189362306a36Sopenharmony_ci		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
189462306a36Sopenharmony_ci					       phys_cpos, trunc_len, flags,
189562306a36Sopenharmony_ci					       &dealloc, refcount_loc, false);
189662306a36Sopenharmony_ci		if (ret < 0) {
189762306a36Sopenharmony_ci			mlog_errno(ret);
189862306a36Sopenharmony_ci			goto out;
189962306a36Sopenharmony_ci		}
190062306a36Sopenharmony_ci
190162306a36Sopenharmony_ci		cluster_in_el = trunc_end;
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci		ocfs2_reinit_path(path, 1);
190462306a36Sopenharmony_ci	}
190562306a36Sopenharmony_ci
190662306a36Sopenharmony_ci	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
190762306a36Sopenharmony_ci
190862306a36Sopenharmony_ciout:
190962306a36Sopenharmony_ci	ocfs2_free_path(path);
191062306a36Sopenharmony_ci	ocfs2_schedule_truncate_log_flush(osb, 1);
191162306a36Sopenharmony_ci	ocfs2_run_deallocs(osb, &dealloc);
191262306a36Sopenharmony_ci
191362306a36Sopenharmony_ci	return ret;
191462306a36Sopenharmony_ci}
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci/*
191762306a36Sopenharmony_ci * Parts of this function taken from xfs_change_file_space()
191862306a36Sopenharmony_ci */
191962306a36Sopenharmony_cistatic int __ocfs2_change_file_space(struct file *file, struct inode *inode,
192062306a36Sopenharmony_ci				     loff_t f_pos, unsigned int cmd,
192162306a36Sopenharmony_ci				     struct ocfs2_space_resv *sr,
192262306a36Sopenharmony_ci				     int change_size)
192362306a36Sopenharmony_ci{
192462306a36Sopenharmony_ci	int ret;
192562306a36Sopenharmony_ci	s64 llen;
192662306a36Sopenharmony_ci	loff_t size, orig_isize;
192762306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
192862306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
192962306a36Sopenharmony_ci	handle_t *handle;
193062306a36Sopenharmony_ci	unsigned long long max_off = inode->i_sb->s_maxbytes;
193162306a36Sopenharmony_ci
193262306a36Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
193362306a36Sopenharmony_ci		return -EROFS;
193462306a36Sopenharmony_ci
193562306a36Sopenharmony_ci	inode_lock(inode);
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci	/*
193862306a36Sopenharmony_ci	 * This prevents concurrent writes on other nodes
193962306a36Sopenharmony_ci	 */
194062306a36Sopenharmony_ci	ret = ocfs2_rw_lock(inode, 1);
194162306a36Sopenharmony_ci	if (ret) {
194262306a36Sopenharmony_ci		mlog_errno(ret);
194362306a36Sopenharmony_ci		goto out;
194462306a36Sopenharmony_ci	}
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_ci	ret = ocfs2_inode_lock(inode, &di_bh, 1);
194762306a36Sopenharmony_ci	if (ret) {
194862306a36Sopenharmony_ci		mlog_errno(ret);
194962306a36Sopenharmony_ci		goto out_rw_unlock;
195062306a36Sopenharmony_ci	}
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_ci	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
195362306a36Sopenharmony_ci		ret = -EPERM;
195462306a36Sopenharmony_ci		goto out_inode_unlock;
195562306a36Sopenharmony_ci	}
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_ci	switch (sr->l_whence) {
195862306a36Sopenharmony_ci	case 0: /*SEEK_SET*/
195962306a36Sopenharmony_ci		break;
196062306a36Sopenharmony_ci	case 1: /*SEEK_CUR*/
196162306a36Sopenharmony_ci		sr->l_start += f_pos;
196262306a36Sopenharmony_ci		break;
196362306a36Sopenharmony_ci	case 2: /*SEEK_END*/
196462306a36Sopenharmony_ci		sr->l_start += i_size_read(inode);
196562306a36Sopenharmony_ci		break;
196662306a36Sopenharmony_ci	default:
196762306a36Sopenharmony_ci		ret = -EINVAL;
196862306a36Sopenharmony_ci		goto out_inode_unlock;
196962306a36Sopenharmony_ci	}
197062306a36Sopenharmony_ci	sr->l_whence = 0;
197162306a36Sopenharmony_ci
197262306a36Sopenharmony_ci	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
197362306a36Sopenharmony_ci
197462306a36Sopenharmony_ci	if (sr->l_start < 0
197562306a36Sopenharmony_ci	    || sr->l_start > max_off
197662306a36Sopenharmony_ci	    || (sr->l_start + llen) < 0
197762306a36Sopenharmony_ci	    || (sr->l_start + llen) > max_off) {
197862306a36Sopenharmony_ci		ret = -EINVAL;
197962306a36Sopenharmony_ci		goto out_inode_unlock;
198062306a36Sopenharmony_ci	}
198162306a36Sopenharmony_ci	size = sr->l_start + sr->l_len;
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ci	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
198462306a36Sopenharmony_ci	    cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
198562306a36Sopenharmony_ci		if (sr->l_len <= 0) {
198662306a36Sopenharmony_ci			ret = -EINVAL;
198762306a36Sopenharmony_ci			goto out_inode_unlock;
198862306a36Sopenharmony_ci		}
198962306a36Sopenharmony_ci	}
199062306a36Sopenharmony_ci
199162306a36Sopenharmony_ci	if (file && setattr_should_drop_suidgid(&nop_mnt_idmap, file_inode(file))) {
199262306a36Sopenharmony_ci		ret = __ocfs2_write_remove_suid(inode, di_bh);
199362306a36Sopenharmony_ci		if (ret) {
199462306a36Sopenharmony_ci			mlog_errno(ret);
199562306a36Sopenharmony_ci			goto out_inode_unlock;
199662306a36Sopenharmony_ci		}
199762306a36Sopenharmony_ci	}
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
200062306a36Sopenharmony_ci	switch (cmd) {
200162306a36Sopenharmony_ci	case OCFS2_IOC_RESVSP:
200262306a36Sopenharmony_ci	case OCFS2_IOC_RESVSP64:
200362306a36Sopenharmony_ci		/*
200462306a36Sopenharmony_ci		 * This takes unsigned offsets, but the signed ones we
200562306a36Sopenharmony_ci		 * pass have been checked against overflow above.
200662306a36Sopenharmony_ci		 */
200762306a36Sopenharmony_ci		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
200862306a36Sopenharmony_ci						       sr->l_len);
200962306a36Sopenharmony_ci		break;
201062306a36Sopenharmony_ci	case OCFS2_IOC_UNRESVSP:
201162306a36Sopenharmony_ci	case OCFS2_IOC_UNRESVSP64:
201262306a36Sopenharmony_ci		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
201362306a36Sopenharmony_ci					       sr->l_len);
201462306a36Sopenharmony_ci		break;
201562306a36Sopenharmony_ci	default:
201662306a36Sopenharmony_ci		ret = -EINVAL;
201762306a36Sopenharmony_ci	}
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_ci	orig_isize = i_size_read(inode);
202062306a36Sopenharmony_ci	/* zeroout eof blocks in the cluster. */
202162306a36Sopenharmony_ci	if (!ret && change_size && orig_isize < size) {
202262306a36Sopenharmony_ci		ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
202362306a36Sopenharmony_ci					size - orig_isize);
202462306a36Sopenharmony_ci		if (!ret)
202562306a36Sopenharmony_ci			i_size_write(inode, size);
202662306a36Sopenharmony_ci	}
202762306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
202862306a36Sopenharmony_ci	if (ret) {
202962306a36Sopenharmony_ci		mlog_errno(ret);
203062306a36Sopenharmony_ci		goto out_inode_unlock;
203162306a36Sopenharmony_ci	}
203262306a36Sopenharmony_ci
203362306a36Sopenharmony_ci	/*
203462306a36Sopenharmony_ci	 * We update c/mtime for these changes
203562306a36Sopenharmony_ci	 */
203662306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
203762306a36Sopenharmony_ci	if (IS_ERR(handle)) {
203862306a36Sopenharmony_ci		ret = PTR_ERR(handle);
203962306a36Sopenharmony_ci		mlog_errno(ret);
204062306a36Sopenharmony_ci		goto out_inode_unlock;
204162306a36Sopenharmony_ci	}
204262306a36Sopenharmony_ci
204362306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
204462306a36Sopenharmony_ci	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
204562306a36Sopenharmony_ci	if (ret < 0)
204662306a36Sopenharmony_ci		mlog_errno(ret);
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci	if (file && (file->f_flags & O_SYNC))
204962306a36Sopenharmony_ci		handle->h_sync = 1;
205062306a36Sopenharmony_ci
205162306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_ciout_inode_unlock:
205462306a36Sopenharmony_ci	brelse(di_bh);
205562306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
205662306a36Sopenharmony_ciout_rw_unlock:
205762306a36Sopenharmony_ci	ocfs2_rw_unlock(inode, 1);
205862306a36Sopenharmony_ci
205962306a36Sopenharmony_ciout:
206062306a36Sopenharmony_ci	inode_unlock(inode);
206162306a36Sopenharmony_ci	return ret;
206262306a36Sopenharmony_ci}
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ciint ocfs2_change_file_space(struct file *file, unsigned int cmd,
206562306a36Sopenharmony_ci			    struct ocfs2_space_resv *sr)
206662306a36Sopenharmony_ci{
206762306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
206862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
206962306a36Sopenharmony_ci	int ret;
207062306a36Sopenharmony_ci
207162306a36Sopenharmony_ci	if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
207262306a36Sopenharmony_ci	    !ocfs2_writes_unwritten_extents(osb))
207362306a36Sopenharmony_ci		return -ENOTTY;
207462306a36Sopenharmony_ci	else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
207562306a36Sopenharmony_ci		 !ocfs2_sparse_alloc(osb))
207662306a36Sopenharmony_ci		return -ENOTTY;
207762306a36Sopenharmony_ci
207862306a36Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
207962306a36Sopenharmony_ci		return -EINVAL;
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci	if (!(file->f_mode & FMODE_WRITE))
208262306a36Sopenharmony_ci		return -EBADF;
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci	ret = mnt_want_write_file(file);
208562306a36Sopenharmony_ci	if (ret)
208662306a36Sopenharmony_ci		return ret;
208762306a36Sopenharmony_ci	ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
208862306a36Sopenharmony_ci	mnt_drop_write_file(file);
208962306a36Sopenharmony_ci	return ret;
209062306a36Sopenharmony_ci}
209162306a36Sopenharmony_ci
209262306a36Sopenharmony_cistatic long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
209362306a36Sopenharmony_ci			    loff_t len)
209462306a36Sopenharmony_ci{
209562306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
209662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
209762306a36Sopenharmony_ci	struct ocfs2_space_resv sr;
209862306a36Sopenharmony_ci	int change_size = 1;
209962306a36Sopenharmony_ci	int cmd = OCFS2_IOC_RESVSP64;
210062306a36Sopenharmony_ci	int ret = 0;
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
210362306a36Sopenharmony_ci		return -EOPNOTSUPP;
210462306a36Sopenharmony_ci	if (!ocfs2_writes_unwritten_extents(osb))
210562306a36Sopenharmony_ci		return -EOPNOTSUPP;
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_ci	if (mode & FALLOC_FL_KEEP_SIZE) {
210862306a36Sopenharmony_ci		change_size = 0;
210962306a36Sopenharmony_ci	} else {
211062306a36Sopenharmony_ci		ret = inode_newsize_ok(inode, offset + len);
211162306a36Sopenharmony_ci		if (ret)
211262306a36Sopenharmony_ci			return ret;
211362306a36Sopenharmony_ci	}
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci	if (mode & FALLOC_FL_PUNCH_HOLE)
211662306a36Sopenharmony_ci		cmd = OCFS2_IOC_UNRESVSP64;
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci	sr.l_whence = 0;
211962306a36Sopenharmony_ci	sr.l_start = (s64)offset;
212062306a36Sopenharmony_ci	sr.l_len = (s64)len;
212162306a36Sopenharmony_ci
212262306a36Sopenharmony_ci	return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr,
212362306a36Sopenharmony_ci					 change_size);
212462306a36Sopenharmony_ci}
212562306a36Sopenharmony_ci
212662306a36Sopenharmony_ciint ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
212762306a36Sopenharmony_ci				   size_t count)
212862306a36Sopenharmony_ci{
212962306a36Sopenharmony_ci	int ret = 0;
213062306a36Sopenharmony_ci	unsigned int extent_flags;
213162306a36Sopenharmony_ci	u32 cpos, clusters, extent_len, phys_cpos;
213262306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
213362306a36Sopenharmony_ci
213462306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
213562306a36Sopenharmony_ci	    !ocfs2_is_refcount_inode(inode) ||
213662306a36Sopenharmony_ci	    OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
213762306a36Sopenharmony_ci		return 0;
213862306a36Sopenharmony_ci
213962306a36Sopenharmony_ci	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
214062306a36Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
214162306a36Sopenharmony_ci
214262306a36Sopenharmony_ci	while (clusters) {
214362306a36Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
214462306a36Sopenharmony_ci					 &extent_flags);
214562306a36Sopenharmony_ci		if (ret < 0) {
214662306a36Sopenharmony_ci			mlog_errno(ret);
214762306a36Sopenharmony_ci			goto out;
214862306a36Sopenharmony_ci		}
214962306a36Sopenharmony_ci
215062306a36Sopenharmony_ci		if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
215162306a36Sopenharmony_ci			ret = 1;
215262306a36Sopenharmony_ci			break;
215362306a36Sopenharmony_ci		}
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci		if (extent_len > clusters)
215662306a36Sopenharmony_ci			extent_len = clusters;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci		clusters -= extent_len;
215962306a36Sopenharmony_ci		cpos += extent_len;
216062306a36Sopenharmony_ci	}
216162306a36Sopenharmony_ciout:
216262306a36Sopenharmony_ci	return ret;
216362306a36Sopenharmony_ci}
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_cistatic int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
216662306a36Sopenharmony_ci{
216762306a36Sopenharmony_ci	int blockmask = inode->i_sb->s_blocksize - 1;
216862306a36Sopenharmony_ci	loff_t final_size = pos + count;
216962306a36Sopenharmony_ci
217062306a36Sopenharmony_ci	if ((pos & blockmask) || (final_size & blockmask))
217162306a36Sopenharmony_ci		return 1;
217262306a36Sopenharmony_ci	return 0;
217362306a36Sopenharmony_ci}
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_cistatic int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
217662306a36Sopenharmony_ci					    struct buffer_head **di_bh,
217762306a36Sopenharmony_ci					    int meta_level,
217862306a36Sopenharmony_ci					    int write_sem,
217962306a36Sopenharmony_ci					    int wait)
218062306a36Sopenharmony_ci{
218162306a36Sopenharmony_ci	int ret = 0;
218262306a36Sopenharmony_ci
218362306a36Sopenharmony_ci	if (wait)
218462306a36Sopenharmony_ci		ret = ocfs2_inode_lock(inode, di_bh, meta_level);
218562306a36Sopenharmony_ci	else
218662306a36Sopenharmony_ci		ret = ocfs2_try_inode_lock(inode, di_bh, meta_level);
218762306a36Sopenharmony_ci	if (ret < 0)
218862306a36Sopenharmony_ci		goto out;
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci	if (wait) {
219162306a36Sopenharmony_ci		if (write_sem)
219262306a36Sopenharmony_ci			down_write(&OCFS2_I(inode)->ip_alloc_sem);
219362306a36Sopenharmony_ci		else
219462306a36Sopenharmony_ci			down_read(&OCFS2_I(inode)->ip_alloc_sem);
219562306a36Sopenharmony_ci	} else {
219662306a36Sopenharmony_ci		if (write_sem)
219762306a36Sopenharmony_ci			ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
219862306a36Sopenharmony_ci		else
219962306a36Sopenharmony_ci			ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci		if (!ret) {
220262306a36Sopenharmony_ci			ret = -EAGAIN;
220362306a36Sopenharmony_ci			goto out_unlock;
220462306a36Sopenharmony_ci		}
220562306a36Sopenharmony_ci	}
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	return ret;
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ciout_unlock:
221062306a36Sopenharmony_ci	brelse(*di_bh);
221162306a36Sopenharmony_ci	*di_bh = NULL;
221262306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, meta_level);
221362306a36Sopenharmony_ciout:
221462306a36Sopenharmony_ci	return ret;
221562306a36Sopenharmony_ci}
221662306a36Sopenharmony_ci
221762306a36Sopenharmony_cistatic void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
221862306a36Sopenharmony_ci					       struct buffer_head **di_bh,
221962306a36Sopenharmony_ci					       int meta_level,
222062306a36Sopenharmony_ci					       int write_sem)
222162306a36Sopenharmony_ci{
222262306a36Sopenharmony_ci	if (write_sem)
222362306a36Sopenharmony_ci		up_write(&OCFS2_I(inode)->ip_alloc_sem);
222462306a36Sopenharmony_ci	else
222562306a36Sopenharmony_ci		up_read(&OCFS2_I(inode)->ip_alloc_sem);
222662306a36Sopenharmony_ci
222762306a36Sopenharmony_ci	brelse(*di_bh);
222862306a36Sopenharmony_ci	*di_bh = NULL;
222962306a36Sopenharmony_ci
223062306a36Sopenharmony_ci	if (meta_level >= 0)
223162306a36Sopenharmony_ci		ocfs2_inode_unlock(inode, meta_level);
223262306a36Sopenharmony_ci}
223362306a36Sopenharmony_ci
223462306a36Sopenharmony_cistatic int ocfs2_prepare_inode_for_write(struct file *file,
223562306a36Sopenharmony_ci					 loff_t pos, size_t count, int wait)
223662306a36Sopenharmony_ci{
223762306a36Sopenharmony_ci	int ret = 0, meta_level = 0, overwrite_io = 0;
223862306a36Sopenharmony_ci	int write_sem = 0;
223962306a36Sopenharmony_ci	struct dentry *dentry = file->f_path.dentry;
224062306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
224162306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
224262306a36Sopenharmony_ci	u32 cpos;
224362306a36Sopenharmony_ci	u32 clusters;
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_ci	/*
224662306a36Sopenharmony_ci	 * We start with a read level meta lock and only jump to an ex
224762306a36Sopenharmony_ci	 * if we need to make modifications here.
224862306a36Sopenharmony_ci	 */
224962306a36Sopenharmony_ci	for(;;) {
225062306a36Sopenharmony_ci		ret = ocfs2_inode_lock_for_extent_tree(inode,
225162306a36Sopenharmony_ci						       &di_bh,
225262306a36Sopenharmony_ci						       meta_level,
225362306a36Sopenharmony_ci						       write_sem,
225462306a36Sopenharmony_ci						       wait);
225562306a36Sopenharmony_ci		if (ret < 0) {
225662306a36Sopenharmony_ci			if (ret != -EAGAIN)
225762306a36Sopenharmony_ci				mlog_errno(ret);
225862306a36Sopenharmony_ci			goto out;
225962306a36Sopenharmony_ci		}
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_ci		/*
226262306a36Sopenharmony_ci		 * Check if IO will overwrite allocated blocks in case
226362306a36Sopenharmony_ci		 * IOCB_NOWAIT flag is set.
226462306a36Sopenharmony_ci		 */
226562306a36Sopenharmony_ci		if (!wait && !overwrite_io) {
226662306a36Sopenharmony_ci			overwrite_io = 1;
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci			ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
226962306a36Sopenharmony_ci			if (ret < 0) {
227062306a36Sopenharmony_ci				if (ret != -EAGAIN)
227162306a36Sopenharmony_ci					mlog_errno(ret);
227262306a36Sopenharmony_ci				goto out_unlock;
227362306a36Sopenharmony_ci			}
227462306a36Sopenharmony_ci		}
227562306a36Sopenharmony_ci
227662306a36Sopenharmony_ci		/* Clear suid / sgid if necessary. We do this here
227762306a36Sopenharmony_ci		 * instead of later in the write path because
227862306a36Sopenharmony_ci		 * remove_suid() calls ->setattr without any hint that
227962306a36Sopenharmony_ci		 * we may have already done our cluster locking. Since
228062306a36Sopenharmony_ci		 * ocfs2_setattr() *must* take cluster locks to
228162306a36Sopenharmony_ci		 * proceed, this will lead us to recursively lock the
228262306a36Sopenharmony_ci		 * inode. There's also the dinode i_size state which
228362306a36Sopenharmony_ci		 * can be lost via setattr during extending writes (we
228462306a36Sopenharmony_ci		 * set inode->i_size at the end of a write. */
228562306a36Sopenharmony_ci		if (setattr_should_drop_suidgid(&nop_mnt_idmap, inode)) {
228662306a36Sopenharmony_ci			if (meta_level == 0) {
228762306a36Sopenharmony_ci				ocfs2_inode_unlock_for_extent_tree(inode,
228862306a36Sopenharmony_ci								   &di_bh,
228962306a36Sopenharmony_ci								   meta_level,
229062306a36Sopenharmony_ci								   write_sem);
229162306a36Sopenharmony_ci				meta_level = 1;
229262306a36Sopenharmony_ci				continue;
229362306a36Sopenharmony_ci			}
229462306a36Sopenharmony_ci
229562306a36Sopenharmony_ci			ret = ocfs2_write_remove_suid(inode);
229662306a36Sopenharmony_ci			if (ret < 0) {
229762306a36Sopenharmony_ci				mlog_errno(ret);
229862306a36Sopenharmony_ci				goto out_unlock;
229962306a36Sopenharmony_ci			}
230062306a36Sopenharmony_ci		}
230162306a36Sopenharmony_ci
230262306a36Sopenharmony_ci		ret = ocfs2_check_range_for_refcount(inode, pos, count);
230362306a36Sopenharmony_ci		if (ret == 1) {
230462306a36Sopenharmony_ci			ocfs2_inode_unlock_for_extent_tree(inode,
230562306a36Sopenharmony_ci							   &di_bh,
230662306a36Sopenharmony_ci							   meta_level,
230762306a36Sopenharmony_ci							   write_sem);
230862306a36Sopenharmony_ci			meta_level = 1;
230962306a36Sopenharmony_ci			write_sem = 1;
231062306a36Sopenharmony_ci			ret = ocfs2_inode_lock_for_extent_tree(inode,
231162306a36Sopenharmony_ci							       &di_bh,
231262306a36Sopenharmony_ci							       meta_level,
231362306a36Sopenharmony_ci							       write_sem,
231462306a36Sopenharmony_ci							       wait);
231562306a36Sopenharmony_ci			if (ret < 0) {
231662306a36Sopenharmony_ci				if (ret != -EAGAIN)
231762306a36Sopenharmony_ci					mlog_errno(ret);
231862306a36Sopenharmony_ci				goto out;
231962306a36Sopenharmony_ci			}
232062306a36Sopenharmony_ci
232162306a36Sopenharmony_ci			cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
232262306a36Sopenharmony_ci			clusters =
232362306a36Sopenharmony_ci				ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
232462306a36Sopenharmony_ci			ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
232562306a36Sopenharmony_ci		}
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci		if (ret < 0) {
232862306a36Sopenharmony_ci			if (ret != -EAGAIN)
232962306a36Sopenharmony_ci				mlog_errno(ret);
233062306a36Sopenharmony_ci			goto out_unlock;
233162306a36Sopenharmony_ci		}
233262306a36Sopenharmony_ci
233362306a36Sopenharmony_ci		break;
233462306a36Sopenharmony_ci	}
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_ciout_unlock:
233762306a36Sopenharmony_ci	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
233862306a36Sopenharmony_ci					    pos, count, wait);
233962306a36Sopenharmony_ci
234062306a36Sopenharmony_ci	ocfs2_inode_unlock_for_extent_tree(inode,
234162306a36Sopenharmony_ci					   &di_bh,
234262306a36Sopenharmony_ci					   meta_level,
234362306a36Sopenharmony_ci					   write_sem);
234462306a36Sopenharmony_ci
234562306a36Sopenharmony_ciout:
234662306a36Sopenharmony_ci	return ret;
234762306a36Sopenharmony_ci}
234862306a36Sopenharmony_ci
234962306a36Sopenharmony_cistatic ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
235062306a36Sopenharmony_ci				    struct iov_iter *from)
235162306a36Sopenharmony_ci{
235262306a36Sopenharmony_ci	int rw_level;
235362306a36Sopenharmony_ci	ssize_t written = 0;
235462306a36Sopenharmony_ci	ssize_t ret;
235562306a36Sopenharmony_ci	size_t count = iov_iter_count(from);
235662306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
235762306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
235862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
235962306a36Sopenharmony_ci	int full_coherency = !(osb->s_mount_opt &
236062306a36Sopenharmony_ci			       OCFS2_MOUNT_COHERENCY_BUFFERED);
236162306a36Sopenharmony_ci	void *saved_ki_complete = NULL;
236262306a36Sopenharmony_ci	int append_write = ((iocb->ki_pos + count) >=
236362306a36Sopenharmony_ci			i_size_read(inode) ? 1 : 0);
236462306a36Sopenharmony_ci	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
236562306a36Sopenharmony_ci	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
236662306a36Sopenharmony_ci
236762306a36Sopenharmony_ci	trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
236862306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
236962306a36Sopenharmony_ci		file->f_path.dentry->d_name.len,
237062306a36Sopenharmony_ci		file->f_path.dentry->d_name.name,
237162306a36Sopenharmony_ci		(unsigned int)from->nr_segs);	/* GRRRRR */
237262306a36Sopenharmony_ci
237362306a36Sopenharmony_ci	if (!direct_io && nowait)
237462306a36Sopenharmony_ci		return -EOPNOTSUPP;
237562306a36Sopenharmony_ci
237662306a36Sopenharmony_ci	if (count == 0)
237762306a36Sopenharmony_ci		return 0;
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_ci	if (nowait) {
238062306a36Sopenharmony_ci		if (!inode_trylock(inode))
238162306a36Sopenharmony_ci			return -EAGAIN;
238262306a36Sopenharmony_ci	} else
238362306a36Sopenharmony_ci		inode_lock(inode);
238462306a36Sopenharmony_ci
238562306a36Sopenharmony_ci	/*
238662306a36Sopenharmony_ci	 * Concurrent O_DIRECT writes are allowed with
238762306a36Sopenharmony_ci	 * mount_option "coherency=buffered".
238862306a36Sopenharmony_ci	 * For append write, we must take rw EX.
238962306a36Sopenharmony_ci	 */
239062306a36Sopenharmony_ci	rw_level = (!direct_io || full_coherency || append_write);
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci	if (nowait)
239362306a36Sopenharmony_ci		ret = ocfs2_try_rw_lock(inode, rw_level);
239462306a36Sopenharmony_ci	else
239562306a36Sopenharmony_ci		ret = ocfs2_rw_lock(inode, rw_level);
239662306a36Sopenharmony_ci	if (ret < 0) {
239762306a36Sopenharmony_ci		if (ret != -EAGAIN)
239862306a36Sopenharmony_ci			mlog_errno(ret);
239962306a36Sopenharmony_ci		goto out_mutex;
240062306a36Sopenharmony_ci	}
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	/*
240362306a36Sopenharmony_ci	 * O_DIRECT writes with "coherency=full" need to take EX cluster
240462306a36Sopenharmony_ci	 * inode_lock to guarantee coherency.
240562306a36Sopenharmony_ci	 */
240662306a36Sopenharmony_ci	if (direct_io && full_coherency) {
240762306a36Sopenharmony_ci		/*
240862306a36Sopenharmony_ci		 * We need to take and drop the inode lock to force
240962306a36Sopenharmony_ci		 * other nodes to drop their caches.  Buffered I/O
241062306a36Sopenharmony_ci		 * already does this in write_begin().
241162306a36Sopenharmony_ci		 */
241262306a36Sopenharmony_ci		if (nowait)
241362306a36Sopenharmony_ci			ret = ocfs2_try_inode_lock(inode, NULL, 1);
241462306a36Sopenharmony_ci		else
241562306a36Sopenharmony_ci			ret = ocfs2_inode_lock(inode, NULL, 1);
241662306a36Sopenharmony_ci		if (ret < 0) {
241762306a36Sopenharmony_ci			if (ret != -EAGAIN)
241862306a36Sopenharmony_ci				mlog_errno(ret);
241962306a36Sopenharmony_ci			goto out;
242062306a36Sopenharmony_ci		}
242162306a36Sopenharmony_ci
242262306a36Sopenharmony_ci		ocfs2_inode_unlock(inode, 1);
242362306a36Sopenharmony_ci	}
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci	ret = generic_write_checks(iocb, from);
242662306a36Sopenharmony_ci	if (ret <= 0) {
242762306a36Sopenharmony_ci		if (ret)
242862306a36Sopenharmony_ci			mlog_errno(ret);
242962306a36Sopenharmony_ci		goto out;
243062306a36Sopenharmony_ci	}
243162306a36Sopenharmony_ci	count = ret;
243262306a36Sopenharmony_ci
243362306a36Sopenharmony_ci	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
243462306a36Sopenharmony_ci	if (ret < 0) {
243562306a36Sopenharmony_ci		if (ret != -EAGAIN)
243662306a36Sopenharmony_ci			mlog_errno(ret);
243762306a36Sopenharmony_ci		goto out;
243862306a36Sopenharmony_ci	}
243962306a36Sopenharmony_ci
244062306a36Sopenharmony_ci	if (direct_io && !is_sync_kiocb(iocb) &&
244162306a36Sopenharmony_ci	    ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
244262306a36Sopenharmony_ci		/*
244362306a36Sopenharmony_ci		 * Make it a sync io if it's an unaligned aio.
244462306a36Sopenharmony_ci		 */
244562306a36Sopenharmony_ci		saved_ki_complete = xchg(&iocb->ki_complete, NULL);
244662306a36Sopenharmony_ci	}
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci	/* communicate with ocfs2_dio_end_io */
244962306a36Sopenharmony_ci	ocfs2_iocb_set_rw_locked(iocb, rw_level);
245062306a36Sopenharmony_ci
245162306a36Sopenharmony_ci	written = __generic_file_write_iter(iocb, from);
245262306a36Sopenharmony_ci	/* buffered aio wouldn't have proper lock coverage today */
245362306a36Sopenharmony_ci	BUG_ON(written == -EIOCBQUEUED && !direct_io);
245462306a36Sopenharmony_ci
245562306a36Sopenharmony_ci	/*
245662306a36Sopenharmony_ci	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
245762306a36Sopenharmony_ci	 * function pointer which is called when o_direct io completes so that
245862306a36Sopenharmony_ci	 * it can unlock our rw lock.
245962306a36Sopenharmony_ci	 * Unfortunately there are error cases which call end_io and others
246062306a36Sopenharmony_ci	 * that don't.  so we don't have to unlock the rw_lock if either an
246162306a36Sopenharmony_ci	 * async dio is going to do it in the future or an end_io after an
246262306a36Sopenharmony_ci	 * error has already done it.
246362306a36Sopenharmony_ci	 */
246462306a36Sopenharmony_ci	if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
246562306a36Sopenharmony_ci		rw_level = -1;
246662306a36Sopenharmony_ci	}
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci	if (unlikely(written <= 0))
246962306a36Sopenharmony_ci		goto out;
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci	if (((file->f_flags & O_DSYNC) && !direct_io) ||
247262306a36Sopenharmony_ci	    IS_SYNC(inode)) {
247362306a36Sopenharmony_ci		ret = filemap_fdatawrite_range(file->f_mapping,
247462306a36Sopenharmony_ci					       iocb->ki_pos - written,
247562306a36Sopenharmony_ci					       iocb->ki_pos - 1);
247662306a36Sopenharmony_ci		if (ret < 0)
247762306a36Sopenharmony_ci			written = ret;
247862306a36Sopenharmony_ci
247962306a36Sopenharmony_ci		if (!ret) {
248062306a36Sopenharmony_ci			ret = jbd2_journal_force_commit(osb->journal->j_journal);
248162306a36Sopenharmony_ci			if (ret < 0)
248262306a36Sopenharmony_ci				written = ret;
248362306a36Sopenharmony_ci		}
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci		if (!ret)
248662306a36Sopenharmony_ci			ret = filemap_fdatawait_range(file->f_mapping,
248762306a36Sopenharmony_ci						      iocb->ki_pos - written,
248862306a36Sopenharmony_ci						      iocb->ki_pos - 1);
248962306a36Sopenharmony_ci	}
249062306a36Sopenharmony_ci
249162306a36Sopenharmony_ciout:
249262306a36Sopenharmony_ci	if (saved_ki_complete)
249362306a36Sopenharmony_ci		xchg(&iocb->ki_complete, saved_ki_complete);
249462306a36Sopenharmony_ci
249562306a36Sopenharmony_ci	if (rw_level != -1)
249662306a36Sopenharmony_ci		ocfs2_rw_unlock(inode, rw_level);
249762306a36Sopenharmony_ci
249862306a36Sopenharmony_ciout_mutex:
249962306a36Sopenharmony_ci	inode_unlock(inode);
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	if (written)
250262306a36Sopenharmony_ci		ret = written;
250362306a36Sopenharmony_ci	return ret;
250462306a36Sopenharmony_ci}
250562306a36Sopenharmony_ci
250662306a36Sopenharmony_cistatic ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
250762306a36Sopenharmony_ci				   struct iov_iter *to)
250862306a36Sopenharmony_ci{
250962306a36Sopenharmony_ci	int ret = 0, rw_level = -1, lock_level = 0;
251062306a36Sopenharmony_ci	struct file *filp = iocb->ki_filp;
251162306a36Sopenharmony_ci	struct inode *inode = file_inode(filp);
251262306a36Sopenharmony_ci	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
251362306a36Sopenharmony_ci	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci	trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry,
251662306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
251762306a36Sopenharmony_ci			filp->f_path.dentry->d_name.len,
251862306a36Sopenharmony_ci			filp->f_path.dentry->d_name.name,
251962306a36Sopenharmony_ci			to->nr_segs);	/* GRRRRR */
252062306a36Sopenharmony_ci
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci	if (!inode) {
252362306a36Sopenharmony_ci		ret = -EINVAL;
252462306a36Sopenharmony_ci		mlog_errno(ret);
252562306a36Sopenharmony_ci		goto bail;
252662306a36Sopenharmony_ci	}
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci	if (!direct_io && nowait)
252962306a36Sopenharmony_ci		return -EOPNOTSUPP;
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci	/*
253262306a36Sopenharmony_ci	 * buffered reads protect themselves in ->read_folio().  O_DIRECT reads
253362306a36Sopenharmony_ci	 * need locks to protect pending reads from racing with truncate.
253462306a36Sopenharmony_ci	 */
253562306a36Sopenharmony_ci	if (direct_io) {
253662306a36Sopenharmony_ci		if (nowait)
253762306a36Sopenharmony_ci			ret = ocfs2_try_rw_lock(inode, 0);
253862306a36Sopenharmony_ci		else
253962306a36Sopenharmony_ci			ret = ocfs2_rw_lock(inode, 0);
254062306a36Sopenharmony_ci
254162306a36Sopenharmony_ci		if (ret < 0) {
254262306a36Sopenharmony_ci			if (ret != -EAGAIN)
254362306a36Sopenharmony_ci				mlog_errno(ret);
254462306a36Sopenharmony_ci			goto bail;
254562306a36Sopenharmony_ci		}
254662306a36Sopenharmony_ci		rw_level = 0;
254762306a36Sopenharmony_ci		/* communicate with ocfs2_dio_end_io */
254862306a36Sopenharmony_ci		ocfs2_iocb_set_rw_locked(iocb, rw_level);
254962306a36Sopenharmony_ci	}
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	/*
255262306a36Sopenharmony_ci	 * We're fine letting folks race truncates and extending
255362306a36Sopenharmony_ci	 * writes with read across the cluster, just like they can
255462306a36Sopenharmony_ci	 * locally. Hence no rw_lock during read.
255562306a36Sopenharmony_ci	 *
255662306a36Sopenharmony_ci	 * Take and drop the meta data lock to update inode fields
255762306a36Sopenharmony_ci	 * like i_size. This allows the checks down below
255862306a36Sopenharmony_ci	 * copy_splice_read() a chance of actually working.
255962306a36Sopenharmony_ci	 */
256062306a36Sopenharmony_ci	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
256162306a36Sopenharmony_ci				     !nowait);
256262306a36Sopenharmony_ci	if (ret < 0) {
256362306a36Sopenharmony_ci		if (ret != -EAGAIN)
256462306a36Sopenharmony_ci			mlog_errno(ret);
256562306a36Sopenharmony_ci		goto bail;
256662306a36Sopenharmony_ci	}
256762306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, lock_level);
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci	ret = generic_file_read_iter(iocb, to);
257062306a36Sopenharmony_ci	trace_generic_file_read_iter_ret(ret);
257162306a36Sopenharmony_ci
257262306a36Sopenharmony_ci	/* buffered aio wouldn't have proper lock coverage today */
257362306a36Sopenharmony_ci	BUG_ON(ret == -EIOCBQUEUED && !direct_io);
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci	/* see ocfs2_file_write_iter */
257662306a36Sopenharmony_ci	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
257762306a36Sopenharmony_ci		rw_level = -1;
257862306a36Sopenharmony_ci	}
257962306a36Sopenharmony_ci
258062306a36Sopenharmony_cibail:
258162306a36Sopenharmony_ci	if (rw_level != -1)
258262306a36Sopenharmony_ci		ocfs2_rw_unlock(inode, rw_level);
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci	return ret;
258562306a36Sopenharmony_ci}
258662306a36Sopenharmony_ci
258762306a36Sopenharmony_cistatic ssize_t ocfs2_file_splice_read(struct file *in, loff_t *ppos,
258862306a36Sopenharmony_ci				      struct pipe_inode_info *pipe,
258962306a36Sopenharmony_ci				      size_t len, unsigned int flags)
259062306a36Sopenharmony_ci{
259162306a36Sopenharmony_ci	struct inode *inode = file_inode(in);
259262306a36Sopenharmony_ci	ssize_t ret = 0;
259362306a36Sopenharmony_ci	int lock_level = 0;
259462306a36Sopenharmony_ci
259562306a36Sopenharmony_ci	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
259662306a36Sopenharmony_ci				     (unsigned long long)OCFS2_I(inode)->ip_blkno,
259762306a36Sopenharmony_ci				     in->f_path.dentry->d_name.len,
259862306a36Sopenharmony_ci				     in->f_path.dentry->d_name.name,
259962306a36Sopenharmony_ci				     flags);
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci	/*
260262306a36Sopenharmony_ci	 * We're fine letting folks race truncates and extending writes with
260362306a36Sopenharmony_ci	 * read across the cluster, just like they can locally.  Hence no
260462306a36Sopenharmony_ci	 * rw_lock during read.
260562306a36Sopenharmony_ci	 *
260662306a36Sopenharmony_ci	 * Take and drop the meta data lock to update inode fields like i_size.
260762306a36Sopenharmony_ci	 * This allows the checks down below filemap_splice_read() a chance of
260862306a36Sopenharmony_ci	 * actually working.
260962306a36Sopenharmony_ci	 */
261062306a36Sopenharmony_ci	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level, 1);
261162306a36Sopenharmony_ci	if (ret < 0) {
261262306a36Sopenharmony_ci		if (ret != -EAGAIN)
261362306a36Sopenharmony_ci			mlog_errno(ret);
261462306a36Sopenharmony_ci		goto bail;
261562306a36Sopenharmony_ci	}
261662306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, lock_level);
261762306a36Sopenharmony_ci
261862306a36Sopenharmony_ci	ret = filemap_splice_read(in, ppos, pipe, len, flags);
261962306a36Sopenharmony_ci	trace_filemap_splice_read_ret(ret);
262062306a36Sopenharmony_cibail:
262162306a36Sopenharmony_ci	return ret;
262262306a36Sopenharmony_ci}
262362306a36Sopenharmony_ci
262462306a36Sopenharmony_ci/* Refer generic_file_llseek_unlocked() */
262562306a36Sopenharmony_cistatic loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
262662306a36Sopenharmony_ci{
262762306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
262862306a36Sopenharmony_ci	int ret = 0;
262962306a36Sopenharmony_ci
263062306a36Sopenharmony_ci	inode_lock(inode);
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	switch (whence) {
263362306a36Sopenharmony_ci	case SEEK_SET:
263462306a36Sopenharmony_ci		break;
263562306a36Sopenharmony_ci	case SEEK_END:
263662306a36Sopenharmony_ci		/* SEEK_END requires the OCFS2 inode lock for the file
263762306a36Sopenharmony_ci		 * because it references the file's size.
263862306a36Sopenharmony_ci		 */
263962306a36Sopenharmony_ci		ret = ocfs2_inode_lock(inode, NULL, 0);
264062306a36Sopenharmony_ci		if (ret < 0) {
264162306a36Sopenharmony_ci			mlog_errno(ret);
264262306a36Sopenharmony_ci			goto out;
264362306a36Sopenharmony_ci		}
264462306a36Sopenharmony_ci		offset += i_size_read(inode);
264562306a36Sopenharmony_ci		ocfs2_inode_unlock(inode, 0);
264662306a36Sopenharmony_ci		break;
264762306a36Sopenharmony_ci	case SEEK_CUR:
264862306a36Sopenharmony_ci		if (offset == 0) {
264962306a36Sopenharmony_ci			offset = file->f_pos;
265062306a36Sopenharmony_ci			goto out;
265162306a36Sopenharmony_ci		}
265262306a36Sopenharmony_ci		offset += file->f_pos;
265362306a36Sopenharmony_ci		break;
265462306a36Sopenharmony_ci	case SEEK_DATA:
265562306a36Sopenharmony_ci	case SEEK_HOLE:
265662306a36Sopenharmony_ci		ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
265762306a36Sopenharmony_ci		if (ret)
265862306a36Sopenharmony_ci			goto out;
265962306a36Sopenharmony_ci		break;
266062306a36Sopenharmony_ci	default:
266162306a36Sopenharmony_ci		ret = -EINVAL;
266262306a36Sopenharmony_ci		goto out;
266362306a36Sopenharmony_ci	}
266462306a36Sopenharmony_ci
266562306a36Sopenharmony_ci	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
266662306a36Sopenharmony_ci
266762306a36Sopenharmony_ciout:
266862306a36Sopenharmony_ci	inode_unlock(inode);
266962306a36Sopenharmony_ci	if (ret)
267062306a36Sopenharmony_ci		return ret;
267162306a36Sopenharmony_ci	return offset;
267262306a36Sopenharmony_ci}
267362306a36Sopenharmony_ci
267462306a36Sopenharmony_cistatic loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
267562306a36Sopenharmony_ci				     struct file *file_out, loff_t pos_out,
267662306a36Sopenharmony_ci				     loff_t len, unsigned int remap_flags)
267762306a36Sopenharmony_ci{
267862306a36Sopenharmony_ci	struct inode *inode_in = file_inode(file_in);
267962306a36Sopenharmony_ci	struct inode *inode_out = file_inode(file_out);
268062306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
268162306a36Sopenharmony_ci	struct buffer_head *in_bh = NULL, *out_bh = NULL;
268262306a36Sopenharmony_ci	bool same_inode = (inode_in == inode_out);
268362306a36Sopenharmony_ci	loff_t remapped = 0;
268462306a36Sopenharmony_ci	ssize_t ret;
268562306a36Sopenharmony_ci
268662306a36Sopenharmony_ci	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
268762306a36Sopenharmony_ci		return -EINVAL;
268862306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(osb))
268962306a36Sopenharmony_ci		return -EOPNOTSUPP;
269062306a36Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
269162306a36Sopenharmony_ci		return -EROFS;
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci	/* Lock both files against IO */
269462306a36Sopenharmony_ci	ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
269562306a36Sopenharmony_ci	if (ret)
269662306a36Sopenharmony_ci		return ret;
269762306a36Sopenharmony_ci
269862306a36Sopenharmony_ci	/* Check file eligibility and prepare for block sharing. */
269962306a36Sopenharmony_ci	ret = -EINVAL;
270062306a36Sopenharmony_ci	if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
270162306a36Sopenharmony_ci	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
270262306a36Sopenharmony_ci		goto out_unlock;
270362306a36Sopenharmony_ci
270462306a36Sopenharmony_ci	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
270562306a36Sopenharmony_ci			&len, remap_flags);
270662306a36Sopenharmony_ci	if (ret < 0 || len == 0)
270762306a36Sopenharmony_ci		goto out_unlock;
270862306a36Sopenharmony_ci
270962306a36Sopenharmony_ci	/* Lock out changes to the allocation maps and remap. */
271062306a36Sopenharmony_ci	down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
271162306a36Sopenharmony_ci	if (!same_inode)
271262306a36Sopenharmony_ci		down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
271362306a36Sopenharmony_ci				  SINGLE_DEPTH_NESTING);
271462306a36Sopenharmony_ci
271562306a36Sopenharmony_ci	/* Zap any page cache for the destination file's range. */
271662306a36Sopenharmony_ci	truncate_inode_pages_range(&inode_out->i_data,
271762306a36Sopenharmony_ci				   round_down(pos_out, PAGE_SIZE),
271862306a36Sopenharmony_ci				   round_up(pos_out + len, PAGE_SIZE) - 1);
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
272162306a36Sopenharmony_ci			inode_out, out_bh, pos_out, len);
272262306a36Sopenharmony_ci	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
272362306a36Sopenharmony_ci	if (!same_inode)
272462306a36Sopenharmony_ci		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
272562306a36Sopenharmony_ci	if (remapped < 0) {
272662306a36Sopenharmony_ci		ret = remapped;
272762306a36Sopenharmony_ci		mlog_errno(ret);
272862306a36Sopenharmony_ci		goto out_unlock;
272962306a36Sopenharmony_ci	}
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_ci	/*
273262306a36Sopenharmony_ci	 * Empty the extent map so that we may get the right extent
273362306a36Sopenharmony_ci	 * record from the disk.
273462306a36Sopenharmony_ci	 */
273562306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode_in, 0);
273662306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode_out, 0);
273762306a36Sopenharmony_ci
273862306a36Sopenharmony_ci	ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
273962306a36Sopenharmony_ci	if (ret) {
274062306a36Sopenharmony_ci		mlog_errno(ret);
274162306a36Sopenharmony_ci		goto out_unlock;
274262306a36Sopenharmony_ci	}
274362306a36Sopenharmony_ci
274462306a36Sopenharmony_ciout_unlock:
274562306a36Sopenharmony_ci	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
274662306a36Sopenharmony_ci	return remapped > 0 ? remapped : ret;
274762306a36Sopenharmony_ci}
274862306a36Sopenharmony_ci
/*
 * Inode operations table.  Compared with ocfs2_special_file_iops it
 * additionally provides listxattr, fiemap and the fileattr get/set hooks.
 * NOTE(review): presumably installed on regular files - confirm where it
 * is assigned.
 */
const struct inode_operations ocfs2_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
	.listxattr	= ocfs2_listxattr,
	.fiemap		= ocfs2_fiemap,
	.get_inode_acl	= ocfs2_iop_get_acl,
	.set_acl	= ocfs2_iop_set_acl,
	.fileattr_get	= ocfs2_fileattr_get,
	.fileattr_set	= ocfs2_fileattr_set,
};
276062306a36Sopenharmony_ci
/*
 * Reduced inode operations table: attribute, permission and ACL hooks only
 * (no listxattr, fiemap or fileattr entries).
 * NOTE(review): presumably used for special (non-regular) files, going by
 * the name - confirm where it is assigned.
 */
const struct inode_operations ocfs2_special_file_iops = {
	.setattr	= ocfs2_setattr,
	.getattr	= ocfs2_getattr,
	.permission	= ocfs2_permission,
	.get_inode_acl	= ocfs2_iop_get_acl,
	.set_acl	= ocfs2_iop_set_acl,
};
276862306a36Sopenharmony_ci
/*
 * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
 * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
 *
 * ocfs2_fops is the file_operations table that includes ->lock
 * (ocfs2_lock) alongside ->flock.
 */
const struct file_operations ocfs2_fops = {
	.llseek		= ocfs2_file_llseek,
	.mmap		= ocfs2_mmap,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_file_release,
	.open		= ocfs2_file_open,
	.read_iter	= ocfs2_file_read_iter,
	.write_iter	= ocfs2_file_write_iter,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = ocfs2_compat_ioctl,
#endif
	.lock		= ocfs2_lock,
	.flock		= ocfs2_flock,
	.splice_read	= ocfs2_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ocfs2_fallocate,
	.remap_file_range = ocfs2_remap_file_range,
};
279262306a36Sopenharmony_ci
/*
 * NOTE(review): WRAP_DIR_ITER(ocfs2_readdir) presumably defines the
 * shared_ocfs2_readdir wrapper referenced by ->iterate_shared below -
 * confirm against the macro definition.
 */
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
/* Directory file_operations table that includes ->lock (ocfs2_lock). */
const struct file_operations ocfs2_dops = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= shared_ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_dir_release,
	.open		= ocfs2_dir_open,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = ocfs2_compat_ioctl,
#endif
	.lock		= ocfs2_lock,
	.flock		= ocfs2_flock,
};
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci/*
281062306a36Sopenharmony_ci * POSIX-lockless variants of our file_operations.
281162306a36Sopenharmony_ci *
281262306a36Sopenharmony_ci * These will be used if the underlying cluster stack does not support
281362306a36Sopenharmony_ci * posix file locking, if the user passes the "localflocks" mount
281462306a36Sopenharmony_ci * option, or if we have a local-only fs.
281562306a36Sopenharmony_ci *
281662306a36Sopenharmony_ci * ocfs2_flock is in here because all stacks handle UNIX file locks,
281762306a36Sopenharmony_ci * so we still want it in the case of no stack support for
281862306a36Sopenharmony_ci * plocks. Internally, it will do the right thing when asked to ignore
281962306a36Sopenharmony_ci * the cluster.
282062306a36Sopenharmony_ci */
282162306a36Sopenharmony_ciconst struct file_operations ocfs2_fops_no_plocks = {
282262306a36Sopenharmony_ci	.llseek		= ocfs2_file_llseek,
282362306a36Sopenharmony_ci	.mmap		= ocfs2_mmap,
282462306a36Sopenharmony_ci	.fsync		= ocfs2_sync_file,
282562306a36Sopenharmony_ci	.release	= ocfs2_file_release,
282662306a36Sopenharmony_ci	.open		= ocfs2_file_open,
282762306a36Sopenharmony_ci	.read_iter	= ocfs2_file_read_iter,
282862306a36Sopenharmony_ci	.write_iter	= ocfs2_file_write_iter,
282962306a36Sopenharmony_ci	.unlocked_ioctl	= ocfs2_ioctl,
283062306a36Sopenharmony_ci#ifdef CONFIG_COMPAT
283162306a36Sopenharmony_ci	.compat_ioctl   = ocfs2_compat_ioctl,
283262306a36Sopenharmony_ci#endif
283362306a36Sopenharmony_ci	.flock		= ocfs2_flock,
283462306a36Sopenharmony_ci	.splice_read	= filemap_splice_read,
283562306a36Sopenharmony_ci	.splice_write	= iter_file_splice_write,
283662306a36Sopenharmony_ci	.fallocate	= ocfs2_fallocate,
283762306a36Sopenharmony_ci	.remap_file_range = ocfs2_remap_file_range,
283862306a36Sopenharmony_ci};
283962306a36Sopenharmony_ci
/*
 * Directory file_operations without ->lock; every other entry matches
 * ocfs2_dops.
 */
const struct file_operations ocfs2_dops_no_plocks = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= shared_ocfs2_readdir,
	.fsync		= ocfs2_sync_file,
	.release	= ocfs2_dir_release,
	.open		= ocfs2_dir_open,
	.unlocked_ioctl	= ocfs2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = ocfs2_compat_ioctl,
#endif
	.flock		= ocfs2_flock,
};
2853