// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * file.c
 *
 * File open, close, extend, truncate
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/capability.h>
138c2ecf20Sopenharmony_ci#include <linux/fs.h>
148c2ecf20Sopenharmony_ci#include <linux/types.h>
158c2ecf20Sopenharmony_ci#include <linux/slab.h>
168c2ecf20Sopenharmony_ci#include <linux/highmem.h>
178c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
188c2ecf20Sopenharmony_ci#include <linux/uio.h>
198c2ecf20Sopenharmony_ci#include <linux/sched.h>
208c2ecf20Sopenharmony_ci#include <linux/splice.h>
218c2ecf20Sopenharmony_ci#include <linux/mount.h>
228c2ecf20Sopenharmony_ci#include <linux/writeback.h>
238c2ecf20Sopenharmony_ci#include <linux/falloc.h>
248c2ecf20Sopenharmony_ci#include <linux/quotaops.h>
258c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
268c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci#include <cluster/masklog.h>
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#include "ocfs2.h"
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include "alloc.h"
338c2ecf20Sopenharmony_ci#include "aops.h"
348c2ecf20Sopenharmony_ci#include "dir.h"
358c2ecf20Sopenharmony_ci#include "dlmglue.h"
368c2ecf20Sopenharmony_ci#include "extent_map.h"
378c2ecf20Sopenharmony_ci#include "file.h"
388c2ecf20Sopenharmony_ci#include "sysfile.h"
398c2ecf20Sopenharmony_ci#include "inode.h"
408c2ecf20Sopenharmony_ci#include "ioctl.h"
418c2ecf20Sopenharmony_ci#include "journal.h"
428c2ecf20Sopenharmony_ci#include "locks.h"
438c2ecf20Sopenharmony_ci#include "mmap.h"
448c2ecf20Sopenharmony_ci#include "suballoc.h"
458c2ecf20Sopenharmony_ci#include "super.h"
468c2ecf20Sopenharmony_ci#include "xattr.h"
478c2ecf20Sopenharmony_ci#include "acl.h"
488c2ecf20Sopenharmony_ci#include "quota.h"
498c2ecf20Sopenharmony_ci#include "refcounttree.h"
508c2ecf20Sopenharmony_ci#include "ocfs2_trace.h"
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci#include "buffer_head_io.h"
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic int ocfs2_init_file_private(struct inode *inode, struct file *file)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	struct ocfs2_file_private *fp;
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
598c2ecf20Sopenharmony_ci	if (!fp)
608c2ecf20Sopenharmony_ci		return -ENOMEM;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	fp->fp_file = file;
638c2ecf20Sopenharmony_ci	mutex_init(&fp->fp_mutex);
648c2ecf20Sopenharmony_ci	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
658c2ecf20Sopenharmony_ci	file->private_data = fp;
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	return 0;
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic void ocfs2_free_file_private(struct inode *inode, struct file *file)
718c2ecf20Sopenharmony_ci{
728c2ecf20Sopenharmony_ci	struct ocfs2_file_private *fp = file->private_data;
738c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	if (fp) {
768c2ecf20Sopenharmony_ci		ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
778c2ecf20Sopenharmony_ci		ocfs2_lock_res_free(&fp->fp_flock);
788c2ecf20Sopenharmony_ci		kfree(fp);
798c2ecf20Sopenharmony_ci		file->private_data = NULL;
808c2ecf20Sopenharmony_ci	}
818c2ecf20Sopenharmony_ci}
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_cistatic int ocfs2_file_open(struct inode *inode, struct file *file)
848c2ecf20Sopenharmony_ci{
858c2ecf20Sopenharmony_ci	int status;
868c2ecf20Sopenharmony_ci	int mode = file->f_flags;
878c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	trace_ocfs2_file_open(inode, file, file->f_path.dentry,
908c2ecf20Sopenharmony_ci			      (unsigned long long)oi->ip_blkno,
918c2ecf20Sopenharmony_ci			      file->f_path.dentry->d_name.len,
928c2ecf20Sopenharmony_ci			      file->f_path.dentry->d_name.name, mode);
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	if (file->f_mode & FMODE_WRITE) {
958c2ecf20Sopenharmony_ci		status = dquot_initialize(inode);
968c2ecf20Sopenharmony_ci		if (status)
978c2ecf20Sopenharmony_ci			goto leave;
988c2ecf20Sopenharmony_ci	}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	spin_lock(&oi->ip_lock);
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	/* Check that the inode hasn't been wiped from disk by another
1038c2ecf20Sopenharmony_ci	 * node. If it hasn't then we're safe as long as we hold the
1048c2ecf20Sopenharmony_ci	 * spin lock until our increment of open count. */
1058c2ecf20Sopenharmony_ci	if (oi->ip_flags & OCFS2_INODE_DELETED) {
1068c2ecf20Sopenharmony_ci		spin_unlock(&oi->ip_lock);
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci		status = -ENOENT;
1098c2ecf20Sopenharmony_ci		goto leave;
1108c2ecf20Sopenharmony_ci	}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	if (mode & O_DIRECT)
1138c2ecf20Sopenharmony_ci		oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	oi->ip_open_count++;
1168c2ecf20Sopenharmony_ci	spin_unlock(&oi->ip_lock);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	status = ocfs2_init_file_private(inode, file);
1198c2ecf20Sopenharmony_ci	if (status) {
1208c2ecf20Sopenharmony_ci		/*
1218c2ecf20Sopenharmony_ci		 * We want to set open count back if we're failing the
1228c2ecf20Sopenharmony_ci		 * open.
1238c2ecf20Sopenharmony_ci		 */
1248c2ecf20Sopenharmony_ci		spin_lock(&oi->ip_lock);
1258c2ecf20Sopenharmony_ci		oi->ip_open_count--;
1268c2ecf20Sopenharmony_ci		spin_unlock(&oi->ip_lock);
1278c2ecf20Sopenharmony_ci	}
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	file->f_mode |= FMODE_NOWAIT;
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_cileave:
1328c2ecf20Sopenharmony_ci	return status;
1338c2ecf20Sopenharmony_ci}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_cistatic int ocfs2_file_release(struct inode *inode, struct file *file)
1368c2ecf20Sopenharmony_ci{
1378c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	spin_lock(&oi->ip_lock);
1408c2ecf20Sopenharmony_ci	if (!--oi->ip_open_count)
1418c2ecf20Sopenharmony_ci		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	trace_ocfs2_file_release(inode, file, file->f_path.dentry,
1448c2ecf20Sopenharmony_ci				 oi->ip_blkno,
1458c2ecf20Sopenharmony_ci				 file->f_path.dentry->d_name.len,
1468c2ecf20Sopenharmony_ci				 file->f_path.dentry->d_name.name,
1478c2ecf20Sopenharmony_ci				 oi->ip_open_count);
1488c2ecf20Sopenharmony_ci	spin_unlock(&oi->ip_lock);
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	ocfs2_free_file_private(inode, file);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	return 0;
1538c2ecf20Sopenharmony_ci}
1548c2ecf20Sopenharmony_ci
/*
 * Open handler for directories: only the per-open private data is set
 * up. Returns whatever ocfs2_init_file_private() returns.
 */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	int status;

	status = ocfs2_init_file_private(inode, file);

	return status;
}
1598c2ecf20Sopenharmony_ci
/*
 * Release handler for directories: frees the per-open private data
 * attached by ocfs2_dir_open(). Always returns 0.
 */
static int ocfs2_dir_release(struct inode *inode, struct file *file)
{
	ocfs2_free_file_private(inode, file);

	return 0;
}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_cistatic int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
1678c2ecf20Sopenharmony_ci			   int datasync)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	int err = 0;
1708c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
1718c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1728c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1738c2ecf20Sopenharmony_ci	journal_t *journal = osb->journal->j_journal;
1748c2ecf20Sopenharmony_ci	int ret;
1758c2ecf20Sopenharmony_ci	tid_t commit_tid;
1768c2ecf20Sopenharmony_ci	bool needs_barrier = false;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
1798c2ecf20Sopenharmony_ci			      oi->ip_blkno,
1808c2ecf20Sopenharmony_ci			      file->f_path.dentry->d_name.len,
1818c2ecf20Sopenharmony_ci			      file->f_path.dentry->d_name.name,
1828c2ecf20Sopenharmony_ci			      (unsigned long long)datasync);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
1858c2ecf20Sopenharmony_ci		return -EROFS;
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	err = file_write_and_wait_range(file, start, end);
1888c2ecf20Sopenharmony_ci	if (err)
1898c2ecf20Sopenharmony_ci		return err;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
1928c2ecf20Sopenharmony_ci	if (journal->j_flags & JBD2_BARRIER &&
1938c2ecf20Sopenharmony_ci	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
1948c2ecf20Sopenharmony_ci		needs_barrier = true;
1958c2ecf20Sopenharmony_ci	err = jbd2_complete_transaction(journal, commit_tid);
1968c2ecf20Sopenharmony_ci	if (needs_barrier) {
1978c2ecf20Sopenharmony_ci		ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
1988c2ecf20Sopenharmony_ci		if (!err)
1998c2ecf20Sopenharmony_ci			err = ret;
2008c2ecf20Sopenharmony_ci	}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	if (err)
2038c2ecf20Sopenharmony_ci		mlog_errno(err);
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci	return (err < 0) ? -EIO : 0;
2068c2ecf20Sopenharmony_ci}
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ciint ocfs2_should_update_atime(struct inode *inode,
2098c2ecf20Sopenharmony_ci			      struct vfsmount *vfsmnt)
2108c2ecf20Sopenharmony_ci{
2118c2ecf20Sopenharmony_ci	struct timespec64 now;
2128c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
2158c2ecf20Sopenharmony_ci		return 0;
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	if ((inode->i_flags & S_NOATIME) ||
2188c2ecf20Sopenharmony_ci	    ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)))
2198c2ecf20Sopenharmony_ci		return 0;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	/*
2228c2ecf20Sopenharmony_ci	 * We can be called with no vfsmnt structure - NFSD will
2238c2ecf20Sopenharmony_ci	 * sometimes do this.
2248c2ecf20Sopenharmony_ci	 *
2258c2ecf20Sopenharmony_ci	 * Note that our action here is different than touch_atime() -
2268c2ecf20Sopenharmony_ci	 * if we can't tell whether this is a noatime mount, then we
2278c2ecf20Sopenharmony_ci	 * don't know whether to trust the value of s_atime_quantum.
2288c2ecf20Sopenharmony_ci	 */
2298c2ecf20Sopenharmony_ci	if (vfsmnt == NULL)
2308c2ecf20Sopenharmony_ci		return 0;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
2338c2ecf20Sopenharmony_ci	    ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
2348c2ecf20Sopenharmony_ci		return 0;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	if (vfsmnt->mnt_flags & MNT_RELATIME) {
2378c2ecf20Sopenharmony_ci		if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
2388c2ecf20Sopenharmony_ci		    (timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0))
2398c2ecf20Sopenharmony_ci			return 1;
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci		return 0;
2428c2ecf20Sopenharmony_ci	}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	now = current_time(inode);
2458c2ecf20Sopenharmony_ci	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
2468c2ecf20Sopenharmony_ci		return 0;
2478c2ecf20Sopenharmony_ci	else
2488c2ecf20Sopenharmony_ci		return 1;
2498c2ecf20Sopenharmony_ci}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ciint ocfs2_update_inode_atime(struct inode *inode,
2528c2ecf20Sopenharmony_ci			     struct buffer_head *bh)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	int ret;
2558c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2568c2ecf20Sopenharmony_ci	handle_t *handle;
2578c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
2608c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
2618c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
2628c2ecf20Sopenharmony_ci		mlog_errno(ret);
2638c2ecf20Sopenharmony_ci		goto out;
2648c2ecf20Sopenharmony_ci	}
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
2678c2ecf20Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
2688c2ecf20Sopenharmony_ci	if (ret) {
2698c2ecf20Sopenharmony_ci		mlog_errno(ret);
2708c2ecf20Sopenharmony_ci		goto out_commit;
2718c2ecf20Sopenharmony_ci	}
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	/*
2748c2ecf20Sopenharmony_ci	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
2758c2ecf20Sopenharmony_ci	 * have i_mutex to guard against concurrent changes to other
2768c2ecf20Sopenharmony_ci	 * inode fields.
2778c2ecf20Sopenharmony_ci	 */
2788c2ecf20Sopenharmony_ci	inode->i_atime = current_time(inode);
2798c2ecf20Sopenharmony_ci	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
2808c2ecf20Sopenharmony_ci	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
2818c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
2828c2ecf20Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ciout_commit:
2858c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
2868c2ecf20Sopenharmony_ciout:
2878c2ecf20Sopenharmony_ci	return ret;
2888c2ecf20Sopenharmony_ci}
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ciint ocfs2_set_inode_size(handle_t *handle,
2918c2ecf20Sopenharmony_ci				struct inode *inode,
2928c2ecf20Sopenharmony_ci				struct buffer_head *fe_bh,
2938c2ecf20Sopenharmony_ci				u64 new_i_size)
2948c2ecf20Sopenharmony_ci{
2958c2ecf20Sopenharmony_ci	int status;
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	i_size_write(inode, new_i_size);
2988c2ecf20Sopenharmony_ci	inode->i_blocks = ocfs2_inode_sector_count(inode);
2998c2ecf20Sopenharmony_ci	inode->i_ctime = inode->i_mtime = current_time(inode);
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
3028c2ecf20Sopenharmony_ci	if (status < 0) {
3038c2ecf20Sopenharmony_ci		mlog_errno(status);
3048c2ecf20Sopenharmony_ci		goto bail;
3058c2ecf20Sopenharmony_ci	}
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_cibail:
3088c2ecf20Sopenharmony_ci	return status;
3098c2ecf20Sopenharmony_ci}
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ciint ocfs2_simple_size_update(struct inode *inode,
3128c2ecf20Sopenharmony_ci			     struct buffer_head *di_bh,
3138c2ecf20Sopenharmony_ci			     u64 new_i_size)
3148c2ecf20Sopenharmony_ci{
3158c2ecf20Sopenharmony_ci	int ret;
3168c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3178c2ecf20Sopenharmony_ci	handle_t *handle = NULL;
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
3208c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
3218c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
3228c2ecf20Sopenharmony_ci		mlog_errno(ret);
3238c2ecf20Sopenharmony_ci		goto out;
3248c2ecf20Sopenharmony_ci	}
3258c2ecf20Sopenharmony_ci
3268c2ecf20Sopenharmony_ci	ret = ocfs2_set_inode_size(handle, inode, di_bh,
3278c2ecf20Sopenharmony_ci				   new_i_size);
3288c2ecf20Sopenharmony_ci	if (ret < 0)
3298c2ecf20Sopenharmony_ci		mlog_errno(ret);
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
3328c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
3338c2ecf20Sopenharmony_ciout:
3348c2ecf20Sopenharmony_ci	return ret;
3358c2ecf20Sopenharmony_ci}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_cistatic int ocfs2_cow_file_pos(struct inode *inode,
3388c2ecf20Sopenharmony_ci			      struct buffer_head *fe_bh,
3398c2ecf20Sopenharmony_ci			      u64 offset)
3408c2ecf20Sopenharmony_ci{
3418c2ecf20Sopenharmony_ci	int status;
3428c2ecf20Sopenharmony_ci	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
3438c2ecf20Sopenharmony_ci	unsigned int num_clusters = 0;
3448c2ecf20Sopenharmony_ci	unsigned int ext_flags = 0;
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci	/*
3478c2ecf20Sopenharmony_ci	 * If the new offset is aligned to the range of the cluster, there is
3488c2ecf20Sopenharmony_ci	 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
3498c2ecf20Sopenharmony_ci	 * CoW either.
3508c2ecf20Sopenharmony_ci	 */
3518c2ecf20Sopenharmony_ci	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
3528c2ecf20Sopenharmony_ci		return 0;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	status = ocfs2_get_clusters(inode, cpos, &phys,
3558c2ecf20Sopenharmony_ci				    &num_clusters, &ext_flags);
3568c2ecf20Sopenharmony_ci	if (status) {
3578c2ecf20Sopenharmony_ci		mlog_errno(status);
3588c2ecf20Sopenharmony_ci		goto out;
3598c2ecf20Sopenharmony_ci	}
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
3628c2ecf20Sopenharmony_ci		goto out;
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ciout:
3678c2ecf20Sopenharmony_ci	return status;
3688c2ecf20Sopenharmony_ci}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_cistatic int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
3718c2ecf20Sopenharmony_ci				     struct inode *inode,
3728c2ecf20Sopenharmony_ci				     struct buffer_head *fe_bh,
3738c2ecf20Sopenharmony_ci				     u64 new_i_size)
3748c2ecf20Sopenharmony_ci{
3758c2ecf20Sopenharmony_ci	int status;
3768c2ecf20Sopenharmony_ci	handle_t *handle;
3778c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di;
3788c2ecf20Sopenharmony_ci	u64 cluster_bytes;
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	/*
3818c2ecf20Sopenharmony_ci	 * We need to CoW the cluster contains the offset if it is reflinked
3828c2ecf20Sopenharmony_ci	 * since we will call ocfs2_zero_range_for_truncate later which will
3838c2ecf20Sopenharmony_ci	 * write "0" from offset to the end of the cluster.
3848c2ecf20Sopenharmony_ci	 */
3858c2ecf20Sopenharmony_ci	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
3868c2ecf20Sopenharmony_ci	if (status) {
3878c2ecf20Sopenharmony_ci		mlog_errno(status);
3888c2ecf20Sopenharmony_ci		return status;
3898c2ecf20Sopenharmony_ci	}
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci	/* TODO: This needs to actually orphan the inode in this
3928c2ecf20Sopenharmony_ci	 * transaction. */
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
3958c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
3968c2ecf20Sopenharmony_ci		status = PTR_ERR(handle);
3978c2ecf20Sopenharmony_ci		mlog_errno(status);
3988c2ecf20Sopenharmony_ci		goto out;
3998c2ecf20Sopenharmony_ci	}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
4028c2ecf20Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
4038c2ecf20Sopenharmony_ci	if (status < 0) {
4048c2ecf20Sopenharmony_ci		mlog_errno(status);
4058c2ecf20Sopenharmony_ci		goto out_commit;
4068c2ecf20Sopenharmony_ci	}
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	/*
4098c2ecf20Sopenharmony_ci	 * Do this before setting i_size.
4108c2ecf20Sopenharmony_ci	 */
4118c2ecf20Sopenharmony_ci	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
4128c2ecf20Sopenharmony_ci	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
4138c2ecf20Sopenharmony_ci					       cluster_bytes);
4148c2ecf20Sopenharmony_ci	if (status) {
4158c2ecf20Sopenharmony_ci		mlog_errno(status);
4168c2ecf20Sopenharmony_ci		goto out_commit;
4178c2ecf20Sopenharmony_ci	}
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci	i_size_write(inode, new_i_size);
4208c2ecf20Sopenharmony_ci	inode->i_ctime = inode->i_mtime = current_time(inode);
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	di = (struct ocfs2_dinode *) fe_bh->b_data;
4238c2ecf20Sopenharmony_ci	di->i_size = cpu_to_le64(new_i_size);
4248c2ecf20Sopenharmony_ci	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
4258c2ecf20Sopenharmony_ci	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
4268c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	ocfs2_journal_dirty(handle, fe_bh);
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ciout_commit:
4318c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
4328c2ecf20Sopenharmony_ciout:
4338c2ecf20Sopenharmony_ci	return status;
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ciint ocfs2_truncate_file(struct inode *inode,
4378c2ecf20Sopenharmony_ci			       struct buffer_head *di_bh,
4388c2ecf20Sopenharmony_ci			       u64 new_i_size)
4398c2ecf20Sopenharmony_ci{
4408c2ecf20Sopenharmony_ci	int status = 0;
4418c2ecf20Sopenharmony_ci	struct ocfs2_dinode *fe = NULL;
4428c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
4458c2ecf20Sopenharmony_ci	 * already validated it */
4468c2ecf20Sopenharmony_ci	fe = (struct ocfs2_dinode *) di_bh->b_data;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
4498c2ecf20Sopenharmony_ci				  (unsigned long long)le64_to_cpu(fe->i_size),
4508c2ecf20Sopenharmony_ci				  (unsigned long long)new_i_size);
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
4538c2ecf20Sopenharmony_ci			"Inode %llu, inode i_size = %lld != di "
4548c2ecf20Sopenharmony_ci			"i_size = %llu, i_flags = 0x%x\n",
4558c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
4568c2ecf20Sopenharmony_ci			i_size_read(inode),
4578c2ecf20Sopenharmony_ci			(unsigned long long)le64_to_cpu(fe->i_size),
4588c2ecf20Sopenharmony_ci			le32_to_cpu(fe->i_flags));
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	if (new_i_size > le64_to_cpu(fe->i_size)) {
4618c2ecf20Sopenharmony_ci		trace_ocfs2_truncate_file_error(
4628c2ecf20Sopenharmony_ci			(unsigned long long)le64_to_cpu(fe->i_size),
4638c2ecf20Sopenharmony_ci			(unsigned long long)new_i_size);
4648c2ecf20Sopenharmony_ci		status = -EINVAL;
4658c2ecf20Sopenharmony_ci		mlog_errno(status);
4668c2ecf20Sopenharmony_ci		goto bail;
4678c2ecf20Sopenharmony_ci	}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	ocfs2_resv_discard(&osb->osb_la_resmap,
4728c2ecf20Sopenharmony_ci			   &OCFS2_I(inode)->ip_la_data_resv);
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	/*
4758c2ecf20Sopenharmony_ci	 * The inode lock forced other nodes to sync and drop their
4768c2ecf20Sopenharmony_ci	 * pages, which (correctly) happens even if we have a truncate
4778c2ecf20Sopenharmony_ci	 * without allocation change - ocfs2 cluster sizes can be much
4788c2ecf20Sopenharmony_ci	 * greater than page size, so we have to truncate them
4798c2ecf20Sopenharmony_ci	 * anyway.
4808c2ecf20Sopenharmony_ci	 */
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
4838c2ecf20Sopenharmony_ci		unmap_mapping_range(inode->i_mapping,
4848c2ecf20Sopenharmony_ci				    new_i_size + PAGE_SIZE - 1, 0, 1);
4858c2ecf20Sopenharmony_ci		truncate_inode_pages(inode->i_mapping, new_i_size);
4868c2ecf20Sopenharmony_ci		status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
4878c2ecf20Sopenharmony_ci					       i_size_read(inode), 1);
4888c2ecf20Sopenharmony_ci		if (status)
4898c2ecf20Sopenharmony_ci			mlog_errno(status);
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci		goto bail_unlock_sem;
4928c2ecf20Sopenharmony_ci	}
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	/* alright, we're going to need to do a full blown alloc size
4958c2ecf20Sopenharmony_ci	 * change. Orphan the inode so that recovery can complete the
4968c2ecf20Sopenharmony_ci	 * truncate if necessary. This does the task of marking
4978c2ecf20Sopenharmony_ci	 * i_size. */
4988c2ecf20Sopenharmony_ci	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
4998c2ecf20Sopenharmony_ci	if (status < 0) {
5008c2ecf20Sopenharmony_ci		mlog_errno(status);
5018c2ecf20Sopenharmony_ci		goto bail_unlock_sem;
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
5058c2ecf20Sopenharmony_ci	truncate_inode_pages(inode->i_mapping, new_i_size);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	status = ocfs2_commit_truncate(osb, inode, di_bh);
5088c2ecf20Sopenharmony_ci	if (status < 0) {
5098c2ecf20Sopenharmony_ci		mlog_errno(status);
5108c2ecf20Sopenharmony_ci		goto bail_unlock_sem;
5118c2ecf20Sopenharmony_ci	}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	/* TODO: orphan dir cleanup here. */
5148c2ecf20Sopenharmony_cibail_unlock_sem:
5158c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_cibail:
5188c2ecf20Sopenharmony_ci	if (!status && OCFS2_I(inode)->ip_clusters == 0)
5198c2ecf20Sopenharmony_ci		status = ocfs2_try_remove_refcount_tree(inode, di_bh);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	return status;
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci/*
5258c2ecf20Sopenharmony_ci * extend file allocation only here.
5268c2ecf20Sopenharmony_ci * we'll update all the disk stuff, and oip->alloc_size
5278c2ecf20Sopenharmony_ci *
5288c2ecf20Sopenharmony_ci * expect stuff to be locked, a transaction started and enough data /
5298c2ecf20Sopenharmony_ci * metadata reservations in the contexts.
5308c2ecf20Sopenharmony_ci *
5318c2ecf20Sopenharmony_ci * Will return -EAGAIN, and a reason if a restart is needed.
5328c2ecf20Sopenharmony_ci * If passed in, *reason will always be set, even in error.
5338c2ecf20Sopenharmony_ci */
5348c2ecf20Sopenharmony_ciint ocfs2_add_inode_data(struct ocfs2_super *osb,
5358c2ecf20Sopenharmony_ci			 struct inode *inode,
5368c2ecf20Sopenharmony_ci			 u32 *logical_offset,
5378c2ecf20Sopenharmony_ci			 u32 clusters_to_add,
5388c2ecf20Sopenharmony_ci			 int mark_unwritten,
5398c2ecf20Sopenharmony_ci			 struct buffer_head *fe_bh,
5408c2ecf20Sopenharmony_ci			 handle_t *handle,
5418c2ecf20Sopenharmony_ci			 struct ocfs2_alloc_context *data_ac,
5428c2ecf20Sopenharmony_ci			 struct ocfs2_alloc_context *meta_ac,
5438c2ecf20Sopenharmony_ci			 enum ocfs2_alloc_restarted *reason_ret)
5448c2ecf20Sopenharmony_ci{
5458c2ecf20Sopenharmony_ci	int ret;
5468c2ecf20Sopenharmony_ci	struct ocfs2_extent_tree et;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
5498c2ecf20Sopenharmony_ci	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
5508c2ecf20Sopenharmony_ci					  clusters_to_add, mark_unwritten,
5518c2ecf20Sopenharmony_ci					  data_ac, meta_ac, reason_ret);
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	return ret;
5548c2ecf20Sopenharmony_ci}
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_cistatic int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
5578c2ecf20Sopenharmony_ci				   u32 clusters_to_add, int mark_unwritten)
5588c2ecf20Sopenharmony_ci{
5598c2ecf20Sopenharmony_ci	int status = 0;
5608c2ecf20Sopenharmony_ci	int restart_func = 0;
5618c2ecf20Sopenharmony_ci	int credits;
5628c2ecf20Sopenharmony_ci	u32 prev_clusters;
5638c2ecf20Sopenharmony_ci	struct buffer_head *bh = NULL;
5648c2ecf20Sopenharmony_ci	struct ocfs2_dinode *fe = NULL;
5658c2ecf20Sopenharmony_ci	handle_t *handle = NULL;
5668c2ecf20Sopenharmony_ci	struct ocfs2_alloc_context *data_ac = NULL;
5678c2ecf20Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
5688c2ecf20Sopenharmony_ci	enum ocfs2_alloc_restarted why = RESTART_NONE;
5698c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5708c2ecf20Sopenharmony_ci	struct ocfs2_extent_tree et;
5718c2ecf20Sopenharmony_ci	int did_quota = 0;
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	/*
5748c2ecf20Sopenharmony_ci	 * Unwritten extent only exists for file systems which
5758c2ecf20Sopenharmony_ci	 * support holes.
5768c2ecf20Sopenharmony_ci	 */
5778c2ecf20Sopenharmony_ci	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
5788c2ecf20Sopenharmony_ci
5798c2ecf20Sopenharmony_ci	status = ocfs2_read_inode_block(inode, &bh);
5808c2ecf20Sopenharmony_ci	if (status < 0) {
5818c2ecf20Sopenharmony_ci		mlog_errno(status);
5828c2ecf20Sopenharmony_ci		goto leave;
5838c2ecf20Sopenharmony_ci	}
5848c2ecf20Sopenharmony_ci	fe = (struct ocfs2_dinode *) bh->b_data;
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_cirestart_all:
5878c2ecf20Sopenharmony_ci	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
5908c2ecf20Sopenharmony_ci	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
5918c2ecf20Sopenharmony_ci				       &data_ac, &meta_ac);
5928c2ecf20Sopenharmony_ci	if (status) {
5938c2ecf20Sopenharmony_ci		mlog_errno(status);
5948c2ecf20Sopenharmony_ci		goto leave;
5958c2ecf20Sopenharmony_ci	}
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_ci	credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list);
5988c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
5998c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
6008c2ecf20Sopenharmony_ci		status = PTR_ERR(handle);
6018c2ecf20Sopenharmony_ci		handle = NULL;
6028c2ecf20Sopenharmony_ci		mlog_errno(status);
6038c2ecf20Sopenharmony_ci		goto leave;
6048c2ecf20Sopenharmony_ci	}
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_cirestarted_transaction:
6078c2ecf20Sopenharmony_ci	trace_ocfs2_extend_allocation(
6088c2ecf20Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
6098c2ecf20Sopenharmony_ci		(unsigned long long)i_size_read(inode),
6108c2ecf20Sopenharmony_ci		le32_to_cpu(fe->i_clusters), clusters_to_add,
6118c2ecf20Sopenharmony_ci		why, restart_func);
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci	status = dquot_alloc_space_nodirty(inode,
6148c2ecf20Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
6158c2ecf20Sopenharmony_ci	if (status)
6168c2ecf20Sopenharmony_ci		goto leave;
6178c2ecf20Sopenharmony_ci	did_quota = 1;
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	/* reserve a write to the file entry early on - that we if we
6208c2ecf20Sopenharmony_ci	 * run out of credits in the allocation path, we can still
6218c2ecf20Sopenharmony_ci	 * update i_size. */
6228c2ecf20Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
6238c2ecf20Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
6248c2ecf20Sopenharmony_ci	if (status < 0) {
6258c2ecf20Sopenharmony_ci		mlog_errno(status);
6268c2ecf20Sopenharmony_ci		goto leave;
6278c2ecf20Sopenharmony_ci	}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	prev_clusters = OCFS2_I(inode)->ip_clusters;
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	status = ocfs2_add_inode_data(osb,
6328c2ecf20Sopenharmony_ci				      inode,
6338c2ecf20Sopenharmony_ci				      &logical_start,
6348c2ecf20Sopenharmony_ci				      clusters_to_add,
6358c2ecf20Sopenharmony_ci				      mark_unwritten,
6368c2ecf20Sopenharmony_ci				      bh,
6378c2ecf20Sopenharmony_ci				      handle,
6388c2ecf20Sopenharmony_ci				      data_ac,
6398c2ecf20Sopenharmony_ci				      meta_ac,
6408c2ecf20Sopenharmony_ci				      &why);
6418c2ecf20Sopenharmony_ci	if ((status < 0) && (status != -EAGAIN)) {
6428c2ecf20Sopenharmony_ci		if (status != -ENOSPC)
6438c2ecf20Sopenharmony_ci			mlog_errno(status);
6448c2ecf20Sopenharmony_ci		goto leave;
6458c2ecf20Sopenharmony_ci	}
6468c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
6478c2ecf20Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	spin_lock(&OCFS2_I(inode)->ip_lock);
6508c2ecf20Sopenharmony_ci	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
6518c2ecf20Sopenharmony_ci	spin_unlock(&OCFS2_I(inode)->ip_lock);
6528c2ecf20Sopenharmony_ci	/* Release unused quota reservation */
6538c2ecf20Sopenharmony_ci	dquot_free_space(inode,
6548c2ecf20Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
6558c2ecf20Sopenharmony_ci	did_quota = 0;
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	if (why != RESTART_NONE && clusters_to_add) {
6588c2ecf20Sopenharmony_ci		if (why == RESTART_META) {
6598c2ecf20Sopenharmony_ci			restart_func = 1;
6608c2ecf20Sopenharmony_ci			status = 0;
6618c2ecf20Sopenharmony_ci		} else {
6628c2ecf20Sopenharmony_ci			BUG_ON(why != RESTART_TRANS);
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci			status = ocfs2_allocate_extend_trans(handle, 1);
6658c2ecf20Sopenharmony_ci			if (status < 0) {
6668c2ecf20Sopenharmony_ci				/* handle still has to be committed at
6678c2ecf20Sopenharmony_ci				 * this point. */
6688c2ecf20Sopenharmony_ci				status = -ENOMEM;
6698c2ecf20Sopenharmony_ci				mlog_errno(status);
6708c2ecf20Sopenharmony_ci				goto leave;
6718c2ecf20Sopenharmony_ci			}
6728c2ecf20Sopenharmony_ci			goto restarted_transaction;
6738c2ecf20Sopenharmony_ci		}
6748c2ecf20Sopenharmony_ci	}
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno,
6778c2ecf20Sopenharmony_ci	     le32_to_cpu(fe->i_clusters),
6788c2ecf20Sopenharmony_ci	     (unsigned long long)le64_to_cpu(fe->i_size),
6798c2ecf20Sopenharmony_ci	     OCFS2_I(inode)->ip_clusters,
6808c2ecf20Sopenharmony_ci	     (unsigned long long)i_size_read(inode));
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_cileave:
6838c2ecf20Sopenharmony_ci	if (status < 0 && did_quota)
6848c2ecf20Sopenharmony_ci		dquot_free_space(inode,
6858c2ecf20Sopenharmony_ci			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
6868c2ecf20Sopenharmony_ci	if (handle) {
6878c2ecf20Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
6888c2ecf20Sopenharmony_ci		handle = NULL;
6898c2ecf20Sopenharmony_ci	}
6908c2ecf20Sopenharmony_ci	if (data_ac) {
6918c2ecf20Sopenharmony_ci		ocfs2_free_alloc_context(data_ac);
6928c2ecf20Sopenharmony_ci		data_ac = NULL;
6938c2ecf20Sopenharmony_ci	}
6948c2ecf20Sopenharmony_ci	if (meta_ac) {
6958c2ecf20Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
6968c2ecf20Sopenharmony_ci		meta_ac = NULL;
6978c2ecf20Sopenharmony_ci	}
6988c2ecf20Sopenharmony_ci	if ((!status) && restart_func) {
6998c2ecf20Sopenharmony_ci		restart_func = 0;
7008c2ecf20Sopenharmony_ci		goto restart_all;
7018c2ecf20Sopenharmony_ci	}
7028c2ecf20Sopenharmony_ci	brelse(bh);
7038c2ecf20Sopenharmony_ci	bh = NULL;
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci	return status;
7068c2ecf20Sopenharmony_ci}
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci/*
7098c2ecf20Sopenharmony_ci * While a write will already be ordering the data, a truncate will not.
7108c2ecf20Sopenharmony_ci * Thus, we need to explicitly order the zeroed pages.
7118c2ecf20Sopenharmony_ci */
7128c2ecf20Sopenharmony_cistatic handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
7138c2ecf20Sopenharmony_ci						      struct buffer_head *di_bh,
7148c2ecf20Sopenharmony_ci						      loff_t start_byte,
7158c2ecf20Sopenharmony_ci						      loff_t length)
7168c2ecf20Sopenharmony_ci{
7178c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7188c2ecf20Sopenharmony_ci	handle_t *handle = NULL;
7198c2ecf20Sopenharmony_ci	int ret = 0;
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	if (!ocfs2_should_order_data(inode))
7228c2ecf20Sopenharmony_ci		goto out;
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
7258c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
7268c2ecf20Sopenharmony_ci		ret = -ENOMEM;
7278c2ecf20Sopenharmony_ci		mlog_errno(ret);
7288c2ecf20Sopenharmony_ci		goto out;
7298c2ecf20Sopenharmony_ci	}
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci	ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
7328c2ecf20Sopenharmony_ci	if (ret < 0) {
7338c2ecf20Sopenharmony_ci		mlog_errno(ret);
7348c2ecf20Sopenharmony_ci		goto out;
7358c2ecf20Sopenharmony_ci	}
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7388c2ecf20Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
7398c2ecf20Sopenharmony_ci	if (ret)
7408c2ecf20Sopenharmony_ci		mlog_errno(ret);
7418c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_ciout:
7448c2ecf20Sopenharmony_ci	if (ret) {
7458c2ecf20Sopenharmony_ci		if (!IS_ERR(handle))
7468c2ecf20Sopenharmony_ci			ocfs2_commit_trans(osb, handle);
7478c2ecf20Sopenharmony_ci		handle = ERR_PTR(ret);
7488c2ecf20Sopenharmony_ci	}
7498c2ecf20Sopenharmony_ci	return handle;
7508c2ecf20Sopenharmony_ci}
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci/* Some parts of this taken from generic_cont_expand, which turned out
7538c2ecf20Sopenharmony_ci * to be too fragile to do exactly what we need without us having to
7548c2ecf20Sopenharmony_ci * worry about recursive locking in ->write_begin() and ->write_end(). */
7558c2ecf20Sopenharmony_cistatic int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
7568c2ecf20Sopenharmony_ci				 u64 abs_to, struct buffer_head *di_bh)
7578c2ecf20Sopenharmony_ci{
7588c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
7598c2ecf20Sopenharmony_ci	struct page *page;
7608c2ecf20Sopenharmony_ci	unsigned long index = abs_from >> PAGE_SHIFT;
7618c2ecf20Sopenharmony_ci	handle_t *handle;
7628c2ecf20Sopenharmony_ci	int ret = 0;
7638c2ecf20Sopenharmony_ci	unsigned zero_from, zero_to, block_start, block_end;
7648c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	BUG_ON(abs_from >= abs_to);
7678c2ecf20Sopenharmony_ci	BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
7688c2ecf20Sopenharmony_ci	BUG_ON(abs_from & (inode->i_blkbits - 1));
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
7718c2ecf20Sopenharmony_ci						      abs_from,
7728c2ecf20Sopenharmony_ci						      abs_to - abs_from);
7738c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
7748c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
7758c2ecf20Sopenharmony_ci		goto out;
7768c2ecf20Sopenharmony_ci	}
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	page = find_or_create_page(mapping, index, GFP_NOFS);
7798c2ecf20Sopenharmony_ci	if (!page) {
7808c2ecf20Sopenharmony_ci		ret = -ENOMEM;
7818c2ecf20Sopenharmony_ci		mlog_errno(ret);
7828c2ecf20Sopenharmony_ci		goto out_commit_trans;
7838c2ecf20Sopenharmony_ci	}
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci	/* Get the offsets within the page that we want to zero */
7868c2ecf20Sopenharmony_ci	zero_from = abs_from & (PAGE_SIZE - 1);
7878c2ecf20Sopenharmony_ci	zero_to = abs_to & (PAGE_SIZE - 1);
7888c2ecf20Sopenharmony_ci	if (!zero_to)
7898c2ecf20Sopenharmony_ci		zero_to = PAGE_SIZE;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	trace_ocfs2_write_zero_page(
7928c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
7938c2ecf20Sopenharmony_ci			(unsigned long long)abs_from,
7948c2ecf20Sopenharmony_ci			(unsigned long long)abs_to,
7958c2ecf20Sopenharmony_ci			index, zero_from, zero_to);
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	/* We know that zero_from is block aligned */
7988c2ecf20Sopenharmony_ci	for (block_start = zero_from; block_start < zero_to;
7998c2ecf20Sopenharmony_ci	     block_start = block_end) {
8008c2ecf20Sopenharmony_ci		block_end = block_start + i_blocksize(inode);
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci		/*
8038c2ecf20Sopenharmony_ci		 * block_start is block-aligned.  Bump it by one to force
8048c2ecf20Sopenharmony_ci		 * __block_write_begin and block_commit_write to zero the
8058c2ecf20Sopenharmony_ci		 * whole block.
8068c2ecf20Sopenharmony_ci		 */
8078c2ecf20Sopenharmony_ci		ret = __block_write_begin(page, block_start + 1, 0,
8088c2ecf20Sopenharmony_ci					  ocfs2_get_block);
8098c2ecf20Sopenharmony_ci		if (ret < 0) {
8108c2ecf20Sopenharmony_ci			mlog_errno(ret);
8118c2ecf20Sopenharmony_ci			goto out_unlock;
8128c2ecf20Sopenharmony_ci		}
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci		/* must not update i_size! */
8168c2ecf20Sopenharmony_ci		ret = block_commit_write(page, block_start + 1,
8178c2ecf20Sopenharmony_ci					 block_start + 1);
8188c2ecf20Sopenharmony_ci		if (ret < 0)
8198c2ecf20Sopenharmony_ci			mlog_errno(ret);
8208c2ecf20Sopenharmony_ci		else
8218c2ecf20Sopenharmony_ci			ret = 0;
8228c2ecf20Sopenharmony_ci	}
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	/*
8258c2ecf20Sopenharmony_ci	 * fs-writeback will release the dirty pages without page lock
8268c2ecf20Sopenharmony_ci	 * whose offset are over inode size, the release happens at
8278c2ecf20Sopenharmony_ci	 * block_write_full_page().
8288c2ecf20Sopenharmony_ci	 */
8298c2ecf20Sopenharmony_ci	i_size_write(inode, abs_to);
8308c2ecf20Sopenharmony_ci	inode->i_blocks = ocfs2_inode_sector_count(inode);
8318c2ecf20Sopenharmony_ci	di->i_size = cpu_to_le64((u64)i_size_read(inode));
8328c2ecf20Sopenharmony_ci	inode->i_mtime = inode->i_ctime = current_time(inode);
8338c2ecf20Sopenharmony_ci	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
8348c2ecf20Sopenharmony_ci	di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
8358c2ecf20Sopenharmony_ci	di->i_mtime_nsec = di->i_ctime_nsec;
8368c2ecf20Sopenharmony_ci	if (handle) {
8378c2ecf20Sopenharmony_ci		ocfs2_journal_dirty(handle, di_bh);
8388c2ecf20Sopenharmony_ci		ocfs2_update_inode_fsync_trans(handle, inode, 1);
8398c2ecf20Sopenharmony_ci	}
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ciout_unlock:
8428c2ecf20Sopenharmony_ci	unlock_page(page);
8438c2ecf20Sopenharmony_ci	put_page(page);
8448c2ecf20Sopenharmony_ciout_commit_trans:
8458c2ecf20Sopenharmony_ci	if (handle)
8468c2ecf20Sopenharmony_ci		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
8478c2ecf20Sopenharmony_ciout:
8488c2ecf20Sopenharmony_ci	return ret;
8498c2ecf20Sopenharmony_ci}
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci/*
8528c2ecf20Sopenharmony_ci * Find the next range to zero.  We do this in terms of bytes because
8538c2ecf20Sopenharmony_ci * that's what ocfs2_zero_extend() wants, and it is dealing with the
8548c2ecf20Sopenharmony_ci * pagecache.  We may return multiple extents.
8558c2ecf20Sopenharmony_ci *
8568c2ecf20Sopenharmony_ci * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
8578c2ecf20Sopenharmony_ci * needs to be zeroed.  range_start and range_end return the next zeroing
8588c2ecf20Sopenharmony_ci * range.  A subsequent call should pass the previous range_end as its
8598c2ecf20Sopenharmony_ci * zero_start.  If range_end is 0, there's nothing to do.
8608c2ecf20Sopenharmony_ci *
8618c2ecf20Sopenharmony_ci * Unwritten extents are skipped over.  Refcounted extents are CoWd.
8628c2ecf20Sopenharmony_ci */
8638c2ecf20Sopenharmony_cistatic int ocfs2_zero_extend_get_range(struct inode *inode,
8648c2ecf20Sopenharmony_ci				       struct buffer_head *di_bh,
8658c2ecf20Sopenharmony_ci				       u64 zero_start, u64 zero_end,
8668c2ecf20Sopenharmony_ci				       u64 *range_start, u64 *range_end)
8678c2ecf20Sopenharmony_ci{
8688c2ecf20Sopenharmony_ci	int rc = 0, needs_cow = 0;
8698c2ecf20Sopenharmony_ci	u32 p_cpos, zero_clusters = 0;
8708c2ecf20Sopenharmony_ci	u32 zero_cpos =
8718c2ecf20Sopenharmony_ci		zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
8728c2ecf20Sopenharmony_ci	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
8738c2ecf20Sopenharmony_ci	unsigned int num_clusters = 0;
8748c2ecf20Sopenharmony_ci	unsigned int ext_flags = 0;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	while (zero_cpos < last_cpos) {
8778c2ecf20Sopenharmony_ci		rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
8788c2ecf20Sopenharmony_ci					&num_clusters, &ext_flags);
8798c2ecf20Sopenharmony_ci		if (rc) {
8808c2ecf20Sopenharmony_ci			mlog_errno(rc);
8818c2ecf20Sopenharmony_ci			goto out;
8828c2ecf20Sopenharmony_ci		}
8838c2ecf20Sopenharmony_ci
8848c2ecf20Sopenharmony_ci		if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
8858c2ecf20Sopenharmony_ci			zero_clusters = num_clusters;
8868c2ecf20Sopenharmony_ci			if (ext_flags & OCFS2_EXT_REFCOUNTED)
8878c2ecf20Sopenharmony_ci				needs_cow = 1;
8888c2ecf20Sopenharmony_ci			break;
8898c2ecf20Sopenharmony_ci		}
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_ci		zero_cpos += num_clusters;
8928c2ecf20Sopenharmony_ci	}
8938c2ecf20Sopenharmony_ci	if (!zero_clusters) {
8948c2ecf20Sopenharmony_ci		*range_end = 0;
8958c2ecf20Sopenharmony_ci		goto out;
8968c2ecf20Sopenharmony_ci	}
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	while ((zero_cpos + zero_clusters) < last_cpos) {
8998c2ecf20Sopenharmony_ci		rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
9008c2ecf20Sopenharmony_ci					&p_cpos, &num_clusters,
9018c2ecf20Sopenharmony_ci					&ext_flags);
9028c2ecf20Sopenharmony_ci		if (rc) {
9038c2ecf20Sopenharmony_ci			mlog_errno(rc);
9048c2ecf20Sopenharmony_ci			goto out;
9058c2ecf20Sopenharmony_ci		}
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci		if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
9088c2ecf20Sopenharmony_ci			break;
9098c2ecf20Sopenharmony_ci		if (ext_flags & OCFS2_EXT_REFCOUNTED)
9108c2ecf20Sopenharmony_ci			needs_cow = 1;
9118c2ecf20Sopenharmony_ci		zero_clusters += num_clusters;
9128c2ecf20Sopenharmony_ci	}
9138c2ecf20Sopenharmony_ci	if ((zero_cpos + zero_clusters) > last_cpos)
9148c2ecf20Sopenharmony_ci		zero_clusters = last_cpos - zero_cpos;
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci	if (needs_cow) {
9178c2ecf20Sopenharmony_ci		rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
9188c2ecf20Sopenharmony_ci					zero_clusters, UINT_MAX);
9198c2ecf20Sopenharmony_ci		if (rc) {
9208c2ecf20Sopenharmony_ci			mlog_errno(rc);
9218c2ecf20Sopenharmony_ci			goto out;
9228c2ecf20Sopenharmony_ci		}
9238c2ecf20Sopenharmony_ci	}
9248c2ecf20Sopenharmony_ci
9258c2ecf20Sopenharmony_ci	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
9268c2ecf20Sopenharmony_ci	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
9278c2ecf20Sopenharmony_ci					     zero_cpos + zero_clusters);
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ciout:
9308c2ecf20Sopenharmony_ci	return rc;
9318c2ecf20Sopenharmony_ci}
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci/*
9348c2ecf20Sopenharmony_ci * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
9358c2ecf20Sopenharmony_ci * has made sure that the entire range needs zeroing.
9368c2ecf20Sopenharmony_ci */
9378c2ecf20Sopenharmony_cistatic int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
9388c2ecf20Sopenharmony_ci				   u64 range_end, struct buffer_head *di_bh)
9398c2ecf20Sopenharmony_ci{
9408c2ecf20Sopenharmony_ci	int rc = 0;
9418c2ecf20Sopenharmony_ci	u64 next_pos;
9428c2ecf20Sopenharmony_ci	u64 zero_pos = range_start;
9438c2ecf20Sopenharmony_ci
9448c2ecf20Sopenharmony_ci	trace_ocfs2_zero_extend_range(
9458c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
9468c2ecf20Sopenharmony_ci			(unsigned long long)range_start,
9478c2ecf20Sopenharmony_ci			(unsigned long long)range_end);
9488c2ecf20Sopenharmony_ci	BUG_ON(range_start >= range_end);
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci	while (zero_pos < range_end) {
9518c2ecf20Sopenharmony_ci		next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE;
9528c2ecf20Sopenharmony_ci		if (next_pos > range_end)
9538c2ecf20Sopenharmony_ci			next_pos = range_end;
9548c2ecf20Sopenharmony_ci		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
9558c2ecf20Sopenharmony_ci		if (rc < 0) {
9568c2ecf20Sopenharmony_ci			mlog_errno(rc);
9578c2ecf20Sopenharmony_ci			break;
9588c2ecf20Sopenharmony_ci		}
9598c2ecf20Sopenharmony_ci		zero_pos = next_pos;
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci		/*
9628c2ecf20Sopenharmony_ci		 * Very large extends have the potential to lock up
9638c2ecf20Sopenharmony_ci		 * the cpu for extended periods of time.
9648c2ecf20Sopenharmony_ci		 */
9658c2ecf20Sopenharmony_ci		cond_resched();
9668c2ecf20Sopenharmony_ci	}
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci	return rc;
9698c2ecf20Sopenharmony_ci}
9708c2ecf20Sopenharmony_ci
9718c2ecf20Sopenharmony_ciint ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
9728c2ecf20Sopenharmony_ci		      loff_t zero_to_size)
9738c2ecf20Sopenharmony_ci{
9748c2ecf20Sopenharmony_ci	int ret = 0;
9758c2ecf20Sopenharmony_ci	u64 zero_start, range_start = 0, range_end = 0;
9768c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
9798c2ecf20Sopenharmony_ci	trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno,
9808c2ecf20Sopenharmony_ci				(unsigned long long)zero_start,
9818c2ecf20Sopenharmony_ci				(unsigned long long)i_size_read(inode));
9828c2ecf20Sopenharmony_ci	while (zero_start < zero_to_size) {
9838c2ecf20Sopenharmony_ci		ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
9848c2ecf20Sopenharmony_ci						  zero_to_size,
9858c2ecf20Sopenharmony_ci						  &range_start,
9868c2ecf20Sopenharmony_ci						  &range_end);
9878c2ecf20Sopenharmony_ci		if (ret) {
9888c2ecf20Sopenharmony_ci			mlog_errno(ret);
9898c2ecf20Sopenharmony_ci			break;
9908c2ecf20Sopenharmony_ci		}
9918c2ecf20Sopenharmony_ci		if (!range_end)
9928c2ecf20Sopenharmony_ci			break;
9938c2ecf20Sopenharmony_ci		/* Trim the ends */
9948c2ecf20Sopenharmony_ci		if (range_start < zero_start)
9958c2ecf20Sopenharmony_ci			range_start = zero_start;
9968c2ecf20Sopenharmony_ci		if (range_end > zero_to_size)
9978c2ecf20Sopenharmony_ci			range_end = zero_to_size;
9988c2ecf20Sopenharmony_ci
9998c2ecf20Sopenharmony_ci		ret = ocfs2_zero_extend_range(inode, range_start,
10008c2ecf20Sopenharmony_ci					      range_end, di_bh);
10018c2ecf20Sopenharmony_ci		if (ret) {
10028c2ecf20Sopenharmony_ci			mlog_errno(ret);
10038c2ecf20Sopenharmony_ci			break;
10048c2ecf20Sopenharmony_ci		}
10058c2ecf20Sopenharmony_ci		zero_start = range_end;
10068c2ecf20Sopenharmony_ci	}
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci	return ret;
10098c2ecf20Sopenharmony_ci}
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ciint ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
10128c2ecf20Sopenharmony_ci			  u64 new_i_size, u64 zero_to)
10138c2ecf20Sopenharmony_ci{
10148c2ecf20Sopenharmony_ci	int ret;
10158c2ecf20Sopenharmony_ci	u32 clusters_to_add;
10168c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci	/*
10198c2ecf20Sopenharmony_ci	 * Only quota files call this without a bh, and they can't be
10208c2ecf20Sopenharmony_ci	 * refcounted.
10218c2ecf20Sopenharmony_ci	 */
10228c2ecf20Sopenharmony_ci	BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode));
10238c2ecf20Sopenharmony_ci	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
10248c2ecf20Sopenharmony_ci
10258c2ecf20Sopenharmony_ci	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
10268c2ecf20Sopenharmony_ci	if (clusters_to_add < oi->ip_clusters)
10278c2ecf20Sopenharmony_ci		clusters_to_add = 0;
10288c2ecf20Sopenharmony_ci	else
10298c2ecf20Sopenharmony_ci		clusters_to_add -= oi->ip_clusters;
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci	if (clusters_to_add) {
10328c2ecf20Sopenharmony_ci		ret = ocfs2_extend_allocation(inode, oi->ip_clusters,
10338c2ecf20Sopenharmony_ci					      clusters_to_add, 0);
10348c2ecf20Sopenharmony_ci		if (ret) {
10358c2ecf20Sopenharmony_ci			mlog_errno(ret);
10368c2ecf20Sopenharmony_ci			goto out;
10378c2ecf20Sopenharmony_ci		}
10388c2ecf20Sopenharmony_ci	}
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci	/*
10418c2ecf20Sopenharmony_ci	 * Call this even if we don't add any clusters to the tree. We
10428c2ecf20Sopenharmony_ci	 * still need to zero the area between the old i_size and the
10438c2ecf20Sopenharmony_ci	 * new i_size.
10448c2ecf20Sopenharmony_ci	 */
10458c2ecf20Sopenharmony_ci	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
10468c2ecf20Sopenharmony_ci	if (ret < 0)
10478c2ecf20Sopenharmony_ci		mlog_errno(ret);
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ciout:
10508c2ecf20Sopenharmony_ci	return ret;
10518c2ecf20Sopenharmony_ci}
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_cistatic int ocfs2_extend_file(struct inode *inode,
10548c2ecf20Sopenharmony_ci			     struct buffer_head *di_bh,
10558c2ecf20Sopenharmony_ci			     u64 new_i_size)
10568c2ecf20Sopenharmony_ci{
10578c2ecf20Sopenharmony_ci	int ret = 0;
10588c2ecf20Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	BUG_ON(!di_bh);
10618c2ecf20Sopenharmony_ci
10628c2ecf20Sopenharmony_ci	/* setattr sometimes calls us like this. */
10638c2ecf20Sopenharmony_ci	if (new_i_size == 0)
10648c2ecf20Sopenharmony_ci		goto out;
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	if (i_size_read(inode) == new_i_size)
10678c2ecf20Sopenharmony_ci		goto out;
10688c2ecf20Sopenharmony_ci	BUG_ON(new_i_size < i_size_read(inode));
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ci	/*
10718c2ecf20Sopenharmony_ci	 * The alloc sem blocks people in read/write from reading our
10728c2ecf20Sopenharmony_ci	 * allocation until we're done changing it. We depend on
10738c2ecf20Sopenharmony_ci	 * i_mutex to block other extend/truncate calls while we're
10748c2ecf20Sopenharmony_ci	 * here.  We even have to hold it for sparse files because there
10758c2ecf20Sopenharmony_ci	 * might be some tail zeroing.
10768c2ecf20Sopenharmony_ci	 */
10778c2ecf20Sopenharmony_ci	down_write(&oi->ip_alloc_sem);
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_ci	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
10808c2ecf20Sopenharmony_ci		/*
10818c2ecf20Sopenharmony_ci		 * We can optimize small extends by keeping the inodes
10828c2ecf20Sopenharmony_ci		 * inline data.
10838c2ecf20Sopenharmony_ci		 */
10848c2ecf20Sopenharmony_ci		if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
10858c2ecf20Sopenharmony_ci			up_write(&oi->ip_alloc_sem);
10868c2ecf20Sopenharmony_ci			goto out_update_size;
10878c2ecf20Sopenharmony_ci		}
10888c2ecf20Sopenharmony_ci
10898c2ecf20Sopenharmony_ci		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
10908c2ecf20Sopenharmony_ci		if (ret) {
10918c2ecf20Sopenharmony_ci			up_write(&oi->ip_alloc_sem);
10928c2ecf20Sopenharmony_ci			mlog_errno(ret);
10938c2ecf20Sopenharmony_ci			goto out;
10948c2ecf20Sopenharmony_ci		}
10958c2ecf20Sopenharmony_ci	}
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
10988c2ecf20Sopenharmony_ci		ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
10998c2ecf20Sopenharmony_ci	else
11008c2ecf20Sopenharmony_ci		ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
11018c2ecf20Sopenharmony_ci					    new_i_size);
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	up_write(&oi->ip_alloc_sem);
11048c2ecf20Sopenharmony_ci
11058c2ecf20Sopenharmony_ci	if (ret < 0) {
11068c2ecf20Sopenharmony_ci		mlog_errno(ret);
11078c2ecf20Sopenharmony_ci		goto out;
11088c2ecf20Sopenharmony_ci	}
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ciout_update_size:
11118c2ecf20Sopenharmony_ci	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
11128c2ecf20Sopenharmony_ci	if (ret < 0)
11138c2ecf20Sopenharmony_ci		mlog_errno(ret);
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ciout:
11168c2ecf20Sopenharmony_ci	return ret;
11178c2ecf20Sopenharmony_ci}
11188c2ecf20Sopenharmony_ci
11198c2ecf20Sopenharmony_ciint ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
11208c2ecf20Sopenharmony_ci{
11218c2ecf20Sopenharmony_ci	int status = 0, size_change;
11228c2ecf20Sopenharmony_ci	int inode_locked = 0;
11238c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(dentry);
11248c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
11258c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
11268c2ecf20Sopenharmony_ci	struct buffer_head *bh = NULL;
11278c2ecf20Sopenharmony_ci	handle_t *handle = NULL;
11288c2ecf20Sopenharmony_ci	struct dquot *transfer_to[MAXQUOTAS] = { };
11298c2ecf20Sopenharmony_ci	int qtype;
11308c2ecf20Sopenharmony_ci	int had_lock;
11318c2ecf20Sopenharmony_ci	struct ocfs2_lock_holder oh;
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci	trace_ocfs2_setattr(inode, dentry,
11348c2ecf20Sopenharmony_ci			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
11358c2ecf20Sopenharmony_ci			    dentry->d_name.len, dentry->d_name.name,
11368c2ecf20Sopenharmony_ci			    attr->ia_valid, attr->ia_mode,
11378c2ecf20Sopenharmony_ci			    from_kuid(&init_user_ns, attr->ia_uid),
11388c2ecf20Sopenharmony_ci			    from_kgid(&init_user_ns, attr->ia_gid));
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci	/* ensuring we don't even attempt to truncate a symlink */
11418c2ecf20Sopenharmony_ci	if (S_ISLNK(inode->i_mode))
11428c2ecf20Sopenharmony_ci		attr->ia_valid &= ~ATTR_SIZE;
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_ci#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
11458c2ecf20Sopenharmony_ci			   | ATTR_GID | ATTR_UID | ATTR_MODE)
11468c2ecf20Sopenharmony_ci	if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
11478c2ecf20Sopenharmony_ci		return 0;
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_ci	status = setattr_prepare(dentry, attr);
11508c2ecf20Sopenharmony_ci	if (status)
11518c2ecf20Sopenharmony_ci		return status;
11528c2ecf20Sopenharmony_ci
11538c2ecf20Sopenharmony_ci	if (is_quota_modification(inode, attr)) {
11548c2ecf20Sopenharmony_ci		status = dquot_initialize(inode);
11558c2ecf20Sopenharmony_ci		if (status)
11568c2ecf20Sopenharmony_ci			return status;
11578c2ecf20Sopenharmony_ci	}
11588c2ecf20Sopenharmony_ci	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
11598c2ecf20Sopenharmony_ci	if (size_change) {
11608c2ecf20Sopenharmony_ci		/*
11618c2ecf20Sopenharmony_ci		 * Here we should wait dio to finish before inode lock
11628c2ecf20Sopenharmony_ci		 * to avoid a deadlock between ocfs2_setattr() and
11638c2ecf20Sopenharmony_ci		 * ocfs2_dio_end_io_write()
11648c2ecf20Sopenharmony_ci		 */
11658c2ecf20Sopenharmony_ci		inode_dio_wait(inode);
11668c2ecf20Sopenharmony_ci
11678c2ecf20Sopenharmony_ci		status = ocfs2_rw_lock(inode, 1);
11688c2ecf20Sopenharmony_ci		if (status < 0) {
11698c2ecf20Sopenharmony_ci			mlog_errno(status);
11708c2ecf20Sopenharmony_ci			goto bail;
11718c2ecf20Sopenharmony_ci		}
11728c2ecf20Sopenharmony_ci	}
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci	had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
11758c2ecf20Sopenharmony_ci	if (had_lock < 0) {
11768c2ecf20Sopenharmony_ci		status = had_lock;
11778c2ecf20Sopenharmony_ci		goto bail_unlock_rw;
11788c2ecf20Sopenharmony_ci	} else if (had_lock) {
11798c2ecf20Sopenharmony_ci		/*
11808c2ecf20Sopenharmony_ci		 * As far as we know, ocfs2_setattr() could only be the first
11818c2ecf20Sopenharmony_ci		 * VFS entry point in the call chain of recursive cluster
11828c2ecf20Sopenharmony_ci		 * locking issue.
11838c2ecf20Sopenharmony_ci		 *
11848c2ecf20Sopenharmony_ci		 * For instance:
11858c2ecf20Sopenharmony_ci		 * chmod_common()
11868c2ecf20Sopenharmony_ci		 *  notify_change()
11878c2ecf20Sopenharmony_ci		 *   ocfs2_setattr()
11888c2ecf20Sopenharmony_ci		 *    posix_acl_chmod()
11898c2ecf20Sopenharmony_ci		 *     ocfs2_iop_get_acl()
11908c2ecf20Sopenharmony_ci		 *
11918c2ecf20Sopenharmony_ci		 * But, we're not 100% sure if it's always true, because the
11928c2ecf20Sopenharmony_ci		 * ordering of the VFS entry points in the call chain is out
11938c2ecf20Sopenharmony_ci		 * of our control. So, we'd better dump the stack here to
11948c2ecf20Sopenharmony_ci		 * catch the other cases of recursive locking.
11958c2ecf20Sopenharmony_ci		 */
11968c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Another case of recursive locking:\n");
11978c2ecf20Sopenharmony_ci		dump_stack();
11988c2ecf20Sopenharmony_ci	}
11998c2ecf20Sopenharmony_ci	inode_locked = 1;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	if (size_change) {
12028c2ecf20Sopenharmony_ci		status = inode_newsize_ok(inode, attr->ia_size);
12038c2ecf20Sopenharmony_ci		if (status)
12048c2ecf20Sopenharmony_ci			goto bail_unlock;
12058c2ecf20Sopenharmony_ci
12068c2ecf20Sopenharmony_ci		if (i_size_read(inode) >= attr->ia_size) {
12078c2ecf20Sopenharmony_ci			if (ocfs2_should_order_data(inode)) {
12088c2ecf20Sopenharmony_ci				status = ocfs2_begin_ordered_truncate(inode,
12098c2ecf20Sopenharmony_ci								      attr->ia_size);
12108c2ecf20Sopenharmony_ci				if (status)
12118c2ecf20Sopenharmony_ci					goto bail_unlock;
12128c2ecf20Sopenharmony_ci			}
12138c2ecf20Sopenharmony_ci			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
12148c2ecf20Sopenharmony_ci		} else
12158c2ecf20Sopenharmony_ci			status = ocfs2_extend_file(inode, bh, attr->ia_size);
12168c2ecf20Sopenharmony_ci		if (status < 0) {
12178c2ecf20Sopenharmony_ci			if (status != -ENOSPC)
12188c2ecf20Sopenharmony_ci				mlog_errno(status);
12198c2ecf20Sopenharmony_ci			status = -ENOSPC;
12208c2ecf20Sopenharmony_ci			goto bail_unlock;
12218c2ecf20Sopenharmony_ci		}
12228c2ecf20Sopenharmony_ci	}
12238c2ecf20Sopenharmony_ci
12248c2ecf20Sopenharmony_ci	if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
12258c2ecf20Sopenharmony_ci	    (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
12268c2ecf20Sopenharmony_ci		/*
12278c2ecf20Sopenharmony_ci		 * Gather pointers to quota structures so that allocation /
12288c2ecf20Sopenharmony_ci		 * freeing of quota structures happens here and not inside
12298c2ecf20Sopenharmony_ci		 * dquot_transfer() where we have problems with lock ordering
12308c2ecf20Sopenharmony_ci		 */
12318c2ecf20Sopenharmony_ci		if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
12328c2ecf20Sopenharmony_ci		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
12338c2ecf20Sopenharmony_ci		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
12348c2ecf20Sopenharmony_ci			transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
12358c2ecf20Sopenharmony_ci			if (IS_ERR(transfer_to[USRQUOTA])) {
12368c2ecf20Sopenharmony_ci				status = PTR_ERR(transfer_to[USRQUOTA]);
12378c2ecf20Sopenharmony_ci				transfer_to[USRQUOTA] = NULL;
12388c2ecf20Sopenharmony_ci				goto bail_unlock;
12398c2ecf20Sopenharmony_ci			}
12408c2ecf20Sopenharmony_ci		}
12418c2ecf20Sopenharmony_ci		if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
12428c2ecf20Sopenharmony_ci		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
12438c2ecf20Sopenharmony_ci		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
12448c2ecf20Sopenharmony_ci			transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
12458c2ecf20Sopenharmony_ci			if (IS_ERR(transfer_to[GRPQUOTA])) {
12468c2ecf20Sopenharmony_ci				status = PTR_ERR(transfer_to[GRPQUOTA]);
12478c2ecf20Sopenharmony_ci				transfer_to[GRPQUOTA] = NULL;
12488c2ecf20Sopenharmony_ci				goto bail_unlock;
12498c2ecf20Sopenharmony_ci			}
12508c2ecf20Sopenharmony_ci		}
12518c2ecf20Sopenharmony_ci		down_write(&OCFS2_I(inode)->ip_alloc_sem);
12528c2ecf20Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
12538c2ecf20Sopenharmony_ci					   2 * ocfs2_quota_trans_credits(sb));
12548c2ecf20Sopenharmony_ci		if (IS_ERR(handle)) {
12558c2ecf20Sopenharmony_ci			status = PTR_ERR(handle);
12568c2ecf20Sopenharmony_ci			mlog_errno(status);
12578c2ecf20Sopenharmony_ci			goto bail_unlock_alloc;
12588c2ecf20Sopenharmony_ci		}
12598c2ecf20Sopenharmony_ci		status = __dquot_transfer(inode, transfer_to);
12608c2ecf20Sopenharmony_ci		if (status < 0)
12618c2ecf20Sopenharmony_ci			goto bail_commit;
12628c2ecf20Sopenharmony_ci	} else {
12638c2ecf20Sopenharmony_ci		down_write(&OCFS2_I(inode)->ip_alloc_sem);
12648c2ecf20Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
12658c2ecf20Sopenharmony_ci		if (IS_ERR(handle)) {
12668c2ecf20Sopenharmony_ci			status = PTR_ERR(handle);
12678c2ecf20Sopenharmony_ci			mlog_errno(status);
12688c2ecf20Sopenharmony_ci			goto bail_unlock_alloc;
12698c2ecf20Sopenharmony_ci		}
12708c2ecf20Sopenharmony_ci	}
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci	setattr_copy(inode, attr);
12738c2ecf20Sopenharmony_ci	mark_inode_dirty(inode);
12748c2ecf20Sopenharmony_ci
12758c2ecf20Sopenharmony_ci	status = ocfs2_mark_inode_dirty(handle, inode, bh);
12768c2ecf20Sopenharmony_ci	if (status < 0)
12778c2ecf20Sopenharmony_ci		mlog_errno(status);
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_cibail_commit:
12808c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
12818c2ecf20Sopenharmony_cibail_unlock_alloc:
12828c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
12838c2ecf20Sopenharmony_cibail_unlock:
12848c2ecf20Sopenharmony_ci	if (status && inode_locked) {
12858c2ecf20Sopenharmony_ci		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
12868c2ecf20Sopenharmony_ci		inode_locked = 0;
12878c2ecf20Sopenharmony_ci	}
12888c2ecf20Sopenharmony_cibail_unlock_rw:
12898c2ecf20Sopenharmony_ci	if (size_change)
12908c2ecf20Sopenharmony_ci		ocfs2_rw_unlock(inode, 1);
12918c2ecf20Sopenharmony_cibail:
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	/* Release quota pointers in case we acquired them */
12948c2ecf20Sopenharmony_ci	for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
12958c2ecf20Sopenharmony_ci		dqput(transfer_to[qtype]);
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	if (!status && attr->ia_valid & ATTR_MODE) {
12988c2ecf20Sopenharmony_ci		status = ocfs2_acl_chmod(inode, bh);
12998c2ecf20Sopenharmony_ci		if (status < 0)
13008c2ecf20Sopenharmony_ci			mlog_errno(status);
13018c2ecf20Sopenharmony_ci	}
13028c2ecf20Sopenharmony_ci	if (inode_locked)
13038c2ecf20Sopenharmony_ci		ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci	brelse(bh);
13068c2ecf20Sopenharmony_ci	return status;
13078c2ecf20Sopenharmony_ci}
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_ciint ocfs2_getattr(const struct path *path, struct kstat *stat,
13108c2ecf20Sopenharmony_ci		  u32 request_mask, unsigned int flags)
13118c2ecf20Sopenharmony_ci{
13128c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
13138c2ecf20Sopenharmony_ci	struct super_block *sb = path->dentry->d_sb;
13148c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = sb->s_fs_info;
13158c2ecf20Sopenharmony_ci	int err;
13168c2ecf20Sopenharmony_ci
13178c2ecf20Sopenharmony_ci	err = ocfs2_inode_revalidate(path->dentry);
13188c2ecf20Sopenharmony_ci	if (err) {
13198c2ecf20Sopenharmony_ci		if (err != -ENOENT)
13208c2ecf20Sopenharmony_ci			mlog_errno(err);
13218c2ecf20Sopenharmony_ci		goto bail;
13228c2ecf20Sopenharmony_ci	}
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci	generic_fillattr(inode, stat);
13258c2ecf20Sopenharmony_ci	/*
13268c2ecf20Sopenharmony_ci	 * If there is inline data in the inode, the inode will normally not
13278c2ecf20Sopenharmony_ci	 * have data blocks allocated (it may have an external xattr block).
13288c2ecf20Sopenharmony_ci	 * Report at least one sector for such files, so tools like tar, rsync,
13298c2ecf20Sopenharmony_ci	 * others don't incorrectly think the file is completely sparse.
13308c2ecf20Sopenharmony_ci	 */
13318c2ecf20Sopenharmony_ci	if (unlikely(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
13328c2ecf20Sopenharmony_ci		stat->blocks += (stat->size + 511)>>9;
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	/* We set the blksize from the cluster size for performance */
13358c2ecf20Sopenharmony_ci	stat->blksize = osb->s_clustersize;
13368c2ecf20Sopenharmony_ci
13378c2ecf20Sopenharmony_cibail:
13388c2ecf20Sopenharmony_ci	return err;
13398c2ecf20Sopenharmony_ci}
13408c2ecf20Sopenharmony_ci
13418c2ecf20Sopenharmony_ciint ocfs2_permission(struct inode *inode, int mask)
13428c2ecf20Sopenharmony_ci{
13438c2ecf20Sopenharmony_ci	int ret, had_lock;
13448c2ecf20Sopenharmony_ci	struct ocfs2_lock_holder oh;
13458c2ecf20Sopenharmony_ci
13468c2ecf20Sopenharmony_ci	if (mask & MAY_NOT_BLOCK)
13478c2ecf20Sopenharmony_ci		return -ECHILD;
13488c2ecf20Sopenharmony_ci
13498c2ecf20Sopenharmony_ci	had_lock = ocfs2_inode_lock_tracker(inode, NULL, 0, &oh);
13508c2ecf20Sopenharmony_ci	if (had_lock < 0) {
13518c2ecf20Sopenharmony_ci		ret = had_lock;
13528c2ecf20Sopenharmony_ci		goto out;
13538c2ecf20Sopenharmony_ci	} else if (had_lock) {
13548c2ecf20Sopenharmony_ci		/* See comments in ocfs2_setattr() for details.
13558c2ecf20Sopenharmony_ci		 * The call chain of this case could be:
13568c2ecf20Sopenharmony_ci		 * do_sys_open()
13578c2ecf20Sopenharmony_ci		 *  may_open()
13588c2ecf20Sopenharmony_ci		 *   inode_permission()
13598c2ecf20Sopenharmony_ci		 *    ocfs2_permission()
13608c2ecf20Sopenharmony_ci		 *     ocfs2_iop_get_acl()
13618c2ecf20Sopenharmony_ci		 */
13628c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "Another case of recursive locking:\n");
13638c2ecf20Sopenharmony_ci		dump_stack();
13648c2ecf20Sopenharmony_ci	}
13658c2ecf20Sopenharmony_ci
13668c2ecf20Sopenharmony_ci	ret = generic_permission(inode, mask);
13678c2ecf20Sopenharmony_ci
13688c2ecf20Sopenharmony_ci	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
13698c2ecf20Sopenharmony_ciout:
13708c2ecf20Sopenharmony_ci	return ret;
13718c2ecf20Sopenharmony_ci}
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_cistatic int __ocfs2_write_remove_suid(struct inode *inode,
13748c2ecf20Sopenharmony_ci				     struct buffer_head *bh)
13758c2ecf20Sopenharmony_ci{
13768c2ecf20Sopenharmony_ci	int ret;
13778c2ecf20Sopenharmony_ci	handle_t *handle;
13788c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
13798c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di;
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	trace_ocfs2_write_remove_suid(
13828c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
13838c2ecf20Sopenharmony_ci			inode->i_mode);
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
13868c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
13878c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
13888c2ecf20Sopenharmony_ci		mlog_errno(ret);
13898c2ecf20Sopenharmony_ci		goto out;
13908c2ecf20Sopenharmony_ci	}
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
13938c2ecf20Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
13948c2ecf20Sopenharmony_ci	if (ret < 0) {
13958c2ecf20Sopenharmony_ci		mlog_errno(ret);
13968c2ecf20Sopenharmony_ci		goto out_trans;
13978c2ecf20Sopenharmony_ci	}
13988c2ecf20Sopenharmony_ci
13998c2ecf20Sopenharmony_ci	inode->i_mode &= ~S_ISUID;
14008c2ecf20Sopenharmony_ci	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
14018c2ecf20Sopenharmony_ci		inode->i_mode &= ~S_ISGID;
14028c2ecf20Sopenharmony_ci
14038c2ecf20Sopenharmony_ci	di = (struct ocfs2_dinode *) bh->b_data;
14048c2ecf20Sopenharmony_ci	di->i_mode = cpu_to_le16(inode->i_mode);
14058c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ciout_trans:
14108c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
14118c2ecf20Sopenharmony_ciout:
14128c2ecf20Sopenharmony_ci	return ret;
14138c2ecf20Sopenharmony_ci}
14148c2ecf20Sopenharmony_ci
14158c2ecf20Sopenharmony_cistatic int ocfs2_write_remove_suid(struct inode *inode)
14168c2ecf20Sopenharmony_ci{
14178c2ecf20Sopenharmony_ci	int ret;
14188c2ecf20Sopenharmony_ci	struct buffer_head *bh = NULL;
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci	ret = ocfs2_read_inode_block(inode, &bh);
14218c2ecf20Sopenharmony_ci	if (ret < 0) {
14228c2ecf20Sopenharmony_ci		mlog_errno(ret);
14238c2ecf20Sopenharmony_ci		goto out;
14248c2ecf20Sopenharmony_ci	}
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_ci	ret =  __ocfs2_write_remove_suid(inode, bh);
14278c2ecf20Sopenharmony_ciout:
14288c2ecf20Sopenharmony_ci	brelse(bh);
14298c2ecf20Sopenharmony_ci	return ret;
14308c2ecf20Sopenharmony_ci}
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_ci/*
14338c2ecf20Sopenharmony_ci * Allocate enough extents to cover the region starting at byte offset
14348c2ecf20Sopenharmony_ci * start for len bytes. Existing extents are skipped, any extents
14358c2ecf20Sopenharmony_ci * added are marked as "unwritten".
14368c2ecf20Sopenharmony_ci */
14378c2ecf20Sopenharmony_cistatic int ocfs2_allocate_unwritten_extents(struct inode *inode,
14388c2ecf20Sopenharmony_ci					    u64 start, u64 len)
14398c2ecf20Sopenharmony_ci{
14408c2ecf20Sopenharmony_ci	int ret;
14418c2ecf20Sopenharmony_ci	u32 cpos, phys_cpos, clusters, alloc_size;
14428c2ecf20Sopenharmony_ci	u64 end = start + len;
14438c2ecf20Sopenharmony_ci	struct buffer_head *di_bh = NULL;
14448c2ecf20Sopenharmony_ci
14458c2ecf20Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
14468c2ecf20Sopenharmony_ci		ret = ocfs2_read_inode_block(inode, &di_bh);
14478c2ecf20Sopenharmony_ci		if (ret) {
14488c2ecf20Sopenharmony_ci			mlog_errno(ret);
14498c2ecf20Sopenharmony_ci			goto out;
14508c2ecf20Sopenharmony_ci		}
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci		/*
14538c2ecf20Sopenharmony_ci		 * Nothing to do if the requested reservation range
14548c2ecf20Sopenharmony_ci		 * fits within the inode.
14558c2ecf20Sopenharmony_ci		 */
14568c2ecf20Sopenharmony_ci		if (ocfs2_size_fits_inline_data(di_bh, end))
14578c2ecf20Sopenharmony_ci			goto out;
14588c2ecf20Sopenharmony_ci
14598c2ecf20Sopenharmony_ci		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
14608c2ecf20Sopenharmony_ci		if (ret) {
14618c2ecf20Sopenharmony_ci			mlog_errno(ret);
14628c2ecf20Sopenharmony_ci			goto out;
14638c2ecf20Sopenharmony_ci		}
14648c2ecf20Sopenharmony_ci	}
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_ci	/*
14678c2ecf20Sopenharmony_ci	 * We consider both start and len to be inclusive.
14688c2ecf20Sopenharmony_ci	 */
14698c2ecf20Sopenharmony_ci	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
14708c2ecf20Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
14718c2ecf20Sopenharmony_ci	clusters -= cpos;
14728c2ecf20Sopenharmony_ci
14738c2ecf20Sopenharmony_ci	while (clusters) {
14748c2ecf20Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
14758c2ecf20Sopenharmony_ci					 &alloc_size, NULL);
14768c2ecf20Sopenharmony_ci		if (ret) {
14778c2ecf20Sopenharmony_ci			mlog_errno(ret);
14788c2ecf20Sopenharmony_ci			goto out;
14798c2ecf20Sopenharmony_ci		}
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ci		/*
14828c2ecf20Sopenharmony_ci		 * Hole or existing extent len can be arbitrary, so
14838c2ecf20Sopenharmony_ci		 * cap it to our own allocation request.
14848c2ecf20Sopenharmony_ci		 */
14858c2ecf20Sopenharmony_ci		if (alloc_size > clusters)
14868c2ecf20Sopenharmony_ci			alloc_size = clusters;
14878c2ecf20Sopenharmony_ci
14888c2ecf20Sopenharmony_ci		if (phys_cpos) {
14898c2ecf20Sopenharmony_ci			/*
14908c2ecf20Sopenharmony_ci			 * We already have an allocation at this
14918c2ecf20Sopenharmony_ci			 * region so we can safely skip it.
14928c2ecf20Sopenharmony_ci			 */
14938c2ecf20Sopenharmony_ci			goto next;
14948c2ecf20Sopenharmony_ci		}
14958c2ecf20Sopenharmony_ci
14968c2ecf20Sopenharmony_ci		ret = ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
14978c2ecf20Sopenharmony_ci		if (ret) {
14988c2ecf20Sopenharmony_ci			if (ret != -ENOSPC)
14998c2ecf20Sopenharmony_ci				mlog_errno(ret);
15008c2ecf20Sopenharmony_ci			goto out;
15018c2ecf20Sopenharmony_ci		}
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_cinext:
15048c2ecf20Sopenharmony_ci		cpos += alloc_size;
15058c2ecf20Sopenharmony_ci		clusters -= alloc_size;
15068c2ecf20Sopenharmony_ci	}
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	ret = 0;
15098c2ecf20Sopenharmony_ciout:
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci	brelse(di_bh);
15128c2ecf20Sopenharmony_ci	return ret;
15138c2ecf20Sopenharmony_ci}
15148c2ecf20Sopenharmony_ci
15158c2ecf20Sopenharmony_ci/*
15168c2ecf20Sopenharmony_ci * Truncate a byte range, avoiding pages within partial clusters. This
15178c2ecf20Sopenharmony_ci * preserves those pages for the zeroing code to write to.
15188c2ecf20Sopenharmony_ci */
15198c2ecf20Sopenharmony_cistatic void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
15208c2ecf20Sopenharmony_ci					 u64 byte_len)
15218c2ecf20Sopenharmony_ci{
15228c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
15238c2ecf20Sopenharmony_ci	loff_t start, end;
15248c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
15258c2ecf20Sopenharmony_ci
15268c2ecf20Sopenharmony_ci	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
15278c2ecf20Sopenharmony_ci	end = byte_start + byte_len;
15288c2ecf20Sopenharmony_ci	end = end & ~(osb->s_clustersize - 1);
15298c2ecf20Sopenharmony_ci
15308c2ecf20Sopenharmony_ci	if (start < end) {
15318c2ecf20Sopenharmony_ci		unmap_mapping_range(mapping, start, end - start, 0);
15328c2ecf20Sopenharmony_ci		truncate_inode_pages_range(mapping, start, end - 1);
15338c2ecf20Sopenharmony_ci	}
15348c2ecf20Sopenharmony_ci}
15358c2ecf20Sopenharmony_ci
/*
 * Zero out the partial blocks of one cluster.
 *
 * start: file offset where zeroing begins; it is rounded up to the next
 *        block boundary.
 * len: trimmed to the end of the current cluster if "start + len" extends
 *      beyond it.
 */
15438c2ecf20Sopenharmony_cistatic int ocfs2_zeroout_partial_cluster(struct inode *inode,
15448c2ecf20Sopenharmony_ci					u64 start, u64 len)
15458c2ecf20Sopenharmony_ci{
15468c2ecf20Sopenharmony_ci	int ret;
15478c2ecf20Sopenharmony_ci	u64 start_block, end_block, nr_blocks;
15488c2ecf20Sopenharmony_ci	u64 p_block, offset;
15498c2ecf20Sopenharmony_ci	u32 cluster, p_cluster, nr_clusters;
15508c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
15518c2ecf20Sopenharmony_ci	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
15528c2ecf20Sopenharmony_ci
15538c2ecf20Sopenharmony_ci	if (start + len < end)
15548c2ecf20Sopenharmony_ci		end = start + len;
15558c2ecf20Sopenharmony_ci
15568c2ecf20Sopenharmony_ci	start_block = ocfs2_blocks_for_bytes(sb, start);
15578c2ecf20Sopenharmony_ci	end_block = ocfs2_blocks_for_bytes(sb, end);
15588c2ecf20Sopenharmony_ci	nr_blocks = end_block - start_block;
15598c2ecf20Sopenharmony_ci	if (!nr_blocks)
15608c2ecf20Sopenharmony_ci		return 0;
15618c2ecf20Sopenharmony_ci
15628c2ecf20Sopenharmony_ci	cluster = ocfs2_bytes_to_clusters(sb, start);
15638c2ecf20Sopenharmony_ci	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
15648c2ecf20Sopenharmony_ci				&nr_clusters, NULL);
15658c2ecf20Sopenharmony_ci	if (ret)
15668c2ecf20Sopenharmony_ci		return ret;
15678c2ecf20Sopenharmony_ci	if (!p_cluster)
15688c2ecf20Sopenharmony_ci		return 0;
15698c2ecf20Sopenharmony_ci
15708c2ecf20Sopenharmony_ci	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
15718c2ecf20Sopenharmony_ci	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
15728c2ecf20Sopenharmony_ci	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
15738c2ecf20Sopenharmony_ci}
15748c2ecf20Sopenharmony_ci
15758c2ecf20Sopenharmony_cistatic int ocfs2_zero_partial_clusters(struct inode *inode,
15768c2ecf20Sopenharmony_ci				       u64 start, u64 len)
15778c2ecf20Sopenharmony_ci{
15788c2ecf20Sopenharmony_ci	int ret = 0;
15798c2ecf20Sopenharmony_ci	u64 tmpend = 0;
15808c2ecf20Sopenharmony_ci	u64 end = start + len;
15818c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
15828c2ecf20Sopenharmony_ci	unsigned int csize = osb->s_clustersize;
15838c2ecf20Sopenharmony_ci	handle_t *handle;
15848c2ecf20Sopenharmony_ci	loff_t isize = i_size_read(inode);
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci	/*
15878c2ecf20Sopenharmony_ci	 * The "start" and "end" values are NOT necessarily part of
15888c2ecf20Sopenharmony_ci	 * the range whose allocation is being deleted. Rather, this
15898c2ecf20Sopenharmony_ci	 * is what the user passed in with the request. We must zero
15908c2ecf20Sopenharmony_ci	 * partial clusters here. There's no need to worry about
15918c2ecf20Sopenharmony_ci	 * physical allocation - the zeroing code knows to skip holes.
15928c2ecf20Sopenharmony_ci	 */
15938c2ecf20Sopenharmony_ci	trace_ocfs2_zero_partial_clusters(
15948c2ecf20Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
15958c2ecf20Sopenharmony_ci		(unsigned long long)start, (unsigned long long)end);
15968c2ecf20Sopenharmony_ci
15978c2ecf20Sopenharmony_ci	/*
15988c2ecf20Sopenharmony_ci	 * If both edges are on a cluster boundary then there's no
15998c2ecf20Sopenharmony_ci	 * zeroing required as the region is part of the allocation to
16008c2ecf20Sopenharmony_ci	 * be truncated.
16018c2ecf20Sopenharmony_ci	 */
16028c2ecf20Sopenharmony_ci	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
16038c2ecf20Sopenharmony_ci		goto out;
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci	/* No page cache for EOF blocks, issue zero out to disk. */
16068c2ecf20Sopenharmony_ci	if (end > isize) {
16078c2ecf20Sopenharmony_ci		/*
16088c2ecf20Sopenharmony_ci		 * zeroout eof blocks in last cluster starting from
16098c2ecf20Sopenharmony_ci		 * "isize" even "start" > "isize" because it is
16108c2ecf20Sopenharmony_ci		 * complicated to zeroout just at "start" as "start"
16118c2ecf20Sopenharmony_ci		 * may be not aligned with block size, buffer write
16128c2ecf20Sopenharmony_ci		 * would be required to do that, but out of eof buffer
16138c2ecf20Sopenharmony_ci		 * write is not supported.
16148c2ecf20Sopenharmony_ci		 */
16158c2ecf20Sopenharmony_ci		ret = ocfs2_zeroout_partial_cluster(inode, isize,
16168c2ecf20Sopenharmony_ci					end - isize);
16178c2ecf20Sopenharmony_ci		if (ret) {
16188c2ecf20Sopenharmony_ci			mlog_errno(ret);
16198c2ecf20Sopenharmony_ci			goto out;
16208c2ecf20Sopenharmony_ci		}
16218c2ecf20Sopenharmony_ci		if (start >= isize)
16228c2ecf20Sopenharmony_ci			goto out;
16238c2ecf20Sopenharmony_ci		end = isize;
16248c2ecf20Sopenharmony_ci	}
16258c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
16268c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
16278c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
16288c2ecf20Sopenharmony_ci		mlog_errno(ret);
16298c2ecf20Sopenharmony_ci		goto out;
16308c2ecf20Sopenharmony_ci	}
16318c2ecf20Sopenharmony_ci
16328c2ecf20Sopenharmony_ci	/*
16338c2ecf20Sopenharmony_ci	 * If start is on a cluster boundary and end is somewhere in another
16348c2ecf20Sopenharmony_ci	 * cluster, we have not COWed the cluster starting at start, unless
16358c2ecf20Sopenharmony_ci	 * end is also within the same cluster. So, in this case, we skip this
16368c2ecf20Sopenharmony_ci	 * first call to ocfs2_zero_range_for_truncate() truncate and move on
16378c2ecf20Sopenharmony_ci	 * to the next one.
16388c2ecf20Sopenharmony_ci	 */
16398c2ecf20Sopenharmony_ci	if ((start & (csize - 1)) != 0) {
16408c2ecf20Sopenharmony_ci		/*
16418c2ecf20Sopenharmony_ci		 * We want to get the byte offset of the end of the 1st
16428c2ecf20Sopenharmony_ci		 * cluster.
16438c2ecf20Sopenharmony_ci		 */
16448c2ecf20Sopenharmony_ci		tmpend = (u64)osb->s_clustersize +
16458c2ecf20Sopenharmony_ci			(start & ~(osb->s_clustersize - 1));
16468c2ecf20Sopenharmony_ci		if (tmpend > end)
16478c2ecf20Sopenharmony_ci			tmpend = end;
16488c2ecf20Sopenharmony_ci
16498c2ecf20Sopenharmony_ci		trace_ocfs2_zero_partial_clusters_range1(
16508c2ecf20Sopenharmony_ci			(unsigned long long)start,
16518c2ecf20Sopenharmony_ci			(unsigned long long)tmpend);
16528c2ecf20Sopenharmony_ci
16538c2ecf20Sopenharmony_ci		ret = ocfs2_zero_range_for_truncate(inode, handle, start,
16548c2ecf20Sopenharmony_ci						    tmpend);
16558c2ecf20Sopenharmony_ci		if (ret)
16568c2ecf20Sopenharmony_ci			mlog_errno(ret);
16578c2ecf20Sopenharmony_ci	}
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	if (tmpend < end) {
16608c2ecf20Sopenharmony_ci		/*
16618c2ecf20Sopenharmony_ci		 * This may make start and end equal, but the zeroing
16628c2ecf20Sopenharmony_ci		 * code will skip any work in that case so there's no
16638c2ecf20Sopenharmony_ci		 * need to catch it up here.
16648c2ecf20Sopenharmony_ci		 */
16658c2ecf20Sopenharmony_ci		start = end & ~(osb->s_clustersize - 1);
16668c2ecf20Sopenharmony_ci
16678c2ecf20Sopenharmony_ci		trace_ocfs2_zero_partial_clusters_range2(
16688c2ecf20Sopenharmony_ci			(unsigned long long)start, (unsigned long long)end);
16698c2ecf20Sopenharmony_ci
16708c2ecf20Sopenharmony_ci		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
16718c2ecf20Sopenharmony_ci		if (ret)
16728c2ecf20Sopenharmony_ci			mlog_errno(ret);
16738c2ecf20Sopenharmony_ci	}
16748c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
16758c2ecf20Sopenharmony_ci
16768c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
16778c2ecf20Sopenharmony_ciout:
16788c2ecf20Sopenharmony_ci	return ret;
16798c2ecf20Sopenharmony_ci}
16808c2ecf20Sopenharmony_ci
16818c2ecf20Sopenharmony_cistatic int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
16828c2ecf20Sopenharmony_ci{
16838c2ecf20Sopenharmony_ci	int i;
16848c2ecf20Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
16858c2ecf20Sopenharmony_ci
16868c2ecf20Sopenharmony_ci	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
16878c2ecf20Sopenharmony_ci
16888c2ecf20Sopenharmony_ci		rec = &el->l_recs[i];
16898c2ecf20Sopenharmony_ci
16908c2ecf20Sopenharmony_ci		if (le32_to_cpu(rec->e_cpos) < pos)
16918c2ecf20Sopenharmony_ci			break;
16928c2ecf20Sopenharmony_ci	}
16938c2ecf20Sopenharmony_ci
16948c2ecf20Sopenharmony_ci	return i;
16958c2ecf20Sopenharmony_ci}
16968c2ecf20Sopenharmony_ci
16978c2ecf20Sopenharmony_ci/*
16988c2ecf20Sopenharmony_ci * Helper to calculate the punching pos and length in one run, we handle the
16998c2ecf20Sopenharmony_ci * following three cases in order:
17008c2ecf20Sopenharmony_ci *
17018c2ecf20Sopenharmony_ci * - remove the entire record
17028c2ecf20Sopenharmony_ci * - remove a partial record
17038c2ecf20Sopenharmony_ci * - no record needs to be removed (hole-punching completed)
17048c2ecf20Sopenharmony_ci*/
17058c2ecf20Sopenharmony_cistatic void ocfs2_calc_trunc_pos(struct inode *inode,
17068c2ecf20Sopenharmony_ci				 struct ocfs2_extent_list *el,
17078c2ecf20Sopenharmony_ci				 struct ocfs2_extent_rec *rec,
17088c2ecf20Sopenharmony_ci				 u32 trunc_start, u32 *trunc_cpos,
17098c2ecf20Sopenharmony_ci				 u32 *trunc_len, u32 *trunc_end,
17108c2ecf20Sopenharmony_ci				 u64 *blkno, int *done)
17118c2ecf20Sopenharmony_ci{
17128c2ecf20Sopenharmony_ci	int ret = 0;
17138c2ecf20Sopenharmony_ci	u32 coff, range;
17148c2ecf20Sopenharmony_ci
17158c2ecf20Sopenharmony_ci	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_ci	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
17188c2ecf20Sopenharmony_ci		/*
17198c2ecf20Sopenharmony_ci		 * remove an entire extent record.
17208c2ecf20Sopenharmony_ci		 */
17218c2ecf20Sopenharmony_ci		*trunc_cpos = le32_to_cpu(rec->e_cpos);
17228c2ecf20Sopenharmony_ci		/*
17238c2ecf20Sopenharmony_ci		 * Skip holes if any.
17248c2ecf20Sopenharmony_ci		 */
17258c2ecf20Sopenharmony_ci		if (range < *trunc_end)
17268c2ecf20Sopenharmony_ci			*trunc_end = range;
17278c2ecf20Sopenharmony_ci		*trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
17288c2ecf20Sopenharmony_ci		*blkno = le64_to_cpu(rec->e_blkno);
17298c2ecf20Sopenharmony_ci		*trunc_end = le32_to_cpu(rec->e_cpos);
17308c2ecf20Sopenharmony_ci	} else if (range > trunc_start) {
17318c2ecf20Sopenharmony_ci		/*
17328c2ecf20Sopenharmony_ci		 * remove a partial extent record, which means we're
17338c2ecf20Sopenharmony_ci		 * removing the last extent record.
17348c2ecf20Sopenharmony_ci		 */
17358c2ecf20Sopenharmony_ci		*trunc_cpos = trunc_start;
17368c2ecf20Sopenharmony_ci		/*
17378c2ecf20Sopenharmony_ci		 * skip hole if any.
17388c2ecf20Sopenharmony_ci		 */
17398c2ecf20Sopenharmony_ci		if (range < *trunc_end)
17408c2ecf20Sopenharmony_ci			*trunc_end = range;
17418c2ecf20Sopenharmony_ci		*trunc_len = *trunc_end - trunc_start;
17428c2ecf20Sopenharmony_ci		coff = trunc_start - le32_to_cpu(rec->e_cpos);
17438c2ecf20Sopenharmony_ci		*blkno = le64_to_cpu(rec->e_blkno) +
17448c2ecf20Sopenharmony_ci				ocfs2_clusters_to_blocks(inode->i_sb, coff);
17458c2ecf20Sopenharmony_ci		*trunc_end = trunc_start;
17468c2ecf20Sopenharmony_ci	} else {
17478c2ecf20Sopenharmony_ci		/*
17488c2ecf20Sopenharmony_ci		 * It may have two following possibilities:
17498c2ecf20Sopenharmony_ci		 *
17508c2ecf20Sopenharmony_ci		 * - last record has been removed
17518c2ecf20Sopenharmony_ci		 * - trunc_start was within a hole
17528c2ecf20Sopenharmony_ci		 *
17538c2ecf20Sopenharmony_ci		 * both two cases mean the completion of hole punching.
17548c2ecf20Sopenharmony_ci		 */
17558c2ecf20Sopenharmony_ci		ret = 1;
17568c2ecf20Sopenharmony_ci	}
17578c2ecf20Sopenharmony_ci
17588c2ecf20Sopenharmony_ci	*done = ret;
17598c2ecf20Sopenharmony_ci}
17608c2ecf20Sopenharmony_ci
17618c2ecf20Sopenharmony_ciint ocfs2_remove_inode_range(struct inode *inode,
17628c2ecf20Sopenharmony_ci			     struct buffer_head *di_bh, u64 byte_start,
17638c2ecf20Sopenharmony_ci			     u64 byte_len)
17648c2ecf20Sopenharmony_ci{
17658c2ecf20Sopenharmony_ci	int ret = 0, flags = 0, done = 0, i;
17668c2ecf20Sopenharmony_ci	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
17678c2ecf20Sopenharmony_ci	u32 cluster_in_el;
17688c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
17698c2ecf20Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
17708c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
17718c2ecf20Sopenharmony_ci	struct ocfs2_extent_tree et;
17728c2ecf20Sopenharmony_ci	struct ocfs2_path *path = NULL;
17738c2ecf20Sopenharmony_ci	struct ocfs2_extent_list *el = NULL;
17748c2ecf20Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
17758c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
17768c2ecf20Sopenharmony_ci	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
17778c2ecf20Sopenharmony_ci
17788c2ecf20Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
17798c2ecf20Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci	trace_ocfs2_remove_inode_range(
17828c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
17838c2ecf20Sopenharmony_ci			(unsigned long long)byte_start,
17848c2ecf20Sopenharmony_ci			(unsigned long long)byte_len);
17858c2ecf20Sopenharmony_ci
17868c2ecf20Sopenharmony_ci	if (byte_len == 0)
17878c2ecf20Sopenharmony_ci		return 0;
17888c2ecf20Sopenharmony_ci
17898c2ecf20Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
17908c2ecf20Sopenharmony_ci		ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
17918c2ecf20Sopenharmony_ci					    byte_start + byte_len, 0);
17928c2ecf20Sopenharmony_ci		if (ret) {
17938c2ecf20Sopenharmony_ci			mlog_errno(ret);
17948c2ecf20Sopenharmony_ci			goto out;
17958c2ecf20Sopenharmony_ci		}
17968c2ecf20Sopenharmony_ci		/*
17978c2ecf20Sopenharmony_ci		 * There's no need to get fancy with the page cache
17988c2ecf20Sopenharmony_ci		 * truncate of an inline-data inode. We're talking
17998c2ecf20Sopenharmony_ci		 * about less than a page here, which will be cached
18008c2ecf20Sopenharmony_ci		 * in the dinode buffer anyway.
18018c2ecf20Sopenharmony_ci		 */
18028c2ecf20Sopenharmony_ci		unmap_mapping_range(mapping, 0, 0, 0);
18038c2ecf20Sopenharmony_ci		truncate_inode_pages(mapping, 0);
18048c2ecf20Sopenharmony_ci		goto out;
18058c2ecf20Sopenharmony_ci	}
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci	/*
18088c2ecf20Sopenharmony_ci	 * For reflinks, we may need to CoW 2 clusters which might be
18098c2ecf20Sopenharmony_ci	 * partially zero'd later, if hole's start and end offset were
18108c2ecf20Sopenharmony_ci	 * within one cluster(means is not exactly aligned to clustersize).
18118c2ecf20Sopenharmony_ci	 */
18128c2ecf20Sopenharmony_ci
18138c2ecf20Sopenharmony_ci	if (ocfs2_is_refcount_inode(inode)) {
18148c2ecf20Sopenharmony_ci		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
18158c2ecf20Sopenharmony_ci		if (ret) {
18168c2ecf20Sopenharmony_ci			mlog_errno(ret);
18178c2ecf20Sopenharmony_ci			goto out;
18188c2ecf20Sopenharmony_ci		}
18198c2ecf20Sopenharmony_ci
18208c2ecf20Sopenharmony_ci		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
18218c2ecf20Sopenharmony_ci		if (ret) {
18228c2ecf20Sopenharmony_ci			mlog_errno(ret);
18238c2ecf20Sopenharmony_ci			goto out;
18248c2ecf20Sopenharmony_ci		}
18258c2ecf20Sopenharmony_ci	}
18268c2ecf20Sopenharmony_ci
18278c2ecf20Sopenharmony_ci	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
18288c2ecf20Sopenharmony_ci	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
18298c2ecf20Sopenharmony_ci	cluster_in_el = trunc_end;
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
18328c2ecf20Sopenharmony_ci	if (ret) {
18338c2ecf20Sopenharmony_ci		mlog_errno(ret);
18348c2ecf20Sopenharmony_ci		goto out;
18358c2ecf20Sopenharmony_ci	}
18368c2ecf20Sopenharmony_ci
18378c2ecf20Sopenharmony_ci	path = ocfs2_new_path_from_et(&et);
18388c2ecf20Sopenharmony_ci	if (!path) {
18398c2ecf20Sopenharmony_ci		ret = -ENOMEM;
18408c2ecf20Sopenharmony_ci		mlog_errno(ret);
18418c2ecf20Sopenharmony_ci		goto out;
18428c2ecf20Sopenharmony_ci	}
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	while (trunc_end > trunc_start) {
18458c2ecf20Sopenharmony_ci
18468c2ecf20Sopenharmony_ci		ret = ocfs2_find_path(INODE_CACHE(inode), path,
18478c2ecf20Sopenharmony_ci				      cluster_in_el);
18488c2ecf20Sopenharmony_ci		if (ret) {
18498c2ecf20Sopenharmony_ci			mlog_errno(ret);
18508c2ecf20Sopenharmony_ci			goto out;
18518c2ecf20Sopenharmony_ci		}
18528c2ecf20Sopenharmony_ci
18538c2ecf20Sopenharmony_ci		el = path_leaf_el(path);
18548c2ecf20Sopenharmony_ci
18558c2ecf20Sopenharmony_ci		i = ocfs2_find_rec(el, trunc_end);
18568c2ecf20Sopenharmony_ci		/*
18578c2ecf20Sopenharmony_ci		 * Need to go to previous extent block.
18588c2ecf20Sopenharmony_ci		 */
18598c2ecf20Sopenharmony_ci		if (i < 0) {
18608c2ecf20Sopenharmony_ci			if (path->p_tree_depth == 0)
18618c2ecf20Sopenharmony_ci				break;
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci			ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
18648c2ecf20Sopenharmony_ci							    path,
18658c2ecf20Sopenharmony_ci							    &cluster_in_el);
18668c2ecf20Sopenharmony_ci			if (ret) {
18678c2ecf20Sopenharmony_ci				mlog_errno(ret);
18688c2ecf20Sopenharmony_ci				goto out;
18698c2ecf20Sopenharmony_ci			}
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci			/*
18728c2ecf20Sopenharmony_ci			 * We've reached the leftmost extent block,
18738c2ecf20Sopenharmony_ci			 * it's safe to leave.
18748c2ecf20Sopenharmony_ci			 */
18758c2ecf20Sopenharmony_ci			if (cluster_in_el == 0)
18768c2ecf20Sopenharmony_ci				break;
18778c2ecf20Sopenharmony_ci
18788c2ecf20Sopenharmony_ci			/*
18798c2ecf20Sopenharmony_ci			 * The 'pos' searched for previous extent block is
18808c2ecf20Sopenharmony_ci			 * always one cluster less than actual trunc_end.
18818c2ecf20Sopenharmony_ci			 */
18828c2ecf20Sopenharmony_ci			trunc_end = cluster_in_el + 1;
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_ci			ocfs2_reinit_path(path, 1);
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci			continue;
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_ci		} else
18898c2ecf20Sopenharmony_ci			rec = &el->l_recs[i];
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci		ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
18928c2ecf20Sopenharmony_ci				     &trunc_len, &trunc_end, &blkno, &done);
18938c2ecf20Sopenharmony_ci		if (done)
18948c2ecf20Sopenharmony_ci			break;
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci		flags = rec->e_flags;
18978c2ecf20Sopenharmony_ci		phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
18988c2ecf20Sopenharmony_ci
18998c2ecf20Sopenharmony_ci		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
19008c2ecf20Sopenharmony_ci					       phys_cpos, trunc_len, flags,
19018c2ecf20Sopenharmony_ci					       &dealloc, refcount_loc, false);
19028c2ecf20Sopenharmony_ci		if (ret < 0) {
19038c2ecf20Sopenharmony_ci			mlog_errno(ret);
19048c2ecf20Sopenharmony_ci			goto out;
19058c2ecf20Sopenharmony_ci		}
19068c2ecf20Sopenharmony_ci
19078c2ecf20Sopenharmony_ci		cluster_in_el = trunc_end;
19088c2ecf20Sopenharmony_ci
19098c2ecf20Sopenharmony_ci		ocfs2_reinit_path(path, 1);
19108c2ecf20Sopenharmony_ci	}
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_ci	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
19138c2ecf20Sopenharmony_ci
19148c2ecf20Sopenharmony_ciout:
19158c2ecf20Sopenharmony_ci	ocfs2_free_path(path);
19168c2ecf20Sopenharmony_ci	ocfs2_schedule_truncate_log_flush(osb, 1);
19178c2ecf20Sopenharmony_ci	ocfs2_run_deallocs(osb, &dealloc);
19188c2ecf20Sopenharmony_ci
19198c2ecf20Sopenharmony_ci	return ret;
19208c2ecf20Sopenharmony_ci}
19218c2ecf20Sopenharmony_ci
19228c2ecf20Sopenharmony_ci/*
19238c2ecf20Sopenharmony_ci * Parts of this function taken from xfs_change_file_space()
19248c2ecf20Sopenharmony_ci */
19258c2ecf20Sopenharmony_cistatic int __ocfs2_change_file_space(struct file *file, struct inode *inode,
19268c2ecf20Sopenharmony_ci				     loff_t f_pos, unsigned int cmd,
19278c2ecf20Sopenharmony_ci				     struct ocfs2_space_resv *sr,
19288c2ecf20Sopenharmony_ci				     int change_size)
19298c2ecf20Sopenharmony_ci{
19308c2ecf20Sopenharmony_ci	int ret;
19318c2ecf20Sopenharmony_ci	s64 llen;
19328c2ecf20Sopenharmony_ci	loff_t size, orig_isize;
19338c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
19348c2ecf20Sopenharmony_ci	struct buffer_head *di_bh = NULL;
19358c2ecf20Sopenharmony_ci	handle_t *handle;
19368c2ecf20Sopenharmony_ci	unsigned long long max_off = inode->i_sb->s_maxbytes;
19378c2ecf20Sopenharmony_ci
19388c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
19398c2ecf20Sopenharmony_ci		return -EROFS;
19408c2ecf20Sopenharmony_ci
19418c2ecf20Sopenharmony_ci	inode_lock(inode);
19428c2ecf20Sopenharmony_ci
19438c2ecf20Sopenharmony_ci	/*
19448c2ecf20Sopenharmony_ci	 * This prevents concurrent writes on other nodes
19458c2ecf20Sopenharmony_ci	 */
19468c2ecf20Sopenharmony_ci	ret = ocfs2_rw_lock(inode, 1);
19478c2ecf20Sopenharmony_ci	if (ret) {
19488c2ecf20Sopenharmony_ci		mlog_errno(ret);
19498c2ecf20Sopenharmony_ci		goto out;
19508c2ecf20Sopenharmony_ci	}
19518c2ecf20Sopenharmony_ci
19528c2ecf20Sopenharmony_ci	ret = ocfs2_inode_lock(inode, &di_bh, 1);
19538c2ecf20Sopenharmony_ci	if (ret) {
19548c2ecf20Sopenharmony_ci		mlog_errno(ret);
19558c2ecf20Sopenharmony_ci		goto out_rw_unlock;
19568c2ecf20Sopenharmony_ci	}
19578c2ecf20Sopenharmony_ci
19588c2ecf20Sopenharmony_ci	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
19598c2ecf20Sopenharmony_ci		ret = -EPERM;
19608c2ecf20Sopenharmony_ci		goto out_inode_unlock;
19618c2ecf20Sopenharmony_ci	}
19628c2ecf20Sopenharmony_ci
19638c2ecf20Sopenharmony_ci	switch (sr->l_whence) {
19648c2ecf20Sopenharmony_ci	case 0: /*SEEK_SET*/
19658c2ecf20Sopenharmony_ci		break;
19668c2ecf20Sopenharmony_ci	case 1: /*SEEK_CUR*/
19678c2ecf20Sopenharmony_ci		sr->l_start += f_pos;
19688c2ecf20Sopenharmony_ci		break;
19698c2ecf20Sopenharmony_ci	case 2: /*SEEK_END*/
19708c2ecf20Sopenharmony_ci		sr->l_start += i_size_read(inode);
19718c2ecf20Sopenharmony_ci		break;
19728c2ecf20Sopenharmony_ci	default:
19738c2ecf20Sopenharmony_ci		ret = -EINVAL;
19748c2ecf20Sopenharmony_ci		goto out_inode_unlock;
19758c2ecf20Sopenharmony_ci	}
19768c2ecf20Sopenharmony_ci	sr->l_whence = 0;
19778c2ecf20Sopenharmony_ci
19788c2ecf20Sopenharmony_ci	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
19798c2ecf20Sopenharmony_ci
19808c2ecf20Sopenharmony_ci	if (sr->l_start < 0
19818c2ecf20Sopenharmony_ci	    || sr->l_start > max_off
19828c2ecf20Sopenharmony_ci	    || (sr->l_start + llen) < 0
19838c2ecf20Sopenharmony_ci	    || (sr->l_start + llen) > max_off) {
19848c2ecf20Sopenharmony_ci		ret = -EINVAL;
19858c2ecf20Sopenharmony_ci		goto out_inode_unlock;
19868c2ecf20Sopenharmony_ci	}
19878c2ecf20Sopenharmony_ci	size = sr->l_start + sr->l_len;
19888c2ecf20Sopenharmony_ci
19898c2ecf20Sopenharmony_ci	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
19908c2ecf20Sopenharmony_ci	    cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
19918c2ecf20Sopenharmony_ci		if (sr->l_len <= 0) {
19928c2ecf20Sopenharmony_ci			ret = -EINVAL;
19938c2ecf20Sopenharmony_ci			goto out_inode_unlock;
19948c2ecf20Sopenharmony_ci		}
19958c2ecf20Sopenharmony_ci	}
19968c2ecf20Sopenharmony_ci
19978c2ecf20Sopenharmony_ci	if (file && setattr_should_drop_suidgid(file_inode(file))) {
19988c2ecf20Sopenharmony_ci		ret = __ocfs2_write_remove_suid(inode, di_bh);
19998c2ecf20Sopenharmony_ci		if (ret) {
20008c2ecf20Sopenharmony_ci			mlog_errno(ret);
20018c2ecf20Sopenharmony_ci			goto out_inode_unlock;
20028c2ecf20Sopenharmony_ci		}
20038c2ecf20Sopenharmony_ci	}
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
20068c2ecf20Sopenharmony_ci	switch (cmd) {
20078c2ecf20Sopenharmony_ci	case OCFS2_IOC_RESVSP:
20088c2ecf20Sopenharmony_ci	case OCFS2_IOC_RESVSP64:
20098c2ecf20Sopenharmony_ci		/*
20108c2ecf20Sopenharmony_ci		 * This takes unsigned offsets, but the signed ones we
20118c2ecf20Sopenharmony_ci		 * pass have been checked against overflow above.
20128c2ecf20Sopenharmony_ci		 */
20138c2ecf20Sopenharmony_ci		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
20148c2ecf20Sopenharmony_ci						       sr->l_len);
20158c2ecf20Sopenharmony_ci		break;
20168c2ecf20Sopenharmony_ci	case OCFS2_IOC_UNRESVSP:
20178c2ecf20Sopenharmony_ci	case OCFS2_IOC_UNRESVSP64:
20188c2ecf20Sopenharmony_ci		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
20198c2ecf20Sopenharmony_ci					       sr->l_len);
20208c2ecf20Sopenharmony_ci		break;
20218c2ecf20Sopenharmony_ci	default:
20228c2ecf20Sopenharmony_ci		ret = -EINVAL;
20238c2ecf20Sopenharmony_ci	}
20248c2ecf20Sopenharmony_ci
20258c2ecf20Sopenharmony_ci	orig_isize = i_size_read(inode);
20268c2ecf20Sopenharmony_ci	/* zeroout eof blocks in the cluster. */
20278c2ecf20Sopenharmony_ci	if (!ret && change_size && orig_isize < size) {
20288c2ecf20Sopenharmony_ci		ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
20298c2ecf20Sopenharmony_ci					size - orig_isize);
20308c2ecf20Sopenharmony_ci		if (!ret)
20318c2ecf20Sopenharmony_ci			i_size_write(inode, size);
20328c2ecf20Sopenharmony_ci	}
20338c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
20348c2ecf20Sopenharmony_ci	if (ret) {
20358c2ecf20Sopenharmony_ci		mlog_errno(ret);
20368c2ecf20Sopenharmony_ci		goto out_inode_unlock;
20378c2ecf20Sopenharmony_ci	}
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	/*
20408c2ecf20Sopenharmony_ci	 * We update c/mtime for these changes
20418c2ecf20Sopenharmony_ci	 */
20428c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
20438c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
20448c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
20458c2ecf20Sopenharmony_ci		mlog_errno(ret);
20468c2ecf20Sopenharmony_ci		goto out_inode_unlock;
20478c2ecf20Sopenharmony_ci	}
20488c2ecf20Sopenharmony_ci
20498c2ecf20Sopenharmony_ci	inode->i_ctime = inode->i_mtime = current_time(inode);
20508c2ecf20Sopenharmony_ci	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
20518c2ecf20Sopenharmony_ci	if (ret < 0)
20528c2ecf20Sopenharmony_ci		mlog_errno(ret);
20538c2ecf20Sopenharmony_ci
20548c2ecf20Sopenharmony_ci	if (file && (file->f_flags & O_SYNC))
20558c2ecf20Sopenharmony_ci		handle->h_sync = 1;
20568c2ecf20Sopenharmony_ci
20578c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
20588c2ecf20Sopenharmony_ci
20598c2ecf20Sopenharmony_ciout_inode_unlock:
20608c2ecf20Sopenharmony_ci	brelse(di_bh);
20618c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
20628c2ecf20Sopenharmony_ciout_rw_unlock:
20638c2ecf20Sopenharmony_ci	ocfs2_rw_unlock(inode, 1);
20648c2ecf20Sopenharmony_ci
20658c2ecf20Sopenharmony_ciout:
20668c2ecf20Sopenharmony_ci	inode_unlock(inode);
20678c2ecf20Sopenharmony_ci	return ret;
20688c2ecf20Sopenharmony_ci}
20698c2ecf20Sopenharmony_ci
20708c2ecf20Sopenharmony_ciint ocfs2_change_file_space(struct file *file, unsigned int cmd,
20718c2ecf20Sopenharmony_ci			    struct ocfs2_space_resv *sr)
20728c2ecf20Sopenharmony_ci{
20738c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
20748c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
20758c2ecf20Sopenharmony_ci	int ret;
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci	if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
20788c2ecf20Sopenharmony_ci	    !ocfs2_writes_unwritten_extents(osb))
20798c2ecf20Sopenharmony_ci		return -ENOTTY;
20808c2ecf20Sopenharmony_ci	else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
20818c2ecf20Sopenharmony_ci		 !ocfs2_sparse_alloc(osb))
20828c2ecf20Sopenharmony_ci		return -ENOTTY;
20838c2ecf20Sopenharmony_ci
20848c2ecf20Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
20858c2ecf20Sopenharmony_ci		return -EINVAL;
20868c2ecf20Sopenharmony_ci
20878c2ecf20Sopenharmony_ci	if (!(file->f_mode & FMODE_WRITE))
20888c2ecf20Sopenharmony_ci		return -EBADF;
20898c2ecf20Sopenharmony_ci
20908c2ecf20Sopenharmony_ci	ret = mnt_want_write_file(file);
20918c2ecf20Sopenharmony_ci	if (ret)
20928c2ecf20Sopenharmony_ci		return ret;
20938c2ecf20Sopenharmony_ci	ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
20948c2ecf20Sopenharmony_ci	mnt_drop_write_file(file);
20958c2ecf20Sopenharmony_ci	return ret;
20968c2ecf20Sopenharmony_ci}
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_cistatic long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
20998c2ecf20Sopenharmony_ci			    loff_t len)
21008c2ecf20Sopenharmony_ci{
21018c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
21028c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
21038c2ecf20Sopenharmony_ci	struct ocfs2_space_resv sr;
21048c2ecf20Sopenharmony_ci	int change_size = 1;
21058c2ecf20Sopenharmony_ci	int cmd = OCFS2_IOC_RESVSP64;
21068c2ecf20Sopenharmony_ci	int ret = 0;
21078c2ecf20Sopenharmony_ci
21088c2ecf20Sopenharmony_ci	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
21098c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
21108c2ecf20Sopenharmony_ci	if (!ocfs2_writes_unwritten_extents(osb))
21118c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
21128c2ecf20Sopenharmony_ci
21138c2ecf20Sopenharmony_ci	if (mode & FALLOC_FL_KEEP_SIZE) {
21148c2ecf20Sopenharmony_ci		change_size = 0;
21158c2ecf20Sopenharmony_ci	} else {
21168c2ecf20Sopenharmony_ci		ret = inode_newsize_ok(inode, offset + len);
21178c2ecf20Sopenharmony_ci		if (ret)
21188c2ecf20Sopenharmony_ci			return ret;
21198c2ecf20Sopenharmony_ci	}
21208c2ecf20Sopenharmony_ci
21218c2ecf20Sopenharmony_ci	if (mode & FALLOC_FL_PUNCH_HOLE)
21228c2ecf20Sopenharmony_ci		cmd = OCFS2_IOC_UNRESVSP64;
21238c2ecf20Sopenharmony_ci
21248c2ecf20Sopenharmony_ci	sr.l_whence = 0;
21258c2ecf20Sopenharmony_ci	sr.l_start = (s64)offset;
21268c2ecf20Sopenharmony_ci	sr.l_len = (s64)len;
21278c2ecf20Sopenharmony_ci
21288c2ecf20Sopenharmony_ci	return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr,
21298c2ecf20Sopenharmony_ci					 change_size);
21308c2ecf20Sopenharmony_ci}
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ciint ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
21338c2ecf20Sopenharmony_ci				   size_t count)
21348c2ecf20Sopenharmony_ci{
21358c2ecf20Sopenharmony_ci	int ret = 0;
21368c2ecf20Sopenharmony_ci	unsigned int extent_flags;
21378c2ecf20Sopenharmony_ci	u32 cpos, clusters, extent_len, phys_cpos;
21388c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
21398c2ecf20Sopenharmony_ci
21408c2ecf20Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
21418c2ecf20Sopenharmony_ci	    !ocfs2_is_refcount_inode(inode) ||
21428c2ecf20Sopenharmony_ci	    OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
21438c2ecf20Sopenharmony_ci		return 0;
21448c2ecf20Sopenharmony_ci
21458c2ecf20Sopenharmony_ci	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
21468c2ecf20Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
21478c2ecf20Sopenharmony_ci
21488c2ecf20Sopenharmony_ci	while (clusters) {
21498c2ecf20Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
21508c2ecf20Sopenharmony_ci					 &extent_flags);
21518c2ecf20Sopenharmony_ci		if (ret < 0) {
21528c2ecf20Sopenharmony_ci			mlog_errno(ret);
21538c2ecf20Sopenharmony_ci			goto out;
21548c2ecf20Sopenharmony_ci		}
21558c2ecf20Sopenharmony_ci
21568c2ecf20Sopenharmony_ci		if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
21578c2ecf20Sopenharmony_ci			ret = 1;
21588c2ecf20Sopenharmony_ci			break;
21598c2ecf20Sopenharmony_ci		}
21608c2ecf20Sopenharmony_ci
21618c2ecf20Sopenharmony_ci		if (extent_len > clusters)
21628c2ecf20Sopenharmony_ci			extent_len = clusters;
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_ci		clusters -= extent_len;
21658c2ecf20Sopenharmony_ci		cpos += extent_len;
21668c2ecf20Sopenharmony_ci	}
21678c2ecf20Sopenharmony_ciout:
21688c2ecf20Sopenharmony_ci	return ret;
21698c2ecf20Sopenharmony_ci}
21708c2ecf20Sopenharmony_ci
21718c2ecf20Sopenharmony_cistatic int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
21728c2ecf20Sopenharmony_ci{
21738c2ecf20Sopenharmony_ci	int blockmask = inode->i_sb->s_blocksize - 1;
21748c2ecf20Sopenharmony_ci	loff_t final_size = pos + count;
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	if ((pos & blockmask) || (final_size & blockmask))
21778c2ecf20Sopenharmony_ci		return 1;
21788c2ecf20Sopenharmony_ci	return 0;
21798c2ecf20Sopenharmony_ci}
21808c2ecf20Sopenharmony_ci
21818c2ecf20Sopenharmony_cistatic int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
21828c2ecf20Sopenharmony_ci					    struct buffer_head **di_bh,
21838c2ecf20Sopenharmony_ci					    int meta_level,
21848c2ecf20Sopenharmony_ci					    int write_sem,
21858c2ecf20Sopenharmony_ci					    int wait)
21868c2ecf20Sopenharmony_ci{
21878c2ecf20Sopenharmony_ci	int ret = 0;
21888c2ecf20Sopenharmony_ci
21898c2ecf20Sopenharmony_ci	if (wait)
21908c2ecf20Sopenharmony_ci		ret = ocfs2_inode_lock(inode, di_bh, meta_level);
21918c2ecf20Sopenharmony_ci	else
21928c2ecf20Sopenharmony_ci		ret = ocfs2_try_inode_lock(inode, di_bh, meta_level);
21938c2ecf20Sopenharmony_ci	if (ret < 0)
21948c2ecf20Sopenharmony_ci		goto out;
21958c2ecf20Sopenharmony_ci
21968c2ecf20Sopenharmony_ci	if (wait) {
21978c2ecf20Sopenharmony_ci		if (write_sem)
21988c2ecf20Sopenharmony_ci			down_write(&OCFS2_I(inode)->ip_alloc_sem);
21998c2ecf20Sopenharmony_ci		else
22008c2ecf20Sopenharmony_ci			down_read(&OCFS2_I(inode)->ip_alloc_sem);
22018c2ecf20Sopenharmony_ci	} else {
22028c2ecf20Sopenharmony_ci		if (write_sem)
22038c2ecf20Sopenharmony_ci			ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
22048c2ecf20Sopenharmony_ci		else
22058c2ecf20Sopenharmony_ci			ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci		if (!ret) {
22088c2ecf20Sopenharmony_ci			ret = -EAGAIN;
22098c2ecf20Sopenharmony_ci			goto out_unlock;
22108c2ecf20Sopenharmony_ci		}
22118c2ecf20Sopenharmony_ci	}
22128c2ecf20Sopenharmony_ci
22138c2ecf20Sopenharmony_ci	return ret;
22148c2ecf20Sopenharmony_ci
22158c2ecf20Sopenharmony_ciout_unlock:
22168c2ecf20Sopenharmony_ci	brelse(*di_bh);
22178c2ecf20Sopenharmony_ci	*di_bh = NULL;
22188c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(inode, meta_level);
22198c2ecf20Sopenharmony_ciout:
22208c2ecf20Sopenharmony_ci	return ret;
22218c2ecf20Sopenharmony_ci}
22228c2ecf20Sopenharmony_ci
22238c2ecf20Sopenharmony_cistatic void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
22248c2ecf20Sopenharmony_ci					       struct buffer_head **di_bh,
22258c2ecf20Sopenharmony_ci					       int meta_level,
22268c2ecf20Sopenharmony_ci					       int write_sem)
22278c2ecf20Sopenharmony_ci{
22288c2ecf20Sopenharmony_ci	if (write_sem)
22298c2ecf20Sopenharmony_ci		up_write(&OCFS2_I(inode)->ip_alloc_sem);
22308c2ecf20Sopenharmony_ci	else
22318c2ecf20Sopenharmony_ci		up_read(&OCFS2_I(inode)->ip_alloc_sem);
22328c2ecf20Sopenharmony_ci
22338c2ecf20Sopenharmony_ci	brelse(*di_bh);
22348c2ecf20Sopenharmony_ci	*di_bh = NULL;
22358c2ecf20Sopenharmony_ci
22368c2ecf20Sopenharmony_ci	if (meta_level >= 0)
22378c2ecf20Sopenharmony_ci		ocfs2_inode_unlock(inode, meta_level);
22388c2ecf20Sopenharmony_ci}
22398c2ecf20Sopenharmony_ci
22408c2ecf20Sopenharmony_cistatic int ocfs2_prepare_inode_for_write(struct file *file,
22418c2ecf20Sopenharmony_ci					 loff_t pos, size_t count, int wait)
22428c2ecf20Sopenharmony_ci{
22438c2ecf20Sopenharmony_ci	int ret = 0, meta_level = 0, overwrite_io = 0;
22448c2ecf20Sopenharmony_ci	int write_sem = 0;
22458c2ecf20Sopenharmony_ci	struct dentry *dentry = file->f_path.dentry;
22468c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(dentry);
22478c2ecf20Sopenharmony_ci	struct buffer_head *di_bh = NULL;
22488c2ecf20Sopenharmony_ci	u32 cpos;
22498c2ecf20Sopenharmony_ci	u32 clusters;
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ci	/*
22528c2ecf20Sopenharmony_ci	 * We start with a read level meta lock and only jump to an ex
22538c2ecf20Sopenharmony_ci	 * if we need to make modifications here.
22548c2ecf20Sopenharmony_ci	 */
22558c2ecf20Sopenharmony_ci	for(;;) {
22568c2ecf20Sopenharmony_ci		ret = ocfs2_inode_lock_for_extent_tree(inode,
22578c2ecf20Sopenharmony_ci						       &di_bh,
22588c2ecf20Sopenharmony_ci						       meta_level,
22598c2ecf20Sopenharmony_ci						       write_sem,
22608c2ecf20Sopenharmony_ci						       wait);
22618c2ecf20Sopenharmony_ci		if (ret < 0) {
22628c2ecf20Sopenharmony_ci			if (ret != -EAGAIN)
22638c2ecf20Sopenharmony_ci				mlog_errno(ret);
22648c2ecf20Sopenharmony_ci			goto out;
22658c2ecf20Sopenharmony_ci		}
22668c2ecf20Sopenharmony_ci
22678c2ecf20Sopenharmony_ci		/*
22688c2ecf20Sopenharmony_ci		 * Check if IO will overwrite allocated blocks in case
22698c2ecf20Sopenharmony_ci		 * IOCB_NOWAIT flag is set.
22708c2ecf20Sopenharmony_ci		 */
22718c2ecf20Sopenharmony_ci		if (!wait && !overwrite_io) {
22728c2ecf20Sopenharmony_ci			overwrite_io = 1;
22738c2ecf20Sopenharmony_ci
22748c2ecf20Sopenharmony_ci			ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
22758c2ecf20Sopenharmony_ci			if (ret < 0) {
22768c2ecf20Sopenharmony_ci				if (ret != -EAGAIN)
22778c2ecf20Sopenharmony_ci					mlog_errno(ret);
22788c2ecf20Sopenharmony_ci				goto out_unlock;
22798c2ecf20Sopenharmony_ci			}
22808c2ecf20Sopenharmony_ci		}
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci		/* Clear suid / sgid if necessary. We do this here
22838c2ecf20Sopenharmony_ci		 * instead of later in the write path because
22848c2ecf20Sopenharmony_ci		 * remove_suid() calls ->setattr without any hint that
22858c2ecf20Sopenharmony_ci		 * we may have already done our cluster locking. Since
22868c2ecf20Sopenharmony_ci		 * ocfs2_setattr() *must* take cluster locks to
22878c2ecf20Sopenharmony_ci		 * proceed, this will lead us to recursively lock the
22888c2ecf20Sopenharmony_ci		 * inode. There's also the dinode i_size state which
22898c2ecf20Sopenharmony_ci		 * can be lost via setattr during extending writes (we
22908c2ecf20Sopenharmony_ci		 * set inode->i_size at the end of a write. */
22918c2ecf20Sopenharmony_ci		if (setattr_should_drop_suidgid(inode)) {
22928c2ecf20Sopenharmony_ci			if (meta_level == 0) {
22938c2ecf20Sopenharmony_ci				ocfs2_inode_unlock_for_extent_tree(inode,
22948c2ecf20Sopenharmony_ci								   &di_bh,
22958c2ecf20Sopenharmony_ci								   meta_level,
22968c2ecf20Sopenharmony_ci								   write_sem);
22978c2ecf20Sopenharmony_ci				meta_level = 1;
22988c2ecf20Sopenharmony_ci				continue;
22998c2ecf20Sopenharmony_ci			}
23008c2ecf20Sopenharmony_ci
23018c2ecf20Sopenharmony_ci			ret = ocfs2_write_remove_suid(inode);
23028c2ecf20Sopenharmony_ci			if (ret < 0) {
23038c2ecf20Sopenharmony_ci				mlog_errno(ret);
23048c2ecf20Sopenharmony_ci				goto out_unlock;
23058c2ecf20Sopenharmony_ci			}
23068c2ecf20Sopenharmony_ci		}
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci		ret = ocfs2_check_range_for_refcount(inode, pos, count);
23098c2ecf20Sopenharmony_ci		if (ret == 1) {
23108c2ecf20Sopenharmony_ci			ocfs2_inode_unlock_for_extent_tree(inode,
23118c2ecf20Sopenharmony_ci							   &di_bh,
23128c2ecf20Sopenharmony_ci							   meta_level,
23138c2ecf20Sopenharmony_ci							   write_sem);
23148c2ecf20Sopenharmony_ci			meta_level = 1;
23158c2ecf20Sopenharmony_ci			write_sem = 1;
23168c2ecf20Sopenharmony_ci			ret = ocfs2_inode_lock_for_extent_tree(inode,
23178c2ecf20Sopenharmony_ci							       &di_bh,
23188c2ecf20Sopenharmony_ci							       meta_level,
23198c2ecf20Sopenharmony_ci							       write_sem,
23208c2ecf20Sopenharmony_ci							       wait);
23218c2ecf20Sopenharmony_ci			if (ret < 0) {
23228c2ecf20Sopenharmony_ci				if (ret != -EAGAIN)
23238c2ecf20Sopenharmony_ci					mlog_errno(ret);
23248c2ecf20Sopenharmony_ci				goto out;
23258c2ecf20Sopenharmony_ci			}
23268c2ecf20Sopenharmony_ci
23278c2ecf20Sopenharmony_ci			cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
23288c2ecf20Sopenharmony_ci			clusters =
23298c2ecf20Sopenharmony_ci				ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
23308c2ecf20Sopenharmony_ci			ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
23318c2ecf20Sopenharmony_ci		}
23328c2ecf20Sopenharmony_ci
23338c2ecf20Sopenharmony_ci		if (ret < 0) {
23348c2ecf20Sopenharmony_ci			if (ret != -EAGAIN)
23358c2ecf20Sopenharmony_ci				mlog_errno(ret);
23368c2ecf20Sopenharmony_ci			goto out_unlock;
23378c2ecf20Sopenharmony_ci		}
23388c2ecf20Sopenharmony_ci
23398c2ecf20Sopenharmony_ci		break;
23408c2ecf20Sopenharmony_ci	}
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ciout_unlock:
23438c2ecf20Sopenharmony_ci	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
23448c2ecf20Sopenharmony_ci					    pos, count, wait);
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	ocfs2_inode_unlock_for_extent_tree(inode,
23478c2ecf20Sopenharmony_ci					   &di_bh,
23488c2ecf20Sopenharmony_ci					   meta_level,
23498c2ecf20Sopenharmony_ci					   write_sem);
23508c2ecf20Sopenharmony_ci
23518c2ecf20Sopenharmony_ciout:
23528c2ecf20Sopenharmony_ci	return ret;
23538c2ecf20Sopenharmony_ci}
23548c2ecf20Sopenharmony_ci
23558c2ecf20Sopenharmony_cistatic ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
23568c2ecf20Sopenharmony_ci				    struct iov_iter *from)
23578c2ecf20Sopenharmony_ci{
23588c2ecf20Sopenharmony_ci	int rw_level;
23598c2ecf20Sopenharmony_ci	ssize_t written = 0;
23608c2ecf20Sopenharmony_ci	ssize_t ret;
23618c2ecf20Sopenharmony_ci	size_t count = iov_iter_count(from);
23628c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
23638c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
23648c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
23658c2ecf20Sopenharmony_ci	int full_coherency = !(osb->s_mount_opt &
23668c2ecf20Sopenharmony_ci			       OCFS2_MOUNT_COHERENCY_BUFFERED);
23678c2ecf20Sopenharmony_ci	void *saved_ki_complete = NULL;
23688c2ecf20Sopenharmony_ci	int append_write = ((iocb->ki_pos + count) >=
23698c2ecf20Sopenharmony_ci			i_size_read(inode) ? 1 : 0);
23708c2ecf20Sopenharmony_ci	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
23718c2ecf20Sopenharmony_ci	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
23728c2ecf20Sopenharmony_ci
23738c2ecf20Sopenharmony_ci	trace_ocfs2_file_write_iter(inode, file, file->f_path.dentry,
23748c2ecf20Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
23758c2ecf20Sopenharmony_ci		file->f_path.dentry->d_name.len,
23768c2ecf20Sopenharmony_ci		file->f_path.dentry->d_name.name,
23778c2ecf20Sopenharmony_ci		(unsigned int)from->nr_segs);	/* GRRRRR */
23788c2ecf20Sopenharmony_ci
23798c2ecf20Sopenharmony_ci	if (!direct_io && nowait)
23808c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
23818c2ecf20Sopenharmony_ci
23828c2ecf20Sopenharmony_ci	if (count == 0)
23838c2ecf20Sopenharmony_ci		return 0;
23848c2ecf20Sopenharmony_ci
23858c2ecf20Sopenharmony_ci	if (nowait) {
23868c2ecf20Sopenharmony_ci		if (!inode_trylock(inode))
23878c2ecf20Sopenharmony_ci			return -EAGAIN;
23888c2ecf20Sopenharmony_ci	} else
23898c2ecf20Sopenharmony_ci		inode_lock(inode);
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci	/*
23928c2ecf20Sopenharmony_ci	 * Concurrent O_DIRECT writes are allowed with
23938c2ecf20Sopenharmony_ci	 * mount_option "coherency=buffered".
23948c2ecf20Sopenharmony_ci	 * For append write, we must take rw EX.
23958c2ecf20Sopenharmony_ci	 */
23968c2ecf20Sopenharmony_ci	rw_level = (!direct_io || full_coherency || append_write);
23978c2ecf20Sopenharmony_ci
23988c2ecf20Sopenharmony_ci	if (nowait)
23998c2ecf20Sopenharmony_ci		ret = ocfs2_try_rw_lock(inode, rw_level);
24008c2ecf20Sopenharmony_ci	else
24018c2ecf20Sopenharmony_ci		ret = ocfs2_rw_lock(inode, rw_level);
24028c2ecf20Sopenharmony_ci	if (ret < 0) {
24038c2ecf20Sopenharmony_ci		if (ret != -EAGAIN)
24048c2ecf20Sopenharmony_ci			mlog_errno(ret);
24058c2ecf20Sopenharmony_ci		goto out_mutex;
24068c2ecf20Sopenharmony_ci	}
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci	/*
24098c2ecf20Sopenharmony_ci	 * O_DIRECT writes with "coherency=full" need to take EX cluster
24108c2ecf20Sopenharmony_ci	 * inode_lock to guarantee coherency.
24118c2ecf20Sopenharmony_ci	 */
24128c2ecf20Sopenharmony_ci	if (direct_io && full_coherency) {
24138c2ecf20Sopenharmony_ci		/*
24148c2ecf20Sopenharmony_ci		 * We need to take and drop the inode lock to force
24158c2ecf20Sopenharmony_ci		 * other nodes to drop their caches.  Buffered I/O
24168c2ecf20Sopenharmony_ci		 * already does this in write_begin().
24178c2ecf20Sopenharmony_ci		 */
24188c2ecf20Sopenharmony_ci		if (nowait)
24198c2ecf20Sopenharmony_ci			ret = ocfs2_try_inode_lock(inode, NULL, 1);
24208c2ecf20Sopenharmony_ci		else
24218c2ecf20Sopenharmony_ci			ret = ocfs2_inode_lock(inode, NULL, 1);
24228c2ecf20Sopenharmony_ci		if (ret < 0) {
24238c2ecf20Sopenharmony_ci			if (ret != -EAGAIN)
24248c2ecf20Sopenharmony_ci				mlog_errno(ret);
24258c2ecf20Sopenharmony_ci			goto out;
24268c2ecf20Sopenharmony_ci		}
24278c2ecf20Sopenharmony_ci
24288c2ecf20Sopenharmony_ci		ocfs2_inode_unlock(inode, 1);
24298c2ecf20Sopenharmony_ci	}
24308c2ecf20Sopenharmony_ci
24318c2ecf20Sopenharmony_ci	ret = generic_write_checks(iocb, from);
24328c2ecf20Sopenharmony_ci	if (ret <= 0) {
24338c2ecf20Sopenharmony_ci		if (ret)
24348c2ecf20Sopenharmony_ci			mlog_errno(ret);
24358c2ecf20Sopenharmony_ci		goto out;
24368c2ecf20Sopenharmony_ci	}
24378c2ecf20Sopenharmony_ci	count = ret;
24388c2ecf20Sopenharmony_ci
24398c2ecf20Sopenharmony_ci	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
24408c2ecf20Sopenharmony_ci	if (ret < 0) {
24418c2ecf20Sopenharmony_ci		if (ret != -EAGAIN)
24428c2ecf20Sopenharmony_ci			mlog_errno(ret);
24438c2ecf20Sopenharmony_ci		goto out;
24448c2ecf20Sopenharmony_ci	}
24458c2ecf20Sopenharmony_ci
24468c2ecf20Sopenharmony_ci	if (direct_io && !is_sync_kiocb(iocb) &&
24478c2ecf20Sopenharmony_ci	    ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
24488c2ecf20Sopenharmony_ci		/*
24498c2ecf20Sopenharmony_ci		 * Make it a sync io if it's an unaligned aio.
24508c2ecf20Sopenharmony_ci		 */
24518c2ecf20Sopenharmony_ci		saved_ki_complete = xchg(&iocb->ki_complete, NULL);
24528c2ecf20Sopenharmony_ci	}
24538c2ecf20Sopenharmony_ci
24548c2ecf20Sopenharmony_ci	/* communicate with ocfs2_dio_end_io */
24558c2ecf20Sopenharmony_ci	ocfs2_iocb_set_rw_locked(iocb, rw_level);
24568c2ecf20Sopenharmony_ci
24578c2ecf20Sopenharmony_ci	written = __generic_file_write_iter(iocb, from);
24588c2ecf20Sopenharmony_ci	/* buffered aio wouldn't have proper lock coverage today */
24598c2ecf20Sopenharmony_ci	BUG_ON(written == -EIOCBQUEUED && !direct_io);
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_ci	/*
24628c2ecf20Sopenharmony_ci	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
24638c2ecf20Sopenharmony_ci	 * function pointer which is called when o_direct io completes so that
24648c2ecf20Sopenharmony_ci	 * it can unlock our rw lock.
24658c2ecf20Sopenharmony_ci	 * Unfortunately there are error cases which call end_io and others
24668c2ecf20Sopenharmony_ci	 * that don't.  so we don't have to unlock the rw_lock if either an
24678c2ecf20Sopenharmony_ci	 * async dio is going to do it in the future or an end_io after an
24688c2ecf20Sopenharmony_ci	 * error has already done it.
24698c2ecf20Sopenharmony_ci	 */
24708c2ecf20Sopenharmony_ci	if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
24718c2ecf20Sopenharmony_ci		rw_level = -1;
24728c2ecf20Sopenharmony_ci	}
24738c2ecf20Sopenharmony_ci
24748c2ecf20Sopenharmony_ci	if (unlikely(written <= 0))
24758c2ecf20Sopenharmony_ci		goto out;
24768c2ecf20Sopenharmony_ci
24778c2ecf20Sopenharmony_ci	if (((file->f_flags & O_DSYNC) && !direct_io) ||
24788c2ecf20Sopenharmony_ci	    IS_SYNC(inode)) {
24798c2ecf20Sopenharmony_ci		ret = filemap_fdatawrite_range(file->f_mapping,
24808c2ecf20Sopenharmony_ci					       iocb->ki_pos - written,
24818c2ecf20Sopenharmony_ci					       iocb->ki_pos - 1);
24828c2ecf20Sopenharmony_ci		if (ret < 0)
24838c2ecf20Sopenharmony_ci			written = ret;
24848c2ecf20Sopenharmony_ci
24858c2ecf20Sopenharmony_ci		if (!ret) {
24868c2ecf20Sopenharmony_ci			ret = jbd2_journal_force_commit(osb->journal->j_journal);
24878c2ecf20Sopenharmony_ci			if (ret < 0)
24888c2ecf20Sopenharmony_ci				written = ret;
24898c2ecf20Sopenharmony_ci		}
24908c2ecf20Sopenharmony_ci
24918c2ecf20Sopenharmony_ci		if (!ret)
24928c2ecf20Sopenharmony_ci			ret = filemap_fdatawait_range(file->f_mapping,
24938c2ecf20Sopenharmony_ci						      iocb->ki_pos - written,
24948c2ecf20Sopenharmony_ci						      iocb->ki_pos - 1);
24958c2ecf20Sopenharmony_ci	}
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ciout:
24988c2ecf20Sopenharmony_ci	if (saved_ki_complete)
24998c2ecf20Sopenharmony_ci		xchg(&iocb->ki_complete, saved_ki_complete);
25008c2ecf20Sopenharmony_ci
25018c2ecf20Sopenharmony_ci	if (rw_level != -1)
25028c2ecf20Sopenharmony_ci		ocfs2_rw_unlock(inode, rw_level);
25038c2ecf20Sopenharmony_ci
25048c2ecf20Sopenharmony_ciout_mutex:
25058c2ecf20Sopenharmony_ci	inode_unlock(inode);
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci	if (written)
25088c2ecf20Sopenharmony_ci		ret = written;
25098c2ecf20Sopenharmony_ci	return ret;
25108c2ecf20Sopenharmony_ci}
25118c2ecf20Sopenharmony_ci
25128c2ecf20Sopenharmony_cistatic ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
25138c2ecf20Sopenharmony_ci				   struct iov_iter *to)
25148c2ecf20Sopenharmony_ci{
25158c2ecf20Sopenharmony_ci	int ret = 0, rw_level = -1, lock_level = 0;
25168c2ecf20Sopenharmony_ci	struct file *filp = iocb->ki_filp;
25178c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(filp);
25188c2ecf20Sopenharmony_ci	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
25198c2ecf20Sopenharmony_ci	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;
25208c2ecf20Sopenharmony_ci
25218c2ecf20Sopenharmony_ci	trace_ocfs2_file_read_iter(inode, filp, filp->f_path.dentry,
25228c2ecf20Sopenharmony_ci			(unsigned long long)OCFS2_I(inode)->ip_blkno,
25238c2ecf20Sopenharmony_ci			filp->f_path.dentry->d_name.len,
25248c2ecf20Sopenharmony_ci			filp->f_path.dentry->d_name.name,
25258c2ecf20Sopenharmony_ci			to->nr_segs);	/* GRRRRR */
25268c2ecf20Sopenharmony_ci
25278c2ecf20Sopenharmony_ci
25288c2ecf20Sopenharmony_ci	if (!inode) {
25298c2ecf20Sopenharmony_ci		ret = -EINVAL;
25308c2ecf20Sopenharmony_ci		mlog_errno(ret);
25318c2ecf20Sopenharmony_ci		goto bail;
25328c2ecf20Sopenharmony_ci	}
25338c2ecf20Sopenharmony_ci
25348c2ecf20Sopenharmony_ci	if (!direct_io && nowait)
25358c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci	/*
25388c2ecf20Sopenharmony_ci	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
25398c2ecf20Sopenharmony_ci	 * need locks to protect pending reads from racing with truncate.
25408c2ecf20Sopenharmony_ci	 */
25418c2ecf20Sopenharmony_ci	if (direct_io) {
25428c2ecf20Sopenharmony_ci		if (nowait)
25438c2ecf20Sopenharmony_ci			ret = ocfs2_try_rw_lock(inode, 0);
25448c2ecf20Sopenharmony_ci		else
25458c2ecf20Sopenharmony_ci			ret = ocfs2_rw_lock(inode, 0);
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_ci		if (ret < 0) {
25488c2ecf20Sopenharmony_ci			if (ret != -EAGAIN)
25498c2ecf20Sopenharmony_ci				mlog_errno(ret);
25508c2ecf20Sopenharmony_ci			goto bail;
25518c2ecf20Sopenharmony_ci		}
25528c2ecf20Sopenharmony_ci		rw_level = 0;
25538c2ecf20Sopenharmony_ci		/* communicate with ocfs2_dio_end_io */
25548c2ecf20Sopenharmony_ci		ocfs2_iocb_set_rw_locked(iocb, rw_level);
25558c2ecf20Sopenharmony_ci	}
25568c2ecf20Sopenharmony_ci
25578c2ecf20Sopenharmony_ci	/*
25588c2ecf20Sopenharmony_ci	 * We're fine letting folks race truncates and extending
25598c2ecf20Sopenharmony_ci	 * writes with read across the cluster, just like they can
25608c2ecf20Sopenharmony_ci	 * locally. Hence no rw_lock during read.
25618c2ecf20Sopenharmony_ci	 *
25628c2ecf20Sopenharmony_ci	 * Take and drop the meta data lock to update inode fields
25638c2ecf20Sopenharmony_ci	 * like i_size. This allows the checks down below
25648c2ecf20Sopenharmony_ci	 * generic_file_read_iter() a chance of actually working.
25658c2ecf20Sopenharmony_ci	 */
25668c2ecf20Sopenharmony_ci	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
25678c2ecf20Sopenharmony_ci				     !nowait);
25688c2ecf20Sopenharmony_ci	if (ret < 0) {
25698c2ecf20Sopenharmony_ci		if (ret != -EAGAIN)
25708c2ecf20Sopenharmony_ci			mlog_errno(ret);
25718c2ecf20Sopenharmony_ci		goto bail;
25728c2ecf20Sopenharmony_ci	}
25738c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(inode, lock_level);
25748c2ecf20Sopenharmony_ci
25758c2ecf20Sopenharmony_ci	ret = generic_file_read_iter(iocb, to);
25768c2ecf20Sopenharmony_ci	trace_generic_file_read_iter_ret(ret);
25778c2ecf20Sopenharmony_ci
25788c2ecf20Sopenharmony_ci	/* buffered aio wouldn't have proper lock coverage today */
25798c2ecf20Sopenharmony_ci	BUG_ON(ret == -EIOCBQUEUED && !direct_io);
25808c2ecf20Sopenharmony_ci
25818c2ecf20Sopenharmony_ci	/* see ocfs2_file_write_iter */
25828c2ecf20Sopenharmony_ci	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
25838c2ecf20Sopenharmony_ci		rw_level = -1;
25848c2ecf20Sopenharmony_ci	}
25858c2ecf20Sopenharmony_ci
25868c2ecf20Sopenharmony_cibail:
25878c2ecf20Sopenharmony_ci	if (rw_level != -1)
25888c2ecf20Sopenharmony_ci		ocfs2_rw_unlock(inode, rw_level);
25898c2ecf20Sopenharmony_ci
25908c2ecf20Sopenharmony_ci	return ret;
25918c2ecf20Sopenharmony_ci}
25928c2ecf20Sopenharmony_ci
25938c2ecf20Sopenharmony_ci/* Refer generic_file_llseek_unlocked() */
25948c2ecf20Sopenharmony_cistatic loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
25958c2ecf20Sopenharmony_ci{
25968c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
25978c2ecf20Sopenharmony_ci	int ret = 0;
25988c2ecf20Sopenharmony_ci
25998c2ecf20Sopenharmony_ci	inode_lock(inode);
26008c2ecf20Sopenharmony_ci
26018c2ecf20Sopenharmony_ci	switch (whence) {
26028c2ecf20Sopenharmony_ci	case SEEK_SET:
26038c2ecf20Sopenharmony_ci		break;
26048c2ecf20Sopenharmony_ci	case SEEK_END:
26058c2ecf20Sopenharmony_ci		/* SEEK_END requires the OCFS2 inode lock for the file
26068c2ecf20Sopenharmony_ci		 * because it references the file's size.
26078c2ecf20Sopenharmony_ci		 */
26088c2ecf20Sopenharmony_ci		ret = ocfs2_inode_lock(inode, NULL, 0);
26098c2ecf20Sopenharmony_ci		if (ret < 0) {
26108c2ecf20Sopenharmony_ci			mlog_errno(ret);
26118c2ecf20Sopenharmony_ci			goto out;
26128c2ecf20Sopenharmony_ci		}
26138c2ecf20Sopenharmony_ci		offset += i_size_read(inode);
26148c2ecf20Sopenharmony_ci		ocfs2_inode_unlock(inode, 0);
26158c2ecf20Sopenharmony_ci		break;
26168c2ecf20Sopenharmony_ci	case SEEK_CUR:
26178c2ecf20Sopenharmony_ci		if (offset == 0) {
26188c2ecf20Sopenharmony_ci			offset = file->f_pos;
26198c2ecf20Sopenharmony_ci			goto out;
26208c2ecf20Sopenharmony_ci		}
26218c2ecf20Sopenharmony_ci		offset += file->f_pos;
26228c2ecf20Sopenharmony_ci		break;
26238c2ecf20Sopenharmony_ci	case SEEK_DATA:
26248c2ecf20Sopenharmony_ci	case SEEK_HOLE:
26258c2ecf20Sopenharmony_ci		ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
26268c2ecf20Sopenharmony_ci		if (ret)
26278c2ecf20Sopenharmony_ci			goto out;
26288c2ecf20Sopenharmony_ci		break;
26298c2ecf20Sopenharmony_ci	default:
26308c2ecf20Sopenharmony_ci		ret = -EINVAL;
26318c2ecf20Sopenharmony_ci		goto out;
26328c2ecf20Sopenharmony_ci	}
26338c2ecf20Sopenharmony_ci
26348c2ecf20Sopenharmony_ci	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
26358c2ecf20Sopenharmony_ci
26368c2ecf20Sopenharmony_ciout:
26378c2ecf20Sopenharmony_ci	inode_unlock(inode);
26388c2ecf20Sopenharmony_ci	if (ret)
26398c2ecf20Sopenharmony_ci		return ret;
26408c2ecf20Sopenharmony_ci	return offset;
26418c2ecf20Sopenharmony_ci}
26428c2ecf20Sopenharmony_ci
26438c2ecf20Sopenharmony_cistatic loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
26448c2ecf20Sopenharmony_ci				     struct file *file_out, loff_t pos_out,
26458c2ecf20Sopenharmony_ci				     loff_t len, unsigned int remap_flags)
26468c2ecf20Sopenharmony_ci{
26478c2ecf20Sopenharmony_ci	struct inode *inode_in = file_inode(file_in);
26488c2ecf20Sopenharmony_ci	struct inode *inode_out = file_inode(file_out);
26498c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
26508c2ecf20Sopenharmony_ci	struct buffer_head *in_bh = NULL, *out_bh = NULL;
26518c2ecf20Sopenharmony_ci	bool same_inode = (inode_in == inode_out);
26528c2ecf20Sopenharmony_ci	loff_t remapped = 0;
26538c2ecf20Sopenharmony_ci	ssize_t ret;
26548c2ecf20Sopenharmony_ci
26558c2ecf20Sopenharmony_ci	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
26568c2ecf20Sopenharmony_ci		return -EINVAL;
26578c2ecf20Sopenharmony_ci	if (!ocfs2_refcount_tree(osb))
26588c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
26598c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
26608c2ecf20Sopenharmony_ci		return -EROFS;
26618c2ecf20Sopenharmony_ci
26628c2ecf20Sopenharmony_ci	/* Lock both files against IO */
26638c2ecf20Sopenharmony_ci	ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
26648c2ecf20Sopenharmony_ci	if (ret)
26658c2ecf20Sopenharmony_ci		return ret;
26668c2ecf20Sopenharmony_ci
26678c2ecf20Sopenharmony_ci	/* Check file eligibility and prepare for block sharing. */
26688c2ecf20Sopenharmony_ci	ret = -EINVAL;
26698c2ecf20Sopenharmony_ci	if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
26708c2ecf20Sopenharmony_ci	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
26718c2ecf20Sopenharmony_ci		goto out_unlock;
26728c2ecf20Sopenharmony_ci
26738c2ecf20Sopenharmony_ci	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
26748c2ecf20Sopenharmony_ci			&len, remap_flags);
26758c2ecf20Sopenharmony_ci	if (ret < 0 || len == 0)
26768c2ecf20Sopenharmony_ci		goto out_unlock;
26778c2ecf20Sopenharmony_ci
26788c2ecf20Sopenharmony_ci	/* Lock out changes to the allocation maps and remap. */
26798c2ecf20Sopenharmony_ci	down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
26808c2ecf20Sopenharmony_ci	if (!same_inode)
26818c2ecf20Sopenharmony_ci		down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
26828c2ecf20Sopenharmony_ci				  SINGLE_DEPTH_NESTING);
26838c2ecf20Sopenharmony_ci
26848c2ecf20Sopenharmony_ci	/* Zap any page cache for the destination file's range. */
26858c2ecf20Sopenharmony_ci	truncate_inode_pages_range(&inode_out->i_data,
26868c2ecf20Sopenharmony_ci				   round_down(pos_out, PAGE_SIZE),
26878c2ecf20Sopenharmony_ci				   round_up(pos_out + len, PAGE_SIZE) - 1);
26888c2ecf20Sopenharmony_ci
26898c2ecf20Sopenharmony_ci	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
26908c2ecf20Sopenharmony_ci			inode_out, out_bh, pos_out, len);
26918c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
26928c2ecf20Sopenharmony_ci	if (!same_inode)
26938c2ecf20Sopenharmony_ci		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
26948c2ecf20Sopenharmony_ci	if (remapped < 0) {
26958c2ecf20Sopenharmony_ci		ret = remapped;
26968c2ecf20Sopenharmony_ci		mlog_errno(ret);
26978c2ecf20Sopenharmony_ci		goto out_unlock;
26988c2ecf20Sopenharmony_ci	}
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_ci	/*
27018c2ecf20Sopenharmony_ci	 * Empty the extent map so that we may get the right extent
27028c2ecf20Sopenharmony_ci	 * record from the disk.
27038c2ecf20Sopenharmony_ci	 */
27048c2ecf20Sopenharmony_ci	ocfs2_extent_map_trunc(inode_in, 0);
27058c2ecf20Sopenharmony_ci	ocfs2_extent_map_trunc(inode_out, 0);
27068c2ecf20Sopenharmony_ci
27078c2ecf20Sopenharmony_ci	ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
27088c2ecf20Sopenharmony_ci	if (ret) {
27098c2ecf20Sopenharmony_ci		mlog_errno(ret);
27108c2ecf20Sopenharmony_ci		goto out_unlock;
27118c2ecf20Sopenharmony_ci	}
27128c2ecf20Sopenharmony_ci
27138c2ecf20Sopenharmony_ciout_unlock:
27148c2ecf20Sopenharmony_ci	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
27158c2ecf20Sopenharmony_ci	return remapped > 0 ? remapped : ret;
27168c2ecf20Sopenharmony_ci}
27178c2ecf20Sopenharmony_ci
27188c2ecf20Sopenharmony_ciconst struct inode_operations ocfs2_file_iops = {
27198c2ecf20Sopenharmony_ci	.setattr	= ocfs2_setattr,
27208c2ecf20Sopenharmony_ci	.getattr	= ocfs2_getattr,
27218c2ecf20Sopenharmony_ci	.permission	= ocfs2_permission,
27228c2ecf20Sopenharmony_ci	.listxattr	= ocfs2_listxattr,
27238c2ecf20Sopenharmony_ci	.fiemap		= ocfs2_fiemap,
27248c2ecf20Sopenharmony_ci	.get_acl	= ocfs2_iop_get_acl,
27258c2ecf20Sopenharmony_ci	.set_acl	= ocfs2_iop_set_acl,
27268c2ecf20Sopenharmony_ci};
27278c2ecf20Sopenharmony_ci
27288c2ecf20Sopenharmony_ciconst struct inode_operations ocfs2_special_file_iops = {
27298c2ecf20Sopenharmony_ci	.setattr	= ocfs2_setattr,
27308c2ecf20Sopenharmony_ci	.getattr	= ocfs2_getattr,
27318c2ecf20Sopenharmony_ci	.permission	= ocfs2_permission,
27328c2ecf20Sopenharmony_ci	.get_acl	= ocfs2_iop_get_acl,
27338c2ecf20Sopenharmony_ci	.set_acl	= ocfs2_iop_set_acl,
27348c2ecf20Sopenharmony_ci};
27358c2ecf20Sopenharmony_ci
27368c2ecf20Sopenharmony_ci/*
27378c2ecf20Sopenharmony_ci * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
27388c2ecf20Sopenharmony_ci * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
27398c2ecf20Sopenharmony_ci */
27408c2ecf20Sopenharmony_ciconst struct file_operations ocfs2_fops = {
27418c2ecf20Sopenharmony_ci	.llseek		= ocfs2_file_llseek,
27428c2ecf20Sopenharmony_ci	.mmap		= ocfs2_mmap,
27438c2ecf20Sopenharmony_ci	.fsync		= ocfs2_sync_file,
27448c2ecf20Sopenharmony_ci	.release	= ocfs2_file_release,
27458c2ecf20Sopenharmony_ci	.open		= ocfs2_file_open,
27468c2ecf20Sopenharmony_ci	.read_iter	= ocfs2_file_read_iter,
27478c2ecf20Sopenharmony_ci	.write_iter	= ocfs2_file_write_iter,
27488c2ecf20Sopenharmony_ci	.unlocked_ioctl	= ocfs2_ioctl,
27498c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
27508c2ecf20Sopenharmony_ci	.compat_ioctl   = ocfs2_compat_ioctl,
27518c2ecf20Sopenharmony_ci#endif
27528c2ecf20Sopenharmony_ci	.lock		= ocfs2_lock,
27538c2ecf20Sopenharmony_ci	.flock		= ocfs2_flock,
27548c2ecf20Sopenharmony_ci	.splice_read	= generic_file_splice_read,
27558c2ecf20Sopenharmony_ci	.splice_write	= iter_file_splice_write,
27568c2ecf20Sopenharmony_ci	.fallocate	= ocfs2_fallocate,
27578c2ecf20Sopenharmony_ci	.remap_file_range = ocfs2_remap_file_range,
27588c2ecf20Sopenharmony_ci};
27598c2ecf20Sopenharmony_ci
27608c2ecf20Sopenharmony_ciconst struct file_operations ocfs2_dops = {
27618c2ecf20Sopenharmony_ci	.llseek		= generic_file_llseek,
27628c2ecf20Sopenharmony_ci	.read		= generic_read_dir,
27638c2ecf20Sopenharmony_ci	.iterate	= ocfs2_readdir,
27648c2ecf20Sopenharmony_ci	.fsync		= ocfs2_sync_file,
27658c2ecf20Sopenharmony_ci	.release	= ocfs2_dir_release,
27668c2ecf20Sopenharmony_ci	.open		= ocfs2_dir_open,
27678c2ecf20Sopenharmony_ci	.unlocked_ioctl	= ocfs2_ioctl,
27688c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
27698c2ecf20Sopenharmony_ci	.compat_ioctl   = ocfs2_compat_ioctl,
27708c2ecf20Sopenharmony_ci#endif
27718c2ecf20Sopenharmony_ci	.lock		= ocfs2_lock,
27728c2ecf20Sopenharmony_ci	.flock		= ocfs2_flock,
27738c2ecf20Sopenharmony_ci};
27748c2ecf20Sopenharmony_ci
27758c2ecf20Sopenharmony_ci/*
27768c2ecf20Sopenharmony_ci * POSIX-lockless variants of our file_operations.
27778c2ecf20Sopenharmony_ci *
27788c2ecf20Sopenharmony_ci * These will be used if the underlying cluster stack does not support
27798c2ecf20Sopenharmony_ci * posix file locking, if the user passes the "localflocks" mount
27808c2ecf20Sopenharmony_ci * option, or if we have a local-only fs.
27818c2ecf20Sopenharmony_ci *
27828c2ecf20Sopenharmony_ci * ocfs2_flock is in here because all stacks handle UNIX file locks,
27838c2ecf20Sopenharmony_ci * so we still want it in the case of no stack support for
27848c2ecf20Sopenharmony_ci * plocks. Internally, it will do the right thing when asked to ignore
27858c2ecf20Sopenharmony_ci * the cluster.
27868c2ecf20Sopenharmony_ci */
27878c2ecf20Sopenharmony_ciconst struct file_operations ocfs2_fops_no_plocks = {
27888c2ecf20Sopenharmony_ci	.llseek		= ocfs2_file_llseek,
27898c2ecf20Sopenharmony_ci	.mmap		= ocfs2_mmap,
27908c2ecf20Sopenharmony_ci	.fsync		= ocfs2_sync_file,
27918c2ecf20Sopenharmony_ci	.release	= ocfs2_file_release,
27928c2ecf20Sopenharmony_ci	.open		= ocfs2_file_open,
27938c2ecf20Sopenharmony_ci	.read_iter	= ocfs2_file_read_iter,
27948c2ecf20Sopenharmony_ci	.write_iter	= ocfs2_file_write_iter,
27958c2ecf20Sopenharmony_ci	.unlocked_ioctl	= ocfs2_ioctl,
27968c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
27978c2ecf20Sopenharmony_ci	.compat_ioctl   = ocfs2_compat_ioctl,
27988c2ecf20Sopenharmony_ci#endif
27998c2ecf20Sopenharmony_ci	.flock		= ocfs2_flock,
28008c2ecf20Sopenharmony_ci	.splice_read	= generic_file_splice_read,
28018c2ecf20Sopenharmony_ci	.splice_write	= iter_file_splice_write,
28028c2ecf20Sopenharmony_ci	.fallocate	= ocfs2_fallocate,
28038c2ecf20Sopenharmony_ci	.remap_file_range = ocfs2_remap_file_range,
28048c2ecf20Sopenharmony_ci};
28058c2ecf20Sopenharmony_ci
28068c2ecf20Sopenharmony_ciconst struct file_operations ocfs2_dops_no_plocks = {
28078c2ecf20Sopenharmony_ci	.llseek		= generic_file_llseek,
28088c2ecf20Sopenharmony_ci	.read		= generic_read_dir,
28098c2ecf20Sopenharmony_ci	.iterate	= ocfs2_readdir,
28108c2ecf20Sopenharmony_ci	.fsync		= ocfs2_sync_file,
28118c2ecf20Sopenharmony_ci	.release	= ocfs2_dir_release,
28128c2ecf20Sopenharmony_ci	.open		= ocfs2_dir_open,
28138c2ecf20Sopenharmony_ci	.unlocked_ioctl	= ocfs2_ioctl,
28148c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
28158c2ecf20Sopenharmony_ci	.compat_ioctl   = ocfs2_compat_ioctl,
28168c2ecf20Sopenharmony_ci#endif
28178c2ecf20Sopenharmony_ci	.flock		= ocfs2_flock,
28188c2ecf20Sopenharmony_ci};
2819