// SPDX-License-Identifier: GPL-2.0-only
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * move_extents.c
 *
 * Copyright (C) 2011 Oracle.  All rights reserved.
 */
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/mount.h>
#include <linux/swap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"
#include "ocfs2_ioctl.h"

#include "alloc.h"
#include "localalloc.h"
#include "aops.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "journal.h"
#include "suballoc.h"
#include "uptodate.h"
#include "super.h"
#include "dir.h"
#include "buffer_head_io.h"
#include "sysfile.h"
#include "refcounttree.h"
#include "move_extents.h"

358c2ecf20Sopenharmony_cistruct ocfs2_move_extents_context {
368c2ecf20Sopenharmony_ci	struct inode *inode;
378c2ecf20Sopenharmony_ci	struct file *file;
388c2ecf20Sopenharmony_ci	int auto_defrag;
398c2ecf20Sopenharmony_ci	int partial;
408c2ecf20Sopenharmony_ci	int credits;
418c2ecf20Sopenharmony_ci	u32 new_phys_cpos;
428c2ecf20Sopenharmony_ci	u32 clusters_moved;
438c2ecf20Sopenharmony_ci	u64 refcount_loc;
448c2ecf20Sopenharmony_ci	struct ocfs2_move_extents *range;
458c2ecf20Sopenharmony_ci	struct ocfs2_extent_tree et;
468c2ecf20Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac;
478c2ecf20Sopenharmony_ci	struct ocfs2_alloc_context *data_ac;
488c2ecf20Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
498c2ecf20Sopenharmony_ci};
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cistatic int __ocfs2_move_extent(handle_t *handle,
528c2ecf20Sopenharmony_ci			       struct ocfs2_move_extents_context *context,
538c2ecf20Sopenharmony_ci			       u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos,
548c2ecf20Sopenharmony_ci			       int ext_flags)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	int ret = 0, index;
578c2ecf20Sopenharmony_ci	struct inode *inode = context->inode;
588c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
598c2ecf20Sopenharmony_ci	struct ocfs2_extent_rec *rec, replace_rec;
608c2ecf20Sopenharmony_ci	struct ocfs2_path *path = NULL;
618c2ecf20Sopenharmony_ci	struct ocfs2_extent_list *el;
628c2ecf20Sopenharmony_ci	u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
638c2ecf20Sopenharmony_ci	u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci	ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
668c2ecf20Sopenharmony_ci					       p_cpos, new_p_cpos, len);
678c2ecf20Sopenharmony_ci	if (ret) {
688c2ecf20Sopenharmony_ci		mlog_errno(ret);
698c2ecf20Sopenharmony_ci		goto out;
708c2ecf20Sopenharmony_ci	}
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci	memset(&replace_rec, 0, sizeof(replace_rec));
738c2ecf20Sopenharmony_ci	replace_rec.e_cpos = cpu_to_le32(cpos);
748c2ecf20Sopenharmony_ci	replace_rec.e_leaf_clusters = cpu_to_le16(len);
758c2ecf20Sopenharmony_ci	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
768c2ecf20Sopenharmony_ci								   new_p_cpos));
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	path = ocfs2_new_path_from_et(&context->et);
798c2ecf20Sopenharmony_ci	if (!path) {
808c2ecf20Sopenharmony_ci		ret = -ENOMEM;
818c2ecf20Sopenharmony_ci		mlog_errno(ret);
828c2ecf20Sopenharmony_ci		goto out;
838c2ecf20Sopenharmony_ci	}
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos);
868c2ecf20Sopenharmony_ci	if (ret) {
878c2ecf20Sopenharmony_ci		mlog_errno(ret);
888c2ecf20Sopenharmony_ci		goto out;
898c2ecf20Sopenharmony_ci	}
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	el = path_leaf_el(path);
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	index = ocfs2_search_extent_list(el, cpos);
948c2ecf20Sopenharmony_ci	if (index == -1) {
958c2ecf20Sopenharmony_ci		ret = ocfs2_error(inode->i_sb,
968c2ecf20Sopenharmony_ci				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
978c2ecf20Sopenharmony_ci				  (unsigned long long)ino, cpos);
988c2ecf20Sopenharmony_ci		goto out;
998c2ecf20Sopenharmony_ci	}
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	rec = &el->l_recs[index];
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	BUG_ON(ext_flags != rec->e_flags);
1048c2ecf20Sopenharmony_ci	/*
1058c2ecf20Sopenharmony_ci	 * after moving/defraging to new location, the extent is not going
1068c2ecf20Sopenharmony_ci	 * to be refcounted anymore.
1078c2ecf20Sopenharmony_ci	 */
1088c2ecf20Sopenharmony_ci	replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	ret = ocfs2_split_extent(handle, &context->et, path, index,
1118c2ecf20Sopenharmony_ci				 &replace_rec, context->meta_ac,
1128c2ecf20Sopenharmony_ci				 &context->dealloc);
1138c2ecf20Sopenharmony_ci	if (ret) {
1148c2ecf20Sopenharmony_ci		mlog_errno(ret);
1158c2ecf20Sopenharmony_ci		goto out;
1168c2ecf20Sopenharmony_ci	}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	context->new_phys_cpos = new_p_cpos;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	/*
1218c2ecf20Sopenharmony_ci	 * need I to append truncate log for old clusters?
1228c2ecf20Sopenharmony_ci	 */
1238c2ecf20Sopenharmony_ci	if (old_blkno) {
1248c2ecf20Sopenharmony_ci		if (ext_flags & OCFS2_EXT_REFCOUNTED)
1258c2ecf20Sopenharmony_ci			ret = ocfs2_decrease_refcount(inode, handle,
1268c2ecf20Sopenharmony_ci					ocfs2_blocks_to_clusters(osb->sb,
1278c2ecf20Sopenharmony_ci								 old_blkno),
1288c2ecf20Sopenharmony_ci					len, context->meta_ac,
1298c2ecf20Sopenharmony_ci					&context->dealloc, 1);
1308c2ecf20Sopenharmony_ci		else
1318c2ecf20Sopenharmony_ci			ret = ocfs2_truncate_log_append(osb, handle,
1328c2ecf20Sopenharmony_ci							old_blkno, len);
1338c2ecf20Sopenharmony_ci	}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
1368c2ecf20Sopenharmony_ciout:
1378c2ecf20Sopenharmony_ci	ocfs2_free_path(path);
1388c2ecf20Sopenharmony_ci	return ret;
1398c2ecf20Sopenharmony_ci}
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci/*
1428c2ecf20Sopenharmony_ci * lock allocator, and reserve appropriate number of bits for
1438c2ecf20Sopenharmony_ci * meta blocks.
1448c2ecf20Sopenharmony_ci */
1458c2ecf20Sopenharmony_cistatic int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
1468c2ecf20Sopenharmony_ci					struct ocfs2_extent_tree *et,
1478c2ecf20Sopenharmony_ci					u32 clusters_to_move,
1488c2ecf20Sopenharmony_ci					u32 extents_to_split,
1498c2ecf20Sopenharmony_ci					struct ocfs2_alloc_context **meta_ac,
1508c2ecf20Sopenharmony_ci					int extra_blocks,
1518c2ecf20Sopenharmony_ci					int *credits)
1528c2ecf20Sopenharmony_ci{
1538c2ecf20Sopenharmony_ci	int ret, num_free_extents;
1548c2ecf20Sopenharmony_ci	unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move;
1558c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	num_free_extents = ocfs2_num_free_extents(et);
1588c2ecf20Sopenharmony_ci	if (num_free_extents < 0) {
1598c2ecf20Sopenharmony_ci		ret = num_free_extents;
1608c2ecf20Sopenharmony_ci		mlog_errno(ret);
1618c2ecf20Sopenharmony_ci		goto out;
1628c2ecf20Sopenharmony_ci	}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	if (!num_free_extents ||
1658c2ecf20Sopenharmony_ci	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
1668c2ecf20Sopenharmony_ci		extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac);
1698c2ecf20Sopenharmony_ci	if (ret) {
1708c2ecf20Sopenharmony_ci		mlog_errno(ret);
1718c2ecf20Sopenharmony_ci		goto out;
1728c2ecf20Sopenharmony_ci	}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
1788c2ecf20Sopenharmony_ci	     extra_blocks, clusters_to_move, *credits);
1798c2ecf20Sopenharmony_ciout:
1808c2ecf20Sopenharmony_ci	if (ret) {
1818c2ecf20Sopenharmony_ci		if (*meta_ac) {
1828c2ecf20Sopenharmony_ci			ocfs2_free_alloc_context(*meta_ac);
1838c2ecf20Sopenharmony_ci			*meta_ac = NULL;
1848c2ecf20Sopenharmony_ci		}
1858c2ecf20Sopenharmony_ci	}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	return ret;
1888c2ecf20Sopenharmony_ci}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci/*
1918c2ecf20Sopenharmony_ci * Using one journal handle to guarantee the data consistency in case
1928c2ecf20Sopenharmony_ci * crash happens anywhere.
1938c2ecf20Sopenharmony_ci *
1948c2ecf20Sopenharmony_ci *  XXX: defrag can end up with finishing partial extent as requested,
1958c2ecf20Sopenharmony_ci * due to not enough contiguous clusters can be found in allocator.
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_cistatic int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
1988c2ecf20Sopenharmony_ci			       u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	int ret, credits = 0, extra_blocks = 0, partial = context->partial;
2018c2ecf20Sopenharmony_ci	handle_t *handle;
2028c2ecf20Sopenharmony_ci	struct inode *inode = context->inode;
2038c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2048c2ecf20Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
2058c2ecf20Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree = NULL;
2068c2ecf20Sopenharmony_ci	u32 new_phys_cpos, new_len;
2078c2ecf20Sopenharmony_ci	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
2088c2ecf20Sopenharmony_ci	int need_free = 0;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
2118c2ecf20Sopenharmony_ci		BUG_ON(!ocfs2_is_refcount_inode(inode));
2128c2ecf20Sopenharmony_ci		BUG_ON(!context->refcount_loc);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
2158c2ecf20Sopenharmony_ci					       &ref_tree, NULL);
2168c2ecf20Sopenharmony_ci		if (ret) {
2178c2ecf20Sopenharmony_ci			mlog_errno(ret);
2188c2ecf20Sopenharmony_ci			return ret;
2198c2ecf20Sopenharmony_ci		}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci		ret = ocfs2_prepare_refcount_change_for_del(inode,
2228c2ecf20Sopenharmony_ci							context->refcount_loc,
2238c2ecf20Sopenharmony_ci							phys_blkno,
2248c2ecf20Sopenharmony_ci							*len,
2258c2ecf20Sopenharmony_ci							&credits,
2268c2ecf20Sopenharmony_ci							&extra_blocks);
2278c2ecf20Sopenharmony_ci		if (ret) {
2288c2ecf20Sopenharmony_ci			mlog_errno(ret);
2298c2ecf20Sopenharmony_ci			goto out;
2308c2ecf20Sopenharmony_ci		}
2318c2ecf20Sopenharmony_ci	}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
2348c2ecf20Sopenharmony_ci						*len, 1,
2358c2ecf20Sopenharmony_ci						&context->meta_ac,
2368c2ecf20Sopenharmony_ci						extra_blocks, &credits);
2378c2ecf20Sopenharmony_ci	if (ret) {
2388c2ecf20Sopenharmony_ci		mlog_errno(ret);
2398c2ecf20Sopenharmony_ci		goto out;
2408c2ecf20Sopenharmony_ci	}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	/*
2438c2ecf20Sopenharmony_ci	 * should be using allocation reservation strategy there?
2448c2ecf20Sopenharmony_ci	 *
2458c2ecf20Sopenharmony_ci	 * if (context->data_ac)
2468c2ecf20Sopenharmony_ci	 *	context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
2478c2ecf20Sopenharmony_ci	 */
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci	inode_lock(tl_inode);
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	if (ocfs2_truncate_log_needs_flush(osb)) {
2528c2ecf20Sopenharmony_ci		ret = __ocfs2_flush_truncate_log(osb);
2538c2ecf20Sopenharmony_ci		if (ret < 0) {
2548c2ecf20Sopenharmony_ci			mlog_errno(ret);
2558c2ecf20Sopenharmony_ci			goto out_unlock_mutex;
2568c2ecf20Sopenharmony_ci		}
2578c2ecf20Sopenharmony_ci	}
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	/*
2608c2ecf20Sopenharmony_ci	 * Make sure ocfs2_reserve_cluster is called after
2618c2ecf20Sopenharmony_ci	 * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
2628c2ecf20Sopenharmony_ci	 *
2638c2ecf20Sopenharmony_ci	 * If ocfs2_reserve_cluster is called
2648c2ecf20Sopenharmony_ci	 * before __ocfs2_flush_truncate_log, dead lock on global bitmap
2658c2ecf20Sopenharmony_ci	 * may happen.
2668c2ecf20Sopenharmony_ci	 *
2678c2ecf20Sopenharmony_ci	 */
2688c2ecf20Sopenharmony_ci	ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
2698c2ecf20Sopenharmony_ci	if (ret) {
2708c2ecf20Sopenharmony_ci		mlog_errno(ret);
2718c2ecf20Sopenharmony_ci		goto out_unlock_mutex;
2728c2ecf20Sopenharmony_ci	}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
2758c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
2768c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
2778c2ecf20Sopenharmony_ci		mlog_errno(ret);
2788c2ecf20Sopenharmony_ci		goto out_unlock_mutex;
2798c2ecf20Sopenharmony_ci	}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci	ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
2828c2ecf20Sopenharmony_ci				     &new_phys_cpos, &new_len);
2838c2ecf20Sopenharmony_ci	if (ret) {
2848c2ecf20Sopenharmony_ci		mlog_errno(ret);
2858c2ecf20Sopenharmony_ci		goto out_commit;
2868c2ecf20Sopenharmony_ci	}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	/*
2898c2ecf20Sopenharmony_ci	 * allowing partial extent moving is kind of 'pros and cons', it makes
2908c2ecf20Sopenharmony_ci	 * whole defragmentation less likely to fail, on the contrary, the bad
2918c2ecf20Sopenharmony_ci	 * thing is it may make the fs even more fragmented after moving, let
2928c2ecf20Sopenharmony_ci	 * userspace make a good decision here.
2938c2ecf20Sopenharmony_ci	 */
2948c2ecf20Sopenharmony_ci	if (new_len != *len) {
2958c2ecf20Sopenharmony_ci		mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
2968c2ecf20Sopenharmony_ci		if (!partial) {
2978c2ecf20Sopenharmony_ci			context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
2988c2ecf20Sopenharmony_ci			ret = -ENOSPC;
2998c2ecf20Sopenharmony_ci			need_free = 1;
3008c2ecf20Sopenharmony_ci			goto out_commit;
3018c2ecf20Sopenharmony_ci		}
3028c2ecf20Sopenharmony_ci	}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
3058c2ecf20Sopenharmony_ci	     phys_cpos, new_phys_cpos);
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_ci	ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
3088c2ecf20Sopenharmony_ci				  new_phys_cpos, ext_flags);
3098c2ecf20Sopenharmony_ci	if (ret)
3108c2ecf20Sopenharmony_ci		mlog_errno(ret);
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	if (partial && (new_len != *len))
3138c2ecf20Sopenharmony_ci		*len = new_len;
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci	/*
3168c2ecf20Sopenharmony_ci	 * Here we should write the new page out first if we are
3178c2ecf20Sopenharmony_ci	 * in write-back mode.
3188c2ecf20Sopenharmony_ci	 */
3198c2ecf20Sopenharmony_ci	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len);
3208c2ecf20Sopenharmony_ci	if (ret)
3218c2ecf20Sopenharmony_ci		mlog_errno(ret);
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ciout_commit:
3248c2ecf20Sopenharmony_ci	if (need_free && context->data_ac) {
3258c2ecf20Sopenharmony_ci		struct ocfs2_alloc_context *data_ac = context->data_ac;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci		if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
3288c2ecf20Sopenharmony_ci			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
3298c2ecf20Sopenharmony_ci					new_phys_cpos, new_len);
3308c2ecf20Sopenharmony_ci		else
3318c2ecf20Sopenharmony_ci			ocfs2_free_clusters(handle,
3328c2ecf20Sopenharmony_ci					data_ac->ac_inode,
3338c2ecf20Sopenharmony_ci					data_ac->ac_bh,
3348c2ecf20Sopenharmony_ci					ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
3358c2ecf20Sopenharmony_ci					new_len);
3368c2ecf20Sopenharmony_ci	}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ciout_unlock_mutex:
3418c2ecf20Sopenharmony_ci	inode_unlock(tl_inode);
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	if (context->data_ac) {
3448c2ecf20Sopenharmony_ci		ocfs2_free_alloc_context(context->data_ac);
3458c2ecf20Sopenharmony_ci		context->data_ac = NULL;
3468c2ecf20Sopenharmony_ci	}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	if (context->meta_ac) {
3498c2ecf20Sopenharmony_ci		ocfs2_free_alloc_context(context->meta_ac);
3508c2ecf20Sopenharmony_ci		context->meta_ac = NULL;
3518c2ecf20Sopenharmony_ci	}
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ciout:
3548c2ecf20Sopenharmony_ci	if (ref_tree)
3558c2ecf20Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci	return ret;
3588c2ecf20Sopenharmony_ci}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci/*
3618c2ecf20Sopenharmony_ci * find the victim alloc group, where #blkno fits.
3628c2ecf20Sopenharmony_ci */
3638c2ecf20Sopenharmony_cistatic int ocfs2_find_victim_alloc_group(struct inode *inode,
3648c2ecf20Sopenharmony_ci					 u64 vict_blkno,
3658c2ecf20Sopenharmony_ci					 int type, int slot,
3668c2ecf20Sopenharmony_ci					 int *vict_bit,
3678c2ecf20Sopenharmony_ci					 struct buffer_head **ret_bh)
3688c2ecf20Sopenharmony_ci{
3698c2ecf20Sopenharmony_ci	int ret, i, bits_per_unit = 0;
3708c2ecf20Sopenharmony_ci	u64 blkno;
3718c2ecf20Sopenharmony_ci	char namebuf[40];
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3748c2ecf20Sopenharmony_ci	struct buffer_head *ac_bh = NULL, *gd_bh = NULL;
3758c2ecf20Sopenharmony_ci	struct ocfs2_chain_list *cl;
3768c2ecf20Sopenharmony_ci	struct ocfs2_chain_rec *rec;
3778c2ecf20Sopenharmony_ci	struct ocfs2_dinode *ac_dinode;
3788c2ecf20Sopenharmony_ci	struct ocfs2_group_desc *bg;
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
3818c2ecf20Sopenharmony_ci	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
3828c2ecf20Sopenharmony_ci					 strlen(namebuf), &blkno);
3838c2ecf20Sopenharmony_ci	if (ret) {
3848c2ecf20Sopenharmony_ci		ret = -ENOENT;
3858c2ecf20Sopenharmony_ci		goto out;
3868c2ecf20Sopenharmony_ci	}
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci	ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh);
3898c2ecf20Sopenharmony_ci	if (ret) {
3908c2ecf20Sopenharmony_ci		mlog_errno(ret);
3918c2ecf20Sopenharmony_ci		goto out;
3928c2ecf20Sopenharmony_ci	}
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data;
3958c2ecf20Sopenharmony_ci	cl = &(ac_dinode->id2.i_chain);
3968c2ecf20Sopenharmony_ci	rec = &(cl->cl_recs[0]);
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	if (type == GLOBAL_BITMAP_SYSTEM_INODE)
3998c2ecf20Sopenharmony_ci		bits_per_unit = osb->s_clustersize_bits -
4008c2ecf20Sopenharmony_ci					inode->i_sb->s_blocksize_bits;
4018c2ecf20Sopenharmony_ci	/*
4028c2ecf20Sopenharmony_ci	 * 'vict_blkno' was out of the valid range.
4038c2ecf20Sopenharmony_ci	 */
4048c2ecf20Sopenharmony_ci	if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
4058c2ecf20Sopenharmony_ci	    (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
4068c2ecf20Sopenharmony_ci				bits_per_unit))) {
4078c2ecf20Sopenharmony_ci		ret = -EINVAL;
4088c2ecf20Sopenharmony_ci		goto out;
4098c2ecf20Sopenharmony_ci	}
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci		rec = &(cl->cl_recs[i]);
4148c2ecf20Sopenharmony_ci		if (!rec)
4158c2ecf20Sopenharmony_ci			continue;
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci		bg = NULL;
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci		do {
4208c2ecf20Sopenharmony_ci			if (!bg)
4218c2ecf20Sopenharmony_ci				blkno = le64_to_cpu(rec->c_blkno);
4228c2ecf20Sopenharmony_ci			else
4238c2ecf20Sopenharmony_ci				blkno = le64_to_cpu(bg->bg_next_group);
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci			if (gd_bh) {
4268c2ecf20Sopenharmony_ci				brelse(gd_bh);
4278c2ecf20Sopenharmony_ci				gd_bh = NULL;
4288c2ecf20Sopenharmony_ci			}
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci			ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh);
4318c2ecf20Sopenharmony_ci			if (ret) {
4328c2ecf20Sopenharmony_ci				mlog_errno(ret);
4338c2ecf20Sopenharmony_ci				goto out;
4348c2ecf20Sopenharmony_ci			}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci			bg = (struct ocfs2_group_desc *)gd_bh->b_data;
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci			if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
4398c2ecf20Sopenharmony_ci						(le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci				*ret_bh = gd_bh;
4428c2ecf20Sopenharmony_ci				*vict_bit = (vict_blkno - blkno) >>
4438c2ecf20Sopenharmony_ci							bits_per_unit;
4448c2ecf20Sopenharmony_ci				mlog(0, "find the victim group: #%llu, "
4458c2ecf20Sopenharmony_ci				     "total_bits: %u, vict_bit: %u\n",
4468c2ecf20Sopenharmony_ci				     blkno, le16_to_cpu(bg->bg_bits),
4478c2ecf20Sopenharmony_ci				     *vict_bit);
4488c2ecf20Sopenharmony_ci				goto out;
4498c2ecf20Sopenharmony_ci			}
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci		} while (le64_to_cpu(bg->bg_next_group));
4528c2ecf20Sopenharmony_ci	}
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	ret = -EINVAL;
4558c2ecf20Sopenharmony_ciout:
4568c2ecf20Sopenharmony_ci	brelse(ac_bh);
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	/*
4598c2ecf20Sopenharmony_ci	 * caller has to release the gd_bh properly.
4608c2ecf20Sopenharmony_ci	 */
4618c2ecf20Sopenharmony_ci	return ret;
4628c2ecf20Sopenharmony_ci}
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci/*
4658c2ecf20Sopenharmony_ci * XXX: helper to validate and adjust moving goal.
4668c2ecf20Sopenharmony_ci */
4678c2ecf20Sopenharmony_cistatic int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
4688c2ecf20Sopenharmony_ci					       struct ocfs2_move_extents *range)
4698c2ecf20Sopenharmony_ci{
4708c2ecf20Sopenharmony_ci	int ret, goal_bit = 0;
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci	struct buffer_head *gd_bh = NULL;
4738c2ecf20Sopenharmony_ci	struct ocfs2_group_desc *bg;
4748c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4758c2ecf20Sopenharmony_ci	int c_to_b = 1 << (osb->s_clustersize_bits -
4768c2ecf20Sopenharmony_ci					inode->i_sb->s_blocksize_bits);
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	/*
4798c2ecf20Sopenharmony_ci	 * make goal become cluster aligned.
4808c2ecf20Sopenharmony_ci	 */
4818c2ecf20Sopenharmony_ci	range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb,
4828c2ecf20Sopenharmony_ci						      range->me_goal);
4838c2ecf20Sopenharmony_ci	/*
4848c2ecf20Sopenharmony_ci	 * validate goal sits within global_bitmap, and return the victim
4858c2ecf20Sopenharmony_ci	 * group desc
4868c2ecf20Sopenharmony_ci	 */
4878c2ecf20Sopenharmony_ci	ret = ocfs2_find_victim_alloc_group(inode, range->me_goal,
4888c2ecf20Sopenharmony_ci					    GLOBAL_BITMAP_SYSTEM_INODE,
4898c2ecf20Sopenharmony_ci					    OCFS2_INVALID_SLOT,
4908c2ecf20Sopenharmony_ci					    &goal_bit, &gd_bh);
4918c2ecf20Sopenharmony_ci	if (ret)
4928c2ecf20Sopenharmony_ci		goto out;
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	bg = (struct ocfs2_group_desc *)gd_bh->b_data;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	/*
4978c2ecf20Sopenharmony_ci	 * moving goal is not allowd to start with a group desc blok(#0 blk)
4988c2ecf20Sopenharmony_ci	 * let's compromise to the latter cluster.
4998c2ecf20Sopenharmony_ci	 */
5008c2ecf20Sopenharmony_ci	if (range->me_goal == le64_to_cpu(bg->bg_blkno))
5018c2ecf20Sopenharmony_ci		range->me_goal += c_to_b;
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	/*
5048c2ecf20Sopenharmony_ci	 * movement is not gonna cross two groups.
5058c2ecf20Sopenharmony_ci	 */
5068c2ecf20Sopenharmony_ci	if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize <
5078c2ecf20Sopenharmony_ci								range->me_len) {
5088c2ecf20Sopenharmony_ci		ret = -EINVAL;
5098c2ecf20Sopenharmony_ci		goto out;
5108c2ecf20Sopenharmony_ci	}
5118c2ecf20Sopenharmony_ci	/*
5128c2ecf20Sopenharmony_ci	 * more exact validations/adjustments will be performed later during
5138c2ecf20Sopenharmony_ci	 * moving operation for each extent range.
5148c2ecf20Sopenharmony_ci	 */
5158c2ecf20Sopenharmony_ci	mlog(0, "extents get ready to be moved to #%llu block\n",
5168c2ecf20Sopenharmony_ci	     range->me_goal);
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ciout:
5198c2ecf20Sopenharmony_ci	brelse(gd_bh);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	return ret;
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_cistatic void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
5258c2ecf20Sopenharmony_ci				    int *goal_bit, u32 move_len, u32 max_hop,
5268c2ecf20Sopenharmony_ci				    u32 *phys_cpos)
5278c2ecf20Sopenharmony_ci{
5288c2ecf20Sopenharmony_ci	int i, used, last_free_bits = 0, base_bit = *goal_bit;
5298c2ecf20Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
5308c2ecf20Sopenharmony_ci	u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
5318c2ecf20Sopenharmony_ci						 le64_to_cpu(gd->bg_blkno));
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) {
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci		used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap);
5368c2ecf20Sopenharmony_ci		if (used) {
5378c2ecf20Sopenharmony_ci			/*
5388c2ecf20Sopenharmony_ci			 * we even tried searching the free chunk by jumping
5398c2ecf20Sopenharmony_ci			 * a 'max_hop' distance, but still failed.
5408c2ecf20Sopenharmony_ci			 */
5418c2ecf20Sopenharmony_ci			if ((i - base_bit) > max_hop) {
5428c2ecf20Sopenharmony_ci				*phys_cpos = 0;
5438c2ecf20Sopenharmony_ci				break;
5448c2ecf20Sopenharmony_ci			}
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci			if (last_free_bits)
5478c2ecf20Sopenharmony_ci				last_free_bits = 0;
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci			continue;
5508c2ecf20Sopenharmony_ci		} else
5518c2ecf20Sopenharmony_ci			last_free_bits++;
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci		if (last_free_bits == move_len) {
5548c2ecf20Sopenharmony_ci			i -= move_len;
5558c2ecf20Sopenharmony_ci			*goal_bit = i;
5568c2ecf20Sopenharmony_ci			*phys_cpos = base_cpos + i;
5578c2ecf20Sopenharmony_ci			break;
5588c2ecf20Sopenharmony_ci		}
5598c2ecf20Sopenharmony_ci	}
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_cistatic int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
5658c2ecf20Sopenharmony_ci			     u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
5668c2ecf20Sopenharmony_ci			     u32 len, int ext_flags)
5678c2ecf20Sopenharmony_ci{
5688c2ecf20Sopenharmony_ci	int ret, credits = 0, extra_blocks = 0, goal_bit = 0;
5698c2ecf20Sopenharmony_ci	handle_t *handle;
5708c2ecf20Sopenharmony_ci	struct inode *inode = context->inode;
5718c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5728c2ecf20Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
5738c2ecf20Sopenharmony_ci	struct inode *gb_inode = NULL;
5748c2ecf20Sopenharmony_ci	struct buffer_head *gb_bh = NULL;
5758c2ecf20Sopenharmony_ci	struct buffer_head *gd_bh = NULL;
5768c2ecf20Sopenharmony_ci	struct ocfs2_group_desc *gd;
5778c2ecf20Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree = NULL;
5788c2ecf20Sopenharmony_ci	u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb,
5798c2ecf20Sopenharmony_ci						    context->range->me_threshold);
5808c2ecf20Sopenharmony_ci	u64 phys_blkno, new_phys_blkno;
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
5858c2ecf20Sopenharmony_ci		BUG_ON(!ocfs2_is_refcount_inode(inode));
5868c2ecf20Sopenharmony_ci		BUG_ON(!context->refcount_loc);
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
5898c2ecf20Sopenharmony_ci					       &ref_tree, NULL);
5908c2ecf20Sopenharmony_ci		if (ret) {
5918c2ecf20Sopenharmony_ci			mlog_errno(ret);
5928c2ecf20Sopenharmony_ci			return ret;
5938c2ecf20Sopenharmony_ci		}
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ci		ret = ocfs2_prepare_refcount_change_for_del(inode,
5968c2ecf20Sopenharmony_ci							context->refcount_loc,
5978c2ecf20Sopenharmony_ci							phys_blkno,
5988c2ecf20Sopenharmony_ci							len,
5998c2ecf20Sopenharmony_ci							&credits,
6008c2ecf20Sopenharmony_ci							&extra_blocks);
6018c2ecf20Sopenharmony_ci		if (ret) {
6028c2ecf20Sopenharmony_ci			mlog_errno(ret);
6038c2ecf20Sopenharmony_ci			goto out;
6048c2ecf20Sopenharmony_ci		}
6058c2ecf20Sopenharmony_ci	}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
6088c2ecf20Sopenharmony_ci						len, 1,
6098c2ecf20Sopenharmony_ci						&context->meta_ac,
6108c2ecf20Sopenharmony_ci						extra_blocks, &credits);
6118c2ecf20Sopenharmony_ci	if (ret) {
6128c2ecf20Sopenharmony_ci		mlog_errno(ret);
6138c2ecf20Sopenharmony_ci		goto out;
6148c2ecf20Sopenharmony_ci	}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci	/*
6178c2ecf20Sopenharmony_ci	 * need to count 2 extra credits for global_bitmap inode and
6188c2ecf20Sopenharmony_ci	 * group descriptor.
6198c2ecf20Sopenharmony_ci	 */
6208c2ecf20Sopenharmony_ci	credits += OCFS2_INODE_UPDATE_CREDITS + 1;
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	/*
6238c2ecf20Sopenharmony_ci	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
6248c2ecf20Sopenharmony_ci	 * logic, while we still need to lock the global_bitmap.
6258c2ecf20Sopenharmony_ci	 */
6268c2ecf20Sopenharmony_ci	gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
6278c2ecf20Sopenharmony_ci					       OCFS2_INVALID_SLOT);
6288c2ecf20Sopenharmony_ci	if (!gb_inode) {
6298c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
6308c2ecf20Sopenharmony_ci		ret = -EIO;
6318c2ecf20Sopenharmony_ci		goto out;
6328c2ecf20Sopenharmony_ci	}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_ci	inode_lock(gb_inode);
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_ci	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
6378c2ecf20Sopenharmony_ci	if (ret) {
6388c2ecf20Sopenharmony_ci		mlog_errno(ret);
6398c2ecf20Sopenharmony_ci		goto out_unlock_gb_mutex;
6408c2ecf20Sopenharmony_ci	}
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	inode_lock(tl_inode);
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
6458c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
6468c2ecf20Sopenharmony_ci		ret = PTR_ERR(handle);
6478c2ecf20Sopenharmony_ci		mlog_errno(ret);
6488c2ecf20Sopenharmony_ci		goto out_unlock_tl_inode;
6498c2ecf20Sopenharmony_ci	}
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
6528c2ecf20Sopenharmony_ci	ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno,
6538c2ecf20Sopenharmony_ci					    GLOBAL_BITMAP_SYSTEM_INODE,
6548c2ecf20Sopenharmony_ci					    OCFS2_INVALID_SLOT,
6558c2ecf20Sopenharmony_ci					    &goal_bit, &gd_bh);
6568c2ecf20Sopenharmony_ci	if (ret) {
6578c2ecf20Sopenharmony_ci		mlog_errno(ret);
6588c2ecf20Sopenharmony_ci		goto out_commit;
6598c2ecf20Sopenharmony_ci	}
6608c2ecf20Sopenharmony_ci
	/*
	 * probe the victim cluster group to find a proper region
	 * that fits the wanted movement; it will even perform a
	 * best-effort attempt by compromising to a threshold
	 * around the goal.
	 */
6678c2ecf20Sopenharmony_ci	ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop,
6688c2ecf20Sopenharmony_ci				new_phys_cpos);
6698c2ecf20Sopenharmony_ci	if (!*new_phys_cpos) {
6708c2ecf20Sopenharmony_ci		ret = -ENOSPC;
6718c2ecf20Sopenharmony_ci		goto out_commit;
6728c2ecf20Sopenharmony_ci	}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
6758c2ecf20Sopenharmony_ci				  *new_phys_cpos, ext_flags);
6768c2ecf20Sopenharmony_ci	if (ret) {
6778c2ecf20Sopenharmony_ci		mlog_errno(ret);
6788c2ecf20Sopenharmony_ci		goto out_commit;
6798c2ecf20Sopenharmony_ci	}
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci	gd = (struct ocfs2_group_desc *)gd_bh->b_data;
6828c2ecf20Sopenharmony_ci	ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len,
6838c2ecf20Sopenharmony_ci					       le16_to_cpu(gd->bg_chain));
6848c2ecf20Sopenharmony_ci	if (ret) {
6858c2ecf20Sopenharmony_ci		mlog_errno(ret);
6868c2ecf20Sopenharmony_ci		goto out_commit;
6878c2ecf20Sopenharmony_ci	}
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
6908c2ecf20Sopenharmony_ci					 goal_bit, len);
6918c2ecf20Sopenharmony_ci	if (ret) {
6928c2ecf20Sopenharmony_ci		ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
6938c2ecf20Sopenharmony_ci					       le16_to_cpu(gd->bg_chain));
6948c2ecf20Sopenharmony_ci		mlog_errno(ret);
6958c2ecf20Sopenharmony_ci	}
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	/*
6988c2ecf20Sopenharmony_ci	 * Here we should write the new page out first if we are
6998c2ecf20Sopenharmony_ci	 * in write-back mode.
7008c2ecf20Sopenharmony_ci	 */
7018c2ecf20Sopenharmony_ci	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
7028c2ecf20Sopenharmony_ci	if (ret)
7038c2ecf20Sopenharmony_ci		mlog_errno(ret);
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ciout_commit:
7068c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
7078c2ecf20Sopenharmony_ci	brelse(gd_bh);
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ciout_unlock_tl_inode:
7108c2ecf20Sopenharmony_ci	inode_unlock(tl_inode);
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(gb_inode, 1);
7138c2ecf20Sopenharmony_ciout_unlock_gb_mutex:
7148c2ecf20Sopenharmony_ci	inode_unlock(gb_inode);
7158c2ecf20Sopenharmony_ci	brelse(gb_bh);
7168c2ecf20Sopenharmony_ci	iput(gb_inode);
7178c2ecf20Sopenharmony_ci
7188c2ecf20Sopenharmony_ciout:
7198c2ecf20Sopenharmony_ci	if (context->meta_ac) {
7208c2ecf20Sopenharmony_ci		ocfs2_free_alloc_context(context->meta_ac);
7218c2ecf20Sopenharmony_ci		context->meta_ac = NULL;
7228c2ecf20Sopenharmony_ci	}
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	if (ref_tree)
7258c2ecf20Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	return ret;
7288c2ecf20Sopenharmony_ci}
7298c2ecf20Sopenharmony_ci
/*
 * Helper to calculate the defraging length in one run according to threshold.
 *
 * @alloc_size:   in: length of the extent piece under consideration;
 *                out: trimmed to the amount still needed to reach
 *                @threshold when the piece has to be split.
 * @len_defraged: in/out: length gathered so far in the current
 *                defragmentation cycle; reset to 0 when a cycle completes.
 * @threshold:    size of one defragmentation cycle, in the same unit as
 *                @alloc_size and @len_defraged.
 * @skip:         set to 1 when the extent should be skipped; never cleared
 *                here, the caller is expected to reset it.
 */
static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged,
					 u32 threshold, int *skip)
{
	/*
	 * Compare against the room left in this cycle rather than computing
	 * '*alloc_size + *len_defraged': that u32 sum can wrap around and
	 * make an oversized extent look as if it were below the threshold.
	 */
	if (*len_defraged < threshold &&
	    *alloc_size < threshold - *len_defraged) {
		/*
		 * proceed defragmentation until we meet the thresh
		 */
		*len_defraged += *alloc_size;
	} else if (*len_defraged == 0) {
		/*
		 * XXX: skip a large extent.
		 */
		*skip = 1;
	} else {
		/*
		 * split this extent to coalesce with former pieces as
		 * to reach the threshold.
		 *
		 * we're done here with one cycle of defragmentation
		 * in a size of 'thresh', resetting 'len_defraged'
		 * forces a new defragmentation.
		 */
		*alloc_size = threshold - *len_defraged;
		*len_defraged = 0;
	}
}
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_cistatic int __ocfs2_move_extents_range(struct buffer_head *di_bh,
7618c2ecf20Sopenharmony_ci				struct ocfs2_move_extents_context *context)
7628c2ecf20Sopenharmony_ci{
7638c2ecf20Sopenharmony_ci	int ret = 0, flags, do_defrag, skip = 0;
7648c2ecf20Sopenharmony_ci	u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
7658c2ecf20Sopenharmony_ci	u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	struct inode *inode = context->inode;
7688c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7698c2ecf20Sopenharmony_ci	struct ocfs2_move_extents *range = context->range;
7708c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	if ((i_size_read(inode) == 0) || (range->me_len == 0))
7738c2ecf20Sopenharmony_ci		return 0;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
7768c2ecf20Sopenharmony_ci		return 0;
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	context->refcount_loc = le64_to_cpu(di->i_refcount_loc);
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
7818c2ecf20Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&context->dealloc);
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	/*
7848c2ecf20Sopenharmony_ci	 * TO-DO XXX:
7858c2ecf20Sopenharmony_ci	 *
7868c2ecf20Sopenharmony_ci	 * - xattr extents.
7878c2ecf20Sopenharmony_ci	 */
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	do_defrag = context->auto_defrag;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	/*
7928c2ecf20Sopenharmony_ci	 * extents moving happens in unit of clusters, for the sake
7938c2ecf20Sopenharmony_ci	 * of simplicity, we may ignore two clusters where 'byte_start'
7948c2ecf20Sopenharmony_ci	 * and 'byte_start + len' were within.
7958c2ecf20Sopenharmony_ci	 */
7968c2ecf20Sopenharmony_ci	move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
7978c2ecf20Sopenharmony_ci	len_to_move = (range->me_start + range->me_len) >>
7988c2ecf20Sopenharmony_ci						osb->s_clustersize_bits;
7998c2ecf20Sopenharmony_ci	if (len_to_move >= move_start)
8008c2ecf20Sopenharmony_ci		len_to_move -= move_start;
8018c2ecf20Sopenharmony_ci	else
8028c2ecf20Sopenharmony_ci		len_to_move = 0;
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci	if (do_defrag) {
8058c2ecf20Sopenharmony_ci		defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
8068c2ecf20Sopenharmony_ci		if (defrag_thresh <= 1)
8078c2ecf20Sopenharmony_ci			goto done;
8088c2ecf20Sopenharmony_ci	} else
8098c2ecf20Sopenharmony_ci		new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
8108c2ecf20Sopenharmony_ci							 range->me_goal);
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, "
8138c2ecf20Sopenharmony_ci	     "thresh: %u\n",
8148c2ecf20Sopenharmony_ci	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
8158c2ecf20Sopenharmony_ci	     (unsigned long long)range->me_start,
8168c2ecf20Sopenharmony_ci	     (unsigned long long)range->me_len,
8178c2ecf20Sopenharmony_ci	     move_start, len_to_move, defrag_thresh);
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci	cpos = move_start;
8208c2ecf20Sopenharmony_ci	while (len_to_move) {
8218c2ecf20Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size,
8228c2ecf20Sopenharmony_ci					 &flags);
8238c2ecf20Sopenharmony_ci		if (ret) {
8248c2ecf20Sopenharmony_ci			mlog_errno(ret);
8258c2ecf20Sopenharmony_ci			goto out;
8268c2ecf20Sopenharmony_ci		}
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci		if (alloc_size > len_to_move)
8298c2ecf20Sopenharmony_ci			alloc_size = len_to_move;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci		/*
8328c2ecf20Sopenharmony_ci		 * XXX: how to deal with a hole:
8338c2ecf20Sopenharmony_ci		 *
8348c2ecf20Sopenharmony_ci		 * - skip the hole of course
8358c2ecf20Sopenharmony_ci		 * - force a new defragmentation
8368c2ecf20Sopenharmony_ci		 */
8378c2ecf20Sopenharmony_ci		if (!phys_cpos) {
8388c2ecf20Sopenharmony_ci			if (do_defrag)
8398c2ecf20Sopenharmony_ci				len_defraged = 0;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci			goto next;
8428c2ecf20Sopenharmony_ci		}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci		if (do_defrag) {
8458c2ecf20Sopenharmony_ci			ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged,
8468c2ecf20Sopenharmony_ci						     defrag_thresh, &skip);
8478c2ecf20Sopenharmony_ci			/*
8488c2ecf20Sopenharmony_ci			 * skip large extents
8498c2ecf20Sopenharmony_ci			 */
8508c2ecf20Sopenharmony_ci			if (skip) {
8518c2ecf20Sopenharmony_ci				skip = 0;
8528c2ecf20Sopenharmony_ci				goto next;
8538c2ecf20Sopenharmony_ci			}
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci			mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, "
8568c2ecf20Sopenharmony_ci			     "alloc_size: %u, len_defraged: %u\n",
8578c2ecf20Sopenharmony_ci			     cpos, phys_cpos, alloc_size, len_defraged);
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci			ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
8608c2ecf20Sopenharmony_ci						  &alloc_size, flags);
8618c2ecf20Sopenharmony_ci		} else {
8628c2ecf20Sopenharmony_ci			ret = ocfs2_move_extent(context, cpos, phys_cpos,
8638c2ecf20Sopenharmony_ci						&new_phys_cpos, alloc_size,
8648c2ecf20Sopenharmony_ci						flags);
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci			new_phys_cpos += alloc_size;
8678c2ecf20Sopenharmony_ci		}
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci		if (ret < 0) {
8708c2ecf20Sopenharmony_ci			mlog_errno(ret);
8718c2ecf20Sopenharmony_ci			goto out;
8728c2ecf20Sopenharmony_ci		}
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci		context->clusters_moved += alloc_size;
8758c2ecf20Sopenharmony_cinext:
8768c2ecf20Sopenharmony_ci		cpos += alloc_size;
8778c2ecf20Sopenharmony_ci		len_to_move -= alloc_size;
8788c2ecf20Sopenharmony_ci	}
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_cidone:
8818c2ecf20Sopenharmony_ci	range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE;
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ciout:
8848c2ecf20Sopenharmony_ci	range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb,
8858c2ecf20Sopenharmony_ci						      context->clusters_moved);
8868c2ecf20Sopenharmony_ci	range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb,
8878c2ecf20Sopenharmony_ci						       context->new_phys_cpos);
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	ocfs2_schedule_truncate_log_flush(osb, 1);
8908c2ecf20Sopenharmony_ci	ocfs2_run_deallocs(osb, &context->dealloc);
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	return ret;
8938c2ecf20Sopenharmony_ci}
8948c2ecf20Sopenharmony_ci
8958c2ecf20Sopenharmony_cistatic int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
8968c2ecf20Sopenharmony_ci{
8978c2ecf20Sopenharmony_ci	int status;
8988c2ecf20Sopenharmony_ci	handle_t *handle;
8998c2ecf20Sopenharmony_ci	struct inode *inode = context->inode;
9008c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di;
9018c2ecf20Sopenharmony_ci	struct buffer_head *di_bh = NULL;
9028c2ecf20Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
9058c2ecf20Sopenharmony_ci		return -EROFS;
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci	inode_lock(inode);
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci	/*
9108c2ecf20Sopenharmony_ci	 * This prevents concurrent writes from other nodes
9118c2ecf20Sopenharmony_ci	 */
9128c2ecf20Sopenharmony_ci	status = ocfs2_rw_lock(inode, 1);
9138c2ecf20Sopenharmony_ci	if (status) {
9148c2ecf20Sopenharmony_ci		mlog_errno(status);
9158c2ecf20Sopenharmony_ci		goto out;
9168c2ecf20Sopenharmony_ci	}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci	status = ocfs2_inode_lock(inode, &di_bh, 1);
9198c2ecf20Sopenharmony_ci	if (status) {
9208c2ecf20Sopenharmony_ci		mlog_errno(status);
9218c2ecf20Sopenharmony_ci		goto out_rw_unlock;
9228c2ecf20Sopenharmony_ci	}
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	/*
9258c2ecf20Sopenharmony_ci	 * rememer ip_xattr_sem also needs to be held if necessary
9268c2ecf20Sopenharmony_ci	 */
9278c2ecf20Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	status = __ocfs2_move_extents_range(di_bh, context);
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
9328c2ecf20Sopenharmony_ci	if (status) {
9338c2ecf20Sopenharmony_ci		mlog_errno(status);
9348c2ecf20Sopenharmony_ci		goto out_inode_unlock;
9358c2ecf20Sopenharmony_ci	}
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	/*
9388c2ecf20Sopenharmony_ci	 * We update ctime for these changes
9398c2ecf20Sopenharmony_ci	 */
9408c2ecf20Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
9418c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
9428c2ecf20Sopenharmony_ci		status = PTR_ERR(handle);
9438c2ecf20Sopenharmony_ci		mlog_errno(status);
9448c2ecf20Sopenharmony_ci		goto out_inode_unlock;
9458c2ecf20Sopenharmony_ci	}
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
9488c2ecf20Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
9498c2ecf20Sopenharmony_ci	if (status) {
9508c2ecf20Sopenharmony_ci		mlog_errno(status);
9518c2ecf20Sopenharmony_ci		goto out_commit;
9528c2ecf20Sopenharmony_ci	}
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci	di = (struct ocfs2_dinode *)di_bh->b_data;
9558c2ecf20Sopenharmony_ci	inode->i_ctime = current_time(inode);
9568c2ecf20Sopenharmony_ci	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
9578c2ecf20Sopenharmony_ci	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
9588c2ecf20Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 0);
9598c2ecf20Sopenharmony_ci
9608c2ecf20Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ciout_commit:
9638c2ecf20Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ciout_inode_unlock:
9668c2ecf20Sopenharmony_ci	brelse(di_bh);
9678c2ecf20Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
9688c2ecf20Sopenharmony_ciout_rw_unlock:
9698c2ecf20Sopenharmony_ci	ocfs2_rw_unlock(inode, 1);
9708c2ecf20Sopenharmony_ciout:
9718c2ecf20Sopenharmony_ci	inode_unlock(inode);
9728c2ecf20Sopenharmony_ci
9738c2ecf20Sopenharmony_ci	return status;
9748c2ecf20Sopenharmony_ci}
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ciint ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
9778c2ecf20Sopenharmony_ci{
9788c2ecf20Sopenharmony_ci	int status;
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(filp);
9818c2ecf20Sopenharmony_ci	struct ocfs2_move_extents range;
9828c2ecf20Sopenharmony_ci	struct ocfs2_move_extents_context *context;
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	if (!argp)
9858c2ecf20Sopenharmony_ci		return -EINVAL;
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	status = mnt_want_write_file(filp);
9888c2ecf20Sopenharmony_ci	if (status)
9898c2ecf20Sopenharmony_ci		return status;
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ci	if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) {
9928c2ecf20Sopenharmony_ci		status = -EPERM;
9938c2ecf20Sopenharmony_ci		goto out_drop;
9948c2ecf20Sopenharmony_ci	}
9958c2ecf20Sopenharmony_ci
9968c2ecf20Sopenharmony_ci	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
9978c2ecf20Sopenharmony_ci		status = -EPERM;
9988c2ecf20Sopenharmony_ci		goto out_drop;
9998c2ecf20Sopenharmony_ci	}
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci	context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
10028c2ecf20Sopenharmony_ci	if (!context) {
10038c2ecf20Sopenharmony_ci		status = -ENOMEM;
10048c2ecf20Sopenharmony_ci		mlog_errno(status);
10058c2ecf20Sopenharmony_ci		goto out_drop;
10068c2ecf20Sopenharmony_ci	}
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci	context->inode = inode;
10098c2ecf20Sopenharmony_ci	context->file = filp;
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ci	if (copy_from_user(&range, argp, sizeof(range))) {
10128c2ecf20Sopenharmony_ci		status = -EFAULT;
10138c2ecf20Sopenharmony_ci		goto out_free;
10148c2ecf20Sopenharmony_ci	}
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci	if (range.me_start > i_size_read(inode)) {
10178c2ecf20Sopenharmony_ci		status = -EINVAL;
10188c2ecf20Sopenharmony_ci		goto out_free;
10198c2ecf20Sopenharmony_ci	}
10208c2ecf20Sopenharmony_ci
10218c2ecf20Sopenharmony_ci	if (range.me_start + range.me_len > i_size_read(inode))
10228c2ecf20Sopenharmony_ci			range.me_len = i_size_read(inode) - range.me_start;
10238c2ecf20Sopenharmony_ci
10248c2ecf20Sopenharmony_ci	context->range = &range;
10258c2ecf20Sopenharmony_ci
10268c2ecf20Sopenharmony_ci	/*
10278c2ecf20Sopenharmony_ci	 * ok, the default theshold for the defragmentation
10288c2ecf20Sopenharmony_ci	 * is 1M, since our maximum clustersize was 1M also.
10298c2ecf20Sopenharmony_ci	 * any thought?
10308c2ecf20Sopenharmony_ci	 */
10318c2ecf20Sopenharmony_ci	if (!range.me_threshold)
10328c2ecf20Sopenharmony_ci		range.me_threshold = 1024 * 1024;
10338c2ecf20Sopenharmony_ci
10348c2ecf20Sopenharmony_ci	if (range.me_threshold > i_size_read(inode))
10358c2ecf20Sopenharmony_ci		range.me_threshold = i_size_read(inode);
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci	if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
10388c2ecf20Sopenharmony_ci		context->auto_defrag = 1;
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci		if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
10418c2ecf20Sopenharmony_ci			context->partial = 1;
10428c2ecf20Sopenharmony_ci	} else {
10438c2ecf20Sopenharmony_ci		/*
10448c2ecf20Sopenharmony_ci		 * first best-effort attempt to validate and adjust the goal
10458c2ecf20Sopenharmony_ci		 * (physical address in block), while it can't guarantee later
10468c2ecf20Sopenharmony_ci		 * operation can succeed all the time since global_bitmap may
10478c2ecf20Sopenharmony_ci		 * change a bit over time.
10488c2ecf20Sopenharmony_ci		 */
10498c2ecf20Sopenharmony_ci
10508c2ecf20Sopenharmony_ci		status = ocfs2_validate_and_adjust_move_goal(inode, &range);
10518c2ecf20Sopenharmony_ci		if (status)
10528c2ecf20Sopenharmony_ci			goto out_copy;
10538c2ecf20Sopenharmony_ci	}
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	status = ocfs2_move_extents(context);
10568c2ecf20Sopenharmony_ci	if (status)
10578c2ecf20Sopenharmony_ci		mlog_errno(status);
10588c2ecf20Sopenharmony_ciout_copy:
10598c2ecf20Sopenharmony_ci	/*
10608c2ecf20Sopenharmony_ci	 * movement/defragmentation may end up being partially completed,
10618c2ecf20Sopenharmony_ci	 * that's the reason why we need to return userspace the finished
10628c2ecf20Sopenharmony_ci	 * length and new_offset even if failure happens somewhere.
10638c2ecf20Sopenharmony_ci	 */
10648c2ecf20Sopenharmony_ci	if (copy_to_user(argp, &range, sizeof(range)))
10658c2ecf20Sopenharmony_ci		status = -EFAULT;
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_ciout_free:
10688c2ecf20Sopenharmony_ci	kfree(context);
10698c2ecf20Sopenharmony_ciout_drop:
10708c2ecf20Sopenharmony_ci	mnt_drop_write_file(filp);
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	return status;
10738c2ecf20Sopenharmony_ci}
1074