18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * move_extents.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Copyright (C) 2011 Oracle. All rights reserved. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci#include <linux/fs.h> 108c2ecf20Sopenharmony_ci#include <linux/types.h> 118c2ecf20Sopenharmony_ci#include <linux/mount.h> 128c2ecf20Sopenharmony_ci#include <linux/swap.h> 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <cluster/masklog.h> 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include "ocfs2.h" 178c2ecf20Sopenharmony_ci#include "ocfs2_ioctl.h" 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include "alloc.h" 208c2ecf20Sopenharmony_ci#include "localalloc.h" 218c2ecf20Sopenharmony_ci#include "aops.h" 228c2ecf20Sopenharmony_ci#include "dlmglue.h" 238c2ecf20Sopenharmony_ci#include "extent_map.h" 248c2ecf20Sopenharmony_ci#include "inode.h" 258c2ecf20Sopenharmony_ci#include "journal.h" 268c2ecf20Sopenharmony_ci#include "suballoc.h" 278c2ecf20Sopenharmony_ci#include "uptodate.h" 288c2ecf20Sopenharmony_ci#include "super.h" 298c2ecf20Sopenharmony_ci#include "dir.h" 308c2ecf20Sopenharmony_ci#include "buffer_head_io.h" 318c2ecf20Sopenharmony_ci#include "sysfile.h" 328c2ecf20Sopenharmony_ci#include "refcounttree.h" 338c2ecf20Sopenharmony_ci#include "move_extents.h" 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistruct ocfs2_move_extents_context { 368c2ecf20Sopenharmony_ci struct inode *inode; 378c2ecf20Sopenharmony_ci struct file *file; 388c2ecf20Sopenharmony_ci int auto_defrag; 398c2ecf20Sopenharmony_ci int partial; 408c2ecf20Sopenharmony_ci int credits; 418c2ecf20Sopenharmony_ci u32 new_phys_cpos; 428c2ecf20Sopenharmony_ci u32 clusters_moved; 438c2ecf20Sopenharmony_ci u64 refcount_loc; 448c2ecf20Sopenharmony_ci struct ocfs2_move_extents *range; 458c2ecf20Sopenharmony_ci struct ocfs2_extent_tree et; 468c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *meta_ac; 478c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac; 488c2ecf20Sopenharmony_ci struct ocfs2_cached_dealloc_ctxt dealloc; 498c2ecf20Sopenharmony_ci}; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic int __ocfs2_move_extent(handle_t *handle, 528c2ecf20Sopenharmony_ci struct ocfs2_move_extents_context *context, 538c2ecf20Sopenharmony_ci u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, 548c2ecf20Sopenharmony_ci int ext_flags) 558c2ecf20Sopenharmony_ci{ 568c2ecf20Sopenharmony_ci int ret = 0, index; 578c2ecf20Sopenharmony_ci struct inode *inode = context->inode; 588c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 598c2ecf20Sopenharmony_ci struct ocfs2_extent_rec *rec, replace_rec; 608c2ecf20Sopenharmony_ci struct ocfs2_path *path = NULL; 618c2ecf20Sopenharmony_ci struct ocfs2_extent_list *el; 628c2ecf20Sopenharmony_ci u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); 638c2ecf20Sopenharmony_ci u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, 668c2ecf20Sopenharmony_ci p_cpos, new_p_cpos, len); 678c2ecf20Sopenharmony_ci if (ret) { 688c2ecf20Sopenharmony_ci mlog_errno(ret); 698c2ecf20Sopenharmony_ci goto out; 708c2ecf20Sopenharmony_ci } 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci memset(&replace_rec, 0, sizeof(replace_rec)); 738c2ecf20Sopenharmony_ci replace_rec.e_cpos = cpu_to_le32(cpos); 748c2ecf20Sopenharmony_ci replace_rec.e_leaf_clusters = cpu_to_le16(len); 758c2ecf20Sopenharmony_ci replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 768c2ecf20Sopenharmony_ci new_p_cpos)); 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci path = ocfs2_new_path_from_et(&context->et); 798c2ecf20Sopenharmony_ci if (!path) { 808c2ecf20Sopenharmony_ci ret = -ENOMEM; 818c2ecf20Sopenharmony_ci mlog_errno(ret); 828c2ecf20Sopenharmony_ci goto out; 838c2ecf20Sopenharmony_ci } 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); 868c2ecf20Sopenharmony_ci if (ret) { 878c2ecf20Sopenharmony_ci mlog_errno(ret); 888c2ecf20Sopenharmony_ci goto out; 898c2ecf20Sopenharmony_ci } 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci el = path_leaf_el(path); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci index = ocfs2_search_extent_list(el, cpos); 948c2ecf20Sopenharmony_ci if (index == -1) { 958c2ecf20Sopenharmony_ci ret = ocfs2_error(inode->i_sb, 968c2ecf20Sopenharmony_ci "Inode %llu has an extent at cpos %u which can no longer be found\n", 978c2ecf20Sopenharmony_ci (unsigned long long)ino, cpos); 988c2ecf20Sopenharmony_ci goto out; 998c2ecf20Sopenharmony_ci } 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci rec = &el->l_recs[index]; 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci BUG_ON(ext_flags != rec->e_flags); 1048c2ecf20Sopenharmony_ci /* 1058c2ecf20Sopenharmony_ci * after moving/defraging to new location, the extent is not going 1068c2ecf20Sopenharmony_ci * to be refcounted anymore. 1078c2ecf20Sopenharmony_ci */ 1088c2ecf20Sopenharmony_ci replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci ret = ocfs2_split_extent(handle, &context->et, path, index, 1118c2ecf20Sopenharmony_ci &replace_rec, context->meta_ac, 1128c2ecf20Sopenharmony_ci &context->dealloc); 1138c2ecf20Sopenharmony_ci if (ret) { 1148c2ecf20Sopenharmony_ci mlog_errno(ret); 1158c2ecf20Sopenharmony_ci goto out; 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci context->new_phys_cpos = new_p_cpos; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* 1218c2ecf20Sopenharmony_ci * need I to append truncate log for old clusters? 1228c2ecf20Sopenharmony_ci */ 1238c2ecf20Sopenharmony_ci if (old_blkno) { 1248c2ecf20Sopenharmony_ci if (ext_flags & OCFS2_EXT_REFCOUNTED) 1258c2ecf20Sopenharmony_ci ret = ocfs2_decrease_refcount(inode, handle, 1268c2ecf20Sopenharmony_ci ocfs2_blocks_to_clusters(osb->sb, 1278c2ecf20Sopenharmony_ci old_blkno), 1288c2ecf20Sopenharmony_ci len, context->meta_ac, 1298c2ecf20Sopenharmony_ci &context->dealloc, 1); 1308c2ecf20Sopenharmony_ci else 1318c2ecf20Sopenharmony_ci ret = ocfs2_truncate_log_append(osb, handle, 1328c2ecf20Sopenharmony_ci old_blkno, len); 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci ocfs2_update_inode_fsync_trans(handle, inode, 0); 1368c2ecf20Sopenharmony_ciout: 1378c2ecf20Sopenharmony_ci ocfs2_free_path(path); 1388c2ecf20Sopenharmony_ci return ret; 1398c2ecf20Sopenharmony_ci} 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci/* 1428c2ecf20Sopenharmony_ci * lock allocator, and reserve appropriate number of bits for 1438c2ecf20Sopenharmony_ci * meta blocks. 1448c2ecf20Sopenharmony_ci */ 1458c2ecf20Sopenharmony_cistatic int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, 1468c2ecf20Sopenharmony_ci struct ocfs2_extent_tree *et, 1478c2ecf20Sopenharmony_ci u32 clusters_to_move, 1488c2ecf20Sopenharmony_ci u32 extents_to_split, 1498c2ecf20Sopenharmony_ci struct ocfs2_alloc_context **meta_ac, 1508c2ecf20Sopenharmony_ci int extra_blocks, 1518c2ecf20Sopenharmony_ci int *credits) 1528c2ecf20Sopenharmony_ci{ 1538c2ecf20Sopenharmony_ci int ret, num_free_extents; 1548c2ecf20Sopenharmony_ci unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; 1558c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci num_free_extents = ocfs2_num_free_extents(et); 1588c2ecf20Sopenharmony_ci if (num_free_extents < 0) { 1598c2ecf20Sopenharmony_ci ret = num_free_extents; 1608c2ecf20Sopenharmony_ci mlog_errno(ret); 1618c2ecf20Sopenharmony_ci goto out; 1628c2ecf20Sopenharmony_ci } 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci if (!num_free_extents || 1658c2ecf20Sopenharmony_ci (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) 1668c2ecf20Sopenharmony_ci extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); 1698c2ecf20Sopenharmony_ci if (ret) { 1708c2ecf20Sopenharmony_ci mlog_errno(ret); 1718c2ecf20Sopenharmony_ci goto out; 1728c2ecf20Sopenharmony_ci } 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", 1788c2ecf20Sopenharmony_ci extra_blocks, clusters_to_move, *credits); 1798c2ecf20Sopenharmony_ciout: 1808c2ecf20Sopenharmony_ci if (ret) { 1818c2ecf20Sopenharmony_ci if (*meta_ac) { 1828c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(*meta_ac); 1838c2ecf20Sopenharmony_ci *meta_ac = NULL; 1848c2ecf20Sopenharmony_ci } 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci return ret; 1888c2ecf20Sopenharmony_ci} 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci/* 1918c2ecf20Sopenharmony_ci * Using one journal handle to guarantee the data consistency in case 1928c2ecf20Sopenharmony_ci * crash happens anywhere. 1938c2ecf20Sopenharmony_ci * 1948c2ecf20Sopenharmony_ci * XXX: defrag can end up with finishing partial extent as requested, 1958c2ecf20Sopenharmony_ci * due to not enough contiguous clusters can be found in allocator. 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_cistatic int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, 1988c2ecf20Sopenharmony_ci u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci int ret, credits = 0, extra_blocks = 0, partial = context->partial; 2018c2ecf20Sopenharmony_ci handle_t *handle; 2028c2ecf20Sopenharmony_ci struct inode *inode = context->inode; 2038c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2048c2ecf20Sopenharmony_ci struct inode *tl_inode = osb->osb_tl_inode; 2058c2ecf20Sopenharmony_ci struct ocfs2_refcount_tree *ref_tree = NULL; 2068c2ecf20Sopenharmony_ci u32 new_phys_cpos, new_len; 2078c2ecf20Sopenharmony_ci u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 2088c2ecf20Sopenharmony_ci int need_free = 0; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { 2118c2ecf20Sopenharmony_ci BUG_ON(!ocfs2_is_refcount_inode(inode)); 2128c2ecf20Sopenharmony_ci BUG_ON(!context->refcount_loc); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, 2158c2ecf20Sopenharmony_ci &ref_tree, NULL); 2168c2ecf20Sopenharmony_ci if (ret) { 2178c2ecf20Sopenharmony_ci mlog_errno(ret); 2188c2ecf20Sopenharmony_ci return ret; 2198c2ecf20Sopenharmony_ci } 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci ret = ocfs2_prepare_refcount_change_for_del(inode, 2228c2ecf20Sopenharmony_ci context->refcount_loc, 2238c2ecf20Sopenharmony_ci phys_blkno, 2248c2ecf20Sopenharmony_ci *len, 2258c2ecf20Sopenharmony_ci &credits, 2268c2ecf20Sopenharmony_ci &extra_blocks); 2278c2ecf20Sopenharmony_ci if (ret) { 2288c2ecf20Sopenharmony_ci mlog_errno(ret); 2298c2ecf20Sopenharmony_ci goto out; 2308c2ecf20Sopenharmony_ci } 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, 2348c2ecf20Sopenharmony_ci *len, 1, 2358c2ecf20Sopenharmony_ci &context->meta_ac, 2368c2ecf20Sopenharmony_ci extra_blocks, &credits); 2378c2ecf20Sopenharmony_ci if (ret) { 2388c2ecf20Sopenharmony_ci mlog_errno(ret); 2398c2ecf20Sopenharmony_ci goto out; 2408c2ecf20Sopenharmony_ci } 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci /* 2438c2ecf20Sopenharmony_ci * should be using allocation reservation strategy there? 2448c2ecf20Sopenharmony_ci * 2458c2ecf20Sopenharmony_ci * if (context->data_ac) 2468c2ecf20Sopenharmony_ci * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; 2478c2ecf20Sopenharmony_ci */ 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci inode_lock(tl_inode); 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci if (ocfs2_truncate_log_needs_flush(osb)) { 2528c2ecf20Sopenharmony_ci ret = __ocfs2_flush_truncate_log(osb); 2538c2ecf20Sopenharmony_ci if (ret < 0) { 2548c2ecf20Sopenharmony_ci mlog_errno(ret); 2558c2ecf20Sopenharmony_ci goto out_unlock_mutex; 2568c2ecf20Sopenharmony_ci } 2578c2ecf20Sopenharmony_ci } 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci /* 2608c2ecf20Sopenharmony_ci * Make sure ocfs2_reserve_cluster is called after 2618c2ecf20Sopenharmony_ci * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. 2628c2ecf20Sopenharmony_ci * 2638c2ecf20Sopenharmony_ci * If ocfs2_reserve_cluster is called 2648c2ecf20Sopenharmony_ci * before __ocfs2_flush_truncate_log, dead lock on global bitmap 2658c2ecf20Sopenharmony_ci * may happen. 2668c2ecf20Sopenharmony_ci * 2678c2ecf20Sopenharmony_ci */ 2688c2ecf20Sopenharmony_ci ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); 2698c2ecf20Sopenharmony_ci if (ret) { 2708c2ecf20Sopenharmony_ci mlog_errno(ret); 2718c2ecf20Sopenharmony_ci goto out_unlock_mutex; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 2758c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 2768c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 2778c2ecf20Sopenharmony_ci mlog_errno(ret); 2788c2ecf20Sopenharmony_ci goto out_unlock_mutex; 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, 2828c2ecf20Sopenharmony_ci &new_phys_cpos, &new_len); 2838c2ecf20Sopenharmony_ci if (ret) { 2848c2ecf20Sopenharmony_ci mlog_errno(ret); 2858c2ecf20Sopenharmony_ci goto out_commit; 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci /* 2898c2ecf20Sopenharmony_ci * allowing partial extent moving is kind of 'pros and cons', it makes 2908c2ecf20Sopenharmony_ci * whole defragmentation less likely to fail, on the contrary, the bad 2918c2ecf20Sopenharmony_ci * thing is it may make the fs even more fragmented after moving, let 2928c2ecf20Sopenharmony_ci * userspace make a good decision here. 2938c2ecf20Sopenharmony_ci */ 2948c2ecf20Sopenharmony_ci if (new_len != *len) { 2958c2ecf20Sopenharmony_ci mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); 2968c2ecf20Sopenharmony_ci if (!partial) { 2978c2ecf20Sopenharmony_ci context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; 2988c2ecf20Sopenharmony_ci ret = -ENOSPC; 2998c2ecf20Sopenharmony_ci need_free = 1; 3008c2ecf20Sopenharmony_ci goto out_commit; 3018c2ecf20Sopenharmony_ci } 3028c2ecf20Sopenharmony_ci } 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, 3058c2ecf20Sopenharmony_ci phys_cpos, new_phys_cpos); 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, 3088c2ecf20Sopenharmony_ci new_phys_cpos, ext_flags); 3098c2ecf20Sopenharmony_ci if (ret) 3108c2ecf20Sopenharmony_ci mlog_errno(ret); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci if (partial && (new_len != *len)) 3138c2ecf20Sopenharmony_ci *len = new_len; 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci /* 3168c2ecf20Sopenharmony_ci * Here we should write the new page out first if we are 3178c2ecf20Sopenharmony_ci * in write-back mode. 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_ci ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); 3208c2ecf20Sopenharmony_ci if (ret) 3218c2ecf20Sopenharmony_ci mlog_errno(ret); 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ciout_commit: 3248c2ecf20Sopenharmony_ci if (need_free && context->data_ac) { 3258c2ecf20Sopenharmony_ci struct ocfs2_alloc_context *data_ac = context->data_ac; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) 3288c2ecf20Sopenharmony_ci ocfs2_free_local_alloc_bits(osb, handle, data_ac, 3298c2ecf20Sopenharmony_ci new_phys_cpos, new_len); 3308c2ecf20Sopenharmony_ci else 3318c2ecf20Sopenharmony_ci ocfs2_free_clusters(handle, 3328c2ecf20Sopenharmony_ci data_ac->ac_inode, 3338c2ecf20Sopenharmony_ci data_ac->ac_bh, 3348c2ecf20Sopenharmony_ci ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), 3358c2ecf20Sopenharmony_ci new_len); 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ciout_unlock_mutex: 3418c2ecf20Sopenharmony_ci inode_unlock(tl_inode); 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci if (context->data_ac) { 3448c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(context->data_ac); 3458c2ecf20Sopenharmony_ci context->data_ac = NULL; 3468c2ecf20Sopenharmony_ci } 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci if (context->meta_ac) { 3498c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(context->meta_ac); 3508c2ecf20Sopenharmony_ci context->meta_ac = NULL; 3518c2ecf20Sopenharmony_ci } 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ciout: 3548c2ecf20Sopenharmony_ci if (ref_tree) 3558c2ecf20Sopenharmony_ci ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci return ret; 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci/* 3618c2ecf20Sopenharmony_ci * find the victim alloc group, where #blkno fits. 3628c2ecf20Sopenharmony_ci */ 3638c2ecf20Sopenharmony_cistatic int ocfs2_find_victim_alloc_group(struct inode *inode, 3648c2ecf20Sopenharmony_ci u64 vict_blkno, 3658c2ecf20Sopenharmony_ci int type, int slot, 3668c2ecf20Sopenharmony_ci int *vict_bit, 3678c2ecf20Sopenharmony_ci struct buffer_head **ret_bh) 3688c2ecf20Sopenharmony_ci{ 3698c2ecf20Sopenharmony_ci int ret, i, bits_per_unit = 0; 3708c2ecf20Sopenharmony_ci u64 blkno; 3718c2ecf20Sopenharmony_ci char namebuf[40]; 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3748c2ecf20Sopenharmony_ci struct buffer_head *ac_bh = NULL, *gd_bh = NULL; 3758c2ecf20Sopenharmony_ci struct ocfs2_chain_list *cl; 3768c2ecf20Sopenharmony_ci struct ocfs2_chain_rec *rec; 3778c2ecf20Sopenharmony_ci struct ocfs2_dinode *ac_dinode; 3788c2ecf20Sopenharmony_ci struct ocfs2_group_desc *bg; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); 3818c2ecf20Sopenharmony_ci ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, 3828c2ecf20Sopenharmony_ci strlen(namebuf), &blkno); 3838c2ecf20Sopenharmony_ci if (ret) { 3848c2ecf20Sopenharmony_ci ret = -ENOENT; 3858c2ecf20Sopenharmony_ci goto out; 3868c2ecf20Sopenharmony_ci } 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); 3898c2ecf20Sopenharmony_ci if (ret) { 3908c2ecf20Sopenharmony_ci mlog_errno(ret); 3918c2ecf20Sopenharmony_ci goto out; 3928c2ecf20Sopenharmony_ci } 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; 3958c2ecf20Sopenharmony_ci cl = &(ac_dinode->id2.i_chain); 3968c2ecf20Sopenharmony_ci rec = &(cl->cl_recs[0]); 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci if (type == GLOBAL_BITMAP_SYSTEM_INODE) 3998c2ecf20Sopenharmony_ci bits_per_unit = osb->s_clustersize_bits - 4008c2ecf20Sopenharmony_ci inode->i_sb->s_blocksize_bits; 4018c2ecf20Sopenharmony_ci /* 4028c2ecf20Sopenharmony_ci * 'vict_blkno' was out of the valid range. 4038c2ecf20Sopenharmony_ci */ 4048c2ecf20Sopenharmony_ci if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || 4058c2ecf20Sopenharmony_ci (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << 4068c2ecf20Sopenharmony_ci bits_per_unit))) { 4078c2ecf20Sopenharmony_ci ret = -EINVAL; 4088c2ecf20Sopenharmony_ci goto out; 4098c2ecf20Sopenharmony_ci } 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci rec = &(cl->cl_recs[i]); 4148c2ecf20Sopenharmony_ci if (!rec) 4158c2ecf20Sopenharmony_ci continue; 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci bg = NULL; 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_ci do { 4208c2ecf20Sopenharmony_ci if (!bg) 4218c2ecf20Sopenharmony_ci blkno = le64_to_cpu(rec->c_blkno); 4228c2ecf20Sopenharmony_ci else 4238c2ecf20Sopenharmony_ci blkno = le64_to_cpu(bg->bg_next_group); 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci if (gd_bh) { 4268c2ecf20Sopenharmony_ci brelse(gd_bh); 4278c2ecf20Sopenharmony_ci gd_bh = NULL; 4288c2ecf20Sopenharmony_ci } 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); 4318c2ecf20Sopenharmony_ci if (ret) { 4328c2ecf20Sopenharmony_ci mlog_errno(ret); 4338c2ecf20Sopenharmony_ci goto out; 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci bg = (struct ocfs2_group_desc *)gd_bh->b_data; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + 4398c2ecf20Sopenharmony_ci (le16_to_cpu(bg->bg_bits) << bits_per_unit))) { 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci *ret_bh = gd_bh; 4428c2ecf20Sopenharmony_ci *vict_bit = (vict_blkno - blkno) >> 4438c2ecf20Sopenharmony_ci bits_per_unit; 4448c2ecf20Sopenharmony_ci mlog(0, "find the victim group: #%llu, " 4458c2ecf20Sopenharmony_ci "total_bits: %u, vict_bit: %u\n", 4468c2ecf20Sopenharmony_ci blkno, le16_to_cpu(bg->bg_bits), 4478c2ecf20Sopenharmony_ci *vict_bit); 4488c2ecf20Sopenharmony_ci goto out; 4498c2ecf20Sopenharmony_ci } 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci } while (le64_to_cpu(bg->bg_next_group)); 4528c2ecf20Sopenharmony_ci } 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci ret = -EINVAL; 4558c2ecf20Sopenharmony_ciout: 4568c2ecf20Sopenharmony_ci brelse(ac_bh); 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci /* 4598c2ecf20Sopenharmony_ci * caller has to release the gd_bh properly. 4608c2ecf20Sopenharmony_ci */ 4618c2ecf20Sopenharmony_ci return ret; 4628c2ecf20Sopenharmony_ci} 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci/* 4658c2ecf20Sopenharmony_ci * XXX: helper to validate and adjust moving goal. 4668c2ecf20Sopenharmony_ci */ 4678c2ecf20Sopenharmony_cistatic int ocfs2_validate_and_adjust_move_goal(struct inode *inode, 4688c2ecf20Sopenharmony_ci struct ocfs2_move_extents *range) 4698c2ecf20Sopenharmony_ci{ 4708c2ecf20Sopenharmony_ci int ret, goal_bit = 0; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci struct buffer_head *gd_bh = NULL; 4738c2ecf20Sopenharmony_ci struct ocfs2_group_desc *bg; 4748c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4758c2ecf20Sopenharmony_ci int c_to_b = 1 << (osb->s_clustersize_bits - 4768c2ecf20Sopenharmony_ci inode->i_sb->s_blocksize_bits); 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_ci /* 4798c2ecf20Sopenharmony_ci * make goal become cluster aligned. 4808c2ecf20Sopenharmony_ci */ 4818c2ecf20Sopenharmony_ci range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb, 4828c2ecf20Sopenharmony_ci range->me_goal); 4838c2ecf20Sopenharmony_ci /* 4848c2ecf20Sopenharmony_ci * validate goal sits within global_bitmap, and return the victim 4858c2ecf20Sopenharmony_ci * group desc 4868c2ecf20Sopenharmony_ci */ 4878c2ecf20Sopenharmony_ci ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, 4888c2ecf20Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 4898c2ecf20Sopenharmony_ci OCFS2_INVALID_SLOT, 4908c2ecf20Sopenharmony_ci &goal_bit, &gd_bh); 4918c2ecf20Sopenharmony_ci if (ret) 4928c2ecf20Sopenharmony_ci goto out; 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci bg = (struct ocfs2_group_desc *)gd_bh->b_data; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci /* 4978c2ecf20Sopenharmony_ci * moving goal is not allowd to start with a group desc blok(#0 blk) 4988c2ecf20Sopenharmony_ci * let's compromise to the latter cluster. 4998c2ecf20Sopenharmony_ci */ 5008c2ecf20Sopenharmony_ci if (range->me_goal == le64_to_cpu(bg->bg_blkno)) 5018c2ecf20Sopenharmony_ci range->me_goal += c_to_b; 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci /* 5048c2ecf20Sopenharmony_ci * movement is not gonna cross two groups. 5058c2ecf20Sopenharmony_ci */ 5068c2ecf20Sopenharmony_ci if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < 5078c2ecf20Sopenharmony_ci range->me_len) { 5088c2ecf20Sopenharmony_ci ret = -EINVAL; 5098c2ecf20Sopenharmony_ci goto out; 5108c2ecf20Sopenharmony_ci } 5118c2ecf20Sopenharmony_ci /* 5128c2ecf20Sopenharmony_ci * more exact validations/adjustments will be performed later during 5138c2ecf20Sopenharmony_ci * moving operation for each extent range. 5148c2ecf20Sopenharmony_ci */ 5158c2ecf20Sopenharmony_ci mlog(0, "extents get ready to be moved to #%llu block\n", 5168c2ecf20Sopenharmony_ci range->me_goal); 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ciout: 5198c2ecf20Sopenharmony_ci brelse(gd_bh); 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci return ret; 5228c2ecf20Sopenharmony_ci} 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_cistatic void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, 5258c2ecf20Sopenharmony_ci int *goal_bit, u32 move_len, u32 max_hop, 5268c2ecf20Sopenharmony_ci u32 *phys_cpos) 5278c2ecf20Sopenharmony_ci{ 5288c2ecf20Sopenharmony_ci int i, used, last_free_bits = 0, base_bit = *goal_bit; 5298c2ecf20Sopenharmony_ci struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 5308c2ecf20Sopenharmony_ci u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, 5318c2ecf20Sopenharmony_ci le64_to_cpu(gd->bg_blkno)); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); 5368c2ecf20Sopenharmony_ci if (used) { 5378c2ecf20Sopenharmony_ci /* 5388c2ecf20Sopenharmony_ci * we even tried searching the free chunk by jumping 5398c2ecf20Sopenharmony_ci * a 'max_hop' distance, but still failed. 5408c2ecf20Sopenharmony_ci */ 5418c2ecf20Sopenharmony_ci if ((i - base_bit) > max_hop) { 5428c2ecf20Sopenharmony_ci *phys_cpos = 0; 5438c2ecf20Sopenharmony_ci break; 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci if (last_free_bits) 5478c2ecf20Sopenharmony_ci last_free_bits = 0; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci continue; 5508c2ecf20Sopenharmony_ci } else 5518c2ecf20Sopenharmony_ci last_free_bits++; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci if (last_free_bits == move_len) { 5548c2ecf20Sopenharmony_ci i -= move_len; 5558c2ecf20Sopenharmony_ci *goal_bit = i; 5568c2ecf20Sopenharmony_ci *phys_cpos = base_cpos + i; 5578c2ecf20Sopenharmony_ci break; 5588c2ecf20Sopenharmony_ci } 5598c2ecf20Sopenharmony_ci } 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); 5628c2ecf20Sopenharmony_ci} 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_cistatic int ocfs2_move_extent(struct ocfs2_move_extents_context *context, 5658c2ecf20Sopenharmony_ci u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, 5668c2ecf20Sopenharmony_ci u32 len, int ext_flags) 5678c2ecf20Sopenharmony_ci{ 5688c2ecf20Sopenharmony_ci int ret, credits = 0, extra_blocks = 0, goal_bit = 0; 5698c2ecf20Sopenharmony_ci handle_t *handle; 5708c2ecf20Sopenharmony_ci struct inode *inode = context->inode; 5718c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5728c2ecf20Sopenharmony_ci struct inode *tl_inode = osb->osb_tl_inode; 5738c2ecf20Sopenharmony_ci struct inode *gb_inode = NULL; 5748c2ecf20Sopenharmony_ci struct buffer_head *gb_bh = NULL; 5758c2ecf20Sopenharmony_ci struct buffer_head *gd_bh = NULL; 5768c2ecf20Sopenharmony_ci struct ocfs2_group_desc *gd; 5778c2ecf20Sopenharmony_ci struct ocfs2_refcount_tree *ref_tree = NULL; 5788c2ecf20Sopenharmony_ci u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, 5798c2ecf20Sopenharmony_ci context->range->me_threshold); 5808c2ecf20Sopenharmony_ci u64 phys_blkno, new_phys_blkno; 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { 5858c2ecf20Sopenharmony_ci BUG_ON(!ocfs2_is_refcount_inode(inode)); 5868c2ecf20Sopenharmony_ci BUG_ON(!context->refcount_loc); 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, 5898c2ecf20Sopenharmony_ci &ref_tree, NULL); 5908c2ecf20Sopenharmony_ci if (ret) { 5918c2ecf20Sopenharmony_ci mlog_errno(ret); 5928c2ecf20Sopenharmony_ci return ret; 5938c2ecf20Sopenharmony_ci } 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci ret = ocfs2_prepare_refcount_change_for_del(inode, 5968c2ecf20Sopenharmony_ci context->refcount_loc, 5978c2ecf20Sopenharmony_ci phys_blkno, 5988c2ecf20Sopenharmony_ci len, 5998c2ecf20Sopenharmony_ci &credits, 6008c2ecf20Sopenharmony_ci &extra_blocks); 6018c2ecf20Sopenharmony_ci if (ret) { 6028c2ecf20Sopenharmony_ci mlog_errno(ret); 6038c2ecf20Sopenharmony_ci goto out; 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, 6088c2ecf20Sopenharmony_ci len, 1, 6098c2ecf20Sopenharmony_ci &context->meta_ac, 6108c2ecf20Sopenharmony_ci extra_blocks, &credits); 6118c2ecf20Sopenharmony_ci if (ret) { 6128c2ecf20Sopenharmony_ci mlog_errno(ret); 6138c2ecf20Sopenharmony_ci goto out; 6148c2ecf20Sopenharmony_ci } 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci /* 6178c2ecf20Sopenharmony_ci * need to count 2 extra credits for global_bitmap inode and 6188c2ecf20Sopenharmony_ci * group descriptor. 6198c2ecf20Sopenharmony_ci */ 6208c2ecf20Sopenharmony_ci credits += OCFS2_INODE_UPDATE_CREDITS + 1; 6218c2ecf20Sopenharmony_ci 6228c2ecf20Sopenharmony_ci /* 6238c2ecf20Sopenharmony_ci * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() 6248c2ecf20Sopenharmony_ci * logic, while we still need to lock the global_bitmap. 6258c2ecf20Sopenharmony_ci */ 6268c2ecf20Sopenharmony_ci gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, 6278c2ecf20Sopenharmony_ci OCFS2_INVALID_SLOT); 6288c2ecf20Sopenharmony_ci if (!gb_inode) { 6298c2ecf20Sopenharmony_ci mlog(ML_ERROR, "unable to get global_bitmap inode\n"); 6308c2ecf20Sopenharmony_ci ret = -EIO; 6318c2ecf20Sopenharmony_ci goto out; 6328c2ecf20Sopenharmony_ci } 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_ci inode_lock(gb_inode); 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); 6378c2ecf20Sopenharmony_ci if (ret) { 6388c2ecf20Sopenharmony_ci mlog_errno(ret); 6398c2ecf20Sopenharmony_ci goto out_unlock_gb_mutex; 6408c2ecf20Sopenharmony_ci } 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci inode_lock(tl_inode); 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 6458c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 6468c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 6478c2ecf20Sopenharmony_ci mlog_errno(ret); 6488c2ecf20Sopenharmony_ci goto out_unlock_tl_inode; 6498c2ecf20Sopenharmony_ci } 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); 6528c2ecf20Sopenharmony_ci ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, 6538c2ecf20Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 6548c2ecf20Sopenharmony_ci OCFS2_INVALID_SLOT, 6558c2ecf20Sopenharmony_ci &goal_bit, &gd_bh); 6568c2ecf20Sopenharmony_ci if (ret) { 6578c2ecf20Sopenharmony_ci mlog_errno(ret); 6588c2ecf20Sopenharmony_ci goto out_commit; 6598c2ecf20Sopenharmony_ci } 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci /* 6628c2ecf20Sopenharmony_ci * probe the victim cluster group to find a proper 6638c2ecf20Sopenharmony_ci * region to fit wanted movement, it even will perfrom 6648c2ecf20Sopenharmony_ci * a best-effort attempt by compromising to a threshold 6658c2ecf20Sopenharmony_ci * around the goal. 6668c2ecf20Sopenharmony_ci */ 6678c2ecf20Sopenharmony_ci ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, 6688c2ecf20Sopenharmony_ci new_phys_cpos); 6698c2ecf20Sopenharmony_ci if (!*new_phys_cpos) { 6708c2ecf20Sopenharmony_ci ret = -ENOSPC; 6718c2ecf20Sopenharmony_ci goto out_commit; 6728c2ecf20Sopenharmony_ci } 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, 6758c2ecf20Sopenharmony_ci *new_phys_cpos, ext_flags); 6768c2ecf20Sopenharmony_ci if (ret) { 6778c2ecf20Sopenharmony_ci mlog_errno(ret); 6788c2ecf20Sopenharmony_ci goto out_commit; 6798c2ecf20Sopenharmony_ci } 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci gd = (struct ocfs2_group_desc *)gd_bh->b_data; 6828c2ecf20Sopenharmony_ci ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, 6838c2ecf20Sopenharmony_ci le16_to_cpu(gd->bg_chain)); 6848c2ecf20Sopenharmony_ci if (ret) { 6858c2ecf20Sopenharmony_ci mlog_errno(ret); 6868c2ecf20Sopenharmony_ci goto out_commit; 6878c2ecf20Sopenharmony_ci } 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, 6908c2ecf20Sopenharmony_ci goal_bit, len); 6918c2ecf20Sopenharmony_ci if (ret) { 6928c2ecf20Sopenharmony_ci ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, 6938c2ecf20Sopenharmony_ci le16_to_cpu(gd->bg_chain)); 6948c2ecf20Sopenharmony_ci mlog_errno(ret); 6958c2ecf20Sopenharmony_ci } 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci /* 6988c2ecf20Sopenharmony_ci * Here we should write the new page out first if we are 6998c2ecf20Sopenharmony_ci * in write-back mode. 7008c2ecf20Sopenharmony_ci */ 7018c2ecf20Sopenharmony_ci ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); 7028c2ecf20Sopenharmony_ci if (ret) 7038c2ecf20Sopenharmony_ci mlog_errno(ret); 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ciout_commit: 7068c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 7078c2ecf20Sopenharmony_ci brelse(gd_bh); 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ciout_unlock_tl_inode: 7108c2ecf20Sopenharmony_ci inode_unlock(tl_inode); 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci ocfs2_inode_unlock(gb_inode, 1); 7138c2ecf20Sopenharmony_ciout_unlock_gb_mutex: 7148c2ecf20Sopenharmony_ci inode_unlock(gb_inode); 7158c2ecf20Sopenharmony_ci brelse(gb_bh); 7168c2ecf20Sopenharmony_ci iput(gb_inode); 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ciout: 7198c2ecf20Sopenharmony_ci if (context->meta_ac) { 7208c2ecf20Sopenharmony_ci ocfs2_free_alloc_context(context->meta_ac); 7218c2ecf20Sopenharmony_ci context->meta_ac = NULL; 7228c2ecf20Sopenharmony_ci } 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci if (ref_tree) 7258c2ecf20Sopenharmony_ci ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci return ret; 7288c2ecf20Sopenharmony_ci} 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci/* 7318c2ecf20Sopenharmony_ci * Helper to calculate the defraging length in one run according to threshold. 7328c2ecf20Sopenharmony_ci */ 7338c2ecf20Sopenharmony_cistatic void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, 7348c2ecf20Sopenharmony_ci u32 threshold, int *skip) 7358c2ecf20Sopenharmony_ci{ 7368c2ecf20Sopenharmony_ci if ((*alloc_size + *len_defraged) < threshold) { 7378c2ecf20Sopenharmony_ci /* 7388c2ecf20Sopenharmony_ci * proceed defragmentation until we meet the thresh 7398c2ecf20Sopenharmony_ci */ 7408c2ecf20Sopenharmony_ci *len_defraged += *alloc_size; 7418c2ecf20Sopenharmony_ci } else if (*len_defraged == 0) { 7428c2ecf20Sopenharmony_ci /* 7438c2ecf20Sopenharmony_ci * XXX: skip a large extent. 7448c2ecf20Sopenharmony_ci */ 7458c2ecf20Sopenharmony_ci *skip = 1; 7468c2ecf20Sopenharmony_ci } else { 7478c2ecf20Sopenharmony_ci /* 7488c2ecf20Sopenharmony_ci * split this extent to coalesce with former pieces as 7498c2ecf20Sopenharmony_ci * to reach the threshold. 7508c2ecf20Sopenharmony_ci * 7518c2ecf20Sopenharmony_ci * we're done here with one cycle of defragmentation 7528c2ecf20Sopenharmony_ci * in a size of 'thresh', resetting 'len_defraged' 7538c2ecf20Sopenharmony_ci * forces a new defragmentation. 7548c2ecf20Sopenharmony_ci */ 7558c2ecf20Sopenharmony_ci *alloc_size = threshold - *len_defraged; 7568c2ecf20Sopenharmony_ci *len_defraged = 0; 7578c2ecf20Sopenharmony_ci } 7588c2ecf20Sopenharmony_ci} 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_cistatic int __ocfs2_move_extents_range(struct buffer_head *di_bh, 7618c2ecf20Sopenharmony_ci struct ocfs2_move_extents_context *context) 7628c2ecf20Sopenharmony_ci{ 7638c2ecf20Sopenharmony_ci int ret = 0, flags, do_defrag, skip = 0; 7648c2ecf20Sopenharmony_ci u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; 7658c2ecf20Sopenharmony_ci u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci struct inode *inode = context->inode; 7688c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 7698c2ecf20Sopenharmony_ci struct ocfs2_move_extents *range = context->range; 7708c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci if ((i_size_read(inode) == 0) || (range->me_len == 0)) 7738c2ecf20Sopenharmony_ci return 0; 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 7768c2ecf20Sopenharmony_ci return 0; 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci context->refcount_loc = le64_to_cpu(di->i_refcount_loc); 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); 7818c2ecf20Sopenharmony_ci ocfs2_init_dealloc_ctxt(&context->dealloc); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci /* 7848c2ecf20Sopenharmony_ci * TO-DO XXX: 7858c2ecf20Sopenharmony_ci * 7868c2ecf20Sopenharmony_ci * - xattr extents. 7878c2ecf20Sopenharmony_ci */ 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci do_defrag = context->auto_defrag; 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci /* 7928c2ecf20Sopenharmony_ci * extents moving happens in unit of clusters, for the sake 7938c2ecf20Sopenharmony_ci * of simplicity, we may ignore two clusters where 'byte_start' 7948c2ecf20Sopenharmony_ci * and 'byte_start + len' were within. 7958c2ecf20Sopenharmony_ci */ 7968c2ecf20Sopenharmony_ci move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); 7978c2ecf20Sopenharmony_ci len_to_move = (range->me_start + range->me_len) >> 7988c2ecf20Sopenharmony_ci osb->s_clustersize_bits; 7998c2ecf20Sopenharmony_ci if (len_to_move >= move_start) 8008c2ecf20Sopenharmony_ci len_to_move -= move_start; 8018c2ecf20Sopenharmony_ci else 8028c2ecf20Sopenharmony_ci len_to_move = 0; 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci if (do_defrag) { 8058c2ecf20Sopenharmony_ci defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; 8068c2ecf20Sopenharmony_ci if (defrag_thresh <= 1) 8078c2ecf20Sopenharmony_ci goto done; 8088c2ecf20Sopenharmony_ci } else 8098c2ecf20Sopenharmony_ci new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, 8108c2ecf20Sopenharmony_ci range->me_goal); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " 8138c2ecf20Sopenharmony_ci "thresh: %u\n", 8148c2ecf20Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 8158c2ecf20Sopenharmony_ci (unsigned long long)range->me_start, 8168c2ecf20Sopenharmony_ci (unsigned long long)range->me_len, 8178c2ecf20Sopenharmony_ci move_start, len_to_move, defrag_thresh); 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci cpos = move_start; 8208c2ecf20Sopenharmony_ci while (len_to_move) { 8218c2ecf20Sopenharmony_ci ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, 8228c2ecf20Sopenharmony_ci &flags); 8238c2ecf20Sopenharmony_ci if (ret) { 8248c2ecf20Sopenharmony_ci mlog_errno(ret); 8258c2ecf20Sopenharmony_ci goto out; 8268c2ecf20Sopenharmony_ci } 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci if (alloc_size > len_to_move) 8298c2ecf20Sopenharmony_ci alloc_size = len_to_move; 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci /* 8328c2ecf20Sopenharmony_ci * XXX: how to deal with a hole: 8338c2ecf20Sopenharmony_ci * 8348c2ecf20Sopenharmony_ci * - skip the hole of course 8358c2ecf20Sopenharmony_ci * - force a new defragmentation 8368c2ecf20Sopenharmony_ci */ 8378c2ecf20Sopenharmony_ci if (!phys_cpos) { 8388c2ecf20Sopenharmony_ci if (do_defrag) 8398c2ecf20Sopenharmony_ci len_defraged = 0; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci goto next; 8428c2ecf20Sopenharmony_ci } 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci if (do_defrag) { 8458c2ecf20Sopenharmony_ci ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, 8468c2ecf20Sopenharmony_ci defrag_thresh, &skip); 8478c2ecf20Sopenharmony_ci /* 8488c2ecf20Sopenharmony_ci * skip large extents 8498c2ecf20Sopenharmony_ci */ 8508c2ecf20Sopenharmony_ci if (skip) { 8518c2ecf20Sopenharmony_ci skip = 0; 8528c2ecf20Sopenharmony_ci goto next; 8538c2ecf20Sopenharmony_ci } 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_ci mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " 8568c2ecf20Sopenharmony_ci "alloc_size: %u, len_defraged: %u\n", 8578c2ecf20Sopenharmony_ci cpos, phys_cpos, alloc_size, len_defraged); 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ci ret = ocfs2_defrag_extent(context, cpos, phys_cpos, 8608c2ecf20Sopenharmony_ci &alloc_size, flags); 8618c2ecf20Sopenharmony_ci } else { 8628c2ecf20Sopenharmony_ci ret = ocfs2_move_extent(context, cpos, phys_cpos, 8638c2ecf20Sopenharmony_ci &new_phys_cpos, alloc_size, 8648c2ecf20Sopenharmony_ci flags); 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci new_phys_cpos += alloc_size; 8678c2ecf20Sopenharmony_ci } 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci if (ret < 0) { 8708c2ecf20Sopenharmony_ci mlog_errno(ret); 8718c2ecf20Sopenharmony_ci goto out; 8728c2ecf20Sopenharmony_ci } 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci context->clusters_moved += alloc_size; 8758c2ecf20Sopenharmony_cinext: 8768c2ecf20Sopenharmony_ci cpos += alloc_size; 8778c2ecf20Sopenharmony_ci len_to_move -= alloc_size; 8788c2ecf20Sopenharmony_ci } 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_cidone: 8818c2ecf20Sopenharmony_ci range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; 8828c2ecf20Sopenharmony_ci 8838c2ecf20Sopenharmony_ciout: 8848c2ecf20Sopenharmony_ci range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, 8858c2ecf20Sopenharmony_ci context->clusters_moved); 8868c2ecf20Sopenharmony_ci range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, 8878c2ecf20Sopenharmony_ci context->new_phys_cpos); 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci ocfs2_schedule_truncate_log_flush(osb, 1); 8908c2ecf20Sopenharmony_ci ocfs2_run_deallocs(osb, &context->dealloc); 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci return ret; 8938c2ecf20Sopenharmony_ci} 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_cistatic int ocfs2_move_extents(struct ocfs2_move_extents_context *context) 8968c2ecf20Sopenharmony_ci{ 8978c2ecf20Sopenharmony_ci int status; 8988c2ecf20Sopenharmony_ci handle_t *handle; 8998c2ecf20Sopenharmony_ci struct inode *inode = context->inode; 9008c2ecf20Sopenharmony_ci struct ocfs2_dinode *di; 9018c2ecf20Sopenharmony_ci struct buffer_head *di_bh = NULL; 9028c2ecf20Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 9058c2ecf20Sopenharmony_ci return -EROFS; 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci inode_lock(inode); 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci /* 9108c2ecf20Sopenharmony_ci * This prevents concurrent writes from other nodes 9118c2ecf20Sopenharmony_ci */ 9128c2ecf20Sopenharmony_ci status = ocfs2_rw_lock(inode, 1); 9138c2ecf20Sopenharmony_ci if (status) { 9148c2ecf20Sopenharmony_ci mlog_errno(status); 9158c2ecf20Sopenharmony_ci goto out; 9168c2ecf20Sopenharmony_ci } 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci status = ocfs2_inode_lock(inode, &di_bh, 1); 9198c2ecf20Sopenharmony_ci if (status) { 9208c2ecf20Sopenharmony_ci mlog_errno(status); 9218c2ecf20Sopenharmony_ci goto out_rw_unlock; 9228c2ecf20Sopenharmony_ci } 9238c2ecf20Sopenharmony_ci 9248c2ecf20Sopenharmony_ci /* 9258c2ecf20Sopenharmony_ci * rememer ip_xattr_sem also needs to be held if necessary 9268c2ecf20Sopenharmony_ci */ 9278c2ecf20Sopenharmony_ci down_write(&OCFS2_I(inode)->ip_alloc_sem); 9288c2ecf20Sopenharmony_ci 9298c2ecf20Sopenharmony_ci status = __ocfs2_move_extents_range(di_bh, context); 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_ci up_write(&OCFS2_I(inode)->ip_alloc_sem); 9328c2ecf20Sopenharmony_ci if (status) { 9338c2ecf20Sopenharmony_ci mlog_errno(status); 9348c2ecf20Sopenharmony_ci goto out_inode_unlock; 9358c2ecf20Sopenharmony_ci } 9368c2ecf20Sopenharmony_ci 9378c2ecf20Sopenharmony_ci /* 9388c2ecf20Sopenharmony_ci * We update ctime for these changes 9398c2ecf20Sopenharmony_ci */ 9408c2ecf20Sopenharmony_ci handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 9418c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 9428c2ecf20Sopenharmony_ci status = PTR_ERR(handle); 9438c2ecf20Sopenharmony_ci mlog_errno(status); 9448c2ecf20Sopenharmony_ci goto out_inode_unlock; 9458c2ecf20Sopenharmony_ci } 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 9488c2ecf20Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 9498c2ecf20Sopenharmony_ci if (status) { 9508c2ecf20Sopenharmony_ci mlog_errno(status); 9518c2ecf20Sopenharmony_ci goto out_commit; 9528c2ecf20Sopenharmony_ci } 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci di = (struct ocfs2_dinode *)di_bh->b_data; 9558c2ecf20Sopenharmony_ci inode->i_ctime = current_time(inode); 9568c2ecf20Sopenharmony_ci di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 9578c2ecf20Sopenharmony_ci di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 9588c2ecf20Sopenharmony_ci ocfs2_update_inode_fsync_trans(handle, inode, 0); 9598c2ecf20Sopenharmony_ci 9608c2ecf20Sopenharmony_ci ocfs2_journal_dirty(handle, di_bh); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ciout_commit: 9638c2ecf20Sopenharmony_ci ocfs2_commit_trans(osb, handle); 9648c2ecf20Sopenharmony_ci 9658c2ecf20Sopenharmony_ciout_inode_unlock: 9668c2ecf20Sopenharmony_ci brelse(di_bh); 9678c2ecf20Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 9688c2ecf20Sopenharmony_ciout_rw_unlock: 9698c2ecf20Sopenharmony_ci ocfs2_rw_unlock(inode, 1); 9708c2ecf20Sopenharmony_ciout: 9718c2ecf20Sopenharmony_ci inode_unlock(inode); 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci return status; 9748c2ecf20Sopenharmony_ci} 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_ciint ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) 9778c2ecf20Sopenharmony_ci{ 9788c2ecf20Sopenharmony_ci int status; 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci struct inode *inode = file_inode(filp); 9818c2ecf20Sopenharmony_ci struct ocfs2_move_extents range; 9828c2ecf20Sopenharmony_ci struct ocfs2_move_extents_context *context; 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci if (!argp) 9858c2ecf20Sopenharmony_ci return -EINVAL; 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci status = mnt_want_write_file(filp); 9888c2ecf20Sopenharmony_ci if (status) 9898c2ecf20Sopenharmony_ci return status; 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) { 9928c2ecf20Sopenharmony_ci status = -EPERM; 9938c2ecf20Sopenharmony_ci goto out_drop; 9948c2ecf20Sopenharmony_ci } 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { 9978c2ecf20Sopenharmony_ci status = -EPERM; 9988c2ecf20Sopenharmony_ci goto out_drop; 9998c2ecf20Sopenharmony_ci } 10008c2ecf20Sopenharmony_ci 10018c2ecf20Sopenharmony_ci context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); 10028c2ecf20Sopenharmony_ci if (!context) { 10038c2ecf20Sopenharmony_ci status = -ENOMEM; 10048c2ecf20Sopenharmony_ci mlog_errno(status); 10058c2ecf20Sopenharmony_ci goto out_drop; 10068c2ecf20Sopenharmony_ci } 10078c2ecf20Sopenharmony_ci 10088c2ecf20Sopenharmony_ci context->inode = inode; 10098c2ecf20Sopenharmony_ci context->file = filp; 10108c2ecf20Sopenharmony_ci 10118c2ecf20Sopenharmony_ci if (copy_from_user(&range, argp, sizeof(range))) { 10128c2ecf20Sopenharmony_ci status = -EFAULT; 10138c2ecf20Sopenharmony_ci goto out_free; 10148c2ecf20Sopenharmony_ci } 10158c2ecf20Sopenharmony_ci 10168c2ecf20Sopenharmony_ci if (range.me_start > i_size_read(inode)) { 10178c2ecf20Sopenharmony_ci status = -EINVAL; 10188c2ecf20Sopenharmony_ci goto out_free; 10198c2ecf20Sopenharmony_ci } 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_ci if (range.me_start + range.me_len > i_size_read(inode)) 10228c2ecf20Sopenharmony_ci range.me_len = i_size_read(inode) - range.me_start; 10238c2ecf20Sopenharmony_ci 10248c2ecf20Sopenharmony_ci context->range = ⦥ 10258c2ecf20Sopenharmony_ci 10268c2ecf20Sopenharmony_ci /* 10278c2ecf20Sopenharmony_ci * ok, the default theshold for the defragmentation 10288c2ecf20Sopenharmony_ci * is 1M, since our maximum clustersize was 1M also. 10298c2ecf20Sopenharmony_ci * any thought? 10308c2ecf20Sopenharmony_ci */ 10318c2ecf20Sopenharmony_ci if (!range.me_threshold) 10328c2ecf20Sopenharmony_ci range.me_threshold = 1024 * 1024; 10338c2ecf20Sopenharmony_ci 10348c2ecf20Sopenharmony_ci if (range.me_threshold > i_size_read(inode)) 10358c2ecf20Sopenharmony_ci range.me_threshold = i_size_read(inode); 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { 10388c2ecf20Sopenharmony_ci context->auto_defrag = 1; 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) 10418c2ecf20Sopenharmony_ci context->partial = 1; 10428c2ecf20Sopenharmony_ci } else { 10438c2ecf20Sopenharmony_ci /* 10448c2ecf20Sopenharmony_ci * first best-effort attempt to validate and adjust the goal 10458c2ecf20Sopenharmony_ci * (physical address in block), while it can't guarantee later 10468c2ecf20Sopenharmony_ci * operation can succeed all the time since global_bitmap may 10478c2ecf20Sopenharmony_ci * change a bit over time. 10488c2ecf20Sopenharmony_ci */ 10498c2ecf20Sopenharmony_ci 10508c2ecf20Sopenharmony_ci status = ocfs2_validate_and_adjust_move_goal(inode, &range); 10518c2ecf20Sopenharmony_ci if (status) 10528c2ecf20Sopenharmony_ci goto out_copy; 10538c2ecf20Sopenharmony_ci } 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci status = ocfs2_move_extents(context); 10568c2ecf20Sopenharmony_ci if (status) 10578c2ecf20Sopenharmony_ci mlog_errno(status); 10588c2ecf20Sopenharmony_ciout_copy: 10598c2ecf20Sopenharmony_ci /* 10608c2ecf20Sopenharmony_ci * movement/defragmentation may end up being partially completed, 10618c2ecf20Sopenharmony_ci * that's the reason why we need to return userspace the finished 10628c2ecf20Sopenharmony_ci * length and new_offset even if failure happens somewhere. 10638c2ecf20Sopenharmony_ci */ 10648c2ecf20Sopenharmony_ci if (copy_to_user(argp, &range, sizeof(range))) 10658c2ecf20Sopenharmony_ci status = -EFAULT; 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ciout_free: 10688c2ecf20Sopenharmony_ci kfree(context); 10698c2ecf20Sopenharmony_ciout_drop: 10708c2ecf20Sopenharmony_ci mnt_drop_write_file(filp); 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci return status; 10738c2ecf20Sopenharmony_ci} 1074