162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * move_extents.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2011 Oracle. All rights reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include <linux/fs.h> 862306a36Sopenharmony_ci#include <linux/types.h> 962306a36Sopenharmony_ci#include <linux/mount.h> 1062306a36Sopenharmony_ci#include <linux/swap.h> 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <cluster/masklog.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include "ocfs2.h" 1562306a36Sopenharmony_ci#include "ocfs2_ioctl.h" 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include "alloc.h" 1862306a36Sopenharmony_ci#include "localalloc.h" 1962306a36Sopenharmony_ci#include "aops.h" 2062306a36Sopenharmony_ci#include "dlmglue.h" 2162306a36Sopenharmony_ci#include "extent_map.h" 2262306a36Sopenharmony_ci#include "inode.h" 2362306a36Sopenharmony_ci#include "journal.h" 2462306a36Sopenharmony_ci#include "suballoc.h" 2562306a36Sopenharmony_ci#include "uptodate.h" 2662306a36Sopenharmony_ci#include "super.h" 2762306a36Sopenharmony_ci#include "dir.h" 2862306a36Sopenharmony_ci#include "buffer_head_io.h" 2962306a36Sopenharmony_ci#include "sysfile.h" 3062306a36Sopenharmony_ci#include "refcounttree.h" 3162306a36Sopenharmony_ci#include "move_extents.h" 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_cistruct ocfs2_move_extents_context { 3462306a36Sopenharmony_ci struct inode *inode; 3562306a36Sopenharmony_ci struct file *file; 3662306a36Sopenharmony_ci int auto_defrag; 3762306a36Sopenharmony_ci int partial; 3862306a36Sopenharmony_ci int credits; 3962306a36Sopenharmony_ci u32 new_phys_cpos; 4062306a36Sopenharmony_ci u32 clusters_moved; 4162306a36Sopenharmony_ci u64 refcount_loc; 4262306a36Sopenharmony_ci struct ocfs2_move_extents *range; 4362306a36Sopenharmony_ci struct ocfs2_extent_tree et; 4462306a36Sopenharmony_ci struct ocfs2_alloc_context *meta_ac; 4562306a36Sopenharmony_ci struct ocfs2_alloc_context *data_ac; 4662306a36Sopenharmony_ci struct ocfs2_cached_dealloc_ctxt dealloc; 4762306a36Sopenharmony_ci}; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistatic int __ocfs2_move_extent(handle_t *handle, 5062306a36Sopenharmony_ci struct ocfs2_move_extents_context *context, 5162306a36Sopenharmony_ci u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, 5262306a36Sopenharmony_ci int ext_flags) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci int ret = 0, index; 5562306a36Sopenharmony_ci struct inode *inode = context->inode; 5662306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5762306a36Sopenharmony_ci struct ocfs2_extent_rec *rec, replace_rec; 5862306a36Sopenharmony_ci struct ocfs2_path *path = NULL; 5962306a36Sopenharmony_ci struct ocfs2_extent_list *el; 6062306a36Sopenharmony_ci u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); 6162306a36Sopenharmony_ci u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, 6462306a36Sopenharmony_ci p_cpos, new_p_cpos, len); 6562306a36Sopenharmony_ci if (ret) { 6662306a36Sopenharmony_ci mlog_errno(ret); 6762306a36Sopenharmony_ci goto out; 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci memset(&replace_rec, 0, sizeof(replace_rec)); 7162306a36Sopenharmony_ci replace_rec.e_cpos = cpu_to_le32(cpos); 7262306a36Sopenharmony_ci replace_rec.e_leaf_clusters = cpu_to_le16(len); 7362306a36Sopenharmony_ci replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, 7462306a36Sopenharmony_ci new_p_cpos)); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci path = ocfs2_new_path_from_et(&context->et); 7762306a36Sopenharmony_ci if (!path) { 7862306a36Sopenharmony_ci ret = -ENOMEM; 7962306a36Sopenharmony_ci mlog_errno(ret); 8062306a36Sopenharmony_ci goto out; 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); 8462306a36Sopenharmony_ci if (ret) { 8562306a36Sopenharmony_ci mlog_errno(ret); 8662306a36Sopenharmony_ci goto out; 8762306a36Sopenharmony_ci } 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci el = path_leaf_el(path); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci index = ocfs2_search_extent_list(el, cpos); 9262306a36Sopenharmony_ci if (index == -1) { 9362306a36Sopenharmony_ci ret = ocfs2_error(inode->i_sb, 9462306a36Sopenharmony_ci "Inode %llu has an extent at cpos %u which can no longer be found\n", 9562306a36Sopenharmony_ci (unsigned long long)ino, cpos); 9662306a36Sopenharmony_ci goto out; 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci rec = &el->l_recs[index]; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci BUG_ON(ext_flags != rec->e_flags); 10262306a36Sopenharmony_ci /* 10362306a36Sopenharmony_ci * after moving/defraging to new location, the extent is not going 10462306a36Sopenharmony_ci * to be refcounted anymore. 10562306a36Sopenharmony_ci */ 10662306a36Sopenharmony_ci replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci ret = ocfs2_split_extent(handle, &context->et, path, index, 10962306a36Sopenharmony_ci &replace_rec, context->meta_ac, 11062306a36Sopenharmony_ci &context->dealloc); 11162306a36Sopenharmony_ci if (ret) { 11262306a36Sopenharmony_ci mlog_errno(ret); 11362306a36Sopenharmony_ci goto out; 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci context->new_phys_cpos = new_p_cpos; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* 11962306a36Sopenharmony_ci * need I to append truncate log for old clusters? 12062306a36Sopenharmony_ci */ 12162306a36Sopenharmony_ci if (old_blkno) { 12262306a36Sopenharmony_ci if (ext_flags & OCFS2_EXT_REFCOUNTED) 12362306a36Sopenharmony_ci ret = ocfs2_decrease_refcount(inode, handle, 12462306a36Sopenharmony_ci ocfs2_blocks_to_clusters(osb->sb, 12562306a36Sopenharmony_ci old_blkno), 12662306a36Sopenharmony_ci len, context->meta_ac, 12762306a36Sopenharmony_ci &context->dealloc, 1); 12862306a36Sopenharmony_ci else 12962306a36Sopenharmony_ci ret = ocfs2_truncate_log_append(osb, handle, 13062306a36Sopenharmony_ci old_blkno, len); 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci ocfs2_update_inode_fsync_trans(handle, inode, 0); 13462306a36Sopenharmony_ciout: 13562306a36Sopenharmony_ci ocfs2_free_path(path); 13662306a36Sopenharmony_ci return ret; 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci/* 14062306a36Sopenharmony_ci * lock allocator, and reserve appropriate number of bits for 14162306a36Sopenharmony_ci * meta blocks. 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_cistatic int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, 14462306a36Sopenharmony_ci struct ocfs2_extent_tree *et, 14562306a36Sopenharmony_ci u32 clusters_to_move, 14662306a36Sopenharmony_ci u32 extents_to_split, 14762306a36Sopenharmony_ci struct ocfs2_alloc_context **meta_ac, 14862306a36Sopenharmony_ci int extra_blocks, 14962306a36Sopenharmony_ci int *credits) 15062306a36Sopenharmony_ci{ 15162306a36Sopenharmony_ci int ret, num_free_extents; 15262306a36Sopenharmony_ci unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; 15362306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci num_free_extents = ocfs2_num_free_extents(et); 15662306a36Sopenharmony_ci if (num_free_extents < 0) { 15762306a36Sopenharmony_ci ret = num_free_extents; 15862306a36Sopenharmony_ci mlog_errno(ret); 15962306a36Sopenharmony_ci goto out; 16062306a36Sopenharmony_ci } 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci if (!num_free_extents || 16362306a36Sopenharmony_ci (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) 16462306a36Sopenharmony_ci extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); 16762306a36Sopenharmony_ci if (ret) { 16862306a36Sopenharmony_ci mlog_errno(ret); 16962306a36Sopenharmony_ci goto out; 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", 17662306a36Sopenharmony_ci extra_blocks, clusters_to_move, *credits); 17762306a36Sopenharmony_ciout: 17862306a36Sopenharmony_ci if (ret) { 17962306a36Sopenharmony_ci if (*meta_ac) { 18062306a36Sopenharmony_ci ocfs2_free_alloc_context(*meta_ac); 18162306a36Sopenharmony_ci *meta_ac = NULL; 18262306a36Sopenharmony_ci } 18362306a36Sopenharmony_ci } 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci return ret; 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci/* 18962306a36Sopenharmony_ci * Using one journal handle to guarantee the data consistency in case 19062306a36Sopenharmony_ci * crash happens anywhere. 19162306a36Sopenharmony_ci * 19262306a36Sopenharmony_ci * XXX: defrag can end up with finishing partial extent as requested, 19362306a36Sopenharmony_ci * due to not enough contiguous clusters can be found in allocator. 19462306a36Sopenharmony_ci */ 19562306a36Sopenharmony_cistatic int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, 19662306a36Sopenharmony_ci u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci int ret, credits = 0, extra_blocks = 0, partial = context->partial; 19962306a36Sopenharmony_ci handle_t *handle; 20062306a36Sopenharmony_ci struct inode *inode = context->inode; 20162306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 20262306a36Sopenharmony_ci struct inode *tl_inode = osb->osb_tl_inode; 20362306a36Sopenharmony_ci struct ocfs2_refcount_tree *ref_tree = NULL; 20462306a36Sopenharmony_ci u32 new_phys_cpos, new_len; 20562306a36Sopenharmony_ci u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 20662306a36Sopenharmony_ci int need_free = 0; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { 20962306a36Sopenharmony_ci BUG_ON(!ocfs2_is_refcount_inode(inode)); 21062306a36Sopenharmony_ci BUG_ON(!context->refcount_loc); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, 21362306a36Sopenharmony_ci &ref_tree, NULL); 21462306a36Sopenharmony_ci if (ret) { 21562306a36Sopenharmony_ci mlog_errno(ret); 21662306a36Sopenharmony_ci return ret; 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci ret = ocfs2_prepare_refcount_change_for_del(inode, 22062306a36Sopenharmony_ci context->refcount_loc, 22162306a36Sopenharmony_ci phys_blkno, 22262306a36Sopenharmony_ci *len, 22362306a36Sopenharmony_ci &credits, 22462306a36Sopenharmony_ci &extra_blocks); 22562306a36Sopenharmony_ci if (ret) { 22662306a36Sopenharmony_ci mlog_errno(ret); 22762306a36Sopenharmony_ci goto out; 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, 23262306a36Sopenharmony_ci *len, 1, 23362306a36Sopenharmony_ci &context->meta_ac, 23462306a36Sopenharmony_ci extra_blocks, &credits); 23562306a36Sopenharmony_ci if (ret) { 23662306a36Sopenharmony_ci mlog_errno(ret); 23762306a36Sopenharmony_ci goto out; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci /* 24162306a36Sopenharmony_ci * should be using allocation reservation strategy there? 24262306a36Sopenharmony_ci * 24362306a36Sopenharmony_ci * if (context->data_ac) 24462306a36Sopenharmony_ci * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; 24562306a36Sopenharmony_ci */ 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci inode_lock(tl_inode); 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci if (ocfs2_truncate_log_needs_flush(osb)) { 25062306a36Sopenharmony_ci ret = __ocfs2_flush_truncate_log(osb); 25162306a36Sopenharmony_ci if (ret < 0) { 25262306a36Sopenharmony_ci mlog_errno(ret); 25362306a36Sopenharmony_ci goto out_unlock_mutex; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci } 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci /* 25862306a36Sopenharmony_ci * Make sure ocfs2_reserve_cluster is called after 25962306a36Sopenharmony_ci * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. 26062306a36Sopenharmony_ci * 26162306a36Sopenharmony_ci * If ocfs2_reserve_cluster is called 26262306a36Sopenharmony_ci * before __ocfs2_flush_truncate_log, dead lock on global bitmap 26362306a36Sopenharmony_ci * may happen. 26462306a36Sopenharmony_ci * 26562306a36Sopenharmony_ci */ 26662306a36Sopenharmony_ci ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); 26762306a36Sopenharmony_ci if (ret) { 26862306a36Sopenharmony_ci mlog_errno(ret); 26962306a36Sopenharmony_ci goto out_unlock_mutex; 27062306a36Sopenharmony_ci } 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 27362306a36Sopenharmony_ci if (IS_ERR(handle)) { 27462306a36Sopenharmony_ci ret = PTR_ERR(handle); 27562306a36Sopenharmony_ci mlog_errno(ret); 27662306a36Sopenharmony_ci goto out_unlock_mutex; 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, 28062306a36Sopenharmony_ci &new_phys_cpos, &new_len); 28162306a36Sopenharmony_ci if (ret) { 28262306a36Sopenharmony_ci mlog_errno(ret); 28362306a36Sopenharmony_ci goto out_commit; 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci /* 28762306a36Sopenharmony_ci * allowing partial extent moving is kind of 'pros and cons', it makes 28862306a36Sopenharmony_ci * whole defragmentation less likely to fail, on the contrary, the bad 28962306a36Sopenharmony_ci * thing is it may make the fs even more fragmented after moving, let 29062306a36Sopenharmony_ci * userspace make a good decision here. 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_ci if (new_len != *len) { 29362306a36Sopenharmony_ci mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); 29462306a36Sopenharmony_ci if (!partial) { 29562306a36Sopenharmony_ci context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; 29662306a36Sopenharmony_ci ret = -ENOSPC; 29762306a36Sopenharmony_ci need_free = 1; 29862306a36Sopenharmony_ci goto out_commit; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, 30362306a36Sopenharmony_ci phys_cpos, new_phys_cpos); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, 30662306a36Sopenharmony_ci new_phys_cpos, ext_flags); 30762306a36Sopenharmony_ci if (ret) 30862306a36Sopenharmony_ci mlog_errno(ret); 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci if (partial && (new_len != *len)) 31162306a36Sopenharmony_ci *len = new_len; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci /* 31462306a36Sopenharmony_ci * Here we should write the new page out first if we are 31562306a36Sopenharmony_ci * in write-back mode. 31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_ci ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); 31862306a36Sopenharmony_ci if (ret) 31962306a36Sopenharmony_ci mlog_errno(ret); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ciout_commit: 32262306a36Sopenharmony_ci if (need_free && context->data_ac) { 32362306a36Sopenharmony_ci struct ocfs2_alloc_context *data_ac = context->data_ac; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) 32662306a36Sopenharmony_ci ocfs2_free_local_alloc_bits(osb, handle, data_ac, 32762306a36Sopenharmony_ci new_phys_cpos, new_len); 32862306a36Sopenharmony_ci else 32962306a36Sopenharmony_ci ocfs2_free_clusters(handle, 33062306a36Sopenharmony_ci data_ac->ac_inode, 33162306a36Sopenharmony_ci data_ac->ac_bh, 33262306a36Sopenharmony_ci ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), 33362306a36Sopenharmony_ci new_len); 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci ocfs2_commit_trans(osb, handle); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ciout_unlock_mutex: 33962306a36Sopenharmony_ci inode_unlock(tl_inode); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci if (context->data_ac) { 34262306a36Sopenharmony_ci ocfs2_free_alloc_context(context->data_ac); 34362306a36Sopenharmony_ci context->data_ac = NULL; 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci if (context->meta_ac) { 34762306a36Sopenharmony_ci ocfs2_free_alloc_context(context->meta_ac); 34862306a36Sopenharmony_ci context->meta_ac = NULL; 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ciout: 35262306a36Sopenharmony_ci if (ref_tree) 35362306a36Sopenharmony_ci ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci return ret; 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci/* 35962306a36Sopenharmony_ci * find the victim alloc group, where #blkno fits. 36062306a36Sopenharmony_ci */ 36162306a36Sopenharmony_cistatic int ocfs2_find_victim_alloc_group(struct inode *inode, 36262306a36Sopenharmony_ci u64 vict_blkno, 36362306a36Sopenharmony_ci int type, int slot, 36462306a36Sopenharmony_ci int *vict_bit, 36562306a36Sopenharmony_ci struct buffer_head **ret_bh) 36662306a36Sopenharmony_ci{ 36762306a36Sopenharmony_ci int ret, i, bits_per_unit = 0; 36862306a36Sopenharmony_ci u64 blkno; 36962306a36Sopenharmony_ci char namebuf[40]; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 37262306a36Sopenharmony_ci struct buffer_head *ac_bh = NULL, *gd_bh = NULL; 37362306a36Sopenharmony_ci struct ocfs2_chain_list *cl; 37462306a36Sopenharmony_ci struct ocfs2_chain_rec *rec; 37562306a36Sopenharmony_ci struct ocfs2_dinode *ac_dinode; 37662306a36Sopenharmony_ci struct ocfs2_group_desc *bg; 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); 37962306a36Sopenharmony_ci ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, 38062306a36Sopenharmony_ci strlen(namebuf), &blkno); 38162306a36Sopenharmony_ci if (ret) { 38262306a36Sopenharmony_ci ret = -ENOENT; 38362306a36Sopenharmony_ci goto out; 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); 38762306a36Sopenharmony_ci if (ret) { 38862306a36Sopenharmony_ci mlog_errno(ret); 38962306a36Sopenharmony_ci goto out; 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; 39362306a36Sopenharmony_ci cl = &(ac_dinode->id2.i_chain); 39462306a36Sopenharmony_ci rec = &(cl->cl_recs[0]); 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci if (type == GLOBAL_BITMAP_SYSTEM_INODE) 39762306a36Sopenharmony_ci bits_per_unit = osb->s_clustersize_bits - 39862306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits; 39962306a36Sopenharmony_ci /* 40062306a36Sopenharmony_ci * 'vict_blkno' was out of the valid range. 40162306a36Sopenharmony_ci */ 40262306a36Sopenharmony_ci if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || 40362306a36Sopenharmony_ci (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << 40462306a36Sopenharmony_ci bits_per_unit))) { 40562306a36Sopenharmony_ci ret = -EINVAL; 40662306a36Sopenharmony_ci goto out; 40762306a36Sopenharmony_ci } 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci rec = &(cl->cl_recs[i]); 41262306a36Sopenharmony_ci if (!rec) 41362306a36Sopenharmony_ci continue; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci bg = NULL; 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci do { 41862306a36Sopenharmony_ci if (!bg) 41962306a36Sopenharmony_ci blkno = le64_to_cpu(rec->c_blkno); 42062306a36Sopenharmony_ci else 42162306a36Sopenharmony_ci blkno = le64_to_cpu(bg->bg_next_group); 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci if (gd_bh) { 42462306a36Sopenharmony_ci brelse(gd_bh); 42562306a36Sopenharmony_ci gd_bh = NULL; 42662306a36Sopenharmony_ci } 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); 42962306a36Sopenharmony_ci if (ret) { 43062306a36Sopenharmony_ci mlog_errno(ret); 43162306a36Sopenharmony_ci goto out; 43262306a36Sopenharmony_ci } 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci bg = (struct ocfs2_group_desc *)gd_bh->b_data; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + 43762306a36Sopenharmony_ci (le16_to_cpu(bg->bg_bits) << bits_per_unit))) { 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci *ret_bh = gd_bh; 44062306a36Sopenharmony_ci *vict_bit = (vict_blkno - blkno) >> 44162306a36Sopenharmony_ci bits_per_unit; 44262306a36Sopenharmony_ci mlog(0, "find the victim group: #%llu, " 44362306a36Sopenharmony_ci "total_bits: %u, vict_bit: %u\n", 44462306a36Sopenharmony_ci blkno, le16_to_cpu(bg->bg_bits), 44562306a36Sopenharmony_ci *vict_bit); 44662306a36Sopenharmony_ci goto out; 44762306a36Sopenharmony_ci } 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci } while (le64_to_cpu(bg->bg_next_group)); 45062306a36Sopenharmony_ci } 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci ret = -EINVAL; 45362306a36Sopenharmony_ciout: 45462306a36Sopenharmony_ci brelse(ac_bh); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci /* 45762306a36Sopenharmony_ci * caller has to release the gd_bh properly. 45862306a36Sopenharmony_ci */ 45962306a36Sopenharmony_ci return ret; 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci/* 46362306a36Sopenharmony_ci * XXX: helper to validate and adjust moving goal. 46462306a36Sopenharmony_ci */ 46562306a36Sopenharmony_cistatic int ocfs2_validate_and_adjust_move_goal(struct inode *inode, 46662306a36Sopenharmony_ci struct ocfs2_move_extents *range) 46762306a36Sopenharmony_ci{ 46862306a36Sopenharmony_ci int ret, goal_bit = 0; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci struct buffer_head *gd_bh = NULL; 47162306a36Sopenharmony_ci struct ocfs2_group_desc *bg; 47262306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 47362306a36Sopenharmony_ci int c_to_b = 1 << (osb->s_clustersize_bits - 47462306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits); 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci /* 47762306a36Sopenharmony_ci * make goal become cluster aligned. 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_ci range->me_goal = ocfs2_block_to_cluster_start(inode->i_sb, 48062306a36Sopenharmony_ci range->me_goal); 48162306a36Sopenharmony_ci /* 48262306a36Sopenharmony_ci * validate goal sits within global_bitmap, and return the victim 48362306a36Sopenharmony_ci * group desc 48462306a36Sopenharmony_ci */ 48562306a36Sopenharmony_ci ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, 48662306a36Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 48762306a36Sopenharmony_ci OCFS2_INVALID_SLOT, 48862306a36Sopenharmony_ci &goal_bit, &gd_bh); 48962306a36Sopenharmony_ci if (ret) 49062306a36Sopenharmony_ci goto out; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci bg = (struct ocfs2_group_desc *)gd_bh->b_data; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci /* 49562306a36Sopenharmony_ci * moving goal is not allowd to start with a group desc blok(#0 blk) 49662306a36Sopenharmony_ci * let's compromise to the latter cluster. 49762306a36Sopenharmony_ci */ 49862306a36Sopenharmony_ci if (range->me_goal == le64_to_cpu(bg->bg_blkno)) 49962306a36Sopenharmony_ci range->me_goal += c_to_b; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci /* 50262306a36Sopenharmony_ci * movement is not gonna cross two groups. 50362306a36Sopenharmony_ci */ 50462306a36Sopenharmony_ci if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < 50562306a36Sopenharmony_ci range->me_len) { 50662306a36Sopenharmony_ci ret = -EINVAL; 50762306a36Sopenharmony_ci goto out; 50862306a36Sopenharmony_ci } 50962306a36Sopenharmony_ci /* 51062306a36Sopenharmony_ci * more exact validations/adjustments will be performed later during 51162306a36Sopenharmony_ci * moving operation for each extent range. 51262306a36Sopenharmony_ci */ 51362306a36Sopenharmony_ci mlog(0, "extents get ready to be moved to #%llu block\n", 51462306a36Sopenharmony_ci range->me_goal); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ciout: 51762306a36Sopenharmony_ci brelse(gd_bh); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci return ret; 52062306a36Sopenharmony_ci} 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_cistatic void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, 52362306a36Sopenharmony_ci int *goal_bit, u32 move_len, u32 max_hop, 52462306a36Sopenharmony_ci u32 *phys_cpos) 52562306a36Sopenharmony_ci{ 52662306a36Sopenharmony_ci int i, used, last_free_bits = 0, base_bit = *goal_bit; 52762306a36Sopenharmony_ci struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; 52862306a36Sopenharmony_ci u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, 52962306a36Sopenharmony_ci le64_to_cpu(gd->bg_blkno)); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); 53462306a36Sopenharmony_ci if (used) { 53562306a36Sopenharmony_ci /* 53662306a36Sopenharmony_ci * we even tried searching the free chunk by jumping 53762306a36Sopenharmony_ci * a 'max_hop' distance, but still failed. 53862306a36Sopenharmony_ci */ 53962306a36Sopenharmony_ci if ((i - base_bit) > max_hop) { 54062306a36Sopenharmony_ci *phys_cpos = 0; 54162306a36Sopenharmony_ci break; 54262306a36Sopenharmony_ci } 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci if (last_free_bits) 54562306a36Sopenharmony_ci last_free_bits = 0; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci continue; 54862306a36Sopenharmony_ci } else 54962306a36Sopenharmony_ci last_free_bits++; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci if (last_free_bits == move_len) { 55262306a36Sopenharmony_ci i -= move_len; 55362306a36Sopenharmony_ci *goal_bit = i; 55462306a36Sopenharmony_ci *phys_cpos = base_cpos + i; 55562306a36Sopenharmony_ci break; 55662306a36Sopenharmony_ci } 55762306a36Sopenharmony_ci } 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); 56062306a36Sopenharmony_ci} 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_cistatic int ocfs2_move_extent(struct ocfs2_move_extents_context *context, 56362306a36Sopenharmony_ci u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, 56462306a36Sopenharmony_ci u32 len, int ext_flags) 56562306a36Sopenharmony_ci{ 56662306a36Sopenharmony_ci int ret, credits = 0, extra_blocks = 0, goal_bit = 0; 56762306a36Sopenharmony_ci handle_t *handle; 56862306a36Sopenharmony_ci struct inode *inode = context->inode; 56962306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 57062306a36Sopenharmony_ci struct inode *tl_inode = osb->osb_tl_inode; 57162306a36Sopenharmony_ci struct inode *gb_inode = NULL; 57262306a36Sopenharmony_ci struct buffer_head *gb_bh = NULL; 57362306a36Sopenharmony_ci struct buffer_head *gd_bh = NULL; 57462306a36Sopenharmony_ci struct ocfs2_group_desc *gd; 57562306a36Sopenharmony_ci struct ocfs2_refcount_tree *ref_tree = NULL; 57662306a36Sopenharmony_ci u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, 57762306a36Sopenharmony_ci context->range->me_threshold); 57862306a36Sopenharmony_ci u64 phys_blkno, new_phys_blkno; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { 58362306a36Sopenharmony_ci BUG_ON(!ocfs2_is_refcount_inode(inode)); 58462306a36Sopenharmony_ci BUG_ON(!context->refcount_loc); 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, 58762306a36Sopenharmony_ci &ref_tree, NULL); 58862306a36Sopenharmony_ci if (ret) { 58962306a36Sopenharmony_ci mlog_errno(ret); 59062306a36Sopenharmony_ci return ret; 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci ret = ocfs2_prepare_refcount_change_for_del(inode, 59462306a36Sopenharmony_ci context->refcount_loc, 59562306a36Sopenharmony_ci phys_blkno, 59662306a36Sopenharmony_ci len, 59762306a36Sopenharmony_ci &credits, 59862306a36Sopenharmony_ci &extra_blocks); 59962306a36Sopenharmony_ci if (ret) { 60062306a36Sopenharmony_ci mlog_errno(ret); 60162306a36Sopenharmony_ci goto out; 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, 60662306a36Sopenharmony_ci len, 1, 60762306a36Sopenharmony_ci &context->meta_ac, 60862306a36Sopenharmony_ci extra_blocks, &credits); 60962306a36Sopenharmony_ci if (ret) { 61062306a36Sopenharmony_ci mlog_errno(ret); 61162306a36Sopenharmony_ci goto out; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci /* 61562306a36Sopenharmony_ci * need to count 2 extra credits for global_bitmap inode and 61662306a36Sopenharmony_ci * group descriptor. 61762306a36Sopenharmony_ci */ 61862306a36Sopenharmony_ci credits += OCFS2_INODE_UPDATE_CREDITS + 1; 61962306a36Sopenharmony_ci 62062306a36Sopenharmony_ci /* 62162306a36Sopenharmony_ci * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() 62262306a36Sopenharmony_ci * logic, while we still need to lock the global_bitmap. 62362306a36Sopenharmony_ci */ 62462306a36Sopenharmony_ci gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, 62562306a36Sopenharmony_ci OCFS2_INVALID_SLOT); 62662306a36Sopenharmony_ci if (!gb_inode) { 62762306a36Sopenharmony_ci mlog(ML_ERROR, "unable to get global_bitmap inode\n"); 62862306a36Sopenharmony_ci ret = -EIO; 62962306a36Sopenharmony_ci goto out; 63062306a36Sopenharmony_ci } 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci inode_lock(gb_inode); 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); 63562306a36Sopenharmony_ci if (ret) { 63662306a36Sopenharmony_ci mlog_errno(ret); 63762306a36Sopenharmony_ci goto out_unlock_gb_mutex; 63862306a36Sopenharmony_ci } 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci inode_lock(tl_inode); 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci handle = ocfs2_start_trans(osb, credits); 64362306a36Sopenharmony_ci if (IS_ERR(handle)) { 64462306a36Sopenharmony_ci ret = PTR_ERR(handle); 64562306a36Sopenharmony_ci mlog_errno(ret); 64662306a36Sopenharmony_ci goto out_unlock_tl_inode; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); 65062306a36Sopenharmony_ci ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, 65162306a36Sopenharmony_ci GLOBAL_BITMAP_SYSTEM_INODE, 65262306a36Sopenharmony_ci OCFS2_INVALID_SLOT, 65362306a36Sopenharmony_ci &goal_bit, &gd_bh); 65462306a36Sopenharmony_ci if (ret) { 65562306a36Sopenharmony_ci mlog_errno(ret); 65662306a36Sopenharmony_ci goto out_commit; 65762306a36Sopenharmony_ci } 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci /* 66062306a36Sopenharmony_ci * probe the victim cluster group to find a proper 66162306a36Sopenharmony_ci * region to fit wanted movement, it even will perfrom 66262306a36Sopenharmony_ci * a best-effort attempt by compromising to a threshold 66362306a36Sopenharmony_ci * around the goal. 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_ci ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, 66662306a36Sopenharmony_ci new_phys_cpos); 66762306a36Sopenharmony_ci if (!*new_phys_cpos) { 66862306a36Sopenharmony_ci ret = -ENOSPC; 66962306a36Sopenharmony_ci goto out_commit; 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, 67362306a36Sopenharmony_ci *new_phys_cpos, ext_flags); 67462306a36Sopenharmony_ci if (ret) { 67562306a36Sopenharmony_ci mlog_errno(ret); 67662306a36Sopenharmony_ci goto out_commit; 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci gd = (struct ocfs2_group_desc *)gd_bh->b_data; 68062306a36Sopenharmony_ci ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, 68162306a36Sopenharmony_ci le16_to_cpu(gd->bg_chain)); 68262306a36Sopenharmony_ci if (ret) { 68362306a36Sopenharmony_ci mlog_errno(ret); 68462306a36Sopenharmony_ci goto out_commit; 68562306a36Sopenharmony_ci } 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, 68862306a36Sopenharmony_ci goal_bit, len); 68962306a36Sopenharmony_ci if (ret) { 69062306a36Sopenharmony_ci ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, 69162306a36Sopenharmony_ci le16_to_cpu(gd->bg_chain)); 69262306a36Sopenharmony_ci mlog_errno(ret); 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci /* 69662306a36Sopenharmony_ci * Here we should write the new page out first if we are 69762306a36Sopenharmony_ci * in write-back mode. 69862306a36Sopenharmony_ci */ 69962306a36Sopenharmony_ci ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); 70062306a36Sopenharmony_ci if (ret) 70162306a36Sopenharmony_ci mlog_errno(ret); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ciout_commit: 70462306a36Sopenharmony_ci ocfs2_commit_trans(osb, handle); 70562306a36Sopenharmony_ci brelse(gd_bh); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ciout_unlock_tl_inode: 70862306a36Sopenharmony_ci inode_unlock(tl_inode); 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci ocfs2_inode_unlock(gb_inode, 1); 71162306a36Sopenharmony_ciout_unlock_gb_mutex: 71262306a36Sopenharmony_ci inode_unlock(gb_inode); 71362306a36Sopenharmony_ci brelse(gb_bh); 71462306a36Sopenharmony_ci iput(gb_inode); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ciout: 71762306a36Sopenharmony_ci if (context->meta_ac) { 71862306a36Sopenharmony_ci ocfs2_free_alloc_context(context->meta_ac); 71962306a36Sopenharmony_ci context->meta_ac = NULL; 72062306a36Sopenharmony_ci } 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci if (ref_tree) 72362306a36Sopenharmony_ci ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci return ret; 72662306a36Sopenharmony_ci} 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci/* 72962306a36Sopenharmony_ci * Helper to calculate the defraging length in one run according to threshold. 73062306a36Sopenharmony_ci */ 73162306a36Sopenharmony_cistatic void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, 73262306a36Sopenharmony_ci u32 threshold, int *skip) 73362306a36Sopenharmony_ci{ 73462306a36Sopenharmony_ci if ((*alloc_size + *len_defraged) < threshold) { 73562306a36Sopenharmony_ci /* 73662306a36Sopenharmony_ci * proceed defragmentation until we meet the thresh 73762306a36Sopenharmony_ci */ 73862306a36Sopenharmony_ci *len_defraged += *alloc_size; 73962306a36Sopenharmony_ci } else if (*len_defraged == 0) { 74062306a36Sopenharmony_ci /* 74162306a36Sopenharmony_ci * XXX: skip a large extent. 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_ci *skip = 1; 74462306a36Sopenharmony_ci } else { 74562306a36Sopenharmony_ci /* 74662306a36Sopenharmony_ci * split this extent to coalesce with former pieces as 74762306a36Sopenharmony_ci * to reach the threshold. 74862306a36Sopenharmony_ci * 74962306a36Sopenharmony_ci * we're done here with one cycle of defragmentation 75062306a36Sopenharmony_ci * in a size of 'thresh', resetting 'len_defraged' 75162306a36Sopenharmony_ci * forces a new defragmentation. 75262306a36Sopenharmony_ci */ 75362306a36Sopenharmony_ci *alloc_size = threshold - *len_defraged; 75462306a36Sopenharmony_ci *len_defraged = 0; 75562306a36Sopenharmony_ci } 75662306a36Sopenharmony_ci} 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_cistatic int __ocfs2_move_extents_range(struct buffer_head *di_bh, 75962306a36Sopenharmony_ci struct ocfs2_move_extents_context *context) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci int ret = 0, flags, do_defrag, skip = 0; 76262306a36Sopenharmony_ci u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; 76362306a36Sopenharmony_ci u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci struct inode *inode = context->inode; 76662306a36Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 76762306a36Sopenharmony_ci struct ocfs2_move_extents *range = context->range; 76862306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci if ((i_size_read(inode) == 0) || (range->me_len == 0)) 77162306a36Sopenharmony_ci return 0; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 77462306a36Sopenharmony_ci return 0; 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci context->refcount_loc = le64_to_cpu(di->i_refcount_loc); 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); 77962306a36Sopenharmony_ci ocfs2_init_dealloc_ctxt(&context->dealloc); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci /* 78262306a36Sopenharmony_ci * TO-DO XXX: 78362306a36Sopenharmony_ci * 78462306a36Sopenharmony_ci * - xattr extents. 78562306a36Sopenharmony_ci */ 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci do_defrag = context->auto_defrag; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci /* 79062306a36Sopenharmony_ci * extents moving happens in unit of clusters, for the sake 79162306a36Sopenharmony_ci * of simplicity, we may ignore two clusters where 'byte_start' 79262306a36Sopenharmony_ci * and 'byte_start + len' were within. 79362306a36Sopenharmony_ci */ 79462306a36Sopenharmony_ci move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); 79562306a36Sopenharmony_ci len_to_move = (range->me_start + range->me_len) >> 79662306a36Sopenharmony_ci osb->s_clustersize_bits; 79762306a36Sopenharmony_ci if (len_to_move >= move_start) 79862306a36Sopenharmony_ci len_to_move -= move_start; 79962306a36Sopenharmony_ci else 80062306a36Sopenharmony_ci len_to_move = 0; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci if (do_defrag) { 80362306a36Sopenharmony_ci defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; 80462306a36Sopenharmony_ci if (defrag_thresh <= 1) 80562306a36Sopenharmony_ci goto done; 80662306a36Sopenharmony_ci } else 80762306a36Sopenharmony_ci new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, 80862306a36Sopenharmony_ci range->me_goal); 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " 81162306a36Sopenharmony_ci "thresh: %u\n", 81262306a36Sopenharmony_ci (unsigned long long)OCFS2_I(inode)->ip_blkno, 81362306a36Sopenharmony_ci (unsigned long long)range->me_start, 81462306a36Sopenharmony_ci (unsigned long long)range->me_len, 81562306a36Sopenharmony_ci move_start, len_to_move, defrag_thresh); 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci cpos = move_start; 81862306a36Sopenharmony_ci while (len_to_move) { 81962306a36Sopenharmony_ci ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, 82062306a36Sopenharmony_ci &flags); 82162306a36Sopenharmony_ci if (ret) { 82262306a36Sopenharmony_ci mlog_errno(ret); 82362306a36Sopenharmony_ci goto out; 82462306a36Sopenharmony_ci } 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci if (alloc_size > len_to_move) 82762306a36Sopenharmony_ci alloc_size = len_to_move; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci /* 83062306a36Sopenharmony_ci * XXX: how to deal with a hole: 83162306a36Sopenharmony_ci * 83262306a36Sopenharmony_ci * - skip the hole of course 83362306a36Sopenharmony_ci * - force a new defragmentation 83462306a36Sopenharmony_ci */ 83562306a36Sopenharmony_ci if (!phys_cpos) { 83662306a36Sopenharmony_ci if (do_defrag) 83762306a36Sopenharmony_ci len_defraged = 0; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci goto next; 84062306a36Sopenharmony_ci } 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci if (do_defrag) { 84362306a36Sopenharmony_ci ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, 84462306a36Sopenharmony_ci defrag_thresh, &skip); 84562306a36Sopenharmony_ci /* 84662306a36Sopenharmony_ci * skip large extents 84762306a36Sopenharmony_ci */ 84862306a36Sopenharmony_ci if (skip) { 84962306a36Sopenharmony_ci skip = 0; 85062306a36Sopenharmony_ci goto next; 85162306a36Sopenharmony_ci } 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " 85462306a36Sopenharmony_ci "alloc_size: %u, len_defraged: %u\n", 85562306a36Sopenharmony_ci cpos, phys_cpos, alloc_size, len_defraged); 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci ret = ocfs2_defrag_extent(context, cpos, phys_cpos, 85862306a36Sopenharmony_ci &alloc_size, flags); 85962306a36Sopenharmony_ci } else { 86062306a36Sopenharmony_ci ret = ocfs2_move_extent(context, cpos, phys_cpos, 86162306a36Sopenharmony_ci &new_phys_cpos, alloc_size, 86262306a36Sopenharmony_ci flags); 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci new_phys_cpos += alloc_size; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci if (ret < 0) { 86862306a36Sopenharmony_ci mlog_errno(ret); 86962306a36Sopenharmony_ci goto out; 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci context->clusters_moved += alloc_size; 87362306a36Sopenharmony_cinext: 87462306a36Sopenharmony_ci cpos += alloc_size; 87562306a36Sopenharmony_ci len_to_move -= alloc_size; 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_cidone: 87962306a36Sopenharmony_ci range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ciout: 88262306a36Sopenharmony_ci range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, 88362306a36Sopenharmony_ci context->clusters_moved); 88462306a36Sopenharmony_ci range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, 88562306a36Sopenharmony_ci context->new_phys_cpos); 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci ocfs2_schedule_truncate_log_flush(osb, 1); 88862306a36Sopenharmony_ci ocfs2_run_deallocs(osb, &context->dealloc); 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci return ret; 89162306a36Sopenharmony_ci} 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_cistatic int ocfs2_move_extents(struct ocfs2_move_extents_context *context) 89462306a36Sopenharmony_ci{ 89562306a36Sopenharmony_ci int status; 89662306a36Sopenharmony_ci handle_t *handle; 89762306a36Sopenharmony_ci struct inode *inode = context->inode; 89862306a36Sopenharmony_ci struct ocfs2_dinode *di; 89962306a36Sopenharmony_ci struct buffer_head *di_bh = NULL; 90062306a36Sopenharmony_ci struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 90362306a36Sopenharmony_ci return -EROFS; 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci inode_lock(inode); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci /* 90862306a36Sopenharmony_ci * This prevents concurrent writes from other nodes 90962306a36Sopenharmony_ci */ 91062306a36Sopenharmony_ci status = ocfs2_rw_lock(inode, 1); 91162306a36Sopenharmony_ci if (status) { 91262306a36Sopenharmony_ci mlog_errno(status); 91362306a36Sopenharmony_ci goto out; 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci status = ocfs2_inode_lock(inode, &di_bh, 1); 91762306a36Sopenharmony_ci if (status) { 91862306a36Sopenharmony_ci mlog_errno(status); 91962306a36Sopenharmony_ci goto out_rw_unlock; 92062306a36Sopenharmony_ci } 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci /* 92362306a36Sopenharmony_ci * rememer ip_xattr_sem also needs to be held if necessary 92462306a36Sopenharmony_ci */ 92562306a36Sopenharmony_ci down_write(&OCFS2_I(inode)->ip_alloc_sem); 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci status = __ocfs2_move_extents_range(di_bh, context); 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci up_write(&OCFS2_I(inode)->ip_alloc_sem); 93062306a36Sopenharmony_ci if (status) { 93162306a36Sopenharmony_ci mlog_errno(status); 93262306a36Sopenharmony_ci goto out_inode_unlock; 93362306a36Sopenharmony_ci } 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci /* 93662306a36Sopenharmony_ci * We update ctime for these changes 93762306a36Sopenharmony_ci */ 93862306a36Sopenharmony_ci handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 93962306a36Sopenharmony_ci if (IS_ERR(handle)) { 94062306a36Sopenharmony_ci status = PTR_ERR(handle); 94162306a36Sopenharmony_ci mlog_errno(status); 94262306a36Sopenharmony_ci goto out_inode_unlock; 94362306a36Sopenharmony_ci } 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 94662306a36Sopenharmony_ci OCFS2_JOURNAL_ACCESS_WRITE); 94762306a36Sopenharmony_ci if (status) { 94862306a36Sopenharmony_ci mlog_errno(status); 94962306a36Sopenharmony_ci goto out_commit; 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci di = (struct ocfs2_dinode *)di_bh->b_data; 95362306a36Sopenharmony_ci inode_set_ctime_current(inode); 95462306a36Sopenharmony_ci di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); 95562306a36Sopenharmony_ci di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); 95662306a36Sopenharmony_ci ocfs2_update_inode_fsync_trans(handle, inode, 0); 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci ocfs2_journal_dirty(handle, di_bh); 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ciout_commit: 96162306a36Sopenharmony_ci ocfs2_commit_trans(osb, handle); 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ciout_inode_unlock: 96462306a36Sopenharmony_ci brelse(di_bh); 96562306a36Sopenharmony_ci ocfs2_inode_unlock(inode, 1); 96662306a36Sopenharmony_ciout_rw_unlock: 96762306a36Sopenharmony_ci ocfs2_rw_unlock(inode, 1); 96862306a36Sopenharmony_ciout: 96962306a36Sopenharmony_ci inode_unlock(inode); 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci return status; 97262306a36Sopenharmony_ci} 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ciint ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) 97562306a36Sopenharmony_ci{ 97662306a36Sopenharmony_ci int status; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci struct inode *inode = file_inode(filp); 97962306a36Sopenharmony_ci struct ocfs2_move_extents range; 98062306a36Sopenharmony_ci struct ocfs2_move_extents_context *context; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci if (!argp) 98362306a36Sopenharmony_ci return -EINVAL; 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci status = mnt_want_write_file(filp); 98662306a36Sopenharmony_ci if (status) 98762306a36Sopenharmony_ci return status; 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) { 99062306a36Sopenharmony_ci status = -EPERM; 99162306a36Sopenharmony_ci goto out_drop; 99262306a36Sopenharmony_ci } 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { 99562306a36Sopenharmony_ci status = -EPERM; 99662306a36Sopenharmony_ci goto out_drop; 99762306a36Sopenharmony_ci } 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); 100062306a36Sopenharmony_ci if (!context) { 100162306a36Sopenharmony_ci status = -ENOMEM; 100262306a36Sopenharmony_ci mlog_errno(status); 100362306a36Sopenharmony_ci goto out_drop; 100462306a36Sopenharmony_ci } 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci context->inode = inode; 100762306a36Sopenharmony_ci context->file = filp; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci if (copy_from_user(&range, argp, sizeof(range))) { 101062306a36Sopenharmony_ci status = -EFAULT; 101162306a36Sopenharmony_ci goto out_free; 101262306a36Sopenharmony_ci } 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci if (range.me_start > i_size_read(inode)) { 101562306a36Sopenharmony_ci status = -EINVAL; 101662306a36Sopenharmony_ci goto out_free; 101762306a36Sopenharmony_ci } 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci if (range.me_start + range.me_len > i_size_read(inode)) 102062306a36Sopenharmony_ci range.me_len = i_size_read(inode) - range.me_start; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci context->range = ⦥ 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci /* 102562306a36Sopenharmony_ci * ok, the default theshold for the defragmentation 102662306a36Sopenharmony_ci * is 1M, since our maximum clustersize was 1M also. 102762306a36Sopenharmony_ci * any thought? 102862306a36Sopenharmony_ci */ 102962306a36Sopenharmony_ci if (!range.me_threshold) 103062306a36Sopenharmony_ci range.me_threshold = 1024 * 1024; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci if (range.me_threshold > i_size_read(inode)) 103362306a36Sopenharmony_ci range.me_threshold = i_size_read(inode); 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { 103662306a36Sopenharmony_ci context->auto_defrag = 1; 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) 103962306a36Sopenharmony_ci context->partial = 1; 104062306a36Sopenharmony_ci } else { 104162306a36Sopenharmony_ci /* 104262306a36Sopenharmony_ci * first best-effort attempt to validate and adjust the goal 104362306a36Sopenharmony_ci * (physical address in block), while it can't guarantee later 104462306a36Sopenharmony_ci * operation can succeed all the time since global_bitmap may 104562306a36Sopenharmony_ci * change a bit over time. 104662306a36Sopenharmony_ci */ 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci status = ocfs2_validate_and_adjust_move_goal(inode, &range); 104962306a36Sopenharmony_ci if (status) 105062306a36Sopenharmony_ci goto out_copy; 105162306a36Sopenharmony_ci } 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci status = ocfs2_move_extents(context); 105462306a36Sopenharmony_ci if (status) 105562306a36Sopenharmony_ci mlog_errno(status); 105662306a36Sopenharmony_ciout_copy: 105762306a36Sopenharmony_ci /* 105862306a36Sopenharmony_ci * movement/defragmentation may end up being partially completed, 105962306a36Sopenharmony_ci * that's the reason why we need to return userspace the finished 106062306a36Sopenharmony_ci * length and new_offset even if failure happens somewhere. 106162306a36Sopenharmony_ci */ 106262306a36Sopenharmony_ci if (copy_to_user(argp, &range, sizeof(range))) 106362306a36Sopenharmony_ci status = -EFAULT; 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ciout_free: 106662306a36Sopenharmony_ci kfree(context); 106762306a36Sopenharmony_ciout_drop: 106862306a36Sopenharmony_ci mnt_drop_write_file(filp); 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci return status; 107162306a36Sopenharmony_ci} 1072