162306a36Sopenharmony_ci// SPDX-License-Identifier: LGPL-2.1 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd. 462306a36Sopenharmony_ci * Written by Takashi Sato <t-sato@yk.jp.nec.com> 562306a36Sopenharmony_ci * Akira Fujita <a-fujita@rs.jp.nec.com> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/fs.h> 962306a36Sopenharmony_ci#include <linux/quotaops.h> 1062306a36Sopenharmony_ci#include <linux/slab.h> 1162306a36Sopenharmony_ci#include <linux/sched/mm.h> 1262306a36Sopenharmony_ci#include "ext4_jbd2.h" 1362306a36Sopenharmony_ci#include "ext4.h" 1462306a36Sopenharmony_ci#include "ext4_extents.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci/** 1762306a36Sopenharmony_ci * get_ext_path() - Find an extent path for designated logical block number. 1862306a36Sopenharmony_ci * @inode: inode to be searched 1962306a36Sopenharmony_ci * @lblock: logical block number to find an extent path 2062306a36Sopenharmony_ci * @ppath: pointer to an extent path pointer (for output) 2162306a36Sopenharmony_ci * 2262306a36Sopenharmony_ci * ext4_find_extent wrapper. Return 0 on success, or a negative error value 2362306a36Sopenharmony_ci * on failure. 
2462306a36Sopenharmony_ci */ 2562306a36Sopenharmony_cistatic inline int 2662306a36Sopenharmony_ciget_ext_path(struct inode *inode, ext4_lblk_t lblock, 2762306a36Sopenharmony_ci struct ext4_ext_path **ppath) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci struct ext4_ext_path *path; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); 3262306a36Sopenharmony_ci if (IS_ERR(path)) 3362306a36Sopenharmony_ci return PTR_ERR(path); 3462306a36Sopenharmony_ci if (path[ext_depth(inode)].p_ext == NULL) { 3562306a36Sopenharmony_ci ext4_free_ext_path(path); 3662306a36Sopenharmony_ci *ppath = NULL; 3762306a36Sopenharmony_ci return -ENODATA; 3862306a36Sopenharmony_ci } 3962306a36Sopenharmony_ci *ppath = path; 4062306a36Sopenharmony_ci return 0; 4162306a36Sopenharmony_ci} 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/** 4462306a36Sopenharmony_ci * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem 4562306a36Sopenharmony_ci * @first: inode to be locked 4662306a36Sopenharmony_ci * @second: inode to be locked 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * Acquire write lock of i_data_sem of the two inodes 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_civoid 5162306a36Sopenharmony_ciext4_double_down_write_data_sem(struct inode *first, struct inode *second) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci if (first < second) { 5462306a36Sopenharmony_ci down_write(&EXT4_I(first)->i_data_sem); 5562306a36Sopenharmony_ci down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER); 5662306a36Sopenharmony_ci } else { 5762306a36Sopenharmony_ci down_write(&EXT4_I(second)->i_data_sem); 5862306a36Sopenharmony_ci down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci } 6162306a36Sopenharmony_ci} 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci/** 6462306a36Sopenharmony_ci * ext4_double_up_write_data_sem - Release two 
inodes' write lock of i_data_sem 6562306a36Sopenharmony_ci * 6662306a36Sopenharmony_ci * @orig_inode: original inode structure to be released its lock first 6762306a36Sopenharmony_ci * @donor_inode: donor inode structure to be released its lock second 6862306a36Sopenharmony_ci * Release write lock of i_data_sem of two inodes (orig and donor). 6962306a36Sopenharmony_ci */ 7062306a36Sopenharmony_civoid 7162306a36Sopenharmony_ciext4_double_up_write_data_sem(struct inode *orig_inode, 7262306a36Sopenharmony_ci struct inode *donor_inode) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci up_write(&EXT4_I(orig_inode)->i_data_sem); 7562306a36Sopenharmony_ci up_write(&EXT4_I(donor_inode)->i_data_sem); 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci/** 7962306a36Sopenharmony_ci * mext_check_coverage - Check that all extents in range has the same type 8062306a36Sopenharmony_ci * 8162306a36Sopenharmony_ci * @inode: inode in question 8262306a36Sopenharmony_ci * @from: block offset of inode 8362306a36Sopenharmony_ci * @count: block count to be checked 8462306a36Sopenharmony_ci * @unwritten: extents expected to be unwritten 8562306a36Sopenharmony_ci * @err: pointer to save error value 8662306a36Sopenharmony_ci * 8762306a36Sopenharmony_ci * Return 1 if all extents in range has expected type, and zero otherwise. 
8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_cistatic int 9062306a36Sopenharmony_cimext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, 9162306a36Sopenharmony_ci int unwritten, int *err) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci struct ext4_ext_path *path = NULL; 9462306a36Sopenharmony_ci struct ext4_extent *ext; 9562306a36Sopenharmony_ci int ret = 0; 9662306a36Sopenharmony_ci ext4_lblk_t last = from + count; 9762306a36Sopenharmony_ci while (from < last) { 9862306a36Sopenharmony_ci *err = get_ext_path(inode, from, &path); 9962306a36Sopenharmony_ci if (*err) 10062306a36Sopenharmony_ci goto out; 10162306a36Sopenharmony_ci ext = path[ext_depth(inode)].p_ext; 10262306a36Sopenharmony_ci if (unwritten != ext4_ext_is_unwritten(ext)) 10362306a36Sopenharmony_ci goto out; 10462306a36Sopenharmony_ci from += ext4_ext_get_actual_len(ext); 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci ret = 1; 10762306a36Sopenharmony_ciout: 10862306a36Sopenharmony_ci ext4_free_ext_path(path); 10962306a36Sopenharmony_ci return ret; 11062306a36Sopenharmony_ci} 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci/** 11362306a36Sopenharmony_ci * mext_folio_double_lock - Grab and lock folio on both @inode1 and @inode2 11462306a36Sopenharmony_ci * 11562306a36Sopenharmony_ci * @inode1: the inode structure 11662306a36Sopenharmony_ci * @inode2: the inode structure 11762306a36Sopenharmony_ci * @index1: folio index 11862306a36Sopenharmony_ci * @index2: folio index 11962306a36Sopenharmony_ci * @folio: result folio vector 12062306a36Sopenharmony_ci * 12162306a36Sopenharmony_ci * Grab two locked folio for inode's by inode order 12262306a36Sopenharmony_ci */ 12362306a36Sopenharmony_cistatic int 12462306a36Sopenharmony_cimext_folio_double_lock(struct inode *inode1, struct inode *inode2, 12562306a36Sopenharmony_ci pgoff_t index1, pgoff_t index2, struct folio *folio[2]) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci struct address_space *mapping[2]; 
12862306a36Sopenharmony_ci unsigned int flags; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci BUG_ON(!inode1 || !inode2); 13162306a36Sopenharmony_ci if (inode1 < inode2) { 13262306a36Sopenharmony_ci mapping[0] = inode1->i_mapping; 13362306a36Sopenharmony_ci mapping[1] = inode2->i_mapping; 13462306a36Sopenharmony_ci } else { 13562306a36Sopenharmony_ci swap(index1, index2); 13662306a36Sopenharmony_ci mapping[0] = inode2->i_mapping; 13762306a36Sopenharmony_ci mapping[1] = inode1->i_mapping; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci flags = memalloc_nofs_save(); 14162306a36Sopenharmony_ci folio[0] = __filemap_get_folio(mapping[0], index1, FGP_WRITEBEGIN, 14262306a36Sopenharmony_ci mapping_gfp_mask(mapping[0])); 14362306a36Sopenharmony_ci if (IS_ERR(folio[0])) { 14462306a36Sopenharmony_ci memalloc_nofs_restore(flags); 14562306a36Sopenharmony_ci return PTR_ERR(folio[0]); 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci folio[1] = __filemap_get_folio(mapping[1], index2, FGP_WRITEBEGIN, 14962306a36Sopenharmony_ci mapping_gfp_mask(mapping[1])); 15062306a36Sopenharmony_ci memalloc_nofs_restore(flags); 15162306a36Sopenharmony_ci if (IS_ERR(folio[1])) { 15262306a36Sopenharmony_ci folio_unlock(folio[0]); 15362306a36Sopenharmony_ci folio_put(folio[0]); 15462306a36Sopenharmony_ci return PTR_ERR(folio[1]); 15562306a36Sopenharmony_ci } 15662306a36Sopenharmony_ci /* 15762306a36Sopenharmony_ci * __filemap_get_folio() may not wait on folio's writeback if 15862306a36Sopenharmony_ci * BDI not demand that. 
But it is reasonable to be very conservative 15962306a36Sopenharmony_ci * here and explicitly wait on folio's writeback 16062306a36Sopenharmony_ci */ 16162306a36Sopenharmony_ci folio_wait_writeback(folio[0]); 16262306a36Sopenharmony_ci folio_wait_writeback(folio[1]); 16362306a36Sopenharmony_ci if (inode1 > inode2) 16462306a36Sopenharmony_ci swap(folio[0], folio[1]); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci return 0; 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci/* Force page buffers uptodate w/o dropping page's lock */ 17062306a36Sopenharmony_cistatic int 17162306a36Sopenharmony_cimext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 17462306a36Sopenharmony_ci sector_t block; 17562306a36Sopenharmony_ci struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 17662306a36Sopenharmony_ci unsigned int blocksize, block_start, block_end; 17762306a36Sopenharmony_ci int i, err, nr = 0, partial = 0; 17862306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 17962306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci if (folio_test_uptodate(folio)) 18262306a36Sopenharmony_ci return 0; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci blocksize = i_blocksize(inode); 18562306a36Sopenharmony_ci head = folio_buffers(folio); 18662306a36Sopenharmony_ci if (!head) { 18762306a36Sopenharmony_ci create_empty_buffers(&folio->page, blocksize, 0); 18862306a36Sopenharmony_ci head = folio_buffers(folio); 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits); 19262306a36Sopenharmony_ci for (bh = head, block_start = 0; bh != head || !block_start; 19362306a36Sopenharmony_ci block++, block_start = block_end, bh = bh->b_this_page) { 19462306a36Sopenharmony_ci block_end = 
block_start + blocksize; 19562306a36Sopenharmony_ci if (block_end <= from || block_start >= to) { 19662306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 19762306a36Sopenharmony_ci partial = 1; 19862306a36Sopenharmony_ci continue; 19962306a36Sopenharmony_ci } 20062306a36Sopenharmony_ci if (buffer_uptodate(bh)) 20162306a36Sopenharmony_ci continue; 20262306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 20362306a36Sopenharmony_ci err = ext4_get_block(inode, block, bh, 0); 20462306a36Sopenharmony_ci if (err) { 20562306a36Sopenharmony_ci folio_set_error(folio); 20662306a36Sopenharmony_ci return err; 20762306a36Sopenharmony_ci } 20862306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 20962306a36Sopenharmony_ci folio_zero_range(folio, block_start, blocksize); 21062306a36Sopenharmony_ci set_buffer_uptodate(bh); 21162306a36Sopenharmony_ci continue; 21262306a36Sopenharmony_ci } 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci BUG_ON(nr >= MAX_BUF_PER_PAGE); 21562306a36Sopenharmony_ci arr[nr++] = bh; 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci /* No io required */ 21862306a36Sopenharmony_ci if (!nr) 21962306a36Sopenharmony_ci goto out; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 22262306a36Sopenharmony_ci bh = arr[i]; 22362306a36Sopenharmony_ci if (!bh_uptodate_or_lock(bh)) { 22462306a36Sopenharmony_ci err = ext4_read_bh(bh, 0, NULL); 22562306a36Sopenharmony_ci if (err) 22662306a36Sopenharmony_ci return err; 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ciout: 23062306a36Sopenharmony_ci if (!partial) 23162306a36Sopenharmony_ci folio_mark_uptodate(folio); 23262306a36Sopenharmony_ci return 0; 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci/** 23662306a36Sopenharmony_ci * move_extent_per_page - Move extent data per page 23762306a36Sopenharmony_ci * 23862306a36Sopenharmony_ci * @o_filp: file structure of original file 23962306a36Sopenharmony_ci * @donor_inode: 
donor inode
 * @orig_page_offset:		page index on original file
 * @donor_page_offset:		page index on donor file
 * @data_offset_in_page:	block index (within the page) where data
 *				swapping starts
 * @block_len_in_page:		the number of blocks to be swapped
 * @unwritten:			orig extent is unwritten or not; re-checked
 *				here while the folios are locked
 * @err:			pointer to save the error value; reset to 0
 *				at the start of every attempt
 *
 * Save the data in original inode blocks and replace original inode extents
 * with donor inode extents by calling ext4_swap_extents().
 * Finally, write out the saved data in new original inode blocks.
 *
 * The whole attempt is restarted on -ENOSPC (while ext4_should_retry_alloc()
 * agrees) and on -EBUSY (a bounded number of times, after forcing a journal
 * commit).
 *
 * Return replaced block count. 0 is returned when the journal handle cannot
 * be started or when the swap had to be undone in the repair path.
 */
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
		     int data_offset_in_page,
		     int block_len_in_page, int unwritten, int *err)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct folio *folio[2] = {NULL, NULL};
	handle_t *handle;
	ext4_lblk_t orig_blk_offset, donor_blk_offset;
	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
	unsigned int tmp_data_size, data_size, replaced_size;
	int i, err2, jblocks, retries = 0;
	int replaced_count = 0;
	/* Byte offset inside the page at which the swapped data starts. */
	int from = data_offset_in_page << orig_inode->i_blkbits;
	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
	struct super_block *sb = orig_inode->i_sb;
	struct buffer_head *bh = NULL;

	/*
	 * It needs twice the amount of ordinary journal buffers because
	 * inode and donor_inode may each change different metadata blocks.
	 */
again:
	*err = 0;
	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
	if (IS_ERR(handle)) {
		*err = PTR_ERR(handle);
		return 0;
	}

	/* Translate the page indexes into logical block numbers. */
	orig_blk_offset = orig_page_offset * blocks_per_page +
		data_offset_in_page;

	donor_blk_offset = donor_page_offset * blocks_per_page +
		data_offset_in_page;

	/* Calculate data_size */
	if ((orig_blk_offset + block_len_in_page - 1) ==
	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
		/* Replace the last block */
		tmp_data_size = orig_inode->i_size & (blocksize - 1);
		/*
		 * If tmp_data_size equals zero, i_size is a multiple of
		 * blocksize, so the last block is completely used.
		 */
		if (tmp_data_size == 0)
			tmp_data_size = blocksize;

		data_size = tmp_data_size +
			((block_len_in_page - 1) << orig_inode->i_blkbits);
	} else
		data_size = block_len_in_page << orig_inode->i_blkbits;

	replaced_size = data_size;

	*err = mext_folio_double_lock(orig_inode, donor_inode, orig_page_offset,
				     donor_page_offset, folio);
	if (unlikely(*err < 0))
		goto stop_journal;
	/*
	 * If orig extent was unwritten it can become initialized
	 * at any time after i_data_sem was dropped, in order to
	 * serialize with delalloc we have to recheck the extent while we
	 * hold the page's lock. If it is still the case, data copy is not
	 * necessary, just swap data blocks between orig and donor.
	 */

	VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
	VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
	VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);

	if (unwritten) {
		ext4_double_down_write_data_sem(orig_inode, donor_inode);
		/* If any of extents in range became initialized we have to
		 * fallback to data copying */
		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
						block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
						 block_len_in_page, 1, err);
		if (*err)
			goto drop_data_sem;

		if (!unwritten) {
			/* data_copy takes i_data_sem again by itself */
			ext4_double_up_write_data_sem(orig_inode, donor_inode);
			goto data_copy;
		}
		if (!filemap_release_folio(folio[0], 0) ||
		    !filemap_release_folio(folio[1], 0)) {
			*err = -EBUSY;
			goto drop_data_sem;
		}
		replaced_count = ext4_swap_extents(handle, orig_inode,
						   donor_inode, orig_blk_offset,
						   donor_blk_offset,
						   block_len_in_page, 1, err);
	drop_data_sem:
		ext4_double_up_write_data_sem(orig_inode, donor_inode);
		goto unlock_folios;
	}
data_copy:
	*err = mext_page_mkuptodate(folio[0], from, from + replaced_size);
	if (*err)
		goto unlock_folios;

	/* At this point all buffers in range are uptodate, old mapping layout
	 * is no longer required, try to drop it now. */
	if (!filemap_release_folio(folio[0], 0) ||
	    !filemap_release_folio(folio[1], 0)) {
		*err = -EBUSY;
		goto unlock_folios;
	}
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
					       orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 1, err);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (*err) {
		/*
		 * The swap failed part-way: carry on with only the blocks
		 * that were actually exchanged, or give up if there are none.
		 */
		if (replaced_count) {
			block_len_in_page = replaced_count;
			replaced_size =
				block_len_in_page << orig_inode->i_blkbits;
		} else
			goto unlock_folios;
	}
	/* Perform all necessary steps similar write_begin()/write_end()
	 * but keeping in mind that i_size will not change */
	if (!folio_buffers(folio[0]))
		create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
	bh = folio_buffers(folio[0]);
	/* Skip the buffers in front of the swapped range. */
	for (i = 0; i < data_offset_in_page; i++)
		bh = bh->b_this_page;
	/* Look up the block mapping of every swapped buffer again. */
	for (i = 0; i < block_len_in_page; i++) {
		*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
		if (*err < 0)
			goto repair_branches;
		bh = bh->b_this_page;
	}

	block_commit_write(&folio[0]->page, from, from + replaced_size);

	/* Even in case of data=writeback it is reasonable to pin
	 * inode to transaction, to prevent unexpected data loss */
	*err = ext4_jbd2_inode_add_write(handle, orig_inode,
			(loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);

unlock_folios:
	folio_unlock(folio[0]);
	folio_put(folio[0]);
	folio_unlock(folio[1]);
	folio_put(folio[1]);
stop_journal:
	ext4_journal_stop(handle);
	if (*err == -ENOSPC &&
	    ext4_should_retry_alloc(sb, &retries))
		goto again;
	/* Buffer was busy because probably is pinned to journal transaction,
	 * force transaction commit may help to free it. */
	if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
	    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
		goto again;
	return replaced_count;

repair_branches:
	/*
	 * This should never ever happen!
	 * Extents are swapped already, but we are not able to copy data.
	 * Try to swap extents back to their original places.
	 */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
					       orig_blk_offset, donor_blk_offset,
					   block_len_in_page, 0, &err2);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	/* err2 is not examined; the rollback is judged by the block count. */
	if (replaced_count != block_len_in_page) {
		ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset),
				       EIO, "Unable to copy data block,"
				       " data will be lost.");
		*err = -EIO;
	}
	/* Nothing was moved as far as the caller is concerned. */
	replaced_count = 0;
	goto unlock_folios;
}

/**
 * mext_check_arguments - Check whether move extent can be done
 *
 * @orig_inode:		original inode
 * @donor_inode:	donor inode
 * @orig_start:		logical start offset in block for orig
 * @donor_start:	logical start offset in block for donor
 * @len:		the number of blocks to be moved; may be shortened
 *			so the range does not run past either file's end
 *
 * Check the arguments of ext4_move_extents() whether the files can be
 * exchanged with each other.
 * Return 0 on success, or a negative error value on failure.
45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_cistatic int 45462306a36Sopenharmony_cimext_check_arguments(struct inode *orig_inode, 45562306a36Sopenharmony_ci struct inode *donor_inode, __u64 orig_start, 45662306a36Sopenharmony_ci __u64 donor_start, __u64 *len) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci __u64 orig_eof, donor_eof; 45962306a36Sopenharmony_ci unsigned int blkbits = orig_inode->i_blkbits; 46062306a36Sopenharmony_ci unsigned int blocksize = 1 << blkbits; 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits; 46362306a36Sopenharmony_ci donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { 46762306a36Sopenharmony_ci ext4_debug("ext4 move extent: suid or sgid is set" 46862306a36Sopenharmony_ci " to donor file [ino:orig %lu, donor %lu]\n", 46962306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 47062306a36Sopenharmony_ci return -EINVAL; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) 47462306a36Sopenharmony_ci return -EPERM; 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci /* Ext4 move extent does not support swap files */ 47762306a36Sopenharmony_ci if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 47862306a36Sopenharmony_ci ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n", 47962306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 48062306a36Sopenharmony_ci return -ETXTBSY; 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) { 48462306a36Sopenharmony_ci ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n", 
48562306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 48662306a36Sopenharmony_ci return -EOPNOTSUPP; 48762306a36Sopenharmony_ci } 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci /* Ext4 move extent supports only extent based file */ 49062306a36Sopenharmony_ci if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 49162306a36Sopenharmony_ci ext4_debug("ext4 move extent: orig file is not extents " 49262306a36Sopenharmony_ci "based file [ino:orig %lu]\n", orig_inode->i_ino); 49362306a36Sopenharmony_ci return -EOPNOTSUPP; 49462306a36Sopenharmony_ci } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { 49562306a36Sopenharmony_ci ext4_debug("ext4 move extent: donor file is not extents " 49662306a36Sopenharmony_ci "based file [ino:donor %lu]\n", donor_inode->i_ino); 49762306a36Sopenharmony_ci return -EOPNOTSUPP; 49862306a36Sopenharmony_ci } 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci if ((!orig_inode->i_size) || (!donor_inode->i_size)) { 50162306a36Sopenharmony_ci ext4_debug("ext4 move extent: File size is 0 byte\n"); 50262306a36Sopenharmony_ci return -EINVAL; 50362306a36Sopenharmony_ci } 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci /* Start offset should be same */ 50662306a36Sopenharmony_ci if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != 50762306a36Sopenharmony_ci (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { 50862306a36Sopenharmony_ci ext4_debug("ext4 move extent: orig and donor's start " 50962306a36Sopenharmony_ci "offsets are not aligned [ino:orig %lu, donor %lu]\n", 51062306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 51162306a36Sopenharmony_ci return -EINVAL; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci if ((orig_start >= EXT_MAX_BLOCKS) || 51562306a36Sopenharmony_ci (donor_start >= EXT_MAX_BLOCKS) || 51662306a36Sopenharmony_ci (*len > EXT_MAX_BLOCKS) || 51762306a36Sopenharmony_ci (donor_start + *len >= EXT_MAX_BLOCKS) || 
51862306a36Sopenharmony_ci (orig_start + *len >= EXT_MAX_BLOCKS)) { 51962306a36Sopenharmony_ci ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 52062306a36Sopenharmony_ci "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, 52162306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 52262306a36Sopenharmony_ci return -EINVAL; 52362306a36Sopenharmony_ci } 52462306a36Sopenharmony_ci if (orig_eof <= orig_start) 52562306a36Sopenharmony_ci *len = 0; 52662306a36Sopenharmony_ci else if (orig_eof < orig_start + *len - 1) 52762306a36Sopenharmony_ci *len = orig_eof - orig_start; 52862306a36Sopenharmony_ci if (donor_eof <= donor_start) 52962306a36Sopenharmony_ci *len = 0; 53062306a36Sopenharmony_ci else if (donor_eof < donor_start + *len - 1) 53162306a36Sopenharmony_ci *len = donor_eof - donor_start; 53262306a36Sopenharmony_ci if (!*len) { 53362306a36Sopenharmony_ci ext4_debug("ext4 move extent: len should not be 0 " 53462306a36Sopenharmony_ci "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 53562306a36Sopenharmony_ci donor_inode->i_ino); 53662306a36Sopenharmony_ci return -EINVAL; 53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci return 0; 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci/** 54362306a36Sopenharmony_ci * ext4_move_extents - Exchange the specified range of a file 54462306a36Sopenharmony_ci * 54562306a36Sopenharmony_ci * @o_filp: file structure of the original file 54662306a36Sopenharmony_ci * @d_filp: file structure of the donor file 54762306a36Sopenharmony_ci * @orig_blk: start offset in block for orig 54862306a36Sopenharmony_ci * @donor_blk: start offset in block for donor 54962306a36Sopenharmony_ci * @len: the number of blocks to be moved 55062306a36Sopenharmony_ci * @moved_len: moved block length 55162306a36Sopenharmony_ci * 55262306a36Sopenharmony_ci * This function returns 0 and moved block length is set in moved_len 55362306a36Sopenharmony_ci * if succeed, otherwise returns 
error value. 55462306a36Sopenharmony_ci * 55562306a36Sopenharmony_ci */ 55662306a36Sopenharmony_ciint 55762306a36Sopenharmony_ciext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, 55862306a36Sopenharmony_ci __u64 donor_blk, __u64 len, __u64 *moved_len) 55962306a36Sopenharmony_ci{ 56062306a36Sopenharmony_ci struct inode *orig_inode = file_inode(o_filp); 56162306a36Sopenharmony_ci struct inode *donor_inode = file_inode(d_filp); 56262306a36Sopenharmony_ci struct ext4_ext_path *path = NULL; 56362306a36Sopenharmony_ci int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits; 56462306a36Sopenharmony_ci ext4_lblk_t o_end, o_start = orig_blk; 56562306a36Sopenharmony_ci ext4_lblk_t d_start = donor_blk; 56662306a36Sopenharmony_ci int ret; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci if (orig_inode->i_sb != donor_inode->i_sb) { 56962306a36Sopenharmony_ci ext4_debug("ext4 move extent: The argument files " 57062306a36Sopenharmony_ci "should be in same FS [ino:orig %lu, donor %lu]\n", 57162306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 57262306a36Sopenharmony_ci return -EINVAL; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci /* orig and donor should be different inodes */ 57662306a36Sopenharmony_ci if (orig_inode == donor_inode) { 57762306a36Sopenharmony_ci ext4_debug("ext4 move extent: The argument files should not " 57862306a36Sopenharmony_ci "be same inode [ino:orig %lu, donor %lu]\n", 57962306a36Sopenharmony_ci orig_inode->i_ino, donor_inode->i_ino); 58062306a36Sopenharmony_ci return -EINVAL; 58162306a36Sopenharmony_ci } 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci /* Regular file check */ 58462306a36Sopenharmony_ci if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { 58562306a36Sopenharmony_ci ext4_debug("ext4 move extent: The argument files should be " 58662306a36Sopenharmony_ci "regular file [ino:orig %lu, donor %lu]\n", 58762306a36Sopenharmony_ci orig_inode->i_ino, 
donor_inode->i_ino); 58862306a36Sopenharmony_ci return -EINVAL; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci /* TODO: it's not obvious how to swap blocks for inodes with full 59262306a36Sopenharmony_ci journaling enabled */ 59362306a36Sopenharmony_ci if (ext4_should_journal_data(orig_inode) || 59462306a36Sopenharmony_ci ext4_should_journal_data(donor_inode)) { 59562306a36Sopenharmony_ci ext4_msg(orig_inode->i_sb, KERN_ERR, 59662306a36Sopenharmony_ci "Online defrag not supported with data journaling"); 59762306a36Sopenharmony_ci return -EOPNOTSUPP; 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) { 60162306a36Sopenharmony_ci ext4_msg(orig_inode->i_sb, KERN_ERR, 60262306a36Sopenharmony_ci "Online defrag not supported for encrypted files"); 60362306a36Sopenharmony_ci return -EOPNOTSUPP; 60462306a36Sopenharmony_ci } 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci /* Protect orig and donor inodes against a truncate */ 60762306a36Sopenharmony_ci lock_two_nondirectories(orig_inode, donor_inode); 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci /* Wait for all existing dio workers */ 61062306a36Sopenharmony_ci inode_dio_wait(orig_inode); 61162306a36Sopenharmony_ci inode_dio_wait(donor_inode); 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci /* Protect extent tree against block allocations via delalloc */ 61462306a36Sopenharmony_ci ext4_double_down_write_data_sem(orig_inode, donor_inode); 61562306a36Sopenharmony_ci /* Check the filesystem environment whether move_extent can be done */ 61662306a36Sopenharmony_ci ret = mext_check_arguments(orig_inode, donor_inode, orig_blk, 61762306a36Sopenharmony_ci donor_blk, &len); 61862306a36Sopenharmony_ci if (ret) 61962306a36Sopenharmony_ci goto out; 62062306a36Sopenharmony_ci o_end = o_start + len; 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci *moved_len = 0; 
62362306a36Sopenharmony_ci while (o_start < o_end) { 62462306a36Sopenharmony_ci struct ext4_extent *ex; 62562306a36Sopenharmony_ci ext4_lblk_t cur_blk, next_blk; 62662306a36Sopenharmony_ci pgoff_t orig_page_index, donor_page_index; 62762306a36Sopenharmony_ci int offset_in_page; 62862306a36Sopenharmony_ci int unwritten, cur_len; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci ret = get_ext_path(orig_inode, o_start, &path); 63162306a36Sopenharmony_ci if (ret) 63262306a36Sopenharmony_ci goto out; 63362306a36Sopenharmony_ci ex = path[path->p_depth].p_ext; 63462306a36Sopenharmony_ci cur_blk = le32_to_cpu(ex->ee_block); 63562306a36Sopenharmony_ci cur_len = ext4_ext_get_actual_len(ex); 63662306a36Sopenharmony_ci /* Check hole before the start pos */ 63762306a36Sopenharmony_ci if (cur_blk + cur_len - 1 < o_start) { 63862306a36Sopenharmony_ci next_blk = ext4_ext_next_allocated_block(path); 63962306a36Sopenharmony_ci if (next_blk == EXT_MAX_BLOCKS) { 64062306a36Sopenharmony_ci ret = -ENODATA; 64162306a36Sopenharmony_ci goto out; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci d_start += next_blk - o_start; 64462306a36Sopenharmony_ci o_start = next_blk; 64562306a36Sopenharmony_ci continue; 64662306a36Sopenharmony_ci /* Check hole after the start pos */ 64762306a36Sopenharmony_ci } else if (cur_blk > o_start) { 64862306a36Sopenharmony_ci /* Skip hole */ 64962306a36Sopenharmony_ci d_start += cur_blk - o_start; 65062306a36Sopenharmony_ci o_start = cur_blk; 65162306a36Sopenharmony_ci /* Extent inside requested range ?*/ 65262306a36Sopenharmony_ci if (cur_blk >= o_end) 65362306a36Sopenharmony_ci goto out; 65462306a36Sopenharmony_ci } else { /* in_range(o_start, o_blk, o_len) */ 65562306a36Sopenharmony_ci cur_len += cur_blk - o_start; 65662306a36Sopenharmony_ci } 65762306a36Sopenharmony_ci unwritten = ext4_ext_is_unwritten(ex); 65862306a36Sopenharmony_ci if (o_end - o_start < cur_len) 65962306a36Sopenharmony_ci cur_len = o_end - o_start; 66062306a36Sopenharmony_ci 
66162306a36Sopenharmony_ci orig_page_index = o_start >> (PAGE_SHIFT - 66262306a36Sopenharmony_ci orig_inode->i_blkbits); 66362306a36Sopenharmony_ci donor_page_index = d_start >> (PAGE_SHIFT - 66462306a36Sopenharmony_ci donor_inode->i_blkbits); 66562306a36Sopenharmony_ci offset_in_page = o_start % blocks_per_page; 66662306a36Sopenharmony_ci if (cur_len > blocks_per_page - offset_in_page) 66762306a36Sopenharmony_ci cur_len = blocks_per_page - offset_in_page; 66862306a36Sopenharmony_ci /* 66962306a36Sopenharmony_ci * Up semaphore to avoid following problems: 67062306a36Sopenharmony_ci * a. transaction deadlock among ext4_journal_start, 67162306a36Sopenharmony_ci * ->write_begin via pagefault, and jbd2_journal_commit 67262306a36Sopenharmony_ci * b. racing with ->read_folio, ->write_begin, and 67362306a36Sopenharmony_ci * ext4_get_block in move_extent_per_page 67462306a36Sopenharmony_ci */ 67562306a36Sopenharmony_ci ext4_double_up_write_data_sem(orig_inode, donor_inode); 67662306a36Sopenharmony_ci /* Swap original branches with new branches */ 67762306a36Sopenharmony_ci *moved_len += move_extent_per_page(o_filp, donor_inode, 67862306a36Sopenharmony_ci orig_page_index, donor_page_index, 67962306a36Sopenharmony_ci offset_in_page, cur_len, 68062306a36Sopenharmony_ci unwritten, &ret); 68162306a36Sopenharmony_ci ext4_double_down_write_data_sem(orig_inode, donor_inode); 68262306a36Sopenharmony_ci if (ret < 0) 68362306a36Sopenharmony_ci break; 68462306a36Sopenharmony_ci o_start += cur_len; 68562306a36Sopenharmony_ci d_start += cur_len; 68662306a36Sopenharmony_ci } 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ciout: 68962306a36Sopenharmony_ci if (*moved_len) { 69062306a36Sopenharmony_ci ext4_discard_preallocations(orig_inode, 0); 69162306a36Sopenharmony_ci ext4_discard_preallocations(donor_inode, 0); 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci ext4_free_ext_path(path); 69562306a36Sopenharmony_ci 
ext4_double_up_write_data_sem(orig_inode, donor_inode); 69662306a36Sopenharmony_ci unlock_two_nondirectories(orig_inode, donor_inode); 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci return ret; 69962306a36Sopenharmony_ci} 700