18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2007 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/fs.h> 78c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 88c2ecf20Sopenharmony_ci#include <linux/time.h> 98c2ecf20Sopenharmony_ci#include <linux/init.h> 108c2ecf20Sopenharmony_ci#include <linux/string.h> 118c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 128c2ecf20Sopenharmony_ci#include <linux/falloc.h> 138c2ecf20Sopenharmony_ci#include <linux/writeback.h> 148c2ecf20Sopenharmony_ci#include <linux/compat.h> 158c2ecf20Sopenharmony_ci#include <linux/slab.h> 168c2ecf20Sopenharmony_ci#include <linux/btrfs.h> 178c2ecf20Sopenharmony_ci#include <linux/uio.h> 188c2ecf20Sopenharmony_ci#include <linux/iversion.h> 198c2ecf20Sopenharmony_ci#include "ctree.h" 208c2ecf20Sopenharmony_ci#include "disk-io.h" 218c2ecf20Sopenharmony_ci#include "transaction.h" 228c2ecf20Sopenharmony_ci#include "btrfs_inode.h" 238c2ecf20Sopenharmony_ci#include "print-tree.h" 248c2ecf20Sopenharmony_ci#include "tree-log.h" 258c2ecf20Sopenharmony_ci#include "locking.h" 268c2ecf20Sopenharmony_ci#include "volumes.h" 278c2ecf20Sopenharmony_ci#include "qgroup.h" 288c2ecf20Sopenharmony_ci#include "compression.h" 298c2ecf20Sopenharmony_ci#include "delalloc-space.h" 308c2ecf20Sopenharmony_ci#include "reflink.h" 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_cistatic struct kmem_cache *btrfs_inode_defrag_cachep; 338c2ecf20Sopenharmony_ci/* 348c2ecf20Sopenharmony_ci * when auto defrag is enabled we 358c2ecf20Sopenharmony_ci * queue up these defrag structs to remember which 368c2ecf20Sopenharmony_ci * inodes need defragging passes 378c2ecf20Sopenharmony_ci */ 388c2ecf20Sopenharmony_cistruct inode_defrag { 398c2ecf20Sopenharmony_ci struct rb_node rb_node; 408c2ecf20Sopenharmony_ci /* objectid */ 418c2ecf20Sopenharmony_ci u64 ino; 428c2ecf20Sopenharmony_ci /* 
438c2ecf20Sopenharmony_ci * transid where the defrag was added, we search for 448c2ecf20Sopenharmony_ci * extents newer than this 458c2ecf20Sopenharmony_ci */ 468c2ecf20Sopenharmony_ci u64 transid; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci /* root objectid */ 498c2ecf20Sopenharmony_ci u64 root; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci /* last offset we were able to defrag */ 528c2ecf20Sopenharmony_ci u64 last_offset; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci /* if we've wrapped around back to zero once already */ 558c2ecf20Sopenharmony_ci int cycled; 568c2ecf20Sopenharmony_ci}; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_cistatic int __compare_inode_defrag(struct inode_defrag *defrag1, 598c2ecf20Sopenharmony_ci struct inode_defrag *defrag2) 608c2ecf20Sopenharmony_ci{ 618c2ecf20Sopenharmony_ci if (defrag1->root > defrag2->root) 628c2ecf20Sopenharmony_ci return 1; 638c2ecf20Sopenharmony_ci else if (defrag1->root < defrag2->root) 648c2ecf20Sopenharmony_ci return -1; 658c2ecf20Sopenharmony_ci else if (defrag1->ino > defrag2->ino) 668c2ecf20Sopenharmony_ci return 1; 678c2ecf20Sopenharmony_ci else if (defrag1->ino < defrag2->ino) 688c2ecf20Sopenharmony_ci return -1; 698c2ecf20Sopenharmony_ci else 708c2ecf20Sopenharmony_ci return 0; 718c2ecf20Sopenharmony_ci} 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci/* pop a record for an inode into the defrag tree. 
The lock 748c2ecf20Sopenharmony_ci * must be held already 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * If you're inserting a record for an older transid than an 778c2ecf20Sopenharmony_ci * existing record, the transid already in the tree is lowered 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci * If an existing record is found the defrag item you 808c2ecf20Sopenharmony_ci * pass in is freed 818c2ecf20Sopenharmony_ci */ 828c2ecf20Sopenharmony_cistatic int __btrfs_add_inode_defrag(struct btrfs_inode *inode, 838c2ecf20Sopenharmony_ci struct inode_defrag *defrag) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 868c2ecf20Sopenharmony_ci struct inode_defrag *entry; 878c2ecf20Sopenharmony_ci struct rb_node **p; 888c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 898c2ecf20Sopenharmony_ci int ret; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci p = &fs_info->defrag_inodes.rb_node; 928c2ecf20Sopenharmony_ci while (*p) { 938c2ecf20Sopenharmony_ci parent = *p; 948c2ecf20Sopenharmony_ci entry = rb_entry(parent, struct inode_defrag, rb_node); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci ret = __compare_inode_defrag(defrag, entry); 978c2ecf20Sopenharmony_ci if (ret < 0) 988c2ecf20Sopenharmony_ci p = &parent->rb_left; 998c2ecf20Sopenharmony_ci else if (ret > 0) 1008c2ecf20Sopenharmony_ci p = &parent->rb_right; 1018c2ecf20Sopenharmony_ci else { 1028c2ecf20Sopenharmony_ci /* if we're reinserting an entry for 1038c2ecf20Sopenharmony_ci * an old defrag run, make sure to 1048c2ecf20Sopenharmony_ci * lower the transid of our existing record 1058c2ecf20Sopenharmony_ci */ 1068c2ecf20Sopenharmony_ci if (defrag->transid < entry->transid) 1078c2ecf20Sopenharmony_ci entry->transid = defrag->transid; 1088c2ecf20Sopenharmony_ci if (defrag->last_offset > entry->last_offset) 1098c2ecf20Sopenharmony_ci entry->last_offset = defrag->last_offset; 1108c2ecf20Sopenharmony_ci return -EEXIST; 
1118c2ecf20Sopenharmony_ci } 1128c2ecf20Sopenharmony_ci } 1138c2ecf20Sopenharmony_ci set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags); 1148c2ecf20Sopenharmony_ci rb_link_node(&defrag->rb_node, parent, p); 1158c2ecf20Sopenharmony_ci rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes); 1168c2ecf20Sopenharmony_ci return 0; 1178c2ecf20Sopenharmony_ci} 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_cistatic inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) 1208c2ecf20Sopenharmony_ci{ 1218c2ecf20Sopenharmony_ci if (!btrfs_test_opt(fs_info, AUTO_DEFRAG)) 1228c2ecf20Sopenharmony_ci return 0; 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci if (btrfs_fs_closing(fs_info)) 1258c2ecf20Sopenharmony_ci return 0; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci return 1; 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci/* 1318c2ecf20Sopenharmony_ci * insert a defrag record for this inode if auto defrag is 1328c2ecf20Sopenharmony_ci * enabled 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_ciint btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 1358c2ecf20Sopenharmony_ci struct btrfs_inode *inode) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 1388c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 1398c2ecf20Sopenharmony_ci struct inode_defrag *defrag; 1408c2ecf20Sopenharmony_ci u64 transid; 1418c2ecf20Sopenharmony_ci int ret; 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci if (!__need_auto_defrag(fs_info)) 1448c2ecf20Sopenharmony_ci return 0; 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) 1478c2ecf20Sopenharmony_ci return 0; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci if (trans) 1508c2ecf20Sopenharmony_ci transid = trans->transid; 1518c2ecf20Sopenharmony_ci else 1528c2ecf20Sopenharmony_ci transid = inode->root->last_trans; 1538c2ecf20Sopenharmony_ci 
1548c2ecf20Sopenharmony_ci defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); 1558c2ecf20Sopenharmony_ci if (!defrag) 1568c2ecf20Sopenharmony_ci return -ENOMEM; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci defrag->ino = btrfs_ino(inode); 1598c2ecf20Sopenharmony_ci defrag->transid = transid; 1608c2ecf20Sopenharmony_ci defrag->root = root->root_key.objectid; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci spin_lock(&fs_info->defrag_inodes_lock); 1638c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { 1648c2ecf20Sopenharmony_ci /* 1658c2ecf20Sopenharmony_ci * If we set IN_DEFRAG flag and evict the inode from memory, 1668c2ecf20Sopenharmony_ci * and then re-read this inode, this new inode doesn't have 1678c2ecf20Sopenharmony_ci * IN_DEFRAG flag. At the case, we may find the existed defrag. 1688c2ecf20Sopenharmony_ci */ 1698c2ecf20Sopenharmony_ci ret = __btrfs_add_inode_defrag(inode, defrag); 1708c2ecf20Sopenharmony_ci if (ret) 1718c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 1728c2ecf20Sopenharmony_ci } else { 1738c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci spin_unlock(&fs_info->defrag_inodes_lock); 1768c2ecf20Sopenharmony_ci return 0; 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci/* 1808c2ecf20Sopenharmony_ci * Requeue the defrag object. If there is a defrag object that points to 1818c2ecf20Sopenharmony_ci * the same inode in the tree, we will merge them together (by 1828c2ecf20Sopenharmony_ci * __btrfs_add_inode_defrag()) and free the one that we want to requeue. 
1838c2ecf20Sopenharmony_ci */ 1848c2ecf20Sopenharmony_cistatic void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, 1858c2ecf20Sopenharmony_ci struct inode_defrag *defrag) 1868c2ecf20Sopenharmony_ci{ 1878c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 1888c2ecf20Sopenharmony_ci int ret; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci if (!__need_auto_defrag(fs_info)) 1918c2ecf20Sopenharmony_ci goto out; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci /* 1948c2ecf20Sopenharmony_ci * Here we don't check the IN_DEFRAG flag, because we need merge 1958c2ecf20Sopenharmony_ci * them together. 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_ci spin_lock(&fs_info->defrag_inodes_lock); 1988c2ecf20Sopenharmony_ci ret = __btrfs_add_inode_defrag(inode, defrag); 1998c2ecf20Sopenharmony_ci spin_unlock(&fs_info->defrag_inodes_lock); 2008c2ecf20Sopenharmony_ci if (ret) 2018c2ecf20Sopenharmony_ci goto out; 2028c2ecf20Sopenharmony_ci return; 2038c2ecf20Sopenharmony_ciout: 2048c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 2058c2ecf20Sopenharmony_ci} 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci/* 2088c2ecf20Sopenharmony_ci * pick the defragable inode that we want, if it doesn't exist, we will get 2098c2ecf20Sopenharmony_ci * the next one. 
2108c2ecf20Sopenharmony_ci */ 2118c2ecf20Sopenharmony_cistatic struct inode_defrag * 2128c2ecf20Sopenharmony_cibtrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino) 2138c2ecf20Sopenharmony_ci{ 2148c2ecf20Sopenharmony_ci struct inode_defrag *entry = NULL; 2158c2ecf20Sopenharmony_ci struct inode_defrag tmp; 2168c2ecf20Sopenharmony_ci struct rb_node *p; 2178c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 2188c2ecf20Sopenharmony_ci int ret; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci tmp.ino = ino; 2218c2ecf20Sopenharmony_ci tmp.root = root; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci spin_lock(&fs_info->defrag_inodes_lock); 2248c2ecf20Sopenharmony_ci p = fs_info->defrag_inodes.rb_node; 2258c2ecf20Sopenharmony_ci while (p) { 2268c2ecf20Sopenharmony_ci parent = p; 2278c2ecf20Sopenharmony_ci entry = rb_entry(parent, struct inode_defrag, rb_node); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci ret = __compare_inode_defrag(&tmp, entry); 2308c2ecf20Sopenharmony_ci if (ret < 0) 2318c2ecf20Sopenharmony_ci p = parent->rb_left; 2328c2ecf20Sopenharmony_ci else if (ret > 0) 2338c2ecf20Sopenharmony_ci p = parent->rb_right; 2348c2ecf20Sopenharmony_ci else 2358c2ecf20Sopenharmony_ci goto out; 2368c2ecf20Sopenharmony_ci } 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci if (parent && __compare_inode_defrag(&tmp, entry) > 0) { 2398c2ecf20Sopenharmony_ci parent = rb_next(parent); 2408c2ecf20Sopenharmony_ci if (parent) 2418c2ecf20Sopenharmony_ci entry = rb_entry(parent, struct inode_defrag, rb_node); 2428c2ecf20Sopenharmony_ci else 2438c2ecf20Sopenharmony_ci entry = NULL; 2448c2ecf20Sopenharmony_ci } 2458c2ecf20Sopenharmony_ciout: 2468c2ecf20Sopenharmony_ci if (entry) 2478c2ecf20Sopenharmony_ci rb_erase(parent, &fs_info->defrag_inodes); 2488c2ecf20Sopenharmony_ci spin_unlock(&fs_info->defrag_inodes_lock); 2498c2ecf20Sopenharmony_ci return entry; 2508c2ecf20Sopenharmony_ci} 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_civoid 
btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci struct inode_defrag *defrag; 2558c2ecf20Sopenharmony_ci struct rb_node *node; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci spin_lock(&fs_info->defrag_inodes_lock); 2588c2ecf20Sopenharmony_ci node = rb_first(&fs_info->defrag_inodes); 2598c2ecf20Sopenharmony_ci while (node) { 2608c2ecf20Sopenharmony_ci rb_erase(node, &fs_info->defrag_inodes); 2618c2ecf20Sopenharmony_ci defrag = rb_entry(node, struct inode_defrag, rb_node); 2628c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci cond_resched_lock(&fs_info->defrag_inodes_lock); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci node = rb_first(&fs_info->defrag_inodes); 2678c2ecf20Sopenharmony_ci } 2688c2ecf20Sopenharmony_ci spin_unlock(&fs_info->defrag_inodes_lock); 2698c2ecf20Sopenharmony_ci} 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci#define BTRFS_DEFRAG_BATCH 1024 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_cistatic int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, 2748c2ecf20Sopenharmony_ci struct inode_defrag *defrag) 2758c2ecf20Sopenharmony_ci{ 2768c2ecf20Sopenharmony_ci struct btrfs_root *inode_root; 2778c2ecf20Sopenharmony_ci struct inode *inode; 2788c2ecf20Sopenharmony_ci struct btrfs_ioctl_defrag_range_args range; 2798c2ecf20Sopenharmony_ci int num_defrag; 2808c2ecf20Sopenharmony_ci int ret; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci /* get the inode */ 2838c2ecf20Sopenharmony_ci inode_root = btrfs_get_fs_root(fs_info, defrag->root, true); 2848c2ecf20Sopenharmony_ci if (IS_ERR(inode_root)) { 2858c2ecf20Sopenharmony_ci ret = PTR_ERR(inode_root); 2868c2ecf20Sopenharmony_ci goto cleanup; 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root); 2908c2ecf20Sopenharmony_ci 
btrfs_put_root(inode_root); 2918c2ecf20Sopenharmony_ci if (IS_ERR(inode)) { 2928c2ecf20Sopenharmony_ci ret = PTR_ERR(inode); 2938c2ecf20Sopenharmony_ci goto cleanup; 2948c2ecf20Sopenharmony_ci } 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci /* do a chunk of defrag */ 2978c2ecf20Sopenharmony_ci clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 2988c2ecf20Sopenharmony_ci memset(&range, 0, sizeof(range)); 2998c2ecf20Sopenharmony_ci range.len = (u64)-1; 3008c2ecf20Sopenharmony_ci range.start = defrag->last_offset; 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci sb_start_write(fs_info->sb); 3038c2ecf20Sopenharmony_ci num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, 3048c2ecf20Sopenharmony_ci BTRFS_DEFRAG_BATCH); 3058c2ecf20Sopenharmony_ci sb_end_write(fs_info->sb); 3068c2ecf20Sopenharmony_ci /* 3078c2ecf20Sopenharmony_ci * if we filled the whole defrag batch, there 3088c2ecf20Sopenharmony_ci * must be more work to do. Queue this defrag 3098c2ecf20Sopenharmony_ci * again 3108c2ecf20Sopenharmony_ci */ 3118c2ecf20Sopenharmony_ci if (num_defrag == BTRFS_DEFRAG_BATCH) { 3128c2ecf20Sopenharmony_ci defrag->last_offset = range.start; 3138c2ecf20Sopenharmony_ci btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); 3148c2ecf20Sopenharmony_ci } else if (defrag->last_offset && !defrag->cycled) { 3158c2ecf20Sopenharmony_ci /* 3168c2ecf20Sopenharmony_ci * we didn't fill our defrag batch, but 3178c2ecf20Sopenharmony_ci * we didn't start at zero. Make sure we loop 3188c2ecf20Sopenharmony_ci * around to the start of the file. 
3198c2ecf20Sopenharmony_ci */ 3208c2ecf20Sopenharmony_ci defrag->last_offset = 0; 3218c2ecf20Sopenharmony_ci defrag->cycled = 1; 3228c2ecf20Sopenharmony_ci btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); 3238c2ecf20Sopenharmony_ci } else { 3248c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 3258c2ecf20Sopenharmony_ci } 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci iput(inode); 3288c2ecf20Sopenharmony_ci return 0; 3298c2ecf20Sopenharmony_cicleanup: 3308c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 3318c2ecf20Sopenharmony_ci return ret; 3328c2ecf20Sopenharmony_ci} 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci/* 3358c2ecf20Sopenharmony_ci * run through the list of inodes in the FS that need 3368c2ecf20Sopenharmony_ci * defragging 3378c2ecf20Sopenharmony_ci */ 3388c2ecf20Sopenharmony_ciint btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) 3398c2ecf20Sopenharmony_ci{ 3408c2ecf20Sopenharmony_ci struct inode_defrag *defrag; 3418c2ecf20Sopenharmony_ci u64 first_ino = 0; 3428c2ecf20Sopenharmony_ci u64 root_objectid = 0; 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci atomic_inc(&fs_info->defrag_running); 3458c2ecf20Sopenharmony_ci while (1) { 3468c2ecf20Sopenharmony_ci /* Pause the auto defragger. 
*/ 3478c2ecf20Sopenharmony_ci if (test_bit(BTRFS_FS_STATE_REMOUNTING, 3488c2ecf20Sopenharmony_ci &fs_info->fs_state)) 3498c2ecf20Sopenharmony_ci break; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci if (!__need_auto_defrag(fs_info)) 3528c2ecf20Sopenharmony_ci break; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci /* find an inode to defrag */ 3558c2ecf20Sopenharmony_ci defrag = btrfs_pick_defrag_inode(fs_info, root_objectid, 3568c2ecf20Sopenharmony_ci first_ino); 3578c2ecf20Sopenharmony_ci if (!defrag) { 3588c2ecf20Sopenharmony_ci if (root_objectid || first_ino) { 3598c2ecf20Sopenharmony_ci root_objectid = 0; 3608c2ecf20Sopenharmony_ci first_ino = 0; 3618c2ecf20Sopenharmony_ci continue; 3628c2ecf20Sopenharmony_ci } else { 3638c2ecf20Sopenharmony_ci break; 3648c2ecf20Sopenharmony_ci } 3658c2ecf20Sopenharmony_ci } 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci first_ino = defrag->ino + 1; 3688c2ecf20Sopenharmony_ci root_objectid = defrag->root; 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci __btrfs_run_defrag_inode(fs_info, defrag); 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci atomic_dec(&fs_info->defrag_running); 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci /* 3758c2ecf20Sopenharmony_ci * during unmount, we use the transaction_wait queue to 3768c2ecf20Sopenharmony_ci * wait for the defragger to stop 3778c2ecf20Sopenharmony_ci */ 3788c2ecf20Sopenharmony_ci wake_up(&fs_info->transaction_wait); 3798c2ecf20Sopenharmony_ci return 0; 3808c2ecf20Sopenharmony_ci} 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci/* simple helper to fault in pages and copy. This should go away 3838c2ecf20Sopenharmony_ci * and be replaced with calls into generic code. 
3848c2ecf20Sopenharmony_ci */ 3858c2ecf20Sopenharmony_cistatic noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, 3868c2ecf20Sopenharmony_ci struct page **prepared_pages, 3878c2ecf20Sopenharmony_ci struct iov_iter *i) 3888c2ecf20Sopenharmony_ci{ 3898c2ecf20Sopenharmony_ci size_t copied = 0; 3908c2ecf20Sopenharmony_ci size_t total_copied = 0; 3918c2ecf20Sopenharmony_ci int pg = 0; 3928c2ecf20Sopenharmony_ci int offset = offset_in_page(pos); 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci while (write_bytes > 0) { 3958c2ecf20Sopenharmony_ci size_t count = min_t(size_t, 3968c2ecf20Sopenharmony_ci PAGE_SIZE - offset, write_bytes); 3978c2ecf20Sopenharmony_ci struct page *page = prepared_pages[pg]; 3988c2ecf20Sopenharmony_ci /* 3998c2ecf20Sopenharmony_ci * Copy data from userspace to the current page 4008c2ecf20Sopenharmony_ci */ 4018c2ecf20Sopenharmony_ci copied = iov_iter_copy_from_user_atomic(page, i, offset, count); 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci /* Flush processor's dcache for this page */ 4048c2ecf20Sopenharmony_ci flush_dcache_page(page); 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci /* 4078c2ecf20Sopenharmony_ci * if we get a partial write, we can end up with 4088c2ecf20Sopenharmony_ci * partially up to date pages. These add 4098c2ecf20Sopenharmony_ci * a lot of complexity, so make sure they don't 4108c2ecf20Sopenharmony_ci * happen by forcing this copy to be retried. 4118c2ecf20Sopenharmony_ci * 4128c2ecf20Sopenharmony_ci * The rest of the btrfs_file_write code will fall 4138c2ecf20Sopenharmony_ci * back to page at a time copies after we return 0. 
4148c2ecf20Sopenharmony_ci */ 4158c2ecf20Sopenharmony_ci if (!PageUptodate(page) && copied < count) 4168c2ecf20Sopenharmony_ci copied = 0; 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci iov_iter_advance(i, copied); 4198c2ecf20Sopenharmony_ci write_bytes -= copied; 4208c2ecf20Sopenharmony_ci total_copied += copied; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci /* Return to btrfs_file_write_iter to fault page */ 4238c2ecf20Sopenharmony_ci if (unlikely(copied == 0)) 4248c2ecf20Sopenharmony_ci break; 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ci if (copied < PAGE_SIZE - offset) { 4278c2ecf20Sopenharmony_ci offset += copied; 4288c2ecf20Sopenharmony_ci } else { 4298c2ecf20Sopenharmony_ci pg++; 4308c2ecf20Sopenharmony_ci offset = 0; 4318c2ecf20Sopenharmony_ci } 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci return total_copied; 4348c2ecf20Sopenharmony_ci} 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci/* 4378c2ecf20Sopenharmony_ci * unlocks pages after btrfs_file_write is done with them 4388c2ecf20Sopenharmony_ci */ 4398c2ecf20Sopenharmony_cistatic void btrfs_drop_pages(struct page **pages, size_t num_pages) 4408c2ecf20Sopenharmony_ci{ 4418c2ecf20Sopenharmony_ci size_t i; 4428c2ecf20Sopenharmony_ci for (i = 0; i < num_pages; i++) { 4438c2ecf20Sopenharmony_ci /* page checked is some magic around finding pages that 4448c2ecf20Sopenharmony_ci * have been modified without going through btrfs_set_page_dirty 4458c2ecf20Sopenharmony_ci * clear it here. 
There should be no need to mark the pages 4468c2ecf20Sopenharmony_ci * accessed as prepare_pages should have marked them accessed 4478c2ecf20Sopenharmony_ci * in prepare_pages via find_or_create_page() 4488c2ecf20Sopenharmony_ci */ 4498c2ecf20Sopenharmony_ci ClearPageChecked(pages[i]); 4508c2ecf20Sopenharmony_ci unlock_page(pages[i]); 4518c2ecf20Sopenharmony_ci put_page(pages[i]); 4528c2ecf20Sopenharmony_ci } 4538c2ecf20Sopenharmony_ci} 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_ci/* 4568c2ecf20Sopenharmony_ci * after copy_from_user, pages need to be dirtied and we need to make 4578c2ecf20Sopenharmony_ci * sure holes are created between the current EOF and the start of 4588c2ecf20Sopenharmony_ci * any next extents (if required). 4598c2ecf20Sopenharmony_ci * 4608c2ecf20Sopenharmony_ci * this also makes the decision about creating an inline extent vs 4618c2ecf20Sopenharmony_ci * doing real data extents, marking pages dirty and delalloc as required. 4628c2ecf20Sopenharmony_ci */ 4638c2ecf20Sopenharmony_ciint btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, 4648c2ecf20Sopenharmony_ci size_t num_pages, loff_t pos, size_t write_bytes, 4658c2ecf20Sopenharmony_ci struct extent_state **cached) 4668c2ecf20Sopenharmony_ci{ 4678c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 4688c2ecf20Sopenharmony_ci int err = 0; 4698c2ecf20Sopenharmony_ci int i; 4708c2ecf20Sopenharmony_ci u64 num_bytes; 4718c2ecf20Sopenharmony_ci u64 start_pos; 4728c2ecf20Sopenharmony_ci u64 end_of_last_block; 4738c2ecf20Sopenharmony_ci u64 end_pos = pos + write_bytes; 4748c2ecf20Sopenharmony_ci loff_t isize = i_size_read(&inode->vfs_inode); 4758c2ecf20Sopenharmony_ci unsigned int extra_bits = 0; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci start_pos = pos & ~((u64) fs_info->sectorsize - 1); 4788c2ecf20Sopenharmony_ci num_bytes = round_up(write_bytes + pos - start_pos, 4798c2ecf20Sopenharmony_ci fs_info->sectorsize); 4808c2ecf20Sopenharmony_ci 
4818c2ecf20Sopenharmony_ci end_of_last_block = start_pos + num_bytes - 1; 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci /* 4848c2ecf20Sopenharmony_ci * The pages may have already been dirty, clear out old accounting so 4858c2ecf20Sopenharmony_ci * we can set things up properly 4868c2ecf20Sopenharmony_ci */ 4878c2ecf20Sopenharmony_ci clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block, 4888c2ecf20Sopenharmony_ci EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 4898c2ecf20Sopenharmony_ci 0, 0, cached); 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 4928c2ecf20Sopenharmony_ci extra_bits, cached); 4938c2ecf20Sopenharmony_ci if (err) 4948c2ecf20Sopenharmony_ci return err; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci for (i = 0; i < num_pages; i++) { 4978c2ecf20Sopenharmony_ci struct page *p = pages[i]; 4988c2ecf20Sopenharmony_ci SetPageUptodate(p); 4998c2ecf20Sopenharmony_ci ClearPageChecked(p); 5008c2ecf20Sopenharmony_ci set_page_dirty(p); 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci /* 5048c2ecf20Sopenharmony_ci * we've only changed i_size in ram, and we haven't updated 5058c2ecf20Sopenharmony_ci * the disk i_size. There is no need to log the inode 5068c2ecf20Sopenharmony_ci * at this time. 5078c2ecf20Sopenharmony_ci */ 5088c2ecf20Sopenharmony_ci if (end_pos > isize) 5098c2ecf20Sopenharmony_ci i_size_write(&inode->vfs_inode, end_pos); 5108c2ecf20Sopenharmony_ci return 0; 5118c2ecf20Sopenharmony_ci} 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci/* 5148c2ecf20Sopenharmony_ci * this drops all the extents in the cache that intersect the range 5158c2ecf20Sopenharmony_ci * [start, end]. Existing extents are split as required. 
5168c2ecf20Sopenharmony_ci */ 5178c2ecf20Sopenharmony_civoid btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, 5188c2ecf20Sopenharmony_ci int skip_pinned) 5198c2ecf20Sopenharmony_ci{ 5208c2ecf20Sopenharmony_ci struct extent_map *em; 5218c2ecf20Sopenharmony_ci struct extent_map *split = NULL; 5228c2ecf20Sopenharmony_ci struct extent_map *split2 = NULL; 5238c2ecf20Sopenharmony_ci struct extent_map_tree *em_tree = &inode->extent_tree; 5248c2ecf20Sopenharmony_ci u64 len = end - start + 1; 5258c2ecf20Sopenharmony_ci u64 gen; 5268c2ecf20Sopenharmony_ci int ret; 5278c2ecf20Sopenharmony_ci int testend = 1; 5288c2ecf20Sopenharmony_ci unsigned long flags; 5298c2ecf20Sopenharmony_ci int compressed = 0; 5308c2ecf20Sopenharmony_ci bool modified; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci WARN_ON(end < start); 5338c2ecf20Sopenharmony_ci if (end == (u64)-1) { 5348c2ecf20Sopenharmony_ci len = (u64)-1; 5358c2ecf20Sopenharmony_ci testend = 0; 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci while (1) { 5388c2ecf20Sopenharmony_ci int no_splits = 0; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci modified = false; 5418c2ecf20Sopenharmony_ci if (!split) 5428c2ecf20Sopenharmony_ci split = alloc_extent_map(); 5438c2ecf20Sopenharmony_ci if (!split2) 5448c2ecf20Sopenharmony_ci split2 = alloc_extent_map(); 5458c2ecf20Sopenharmony_ci if (!split || !split2) 5468c2ecf20Sopenharmony_ci no_splits = 1; 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci write_lock(&em_tree->lock); 5498c2ecf20Sopenharmony_ci em = lookup_extent_mapping(em_tree, start, len); 5508c2ecf20Sopenharmony_ci if (!em) { 5518c2ecf20Sopenharmony_ci write_unlock(&em_tree->lock); 5528c2ecf20Sopenharmony_ci break; 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci flags = em->flags; 5558c2ecf20Sopenharmony_ci gen = em->generation; 5568c2ecf20Sopenharmony_ci if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 5578c2ecf20Sopenharmony_ci if (testend && em->start + 
em->len >= start + len) { 5588c2ecf20Sopenharmony_ci free_extent_map(em); 5598c2ecf20Sopenharmony_ci write_unlock(&em_tree->lock); 5608c2ecf20Sopenharmony_ci break; 5618c2ecf20Sopenharmony_ci } 5628c2ecf20Sopenharmony_ci start = em->start + em->len; 5638c2ecf20Sopenharmony_ci if (testend) 5648c2ecf20Sopenharmony_ci len = start + len - (em->start + em->len); 5658c2ecf20Sopenharmony_ci free_extent_map(em); 5668c2ecf20Sopenharmony_ci write_unlock(&em_tree->lock); 5678c2ecf20Sopenharmony_ci continue; 5688c2ecf20Sopenharmony_ci } 5698c2ecf20Sopenharmony_ci compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5708c2ecf20Sopenharmony_ci clear_bit(EXTENT_FLAG_PINNED, &em->flags); 5718c2ecf20Sopenharmony_ci clear_bit(EXTENT_FLAG_LOGGING, &flags); 5728c2ecf20Sopenharmony_ci modified = !list_empty(&em->list); 5738c2ecf20Sopenharmony_ci if (no_splits) 5748c2ecf20Sopenharmony_ci goto next; 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci if (em->start < start) { 5778c2ecf20Sopenharmony_ci split->start = em->start; 5788c2ecf20Sopenharmony_ci split->len = start - em->start; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci if (em->block_start < EXTENT_MAP_LAST_BYTE) { 5818c2ecf20Sopenharmony_ci split->orig_start = em->orig_start; 5828c2ecf20Sopenharmony_ci split->block_start = em->block_start; 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci if (compressed) 5858c2ecf20Sopenharmony_ci split->block_len = em->block_len; 5868c2ecf20Sopenharmony_ci else 5878c2ecf20Sopenharmony_ci split->block_len = split->len; 5888c2ecf20Sopenharmony_ci split->orig_block_len = max(split->block_len, 5898c2ecf20Sopenharmony_ci em->orig_block_len); 5908c2ecf20Sopenharmony_ci split->ram_bytes = em->ram_bytes; 5918c2ecf20Sopenharmony_ci } else { 5928c2ecf20Sopenharmony_ci split->orig_start = split->start; 5938c2ecf20Sopenharmony_ci split->block_len = 0; 5948c2ecf20Sopenharmony_ci split->block_start = em->block_start; 5958c2ecf20Sopenharmony_ci split->orig_block_len = 0; 
5968c2ecf20Sopenharmony_ci split->ram_bytes = split->len; 5978c2ecf20Sopenharmony_ci } 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci split->generation = gen; 6008c2ecf20Sopenharmony_ci split->flags = flags; 6018c2ecf20Sopenharmony_ci split->compress_type = em->compress_type; 6028c2ecf20Sopenharmony_ci replace_extent_mapping(em_tree, em, split, modified); 6038c2ecf20Sopenharmony_ci free_extent_map(split); 6048c2ecf20Sopenharmony_ci split = split2; 6058c2ecf20Sopenharmony_ci split2 = NULL; 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci if (testend && em->start + em->len > start + len) { 6088c2ecf20Sopenharmony_ci u64 diff = start + len - em->start; 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_ci split->start = start + len; 6118c2ecf20Sopenharmony_ci split->len = em->start + em->len - (start + len); 6128c2ecf20Sopenharmony_ci split->flags = flags; 6138c2ecf20Sopenharmony_ci split->compress_type = em->compress_type; 6148c2ecf20Sopenharmony_ci split->generation = gen; 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci if (em->block_start < EXTENT_MAP_LAST_BYTE) { 6178c2ecf20Sopenharmony_ci split->orig_block_len = max(em->block_len, 6188c2ecf20Sopenharmony_ci em->orig_block_len); 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci split->ram_bytes = em->ram_bytes; 6218c2ecf20Sopenharmony_ci if (compressed) { 6228c2ecf20Sopenharmony_ci split->block_len = em->block_len; 6238c2ecf20Sopenharmony_ci split->block_start = em->block_start; 6248c2ecf20Sopenharmony_ci split->orig_start = em->orig_start; 6258c2ecf20Sopenharmony_ci } else { 6268c2ecf20Sopenharmony_ci split->block_len = split->len; 6278c2ecf20Sopenharmony_ci split->block_start = em->block_start 6288c2ecf20Sopenharmony_ci + diff; 6298c2ecf20Sopenharmony_ci split->orig_start = em->orig_start; 6308c2ecf20Sopenharmony_ci } 6318c2ecf20Sopenharmony_ci } else { 6328c2ecf20Sopenharmony_ci split->ram_bytes = split->len; 6338c2ecf20Sopenharmony_ci split->orig_start = split->start; 
6348c2ecf20Sopenharmony_ci split->block_len = 0; 6358c2ecf20Sopenharmony_ci split->block_start = em->block_start; 6368c2ecf20Sopenharmony_ci split->orig_block_len = 0; 6378c2ecf20Sopenharmony_ci } 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci if (extent_map_in_tree(em)) { 6408c2ecf20Sopenharmony_ci replace_extent_mapping(em_tree, em, split, 6418c2ecf20Sopenharmony_ci modified); 6428c2ecf20Sopenharmony_ci } else { 6438c2ecf20Sopenharmony_ci ret = add_extent_mapping(em_tree, split, 6448c2ecf20Sopenharmony_ci modified); 6458c2ecf20Sopenharmony_ci ASSERT(ret == 0); /* Logic error */ 6468c2ecf20Sopenharmony_ci } 6478c2ecf20Sopenharmony_ci free_extent_map(split); 6488c2ecf20Sopenharmony_ci split = NULL; 6498c2ecf20Sopenharmony_ci } 6508c2ecf20Sopenharmony_cinext: 6518c2ecf20Sopenharmony_ci if (extent_map_in_tree(em)) 6528c2ecf20Sopenharmony_ci remove_extent_mapping(em_tree, em); 6538c2ecf20Sopenharmony_ci write_unlock(&em_tree->lock); 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci /* once for us */ 6568c2ecf20Sopenharmony_ci free_extent_map(em); 6578c2ecf20Sopenharmony_ci /* once for the tree*/ 6588c2ecf20Sopenharmony_ci free_extent_map(em); 6598c2ecf20Sopenharmony_ci } 6608c2ecf20Sopenharmony_ci if (split) 6618c2ecf20Sopenharmony_ci free_extent_map(split); 6628c2ecf20Sopenharmony_ci if (split2) 6638c2ecf20Sopenharmony_ci free_extent_map(split2); 6648c2ecf20Sopenharmony_ci} 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci/* 6678c2ecf20Sopenharmony_ci * this is very complex, but the basic idea is to drop all extents 6688c2ecf20Sopenharmony_ci * in the range start - end. hint_block is filled in with a block number 6698c2ecf20Sopenharmony_ci * that would be a good hint to the block allocator for this file. 6708c2ecf20Sopenharmony_ci * 6718c2ecf20Sopenharmony_ci * If an extent intersects the range but is not entirely inside the range 6728c2ecf20Sopenharmony_ci * it is either truncated or split. 
Anything entirely inside the range 6738c2ecf20Sopenharmony_ci * is deleted from the tree. 6748c2ecf20Sopenharmony_ci */ 6758c2ecf20Sopenharmony_ciint __btrfs_drop_extents(struct btrfs_trans_handle *trans, 6768c2ecf20Sopenharmony_ci struct btrfs_root *root, struct btrfs_inode *inode, 6778c2ecf20Sopenharmony_ci struct btrfs_path *path, u64 start, u64 end, 6788c2ecf20Sopenharmony_ci u64 *drop_end, int drop_cache, 6798c2ecf20Sopenharmony_ci int replace_extent, 6808c2ecf20Sopenharmony_ci u32 extent_item_size, 6818c2ecf20Sopenharmony_ci int *key_inserted) 6828c2ecf20Sopenharmony_ci{ 6838c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 6848c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 6858c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 6868c2ecf20Sopenharmony_ci struct btrfs_ref ref = { 0 }; 6878c2ecf20Sopenharmony_ci struct btrfs_key key; 6888c2ecf20Sopenharmony_ci struct btrfs_key new_key; 6898c2ecf20Sopenharmony_ci struct inode *vfs_inode = &inode->vfs_inode; 6908c2ecf20Sopenharmony_ci u64 ino = btrfs_ino(inode); 6918c2ecf20Sopenharmony_ci u64 search_start = start; 6928c2ecf20Sopenharmony_ci u64 disk_bytenr = 0; 6938c2ecf20Sopenharmony_ci u64 num_bytes = 0; 6948c2ecf20Sopenharmony_ci u64 extent_offset = 0; 6958c2ecf20Sopenharmony_ci u64 extent_end = 0; 6968c2ecf20Sopenharmony_ci u64 last_end = start; 6978c2ecf20Sopenharmony_ci int del_nr = 0; 6988c2ecf20Sopenharmony_ci int del_slot = 0; 6998c2ecf20Sopenharmony_ci int extent_type; 7008c2ecf20Sopenharmony_ci int recow; 7018c2ecf20Sopenharmony_ci int ret; 7028c2ecf20Sopenharmony_ci int modify_tree = -1; 7038c2ecf20Sopenharmony_ci int update_refs; 7048c2ecf20Sopenharmony_ci int found = 0; 7058c2ecf20Sopenharmony_ci int leafs_visited = 0; 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci if (drop_cache) 7088c2ecf20Sopenharmony_ci btrfs_drop_extent_cache(inode, start, end - 1, 0); 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci if (start >= inode->disk_i_size && !replace_extent) 
7118c2ecf20Sopenharmony_ci modify_tree = 0; 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); 7148c2ecf20Sopenharmony_ci while (1) { 7158c2ecf20Sopenharmony_ci recow = 0; 7168c2ecf20Sopenharmony_ci ret = btrfs_lookup_file_extent(trans, root, path, ino, 7178c2ecf20Sopenharmony_ci search_start, modify_tree); 7188c2ecf20Sopenharmony_ci if (ret < 0) 7198c2ecf20Sopenharmony_ci break; 7208c2ecf20Sopenharmony_ci if (ret > 0 && path->slots[0] > 0 && search_start == start) { 7218c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 7228c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 7238c2ecf20Sopenharmony_ci if (key.objectid == ino && 7248c2ecf20Sopenharmony_ci key.type == BTRFS_EXTENT_DATA_KEY) 7258c2ecf20Sopenharmony_ci path->slots[0]--; 7268c2ecf20Sopenharmony_ci } 7278c2ecf20Sopenharmony_ci ret = 0; 7288c2ecf20Sopenharmony_ci leafs_visited++; 7298c2ecf20Sopenharmony_cinext_slot: 7308c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 7318c2ecf20Sopenharmony_ci if (path->slots[0] >= btrfs_header_nritems(leaf)) { 7328c2ecf20Sopenharmony_ci BUG_ON(del_nr > 0); 7338c2ecf20Sopenharmony_ci ret = btrfs_next_leaf(root, path); 7348c2ecf20Sopenharmony_ci if (ret < 0) 7358c2ecf20Sopenharmony_ci break; 7368c2ecf20Sopenharmony_ci if (ret > 0) { 7378c2ecf20Sopenharmony_ci ret = 0; 7388c2ecf20Sopenharmony_ci break; 7398c2ecf20Sopenharmony_ci } 7408c2ecf20Sopenharmony_ci leafs_visited++; 7418c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 7428c2ecf20Sopenharmony_ci recow = 1; 7438c2ecf20Sopenharmony_ci } 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci if (key.objectid > ino) 7488c2ecf20Sopenharmony_ci break; 7498c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(key.objectid < ino) || 7508c2ecf20Sopenharmony_ci key.type < BTRFS_EXTENT_DATA_KEY) { 7518c2ecf20Sopenharmony_ci ASSERT(del_nr == 0); 
7528c2ecf20Sopenharmony_ci path->slots[0]++; 7538c2ecf20Sopenharmony_ci goto next_slot; 7548c2ecf20Sopenharmony_ci } 7558c2ecf20Sopenharmony_ci if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 7568c2ecf20Sopenharmony_ci break; 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 7598c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 7608c2ecf20Sopenharmony_ci extent_type = btrfs_file_extent_type(leaf, fi); 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci if (extent_type == BTRFS_FILE_EXTENT_REG || 7638c2ecf20Sopenharmony_ci extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 7648c2ecf20Sopenharmony_ci disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 7658c2ecf20Sopenharmony_ci num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 7668c2ecf20Sopenharmony_ci extent_offset = btrfs_file_extent_offset(leaf, fi); 7678c2ecf20Sopenharmony_ci extent_end = key.offset + 7688c2ecf20Sopenharmony_ci btrfs_file_extent_num_bytes(leaf, fi); 7698c2ecf20Sopenharmony_ci } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 7708c2ecf20Sopenharmony_ci extent_end = key.offset + 7718c2ecf20Sopenharmony_ci btrfs_file_extent_ram_bytes(leaf, fi); 7728c2ecf20Sopenharmony_ci } else { 7738c2ecf20Sopenharmony_ci /* can't happen */ 7748c2ecf20Sopenharmony_ci BUG(); 7758c2ecf20Sopenharmony_ci } 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci /* 7788c2ecf20Sopenharmony_ci * Don't skip extent items representing 0 byte lengths. They 7798c2ecf20Sopenharmony_ci * used to be created (bug) if while punching holes we hit 7808c2ecf20Sopenharmony_ci * -ENOSPC condition. So if we find one here, just ensure we 7818c2ecf20Sopenharmony_ci * delete it, otherwise we would insert a new file extent item 7828c2ecf20Sopenharmony_ci * with the same key (offset) as that 0 bytes length file 7838c2ecf20Sopenharmony_ci * extent item in the call to setup_items_for_insert() later 7848c2ecf20Sopenharmony_ci * in this function. 
7858c2ecf20Sopenharmony_ci */ 7868c2ecf20Sopenharmony_ci if (extent_end == key.offset && extent_end >= search_start) { 7878c2ecf20Sopenharmony_ci last_end = extent_end; 7888c2ecf20Sopenharmony_ci goto delete_extent_item; 7898c2ecf20Sopenharmony_ci } 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci if (extent_end <= search_start) { 7928c2ecf20Sopenharmony_ci path->slots[0]++; 7938c2ecf20Sopenharmony_ci goto next_slot; 7948c2ecf20Sopenharmony_ci } 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci found = 1; 7978c2ecf20Sopenharmony_ci search_start = max(key.offset, start); 7988c2ecf20Sopenharmony_ci if (recow || !modify_tree) { 7998c2ecf20Sopenharmony_ci modify_tree = -1; 8008c2ecf20Sopenharmony_ci btrfs_release_path(path); 8018c2ecf20Sopenharmony_ci continue; 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci /* 8058c2ecf20Sopenharmony_ci * | - range to drop - | 8068c2ecf20Sopenharmony_ci * | -------- extent -------- | 8078c2ecf20Sopenharmony_ci */ 8088c2ecf20Sopenharmony_ci if (start > key.offset && end < extent_end) { 8098c2ecf20Sopenharmony_ci BUG_ON(del_nr > 0); 8108c2ecf20Sopenharmony_ci if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 8118c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 8128c2ecf20Sopenharmony_ci break; 8138c2ecf20Sopenharmony_ci } 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci memcpy(&new_key, &key, sizeof(new_key)); 8168c2ecf20Sopenharmony_ci new_key.offset = start; 8178c2ecf20Sopenharmony_ci ret = btrfs_duplicate_item(trans, root, path, 8188c2ecf20Sopenharmony_ci &new_key); 8198c2ecf20Sopenharmony_ci if (ret == -EAGAIN) { 8208c2ecf20Sopenharmony_ci btrfs_release_path(path); 8218c2ecf20Sopenharmony_ci continue; 8228c2ecf20Sopenharmony_ci } 8238c2ecf20Sopenharmony_ci if (ret < 0) 8248c2ecf20Sopenharmony_ci break; 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 8278c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0] - 1, 8288c2ecf20Sopenharmony_ci struct 
btrfs_file_extent_item); 8298c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 8308c2ecf20Sopenharmony_ci start - key.offset); 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 8338c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_ci extent_offset += start - key.offset; 8368c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, extent_offset); 8378c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 8388c2ecf20Sopenharmony_ci extent_end - start); 8398c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci if (update_refs && disk_bytenr > 0) { 8428c2ecf20Sopenharmony_ci btrfs_init_generic_ref(&ref, 8438c2ecf20Sopenharmony_ci BTRFS_ADD_DELAYED_REF, 8448c2ecf20Sopenharmony_ci disk_bytenr, num_bytes, 0); 8458c2ecf20Sopenharmony_ci btrfs_init_data_ref(&ref, 8468c2ecf20Sopenharmony_ci root->root_key.objectid, 8478c2ecf20Sopenharmony_ci new_key.objectid, 8488c2ecf20Sopenharmony_ci start - extent_offset); 8498c2ecf20Sopenharmony_ci ret = btrfs_inc_extent_ref(trans, &ref); 8508c2ecf20Sopenharmony_ci BUG_ON(ret); /* -ENOMEM */ 8518c2ecf20Sopenharmony_ci } 8528c2ecf20Sopenharmony_ci key.offset = start; 8538c2ecf20Sopenharmony_ci } 8548c2ecf20Sopenharmony_ci /* 8558c2ecf20Sopenharmony_ci * From here on out we will have actually dropped something, so 8568c2ecf20Sopenharmony_ci * last_end can be updated. 
8578c2ecf20Sopenharmony_ci */ 8588c2ecf20Sopenharmony_ci last_end = extent_end; 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci /* 8618c2ecf20Sopenharmony_ci * | ---- range to drop ----- | 8628c2ecf20Sopenharmony_ci * | -------- extent -------- | 8638c2ecf20Sopenharmony_ci */ 8648c2ecf20Sopenharmony_ci if (start <= key.offset && end < extent_end) { 8658c2ecf20Sopenharmony_ci if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 8668c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 8678c2ecf20Sopenharmony_ci break; 8688c2ecf20Sopenharmony_ci } 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ci memcpy(&new_key, &key, sizeof(new_key)); 8718c2ecf20Sopenharmony_ci new_key.offset = end; 8728c2ecf20Sopenharmony_ci btrfs_set_item_key_safe(fs_info, path, &new_key); 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci extent_offset += end - key.offset; 8758c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, extent_offset); 8768c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 8778c2ecf20Sopenharmony_ci extent_end - end); 8788c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 8798c2ecf20Sopenharmony_ci if (update_refs && disk_bytenr > 0) 8808c2ecf20Sopenharmony_ci inode_sub_bytes(vfs_inode, end - key.offset); 8818c2ecf20Sopenharmony_ci break; 8828c2ecf20Sopenharmony_ci } 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci search_start = extent_end; 8858c2ecf20Sopenharmony_ci /* 8868c2ecf20Sopenharmony_ci * | ---- range to drop ----- | 8878c2ecf20Sopenharmony_ci * | -------- extent -------- | 8888c2ecf20Sopenharmony_ci */ 8898c2ecf20Sopenharmony_ci if (start > key.offset && end >= extent_end) { 8908c2ecf20Sopenharmony_ci BUG_ON(del_nr > 0); 8918c2ecf20Sopenharmony_ci if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 8928c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 8938c2ecf20Sopenharmony_ci break; 8948c2ecf20Sopenharmony_ci } 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 8978c2ecf20Sopenharmony_ci start - 
key.offset); 8988c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 8998c2ecf20Sopenharmony_ci if (update_refs && disk_bytenr > 0) 9008c2ecf20Sopenharmony_ci inode_sub_bytes(vfs_inode, extent_end - start); 9018c2ecf20Sopenharmony_ci if (end == extent_end) 9028c2ecf20Sopenharmony_ci break; 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci path->slots[0]++; 9058c2ecf20Sopenharmony_ci goto next_slot; 9068c2ecf20Sopenharmony_ci } 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci /* 9098c2ecf20Sopenharmony_ci * | ---- range to drop ----- | 9108c2ecf20Sopenharmony_ci * | ------ extent ------ | 9118c2ecf20Sopenharmony_ci */ 9128c2ecf20Sopenharmony_ci if (start <= key.offset && end >= extent_end) { 9138c2ecf20Sopenharmony_cidelete_extent_item: 9148c2ecf20Sopenharmony_ci if (del_nr == 0) { 9158c2ecf20Sopenharmony_ci del_slot = path->slots[0]; 9168c2ecf20Sopenharmony_ci del_nr = 1; 9178c2ecf20Sopenharmony_ci } else { 9188c2ecf20Sopenharmony_ci BUG_ON(del_slot + del_nr != path->slots[0]); 9198c2ecf20Sopenharmony_ci del_nr++; 9208c2ecf20Sopenharmony_ci } 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci if (update_refs && 9238c2ecf20Sopenharmony_ci extent_type == BTRFS_FILE_EXTENT_INLINE) { 9248c2ecf20Sopenharmony_ci inode_sub_bytes(vfs_inode, 9258c2ecf20Sopenharmony_ci extent_end - key.offset); 9268c2ecf20Sopenharmony_ci extent_end = ALIGN(extent_end, 9278c2ecf20Sopenharmony_ci fs_info->sectorsize); 9288c2ecf20Sopenharmony_ci } else if (update_refs && disk_bytenr > 0) { 9298c2ecf20Sopenharmony_ci btrfs_init_generic_ref(&ref, 9308c2ecf20Sopenharmony_ci BTRFS_DROP_DELAYED_REF, 9318c2ecf20Sopenharmony_ci disk_bytenr, num_bytes, 0); 9328c2ecf20Sopenharmony_ci btrfs_init_data_ref(&ref, 9338c2ecf20Sopenharmony_ci root->root_key.objectid, 9348c2ecf20Sopenharmony_ci key.objectid, 9358c2ecf20Sopenharmony_ci key.offset - extent_offset); 9368c2ecf20Sopenharmony_ci ret = btrfs_free_extent(trans, &ref); 9378c2ecf20Sopenharmony_ci BUG_ON(ret); /* -ENOMEM */ 
9388c2ecf20Sopenharmony_ci inode_sub_bytes(vfs_inode, 9398c2ecf20Sopenharmony_ci extent_end - key.offset); 9408c2ecf20Sopenharmony_ci } 9418c2ecf20Sopenharmony_ci 9428c2ecf20Sopenharmony_ci if (end == extent_end) 9438c2ecf20Sopenharmony_ci break; 9448c2ecf20Sopenharmony_ci 9458c2ecf20Sopenharmony_ci if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) { 9468c2ecf20Sopenharmony_ci path->slots[0]++; 9478c2ecf20Sopenharmony_ci goto next_slot; 9488c2ecf20Sopenharmony_ci } 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci ret = btrfs_del_items(trans, root, path, del_slot, 9518c2ecf20Sopenharmony_ci del_nr); 9528c2ecf20Sopenharmony_ci if (ret) { 9538c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 9548c2ecf20Sopenharmony_ci break; 9558c2ecf20Sopenharmony_ci } 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci del_nr = 0; 9588c2ecf20Sopenharmony_ci del_slot = 0; 9598c2ecf20Sopenharmony_ci 9608c2ecf20Sopenharmony_ci btrfs_release_path(path); 9618c2ecf20Sopenharmony_ci continue; 9628c2ecf20Sopenharmony_ci } 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci BUG(); 9658c2ecf20Sopenharmony_ci } 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_ci if (!ret && del_nr > 0) { 9688c2ecf20Sopenharmony_ci /* 9698c2ecf20Sopenharmony_ci * Set path->slots[0] to first slot, so that after the delete 9708c2ecf20Sopenharmony_ci * if items are move off from our leaf to its immediate left or 9718c2ecf20Sopenharmony_ci * right neighbor leafs, we end up with a correct and adjusted 9728c2ecf20Sopenharmony_ci * path->slots[0] for our insertion (if replace_extent != 0). 
9738c2ecf20Sopenharmony_ci */ 9748c2ecf20Sopenharmony_ci path->slots[0] = del_slot; 9758c2ecf20Sopenharmony_ci ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 9768c2ecf20Sopenharmony_ci if (ret) 9778c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 9788c2ecf20Sopenharmony_ci } 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 9818c2ecf20Sopenharmony_ci /* 9828c2ecf20Sopenharmony_ci * If btrfs_del_items() was called, it might have deleted a leaf, in 9838c2ecf20Sopenharmony_ci * which case it unlocked our path, so check path->locks[0] matches a 9848c2ecf20Sopenharmony_ci * write lock. 9858c2ecf20Sopenharmony_ci */ 9868c2ecf20Sopenharmony_ci if (!ret && replace_extent && leafs_visited == 1 && 9878c2ecf20Sopenharmony_ci (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || 9888c2ecf20Sopenharmony_ci path->locks[0] == BTRFS_WRITE_LOCK) && 9898c2ecf20Sopenharmony_ci btrfs_leaf_free_space(leaf) >= 9908c2ecf20Sopenharmony_ci sizeof(struct btrfs_item) + extent_item_size) { 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci key.objectid = ino; 9938c2ecf20Sopenharmony_ci key.type = BTRFS_EXTENT_DATA_KEY; 9948c2ecf20Sopenharmony_ci key.offset = start; 9958c2ecf20Sopenharmony_ci if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { 9968c2ecf20Sopenharmony_ci struct btrfs_key slot_key; 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); 9998c2ecf20Sopenharmony_ci if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) 10008c2ecf20Sopenharmony_ci path->slots[0]++; 10018c2ecf20Sopenharmony_ci } 10028c2ecf20Sopenharmony_ci setup_items_for_insert(root, path, &key, &extent_item_size, 1); 10038c2ecf20Sopenharmony_ci *key_inserted = 1; 10048c2ecf20Sopenharmony_ci } 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci if (!replace_extent || !(*key_inserted)) 10078c2ecf20Sopenharmony_ci btrfs_release_path(path); 10088c2ecf20Sopenharmony_ci if (drop_end) 10098c2ecf20Sopenharmony_ci 
*drop_end = found ? min(end, last_end) : end; 10108c2ecf20Sopenharmony_ci return ret; 10118c2ecf20Sopenharmony_ci} 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ciint btrfs_drop_extents(struct btrfs_trans_handle *trans, 10148c2ecf20Sopenharmony_ci struct btrfs_root *root, struct inode *inode, u64 start, 10158c2ecf20Sopenharmony_ci u64 end, int drop_cache) 10168c2ecf20Sopenharmony_ci{ 10178c2ecf20Sopenharmony_ci struct btrfs_path *path; 10188c2ecf20Sopenharmony_ci int ret; 10198c2ecf20Sopenharmony_ci 10208c2ecf20Sopenharmony_ci path = btrfs_alloc_path(); 10218c2ecf20Sopenharmony_ci if (!path) 10228c2ecf20Sopenharmony_ci return -ENOMEM; 10238c2ecf20Sopenharmony_ci ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start, 10248c2ecf20Sopenharmony_ci end, NULL, drop_cache, 0, 0, NULL); 10258c2ecf20Sopenharmony_ci btrfs_free_path(path); 10268c2ecf20Sopenharmony_ci return ret; 10278c2ecf20Sopenharmony_ci} 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_cistatic int extent_mergeable(struct extent_buffer *leaf, int slot, 10308c2ecf20Sopenharmony_ci u64 objectid, u64 bytenr, u64 orig_offset, 10318c2ecf20Sopenharmony_ci u64 *start, u64 *end) 10328c2ecf20Sopenharmony_ci{ 10338c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 10348c2ecf20Sopenharmony_ci struct btrfs_key key; 10358c2ecf20Sopenharmony_ci u64 extent_end; 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci if (slot < 0 || slot >= btrfs_header_nritems(leaf)) 10388c2ecf20Sopenharmony_ci return 0; 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, slot); 10418c2ecf20Sopenharmony_ci if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) 10428c2ecf20Sopenharmony_ci return 0; 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 10458c2ecf20Sopenharmony_ci if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 10468c2ecf20Sopenharmony_ci 
btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 10478c2ecf20Sopenharmony_ci btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset || 10488c2ecf20Sopenharmony_ci btrfs_file_extent_compression(leaf, fi) || 10498c2ecf20Sopenharmony_ci btrfs_file_extent_encryption(leaf, fi) || 10508c2ecf20Sopenharmony_ci btrfs_file_extent_other_encoding(leaf, fi)) 10518c2ecf20Sopenharmony_ci return 0; 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 10548c2ecf20Sopenharmony_ci if ((*start && *start != key.offset) || (*end && *end != extent_end)) 10558c2ecf20Sopenharmony_ci return 0; 10568c2ecf20Sopenharmony_ci 10578c2ecf20Sopenharmony_ci *start = key.offset; 10588c2ecf20Sopenharmony_ci *end = extent_end; 10598c2ecf20Sopenharmony_ci return 1; 10608c2ecf20Sopenharmony_ci} 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci/* 10638c2ecf20Sopenharmony_ci * Mark extent in the range start - end as written. 10648c2ecf20Sopenharmony_ci * 10658c2ecf20Sopenharmony_ci * This changes extent type from 'pre-allocated' to 'regular'. If only 10668c2ecf20Sopenharmony_ci * part of extent is marked as written, the extent will be split into 10678c2ecf20Sopenharmony_ci * two or three. 
10688c2ecf20Sopenharmony_ci */ 10698c2ecf20Sopenharmony_ciint btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 10708c2ecf20Sopenharmony_ci struct btrfs_inode *inode, u64 start, u64 end) 10718c2ecf20Sopenharmony_ci{ 10728c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 10738c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 10748c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 10758c2ecf20Sopenharmony_ci struct btrfs_path *path; 10768c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 10778c2ecf20Sopenharmony_ci struct btrfs_ref ref = { 0 }; 10788c2ecf20Sopenharmony_ci struct btrfs_key key; 10798c2ecf20Sopenharmony_ci struct btrfs_key new_key; 10808c2ecf20Sopenharmony_ci u64 bytenr; 10818c2ecf20Sopenharmony_ci u64 num_bytes; 10828c2ecf20Sopenharmony_ci u64 extent_end; 10838c2ecf20Sopenharmony_ci u64 orig_offset; 10848c2ecf20Sopenharmony_ci u64 other_start; 10858c2ecf20Sopenharmony_ci u64 other_end; 10868c2ecf20Sopenharmony_ci u64 split; 10878c2ecf20Sopenharmony_ci int del_nr = 0; 10888c2ecf20Sopenharmony_ci int del_slot = 0; 10898c2ecf20Sopenharmony_ci int recow; 10908c2ecf20Sopenharmony_ci int ret = 0; 10918c2ecf20Sopenharmony_ci u64 ino = btrfs_ino(inode); 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci path = btrfs_alloc_path(); 10948c2ecf20Sopenharmony_ci if (!path) 10958c2ecf20Sopenharmony_ci return -ENOMEM; 10968c2ecf20Sopenharmony_ciagain: 10978c2ecf20Sopenharmony_ci recow = 0; 10988c2ecf20Sopenharmony_ci split = start; 10998c2ecf20Sopenharmony_ci key.objectid = ino; 11008c2ecf20Sopenharmony_ci key.type = BTRFS_EXTENT_DATA_KEY; 11018c2ecf20Sopenharmony_ci key.offset = split; 11028c2ecf20Sopenharmony_ci 11038c2ecf20Sopenharmony_ci ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 11048c2ecf20Sopenharmony_ci if (ret < 0) 11058c2ecf20Sopenharmony_ci goto out; 11068c2ecf20Sopenharmony_ci if (ret > 0 && path->slots[0] > 0) 11078c2ecf20Sopenharmony_ci path->slots[0]--; 11088c2ecf20Sopenharmony_ci 
11098c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 11108c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 11118c2ecf20Sopenharmony_ci if (key.objectid != ino || 11128c2ecf20Sopenharmony_ci key.type != BTRFS_EXTENT_DATA_KEY) { 11138c2ecf20Sopenharmony_ci ret = -EINVAL; 11148c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 11158c2ecf20Sopenharmony_ci goto out; 11168c2ecf20Sopenharmony_ci } 11178c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 11188c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 11198c2ecf20Sopenharmony_ci if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) { 11208c2ecf20Sopenharmony_ci ret = -EINVAL; 11218c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 11228c2ecf20Sopenharmony_ci goto out; 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 11258c2ecf20Sopenharmony_ci if (key.offset > start || extent_end < end) { 11268c2ecf20Sopenharmony_ci ret = -EINVAL; 11278c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 11288c2ecf20Sopenharmony_ci goto out; 11298c2ecf20Sopenharmony_ci } 11308c2ecf20Sopenharmony_ci 11318c2ecf20Sopenharmony_ci bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 11328c2ecf20Sopenharmony_ci num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 11338c2ecf20Sopenharmony_ci orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 11348c2ecf20Sopenharmony_ci memcpy(&new_key, &key, sizeof(new_key)); 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci if (start == key.offset && end < extent_end) { 11378c2ecf20Sopenharmony_ci other_start = 0; 11388c2ecf20Sopenharmony_ci other_end = start; 11398c2ecf20Sopenharmony_ci if (extent_mergeable(leaf, path->slots[0] - 1, 11408c2ecf20Sopenharmony_ci ino, bytenr, orig_offset, 11418c2ecf20Sopenharmony_ci &other_start, &other_end)) { 11428c2ecf20Sopenharmony_ci new_key.offset = end; 11438c2ecf20Sopenharmony_ci 
btrfs_set_item_key_safe(fs_info, path, &new_key); 11448c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 11458c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 11468c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, 11478c2ecf20Sopenharmony_ci trans->transid); 11488c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 11498c2ecf20Sopenharmony_ci extent_end - end); 11508c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, 11518c2ecf20Sopenharmony_ci end - orig_offset); 11528c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0] - 1, 11538c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 11548c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, 11558c2ecf20Sopenharmony_ci trans->transid); 11568c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 11578c2ecf20Sopenharmony_ci end - other_start); 11588c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 11598c2ecf20Sopenharmony_ci goto out; 11608c2ecf20Sopenharmony_ci } 11618c2ecf20Sopenharmony_ci } 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_ci if (start > key.offset && end == extent_end) { 11648c2ecf20Sopenharmony_ci other_start = end; 11658c2ecf20Sopenharmony_ci other_end = 0; 11668c2ecf20Sopenharmony_ci if (extent_mergeable(leaf, path->slots[0] + 1, 11678c2ecf20Sopenharmony_ci ino, bytenr, orig_offset, 11688c2ecf20Sopenharmony_ci &other_start, &other_end)) { 11698c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 11708c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 11718c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 11728c2ecf20Sopenharmony_ci start - key.offset); 11738c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, 11748c2ecf20Sopenharmony_ci trans->transid); 11758c2ecf20Sopenharmony_ci path->slots[0]++; 11768c2ecf20Sopenharmony_ci new_key.offset = start; 11778c2ecf20Sopenharmony_ci btrfs_set_item_key_safe(fs_info, path, &new_key); 
11788c2ecf20Sopenharmony_ci 11798c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 11808c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 11818c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, 11828c2ecf20Sopenharmony_ci trans->transid); 11838c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 11848c2ecf20Sopenharmony_ci other_end - start); 11858c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, 11868c2ecf20Sopenharmony_ci start - orig_offset); 11878c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 11888c2ecf20Sopenharmony_ci goto out; 11898c2ecf20Sopenharmony_ci } 11908c2ecf20Sopenharmony_ci } 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci while (start > key.offset || end < extent_end) { 11938c2ecf20Sopenharmony_ci if (key.offset == start) 11948c2ecf20Sopenharmony_ci split = end; 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci new_key.offset = split; 11978c2ecf20Sopenharmony_ci ret = btrfs_duplicate_item(trans, root, path, &new_key); 11988c2ecf20Sopenharmony_ci if (ret == -EAGAIN) { 11998c2ecf20Sopenharmony_ci btrfs_release_path(path); 12008c2ecf20Sopenharmony_ci goto again; 12018c2ecf20Sopenharmony_ci } 12028c2ecf20Sopenharmony_ci if (ret < 0) { 12038c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 12048c2ecf20Sopenharmony_ci goto out; 12058c2ecf20Sopenharmony_ci } 12068c2ecf20Sopenharmony_ci 12078c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 12088c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0] - 1, 12098c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 12108c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, trans->transid); 12118c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 12128c2ecf20Sopenharmony_ci split - key.offset); 12138c2ecf20Sopenharmony_ci 12148c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 12158c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 12168c2ecf20Sopenharmony_ci 
12178c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, trans->transid); 12188c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); 12198c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 12208c2ecf20Sopenharmony_ci extent_end - split); 12218c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, 12248c2ecf20Sopenharmony_ci num_bytes, 0); 12258c2ecf20Sopenharmony_ci btrfs_init_data_ref(&ref, root->root_key.objectid, ino, 12268c2ecf20Sopenharmony_ci orig_offset); 12278c2ecf20Sopenharmony_ci ret = btrfs_inc_extent_ref(trans, &ref); 12288c2ecf20Sopenharmony_ci if (ret) { 12298c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 12308c2ecf20Sopenharmony_ci goto out; 12318c2ecf20Sopenharmony_ci } 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ci if (split == start) { 12348c2ecf20Sopenharmony_ci key.offset = start; 12358c2ecf20Sopenharmony_ci } else { 12368c2ecf20Sopenharmony_ci if (start != key.offset) { 12378c2ecf20Sopenharmony_ci ret = -EINVAL; 12388c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 12398c2ecf20Sopenharmony_ci goto out; 12408c2ecf20Sopenharmony_ci } 12418c2ecf20Sopenharmony_ci path->slots[0]--; 12428c2ecf20Sopenharmony_ci extent_end = end; 12438c2ecf20Sopenharmony_ci } 12448c2ecf20Sopenharmony_ci recow = 1; 12458c2ecf20Sopenharmony_ci } 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci other_start = end; 12488c2ecf20Sopenharmony_ci other_end = 0; 12498c2ecf20Sopenharmony_ci btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, 12508c2ecf20Sopenharmony_ci num_bytes, 0); 12518c2ecf20Sopenharmony_ci btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset); 12528c2ecf20Sopenharmony_ci if (extent_mergeable(leaf, path->slots[0] + 1, 12538c2ecf20Sopenharmony_ci ino, bytenr, orig_offset, 12548c2ecf20Sopenharmony_ci &other_start, &other_end)) { 
12558c2ecf20Sopenharmony_ci if (recow) { 12568c2ecf20Sopenharmony_ci btrfs_release_path(path); 12578c2ecf20Sopenharmony_ci goto again; 12588c2ecf20Sopenharmony_ci } 12598c2ecf20Sopenharmony_ci extent_end = other_end; 12608c2ecf20Sopenharmony_ci del_slot = path->slots[0] + 1; 12618c2ecf20Sopenharmony_ci del_nr++; 12628c2ecf20Sopenharmony_ci ret = btrfs_free_extent(trans, &ref); 12638c2ecf20Sopenharmony_ci if (ret) { 12648c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 12658c2ecf20Sopenharmony_ci goto out; 12668c2ecf20Sopenharmony_ci } 12678c2ecf20Sopenharmony_ci } 12688c2ecf20Sopenharmony_ci other_start = 0; 12698c2ecf20Sopenharmony_ci other_end = start; 12708c2ecf20Sopenharmony_ci if (extent_mergeable(leaf, path->slots[0] - 1, 12718c2ecf20Sopenharmony_ci ino, bytenr, orig_offset, 12728c2ecf20Sopenharmony_ci &other_start, &other_end)) { 12738c2ecf20Sopenharmony_ci if (recow) { 12748c2ecf20Sopenharmony_ci btrfs_release_path(path); 12758c2ecf20Sopenharmony_ci goto again; 12768c2ecf20Sopenharmony_ci } 12778c2ecf20Sopenharmony_ci key.offset = other_start; 12788c2ecf20Sopenharmony_ci del_slot = path->slots[0]; 12798c2ecf20Sopenharmony_ci del_nr++; 12808c2ecf20Sopenharmony_ci ret = btrfs_free_extent(trans, &ref); 12818c2ecf20Sopenharmony_ci if (ret) { 12828c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 12838c2ecf20Sopenharmony_ci goto out; 12848c2ecf20Sopenharmony_ci } 12858c2ecf20Sopenharmony_ci } 12868c2ecf20Sopenharmony_ci if (del_nr == 0) { 12878c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 12888c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 12898c2ecf20Sopenharmony_ci btrfs_set_file_extent_type(leaf, fi, 12908c2ecf20Sopenharmony_ci BTRFS_FILE_EXTENT_REG); 12918c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, trans->transid); 12928c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 12938c2ecf20Sopenharmony_ci } else { 12948c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, del_slot - 1, 
12958c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 12968c2ecf20Sopenharmony_ci btrfs_set_file_extent_type(leaf, fi, 12978c2ecf20Sopenharmony_ci BTRFS_FILE_EXTENT_REG); 12988c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, fi, trans->transid); 12998c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, 13008c2ecf20Sopenharmony_ci extent_end - key.offset); 13018c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 13028c2ecf20Sopenharmony_ci 13038c2ecf20Sopenharmony_ci ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 13048c2ecf20Sopenharmony_ci if (ret < 0) { 13058c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 13068c2ecf20Sopenharmony_ci goto out; 13078c2ecf20Sopenharmony_ci } 13088c2ecf20Sopenharmony_ci } 13098c2ecf20Sopenharmony_ciout: 13108c2ecf20Sopenharmony_ci btrfs_free_path(path); 13118c2ecf20Sopenharmony_ci return ret; 13128c2ecf20Sopenharmony_ci} 13138c2ecf20Sopenharmony_ci

/*
 * Make sure a page that is only going to be partially overwritten is up to
 * date before the copy happens.
 *
 * @inode:          inode whose mapping the page must still belong to
 * @page:           page to check
 * @pos:            file offset of the write boundary that falls in @page
 * @force_uptodate: read the page even when @pos is page aligned
 *
 * The page is read with btrfs_readpage() only when it is not already uptodate
 * and either @pos is not page aligned or @force_uptodate is set. After the
 * read the page is locked again and re-validated.
 *
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 *
 * Return: 0 on success, the error from btrfs_readpage(), -EIO if the page is
 * still not uptodate after the read, or -EAGAIN if the page no longer belongs
 * to @inode's mapping after being re-locked.
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
				 bool force_uptodate)
{
	int ret = 0;

	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
	    !PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		if (ret)
			return ret;
		lock_page(page);
		if (!PageUptodate(page)) {
			unlock_page(page);
			return -EIO;
		}
		/* The page was detached from this file while it was unlocked. */
		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			return -EAGAIN;
		}
	}
	return 0;
}

/*
 * this just gets pages into the page cache and locks them down.
 *
 * @inode:          inode being written
 * @pages:          output array, filled with @num_pages page pointers
 * @num_pages:      number of pages covering the write
 * @pos:            file offset where the write starts
 * @write_bytes:    length of the write
 * @force_uptodate: passed through to prepare_uptodate_page() for the first
 *                  page
 *
 * Only the first and the last page are checked with prepare_uptodate_page(),
 * using @pos and @pos + @write_bytes as the respective boundaries. Each page
 * is also waited on for writeback.
 *
 * Return: 0 on success. On failure every page obtained so far is unlocked and
 * released and the error is returned (-ENOMEM if a page could not be found or
 * created, otherwise the prepare_uptodate_page() error).
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate)
{
	int i;
	unsigned long index = pos >> PAGE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int err = 0;
	int faili;

	for (i = 0; i < num_pages; i++) {
again:
		pages[i] = find_or_create_page(inode->i_mapping, index + i,
					       mask | __GFP_WRITE);
		if (!pages[i]) {
			faili = i - 1;
			err = -ENOMEM;
			goto fail;
		}

		if (i == 0)
			err = prepare_uptodate_page(inode, pages[i], pos,
						    force_uptodate);
		if (!err && i == num_pages - 1)
			err = prepare_uptodate_page(inode, pages[i],
						    pos + write_bytes, false);
		if (err) {
			/* The helper already unlocked the page, drop our ref. */
			put_page(pages[i]);
			if (err == -EAGAIN) {
				/* Page left the mapping, look it up again. */
				err = 0;
				goto again;
			}
			faili = i - 1;
			goto fail;
		}
		wait_on_page_writeback(pages[i]);
	}

	return 0;
fail:
	/* Undo pages [0, faili]; page i itself was handled above. */
	while (faili >= 0) {
		unlock_page(pages[faili]);
		put_page(pages[faili]);
		faili--;
	}
	return err;
}

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if needed.
 *
 * @inode:        inode being written
 * @pages:        the pages prepared for this write, locked by the caller
 * @num_pages:    number of entries in @pages
 * @pos:          file offset where the write starts
 * @write_bytes:  length of the write
 * @lockstart:    set to the start of the locked range when 1 is returned
 * @lockend:      set to the (inclusive) end of the locked range when 1 is
 *                returned
 * @cached_state: extent state cache used for the lock
 *
 * The range is only locked when its sector-aligned start is below i_size.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - the pages were unlocked and released because an ordered extent
 *	     overlaps the range; the caller needs to re-prepare the pages
 * the other < 0 number - Something wrong happens
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				size_t write_bytes,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	start_pos = round_down(pos, fs_info->sectorsize);
	last_pos = round_up(pos + write_bytes, fs_info->sectorsize) - 1;

	if (start_pos < inode->vfs_inode.i_size) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
				cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->num_bytes > start_pos &&
		    ordered->file_offset <= last_pos) {
			/*
			 * Drop the extent lock and the pages before waiting on
			 * the ordered extent, then let the caller start over.
			 */
			unlock_extent_cached(&inode->io_tree, start_pos,
					     last_pos, cached_state);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
			btrfs_start_ordered_extent(ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	/*
	 * It's possible the pages are dirty right now, but we don't want
	 * to clean them yet because copy_from_user may catch a page fault
	 * and we might have to fall back to one page at a time.  If that
	 * happens, we'll unlock these pages and we'd have a window where
	 * reclaim could sneak in and drop the once-dirty page on the floor
	 * without writing it.
	 *
	 * We have the pages locked and the extent range locked, so there's
	 * no way someone can start IO on any dirty pages in this range.
	 *
	 * We'll call btrfs_dirty_pages() later on, and that will flip around
	 * delalloc bits and dirty the pages as required.
	 */
	for (i = 0; i < num_pages; i++) {
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}

/*
 * Common implementation behind check_nocow_nolock() and
 * btrfs_check_nocow_lock().
 *
 * @inode:       inode being written
 * @pos:         file offset where the write starts
 * @write_bytes: in: length of the write; out: shrunk to the nocow writeable
 *               length when a positive value is returned
 * @nowait:      do not block: the snapshot lock is not taken at all, the
 *               extent range is only try-locked and a pending ordered extent
 *               makes the check fail with -EAGAIN instead of being flushed
 *
 * The sector-aligned extent range is always unlocked again before returning.
 * When @nowait is false and a positive value is returned, the root's
 * snapshot_lock is left write-locked for the caller to release.
 *
 * Return: >0 if the range can be written without COW, 0 if it cannot (also
 * when the inode has neither NODATACOW nor PREALLOC set), -EAGAIN if a lock
 * could not be taken or (nowait only) an ordered extent is in the range.
 */
static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
			   size_t *write_bytes, bool nowait)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
		return 0;

	if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
		return -EAGAIN;

	lockstart = round_down(pos, fs_info->sectorsize);
	lockend = round_up(pos + *write_bytes,
			   fs_info->sectorsize) - 1;
	num_bytes = lockend - lockstart + 1;

	if (nowait) {
		struct btrfs_ordered_extent *ordered;

		if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
			return -EAGAIN;

		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     num_bytes);
		if (ordered) {
			btrfs_put_ordered_extent(ordered);
			ret = -EAGAIN;
			goto out_unlock;
		}
	} else {
		btrfs_lock_and_flush_ordered_range(inode, lockstart,
						   lockend, NULL);
	}

	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
			NULL, NULL, NULL, false);
	if (ret <= 0) {
		/* Errors are folded into "cannot nocow". */
		ret = 0;
		if (!nowait)
			btrfs_drew_write_unlock(&root->snapshot_lock);
	} else {
		/* num_bytes counts from lockstart, convert to bytes from pos. */
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
	}
out_unlock:
	unlock_extent(&inode->io_tree, lockstart, lockend);

	return ret;
}

/*
 * Non-blocking variant of the nocow check: calls check_can_nocow() with
 * nowait set, so no snapshot lock is held on return.
 */
static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
			      size_t *write_bytes)
{
	return check_can_nocow(inode, pos, write_bytes, true);
}

/*
 * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
 *
 * @pos:	 File offset
 * @write_bytes: The length to write, will be updated to the nocow writeable
 *		 range
 *
 * This function will flush ordered extents in the range to ensure proper
 * nocow checks.
 *
 * Return:
 * >0		and update @write_bytes if we can do nocow write
 *  0		if we can't do nocow write
 * -EAGAIN	if we can't get the needed lock, or (nowait == true case only)
 *		there are ordered extents in the range
 * <0		if other error happened
 *
 * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
 */
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
			   size_t *write_bytes)
{
	return check_can_nocow(inode, pos, write_bytes, false);
}

/*
 * Release the root's snapshot_lock write side; the counterpart of
 * btrfs_check_nocow_lock().
 */
void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
{
	btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}

/*
 * Copy the data described by @i into the page cache of the file behind
 * @iocb, starting at iocb->ki_pos, and dirty the copied range.
 *
 * The write is done in batches of at most nrptrs pages. Each batch faults in
 * the user buffer, reserves data space (or, if that fails and the range can
 * be written nocow, takes the nocow lock and reserves metadata only),
 * reserves metadata, prepares and locks the pages and the extent range,
 * copies the data, gives back the part of the reservation that was not
 * dirtied and finally dirties the pages.
 *
 * iocb->ki_pos is only read here, it is not advanced.
 *
 * Return: the number of bytes written if that is non-zero, otherwise the
 * value left in ret by the last step (0 or a negative error).
 */
static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
					       struct iov_iter *i)
{
	struct file *file = iocb->ki_filp;
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct page **pages = NULL;
	struct extent_changeset *data_reserved = NULL;
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;

	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			PAGE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	while (iov_iter_count(i) > 0) {
		struct extent_state *cached_state = NULL;
		size_t offset = offset_in_page(pos);
		size_t sector_offset;
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;
		int extents_locked;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		only_release_metadata = false;
		sector_offset = pos & (fs_info->sectorsize - 1);
		reserve_bytes = round_up(write_bytes + sector_offset,
				fs_info->sectorsize);

		extent_changeset_release(data_reserved);
		ret = btrfs_check_data_free_space(BTRFS_I(inode),
						  &data_reserved, pos,
						  write_bytes);
		if (ret < 0) {
			if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
						   &write_bytes) > 0) {
				/*
				 * For nodata cow case, no need to reserve
				 * data space.
				 */
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_SIZE);
				reserve_bytes = round_up(write_bytes +
							 sector_offset,
							 fs_info->sectorsize);
			} else {
				break;
			}
		}

		WARN_ON(reserve_bytes == 0);
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
				reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(BTRFS_I(inode),
						data_reserved, pos,
						write_bytes);
			else
				btrfs_check_nocow_unlock(BTRFS_I(inode));
			break;
		}

		release_bytes = reserve_bytes;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret) {
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			break;
		}

		extents_locked = lock_and_cleanup_extent_if_need(
				BTRFS_I(inode), pages,
				num_pages, pos, write_bytes, &lockstart,
				&lockend, &cached_state);
		if (extents_locked < 0) {
			/* -EAGAIN: the pages were dropped, prepare them again. */
			if (extents_locked == -EAGAIN)
				goto again;
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			ret = extents_locked;
			break;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
					fs_info->sectorsize);
		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			force_page_uptodate = true;
			dirty_sectors = 0;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_SIZE);
		}

		if (num_sectors > dirty_sectors) {
			/* release everything except the sectors we dirtied */
			release_bytes -= dirty_sectors <<
						fs_info->sb->s_blocksize_bits;
			if (only_release_metadata) {
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
							release_bytes, true);
			} else {
				u64 __pos;

				__pos = round_down(pos,
						   fs_info->sectorsize) +
					(dirty_pages << PAGE_SHIFT);
				btrfs_delalloc_release_space(BTRFS_I(inode),
						data_reserved, __pos,
						release_bytes, true);
			}
		}

		/* From here on only the dirtied part is still reserved. */
		release_bytes = round_up(copied + sector_offset,
					fs_info->sectorsize);

		if (copied > 0)
			ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
						dirty_pages, pos, copied,
						&cached_state);

		/*
		 * If we have not locked the extent range, because the range's
		 * start offset is >= i_size, we might still have a non-NULL
		 * cached extent state, acquired while marking the extent range
		 * as delalloc through btrfs_dirty_pages(). Therefore free any
		 * possible cached extent state to avoid a memory leak.
		 */
		if (extents_locked)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state);
		else
			free_extent_state(cached_state);

		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_check_nocow_unlock(BTRFS_I(inode));

		if (only_release_metadata && copied > 0) {
			lockstart = round_down(pos,
					       fs_info->sectorsize);
			lockend = round_up(pos + copied,
					   fs_info->sectorsize) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	/* Give back whatever the loop left reserved when it broke out. */
	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_check_nocow_unlock(BTRFS_I(inode));
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
					release_bytes, true);
		} else {
			btrfs_delalloc_release_space(BTRFS_I(inode),
					data_reserved,
					round_down(pos, fs_info->sectorsize),
					release_bytes, true);
		}
	}

	extent_changeset_free(data_reserved);
	return num_written ? num_written : ret;
}

/*
 * Perform a direct IO write and fall back to a buffered write for whatever
 * part of @from the direct IO did not consume.
 *
 * After a buffered fallback the written range is flushed and waited on, the
 * file position in @iocb is advanced past it and the page cache pages for
 * that range are invalidated.
 *
 * Return: the direct IO result if it failed or consumed everything;
 * otherwise the total number of bytes written if non-zero, else the error
 * from the buffered write, the flush or the wait.
 */
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t pos;
	ssize_t written;
	ssize_t written_buffered;
	loff_t endbyte;
	int err;

	written = btrfs_direct_IO(iocb, from);

	if (written < 0 || !iov_iter_count(from))
		return written;

	pos = iocb->ki_pos;
	written_buffered = btrfs_buffered_write(iocb, from);
	if (written_buffered < 0) {
		err = written_buffered;
		goto out;
	}
	/*
	 * Ensure all data is persisted. We want the next direct IO read to be
	 * able to read what was just written.
	 */
	endbyte = pos + written_buffered - 1;
	err = btrfs_fdatawrite_range(inode, pos, endbyte);
	if (err)
		goto out;
	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
	if (err)
		goto out;
	written += written_buffered;
	iocb->ki_pos = pos + written_buffered;
	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
				 endbyte >> PAGE_SHIFT);
out:
	return written ? written : err;
}

/*
 * Update the in-memory timestamps of @inode for a write.
 *
 * Does nothing for IS_NOCMTIME() inodes. Otherwise mtime and ctime are set
 * to the current time when they differ from it, and the inode version is
 * incremented when IS_I_VERSION() is true.
 */
static void update_time_for_write(struct inode *inode)
{
	struct timespec64 now;

	if (IS_NOCMTIME(inode))
		return;

	now = current_time(inode);
	if (!timespec64_equal(&inode->i_mtime, &now))
		inode->i_mtime = now;

	if (!timespec64_equal(&inode->i_ctime, &now))
		inode->i_ctime = now;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);
}

18588c2ecf20Sopenharmony_cistatic ssize_t btrfs_file_write_iter(struct kiocb *iocb, 
18598c2ecf20Sopenharmony_ci struct iov_iter *from) 18608c2ecf20Sopenharmony_ci{ 18618c2ecf20Sopenharmony_ci struct file *file = iocb->ki_filp; 18628c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 18638c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 18648c2ecf20Sopenharmony_ci u64 start_pos; 18658c2ecf20Sopenharmony_ci u64 end_pos; 18668c2ecf20Sopenharmony_ci ssize_t num_written = 0; 18678c2ecf20Sopenharmony_ci const bool sync = iocb->ki_flags & IOCB_DSYNC; 18688c2ecf20Sopenharmony_ci ssize_t err; 18698c2ecf20Sopenharmony_ci loff_t pos; 18708c2ecf20Sopenharmony_ci size_t count; 18718c2ecf20Sopenharmony_ci loff_t oldsize; 18728c2ecf20Sopenharmony_ci int clean_page = 0; 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci if (!(iocb->ki_flags & IOCB_DIRECT) && 18758c2ecf20Sopenharmony_ci (iocb->ki_flags & IOCB_NOWAIT)) 18768c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 18778c2ecf20Sopenharmony_ci 18788c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 18798c2ecf20Sopenharmony_ci if (!inode_trylock(inode)) 18808c2ecf20Sopenharmony_ci return -EAGAIN; 18818c2ecf20Sopenharmony_ci } else { 18828c2ecf20Sopenharmony_ci inode_lock(inode); 18838c2ecf20Sopenharmony_ci } 18848c2ecf20Sopenharmony_ci 18858c2ecf20Sopenharmony_ci err = generic_write_checks(iocb, from); 18868c2ecf20Sopenharmony_ci if (err <= 0) { 18878c2ecf20Sopenharmony_ci inode_unlock(inode); 18888c2ecf20Sopenharmony_ci return err; 18898c2ecf20Sopenharmony_ci } 18908c2ecf20Sopenharmony_ci 18918c2ecf20Sopenharmony_ci pos = iocb->ki_pos; 18928c2ecf20Sopenharmony_ci count = iov_iter_count(from); 18938c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 18948c2ecf20Sopenharmony_ci size_t nocow_bytes = count; 18958c2ecf20Sopenharmony_ci 18968c2ecf20Sopenharmony_ci /* 18978c2ecf20Sopenharmony_ci * We will allocate space in case nodatacow is not set, 18988c2ecf20Sopenharmony_ci * so bail 18998c2ecf20Sopenharmony_ci */ 19008c2ecf20Sopenharmony_ci if 
(check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) 19018c2ecf20Sopenharmony_ci <= 0) { 19028c2ecf20Sopenharmony_ci inode_unlock(inode); 19038c2ecf20Sopenharmony_ci return -EAGAIN; 19048c2ecf20Sopenharmony_ci } 19058c2ecf20Sopenharmony_ci /* 19068c2ecf20Sopenharmony_ci * There are holes in the range or parts of the range that must 19078c2ecf20Sopenharmony_ci * be COWed (shared extents, RO block groups, etc), so just bail 19088c2ecf20Sopenharmony_ci * out. 19098c2ecf20Sopenharmony_ci */ 19108c2ecf20Sopenharmony_ci if (nocow_bytes < count) { 19118c2ecf20Sopenharmony_ci inode_unlock(inode); 19128c2ecf20Sopenharmony_ci return -EAGAIN; 19138c2ecf20Sopenharmony_ci } 19148c2ecf20Sopenharmony_ci } 19158c2ecf20Sopenharmony_ci 19168c2ecf20Sopenharmony_ci current->backing_dev_info = inode_to_bdi(inode); 19178c2ecf20Sopenharmony_ci err = file_remove_privs(file); 19188c2ecf20Sopenharmony_ci if (err) { 19198c2ecf20Sopenharmony_ci inode_unlock(inode); 19208c2ecf20Sopenharmony_ci goto out; 19218c2ecf20Sopenharmony_ci } 19228c2ecf20Sopenharmony_ci 19238c2ecf20Sopenharmony_ci /* 19248c2ecf20Sopenharmony_ci * If BTRFS flips readonly due to some impossible error 19258c2ecf20Sopenharmony_ci * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), 19268c2ecf20Sopenharmony_ci * although we have opened a file as writable, we have 19278c2ecf20Sopenharmony_ci * to stop this write operation to ensure FS consistency. 19288c2ecf20Sopenharmony_ci */ 19298c2ecf20Sopenharmony_ci if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 19308c2ecf20Sopenharmony_ci inode_unlock(inode); 19318c2ecf20Sopenharmony_ci err = -EROFS; 19328c2ecf20Sopenharmony_ci goto out; 19338c2ecf20Sopenharmony_ci } 19348c2ecf20Sopenharmony_ci 19358c2ecf20Sopenharmony_ci /* 19368c2ecf20Sopenharmony_ci * We reserve space for updating the inode when we reserve space for the 19378c2ecf20Sopenharmony_ci * extent we are going to write, so we will enospc out there. 
We don't 19388c2ecf20Sopenharmony_ci * need to start yet another transaction to update the inode as we will 19398c2ecf20Sopenharmony_ci * update the inode when we finish writing whatever data we write. 19408c2ecf20Sopenharmony_ci */ 19418c2ecf20Sopenharmony_ci update_time_for_write(inode); 19428c2ecf20Sopenharmony_ci 19438c2ecf20Sopenharmony_ci start_pos = round_down(pos, fs_info->sectorsize); 19448c2ecf20Sopenharmony_ci oldsize = i_size_read(inode); 19458c2ecf20Sopenharmony_ci if (start_pos > oldsize) { 19468c2ecf20Sopenharmony_ci /* Expand hole size to cover write data, preventing empty gap */ 19478c2ecf20Sopenharmony_ci end_pos = round_up(pos + count, 19488c2ecf20Sopenharmony_ci fs_info->sectorsize); 19498c2ecf20Sopenharmony_ci err = btrfs_cont_expand(inode, oldsize, end_pos); 19508c2ecf20Sopenharmony_ci if (err) { 19518c2ecf20Sopenharmony_ci inode_unlock(inode); 19528c2ecf20Sopenharmony_ci goto out; 19538c2ecf20Sopenharmony_ci } 19548c2ecf20Sopenharmony_ci if (start_pos > round_up(oldsize, fs_info->sectorsize)) 19558c2ecf20Sopenharmony_ci clean_page = 1; 19568c2ecf20Sopenharmony_ci } 19578c2ecf20Sopenharmony_ci 19588c2ecf20Sopenharmony_ci if (sync) 19598c2ecf20Sopenharmony_ci atomic_inc(&BTRFS_I(inode)->sync_writers); 19608c2ecf20Sopenharmony_ci 19618c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_DIRECT) { 19628c2ecf20Sopenharmony_ci /* 19638c2ecf20Sopenharmony_ci * 1. We must always clear IOCB_DSYNC in order to not deadlock 19648c2ecf20Sopenharmony_ci * in iomap, as it calls generic_write_sync() in this case. 19658c2ecf20Sopenharmony_ci * 2. If we are async, we can call iomap_dio_complete() either 19668c2ecf20Sopenharmony_ci * in 19678c2ecf20Sopenharmony_ci * 19688c2ecf20Sopenharmony_ci * 2.1. A worker thread from the last bio completed. In this 19698c2ecf20Sopenharmony_ci * case we need to mark the btrfs_dio_data that it is 19708c2ecf20Sopenharmony_ci * async in order to call generic_write_sync() properly. 
19718c2ecf20Sopenharmony_ci * This is handled by setting BTRFS_DIO_SYNC_STUB in the 19728c2ecf20Sopenharmony_ci * current->journal_info. 19738c2ecf20Sopenharmony_ci * 2.2 The submitter context, because all IO completed 19748c2ecf20Sopenharmony_ci * before we exited iomap_dio_rw(). In this case we can 19758c2ecf20Sopenharmony_ci * just re-set the IOCB_DSYNC on the iocb and we'll do 19768c2ecf20Sopenharmony_ci * the sync below. If our ->end_io() gets called and 19778c2ecf20Sopenharmony_ci * current->journal_info is set, then we know we're in 19788c2ecf20Sopenharmony_ci * our current context and we will clear 19798c2ecf20Sopenharmony_ci * current->journal_info to indicate that we need to 19808c2ecf20Sopenharmony_ci * sync below. 19818c2ecf20Sopenharmony_ci */ 19828c2ecf20Sopenharmony_ci if (sync) { 19838c2ecf20Sopenharmony_ci ASSERT(current->journal_info == NULL); 19848c2ecf20Sopenharmony_ci iocb->ki_flags &= ~IOCB_DSYNC; 19858c2ecf20Sopenharmony_ci current->journal_info = BTRFS_DIO_SYNC_STUB; 19868c2ecf20Sopenharmony_ci } 19878c2ecf20Sopenharmony_ci num_written = __btrfs_direct_write(iocb, from); 19888c2ecf20Sopenharmony_ci 19898c2ecf20Sopenharmony_ci /* 19908c2ecf20Sopenharmony_ci * As stated above, we cleared journal_info, so we need to do 19918c2ecf20Sopenharmony_ci * the sync ourselves. 
19928c2ecf20Sopenharmony_ci */ 19938c2ecf20Sopenharmony_ci if (sync && current->journal_info == NULL) 19948c2ecf20Sopenharmony_ci iocb->ki_flags |= IOCB_DSYNC; 19958c2ecf20Sopenharmony_ci current->journal_info = NULL; 19968c2ecf20Sopenharmony_ci } else { 19978c2ecf20Sopenharmony_ci num_written = btrfs_buffered_write(iocb, from); 19988c2ecf20Sopenharmony_ci if (num_written > 0) 19998c2ecf20Sopenharmony_ci iocb->ki_pos = pos + num_written; 20008c2ecf20Sopenharmony_ci if (clean_page) 20018c2ecf20Sopenharmony_ci pagecache_isize_extended(inode, oldsize, 20028c2ecf20Sopenharmony_ci i_size_read(inode)); 20038c2ecf20Sopenharmony_ci } 20048c2ecf20Sopenharmony_ci 20058c2ecf20Sopenharmony_ci inode_unlock(inode); 20068c2ecf20Sopenharmony_ci 20078c2ecf20Sopenharmony_ci btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); 20088c2ecf20Sopenharmony_ci 20098c2ecf20Sopenharmony_ci if (num_written > 0) 20108c2ecf20Sopenharmony_ci num_written = generic_write_sync(iocb, num_written); 20118c2ecf20Sopenharmony_ci 20128c2ecf20Sopenharmony_ci if (sync) 20138c2ecf20Sopenharmony_ci atomic_dec(&BTRFS_I(inode)->sync_writers); 20148c2ecf20Sopenharmony_ciout: 20158c2ecf20Sopenharmony_ci current->backing_dev_info = NULL; 20168c2ecf20Sopenharmony_ci return num_written ? num_written : err; 20178c2ecf20Sopenharmony_ci} 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ciint btrfs_release_file(struct inode *inode, struct file *filp) 20208c2ecf20Sopenharmony_ci{ 20218c2ecf20Sopenharmony_ci struct btrfs_file_private *private = filp->private_data; 20228c2ecf20Sopenharmony_ci 20238c2ecf20Sopenharmony_ci if (private && private->filldir_buf) 20248c2ecf20Sopenharmony_ci kfree(private->filldir_buf); 20258c2ecf20Sopenharmony_ci kfree(private); 20268c2ecf20Sopenharmony_ci filp->private_data = NULL; 20278c2ecf20Sopenharmony_ci 20288c2ecf20Sopenharmony_ci /* 20298c2ecf20Sopenharmony_ci * Set by setattr when we are about to truncate a file from a non-zero 20308c2ecf20Sopenharmony_ci * size to a zero size. 
This tries to flush down new bytes that may 20318c2ecf20Sopenharmony_ci * have been written if the application were using truncate to replace 20328c2ecf20Sopenharmony_ci * a file in place. 20338c2ecf20Sopenharmony_ci */ 20348c2ecf20Sopenharmony_ci if (test_and_clear_bit(BTRFS_INODE_FLUSH_ON_CLOSE, 20358c2ecf20Sopenharmony_ci &BTRFS_I(inode)->runtime_flags)) 20368c2ecf20Sopenharmony_ci filemap_flush(inode->i_mapping); 20378c2ecf20Sopenharmony_ci return 0; 20388c2ecf20Sopenharmony_ci} 20398c2ecf20Sopenharmony_ci 20408c2ecf20Sopenharmony_cistatic int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) 20418c2ecf20Sopenharmony_ci{ 20428c2ecf20Sopenharmony_ci int ret; 20438c2ecf20Sopenharmony_ci struct blk_plug plug; 20448c2ecf20Sopenharmony_ci 20458c2ecf20Sopenharmony_ci /* 20468c2ecf20Sopenharmony_ci * This is only called in fsync, which would do synchronous writes, so 20478c2ecf20Sopenharmony_ci * a plug can merge adjacent IOs as much as possible. Esp. in case of 20488c2ecf20Sopenharmony_ci * multiple disks using raid profile, a large IO can be split to 20498c2ecf20Sopenharmony_ci * several segments of stripe length (currently 64K). 
20508c2ecf20Sopenharmony_ci */ 20518c2ecf20Sopenharmony_ci blk_start_plug(&plug); 20528c2ecf20Sopenharmony_ci atomic_inc(&BTRFS_I(inode)->sync_writers); 20538c2ecf20Sopenharmony_ci ret = btrfs_fdatawrite_range(inode, start, end); 20548c2ecf20Sopenharmony_ci atomic_dec(&BTRFS_I(inode)->sync_writers); 20558c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 20568c2ecf20Sopenharmony_ci 20578c2ecf20Sopenharmony_ci return ret; 20588c2ecf20Sopenharmony_ci} 20598c2ecf20Sopenharmony_ci 20608c2ecf20Sopenharmony_cistatic inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) 20618c2ecf20Sopenharmony_ci{ 20628c2ecf20Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(ctx->inode); 20638c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 20648c2ecf20Sopenharmony_ci 20658c2ecf20Sopenharmony_ci if (btrfs_inode_in_log(inode, fs_info->generation) && 20668c2ecf20Sopenharmony_ci list_empty(&ctx->ordered_extents)) 20678c2ecf20Sopenharmony_ci return true; 20688c2ecf20Sopenharmony_ci 20698c2ecf20Sopenharmony_ci /* 20708c2ecf20Sopenharmony_ci * If we are doing a fast fsync we can not bail out if the inode's 20718c2ecf20Sopenharmony_ci * last_trans is <= then the last committed transaction, because we only 20728c2ecf20Sopenharmony_ci * update the last_trans of the inode during ordered extent completion, 20738c2ecf20Sopenharmony_ci * and for a fast fsync we don't wait for that, we only wait for the 20748c2ecf20Sopenharmony_ci * writeback to complete. 
20758c2ecf20Sopenharmony_ci */ 20768c2ecf20Sopenharmony_ci if (inode->last_trans <= fs_info->last_trans_committed && 20778c2ecf20Sopenharmony_ci (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || 20788c2ecf20Sopenharmony_ci list_empty(&ctx->ordered_extents))) 20798c2ecf20Sopenharmony_ci return true; 20808c2ecf20Sopenharmony_ci 20818c2ecf20Sopenharmony_ci return false; 20828c2ecf20Sopenharmony_ci} 20838c2ecf20Sopenharmony_ci 20848c2ecf20Sopenharmony_ci/* 20858c2ecf20Sopenharmony_ci * fsync call for both files and directories. This logs the inode into 20868c2ecf20Sopenharmony_ci * the tree log instead of forcing full commits whenever possible. 20878c2ecf20Sopenharmony_ci * 20888c2ecf20Sopenharmony_ci * It needs to call filemap_fdatawait so that all ordered extent updates are 20898c2ecf20Sopenharmony_ci * in the metadata btree are up to date for copying to the log. 20908c2ecf20Sopenharmony_ci * 20918c2ecf20Sopenharmony_ci * It drops the inode mutex before doing the tree log commit. This is an 20928c2ecf20Sopenharmony_ci * important optimization for directories because holding the mutex prevents 20938c2ecf20Sopenharmony_ci * new operations on the dir while we write to disk. 
20948c2ecf20Sopenharmony_ci */ 20958c2ecf20Sopenharmony_ciint btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 20968c2ecf20Sopenharmony_ci{ 20978c2ecf20Sopenharmony_ci struct dentry *dentry = file_dentry(file); 20988c2ecf20Sopenharmony_ci struct inode *inode = d_inode(dentry); 20998c2ecf20Sopenharmony_ci struct btrfs_root *root = BTRFS_I(inode)->root; 21008c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 21018c2ecf20Sopenharmony_ci struct btrfs_log_ctx ctx; 21028c2ecf20Sopenharmony_ci int ret = 0, err; 21038c2ecf20Sopenharmony_ci u64 len; 21048c2ecf20Sopenharmony_ci bool full_sync; 21058c2ecf20Sopenharmony_ci 21068c2ecf20Sopenharmony_ci trace_btrfs_sync_file(file, datasync); 21078c2ecf20Sopenharmony_ci 21088c2ecf20Sopenharmony_ci btrfs_init_log_ctx(&ctx, inode); 21098c2ecf20Sopenharmony_ci 21108c2ecf20Sopenharmony_ci /* 21118c2ecf20Sopenharmony_ci * Always set the range to a full range, otherwise we can get into 21128c2ecf20Sopenharmony_ci * several problems, from missing file extent items to represent holes 21138c2ecf20Sopenharmony_ci * when not using the NO_HOLES feature, to log tree corruption due to 21148c2ecf20Sopenharmony_ci * races between hole detection during logging and completion of ordered 21158c2ecf20Sopenharmony_ci * extents outside the range, to missing checksums due to ordered extents 21168c2ecf20Sopenharmony_ci * for which we flushed only a subset of their pages. 21178c2ecf20Sopenharmony_ci */ 21188c2ecf20Sopenharmony_ci start = 0; 21198c2ecf20Sopenharmony_ci end = LLONG_MAX; 21208c2ecf20Sopenharmony_ci len = (u64)LLONG_MAX + 1; 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci /* 21238c2ecf20Sopenharmony_ci * We write the dirty pages in the range and wait until they complete 21248c2ecf20Sopenharmony_ci * out of the ->i_mutex. If so, we can flush the dirty pages by 21258c2ecf20Sopenharmony_ci * multi-task, and make the performance up. 
See 21268c2ecf20Sopenharmony_ci * btrfs_wait_ordered_range for an explanation of the ASYNC check. 21278c2ecf20Sopenharmony_ci */ 21288c2ecf20Sopenharmony_ci ret = start_ordered_ops(inode, start, end); 21298c2ecf20Sopenharmony_ci if (ret) 21308c2ecf20Sopenharmony_ci goto out; 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci inode_lock(inode); 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_ci /* 21358c2ecf20Sopenharmony_ci * We take the dio_sem here because the tree log stuff can race with 21368c2ecf20Sopenharmony_ci * lockless dio writes and get an extent map logged for an extent we 21378c2ecf20Sopenharmony_ci * never waited on. We need it this high up for lockdep reasons. 21388c2ecf20Sopenharmony_ci */ 21398c2ecf20Sopenharmony_ci down_write(&BTRFS_I(inode)->dio_sem); 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_ci atomic_inc(&root->log_batch); 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_ci /* 21448c2ecf20Sopenharmony_ci * Always check for the full sync flag while holding the inode's lock, 21458c2ecf20Sopenharmony_ci * to avoid races with other tasks. The flag must be either set all the 21468c2ecf20Sopenharmony_ci * time during logging or always off all the time while logging. 21478c2ecf20Sopenharmony_ci */ 21488c2ecf20Sopenharmony_ci full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 21498c2ecf20Sopenharmony_ci &BTRFS_I(inode)->runtime_flags); 21508c2ecf20Sopenharmony_ci 21518c2ecf20Sopenharmony_ci /* 21528c2ecf20Sopenharmony_ci * Before we acquired the inode's lock, someone may have dirtied more 21538c2ecf20Sopenharmony_ci * pages in the target range. 
We need to make sure that writeback for 21548c2ecf20Sopenharmony_ci * any such pages does not start while we are logging the inode, because 21558c2ecf20Sopenharmony_ci * if it does, any of the following might happen when we are not doing a 21568c2ecf20Sopenharmony_ci * full inode sync: 21578c2ecf20Sopenharmony_ci * 21588c2ecf20Sopenharmony_ci * 1) We log an extent after its writeback finishes but before its 21598c2ecf20Sopenharmony_ci * checksums are added to the csum tree, leading to -EIO errors 21608c2ecf20Sopenharmony_ci * when attempting to read the extent after a log replay. 21618c2ecf20Sopenharmony_ci * 21628c2ecf20Sopenharmony_ci * 2) We can end up logging an extent before its writeback finishes. 21638c2ecf20Sopenharmony_ci * Therefore after the log replay we will have a file extent item 21648c2ecf20Sopenharmony_ci * pointing to an unwritten extent (and no data checksums as well). 21658c2ecf20Sopenharmony_ci * 21668c2ecf20Sopenharmony_ci * So trigger writeback for any eventual new dirty pages and then we 21678c2ecf20Sopenharmony_ci * wait for all ordered extents to complete below. 21688c2ecf20Sopenharmony_ci */ 21698c2ecf20Sopenharmony_ci ret = start_ordered_ops(inode, start, end); 21708c2ecf20Sopenharmony_ci if (ret) { 21718c2ecf20Sopenharmony_ci up_write(&BTRFS_I(inode)->dio_sem); 21728c2ecf20Sopenharmony_ci inode_unlock(inode); 21738c2ecf20Sopenharmony_ci goto out; 21748c2ecf20Sopenharmony_ci } 21758c2ecf20Sopenharmony_ci 21768c2ecf20Sopenharmony_ci /* 21778c2ecf20Sopenharmony_ci * We have to do this here to avoid the priority inversion of waiting on 21788c2ecf20Sopenharmony_ci * IO of a lower priority task while holding a transaction open. 
21798c2ecf20Sopenharmony_ci * 21808c2ecf20Sopenharmony_ci * For a full fsync we wait for the ordered extents to complete while 21818c2ecf20Sopenharmony_ci * for a fast fsync we wait just for writeback to complete, and then 21828c2ecf20Sopenharmony_ci * attach the ordered extents to the transaction so that a transaction 21838c2ecf20Sopenharmony_ci * commit waits for their completion, to avoid data loss if we fsync, 21848c2ecf20Sopenharmony_ci * the current transaction commits before the ordered extents complete 21858c2ecf20Sopenharmony_ci * and a power failure happens right after that. 21868c2ecf20Sopenharmony_ci */ 21878c2ecf20Sopenharmony_ci if (full_sync) { 21888c2ecf20Sopenharmony_ci ret = btrfs_wait_ordered_range(inode, start, len); 21898c2ecf20Sopenharmony_ci } else { 21908c2ecf20Sopenharmony_ci /* 21918c2ecf20Sopenharmony_ci * Get our ordered extents as soon as possible to avoid doing 21928c2ecf20Sopenharmony_ci * checksum lookups in the csum tree, and use instead the 21938c2ecf20Sopenharmony_ci * checksums attached to the ordered extents. 21948c2ecf20Sopenharmony_ci */ 21958c2ecf20Sopenharmony_ci btrfs_get_ordered_extents_for_logging(BTRFS_I(inode), 21968c2ecf20Sopenharmony_ci &ctx.ordered_extents); 21978c2ecf20Sopenharmony_ci ret = filemap_fdatawait_range(inode->i_mapping, start, end); 21988c2ecf20Sopenharmony_ci } 21998c2ecf20Sopenharmony_ci 22008c2ecf20Sopenharmony_ci if (ret) 22018c2ecf20Sopenharmony_ci goto out_release_extents; 22028c2ecf20Sopenharmony_ci 22038c2ecf20Sopenharmony_ci atomic_inc(&root->log_batch); 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci smp_mb(); 22068c2ecf20Sopenharmony_ci if (skip_inode_logging(&ctx)) { 22078c2ecf20Sopenharmony_ci /* 22088c2ecf20Sopenharmony_ci * We've had everything committed since the last time we were 22098c2ecf20Sopenharmony_ci * modified so clear this flag in case it was set for whatever 22108c2ecf20Sopenharmony_ci * reason, it's no longer relevant. 
22118c2ecf20Sopenharmony_ci */ 22128c2ecf20Sopenharmony_ci clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 22138c2ecf20Sopenharmony_ci &BTRFS_I(inode)->runtime_flags); 22148c2ecf20Sopenharmony_ci /* 22158c2ecf20Sopenharmony_ci * An ordered extent might have started before and completed 22168c2ecf20Sopenharmony_ci * already with io errors, in which case the inode was not 22178c2ecf20Sopenharmony_ci * updated and we end up here. So check the inode's mapping 22188c2ecf20Sopenharmony_ci * for any errors that might have happened since we last 22198c2ecf20Sopenharmony_ci * checked called fsync. 22208c2ecf20Sopenharmony_ci */ 22218c2ecf20Sopenharmony_ci ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); 22228c2ecf20Sopenharmony_ci goto out_release_extents; 22238c2ecf20Sopenharmony_ci } 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci /* 22268c2ecf20Sopenharmony_ci * We use start here because we will need to wait on the IO to complete 22278c2ecf20Sopenharmony_ci * in btrfs_sync_log, which could require joining a transaction (for 22288c2ecf20Sopenharmony_ci * example checking cross references in the nocow path). If we use join 22298c2ecf20Sopenharmony_ci * here we could get into a situation where we're waiting on IO to 22308c2ecf20Sopenharmony_ci * happen that is blocked on a transaction trying to commit. With start 22318c2ecf20Sopenharmony_ci * we inc the extwriter counter, so we wait for all extwriters to exit 22328c2ecf20Sopenharmony_ci * before we start blocking joiners. This comment is to keep somebody 22338c2ecf20Sopenharmony_ci * from thinking they are super smart and changing this to 22348c2ecf20Sopenharmony_ci * btrfs_join_transaction *cough*Josef*cough*. 
22358c2ecf20Sopenharmony_ci */ 22368c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, 0); 22378c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 22388c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 22398c2ecf20Sopenharmony_ci goto out_release_extents; 22408c2ecf20Sopenharmony_ci } 22418c2ecf20Sopenharmony_ci 22428c2ecf20Sopenharmony_ci ret = btrfs_log_dentry_safe(trans, dentry, &ctx); 22438c2ecf20Sopenharmony_ci btrfs_release_log_ctx_extents(&ctx); 22448c2ecf20Sopenharmony_ci if (ret < 0) { 22458c2ecf20Sopenharmony_ci /* Fallthrough and commit/free transaction. */ 22468c2ecf20Sopenharmony_ci ret = 1; 22478c2ecf20Sopenharmony_ci } 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci /* we've logged all the items and now have a consistent 22508c2ecf20Sopenharmony_ci * version of the file in the log. It is possible that 22518c2ecf20Sopenharmony_ci * someone will come in and modify the file, but that's 22528c2ecf20Sopenharmony_ci * fine because the log is consistent on disk, and we 22538c2ecf20Sopenharmony_ci * have references to all of the file's extents 22548c2ecf20Sopenharmony_ci * 22558c2ecf20Sopenharmony_ci * It is possible that someone will come in and log the 22568c2ecf20Sopenharmony_ci * file again, but that will end up using the synchronization 22578c2ecf20Sopenharmony_ci * inside btrfs_sync_log to keep things safe. 
22588c2ecf20Sopenharmony_ci */ 22598c2ecf20Sopenharmony_ci up_write(&BTRFS_I(inode)->dio_sem); 22608c2ecf20Sopenharmony_ci inode_unlock(inode); 22618c2ecf20Sopenharmony_ci 22628c2ecf20Sopenharmony_ci if (ret != BTRFS_NO_LOG_SYNC) { 22638c2ecf20Sopenharmony_ci if (!ret) { 22648c2ecf20Sopenharmony_ci ret = btrfs_sync_log(trans, root, &ctx); 22658c2ecf20Sopenharmony_ci if (!ret) { 22668c2ecf20Sopenharmony_ci ret = btrfs_end_transaction(trans); 22678c2ecf20Sopenharmony_ci goto out; 22688c2ecf20Sopenharmony_ci } 22698c2ecf20Sopenharmony_ci } 22708c2ecf20Sopenharmony_ci if (!full_sync) { 22718c2ecf20Sopenharmony_ci ret = btrfs_wait_ordered_range(inode, start, len); 22728c2ecf20Sopenharmony_ci if (ret) { 22738c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 22748c2ecf20Sopenharmony_ci goto out; 22758c2ecf20Sopenharmony_ci } 22768c2ecf20Sopenharmony_ci } 22778c2ecf20Sopenharmony_ci ret = btrfs_commit_transaction(trans); 22788c2ecf20Sopenharmony_ci } else { 22798c2ecf20Sopenharmony_ci ret = btrfs_end_transaction(trans); 22808c2ecf20Sopenharmony_ci } 22818c2ecf20Sopenharmony_ciout: 22828c2ecf20Sopenharmony_ci ASSERT(list_empty(&ctx.list)); 22838c2ecf20Sopenharmony_ci err = file_check_and_advance_wb_err(file); 22848c2ecf20Sopenharmony_ci if (!ret) 22858c2ecf20Sopenharmony_ci ret = err; 22868c2ecf20Sopenharmony_ci return ret > 0 ? 
-EIO : ret; 22878c2ecf20Sopenharmony_ci 22888c2ecf20Sopenharmony_ciout_release_extents: 22898c2ecf20Sopenharmony_ci btrfs_release_log_ctx_extents(&ctx); 22908c2ecf20Sopenharmony_ci up_write(&BTRFS_I(inode)->dio_sem); 22918c2ecf20Sopenharmony_ci inode_unlock(inode); 22928c2ecf20Sopenharmony_ci goto out; 22938c2ecf20Sopenharmony_ci} 22948c2ecf20Sopenharmony_ci 22958c2ecf20Sopenharmony_cistatic const struct vm_operations_struct btrfs_file_vm_ops = { 22968c2ecf20Sopenharmony_ci .fault = filemap_fault, 22978c2ecf20Sopenharmony_ci .map_pages = filemap_map_pages, 22988c2ecf20Sopenharmony_ci .page_mkwrite = btrfs_page_mkwrite, 22998c2ecf20Sopenharmony_ci}; 23008c2ecf20Sopenharmony_ci 23018c2ecf20Sopenharmony_cistatic int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 23028c2ecf20Sopenharmony_ci{ 23038c2ecf20Sopenharmony_ci struct address_space *mapping = filp->f_mapping; 23048c2ecf20Sopenharmony_ci 23058c2ecf20Sopenharmony_ci if (!mapping->a_ops->readpage) 23068c2ecf20Sopenharmony_ci return -ENOEXEC; 23078c2ecf20Sopenharmony_ci 23088c2ecf20Sopenharmony_ci file_accessed(filp); 23098c2ecf20Sopenharmony_ci vma->vm_ops = &btrfs_file_vm_ops; 23108c2ecf20Sopenharmony_ci 23118c2ecf20Sopenharmony_ci return 0; 23128c2ecf20Sopenharmony_ci} 23138c2ecf20Sopenharmony_ci 23148c2ecf20Sopenharmony_cistatic int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, 23158c2ecf20Sopenharmony_ci int slot, u64 start, u64 end) 23168c2ecf20Sopenharmony_ci{ 23178c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 23188c2ecf20Sopenharmony_ci struct btrfs_key key; 23198c2ecf20Sopenharmony_ci 23208c2ecf20Sopenharmony_ci if (slot < 0 || slot >= btrfs_header_nritems(leaf)) 23218c2ecf20Sopenharmony_ci return 0; 23228c2ecf20Sopenharmony_ci 23238c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, slot); 23248c2ecf20Sopenharmony_ci if (key.objectid != btrfs_ino(inode) || 23258c2ecf20Sopenharmony_ci key.type != BTRFS_EXTENT_DATA_KEY) 23268c2ecf20Sopenharmony_ci 
return 0; 23278c2ecf20Sopenharmony_ci 23288c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 23298c2ecf20Sopenharmony_ci 23308c2ecf20Sopenharmony_ci if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) 23318c2ecf20Sopenharmony_ci return 0; 23328c2ecf20Sopenharmony_ci 23338c2ecf20Sopenharmony_ci if (btrfs_file_extent_disk_bytenr(leaf, fi)) 23348c2ecf20Sopenharmony_ci return 0; 23358c2ecf20Sopenharmony_ci 23368c2ecf20Sopenharmony_ci if (key.offset == end) 23378c2ecf20Sopenharmony_ci return 1; 23388c2ecf20Sopenharmony_ci if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) 23398c2ecf20Sopenharmony_ci return 1; 23408c2ecf20Sopenharmony_ci return 0; 23418c2ecf20Sopenharmony_ci} 23428c2ecf20Sopenharmony_ci 23438c2ecf20Sopenharmony_cistatic int fill_holes(struct btrfs_trans_handle *trans, 23448c2ecf20Sopenharmony_ci struct btrfs_inode *inode, 23458c2ecf20Sopenharmony_ci struct btrfs_path *path, u64 offset, u64 end) 23468c2ecf20Sopenharmony_ci{ 23478c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 23488c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 23498c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 23508c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 23518c2ecf20Sopenharmony_ci struct extent_map *hole_em; 23528c2ecf20Sopenharmony_ci struct extent_map_tree *em_tree = &inode->extent_tree; 23538c2ecf20Sopenharmony_ci struct btrfs_key key; 23548c2ecf20Sopenharmony_ci int ret; 23558c2ecf20Sopenharmony_ci 23568c2ecf20Sopenharmony_ci if (btrfs_fs_incompat(fs_info, NO_HOLES)) 23578c2ecf20Sopenharmony_ci goto out; 23588c2ecf20Sopenharmony_ci 23598c2ecf20Sopenharmony_ci key.objectid = btrfs_ino(inode); 23608c2ecf20Sopenharmony_ci key.type = BTRFS_EXTENT_DATA_KEY; 23618c2ecf20Sopenharmony_ci key.offset = offset; 23628c2ecf20Sopenharmony_ci 23638c2ecf20Sopenharmony_ci ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 23648c2ecf20Sopenharmony_ci if (ret <= 0) { 
23658c2ecf20Sopenharmony_ci /* 23668c2ecf20Sopenharmony_ci * We should have dropped this offset, so if we find it then 23678c2ecf20Sopenharmony_ci * something has gone horribly wrong. 23688c2ecf20Sopenharmony_ci */ 23698c2ecf20Sopenharmony_ci if (ret == 0) 23708c2ecf20Sopenharmony_ci ret = -EINVAL; 23718c2ecf20Sopenharmony_ci return ret; 23728c2ecf20Sopenharmony_ci } 23738c2ecf20Sopenharmony_ci 23748c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 23758c2ecf20Sopenharmony_ci if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) { 23768c2ecf20Sopenharmony_ci u64 num_bytes; 23778c2ecf20Sopenharmony_ci 23788c2ecf20Sopenharmony_ci path->slots[0]--; 23798c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 23808c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 23818c2ecf20Sopenharmony_ci num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + 23828c2ecf20Sopenharmony_ci end - offset; 23838c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); 23848c2ecf20Sopenharmony_ci btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 23858c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, 0); 23868c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 23878c2ecf20Sopenharmony_ci goto out; 23888c2ecf20Sopenharmony_ci } 23898c2ecf20Sopenharmony_ci 23908c2ecf20Sopenharmony_ci if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { 23918c2ecf20Sopenharmony_ci u64 num_bytes; 23928c2ecf20Sopenharmony_ci 23938c2ecf20Sopenharmony_ci key.offset = offset; 23948c2ecf20Sopenharmony_ci btrfs_set_item_key_safe(fs_info, path, &key); 23958c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 23968c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 23978c2ecf20Sopenharmony_ci num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - 23988c2ecf20Sopenharmony_ci offset; 23998c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); 24008c2ecf20Sopenharmony_ci btrfs_set_file_extent_ram_bytes(leaf, fi, 
num_bytes); 24018c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, fi, 0); 24028c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 24038c2ecf20Sopenharmony_ci goto out; 24048c2ecf20Sopenharmony_ci } 24058c2ecf20Sopenharmony_ci btrfs_release_path(path); 24068c2ecf20Sopenharmony_ci 24078c2ecf20Sopenharmony_ci ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), 24088c2ecf20Sopenharmony_ci offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0); 24098c2ecf20Sopenharmony_ci if (ret) 24108c2ecf20Sopenharmony_ci return ret; 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ciout: 24138c2ecf20Sopenharmony_ci btrfs_release_path(path); 24148c2ecf20Sopenharmony_ci 24158c2ecf20Sopenharmony_ci hole_em = alloc_extent_map(); 24168c2ecf20Sopenharmony_ci if (!hole_em) { 24178c2ecf20Sopenharmony_ci btrfs_drop_extent_cache(inode, offset, end - 1, 0); 24188c2ecf20Sopenharmony_ci set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); 24198c2ecf20Sopenharmony_ci } else { 24208c2ecf20Sopenharmony_ci hole_em->start = offset; 24218c2ecf20Sopenharmony_ci hole_em->len = end - offset; 24228c2ecf20Sopenharmony_ci hole_em->ram_bytes = hole_em->len; 24238c2ecf20Sopenharmony_ci hole_em->orig_start = offset; 24248c2ecf20Sopenharmony_ci 24258c2ecf20Sopenharmony_ci hole_em->block_start = EXTENT_MAP_HOLE; 24268c2ecf20Sopenharmony_ci hole_em->block_len = 0; 24278c2ecf20Sopenharmony_ci hole_em->orig_block_len = 0; 24288c2ecf20Sopenharmony_ci hole_em->compress_type = BTRFS_COMPRESS_NONE; 24298c2ecf20Sopenharmony_ci hole_em->generation = trans->transid; 24308c2ecf20Sopenharmony_ci 24318c2ecf20Sopenharmony_ci do { 24328c2ecf20Sopenharmony_ci btrfs_drop_extent_cache(inode, offset, end - 1, 0); 24338c2ecf20Sopenharmony_ci write_lock(&em_tree->lock); 24348c2ecf20Sopenharmony_ci ret = add_extent_mapping(em_tree, hole_em, 1); 24358c2ecf20Sopenharmony_ci write_unlock(&em_tree->lock); 24368c2ecf20Sopenharmony_ci } while (ret == -EEXIST); 24378c2ecf20Sopenharmony_ci 
free_extent_map(hole_em); 24388c2ecf20Sopenharmony_ci if (ret) 24398c2ecf20Sopenharmony_ci set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 24408c2ecf20Sopenharmony_ci &inode->runtime_flags); 24418c2ecf20Sopenharmony_ci } 24428c2ecf20Sopenharmony_ci 24438c2ecf20Sopenharmony_ci return 0; 24448c2ecf20Sopenharmony_ci} 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci/* 24478c2ecf20Sopenharmony_ci * Find a hole extent on given inode and change start/len to the end of hole 24488c2ecf20Sopenharmony_ci * extent.(hole/vacuum extent whose em->start <= start && 24498c2ecf20Sopenharmony_ci * em->start + em->len > start) 24508c2ecf20Sopenharmony_ci * When a hole extent is found, return 1 and modify start/len. 24518c2ecf20Sopenharmony_ci */ 24528c2ecf20Sopenharmony_cistatic int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) 24538c2ecf20Sopenharmony_ci{ 24548c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 24558c2ecf20Sopenharmony_ci struct extent_map *em; 24568c2ecf20Sopenharmony_ci int ret = 0; 24578c2ecf20Sopenharmony_ci 24588c2ecf20Sopenharmony_ci em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 24598c2ecf20Sopenharmony_ci round_down(*start, fs_info->sectorsize), 24608c2ecf20Sopenharmony_ci round_up(*len, fs_info->sectorsize)); 24618c2ecf20Sopenharmony_ci if (IS_ERR(em)) 24628c2ecf20Sopenharmony_ci return PTR_ERR(em); 24638c2ecf20Sopenharmony_ci 24648c2ecf20Sopenharmony_ci /* Hole or vacuum extent(only exists in no-hole mode) */ 24658c2ecf20Sopenharmony_ci if (em->block_start == EXTENT_MAP_HOLE) { 24668c2ecf20Sopenharmony_ci ret = 1; 24678c2ecf20Sopenharmony_ci *len = em->start + em->len > *start + *len ? 
24688c2ecf20Sopenharmony_ci 0 : *start + *len - em->start - em->len; 24698c2ecf20Sopenharmony_ci *start = em->start + em->len; 24708c2ecf20Sopenharmony_ci } 24718c2ecf20Sopenharmony_ci free_extent_map(em); 24728c2ecf20Sopenharmony_ci return ret; 24738c2ecf20Sopenharmony_ci} 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_cistatic int btrfs_punch_hole_lock_range(struct inode *inode, 24768c2ecf20Sopenharmony_ci const u64 lockstart, 24778c2ecf20Sopenharmony_ci const u64 lockend, 24788c2ecf20Sopenharmony_ci struct extent_state **cached_state) 24798c2ecf20Sopenharmony_ci{ 24808c2ecf20Sopenharmony_ci while (1) { 24818c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 24828c2ecf20Sopenharmony_ci int ret; 24838c2ecf20Sopenharmony_ci 24848c2ecf20Sopenharmony_ci truncate_pagecache_range(inode, lockstart, lockend); 24858c2ecf20Sopenharmony_ci 24868c2ecf20Sopenharmony_ci lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 24878c2ecf20Sopenharmony_ci cached_state); 24888c2ecf20Sopenharmony_ci ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), 24898c2ecf20Sopenharmony_ci lockend); 24908c2ecf20Sopenharmony_ci 24918c2ecf20Sopenharmony_ci /* 24928c2ecf20Sopenharmony_ci * We need to make sure we have no ordered extents in this range 24938c2ecf20Sopenharmony_ci * and nobody raced in and read a page in this range, if we did 24948c2ecf20Sopenharmony_ci * we need to try again. 
24958c2ecf20Sopenharmony_ci */ 24968c2ecf20Sopenharmony_ci if ((!ordered || 24978c2ecf20Sopenharmony_ci (ordered->file_offset + ordered->num_bytes <= lockstart || 24988c2ecf20Sopenharmony_ci ordered->file_offset > lockend)) && 24998c2ecf20Sopenharmony_ci !filemap_range_has_page(inode->i_mapping, 25008c2ecf20Sopenharmony_ci lockstart, lockend)) { 25018c2ecf20Sopenharmony_ci if (ordered) 25028c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 25038c2ecf20Sopenharmony_ci break; 25048c2ecf20Sopenharmony_ci } 25058c2ecf20Sopenharmony_ci if (ordered) 25068c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 25078c2ecf20Sopenharmony_ci unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, 25088c2ecf20Sopenharmony_ci lockend, cached_state); 25098c2ecf20Sopenharmony_ci ret = btrfs_wait_ordered_range(inode, lockstart, 25108c2ecf20Sopenharmony_ci lockend - lockstart + 1); 25118c2ecf20Sopenharmony_ci if (ret) 25128c2ecf20Sopenharmony_ci return ret; 25138c2ecf20Sopenharmony_ci } 25148c2ecf20Sopenharmony_ci return 0; 25158c2ecf20Sopenharmony_ci} 25168c2ecf20Sopenharmony_ci 25178c2ecf20Sopenharmony_cistatic int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, 25188c2ecf20Sopenharmony_ci struct inode *inode, 25198c2ecf20Sopenharmony_ci struct btrfs_path *path, 25208c2ecf20Sopenharmony_ci struct btrfs_replace_extent_info *extent_info, 25218c2ecf20Sopenharmony_ci const u64 replace_len) 25228c2ecf20Sopenharmony_ci{ 25238c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 25248c2ecf20Sopenharmony_ci struct btrfs_root *root = BTRFS_I(inode)->root; 25258c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *extent; 25268c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 25278c2ecf20Sopenharmony_ci struct btrfs_key key; 25288c2ecf20Sopenharmony_ci int slot; 25298c2ecf20Sopenharmony_ci struct btrfs_ref ref = { 0 }; 25308c2ecf20Sopenharmony_ci int ret; 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ci if (replace_len == 0) 
25338c2ecf20Sopenharmony_ci return 0; 25348c2ecf20Sopenharmony_ci 25358c2ecf20Sopenharmony_ci if (extent_info->disk_offset == 0 && 25368c2ecf20Sopenharmony_ci btrfs_fs_incompat(fs_info, NO_HOLES)) 25378c2ecf20Sopenharmony_ci return 0; 25388c2ecf20Sopenharmony_ci 25398c2ecf20Sopenharmony_ci key.objectid = btrfs_ino(BTRFS_I(inode)); 25408c2ecf20Sopenharmony_ci key.type = BTRFS_EXTENT_DATA_KEY; 25418c2ecf20Sopenharmony_ci key.offset = extent_info->file_offset; 25428c2ecf20Sopenharmony_ci ret = btrfs_insert_empty_item(trans, root, path, &key, 25438c2ecf20Sopenharmony_ci sizeof(struct btrfs_file_extent_item)); 25448c2ecf20Sopenharmony_ci if (ret) 25458c2ecf20Sopenharmony_ci return ret; 25468c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 25478c2ecf20Sopenharmony_ci slot = path->slots[0]; 25488c2ecf20Sopenharmony_ci write_extent_buffer(leaf, extent_info->extent_buf, 25498c2ecf20Sopenharmony_ci btrfs_item_ptr_offset(leaf, slot), 25508c2ecf20Sopenharmony_ci sizeof(struct btrfs_file_extent_item)); 25518c2ecf20Sopenharmony_ci extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 25528c2ecf20Sopenharmony_ci ASSERT(btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE); 25538c2ecf20Sopenharmony_ci btrfs_set_file_extent_offset(leaf, extent, extent_info->data_offset); 25548c2ecf20Sopenharmony_ci btrfs_set_file_extent_num_bytes(leaf, extent, replace_len); 25558c2ecf20Sopenharmony_ci if (extent_info->is_new_extent) 25568c2ecf20Sopenharmony_ci btrfs_set_file_extent_generation(leaf, extent, trans->transid); 25578c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 25588c2ecf20Sopenharmony_ci btrfs_release_path(path); 25598c2ecf20Sopenharmony_ci 25608c2ecf20Sopenharmony_ci ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), 25618c2ecf20Sopenharmony_ci extent_info->file_offset, replace_len); 25628c2ecf20Sopenharmony_ci if (ret) 25638c2ecf20Sopenharmony_ci return ret; 25648c2ecf20Sopenharmony_ci 25658c2ecf20Sopenharmony_ci /* If it's a hole, nothing more 
needs to be done. */ 25668c2ecf20Sopenharmony_ci if (extent_info->disk_offset == 0) 25678c2ecf20Sopenharmony_ci return 0; 25688c2ecf20Sopenharmony_ci 25698c2ecf20Sopenharmony_ci inode_add_bytes(inode, replace_len); 25708c2ecf20Sopenharmony_ci 25718c2ecf20Sopenharmony_ci if (extent_info->is_new_extent && extent_info->insertions == 0) { 25728c2ecf20Sopenharmony_ci key.objectid = extent_info->disk_offset; 25738c2ecf20Sopenharmony_ci key.type = BTRFS_EXTENT_ITEM_KEY; 25748c2ecf20Sopenharmony_ci key.offset = extent_info->disk_len; 25758c2ecf20Sopenharmony_ci ret = btrfs_alloc_reserved_file_extent(trans, root, 25768c2ecf20Sopenharmony_ci btrfs_ino(BTRFS_I(inode)), 25778c2ecf20Sopenharmony_ci extent_info->file_offset, 25788c2ecf20Sopenharmony_ci extent_info->qgroup_reserved, 25798c2ecf20Sopenharmony_ci &key); 25808c2ecf20Sopenharmony_ci } else { 25818c2ecf20Sopenharmony_ci u64 ref_offset; 25828c2ecf20Sopenharmony_ci 25838c2ecf20Sopenharmony_ci btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, 25848c2ecf20Sopenharmony_ci extent_info->disk_offset, 25858c2ecf20Sopenharmony_ci extent_info->disk_len, 0); 25868c2ecf20Sopenharmony_ci ref_offset = extent_info->file_offset - extent_info->data_offset; 25878c2ecf20Sopenharmony_ci btrfs_init_data_ref(&ref, root->root_key.objectid, 25888c2ecf20Sopenharmony_ci btrfs_ino(BTRFS_I(inode)), ref_offset); 25898c2ecf20Sopenharmony_ci ret = btrfs_inc_extent_ref(trans, &ref); 25908c2ecf20Sopenharmony_ci } 25918c2ecf20Sopenharmony_ci 25928c2ecf20Sopenharmony_ci extent_info->insertions++; 25938c2ecf20Sopenharmony_ci 25948c2ecf20Sopenharmony_ci return ret; 25958c2ecf20Sopenharmony_ci} 25968c2ecf20Sopenharmony_ci 25978c2ecf20Sopenharmony_ci/* 25988c2ecf20Sopenharmony_ci * The respective range must have been previously locked, as well as the inode. 25998c2ecf20Sopenharmony_ci * The end offset is inclusive (last byte of the range). 
26008c2ecf20Sopenharmony_ci * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing 26018c2ecf20Sopenharmony_ci * the file range with an extent. 26028c2ecf20Sopenharmony_ci * When not punching a hole, we don't want to end up in a state where we dropped 26038c2ecf20Sopenharmony_ci * extents without inserting a new one, so we must abort the transaction to avoid 26048c2ecf20Sopenharmony_ci * a corruption. 26058c2ecf20Sopenharmony_ci */ 26068c2ecf20Sopenharmony_ciint btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path, 26078c2ecf20Sopenharmony_ci const u64 start, const u64 end, 26088c2ecf20Sopenharmony_ci struct btrfs_replace_extent_info *extent_info, 26098c2ecf20Sopenharmony_ci struct btrfs_trans_handle **trans_out) 26108c2ecf20Sopenharmony_ci{ 26118c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 26128c2ecf20Sopenharmony_ci u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1); 26138c2ecf20Sopenharmony_ci u64 ino_size = round_up(inode->i_size, fs_info->sectorsize); 26148c2ecf20Sopenharmony_ci struct btrfs_root *root = BTRFS_I(inode)->root; 26158c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans = NULL; 26168c2ecf20Sopenharmony_ci struct btrfs_block_rsv *rsv; 26178c2ecf20Sopenharmony_ci unsigned int rsv_count; 26188c2ecf20Sopenharmony_ci u64 cur_offset; 26198c2ecf20Sopenharmony_ci u64 drop_end; 26208c2ecf20Sopenharmony_ci u64 len = end - start; 26218c2ecf20Sopenharmony_ci int ret = 0; 26228c2ecf20Sopenharmony_ci 26238c2ecf20Sopenharmony_ci if (end <= start) 26248c2ecf20Sopenharmony_ci return -EINVAL; 26258c2ecf20Sopenharmony_ci 26268c2ecf20Sopenharmony_ci rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); 26278c2ecf20Sopenharmony_ci if (!rsv) { 26288c2ecf20Sopenharmony_ci ret = -ENOMEM; 26298c2ecf20Sopenharmony_ci goto out; 26308c2ecf20Sopenharmony_ci } 26318c2ecf20Sopenharmony_ci rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1); 26328c2ecf20Sopenharmony_ci 
rsv->failfast = 1; 26338c2ecf20Sopenharmony_ci 26348c2ecf20Sopenharmony_ci /* 26358c2ecf20Sopenharmony_ci * 1 - update the inode 26368c2ecf20Sopenharmony_ci * 1 - removing the extents in the range 26378c2ecf20Sopenharmony_ci * 1 - adding the hole extent if no_holes isn't set or if we are 26388c2ecf20Sopenharmony_ci * replacing the range with a new extent 26398c2ecf20Sopenharmony_ci */ 26408c2ecf20Sopenharmony_ci if (!btrfs_fs_incompat(fs_info, NO_HOLES) || extent_info) 26418c2ecf20Sopenharmony_ci rsv_count = 3; 26428c2ecf20Sopenharmony_ci else 26438c2ecf20Sopenharmony_ci rsv_count = 2; 26448c2ecf20Sopenharmony_ci 26458c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, rsv_count); 26468c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 26478c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 26488c2ecf20Sopenharmony_ci trans = NULL; 26498c2ecf20Sopenharmony_ci goto out_free; 26508c2ecf20Sopenharmony_ci } 26518c2ecf20Sopenharmony_ci 26528c2ecf20Sopenharmony_ci ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, 26538c2ecf20Sopenharmony_ci min_size, false); 26548c2ecf20Sopenharmony_ci BUG_ON(ret); 26558c2ecf20Sopenharmony_ci trans->block_rsv = rsv; 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_ci cur_offset = start; 26588c2ecf20Sopenharmony_ci while (cur_offset < end) { 26598c2ecf20Sopenharmony_ci ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, 26608c2ecf20Sopenharmony_ci cur_offset, end + 1, &drop_end, 26618c2ecf20Sopenharmony_ci 1, 0, 0, NULL); 26628c2ecf20Sopenharmony_ci if (ret != -ENOSPC) { 26638c2ecf20Sopenharmony_ci /* 26648c2ecf20Sopenharmony_ci * The only time we don't want to abort is if we are 26658c2ecf20Sopenharmony_ci * attempting to clone a partial inline extent, in which 26668c2ecf20Sopenharmony_ci * case we'll get EOPNOTSUPP. 
However if we aren't 26678c2ecf20Sopenharmony_ci * clone we need to abort no matter what, because if we 26688c2ecf20Sopenharmony_ci * got EOPNOTSUPP via prealloc then we messed up and 26698c2ecf20Sopenharmony_ci * need to abort. 26708c2ecf20Sopenharmony_ci */ 26718c2ecf20Sopenharmony_ci if (ret && 26728c2ecf20Sopenharmony_ci (ret != -EOPNOTSUPP || 26738c2ecf20Sopenharmony_ci (extent_info && extent_info->is_new_extent))) 26748c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 26758c2ecf20Sopenharmony_ci break; 26768c2ecf20Sopenharmony_ci } 26778c2ecf20Sopenharmony_ci 26788c2ecf20Sopenharmony_ci trans->block_rsv = &fs_info->trans_block_rsv; 26798c2ecf20Sopenharmony_ci 26808c2ecf20Sopenharmony_ci if (!extent_info && cur_offset < drop_end && 26818c2ecf20Sopenharmony_ci cur_offset < ino_size) { 26828c2ecf20Sopenharmony_ci ret = fill_holes(trans, BTRFS_I(inode), path, 26838c2ecf20Sopenharmony_ci cur_offset, drop_end); 26848c2ecf20Sopenharmony_ci if (ret) { 26858c2ecf20Sopenharmony_ci /* 26868c2ecf20Sopenharmony_ci * If we failed then we didn't insert our hole 26878c2ecf20Sopenharmony_ci * entries for the area we dropped, so now the 26888c2ecf20Sopenharmony_ci * fs is corrupted, so we must abort the 26898c2ecf20Sopenharmony_ci * transaction. 26908c2ecf20Sopenharmony_ci */ 26918c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 26928c2ecf20Sopenharmony_ci break; 26938c2ecf20Sopenharmony_ci } 26948c2ecf20Sopenharmony_ci } else if (!extent_info && cur_offset < drop_end) { 26958c2ecf20Sopenharmony_ci /* 26968c2ecf20Sopenharmony_ci * We are past the i_size here, but since we didn't 26978c2ecf20Sopenharmony_ci * insert holes we need to clear the mapped area so we 26988c2ecf20Sopenharmony_ci * know to not set disk_i_size in this area until a new 26998c2ecf20Sopenharmony_ci * file extent is inserted here. 
27008c2ecf20Sopenharmony_ci */ 27018c2ecf20Sopenharmony_ci ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 27028c2ecf20Sopenharmony_ci cur_offset, drop_end - cur_offset); 27038c2ecf20Sopenharmony_ci if (ret) { 27048c2ecf20Sopenharmony_ci /* 27058c2ecf20Sopenharmony_ci * We couldn't clear our area, so we could 27068c2ecf20Sopenharmony_ci * presumably adjust up and corrupt the fs, so 27078c2ecf20Sopenharmony_ci * we need to abort. 27088c2ecf20Sopenharmony_ci */ 27098c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 27108c2ecf20Sopenharmony_ci break; 27118c2ecf20Sopenharmony_ci } 27128c2ecf20Sopenharmony_ci } 27138c2ecf20Sopenharmony_ci 27148c2ecf20Sopenharmony_ci if (extent_info && drop_end > extent_info->file_offset) { 27158c2ecf20Sopenharmony_ci u64 replace_len = drop_end - extent_info->file_offset; 27168c2ecf20Sopenharmony_ci 27178c2ecf20Sopenharmony_ci ret = btrfs_insert_replace_extent(trans, inode, path, 27188c2ecf20Sopenharmony_ci extent_info, replace_len); 27198c2ecf20Sopenharmony_ci if (ret) { 27208c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 27218c2ecf20Sopenharmony_ci break; 27228c2ecf20Sopenharmony_ci } 27238c2ecf20Sopenharmony_ci extent_info->data_len -= replace_len; 27248c2ecf20Sopenharmony_ci extent_info->data_offset += replace_len; 27258c2ecf20Sopenharmony_ci extent_info->file_offset += replace_len; 27268c2ecf20Sopenharmony_ci } 27278c2ecf20Sopenharmony_ci 27288c2ecf20Sopenharmony_ci cur_offset = drop_end; 27298c2ecf20Sopenharmony_ci 27308c2ecf20Sopenharmony_ci ret = btrfs_update_inode(trans, root, inode); 27318c2ecf20Sopenharmony_ci if (ret) 27328c2ecf20Sopenharmony_ci break; 27338c2ecf20Sopenharmony_ci 27348c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 27358c2ecf20Sopenharmony_ci btrfs_btree_balance_dirty(fs_info); 27368c2ecf20Sopenharmony_ci 27378c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, rsv_count); 27388c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 27398c2ecf20Sopenharmony_ci ret = 
PTR_ERR(trans); 27408c2ecf20Sopenharmony_ci trans = NULL; 27418c2ecf20Sopenharmony_ci break; 27428c2ecf20Sopenharmony_ci } 27438c2ecf20Sopenharmony_ci 27448c2ecf20Sopenharmony_ci ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, 27458c2ecf20Sopenharmony_ci rsv, min_size, false); 27468c2ecf20Sopenharmony_ci BUG_ON(ret); /* shouldn't happen */ 27478c2ecf20Sopenharmony_ci trans->block_rsv = rsv; 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_ci if (!extent_info) { 27508c2ecf20Sopenharmony_ci ret = find_first_non_hole(inode, &cur_offset, &len); 27518c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) 27528c2ecf20Sopenharmony_ci break; 27538c2ecf20Sopenharmony_ci if (ret && !len) { 27548c2ecf20Sopenharmony_ci ret = 0; 27558c2ecf20Sopenharmony_ci break; 27568c2ecf20Sopenharmony_ci } 27578c2ecf20Sopenharmony_ci } 27588c2ecf20Sopenharmony_ci } 27598c2ecf20Sopenharmony_ci 27608c2ecf20Sopenharmony_ci /* 27618c2ecf20Sopenharmony_ci * If we were cloning, force the next fsync to be a full one since we 27628c2ecf20Sopenharmony_ci * we replaced (or just dropped in the case of cloning holes when 27638c2ecf20Sopenharmony_ci * NO_HOLES is enabled) extents and extent maps. 27648c2ecf20Sopenharmony_ci * This is for the sake of simplicity, and cloning into files larger 27658c2ecf20Sopenharmony_ci * than 16Mb would force the full fsync any way (when 27668c2ecf20Sopenharmony_ci * try_release_extent_mapping() is invoked during page cache truncation. 
27678c2ecf20Sopenharmony_ci */ 27688c2ecf20Sopenharmony_ci if (extent_info && !extent_info->is_new_extent) 27698c2ecf20Sopenharmony_ci set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 27708c2ecf20Sopenharmony_ci &BTRFS_I(inode)->runtime_flags); 27718c2ecf20Sopenharmony_ci 27728c2ecf20Sopenharmony_ci if (ret) 27738c2ecf20Sopenharmony_ci goto out_trans; 27748c2ecf20Sopenharmony_ci 27758c2ecf20Sopenharmony_ci trans->block_rsv = &fs_info->trans_block_rsv; 27768c2ecf20Sopenharmony_ci /* 27778c2ecf20Sopenharmony_ci * If we are using the NO_HOLES feature we might have had already an 27788c2ecf20Sopenharmony_ci * hole that overlaps a part of the region [lockstart, lockend] and 27798c2ecf20Sopenharmony_ci * ends at (or beyond) lockend. Since we have no file extent items to 27808c2ecf20Sopenharmony_ci * represent holes, drop_end can be less than lockend and so we must 27818c2ecf20Sopenharmony_ci * make sure we have an extent map representing the existing hole (the 27828c2ecf20Sopenharmony_ci * call to __btrfs_drop_extents() might have dropped the existing extent 27838c2ecf20Sopenharmony_ci * map representing the existing hole), otherwise the fast fsync path 27848c2ecf20Sopenharmony_ci * will not record the existence of the hole region 27858c2ecf20Sopenharmony_ci * [existing_hole_start, lockend]. 27868c2ecf20Sopenharmony_ci */ 27878c2ecf20Sopenharmony_ci if (drop_end <= end) 27888c2ecf20Sopenharmony_ci drop_end = end + 1; 27898c2ecf20Sopenharmony_ci /* 27908c2ecf20Sopenharmony_ci * Don't insert file hole extent item if it's for a range beyond eof 27918c2ecf20Sopenharmony_ci * (because it's useless) or if it represents a 0 bytes range (when 27928c2ecf20Sopenharmony_ci * cur_offset == drop_end). 
27938c2ecf20Sopenharmony_ci */ 27948c2ecf20Sopenharmony_ci if (!extent_info && cur_offset < ino_size && cur_offset < drop_end) { 27958c2ecf20Sopenharmony_ci ret = fill_holes(trans, BTRFS_I(inode), path, 27968c2ecf20Sopenharmony_ci cur_offset, drop_end); 27978c2ecf20Sopenharmony_ci if (ret) { 27988c2ecf20Sopenharmony_ci /* Same comment as above. */ 27998c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 28008c2ecf20Sopenharmony_ci goto out_trans; 28018c2ecf20Sopenharmony_ci } 28028c2ecf20Sopenharmony_ci } else if (!extent_info && cur_offset < drop_end) { 28038c2ecf20Sopenharmony_ci /* See the comment in the loop above for the reasoning here. */ 28048c2ecf20Sopenharmony_ci ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 28058c2ecf20Sopenharmony_ci cur_offset, drop_end - cur_offset); 28068c2ecf20Sopenharmony_ci if (ret) { 28078c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 28088c2ecf20Sopenharmony_ci goto out_trans; 28098c2ecf20Sopenharmony_ci } 28108c2ecf20Sopenharmony_ci 28118c2ecf20Sopenharmony_ci } 28128c2ecf20Sopenharmony_ci if (extent_info) { 28138c2ecf20Sopenharmony_ci ret = btrfs_insert_replace_extent(trans, inode, path, extent_info, 28148c2ecf20Sopenharmony_ci extent_info->data_len); 28158c2ecf20Sopenharmony_ci if (ret) { 28168c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 28178c2ecf20Sopenharmony_ci goto out_trans; 28188c2ecf20Sopenharmony_ci } 28198c2ecf20Sopenharmony_ci } 28208c2ecf20Sopenharmony_ci 28218c2ecf20Sopenharmony_ciout_trans: 28228c2ecf20Sopenharmony_ci if (!trans) 28238c2ecf20Sopenharmony_ci goto out_free; 28248c2ecf20Sopenharmony_ci 28258c2ecf20Sopenharmony_ci trans->block_rsv = &fs_info->trans_block_rsv; 28268c2ecf20Sopenharmony_ci if (ret) 28278c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 28288c2ecf20Sopenharmony_ci else 28298c2ecf20Sopenharmony_ci *trans_out = trans; 28308c2ecf20Sopenharmony_ciout_free: 28318c2ecf20Sopenharmony_ci btrfs_free_block_rsv(fs_info, rsv); 
28328c2ecf20Sopenharmony_ciout: 28338c2ecf20Sopenharmony_ci return ret; 28348c2ecf20Sopenharmony_ci} 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_cistatic int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len) 28378c2ecf20Sopenharmony_ci{ 28388c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 28398c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 28408c2ecf20Sopenharmony_ci struct btrfs_root *root = BTRFS_I(inode)->root; 28418c2ecf20Sopenharmony_ci struct extent_state *cached_state = NULL; 28428c2ecf20Sopenharmony_ci struct btrfs_path *path; 28438c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans = NULL; 28448c2ecf20Sopenharmony_ci u64 lockstart; 28458c2ecf20Sopenharmony_ci u64 lockend; 28468c2ecf20Sopenharmony_ci u64 tail_start; 28478c2ecf20Sopenharmony_ci u64 tail_len; 28488c2ecf20Sopenharmony_ci u64 orig_start = offset; 28498c2ecf20Sopenharmony_ci int ret = 0; 28508c2ecf20Sopenharmony_ci bool same_block; 28518c2ecf20Sopenharmony_ci u64 ino_size; 28528c2ecf20Sopenharmony_ci bool truncated_block = false; 28538c2ecf20Sopenharmony_ci bool updated_inode = false; 28548c2ecf20Sopenharmony_ci 28558c2ecf20Sopenharmony_ci ret = btrfs_wait_ordered_range(inode, offset, len); 28568c2ecf20Sopenharmony_ci if (ret) 28578c2ecf20Sopenharmony_ci return ret; 28588c2ecf20Sopenharmony_ci 28598c2ecf20Sopenharmony_ci inode_lock(inode); 28608c2ecf20Sopenharmony_ci ino_size = round_up(inode->i_size, fs_info->sectorsize); 28618c2ecf20Sopenharmony_ci ret = find_first_non_hole(inode, &offset, &len); 28628c2ecf20Sopenharmony_ci if (ret < 0) 28638c2ecf20Sopenharmony_ci goto out_only_mutex; 28648c2ecf20Sopenharmony_ci if (ret && !len) { 28658c2ecf20Sopenharmony_ci /* Already in a large hole */ 28668c2ecf20Sopenharmony_ci ret = 0; 28678c2ecf20Sopenharmony_ci goto out_only_mutex; 28688c2ecf20Sopenharmony_ci } 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci ret = file_modified(file); 28718c2ecf20Sopenharmony_ci if (ret) 
28728c2ecf20Sopenharmony_ci goto out_only_mutex; 28738c2ecf20Sopenharmony_ci 28748c2ecf20Sopenharmony_ci lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); 28758c2ecf20Sopenharmony_ci lockend = round_down(offset + len, 28768c2ecf20Sopenharmony_ci btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; 28778c2ecf20Sopenharmony_ci same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset)) 28788c2ecf20Sopenharmony_ci == (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)); 28798c2ecf20Sopenharmony_ci /* 28808c2ecf20Sopenharmony_ci * We needn't truncate any block which is beyond the end of the file 28818c2ecf20Sopenharmony_ci * because we are sure there is no data there. 28828c2ecf20Sopenharmony_ci */ 28838c2ecf20Sopenharmony_ci /* 28848c2ecf20Sopenharmony_ci * Only do this if we are in the same block and we aren't doing the 28858c2ecf20Sopenharmony_ci * entire block. 28868c2ecf20Sopenharmony_ci */ 28878c2ecf20Sopenharmony_ci if (same_block && len < fs_info->sectorsize) { 28888c2ecf20Sopenharmony_ci if (offset < ino_size) { 28898c2ecf20Sopenharmony_ci truncated_block = true; 28908c2ecf20Sopenharmony_ci ret = btrfs_truncate_block(inode, offset, len, 0); 28918c2ecf20Sopenharmony_ci } else { 28928c2ecf20Sopenharmony_ci ret = 0; 28938c2ecf20Sopenharmony_ci } 28948c2ecf20Sopenharmony_ci goto out_only_mutex; 28958c2ecf20Sopenharmony_ci } 28968c2ecf20Sopenharmony_ci 28978c2ecf20Sopenharmony_ci /* zero back part of the first block */ 28988c2ecf20Sopenharmony_ci if (offset < ino_size) { 28998c2ecf20Sopenharmony_ci truncated_block = true; 29008c2ecf20Sopenharmony_ci ret = btrfs_truncate_block(inode, offset, 0, 0); 29018c2ecf20Sopenharmony_ci if (ret) { 29028c2ecf20Sopenharmony_ci inode_unlock(inode); 29038c2ecf20Sopenharmony_ci return ret; 29048c2ecf20Sopenharmony_ci } 29058c2ecf20Sopenharmony_ci } 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_ci /* Check the aligned pages after the first unaligned page, 29088c2ecf20Sopenharmony_ci * if offset != orig_start, which means the 
first unaligned page 29098c2ecf20Sopenharmony_ci * including several following pages are already in holes, 29108c2ecf20Sopenharmony_ci * the extra check can be skipped */ 29118c2ecf20Sopenharmony_ci if (offset == orig_start) { 29128c2ecf20Sopenharmony_ci /* after truncate page, check hole again */ 29138c2ecf20Sopenharmony_ci len = offset + len - lockstart; 29148c2ecf20Sopenharmony_ci offset = lockstart; 29158c2ecf20Sopenharmony_ci ret = find_first_non_hole(inode, &offset, &len); 29168c2ecf20Sopenharmony_ci if (ret < 0) 29178c2ecf20Sopenharmony_ci goto out_only_mutex; 29188c2ecf20Sopenharmony_ci if (ret && !len) { 29198c2ecf20Sopenharmony_ci ret = 0; 29208c2ecf20Sopenharmony_ci goto out_only_mutex; 29218c2ecf20Sopenharmony_ci } 29228c2ecf20Sopenharmony_ci lockstart = offset; 29238c2ecf20Sopenharmony_ci } 29248c2ecf20Sopenharmony_ci 29258c2ecf20Sopenharmony_ci /* Check the tail unaligned part is in a hole */ 29268c2ecf20Sopenharmony_ci tail_start = lockend + 1; 29278c2ecf20Sopenharmony_ci tail_len = offset + len - tail_start; 29288c2ecf20Sopenharmony_ci if (tail_len) { 29298c2ecf20Sopenharmony_ci ret = find_first_non_hole(inode, &tail_start, &tail_len); 29308c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) 29318c2ecf20Sopenharmony_ci goto out_only_mutex; 29328c2ecf20Sopenharmony_ci if (!ret) { 29338c2ecf20Sopenharmony_ci /* zero the front end of the last page */ 29348c2ecf20Sopenharmony_ci if (tail_start + tail_len < ino_size) { 29358c2ecf20Sopenharmony_ci truncated_block = true; 29368c2ecf20Sopenharmony_ci ret = btrfs_truncate_block(inode, 29378c2ecf20Sopenharmony_ci tail_start + tail_len, 29388c2ecf20Sopenharmony_ci 0, 1); 29398c2ecf20Sopenharmony_ci if (ret) 29408c2ecf20Sopenharmony_ci goto out_only_mutex; 29418c2ecf20Sopenharmony_ci } 29428c2ecf20Sopenharmony_ci } 29438c2ecf20Sopenharmony_ci } 29448c2ecf20Sopenharmony_ci 29458c2ecf20Sopenharmony_ci if (lockend < lockstart) { 29468c2ecf20Sopenharmony_ci ret = 0; 29478c2ecf20Sopenharmony_ci goto out_only_mutex; 
29488c2ecf20Sopenharmony_ci } 29498c2ecf20Sopenharmony_ci 29508c2ecf20Sopenharmony_ci ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, 29518c2ecf20Sopenharmony_ci &cached_state); 29528c2ecf20Sopenharmony_ci if (ret) 29538c2ecf20Sopenharmony_ci goto out_only_mutex; 29548c2ecf20Sopenharmony_ci 29558c2ecf20Sopenharmony_ci path = btrfs_alloc_path(); 29568c2ecf20Sopenharmony_ci if (!path) { 29578c2ecf20Sopenharmony_ci ret = -ENOMEM; 29588c2ecf20Sopenharmony_ci goto out; 29598c2ecf20Sopenharmony_ci } 29608c2ecf20Sopenharmony_ci 29618c2ecf20Sopenharmony_ci ret = btrfs_replace_file_extents(inode, path, lockstart, lockend, NULL, 29628c2ecf20Sopenharmony_ci &trans); 29638c2ecf20Sopenharmony_ci btrfs_free_path(path); 29648c2ecf20Sopenharmony_ci if (ret) 29658c2ecf20Sopenharmony_ci goto out; 29668c2ecf20Sopenharmony_ci 29678c2ecf20Sopenharmony_ci ASSERT(trans != NULL); 29688c2ecf20Sopenharmony_ci inode_inc_iversion(inode); 29698c2ecf20Sopenharmony_ci inode->i_mtime = inode->i_ctime = current_time(inode); 29708c2ecf20Sopenharmony_ci ret = btrfs_update_inode(trans, root, inode); 29718c2ecf20Sopenharmony_ci updated_inode = true; 29728c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 29738c2ecf20Sopenharmony_ci btrfs_btree_balance_dirty(fs_info); 29748c2ecf20Sopenharmony_ciout: 29758c2ecf20Sopenharmony_ci unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 29768c2ecf20Sopenharmony_ci &cached_state); 29778c2ecf20Sopenharmony_ciout_only_mutex: 29788c2ecf20Sopenharmony_ci if (!updated_inode && truncated_block && !ret) { 29798c2ecf20Sopenharmony_ci /* 29808c2ecf20Sopenharmony_ci * If we only end up zeroing part of a page, we still need to 29818c2ecf20Sopenharmony_ci * update the inode item, so that all the time fields are 29828c2ecf20Sopenharmony_ci * updated as well as the necessary btrfs inode in memory fields 29838c2ecf20Sopenharmony_ci * for detecting, at fsync time, if the inode isn't yet in the 29848c2ecf20Sopenharmony_ci * log tree or it's there 
but not up to date. 29858c2ecf20Sopenharmony_ci */ 29868c2ecf20Sopenharmony_ci struct timespec64 now = current_time(inode); 29878c2ecf20Sopenharmony_ci 29888c2ecf20Sopenharmony_ci inode_inc_iversion(inode); 29898c2ecf20Sopenharmony_ci inode->i_mtime = now; 29908c2ecf20Sopenharmony_ci inode->i_ctime = now; 29918c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, 1); 29928c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 29938c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 29948c2ecf20Sopenharmony_ci } else { 29958c2ecf20Sopenharmony_ci int ret2; 29968c2ecf20Sopenharmony_ci 29978c2ecf20Sopenharmony_ci ret = btrfs_update_inode(trans, root, inode); 29988c2ecf20Sopenharmony_ci ret2 = btrfs_end_transaction(trans); 29998c2ecf20Sopenharmony_ci if (!ret) 30008c2ecf20Sopenharmony_ci ret = ret2; 30018c2ecf20Sopenharmony_ci } 30028c2ecf20Sopenharmony_ci } 30038c2ecf20Sopenharmony_ci inode_unlock(inode); 30048c2ecf20Sopenharmony_ci return ret; 30058c2ecf20Sopenharmony_ci} 30068c2ecf20Sopenharmony_ci 30078c2ecf20Sopenharmony_ci/* Helper structure to record which range is already reserved */ 30088c2ecf20Sopenharmony_cistruct falloc_range { 30098c2ecf20Sopenharmony_ci struct list_head list; 30108c2ecf20Sopenharmony_ci u64 start; 30118c2ecf20Sopenharmony_ci u64 len; 30128c2ecf20Sopenharmony_ci}; 30138c2ecf20Sopenharmony_ci 30148c2ecf20Sopenharmony_ci/* 30158c2ecf20Sopenharmony_ci * Helper function to add falloc range 30168c2ecf20Sopenharmony_ci * 30178c2ecf20Sopenharmony_ci * Caller should have locked the larger range of extent containing 30188c2ecf20Sopenharmony_ci * [start, len) 30198c2ecf20Sopenharmony_ci */ 30208c2ecf20Sopenharmony_cistatic int add_falloc_range(struct list_head *head, u64 start, u64 len) 30218c2ecf20Sopenharmony_ci{ 30228c2ecf20Sopenharmony_ci struct falloc_range *prev = NULL; 30238c2ecf20Sopenharmony_ci struct falloc_range *range = NULL; 30248c2ecf20Sopenharmony_ci 30258c2ecf20Sopenharmony_ci if (list_empty(head)) 30268c2ecf20Sopenharmony_ci goto insert; 
30278c2ecf20Sopenharmony_ci 30288c2ecf20Sopenharmony_ci /* 30298c2ecf20Sopenharmony_ci * As fallocate iterate by bytenr order, we only need to check 30308c2ecf20Sopenharmony_ci * the last range. 30318c2ecf20Sopenharmony_ci */ 30328c2ecf20Sopenharmony_ci prev = list_entry(head->prev, struct falloc_range, list); 30338c2ecf20Sopenharmony_ci if (prev->start + prev->len == start) { 30348c2ecf20Sopenharmony_ci prev->len += len; 30358c2ecf20Sopenharmony_ci return 0; 30368c2ecf20Sopenharmony_ci } 30378c2ecf20Sopenharmony_ciinsert: 30388c2ecf20Sopenharmony_ci range = kmalloc(sizeof(*range), GFP_KERNEL); 30398c2ecf20Sopenharmony_ci if (!range) 30408c2ecf20Sopenharmony_ci return -ENOMEM; 30418c2ecf20Sopenharmony_ci range->start = start; 30428c2ecf20Sopenharmony_ci range->len = len; 30438c2ecf20Sopenharmony_ci list_add_tail(&range->list, head); 30448c2ecf20Sopenharmony_ci return 0; 30458c2ecf20Sopenharmony_ci} 30468c2ecf20Sopenharmony_ci 30478c2ecf20Sopenharmony_cistatic int btrfs_fallocate_update_isize(struct inode *inode, 30488c2ecf20Sopenharmony_ci const u64 end, 30498c2ecf20Sopenharmony_ci const int mode) 30508c2ecf20Sopenharmony_ci{ 30518c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 30528c2ecf20Sopenharmony_ci struct btrfs_root *root = BTRFS_I(inode)->root; 30538c2ecf20Sopenharmony_ci int ret; 30548c2ecf20Sopenharmony_ci int ret2; 30558c2ecf20Sopenharmony_ci 30568c2ecf20Sopenharmony_ci if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) 30578c2ecf20Sopenharmony_ci return 0; 30588c2ecf20Sopenharmony_ci 30598c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, 1); 30608c2ecf20Sopenharmony_ci if (IS_ERR(trans)) 30618c2ecf20Sopenharmony_ci return PTR_ERR(trans); 30628c2ecf20Sopenharmony_ci 30638c2ecf20Sopenharmony_ci inode->i_ctime = current_time(inode); 30648c2ecf20Sopenharmony_ci i_size_write(inode, end); 30658c2ecf20Sopenharmony_ci btrfs_inode_safe_disk_i_size_write(inode, 0); 30668c2ecf20Sopenharmony_ci ret = btrfs_update_inode(trans, root, 
inode);
	ret2 = btrfs_end_transaction(trans);

	return ret ? ret : ret2;
}

/*
 * Classification of the sector that contains a zero-range boundary, as
 * returned by btrfs_zero_range_check_range_boundary().
 */
enum {
	RANGE_BOUNDARY_WRITTEN_EXTENT,
	RANGE_BOUNDARY_PREALLOC_EXTENT,
	RANGE_BOUNDARY_HOLE,
};

/*
 * Classify what backs the sector containing @offset.
 *
 * @offset is rounded down to the inode's sector size and a one-sector extent
 * map lookup is done at that position.
 *
 * Return: RANGE_BOUNDARY_HOLE if the extent map is a hole,
 * RANGE_BOUNDARY_PREALLOC_EXTENT if it carries EXTENT_FLAG_PREALLOC,
 * RANGE_BOUNDARY_WRITTEN_EXTENT otherwise, or a negative errno if the extent
 * map lookup failed.
 */
static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
						 u64 offset)
{
	const u64 sectorsize = btrfs_inode_sectorsize(inode);
	struct extent_map *em;
	int ret;

	offset = round_down(offset, sectorsize);
	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/* A hole is checked first; prealloc vs. written is decided by the flag. */
	if (em->block_start == EXTENT_MAP_HOLE)
		ret = RANGE_BOUNDARY_HOLE;
	else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
	else
		ret = RANGE_BOUNDARY_WRITTEN_EXTENT;

	free_extent_map(em);
	return ret;
}

/*
 * Handle FALLOC_FL_ZERO_RANGE for the byte range starting at @offset and
 * spanning @len bytes.
 *
 * btrfs_fallocate() calls this with the inode lock held, after it has waited
 * for ordered IO on the sector-aligned range; pending direct IO is waited for
 * here.
 *
 * The work is done in up to three stages:
 *  1. If the range starts inside a prealloc extent, either the whole request
 *     is already satisfied (only i_size may need updating) or the request is
 *     trimmed to start where that extent ends.
 *  2. If the (possibly trimmed) range lies within a single block, it is either
 *     already prealloc, zeroed in place through btrfs_truncate_block(), or
 *     turned into a one-sector allocation.
 *  3. Otherwise the unaligned head and tail sectors are classified and handled
 *     individually, and the remaining aligned range is preallocated.
 *
 * @mode is passed through to btrfs_prealloc_file_range() and
 * btrfs_fallocate_update_isize().
 *
 * Return: 0 on success or a negative errno.
 */
static int btrfs_zero_range(struct inode *inode,
			    loff_t offset,
			    loff_t len,
			    const int mode)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_map *em;
	struct extent_changeset *data_reserved = NULL;
	int ret;
	u64 alloc_hint = 0;
	const u64 sectorsize = btrfs_inode_sectorsize(BTRFS_I(inode));
	u64 alloc_start = round_down(offset, sectorsize);
	u64 alloc_end = round_up(offset + len, sectorsize);
	u64 bytes_to_reserve = 0;
	bool space_reserved = false;

	inode_dio_wait(inode);

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
			      alloc_end - alloc_start);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	/*
	 * Avoid hole punching and extent allocation for some cases. More cases
	 * could be considered, but these are unlikely common and we keep things
	 * as simple as possible for now. Also, intentionally, if the target
	 * range contains one or more prealloc extents together with regular
	 * extents and holes, we drop all the existing extents and allocate a
	 * new prealloc extent, so that we get a larger contiguous disk extent.
	 */
	if (em->start <= alloc_start &&
	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
		const u64 em_end = em->start + em->len;

		if (em_end >= offset + len) {
			/*
			 * The whole range is already a prealloc extent,
			 * do nothing except updating the inode's i_size if
			 * needed.
			 */
			free_extent_map(em);
			ret = btrfs_fallocate_update_isize(inode, offset + len,
							   mode);
			goto out;
		}
		/*
		 * Part of the range is already a prealloc extent, so operate
		 * only on the remaining part of the range.
		 */
		alloc_start = em_end;
		ASSERT(IS_ALIGNED(alloc_start, sectorsize));
		len = offset + len - alloc_start;
		offset = alloc_start;
		/* Hint the allocator at the disk location right after this extent. */
		alloc_hint = em->block_start + em->len;
	}
	free_extent_map(em);

	/*
	 * The first and last byte of the range convert to the same value under
	 * BTRFS_BYTES_TO_BLKS(), i.e. the whole range sits in one block.
	 */
	if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
	    BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
				      sectorsize);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		/* Already prealloc: nothing to allocate, only i_size may change. */
		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			free_extent_map(em);
			ret = btrfs_fallocate_update_isize(inode, offset + len,
							   mode);
			goto out;
		}
		if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
			free_extent_map(em);
			ret = btrfs_truncate_block(inode, offset, len, 0);
			if (!ret)
				ret = btrfs_fallocate_update_isize(inode,
								   offset + len,
								   mode);
			/*
			 * Nothing has been reserved on this path yet
			 * (space_reserved is false, data_reserved is NULL), so
			 * returning directly skips no cleanup.
			 */
			return ret;
		}
		free_extent_map(em);
		/* Allocate exactly the one sector that contains the range. */
		alloc_start = round_down(offset, sectorsize);
		alloc_end = alloc_start + sectorsize;
		goto reserve_space;
	}

	/* Start from the aligned interior; the boundary checks may widen it. */
	alloc_start = round_up(offset, sectorsize);
	alloc_end = round_down(offset + len, sectorsize);

	/*
	 * For unaligned ranges, check the pages at the boundaries, they might
	 * map to an extent, in which case we need to partially zero them, or
	 * they might map to a hole, in which case we need our allocation range
	 * to cover them.
	 */
	if (!IS_ALIGNED(offset, sectorsize)) {
		ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
							    offset);
		if (ret < 0)
			goto out;
		if (ret == RANGE_BOUNDARY_HOLE) {
			alloc_start = round_down(offset, sectorsize);
			ret = 0;
		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
			ret = btrfs_truncate_block(inode, offset, 0, 0);
			if (ret)
				goto out;
		} else {
			/* Prealloc boundary: leave it alone, clear the enum value. */
			ret = 0;
		}
	}

	if (!IS_ALIGNED(offset + len, sectorsize)) {
		ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
							    offset + len);
		if (ret < 0)
			goto out;
		if (ret == RANGE_BOUNDARY_HOLE) {
			alloc_end = round_up(offset + len, sectorsize);
			ret = 0;
		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
			ret = btrfs_truncate_block(inode, offset + len, 0, 1);
			if (ret)
				goto out;
		} else {
			/* Prealloc boundary: leave it alone, clear the enum value. */
			ret = 0;
		}
	}

reserve_space:
	/* The range may be empty when both boundaries were handled in place. */
	if (alloc_start < alloc_end) {
		struct extent_state *cached_state = NULL;
		const u64 lockstart = alloc_start;
		const u64 lockend = alloc_end - 1;

		bytes_to_reserve = alloc_end - alloc_start;
		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
						      bytes_to_reserve);
		if (ret < 0)
			goto out;
		/* From here on the out path must give the data space back. */
		space_reserved = true;
		ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
						  &cached_state);
		if (ret)
			goto out;
		ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
						alloc_start, bytes_to_reserve);
		if (ret) {
			unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
					     lockend, &cached_state);
			goto out;
		}
		ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
						alloc_end - alloc_start,
						i_blocksize(inode),
						offset + len, &alloc_hint);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state);
		/* btrfs_prealloc_file_range releases reserved space on error */
		if (ret) {
			space_reserved = false;
			goto out;
		}
	}
	ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
 out:
	if (ret && space_reserved)
		btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
					       alloc_start, bytes_to_reserve);
	extent_changeset_free(data_reserved);

	return ret;
}

/*
 * The ->fallocate file operation.
 *
 * Only FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE are
 * accepted; any other bit in @mode yields -EOPNOTSUPP.  Hole punching is
 * delegated to btrfs_punch_hole() and zero-range to btrfs_zero_range(); the
 * remainder of this function implements plain preallocation of the
 * block-aligned range covering @offset .. @offset + @len.
 *
 * Return: 0 on success or a negative errno.
 */
static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct falloc_range *range;
	struct falloc_range *tmp;
	struct list_head reserve_list;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	u64 actual_end = 0;
	struct extent_map *em;
	int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode));
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);
	cur_offset = alloc_start;

	/* Make sure we aren't being given some crap mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(file, offset, len);

	/*
	 * Only trigger disk allocation, don't trigger qgroup reserve
	 *
	 * For qgroup space, it will be checked later.
	 */
	if (!(mode & FALLOC_FL_ZERO_RANGE)) {
		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
						      alloc_end - alloc_start);
		if (ret < 0)
			return ret;
	}

	inode_lock(inode);

	/* Growing the file: let the VFS validate the new size first. */
	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out;
	}

	ret = file_modified(file);
	if (ret)
		goto out;

	/*
	 * TODO: Move these two operations after we have checked
	 * accurate reserved space, or fallocate can still fail but
	 * with page truncated or size expanded.
	 *
	 * But that's a minor problem and won't do much harm BTW.
	 */
	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else if (offset + len > inode->i_size) {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the block if i_size lands in the
		 * middle of a block.
		 */
		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	/*
	 * Zero-range has its own locking and reservation handling, so it
	 * unlocks the inode and returns here instead of using the out labels.
	 */
	if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = btrfs_zero_range(inode, offset, len, mode);
		inode_unlock(inode);
		return ret;
	}

	locked_end = alloc_end - 1;
	/*
	 * Lock the extent range, and retry (unlock, wait, relock) for as long
	 * as an ordered extent still overlaps it.
	 */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/* the extent lock is ordered inside the running
		 * transaction
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
							    locked_end);

		if (ordered &&
		    ordered->file_offset + ordered->num_bytes > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state);
			/*
			 * we can't wait on the range with the transaction
			 * running or with the extent lock held
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	/* First, check if we exceed the qgroup limit */
	INIT_LIST_HEAD(&reserve_list);
	while (cur_offset < alloc_end) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				      alloc_end - cur_offset);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		/* Unaligned end, later handed to btrfs_fallocate_update_isize(). */
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);
		/*
		 * Holes, and non-prealloc extents at or beyond i_size, are
		 * queued for allocation.
		 */
		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = add_falloc_range(&reserve_list, cur_offset,
					       last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
			ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
					&data_reserved, cur_offset,
					last_byte - cur_offset);
			if (ret < 0) {
				/*
				 * This piece is already on reserve_list and is
				 * released by the list walk below; advance
				 * cur_offset so the out path does not release
				 * it a second time.
				 */
				cur_offset = last_byte;
				free_extent_map(em);
				break;
			}
		} else {
			/*
			 * Do not need to reserve unwritten extent for this
			 * range, free reserved data space first, otherwise
			 * it'll result in false ENOSPC error.
			 */
			btrfs_free_reserved_data_space(BTRFS_I(inode),
				data_reserved, cur_offset,
				last_byte - cur_offset);
		}
		free_extent_map(em);
		cur_offset = last_byte;
	}

	/*
	 * If ret is still 0, means we're OK to fallocate.
	 * Or just cleanup the list and exit.
	 */
	list_for_each_entry_safe(range, tmp, &reserve_list, list) {
		if (!ret)
			ret = btrfs_prealloc_file_range(inode, mode,
					range->start,
					range->len, i_blocksize(inode),
					offset + len, &alloc_hint);
		else
			btrfs_free_reserved_data_space(BTRFS_I(inode),
					data_reserved, range->start,
					range->len);
		list_del(&range->list);
		kfree(range);
	}
	if (ret < 0)
		goto out_unlock;

	/*
	 * We didn't need to allocate any more space, but we still extended the
	 * size of the file so we need to update i_size and the inode item.
	 */
	ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state);
out:
	inode_unlock(inode);
	/* Let go of our reservation. */
	if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
		btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
				cur_offset, alloc_end - cur_offset);
	extent_changeset_free(data_reserved);
	return ret;
}

/*
 * Find the next data or hole position at or after @offset, for
 * btrfs_file_llseek().
 *
 * Extent maps are walked from @offset (clamped to 0) up to i_size with the
 * extent range locked.  Prealloc extents are treated like holes: they satisfy
 * SEEK_HOLE and are skipped by SEEK_DATA.
 *
 * Return: the resulting file offset (never beyond i_size), -ENXIO if the file
 * is empty, @offset is at or past i_size, or SEEK_DATA found no data before
 * i_size, or a negative errno from the extent map lookup.
 */
static loff_t find_desired_extent(struct inode *inode, loff_t offset,
				  int whence)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	loff_t i_size = inode->i_size;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (i_size == 0 || offset >= i_size)
		return -ENXIO;

	/*
	 * offset can be negative, in this case we start finding DATA/HOLE from
	 * the very start of the file.
	 */
	start = max_t(loff_t, 0, offset);

	lockstart = round_down(start, fs_info->sectorsize);
	lockend = round_up(i_size, fs_info->sectorsize);
	/* Always lock at least one sector. */
	if (lockend <= lockstart)
		lockend = lockstart + fs_info->sectorsize;
	/* lockend is inclusive from here on. */
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 &cached_state);

	while (start < i_size) {
		em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			/* Keep the free_extent_map() after the loop from seeing an ERR_PTR. */
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			   (em->block_start != EXTENT_MAP_HOLE &&
			    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		/* Not what we are looking for: continue right after this extent. */
		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
	if (ret) {
		offset = ret;
	} else {
		if (whence == SEEK_DATA && start >= i_size)
			offset = -ENXIO;
		else
			/* Running off the end while seeking a hole yields i_size. */
			offset = min_t(loff_t, start, i_size);
	}

	return offset;
}

/*
 * The ->llseek file operation.
 *
 * SEEK_DATA and SEEK_HOLE are resolved by find_desired_extent() under the
 * shared inode lock and the result is applied with vfs_setpos(); every other
 * @whence value is passed to generic_file_llseek().
 *
 * Return: the new file position or a negative errno.
 */
static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;

	switch (whence) {
	default:
		return generic_file_llseek(file, offset, whence);
	case SEEK_DATA:
	case SEEK_HOLE:
		inode_lock_shared(inode);
		offset = find_desired_extent(inode, offset, whence);
		inode_unlock_shared(inode);
		break;
	}

	/* A negative value here is an errno from find_desired_extent(). */
	if (offset < 0)
		return offset;

	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

/*
 * The ->open file operation: set FMODE_NOWAIT and FMODE_BUF_RASYNC on the
 * file, then defer to generic_file_open().
 */
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return generic_file_open(inode, filp);
}

/*
 * The ->read_iter file operation.
 *
 * For IOCB_DIRECT requests the read is first attempted via btrfs_direct_IO()
 * under the shared inode lock.  Its result is returned as-is when it failed,
 * when the iterator has been fully consumed, or when the position has reached
 * i_size.  In every other case (including non-direct requests) the read is
 * completed by generic_file_buffered_read(), which is given the direct read's
 * return value (0 for non-direct requests).
 */
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret = 0;

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct inode *inode = file_inode(iocb->ki_filp);

		inode_lock_shared(inode);
		ret = btrfs_direct_IO(iocb, to);
		inode_unlock_shared(inode);
		if (ret < 0 || !iov_iter_count(to) ||
		    iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
			return ret;
	}

	return generic_file_buffered_read(iocb, to, ret);
}

/* File operations table for btrfs files. */
const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read_iter      = btrfs_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.splice_write	= iter_file_splice_write,
	.mmap		= btrfs_file_mmap,
	.open		= btrfs_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.remap_file_range = btrfs_remap_file_range,
};

/* Destroy the slab cache that backs struct inode_defrag allocations. */
void __cold btrfs_auto_defrag_exit(void)
{
	kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
36258c2ecf20Sopenharmony_ciint __init btrfs_auto_defrag_init(void) 36268c2ecf20Sopenharmony_ci{ 36278c2ecf20Sopenharmony_ci btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", 36288c2ecf20Sopenharmony_ci sizeof(struct inode_defrag), 0, 36298c2ecf20Sopenharmony_ci SLAB_MEM_SPREAD, 36308c2ecf20Sopenharmony_ci NULL); 36318c2ecf20Sopenharmony_ci if (!btrfs_inode_defrag_cachep) 36328c2ecf20Sopenharmony_ci return -ENOMEM; 36338c2ecf20Sopenharmony_ci 36348c2ecf20Sopenharmony_ci return 0; 36358c2ecf20Sopenharmony_ci} 36368c2ecf20Sopenharmony_ci 36378c2ecf20Sopenharmony_ciint btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) 36388c2ecf20Sopenharmony_ci{ 36398c2ecf20Sopenharmony_ci int ret; 36408c2ecf20Sopenharmony_ci 36418c2ecf20Sopenharmony_ci /* 36428c2ecf20Sopenharmony_ci * So with compression we will find and lock a dirty page and clear the 36438c2ecf20Sopenharmony_ci * first one as dirty, setup an async extent, and immediately return 36448c2ecf20Sopenharmony_ci * with the entire range locked but with nobody actually marked with 36458c2ecf20Sopenharmony_ci * writeback. So we can't just filemap_write_and_wait_range() and 36468c2ecf20Sopenharmony_ci * expect it to work since it will just kick off a thread to do the 36478c2ecf20Sopenharmony_ci * actual work. So we need to call filemap_fdatawrite_range _again_ 36488c2ecf20Sopenharmony_ci * since it will wait on the page lock, which won't be unlocked until 36498c2ecf20Sopenharmony_ci * after the pages have been marked as writeback and so we're good to go 36508c2ecf20Sopenharmony_ci * from there. We have to do this otherwise we'll miss the ordered 36518c2ecf20Sopenharmony_ci * extents and that results in badness. Please Josef, do not think you 36528c2ecf20Sopenharmony_ci * know better and pull this out at some point in the future, it is 36538c2ecf20Sopenharmony_ci * right and you are wrong. 
36548c2ecf20Sopenharmony_ci */ 36558c2ecf20Sopenharmony_ci ret = filemap_fdatawrite_range(inode->i_mapping, start, end); 36568c2ecf20Sopenharmony_ci if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 36578c2ecf20Sopenharmony_ci &BTRFS_I(inode)->runtime_flags)) 36588c2ecf20Sopenharmony_ci ret = filemap_fdatawrite_range(inode->i_mapping, start, end); 36598c2ecf20Sopenharmony_ci 36608c2ecf20Sopenharmony_ci return ret; 36618c2ecf20Sopenharmony_ci} 3662