// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/fs.h>
78c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
88c2ecf20Sopenharmony_ci#include <linux/time.h>
98c2ecf20Sopenharmony_ci#include <linux/init.h>
108c2ecf20Sopenharmony_ci#include <linux/string.h>
118c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
128c2ecf20Sopenharmony_ci#include <linux/falloc.h>
138c2ecf20Sopenharmony_ci#include <linux/writeback.h>
148c2ecf20Sopenharmony_ci#include <linux/compat.h>
158c2ecf20Sopenharmony_ci#include <linux/slab.h>
168c2ecf20Sopenharmony_ci#include <linux/btrfs.h>
178c2ecf20Sopenharmony_ci#include <linux/uio.h>
188c2ecf20Sopenharmony_ci#include <linux/iversion.h>
198c2ecf20Sopenharmony_ci#include "ctree.h"
208c2ecf20Sopenharmony_ci#include "disk-io.h"
218c2ecf20Sopenharmony_ci#include "transaction.h"
228c2ecf20Sopenharmony_ci#include "btrfs_inode.h"
238c2ecf20Sopenharmony_ci#include "print-tree.h"
248c2ecf20Sopenharmony_ci#include "tree-log.h"
258c2ecf20Sopenharmony_ci#include "locking.h"
268c2ecf20Sopenharmony_ci#include "volumes.h"
278c2ecf20Sopenharmony_ci#include "qgroup.h"
288c2ecf20Sopenharmony_ci#include "compression.h"
298c2ecf20Sopenharmony_ci#include "delalloc-space.h"
308c2ecf20Sopenharmony_ci#include "reflink.h"
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_cistatic struct kmem_cache *btrfs_inode_defrag_cachep;
338c2ecf20Sopenharmony_ci/*
348c2ecf20Sopenharmony_ci * when auto defrag is enabled we
358c2ecf20Sopenharmony_ci * queue up these defrag structs to remember which
368c2ecf20Sopenharmony_ci * inodes need defragging passes
378c2ecf20Sopenharmony_ci */
388c2ecf20Sopenharmony_cistruct inode_defrag {
398c2ecf20Sopenharmony_ci	struct rb_node rb_node;
408c2ecf20Sopenharmony_ci	/* objectid */
418c2ecf20Sopenharmony_ci	u64 ino;
428c2ecf20Sopenharmony_ci	/*
438c2ecf20Sopenharmony_ci	 * transid where the defrag was added, we search for
448c2ecf20Sopenharmony_ci	 * extents newer than this
458c2ecf20Sopenharmony_ci	 */
468c2ecf20Sopenharmony_ci	u64 transid;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	/* root objectid */
498c2ecf20Sopenharmony_ci	u64 root;
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	/* last offset we were able to defrag */
528c2ecf20Sopenharmony_ci	u64 last_offset;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci	/* if we've wrapped around back to zero once already */
558c2ecf20Sopenharmony_ci	int cycled;
568c2ecf20Sopenharmony_ci};
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_cistatic int __compare_inode_defrag(struct inode_defrag *defrag1,
598c2ecf20Sopenharmony_ci				  struct inode_defrag *defrag2)
608c2ecf20Sopenharmony_ci{
618c2ecf20Sopenharmony_ci	if (defrag1->root > defrag2->root)
628c2ecf20Sopenharmony_ci		return 1;
638c2ecf20Sopenharmony_ci	else if (defrag1->root < defrag2->root)
648c2ecf20Sopenharmony_ci		return -1;
658c2ecf20Sopenharmony_ci	else if (defrag1->ino > defrag2->ino)
668c2ecf20Sopenharmony_ci		return 1;
678c2ecf20Sopenharmony_ci	else if (defrag1->ino < defrag2->ino)
688c2ecf20Sopenharmony_ci		return -1;
698c2ecf20Sopenharmony_ci	else
708c2ecf20Sopenharmony_ci		return 0;
718c2ecf20Sopenharmony_ci}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci/* pop a record for an inode into the defrag tree.  The lock
748c2ecf20Sopenharmony_ci * must be held already
758c2ecf20Sopenharmony_ci *
768c2ecf20Sopenharmony_ci * If you're inserting a record for an older transid than an
778c2ecf20Sopenharmony_ci * existing record, the transid already in the tree is lowered
788c2ecf20Sopenharmony_ci *
798c2ecf20Sopenharmony_ci * If an existing record is found the defrag item you
808c2ecf20Sopenharmony_ci * pass in is freed
818c2ecf20Sopenharmony_ci */
828c2ecf20Sopenharmony_cistatic int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
838c2ecf20Sopenharmony_ci				    struct inode_defrag *defrag)
848c2ecf20Sopenharmony_ci{
858c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
868c2ecf20Sopenharmony_ci	struct inode_defrag *entry;
878c2ecf20Sopenharmony_ci	struct rb_node **p;
888c2ecf20Sopenharmony_ci	struct rb_node *parent = NULL;
898c2ecf20Sopenharmony_ci	int ret;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	p = &fs_info->defrag_inodes.rb_node;
928c2ecf20Sopenharmony_ci	while (*p) {
938c2ecf20Sopenharmony_ci		parent = *p;
948c2ecf20Sopenharmony_ci		entry = rb_entry(parent, struct inode_defrag, rb_node);
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci		ret = __compare_inode_defrag(defrag, entry);
978c2ecf20Sopenharmony_ci		if (ret < 0)
988c2ecf20Sopenharmony_ci			p = &parent->rb_left;
998c2ecf20Sopenharmony_ci		else if (ret > 0)
1008c2ecf20Sopenharmony_ci			p = &parent->rb_right;
1018c2ecf20Sopenharmony_ci		else {
1028c2ecf20Sopenharmony_ci			/* if we're reinserting an entry for
1038c2ecf20Sopenharmony_ci			 * an old defrag run, make sure to
1048c2ecf20Sopenharmony_ci			 * lower the transid of our existing record
1058c2ecf20Sopenharmony_ci			 */
1068c2ecf20Sopenharmony_ci			if (defrag->transid < entry->transid)
1078c2ecf20Sopenharmony_ci				entry->transid = defrag->transid;
1088c2ecf20Sopenharmony_ci			if (defrag->last_offset > entry->last_offset)
1098c2ecf20Sopenharmony_ci				entry->last_offset = defrag->last_offset;
1108c2ecf20Sopenharmony_ci			return -EEXIST;
1118c2ecf20Sopenharmony_ci		}
1128c2ecf20Sopenharmony_ci	}
1138c2ecf20Sopenharmony_ci	set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
1148c2ecf20Sopenharmony_ci	rb_link_node(&defrag->rb_node, parent, p);
1158c2ecf20Sopenharmony_ci	rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
1168c2ecf20Sopenharmony_ci	return 0;
1178c2ecf20Sopenharmony_ci}
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_cistatic inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
1228c2ecf20Sopenharmony_ci		return 0;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	if (btrfs_fs_closing(fs_info))
1258c2ecf20Sopenharmony_ci		return 0;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	return 1;
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci/*
1318c2ecf20Sopenharmony_ci * insert a defrag record for this inode if auto defrag is
1328c2ecf20Sopenharmony_ci * enabled
1338c2ecf20Sopenharmony_ci */
1348c2ecf20Sopenharmony_ciint btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
1358c2ecf20Sopenharmony_ci			   struct btrfs_inode *inode)
1368c2ecf20Sopenharmony_ci{
1378c2ecf20Sopenharmony_ci	struct btrfs_root *root = inode->root;
1388c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
1398c2ecf20Sopenharmony_ci	struct inode_defrag *defrag;
1408c2ecf20Sopenharmony_ci	u64 transid;
1418c2ecf20Sopenharmony_ci	int ret;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	if (!__need_auto_defrag(fs_info))
1448c2ecf20Sopenharmony_ci		return 0;
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
1478c2ecf20Sopenharmony_ci		return 0;
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	if (trans)
1508c2ecf20Sopenharmony_ci		transid = trans->transid;
1518c2ecf20Sopenharmony_ci	else
1528c2ecf20Sopenharmony_ci		transid = inode->root->last_trans;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
1558c2ecf20Sopenharmony_ci	if (!defrag)
1568c2ecf20Sopenharmony_ci		return -ENOMEM;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	defrag->ino = btrfs_ino(inode);
1598c2ecf20Sopenharmony_ci	defrag->transid = transid;
1608c2ecf20Sopenharmony_ci	defrag->root = root->root_key.objectid;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	spin_lock(&fs_info->defrag_inodes_lock);
1638c2ecf20Sopenharmony_ci	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
1648c2ecf20Sopenharmony_ci		/*
1658c2ecf20Sopenharmony_ci		 * If we set IN_DEFRAG flag and evict the inode from memory,
1668c2ecf20Sopenharmony_ci		 * and then re-read this inode, this new inode doesn't have
1678c2ecf20Sopenharmony_ci		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
1688c2ecf20Sopenharmony_ci		 */
1698c2ecf20Sopenharmony_ci		ret = __btrfs_add_inode_defrag(inode, defrag);
1708c2ecf20Sopenharmony_ci		if (ret)
1718c2ecf20Sopenharmony_ci			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
1728c2ecf20Sopenharmony_ci	} else {
1738c2ecf20Sopenharmony_ci		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci	spin_unlock(&fs_info->defrag_inodes_lock);
1768c2ecf20Sopenharmony_ci	return 0;
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci/*
1808c2ecf20Sopenharmony_ci * Requeue the defrag object. If there is a defrag object that points to
1818c2ecf20Sopenharmony_ci * the same inode in the tree, we will merge them together (by
1828c2ecf20Sopenharmony_ci * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
1838c2ecf20Sopenharmony_ci */
1848c2ecf20Sopenharmony_cistatic void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
1858c2ecf20Sopenharmony_ci				       struct inode_defrag *defrag)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
1888c2ecf20Sopenharmony_ci	int ret;
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	if (!__need_auto_defrag(fs_info))
1918c2ecf20Sopenharmony_ci		goto out;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	/*
1948c2ecf20Sopenharmony_ci	 * Here we don't check the IN_DEFRAG flag, because we need merge
1958c2ecf20Sopenharmony_ci	 * them together.
1968c2ecf20Sopenharmony_ci	 */
1978c2ecf20Sopenharmony_ci	spin_lock(&fs_info->defrag_inodes_lock);
1988c2ecf20Sopenharmony_ci	ret = __btrfs_add_inode_defrag(inode, defrag);
1998c2ecf20Sopenharmony_ci	spin_unlock(&fs_info->defrag_inodes_lock);
2008c2ecf20Sopenharmony_ci	if (ret)
2018c2ecf20Sopenharmony_ci		goto out;
2028c2ecf20Sopenharmony_ci	return;
2038c2ecf20Sopenharmony_ciout:
2048c2ecf20Sopenharmony_ci	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
2058c2ecf20Sopenharmony_ci}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci/*
2088c2ecf20Sopenharmony_ci * pick the defragable inode that we want, if it doesn't exist, we will get
2098c2ecf20Sopenharmony_ci * the next one.
2108c2ecf20Sopenharmony_ci */
2118c2ecf20Sopenharmony_cistatic struct inode_defrag *
2128c2ecf20Sopenharmony_cibtrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
2138c2ecf20Sopenharmony_ci{
2148c2ecf20Sopenharmony_ci	struct inode_defrag *entry = NULL;
2158c2ecf20Sopenharmony_ci	struct inode_defrag tmp;
2168c2ecf20Sopenharmony_ci	struct rb_node *p;
2178c2ecf20Sopenharmony_ci	struct rb_node *parent = NULL;
2188c2ecf20Sopenharmony_ci	int ret;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	tmp.ino = ino;
2218c2ecf20Sopenharmony_ci	tmp.root = root;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	spin_lock(&fs_info->defrag_inodes_lock);
2248c2ecf20Sopenharmony_ci	p = fs_info->defrag_inodes.rb_node;
2258c2ecf20Sopenharmony_ci	while (p) {
2268c2ecf20Sopenharmony_ci		parent = p;
2278c2ecf20Sopenharmony_ci		entry = rb_entry(parent, struct inode_defrag, rb_node);
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci		ret = __compare_inode_defrag(&tmp, entry);
2308c2ecf20Sopenharmony_ci		if (ret < 0)
2318c2ecf20Sopenharmony_ci			p = parent->rb_left;
2328c2ecf20Sopenharmony_ci		else if (ret > 0)
2338c2ecf20Sopenharmony_ci			p = parent->rb_right;
2348c2ecf20Sopenharmony_ci		else
2358c2ecf20Sopenharmony_ci			goto out;
2368c2ecf20Sopenharmony_ci	}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
2398c2ecf20Sopenharmony_ci		parent = rb_next(parent);
2408c2ecf20Sopenharmony_ci		if (parent)
2418c2ecf20Sopenharmony_ci			entry = rb_entry(parent, struct inode_defrag, rb_node);
2428c2ecf20Sopenharmony_ci		else
2438c2ecf20Sopenharmony_ci			entry = NULL;
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ciout:
2468c2ecf20Sopenharmony_ci	if (entry)
2478c2ecf20Sopenharmony_ci		rb_erase(parent, &fs_info->defrag_inodes);
2488c2ecf20Sopenharmony_ci	spin_unlock(&fs_info->defrag_inodes_lock);
2498c2ecf20Sopenharmony_ci	return entry;
2508c2ecf20Sopenharmony_ci}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_civoid btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	struct inode_defrag *defrag;
2558c2ecf20Sopenharmony_ci	struct rb_node *node;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	spin_lock(&fs_info->defrag_inodes_lock);
2588c2ecf20Sopenharmony_ci	node = rb_first(&fs_info->defrag_inodes);
2598c2ecf20Sopenharmony_ci	while (node) {
2608c2ecf20Sopenharmony_ci		rb_erase(node, &fs_info->defrag_inodes);
2618c2ecf20Sopenharmony_ci		defrag = rb_entry(node, struct inode_defrag, rb_node);
2628c2ecf20Sopenharmony_ci		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci		cond_resched_lock(&fs_info->defrag_inodes_lock);
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci		node = rb_first(&fs_info->defrag_inodes);
2678c2ecf20Sopenharmony_ci	}
2688c2ecf20Sopenharmony_ci	spin_unlock(&fs_info->defrag_inodes_lock);
2698c2ecf20Sopenharmony_ci}
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci#define BTRFS_DEFRAG_BATCH	1024
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_cistatic int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
2748c2ecf20Sopenharmony_ci				    struct inode_defrag *defrag)
2758c2ecf20Sopenharmony_ci{
2768c2ecf20Sopenharmony_ci	struct btrfs_root *inode_root;
2778c2ecf20Sopenharmony_ci	struct inode *inode;
2788c2ecf20Sopenharmony_ci	struct btrfs_ioctl_defrag_range_args range;
2798c2ecf20Sopenharmony_ci	int num_defrag;
2808c2ecf20Sopenharmony_ci	int ret;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	/* get the inode */
2838c2ecf20Sopenharmony_ci	inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
2848c2ecf20Sopenharmony_ci	if (IS_ERR(inode_root)) {
2858c2ecf20Sopenharmony_ci		ret = PTR_ERR(inode_root);
2868c2ecf20Sopenharmony_ci		goto cleanup;
2878c2ecf20Sopenharmony_ci	}
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
2908c2ecf20Sopenharmony_ci	btrfs_put_root(inode_root);
2918c2ecf20Sopenharmony_ci	if (IS_ERR(inode)) {
2928c2ecf20Sopenharmony_ci		ret = PTR_ERR(inode);
2938c2ecf20Sopenharmony_ci		goto cleanup;
2948c2ecf20Sopenharmony_ci	}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	/* do a chunk of defrag */
2978c2ecf20Sopenharmony_ci	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
2988c2ecf20Sopenharmony_ci	memset(&range, 0, sizeof(range));
2998c2ecf20Sopenharmony_ci	range.len = (u64)-1;
3008c2ecf20Sopenharmony_ci	range.start = defrag->last_offset;
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	sb_start_write(fs_info->sb);
3038c2ecf20Sopenharmony_ci	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
3048c2ecf20Sopenharmony_ci				       BTRFS_DEFRAG_BATCH);
3058c2ecf20Sopenharmony_ci	sb_end_write(fs_info->sb);
3068c2ecf20Sopenharmony_ci	/*
3078c2ecf20Sopenharmony_ci	 * if we filled the whole defrag batch, there
3088c2ecf20Sopenharmony_ci	 * must be more work to do.  Queue this defrag
3098c2ecf20Sopenharmony_ci	 * again
3108c2ecf20Sopenharmony_ci	 */
3118c2ecf20Sopenharmony_ci	if (num_defrag == BTRFS_DEFRAG_BATCH) {
3128c2ecf20Sopenharmony_ci		defrag->last_offset = range.start;
3138c2ecf20Sopenharmony_ci		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
3148c2ecf20Sopenharmony_ci	} else if (defrag->last_offset && !defrag->cycled) {
3158c2ecf20Sopenharmony_ci		/*
3168c2ecf20Sopenharmony_ci		 * we didn't fill our defrag batch, but
3178c2ecf20Sopenharmony_ci		 * we didn't start at zero.  Make sure we loop
3188c2ecf20Sopenharmony_ci		 * around to the start of the file.
3198c2ecf20Sopenharmony_ci		 */
3208c2ecf20Sopenharmony_ci		defrag->last_offset = 0;
3218c2ecf20Sopenharmony_ci		defrag->cycled = 1;
3228c2ecf20Sopenharmony_ci		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
3238c2ecf20Sopenharmony_ci	} else {
3248c2ecf20Sopenharmony_ci		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
3258c2ecf20Sopenharmony_ci	}
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	iput(inode);
3288c2ecf20Sopenharmony_ci	return 0;
3298c2ecf20Sopenharmony_cicleanup:
3308c2ecf20Sopenharmony_ci	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
3318c2ecf20Sopenharmony_ci	return ret;
3328c2ecf20Sopenharmony_ci}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci/*
3358c2ecf20Sopenharmony_ci * run through the list of inodes in the FS that need
3368c2ecf20Sopenharmony_ci * defragging
3378c2ecf20Sopenharmony_ci */
3388c2ecf20Sopenharmony_ciint btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
3398c2ecf20Sopenharmony_ci{
3408c2ecf20Sopenharmony_ci	struct inode_defrag *defrag;
3418c2ecf20Sopenharmony_ci	u64 first_ino = 0;
3428c2ecf20Sopenharmony_ci	u64 root_objectid = 0;
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	atomic_inc(&fs_info->defrag_running);
3458c2ecf20Sopenharmony_ci	while (1) {
3468c2ecf20Sopenharmony_ci		/* Pause the auto defragger. */
3478c2ecf20Sopenharmony_ci		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
3488c2ecf20Sopenharmony_ci			     &fs_info->fs_state))
3498c2ecf20Sopenharmony_ci			break;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci		if (!__need_auto_defrag(fs_info))
3528c2ecf20Sopenharmony_ci			break;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci		/* find an inode to defrag */
3558c2ecf20Sopenharmony_ci		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
3568c2ecf20Sopenharmony_ci						 first_ino);
3578c2ecf20Sopenharmony_ci		if (!defrag) {
3588c2ecf20Sopenharmony_ci			if (root_objectid || first_ino) {
3598c2ecf20Sopenharmony_ci				root_objectid = 0;
3608c2ecf20Sopenharmony_ci				first_ino = 0;
3618c2ecf20Sopenharmony_ci				continue;
3628c2ecf20Sopenharmony_ci			} else {
3638c2ecf20Sopenharmony_ci				break;
3648c2ecf20Sopenharmony_ci			}
3658c2ecf20Sopenharmony_ci		}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci		first_ino = defrag->ino + 1;
3688c2ecf20Sopenharmony_ci		root_objectid = defrag->root;
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci		__btrfs_run_defrag_inode(fs_info, defrag);
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci	atomic_dec(&fs_info->defrag_running);
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	/*
3758c2ecf20Sopenharmony_ci	 * during unmount, we use the transaction_wait queue to
3768c2ecf20Sopenharmony_ci	 * wait for the defragger to stop
3778c2ecf20Sopenharmony_ci	 */
3788c2ecf20Sopenharmony_ci	wake_up(&fs_info->transaction_wait);
3798c2ecf20Sopenharmony_ci	return 0;
3808c2ecf20Sopenharmony_ci}
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci/* simple helper to fault in pages and copy.  This should go away
3838c2ecf20Sopenharmony_ci * and be replaced with calls into generic code.
3848c2ecf20Sopenharmony_ci */
3858c2ecf20Sopenharmony_cistatic noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
3868c2ecf20Sopenharmony_ci					 struct page **prepared_pages,
3878c2ecf20Sopenharmony_ci					 struct iov_iter *i)
3888c2ecf20Sopenharmony_ci{
3898c2ecf20Sopenharmony_ci	size_t copied = 0;
3908c2ecf20Sopenharmony_ci	size_t total_copied = 0;
3918c2ecf20Sopenharmony_ci	int pg = 0;
3928c2ecf20Sopenharmony_ci	int offset = offset_in_page(pos);
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	while (write_bytes > 0) {
3958c2ecf20Sopenharmony_ci		size_t count = min_t(size_t,
3968c2ecf20Sopenharmony_ci				     PAGE_SIZE - offset, write_bytes);
3978c2ecf20Sopenharmony_ci		struct page *page = prepared_pages[pg];
3988c2ecf20Sopenharmony_ci		/*
3998c2ecf20Sopenharmony_ci		 * Copy data from userspace to the current page
4008c2ecf20Sopenharmony_ci		 */
4018c2ecf20Sopenharmony_ci		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci		/* Flush processor's dcache for this page */
4048c2ecf20Sopenharmony_ci		flush_dcache_page(page);
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci		/*
4078c2ecf20Sopenharmony_ci		 * if we get a partial write, we can end up with
4088c2ecf20Sopenharmony_ci		 * partially up to date pages.  These add
4098c2ecf20Sopenharmony_ci		 * a lot of complexity, so make sure they don't
4108c2ecf20Sopenharmony_ci		 * happen by forcing this copy to be retried.
4118c2ecf20Sopenharmony_ci		 *
4128c2ecf20Sopenharmony_ci		 * The rest of the btrfs_file_write code will fall
4138c2ecf20Sopenharmony_ci		 * back to page at a time copies after we return 0.
4148c2ecf20Sopenharmony_ci		 */
4158c2ecf20Sopenharmony_ci		if (!PageUptodate(page) && copied < count)
4168c2ecf20Sopenharmony_ci			copied = 0;
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_ci		iov_iter_advance(i, copied);
4198c2ecf20Sopenharmony_ci		write_bytes -= copied;
4208c2ecf20Sopenharmony_ci		total_copied += copied;
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci		/* Return to btrfs_file_write_iter to fault page */
4238c2ecf20Sopenharmony_ci		if (unlikely(copied == 0))
4248c2ecf20Sopenharmony_ci			break;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci		if (copied < PAGE_SIZE - offset) {
4278c2ecf20Sopenharmony_ci			offset += copied;
4288c2ecf20Sopenharmony_ci		} else {
4298c2ecf20Sopenharmony_ci			pg++;
4308c2ecf20Sopenharmony_ci			offset = 0;
4318c2ecf20Sopenharmony_ci		}
4328c2ecf20Sopenharmony_ci	}
4338c2ecf20Sopenharmony_ci	return total_copied;
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci/*
4378c2ecf20Sopenharmony_ci * unlocks pages after btrfs_file_write is done with them
4388c2ecf20Sopenharmony_ci */
4398c2ecf20Sopenharmony_cistatic void btrfs_drop_pages(struct page **pages, size_t num_pages)
4408c2ecf20Sopenharmony_ci{
4418c2ecf20Sopenharmony_ci	size_t i;
4428c2ecf20Sopenharmony_ci	for (i = 0; i < num_pages; i++) {
4438c2ecf20Sopenharmony_ci		/* page checked is some magic around finding pages that
4448c2ecf20Sopenharmony_ci		 * have been modified without going through btrfs_set_page_dirty
4458c2ecf20Sopenharmony_ci		 * clear it here. There should be no need to mark the pages
4468c2ecf20Sopenharmony_ci		 * accessed as prepare_pages should have marked them accessed
4478c2ecf20Sopenharmony_ci		 * in prepare_pages via find_or_create_page()
4488c2ecf20Sopenharmony_ci		 */
4498c2ecf20Sopenharmony_ci		ClearPageChecked(pages[i]);
4508c2ecf20Sopenharmony_ci		unlock_page(pages[i]);
4518c2ecf20Sopenharmony_ci		put_page(pages[i]);
4528c2ecf20Sopenharmony_ci	}
4538c2ecf20Sopenharmony_ci}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci/*
4568c2ecf20Sopenharmony_ci * after copy_from_user, pages need to be dirtied and we need to make
4578c2ecf20Sopenharmony_ci * sure holes are created between the current EOF and the start of
4588c2ecf20Sopenharmony_ci * any next extents (if required).
4598c2ecf20Sopenharmony_ci *
4608c2ecf20Sopenharmony_ci * this also makes the decision about creating an inline extent vs
4618c2ecf20Sopenharmony_ci * doing real data extents, marking pages dirty and delalloc as required.
4628c2ecf20Sopenharmony_ci */
4638c2ecf20Sopenharmony_ciint btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
4648c2ecf20Sopenharmony_ci		      size_t num_pages, loff_t pos, size_t write_bytes,
4658c2ecf20Sopenharmony_ci		      struct extent_state **cached)
4668c2ecf20Sopenharmony_ci{
4678c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
4688c2ecf20Sopenharmony_ci	int err = 0;
4698c2ecf20Sopenharmony_ci	int i;
4708c2ecf20Sopenharmony_ci	u64 num_bytes;
4718c2ecf20Sopenharmony_ci	u64 start_pos;
4728c2ecf20Sopenharmony_ci	u64 end_of_last_block;
4738c2ecf20Sopenharmony_ci	u64 end_pos = pos + write_bytes;
4748c2ecf20Sopenharmony_ci	loff_t isize = i_size_read(&inode->vfs_inode);
4758c2ecf20Sopenharmony_ci	unsigned int extra_bits = 0;
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	start_pos = pos & ~((u64) fs_info->sectorsize - 1);
4788c2ecf20Sopenharmony_ci	num_bytes = round_up(write_bytes + pos - start_pos,
4798c2ecf20Sopenharmony_ci			     fs_info->sectorsize);
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	end_of_last_block = start_pos + num_bytes - 1;
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci	/*
4848c2ecf20Sopenharmony_ci	 * The pages may have already been dirty, clear out old accounting so
4858c2ecf20Sopenharmony_ci	 * we can set things up properly
4868c2ecf20Sopenharmony_ci	 */
4878c2ecf20Sopenharmony_ci	clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block,
4888c2ecf20Sopenharmony_ci			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4898c2ecf20Sopenharmony_ci			 0, 0, cached);
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
4928c2ecf20Sopenharmony_ci					extra_bits, cached);
4938c2ecf20Sopenharmony_ci	if (err)
4948c2ecf20Sopenharmony_ci		return err;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	for (i = 0; i < num_pages; i++) {
4978c2ecf20Sopenharmony_ci		struct page *p = pages[i];
4988c2ecf20Sopenharmony_ci		SetPageUptodate(p);
4998c2ecf20Sopenharmony_ci		ClearPageChecked(p);
5008c2ecf20Sopenharmony_ci		set_page_dirty(p);
5018c2ecf20Sopenharmony_ci	}
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	/*
5048c2ecf20Sopenharmony_ci	 * we've only changed i_size in ram, and we haven't updated
5058c2ecf20Sopenharmony_ci	 * the disk i_size.  There is no need to log the inode
5068c2ecf20Sopenharmony_ci	 * at this time.
5078c2ecf20Sopenharmony_ci	 */
5088c2ecf20Sopenharmony_ci	if (end_pos > isize)
5098c2ecf20Sopenharmony_ci		i_size_write(&inode->vfs_inode, end_pos);
5108c2ecf20Sopenharmony_ci	return 0;
5118c2ecf20Sopenharmony_ci}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci/*
5148c2ecf20Sopenharmony_ci * this drops all the extents in the cache that intersect the range
5158c2ecf20Sopenharmony_ci * [start, end].  Existing extents are split as required.
5168c2ecf20Sopenharmony_ci */
5178c2ecf20Sopenharmony_civoid btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
5188c2ecf20Sopenharmony_ci			     int skip_pinned)
5198c2ecf20Sopenharmony_ci{
5208c2ecf20Sopenharmony_ci	struct extent_map *em;
5218c2ecf20Sopenharmony_ci	struct extent_map *split = NULL;
5228c2ecf20Sopenharmony_ci	struct extent_map *split2 = NULL;
5238c2ecf20Sopenharmony_ci	struct extent_map_tree *em_tree = &inode->extent_tree;
5248c2ecf20Sopenharmony_ci	u64 len = end - start + 1;
5258c2ecf20Sopenharmony_ci	u64 gen;
5268c2ecf20Sopenharmony_ci	int ret;
5278c2ecf20Sopenharmony_ci	int testend = 1;
5288c2ecf20Sopenharmony_ci	unsigned long flags;
5298c2ecf20Sopenharmony_ci	int compressed = 0;
5308c2ecf20Sopenharmony_ci	bool modified;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	WARN_ON(end < start);
5338c2ecf20Sopenharmony_ci	if (end == (u64)-1) {
5348c2ecf20Sopenharmony_ci		len = (u64)-1;
5358c2ecf20Sopenharmony_ci		testend = 0;
5368c2ecf20Sopenharmony_ci	}
5378c2ecf20Sopenharmony_ci	while (1) {
5388c2ecf20Sopenharmony_ci		int no_splits = 0;
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci		modified = false;
5418c2ecf20Sopenharmony_ci		if (!split)
5428c2ecf20Sopenharmony_ci			split = alloc_extent_map();
5438c2ecf20Sopenharmony_ci		if (!split2)
5448c2ecf20Sopenharmony_ci			split2 = alloc_extent_map();
5458c2ecf20Sopenharmony_ci		if (!split || !split2)
5468c2ecf20Sopenharmony_ci			no_splits = 1;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci		write_lock(&em_tree->lock);
5498c2ecf20Sopenharmony_ci		em = lookup_extent_mapping(em_tree, start, len);
5508c2ecf20Sopenharmony_ci		if (!em) {
5518c2ecf20Sopenharmony_ci			write_unlock(&em_tree->lock);
5528c2ecf20Sopenharmony_ci			break;
5538c2ecf20Sopenharmony_ci		}
5548c2ecf20Sopenharmony_ci		flags = em->flags;
5558c2ecf20Sopenharmony_ci		gen = em->generation;
5568c2ecf20Sopenharmony_ci		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
5578c2ecf20Sopenharmony_ci			if (testend && em->start + em->len >= start + len) {
5588c2ecf20Sopenharmony_ci				free_extent_map(em);
5598c2ecf20Sopenharmony_ci				write_unlock(&em_tree->lock);
5608c2ecf20Sopenharmony_ci				break;
5618c2ecf20Sopenharmony_ci			}
5628c2ecf20Sopenharmony_ci			start = em->start + em->len;
5638c2ecf20Sopenharmony_ci			if (testend)
5648c2ecf20Sopenharmony_ci				len = start + len - (em->start + em->len);
5658c2ecf20Sopenharmony_ci			free_extent_map(em);
5668c2ecf20Sopenharmony_ci			write_unlock(&em_tree->lock);
5678c2ecf20Sopenharmony_ci			continue;
5688c2ecf20Sopenharmony_ci		}
5698c2ecf20Sopenharmony_ci		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
5708c2ecf20Sopenharmony_ci		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
5718c2ecf20Sopenharmony_ci		clear_bit(EXTENT_FLAG_LOGGING, &flags);
5728c2ecf20Sopenharmony_ci		modified = !list_empty(&em->list);
5738c2ecf20Sopenharmony_ci		if (no_splits)
5748c2ecf20Sopenharmony_ci			goto next;
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci		if (em->start < start) {
5778c2ecf20Sopenharmony_ci			split->start = em->start;
5788c2ecf20Sopenharmony_ci			split->len = start - em->start;
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
5818c2ecf20Sopenharmony_ci				split->orig_start = em->orig_start;
5828c2ecf20Sopenharmony_ci				split->block_start = em->block_start;
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci				if (compressed)
5858c2ecf20Sopenharmony_ci					split->block_len = em->block_len;
5868c2ecf20Sopenharmony_ci				else
5878c2ecf20Sopenharmony_ci					split->block_len = split->len;
5888c2ecf20Sopenharmony_ci				split->orig_block_len = max(split->block_len,
5898c2ecf20Sopenharmony_ci						em->orig_block_len);
5908c2ecf20Sopenharmony_ci				split->ram_bytes = em->ram_bytes;
5918c2ecf20Sopenharmony_ci			} else {
5928c2ecf20Sopenharmony_ci				split->orig_start = split->start;
5938c2ecf20Sopenharmony_ci				split->block_len = 0;
5948c2ecf20Sopenharmony_ci				split->block_start = em->block_start;
5958c2ecf20Sopenharmony_ci				split->orig_block_len = 0;
5968c2ecf20Sopenharmony_ci				split->ram_bytes = split->len;
5978c2ecf20Sopenharmony_ci			}
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ci			split->generation = gen;
6008c2ecf20Sopenharmony_ci			split->flags = flags;
6018c2ecf20Sopenharmony_ci			split->compress_type = em->compress_type;
6028c2ecf20Sopenharmony_ci			replace_extent_mapping(em_tree, em, split, modified);
6038c2ecf20Sopenharmony_ci			free_extent_map(split);
6048c2ecf20Sopenharmony_ci			split = split2;
6058c2ecf20Sopenharmony_ci			split2 = NULL;
6068c2ecf20Sopenharmony_ci		}
6078c2ecf20Sopenharmony_ci		if (testend && em->start + em->len > start + len) {
6088c2ecf20Sopenharmony_ci			u64 diff = start + len - em->start;
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci			split->start = start + len;
6118c2ecf20Sopenharmony_ci			split->len = em->start + em->len - (start + len);
6128c2ecf20Sopenharmony_ci			split->flags = flags;
6138c2ecf20Sopenharmony_ci			split->compress_type = em->compress_type;
6148c2ecf20Sopenharmony_ci			split->generation = gen;
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
6178c2ecf20Sopenharmony_ci				split->orig_block_len = max(em->block_len,
6188c2ecf20Sopenharmony_ci						    em->orig_block_len);
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci				split->ram_bytes = em->ram_bytes;
6218c2ecf20Sopenharmony_ci				if (compressed) {
6228c2ecf20Sopenharmony_ci					split->block_len = em->block_len;
6238c2ecf20Sopenharmony_ci					split->block_start = em->block_start;
6248c2ecf20Sopenharmony_ci					split->orig_start = em->orig_start;
6258c2ecf20Sopenharmony_ci				} else {
6268c2ecf20Sopenharmony_ci					split->block_len = split->len;
6278c2ecf20Sopenharmony_ci					split->block_start = em->block_start
6288c2ecf20Sopenharmony_ci						+ diff;
6298c2ecf20Sopenharmony_ci					split->orig_start = em->orig_start;
6308c2ecf20Sopenharmony_ci				}
6318c2ecf20Sopenharmony_ci			} else {
6328c2ecf20Sopenharmony_ci				split->ram_bytes = split->len;
6338c2ecf20Sopenharmony_ci				split->orig_start = split->start;
6348c2ecf20Sopenharmony_ci				split->block_len = 0;
6358c2ecf20Sopenharmony_ci				split->block_start = em->block_start;
6368c2ecf20Sopenharmony_ci				split->orig_block_len = 0;
6378c2ecf20Sopenharmony_ci			}
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci			if (extent_map_in_tree(em)) {
6408c2ecf20Sopenharmony_ci				replace_extent_mapping(em_tree, em, split,
6418c2ecf20Sopenharmony_ci						       modified);
6428c2ecf20Sopenharmony_ci			} else {
6438c2ecf20Sopenharmony_ci				ret = add_extent_mapping(em_tree, split,
6448c2ecf20Sopenharmony_ci							 modified);
6458c2ecf20Sopenharmony_ci				ASSERT(ret == 0); /* Logic error */
6468c2ecf20Sopenharmony_ci			}
6478c2ecf20Sopenharmony_ci			free_extent_map(split);
6488c2ecf20Sopenharmony_ci			split = NULL;
6498c2ecf20Sopenharmony_ci		}
6508c2ecf20Sopenharmony_cinext:
6518c2ecf20Sopenharmony_ci		if (extent_map_in_tree(em))
6528c2ecf20Sopenharmony_ci			remove_extent_mapping(em_tree, em);
6538c2ecf20Sopenharmony_ci		write_unlock(&em_tree->lock);
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci		/* once for us */
6568c2ecf20Sopenharmony_ci		free_extent_map(em);
6578c2ecf20Sopenharmony_ci		/* once for the tree*/
6588c2ecf20Sopenharmony_ci		free_extent_map(em);
6598c2ecf20Sopenharmony_ci	}
6608c2ecf20Sopenharmony_ci	if (split)
6618c2ecf20Sopenharmony_ci		free_extent_map(split);
6628c2ecf20Sopenharmony_ci	if (split2)
6638c2ecf20Sopenharmony_ci		free_extent_map(split2);
6648c2ecf20Sopenharmony_ci}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci/*
6678c2ecf20Sopenharmony_ci * this is very complex, but the basic idea is to drop all extents
6688c2ecf20Sopenharmony_ci * in the range start - end.  hint_block is filled in with a block number
6698c2ecf20Sopenharmony_ci * that would be a good hint to the block allocator for this file.
6708c2ecf20Sopenharmony_ci *
6718c2ecf20Sopenharmony_ci * If an extent intersects the range but is not entirely inside the range
6728c2ecf20Sopenharmony_ci * it is either truncated or split.  Anything entirely inside the range
6738c2ecf20Sopenharmony_ci * is deleted from the tree.
6748c2ecf20Sopenharmony_ci */
6758c2ecf20Sopenharmony_ciint __btrfs_drop_extents(struct btrfs_trans_handle *trans,
6768c2ecf20Sopenharmony_ci			 struct btrfs_root *root, struct btrfs_inode *inode,
6778c2ecf20Sopenharmony_ci			 struct btrfs_path *path, u64 start, u64 end,
6788c2ecf20Sopenharmony_ci			 u64 *drop_end, int drop_cache,
6798c2ecf20Sopenharmony_ci			 int replace_extent,
6808c2ecf20Sopenharmony_ci			 u32 extent_item_size,
6818c2ecf20Sopenharmony_ci			 int *key_inserted)
6828c2ecf20Sopenharmony_ci{
6838c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
6848c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
6858c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
6868c2ecf20Sopenharmony_ci	struct btrfs_ref ref = { 0 };
6878c2ecf20Sopenharmony_ci	struct btrfs_key key;
6888c2ecf20Sopenharmony_ci	struct btrfs_key new_key;
6898c2ecf20Sopenharmony_ci	struct inode *vfs_inode = &inode->vfs_inode;
6908c2ecf20Sopenharmony_ci	u64 ino = btrfs_ino(inode);
6918c2ecf20Sopenharmony_ci	u64 search_start = start;
6928c2ecf20Sopenharmony_ci	u64 disk_bytenr = 0;
6938c2ecf20Sopenharmony_ci	u64 num_bytes = 0;
6948c2ecf20Sopenharmony_ci	u64 extent_offset = 0;
6958c2ecf20Sopenharmony_ci	u64 extent_end = 0;
6968c2ecf20Sopenharmony_ci	u64 last_end = start;
6978c2ecf20Sopenharmony_ci	int del_nr = 0;
6988c2ecf20Sopenharmony_ci	int del_slot = 0;
6998c2ecf20Sopenharmony_ci	int extent_type;
7008c2ecf20Sopenharmony_ci	int recow;
7018c2ecf20Sopenharmony_ci	int ret;
7028c2ecf20Sopenharmony_ci	int modify_tree = -1;
7038c2ecf20Sopenharmony_ci	int update_refs;
7048c2ecf20Sopenharmony_ci	int found = 0;
7058c2ecf20Sopenharmony_ci	int leafs_visited = 0;
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci	if (drop_cache)
7088c2ecf20Sopenharmony_ci		btrfs_drop_extent_cache(inode, start, end - 1, 0);
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci	if (start >= inode->disk_i_size && !replace_extent)
7118c2ecf20Sopenharmony_ci		modify_tree = 0;
7128c2ecf20Sopenharmony_ci
7138c2ecf20Sopenharmony_ci	update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID);
7148c2ecf20Sopenharmony_ci	while (1) {
7158c2ecf20Sopenharmony_ci		recow = 0;
7168c2ecf20Sopenharmony_ci		ret = btrfs_lookup_file_extent(trans, root, path, ino,
7178c2ecf20Sopenharmony_ci					       search_start, modify_tree);
7188c2ecf20Sopenharmony_ci		if (ret < 0)
7198c2ecf20Sopenharmony_ci			break;
7208c2ecf20Sopenharmony_ci		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
7218c2ecf20Sopenharmony_ci			leaf = path->nodes[0];
7228c2ecf20Sopenharmony_ci			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
7238c2ecf20Sopenharmony_ci			if (key.objectid == ino &&
7248c2ecf20Sopenharmony_ci			    key.type == BTRFS_EXTENT_DATA_KEY)
7258c2ecf20Sopenharmony_ci				path->slots[0]--;
7268c2ecf20Sopenharmony_ci		}
7278c2ecf20Sopenharmony_ci		ret = 0;
7288c2ecf20Sopenharmony_ci		leafs_visited++;
7298c2ecf20Sopenharmony_cinext_slot:
7308c2ecf20Sopenharmony_ci		leaf = path->nodes[0];
7318c2ecf20Sopenharmony_ci		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
7328c2ecf20Sopenharmony_ci			BUG_ON(del_nr > 0);
7338c2ecf20Sopenharmony_ci			ret = btrfs_next_leaf(root, path);
7348c2ecf20Sopenharmony_ci			if (ret < 0)
7358c2ecf20Sopenharmony_ci				break;
7368c2ecf20Sopenharmony_ci			if (ret > 0) {
7378c2ecf20Sopenharmony_ci				ret = 0;
7388c2ecf20Sopenharmony_ci				break;
7398c2ecf20Sopenharmony_ci			}
7408c2ecf20Sopenharmony_ci			leafs_visited++;
7418c2ecf20Sopenharmony_ci			leaf = path->nodes[0];
7428c2ecf20Sopenharmony_ci			recow = 1;
7438c2ecf20Sopenharmony_ci		}
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci		if (key.objectid > ino)
7488c2ecf20Sopenharmony_ci			break;
7498c2ecf20Sopenharmony_ci		if (WARN_ON_ONCE(key.objectid < ino) ||
7508c2ecf20Sopenharmony_ci		    key.type < BTRFS_EXTENT_DATA_KEY) {
7518c2ecf20Sopenharmony_ci			ASSERT(del_nr == 0);
7528c2ecf20Sopenharmony_ci			path->slots[0]++;
7538c2ecf20Sopenharmony_ci			goto next_slot;
7548c2ecf20Sopenharmony_ci		}
7558c2ecf20Sopenharmony_ci		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
7568c2ecf20Sopenharmony_ci			break;
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
7598c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
7608c2ecf20Sopenharmony_ci		extent_type = btrfs_file_extent_type(leaf, fi);
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci		if (extent_type == BTRFS_FILE_EXTENT_REG ||
7638c2ecf20Sopenharmony_ci		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
7648c2ecf20Sopenharmony_ci			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7658c2ecf20Sopenharmony_ci			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7668c2ecf20Sopenharmony_ci			extent_offset = btrfs_file_extent_offset(leaf, fi);
7678c2ecf20Sopenharmony_ci			extent_end = key.offset +
7688c2ecf20Sopenharmony_ci				btrfs_file_extent_num_bytes(leaf, fi);
7698c2ecf20Sopenharmony_ci		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
7708c2ecf20Sopenharmony_ci			extent_end = key.offset +
7718c2ecf20Sopenharmony_ci				btrfs_file_extent_ram_bytes(leaf, fi);
7728c2ecf20Sopenharmony_ci		} else {
7738c2ecf20Sopenharmony_ci			/* can't happen */
7748c2ecf20Sopenharmony_ci			BUG();
7758c2ecf20Sopenharmony_ci		}
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci		/*
7788c2ecf20Sopenharmony_ci		 * Don't skip extent items representing 0 byte lengths. They
7798c2ecf20Sopenharmony_ci		 * used to be created (bug) if while punching holes we hit
7808c2ecf20Sopenharmony_ci		 * -ENOSPC condition. So if we find one here, just ensure we
7818c2ecf20Sopenharmony_ci		 * delete it, otherwise we would insert a new file extent item
7828c2ecf20Sopenharmony_ci		 * with the same key (offset) as that 0 bytes length file
7838c2ecf20Sopenharmony_ci		 * extent item in the call to setup_items_for_insert() later
7848c2ecf20Sopenharmony_ci		 * in this function.
7858c2ecf20Sopenharmony_ci		 */
7868c2ecf20Sopenharmony_ci		if (extent_end == key.offset && extent_end >= search_start) {
7878c2ecf20Sopenharmony_ci			last_end = extent_end;
7888c2ecf20Sopenharmony_ci			goto delete_extent_item;
7898c2ecf20Sopenharmony_ci		}
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci		if (extent_end <= search_start) {
7928c2ecf20Sopenharmony_ci			path->slots[0]++;
7938c2ecf20Sopenharmony_ci			goto next_slot;
7948c2ecf20Sopenharmony_ci		}
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci		found = 1;
7978c2ecf20Sopenharmony_ci		search_start = max(key.offset, start);
7988c2ecf20Sopenharmony_ci		if (recow || !modify_tree) {
7998c2ecf20Sopenharmony_ci			modify_tree = -1;
8008c2ecf20Sopenharmony_ci			btrfs_release_path(path);
8018c2ecf20Sopenharmony_ci			continue;
8028c2ecf20Sopenharmony_ci		}
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci		/*
8058c2ecf20Sopenharmony_ci		 *     | - range to drop - |
8068c2ecf20Sopenharmony_ci		 *  | -------- extent -------- |
8078c2ecf20Sopenharmony_ci		 */
8088c2ecf20Sopenharmony_ci		if (start > key.offset && end < extent_end) {
8098c2ecf20Sopenharmony_ci			BUG_ON(del_nr > 0);
8108c2ecf20Sopenharmony_ci			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
8118c2ecf20Sopenharmony_ci				ret = -EOPNOTSUPP;
8128c2ecf20Sopenharmony_ci				break;
8138c2ecf20Sopenharmony_ci			}
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci			memcpy(&new_key, &key, sizeof(new_key));
8168c2ecf20Sopenharmony_ci			new_key.offset = start;
8178c2ecf20Sopenharmony_ci			ret = btrfs_duplicate_item(trans, root, path,
8188c2ecf20Sopenharmony_ci						   &new_key);
8198c2ecf20Sopenharmony_ci			if (ret == -EAGAIN) {
8208c2ecf20Sopenharmony_ci				btrfs_release_path(path);
8218c2ecf20Sopenharmony_ci				continue;
8228c2ecf20Sopenharmony_ci			}
8238c2ecf20Sopenharmony_ci			if (ret < 0)
8248c2ecf20Sopenharmony_ci				break;
8258c2ecf20Sopenharmony_ci
8268c2ecf20Sopenharmony_ci			leaf = path->nodes[0];
8278c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
8288c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
8298c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
8308c2ecf20Sopenharmony_ci							start - key.offset);
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0],
8338c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci			extent_offset += start - key.offset;
8368c2ecf20Sopenharmony_ci			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
8378c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
8388c2ecf20Sopenharmony_ci							extent_end - start);
8398c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(leaf);
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci			if (update_refs && disk_bytenr > 0) {
8428c2ecf20Sopenharmony_ci				btrfs_init_generic_ref(&ref,
8438c2ecf20Sopenharmony_ci						BTRFS_ADD_DELAYED_REF,
8448c2ecf20Sopenharmony_ci						disk_bytenr, num_bytes, 0);
8458c2ecf20Sopenharmony_ci				btrfs_init_data_ref(&ref,
8468c2ecf20Sopenharmony_ci						root->root_key.objectid,
8478c2ecf20Sopenharmony_ci						new_key.objectid,
8488c2ecf20Sopenharmony_ci						start - extent_offset);
8498c2ecf20Sopenharmony_ci				ret = btrfs_inc_extent_ref(trans, &ref);
8508c2ecf20Sopenharmony_ci				BUG_ON(ret); /* -ENOMEM */
8518c2ecf20Sopenharmony_ci			}
8528c2ecf20Sopenharmony_ci			key.offset = start;
8538c2ecf20Sopenharmony_ci		}
8548c2ecf20Sopenharmony_ci		/*
8558c2ecf20Sopenharmony_ci		 * From here on out we will have actually dropped something, so
8568c2ecf20Sopenharmony_ci		 * last_end can be updated.
8578c2ecf20Sopenharmony_ci		 */
8588c2ecf20Sopenharmony_ci		last_end = extent_end;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci		/*
8618c2ecf20Sopenharmony_ci		 *  | ---- range to drop ----- |
8628c2ecf20Sopenharmony_ci		 *      | -------- extent -------- |
8638c2ecf20Sopenharmony_ci		 */
8648c2ecf20Sopenharmony_ci		if (start <= key.offset && end < extent_end) {
8658c2ecf20Sopenharmony_ci			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
8668c2ecf20Sopenharmony_ci				ret = -EOPNOTSUPP;
8678c2ecf20Sopenharmony_ci				break;
8688c2ecf20Sopenharmony_ci			}
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_ci			memcpy(&new_key, &key, sizeof(new_key));
8718c2ecf20Sopenharmony_ci			new_key.offset = end;
8728c2ecf20Sopenharmony_ci			btrfs_set_item_key_safe(fs_info, path, &new_key);
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci			extent_offset += end - key.offset;
8758c2ecf20Sopenharmony_ci			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
8768c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
8778c2ecf20Sopenharmony_ci							extent_end - end);
8788c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(leaf);
8798c2ecf20Sopenharmony_ci			if (update_refs && disk_bytenr > 0)
8808c2ecf20Sopenharmony_ci				inode_sub_bytes(vfs_inode, end - key.offset);
8818c2ecf20Sopenharmony_ci			break;
8828c2ecf20Sopenharmony_ci		}
8838c2ecf20Sopenharmony_ci
8848c2ecf20Sopenharmony_ci		search_start = extent_end;
8858c2ecf20Sopenharmony_ci		/*
8868c2ecf20Sopenharmony_ci		 *       | ---- range to drop ----- |
8878c2ecf20Sopenharmony_ci		 *  | -------- extent -------- |
8888c2ecf20Sopenharmony_ci		 */
8898c2ecf20Sopenharmony_ci		if (start > key.offset && end >= extent_end) {
8908c2ecf20Sopenharmony_ci			BUG_ON(del_nr > 0);
8918c2ecf20Sopenharmony_ci			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
8928c2ecf20Sopenharmony_ci				ret = -EOPNOTSUPP;
8938c2ecf20Sopenharmony_ci				break;
8948c2ecf20Sopenharmony_ci			}
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
8978c2ecf20Sopenharmony_ci							start - key.offset);
8988c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(leaf);
8998c2ecf20Sopenharmony_ci			if (update_refs && disk_bytenr > 0)
9008c2ecf20Sopenharmony_ci				inode_sub_bytes(vfs_inode, extent_end - start);
9018c2ecf20Sopenharmony_ci			if (end == extent_end)
9028c2ecf20Sopenharmony_ci				break;
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci			path->slots[0]++;
9058c2ecf20Sopenharmony_ci			goto next_slot;
9068c2ecf20Sopenharmony_ci		}
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci		/*
9098c2ecf20Sopenharmony_ci		 *  | ---- range to drop ----- |
9108c2ecf20Sopenharmony_ci		 *    | ------ extent ------ |
9118c2ecf20Sopenharmony_ci		 */
9128c2ecf20Sopenharmony_ci		if (start <= key.offset && end >= extent_end) {
9138c2ecf20Sopenharmony_cidelete_extent_item:
9148c2ecf20Sopenharmony_ci			if (del_nr == 0) {
9158c2ecf20Sopenharmony_ci				del_slot = path->slots[0];
9168c2ecf20Sopenharmony_ci				del_nr = 1;
9178c2ecf20Sopenharmony_ci			} else {
9188c2ecf20Sopenharmony_ci				BUG_ON(del_slot + del_nr != path->slots[0]);
9198c2ecf20Sopenharmony_ci				del_nr++;
9208c2ecf20Sopenharmony_ci			}
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci			if (update_refs &&
9238c2ecf20Sopenharmony_ci			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
9248c2ecf20Sopenharmony_ci				inode_sub_bytes(vfs_inode,
9258c2ecf20Sopenharmony_ci						extent_end - key.offset);
9268c2ecf20Sopenharmony_ci				extent_end = ALIGN(extent_end,
9278c2ecf20Sopenharmony_ci						   fs_info->sectorsize);
9288c2ecf20Sopenharmony_ci			} else if (update_refs && disk_bytenr > 0) {
9298c2ecf20Sopenharmony_ci				btrfs_init_generic_ref(&ref,
9308c2ecf20Sopenharmony_ci						BTRFS_DROP_DELAYED_REF,
9318c2ecf20Sopenharmony_ci						disk_bytenr, num_bytes, 0);
9328c2ecf20Sopenharmony_ci				btrfs_init_data_ref(&ref,
9338c2ecf20Sopenharmony_ci						root->root_key.objectid,
9348c2ecf20Sopenharmony_ci						key.objectid,
9358c2ecf20Sopenharmony_ci						key.offset - extent_offset);
9368c2ecf20Sopenharmony_ci				ret = btrfs_free_extent(trans, &ref);
9378c2ecf20Sopenharmony_ci				BUG_ON(ret); /* -ENOMEM */
9388c2ecf20Sopenharmony_ci				inode_sub_bytes(vfs_inode,
9398c2ecf20Sopenharmony_ci						extent_end - key.offset);
9408c2ecf20Sopenharmony_ci			}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci			if (end == extent_end)
9438c2ecf20Sopenharmony_ci				break;
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
9468c2ecf20Sopenharmony_ci				path->slots[0]++;
9478c2ecf20Sopenharmony_ci				goto next_slot;
9488c2ecf20Sopenharmony_ci			}
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci			ret = btrfs_del_items(trans, root, path, del_slot,
9518c2ecf20Sopenharmony_ci					      del_nr);
9528c2ecf20Sopenharmony_ci			if (ret) {
9538c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
9548c2ecf20Sopenharmony_ci				break;
9558c2ecf20Sopenharmony_ci			}
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_ci			del_nr = 0;
9588c2ecf20Sopenharmony_ci			del_slot = 0;
9598c2ecf20Sopenharmony_ci
9608c2ecf20Sopenharmony_ci			btrfs_release_path(path);
9618c2ecf20Sopenharmony_ci			continue;
9628c2ecf20Sopenharmony_ci		}
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci		BUG();
9658c2ecf20Sopenharmony_ci	}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci	if (!ret && del_nr > 0) {
9688c2ecf20Sopenharmony_ci		/*
9698c2ecf20Sopenharmony_ci		 * Set path->slots[0] to first slot, so that after the delete
9708c2ecf20Sopenharmony_ci		 * if items are move off from our leaf to its immediate left or
9718c2ecf20Sopenharmony_ci		 * right neighbor leafs, we end up with a correct and adjusted
9728c2ecf20Sopenharmony_ci		 * path->slots[0] for our insertion (if replace_extent != 0).
9738c2ecf20Sopenharmony_ci		 */
9748c2ecf20Sopenharmony_ci		path->slots[0] = del_slot;
9758c2ecf20Sopenharmony_ci		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9768c2ecf20Sopenharmony_ci		if (ret)
9778c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
9788c2ecf20Sopenharmony_ci	}
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
9818c2ecf20Sopenharmony_ci	/*
9828c2ecf20Sopenharmony_ci	 * If btrfs_del_items() was called, it might have deleted a leaf, in
9838c2ecf20Sopenharmony_ci	 * which case it unlocked our path, so check path->locks[0] matches a
9848c2ecf20Sopenharmony_ci	 * write lock.
9858c2ecf20Sopenharmony_ci	 */
9868c2ecf20Sopenharmony_ci	if (!ret && replace_extent && leafs_visited == 1 &&
9878c2ecf20Sopenharmony_ci	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
9888c2ecf20Sopenharmony_ci	     path->locks[0] == BTRFS_WRITE_LOCK) &&
9898c2ecf20Sopenharmony_ci	    btrfs_leaf_free_space(leaf) >=
9908c2ecf20Sopenharmony_ci	    sizeof(struct btrfs_item) + extent_item_size) {
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci		key.objectid = ino;
9938c2ecf20Sopenharmony_ci		key.type = BTRFS_EXTENT_DATA_KEY;
9948c2ecf20Sopenharmony_ci		key.offset = start;
9958c2ecf20Sopenharmony_ci		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
9968c2ecf20Sopenharmony_ci			struct btrfs_key slot_key;
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
9998c2ecf20Sopenharmony_ci			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
10008c2ecf20Sopenharmony_ci				path->slots[0]++;
10018c2ecf20Sopenharmony_ci		}
10028c2ecf20Sopenharmony_ci		setup_items_for_insert(root, path, &key, &extent_item_size, 1);
10038c2ecf20Sopenharmony_ci		*key_inserted = 1;
10048c2ecf20Sopenharmony_ci	}
10058c2ecf20Sopenharmony_ci
10068c2ecf20Sopenharmony_ci	if (!replace_extent || !(*key_inserted))
10078c2ecf20Sopenharmony_ci		btrfs_release_path(path);
10088c2ecf20Sopenharmony_ci	if (drop_end)
10098c2ecf20Sopenharmony_ci		*drop_end = found ? min(end, last_end) : end;
10108c2ecf20Sopenharmony_ci	return ret;
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ciint btrfs_drop_extents(struct btrfs_trans_handle *trans,
10148c2ecf20Sopenharmony_ci		       struct btrfs_root *root, struct inode *inode, u64 start,
10158c2ecf20Sopenharmony_ci		       u64 end, int drop_cache)
10168c2ecf20Sopenharmony_ci{
10178c2ecf20Sopenharmony_ci	struct btrfs_path *path;
10188c2ecf20Sopenharmony_ci	int ret;
10198c2ecf20Sopenharmony_ci
10208c2ecf20Sopenharmony_ci	path = btrfs_alloc_path();
10218c2ecf20Sopenharmony_ci	if (!path)
10228c2ecf20Sopenharmony_ci		return -ENOMEM;
10238c2ecf20Sopenharmony_ci	ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start,
10248c2ecf20Sopenharmony_ci				   end, NULL, drop_cache, 0, 0, NULL);
10258c2ecf20Sopenharmony_ci	btrfs_free_path(path);
10268c2ecf20Sopenharmony_ci	return ret;
10278c2ecf20Sopenharmony_ci}
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_cistatic int extent_mergeable(struct extent_buffer *leaf, int slot,
10308c2ecf20Sopenharmony_ci			    u64 objectid, u64 bytenr, u64 orig_offset,
10318c2ecf20Sopenharmony_ci			    u64 *start, u64 *end)
10328c2ecf20Sopenharmony_ci{
10338c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
10348c2ecf20Sopenharmony_ci	struct btrfs_key key;
10358c2ecf20Sopenharmony_ci	u64 extent_end;
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
10388c2ecf20Sopenharmony_ci		return 0;
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, slot);
10418c2ecf20Sopenharmony_ci	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10428c2ecf20Sopenharmony_ci		return 0;
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10458c2ecf20Sopenharmony_ci	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
10468c2ecf20Sopenharmony_ci	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
10478c2ecf20Sopenharmony_ci	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
10488c2ecf20Sopenharmony_ci	    btrfs_file_extent_compression(leaf, fi) ||
10498c2ecf20Sopenharmony_ci	    btrfs_file_extent_encryption(leaf, fi) ||
10508c2ecf20Sopenharmony_ci	    btrfs_file_extent_other_encoding(leaf, fi))
10518c2ecf20Sopenharmony_ci		return 0;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
10548c2ecf20Sopenharmony_ci	if ((*start && *start != key.offset) || (*end && *end != extent_end))
10558c2ecf20Sopenharmony_ci		return 0;
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_ci	*start = key.offset;
10588c2ecf20Sopenharmony_ci	*end = extent_end;
10598c2ecf20Sopenharmony_ci	return 1;
10608c2ecf20Sopenharmony_ci}
10618c2ecf20Sopenharmony_ci
10628c2ecf20Sopenharmony_ci/*
10638c2ecf20Sopenharmony_ci * Mark extent in the range start - end as written.
10648c2ecf20Sopenharmony_ci *
10658c2ecf20Sopenharmony_ci * This changes extent type from 'pre-allocated' to 'regular'. If only
10668c2ecf20Sopenharmony_ci * part of extent is marked as written, the extent will be split into
10678c2ecf20Sopenharmony_ci * two or three.
10688c2ecf20Sopenharmony_ci */
10698c2ecf20Sopenharmony_ciint btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
10708c2ecf20Sopenharmony_ci			      struct btrfs_inode *inode, u64 start, u64 end)
10718c2ecf20Sopenharmony_ci{
10728c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
10738c2ecf20Sopenharmony_ci	struct btrfs_root *root = inode->root;
10748c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
10758c2ecf20Sopenharmony_ci	struct btrfs_path *path;
10768c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
10778c2ecf20Sopenharmony_ci	struct btrfs_ref ref = { 0 };
10788c2ecf20Sopenharmony_ci	struct btrfs_key key;
10798c2ecf20Sopenharmony_ci	struct btrfs_key new_key;
10808c2ecf20Sopenharmony_ci	u64 bytenr;
10818c2ecf20Sopenharmony_ci	u64 num_bytes;
10828c2ecf20Sopenharmony_ci	u64 extent_end;
10838c2ecf20Sopenharmony_ci	u64 orig_offset;
10848c2ecf20Sopenharmony_ci	u64 other_start;
10858c2ecf20Sopenharmony_ci	u64 other_end;
10868c2ecf20Sopenharmony_ci	u64 split;
10878c2ecf20Sopenharmony_ci	int del_nr = 0;
10888c2ecf20Sopenharmony_ci	int del_slot = 0;
10898c2ecf20Sopenharmony_ci	int recow;
10908c2ecf20Sopenharmony_ci	int ret = 0;
10918c2ecf20Sopenharmony_ci	u64 ino = btrfs_ino(inode);
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	path = btrfs_alloc_path();
10948c2ecf20Sopenharmony_ci	if (!path)
10958c2ecf20Sopenharmony_ci		return -ENOMEM;
10968c2ecf20Sopenharmony_ciagain:
10978c2ecf20Sopenharmony_ci	recow = 0;
10988c2ecf20Sopenharmony_ci	split = start;
10998c2ecf20Sopenharmony_ci	key.objectid = ino;
11008c2ecf20Sopenharmony_ci	key.type = BTRFS_EXTENT_DATA_KEY;
11018c2ecf20Sopenharmony_ci	key.offset = split;
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
11048c2ecf20Sopenharmony_ci	if (ret < 0)
11058c2ecf20Sopenharmony_ci		goto out;
11068c2ecf20Sopenharmony_ci	if (ret > 0 && path->slots[0] > 0)
11078c2ecf20Sopenharmony_ci		path->slots[0]--;
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
11108c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
11118c2ecf20Sopenharmony_ci	if (key.objectid != ino ||
11128c2ecf20Sopenharmony_ci	    key.type != BTRFS_EXTENT_DATA_KEY) {
11138c2ecf20Sopenharmony_ci		ret = -EINVAL;
11148c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
11158c2ecf20Sopenharmony_ci		goto out;
11168c2ecf20Sopenharmony_ci	}
11178c2ecf20Sopenharmony_ci	fi = btrfs_item_ptr(leaf, path->slots[0],
11188c2ecf20Sopenharmony_ci			    struct btrfs_file_extent_item);
11198c2ecf20Sopenharmony_ci	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
11208c2ecf20Sopenharmony_ci		ret = -EINVAL;
11218c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
11228c2ecf20Sopenharmony_ci		goto out;
11238c2ecf20Sopenharmony_ci	}
11248c2ecf20Sopenharmony_ci	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
11258c2ecf20Sopenharmony_ci	if (key.offset > start || extent_end < end) {
11268c2ecf20Sopenharmony_ci		ret = -EINVAL;
11278c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
11288c2ecf20Sopenharmony_ci		goto out;
11298c2ecf20Sopenharmony_ci	}
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
11328c2ecf20Sopenharmony_ci	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
11338c2ecf20Sopenharmony_ci	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
11348c2ecf20Sopenharmony_ci	memcpy(&new_key, &key, sizeof(new_key));
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ci	if (start == key.offset && end < extent_end) {
11378c2ecf20Sopenharmony_ci		other_start = 0;
11388c2ecf20Sopenharmony_ci		other_end = start;
11398c2ecf20Sopenharmony_ci		if (extent_mergeable(leaf, path->slots[0] - 1,
11408c2ecf20Sopenharmony_ci				     ino, bytenr, orig_offset,
11418c2ecf20Sopenharmony_ci				     &other_start, &other_end)) {
11428c2ecf20Sopenharmony_ci			new_key.offset = end;
11438c2ecf20Sopenharmony_ci			btrfs_set_item_key_safe(fs_info, path, &new_key);
11448c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0],
11458c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
11468c2ecf20Sopenharmony_ci			btrfs_set_file_extent_generation(leaf, fi,
11478c2ecf20Sopenharmony_ci							 trans->transid);
11488c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
11498c2ecf20Sopenharmony_ci							extent_end - end);
11508c2ecf20Sopenharmony_ci			btrfs_set_file_extent_offset(leaf, fi,
11518c2ecf20Sopenharmony_ci						     end - orig_offset);
11528c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
11538c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
11548c2ecf20Sopenharmony_ci			btrfs_set_file_extent_generation(leaf, fi,
11558c2ecf20Sopenharmony_ci							 trans->transid);
11568c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
11578c2ecf20Sopenharmony_ci							end - other_start);
11588c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(leaf);
11598c2ecf20Sopenharmony_ci			goto out;
11608c2ecf20Sopenharmony_ci		}
11618c2ecf20Sopenharmony_ci	}
11628c2ecf20Sopenharmony_ci
11638c2ecf20Sopenharmony_ci	if (start > key.offset && end == extent_end) {
11648c2ecf20Sopenharmony_ci		other_start = end;
11658c2ecf20Sopenharmony_ci		other_end = 0;
11668c2ecf20Sopenharmony_ci		if (extent_mergeable(leaf, path->slots[0] + 1,
11678c2ecf20Sopenharmony_ci				     ino, bytenr, orig_offset,
11688c2ecf20Sopenharmony_ci				     &other_start, &other_end)) {
11698c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0],
11708c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
11718c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
11728c2ecf20Sopenharmony_ci							start - key.offset);
11738c2ecf20Sopenharmony_ci			btrfs_set_file_extent_generation(leaf, fi,
11748c2ecf20Sopenharmony_ci							 trans->transid);
11758c2ecf20Sopenharmony_ci			path->slots[0]++;
11768c2ecf20Sopenharmony_ci			new_key.offset = start;
11778c2ecf20Sopenharmony_ci			btrfs_set_item_key_safe(fs_info, path, &new_key);
11788c2ecf20Sopenharmony_ci
11798c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, path->slots[0],
11808c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
11818c2ecf20Sopenharmony_ci			btrfs_set_file_extent_generation(leaf, fi,
11828c2ecf20Sopenharmony_ci							 trans->transid);
11838c2ecf20Sopenharmony_ci			btrfs_set_file_extent_num_bytes(leaf, fi,
11848c2ecf20Sopenharmony_ci							other_end - start);
11858c2ecf20Sopenharmony_ci			btrfs_set_file_extent_offset(leaf, fi,
11868c2ecf20Sopenharmony_ci						     start - orig_offset);
11878c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(leaf);
11888c2ecf20Sopenharmony_ci			goto out;
11898c2ecf20Sopenharmony_ci		}
11908c2ecf20Sopenharmony_ci	}
11918c2ecf20Sopenharmony_ci
11928c2ecf20Sopenharmony_ci	while (start > key.offset || end < extent_end) {
11938c2ecf20Sopenharmony_ci		if (key.offset == start)
11948c2ecf20Sopenharmony_ci			split = end;
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_ci		new_key.offset = split;
11978c2ecf20Sopenharmony_ci		ret = btrfs_duplicate_item(trans, root, path, &new_key);
11988c2ecf20Sopenharmony_ci		if (ret == -EAGAIN) {
11998c2ecf20Sopenharmony_ci			btrfs_release_path(path);
12008c2ecf20Sopenharmony_ci			goto again;
12018c2ecf20Sopenharmony_ci		}
12028c2ecf20Sopenharmony_ci		if (ret < 0) {
12038c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
12048c2ecf20Sopenharmony_ci			goto out;
12058c2ecf20Sopenharmony_ci		}
12068c2ecf20Sopenharmony_ci
12078c2ecf20Sopenharmony_ci		leaf = path->nodes[0];
12088c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
12098c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
12108c2ecf20Sopenharmony_ci		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
12118c2ecf20Sopenharmony_ci		btrfs_set_file_extent_num_bytes(leaf, fi,
12128c2ecf20Sopenharmony_ci						split - key.offset);
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
12158c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_ci		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
12188c2ecf20Sopenharmony_ci		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
12198c2ecf20Sopenharmony_ci		btrfs_set_file_extent_num_bytes(leaf, fi,
12208c2ecf20Sopenharmony_ci						extent_end - split);
12218c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ci		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
12248c2ecf20Sopenharmony_ci				       num_bytes, 0);
12258c2ecf20Sopenharmony_ci		btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
12268c2ecf20Sopenharmony_ci				    orig_offset);
12278c2ecf20Sopenharmony_ci		ret = btrfs_inc_extent_ref(trans, &ref);
12288c2ecf20Sopenharmony_ci		if (ret) {
12298c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
12308c2ecf20Sopenharmony_ci			goto out;
12318c2ecf20Sopenharmony_ci		}
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_ci		if (split == start) {
12348c2ecf20Sopenharmony_ci			key.offset = start;
12358c2ecf20Sopenharmony_ci		} else {
12368c2ecf20Sopenharmony_ci			if (start != key.offset) {
12378c2ecf20Sopenharmony_ci				ret = -EINVAL;
12388c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
12398c2ecf20Sopenharmony_ci				goto out;
12408c2ecf20Sopenharmony_ci			}
12418c2ecf20Sopenharmony_ci			path->slots[0]--;
12428c2ecf20Sopenharmony_ci			extent_end = end;
12438c2ecf20Sopenharmony_ci		}
12448c2ecf20Sopenharmony_ci		recow = 1;
12458c2ecf20Sopenharmony_ci	}
12468c2ecf20Sopenharmony_ci
12478c2ecf20Sopenharmony_ci	other_start = end;
12488c2ecf20Sopenharmony_ci	other_end = 0;
12498c2ecf20Sopenharmony_ci	btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
12508c2ecf20Sopenharmony_ci			       num_bytes, 0);
12518c2ecf20Sopenharmony_ci	btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
12528c2ecf20Sopenharmony_ci	if (extent_mergeable(leaf, path->slots[0] + 1,
12538c2ecf20Sopenharmony_ci			     ino, bytenr, orig_offset,
12548c2ecf20Sopenharmony_ci			     &other_start, &other_end)) {
12558c2ecf20Sopenharmony_ci		if (recow) {
12568c2ecf20Sopenharmony_ci			btrfs_release_path(path);
12578c2ecf20Sopenharmony_ci			goto again;
12588c2ecf20Sopenharmony_ci		}
12598c2ecf20Sopenharmony_ci		extent_end = other_end;
12608c2ecf20Sopenharmony_ci		del_slot = path->slots[0] + 1;
12618c2ecf20Sopenharmony_ci		del_nr++;
12628c2ecf20Sopenharmony_ci		ret = btrfs_free_extent(trans, &ref);
12638c2ecf20Sopenharmony_ci		if (ret) {
12648c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
12658c2ecf20Sopenharmony_ci			goto out;
12668c2ecf20Sopenharmony_ci		}
12678c2ecf20Sopenharmony_ci	}
12688c2ecf20Sopenharmony_ci	other_start = 0;
12698c2ecf20Sopenharmony_ci	other_end = start;
12708c2ecf20Sopenharmony_ci	if (extent_mergeable(leaf, path->slots[0] - 1,
12718c2ecf20Sopenharmony_ci			     ino, bytenr, orig_offset,
12728c2ecf20Sopenharmony_ci			     &other_start, &other_end)) {
12738c2ecf20Sopenharmony_ci		if (recow) {
12748c2ecf20Sopenharmony_ci			btrfs_release_path(path);
12758c2ecf20Sopenharmony_ci			goto again;
12768c2ecf20Sopenharmony_ci		}
12778c2ecf20Sopenharmony_ci		key.offset = other_start;
12788c2ecf20Sopenharmony_ci		del_slot = path->slots[0];
12798c2ecf20Sopenharmony_ci		del_nr++;
12808c2ecf20Sopenharmony_ci		ret = btrfs_free_extent(trans, &ref);
12818c2ecf20Sopenharmony_ci		if (ret) {
12828c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
12838c2ecf20Sopenharmony_ci			goto out;
12848c2ecf20Sopenharmony_ci		}
12858c2ecf20Sopenharmony_ci	}
12868c2ecf20Sopenharmony_ci	if (del_nr == 0) {
12878c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
12888c2ecf20Sopenharmony_ci			   struct btrfs_file_extent_item);
12898c2ecf20Sopenharmony_ci		btrfs_set_file_extent_type(leaf, fi,
12908c2ecf20Sopenharmony_ci					   BTRFS_FILE_EXTENT_REG);
12918c2ecf20Sopenharmony_ci		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
12928c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
12938c2ecf20Sopenharmony_ci	} else {
12948c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, del_slot - 1,
12958c2ecf20Sopenharmony_ci			   struct btrfs_file_extent_item);
12968c2ecf20Sopenharmony_ci		btrfs_set_file_extent_type(leaf, fi,
12978c2ecf20Sopenharmony_ci					   BTRFS_FILE_EXTENT_REG);
12988c2ecf20Sopenharmony_ci		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
12998c2ecf20Sopenharmony_ci		btrfs_set_file_extent_num_bytes(leaf, fi,
13008c2ecf20Sopenharmony_ci						extent_end - key.offset);
13018c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
13048c2ecf20Sopenharmony_ci		if (ret < 0) {
13058c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
13068c2ecf20Sopenharmony_ci			goto out;
13078c2ecf20Sopenharmony_ci		}
13088c2ecf20Sopenharmony_ci	}
13098c2ecf20Sopenharmony_ciout:
13108c2ecf20Sopenharmony_ci	btrfs_free_path(path);
13118c2ecf20Sopenharmony_ci	return ret;
13128c2ecf20Sopenharmony_ci}
13138c2ecf20Sopenharmony_ci
13148c2ecf20Sopenharmony_ci/*
13158c2ecf20Sopenharmony_ci * on error we return an unlocked page and the error value
13168c2ecf20Sopenharmony_ci * on success we return a locked page and 0
13178c2ecf20Sopenharmony_ci */
13188c2ecf20Sopenharmony_cistatic int prepare_uptodate_page(struct inode *inode,
13198c2ecf20Sopenharmony_ci				 struct page *page, u64 pos,
13208c2ecf20Sopenharmony_ci				 bool force_uptodate)
13218c2ecf20Sopenharmony_ci{
13228c2ecf20Sopenharmony_ci	int ret = 0;
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
13258c2ecf20Sopenharmony_ci	    !PageUptodate(page)) {
13268c2ecf20Sopenharmony_ci		ret = btrfs_readpage(NULL, page);
13278c2ecf20Sopenharmony_ci		if (ret)
13288c2ecf20Sopenharmony_ci			return ret;
13298c2ecf20Sopenharmony_ci		lock_page(page);
13308c2ecf20Sopenharmony_ci		if (!PageUptodate(page)) {
13318c2ecf20Sopenharmony_ci			unlock_page(page);
13328c2ecf20Sopenharmony_ci			return -EIO;
13338c2ecf20Sopenharmony_ci		}
13348c2ecf20Sopenharmony_ci		if (page->mapping != inode->i_mapping) {
13358c2ecf20Sopenharmony_ci			unlock_page(page);
13368c2ecf20Sopenharmony_ci			return -EAGAIN;
13378c2ecf20Sopenharmony_ci		}
13388c2ecf20Sopenharmony_ci	}
13398c2ecf20Sopenharmony_ci	return 0;
13408c2ecf20Sopenharmony_ci}
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci/*
13438c2ecf20Sopenharmony_ci * this just gets pages into the page cache and locks them down.
13448c2ecf20Sopenharmony_ci */
13458c2ecf20Sopenharmony_cistatic noinline int prepare_pages(struct inode *inode, struct page **pages,
13468c2ecf20Sopenharmony_ci				  size_t num_pages, loff_t pos,
13478c2ecf20Sopenharmony_ci				  size_t write_bytes, bool force_uptodate)
13488c2ecf20Sopenharmony_ci{
13498c2ecf20Sopenharmony_ci	int i;
13508c2ecf20Sopenharmony_ci	unsigned long index = pos >> PAGE_SHIFT;
13518c2ecf20Sopenharmony_ci	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
13528c2ecf20Sopenharmony_ci	int err = 0;
13538c2ecf20Sopenharmony_ci	int faili;
13548c2ecf20Sopenharmony_ci
13558c2ecf20Sopenharmony_ci	for (i = 0; i < num_pages; i++) {
13568c2ecf20Sopenharmony_ciagain:
13578c2ecf20Sopenharmony_ci		pages[i] = find_or_create_page(inode->i_mapping, index + i,
13588c2ecf20Sopenharmony_ci					       mask | __GFP_WRITE);
13598c2ecf20Sopenharmony_ci		if (!pages[i]) {
13608c2ecf20Sopenharmony_ci			faili = i - 1;
13618c2ecf20Sopenharmony_ci			err = -ENOMEM;
13628c2ecf20Sopenharmony_ci			goto fail;
13638c2ecf20Sopenharmony_ci		}
13648c2ecf20Sopenharmony_ci
13658c2ecf20Sopenharmony_ci		if (i == 0)
13668c2ecf20Sopenharmony_ci			err = prepare_uptodate_page(inode, pages[i], pos,
13678c2ecf20Sopenharmony_ci						    force_uptodate);
13688c2ecf20Sopenharmony_ci		if (!err && i == num_pages - 1)
13698c2ecf20Sopenharmony_ci			err = prepare_uptodate_page(inode, pages[i],
13708c2ecf20Sopenharmony_ci						    pos + write_bytes, false);
13718c2ecf20Sopenharmony_ci		if (err) {
13728c2ecf20Sopenharmony_ci			put_page(pages[i]);
13738c2ecf20Sopenharmony_ci			if (err == -EAGAIN) {
13748c2ecf20Sopenharmony_ci				err = 0;
13758c2ecf20Sopenharmony_ci				goto again;
13768c2ecf20Sopenharmony_ci			}
13778c2ecf20Sopenharmony_ci			faili = i - 1;
13788c2ecf20Sopenharmony_ci			goto fail;
13798c2ecf20Sopenharmony_ci		}
13808c2ecf20Sopenharmony_ci		wait_on_page_writeback(pages[i]);
13818c2ecf20Sopenharmony_ci	}
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_ci	return 0;
13848c2ecf20Sopenharmony_cifail:
13858c2ecf20Sopenharmony_ci	while (faili >= 0) {
13868c2ecf20Sopenharmony_ci		unlock_page(pages[faili]);
13878c2ecf20Sopenharmony_ci		put_page(pages[faili]);
13888c2ecf20Sopenharmony_ci		faili--;
13898c2ecf20Sopenharmony_ci	}
13908c2ecf20Sopenharmony_ci	return err;
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_ci}
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci/*
13958c2ecf20Sopenharmony_ci * This function locks the extent and properly waits for data=ordered extents
13968c2ecf20Sopenharmony_ci * to finish before allowing the pages to be modified if need.
13978c2ecf20Sopenharmony_ci *
13988c2ecf20Sopenharmony_ci * The return value:
13998c2ecf20Sopenharmony_ci * 1 - the extent is locked
14008c2ecf20Sopenharmony_ci * 0 - the extent is not locked, and everything is OK
14018c2ecf20Sopenharmony_ci * -EAGAIN - need re-prepare the pages
14028c2ecf20Sopenharmony_ci * the other < 0 number - Something wrong happens
14038c2ecf20Sopenharmony_ci */
14048c2ecf20Sopenharmony_cistatic noinline int
14058c2ecf20Sopenharmony_cilock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
14068c2ecf20Sopenharmony_ci				size_t num_pages, loff_t pos,
14078c2ecf20Sopenharmony_ci				size_t write_bytes,
14088c2ecf20Sopenharmony_ci				u64 *lockstart, u64 *lockend,
14098c2ecf20Sopenharmony_ci				struct extent_state **cached_state)
14108c2ecf20Sopenharmony_ci{
14118c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
14128c2ecf20Sopenharmony_ci	u64 start_pos;
14138c2ecf20Sopenharmony_ci	u64 last_pos;
14148c2ecf20Sopenharmony_ci	int i;
14158c2ecf20Sopenharmony_ci	int ret = 0;
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	start_pos = round_down(pos, fs_info->sectorsize);
14188c2ecf20Sopenharmony_ci	last_pos = round_up(pos + write_bytes, fs_info->sectorsize) - 1;
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci	if (start_pos < inode->vfs_inode.i_size) {
14218c2ecf20Sopenharmony_ci		struct btrfs_ordered_extent *ordered;
14228c2ecf20Sopenharmony_ci
14238c2ecf20Sopenharmony_ci		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
14248c2ecf20Sopenharmony_ci				cached_state);
14258c2ecf20Sopenharmony_ci		ordered = btrfs_lookup_ordered_range(inode, start_pos,
14268c2ecf20Sopenharmony_ci						     last_pos - start_pos + 1);
14278c2ecf20Sopenharmony_ci		if (ordered &&
14288c2ecf20Sopenharmony_ci		    ordered->file_offset + ordered->num_bytes > start_pos &&
14298c2ecf20Sopenharmony_ci		    ordered->file_offset <= last_pos) {
14308c2ecf20Sopenharmony_ci			unlock_extent_cached(&inode->io_tree, start_pos,
14318c2ecf20Sopenharmony_ci					last_pos, cached_state);
14328c2ecf20Sopenharmony_ci			for (i = 0; i < num_pages; i++) {
14338c2ecf20Sopenharmony_ci				unlock_page(pages[i]);
14348c2ecf20Sopenharmony_ci				put_page(pages[i]);
14358c2ecf20Sopenharmony_ci			}
14368c2ecf20Sopenharmony_ci			btrfs_start_ordered_extent(ordered, 1);
14378c2ecf20Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
14388c2ecf20Sopenharmony_ci			return -EAGAIN;
14398c2ecf20Sopenharmony_ci		}
14408c2ecf20Sopenharmony_ci		if (ordered)
14418c2ecf20Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_ci		*lockstart = start_pos;
14448c2ecf20Sopenharmony_ci		*lockend = last_pos;
14458c2ecf20Sopenharmony_ci		ret = 1;
14468c2ecf20Sopenharmony_ci	}
14478c2ecf20Sopenharmony_ci
14488c2ecf20Sopenharmony_ci	/*
14498c2ecf20Sopenharmony_ci	 * It's possible the pages are dirty right now, but we don't want
14508c2ecf20Sopenharmony_ci	 * to clean them yet because copy_from_user may catch a page fault
14518c2ecf20Sopenharmony_ci	 * and we might have to fall back to one page at a time.  If that
14528c2ecf20Sopenharmony_ci	 * happens, we'll unlock these pages and we'd have a window where
14538c2ecf20Sopenharmony_ci	 * reclaim could sneak in and drop the once-dirty page on the floor
14548c2ecf20Sopenharmony_ci	 * without writing it.
14558c2ecf20Sopenharmony_ci	 *
14568c2ecf20Sopenharmony_ci	 * We have the pages locked and the extent range locked, so there's
14578c2ecf20Sopenharmony_ci	 * no way someone can start IO on any dirty pages in this range.
14588c2ecf20Sopenharmony_ci	 *
14598c2ecf20Sopenharmony_ci	 * We'll call btrfs_dirty_pages() later on, and that will flip around
14608c2ecf20Sopenharmony_ci	 * delalloc bits and dirty the pages as required.
14618c2ecf20Sopenharmony_ci	 */
14628c2ecf20Sopenharmony_ci	for (i = 0; i < num_pages; i++) {
14638c2ecf20Sopenharmony_ci		set_page_extent_mapped(pages[i]);
14648c2ecf20Sopenharmony_ci		WARN_ON(!PageLocked(pages[i]));
14658c2ecf20Sopenharmony_ci	}
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_ci	return ret;
14688c2ecf20Sopenharmony_ci}
14698c2ecf20Sopenharmony_ci
14708c2ecf20Sopenharmony_cistatic int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
14718c2ecf20Sopenharmony_ci			   size_t *write_bytes, bool nowait)
14728c2ecf20Sopenharmony_ci{
14738c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
14748c2ecf20Sopenharmony_ci	struct btrfs_root *root = inode->root;
14758c2ecf20Sopenharmony_ci	u64 lockstart, lockend;
14768c2ecf20Sopenharmony_ci	u64 num_bytes;
14778c2ecf20Sopenharmony_ci	int ret;
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_ci	if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
14808c2ecf20Sopenharmony_ci		return 0;
14818c2ecf20Sopenharmony_ci
14828c2ecf20Sopenharmony_ci	if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
14838c2ecf20Sopenharmony_ci		return -EAGAIN;
14848c2ecf20Sopenharmony_ci
14858c2ecf20Sopenharmony_ci	lockstart = round_down(pos, fs_info->sectorsize);
14868c2ecf20Sopenharmony_ci	lockend = round_up(pos + *write_bytes,
14878c2ecf20Sopenharmony_ci			   fs_info->sectorsize) - 1;
14888c2ecf20Sopenharmony_ci	num_bytes = lockend - lockstart + 1;
14898c2ecf20Sopenharmony_ci
14908c2ecf20Sopenharmony_ci	if (nowait) {
14918c2ecf20Sopenharmony_ci		struct btrfs_ordered_extent *ordered;
14928c2ecf20Sopenharmony_ci
14938c2ecf20Sopenharmony_ci		if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
14948c2ecf20Sopenharmony_ci			return -EAGAIN;
14958c2ecf20Sopenharmony_ci
14968c2ecf20Sopenharmony_ci		ordered = btrfs_lookup_ordered_range(inode, lockstart,
14978c2ecf20Sopenharmony_ci						     num_bytes);
14988c2ecf20Sopenharmony_ci		if (ordered) {
14998c2ecf20Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
15008c2ecf20Sopenharmony_ci			ret = -EAGAIN;
15018c2ecf20Sopenharmony_ci			goto out_unlock;
15028c2ecf20Sopenharmony_ci		}
15038c2ecf20Sopenharmony_ci	} else {
15048c2ecf20Sopenharmony_ci		btrfs_lock_and_flush_ordered_range(inode, lockstart,
15058c2ecf20Sopenharmony_ci						   lockend, NULL);
15068c2ecf20Sopenharmony_ci	}
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
15098c2ecf20Sopenharmony_ci			NULL, NULL, NULL, false);
15108c2ecf20Sopenharmony_ci	if (ret <= 0) {
15118c2ecf20Sopenharmony_ci		ret = 0;
15128c2ecf20Sopenharmony_ci		if (!nowait)
15138c2ecf20Sopenharmony_ci			btrfs_drew_write_unlock(&root->snapshot_lock);
15148c2ecf20Sopenharmony_ci	} else {
15158c2ecf20Sopenharmony_ci		*write_bytes = min_t(size_t, *write_bytes ,
15168c2ecf20Sopenharmony_ci				     num_bytes - pos + lockstart);
15178c2ecf20Sopenharmony_ci	}
15188c2ecf20Sopenharmony_ciout_unlock:
15198c2ecf20Sopenharmony_ci	unlock_extent(&inode->io_tree, lockstart, lockend);
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_ci	return ret;
15228c2ecf20Sopenharmony_ci}
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_cistatic int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
15258c2ecf20Sopenharmony_ci			      size_t *write_bytes)
15268c2ecf20Sopenharmony_ci{
15278c2ecf20Sopenharmony_ci	return check_can_nocow(inode, pos, write_bytes, true);
15288c2ecf20Sopenharmony_ci}
15298c2ecf20Sopenharmony_ci
15308c2ecf20Sopenharmony_ci/*
15318c2ecf20Sopenharmony_ci * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
15328c2ecf20Sopenharmony_ci *
15338c2ecf20Sopenharmony_ci * @pos:	 File offset
15348c2ecf20Sopenharmony_ci * @write_bytes: The length to write, will be updated to the nocow writeable
15358c2ecf20Sopenharmony_ci *		 range
15368c2ecf20Sopenharmony_ci *
15378c2ecf20Sopenharmony_ci * This function will flush ordered extents in the range to ensure proper
15388c2ecf20Sopenharmony_ci * nocow checks.
15398c2ecf20Sopenharmony_ci *
15408c2ecf20Sopenharmony_ci * Return:
15418c2ecf20Sopenharmony_ci * >0		and update @write_bytes if we can do nocow write
15428c2ecf20Sopenharmony_ci *  0		if we can't do nocow write
15438c2ecf20Sopenharmony_ci * -EAGAIN	if we can't get the needed lock or there are ordered extents
15448c2ecf20Sopenharmony_ci * 		for * (nowait == true) case
15458c2ecf20Sopenharmony_ci * <0		if other error happened
15468c2ecf20Sopenharmony_ci *
15478c2ecf20Sopenharmony_ci * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
15488c2ecf20Sopenharmony_ci */
15498c2ecf20Sopenharmony_ciint btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
15508c2ecf20Sopenharmony_ci			   size_t *write_bytes)
15518c2ecf20Sopenharmony_ci{
15528c2ecf20Sopenharmony_ci	return check_can_nocow(inode, pos, write_bytes, false);
15538c2ecf20Sopenharmony_ci}
15548c2ecf20Sopenharmony_ci
15558c2ecf20Sopenharmony_civoid btrfs_check_nocow_unlock(struct btrfs_inode *inode)
15568c2ecf20Sopenharmony_ci{
15578c2ecf20Sopenharmony_ci	btrfs_drew_write_unlock(&inode->root->snapshot_lock);
15588c2ecf20Sopenharmony_ci}
15598c2ecf20Sopenharmony_ci
15608c2ecf20Sopenharmony_cistatic noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
15618c2ecf20Sopenharmony_ci					       struct iov_iter *i)
15628c2ecf20Sopenharmony_ci{
15638c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
15648c2ecf20Sopenharmony_ci	loff_t pos = iocb->ki_pos;
15658c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
15668c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
15678c2ecf20Sopenharmony_ci	struct page **pages = NULL;
15688c2ecf20Sopenharmony_ci	struct extent_changeset *data_reserved = NULL;
15698c2ecf20Sopenharmony_ci	u64 release_bytes = 0;
15708c2ecf20Sopenharmony_ci	u64 lockstart;
15718c2ecf20Sopenharmony_ci	u64 lockend;
15728c2ecf20Sopenharmony_ci	size_t num_written = 0;
15738c2ecf20Sopenharmony_ci	int nrptrs;
15748c2ecf20Sopenharmony_ci	int ret = 0;
15758c2ecf20Sopenharmony_ci	bool only_release_metadata = false;
15768c2ecf20Sopenharmony_ci	bool force_page_uptodate = false;
15778c2ecf20Sopenharmony_ci
15788c2ecf20Sopenharmony_ci	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
15798c2ecf20Sopenharmony_ci			PAGE_SIZE / (sizeof(struct page *)));
15808c2ecf20Sopenharmony_ci	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
15818c2ecf20Sopenharmony_ci	nrptrs = max(nrptrs, 8);
15828c2ecf20Sopenharmony_ci	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
15838c2ecf20Sopenharmony_ci	if (!pages)
15848c2ecf20Sopenharmony_ci		return -ENOMEM;
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci	while (iov_iter_count(i) > 0) {
15878c2ecf20Sopenharmony_ci		struct extent_state *cached_state = NULL;
15888c2ecf20Sopenharmony_ci		size_t offset = offset_in_page(pos);
15898c2ecf20Sopenharmony_ci		size_t sector_offset;
15908c2ecf20Sopenharmony_ci		size_t write_bytes = min(iov_iter_count(i),
15918c2ecf20Sopenharmony_ci					 nrptrs * (size_t)PAGE_SIZE -
15928c2ecf20Sopenharmony_ci					 offset);
15938c2ecf20Sopenharmony_ci		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
15948c2ecf20Sopenharmony_ci						PAGE_SIZE);
15958c2ecf20Sopenharmony_ci		size_t reserve_bytes;
15968c2ecf20Sopenharmony_ci		size_t dirty_pages;
15978c2ecf20Sopenharmony_ci		size_t copied;
15988c2ecf20Sopenharmony_ci		size_t dirty_sectors;
15998c2ecf20Sopenharmony_ci		size_t num_sectors;
16008c2ecf20Sopenharmony_ci		int extents_locked;
16018c2ecf20Sopenharmony_ci
16028c2ecf20Sopenharmony_ci		WARN_ON(num_pages > nrptrs);
16038c2ecf20Sopenharmony_ci
16048c2ecf20Sopenharmony_ci		/*
16058c2ecf20Sopenharmony_ci		 * Fault pages before locking them in prepare_pages
16068c2ecf20Sopenharmony_ci		 * to avoid recursive lock
16078c2ecf20Sopenharmony_ci		 */
16088c2ecf20Sopenharmony_ci		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
16098c2ecf20Sopenharmony_ci			ret = -EFAULT;
16108c2ecf20Sopenharmony_ci			break;
16118c2ecf20Sopenharmony_ci		}
16128c2ecf20Sopenharmony_ci
16138c2ecf20Sopenharmony_ci		only_release_metadata = false;
16148c2ecf20Sopenharmony_ci		sector_offset = pos & (fs_info->sectorsize - 1);
16158c2ecf20Sopenharmony_ci		reserve_bytes = round_up(write_bytes + sector_offset,
16168c2ecf20Sopenharmony_ci				fs_info->sectorsize);
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_ci		extent_changeset_release(data_reserved);
16198c2ecf20Sopenharmony_ci		ret = btrfs_check_data_free_space(BTRFS_I(inode),
16208c2ecf20Sopenharmony_ci						  &data_reserved, pos,
16218c2ecf20Sopenharmony_ci						  write_bytes);
16228c2ecf20Sopenharmony_ci		if (ret < 0) {
16238c2ecf20Sopenharmony_ci			if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
16248c2ecf20Sopenharmony_ci						   &write_bytes) > 0) {
16258c2ecf20Sopenharmony_ci				/*
16268c2ecf20Sopenharmony_ci				 * For nodata cow case, no need to reserve
16278c2ecf20Sopenharmony_ci				 * data space.
16288c2ecf20Sopenharmony_ci				 */
16298c2ecf20Sopenharmony_ci				only_release_metadata = true;
16308c2ecf20Sopenharmony_ci				/*
16318c2ecf20Sopenharmony_ci				 * our prealloc extent may be smaller than
16328c2ecf20Sopenharmony_ci				 * write_bytes, so scale down.
16338c2ecf20Sopenharmony_ci				 */
16348c2ecf20Sopenharmony_ci				num_pages = DIV_ROUND_UP(write_bytes + offset,
16358c2ecf20Sopenharmony_ci							 PAGE_SIZE);
16368c2ecf20Sopenharmony_ci				reserve_bytes = round_up(write_bytes +
16378c2ecf20Sopenharmony_ci							 sector_offset,
16388c2ecf20Sopenharmony_ci							 fs_info->sectorsize);
16398c2ecf20Sopenharmony_ci			} else {
16408c2ecf20Sopenharmony_ci				break;
16418c2ecf20Sopenharmony_ci			}
16428c2ecf20Sopenharmony_ci		}
16438c2ecf20Sopenharmony_ci
16448c2ecf20Sopenharmony_ci		WARN_ON(reserve_bytes == 0);
16458c2ecf20Sopenharmony_ci		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
16468c2ecf20Sopenharmony_ci				reserve_bytes);
16478c2ecf20Sopenharmony_ci		if (ret) {
16488c2ecf20Sopenharmony_ci			if (!only_release_metadata)
16498c2ecf20Sopenharmony_ci				btrfs_free_reserved_data_space(BTRFS_I(inode),
16508c2ecf20Sopenharmony_ci						data_reserved, pos,
16518c2ecf20Sopenharmony_ci						write_bytes);
16528c2ecf20Sopenharmony_ci			else
16538c2ecf20Sopenharmony_ci				btrfs_check_nocow_unlock(BTRFS_I(inode));
16548c2ecf20Sopenharmony_ci			break;
16558c2ecf20Sopenharmony_ci		}
16568c2ecf20Sopenharmony_ci
16578c2ecf20Sopenharmony_ci		release_bytes = reserve_bytes;
16588c2ecf20Sopenharmony_ciagain:
16598c2ecf20Sopenharmony_ci		/*
16608c2ecf20Sopenharmony_ci		 * This is going to setup the pages array with the number of
16618c2ecf20Sopenharmony_ci		 * pages we want, so we don't really need to worry about the
16628c2ecf20Sopenharmony_ci		 * contents of pages from loop to loop
16638c2ecf20Sopenharmony_ci		 */
16648c2ecf20Sopenharmony_ci		ret = prepare_pages(inode, pages, num_pages,
16658c2ecf20Sopenharmony_ci				    pos, write_bytes,
16668c2ecf20Sopenharmony_ci				    force_page_uptodate);
16678c2ecf20Sopenharmony_ci		if (ret) {
16688c2ecf20Sopenharmony_ci			btrfs_delalloc_release_extents(BTRFS_I(inode),
16698c2ecf20Sopenharmony_ci						       reserve_bytes);
16708c2ecf20Sopenharmony_ci			break;
16718c2ecf20Sopenharmony_ci		}
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_ci		extents_locked = lock_and_cleanup_extent_if_need(
16748c2ecf20Sopenharmony_ci				BTRFS_I(inode), pages,
16758c2ecf20Sopenharmony_ci				num_pages, pos, write_bytes, &lockstart,
16768c2ecf20Sopenharmony_ci				&lockend, &cached_state);
16778c2ecf20Sopenharmony_ci		if (extents_locked < 0) {
16788c2ecf20Sopenharmony_ci			if (extents_locked == -EAGAIN)
16798c2ecf20Sopenharmony_ci				goto again;
16808c2ecf20Sopenharmony_ci			btrfs_delalloc_release_extents(BTRFS_I(inode),
16818c2ecf20Sopenharmony_ci						       reserve_bytes);
16828c2ecf20Sopenharmony_ci			ret = extents_locked;
16838c2ecf20Sopenharmony_ci			break;
16848c2ecf20Sopenharmony_ci		}
16858c2ecf20Sopenharmony_ci
16868c2ecf20Sopenharmony_ci		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
16878c2ecf20Sopenharmony_ci
16888c2ecf20Sopenharmony_ci		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
16898c2ecf20Sopenharmony_ci		dirty_sectors = round_up(copied + sector_offset,
16908c2ecf20Sopenharmony_ci					fs_info->sectorsize);
16918c2ecf20Sopenharmony_ci		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci		/*
16948c2ecf20Sopenharmony_ci		 * if we have trouble faulting in the pages, fall
16958c2ecf20Sopenharmony_ci		 * back to one page at a time
16968c2ecf20Sopenharmony_ci		 */
16978c2ecf20Sopenharmony_ci		if (copied < write_bytes)
16988c2ecf20Sopenharmony_ci			nrptrs = 1;
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_ci		if (copied == 0) {
17018c2ecf20Sopenharmony_ci			force_page_uptodate = true;
17028c2ecf20Sopenharmony_ci			dirty_sectors = 0;
17038c2ecf20Sopenharmony_ci			dirty_pages = 0;
17048c2ecf20Sopenharmony_ci		} else {
17058c2ecf20Sopenharmony_ci			force_page_uptodate = false;
17068c2ecf20Sopenharmony_ci			dirty_pages = DIV_ROUND_UP(copied + offset,
17078c2ecf20Sopenharmony_ci						   PAGE_SIZE);
17088c2ecf20Sopenharmony_ci		}
17098c2ecf20Sopenharmony_ci
17108c2ecf20Sopenharmony_ci		if (num_sectors > dirty_sectors) {
17118c2ecf20Sopenharmony_ci			/* release everything except the sectors we dirtied */
17128c2ecf20Sopenharmony_ci			release_bytes -= dirty_sectors <<
17138c2ecf20Sopenharmony_ci						fs_info->sb->s_blocksize_bits;
17148c2ecf20Sopenharmony_ci			if (only_release_metadata) {
17158c2ecf20Sopenharmony_ci				btrfs_delalloc_release_metadata(BTRFS_I(inode),
17168c2ecf20Sopenharmony_ci							release_bytes, true);
17178c2ecf20Sopenharmony_ci			} else {
17188c2ecf20Sopenharmony_ci				u64 __pos;
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci				__pos = round_down(pos,
17218c2ecf20Sopenharmony_ci						   fs_info->sectorsize) +
17228c2ecf20Sopenharmony_ci					(dirty_pages << PAGE_SHIFT);
17238c2ecf20Sopenharmony_ci				btrfs_delalloc_release_space(BTRFS_I(inode),
17248c2ecf20Sopenharmony_ci						data_reserved, __pos,
17258c2ecf20Sopenharmony_ci						release_bytes, true);
17268c2ecf20Sopenharmony_ci			}
17278c2ecf20Sopenharmony_ci		}
17288c2ecf20Sopenharmony_ci
17298c2ecf20Sopenharmony_ci		release_bytes = round_up(copied + sector_offset,
17308c2ecf20Sopenharmony_ci					fs_info->sectorsize);
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci		if (copied > 0)
17338c2ecf20Sopenharmony_ci			ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
17348c2ecf20Sopenharmony_ci						dirty_pages, pos, copied,
17358c2ecf20Sopenharmony_ci						&cached_state);
17368c2ecf20Sopenharmony_ci
17378c2ecf20Sopenharmony_ci		/*
17388c2ecf20Sopenharmony_ci		 * If we have not locked the extent range, because the range's
17398c2ecf20Sopenharmony_ci		 * start offset is >= i_size, we might still have a non-NULL
17408c2ecf20Sopenharmony_ci		 * cached extent state, acquired while marking the extent range
17418c2ecf20Sopenharmony_ci		 * as delalloc through btrfs_dirty_pages(). Therefore free any
17428c2ecf20Sopenharmony_ci		 * possible cached extent state to avoid a memory leak.
17438c2ecf20Sopenharmony_ci		 */
17448c2ecf20Sopenharmony_ci		if (extents_locked)
17458c2ecf20Sopenharmony_ci			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
17468c2ecf20Sopenharmony_ci					     lockstart, lockend, &cached_state);
17478c2ecf20Sopenharmony_ci		else
17488c2ecf20Sopenharmony_ci			free_extent_state(cached_state);
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
17518c2ecf20Sopenharmony_ci		if (ret) {
17528c2ecf20Sopenharmony_ci			btrfs_drop_pages(pages, num_pages);
17538c2ecf20Sopenharmony_ci			break;
17548c2ecf20Sopenharmony_ci		}
17558c2ecf20Sopenharmony_ci
17568c2ecf20Sopenharmony_ci		release_bytes = 0;
17578c2ecf20Sopenharmony_ci		if (only_release_metadata)
17588c2ecf20Sopenharmony_ci			btrfs_check_nocow_unlock(BTRFS_I(inode));
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_ci		if (only_release_metadata && copied > 0) {
17618c2ecf20Sopenharmony_ci			lockstart = round_down(pos,
17628c2ecf20Sopenharmony_ci					       fs_info->sectorsize);
17638c2ecf20Sopenharmony_ci			lockend = round_up(pos + copied,
17648c2ecf20Sopenharmony_ci					   fs_info->sectorsize) - 1;
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
17678c2ecf20Sopenharmony_ci				       lockend, EXTENT_NORESERVE, NULL,
17688c2ecf20Sopenharmony_ci				       NULL, GFP_NOFS);
17698c2ecf20Sopenharmony_ci		}
17708c2ecf20Sopenharmony_ci
17718c2ecf20Sopenharmony_ci		btrfs_drop_pages(pages, num_pages);
17728c2ecf20Sopenharmony_ci
17738c2ecf20Sopenharmony_ci		cond_resched();
17748c2ecf20Sopenharmony_ci
17758c2ecf20Sopenharmony_ci		balance_dirty_pages_ratelimited(inode->i_mapping);
17768c2ecf20Sopenharmony_ci
17778c2ecf20Sopenharmony_ci		pos += copied;
17788c2ecf20Sopenharmony_ci		num_written += copied;
17798c2ecf20Sopenharmony_ci	}
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci	kfree(pages);
17828c2ecf20Sopenharmony_ci
17838c2ecf20Sopenharmony_ci	if (release_bytes) {
17848c2ecf20Sopenharmony_ci		if (only_release_metadata) {
17858c2ecf20Sopenharmony_ci			btrfs_check_nocow_unlock(BTRFS_I(inode));
17868c2ecf20Sopenharmony_ci			btrfs_delalloc_release_metadata(BTRFS_I(inode),
17878c2ecf20Sopenharmony_ci					release_bytes, true);
17888c2ecf20Sopenharmony_ci		} else {
17898c2ecf20Sopenharmony_ci			btrfs_delalloc_release_space(BTRFS_I(inode),
17908c2ecf20Sopenharmony_ci					data_reserved,
17918c2ecf20Sopenharmony_ci					round_down(pos, fs_info->sectorsize),
17928c2ecf20Sopenharmony_ci					release_bytes, true);
17938c2ecf20Sopenharmony_ci		}
17948c2ecf20Sopenharmony_ci	}
17958c2ecf20Sopenharmony_ci
17968c2ecf20Sopenharmony_ci	extent_changeset_free(data_reserved);
17978c2ecf20Sopenharmony_ci	return num_written ? num_written : ret;
17988c2ecf20Sopenharmony_ci}
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_cistatic ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
18018c2ecf20Sopenharmony_ci{
18028c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
18038c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
18048c2ecf20Sopenharmony_ci	loff_t pos;
18058c2ecf20Sopenharmony_ci	ssize_t written;
18068c2ecf20Sopenharmony_ci	ssize_t written_buffered;
18078c2ecf20Sopenharmony_ci	loff_t endbyte;
18088c2ecf20Sopenharmony_ci	int err;
18098c2ecf20Sopenharmony_ci
18108c2ecf20Sopenharmony_ci	written = btrfs_direct_IO(iocb, from);
18118c2ecf20Sopenharmony_ci
18128c2ecf20Sopenharmony_ci	if (written < 0 || !iov_iter_count(from))
18138c2ecf20Sopenharmony_ci		return written;
18148c2ecf20Sopenharmony_ci
18158c2ecf20Sopenharmony_ci	pos = iocb->ki_pos;
18168c2ecf20Sopenharmony_ci	written_buffered = btrfs_buffered_write(iocb, from);
18178c2ecf20Sopenharmony_ci	if (written_buffered < 0) {
18188c2ecf20Sopenharmony_ci		err = written_buffered;
18198c2ecf20Sopenharmony_ci		goto out;
18208c2ecf20Sopenharmony_ci	}
18218c2ecf20Sopenharmony_ci	/*
18228c2ecf20Sopenharmony_ci	 * Ensure all data is persisted. We want the next direct IO read to be
18238c2ecf20Sopenharmony_ci	 * able to read what was just written.
18248c2ecf20Sopenharmony_ci	 */
18258c2ecf20Sopenharmony_ci	endbyte = pos + written_buffered - 1;
18268c2ecf20Sopenharmony_ci	err = btrfs_fdatawrite_range(inode, pos, endbyte);
18278c2ecf20Sopenharmony_ci	if (err)
18288c2ecf20Sopenharmony_ci		goto out;
18298c2ecf20Sopenharmony_ci	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
18308c2ecf20Sopenharmony_ci	if (err)
18318c2ecf20Sopenharmony_ci		goto out;
18328c2ecf20Sopenharmony_ci	written += written_buffered;
18338c2ecf20Sopenharmony_ci	iocb->ki_pos = pos + written_buffered;
18348c2ecf20Sopenharmony_ci	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
18358c2ecf20Sopenharmony_ci				 endbyte >> PAGE_SHIFT);
18368c2ecf20Sopenharmony_ciout:
18378c2ecf20Sopenharmony_ci	return written ? written : err;
18388c2ecf20Sopenharmony_ci}
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_cistatic void update_time_for_write(struct inode *inode)
18418c2ecf20Sopenharmony_ci{
18428c2ecf20Sopenharmony_ci	struct timespec64 now;
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	if (IS_NOCMTIME(inode))
18458c2ecf20Sopenharmony_ci		return;
18468c2ecf20Sopenharmony_ci
18478c2ecf20Sopenharmony_ci	now = current_time(inode);
18488c2ecf20Sopenharmony_ci	if (!timespec64_equal(&inode->i_mtime, &now))
18498c2ecf20Sopenharmony_ci		inode->i_mtime = now;
18508c2ecf20Sopenharmony_ci
18518c2ecf20Sopenharmony_ci	if (!timespec64_equal(&inode->i_ctime, &now))
18528c2ecf20Sopenharmony_ci		inode->i_ctime = now;
18538c2ecf20Sopenharmony_ci
18548c2ecf20Sopenharmony_ci	if (IS_I_VERSION(inode))
18558c2ecf20Sopenharmony_ci		inode_inc_iversion(inode);
18568c2ecf20Sopenharmony_ci}
18578c2ecf20Sopenharmony_ci
18588c2ecf20Sopenharmony_cistatic ssize_t btrfs_file_write_iter(struct kiocb *iocb,
18598c2ecf20Sopenharmony_ci				    struct iov_iter *from)
18608c2ecf20Sopenharmony_ci{
18618c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
18628c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
18638c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
18648c2ecf20Sopenharmony_ci	u64 start_pos;
18658c2ecf20Sopenharmony_ci	u64 end_pos;
18668c2ecf20Sopenharmony_ci	ssize_t num_written = 0;
18678c2ecf20Sopenharmony_ci	const bool sync = iocb->ki_flags & IOCB_DSYNC;
18688c2ecf20Sopenharmony_ci	ssize_t err;
18698c2ecf20Sopenharmony_ci	loff_t pos;
18708c2ecf20Sopenharmony_ci	size_t count;
18718c2ecf20Sopenharmony_ci	loff_t oldsize;
18728c2ecf20Sopenharmony_ci	int clean_page = 0;
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_ci	if (!(iocb->ki_flags & IOCB_DIRECT) &&
18758c2ecf20Sopenharmony_ci	    (iocb->ki_flags & IOCB_NOWAIT))
18768c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
18778c2ecf20Sopenharmony_ci
18788c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
18798c2ecf20Sopenharmony_ci		if (!inode_trylock(inode))
18808c2ecf20Sopenharmony_ci			return -EAGAIN;
18818c2ecf20Sopenharmony_ci	} else {
18828c2ecf20Sopenharmony_ci		inode_lock(inode);
18838c2ecf20Sopenharmony_ci	}
18848c2ecf20Sopenharmony_ci
18858c2ecf20Sopenharmony_ci	err = generic_write_checks(iocb, from);
18868c2ecf20Sopenharmony_ci	if (err <= 0) {
18878c2ecf20Sopenharmony_ci		inode_unlock(inode);
18888c2ecf20Sopenharmony_ci		return err;
18898c2ecf20Sopenharmony_ci	}
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci	pos = iocb->ki_pos;
18928c2ecf20Sopenharmony_ci	count = iov_iter_count(from);
18938c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
18948c2ecf20Sopenharmony_ci		size_t nocow_bytes = count;
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ci		/*
18978c2ecf20Sopenharmony_ci		 * We will allocate space in case nodatacow is not set,
18988c2ecf20Sopenharmony_ci		 * so bail
18998c2ecf20Sopenharmony_ci		 */
19008c2ecf20Sopenharmony_ci		if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes)
19018c2ecf20Sopenharmony_ci		    <= 0) {
19028c2ecf20Sopenharmony_ci			inode_unlock(inode);
19038c2ecf20Sopenharmony_ci			return -EAGAIN;
19048c2ecf20Sopenharmony_ci		}
19058c2ecf20Sopenharmony_ci		/*
19068c2ecf20Sopenharmony_ci		 * There are holes in the range or parts of the range that must
19078c2ecf20Sopenharmony_ci		 * be COWed (shared extents, RO block groups, etc), so just bail
19088c2ecf20Sopenharmony_ci		 * out.
19098c2ecf20Sopenharmony_ci		 */
19108c2ecf20Sopenharmony_ci		if (nocow_bytes < count) {
19118c2ecf20Sopenharmony_ci			inode_unlock(inode);
19128c2ecf20Sopenharmony_ci			return -EAGAIN;
19138c2ecf20Sopenharmony_ci		}
19148c2ecf20Sopenharmony_ci	}
19158c2ecf20Sopenharmony_ci
19168c2ecf20Sopenharmony_ci	current->backing_dev_info = inode_to_bdi(inode);
19178c2ecf20Sopenharmony_ci	err = file_remove_privs(file);
19188c2ecf20Sopenharmony_ci	if (err) {
19198c2ecf20Sopenharmony_ci		inode_unlock(inode);
19208c2ecf20Sopenharmony_ci		goto out;
19218c2ecf20Sopenharmony_ci	}
19228c2ecf20Sopenharmony_ci
19238c2ecf20Sopenharmony_ci	/*
19248c2ecf20Sopenharmony_ci	 * If BTRFS flips readonly due to some impossible error
19258c2ecf20Sopenharmony_ci	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
19268c2ecf20Sopenharmony_ci	 * although we have opened a file as writable, we have
19278c2ecf20Sopenharmony_ci	 * to stop this write operation to ensure FS consistency.
19288c2ecf20Sopenharmony_ci	 */
19298c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
19308c2ecf20Sopenharmony_ci		inode_unlock(inode);
19318c2ecf20Sopenharmony_ci		err = -EROFS;
19328c2ecf20Sopenharmony_ci		goto out;
19338c2ecf20Sopenharmony_ci	}
19348c2ecf20Sopenharmony_ci
19358c2ecf20Sopenharmony_ci	/*
19368c2ecf20Sopenharmony_ci	 * We reserve space for updating the inode when we reserve space for the
19378c2ecf20Sopenharmony_ci	 * extent we are going to write, so we will enospc out there.  We don't
19388c2ecf20Sopenharmony_ci	 * need to start yet another transaction to update the inode as we will
19398c2ecf20Sopenharmony_ci	 * update the inode when we finish writing whatever data we write.
19408c2ecf20Sopenharmony_ci	 */
19418c2ecf20Sopenharmony_ci	update_time_for_write(inode);
19428c2ecf20Sopenharmony_ci
19438c2ecf20Sopenharmony_ci	start_pos = round_down(pos, fs_info->sectorsize);
19448c2ecf20Sopenharmony_ci	oldsize = i_size_read(inode);
19458c2ecf20Sopenharmony_ci	if (start_pos > oldsize) {
19468c2ecf20Sopenharmony_ci		/* Expand hole size to cover write data, preventing empty gap */
19478c2ecf20Sopenharmony_ci		end_pos = round_up(pos + count,
19488c2ecf20Sopenharmony_ci				   fs_info->sectorsize);
19498c2ecf20Sopenharmony_ci		err = btrfs_cont_expand(inode, oldsize, end_pos);
19508c2ecf20Sopenharmony_ci		if (err) {
19518c2ecf20Sopenharmony_ci			inode_unlock(inode);
19528c2ecf20Sopenharmony_ci			goto out;
19538c2ecf20Sopenharmony_ci		}
19548c2ecf20Sopenharmony_ci		if (start_pos > round_up(oldsize, fs_info->sectorsize))
19558c2ecf20Sopenharmony_ci			clean_page = 1;
19568c2ecf20Sopenharmony_ci	}
19578c2ecf20Sopenharmony_ci
19588c2ecf20Sopenharmony_ci	if (sync)
19598c2ecf20Sopenharmony_ci		atomic_inc(&BTRFS_I(inode)->sync_writers);
19608c2ecf20Sopenharmony_ci
19618c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
19628c2ecf20Sopenharmony_ci		/*
19638c2ecf20Sopenharmony_ci		 * 1. We must always clear IOCB_DSYNC in order to not deadlock
19648c2ecf20Sopenharmony_ci		 *    in iomap, as it calls generic_write_sync() in this case.
19658c2ecf20Sopenharmony_ci		 * 2. If we are async, we can call iomap_dio_complete() either
19668c2ecf20Sopenharmony_ci		 *    in
19678c2ecf20Sopenharmony_ci		 *
19688c2ecf20Sopenharmony_ci		 *    2.1. A worker thread from the last bio completed.  In this
19698c2ecf20Sopenharmony_ci		 *	   case we need to mark the btrfs_dio_data that it is
19708c2ecf20Sopenharmony_ci		 *	   async in order to call generic_write_sync() properly.
19718c2ecf20Sopenharmony_ci		 *	   This is handled by setting BTRFS_DIO_SYNC_STUB in the
19728c2ecf20Sopenharmony_ci		 *	   current->journal_info.
19738c2ecf20Sopenharmony_ci		 *    2.2  The submitter context, because all IO completed
19748c2ecf20Sopenharmony_ci		 *         before we exited iomap_dio_rw().  In this case we can
19758c2ecf20Sopenharmony_ci		 *         just re-set the IOCB_DSYNC on the iocb and we'll do
19768c2ecf20Sopenharmony_ci		 *         the sync below.  If our ->end_io() gets called and
19778c2ecf20Sopenharmony_ci		 *         current->journal_info is set, then we know we're in
19788c2ecf20Sopenharmony_ci		 *         our current context and we will clear
19798c2ecf20Sopenharmony_ci		 *         current->journal_info to indicate that we need to
19808c2ecf20Sopenharmony_ci		 *         sync below.
19818c2ecf20Sopenharmony_ci		 */
19828c2ecf20Sopenharmony_ci		if (sync) {
19838c2ecf20Sopenharmony_ci			ASSERT(current->journal_info == NULL);
19848c2ecf20Sopenharmony_ci			iocb->ki_flags &= ~IOCB_DSYNC;
19858c2ecf20Sopenharmony_ci			current->journal_info = BTRFS_DIO_SYNC_STUB;
19868c2ecf20Sopenharmony_ci		}
19878c2ecf20Sopenharmony_ci		num_written = __btrfs_direct_write(iocb, from);
19888c2ecf20Sopenharmony_ci
19898c2ecf20Sopenharmony_ci		/*
19908c2ecf20Sopenharmony_ci		 * As stated above, we cleared journal_info, so we need to do
19918c2ecf20Sopenharmony_ci		 * the sync ourselves.
19928c2ecf20Sopenharmony_ci		 */
19938c2ecf20Sopenharmony_ci		if (sync && current->journal_info == NULL)
19948c2ecf20Sopenharmony_ci			iocb->ki_flags |= IOCB_DSYNC;
19958c2ecf20Sopenharmony_ci		current->journal_info = NULL;
19968c2ecf20Sopenharmony_ci	} else {
19978c2ecf20Sopenharmony_ci		num_written = btrfs_buffered_write(iocb, from);
19988c2ecf20Sopenharmony_ci		if (num_written > 0)
19998c2ecf20Sopenharmony_ci			iocb->ki_pos = pos + num_written;
20008c2ecf20Sopenharmony_ci		if (clean_page)
20018c2ecf20Sopenharmony_ci			pagecache_isize_extended(inode, oldsize,
20028c2ecf20Sopenharmony_ci						i_size_read(inode));
20038c2ecf20Sopenharmony_ci	}
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	inode_unlock(inode);
20068c2ecf20Sopenharmony_ci
20078c2ecf20Sopenharmony_ci	btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
20088c2ecf20Sopenharmony_ci
20098c2ecf20Sopenharmony_ci	if (num_written > 0)
20108c2ecf20Sopenharmony_ci		num_written = generic_write_sync(iocb, num_written);
20118c2ecf20Sopenharmony_ci
20128c2ecf20Sopenharmony_ci	if (sync)
20138c2ecf20Sopenharmony_ci		atomic_dec(&BTRFS_I(inode)->sync_writers);
20148c2ecf20Sopenharmony_ciout:
20158c2ecf20Sopenharmony_ci	current->backing_dev_info = NULL;
20168c2ecf20Sopenharmony_ci	return num_written ? num_written : err;
20178c2ecf20Sopenharmony_ci}
20188c2ecf20Sopenharmony_ci
20198c2ecf20Sopenharmony_ciint btrfs_release_file(struct inode *inode, struct file *filp)
20208c2ecf20Sopenharmony_ci{
20218c2ecf20Sopenharmony_ci	struct btrfs_file_private *private = filp->private_data;
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	if (private && private->filldir_buf)
20248c2ecf20Sopenharmony_ci		kfree(private->filldir_buf);
20258c2ecf20Sopenharmony_ci	kfree(private);
20268c2ecf20Sopenharmony_ci	filp->private_data = NULL;
20278c2ecf20Sopenharmony_ci
20288c2ecf20Sopenharmony_ci	/*
20298c2ecf20Sopenharmony_ci	 * Set by setattr when we are about to truncate a file from a non-zero
20308c2ecf20Sopenharmony_ci	 * size to a zero size.  This tries to flush down new bytes that may
20318c2ecf20Sopenharmony_ci	 * have been written if the application were using truncate to replace
20328c2ecf20Sopenharmony_ci	 * a file in place.
20338c2ecf20Sopenharmony_ci	 */
20348c2ecf20Sopenharmony_ci	if (test_and_clear_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
20358c2ecf20Sopenharmony_ci			       &BTRFS_I(inode)->runtime_flags))
20368c2ecf20Sopenharmony_ci			filemap_flush(inode->i_mapping);
20378c2ecf20Sopenharmony_ci	return 0;
20388c2ecf20Sopenharmony_ci}
20398c2ecf20Sopenharmony_ci
20408c2ecf20Sopenharmony_cistatic int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
20418c2ecf20Sopenharmony_ci{
20428c2ecf20Sopenharmony_ci	int ret;
20438c2ecf20Sopenharmony_ci	struct blk_plug plug;
20448c2ecf20Sopenharmony_ci
20458c2ecf20Sopenharmony_ci	/*
20468c2ecf20Sopenharmony_ci	 * This is only called in fsync, which would do synchronous writes, so
20478c2ecf20Sopenharmony_ci	 * a plug can merge adjacent IOs as much as possible.  Esp. in case of
20488c2ecf20Sopenharmony_ci	 * multiple disks using raid profile, a large IO can be split to
20498c2ecf20Sopenharmony_ci	 * several segments of stripe length (currently 64K).
20508c2ecf20Sopenharmony_ci	 */
20518c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
20528c2ecf20Sopenharmony_ci	atomic_inc(&BTRFS_I(inode)->sync_writers);
20538c2ecf20Sopenharmony_ci	ret = btrfs_fdatawrite_range(inode, start, end);
20548c2ecf20Sopenharmony_ci	atomic_dec(&BTRFS_I(inode)->sync_writers);
20558c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
20568c2ecf20Sopenharmony_ci
20578c2ecf20Sopenharmony_ci	return ret;
20588c2ecf20Sopenharmony_ci}
20598c2ecf20Sopenharmony_ci
20608c2ecf20Sopenharmony_cistatic inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
20618c2ecf20Sopenharmony_ci{
20628c2ecf20Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(ctx->inode);
20638c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
20648c2ecf20Sopenharmony_ci
20658c2ecf20Sopenharmony_ci	if (btrfs_inode_in_log(inode, fs_info->generation) &&
20668c2ecf20Sopenharmony_ci	    list_empty(&ctx->ordered_extents))
20678c2ecf20Sopenharmony_ci		return true;
20688c2ecf20Sopenharmony_ci
20698c2ecf20Sopenharmony_ci	/*
20708c2ecf20Sopenharmony_ci	 * If we are doing a fast fsync we can not bail out if the inode's
20718c2ecf20Sopenharmony_ci	 * last_trans is <= then the last committed transaction, because we only
20728c2ecf20Sopenharmony_ci	 * update the last_trans of the inode during ordered extent completion,
20738c2ecf20Sopenharmony_ci	 * and for a fast fsync we don't wait for that, we only wait for the
20748c2ecf20Sopenharmony_ci	 * writeback to complete.
20758c2ecf20Sopenharmony_ci	 */
20768c2ecf20Sopenharmony_ci	if (inode->last_trans <= fs_info->last_trans_committed &&
20778c2ecf20Sopenharmony_ci	    (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
20788c2ecf20Sopenharmony_ci	     list_empty(&ctx->ordered_extents)))
20798c2ecf20Sopenharmony_ci		return true;
20808c2ecf20Sopenharmony_ci
20818c2ecf20Sopenharmony_ci	return false;
20828c2ecf20Sopenharmony_ci}
20838c2ecf20Sopenharmony_ci
20848c2ecf20Sopenharmony_ci/*
20858c2ecf20Sopenharmony_ci * fsync call for both files and directories.  This logs the inode into
20868c2ecf20Sopenharmony_ci * the tree log instead of forcing full commits whenever possible.
20878c2ecf20Sopenharmony_ci *
20888c2ecf20Sopenharmony_ci * It needs to call filemap_fdatawait so that all ordered extent updates are
20898c2ecf20Sopenharmony_ci * in the metadata btree are up to date for copying to the log.
20908c2ecf20Sopenharmony_ci *
20918c2ecf20Sopenharmony_ci * It drops the inode mutex before doing the tree log commit.  This is an
20928c2ecf20Sopenharmony_ci * important optimization for directories because holding the mutex prevents
20938c2ecf20Sopenharmony_ci * new operations on the dir while we write to disk.
20948c2ecf20Sopenharmony_ci */
20958c2ecf20Sopenharmony_ciint btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
20968c2ecf20Sopenharmony_ci{
20978c2ecf20Sopenharmony_ci	struct dentry *dentry = file_dentry(file);
20988c2ecf20Sopenharmony_ci	struct inode *inode = d_inode(dentry);
20998c2ecf20Sopenharmony_ci	struct btrfs_root *root = BTRFS_I(inode)->root;
21008c2ecf20Sopenharmony_ci	struct btrfs_trans_handle *trans;
21018c2ecf20Sopenharmony_ci	struct btrfs_log_ctx ctx;
21028c2ecf20Sopenharmony_ci	int ret = 0, err;
21038c2ecf20Sopenharmony_ci	u64 len;
21048c2ecf20Sopenharmony_ci	bool full_sync;
21058c2ecf20Sopenharmony_ci
21068c2ecf20Sopenharmony_ci	trace_btrfs_sync_file(file, datasync);
21078c2ecf20Sopenharmony_ci
21088c2ecf20Sopenharmony_ci	btrfs_init_log_ctx(&ctx, inode);
21098c2ecf20Sopenharmony_ci
21108c2ecf20Sopenharmony_ci	/*
21118c2ecf20Sopenharmony_ci	 * Always set the range to a full range, otherwise we can get into
21128c2ecf20Sopenharmony_ci	 * several problems, from missing file extent items to represent holes
21138c2ecf20Sopenharmony_ci	 * when not using the NO_HOLES feature, to log tree corruption due to
21148c2ecf20Sopenharmony_ci	 * races between hole detection during logging and completion of ordered
21158c2ecf20Sopenharmony_ci	 * extents outside the range, to missing checksums due to ordered extents
21168c2ecf20Sopenharmony_ci	 * for which we flushed only a subset of their pages.
21178c2ecf20Sopenharmony_ci	 */
21188c2ecf20Sopenharmony_ci	start = 0;
21198c2ecf20Sopenharmony_ci	end = LLONG_MAX;
21208c2ecf20Sopenharmony_ci	len = (u64)LLONG_MAX + 1;
21218c2ecf20Sopenharmony_ci
21228c2ecf20Sopenharmony_ci	/*
21238c2ecf20Sopenharmony_ci	 * We write the dirty pages in the range and wait until they complete
21248c2ecf20Sopenharmony_ci	 * out of the ->i_mutex. If so, we can flush the dirty pages by
21258c2ecf20Sopenharmony_ci	 * multi-task, and make the performance up.  See
21268c2ecf20Sopenharmony_ci	 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
21278c2ecf20Sopenharmony_ci	 */
21288c2ecf20Sopenharmony_ci	ret = start_ordered_ops(inode, start, end);
21298c2ecf20Sopenharmony_ci	if (ret)
21308c2ecf20Sopenharmony_ci		goto out;
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ci	inode_lock(inode);
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_ci	/*
21358c2ecf20Sopenharmony_ci	 * We take the dio_sem here because the tree log stuff can race with
21368c2ecf20Sopenharmony_ci	 * lockless dio writes and get an extent map logged for an extent we
21378c2ecf20Sopenharmony_ci	 * never waited on.  We need it this high up for lockdep reasons.
21388c2ecf20Sopenharmony_ci	 */
21398c2ecf20Sopenharmony_ci	down_write(&BTRFS_I(inode)->dio_sem);
21408c2ecf20Sopenharmony_ci
21418c2ecf20Sopenharmony_ci	atomic_inc(&root->log_batch);
21428c2ecf20Sopenharmony_ci
21438c2ecf20Sopenharmony_ci	/*
21448c2ecf20Sopenharmony_ci	 * Always check for the full sync flag while holding the inode's lock,
21458c2ecf20Sopenharmony_ci	 * to avoid races with other tasks. The flag must be either set all the
21468c2ecf20Sopenharmony_ci	 * time during logging or always off all the time while logging.
21478c2ecf20Sopenharmony_ci	 */
21488c2ecf20Sopenharmony_ci	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
21498c2ecf20Sopenharmony_ci			     &BTRFS_I(inode)->runtime_flags);
21508c2ecf20Sopenharmony_ci
21518c2ecf20Sopenharmony_ci	/*
21528c2ecf20Sopenharmony_ci	 * Before we acquired the inode's lock, someone may have dirtied more
21538c2ecf20Sopenharmony_ci	 * pages in the target range. We need to make sure that writeback for
21548c2ecf20Sopenharmony_ci	 * any such pages does not start while we are logging the inode, because
21558c2ecf20Sopenharmony_ci	 * if it does, any of the following might happen when we are not doing a
21568c2ecf20Sopenharmony_ci	 * full inode sync:
21578c2ecf20Sopenharmony_ci	 *
21588c2ecf20Sopenharmony_ci	 * 1) We log an extent after its writeback finishes but before its
21598c2ecf20Sopenharmony_ci	 *    checksums are added to the csum tree, leading to -EIO errors
21608c2ecf20Sopenharmony_ci	 *    when attempting to read the extent after a log replay.
21618c2ecf20Sopenharmony_ci	 *
21628c2ecf20Sopenharmony_ci	 * 2) We can end up logging an extent before its writeback finishes.
21638c2ecf20Sopenharmony_ci	 *    Therefore after the log replay we will have a file extent item
21648c2ecf20Sopenharmony_ci	 *    pointing to an unwritten extent (and no data checksums as well).
21658c2ecf20Sopenharmony_ci	 *
21668c2ecf20Sopenharmony_ci	 * So trigger writeback for any eventual new dirty pages and then we
21678c2ecf20Sopenharmony_ci	 * wait for all ordered extents to complete below.
21688c2ecf20Sopenharmony_ci	 */
21698c2ecf20Sopenharmony_ci	ret = start_ordered_ops(inode, start, end);
21708c2ecf20Sopenharmony_ci	if (ret) {
21718c2ecf20Sopenharmony_ci		up_write(&BTRFS_I(inode)->dio_sem);
21728c2ecf20Sopenharmony_ci		inode_unlock(inode);
21738c2ecf20Sopenharmony_ci		goto out;
21748c2ecf20Sopenharmony_ci	}
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	/*
21778c2ecf20Sopenharmony_ci	 * We have to do this here to avoid the priority inversion of waiting on
21788c2ecf20Sopenharmony_ci	 * IO of a lower priority task while holding a transaction open.
21798c2ecf20Sopenharmony_ci	 *
21808c2ecf20Sopenharmony_ci	 * For a full fsync we wait for the ordered extents to complete while
21818c2ecf20Sopenharmony_ci	 * for a fast fsync we wait just for writeback to complete, and then
21828c2ecf20Sopenharmony_ci	 * attach the ordered extents to the transaction so that a transaction
21838c2ecf20Sopenharmony_ci	 * commit waits for their completion, to avoid data loss if we fsync,
21848c2ecf20Sopenharmony_ci	 * the current transaction commits before the ordered extents complete
21858c2ecf20Sopenharmony_ci	 * and a power failure happens right after that.
21868c2ecf20Sopenharmony_ci	 */
21878c2ecf20Sopenharmony_ci	if (full_sync) {
21888c2ecf20Sopenharmony_ci		ret = btrfs_wait_ordered_range(inode, start, len);
21898c2ecf20Sopenharmony_ci	} else {
21908c2ecf20Sopenharmony_ci		/*
21918c2ecf20Sopenharmony_ci		 * Get our ordered extents as soon as possible to avoid doing
21928c2ecf20Sopenharmony_ci		 * checksum lookups in the csum tree, and use instead the
21938c2ecf20Sopenharmony_ci		 * checksums attached to the ordered extents.
21948c2ecf20Sopenharmony_ci		 */
21958c2ecf20Sopenharmony_ci		btrfs_get_ordered_extents_for_logging(BTRFS_I(inode),
21968c2ecf20Sopenharmony_ci						      &ctx.ordered_extents);
21978c2ecf20Sopenharmony_ci		ret = filemap_fdatawait_range(inode->i_mapping, start, end);
21988c2ecf20Sopenharmony_ci	}
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_ci	if (ret)
22018c2ecf20Sopenharmony_ci		goto out_release_extents;
22028c2ecf20Sopenharmony_ci
22038c2ecf20Sopenharmony_ci	atomic_inc(&root->log_batch);
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	smp_mb();
22068c2ecf20Sopenharmony_ci	if (skip_inode_logging(&ctx)) {
22078c2ecf20Sopenharmony_ci		/*
22088c2ecf20Sopenharmony_ci		 * We've had everything committed since the last time we were
22098c2ecf20Sopenharmony_ci		 * modified so clear this flag in case it was set for whatever
22108c2ecf20Sopenharmony_ci		 * reason, it's no longer relevant.
22118c2ecf20Sopenharmony_ci		 */
22128c2ecf20Sopenharmony_ci		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
22138c2ecf20Sopenharmony_ci			  &BTRFS_I(inode)->runtime_flags);
22148c2ecf20Sopenharmony_ci		/*
22158c2ecf20Sopenharmony_ci		 * An ordered extent might have started before and completed
22168c2ecf20Sopenharmony_ci		 * already with io errors, in which case the inode was not
22178c2ecf20Sopenharmony_ci		 * updated and we end up here. So check the inode's mapping
22188c2ecf20Sopenharmony_ci		 * for any errors that might have happened since we last
22198c2ecf20Sopenharmony_ci		 * checked called fsync.
22208c2ecf20Sopenharmony_ci		 */
22218c2ecf20Sopenharmony_ci		ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
22228c2ecf20Sopenharmony_ci		goto out_release_extents;
22238c2ecf20Sopenharmony_ci	}
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	/*
22268c2ecf20Sopenharmony_ci	 * We use start here because we will need to wait on the IO to complete
22278c2ecf20Sopenharmony_ci	 * in btrfs_sync_log, which could require joining a transaction (for
22288c2ecf20Sopenharmony_ci	 * example checking cross references in the nocow path).  If we use join
22298c2ecf20Sopenharmony_ci	 * here we could get into a situation where we're waiting on IO to
22308c2ecf20Sopenharmony_ci	 * happen that is blocked on a transaction trying to commit.  With start
22318c2ecf20Sopenharmony_ci	 * we inc the extwriter counter, so we wait for all extwriters to exit
22328c2ecf20Sopenharmony_ci	 * before we start blocking joiners.  This comment is to keep somebody
22338c2ecf20Sopenharmony_ci	 * from thinking they are super smart and changing this to
22348c2ecf20Sopenharmony_ci	 * btrfs_join_transaction *cough*Josef*cough*.
22358c2ecf20Sopenharmony_ci	 */
22368c2ecf20Sopenharmony_ci	trans = btrfs_start_transaction(root, 0);
22378c2ecf20Sopenharmony_ci	if (IS_ERR(trans)) {
22388c2ecf20Sopenharmony_ci		ret = PTR_ERR(trans);
22398c2ecf20Sopenharmony_ci		goto out_release_extents;
22408c2ecf20Sopenharmony_ci	}
22418c2ecf20Sopenharmony_ci
22428c2ecf20Sopenharmony_ci	ret = btrfs_log_dentry_safe(trans, dentry, &ctx);
22438c2ecf20Sopenharmony_ci	btrfs_release_log_ctx_extents(&ctx);
22448c2ecf20Sopenharmony_ci	if (ret < 0) {
22458c2ecf20Sopenharmony_ci		/* Fallthrough and commit/free transaction. */
22468c2ecf20Sopenharmony_ci		ret = 1;
22478c2ecf20Sopenharmony_ci	}
22488c2ecf20Sopenharmony_ci
22498c2ecf20Sopenharmony_ci	/* we've logged all the items and now have a consistent
22508c2ecf20Sopenharmony_ci	 * version of the file in the log.  It is possible that
22518c2ecf20Sopenharmony_ci	 * someone will come in and modify the file, but that's
22528c2ecf20Sopenharmony_ci	 * fine because the log is consistent on disk, and we
22538c2ecf20Sopenharmony_ci	 * have references to all of the file's extents
22548c2ecf20Sopenharmony_ci	 *
22558c2ecf20Sopenharmony_ci	 * It is possible that someone will come in and log the
22568c2ecf20Sopenharmony_ci	 * file again, but that will end up using the synchronization
22578c2ecf20Sopenharmony_ci	 * inside btrfs_sync_log to keep things safe.
22588c2ecf20Sopenharmony_ci	 */
22598c2ecf20Sopenharmony_ci	up_write(&BTRFS_I(inode)->dio_sem);
22608c2ecf20Sopenharmony_ci	inode_unlock(inode);
22618c2ecf20Sopenharmony_ci
22628c2ecf20Sopenharmony_ci	if (ret != BTRFS_NO_LOG_SYNC) {
22638c2ecf20Sopenharmony_ci		if (!ret) {
22648c2ecf20Sopenharmony_ci			ret = btrfs_sync_log(trans, root, &ctx);
22658c2ecf20Sopenharmony_ci			if (!ret) {
22668c2ecf20Sopenharmony_ci				ret = btrfs_end_transaction(trans);
22678c2ecf20Sopenharmony_ci				goto out;
22688c2ecf20Sopenharmony_ci			}
22698c2ecf20Sopenharmony_ci		}
22708c2ecf20Sopenharmony_ci		if (!full_sync) {
22718c2ecf20Sopenharmony_ci			ret = btrfs_wait_ordered_range(inode, start, len);
22728c2ecf20Sopenharmony_ci			if (ret) {
22738c2ecf20Sopenharmony_ci				btrfs_end_transaction(trans);
22748c2ecf20Sopenharmony_ci				goto out;
22758c2ecf20Sopenharmony_ci			}
22768c2ecf20Sopenharmony_ci		}
22778c2ecf20Sopenharmony_ci		ret = btrfs_commit_transaction(trans);
22788c2ecf20Sopenharmony_ci	} else {
22798c2ecf20Sopenharmony_ci		ret = btrfs_end_transaction(trans);
22808c2ecf20Sopenharmony_ci	}
22818c2ecf20Sopenharmony_ciout:
22828c2ecf20Sopenharmony_ci	ASSERT(list_empty(&ctx.list));
22838c2ecf20Sopenharmony_ci	err = file_check_and_advance_wb_err(file);
22848c2ecf20Sopenharmony_ci	if (!ret)
22858c2ecf20Sopenharmony_ci		ret = err;
22868c2ecf20Sopenharmony_ci	return ret > 0 ? -EIO : ret;
22878c2ecf20Sopenharmony_ci
22888c2ecf20Sopenharmony_ciout_release_extents:
22898c2ecf20Sopenharmony_ci	btrfs_release_log_ctx_extents(&ctx);
22908c2ecf20Sopenharmony_ci	up_write(&BTRFS_I(inode)->dio_sem);
22918c2ecf20Sopenharmony_ci	inode_unlock(inode);
22928c2ecf20Sopenharmony_ci	goto out;
22938c2ecf20Sopenharmony_ci}
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_cistatic const struct vm_operations_struct btrfs_file_vm_ops = {
22968c2ecf20Sopenharmony_ci	.fault		= filemap_fault,
22978c2ecf20Sopenharmony_ci	.map_pages	= filemap_map_pages,
22988c2ecf20Sopenharmony_ci	.page_mkwrite	= btrfs_page_mkwrite,
22998c2ecf20Sopenharmony_ci};
23008c2ecf20Sopenharmony_ci
23018c2ecf20Sopenharmony_cistatic int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
23028c2ecf20Sopenharmony_ci{
23038c2ecf20Sopenharmony_ci	struct address_space *mapping = filp->f_mapping;
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_ci	if (!mapping->a_ops->readpage)
23068c2ecf20Sopenharmony_ci		return -ENOEXEC;
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	file_accessed(filp);
23098c2ecf20Sopenharmony_ci	vma->vm_ops = &btrfs_file_vm_ops;
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci	return 0;
23128c2ecf20Sopenharmony_ci}
23138c2ecf20Sopenharmony_ci
23148c2ecf20Sopenharmony_cistatic int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
23158c2ecf20Sopenharmony_ci			  int slot, u64 start, u64 end)
23168c2ecf20Sopenharmony_ci{
23178c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
23188c2ecf20Sopenharmony_ci	struct btrfs_key key;
23198c2ecf20Sopenharmony_ci
23208c2ecf20Sopenharmony_ci	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
23218c2ecf20Sopenharmony_ci		return 0;
23228c2ecf20Sopenharmony_ci
23238c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, slot);
23248c2ecf20Sopenharmony_ci	if (key.objectid != btrfs_ino(inode) ||
23258c2ecf20Sopenharmony_ci	    key.type != BTRFS_EXTENT_DATA_KEY)
23268c2ecf20Sopenharmony_ci		return 0;
23278c2ecf20Sopenharmony_ci
23288c2ecf20Sopenharmony_ci	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
23298c2ecf20Sopenharmony_ci
23308c2ecf20Sopenharmony_ci	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
23318c2ecf20Sopenharmony_ci		return 0;
23328c2ecf20Sopenharmony_ci
23338c2ecf20Sopenharmony_ci	if (btrfs_file_extent_disk_bytenr(leaf, fi))
23348c2ecf20Sopenharmony_ci		return 0;
23358c2ecf20Sopenharmony_ci
23368c2ecf20Sopenharmony_ci	if (key.offset == end)
23378c2ecf20Sopenharmony_ci		return 1;
23388c2ecf20Sopenharmony_ci	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
23398c2ecf20Sopenharmony_ci		return 1;
23408c2ecf20Sopenharmony_ci	return 0;
23418c2ecf20Sopenharmony_ci}
23428c2ecf20Sopenharmony_ci
23438c2ecf20Sopenharmony_cistatic int fill_holes(struct btrfs_trans_handle *trans,
23448c2ecf20Sopenharmony_ci		struct btrfs_inode *inode,
23458c2ecf20Sopenharmony_ci		struct btrfs_path *path, u64 offset, u64 end)
23468c2ecf20Sopenharmony_ci{
23478c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
23488c2ecf20Sopenharmony_ci	struct btrfs_root *root = inode->root;
23498c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
23508c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
23518c2ecf20Sopenharmony_ci	struct extent_map *hole_em;
23528c2ecf20Sopenharmony_ci	struct extent_map_tree *em_tree = &inode->extent_tree;
23538c2ecf20Sopenharmony_ci	struct btrfs_key key;
23548c2ecf20Sopenharmony_ci	int ret;
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_ci	if (btrfs_fs_incompat(fs_info, NO_HOLES))
23578c2ecf20Sopenharmony_ci		goto out;
23588c2ecf20Sopenharmony_ci
23598c2ecf20Sopenharmony_ci	key.objectid = btrfs_ino(inode);
23608c2ecf20Sopenharmony_ci	key.type = BTRFS_EXTENT_DATA_KEY;
23618c2ecf20Sopenharmony_ci	key.offset = offset;
23628c2ecf20Sopenharmony_ci
23638c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
23648c2ecf20Sopenharmony_ci	if (ret <= 0) {
23658c2ecf20Sopenharmony_ci		/*
23668c2ecf20Sopenharmony_ci		 * We should have dropped this offset, so if we find it then
23678c2ecf20Sopenharmony_ci		 * something has gone horribly wrong.
23688c2ecf20Sopenharmony_ci		 */
23698c2ecf20Sopenharmony_ci		if (ret == 0)
23708c2ecf20Sopenharmony_ci			ret = -EINVAL;
23718c2ecf20Sopenharmony_ci		return ret;
23728c2ecf20Sopenharmony_ci	}
23738c2ecf20Sopenharmony_ci
23748c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
23758c2ecf20Sopenharmony_ci	if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
23768c2ecf20Sopenharmony_ci		u64 num_bytes;
23778c2ecf20Sopenharmony_ci
23788c2ecf20Sopenharmony_ci		path->slots[0]--;
23798c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
23808c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
23818c2ecf20Sopenharmony_ci		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
23828c2ecf20Sopenharmony_ci			end - offset;
23838c2ecf20Sopenharmony_ci		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
23848c2ecf20Sopenharmony_ci		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
23858c2ecf20Sopenharmony_ci		btrfs_set_file_extent_offset(leaf, fi, 0);
23868c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
23878c2ecf20Sopenharmony_ci		goto out;
23888c2ecf20Sopenharmony_ci	}
23898c2ecf20Sopenharmony_ci
23908c2ecf20Sopenharmony_ci	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
23918c2ecf20Sopenharmony_ci		u64 num_bytes;
23928c2ecf20Sopenharmony_ci
23938c2ecf20Sopenharmony_ci		key.offset = offset;
23948c2ecf20Sopenharmony_ci		btrfs_set_item_key_safe(fs_info, path, &key);
23958c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
23968c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
23978c2ecf20Sopenharmony_ci		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
23988c2ecf20Sopenharmony_ci			offset;
23998c2ecf20Sopenharmony_ci		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
24008c2ecf20Sopenharmony_ci		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
24018c2ecf20Sopenharmony_ci		btrfs_set_file_extent_offset(leaf, fi, 0);
24028c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
24038c2ecf20Sopenharmony_ci		goto out;
24048c2ecf20Sopenharmony_ci	}
24058c2ecf20Sopenharmony_ci	btrfs_release_path(path);
24068c2ecf20Sopenharmony_ci
24078c2ecf20Sopenharmony_ci	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
24088c2ecf20Sopenharmony_ci			offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
24098c2ecf20Sopenharmony_ci	if (ret)
24108c2ecf20Sopenharmony_ci		return ret;
24118c2ecf20Sopenharmony_ci
24128c2ecf20Sopenharmony_ciout:
24138c2ecf20Sopenharmony_ci	btrfs_release_path(path);
24148c2ecf20Sopenharmony_ci
24158c2ecf20Sopenharmony_ci	hole_em = alloc_extent_map();
24168c2ecf20Sopenharmony_ci	if (!hole_em) {
24178c2ecf20Sopenharmony_ci		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
24188c2ecf20Sopenharmony_ci		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
24198c2ecf20Sopenharmony_ci	} else {
24208c2ecf20Sopenharmony_ci		hole_em->start = offset;
24218c2ecf20Sopenharmony_ci		hole_em->len = end - offset;
24228c2ecf20Sopenharmony_ci		hole_em->ram_bytes = hole_em->len;
24238c2ecf20Sopenharmony_ci		hole_em->orig_start = offset;
24248c2ecf20Sopenharmony_ci
24258c2ecf20Sopenharmony_ci		hole_em->block_start = EXTENT_MAP_HOLE;
24268c2ecf20Sopenharmony_ci		hole_em->block_len = 0;
24278c2ecf20Sopenharmony_ci		hole_em->orig_block_len = 0;
24288c2ecf20Sopenharmony_ci		hole_em->compress_type = BTRFS_COMPRESS_NONE;
24298c2ecf20Sopenharmony_ci		hole_em->generation = trans->transid;
24308c2ecf20Sopenharmony_ci
24318c2ecf20Sopenharmony_ci		do {
24328c2ecf20Sopenharmony_ci			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
24338c2ecf20Sopenharmony_ci			write_lock(&em_tree->lock);
24348c2ecf20Sopenharmony_ci			ret = add_extent_mapping(em_tree, hole_em, 1);
24358c2ecf20Sopenharmony_ci			write_unlock(&em_tree->lock);
24368c2ecf20Sopenharmony_ci		} while (ret == -EEXIST);
24378c2ecf20Sopenharmony_ci		free_extent_map(hole_em);
24388c2ecf20Sopenharmony_ci		if (ret)
24398c2ecf20Sopenharmony_ci			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
24408c2ecf20Sopenharmony_ci					&inode->runtime_flags);
24418c2ecf20Sopenharmony_ci	}
24428c2ecf20Sopenharmony_ci
24438c2ecf20Sopenharmony_ci	return 0;
24448c2ecf20Sopenharmony_ci}
24458c2ecf20Sopenharmony_ci
24468c2ecf20Sopenharmony_ci/*
24478c2ecf20Sopenharmony_ci * Find a hole extent on given inode and change start/len to the end of hole
24488c2ecf20Sopenharmony_ci * extent.(hole/vacuum extent whose em->start <= start &&
24498c2ecf20Sopenharmony_ci *	   em->start + em->len > start)
24508c2ecf20Sopenharmony_ci * When a hole extent is found, return 1 and modify start/len.
24518c2ecf20Sopenharmony_ci */
24528c2ecf20Sopenharmony_cistatic int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
24538c2ecf20Sopenharmony_ci{
24548c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
24558c2ecf20Sopenharmony_ci	struct extent_map *em;
24568c2ecf20Sopenharmony_ci	int ret = 0;
24578c2ecf20Sopenharmony_ci
24588c2ecf20Sopenharmony_ci	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
24598c2ecf20Sopenharmony_ci			      round_down(*start, fs_info->sectorsize),
24608c2ecf20Sopenharmony_ci			      round_up(*len, fs_info->sectorsize));
24618c2ecf20Sopenharmony_ci	if (IS_ERR(em))
24628c2ecf20Sopenharmony_ci		return PTR_ERR(em);
24638c2ecf20Sopenharmony_ci
24648c2ecf20Sopenharmony_ci	/* Hole or vacuum extent(only exists in no-hole mode) */
24658c2ecf20Sopenharmony_ci	if (em->block_start == EXTENT_MAP_HOLE) {
24668c2ecf20Sopenharmony_ci		ret = 1;
24678c2ecf20Sopenharmony_ci		*len = em->start + em->len > *start + *len ?
24688c2ecf20Sopenharmony_ci		       0 : *start + *len - em->start - em->len;
24698c2ecf20Sopenharmony_ci		*start = em->start + em->len;
24708c2ecf20Sopenharmony_ci	}
24718c2ecf20Sopenharmony_ci	free_extent_map(em);
24728c2ecf20Sopenharmony_ci	return ret;
24738c2ecf20Sopenharmony_ci}
24748c2ecf20Sopenharmony_ci
24758c2ecf20Sopenharmony_cistatic int btrfs_punch_hole_lock_range(struct inode *inode,
24768c2ecf20Sopenharmony_ci				       const u64 lockstart,
24778c2ecf20Sopenharmony_ci				       const u64 lockend,
24788c2ecf20Sopenharmony_ci				       struct extent_state **cached_state)
24798c2ecf20Sopenharmony_ci{
24808c2ecf20Sopenharmony_ci	while (1) {
24818c2ecf20Sopenharmony_ci		struct btrfs_ordered_extent *ordered;
24828c2ecf20Sopenharmony_ci		int ret;
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci		truncate_pagecache_range(inode, lockstart, lockend);
24858c2ecf20Sopenharmony_ci
24868c2ecf20Sopenharmony_ci		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
24878c2ecf20Sopenharmony_ci				 cached_state);
24888c2ecf20Sopenharmony_ci		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
24898c2ecf20Sopenharmony_ci							    lockend);
24908c2ecf20Sopenharmony_ci
24918c2ecf20Sopenharmony_ci		/*
24928c2ecf20Sopenharmony_ci		 * We need to make sure we have no ordered extents in this range
24938c2ecf20Sopenharmony_ci		 * and nobody raced in and read a page in this range, if we did
24948c2ecf20Sopenharmony_ci		 * we need to try again.
24958c2ecf20Sopenharmony_ci		 */
24968c2ecf20Sopenharmony_ci		if ((!ordered ||
24978c2ecf20Sopenharmony_ci		    (ordered->file_offset + ordered->num_bytes <= lockstart ||
24988c2ecf20Sopenharmony_ci		     ordered->file_offset > lockend)) &&
24998c2ecf20Sopenharmony_ci		     !filemap_range_has_page(inode->i_mapping,
25008c2ecf20Sopenharmony_ci					     lockstart, lockend)) {
25018c2ecf20Sopenharmony_ci			if (ordered)
25028c2ecf20Sopenharmony_ci				btrfs_put_ordered_extent(ordered);
25038c2ecf20Sopenharmony_ci			break;
25048c2ecf20Sopenharmony_ci		}
25058c2ecf20Sopenharmony_ci		if (ordered)
25068c2ecf20Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
25078c2ecf20Sopenharmony_ci		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
25088c2ecf20Sopenharmony_ci				     lockend, cached_state);
25098c2ecf20Sopenharmony_ci		ret = btrfs_wait_ordered_range(inode, lockstart,
25108c2ecf20Sopenharmony_ci					       lockend - lockstart + 1);
25118c2ecf20Sopenharmony_ci		if (ret)
25128c2ecf20Sopenharmony_ci			return ret;
25138c2ecf20Sopenharmony_ci	}
25148c2ecf20Sopenharmony_ci	return 0;
25158c2ecf20Sopenharmony_ci}
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_cistatic int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
25188c2ecf20Sopenharmony_ci				     struct inode *inode,
25198c2ecf20Sopenharmony_ci				     struct btrfs_path *path,
25208c2ecf20Sopenharmony_ci				     struct btrfs_replace_extent_info *extent_info,
25218c2ecf20Sopenharmony_ci				     const u64 replace_len)
25228c2ecf20Sopenharmony_ci{
25238c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
25248c2ecf20Sopenharmony_ci	struct btrfs_root *root = BTRFS_I(inode)->root;
25258c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *extent;
25268c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
25278c2ecf20Sopenharmony_ci	struct btrfs_key key;
25288c2ecf20Sopenharmony_ci	int slot;
25298c2ecf20Sopenharmony_ci	struct btrfs_ref ref = { 0 };
25308c2ecf20Sopenharmony_ci	int ret;
25318c2ecf20Sopenharmony_ci
25328c2ecf20Sopenharmony_ci	if (replace_len == 0)
25338c2ecf20Sopenharmony_ci		return 0;
25348c2ecf20Sopenharmony_ci
25358c2ecf20Sopenharmony_ci	if (extent_info->disk_offset == 0 &&
25368c2ecf20Sopenharmony_ci	    btrfs_fs_incompat(fs_info, NO_HOLES))
25378c2ecf20Sopenharmony_ci		return 0;
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci	key.objectid = btrfs_ino(BTRFS_I(inode));
25408c2ecf20Sopenharmony_ci	key.type = BTRFS_EXTENT_DATA_KEY;
25418c2ecf20Sopenharmony_ci	key.offset = extent_info->file_offset;
25428c2ecf20Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, root, path, &key,
25438c2ecf20Sopenharmony_ci				      sizeof(struct btrfs_file_extent_item));
25448c2ecf20Sopenharmony_ci	if (ret)
25458c2ecf20Sopenharmony_ci		return ret;
25468c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
25478c2ecf20Sopenharmony_ci	slot = path->slots[0];
25488c2ecf20Sopenharmony_ci	write_extent_buffer(leaf, extent_info->extent_buf,
25498c2ecf20Sopenharmony_ci			    btrfs_item_ptr_offset(leaf, slot),
25508c2ecf20Sopenharmony_ci			    sizeof(struct btrfs_file_extent_item));
25518c2ecf20Sopenharmony_ci	extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
25528c2ecf20Sopenharmony_ci	ASSERT(btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE);
25538c2ecf20Sopenharmony_ci	btrfs_set_file_extent_offset(leaf, extent, extent_info->data_offset);
25548c2ecf20Sopenharmony_ci	btrfs_set_file_extent_num_bytes(leaf, extent, replace_len);
25558c2ecf20Sopenharmony_ci	if (extent_info->is_new_extent)
25568c2ecf20Sopenharmony_ci		btrfs_set_file_extent_generation(leaf, extent, trans->transid);
25578c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(leaf);
25588c2ecf20Sopenharmony_ci	btrfs_release_path(path);
25598c2ecf20Sopenharmony_ci
25608c2ecf20Sopenharmony_ci	ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
25618c2ecf20Sopenharmony_ci			extent_info->file_offset, replace_len);
25628c2ecf20Sopenharmony_ci	if (ret)
25638c2ecf20Sopenharmony_ci		return ret;
25648c2ecf20Sopenharmony_ci
25658c2ecf20Sopenharmony_ci	/* If it's a hole, nothing more needs to be done. */
25668c2ecf20Sopenharmony_ci	if (extent_info->disk_offset == 0)
25678c2ecf20Sopenharmony_ci		return 0;
25688c2ecf20Sopenharmony_ci
25698c2ecf20Sopenharmony_ci	inode_add_bytes(inode, replace_len);
25708c2ecf20Sopenharmony_ci
25718c2ecf20Sopenharmony_ci	if (extent_info->is_new_extent && extent_info->insertions == 0) {
25728c2ecf20Sopenharmony_ci		key.objectid = extent_info->disk_offset;
25738c2ecf20Sopenharmony_ci		key.type = BTRFS_EXTENT_ITEM_KEY;
25748c2ecf20Sopenharmony_ci		key.offset = extent_info->disk_len;
25758c2ecf20Sopenharmony_ci		ret = btrfs_alloc_reserved_file_extent(trans, root,
25768c2ecf20Sopenharmony_ci						       btrfs_ino(BTRFS_I(inode)),
25778c2ecf20Sopenharmony_ci						       extent_info->file_offset,
25788c2ecf20Sopenharmony_ci						       extent_info->qgroup_reserved,
25798c2ecf20Sopenharmony_ci						       &key);
25808c2ecf20Sopenharmony_ci	} else {
25818c2ecf20Sopenharmony_ci		u64 ref_offset;
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
25848c2ecf20Sopenharmony_ci				       extent_info->disk_offset,
25858c2ecf20Sopenharmony_ci				       extent_info->disk_len, 0);
25868c2ecf20Sopenharmony_ci		ref_offset = extent_info->file_offset - extent_info->data_offset;
25878c2ecf20Sopenharmony_ci		btrfs_init_data_ref(&ref, root->root_key.objectid,
25888c2ecf20Sopenharmony_ci				    btrfs_ino(BTRFS_I(inode)), ref_offset);
25898c2ecf20Sopenharmony_ci		ret = btrfs_inc_extent_ref(trans, &ref);
25908c2ecf20Sopenharmony_ci	}
25918c2ecf20Sopenharmony_ci
25928c2ecf20Sopenharmony_ci	extent_info->insertions++;
25938c2ecf20Sopenharmony_ci
25948c2ecf20Sopenharmony_ci	return ret;
25958c2ecf20Sopenharmony_ci}
25968c2ecf20Sopenharmony_ci
25978c2ecf20Sopenharmony_ci/*
 * Drop the file extents of @inode in the byte range [@start, @end] and then
 * either fill the range with a hole (@extent_info == NULL) or insert the
 * extent described by @extent_info in its place.
 *
25988c2ecf20Sopenharmony_ci * The respective range must have been previously locked, as well as the inode.
25998c2ecf20Sopenharmony_ci * The end offset is inclusive (last byte of the range).
26008c2ecf20Sopenharmony_ci * @extent_info is NULL for fallocate's hole punching and non-NULL when replacing
26018c2ecf20Sopenharmony_ci * the file range with an extent.
26028c2ecf20Sopenharmony_ci * When not punching a hole, we don't want to end up in a state where we dropped
26038c2ecf20Sopenharmony_ci * extents without inserting a new one, so we must abort the transaction to avoid
26048c2ecf20Sopenharmony_ci * a corruption.
 *
 * The work may span several transactions: every time dropping extents stops
 * with -ENOSPC, the progress made so far is recorded, the current transaction
 * is ended and a new one is started before continuing.
 *
 * @inode:       inode whose file extents are replaced
 * @path:        btree path handed to the helpers that touch extent items
 * @start:       first byte of the range
 * @end:         last byte of the range (inclusive), must be greater than @start
 * @extent_info: replacement extent description, or NULL to punch a hole; its
 *               data_len, data_offset and file_offset are advanced as pieces
 *               of the extent get inserted
 * @trans_out:   on success receives the transaction handle that is still open;
 *               the caller has to end it. Not written on failure.
 *
 * Returns 0 on success, -EINVAL if @end <= @start, -ENOMEM if the temporary
 * block reservation can't be allocated, or another negative errno from the
 * helpers. On failure any open transaction is ended here.
26058c2ecf20Sopenharmony_ci */
26068c2ecf20Sopenharmony_ciint btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path,
26078c2ecf20Sopenharmony_ci			   const u64 start, const u64 end,
26088c2ecf20Sopenharmony_ci			   struct btrfs_replace_extent_info *extent_info,
26098c2ecf20Sopenharmony_ci			   struct btrfs_trans_handle **trans_out)
26108c2ecf20Sopenharmony_ci{
26118c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
26128c2ecf20Sopenharmony_ci	u64 min_size = btrfs_calc_insert_metadata_size(fs_info, 1);
26138c2ecf20Sopenharmony_ci	u64 ino_size = round_up(inode->i_size, fs_info->sectorsize);
26148c2ecf20Sopenharmony_ci	struct btrfs_root *root = BTRFS_I(inode)->root;
26158c2ecf20Sopenharmony_ci	struct btrfs_trans_handle *trans = NULL;
26168c2ecf20Sopenharmony_ci	struct btrfs_block_rsv *rsv;
26178c2ecf20Sopenharmony_ci	unsigned int rsv_count;
26188c2ecf20Sopenharmony_ci	u64 cur_offset;
26198c2ecf20Sopenharmony_ci	u64 drop_end;
26208c2ecf20Sopenharmony_ci	u64 len = end - start;
26218c2ecf20Sopenharmony_ci	int ret = 0;
26228c2ecf20Sopenharmony_ci
26238c2ecf20Sopenharmony_ci	if (end <= start)
26248c2ecf20Sopenharmony_ci		return -EINVAL;
26258c2ecf20Sopenharmony_ci
	/*
	 * Temporary reservation, sized for one item insertion, that the
	 * transaction uses while extents are being dropped.
	 */
26268c2ecf20Sopenharmony_ci	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
26278c2ecf20Sopenharmony_ci	if (!rsv) {
26288c2ecf20Sopenharmony_ci		ret = -ENOMEM;
26298c2ecf20Sopenharmony_ci		goto out;
26308c2ecf20Sopenharmony_ci	}
26318c2ecf20Sopenharmony_ci	rsv->size = btrfs_calc_insert_metadata_size(fs_info, 1);
26328c2ecf20Sopenharmony_ci	rsv->failfast = 1;
26338c2ecf20Sopenharmony_ci
26348c2ecf20Sopenharmony_ci	/*
26358c2ecf20Sopenharmony_ci	 * 1 - update the inode
26368c2ecf20Sopenharmony_ci	 * 1 - removing the extents in the range
26378c2ecf20Sopenharmony_ci	 * 1 - adding the hole extent if no_holes isn't set or if we are
26388c2ecf20Sopenharmony_ci	 *     replacing the range with a new extent
26398c2ecf20Sopenharmony_ci	 */
26408c2ecf20Sopenharmony_ci	if (!btrfs_fs_incompat(fs_info, NO_HOLES) || extent_info)
26418c2ecf20Sopenharmony_ci		rsv_count = 3;
26428c2ecf20Sopenharmony_ci	else
26438c2ecf20Sopenharmony_ci		rsv_count = 2;
26448c2ecf20Sopenharmony_ci
26458c2ecf20Sopenharmony_ci	trans = btrfs_start_transaction(root, rsv_count);
26468c2ecf20Sopenharmony_ci	if (IS_ERR(trans)) {
26478c2ecf20Sopenharmony_ci		ret = PTR_ERR(trans);
26488c2ecf20Sopenharmony_ci		trans = NULL;
26498c2ecf20Sopenharmony_ci		goto out_free;
26508c2ecf20Sopenharmony_ci	}
26518c2ecf20Sopenharmony_ci
	/*
	 * Move min_size bytes from the transaction reservation into our
	 * temporary one. Not expected to fail, see the identical call in the
	 * loop below.
	 */
26528c2ecf20Sopenharmony_ci	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
26538c2ecf20Sopenharmony_ci				      min_size, false);
26548c2ecf20Sopenharmony_ci	BUG_ON(ret);
26558c2ecf20Sopenharmony_ci	trans->block_rsv = rsv;
26568c2ecf20Sopenharmony_ci
	/*
	 * Drop the extents piece by piece. The loop only goes around again when
	 * __btrfs_drop_extents() returned -ENOSPC; any other result (success
	 * included) leaves the loop.
	 */
26578c2ecf20Sopenharmony_ci	cur_offset = start;
26588c2ecf20Sopenharmony_ci	while (cur_offset < end) {
26598c2ecf20Sopenharmony_ci		ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path,
26608c2ecf20Sopenharmony_ci					   cur_offset, end + 1, &drop_end,
26618c2ecf20Sopenharmony_ci					   1, 0, 0, NULL);
26628c2ecf20Sopenharmony_ci		if (ret != -ENOSPC) {
26638c2ecf20Sopenharmony_ci			/*
26648c2ecf20Sopenharmony_ci			 * The only time we don't want to abort is if we are
26658c2ecf20Sopenharmony_ci			 * attempting to clone a partial inline extent, in which
26668c2ecf20Sopenharmony_ci			 * case we'll get EOPNOTSUPP.  However if we aren't
26678c2ecf20Sopenharmony_ci			 * cloning we need to abort no matter what, because if we
26688c2ecf20Sopenharmony_ci			 * got EOPNOTSUPP via prealloc then we messed up and
26698c2ecf20Sopenharmony_ci			 * need to abort.
26708c2ecf20Sopenharmony_ci			 */
26718c2ecf20Sopenharmony_ci			if (ret &&
26728c2ecf20Sopenharmony_ci			    (ret != -EOPNOTSUPP ||
26738c2ecf20Sopenharmony_ci			     (extent_info && extent_info->is_new_extent)))
26748c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
26758c2ecf20Sopenharmony_ci			break;
26768c2ecf20Sopenharmony_ci		}
26778c2ecf20Sopenharmony_ci
		/*
		 * Use the transaction's own reservation for recording the
		 * progress made up to drop_end.
		 */
26788c2ecf20Sopenharmony_ci		trans->block_rsv = &fs_info->trans_block_rsv;
26798c2ecf20Sopenharmony_ci
26808c2ecf20Sopenharmony_ci		if (!extent_info && cur_offset < drop_end &&
26818c2ecf20Sopenharmony_ci		    cur_offset < ino_size) {
26828c2ecf20Sopenharmony_ci			ret = fill_holes(trans, BTRFS_I(inode), path,
26838c2ecf20Sopenharmony_ci					cur_offset, drop_end);
26848c2ecf20Sopenharmony_ci			if (ret) {
26858c2ecf20Sopenharmony_ci				/*
26868c2ecf20Sopenharmony_ci				 * If we failed then we didn't insert our hole
26878c2ecf20Sopenharmony_ci				 * entries for the area we dropped, so now the
26888c2ecf20Sopenharmony_ci				 * fs is corrupted, so we must abort the
26898c2ecf20Sopenharmony_ci				 * transaction.
26908c2ecf20Sopenharmony_ci				 */
26918c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
26928c2ecf20Sopenharmony_ci				break;
26938c2ecf20Sopenharmony_ci			}
26948c2ecf20Sopenharmony_ci		} else if (!extent_info && cur_offset < drop_end) {
26958c2ecf20Sopenharmony_ci			/*
26968c2ecf20Sopenharmony_ci			 * We are past the i_size here, but since we didn't
26978c2ecf20Sopenharmony_ci			 * insert holes we need to clear the mapped area so we
26988c2ecf20Sopenharmony_ci			 * know to not set disk_i_size in this area until a new
26998c2ecf20Sopenharmony_ci			 * file extent is inserted here.
27008c2ecf20Sopenharmony_ci			 */
27018c2ecf20Sopenharmony_ci			ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
27028c2ecf20Sopenharmony_ci					cur_offset, drop_end - cur_offset);
27038c2ecf20Sopenharmony_ci			if (ret) {
27048c2ecf20Sopenharmony_ci				/*
27058c2ecf20Sopenharmony_ci				 * We couldn't clear our area, so we could
27068c2ecf20Sopenharmony_ci				 * presumably adjust up and corrupt the fs, so
27078c2ecf20Sopenharmony_ci				 * we need to abort.
27088c2ecf20Sopenharmony_ci				 */
27098c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
27108c2ecf20Sopenharmony_ci				break;
27118c2ecf20Sopenharmony_ci			}
27128c2ecf20Sopenharmony_ci		}
27138c2ecf20Sopenharmony_ci
		/*
		 * Insert the part of the replacement extent that covers what
		 * was dropped so far and advance the description past it.
		 */
27148c2ecf20Sopenharmony_ci		if (extent_info && drop_end > extent_info->file_offset) {
27158c2ecf20Sopenharmony_ci			u64 replace_len = drop_end - extent_info->file_offset;
27168c2ecf20Sopenharmony_ci
27178c2ecf20Sopenharmony_ci			ret = btrfs_insert_replace_extent(trans, inode, path,
27188c2ecf20Sopenharmony_ci							extent_info, replace_len);
27198c2ecf20Sopenharmony_ci			if (ret) {
27208c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
27218c2ecf20Sopenharmony_ci				break;
27228c2ecf20Sopenharmony_ci			}
27238c2ecf20Sopenharmony_ci			extent_info->data_len -= replace_len;
27248c2ecf20Sopenharmony_ci			extent_info->data_offset += replace_len;
27258c2ecf20Sopenharmony_ci			extent_info->file_offset += replace_len;
27268c2ecf20Sopenharmony_ci		}
27278c2ecf20Sopenharmony_ci
27288c2ecf20Sopenharmony_ci		cur_offset = drop_end;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci		ret = btrfs_update_inode(trans, root, inode);
27318c2ecf20Sopenharmony_ci		if (ret)
27328c2ecf20Sopenharmony_ci			break;
27338c2ecf20Sopenharmony_ci
		/* Finish this transaction and start a fresh one for the rest. */
27348c2ecf20Sopenharmony_ci		btrfs_end_transaction(trans);
27358c2ecf20Sopenharmony_ci		btrfs_btree_balance_dirty(fs_info);
27368c2ecf20Sopenharmony_ci
27378c2ecf20Sopenharmony_ci		trans = btrfs_start_transaction(root, rsv_count);
27388c2ecf20Sopenharmony_ci		if (IS_ERR(trans)) {
27398c2ecf20Sopenharmony_ci			ret = PTR_ERR(trans);
27408c2ecf20Sopenharmony_ci			trans = NULL;
27418c2ecf20Sopenharmony_ci			break;
27428c2ecf20Sopenharmony_ci		}
27438c2ecf20Sopenharmony_ci
27448c2ecf20Sopenharmony_ci		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
27458c2ecf20Sopenharmony_ci					      rsv, min_size, false);
27468c2ecf20Sopenharmony_ci		BUG_ON(ret);	/* shouldn't happen */
27478c2ecf20Sopenharmony_ci		trans->block_rsv = rsv;
27488c2ecf20Sopenharmony_ci
		/*
		 * When punching a hole, skip over an already existing hole at
		 * cur_offset and stop early if it covers the rest of the range.
		 * NOTE(review): len starts out as end - start and is only ever
		 * updated by find_first_non_hole() itself, it is not recomputed
		 * from cur_offset - confirm that this is intended.
		 */
27498c2ecf20Sopenharmony_ci		if (!extent_info) {
27508c2ecf20Sopenharmony_ci			ret = find_first_non_hole(inode, &cur_offset, &len);
27518c2ecf20Sopenharmony_ci			if (unlikely(ret < 0))
27528c2ecf20Sopenharmony_ci				break;
27538c2ecf20Sopenharmony_ci			if (ret && !len) {
27548c2ecf20Sopenharmony_ci				ret = 0;
27558c2ecf20Sopenharmony_ci				break;
27568c2ecf20Sopenharmony_ci			}
27578c2ecf20Sopenharmony_ci		}
27588c2ecf20Sopenharmony_ci	}
27598c2ecf20Sopenharmony_ci
27608c2ecf20Sopenharmony_ci	/*
27618c2ecf20Sopenharmony_ci	 * If we were cloning, force the next fsync to be a full one since we
27628c2ecf20Sopenharmony_ci	 * replaced (or just dropped in the case of cloning holes when
27638c2ecf20Sopenharmony_ci	 * NO_HOLES is enabled) extents and extent maps.
27648c2ecf20Sopenharmony_ci	 * This is for the sake of simplicity, and cloning into files larger
27658c2ecf20Sopenharmony_ci	 * than 16Mb would force the full fsync any way (when
27668c2ecf20Sopenharmony_ci	 * try_release_extent_mapping() is invoked during page cache truncation).
27678c2ecf20Sopenharmony_ci	 */
27688c2ecf20Sopenharmony_ci	if (extent_info && !extent_info->is_new_extent)
27698c2ecf20Sopenharmony_ci		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
27708c2ecf20Sopenharmony_ci			&BTRFS_I(inode)->runtime_flags);
27718c2ecf20Sopenharmony_ci
27728c2ecf20Sopenharmony_ci	if (ret)
27738c2ecf20Sopenharmony_ci		goto out_trans;
27748c2ecf20Sopenharmony_ci
27758c2ecf20Sopenharmony_ci	trans->block_rsv = &fs_info->trans_block_rsv;
27768c2ecf20Sopenharmony_ci	/*
27778c2ecf20Sopenharmony_ci	 * If we are using the NO_HOLES feature we might have had already an
27788c2ecf20Sopenharmony_ci	 * hole that overlaps a part of the region [lockstart, lockend] and
27798c2ecf20Sopenharmony_ci	 * ends at (or beyond) lockend. Since we have no file extent items to
27808c2ecf20Sopenharmony_ci	 * represent holes, drop_end can be less than lockend and so we must
27818c2ecf20Sopenharmony_ci	 * make sure we have an extent map representing the existing hole (the
27828c2ecf20Sopenharmony_ci	 * call to __btrfs_drop_extents() might have dropped the existing extent
27838c2ecf20Sopenharmony_ci	 * map representing the existing hole), otherwise the fast fsync path
27848c2ecf20Sopenharmony_ci	 * will not record the existence of the hole region
27858c2ecf20Sopenharmony_ci	 * [existing_hole_start, lockend].
27868c2ecf20Sopenharmony_ci	 */
27878c2ecf20Sopenharmony_ci	if (drop_end <= end)
27888c2ecf20Sopenharmony_ci		drop_end = end + 1;
27898c2ecf20Sopenharmony_ci	/*
27908c2ecf20Sopenharmony_ci	 * Don't insert file hole extent item if it's for a range beyond eof
27918c2ecf20Sopenharmony_ci	 * (because it's useless) or if it represents a 0 bytes range (when
27928c2ecf20Sopenharmony_ci	 * cur_offset == drop_end).
27938c2ecf20Sopenharmony_ci	 */
27948c2ecf20Sopenharmony_ci	if (!extent_info && cur_offset < ino_size && cur_offset < drop_end) {
27958c2ecf20Sopenharmony_ci		ret = fill_holes(trans, BTRFS_I(inode), path,
27968c2ecf20Sopenharmony_ci				cur_offset, drop_end);
27978c2ecf20Sopenharmony_ci		if (ret) {
27988c2ecf20Sopenharmony_ci			/* Same comment as above. */
27998c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
28008c2ecf20Sopenharmony_ci			goto out_trans;
28018c2ecf20Sopenharmony_ci		}
28028c2ecf20Sopenharmony_ci	} else if (!extent_info && cur_offset < drop_end) {
28038c2ecf20Sopenharmony_ci		/* See the comment in the loop above for the reasoning here. */
28048c2ecf20Sopenharmony_ci		ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
28058c2ecf20Sopenharmony_ci					cur_offset, drop_end - cur_offset);
28068c2ecf20Sopenharmony_ci		if (ret) {
28078c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
28088c2ecf20Sopenharmony_ci			goto out_trans;
28098c2ecf20Sopenharmony_ci		}
28108c2ecf20Sopenharmony_ci
28118c2ecf20Sopenharmony_ci	}
	/* Insert whatever is left of the replacement extent. */
28128c2ecf20Sopenharmony_ci	if (extent_info) {
28138c2ecf20Sopenharmony_ci		ret = btrfs_insert_replace_extent(trans, inode, path, extent_info,
28148c2ecf20Sopenharmony_ci						extent_info->data_len);
28158c2ecf20Sopenharmony_ci		if (ret) {
28168c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
28178c2ecf20Sopenharmony_ci			goto out_trans;
28188c2ecf20Sopenharmony_ci		}
28198c2ecf20Sopenharmony_ci	}
28208c2ecf20Sopenharmony_ci
28218c2ecf20Sopenharmony_ciout_trans:
	/* trans is NULL when restarting the transaction in the loop failed. */
28228c2ecf20Sopenharmony_ci	if (!trans)
28238c2ecf20Sopenharmony_ci		goto out_free;
28248c2ecf20Sopenharmony_ci
	/*
	 * On success the open transaction is handed to the caller, on failure
	 * it is ended here.
	 */
28258c2ecf20Sopenharmony_ci	trans->block_rsv = &fs_info->trans_block_rsv;
28268c2ecf20Sopenharmony_ci	if (ret)
28278c2ecf20Sopenharmony_ci		btrfs_end_transaction(trans);
28288c2ecf20Sopenharmony_ci	else
28298c2ecf20Sopenharmony_ci		*trans_out = trans;
28308c2ecf20Sopenharmony_ciout_free:
28318c2ecf20Sopenharmony_ci	btrfs_free_block_rsv(fs_info, rsv);
28328c2ecf20Sopenharmony_ciout:
28338c2ecf20Sopenharmony_ci	return ret;
28348c2ecf20Sopenharmony_ci}
28358c2ecf20Sopenharmony_ci
/*
 * Punch a hole of @len bytes starting at @offset into the file behind @file.
 *
 * Outline:
 *  - wait for ordered extents in the range and take the inode lock;
 *  - skip the part of the range that already is a hole, returning early if
 *    that is all of it;
 *  - zero the partial blocks at the head and at the tail of the range with
 *    btrfs_truncate_block() (only for offsets inside the sector aligned file
 *    size);
 *  - lock the sector aligned middle part [lockstart, lockend] and drop its
 *    extents through btrfs_replace_file_extents() with a NULL extent_info;
 *  - update the inode's version and timestamps, either in the transaction
 *    returned by btrfs_replace_file_extents() or, when only partial blocks
 *    were zeroed, in a transaction of its own.
 *
 * Returns 0 on success or a negative errno.
 */
28368c2ecf20Sopenharmony_cistatic int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
28378c2ecf20Sopenharmony_ci{
28388c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
28398c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
28408c2ecf20Sopenharmony_ci	struct btrfs_root *root = BTRFS_I(inode)->root;
28418c2ecf20Sopenharmony_ci	struct extent_state *cached_state = NULL;
28428c2ecf20Sopenharmony_ci	struct btrfs_path *path;
28438c2ecf20Sopenharmony_ci	struct btrfs_trans_handle *trans = NULL;
28448c2ecf20Sopenharmony_ci	u64 lockstart;
28458c2ecf20Sopenharmony_ci	u64 lockend;
28468c2ecf20Sopenharmony_ci	u64 tail_start;
28478c2ecf20Sopenharmony_ci	u64 tail_len;
28488c2ecf20Sopenharmony_ci	u64 orig_start = offset;
28498c2ecf20Sopenharmony_ci	int ret = 0;
28508c2ecf20Sopenharmony_ci	bool same_block;
28518c2ecf20Sopenharmony_ci	u64 ino_size;
28528c2ecf20Sopenharmony_ci	bool truncated_block = false;
28538c2ecf20Sopenharmony_ci	bool updated_inode = false;
28548c2ecf20Sopenharmony_ci
28558c2ecf20Sopenharmony_ci	ret = btrfs_wait_ordered_range(inode, offset, len);
28568c2ecf20Sopenharmony_ci	if (ret)
28578c2ecf20Sopenharmony_ci		return ret;
28588c2ecf20Sopenharmony_ci
28598c2ecf20Sopenharmony_ci	inode_lock(inode);
28608c2ecf20Sopenharmony_ci	ino_size = round_up(inode->i_size, fs_info->sectorsize);
	/* Skip a hole at the start of the range; offset/len get adjusted. */
28618c2ecf20Sopenharmony_ci	ret = find_first_non_hole(inode, &offset, &len);
28628c2ecf20Sopenharmony_ci	if (ret < 0)
28638c2ecf20Sopenharmony_ci		goto out_only_mutex;
28648c2ecf20Sopenharmony_ci	if (ret && !len) {
28658c2ecf20Sopenharmony_ci		/* Already in a large hole */
28668c2ecf20Sopenharmony_ci		ret = 0;
28678c2ecf20Sopenharmony_ci		goto out_only_mutex;
28688c2ecf20Sopenharmony_ci	}
28698c2ecf20Sopenharmony_ci
28708c2ecf20Sopenharmony_ci	ret = file_modified(file);
28718c2ecf20Sopenharmony_ci	if (ret)
28728c2ecf20Sopenharmony_ci		goto out_only_mutex;
28738c2ecf20Sopenharmony_ci
	/*
	 * [lockstart, lockend] is the part of the range made of whole sectors;
	 * lockend is inclusive and ends up below lockstart when the range
	 * contains no whole sector.
	 */
28748c2ecf20Sopenharmony_ci	lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
28758c2ecf20Sopenharmony_ci	lockend = round_down(offset + len,
28768c2ecf20Sopenharmony_ci			     btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
28778c2ecf20Sopenharmony_ci	same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
28788c2ecf20Sopenharmony_ci		== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
28798c2ecf20Sopenharmony_ci	/*
28808c2ecf20Sopenharmony_ci	 * We needn't truncate any block which is beyond the end of the file
28818c2ecf20Sopenharmony_ci	 * because we are sure there is no data there.
28828c2ecf20Sopenharmony_ci	 */
28838c2ecf20Sopenharmony_ci	/*
28848c2ecf20Sopenharmony_ci	 * Only do this if we are in the same block and we aren't doing the
28858c2ecf20Sopenharmony_ci	 * entire block.
28868c2ecf20Sopenharmony_ci	 */
28878c2ecf20Sopenharmony_ci	if (same_block && len < fs_info->sectorsize) {
28888c2ecf20Sopenharmony_ci		if (offset < ino_size) {
28898c2ecf20Sopenharmony_ci			truncated_block = true;
28908c2ecf20Sopenharmony_ci			ret = btrfs_truncate_block(inode, offset, len, 0);
28918c2ecf20Sopenharmony_ci		} else {
28928c2ecf20Sopenharmony_ci			ret = 0;
28938c2ecf20Sopenharmony_ci		}
28948c2ecf20Sopenharmony_ci		goto out_only_mutex;
28958c2ecf20Sopenharmony_ci	}
28968c2ecf20Sopenharmony_ci
28978c2ecf20Sopenharmony_ci	/* zero back part of the first block */
28988c2ecf20Sopenharmony_ci	if (offset < ino_size) {
28998c2ecf20Sopenharmony_ci		truncated_block = true;
29008c2ecf20Sopenharmony_ci		ret = btrfs_truncate_block(inode, offset, 0, 0);
29018c2ecf20Sopenharmony_ci		if (ret) {
29028c2ecf20Sopenharmony_ci			inode_unlock(inode);
29038c2ecf20Sopenharmony_ci			return ret;
29048c2ecf20Sopenharmony_ci		}
29058c2ecf20Sopenharmony_ci	}
29068c2ecf20Sopenharmony_ci
29078c2ecf20Sopenharmony_ci	/* Check the aligned pages after the first unaligned page,
29088c2ecf20Sopenharmony_ci	 * if offset != orig_start, which means the first unaligned page
29098c2ecf20Sopenharmony_ci	 * including several following pages are already in holes,
29108c2ecf20Sopenharmony_ci	 * the extra check can be skipped */
29118c2ecf20Sopenharmony_ci	if (offset == orig_start) {
29128c2ecf20Sopenharmony_ci		/* after truncate page, check hole again */
29138c2ecf20Sopenharmony_ci		len = offset + len - lockstart;
29148c2ecf20Sopenharmony_ci		offset = lockstart;
29158c2ecf20Sopenharmony_ci		ret = find_first_non_hole(inode, &offset, &len);
29168c2ecf20Sopenharmony_ci		if (ret < 0)
29178c2ecf20Sopenharmony_ci			goto out_only_mutex;
29188c2ecf20Sopenharmony_ci		if (ret && !len) {
29198c2ecf20Sopenharmony_ci			ret = 0;
29208c2ecf20Sopenharmony_ci			goto out_only_mutex;
29218c2ecf20Sopenharmony_ci		}
29228c2ecf20Sopenharmony_ci		lockstart = offset;
29238c2ecf20Sopenharmony_ci	}
29248c2ecf20Sopenharmony_ci
29258c2ecf20Sopenharmony_ci	/* Check the tail unaligned part is in a hole */
29268c2ecf20Sopenharmony_ci	tail_start = lockend + 1;
29278c2ecf20Sopenharmony_ci	tail_len = offset + len - tail_start;
29288c2ecf20Sopenharmony_ci	if (tail_len) {
29298c2ecf20Sopenharmony_ci		ret = find_first_non_hole(inode, &tail_start, &tail_len);
29308c2ecf20Sopenharmony_ci		if (unlikely(ret < 0))
29318c2ecf20Sopenharmony_ci			goto out_only_mutex;
29328c2ecf20Sopenharmony_ci		if (!ret) {
29338c2ecf20Sopenharmony_ci			/* zero the front end of the last page */
29348c2ecf20Sopenharmony_ci			if (tail_start + tail_len < ino_size) {
29358c2ecf20Sopenharmony_ci				truncated_block = true;
29368c2ecf20Sopenharmony_ci				ret = btrfs_truncate_block(inode,
29378c2ecf20Sopenharmony_ci							tail_start + tail_len,
29388c2ecf20Sopenharmony_ci							0, 1);
29398c2ecf20Sopenharmony_ci				if (ret)
29408c2ecf20Sopenharmony_ci					goto out_only_mutex;
29418c2ecf20Sopenharmony_ci			}
29428c2ecf20Sopenharmony_ci		}
29438c2ecf20Sopenharmony_ci	}
29448c2ecf20Sopenharmony_ci
	/* No whole sector left in the range, so there are no extents to drop. */
29458c2ecf20Sopenharmony_ci	if (lockend < lockstart) {
29468c2ecf20Sopenharmony_ci		ret = 0;
29478c2ecf20Sopenharmony_ci		goto out_only_mutex;
29488c2ecf20Sopenharmony_ci	}
29498c2ecf20Sopenharmony_ci
29508c2ecf20Sopenharmony_ci	ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
29518c2ecf20Sopenharmony_ci					  &cached_state);
29528c2ecf20Sopenharmony_ci	if (ret)
29538c2ecf20Sopenharmony_ci		goto out_only_mutex;
29548c2ecf20Sopenharmony_ci
29558c2ecf20Sopenharmony_ci	path = btrfs_alloc_path();
29568c2ecf20Sopenharmony_ci	if (!path) {
29578c2ecf20Sopenharmony_ci		ret = -ENOMEM;
29588c2ecf20Sopenharmony_ci		goto out;
29598c2ecf20Sopenharmony_ci	}
29608c2ecf20Sopenharmony_ci
	/*
	 * A NULL extent_info means "punch a hole". On success trans holds the
	 * transaction that is still open and has to be ended here.
	 */
29618c2ecf20Sopenharmony_ci	ret = btrfs_replace_file_extents(inode, path, lockstart, lockend, NULL,
29628c2ecf20Sopenharmony_ci				     &trans);
29638c2ecf20Sopenharmony_ci	btrfs_free_path(path);
29648c2ecf20Sopenharmony_ci	if (ret)
29658c2ecf20Sopenharmony_ci		goto out;
29668c2ecf20Sopenharmony_ci
29678c2ecf20Sopenharmony_ci	ASSERT(trans != NULL);
29688c2ecf20Sopenharmony_ci	inode_inc_iversion(inode);
29698c2ecf20Sopenharmony_ci	inode->i_mtime = inode->i_ctime = current_time(inode);
29708c2ecf20Sopenharmony_ci	ret = btrfs_update_inode(trans, root, inode);
29718c2ecf20Sopenharmony_ci	updated_inode = true;
29728c2ecf20Sopenharmony_ci	btrfs_end_transaction(trans);
29738c2ecf20Sopenharmony_ci	btrfs_btree_balance_dirty(fs_info);
29748c2ecf20Sopenharmony_ciout:
29758c2ecf20Sopenharmony_ci	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
29768c2ecf20Sopenharmony_ci			     &cached_state);
29778c2ecf20Sopenharmony_ciout_only_mutex:
29788c2ecf20Sopenharmony_ci	if (!updated_inode && truncated_block && !ret) {
29798c2ecf20Sopenharmony_ci		/*
29808c2ecf20Sopenharmony_ci		 * If we only end up zeroing part of a page, we still need to
29818c2ecf20Sopenharmony_ci		 * update the inode item, so that all the time fields are
29828c2ecf20Sopenharmony_ci		 * updated as well as the necessary btrfs inode in memory fields
29838c2ecf20Sopenharmony_ci		 * for detecting, at fsync time, if the inode isn't yet in the
29848c2ecf20Sopenharmony_ci		 * log tree or it's there but not up to date.
29858c2ecf20Sopenharmony_ci		 */
29868c2ecf20Sopenharmony_ci		struct timespec64 now = current_time(inode);
29878c2ecf20Sopenharmony_ci
29888c2ecf20Sopenharmony_ci		inode_inc_iversion(inode);
29898c2ecf20Sopenharmony_ci		inode->i_mtime = now;
29908c2ecf20Sopenharmony_ci		inode->i_ctime = now;
29918c2ecf20Sopenharmony_ci		trans = btrfs_start_transaction(root, 1);
29928c2ecf20Sopenharmony_ci		if (IS_ERR(trans)) {
29938c2ecf20Sopenharmony_ci			ret = PTR_ERR(trans);
29948c2ecf20Sopenharmony_ci		} else {
29958c2ecf20Sopenharmony_ci			int ret2;
29968c2ecf20Sopenharmony_ci
			/* Report the first error of update and end. */
29978c2ecf20Sopenharmony_ci			ret = btrfs_update_inode(trans, root, inode);
29988c2ecf20Sopenharmony_ci			ret2 = btrfs_end_transaction(trans);
29998c2ecf20Sopenharmony_ci			if (!ret)
30008c2ecf20Sopenharmony_ci				ret = ret2;
30018c2ecf20Sopenharmony_ci		}
30028c2ecf20Sopenharmony_ci	}
30038c2ecf20Sopenharmony_ci	inode_unlock(inode);
30048c2ecf20Sopenharmony_ci	return ret;
30058c2ecf20Sopenharmony_ci}
30068c2ecf20Sopenharmony_ci
30078c2ecf20Sopenharmony_ci/* Helper structure to record which range is already reserved */
30088c2ecf20Sopenharmony_cistruct falloc_range {
30098c2ecf20Sopenharmony_ci	struct list_head list;
30108c2ecf20Sopenharmony_ci	u64 start;
30118c2ecf20Sopenharmony_ci	u64 len;
30128c2ecf20Sopenharmony_ci};
30138c2ecf20Sopenharmony_ci
30148c2ecf20Sopenharmony_ci/*
30158c2ecf20Sopenharmony_ci * Helper function to add falloc range
30168c2ecf20Sopenharmony_ci *
30178c2ecf20Sopenharmony_ci * Caller should have locked the larger range of extent containing
30188c2ecf20Sopenharmony_ci * [start, len)
30198c2ecf20Sopenharmony_ci */
30208c2ecf20Sopenharmony_cistatic int add_falloc_range(struct list_head *head, u64 start, u64 len)
30218c2ecf20Sopenharmony_ci{
30228c2ecf20Sopenharmony_ci	struct falloc_range *prev = NULL;
30238c2ecf20Sopenharmony_ci	struct falloc_range *range = NULL;
30248c2ecf20Sopenharmony_ci
30258c2ecf20Sopenharmony_ci	if (list_empty(head))
30268c2ecf20Sopenharmony_ci		goto insert;
30278c2ecf20Sopenharmony_ci
30288c2ecf20Sopenharmony_ci	/*
30298c2ecf20Sopenharmony_ci	 * As fallocate iterate by bytenr order, we only need to check
30308c2ecf20Sopenharmony_ci	 * the last range.
30318c2ecf20Sopenharmony_ci	 */
30328c2ecf20Sopenharmony_ci	prev = list_entry(head->prev, struct falloc_range, list);
30338c2ecf20Sopenharmony_ci	if (prev->start + prev->len == start) {
30348c2ecf20Sopenharmony_ci		prev->len += len;
30358c2ecf20Sopenharmony_ci		return 0;
30368c2ecf20Sopenharmony_ci	}
30378c2ecf20Sopenharmony_ciinsert:
30388c2ecf20Sopenharmony_ci	range = kmalloc(sizeof(*range), GFP_KERNEL);
30398c2ecf20Sopenharmony_ci	if (!range)
30408c2ecf20Sopenharmony_ci		return -ENOMEM;
30418c2ecf20Sopenharmony_ci	range->start = start;
30428c2ecf20Sopenharmony_ci	range->len = len;
30438c2ecf20Sopenharmony_ci	list_add_tail(&range->list, head);
30448c2ecf20Sopenharmony_ci	return 0;
30458c2ecf20Sopenharmony_ci}
30468c2ecf20Sopenharmony_ci
30478c2ecf20Sopenharmony_cistatic int btrfs_fallocate_update_isize(struct inode *inode,
30488c2ecf20Sopenharmony_ci					const u64 end,
30498c2ecf20Sopenharmony_ci					const int mode)
30508c2ecf20Sopenharmony_ci{
30518c2ecf20Sopenharmony_ci	struct btrfs_trans_handle *trans;
30528c2ecf20Sopenharmony_ci	struct btrfs_root *root = BTRFS_I(inode)->root;
30538c2ecf20Sopenharmony_ci	int ret;
30548c2ecf20Sopenharmony_ci	int ret2;
30558c2ecf20Sopenharmony_ci
30568c2ecf20Sopenharmony_ci	if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
30578c2ecf20Sopenharmony_ci		return 0;
30588c2ecf20Sopenharmony_ci
30598c2ecf20Sopenharmony_ci	trans = btrfs_start_transaction(root, 1);
30608c2ecf20Sopenharmony_ci	if (IS_ERR(trans))
30618c2ecf20Sopenharmony_ci		return PTR_ERR(trans);
30628c2ecf20Sopenharmony_ci
30638c2ecf20Sopenharmony_ci	inode->i_ctime = current_time(inode);
30648c2ecf20Sopenharmony_ci	i_size_write(inode, end);
30658c2ecf20Sopenharmony_ci	btrfs_inode_safe_disk_i_size_write(inode, 0);
30668c2ecf20Sopenharmony_ci	ret = btrfs_update_inode(trans, root, inode);
30678c2ecf20Sopenharmony_ci	ret2 = btrfs_end_transaction(trans);
30688c2ecf20Sopenharmony_ci
30698c2ecf20Sopenharmony_ci	return ret ? ret : ret2;
30708c2ecf20Sopenharmony_ci}
30718c2ecf20Sopenharmony_ci
/*
 * Classification of the block found at a boundary of a zero range
 * operation, as returned by btrfs_zero_range_check_range_boundary().
 */
enum {
	/* The block maps to an extent that is neither a hole nor prealloc. */
	RANGE_BOUNDARY_WRITTEN_EXTENT = 0,
	/* The block maps to a preallocated extent. */
	RANGE_BOUNDARY_PREALLOC_EXTENT = 1,
	/* The block maps to a hole. */
	RANGE_BOUNDARY_HOLE = 2,
};
30778c2ecf20Sopenharmony_ci
30788c2ecf20Sopenharmony_cistatic int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
30798c2ecf20Sopenharmony_ci						 u64 offset)
30808c2ecf20Sopenharmony_ci{
30818c2ecf20Sopenharmony_ci	const u64 sectorsize = btrfs_inode_sectorsize(inode);
30828c2ecf20Sopenharmony_ci	struct extent_map *em;
30838c2ecf20Sopenharmony_ci	int ret;
30848c2ecf20Sopenharmony_ci
30858c2ecf20Sopenharmony_ci	offset = round_down(offset, sectorsize);
30868c2ecf20Sopenharmony_ci	em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
30878c2ecf20Sopenharmony_ci	if (IS_ERR(em))
30888c2ecf20Sopenharmony_ci		return PTR_ERR(em);
30898c2ecf20Sopenharmony_ci
30908c2ecf20Sopenharmony_ci	if (em->block_start == EXTENT_MAP_HOLE)
30918c2ecf20Sopenharmony_ci		ret = RANGE_BOUNDARY_HOLE;
30928c2ecf20Sopenharmony_ci	else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
30938c2ecf20Sopenharmony_ci		ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
30948c2ecf20Sopenharmony_ci	else
30958c2ecf20Sopenharmony_ci		ret = RANGE_BOUNDARY_WRITTEN_EXTENT;
30968c2ecf20Sopenharmony_ci
30978c2ecf20Sopenharmony_ci	free_extent_map(em);
30988c2ecf20Sopenharmony_ci	return ret;
30998c2ecf20Sopenharmony_ci}
31008c2ecf20Sopenharmony_ci
31018c2ecf20Sopenharmony_cistatic int btrfs_zero_range(struct inode *inode,
31028c2ecf20Sopenharmony_ci			    loff_t offset,
31038c2ecf20Sopenharmony_ci			    loff_t len,
31048c2ecf20Sopenharmony_ci			    const int mode)
31058c2ecf20Sopenharmony_ci{
31068c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
31078c2ecf20Sopenharmony_ci	struct extent_map *em;
31088c2ecf20Sopenharmony_ci	struct extent_changeset *data_reserved = NULL;
31098c2ecf20Sopenharmony_ci	int ret;
31108c2ecf20Sopenharmony_ci	u64 alloc_hint = 0;
31118c2ecf20Sopenharmony_ci	const u64 sectorsize = btrfs_inode_sectorsize(BTRFS_I(inode));
31128c2ecf20Sopenharmony_ci	u64 alloc_start = round_down(offset, sectorsize);
31138c2ecf20Sopenharmony_ci	u64 alloc_end = round_up(offset + len, sectorsize);
31148c2ecf20Sopenharmony_ci	u64 bytes_to_reserve = 0;
31158c2ecf20Sopenharmony_ci	bool space_reserved = false;
31168c2ecf20Sopenharmony_ci
31178c2ecf20Sopenharmony_ci	inode_dio_wait(inode);
31188c2ecf20Sopenharmony_ci
31198c2ecf20Sopenharmony_ci	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
31208c2ecf20Sopenharmony_ci			      alloc_end - alloc_start);
31218c2ecf20Sopenharmony_ci	if (IS_ERR(em)) {
31228c2ecf20Sopenharmony_ci		ret = PTR_ERR(em);
31238c2ecf20Sopenharmony_ci		goto out;
31248c2ecf20Sopenharmony_ci	}
31258c2ecf20Sopenharmony_ci
31268c2ecf20Sopenharmony_ci	/*
31278c2ecf20Sopenharmony_ci	 * Avoid hole punching and extent allocation for some cases. More cases
31288c2ecf20Sopenharmony_ci	 * could be considered, but these are unlikely common and we keep things
31298c2ecf20Sopenharmony_ci	 * as simple as possible for now. Also, intentionally, if the target
31308c2ecf20Sopenharmony_ci	 * range contains one or more prealloc extents together with regular
31318c2ecf20Sopenharmony_ci	 * extents and holes, we drop all the existing extents and allocate a
31328c2ecf20Sopenharmony_ci	 * new prealloc extent, so that we get a larger contiguous disk extent.
31338c2ecf20Sopenharmony_ci	 */
31348c2ecf20Sopenharmony_ci	if (em->start <= alloc_start &&
31358c2ecf20Sopenharmony_ci	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
31368c2ecf20Sopenharmony_ci		const u64 em_end = em->start + em->len;
31378c2ecf20Sopenharmony_ci
31388c2ecf20Sopenharmony_ci		if (em_end >= offset + len) {
31398c2ecf20Sopenharmony_ci			/*
31408c2ecf20Sopenharmony_ci			 * The whole range is already a prealloc extent,
31418c2ecf20Sopenharmony_ci			 * do nothing except updating the inode's i_size if
31428c2ecf20Sopenharmony_ci			 * needed.
31438c2ecf20Sopenharmony_ci			 */
31448c2ecf20Sopenharmony_ci			free_extent_map(em);
31458c2ecf20Sopenharmony_ci			ret = btrfs_fallocate_update_isize(inode, offset + len,
31468c2ecf20Sopenharmony_ci							   mode);
31478c2ecf20Sopenharmony_ci			goto out;
31488c2ecf20Sopenharmony_ci		}
31498c2ecf20Sopenharmony_ci		/*
31508c2ecf20Sopenharmony_ci		 * Part of the range is already a prealloc extent, so operate
31518c2ecf20Sopenharmony_ci		 * only on the remaining part of the range.
31528c2ecf20Sopenharmony_ci		 */
31538c2ecf20Sopenharmony_ci		alloc_start = em_end;
31548c2ecf20Sopenharmony_ci		ASSERT(IS_ALIGNED(alloc_start, sectorsize));
31558c2ecf20Sopenharmony_ci		len = offset + len - alloc_start;
31568c2ecf20Sopenharmony_ci		offset = alloc_start;
31578c2ecf20Sopenharmony_ci		alloc_hint = em->block_start + em->len;
31588c2ecf20Sopenharmony_ci	}
31598c2ecf20Sopenharmony_ci	free_extent_map(em);
31608c2ecf20Sopenharmony_ci
31618c2ecf20Sopenharmony_ci	if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
31628c2ecf20Sopenharmony_ci	    BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
31638c2ecf20Sopenharmony_ci		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
31648c2ecf20Sopenharmony_ci				      sectorsize);
31658c2ecf20Sopenharmony_ci		if (IS_ERR(em)) {
31668c2ecf20Sopenharmony_ci			ret = PTR_ERR(em);
31678c2ecf20Sopenharmony_ci			goto out;
31688c2ecf20Sopenharmony_ci		}
31698c2ecf20Sopenharmony_ci
31708c2ecf20Sopenharmony_ci		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
31718c2ecf20Sopenharmony_ci			free_extent_map(em);
31728c2ecf20Sopenharmony_ci			ret = btrfs_fallocate_update_isize(inode, offset + len,
31738c2ecf20Sopenharmony_ci							   mode);
31748c2ecf20Sopenharmony_ci			goto out;
31758c2ecf20Sopenharmony_ci		}
31768c2ecf20Sopenharmony_ci		if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
31778c2ecf20Sopenharmony_ci			free_extent_map(em);
31788c2ecf20Sopenharmony_ci			ret = btrfs_truncate_block(inode, offset, len, 0);
31798c2ecf20Sopenharmony_ci			if (!ret)
31808c2ecf20Sopenharmony_ci				ret = btrfs_fallocate_update_isize(inode,
31818c2ecf20Sopenharmony_ci								   offset + len,
31828c2ecf20Sopenharmony_ci								   mode);
31838c2ecf20Sopenharmony_ci			return ret;
31848c2ecf20Sopenharmony_ci		}
31858c2ecf20Sopenharmony_ci		free_extent_map(em);
31868c2ecf20Sopenharmony_ci		alloc_start = round_down(offset, sectorsize);
31878c2ecf20Sopenharmony_ci		alloc_end = alloc_start + sectorsize;
31888c2ecf20Sopenharmony_ci		goto reserve_space;
31898c2ecf20Sopenharmony_ci	}
31908c2ecf20Sopenharmony_ci
31918c2ecf20Sopenharmony_ci	alloc_start = round_up(offset, sectorsize);
31928c2ecf20Sopenharmony_ci	alloc_end = round_down(offset + len, sectorsize);
31938c2ecf20Sopenharmony_ci
31948c2ecf20Sopenharmony_ci	/*
31958c2ecf20Sopenharmony_ci	 * For unaligned ranges, check the pages at the boundaries, they might
31968c2ecf20Sopenharmony_ci	 * map to an extent, in which case we need to partially zero them, or
31978c2ecf20Sopenharmony_ci	 * they might map to a hole, in which case we need our allocation range
31988c2ecf20Sopenharmony_ci	 * to cover them.
31998c2ecf20Sopenharmony_ci	 */
32008c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(offset, sectorsize)) {
32018c2ecf20Sopenharmony_ci		ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
32028c2ecf20Sopenharmony_ci							    offset);
32038c2ecf20Sopenharmony_ci		if (ret < 0)
32048c2ecf20Sopenharmony_ci			goto out;
32058c2ecf20Sopenharmony_ci		if (ret == RANGE_BOUNDARY_HOLE) {
32068c2ecf20Sopenharmony_ci			alloc_start = round_down(offset, sectorsize);
32078c2ecf20Sopenharmony_ci			ret = 0;
32088c2ecf20Sopenharmony_ci		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
32098c2ecf20Sopenharmony_ci			ret = btrfs_truncate_block(inode, offset, 0, 0);
32108c2ecf20Sopenharmony_ci			if (ret)
32118c2ecf20Sopenharmony_ci				goto out;
32128c2ecf20Sopenharmony_ci		} else {
32138c2ecf20Sopenharmony_ci			ret = 0;
32148c2ecf20Sopenharmony_ci		}
32158c2ecf20Sopenharmony_ci	}
32168c2ecf20Sopenharmony_ci
32178c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(offset + len, sectorsize)) {
32188c2ecf20Sopenharmony_ci		ret = btrfs_zero_range_check_range_boundary(BTRFS_I(inode),
32198c2ecf20Sopenharmony_ci							    offset + len);
32208c2ecf20Sopenharmony_ci		if (ret < 0)
32218c2ecf20Sopenharmony_ci			goto out;
32228c2ecf20Sopenharmony_ci		if (ret == RANGE_BOUNDARY_HOLE) {
32238c2ecf20Sopenharmony_ci			alloc_end = round_up(offset + len, sectorsize);
32248c2ecf20Sopenharmony_ci			ret = 0;
32258c2ecf20Sopenharmony_ci		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
32268c2ecf20Sopenharmony_ci			ret = btrfs_truncate_block(inode, offset + len, 0, 1);
32278c2ecf20Sopenharmony_ci			if (ret)
32288c2ecf20Sopenharmony_ci				goto out;
32298c2ecf20Sopenharmony_ci		} else {
32308c2ecf20Sopenharmony_ci			ret = 0;
32318c2ecf20Sopenharmony_ci		}
32328c2ecf20Sopenharmony_ci	}
32338c2ecf20Sopenharmony_ci
32348c2ecf20Sopenharmony_cireserve_space:
32358c2ecf20Sopenharmony_ci	if (alloc_start < alloc_end) {
32368c2ecf20Sopenharmony_ci		struct extent_state *cached_state = NULL;
32378c2ecf20Sopenharmony_ci		const u64 lockstart = alloc_start;
32388c2ecf20Sopenharmony_ci		const u64 lockend = alloc_end - 1;
32398c2ecf20Sopenharmony_ci
32408c2ecf20Sopenharmony_ci		bytes_to_reserve = alloc_end - alloc_start;
32418c2ecf20Sopenharmony_ci		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
32428c2ecf20Sopenharmony_ci						      bytes_to_reserve);
32438c2ecf20Sopenharmony_ci		if (ret < 0)
32448c2ecf20Sopenharmony_ci			goto out;
32458c2ecf20Sopenharmony_ci		space_reserved = true;
32468c2ecf20Sopenharmony_ci		ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
32478c2ecf20Sopenharmony_ci						  &cached_state);
32488c2ecf20Sopenharmony_ci		if (ret)
32498c2ecf20Sopenharmony_ci			goto out;
32508c2ecf20Sopenharmony_ci		ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
32518c2ecf20Sopenharmony_ci						alloc_start, bytes_to_reserve);
32528c2ecf20Sopenharmony_ci		if (ret) {
32538c2ecf20Sopenharmony_ci			unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
32548c2ecf20Sopenharmony_ci					     lockend, &cached_state);
32558c2ecf20Sopenharmony_ci			goto out;
32568c2ecf20Sopenharmony_ci		}
32578c2ecf20Sopenharmony_ci		ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
32588c2ecf20Sopenharmony_ci						alloc_end - alloc_start,
32598c2ecf20Sopenharmony_ci						i_blocksize(inode),
32608c2ecf20Sopenharmony_ci						offset + len, &alloc_hint);
32618c2ecf20Sopenharmony_ci		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
32628c2ecf20Sopenharmony_ci				     lockend, &cached_state);
32638c2ecf20Sopenharmony_ci		/* btrfs_prealloc_file_range releases reserved space on error */
32648c2ecf20Sopenharmony_ci		if (ret) {
32658c2ecf20Sopenharmony_ci			space_reserved = false;
32668c2ecf20Sopenharmony_ci			goto out;
32678c2ecf20Sopenharmony_ci		}
32688c2ecf20Sopenharmony_ci	}
32698c2ecf20Sopenharmony_ci	ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
32708c2ecf20Sopenharmony_ci out:
32718c2ecf20Sopenharmony_ci	if (ret && space_reserved)
32728c2ecf20Sopenharmony_ci		btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
32738c2ecf20Sopenharmony_ci					       alloc_start, bytes_to_reserve);
32748c2ecf20Sopenharmony_ci	extent_changeset_free(data_reserved);
32758c2ecf20Sopenharmony_ci
32768c2ecf20Sopenharmony_ci	return ret;
32778c2ecf20Sopenharmony_ci}
32788c2ecf20Sopenharmony_ci
32798c2ecf20Sopenharmony_cistatic long btrfs_fallocate(struct file *file, int mode,
32808c2ecf20Sopenharmony_ci			    loff_t offset, loff_t len)
32818c2ecf20Sopenharmony_ci{
32828c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
32838c2ecf20Sopenharmony_ci	struct extent_state *cached_state = NULL;
32848c2ecf20Sopenharmony_ci	struct extent_changeset *data_reserved = NULL;
32858c2ecf20Sopenharmony_ci	struct falloc_range *range;
32868c2ecf20Sopenharmony_ci	struct falloc_range *tmp;
32878c2ecf20Sopenharmony_ci	struct list_head reserve_list;
32888c2ecf20Sopenharmony_ci	u64 cur_offset;
32898c2ecf20Sopenharmony_ci	u64 last_byte;
32908c2ecf20Sopenharmony_ci	u64 alloc_start;
32918c2ecf20Sopenharmony_ci	u64 alloc_end;
32928c2ecf20Sopenharmony_ci	u64 alloc_hint = 0;
32938c2ecf20Sopenharmony_ci	u64 locked_end;
32948c2ecf20Sopenharmony_ci	u64 actual_end = 0;
32958c2ecf20Sopenharmony_ci	struct extent_map *em;
32968c2ecf20Sopenharmony_ci	int blocksize = btrfs_inode_sectorsize(BTRFS_I(inode));
32978c2ecf20Sopenharmony_ci	int ret;
32988c2ecf20Sopenharmony_ci
32998c2ecf20Sopenharmony_ci	alloc_start = round_down(offset, blocksize);
33008c2ecf20Sopenharmony_ci	alloc_end = round_up(offset + len, blocksize);
33018c2ecf20Sopenharmony_ci	cur_offset = alloc_start;
33028c2ecf20Sopenharmony_ci
33038c2ecf20Sopenharmony_ci	/* Make sure we aren't being give some crap mode */
33048c2ecf20Sopenharmony_ci	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
33058c2ecf20Sopenharmony_ci		     FALLOC_FL_ZERO_RANGE))
33068c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
33078c2ecf20Sopenharmony_ci
33088c2ecf20Sopenharmony_ci	if (mode & FALLOC_FL_PUNCH_HOLE)
33098c2ecf20Sopenharmony_ci		return btrfs_punch_hole(file, offset, len);
33108c2ecf20Sopenharmony_ci
33118c2ecf20Sopenharmony_ci	/*
33128c2ecf20Sopenharmony_ci	 * Only trigger disk allocation, don't trigger qgroup reserve
33138c2ecf20Sopenharmony_ci	 *
33148c2ecf20Sopenharmony_ci	 * For qgroup space, it will be checked later.
33158c2ecf20Sopenharmony_ci	 */
33168c2ecf20Sopenharmony_ci	if (!(mode & FALLOC_FL_ZERO_RANGE)) {
33178c2ecf20Sopenharmony_ci		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
33188c2ecf20Sopenharmony_ci						      alloc_end - alloc_start);
33198c2ecf20Sopenharmony_ci		if (ret < 0)
33208c2ecf20Sopenharmony_ci			return ret;
33218c2ecf20Sopenharmony_ci	}
33228c2ecf20Sopenharmony_ci
33238c2ecf20Sopenharmony_ci	inode_lock(inode);
33248c2ecf20Sopenharmony_ci
33258c2ecf20Sopenharmony_ci	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
33268c2ecf20Sopenharmony_ci		ret = inode_newsize_ok(inode, offset + len);
33278c2ecf20Sopenharmony_ci		if (ret)
33288c2ecf20Sopenharmony_ci			goto out;
33298c2ecf20Sopenharmony_ci	}
33308c2ecf20Sopenharmony_ci
33318c2ecf20Sopenharmony_ci	ret = file_modified(file);
33328c2ecf20Sopenharmony_ci	if (ret)
33338c2ecf20Sopenharmony_ci		goto out;
33348c2ecf20Sopenharmony_ci
33358c2ecf20Sopenharmony_ci	/*
33368c2ecf20Sopenharmony_ci	 * TODO: Move these two operations after we have checked
33378c2ecf20Sopenharmony_ci	 * accurate reserved space, or fallocate can still fail but
33388c2ecf20Sopenharmony_ci	 * with page truncated or size expanded.
33398c2ecf20Sopenharmony_ci	 *
33408c2ecf20Sopenharmony_ci	 * But that's a minor problem and won't do much harm BTW.
33418c2ecf20Sopenharmony_ci	 */
33428c2ecf20Sopenharmony_ci	if (alloc_start > inode->i_size) {
33438c2ecf20Sopenharmony_ci		ret = btrfs_cont_expand(inode, i_size_read(inode),
33448c2ecf20Sopenharmony_ci					alloc_start);
33458c2ecf20Sopenharmony_ci		if (ret)
33468c2ecf20Sopenharmony_ci			goto out;
33478c2ecf20Sopenharmony_ci	} else if (offset + len > inode->i_size) {
33488c2ecf20Sopenharmony_ci		/*
33498c2ecf20Sopenharmony_ci		 * If we are fallocating from the end of the file onward we
33508c2ecf20Sopenharmony_ci		 * need to zero out the end of the block if i_size lands in the
33518c2ecf20Sopenharmony_ci		 * middle of a block.
33528c2ecf20Sopenharmony_ci		 */
33538c2ecf20Sopenharmony_ci		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
33548c2ecf20Sopenharmony_ci		if (ret)
33558c2ecf20Sopenharmony_ci			goto out;
33568c2ecf20Sopenharmony_ci	}
33578c2ecf20Sopenharmony_ci
33588c2ecf20Sopenharmony_ci	/*
33598c2ecf20Sopenharmony_ci	 * wait for ordered IO before we have any locks.  We'll loop again
33608c2ecf20Sopenharmony_ci	 * below with the locks held.
33618c2ecf20Sopenharmony_ci	 */
33628c2ecf20Sopenharmony_ci	ret = btrfs_wait_ordered_range(inode, alloc_start,
33638c2ecf20Sopenharmony_ci				       alloc_end - alloc_start);
33648c2ecf20Sopenharmony_ci	if (ret)
33658c2ecf20Sopenharmony_ci		goto out;
33668c2ecf20Sopenharmony_ci
33678c2ecf20Sopenharmony_ci	if (mode & FALLOC_FL_ZERO_RANGE) {
33688c2ecf20Sopenharmony_ci		ret = btrfs_zero_range(inode, offset, len, mode);
33698c2ecf20Sopenharmony_ci		inode_unlock(inode);
33708c2ecf20Sopenharmony_ci		return ret;
33718c2ecf20Sopenharmony_ci	}
33728c2ecf20Sopenharmony_ci
33738c2ecf20Sopenharmony_ci	locked_end = alloc_end - 1;
33748c2ecf20Sopenharmony_ci	while (1) {
33758c2ecf20Sopenharmony_ci		struct btrfs_ordered_extent *ordered;
33768c2ecf20Sopenharmony_ci
33778c2ecf20Sopenharmony_ci		/* the extent lock is ordered inside the running
33788c2ecf20Sopenharmony_ci		 * transaction
33798c2ecf20Sopenharmony_ci		 */
33808c2ecf20Sopenharmony_ci		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
33818c2ecf20Sopenharmony_ci				 locked_end, &cached_state);
33828c2ecf20Sopenharmony_ci		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode),
33838c2ecf20Sopenharmony_ci							    locked_end);
33848c2ecf20Sopenharmony_ci
33858c2ecf20Sopenharmony_ci		if (ordered &&
33868c2ecf20Sopenharmony_ci		    ordered->file_offset + ordered->num_bytes > alloc_start &&
33878c2ecf20Sopenharmony_ci		    ordered->file_offset < alloc_end) {
33888c2ecf20Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
33898c2ecf20Sopenharmony_ci			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
33908c2ecf20Sopenharmony_ci					     alloc_start, locked_end,
33918c2ecf20Sopenharmony_ci					     &cached_state);
33928c2ecf20Sopenharmony_ci			/*
33938c2ecf20Sopenharmony_ci			 * we can't wait on the range with the transaction
33948c2ecf20Sopenharmony_ci			 * running or with the extent lock held
33958c2ecf20Sopenharmony_ci			 */
33968c2ecf20Sopenharmony_ci			ret = btrfs_wait_ordered_range(inode, alloc_start,
33978c2ecf20Sopenharmony_ci						       alloc_end - alloc_start);
33988c2ecf20Sopenharmony_ci			if (ret)
33998c2ecf20Sopenharmony_ci				goto out;
34008c2ecf20Sopenharmony_ci		} else {
34018c2ecf20Sopenharmony_ci			if (ordered)
34028c2ecf20Sopenharmony_ci				btrfs_put_ordered_extent(ordered);
34038c2ecf20Sopenharmony_ci			break;
34048c2ecf20Sopenharmony_ci		}
34058c2ecf20Sopenharmony_ci	}
34068c2ecf20Sopenharmony_ci
34078c2ecf20Sopenharmony_ci	/* First, check if we exceed the qgroup limit */
34088c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&reserve_list);
34098c2ecf20Sopenharmony_ci	while (cur_offset < alloc_end) {
34108c2ecf20Sopenharmony_ci		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
34118c2ecf20Sopenharmony_ci				      alloc_end - cur_offset);
34128c2ecf20Sopenharmony_ci		if (IS_ERR(em)) {
34138c2ecf20Sopenharmony_ci			ret = PTR_ERR(em);
34148c2ecf20Sopenharmony_ci			break;
34158c2ecf20Sopenharmony_ci		}
34168c2ecf20Sopenharmony_ci		last_byte = min(extent_map_end(em), alloc_end);
34178c2ecf20Sopenharmony_ci		actual_end = min_t(u64, extent_map_end(em), offset + len);
34188c2ecf20Sopenharmony_ci		last_byte = ALIGN(last_byte, blocksize);
34198c2ecf20Sopenharmony_ci		if (em->block_start == EXTENT_MAP_HOLE ||
34208c2ecf20Sopenharmony_ci		    (cur_offset >= inode->i_size &&
34218c2ecf20Sopenharmony_ci		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
34228c2ecf20Sopenharmony_ci			ret = add_falloc_range(&reserve_list, cur_offset,
34238c2ecf20Sopenharmony_ci					       last_byte - cur_offset);
34248c2ecf20Sopenharmony_ci			if (ret < 0) {
34258c2ecf20Sopenharmony_ci				free_extent_map(em);
34268c2ecf20Sopenharmony_ci				break;
34278c2ecf20Sopenharmony_ci			}
34288c2ecf20Sopenharmony_ci			ret = btrfs_qgroup_reserve_data(BTRFS_I(inode),
34298c2ecf20Sopenharmony_ci					&data_reserved, cur_offset,
34308c2ecf20Sopenharmony_ci					last_byte - cur_offset);
34318c2ecf20Sopenharmony_ci			if (ret < 0) {
34328c2ecf20Sopenharmony_ci				cur_offset = last_byte;
34338c2ecf20Sopenharmony_ci				free_extent_map(em);
34348c2ecf20Sopenharmony_ci				break;
34358c2ecf20Sopenharmony_ci			}
34368c2ecf20Sopenharmony_ci		} else {
34378c2ecf20Sopenharmony_ci			/*
34388c2ecf20Sopenharmony_ci			 * Do not need to reserve unwritten extent for this
34398c2ecf20Sopenharmony_ci			 * range, free reserved data space first, otherwise
34408c2ecf20Sopenharmony_ci			 * it'll result in false ENOSPC error.
34418c2ecf20Sopenharmony_ci			 */
34428c2ecf20Sopenharmony_ci			btrfs_free_reserved_data_space(BTRFS_I(inode),
34438c2ecf20Sopenharmony_ci				data_reserved, cur_offset,
34448c2ecf20Sopenharmony_ci				last_byte - cur_offset);
34458c2ecf20Sopenharmony_ci		}
34468c2ecf20Sopenharmony_ci		free_extent_map(em);
34478c2ecf20Sopenharmony_ci		cur_offset = last_byte;
34488c2ecf20Sopenharmony_ci	}
34498c2ecf20Sopenharmony_ci
34508c2ecf20Sopenharmony_ci	/*
34518c2ecf20Sopenharmony_ci	 * If ret is still 0, means we're OK to fallocate.
34528c2ecf20Sopenharmony_ci	 * Or just cleanup the list and exit.
34538c2ecf20Sopenharmony_ci	 */
34548c2ecf20Sopenharmony_ci	list_for_each_entry_safe(range, tmp, &reserve_list, list) {
34558c2ecf20Sopenharmony_ci		if (!ret)
34568c2ecf20Sopenharmony_ci			ret = btrfs_prealloc_file_range(inode, mode,
34578c2ecf20Sopenharmony_ci					range->start,
34588c2ecf20Sopenharmony_ci					range->len, i_blocksize(inode),
34598c2ecf20Sopenharmony_ci					offset + len, &alloc_hint);
34608c2ecf20Sopenharmony_ci		else
34618c2ecf20Sopenharmony_ci			btrfs_free_reserved_data_space(BTRFS_I(inode),
34628c2ecf20Sopenharmony_ci					data_reserved, range->start,
34638c2ecf20Sopenharmony_ci					range->len);
34648c2ecf20Sopenharmony_ci		list_del(&range->list);
34658c2ecf20Sopenharmony_ci		kfree(range);
34668c2ecf20Sopenharmony_ci	}
34678c2ecf20Sopenharmony_ci	if (ret < 0)
34688c2ecf20Sopenharmony_ci		goto out_unlock;
34698c2ecf20Sopenharmony_ci
34708c2ecf20Sopenharmony_ci	/*
34718c2ecf20Sopenharmony_ci	 * We didn't need to allocate any more space, but we still extended the
34728c2ecf20Sopenharmony_ci	 * size of the file so we need to update i_size and the inode item.
34738c2ecf20Sopenharmony_ci	 */
34748c2ecf20Sopenharmony_ci	ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
34758c2ecf20Sopenharmony_ciout_unlock:
34768c2ecf20Sopenharmony_ci	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
34778c2ecf20Sopenharmony_ci			     &cached_state);
34788c2ecf20Sopenharmony_ciout:
34798c2ecf20Sopenharmony_ci	inode_unlock(inode);
34808c2ecf20Sopenharmony_ci	/* Let go of our reservation. */
34818c2ecf20Sopenharmony_ci	if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
34828c2ecf20Sopenharmony_ci		btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
34838c2ecf20Sopenharmony_ci				cur_offset, alloc_end - cur_offset);
34848c2ecf20Sopenharmony_ci	extent_changeset_free(data_reserved);
34858c2ecf20Sopenharmony_ci	return ret;
34868c2ecf20Sopenharmony_ci}
34878c2ecf20Sopenharmony_ci
34888c2ecf20Sopenharmony_cistatic loff_t find_desired_extent(struct inode *inode, loff_t offset,
34898c2ecf20Sopenharmony_ci				  int whence)
34908c2ecf20Sopenharmony_ci{
34918c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
34928c2ecf20Sopenharmony_ci	struct extent_map *em = NULL;
34938c2ecf20Sopenharmony_ci	struct extent_state *cached_state = NULL;
34948c2ecf20Sopenharmony_ci	loff_t i_size = inode->i_size;
34958c2ecf20Sopenharmony_ci	u64 lockstart;
34968c2ecf20Sopenharmony_ci	u64 lockend;
34978c2ecf20Sopenharmony_ci	u64 start;
34988c2ecf20Sopenharmony_ci	u64 len;
34998c2ecf20Sopenharmony_ci	int ret = 0;
35008c2ecf20Sopenharmony_ci
35018c2ecf20Sopenharmony_ci	if (i_size == 0 || offset >= i_size)
35028c2ecf20Sopenharmony_ci		return -ENXIO;
35038c2ecf20Sopenharmony_ci
35048c2ecf20Sopenharmony_ci	/*
35058c2ecf20Sopenharmony_ci	 * offset can be negative, in this case we start finding DATA/HOLE from
35068c2ecf20Sopenharmony_ci	 * the very start of the file.
35078c2ecf20Sopenharmony_ci	 */
35088c2ecf20Sopenharmony_ci	start = max_t(loff_t, 0, offset);
35098c2ecf20Sopenharmony_ci
35108c2ecf20Sopenharmony_ci	lockstart = round_down(start, fs_info->sectorsize);
35118c2ecf20Sopenharmony_ci	lockend = round_up(i_size, fs_info->sectorsize);
35128c2ecf20Sopenharmony_ci	if (lockend <= lockstart)
35138c2ecf20Sopenharmony_ci		lockend = lockstart + fs_info->sectorsize;
35148c2ecf20Sopenharmony_ci	lockend--;
35158c2ecf20Sopenharmony_ci	len = lockend - lockstart + 1;
35168c2ecf20Sopenharmony_ci
35178c2ecf20Sopenharmony_ci	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
35188c2ecf20Sopenharmony_ci			 &cached_state);
35198c2ecf20Sopenharmony_ci
35208c2ecf20Sopenharmony_ci	while (start < i_size) {
35218c2ecf20Sopenharmony_ci		em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
35228c2ecf20Sopenharmony_ci		if (IS_ERR(em)) {
35238c2ecf20Sopenharmony_ci			ret = PTR_ERR(em);
35248c2ecf20Sopenharmony_ci			em = NULL;
35258c2ecf20Sopenharmony_ci			break;
35268c2ecf20Sopenharmony_ci		}
35278c2ecf20Sopenharmony_ci
35288c2ecf20Sopenharmony_ci		if (whence == SEEK_HOLE &&
35298c2ecf20Sopenharmony_ci		    (em->block_start == EXTENT_MAP_HOLE ||
35308c2ecf20Sopenharmony_ci		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
35318c2ecf20Sopenharmony_ci			break;
35328c2ecf20Sopenharmony_ci		else if (whence == SEEK_DATA &&
35338c2ecf20Sopenharmony_ci			   (em->block_start != EXTENT_MAP_HOLE &&
35348c2ecf20Sopenharmony_ci			    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
35358c2ecf20Sopenharmony_ci			break;
35368c2ecf20Sopenharmony_ci
35378c2ecf20Sopenharmony_ci		start = em->start + em->len;
35388c2ecf20Sopenharmony_ci		free_extent_map(em);
35398c2ecf20Sopenharmony_ci		em = NULL;
35408c2ecf20Sopenharmony_ci		cond_resched();
35418c2ecf20Sopenharmony_ci	}
35428c2ecf20Sopenharmony_ci	free_extent_map(em);
35438c2ecf20Sopenharmony_ci	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
35448c2ecf20Sopenharmony_ci			     &cached_state);
35458c2ecf20Sopenharmony_ci	if (ret) {
35468c2ecf20Sopenharmony_ci		offset = ret;
35478c2ecf20Sopenharmony_ci	} else {
35488c2ecf20Sopenharmony_ci		if (whence == SEEK_DATA && start >= i_size)
35498c2ecf20Sopenharmony_ci			offset = -ENXIO;
35508c2ecf20Sopenharmony_ci		else
35518c2ecf20Sopenharmony_ci			offset = min_t(loff_t, start, i_size);
35528c2ecf20Sopenharmony_ci	}
35538c2ecf20Sopenharmony_ci
35548c2ecf20Sopenharmony_ci	return offset;
35558c2ecf20Sopenharmony_ci}
35568c2ecf20Sopenharmony_ci
35578c2ecf20Sopenharmony_cistatic loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
35588c2ecf20Sopenharmony_ci{
35598c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
35608c2ecf20Sopenharmony_ci
35618c2ecf20Sopenharmony_ci	switch (whence) {
35628c2ecf20Sopenharmony_ci	default:
35638c2ecf20Sopenharmony_ci		return generic_file_llseek(file, offset, whence);
35648c2ecf20Sopenharmony_ci	case SEEK_DATA:
35658c2ecf20Sopenharmony_ci	case SEEK_HOLE:
35668c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
35678c2ecf20Sopenharmony_ci		offset = find_desired_extent(inode, offset, whence);
35688c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
35698c2ecf20Sopenharmony_ci		break;
35708c2ecf20Sopenharmony_ci	}
35718c2ecf20Sopenharmony_ci
35728c2ecf20Sopenharmony_ci	if (offset < 0)
35738c2ecf20Sopenharmony_ci		return offset;
35748c2ecf20Sopenharmony_ci
35758c2ecf20Sopenharmony_ci	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
35768c2ecf20Sopenharmony_ci}
35778c2ecf20Sopenharmony_ci
35788c2ecf20Sopenharmony_cistatic int btrfs_file_open(struct inode *inode, struct file *filp)
35798c2ecf20Sopenharmony_ci{
35808c2ecf20Sopenharmony_ci	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
35818c2ecf20Sopenharmony_ci	return generic_file_open(inode, filp);
35828c2ecf20Sopenharmony_ci}
35838c2ecf20Sopenharmony_ci
35848c2ecf20Sopenharmony_cistatic ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
35858c2ecf20Sopenharmony_ci{
35868c2ecf20Sopenharmony_ci	ssize_t ret = 0;
35878c2ecf20Sopenharmony_ci
35888c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
35898c2ecf20Sopenharmony_ci		struct inode *inode = file_inode(iocb->ki_filp);
35908c2ecf20Sopenharmony_ci
35918c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
35928c2ecf20Sopenharmony_ci		ret = btrfs_direct_IO(iocb, to);
35938c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
35948c2ecf20Sopenharmony_ci		if (ret < 0 || !iov_iter_count(to) ||
35958c2ecf20Sopenharmony_ci		    iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
35968c2ecf20Sopenharmony_ci			return ret;
35978c2ecf20Sopenharmony_ci	}
35988c2ecf20Sopenharmony_ci
35998c2ecf20Sopenharmony_ci	return generic_file_buffered_read(iocb, to, ret);
36008c2ecf20Sopenharmony_ci}
36018c2ecf20Sopenharmony_ci
36028c2ecf20Sopenharmony_ciconst struct file_operations btrfs_file_operations = {
36038c2ecf20Sopenharmony_ci	.llseek		= btrfs_file_llseek,
36048c2ecf20Sopenharmony_ci	.read_iter      = btrfs_file_read_iter,
36058c2ecf20Sopenharmony_ci	.splice_read	= generic_file_splice_read,
36068c2ecf20Sopenharmony_ci	.write_iter	= btrfs_file_write_iter,
36078c2ecf20Sopenharmony_ci	.splice_write	= iter_file_splice_write,
36088c2ecf20Sopenharmony_ci	.mmap		= btrfs_file_mmap,
36098c2ecf20Sopenharmony_ci	.open		= btrfs_file_open,
36108c2ecf20Sopenharmony_ci	.release	= btrfs_release_file,
36118c2ecf20Sopenharmony_ci	.fsync		= btrfs_sync_file,
36128c2ecf20Sopenharmony_ci	.fallocate	= btrfs_fallocate,
36138c2ecf20Sopenharmony_ci	.unlocked_ioctl	= btrfs_ioctl,
36148c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
36158c2ecf20Sopenharmony_ci	.compat_ioctl	= btrfs_compat_ioctl,
36168c2ecf20Sopenharmony_ci#endif
36178c2ecf20Sopenharmony_ci	.remap_file_range = btrfs_remap_file_range,
36188c2ecf20Sopenharmony_ci};
36198c2ecf20Sopenharmony_ci
36208c2ecf20Sopenharmony_civoid __cold btrfs_auto_defrag_exit(void)
36218c2ecf20Sopenharmony_ci{
36228c2ecf20Sopenharmony_ci	kmem_cache_destroy(btrfs_inode_defrag_cachep);
36238c2ecf20Sopenharmony_ci}
36248c2ecf20Sopenharmony_ci
36258c2ecf20Sopenharmony_ciint __init btrfs_auto_defrag_init(void)
36268c2ecf20Sopenharmony_ci{
36278c2ecf20Sopenharmony_ci	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
36288c2ecf20Sopenharmony_ci					sizeof(struct inode_defrag), 0,
36298c2ecf20Sopenharmony_ci					SLAB_MEM_SPREAD,
36308c2ecf20Sopenharmony_ci					NULL);
36318c2ecf20Sopenharmony_ci	if (!btrfs_inode_defrag_cachep)
36328c2ecf20Sopenharmony_ci		return -ENOMEM;
36338c2ecf20Sopenharmony_ci
36348c2ecf20Sopenharmony_ci	return 0;
36358c2ecf20Sopenharmony_ci}
36368c2ecf20Sopenharmony_ci
36378c2ecf20Sopenharmony_ciint btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
36388c2ecf20Sopenharmony_ci{
36398c2ecf20Sopenharmony_ci	int ret;
36408c2ecf20Sopenharmony_ci
36418c2ecf20Sopenharmony_ci	/*
36428c2ecf20Sopenharmony_ci	 * So with compression we will find and lock a dirty page and clear the
36438c2ecf20Sopenharmony_ci	 * first one as dirty, setup an async extent, and immediately return
36448c2ecf20Sopenharmony_ci	 * with the entire range locked but with nobody actually marked with
36458c2ecf20Sopenharmony_ci	 * writeback.  So we can't just filemap_write_and_wait_range() and
36468c2ecf20Sopenharmony_ci	 * expect it to work since it will just kick off a thread to do the
36478c2ecf20Sopenharmony_ci	 * actual work.  So we need to call filemap_fdatawrite_range _again_
36488c2ecf20Sopenharmony_ci	 * since it will wait on the page lock, which won't be unlocked until
36498c2ecf20Sopenharmony_ci	 * after the pages have been marked as writeback and so we're good to go
36508c2ecf20Sopenharmony_ci	 * from there.  We have to do this otherwise we'll miss the ordered
36518c2ecf20Sopenharmony_ci	 * extents and that results in badness.  Please Josef, do not think you
36528c2ecf20Sopenharmony_ci	 * know better and pull this out at some point in the future, it is
36538c2ecf20Sopenharmony_ci	 * right and you are wrong.
36548c2ecf20Sopenharmony_ci	 */
36558c2ecf20Sopenharmony_ci	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
36568c2ecf20Sopenharmony_ci	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
36578c2ecf20Sopenharmony_ci			     &BTRFS_I(inode)->runtime_flags))
36588c2ecf20Sopenharmony_ci		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
36598c2ecf20Sopenharmony_ci
36608c2ecf20Sopenharmony_ci	return ret;
36618c2ecf20Sopenharmony_ci}
3662