// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include "internal.h"

/*
 * Regular page slots are stabilized by the page lock even without the tree
 * itself locked.  These unlocked entries need verification under the tree
 * lock.
 */
static inline void __clear_shadow_entry(struct address_space *mapping,
				pgoff_t index, void *entry)
{
	XA_STATE(xas, &mapping->i_pages, index);

	xas_set_update(&xas, workingset_update_node);
	if (xas_load(&xas) != entry)
		return;
	xas_store(&xas, NULL);
}

static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
			       void *entry)
{
	spin_lock(&mapping->host->i_lock);
	xa_lock_irq(&mapping->i_pages);
	__clear_shadow_entry(mapping, index, entry);
	xa_unlock_irq(&mapping->i_pages);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);
}

/*
 * Unconditionally remove exceptional entries. Usually called from truncate
 * path. Note that the folio_batch may be altered by this function by removing
 * exceptional entries similar to what folio_batch_remove_exceptionals() does.
 */
static void truncate_folio_batch_exceptionals(struct address_space *mapping,
				struct folio_batch *fbatch, pgoff_t *indices)
{
	int i, j;
	bool dax;

	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return;

	for (j = 0; j < folio_batch_count(fbatch); j++)
		if (xa_is_value(fbatch->folios[j]))
			break;

	if (j == folio_batch_count(fbatch))
		return;

	dax = dax_mapping(mapping);
	if (!dax) {
		spin_lock(&mapping->host->i_lock);
		xa_lock_irq(&mapping->i_pages);
	}

	for (i = j; i < folio_batch_count(fbatch); i++) {
		struct folio *folio = fbatch->folios[i];
		pgoff_t index = indices[i];

		if (!xa_is_value(folio)) {
			fbatch->folios[j++] = folio;
			continue;
		}

		if (unlikely(dax)) {
			dax_delete_mapping_entry(mapping, index);
			continue;
		}

		__clear_shadow_entry(mapping, index, folio);
	}

	if (!dax) {
		xa_unlock_irq(&mapping->i_pages);
		if (mapping_shrinkable(mapping))
			inode_add_lru(mapping->host);
		spin_unlock(&mapping->host->i_lock);
	}
	fbatch->nr = j;
}

/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
 * entries for invalidate_mapping_pages().
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
					pgoff_t index, void *entry)
{
	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return 1;
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/*
 * Invalidate exceptional entry if clean. This handles exceptional entries for
 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
 */
static int invalidate_exceptional_entry2(struct address_space *mapping,
					 pgoff_t index, void *entry)
{
	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return 1;
	if (dax_mapping(mapping))
		return dax_invalidate_mapping_entry_sync(mapping, index);
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/**
 * folio_invalidate - Invalidate part or all of a folio.
 * @folio: The folio which is affected.
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * folio_invalidate() is called when all or part of the folio has become
 * invalidated by a truncate operation.
 *
 * folio_invalidate() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void folio_invalidate(struct folio *folio, size_t offset, size_t length)
{
	const struct address_space_operations *aops = folio->mapping->a_ops;

	if (aops->invalidate_folio)
		aops->invalidate_folio(folio, offset, length);
}
EXPORT_SYMBOL_GPL(folio_invalidate);

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void truncate_cleanup_folio(struct folio *folio)
{
	if (folio_mapped(folio))
		unmap_mapping_folio(folio);

	if (folio_has_private(folio))
		folio_invalidate(folio, 0, folio_size(folio));

	/*
	 * Some filesystems seem to re-dirty the page even after
	 * the VM has canceled the dirty bit (eg ext3 journaling).
	 * Hence dirty accounting check is placed after invalidation.
	 */
	folio_cancel_dirty(folio);
	folio_clear_mappedtodisk(folio);
}

int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
{
	if (folio->mapping != mapping)
		return -EIO;

	truncate_cleanup_folio(folio);
	filemap_remove_folio(folio);
	return 0;
}

/*
 * Handle partial folios.  The folio may be entirely within the
 * range if a split has raced with us.  If not, we zero the part of the
 * folio that's within the [start, end] range, and then split the folio if
 * it's large.  split_folio() will discard pages which now lie beyond
 * i_size, and we rely on the caller to discard pages which lie within a
 * newly created hole.
 *
 * Returns false if splitting failed so the caller can avoid
 * discarding the entire folio which is stubbornly unsplit.
 */
bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
{
	loff_t pos = folio_pos(folio);
	unsigned int offset, length;

	if (pos < start)
		offset = start - pos;
	else
		offset = 0;
	length = folio_size(folio);
	if (pos + length <= (u64)end)
		length = length - offset;
	else
		length = end + 1 - pos - offset;

	folio_wait_writeback(folio);
	if (length == folio_size(folio)) {
		truncate_inode_folio(folio->mapping, folio);
		return true;
	}

	/*
	 * We may be zeroing pages we're about to discard, but it avoids
	 * doing a complex calculation here, and then doing the zeroing
	 * anyway if the page split fails.
	 */
	folio_zero_range(folio, offset, length);

	if (folio_has_private(folio))
		folio_invalidate(folio, offset, length);
	if (!folio_test_large(folio))
		return true;
	if (split_folio(folio) == 0)
		return true;
	if (folio_test_dirty(folio))
		return false;
	truncate_inode_folio(folio->mapping, folio);
	return true;
}

/*
 * Used to get rid of pages on hardware memory corruption.
 */
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);

	if (!mapping)
		return -EINVAL;
	/*
	 * Only punch for normal data pages for now.
	 * Handling other types like directories would need more auditing.
	 */
	if (!S_ISREG(mapping->host->i_mode))
		return -EIO;
	return truncate_inode_folio(mapping, page_folio(page));
}
EXPORT_SYMBOL(generic_error_remove_page);

static long mapping_evict_folio(struct address_space *mapping,
		struct folio *folio)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return 0;
	/* The refcount will be elevated if any page in the folio is mapped */
	if (folio_ref_count(folio) >
			folio_nr_pages(folio) + folio_has_private(folio) + 1)
		return 0;
	if (!filemap_release_folio(folio, 0))
		return 0;

	return remove_mapping(mapping, folio);
}

/**
 * invalidate_inode_page() - Remove an unused page from the pagecache.
 * @page: The page to remove.
 *
 * Safely invalidate one page from its pagecache mapping.
 * It only drops clean, unused pages.
 *
 * Context: Page must be locked.
 * Return: The number of pages successfully removed.
 */
long invalidate_inode_page(struct page *page)
{
	struct folio *folio = page_folio(page);
	struct address_space *mapping = folio_mapping(folio);

	/* The page may have been truncated before it was locked */
	if (!mapping)
		return 0;
	return mapping_evict_folio(mapping, folio);
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidate_folio() accepts a range to invalidate,
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	pgoff_t		start;		/* inclusive */
	pgoff_t		end;		/* exclusive */
	struct folio_batch fbatch;
	pgoff_t		indices[PAGEVEC_SIZE];
	pgoff_t		index;
	int		i;
	struct folio	*folio;
	bool		same_folio;

	if (mapping_empty(mapping))
		return;

	/*
	 * 'start' and 'end' always cover the range of pages to be fully
	 * truncated.  Partial folios at the start and end of the range are
	 * handled separately by truncate_inode_partial_folio() below.
	 * Note that 'end' is exclusive while 'lend' is inclusive.
	 */
	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (lend == -1)
		/*
		 * lend == -1 indicates end-of-file so we have to set 'end'
		 * to the highest possible pgoff_t and since the type is
		 * unsigned we're using -1.
		 */
		end = -1;
	else
		end = (lend + 1) >> PAGE_SHIFT;

	folio_batch_init(&fbatch);
	index = start;
	while (index < end && find_lock_entries(mapping, &index, end - 1,
			&fbatch, indices)) {
		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			truncate_cleanup_folio(fbatch.folios[i]);
		delete_from_page_cache_batch(mapping, &fbatch);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			folio_unlock(fbatch.folios[i]);
		folio_batch_release(&fbatch);
		cond_resched();
	}

	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
	folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
	if (!IS_ERR(folio)) {
		same_folio = lend < folio_pos(folio) + folio_size(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
			start = folio_next_index(folio);
			if (same_folio)
				end = folio->index;
		}
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
	}

	if (!same_folio) {
		folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
						FGP_LOCK, 0);
		if (!IS_ERR(folio)) {
			if (!truncate_inode_partial_folio(folio, lstart, lend))
				end = folio->index;
			folio_unlock(folio);
			folio_put(folio);
		}
	}

	index = start;
	while (index < end) {
		cond_resched();
		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
				indices)) {
			/* If all gone from start onwards, we're done */
			if (index == start)
				break;
			/* Otherwise restart to make sure all gone */
			index = start;
			continue;
		}

		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing folio->index */

			if (xa_is_value(folio))
				continue;

			folio_lock(folio);
			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
			folio_wait_writeback(folio);
			truncate_inode_folio(mapping, folio);
			folio_unlock(folio);
		}
		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
		folio_batch_release(&fbatch);
	}
}
EXPORT_SYMBOL(truncate_inode_pages_range);

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_rwsem and
 * mapping->invalidate_lock.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __filemap_remove_folio()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);

/**
 * truncate_inode_pages_final - truncate *all* pages before inode dies
 * @mapping: mapping to truncate
 *
 * Called under (and serialized by) inode->i_rwsem.
 *
 * Filesystems have to use this in the .evict_inode path to inform the
 * VM that this is the final truncate and the inode is going away.
 */
void truncate_inode_pages_final(struct address_space *mapping)
{
	/*
	 * Page reclaim can not participate in regular inode lifetime
	 * management (can't call iput()) and thus can race with the
	 * inode teardown.  Tell it when the address space is exiting,
	 * so that it does not install eviction information after the
	 * final truncate has begun.
	 */
	mapping_set_exiting(mapping);

	if (!mapping_empty(mapping)) {
		/*
		 * As truncation uses a lockless tree lookup, cycle
		 * the tree lock to make sure any ongoing tree
		 * modification that does not see AS_EXITING is
		 * completed before starting the final truncate.
		 */
		xa_lock_irq(&mapping->i_pages);
		xa_unlock_irq(&mapping->i_pages);
	}

	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
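
/*
 * Illustrative usage sketch (not part of this file): a filesystem's
 * ->evict_inode implementation typically performs the final truncate before
 * clearing the inode.  "myfs_evict_inode" is a hypothetical name; the two
 * calls shown are the real helpers.
 *
 *	static void myfs_evict_inode(struct inode *inode)
 *	{
 *		truncate_inode_pages_final(&inode->i_data);
 *		clear_inode(inode);
 *	}
 */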

/**
 * mapping_try_invalidate - Invalidate all the evictable folios of one inode
 * @mapping: the address_space which holds the folios to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 * @nr_failed: How many folio invalidations failed
 *
 * This function is similar to invalidate_mapping_pages(), except that it
 * returns the number of folios which could not be evicted in @nr_failed.
 */
unsigned long mapping_try_invalidate(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_failed)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio_batch fbatch;
	pgoff_t index = start;
	unsigned long ret;
	unsigned long count = 0;
	int i;

	folio_batch_init(&fbatch);
	while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing folio->index */

			if (xa_is_value(folio)) {
				count += invalidate_exceptional_entry(mapping,
							     indices[i], folio);
				continue;
			}

			ret = mapping_evict_folio(mapping, folio);
			folio_unlock(folio);
			/*
			 * Invalidation is a hint that the folio is no longer
			 * of interest, so try to speed up its reclaim.
			 */
			if (!ret) {
				deactivate_file_folio(folio);
				/* Likely in the lru cache of a remote CPU */
				if (nr_failed)
					(*nr_failed)++;
			}
			count += ret;
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
	return count;
}

/**
 * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
 * @mapping: the address_space which holds the cache to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function removes pages that are clean, unmapped and unlocked,
 * as well as shadow entries. It will not block on IO activity.
 *
 * If you want to remove all the pages of one inode, regardless of
 * their use and writeback state, use truncate_inode_pages().
 *
 * Return: The number of indices that had their contents invalidated
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	return mapping_try_invalidate(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
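
/*
 * Illustrative usage sketch (not part of this file): dropping clean, unused
 * cache for the first 16 pages of an inode.  Folios that are dirty, under
 * writeback or mapped are simply skipped; the return value counts only what
 * was actually invalidated.
 *
 *	unsigned long nr = invalidate_mapping_pages(inode->i_mapping, 0, 15);
 */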

/*
 * This is like invalidate_inode_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the folio_add_lru() caches.
 */
static int invalidate_complete_folio2(struct address_space *mapping,
					struct folio *folio)
{
	if (folio->mapping != mapping)
		return 0;

	if (!filemap_release_folio(folio, GFP_KERNEL))
		return 0;

	spin_lock(&mapping->host->i_lock);
	xa_lock_irq(&mapping->i_pages);
	if (folio_test_dirty(folio))
		goto failed;

	BUG_ON(folio_has_private(folio));
	__filemap_remove_folio(folio, NULL);
	xa_unlock_irq(&mapping->i_pages);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);

	filemap_free_folio(mapping, folio);
	return 1;
failed:
	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&mapping->host->i_lock);
	return 0;
}

static int folio_launder(struct address_space *mapping, struct folio *folio)
{
	if (!folio_test_dirty(folio))
		return 0;
	if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL)
		return 0;
	return mapping->a_ops->launder_folio(folio);
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio_batch fbatch;
	pgoff_t index;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;

	if (mapping_empty(mapping))
		return 0;

	folio_batch_init(&fbatch);
	index = start;
	while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing folio->index */

			if (xa_is_value(folio)) {
				if (!invalidate_exceptional_entry2(mapping,
						indices[i], folio))
					ret = -EBUSY;
				continue;
			}

			if (!did_range_unmap && folio_mapped(folio)) {
				/*
				 * If folio is mapped, before taking its lock,
				 * zap the rest of the file in one hit.
				 */
				unmap_mapping_pages(mapping, indices[i],
						(1 + end - indices[i]), false);
				did_range_unmap = 1;
			}

			folio_lock(folio);
			if (unlikely(folio->mapping != mapping)) {
				folio_unlock(folio);
				continue;
			}
			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
			folio_wait_writeback(folio);

			if (folio_mapped(folio))
				unmap_mapping_folio(folio);
			BUG_ON(folio_mapped(folio));

			ret2 = folio_launder(mapping, folio);
			if (ret2 == 0) {
				if (!invalidate_complete_folio2(mapping, folio))
					ret2 = -EBUSY;
			}
			if (ret2 < 0)
				ret = ret2;
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
	/*
	 * For DAX we invalidate page tables after invalidating page cache.  We
	 * could invalidate page tables while invalidating each entry however
	 * that would be expensive.  And doing range unmapping beforehand doesn't
	 * work, as we have no cheap way to find out whether a page cache entry
	 * got remapped later.
	 */
	if (dax_mapping(mapping)) {
		unmap_mapping_pages(mapping, start, end - start + 1, false);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
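
/*
 * Illustrative usage sketch (not part of this file): unlike the lazy
 * invalidate_mapping_pages() above, the pages2 variants are used where stale
 * cache must not survive, e.g. around a direct I/O write covering
 * [pos, pos + len).  The local variables here are hypothetical.
 *
 *	err = invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 *					    (pos + len - 1) >> PAGE_SHIFT);
 *	if (err)
 *		return err;
 */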

/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t holebegin = round_up(newsize, PAGE_SIZE);

	/*
	 * unmap_mapping_range is called twice, first simply for
	 * efficiency so that truncate_inode_pages does fewer
	 * single-page unmaps.  However after this first call, and
	 * before truncate_inode_pages finishes, it is possible for
	 * private pages to be COWed, which remain after
	 * truncate_inode_pages finishes, hence the second
	 * unmap_mapping_range call must be made for correctness.
	 */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, newsize);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);

/**
 * truncate_setsize - update inode and pagecache for a new file size
 * @inode: inode
 * @newsize: new file size
 *
 * truncate_setsize updates i_size and performs pagecache truncation (if
 * necessary) to @newsize. It will typically be called from the filesystem's
 * setattr function when ATTR_SIZE is passed in.
 *
 * Must be called with a lock serializing truncates and writes (generally
 * i_rwsem but e.g. xfs uses a different lock) and before all filesystem
 * specific block truncation has been performed.
 */
void truncate_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	i_size_write(inode, newsize);
	if (newsize > oldsize)
		pagecache_isize_extended(inode, oldsize, newsize);
	truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);
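
/*
 * Illustrative usage sketch (not part of this file): a minimal ->setattr
 * handler shrinking or extending a file.  "myfs_setattr" and
 * "myfs_truncate_blocks" are hypothetical; the point is that i_size and the
 * page cache are updated via truncate_setsize() before the filesystem frees
 * on-disk blocks.
 *
 *	static int myfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 *				struct iattr *attr)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *		int error = setattr_prepare(idmap, dentry, attr);
 *
 *		if (error)
 *			return error;
 *		if (attr->ia_valid & ATTR_SIZE) {
 *			truncate_setsize(inode, attr->ia_size);
 *			myfs_truncate_blocks(inode, attr->ia_size);
 *		}
 *		setattr_copy(idmap, inode, attr);
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */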

/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode:	inode for which i_size was extended
 * @from:	original inode size
 * @to:		new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size. We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way filesystem can be sure that page_mkwrite() is called on
 * the page before user writes to the page via mmap after the i_size has been
 * changed.
 *
 * The function must be called after i_size is updated so that page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_rwsem - this not only
 * makes sure i_size is stable but also that userspace cannot observe new
 * i_size value before we are prepared to store mmap writes at new inode size.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
	int bsize = i_blocksize(inode);
	loff_t rounded_from;
	struct page *page;
	pgoff_t index;

	WARN_ON(to > inode->i_size);

	if (from >= to || bsize == PAGE_SIZE)
		return;
	/* Page straddling @from will not have any hole block created? */
	rounded_from = round_up(from, bsize);
	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
		return;

	index = from >> PAGE_SHIFT;
	page = find_lock_page(inode->i_mapping, index);
	/* Page not cached? Nothing to do */
	if (!page)
		return;
	/*
	 * See clear_page_dirty_for_io() for details why set_page_dirty()
	 * is needed.
	 */
	if (page_mkclean(page))
		set_page_dirty(page);
	unlock_page(page);
	put_page(page);
}
EXPORT_SYMBOL(pagecache_isize_extended);
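
/*
 * Illustrative usage sketch (not part of this file): a write path that has
 * just extended the file updates i_size first and then lets this helper
 * write-protect the page straddling the old size.  "oldsize"/"newsize" are
 * hypothetical locals; truncate_setsize() above follows the same ordering.
 *
 *	if (newsize > oldsize) {
 *		i_size_write(inode, newsize);
 *		pagecache_isize_extended(inode, oldsize, newsize);
 *	}
 */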

/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
	/*
	 * This rounding is currently just for example: unmap_mapping_range
	 * expands its hole outwards, whereas we want it to contract the hole
	 * inwards.  However, existing callers of truncate_pagecache_range are
	 * doing their own page rounding first.  Note that unmap_mapping_range
	 * allows holelen 0 for all, and we allow lend -1 for end of file.
	 */

	/*
	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
	 * once (before truncating pagecache), and without "even_cows" flag:
	 * hole-punching should not remove private COWed pages from the hole.
	 */
	if ((u64)unmap_end > (u64)unmap_start)
		unmap_mapping_range(mapping, unmap_start,
				    1 + unmap_end - unmap_start, 0);
	truncate_inode_pages_range(mapping, lstart, lend);
}
EXPORT_SYMBOL(truncate_pagecache_range);
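
/*
 * Illustrative usage sketch (not part of this file): a hole-punching
 * fallocate(FALLOC_FL_PUNCH_HOLE) implementation typically removes the
 * affected page cache before freeing the underlying blocks.
 * "myfs_free_blocks" is a hypothetical helper; offset/len come from the
 * fallocate request.
 *
 *	truncate_pagecache_range(inode, offset, offset + len - 1);
 *	myfs_free_blocks(inode, offset, len);
 */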