// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include "internal.h"

/*
 * Regular page slots are stabilized by the page lock even without the tree
 * itself locked.  These unlocked entries need verification under the tree
 * lock.
 */
static inline void __clear_shadow_entry(struct address_space *mapping,
				pgoff_t index, void *entry)
{
	XA_STATE(xas, &mapping->i_pages, index);

	xas_set_update(&xas, workingset_update_node);
	if (xas_load(&xas) != entry)
		return;
	xas_store(&xas, NULL);
}

static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
			       void *entry)
{
	spin_lock(&mapping->host->i_lock);
	xa_lock_irq(&mapping->i_pages);
	__clear_shadow_entry(mapping, index, entry);
	xa_unlock_irq(&mapping->i_pages);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);
}

/*
 * Unconditionally remove exceptional entries.  Usually called from truncate
 * path.  Note that the folio_batch may be altered by this function by removing
 * exceptional entries similar to what folio_batch_remove_exceptionals() does.
 */
static void truncate_folio_batch_exceptionals(struct address_space *mapping,
				struct folio_batch *fbatch, pgoff_t *indices)
{
	int i, j;
	bool dax;

	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return;

	for (j = 0; j < folio_batch_count(fbatch); j++)
		if (xa_is_value(fbatch->folios[j]))
			break;

	if (j == folio_batch_count(fbatch))
		return;

	dax = dax_mapping(mapping);
	if (!dax) {
		spin_lock(&mapping->host->i_lock);
		xa_lock_irq(&mapping->i_pages);
	}

	for (i = j; i < folio_batch_count(fbatch); i++) {
		struct folio *folio = fbatch->folios[i];
		pgoff_t index = indices[i];

		if (!xa_is_value(folio)) {
			fbatch->folios[j++] = folio;
			continue;
		}

		if (unlikely(dax)) {
			dax_delete_mapping_entry(mapping, index);
			continue;
		}

		__clear_shadow_entry(mapping, index, folio);
	}

	if (!dax) {
		xa_unlock_irq(&mapping->i_pages);
		if (mapping_shrinkable(mapping))
			inode_add_lru(mapping->host);
		spin_unlock(&mapping->host->i_lock);
	}
	fbatch->nr = j;
}

/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
 * entries for invalidate_inode_pages().
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
					pgoff_t index, void *entry)
{
	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return 1;
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/*
 * Invalidate exceptional entry if clean. This handles exceptional entries for
 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
 */
static int invalidate_exceptional_entry2(struct address_space *mapping,
					 pgoff_t index, void *entry)
{
	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return 1;
	if (dax_mapping(mapping))
		return dax_invalidate_mapping_entry_sync(mapping, index);
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/**
 * folio_invalidate - Invalidate part or all of a folio.
 * @folio: The folio which is affected.
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * folio_invalidate() is called when all or part of the folio has become
 * invalidated by a truncate operation.
 *
 * folio_invalidate() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point.  Because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void folio_invalidate(struct folio *folio, size_t offset, size_t length)
{
	const struct address_space_operations *aops = folio->mapping->a_ops;

	if (aops->invalidate_folio)
		aops->invalidate_folio(folio, offset, length);
}
EXPORT_SYMBOL_GPL(folio_invalidate);

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void truncate_cleanup_folio(struct folio *folio)
{
	if (folio_mapped(folio))
		unmap_mapping_folio(folio);

	if (folio_has_private(folio))
		folio_invalidate(folio, 0, folio_size(folio));

	/*
	 * Some filesystems seem to re-dirty the page even after
	 * the VM has canceled the dirty bit (eg ext3 journaling).
	 * Hence dirty accounting check is placed after invalidation.
	 */
	folio_cancel_dirty(folio);
	folio_clear_mappedtodisk(folio);
}

int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
{
	if (folio->mapping != mapping)
		return -EIO;

	truncate_cleanup_folio(folio);
	filemap_remove_folio(folio);
	return 0;
}

/*
 * Handle partial folios.  The folio may be entirely within the
 * range if a split has raced with us.  If not, we zero the part of the
 * folio that's within the [start, end] range, and then split the folio if
 * it's large.  split_folio() will discard pages which now lie beyond
 * i_size, and we rely on the caller to discard pages which lie within a
 * newly created hole.
 *
 * Returns false if splitting failed so the caller can avoid
 * discarding the entire folio which is stubbornly unsplit.
 */
bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
{
	loff_t pos = folio_pos(folio);
	unsigned int offset, length;

	if (pos < start)
		offset = start - pos;
	else
		offset = 0;
	length = folio_size(folio);
	if (pos + length <= (u64)end)
		length = length - offset;
	else
		length = end + 1 - pos - offset;

	folio_wait_writeback(folio);
	if (length == folio_size(folio)) {
		truncate_inode_folio(folio->mapping, folio);
		return true;
	}

	/*
	 * We may be zeroing pages we're about to discard, but it avoids
	 * doing a complex calculation here, and then doing the zeroing
	 * anyway if the page split fails.
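	 *
	 * For example (illustrative numbers only, assuming 4KiB pages): a
	 * 16KiB folio at pos 0 truncated with start = 5000 and end = -1
	 * gives offset = 5000 and length = 11384, so bytes 5000..16383 are
	 * zeroed here even though, in the common truncate-to-5000 case, a
	 * successful split can discard the pages now beyond i_size anyway.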
	 */
	folio_zero_range(folio, offset, length);

	if (folio_has_private(folio))
		folio_invalidate(folio, offset, length);
	if (!folio_test_large(folio))
		return true;
	if (split_folio(folio) == 0)
		return true;
	if (folio_test_dirty(folio))
		return false;
	truncate_inode_folio(folio->mapping, folio);
	return true;
}

/*
 * Used to get rid of pages on hardware memory corruption.
 */
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);

	if (!mapping)
		return -EINVAL;
	/*
	 * Only punch for normal data pages for now.
	 * Handling other types like directories would need more auditing.
	 */
	if (!S_ISREG(mapping->host->i_mode))
		return -EIO;
	return truncate_inode_folio(mapping, page_folio(page));
}
EXPORT_SYMBOL(generic_error_remove_page);

static long mapping_evict_folio(struct address_space *mapping,
		struct folio *folio)
{
	if (folio_test_dirty(folio) || folio_test_writeback(folio))
		return 0;
	/* The refcount will be elevated if any page in the folio is mapped */
	if (folio_ref_count(folio) >
			folio_nr_pages(folio) + folio_has_private(folio) + 1)
		return 0;
	if (!filemap_release_folio(folio, 0))
		return 0;

	return remove_mapping(mapping, folio);
}

/**
 * invalidate_inode_page() - Remove an unused page from the pagecache.
 * @page: The page to remove.
 *
 * Safely invalidate one page from its pagecache mapping.
 * It only drops clean, unused pages.
 *
 * Context: Page must be locked.
 * Return: The number of pages successfully removed.
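 *
 * A minimal caller sketch (illustrative only; the surrounding code is an
 * assumption, the locking requirement is the point):
 *
 *	if (trylock_page(page)) {
 *		invalidate_inode_page(page);
 *		unlock_page(page);
 *	}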
 */
long invalidate_inode_page(struct page *page)
{
	struct folio *folio = page_folio(page);
	struct address_space *mapping = folio_mapping(folio);

	/* The page may have been truncated before it was locked */
	if (!mapping)
		return 0;
	return mapping_evict_folio(mapping, folio);
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidate_folio() accepts range to invalidate
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned properly.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	pgoff_t start;		/* inclusive */
	pgoff_t end;		/* exclusive */
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index;
	int i;
	struct folio *folio;
	bool same_folio;

	if (mapping_empty(mapping))
		return;

	/*
	 * 'start' and 'end' always cover the range of pages to be fully
	 * truncated.  Partial folios at the start and end of the range are
	 * handled separately by truncate_inode_partial_folio().
	 * Note that 'end' is exclusive while 'lend' is inclusive.
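	 *
	 * For example (illustrative numbers, 4KiB pages): lstart = 100 and
	 * lend = 8191 give start = 1 and end = 2, so only page 1 is
	 * truncated whole here; the partial page 0 (bytes 100..4095) is
	 * zeroed via truncate_inode_partial_folio() further down.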
	 */
	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (lend == -1)
		/*
		 * lend == -1 indicates end-of-file so we have to set 'end'
		 * to the highest possible pgoff_t and since the type is
		 * unsigned we're using -1.
		 */
		end = -1;
	else
		end = (lend + 1) >> PAGE_SHIFT;

	folio_batch_init(&fbatch);
	index = start;
	while (index < end && find_lock_entries(mapping, &index, end - 1,
			&fbatch, indices)) {
		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			truncate_cleanup_folio(fbatch.folios[i]);
		delete_from_page_cache_batch(mapping, &fbatch);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			folio_unlock(fbatch.folios[i]);
		folio_batch_release(&fbatch);
		cond_resched();
	}

	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
	folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0);
	if (!IS_ERR(folio)) {
		same_folio = lend < folio_pos(folio) + folio_size(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
			start = folio_next_index(folio);
			if (same_folio)
				end = folio->index;
		}
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
	}

	if (!same_folio) {
		folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT,
						FGP_LOCK, 0);
		if (!IS_ERR(folio)) {
			if (!truncate_inode_partial_folio(folio, lstart, lend))
				end = folio->index;
			folio_unlock(folio);
			folio_put(folio);
		}
	}

	index = start;
	while (index < end) {
		cond_resched();
		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
				indices)) {
			/* If all gone from start onwards, we're done */
			if (index == start)
				break;
			/* Otherwise restart to make sure all gone */
			index = start;
			continue;
		}

		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing page->index */

			if (xa_is_value(folio))
				continue;

			folio_lock(folio);
			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
			folio_wait_writeback(folio);
			truncate_inode_folio(mapping, folio);
			folio_unlock(folio);
		}
		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
		folio_batch_release(&fbatch);
	}
}
EXPORT_SYMBOL(truncate_inode_pages_range);

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_rwsem and
 * mapping->invalidate_lock.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __filemap_remove_folio()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);

/**
 * truncate_inode_pages_final - truncate *all* pages before inode dies
 * @mapping: mapping to truncate
 *
 * Called under (and serialized by) inode->i_rwsem.
 *
 * Filesystems have to use this in the .evict_inode path to inform the
 * VM that this is the final truncate and the inode is going away.
 */
void truncate_inode_pages_final(struct address_space *mapping)
{
	/*
	 * Page reclaim can not participate in regular inode lifetime
	 * management (can't call iput()) and thus can race with the
	 * inode teardown.  Tell it when the address space is exiting,
	 * so that it does not install eviction information after the
	 * final truncate has begun.
	 */
	mapping_set_exiting(mapping);

	if (!mapping_empty(mapping)) {
		/*
		 * As truncation uses a lockless tree lookup, cycle
		 * the tree lock to make sure any ongoing tree
		 * modification that does not see AS_EXITING is
		 * completed before starting the final truncate.
		 */
		xa_lock_irq(&mapping->i_pages);
		xa_unlock_irq(&mapping->i_pages);
	}

	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);

/**
 * mapping_try_invalidate - Invalidate all the evictable folios of one inode
 * @mapping: the address_space which holds the folios to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 * @nr_failed: How many folio invalidations failed
 *
 * This function is similar to invalidate_mapping_pages(), except that it
 * returns the number of folios which could not be evicted in @nr_failed.
 */
unsigned long mapping_try_invalidate(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_failed)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio_batch fbatch;
	pgoff_t index = start;
	unsigned long ret;
	unsigned long count = 0;
	int i;

	folio_batch_init(&fbatch);
	while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing folio->index */

			if (xa_is_value(folio)) {
				count += invalidate_exceptional_entry(mapping,
							indices[i], folio);
				continue;
			}

			ret = mapping_evict_folio(mapping, folio);
			folio_unlock(folio);
			/*
			 * Invalidation is a hint that the folio is no longer
			 * of interest, so try to speed up its reclaim.
			 */
			if (!ret) {
				deactivate_file_folio(folio);
				/* Likely in the lru cache of a remote CPU */
				if (nr_failed)
					(*nr_failed)++;
			}
			count += ret;
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
	return count;
}

/**
 * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
 * @mapping: the address_space which holds the cache to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function removes pages that are clean, unmapped and unlocked,
 * as well as shadow entries. It will not block on IO activity.
 *
 * If you want to remove all the pages of one inode, regardless of
 * their use and writeback state, use truncate_inode_pages().
 *
 * Return: The number of indices that had their contents invalidated
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	return mapping_try_invalidate(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);

/*
 * This is like invalidate_inode_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the folio_add_lru() caches.
 */
static int invalidate_complete_folio2(struct address_space *mapping,
					struct folio *folio)
{
	if (folio->mapping != mapping)
		return 0;

	if (!filemap_release_folio(folio, GFP_KERNEL))
		return 0;

	spin_lock(&mapping->host->i_lock);
	xa_lock_irq(&mapping->i_pages);
	if (folio_test_dirty(folio))
		goto failed;

	BUG_ON(folio_has_private(folio));
	__filemap_remove_folio(folio, NULL);
	xa_unlock_irq(&mapping->i_pages);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);

	filemap_free_folio(mapping, folio);
	return 1;
failed:
	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&mapping->host->i_lock);
	return 0;
}

static int folio_launder(struct address_space *mapping, struct folio *folio)
{
	if (!folio_test_dirty(folio))
		return 0;
	if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL)
		return 0;
	return mapping->a_ops->launder_folio(folio);
}

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
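 *
 * A typical caller sketch (assumed, not taken from this file) is a direct
 * I/O write path dropping cached pages for the written byte range:
 *
 *	err = invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 *					    (pos + len - 1) >> PAGE_SHIFT);
 *	if (err)
 *		return err;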
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio_batch fbatch;
	pgoff_t index;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;

	if (mapping_empty(mapping))
		return 0;

	folio_batch_init(&fbatch);
	index = start;
	while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			/* We rely upon deletion not changing folio->index */

			if (xa_is_value(folio)) {
				if (!invalidate_exceptional_entry2(mapping,
						indices[i], folio))
					ret = -EBUSY;
				continue;
			}

			if (!did_range_unmap && folio_mapped(folio)) {
				/*
				 * If folio is mapped, before taking its lock,
				 * zap the rest of the file in one hit.
				 */
				unmap_mapping_pages(mapping, indices[i],
						(1 + end - indices[i]), false);
				did_range_unmap = 1;
			}

			folio_lock(folio);
			if (unlikely(folio->mapping != mapping)) {
				folio_unlock(folio);
				continue;
			}
			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
			folio_wait_writeback(folio);

			if (folio_mapped(folio))
				unmap_mapping_folio(folio);
			BUG_ON(folio_mapped(folio));

			ret2 = folio_launder(mapping, folio);
			if (ret2 == 0) {
				if (!invalidate_complete_folio2(mapping, folio))
					ret2 = -EBUSY;
			}
			if (ret2 < 0)
				ret = ret2;
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}
	/*
	 * For DAX we invalidate page tables after invalidating page cache.  We
	 * could invalidate page tables while invalidating each entry however
	 * that would be expensive.
	 * And doing range unmapping before doesn't work as we have no cheap
	 * way to find whether page cache entry didn't get remapped later.
	 */
	if (dax_mapping(mapping)) {
		unmap_mapping_pages(mapping, start, end - start + 1, false);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);

/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t holebegin = round_up(newsize, PAGE_SIZE);

	/*
	 * unmap_mapping_range is called twice, first simply for
	 * efficiency so that truncate_inode_pages does fewer
	 * single-page unmaps.  However after this first call, and
	 * before truncate_inode_pages finishes, it is possible for
	 * private pages to be COWed, which remain after
	 * truncate_inode_pages finishes, hence the second
	 * unmap_mapping_range call must be made for correctness.
	 */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, newsize);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);

/**
 * truncate_setsize - update inode and pagecache for a new file size
 * @inode: inode
 * @newsize: new file size
 *
 * truncate_setsize updates i_size and performs pagecache truncation (if
 * necessary) to @newsize.  It will typically be called from the filesystem's
 * setattr function when ATTR_SIZE is passed in.
 *
 * Must be called with a lock serializing truncates and writes (generally
 * i_rwsem but e.g. xfs uses a different lock) and before all filesystem
 * specific block truncation has been performed.
 */
void truncate_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	i_size_write(inode, newsize);
	if (newsize > oldsize)
		pagecache_isize_extended(inode, oldsize, newsize);
	truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);

/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode: inode for which i_size was extended
 * @from: original inode size
 * @to: new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size.  We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way filesystem can be sure that page_mkwrite() is called on
 * the page before user writes to the page via mmap after the i_size has been
 * changed.
 *
 * The function must be called after i_size is updated so that page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_rwsem - this not only
 * makes sure i_size is stable but also that userspace cannot observe new
 * i_size value before we are prepared to store mmap writes at new inode size.
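 *
 * A minimal calling sketch for the extending-truncate case (this mirrors
 * what truncate_setsize() above already does; an extending write path is
 * analogous):
 *
 *	oldsize = inode->i_size;
 *	i_size_write(inode, newsize);
 *	if (newsize > oldsize)
 *		pagecache_isize_extended(inode, oldsize, newsize);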
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
	int bsize = i_blocksize(inode);
	loff_t rounded_from;
	struct page *page;
	pgoff_t index;

	WARN_ON(to > inode->i_size);

	if (from >= to || bsize == PAGE_SIZE)
		return;
	/* Page straddling @from will not have any hole block created? */
	rounded_from = round_up(from, bsize);
	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
		return;

	index = from >> PAGE_SHIFT;
	page = find_lock_page(inode->i_mapping, index);
	/* Page not cached? Nothing to do */
	if (!page)
		return;
	/*
	 * See clear_page_dirty_for_io() for details why set_page_dirty()
	 * is needed.
	 */
	if (page_mkclean(page))
		set_page_dirty(page);
	unlock_page(page);
	put_page(page);
}
EXPORT_SYMBOL(pagecache_isize_extended);

/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
	/*
	 * This rounding is currently just for example: unmap_mapping_range
	 * expands its hole outwards, whereas we want it to contract the hole
	 * inwards.  However, existing callers of truncate_pagecache_range are
	 * doing their own page rounding first.  Note that unmap_mapping_range
	 * allows holelen 0 for all, and we allow lend -1 for end of file.
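	 *
	 * For example (illustrative numbers, 4KiB pages): lstart = 100 and
	 * lend = 8191 give unmap_start = 4096 and unmap_end = 8191, so only
	 * the fully-covered page is unmapped here; the partially-covered
	 * page 0 is left to truncate_inode_pages_range() below.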
	 */

	/*
	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
	 * once (before truncating pagecache), and without "even_cows" flag:
	 * hole-punching should not remove private COWed pages from the hole.
	 */
	if ((u64)unmap_end > (u64)unmap_start)
		unmap_mapping_range(mapping, unmap_start,
				    1 + unmap_end - unmap_start, 0);
	truncate_inode_pages_range(mapping, lstart, lend);
}
EXPORT_SYMBOL(truncate_pagecache_range);