// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/buffer_head.h>	/* grr. try_to_release_page,
				   do_invalidatepage */
#include <linux/shmem_fs.h>
#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"

/*
 * Regular page slots are stabilized by the page lock even without the tree
 * itself locked.  These unlocked entries need verification under the tree
 * lock.
 */
static inline void __clear_shadow_entry(struct address_space *mapping,
				pgoff_t index, void *entry)
{
	XA_STATE(xas, &mapping->i_pages, index);

	xas_set_update(&xas, workingset_update_node);
	if (xas_load(&xas) != entry)
		return;
	xas_store(&xas, NULL);
	mapping->nrexceptional--;
}

static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
			       void *entry)
{
	xa_lock_irq(&mapping->i_pages);
	__clear_shadow_entry(mapping, index, entry);
	xa_unlock_irq(&mapping->i_pages);
}
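
/*
 * For illustration: shadow (exceptional) entries are xarray value entries
 * that encode workingset eviction information in place of an evicted page.
 * Code walking the page cache distinguishes them from real pages with
 * xa_is_value(), along these lines (handle_shadow_entry()/handle_page() are
 * hypothetical names used only for this sketch):
 *
 *	void *entry = xas_load(&xas);
 *
 *	if (xa_is_value(entry))
 *		handle_shadow_entry(entry);	// not a struct page
 *	else if (entry)
 *		handle_page(entry);		// a page pinned in the cache
 */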

/*
 * Unconditionally remove exceptional entries. Usually called from truncate
 * path. Note that the pagevec may be altered by this function by removing
 * exceptional entries similar to what pagevec_remove_exceptionals does.
 */
static void truncate_exceptional_pvec_entries(struct address_space *mapping,
				struct pagevec *pvec, pgoff_t *indices,
				pgoff_t end)
{
	int i, j;
	bool dax, lock;

	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return;

	for (j = 0; j < pagevec_count(pvec); j++)
		if (xa_is_value(pvec->pages[j]))
			break;

	if (j == pagevec_count(pvec))
		return;

	dax = dax_mapping(mapping);
	lock = !dax && indices[j] < end;
	if (lock)
		xa_lock_irq(&mapping->i_pages);

	for (i = j; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		pgoff_t index = indices[i];

		if (!xa_is_value(page)) {
			pvec->pages[j++] = page;
			continue;
		}

		if (index >= end)
			continue;

		if (unlikely(dax)) {
			dax_delete_mapping_entry(mapping, index);
			continue;
		}

		__clear_shadow_entry(mapping, index, page);
	}

	if (lock)
		xa_unlock_irq(&mapping->i_pages);
	pvec->nr = j;
}

/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
 * entries for invalidate_inode_pages().
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
					pgoff_t index, void *entry)
{
	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return 1;
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/*
 * Invalidate exceptional entry if clean. This handles exceptional entries for
 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
 */
static int invalidate_exceptional_entry2(struct address_space *mapping,
					 pgoff_t index, void *entry)
{
	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return 1;
	if (dax_mapping(mapping))
		return dax_invalidate_mapping_entry_sync(mapping, index);
	clear_shadow_entry(mapping, index, entry);
	return 1;
}

/**
 * do_invalidatepage - invalidate part or all of a page
 * @page: the page which is affected
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * do_invalidatepage() is called when all or part of the page has become
 * invalidated by a truncate operation.
 *
 * do_invalidatepage() does not have to release all buffers, but it must
 * ensure that no dirty buffer is left outside @offset and that no I/O
 * is underway against any of the blocks which are outside the truncation
 * point, because the caller is about to free (and possibly reuse) those
 * blocks on-disk.
 */
void do_invalidatepage(struct page *page, unsigned int offset,
		       unsigned int length)
{
	void (*invalidatepage)(struct page *, unsigned int, unsigned int);

	invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
	if (!invalidatepage)
		invalidatepage = block_invalidatepage;
#endif
	if (invalidatepage)
		(*invalidatepage)(page, offset, length);
}
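
/*
 * For illustration: a filesystem that keeps private state (e.g. buffer_heads
 * or journaling metadata) on its pages can provide its own hook in its
 * address_space_operations; if the hook is left NULL, do_invalidatepage()
 * above falls back to block_invalidatepage() on CONFIG_BLOCK kernels.
 * A minimal, hypothetical wiring (the foo_* names are made up):
 *
 *	static const struct address_space_operations foo_aops = {
 *		.readpage	= foo_readpage,
 *		.writepage	= foo_writepage,
 *		.invalidatepage	= foo_invalidatepage,
 *	};
 */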

/*
 * If truncate cannot remove the fs-private metadata from the page, the page
 * becomes orphaned.  It will be left on the LRU and may even be mapped into
 * user pagetables if we're racing with filemap_fault().
 *
 * We need to bail out if page->mapping is no longer equal to the original
 * mapping.  This happens a) when the VM reclaimed the page while we waited on
 * its lock, b) when a concurrent invalidate_mapping_pages got there first and
 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
 */
static void truncate_cleanup_page(struct page *page)
{
	if (page_mapped(page))
		unmap_mapping_page(page);

	if (page_has_private(page))
		do_invalidatepage(page, 0, thp_size(page));

	/*
	 * Some filesystems seem to re-dirty the page even after
	 * the VM has canceled the dirty bit (eg ext3 journaling).
	 * Hence dirty accounting check is placed after invalidation.
	 */
	cancel_dirty_page(page);
	ClearPageMappedToDisk(page);
}

/*
 * This is for invalidate_mapping_pages().  That function can be called at
 * any time, and is not supposed to throw away dirty pages.  But pages can
 * be marked dirty at any time too, so use remove_mapping which safely
 * discards clean, unused pages.
 *
 * Returns non-zero if the page was successfully invalidated.
 */
static int
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
	int ret;

	if (page->mapping != mapping)
		return 0;

	if (page_has_private(page) && !try_to_release_page(page, 0))
		return 0;

	ret = remove_mapping(mapping, page);

	return ret;
}

int truncate_inode_page(struct address_space *mapping, struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);

	if (page->mapping != mapping)
		return -EIO;

	truncate_cleanup_page(page);
	delete_from_page_cache(page);
	return 0;
}

/*
 * Used to get rid of pages on hardware memory corruption.
 */
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
	if (!mapping)
		return -EINVAL;
	/*
	 * Only punch for normal data pages for now.
	 * Handling other types like directories would need more auditing.
	 */
	if (!S_ISREG(mapping->host->i_mode))
		return -EIO;
	return truncate_inode_page(mapping, page);
}
EXPORT_SYMBOL(generic_error_remove_page);
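
/*
 * For illustration: memory-failure handling reaches this helper through the
 * ->error_remove_page address_space operation, which filesystems commonly
 * point straight at the generic implementation (hypothetical foo_aops):
 *
 *	static const struct address_space_operations foo_aops = {
 *		...
 *		.error_remove_page = generic_error_remove_page,
 *	};
 */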

/*
 * Safely invalidate one page from its pagecache mapping.
 * It only drops clean, unused pages. The page must be locked.
 *
 * Returns 1 if the page is successfully invalidated, otherwise 0.
 */
int invalidate_inode_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	if (!mapping)
		return 0;
	if (PageDirty(page) || PageWriteback(page))
		return 0;
	if (page_mapped(page))
		return 0;
	return invalidate_complete_page(mapping, page);
}

/**
 * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 * @lend: offset to which to truncate (inclusive)
 *
 * Truncate the page cache, removing the pages that are between
 * specified offsets (and zeroing out partial pages
 * if lstart or lend + 1 is not page aligned).
 *
 * Truncate takes two passes - the first pass is nonblocking.  It will not
 * block on page locks and it will not block on writeback.  The second pass
 * will wait.  This is to prevent as much IO as possible in the affected region.
 * The first pass will remove most pages, so the search cost of the second pass
 * is low.
 *
 * We pass down the cache-hot hint to the page freeing code.  Even if the
 * mapping is large, it is probably the case that the final pages are the most
 * recently touched, and freeing happens in ascending file offset order.
 *
 * Note that since ->invalidatepage() accepts a range to invalidate,
 * truncate_inode_pages_range is able to handle cases where lend + 1 is not
 * page aligned.
 */
void truncate_inode_pages_range(struct address_space *mapping,
				loff_t lstart, loff_t lend)
{
	pgoff_t		start;		/* inclusive */
	pgoff_t		end;		/* exclusive */
	unsigned int	partial_start;	/* inclusive */
	unsigned int	partial_end;	/* exclusive */
	struct pagevec	pvec;
	pgoff_t		indices[PAGEVEC_SIZE];
	pgoff_t		index;
	int		i;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		goto out;

	/* Offsets within partial pages */
	partial_start = lstart & (PAGE_SIZE - 1);
	partial_end = (lend + 1) & (PAGE_SIZE - 1);

	/*
	 * 'start' and 'end' always cover the range of pages to be fully
	 * truncated. Partial pages are covered with 'partial_start' at the
	 * start of the range and 'partial_end' at the end of the range.
	 * Note that 'end' is exclusive while 'lend' is inclusive.
	 */
	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (lend == -1)
		/*
		 * lend == -1 indicates end-of-file so we have to set 'end'
		 * to the highest possible pgoff_t and since the type is
		 * unsigned we're using -1.
		 */
		end = -1;
	else
		end = (lend + 1) >> PAGE_SHIFT;

	pagevec_init(&pvec);
	index = start;
	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE),
			indices)) {
		/*
		 * Pagevec array has exceptional entries and we may also fail
		 * to lock some pages. So we store pages that can be deleted
		 * in a new pagevec.
		 */
		struct pagevec locked_pvec;

		pagevec_init(&locked_pvec);
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end)
				break;

			if (xa_is_value(page))
				continue;

			if (!trylock_page(page))
				continue;
			WARN_ON(page_to_index(page) != index);
			if (PageWriteback(page)) {
				unlock_page(page);
				continue;
			}
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			pagevec_add(&locked_pvec, page);
		}
		for (i = 0; i < pagevec_count(&locked_pvec); i++)
			truncate_cleanup_page(locked_pvec.pages[i]);
		delete_from_page_cache_batch(mapping, &locked_pvec);
		for (i = 0; i < pagevec_count(&locked_pvec); i++)
			unlock_page(locked_pvec.pages[i]);
		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	if (partial_start) {
		struct page *page = find_lock_page(mapping, start - 1);
		if (page) {
			unsigned int top = PAGE_SIZE;
			if (start > end) {
				/* Truncation within a single page */
				top = partial_end;
				partial_end = 0;
			}
			wait_on_page_writeback(page);
			zero_user_segment(page, partial_start, top);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, partial_start,
						  top - partial_start);
			unlock_page(page);
			put_page(page);
		}
	}
	if (partial_end) {
		struct page *page = find_lock_page(mapping, end);
		if (page) {
			wait_on_page_writeback(page);
			zero_user_segment(page, 0, partial_end);
			cleancache_invalidate_page(mapping, page);
			if (page_has_private(page))
				do_invalidatepage(page, 0,
						  partial_end);
			unlock_page(page);
			put_page(page);
		}
	}
	/*
	 * If the truncation happened within a single page no pages
	 * will be released, just zeroed, so we can bail out now.
	 */
	if (start >= end)
		goto out;

	index = start;
	for ( ; ; ) {
		cond_resched();
		if (!pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
			/* If all gone from start onwards, we're done */
			if (index == start)
				break;
			/* Otherwise restart to make sure all gone */
			index = start;
			continue;
		}
		if (index == start && indices[0] >= end) {
			/* All gone out of hole to be punched, we're done */
			pagevec_remove_exceptionals(&pvec);
			pagevec_release(&pvec);
			break;
		}

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index >= end) {
				/* Restart punch to make sure all gone */
				index = start - 1;
				break;
			}

			if (xa_is_value(page))
				continue;

			lock_page(page);
			WARN_ON(page_to_index(page) != index);
			wait_on_page_writeback(page);
			truncate_inode_page(mapping, page);
			unlock_page(page);
		}
		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
		pagevec_release(&pvec);
		index++;
	}

out:
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(truncate_inode_pages_range);
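
/*
 * Worked example (assuming 4096-byte pages): truncating the byte range
 * lstart = 1536, lend = 10239 gives
 *
 *	partial_start = 1536 & 4095		= 1536
 *	partial_end   = 10240 & 4095		= 2048
 *	start         = (1536 + 4095) >> 12	= 1
 *	end           = 10240 >> 12		= 2 (exclusive)
 *
 * so page 1 is removed outright, page 0 is zeroed from byte 1536 to the end
 * of the page, and page 2 is zeroed over bytes 0-2047.
 */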

/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
EXPORT_SYMBOL(truncate_inode_pages);

/**
 * truncate_inode_pages_final - truncate *all* pages before inode dies
 * @mapping: mapping to truncate
 *
 * Called under (and serialized by) inode->i_mutex.
 *
 * Filesystems have to use this in the .evict_inode path to inform the
 * VM that this is the final truncate and the inode is going away.
 */
void truncate_inode_pages_final(struct address_space *mapping)
{
	unsigned long nrexceptional;
	unsigned long nrpages;

	/*
	 * Page reclaim cannot participate in regular inode lifetime
	 * management (can't call iput()) and thus can race with the
	 * inode teardown.  Tell it when the address space is exiting,
	 * so that it does not install eviction information after the
	 * final truncate has begun.
	 */
	mapping_set_exiting(mapping);

	/*
	 * When reclaim installs eviction entries, it increases
	 * nrexceptional first, then decreases nrpages.  Make sure we see
	 * this in the right order or we might miss an entry.
	 */
	nrpages = mapping->nrpages;
	smp_rmb();
	nrexceptional = mapping->nrexceptional;

	if (nrpages || nrexceptional) {
		/*
		 * As truncation uses a lockless tree lookup, cycle
		 * the tree lock to make sure any ongoing tree
		 * modification that does not see AS_EXITING is
		 * completed before starting the final truncate.
		 */
		xa_lock_irq(&mapping->i_pages);
		xa_unlock_irq(&mapping->i_pages);
	}

	/*
	 * Cleancache needs notification even if there are no pages or shadow
	 * entries.
	 */
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(truncate_inode_pages_final);
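
/*
 * For illustration: a typical ->evict_inode implementation ends with
 * something like the following (sketch only; foo_evict_inode is a
 * hypothetical example, real filesystems also release their private state):
 *
 *	static void foo_evict_inode(struct inode *inode)
 *	{
 *		truncate_inode_pages_final(&inode->i_data);
 *		clear_inode(inode);
 *	}
 */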

static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index = start;
	unsigned long ret;
	unsigned long count = 0;
	int i;

	pagevec_init(&pvec);
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index > end)
				break;

			if (xa_is_value(page)) {
				invalidate_exceptional_entry(mapping, index,
							     page);
				continue;
			}

			if (!trylock_page(page))
				continue;

			WARN_ON(page_to_index(page) != index);

			/* Middle of THP: skip */
			if (PageTransTail(page)) {
				unlock_page(page);
				continue;
			} else if (PageTransHuge(page)) {
				index += HPAGE_PMD_NR - 1;
				i += HPAGE_PMD_NR - 1;
				/*
				 * 'end' is in the middle of THP. Don't
				 * invalidate the page as the part outside of
				 * 'end' could be still useful.
				 */
				if (index > end) {
					unlock_page(page);
					continue;
				}

				/* Take a pin outside pagevec */
				get_page(page);

				/*
				 * Drop extra pins before trying to invalidate
				 * the huge page.
				 */
				pagevec_remove_exceptionals(&pvec);
				pagevec_release(&pvec);
			}

			ret = invalidate_inode_page(page);
			unlock_page(page);
			/*
			 * Invalidation is a hint that the page is no longer
			 * of interest, so try to speed up its reclaim.
			 */
			if (!ret) {
				deactivate_file_page(page);
				/* It is likely on the pagevec of a remote CPU */
				if (nr_pagevec)
					(*nr_pagevec)++;
			}

			if (PageTransHuge(page))
				put_page(page);
			count += ret;
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	return count;
}

/**
 * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
 * @mapping: the address_space which holds the pages to invalidate
 * @start: the offset 'from' which to invalidate
 * @end: the offset 'to' which to invalidate (inclusive)
 *
 * This function only removes the unlocked pages; if you want to
 * remove all the pages of one inode, you must call truncate_inode_pages.
 *
 * invalidate_mapping_pages() will not block on IO activity. It will not
 * invalidate pages which are dirty, locked, under writeback or mapped into
 * pagetables.
 *
 * Return: the number of the pages that were invalidated
 */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end)
{
	return __invalidate_mapping_pages(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);

/*
 * This helper is similar to invalidate_mapping_pages(), except that it
 * accounts for pages that are likely on a pagevec and counts them in
 * @nr_pagevec, which will be used by the caller.
 */
void invalidate_mapping_pagevec(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
	__invalidate_mapping_pages(mapping, start, end, nr_pagevec);
}
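
/*
 * For illustration: invalidate_mapping_pages() above is the helper behind
 * "echo 1 > /proc/sys/vm/drop_caches" style cache dropping, where each
 * inode's clean pages are invalidated with a call such as:
 *
 *	invalidate_mapping_pages(inode->i_mapping, 0, -1);
 *
 * Dirty, locked, writeback or mapped pages simply stay behind, which is why
 * the return value may be smaller than the number of pages in the range.
 */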

/*
 * This is like invalidate_complete_page(), except it ignores the page's
 * refcount.  We do this because invalidate_inode_pages2() needs stronger
 * invalidation guarantees, and cannot afford to leave pages behind because
 * shrink_page_list() has a temp ref on them, or because they're transiently
 * sitting in the lru_cache_add() pagevecs.
 */
static int
invalidate_complete_page2(struct address_space *mapping, struct page *page)
{
	unsigned long flags;

	if (page->mapping != mapping)
		return 0;

	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
		return 0;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (PageDirty(page))
		goto failed;

	BUG_ON(page_has_private(page));
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	if (mapping->a_ops->freepage)
		mapping->a_ops->freepage(page);

	put_page(page);	/* pagecache ref */
	return 1;
failed:
	xa_unlock_irqrestore(&mapping->i_pages, flags);
	return 0;
}

static int do_launder_page(struct address_space *mapping, struct page *page)
{
	if (!PageDirty(page))
		return 0;
	if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
		return 0;
	return mapping->a_ops->launder_page(page);
}
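
/*
 * For illustration: ->launder_page gives a filesystem one chance to write a
 * dirty page back before invalidate_inode_pages2_range() discards it.  The
 * hook has the shape
 *
 *	int (*launder_page)(struct page *page);
 *
 * and network filesystems such as NFS implement it so dirty data is not lost
 * when the cache must be invalidated; filesystems without that requirement
 * leave it NULL and do_launder_page() returns 0.
 */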

/**
 * invalidate_inode_pages2_range - remove range of pages from an address_space
 * @mapping: the address_space
 * @start: the page offset 'from' which to invalidate
 * @end: the page offset 'to' which to invalidate (inclusive)
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index;
	int i;
	int ret = 0;
	int ret2 = 0;
	int did_range_unmap = 0;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		goto out;

	pagevec_init(&pvec);
	index = start;
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = indices[i];
			if (index > end)
				break;

			if (xa_is_value(page)) {
				if (!invalidate_exceptional_entry2(mapping,
								   index, page))
					ret = -EBUSY;
				continue;
			}

			if (!did_range_unmap && page_mapped(page)) {
				/*
				 * If page is mapped, before taking its lock,
				 * zap the rest of the file in one hit.
				 */
				unmap_mapping_pages(mapping, index,
						(1 + end - index), false);
				did_range_unmap = 1;
			}

			lock_page(page);
			WARN_ON(page_to_index(page) != index);
			if (page->mapping != mapping) {
				unlock_page(page);
				continue;
			}
			wait_on_page_writeback(page);

			if (page_mapped(page))
				unmap_mapping_page(page);
			BUG_ON(page_mapped(page));

			ret2 = do_launder_page(mapping, page);
			if (ret2 == 0) {
				if (!invalidate_complete_page2(mapping, page))
					ret2 = -EBUSY;
			}
			if (ret2 < 0)
				ret = ret2;
			unlock_page(page);
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
	/*
	 * For DAX we invalidate page tables after invalidating page cache.
	 * We could invalidate page tables while invalidating each entry,
	 * but that would be expensive.  Doing the range unmapping up front
	 * doesn't work either, as we have no cheap way to find out whether
	 * a page cache entry got remapped later.
	 */
	if (dax_mapping(mapping)) {
		unmap_mapping_pages(mapping, start, end - start + 1, false);
	}
out:
	cleancache_invalidate_inode(mapping);
	return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);

/**
 * invalidate_inode_pages2 - remove all pages from an address_space
 * @mapping: the address_space
 *
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
 * Return: -EBUSY if any pages could not be invalidated.
 */
int invalidate_inode_pages2(struct address_space *mapping)
{
	return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
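
/*
 * For illustration: the direct I/O write path uses
 * invalidate_inode_pages2_range() to kick out now-stale cached pages after
 * writing around the page cache, roughly (simplified from the
 * generic_file_direct_write() pattern; error handling and the pre-write
 * invalidation are omitted):
 *
 *	written = mapping->a_ops->direct_IO(iocb, from);
 *	if (written > 0)
 *		invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 *				(pos + written - 1) >> PAGE_SHIFT);
 */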

/**
 * truncate_pagecache - unmap and remove pagecache that has been truncated
 * @inode: inode
 * @newsize: new file size
 *
 * inode's new i_size must already be written before truncate_pagecache
 * is called.
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks).  This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache(struct inode *inode, loff_t newsize)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t holebegin = round_up(newsize, PAGE_SIZE);

	/*
	 * unmap_mapping_range is called twice, first simply for
	 * efficiency so that truncate_inode_pages does fewer
	 * single-page unmaps.  However after this first call, and
	 * before truncate_inode_pages finishes, it is possible for
	 * private pages to be COWed, which remain after
	 * truncate_inode_pages finishes, hence the second
	 * unmap_mapping_range call must be made for correctness.
	 */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, newsize);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);

/**
 * truncate_setsize - update inode and pagecache for a new file size
 * @inode: inode
 * @newsize: new file size
 *
 * truncate_setsize updates i_size and performs pagecache truncation (if
 * necessary) to @newsize.  It will typically be called from the filesystem's
 * setattr function when ATTR_SIZE is passed in.
 *
 * Must be called with a lock serializing truncates and writes (generally
 * i_mutex but e.g. xfs uses a different lock) and before all filesystem
 * specific block truncation has been performed.
 */
void truncate_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	i_size_write(inode, newsize);
	if (newsize > oldsize)
		pagecache_isize_extended(inode, oldsize, newsize);
	truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);
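
/*
 * For illustration: a filesystem's ->setattr typically drives this helper
 * along the following lines (hypothetical foo_setattr; a real filesystem
 * would free the now-unused on-disk blocks after truncate_setsize()):
 *
 *	static int foo_setattr(struct dentry *dentry, struct iattr *iattr)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *		int error = setattr_prepare(dentry, iattr);
 *
 *		if (error)
 *			return error;
 *		if (iattr->ia_valid & ATTR_SIZE)
 *			truncate_setsize(inode, iattr->ia_size);
 *		setattr_copy(inode, iattr);
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */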

/**
 * pagecache_isize_extended - update pagecache after extension of i_size
 * @inode: inode for which i_size was extended
 * @from: original inode size
 * @to: new inode size
 *
 * Handle extension of inode size either caused by extending truncate or by
 * write starting after current i_size.  We mark the page straddling current
 * i_size RO so that page_mkwrite() is called on the nearest write access to
 * the page.  This way filesystem can be sure that page_mkwrite() is called on
 * the page before user writes to the page via mmap after the i_size has been
 * changed.
 *
 * The function must be called after i_size is updated so that a page fault
 * coming after we unlock the page will already see the new i_size.
 * The function must be called while we still hold i_mutex - this not only
 * makes sure i_size is stable but also that userspace cannot observe new
 * i_size value before we are prepared to store mmap writes at new inode size.
 */
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
	int bsize = i_blocksize(inode);
	loff_t rounded_from;
	struct page *page;
	pgoff_t index;

	WARN_ON(to > inode->i_size);

	if (from >= to || bsize == PAGE_SIZE)
		return;
	/* Page straddling @from will not have any hole block created? */
	rounded_from = round_up(from, bsize);
	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
		return;

	index = from >> PAGE_SHIFT;
	page = find_lock_page(inode->i_mapping, index);
	/* Page not cached? Nothing to do */
	if (!page)
		return;
	/*
	 * See clear_page_dirty_for_io() for details why set_page_dirty()
	 * is needed.
	 */
	if (page_mkclean(page))
		set_page_dirty(page);
	unlock_page(page);
	put_page(page);
}
EXPORT_SYMBOL(pagecache_isize_extended);
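
/*
 * Worked example (assuming PAGE_SIZE == 4096 and 1024-byte blocks):
 * extending i_size from 2600 to 5000 gives rounded_from = 3072.  Since
 * 5000 > 3072 and 3072 is not page aligned, the 3072-4095 block in the page
 * straddling the old i_size used to be a hole, so the page is
 * write-protected via page_mkclean() and the filesystem is guaranteed a
 * page_mkwrite() call before mmap stores land in that block.
 */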

/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks).  This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
	/*
	 * This rounding is currently just for example: unmap_mapping_range
	 * expands its hole outwards, whereas we want it to contract the hole
	 * inwards.  However, existing callers of truncate_pagecache_range are
	 * doing their own page rounding first.  Note that unmap_mapping_range
	 * allows holelen 0 for all, and we allow lend -1 for end of file.
	 */

	/*
	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
	 * once (before truncating pagecache), and without "even_cows" flag:
	 * hole-punching should not remove private COWed pages from the hole.
	 */
	if ((u64)unmap_end > (u64)unmap_start)
		unmap_mapping_range(mapping, unmap_start,
				    1 + unmap_end - unmap_start, 0);
	truncate_inode_pages_range(mapping, lstart, lend);
}
EXPORT_SYMBOL(truncate_pagecache_range);
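
/*
 * For illustration: hole-punching paths (fallocate(FALLOC_FL_PUNCH_HOLE))
 * typically call this before freeing the underlying blocks, e.g. to punch
 * bytes [offset, offset + len):
 *
 *	truncate_pagecache_range(inode, offset, offset + len - 1);
 *
 * with @lend inclusive, matching the convention documented above.
 */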