// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

#include "internal.h"

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
2962306a36Sopenharmony_ci */ 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ciint generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 3262306a36Sopenharmony_ci{ 3362306a36Sopenharmony_ci struct inode *inode; 3462306a36Sopenharmony_ci struct address_space *mapping; 3562306a36Sopenharmony_ci struct backing_dev_info *bdi; 3662306a36Sopenharmony_ci loff_t endbyte; /* inclusive */ 3762306a36Sopenharmony_ci pgoff_t start_index; 3862306a36Sopenharmony_ci pgoff_t end_index; 3962306a36Sopenharmony_ci unsigned long nrpages; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci inode = file_inode(file); 4262306a36Sopenharmony_ci if (S_ISFIFO(inode->i_mode)) 4362306a36Sopenharmony_ci return -ESPIPE; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci mapping = file->f_mapping; 4662306a36Sopenharmony_ci if (!mapping || len < 0) 4762306a36Sopenharmony_ci return -EINVAL; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci bdi = inode_to_bdi(mapping->host); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { 5262306a36Sopenharmony_ci switch (advice) { 5362306a36Sopenharmony_ci case POSIX_FADV_NORMAL: 5462306a36Sopenharmony_ci case POSIX_FADV_RANDOM: 5562306a36Sopenharmony_ci case POSIX_FADV_SEQUENTIAL: 5662306a36Sopenharmony_ci case POSIX_FADV_WILLNEED: 5762306a36Sopenharmony_ci case POSIX_FADV_NOREUSE: 5862306a36Sopenharmony_ci case POSIX_FADV_DONTNEED: 5962306a36Sopenharmony_ci /* no bad return value, but ignore advice */ 6062306a36Sopenharmony_ci break; 6162306a36Sopenharmony_ci default: 6262306a36Sopenharmony_ci return -EINVAL; 6362306a36Sopenharmony_ci } 6462306a36Sopenharmony_ci return 0; 6562306a36Sopenharmony_ci } 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci /* 6862306a36Sopenharmony_ci * Careful about overflows. Len == 0 means "as much as possible". Use 6962306a36Sopenharmony_ci * unsigned math because signed overflows are undefined and UBSan 7062306a36Sopenharmony_ci * complains. 
7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci endbyte = (u64)offset + (u64)len; 7362306a36Sopenharmony_ci if (!len || endbyte < len) 7462306a36Sopenharmony_ci endbyte = LLONG_MAX; 7562306a36Sopenharmony_ci else 7662306a36Sopenharmony_ci endbyte--; /* inclusive */ 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci switch (advice) { 7962306a36Sopenharmony_ci case POSIX_FADV_NORMAL: 8062306a36Sopenharmony_ci file->f_ra.ra_pages = bdi->ra_pages; 8162306a36Sopenharmony_ci spin_lock(&file->f_lock); 8262306a36Sopenharmony_ci file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE); 8362306a36Sopenharmony_ci spin_unlock(&file->f_lock); 8462306a36Sopenharmony_ci break; 8562306a36Sopenharmony_ci case POSIX_FADV_RANDOM: 8662306a36Sopenharmony_ci spin_lock(&file->f_lock); 8762306a36Sopenharmony_ci file->f_mode |= FMODE_RANDOM; 8862306a36Sopenharmony_ci spin_unlock(&file->f_lock); 8962306a36Sopenharmony_ci break; 9062306a36Sopenharmony_ci case POSIX_FADV_SEQUENTIAL: 9162306a36Sopenharmony_ci file->f_ra.ra_pages = bdi->ra_pages * 2; 9262306a36Sopenharmony_ci spin_lock(&file->f_lock); 9362306a36Sopenharmony_ci file->f_mode &= ~FMODE_RANDOM; 9462306a36Sopenharmony_ci spin_unlock(&file->f_lock); 9562306a36Sopenharmony_ci break; 9662306a36Sopenharmony_ci case POSIX_FADV_WILLNEED: 9762306a36Sopenharmony_ci /* First and last PARTIAL page! 
*/ 9862306a36Sopenharmony_ci start_index = offset >> PAGE_SHIFT; 9962306a36Sopenharmony_ci end_index = endbyte >> PAGE_SHIFT; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci /* Careful about overflow on the "+1" */ 10262306a36Sopenharmony_ci nrpages = end_index - start_index + 1; 10362306a36Sopenharmony_ci if (!nrpages) 10462306a36Sopenharmony_ci nrpages = ~0UL; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci force_page_cache_readahead(mapping, file, start_index, nrpages); 10762306a36Sopenharmony_ci break; 10862306a36Sopenharmony_ci case POSIX_FADV_NOREUSE: 10962306a36Sopenharmony_ci spin_lock(&file->f_lock); 11062306a36Sopenharmony_ci file->f_mode |= FMODE_NOREUSE; 11162306a36Sopenharmony_ci spin_unlock(&file->f_lock); 11262306a36Sopenharmony_ci break; 11362306a36Sopenharmony_ci case POSIX_FADV_DONTNEED: 11462306a36Sopenharmony_ci __filemap_fdatawrite_range(mapping, offset, endbyte, 11562306a36Sopenharmony_ci WB_SYNC_NONE); 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci /* 11862306a36Sopenharmony_ci * First and last FULL page! Partial pages are deliberately 11962306a36Sopenharmony_ci * preserved on the expectation that it is better to preserve 12062306a36Sopenharmony_ci * needed memory than to discard unneeded memory. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; 12362306a36Sopenharmony_ci end_index = (endbyte >> PAGE_SHIFT); 12462306a36Sopenharmony_ci /* 12562306a36Sopenharmony_ci * The page at end_index will be inclusively discarded according 12662306a36Sopenharmony_ci * by invalidate_mapping_pages(), so subtracting 1 from 12762306a36Sopenharmony_ci * end_index means we will skip the last page. But if endbyte 12862306a36Sopenharmony_ci * is page aligned or is at the end of file, we should not skip 12962306a36Sopenharmony_ci * that page - discarding the last page is safe enough. 
13062306a36Sopenharmony_ci */ 13162306a36Sopenharmony_ci if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK && 13262306a36Sopenharmony_ci endbyte != inode->i_size - 1) { 13362306a36Sopenharmony_ci /* First page is tricky as 0 - 1 = -1, but pgoff_t 13462306a36Sopenharmony_ci * is unsigned, so the end_index >= start_index 13562306a36Sopenharmony_ci * check below would be true and we'll discard the whole 13662306a36Sopenharmony_ci * file cache which is not what was asked. 13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_ci if (end_index == 0) 13962306a36Sopenharmony_ci break; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci end_index--; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (end_index >= start_index) { 14562306a36Sopenharmony_ci unsigned long nr_failed = 0; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* 14862306a36Sopenharmony_ci * It's common to FADV_DONTNEED right after 14962306a36Sopenharmony_ci * the read or write that instantiates the 15062306a36Sopenharmony_ci * pages, in which case there will be some 15162306a36Sopenharmony_ci * sitting on the local LRU cache. Try to 15262306a36Sopenharmony_ci * avoid the expensive remote drain and the 15362306a36Sopenharmony_ci * second cache tree walk below by flushing 15462306a36Sopenharmony_ci * them out right away. 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_ci lru_add_drain(); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci mapping_try_invalidate(mapping, start_index, end_index, 15962306a36Sopenharmony_ci &nr_failed); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* 16262306a36Sopenharmony_ci * The failures may be due to the folio being 16362306a36Sopenharmony_ci * in the LRU cache of a remote CPU. Drain all 16462306a36Sopenharmony_ci * caches and try again. 
16562306a36Sopenharmony_ci */ 16662306a36Sopenharmony_ci if (nr_failed) { 16762306a36Sopenharmony_ci lru_add_drain_all(); 16862306a36Sopenharmony_ci invalidate_mapping_pages(mapping, start_index, 16962306a36Sopenharmony_ci end_index); 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci } 17262306a36Sopenharmony_ci break; 17362306a36Sopenharmony_ci default: 17462306a36Sopenharmony_ci return -EINVAL; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci return 0; 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ciEXPORT_SYMBOL(generic_fadvise); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ciint vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci if (file->f_op->fadvise) 18362306a36Sopenharmony_ci return file->f_op->fadvise(file, offset, len, advice); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci return generic_fadvise(file, offset, len, advice); 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ciEXPORT_SYMBOL(vfs_fadvise); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci#ifdef CONFIG_ADVISE_SYSCALLS 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ciint ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct fd f = fdget(fd); 19462306a36Sopenharmony_ci int ret; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci if (!f.file) 19762306a36Sopenharmony_ci return -EBADF; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci ret = vfs_fadvise(f.file, offset, len, advice); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci fdput(f); 20262306a36Sopenharmony_ci return ret; 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ciSYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) 20662306a36Sopenharmony_ci{ 20762306a36Sopenharmony_ci return ksys_fadvise64_64(fd, offset, len, advice); 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 
21062306a36Sopenharmony_ci#ifdef __ARCH_WANT_SYS_FADVISE64 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ciSYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) 21362306a36Sopenharmony_ci{ 21462306a36Sopenharmony_ci return ksys_fadvise64_64(fd, offset, len, advice); 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci#endif 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64) 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ciCOMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset), 22262306a36Sopenharmony_ci compat_arg_u64_dual(len), int, advice) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset), 22562306a36Sopenharmony_ci compat_arg_u64_glue(len), advice); 22662306a36Sopenharmony_ci} 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci#endif 22962306a36Sopenharmony_ci#endif 230