162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/mm/mincore.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 1994-2006 Linus Torvalds 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci/* 962306a36Sopenharmony_ci * The mincore() system call. 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci#include <linux/pagemap.h> 1262306a36Sopenharmony_ci#include <linux/gfp.h> 1362306a36Sopenharmony_ci#include <linux/pagewalk.h> 1462306a36Sopenharmony_ci#include <linux/mman.h> 1562306a36Sopenharmony_ci#include <linux/syscalls.h> 1662306a36Sopenharmony_ci#include <linux/swap.h> 1762306a36Sopenharmony_ci#include <linux/swapops.h> 1862306a36Sopenharmony_ci#include <linux/shmem_fs.h> 1962306a36Sopenharmony_ci#include <linux/hugetlb.h> 2062306a36Sopenharmony_ci#include <linux/pgtable.h> 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#include <linux/uaccess.h> 2362306a36Sopenharmony_ci#include "swap.h" 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistatic int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, 2662306a36Sopenharmony_ci unsigned long end, struct mm_walk *walk) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE 2962306a36Sopenharmony_ci unsigned char present; 3062306a36Sopenharmony_ci unsigned char *vec = walk->private; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci /* 3362306a36Sopenharmony_ci * Hugepages under user process are always in RAM and never 3462306a36Sopenharmony_ci * swapped out, but theoretically it needs to be checked. 3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_ci present = pte && !huge_pte_none_mostly(huge_ptep_get(pte)); 3762306a36Sopenharmony_ci for (; addr != end; vec++, addr += PAGE_SIZE) 3862306a36Sopenharmony_ci *vec = present; 3962306a36Sopenharmony_ci walk->private = vec; 4062306a36Sopenharmony_ci#else 4162306a36Sopenharmony_ci BUG(); 4262306a36Sopenharmony_ci#endif 4362306a36Sopenharmony_ci return 0; 4462306a36Sopenharmony_ci} 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci * Later we can get more picky about what "in core" means precisely. 4862306a36Sopenharmony_ci * For now, simply check to see if the page is in the page cache, 4962306a36Sopenharmony_ci * and is up to date; i.e. that no page-in operation would be required 5062306a36Sopenharmony_ci * at this time if an application were to map and access this page. 5162306a36Sopenharmony_ci */ 5262306a36Sopenharmony_cistatic unsigned char mincore_page(struct address_space *mapping, pgoff_t index) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci unsigned char present = 0; 5562306a36Sopenharmony_ci struct folio *folio; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci /* 5862306a36Sopenharmony_ci * When tmpfs swaps out a page from a file, any process mapping that 5962306a36Sopenharmony_ci * file will not get a swp_entry_t in its pte, but rather it is like 6062306a36Sopenharmony_ci * any other file mapping (ie. marked !present and faulted in with 6162306a36Sopenharmony_ci * tmpfs's .fault). So swapped out tmpfs mappings are tested here. 6262306a36Sopenharmony_ci */ 6362306a36Sopenharmony_ci folio = filemap_get_incore_folio(mapping, index); 6462306a36Sopenharmony_ci if (!IS_ERR(folio)) { 6562306a36Sopenharmony_ci present = folio_test_uptodate(folio); 6662306a36Sopenharmony_ci folio_put(folio); 6762306a36Sopenharmony_ci } 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci return present; 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistatic int __mincore_unmapped_range(unsigned long addr, unsigned long end, 7362306a36Sopenharmony_ci struct vm_area_struct *vma, unsigned char *vec) 7462306a36Sopenharmony_ci{ 7562306a36Sopenharmony_ci unsigned long nr = (end - addr) >> PAGE_SHIFT; 7662306a36Sopenharmony_ci int i; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci if (vma->vm_file) { 7962306a36Sopenharmony_ci pgoff_t pgoff; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci pgoff = linear_page_index(vma, addr); 8262306a36Sopenharmony_ci for (i = 0; i < nr; i++, pgoff++) 8362306a36Sopenharmony_ci vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); 8462306a36Sopenharmony_ci } else { 8562306a36Sopenharmony_ci for (i = 0; i < nr; i++) 8662306a36Sopenharmony_ci vec[i] = 0; 8762306a36Sopenharmony_ci } 8862306a36Sopenharmony_ci return nr; 8962306a36Sopenharmony_ci} 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_cistatic int mincore_unmapped_range(unsigned long addr, unsigned long end, 9262306a36Sopenharmony_ci __always_unused int depth, 9362306a36Sopenharmony_ci struct mm_walk *walk) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci walk->private += __mincore_unmapped_range(addr, end, 9662306a36Sopenharmony_ci walk->vma, walk->private); 9762306a36Sopenharmony_ci return 0; 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 10162306a36Sopenharmony_ci struct mm_walk *walk) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci spinlock_t *ptl; 10462306a36Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 10562306a36Sopenharmony_ci pte_t *ptep; 10662306a36Sopenharmony_ci unsigned char *vec = walk->private; 10762306a36Sopenharmony_ci int nr = (end - addr) >> PAGE_SHIFT; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci ptl = pmd_trans_huge_lock(pmd, vma); 11062306a36Sopenharmony_ci if (ptl) { 11162306a36Sopenharmony_ci memset(vec, 1, nr); 11262306a36Sopenharmony_ci spin_unlock(ptl); 11362306a36Sopenharmony_ci goto out; 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 11762306a36Sopenharmony_ci if (!ptep) { 11862306a36Sopenharmony_ci walk->action = ACTION_AGAIN; 11962306a36Sopenharmony_ci return 0; 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci for (; addr != end; ptep++, addr += PAGE_SIZE) { 12262306a36Sopenharmony_ci pte_t pte = ptep_get(ptep); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci /* We need to do cache lookup too for pte markers */ 12562306a36Sopenharmony_ci if (pte_none_mostly(pte)) 12662306a36Sopenharmony_ci __mincore_unmapped_range(addr, addr + PAGE_SIZE, 12762306a36Sopenharmony_ci vma, vec); 12862306a36Sopenharmony_ci else if (pte_present(pte)) 12962306a36Sopenharmony_ci *vec = 1; 13062306a36Sopenharmony_ci else { /* pte is a swap entry */ 13162306a36Sopenharmony_ci swp_entry_t entry = pte_to_swp_entry(pte); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci if (non_swap_entry(entry)) { 13462306a36Sopenharmony_ci /* 13562306a36Sopenharmony_ci * migration or hwpoison entries are always 13662306a36Sopenharmony_ci * uptodate 13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_ci *vec = 1; 13962306a36Sopenharmony_ci } else { 14062306a36Sopenharmony_ci#ifdef CONFIG_SWAP 14162306a36Sopenharmony_ci *vec = mincore_page(swap_address_space(entry), 14262306a36Sopenharmony_ci swp_offset(entry)); 14362306a36Sopenharmony_ci#else 14462306a36Sopenharmony_ci WARN_ON(1); 14562306a36Sopenharmony_ci *vec = 1; 14662306a36Sopenharmony_ci#endif 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci vec++; 15062306a36Sopenharmony_ci } 15162306a36Sopenharmony_ci pte_unmap_unlock(ptep - 1, ptl); 15262306a36Sopenharmony_ciout: 15362306a36Sopenharmony_ci walk->private += nr; 15462306a36Sopenharmony_ci cond_resched(); 15562306a36Sopenharmony_ci return 0; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_cistatic inline bool can_do_mincore(struct vm_area_struct *vma) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci if (vma_is_anonymous(vma)) 16162306a36Sopenharmony_ci return true; 16262306a36Sopenharmony_ci if (!vma->vm_file) 16362306a36Sopenharmony_ci return false; 16462306a36Sopenharmony_ci /* 16562306a36Sopenharmony_ci * Reveal pagecache information only for non-anonymous mappings that 16662306a36Sopenharmony_ci * correspond to the files the calling process could (if tried) open 16762306a36Sopenharmony_ci * for writing; otherwise we'd be including shared non-exclusive 16862306a36Sopenharmony_ci * mappings, which opens a side channel. 16962306a36Sopenharmony_ci */ 17062306a36Sopenharmony_ci return inode_owner_or_capable(&nop_mnt_idmap, 17162306a36Sopenharmony_ci file_inode(vma->vm_file)) || 17262306a36Sopenharmony_ci file_permission(vma->vm_file, MAY_WRITE) == 0; 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic const struct mm_walk_ops mincore_walk_ops = { 17662306a36Sopenharmony_ci .pmd_entry = mincore_pte_range, 17762306a36Sopenharmony_ci .pte_hole = mincore_unmapped_range, 17862306a36Sopenharmony_ci .hugetlb_entry = mincore_hugetlb, 17962306a36Sopenharmony_ci .walk_lock = PGWALK_RDLOCK, 18062306a36Sopenharmony_ci}; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci/* 18362306a36Sopenharmony_ci * Do a chunk of "sys_mincore()". We've already checked 18462306a36Sopenharmony_ci * all the arguments, we hold the mmap semaphore: we should 18562306a36Sopenharmony_ci * just return the amount of info we're asked for. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_cistatic long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci struct vm_area_struct *vma; 19062306a36Sopenharmony_ci unsigned long end; 19162306a36Sopenharmony_ci int err; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci vma = vma_lookup(current->mm, addr); 19462306a36Sopenharmony_ci if (!vma) 19562306a36Sopenharmony_ci return -ENOMEM; 19662306a36Sopenharmony_ci end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); 19762306a36Sopenharmony_ci if (!can_do_mincore(vma)) { 19862306a36Sopenharmony_ci unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE); 19962306a36Sopenharmony_ci memset(vec, 1, pages); 20062306a36Sopenharmony_ci return pages; 20162306a36Sopenharmony_ci } 20262306a36Sopenharmony_ci err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec); 20362306a36Sopenharmony_ci if (err < 0) 20462306a36Sopenharmony_ci return err; 20562306a36Sopenharmony_ci return (end - addr) >> PAGE_SHIFT; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci/* 20962306a36Sopenharmony_ci * The mincore(2) system call. 21062306a36Sopenharmony_ci * 21162306a36Sopenharmony_ci * mincore() returns the memory residency status of the pages in the 21262306a36Sopenharmony_ci * current process's address space specified by [addr, addr + len). 21362306a36Sopenharmony_ci * The status is returned in a vector of bytes. The least significant 21462306a36Sopenharmony_ci * bit of each byte is 1 if the referenced page is in memory, otherwise 21562306a36Sopenharmony_ci * it is zero. 21662306a36Sopenharmony_ci * 21762306a36Sopenharmony_ci * Because the status of a page can change after mincore() checks it 21862306a36Sopenharmony_ci * but before it returns to the application, the returned vector may 21962306a36Sopenharmony_ci * contain stale information. Only locked pages are guaranteed to 22062306a36Sopenharmony_ci * remain in memory. 22162306a36Sopenharmony_ci * 22262306a36Sopenharmony_ci * return values: 22362306a36Sopenharmony_ci * zero - success 22462306a36Sopenharmony_ci * -EFAULT - vec points to an illegal address 22562306a36Sopenharmony_ci * -EINVAL - addr is not a multiple of PAGE_SIZE 22662306a36Sopenharmony_ci * -ENOMEM - Addresses in the range [addr, addr + len] are 22762306a36Sopenharmony_ci * invalid for the address space of this process, or 22862306a36Sopenharmony_ci * specify one or more pages which are not currently 22962306a36Sopenharmony_ci * mapped 23062306a36Sopenharmony_ci * -EAGAIN - A kernel resource was temporarily unavailable. 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ciSYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, 23362306a36Sopenharmony_ci unsigned char __user *, vec) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci long retval; 23662306a36Sopenharmony_ci unsigned long pages; 23762306a36Sopenharmony_ci unsigned char *tmp; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci start = untagged_addr(start); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci /* Check the start address: needs to be page-aligned.. */ 24262306a36Sopenharmony_ci if (start & ~PAGE_MASK) 24362306a36Sopenharmony_ci return -EINVAL; 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci /* ..and we need to be passed a valid user-space range */ 24662306a36Sopenharmony_ci if (!access_ok((void __user *) start, len)) 24762306a36Sopenharmony_ci return -ENOMEM; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci /* This also avoids any overflows on PAGE_ALIGN */ 25062306a36Sopenharmony_ci pages = len >> PAGE_SHIFT; 25162306a36Sopenharmony_ci pages += (offset_in_page(len)) != 0; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (!access_ok(vec, pages)) 25462306a36Sopenharmony_ci return -EFAULT; 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci tmp = (void *) __get_free_page(GFP_USER); 25762306a36Sopenharmony_ci if (!tmp) 25862306a36Sopenharmony_ci return -EAGAIN; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci retval = 0; 26162306a36Sopenharmony_ci while (pages) { 26262306a36Sopenharmony_ci /* 26362306a36Sopenharmony_ci * Do at most PAGE_SIZE entries per iteration, due to 26462306a36Sopenharmony_ci * the temporary buffer size. 26562306a36Sopenharmony_ci */ 26662306a36Sopenharmony_ci mmap_read_lock(current->mm); 26762306a36Sopenharmony_ci retval = do_mincore(start, min(pages, PAGE_SIZE), tmp); 26862306a36Sopenharmony_ci mmap_read_unlock(current->mm); 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci if (retval <= 0) 27162306a36Sopenharmony_ci break; 27262306a36Sopenharmony_ci if (copy_to_user(vec, tmp, retval)) { 27362306a36Sopenharmony_ci retval = -EFAULT; 27462306a36Sopenharmony_ci break; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci pages -= retval; 27762306a36Sopenharmony_ci vec += retval; 27862306a36Sopenharmony_ci start += retval << PAGE_SHIFT; 27962306a36Sopenharmony_ci retval = 0; 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci free_page((unsigned long) tmp); 28262306a36Sopenharmony_ci return retval; 28362306a36Sopenharmony_ci} 284