18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *	linux/mm/mincore.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 1994-2006  Linus Torvalds
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci/*
98c2ecf20Sopenharmony_ci * The mincore() system call.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
128c2ecf20Sopenharmony_ci#include <linux/gfp.h>
138c2ecf20Sopenharmony_ci#include <linux/pagewalk.h>
148c2ecf20Sopenharmony_ci#include <linux/mman.h>
158c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
168c2ecf20Sopenharmony_ci#include <linux/swap.h>
178c2ecf20Sopenharmony_ci#include <linux/swapops.h>
188c2ecf20Sopenharmony_ci#include <linux/shmem_fs.h>
198c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
208c2ecf20Sopenharmony_ci#include <linux/pgtable.h>
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_cistatic int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
258c2ecf20Sopenharmony_ci			unsigned long end, struct mm_walk *walk)
268c2ecf20Sopenharmony_ci{
278c2ecf20Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE
288c2ecf20Sopenharmony_ci	unsigned char present;
298c2ecf20Sopenharmony_ci	unsigned char *vec = walk->private;
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	/*
328c2ecf20Sopenharmony_ci	 * Hugepages under user process are always in RAM and never
338c2ecf20Sopenharmony_ci	 * swapped out, but theoretically it needs to be checked.
348c2ecf20Sopenharmony_ci	 */
358c2ecf20Sopenharmony_ci	present = pte && !huge_pte_none(huge_ptep_get(pte));
368c2ecf20Sopenharmony_ci	for (; addr != end; vec++, addr += PAGE_SIZE)
378c2ecf20Sopenharmony_ci		*vec = present;
388c2ecf20Sopenharmony_ci	walk->private = vec;
398c2ecf20Sopenharmony_ci#else
408c2ecf20Sopenharmony_ci	BUG();
418c2ecf20Sopenharmony_ci#endif
428c2ecf20Sopenharmony_ci	return 0;
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/*
468c2ecf20Sopenharmony_ci * Later we can get more picky about what "in core" means precisely.
478c2ecf20Sopenharmony_ci * For now, simply check to see if the page is in the page cache,
488c2ecf20Sopenharmony_ci * and is up to date; i.e. that no page-in operation would be required
498c2ecf20Sopenharmony_ci * at this time if an application were to map and access this page.
508c2ecf20Sopenharmony_ci */
518c2ecf20Sopenharmony_cistatic unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	unsigned char present = 0;
548c2ecf20Sopenharmony_ci	struct page *page;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	/*
578c2ecf20Sopenharmony_ci	 * When tmpfs swaps out a page from a file, any process mapping that
588c2ecf20Sopenharmony_ci	 * file will not get a swp_entry_t in its pte, but rather it is like
598c2ecf20Sopenharmony_ci	 * any other file mapping (ie. marked !present and faulted in with
608c2ecf20Sopenharmony_ci	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
618c2ecf20Sopenharmony_ci	 */
628c2ecf20Sopenharmony_ci	page = find_get_incore_page(mapping, index);
638c2ecf20Sopenharmony_ci	if (page) {
648c2ecf20Sopenharmony_ci		present = PageUptodate(page);
658c2ecf20Sopenharmony_ci		put_page(page);
668c2ecf20Sopenharmony_ci	}
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	return present;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cistatic int __mincore_unmapped_range(unsigned long addr, unsigned long end,
728c2ecf20Sopenharmony_ci				struct vm_area_struct *vma, unsigned char *vec)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	unsigned long nr = (end - addr) >> PAGE_SHIFT;
758c2ecf20Sopenharmony_ci	int i;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	if (vma->vm_file) {
788c2ecf20Sopenharmony_ci		pgoff_t pgoff;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci		pgoff = linear_page_index(vma, addr);
818c2ecf20Sopenharmony_ci		for (i = 0; i < nr; i++, pgoff++)
828c2ecf20Sopenharmony_ci			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
838c2ecf20Sopenharmony_ci	} else {
848c2ecf20Sopenharmony_ci		for (i = 0; i < nr; i++)
858c2ecf20Sopenharmony_ci			vec[i] = 0;
868c2ecf20Sopenharmony_ci	}
878c2ecf20Sopenharmony_ci	return nr;
888c2ecf20Sopenharmony_ci}
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_cistatic int mincore_unmapped_range(unsigned long addr, unsigned long end,
918c2ecf20Sopenharmony_ci				   __always_unused int depth,
928c2ecf20Sopenharmony_ci				   struct mm_walk *walk)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	walk->private += __mincore_unmapped_range(addr, end,
958c2ecf20Sopenharmony_ci						  walk->vma, walk->private);
968c2ecf20Sopenharmony_ci	return 0;
978c2ecf20Sopenharmony_ci}
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_cistatic int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
1008c2ecf20Sopenharmony_ci			struct mm_walk *walk)
1018c2ecf20Sopenharmony_ci{
1028c2ecf20Sopenharmony_ci	spinlock_t *ptl;
1038c2ecf20Sopenharmony_ci	struct vm_area_struct *vma = walk->vma;
1048c2ecf20Sopenharmony_ci	pte_t *ptep;
1058c2ecf20Sopenharmony_ci	unsigned char *vec = walk->private;
1068c2ecf20Sopenharmony_ci	int nr = (end - addr) >> PAGE_SHIFT;
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	ptl = pmd_trans_huge_lock(pmd, vma);
1098c2ecf20Sopenharmony_ci	if (ptl) {
1108c2ecf20Sopenharmony_ci		memset(vec, 1, nr);
1118c2ecf20Sopenharmony_ci		spin_unlock(ptl);
1128c2ecf20Sopenharmony_ci		goto out;
1138c2ecf20Sopenharmony_ci	}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	if (pmd_trans_unstable(pmd)) {
1168c2ecf20Sopenharmony_ci		__mincore_unmapped_range(addr, end, vma, vec);
1178c2ecf20Sopenharmony_ci		goto out;
1188c2ecf20Sopenharmony_ci	}
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
1218c2ecf20Sopenharmony_ci	for (; addr != end; ptep++, addr += PAGE_SIZE) {
1228c2ecf20Sopenharmony_ci		pte_t pte = *ptep;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci		if (pte_none(pte))
1258c2ecf20Sopenharmony_ci			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
1268c2ecf20Sopenharmony_ci						 vma, vec);
1278c2ecf20Sopenharmony_ci		else if (pte_present(pte))
1288c2ecf20Sopenharmony_ci			*vec = 1;
1298c2ecf20Sopenharmony_ci		else { /* pte is a swap entry */
1308c2ecf20Sopenharmony_ci			swp_entry_t entry = pte_to_swp_entry(pte);
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci			if (non_swap_entry(entry)) {
1338c2ecf20Sopenharmony_ci				/*
1348c2ecf20Sopenharmony_ci				 * migration or hwpoison entries are always
1358c2ecf20Sopenharmony_ci				 * uptodate
1368c2ecf20Sopenharmony_ci				 */
1378c2ecf20Sopenharmony_ci				*vec = 1;
1388c2ecf20Sopenharmony_ci			} else {
1398c2ecf20Sopenharmony_ci#ifdef CONFIG_SWAP
1408c2ecf20Sopenharmony_ci				*vec = mincore_page(swap_address_space(entry),
1418c2ecf20Sopenharmony_ci						    swp_offset(entry));
1428c2ecf20Sopenharmony_ci#else
1438c2ecf20Sopenharmony_ci				WARN_ON(1);
1448c2ecf20Sopenharmony_ci				*vec = 1;
1458c2ecf20Sopenharmony_ci#endif
1468c2ecf20Sopenharmony_ci			}
1478c2ecf20Sopenharmony_ci		}
1488c2ecf20Sopenharmony_ci		vec++;
1498c2ecf20Sopenharmony_ci	}
1508c2ecf20Sopenharmony_ci	pte_unmap_unlock(ptep - 1, ptl);
1518c2ecf20Sopenharmony_ciout:
1528c2ecf20Sopenharmony_ci	walk->private += nr;
1538c2ecf20Sopenharmony_ci	cond_resched();
1548c2ecf20Sopenharmony_ci	return 0;
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic inline bool can_do_mincore(struct vm_area_struct *vma)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	if (vma_is_anonymous(vma))
1608c2ecf20Sopenharmony_ci		return true;
1618c2ecf20Sopenharmony_ci	if (!vma->vm_file)
1628c2ecf20Sopenharmony_ci		return false;
1638c2ecf20Sopenharmony_ci	/*
1648c2ecf20Sopenharmony_ci	 * Reveal pagecache information only for non-anonymous mappings that
1658c2ecf20Sopenharmony_ci	 * correspond to the files the calling process could (if tried) open
1668c2ecf20Sopenharmony_ci	 * for writing; otherwise we'd be including shared non-exclusive
1678c2ecf20Sopenharmony_ci	 * mappings, which opens a side channel.
1688c2ecf20Sopenharmony_ci	 */
1698c2ecf20Sopenharmony_ci	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
1708c2ecf20Sopenharmony_ci		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_cistatic const struct mm_walk_ops mincore_walk_ops = {
1748c2ecf20Sopenharmony_ci	.pmd_entry		= mincore_pte_range,
1758c2ecf20Sopenharmony_ci	.pte_hole		= mincore_unmapped_range,
1768c2ecf20Sopenharmony_ci	.hugetlb_entry		= mincore_hugetlb,
1778c2ecf20Sopenharmony_ci};
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci/*
1808c2ecf20Sopenharmony_ci * Do a chunk of "sys_mincore()". We've already checked
1818c2ecf20Sopenharmony_ci * all the arguments, we hold the mmap semaphore: we should
1828c2ecf20Sopenharmony_ci * just return the amount of info we're asked for.
1838c2ecf20Sopenharmony_ci */
1848c2ecf20Sopenharmony_cistatic long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
1858c2ecf20Sopenharmony_ci{
1868c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
1878c2ecf20Sopenharmony_ci	unsigned long end;
1888c2ecf20Sopenharmony_ci	int err;
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	vma = find_vma(current->mm, addr);
1918c2ecf20Sopenharmony_ci	if (!vma || addr < vma->vm_start)
1928c2ecf20Sopenharmony_ci		return -ENOMEM;
1938c2ecf20Sopenharmony_ci	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
1948c2ecf20Sopenharmony_ci	if (!can_do_mincore(vma)) {
1958c2ecf20Sopenharmony_ci		unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
1968c2ecf20Sopenharmony_ci		memset(vec, 1, pages);
1978c2ecf20Sopenharmony_ci		return pages;
1988c2ecf20Sopenharmony_ci	}
1998c2ecf20Sopenharmony_ci	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
2008c2ecf20Sopenharmony_ci	if (err < 0)
2018c2ecf20Sopenharmony_ci		return err;
2028c2ecf20Sopenharmony_ci	return (end - addr) >> PAGE_SHIFT;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci/*
2068c2ecf20Sopenharmony_ci * The mincore(2) system call.
2078c2ecf20Sopenharmony_ci *
2088c2ecf20Sopenharmony_ci * mincore() returns the memory residency status of the pages in the
2098c2ecf20Sopenharmony_ci * current process's address space specified by [addr, addr + len).
2108c2ecf20Sopenharmony_ci * The status is returned in a vector of bytes.  The least significant
2118c2ecf20Sopenharmony_ci * bit of each byte is 1 if the referenced page is in memory, otherwise
2128c2ecf20Sopenharmony_ci * it is zero.
2138c2ecf20Sopenharmony_ci *
2148c2ecf20Sopenharmony_ci * Because the status of a page can change after mincore() checks it
2158c2ecf20Sopenharmony_ci * but before it returns to the application, the returned vector may
2168c2ecf20Sopenharmony_ci * contain stale information.  Only locked pages are guaranteed to
2178c2ecf20Sopenharmony_ci * remain in memory.
2188c2ecf20Sopenharmony_ci *
2198c2ecf20Sopenharmony_ci * return values:
2208c2ecf20Sopenharmony_ci *  zero    - success
2218c2ecf20Sopenharmony_ci *  -EFAULT - vec points to an illegal address
2228c2ecf20Sopenharmony_ci *  -EINVAL - addr is not a multiple of PAGE_SIZE
2238c2ecf20Sopenharmony_ci *  -ENOMEM - Addresses in the range [addr, addr + len] are
2248c2ecf20Sopenharmony_ci *		invalid for the address space of this process, or
2258c2ecf20Sopenharmony_ci *		specify one or more pages which are not currently
2268c2ecf20Sopenharmony_ci *		mapped
2278c2ecf20Sopenharmony_ci *  -EAGAIN - A kernel resource was temporarily unavailable.
2288c2ecf20Sopenharmony_ci */
2298c2ecf20Sopenharmony_ciSYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
2308c2ecf20Sopenharmony_ci		unsigned char __user *, vec)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	long retval;
2338c2ecf20Sopenharmony_ci	unsigned long pages;
2348c2ecf20Sopenharmony_ci	unsigned char *tmp;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	start = untagged_addr(start);
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	/* Check the start address: needs to be page-aligned.. */
2398c2ecf20Sopenharmony_ci	if (start & ~PAGE_MASK)
2408c2ecf20Sopenharmony_ci		return -EINVAL;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	/* ..and we need to be passed a valid user-space range */
2438c2ecf20Sopenharmony_ci	if (!access_ok((void __user *) start, len))
2448c2ecf20Sopenharmony_ci		return -ENOMEM;
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	/* This also avoids any overflows on PAGE_ALIGN */
2478c2ecf20Sopenharmony_ci	pages = len >> PAGE_SHIFT;
2488c2ecf20Sopenharmony_ci	pages += (offset_in_page(len)) != 0;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	if (!access_ok(vec, pages))
2518c2ecf20Sopenharmony_ci		return -EFAULT;
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci	tmp = (void *) __get_free_page(GFP_USER);
2548c2ecf20Sopenharmony_ci	if (!tmp)
2558c2ecf20Sopenharmony_ci		return -EAGAIN;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	retval = 0;
2588c2ecf20Sopenharmony_ci	while (pages) {
2598c2ecf20Sopenharmony_ci		/*
2608c2ecf20Sopenharmony_ci		 * Do at most PAGE_SIZE entries per iteration, due to
2618c2ecf20Sopenharmony_ci		 * the temporary buffer size.
2628c2ecf20Sopenharmony_ci		 */
2638c2ecf20Sopenharmony_ci		mmap_read_lock(current->mm);
2648c2ecf20Sopenharmony_ci		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
2658c2ecf20Sopenharmony_ci		mmap_read_unlock(current->mm);
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci		if (retval <= 0)
2688c2ecf20Sopenharmony_ci			break;
2698c2ecf20Sopenharmony_ci		if (copy_to_user(vec, tmp, retval)) {
2708c2ecf20Sopenharmony_ci			retval = -EFAULT;
2718c2ecf20Sopenharmony_ci			break;
2728c2ecf20Sopenharmony_ci		}
2738c2ecf20Sopenharmony_ci		pages -= retval;
2748c2ecf20Sopenharmony_ci		vec += retval;
2758c2ecf20Sopenharmony_ci		start += retval << PAGE_SHIFT;
2768c2ecf20Sopenharmony_ci		retval = 0;
2778c2ecf20Sopenharmony_ci	}
2788c2ecf20Sopenharmony_ci	free_page((unsigned long) tmp);
2798c2ecf20Sopenharmony_ci	return retval;
2808c2ecf20Sopenharmony_ci}
281