162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/memblock.h>
362306a36Sopenharmony_ci#include <linux/compiler.h>
462306a36Sopenharmony_ci#include <linux/fs.h>
562306a36Sopenharmony_ci#include <linux/init.h>
662306a36Sopenharmony_ci#include <linux/ksm.h>
762306a36Sopenharmony_ci#include <linux/mm.h>
862306a36Sopenharmony_ci#include <linux/mmzone.h>
962306a36Sopenharmony_ci#include <linux/huge_mm.h>
1062306a36Sopenharmony_ci#include <linux/proc_fs.h>
1162306a36Sopenharmony_ci#include <linux/seq_file.h>
1262306a36Sopenharmony_ci#include <linux/hugetlb.h>
1362306a36Sopenharmony_ci#include <linux/memremap.h>
1462306a36Sopenharmony_ci#include <linux/memcontrol.h>
1562306a36Sopenharmony_ci#include <linux/mmu_notifier.h>
1662306a36Sopenharmony_ci#include <linux/page_idle.h>
1762306a36Sopenharmony_ci#include <linux/kernel-page-flags.h>
1862306a36Sopenharmony_ci#include <linux/uaccess.h>
1962306a36Sopenharmony_ci#include "internal.h"
2062306a36Sopenharmony_ci
/* Size in bytes of one exported entry; every entry is a single u64. */
#define KPMSIZE sizeof(u64)
/* Low-bit mask used to reject offsets/lengths that are not KPMSIZE multiples. */
#define KPMMASK (KPMSIZE - 1)
/* Number of bits in one entry (not referenced by the code visible here). */
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_cistatic inline unsigned long get_max_dump_pfn(void)
2662306a36Sopenharmony_ci{
2762306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM
2862306a36Sopenharmony_ci	/*
2962306a36Sopenharmony_ci	 * The memmap of early sections is completely populated and marked
3062306a36Sopenharmony_ci	 * online even if max_pfn does not fall on a section boundary -
3162306a36Sopenharmony_ci	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
3262306a36Sopenharmony_ci	 * these memmaps.
3362306a36Sopenharmony_ci	 */
3462306a36Sopenharmony_ci	return round_up(max_pfn, PAGES_PER_SECTION);
3562306a36Sopenharmony_ci#else
3662306a36Sopenharmony_ci	return max_pfn;
3762306a36Sopenharmony_ci#endif
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci/* /proc/kpagecount - an array exposing page counts
4162306a36Sopenharmony_ci *
4262306a36Sopenharmony_ci * Each entry is a u64 representing the corresponding
4362306a36Sopenharmony_ci * physical page count.
4462306a36Sopenharmony_ci */
4562306a36Sopenharmony_cistatic ssize_t kpagecount_read(struct file *file, char __user *buf,
4662306a36Sopenharmony_ci			     size_t count, loff_t *ppos)
4762306a36Sopenharmony_ci{
4862306a36Sopenharmony_ci	const unsigned long max_dump_pfn = get_max_dump_pfn();
4962306a36Sopenharmony_ci	u64 __user *out = (u64 __user *)buf;
5062306a36Sopenharmony_ci	struct page *ppage;
5162306a36Sopenharmony_ci	unsigned long src = *ppos;
5262306a36Sopenharmony_ci	unsigned long pfn;
5362306a36Sopenharmony_ci	ssize_t ret = 0;
5462306a36Sopenharmony_ci	u64 pcount;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	pfn = src / KPMSIZE;
5762306a36Sopenharmony_ci	if (src & KPMMASK || count & KPMMASK)
5862306a36Sopenharmony_ci		return -EINVAL;
5962306a36Sopenharmony_ci	if (src >= max_dump_pfn * KPMSIZE)
6062306a36Sopenharmony_ci		return 0;
6162306a36Sopenharmony_ci	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	while (count > 0) {
6462306a36Sopenharmony_ci		/*
6562306a36Sopenharmony_ci		 * TODO: ZONE_DEVICE support requires to identify
6662306a36Sopenharmony_ci		 * memmaps that were actually initialized.
6762306a36Sopenharmony_ci		 */
6862306a36Sopenharmony_ci		ppage = pfn_to_online_page(pfn);
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci		if (!ppage || PageSlab(ppage) || page_has_type(ppage))
7162306a36Sopenharmony_ci			pcount = 0;
7262306a36Sopenharmony_ci		else
7362306a36Sopenharmony_ci			pcount = page_mapcount(ppage);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci		if (put_user(pcount, out)) {
7662306a36Sopenharmony_ci			ret = -EFAULT;
7762306a36Sopenharmony_ci			break;
7862306a36Sopenharmony_ci		}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci		pfn++;
8162306a36Sopenharmony_ci		out++;
8262306a36Sopenharmony_ci		count -= KPMSIZE;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci		cond_resched();
8562306a36Sopenharmony_ci	}
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	*ppos += (char __user *)out - buf;
8862306a36Sopenharmony_ci	if (!ret)
8962306a36Sopenharmony_ci		ret = (char __user *)out - buf;
9062306a36Sopenharmony_ci	return ret;
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic const struct proc_ops kpagecount_proc_ops = {
9462306a36Sopenharmony_ci	.proc_flags	= PROC_ENTRY_PERMANENT,
9562306a36Sopenharmony_ci	.proc_lseek	= mem_lseek,
9662306a36Sopenharmony_ci	.proc_read	= kpagecount_read,
9762306a36Sopenharmony_ci};
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci/* /proc/kpageflags - an array exposing page flags
10062306a36Sopenharmony_ci *
10162306a36Sopenharmony_ci * Each entry is a u64 representing the corresponding
10262306a36Sopenharmony_ci * physical page flags.
10362306a36Sopenharmony_ci */
10462306a36Sopenharmony_ci
/*
 * Extract bit @kbit from @kflags and return it repositioned at bit @ubit;
 * all other bits of the result are zero.
 */
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	const u64 bit = (kflags >> kbit) & 1;

	return bit << ubit;
}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ciu64 stable_page_flags(struct page *page)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	u64 k;
11362306a36Sopenharmony_ci	u64 u;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	/*
11662306a36Sopenharmony_ci	 * pseudo flag: KPF_NOPAGE
11762306a36Sopenharmony_ci	 * it differentiates a memory hole from a page with no flags
11862306a36Sopenharmony_ci	 */
11962306a36Sopenharmony_ci	if (!page)
12062306a36Sopenharmony_ci		return 1 << KPF_NOPAGE;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	k = page->flags;
12362306a36Sopenharmony_ci	u = 0;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/*
12662306a36Sopenharmony_ci	 * pseudo flags for the well known (anonymous) memory mapped pages
12762306a36Sopenharmony_ci	 *
12862306a36Sopenharmony_ci	 * Note that page->_mapcount is overloaded in SLAB, so the
12962306a36Sopenharmony_ci	 * simple test in page_mapped() is not enough.
13062306a36Sopenharmony_ci	 */
13162306a36Sopenharmony_ci	if (!PageSlab(page) && page_mapped(page))
13262306a36Sopenharmony_ci		u |= 1 << KPF_MMAP;
13362306a36Sopenharmony_ci	if (PageAnon(page))
13462306a36Sopenharmony_ci		u |= 1 << KPF_ANON;
13562306a36Sopenharmony_ci	if (PageKsm(page))
13662306a36Sopenharmony_ci		u |= 1 << KPF_KSM;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	/*
13962306a36Sopenharmony_ci	 * compound pages: export both head/tail info
14062306a36Sopenharmony_ci	 * they together define a compound page's start/end pos and order
14162306a36Sopenharmony_ci	 */
14262306a36Sopenharmony_ci	if (PageHead(page))
14362306a36Sopenharmony_ci		u |= 1 << KPF_COMPOUND_HEAD;
14462306a36Sopenharmony_ci	if (PageTail(page))
14562306a36Sopenharmony_ci		u |= 1 << KPF_COMPOUND_TAIL;
14662306a36Sopenharmony_ci	if (PageHuge(page))
14762306a36Sopenharmony_ci		u |= 1 << KPF_HUGE;
14862306a36Sopenharmony_ci	/*
14962306a36Sopenharmony_ci	 * PageTransCompound can be true for non-huge compound pages (slab
15062306a36Sopenharmony_ci	 * pages or pages allocated by drivers with __GFP_COMP) because it
15162306a36Sopenharmony_ci	 * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
15262306a36Sopenharmony_ci	 * to make sure a given page is a thp, not a non-huge compound page.
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	else if (PageTransCompound(page)) {
15562306a36Sopenharmony_ci		struct page *head = compound_head(page);
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci		if (PageLRU(head) || PageAnon(head))
15862306a36Sopenharmony_ci			u |= 1 << KPF_THP;
15962306a36Sopenharmony_ci		else if (is_huge_zero_page(head)) {
16062306a36Sopenharmony_ci			u |= 1 << KPF_ZERO_PAGE;
16162306a36Sopenharmony_ci			u |= 1 << KPF_THP;
16262306a36Sopenharmony_ci		}
16362306a36Sopenharmony_ci	} else if (is_zero_pfn(page_to_pfn(page)))
16462306a36Sopenharmony_ci		u |= 1 << KPF_ZERO_PAGE;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	/*
16862306a36Sopenharmony_ci	 * Caveats on high order pages: PG_buddy and PG_slab will only be set
16962306a36Sopenharmony_ci	 * on the head page.
17062306a36Sopenharmony_ci	 */
17162306a36Sopenharmony_ci	if (PageBuddy(page))
17262306a36Sopenharmony_ci		u |= 1 << KPF_BUDDY;
17362306a36Sopenharmony_ci	else if (page_count(page) == 0 && is_free_buddy_page(page))
17462306a36Sopenharmony_ci		u |= 1 << KPF_BUDDY;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	if (PageOffline(page))
17762306a36Sopenharmony_ci		u |= 1 << KPF_OFFLINE;
17862306a36Sopenharmony_ci	if (PageTable(page))
17962306a36Sopenharmony_ci		u |= 1 << KPF_PGTABLE;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (page_is_idle(page))
18262306a36Sopenharmony_ci		u |= 1 << KPF_IDLE;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
18762306a36Sopenharmony_ci	if (PageTail(page) && PageSlab(page))
18862306a36Sopenharmony_ci		u |= 1 << KPF_SLAB;
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_ERROR,		PG_error);
19162306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_DIRTY,		PG_dirty);
19262306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_UPTODATE,	PG_uptodate);
19362306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_WRITEBACK,	PG_writeback);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_LRU,		PG_lru);
19662306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_REFERENCED,	PG_referenced);
19762306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_ACTIVE,	PG_active);
19862306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_RECLAIM,	PG_reclaim);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	if (PageSwapCache(page))
20162306a36Sopenharmony_ci		u |= 1 << KPF_SWAPCACHE;
20262306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_SWAPBACKED,	PG_swapbacked);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_UNEVICTABLE,	PG_unevictable);
20562306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_MLOCKED,	PG_mlocked);
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE
20862306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_HWPOISON,	PG_hwpoison);
20962306a36Sopenharmony_ci#endif
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci#ifdef CONFIG_ARCH_USES_PG_UNCACHED
21262306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_UNCACHED,	PG_uncached);
21362306a36Sopenharmony_ci#endif
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_RESERVED,	PG_reserved);
21662306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_MAPPEDTODISK,	PG_mappedtodisk);
21762306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_PRIVATE,	PG_private);
21862306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_PRIVATE_2,	PG_private_2);
21962306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE,	PG_owner_priv_1);
22062306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_ARCH,		PG_arch_1);
22162306a36Sopenharmony_ci#ifdef CONFIG_ARCH_USES_PG_ARCH_X
22262306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_ARCH_2,	PG_arch_2);
22362306a36Sopenharmony_ci	u |= kpf_copy_bit(k, KPF_ARCH_3,	PG_arch_3);
22462306a36Sopenharmony_ci#endif
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	return u;
22762306a36Sopenharmony_ci};
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_cistatic ssize_t kpageflags_read(struct file *file, char __user *buf,
23062306a36Sopenharmony_ci			     size_t count, loff_t *ppos)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	const unsigned long max_dump_pfn = get_max_dump_pfn();
23362306a36Sopenharmony_ci	u64 __user *out = (u64 __user *)buf;
23462306a36Sopenharmony_ci	struct page *ppage;
23562306a36Sopenharmony_ci	unsigned long src = *ppos;
23662306a36Sopenharmony_ci	unsigned long pfn;
23762306a36Sopenharmony_ci	ssize_t ret = 0;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	pfn = src / KPMSIZE;
24062306a36Sopenharmony_ci	if (src & KPMMASK || count & KPMMASK)
24162306a36Sopenharmony_ci		return -EINVAL;
24262306a36Sopenharmony_ci	if (src >= max_dump_pfn * KPMSIZE)
24362306a36Sopenharmony_ci		return 0;
24462306a36Sopenharmony_ci	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	while (count > 0) {
24762306a36Sopenharmony_ci		/*
24862306a36Sopenharmony_ci		 * TODO: ZONE_DEVICE support requires to identify
24962306a36Sopenharmony_ci		 * memmaps that were actually initialized.
25062306a36Sopenharmony_ci		 */
25162306a36Sopenharmony_ci		ppage = pfn_to_online_page(pfn);
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci		if (put_user(stable_page_flags(ppage), out)) {
25462306a36Sopenharmony_ci			ret = -EFAULT;
25562306a36Sopenharmony_ci			break;
25662306a36Sopenharmony_ci		}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci		pfn++;
25962306a36Sopenharmony_ci		out++;
26062306a36Sopenharmony_ci		count -= KPMSIZE;
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci		cond_resched();
26362306a36Sopenharmony_ci	}
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	*ppos += (char __user *)out - buf;
26662306a36Sopenharmony_ci	if (!ret)
26762306a36Sopenharmony_ci		ret = (char __user *)out - buf;
26862306a36Sopenharmony_ci	return ret;
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_cistatic const struct proc_ops kpageflags_proc_ops = {
27262306a36Sopenharmony_ci	.proc_flags	= PROC_ENTRY_PERMANENT,
27362306a36Sopenharmony_ci	.proc_lseek	= mem_lseek,
27462306a36Sopenharmony_ci	.proc_read	= kpageflags_read,
27562306a36Sopenharmony_ci};
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci#ifdef CONFIG_MEMCG
27862306a36Sopenharmony_cistatic ssize_t kpagecgroup_read(struct file *file, char __user *buf,
27962306a36Sopenharmony_ci				size_t count, loff_t *ppos)
28062306a36Sopenharmony_ci{
28162306a36Sopenharmony_ci	const unsigned long max_dump_pfn = get_max_dump_pfn();
28262306a36Sopenharmony_ci	u64 __user *out = (u64 __user *)buf;
28362306a36Sopenharmony_ci	struct page *ppage;
28462306a36Sopenharmony_ci	unsigned long src = *ppos;
28562306a36Sopenharmony_ci	unsigned long pfn;
28662306a36Sopenharmony_ci	ssize_t ret = 0;
28762306a36Sopenharmony_ci	u64 ino;
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	pfn = src / KPMSIZE;
29062306a36Sopenharmony_ci	if (src & KPMMASK || count & KPMMASK)
29162306a36Sopenharmony_ci		return -EINVAL;
29262306a36Sopenharmony_ci	if (src >= max_dump_pfn * KPMSIZE)
29362306a36Sopenharmony_ci		return 0;
29462306a36Sopenharmony_ci	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	while (count > 0) {
29762306a36Sopenharmony_ci		/*
29862306a36Sopenharmony_ci		 * TODO: ZONE_DEVICE support requires to identify
29962306a36Sopenharmony_ci		 * memmaps that were actually initialized.
30062306a36Sopenharmony_ci		 */
30162306a36Sopenharmony_ci		ppage = pfn_to_online_page(pfn);
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci		if (ppage)
30462306a36Sopenharmony_ci			ino = page_cgroup_ino(ppage);
30562306a36Sopenharmony_ci		else
30662306a36Sopenharmony_ci			ino = 0;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci		if (put_user(ino, out)) {
30962306a36Sopenharmony_ci			ret = -EFAULT;
31062306a36Sopenharmony_ci			break;
31162306a36Sopenharmony_ci		}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci		pfn++;
31462306a36Sopenharmony_ci		out++;
31562306a36Sopenharmony_ci		count -= KPMSIZE;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci		cond_resched();
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	*ppos += (char __user *)out - buf;
32162306a36Sopenharmony_ci	if (!ret)
32262306a36Sopenharmony_ci		ret = (char __user *)out - buf;
32362306a36Sopenharmony_ci	return ret;
32462306a36Sopenharmony_ci}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_cistatic const struct proc_ops kpagecgroup_proc_ops = {
32762306a36Sopenharmony_ci	.proc_flags	= PROC_ENTRY_PERMANENT,
32862306a36Sopenharmony_ci	.proc_lseek	= mem_lseek,
32962306a36Sopenharmony_ci	.proc_read	= kpagecgroup_read,
33062306a36Sopenharmony_ci};
33162306a36Sopenharmony_ci#endif /* CONFIG_MEMCG */
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_cistatic int __init proc_page_init(void)
33462306a36Sopenharmony_ci{
33562306a36Sopenharmony_ci	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
33662306a36Sopenharmony_ci	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
33762306a36Sopenharmony_ci#ifdef CONFIG_MEMCG
33862306a36Sopenharmony_ci	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
33962306a36Sopenharmony_ci#endif
34062306a36Sopenharmony_ci	return 0;
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_cifs_initcall(proc_page_init);
343