// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "memtrace: " fmt
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/bitops.h>
108c2ecf20Sopenharmony_ci#include <linux/string.h>
118c2ecf20Sopenharmony_ci#include <linux/memblock.h>
128c2ecf20Sopenharmony_ci#include <linux/init.h>
138c2ecf20Sopenharmony_ci#include <linux/moduleparam.h>
148c2ecf20Sopenharmony_ci#include <linux/fs.h>
158c2ecf20Sopenharmony_ci#include <linux/debugfs.h>
168c2ecf20Sopenharmony_ci#include <linux/slab.h>
178c2ecf20Sopenharmony_ci#include <linux/memory.h>
188c2ecf20Sopenharmony_ci#include <linux/memory_hotplug.h>
198c2ecf20Sopenharmony_ci#include <linux/numa.h>
208c2ecf20Sopenharmony_ci#include <asm/machdep.h>
218c2ecf20Sopenharmony_ci#include <asm/debugfs.h>
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci/* This enables us to keep track of the memory removed from each node. */
248c2ecf20Sopenharmony_cistruct memtrace_entry {
258c2ecf20Sopenharmony_ci	void *mem;
268c2ecf20Sopenharmony_ci	u64 start;
278c2ecf20Sopenharmony_ci	u64 size;
288c2ecf20Sopenharmony_ci	u32 nid;
298c2ecf20Sopenharmony_ci	struct dentry *dir;
308c2ecf20Sopenharmony_ci	char name[16];
318c2ecf20Sopenharmony_ci};
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(memtrace_mutex);
348c2ecf20Sopenharmony_cistatic u64 memtrace_size;
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_cistatic struct memtrace_entry *memtrace_array;
378c2ecf20Sopenharmony_cistatic unsigned int memtrace_array_nr;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cistatic ssize_t memtrace_read(struct file *filp, char __user *ubuf,
418c2ecf20Sopenharmony_ci			     size_t count, loff_t *ppos)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	struct memtrace_entry *ent = filp->private_data;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
468c2ecf20Sopenharmony_ci}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistatic const struct file_operations memtrace_fops = {
498c2ecf20Sopenharmony_ci	.llseek = default_llseek,
508c2ecf20Sopenharmony_ci	.read	= memtrace_read,
518c2ecf20Sopenharmony_ci	.open	= simple_open,
528c2ecf20Sopenharmony_ci};
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic int check_memblock_online(struct memory_block *mem, void *arg)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	if (mem->state != MEM_ONLINE)
578c2ecf20Sopenharmony_ci		return -1;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	return 0;
608c2ecf20Sopenharmony_ci}
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_cistatic int change_memblock_state(struct memory_block *mem, void *arg)
638c2ecf20Sopenharmony_ci{
648c2ecf20Sopenharmony_ci	unsigned long state = (unsigned long)arg;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	mem->state = state;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	return 0;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cistatic void memtrace_clear_range(unsigned long start_pfn,
728c2ecf20Sopenharmony_ci				 unsigned long nr_pages)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	unsigned long pfn;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	/*
778c2ecf20Sopenharmony_ci	 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
788c2ecf20Sopenharmony_ci	 * does not apply, avoid passing around "struct page" and use
798c2ecf20Sopenharmony_ci	 * clear_page() instead directly.
808c2ecf20Sopenharmony_ci	 */
818c2ecf20Sopenharmony_ci	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
828c2ecf20Sopenharmony_ci		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
838c2ecf20Sopenharmony_ci			cond_resched();
848c2ecf20Sopenharmony_ci		clear_page(__va(PFN_PHYS(pfn)));
858c2ecf20Sopenharmony_ci	}
868c2ecf20Sopenharmony_ci}
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/* called with device_hotplug_lock held */
898c2ecf20Sopenharmony_cistatic bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
908c2ecf20Sopenharmony_ci{
918c2ecf20Sopenharmony_ci	const unsigned long start = PFN_PHYS(start_pfn);
928c2ecf20Sopenharmony_ci	const unsigned long size = PFN_PHYS(nr_pages);
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	if (walk_memory_blocks(start, size, NULL, check_memblock_online))
958c2ecf20Sopenharmony_ci		return false;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
988c2ecf20Sopenharmony_ci			   change_memblock_state);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	if (offline_pages(start_pfn, nr_pages)) {
1018c2ecf20Sopenharmony_ci		walk_memory_blocks(start, size, (void *)MEM_ONLINE,
1028c2ecf20Sopenharmony_ci				   change_memblock_state);
1038c2ecf20Sopenharmony_ci		return false;
1048c2ecf20Sopenharmony_ci	}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
1078c2ecf20Sopenharmony_ci			   change_memblock_state);
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	return true;
1118c2ecf20Sopenharmony_ci}
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_cistatic u64 memtrace_alloc_node(u32 nid, u64 size)
1148c2ecf20Sopenharmony_ci{
1158c2ecf20Sopenharmony_ci	u64 start_pfn, end_pfn, nr_pages, pfn;
1168c2ecf20Sopenharmony_ci	u64 base_pfn;
1178c2ecf20Sopenharmony_ci	u64 bytes = memory_block_size_bytes();
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	if (!node_spanned_pages(nid))
1208c2ecf20Sopenharmony_ci		return 0;
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	start_pfn = node_start_pfn(nid);
1238c2ecf20Sopenharmony_ci	end_pfn = node_end_pfn(nid);
1248c2ecf20Sopenharmony_ci	nr_pages = size >> PAGE_SHIFT;
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	/* Trace memory needs to be aligned to the size */
1278c2ecf20Sopenharmony_ci	end_pfn = round_down(end_pfn - nr_pages, nr_pages);
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	lock_device_hotplug();
1308c2ecf20Sopenharmony_ci	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
1318c2ecf20Sopenharmony_ci		if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
1328c2ecf20Sopenharmony_ci			/*
1338c2ecf20Sopenharmony_ci			 * Clear the range while we still have a linear
1348c2ecf20Sopenharmony_ci			 * mapping.
1358c2ecf20Sopenharmony_ci			 */
1368c2ecf20Sopenharmony_ci			memtrace_clear_range(base_pfn, nr_pages);
1378c2ecf20Sopenharmony_ci			/*
1388c2ecf20Sopenharmony_ci			 * Remove memory in memory block size chunks so that
1398c2ecf20Sopenharmony_ci			 * iomem resources are always split to the same size and
1408c2ecf20Sopenharmony_ci			 * we never try to remove memory that spans two iomem
1418c2ecf20Sopenharmony_ci			 * resources.
1428c2ecf20Sopenharmony_ci			 */
1438c2ecf20Sopenharmony_ci			end_pfn = base_pfn + nr_pages;
1448c2ecf20Sopenharmony_ci			for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
1458c2ecf20Sopenharmony_ci				__remove_memory(nid, pfn << PAGE_SHIFT, bytes);
1468c2ecf20Sopenharmony_ci			}
1478c2ecf20Sopenharmony_ci			unlock_device_hotplug();
1488c2ecf20Sopenharmony_ci			return base_pfn << PAGE_SHIFT;
1498c2ecf20Sopenharmony_ci		}
1508c2ecf20Sopenharmony_ci	}
1518c2ecf20Sopenharmony_ci	unlock_device_hotplug();
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	return 0;
1548c2ecf20Sopenharmony_ci}
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_cistatic int memtrace_init_regions_runtime(u64 size)
1578c2ecf20Sopenharmony_ci{
1588c2ecf20Sopenharmony_ci	u32 nid;
1598c2ecf20Sopenharmony_ci	u64 m;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	memtrace_array = kcalloc(num_online_nodes(),
1628c2ecf20Sopenharmony_ci				sizeof(struct memtrace_entry), GFP_KERNEL);
1638c2ecf20Sopenharmony_ci	if (!memtrace_array) {
1648c2ecf20Sopenharmony_ci		pr_err("Failed to allocate memtrace_array\n");
1658c2ecf20Sopenharmony_ci		return -EINVAL;
1668c2ecf20Sopenharmony_ci	}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	for_each_online_node(nid) {
1698c2ecf20Sopenharmony_ci		m = memtrace_alloc_node(nid, size);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci		/*
1728c2ecf20Sopenharmony_ci		 * A node might not have any local memory, so warn but
1738c2ecf20Sopenharmony_ci		 * continue on.
1748c2ecf20Sopenharmony_ci		 */
1758c2ecf20Sopenharmony_ci		if (!m) {
1768c2ecf20Sopenharmony_ci			pr_err("Failed to allocate trace memory on node %d\n", nid);
1778c2ecf20Sopenharmony_ci			continue;
1788c2ecf20Sopenharmony_ci		}
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci		memtrace_array[memtrace_array_nr].start = m;
1838c2ecf20Sopenharmony_ci		memtrace_array[memtrace_array_nr].size = size;
1848c2ecf20Sopenharmony_ci		memtrace_array[memtrace_array_nr].nid = nid;
1858c2ecf20Sopenharmony_ci		memtrace_array_nr++;
1868c2ecf20Sopenharmony_ci	}
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	return 0;
1898c2ecf20Sopenharmony_ci}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_cistatic struct dentry *memtrace_debugfs_dir;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_cistatic int memtrace_init_debugfs(void)
1948c2ecf20Sopenharmony_ci{
1958c2ecf20Sopenharmony_ci	int ret = 0;
1968c2ecf20Sopenharmony_ci	int i;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	for (i = 0; i < memtrace_array_nr; i++) {
1998c2ecf20Sopenharmony_ci		struct dentry *dir;
2008c2ecf20Sopenharmony_ci		struct memtrace_entry *ent = &memtrace_array[i];
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci		ent->mem = ioremap(ent->start, ent->size);
2038c2ecf20Sopenharmony_ci		/* Warn but continue on */
2048c2ecf20Sopenharmony_ci		if (!ent->mem) {
2058c2ecf20Sopenharmony_ci			pr_err("Failed to map trace memory at 0x%llx\n",
2068c2ecf20Sopenharmony_ci				 ent->start);
2078c2ecf20Sopenharmony_ci			ret = -1;
2088c2ecf20Sopenharmony_ci			continue;
2098c2ecf20Sopenharmony_ci		}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci		snprintf(ent->name, 16, "%08x", ent->nid);
2128c2ecf20Sopenharmony_ci		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci		ent->dir = dir;
2158c2ecf20Sopenharmony_ci		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
2168c2ecf20Sopenharmony_ci		debugfs_create_x64("start", 0400, dir, &ent->start);
2178c2ecf20Sopenharmony_ci		debugfs_create_x64("size", 0400, dir, &ent->size);
2188c2ecf20Sopenharmony_ci	}
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	return ret;
2218c2ecf20Sopenharmony_ci}
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_cistatic int online_mem_block(struct memory_block *mem, void *arg)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	return device_online(&mem->dev);
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci/*
2298c2ecf20Sopenharmony_ci * Iterate through the chunks of memory we have removed from the kernel
2308c2ecf20Sopenharmony_ci * and attempt to add them back to the kernel.
2318c2ecf20Sopenharmony_ci */
2328c2ecf20Sopenharmony_cistatic int memtrace_online(void)
2338c2ecf20Sopenharmony_ci{
2348c2ecf20Sopenharmony_ci	int i, ret = 0;
2358c2ecf20Sopenharmony_ci	struct memtrace_entry *ent;
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	for (i = memtrace_array_nr - 1; i >= 0; i--) {
2388c2ecf20Sopenharmony_ci		ent = &memtrace_array[i];
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci		/* We have onlined this chunk previously */
2418c2ecf20Sopenharmony_ci		if (ent->nid == NUMA_NO_NODE)
2428c2ecf20Sopenharmony_ci			continue;
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci		/* Remove from io mappings */
2458c2ecf20Sopenharmony_ci		if (ent->mem) {
2468c2ecf20Sopenharmony_ci			iounmap(ent->mem);
2478c2ecf20Sopenharmony_ci			ent->mem = 0;
2488c2ecf20Sopenharmony_ci		}
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci		if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
2518c2ecf20Sopenharmony_ci			pr_err("Failed to add trace memory to node %d\n",
2528c2ecf20Sopenharmony_ci				ent->nid);
2538c2ecf20Sopenharmony_ci			ret += 1;
2548c2ecf20Sopenharmony_ci			continue;
2558c2ecf20Sopenharmony_ci		}
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci		lock_device_hotplug();
2588c2ecf20Sopenharmony_ci		walk_memory_blocks(ent->start, ent->size, NULL,
2598c2ecf20Sopenharmony_ci				   online_mem_block);
2608c2ecf20Sopenharmony_ci		unlock_device_hotplug();
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci		/*
2638c2ecf20Sopenharmony_ci		 * Memory was added successfully so clean up references to it
2648c2ecf20Sopenharmony_ci		 * so on reentry we can tell that this chunk was added.
2658c2ecf20Sopenharmony_ci		 */
2668c2ecf20Sopenharmony_ci		debugfs_remove_recursive(ent->dir);
2678c2ecf20Sopenharmony_ci		pr_info("Added trace memory back to node %d\n", ent->nid);
2688c2ecf20Sopenharmony_ci		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
2698c2ecf20Sopenharmony_ci	}
2708c2ecf20Sopenharmony_ci	if (ret)
2718c2ecf20Sopenharmony_ci		return ret;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	/* If all chunks of memory were added successfully, reset globals */
2748c2ecf20Sopenharmony_ci	kfree(memtrace_array);
2758c2ecf20Sopenharmony_ci	memtrace_array = NULL;
2768c2ecf20Sopenharmony_ci	memtrace_size = 0;
2778c2ecf20Sopenharmony_ci	memtrace_array_nr = 0;
2788c2ecf20Sopenharmony_ci	return 0;
2798c2ecf20Sopenharmony_ci}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_cistatic int memtrace_enable_set(void *data, u64 val)
2828c2ecf20Sopenharmony_ci{
2838c2ecf20Sopenharmony_ci	int rc = -EAGAIN;
2848c2ecf20Sopenharmony_ci	u64 bytes;
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	/*
2878c2ecf20Sopenharmony_ci	 * Don't attempt to do anything if size isn't aligned to a memory
2888c2ecf20Sopenharmony_ci	 * block or equal to zero.
2898c2ecf20Sopenharmony_ci	 */
2908c2ecf20Sopenharmony_ci	bytes = memory_block_size_bytes();
2918c2ecf20Sopenharmony_ci	if (val & (bytes - 1)) {
2928c2ecf20Sopenharmony_ci		pr_err("Value must be aligned with 0x%llx\n", bytes);
2938c2ecf20Sopenharmony_ci		return -EINVAL;
2948c2ecf20Sopenharmony_ci	}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	mutex_lock(&memtrace_mutex);
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	/* Re-add/online previously removed/offlined memory */
2998c2ecf20Sopenharmony_ci	if (memtrace_size) {
3008c2ecf20Sopenharmony_ci		if (memtrace_online())
3018c2ecf20Sopenharmony_ci			goto out_unlock;
3028c2ecf20Sopenharmony_ci	}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	if (!val) {
3058c2ecf20Sopenharmony_ci		rc = 0;
3068c2ecf20Sopenharmony_ci		goto out_unlock;
3078c2ecf20Sopenharmony_ci	}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	/* Offline and remove memory */
3108c2ecf20Sopenharmony_ci	if (memtrace_init_regions_runtime(val))
3118c2ecf20Sopenharmony_ci		goto out_unlock;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	if (memtrace_init_debugfs())
3148c2ecf20Sopenharmony_ci		goto out_unlock;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	memtrace_size = val;
3178c2ecf20Sopenharmony_ci	rc = 0;
3188c2ecf20Sopenharmony_ciout_unlock:
3198c2ecf20Sopenharmony_ci	mutex_unlock(&memtrace_mutex);
3208c2ecf20Sopenharmony_ci	return rc;
3218c2ecf20Sopenharmony_ci}
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_cistatic int memtrace_enable_get(void *data, u64 *val)
3248c2ecf20Sopenharmony_ci{
3258c2ecf20Sopenharmony_ci	*val = memtrace_size;
3268c2ecf20Sopenharmony_ci	return 0;
3278c2ecf20Sopenharmony_ci}
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
3308c2ecf20Sopenharmony_ci					memtrace_enable_set, "0x%016llx\n");
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_cistatic int memtrace_init(void)
3338c2ecf20Sopenharmony_ci{
3348c2ecf20Sopenharmony_ci	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
3358c2ecf20Sopenharmony_ci						  powerpc_debugfs_root);
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
3388c2ecf20Sopenharmony_ci			    NULL, &memtrace_init_fops);
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	return 0;
3418c2ecf20Sopenharmony_ci}
3428c2ecf20Sopenharmony_cimachine_device_initcall(powernv, memtrace_init);
343