18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) IBM Corporation, 2014, 2017 48c2ecf20Sopenharmony_ci * Anton Blanchard, Rashmica Gupta. 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "memtrace: " fmt 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <linux/bitops.h> 108c2ecf20Sopenharmony_ci#include <linux/string.h> 118c2ecf20Sopenharmony_ci#include <linux/memblock.h> 128c2ecf20Sopenharmony_ci#include <linux/init.h> 138c2ecf20Sopenharmony_ci#include <linux/moduleparam.h> 148c2ecf20Sopenharmony_ci#include <linux/fs.h> 158c2ecf20Sopenharmony_ci#include <linux/debugfs.h> 168c2ecf20Sopenharmony_ci#include <linux/slab.h> 178c2ecf20Sopenharmony_ci#include <linux/memory.h> 188c2ecf20Sopenharmony_ci#include <linux/memory_hotplug.h> 198c2ecf20Sopenharmony_ci#include <linux/numa.h> 208c2ecf20Sopenharmony_ci#include <asm/machdep.h> 218c2ecf20Sopenharmony_ci#include <asm/debugfs.h> 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci/* This enables us to keep track of the memory removed from each node. */ 248c2ecf20Sopenharmony_cistruct memtrace_entry { 258c2ecf20Sopenharmony_ci void *mem; 268c2ecf20Sopenharmony_ci u64 start; 278c2ecf20Sopenharmony_ci u64 size; 288c2ecf20Sopenharmony_ci u32 nid; 298c2ecf20Sopenharmony_ci struct dentry *dir; 308c2ecf20Sopenharmony_ci char name[16]; 318c2ecf20Sopenharmony_ci}; 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(memtrace_mutex); 348c2ecf20Sopenharmony_cistatic u64 memtrace_size; 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_cistatic struct memtrace_entry *memtrace_array; 378c2ecf20Sopenharmony_cistatic unsigned int memtrace_array_nr; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_cistatic ssize_t memtrace_read(struct file *filp, char __user *ubuf, 418c2ecf20Sopenharmony_ci size_t count, loff_t *ppos) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci struct memtrace_entry *ent = filp->private_data; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 468c2ecf20Sopenharmony_ci} 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_cistatic const struct file_operations memtrace_fops = { 498c2ecf20Sopenharmony_ci .llseek = default_llseek, 508c2ecf20Sopenharmony_ci .read = memtrace_read, 518c2ecf20Sopenharmony_ci .open = simple_open, 528c2ecf20Sopenharmony_ci}; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistatic int check_memblock_online(struct memory_block *mem, void *arg) 558c2ecf20Sopenharmony_ci{ 568c2ecf20Sopenharmony_ci if (mem->state != MEM_ONLINE) 578c2ecf20Sopenharmony_ci return -1; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci return 0; 608c2ecf20Sopenharmony_ci} 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_cistatic int change_memblock_state(struct memory_block *mem, void *arg) 638c2ecf20Sopenharmony_ci{ 648c2ecf20Sopenharmony_ci unsigned long state = (unsigned long)arg; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci mem->state = state; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci return 0; 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cistatic void memtrace_clear_range(unsigned long start_pfn, 728c2ecf20Sopenharmony_ci unsigned long nr_pages) 738c2ecf20Sopenharmony_ci{ 748c2ecf20Sopenharmony_ci unsigned long pfn; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci /* 778c2ecf20Sopenharmony_ci * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM 788c2ecf20Sopenharmony_ci * does not apply, avoid passing around "struct page" and use 798c2ecf20Sopenharmony_ci * clear_page() instead directly. 808c2ecf20Sopenharmony_ci */ 818c2ecf20Sopenharmony_ci for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { 828c2ecf20Sopenharmony_ci if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) 838c2ecf20Sopenharmony_ci cond_resched(); 848c2ecf20Sopenharmony_ci clear_page(__va(PFN_PHYS(pfn))); 858c2ecf20Sopenharmony_ci } 868c2ecf20Sopenharmony_ci} 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci/* called with device_hotplug_lock held */ 898c2ecf20Sopenharmony_cistatic bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 908c2ecf20Sopenharmony_ci{ 918c2ecf20Sopenharmony_ci const unsigned long start = PFN_PHYS(start_pfn); 928c2ecf20Sopenharmony_ci const unsigned long size = PFN_PHYS(nr_pages); 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci if (walk_memory_blocks(start, size, NULL, check_memblock_online)) 958c2ecf20Sopenharmony_ci return false; 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, 988c2ecf20Sopenharmony_ci change_memblock_state); 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci if (offline_pages(start_pfn, nr_pages)) { 1018c2ecf20Sopenharmony_ci walk_memory_blocks(start, size, (void *)MEM_ONLINE, 1028c2ecf20Sopenharmony_ci change_memblock_state); 1038c2ecf20Sopenharmony_ci return false; 1048c2ecf20Sopenharmony_ci } 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci walk_memory_blocks(start, size, (void *)MEM_OFFLINE, 1078c2ecf20Sopenharmony_ci change_memblock_state); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci return true; 1118c2ecf20Sopenharmony_ci} 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_cistatic u64 memtrace_alloc_node(u32 nid, u64 size) 1148c2ecf20Sopenharmony_ci{ 1158c2ecf20Sopenharmony_ci u64 start_pfn, end_pfn, nr_pages, pfn; 1168c2ecf20Sopenharmony_ci u64 base_pfn; 1178c2ecf20Sopenharmony_ci u64 bytes = memory_block_size_bytes(); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci if (!node_spanned_pages(nid)) 1208c2ecf20Sopenharmony_ci return 0; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci start_pfn = node_start_pfn(nid); 1238c2ecf20Sopenharmony_ci end_pfn = node_end_pfn(nid); 1248c2ecf20Sopenharmony_ci nr_pages = size >> PAGE_SHIFT; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci /* Trace memory needs to be aligned to the size */ 1278c2ecf20Sopenharmony_ci end_pfn = round_down(end_pfn - nr_pages, nr_pages); 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci lock_device_hotplug(); 1308c2ecf20Sopenharmony_ci for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 1318c2ecf20Sopenharmony_ci if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { 1328c2ecf20Sopenharmony_ci /* 1338c2ecf20Sopenharmony_ci * Clear the range while we still have a linear 1348c2ecf20Sopenharmony_ci * mapping. 1358c2ecf20Sopenharmony_ci */ 1368c2ecf20Sopenharmony_ci memtrace_clear_range(base_pfn, nr_pages); 1378c2ecf20Sopenharmony_ci /* 1388c2ecf20Sopenharmony_ci * Remove memory in memory block size chunks so that 1398c2ecf20Sopenharmony_ci * iomem resources are always split to the same size and 1408c2ecf20Sopenharmony_ci * we never try to remove memory that spans two iomem 1418c2ecf20Sopenharmony_ci * resources. 1428c2ecf20Sopenharmony_ci */ 1438c2ecf20Sopenharmony_ci end_pfn = base_pfn + nr_pages; 1448c2ecf20Sopenharmony_ci for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { 1458c2ecf20Sopenharmony_ci __remove_memory(nid, pfn << PAGE_SHIFT, bytes); 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci unlock_device_hotplug(); 1488c2ecf20Sopenharmony_ci return base_pfn << PAGE_SHIFT; 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci } 1518c2ecf20Sopenharmony_ci unlock_device_hotplug(); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci return 0; 1548c2ecf20Sopenharmony_ci} 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_cistatic int memtrace_init_regions_runtime(u64 size) 1578c2ecf20Sopenharmony_ci{ 1588c2ecf20Sopenharmony_ci u32 nid; 1598c2ecf20Sopenharmony_ci u64 m; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci memtrace_array = kcalloc(num_online_nodes(), 1628c2ecf20Sopenharmony_ci sizeof(struct memtrace_entry), GFP_KERNEL); 1638c2ecf20Sopenharmony_ci if (!memtrace_array) { 1648c2ecf20Sopenharmony_ci pr_err("Failed to allocate memtrace_array\n"); 1658c2ecf20Sopenharmony_ci return -EINVAL; 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci for_each_online_node(nid) { 1698c2ecf20Sopenharmony_ci m = memtrace_alloc_node(nid, size); 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci /* 1728c2ecf20Sopenharmony_ci * A node might not have any local memory, so warn but 1738c2ecf20Sopenharmony_ci * continue on. 1748c2ecf20Sopenharmony_ci */ 1758c2ecf20Sopenharmony_ci if (!m) { 1768c2ecf20Sopenharmony_ci pr_err("Failed to allocate trace memory on node %d\n", nid); 1778c2ecf20Sopenharmony_ci continue; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci memtrace_array[memtrace_array_nr].start = m; 1838c2ecf20Sopenharmony_ci memtrace_array[memtrace_array_nr].size = size; 1848c2ecf20Sopenharmony_ci memtrace_array[memtrace_array_nr].nid = nid; 1858c2ecf20Sopenharmony_ci memtrace_array_nr++; 1868c2ecf20Sopenharmony_ci } 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci return 0; 1898c2ecf20Sopenharmony_ci} 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_cistatic struct dentry *memtrace_debugfs_dir; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_cistatic int memtrace_init_debugfs(void) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci int ret = 0; 1968c2ecf20Sopenharmony_ci int i; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci for (i = 0; i < memtrace_array_nr; i++) { 1998c2ecf20Sopenharmony_ci struct dentry *dir; 2008c2ecf20Sopenharmony_ci struct memtrace_entry *ent = &memtrace_array[i]; 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci ent->mem = ioremap(ent->start, ent->size); 2038c2ecf20Sopenharmony_ci /* Warn but continue on */ 2048c2ecf20Sopenharmony_ci if (!ent->mem) { 2058c2ecf20Sopenharmony_ci pr_err("Failed to map trace memory at 0x%llx\n", 2068c2ecf20Sopenharmony_ci ent->start); 2078c2ecf20Sopenharmony_ci ret = -1; 2088c2ecf20Sopenharmony_ci continue; 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci snprintf(ent->name, 16, "%08x", ent->nid); 2128c2ecf20Sopenharmony_ci dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci ent->dir = dir; 2158c2ecf20Sopenharmony_ci debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); 2168c2ecf20Sopenharmony_ci debugfs_create_x64("start", 0400, dir, &ent->start); 2178c2ecf20Sopenharmony_ci debugfs_create_x64("size", 0400, dir, &ent->size); 2188c2ecf20Sopenharmony_ci } 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci return ret; 2218c2ecf20Sopenharmony_ci} 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_cistatic int online_mem_block(struct memory_block *mem, void *arg) 2248c2ecf20Sopenharmony_ci{ 2258c2ecf20Sopenharmony_ci return device_online(&mem->dev); 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci/* 2298c2ecf20Sopenharmony_ci * Iterate through the chunks of memory we have removed from the kernel 2308c2ecf20Sopenharmony_ci * and attempt to add them back to the kernel. 2318c2ecf20Sopenharmony_ci */ 2328c2ecf20Sopenharmony_cistatic int memtrace_online(void) 2338c2ecf20Sopenharmony_ci{ 2348c2ecf20Sopenharmony_ci int i, ret = 0; 2358c2ecf20Sopenharmony_ci struct memtrace_entry *ent; 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci for (i = memtrace_array_nr - 1; i >= 0; i--) { 2388c2ecf20Sopenharmony_ci ent = &memtrace_array[i]; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci /* We have onlined this chunk previously */ 2418c2ecf20Sopenharmony_ci if (ent->nid == NUMA_NO_NODE) 2428c2ecf20Sopenharmony_ci continue; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci /* Remove from io mappings */ 2458c2ecf20Sopenharmony_ci if (ent->mem) { 2468c2ecf20Sopenharmony_ci iounmap(ent->mem); 2478c2ecf20Sopenharmony_ci ent->mem = 0; 2488c2ecf20Sopenharmony_ci } 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { 2518c2ecf20Sopenharmony_ci pr_err("Failed to add trace memory to node %d\n", 2528c2ecf20Sopenharmony_ci ent->nid); 2538c2ecf20Sopenharmony_ci ret += 1; 2548c2ecf20Sopenharmony_ci continue; 2558c2ecf20Sopenharmony_ci } 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci lock_device_hotplug(); 2588c2ecf20Sopenharmony_ci walk_memory_blocks(ent->start, ent->size, NULL, 2598c2ecf20Sopenharmony_ci online_mem_block); 2608c2ecf20Sopenharmony_ci unlock_device_hotplug(); 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci /* 2638c2ecf20Sopenharmony_ci * Memory was added successfully so clean up references to it 2648c2ecf20Sopenharmony_ci * so on reentry we can tell that this chunk was added. 2658c2ecf20Sopenharmony_ci */ 2668c2ecf20Sopenharmony_ci debugfs_remove_recursive(ent->dir); 2678c2ecf20Sopenharmony_ci pr_info("Added trace memory back to node %d\n", ent->nid); 2688c2ecf20Sopenharmony_ci ent->size = ent->start = ent->nid = NUMA_NO_NODE; 2698c2ecf20Sopenharmony_ci } 2708c2ecf20Sopenharmony_ci if (ret) 2718c2ecf20Sopenharmony_ci return ret; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci /* If all chunks of memory were added successfully, reset globals */ 2748c2ecf20Sopenharmony_ci kfree(memtrace_array); 2758c2ecf20Sopenharmony_ci memtrace_array = NULL; 2768c2ecf20Sopenharmony_ci memtrace_size = 0; 2778c2ecf20Sopenharmony_ci memtrace_array_nr = 0; 2788c2ecf20Sopenharmony_ci return 0; 2798c2ecf20Sopenharmony_ci} 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_cistatic int memtrace_enable_set(void *data, u64 val) 2828c2ecf20Sopenharmony_ci{ 2838c2ecf20Sopenharmony_ci int rc = -EAGAIN; 2848c2ecf20Sopenharmony_ci u64 bytes; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci /* 2878c2ecf20Sopenharmony_ci * Don't attempt to do anything if size isn't aligned to a memory 2888c2ecf20Sopenharmony_ci * block or equal to zero. 2898c2ecf20Sopenharmony_ci */ 2908c2ecf20Sopenharmony_ci bytes = memory_block_size_bytes(); 2918c2ecf20Sopenharmony_ci if (val & (bytes - 1)) { 2928c2ecf20Sopenharmony_ci pr_err("Value must be aligned with 0x%llx\n", bytes); 2938c2ecf20Sopenharmony_ci return -EINVAL; 2948c2ecf20Sopenharmony_ci } 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci mutex_lock(&memtrace_mutex); 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Re-add/online previously removed/offlined memory */ 2998c2ecf20Sopenharmony_ci if (memtrace_size) { 3008c2ecf20Sopenharmony_ci if (memtrace_online()) 3018c2ecf20Sopenharmony_ci goto out_unlock; 3028c2ecf20Sopenharmony_ci } 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci if (!val) { 3058c2ecf20Sopenharmony_ci rc = 0; 3068c2ecf20Sopenharmony_ci goto out_unlock; 3078c2ecf20Sopenharmony_ci } 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci /* Offline and remove memory */ 3108c2ecf20Sopenharmony_ci if (memtrace_init_regions_runtime(val)) 3118c2ecf20Sopenharmony_ci goto out_unlock; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci if (memtrace_init_debugfs()) 3148c2ecf20Sopenharmony_ci goto out_unlock; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci memtrace_size = val; 3178c2ecf20Sopenharmony_ci rc = 0; 3188c2ecf20Sopenharmony_ciout_unlock: 3198c2ecf20Sopenharmony_ci mutex_unlock(&memtrace_mutex); 3208c2ecf20Sopenharmony_ci return rc; 3218c2ecf20Sopenharmony_ci} 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_cistatic int memtrace_enable_get(void *data, u64 *val) 3248c2ecf20Sopenharmony_ci{ 3258c2ecf20Sopenharmony_ci *val = memtrace_size; 3268c2ecf20Sopenharmony_ci return 0; 3278c2ecf20Sopenharmony_ci} 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ciDEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 3308c2ecf20Sopenharmony_ci memtrace_enable_set, "0x%016llx\n"); 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_cistatic int memtrace_init(void) 3338c2ecf20Sopenharmony_ci{ 3348c2ecf20Sopenharmony_ci memtrace_debugfs_dir = debugfs_create_dir("memtrace", 3358c2ecf20Sopenharmony_ci powerpc_debugfs_root); 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 3388c2ecf20Sopenharmony_ci NULL, &memtrace_init_fops); 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci return 0; 3418c2ecf20Sopenharmony_ci} 3428c2ecf20Sopenharmony_cimachine_device_initcall(powernv, memtrace_init); 343