162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Memory subsystem support 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 662306a36Sopenharmony_ci * Dave Hansen <haveblue@us.ibm.com> 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * This file provides the necessary infrastructure to represent 962306a36Sopenharmony_ci * a SPARSEMEM-memory-model system's physical memory in /sysfs. 1062306a36Sopenharmony_ci * All arch-independent code that assumes MEMORY_HOTPLUG requires 1162306a36Sopenharmony_ci * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/module.h> 1562306a36Sopenharmony_ci#include <linux/init.h> 1662306a36Sopenharmony_ci#include <linux/topology.h> 1762306a36Sopenharmony_ci#include <linux/capability.h> 1862306a36Sopenharmony_ci#include <linux/device.h> 1962306a36Sopenharmony_ci#include <linux/memory.h> 2062306a36Sopenharmony_ci#include <linux/memory_hotplug.h> 2162306a36Sopenharmony_ci#include <linux/mm.h> 2262306a36Sopenharmony_ci#include <linux/stat.h> 2362306a36Sopenharmony_ci#include <linux/slab.h> 2462306a36Sopenharmony_ci#include <linux/xarray.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#include <linux/atomic.h> 2762306a36Sopenharmony_ci#include <linux/uaccess.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define MEMORY_CLASS_NAME "memory" 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_cistatic const char *const online_type_to_str[] = { 3262306a36Sopenharmony_ci [MMOP_OFFLINE] = "offline", 3362306a36Sopenharmony_ci [MMOP_ONLINE] = "online", 3462306a36Sopenharmony_ci [MMOP_ONLINE_KERNEL] = "online_kernel", 3562306a36Sopenharmony_ci [MMOP_ONLINE_MOVABLE] = "online_movable", 3662306a36Sopenharmony_ci}; 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ciint mhp_online_type_from_str(const char *str) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci int i; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) { 4362306a36Sopenharmony_ci if (sysfs_streq(str, online_type_to_str[i])) 4462306a36Sopenharmony_ci return i; 4562306a36Sopenharmony_ci } 4662306a36Sopenharmony_ci return -EINVAL; 4762306a36Sopenharmony_ci} 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci#define to_memory_block(dev) container_of(dev, struct memory_block, dev) 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic int sections_per_block; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic inline unsigned long memory_block_id(unsigned long section_nr) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci return section_nr / sections_per_block; 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic inline unsigned long pfn_to_block_id(unsigned long pfn) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci return memory_block_id(pfn_to_section_nr(pfn)); 6162306a36Sopenharmony_ci} 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_cistatic inline unsigned long phys_to_block_id(unsigned long phys) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci return pfn_to_block_id(PFN_DOWN(phys)); 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_cistatic int memory_subsys_online(struct device *dev); 6962306a36Sopenharmony_cistatic int memory_subsys_offline(struct device *dev); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic struct bus_type memory_subsys = { 7262306a36Sopenharmony_ci .name = MEMORY_CLASS_NAME, 7362306a36Sopenharmony_ci .dev_name = MEMORY_CLASS_NAME, 7462306a36Sopenharmony_ci .online = memory_subsys_online, 7562306a36Sopenharmony_ci .offline = memory_subsys_offline, 7662306a36Sopenharmony_ci}; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci/* 7962306a36Sopenharmony_ci * Memory blocks are cached in a local radix tree to avoid 8062306a36Sopenharmony_ci * a costly linear search for the corresponding device on 8162306a36Sopenharmony_ci * the subsystem bus. 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_cistatic DEFINE_XARRAY(memory_blocks); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci/* 8662306a36Sopenharmony_ci * Memory groups, indexed by memory group id (mgid). 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_cistatic DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC); 8962306a36Sopenharmony_ci#define MEMORY_GROUP_MARK_DYNAMIC XA_MARK_1 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_cistatic BLOCKING_NOTIFIER_HEAD(memory_chain); 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ciint register_memory_notifier(struct notifier_block *nb) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci return blocking_notifier_chain_register(&memory_chain, nb); 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ciEXPORT_SYMBOL(register_memory_notifier); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_civoid unregister_memory_notifier(struct notifier_block *nb) 10062306a36Sopenharmony_ci{ 10162306a36Sopenharmony_ci blocking_notifier_chain_unregister(&memory_chain, nb); 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ciEXPORT_SYMBOL(unregister_memory_notifier); 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_cistatic void memory_block_release(struct device *dev) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 10862306a36Sopenharmony_ci /* Verify that the altmap is freed */ 10962306a36Sopenharmony_ci WARN_ON(mem->altmap); 11062306a36Sopenharmony_ci kfree(mem); 11162306a36Sopenharmony_ci} 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ciunsigned long __weak memory_block_size_bytes(void) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci return MIN_MEMORY_BLOCK_SIZE; 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_block_size_bytes); 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci/* Show the memory block ID, relative to the memory block size */ 12062306a36Sopenharmony_cistatic ssize_t phys_index_show(struct device *dev, 12162306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr)); 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci/* 12962306a36Sopenharmony_ci * Legacy interface that we cannot remove. Always indicate "removable" 13062306a36Sopenharmony_ci * with CONFIG_MEMORY_HOTREMOVE - bad heuristic. 13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_cistatic ssize_t removable_show(struct device *dev, struct device_attribute *attr, 13362306a36Sopenharmony_ci char *buf) 13462306a36Sopenharmony_ci{ 13562306a36Sopenharmony_ci return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); 13662306a36Sopenharmony_ci} 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci/* 13962306a36Sopenharmony_ci * online, offline, going offline, etc. 14062306a36Sopenharmony_ci */ 14162306a36Sopenharmony_cistatic ssize_t state_show(struct device *dev, struct device_attribute *attr, 14262306a36Sopenharmony_ci char *buf) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 14562306a36Sopenharmony_ci const char *output; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* 14862306a36Sopenharmony_ci * We can probably put these states in a nice little array 14962306a36Sopenharmony_ci * so that they're not open-coded 15062306a36Sopenharmony_ci */ 15162306a36Sopenharmony_ci switch (mem->state) { 15262306a36Sopenharmony_ci case MEM_ONLINE: 15362306a36Sopenharmony_ci output = "online"; 15462306a36Sopenharmony_ci break; 15562306a36Sopenharmony_ci case MEM_OFFLINE: 15662306a36Sopenharmony_ci output = "offline"; 15762306a36Sopenharmony_ci break; 15862306a36Sopenharmony_ci case MEM_GOING_OFFLINE: 15962306a36Sopenharmony_ci output = "going-offline"; 16062306a36Sopenharmony_ci break; 16162306a36Sopenharmony_ci default: 16262306a36Sopenharmony_ci WARN_ON(1); 16362306a36Sopenharmony_ci return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state); 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci return sysfs_emit(buf, "%s\n", output); 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ciint memory_notify(unsigned long val, void *v) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci return blocking_notifier_call_chain(&memory_chain, val, v); 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) 17562306a36Sopenharmony_cistatic unsigned long memblk_nr_poison(struct memory_block *mem); 17662306a36Sopenharmony_ci#else 17762306a36Sopenharmony_cistatic inline unsigned long memblk_nr_poison(struct memory_block *mem) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci return 0; 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci#endif 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci/* 18462306a36Sopenharmony_ci * Must acquire mem_hotplug_lock in write mode. 18562306a36Sopenharmony_ci */ 18662306a36Sopenharmony_cistatic int memory_block_online(struct memory_block *mem) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 18962306a36Sopenharmony_ci unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 19062306a36Sopenharmony_ci unsigned long nr_vmemmap_pages = 0; 19162306a36Sopenharmony_ci struct zone *zone; 19262306a36Sopenharmony_ci int ret; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci if (memblk_nr_poison(mem)) 19562306a36Sopenharmony_ci return -EHWPOISON; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group, 19862306a36Sopenharmony_ci start_pfn, nr_pages); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci /* 20162306a36Sopenharmony_ci * Although vmemmap pages have a different lifecycle than the pages 20262306a36Sopenharmony_ci * they describe (they remain until the memory is unplugged), doing 20362306a36Sopenharmony_ci * their initialization and accounting at memory onlining/offlining 20462306a36Sopenharmony_ci * stage helps to keep accounting easier to follow - e.g vmemmaps 20562306a36Sopenharmony_ci * belong to the same zone as the memory they backed. 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_ci if (mem->altmap) 20862306a36Sopenharmony_ci nr_vmemmap_pages = mem->altmap->free; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci mem_hotplug_begin(); 21162306a36Sopenharmony_ci if (nr_vmemmap_pages) { 21262306a36Sopenharmony_ci ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); 21362306a36Sopenharmony_ci if (ret) 21462306a36Sopenharmony_ci goto out; 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci ret = online_pages(start_pfn + nr_vmemmap_pages, 21862306a36Sopenharmony_ci nr_pages - nr_vmemmap_pages, zone, mem->group); 21962306a36Sopenharmony_ci if (ret) { 22062306a36Sopenharmony_ci if (nr_vmemmap_pages) 22162306a36Sopenharmony_ci mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); 22262306a36Sopenharmony_ci goto out; 22362306a36Sopenharmony_ci } 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci /* 22662306a36Sopenharmony_ci * Account once onlining succeeded. If the zone was unpopulated, it is 22762306a36Sopenharmony_ci * now already properly populated. 22862306a36Sopenharmony_ci */ 22962306a36Sopenharmony_ci if (nr_vmemmap_pages) 23062306a36Sopenharmony_ci adjust_present_page_count(pfn_to_page(start_pfn), mem->group, 23162306a36Sopenharmony_ci nr_vmemmap_pages); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci mem->zone = zone; 23462306a36Sopenharmony_ciout: 23562306a36Sopenharmony_ci mem_hotplug_done(); 23662306a36Sopenharmony_ci return ret; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * Must acquire mem_hotplug_lock in write mode. 24162306a36Sopenharmony_ci */ 24262306a36Sopenharmony_cistatic int memory_block_offline(struct memory_block *mem) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 24562306a36Sopenharmony_ci unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 24662306a36Sopenharmony_ci unsigned long nr_vmemmap_pages = 0; 24762306a36Sopenharmony_ci int ret; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci if (!mem->zone) 25062306a36Sopenharmony_ci return -EINVAL; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci /* 25362306a36Sopenharmony_ci * Unaccount before offlining, such that unpopulated zone and kthreads 25462306a36Sopenharmony_ci * can properly be torn down in offline_pages(). 25562306a36Sopenharmony_ci */ 25662306a36Sopenharmony_ci if (mem->altmap) 25762306a36Sopenharmony_ci nr_vmemmap_pages = mem->altmap->free; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci mem_hotplug_begin(); 26062306a36Sopenharmony_ci if (nr_vmemmap_pages) 26162306a36Sopenharmony_ci adjust_present_page_count(pfn_to_page(start_pfn), mem->group, 26262306a36Sopenharmony_ci -nr_vmemmap_pages); 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci ret = offline_pages(start_pfn + nr_vmemmap_pages, 26562306a36Sopenharmony_ci nr_pages - nr_vmemmap_pages, mem->zone, mem->group); 26662306a36Sopenharmony_ci if (ret) { 26762306a36Sopenharmony_ci /* offline_pages() failed. Account back. */ 26862306a36Sopenharmony_ci if (nr_vmemmap_pages) 26962306a36Sopenharmony_ci adjust_present_page_count(pfn_to_page(start_pfn), 27062306a36Sopenharmony_ci mem->group, nr_vmemmap_pages); 27162306a36Sopenharmony_ci goto out; 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci if (nr_vmemmap_pages) 27562306a36Sopenharmony_ci mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci mem->zone = NULL; 27862306a36Sopenharmony_ciout: 27962306a36Sopenharmony_ci mem_hotplug_done(); 28062306a36Sopenharmony_ci return ret; 28162306a36Sopenharmony_ci} 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci/* 28462306a36Sopenharmony_ci * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 28562306a36Sopenharmony_ci * OK to have direct references to sparsemem variables in here. 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_cistatic int 28862306a36Sopenharmony_cimemory_block_action(struct memory_block *mem, unsigned long action) 28962306a36Sopenharmony_ci{ 29062306a36Sopenharmony_ci int ret; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci switch (action) { 29362306a36Sopenharmony_ci case MEM_ONLINE: 29462306a36Sopenharmony_ci ret = memory_block_online(mem); 29562306a36Sopenharmony_ci break; 29662306a36Sopenharmony_ci case MEM_OFFLINE: 29762306a36Sopenharmony_ci ret = memory_block_offline(mem); 29862306a36Sopenharmony_ci break; 29962306a36Sopenharmony_ci default: 30062306a36Sopenharmony_ci WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 30162306a36Sopenharmony_ci "%ld\n", __func__, mem->start_section_nr, action, action); 30262306a36Sopenharmony_ci ret = -EINVAL; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci return ret; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic int memory_block_change_state(struct memory_block *mem, 30962306a36Sopenharmony_ci unsigned long to_state, unsigned long from_state_req) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci int ret = 0; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci if (mem->state != from_state_req) 31462306a36Sopenharmony_ci return -EINVAL; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (to_state == MEM_OFFLINE) 31762306a36Sopenharmony_ci mem->state = MEM_GOING_OFFLINE; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci ret = memory_block_action(mem, to_state); 32062306a36Sopenharmony_ci mem->state = ret ? from_state_req : to_state; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci return ret; 32362306a36Sopenharmony_ci} 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci/* The device lock serializes operations on memory_subsys_[online|offline] */ 32662306a36Sopenharmony_cistatic int memory_subsys_online(struct device *dev) 32762306a36Sopenharmony_ci{ 32862306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 32962306a36Sopenharmony_ci int ret; 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci if (mem->state == MEM_ONLINE) 33262306a36Sopenharmony_ci return 0; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci /* 33562306a36Sopenharmony_ci * When called via device_online() without configuring the online_type, 33662306a36Sopenharmony_ci * we want to default to MMOP_ONLINE. 33762306a36Sopenharmony_ci */ 33862306a36Sopenharmony_ci if (mem->online_type == MMOP_OFFLINE) 33962306a36Sopenharmony_ci mem->online_type = MMOP_ONLINE; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 34262306a36Sopenharmony_ci mem->online_type = MMOP_OFFLINE; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci return ret; 34562306a36Sopenharmony_ci} 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_cistatic int memory_subsys_offline(struct device *dev) 34862306a36Sopenharmony_ci{ 34962306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci if (mem->state == MEM_OFFLINE) 35262306a36Sopenharmony_ci return 0; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cistatic ssize_t state_store(struct device *dev, struct device_attribute *attr, 35862306a36Sopenharmony_ci const char *buf, size_t count) 35962306a36Sopenharmony_ci{ 36062306a36Sopenharmony_ci const int online_type = mhp_online_type_from_str(buf); 36162306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 36262306a36Sopenharmony_ci int ret; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (online_type < 0) 36562306a36Sopenharmony_ci return -EINVAL; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci ret = lock_device_hotplug_sysfs(); 36862306a36Sopenharmony_ci if (ret) 36962306a36Sopenharmony_ci return ret; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci switch (online_type) { 37262306a36Sopenharmony_ci case MMOP_ONLINE_KERNEL: 37362306a36Sopenharmony_ci case MMOP_ONLINE_MOVABLE: 37462306a36Sopenharmony_ci case MMOP_ONLINE: 37562306a36Sopenharmony_ci /* mem->online_type is protected by device_hotplug_lock */ 37662306a36Sopenharmony_ci mem->online_type = online_type; 37762306a36Sopenharmony_ci ret = device_online(&mem->dev); 37862306a36Sopenharmony_ci break; 37962306a36Sopenharmony_ci case MMOP_OFFLINE: 38062306a36Sopenharmony_ci ret = device_offline(&mem->dev); 38162306a36Sopenharmony_ci break; 38262306a36Sopenharmony_ci default: 38362306a36Sopenharmony_ci ret = -EINVAL; /* should never happen */ 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci unlock_device_hotplug(); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci if (ret < 0) 38962306a36Sopenharmony_ci return ret; 39062306a36Sopenharmony_ci if (ret) 39162306a36Sopenharmony_ci return -EINVAL; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci return count; 39462306a36Sopenharmony_ci} 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci/* 39762306a36Sopenharmony_ci * Legacy interface that we cannot remove: s390x exposes the storage increment 39862306a36Sopenharmony_ci * covered by a memory block, allowing for identifying which memory blocks 39962306a36Sopenharmony_ci * comprise a storage increment. Since a memory block spans complete 40062306a36Sopenharmony_ci * storage increments nowadays, this interface is basically unused. Other 40162306a36Sopenharmony_ci * archs never exposed != 0. 40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_cistatic ssize_t phys_device_show(struct device *dev, 40462306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 40762306a36Sopenharmony_ci unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci return sysfs_emit(buf, "%d\n", 41062306a36Sopenharmony_ci arch_get_memory_phys_device(start_pfn)); 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE 41462306a36Sopenharmony_cistatic int print_allowed_zone(char *buf, int len, int nid, 41562306a36Sopenharmony_ci struct memory_group *group, 41662306a36Sopenharmony_ci unsigned long start_pfn, unsigned long nr_pages, 41762306a36Sopenharmony_ci int online_type, struct zone *default_zone) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci struct zone *zone; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages); 42262306a36Sopenharmony_ci if (zone == default_zone) 42362306a36Sopenharmony_ci return 0; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci return sysfs_emit_at(buf, len, " %s", zone->name); 42662306a36Sopenharmony_ci} 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_cistatic ssize_t valid_zones_show(struct device *dev, 42962306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 43062306a36Sopenharmony_ci{ 43162306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 43262306a36Sopenharmony_ci unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 43362306a36Sopenharmony_ci unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 43462306a36Sopenharmony_ci struct memory_group *group = mem->group; 43562306a36Sopenharmony_ci struct zone *default_zone; 43662306a36Sopenharmony_ci int nid = mem->nid; 43762306a36Sopenharmony_ci int len = 0; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci /* 44062306a36Sopenharmony_ci * Check the existing zone. Make sure that we do that only on the 44162306a36Sopenharmony_ci * online nodes otherwise the page_zone is not reliable 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_ci if (mem->state == MEM_ONLINE) { 44462306a36Sopenharmony_ci /* 44562306a36Sopenharmony_ci * If !mem->zone, the memory block spans multiple zones and 44662306a36Sopenharmony_ci * cannot get offlined. 44762306a36Sopenharmony_ci */ 44862306a36Sopenharmony_ci default_zone = mem->zone; 44962306a36Sopenharmony_ci if (!default_zone) 45062306a36Sopenharmony_ci return sysfs_emit(buf, "%s\n", "none"); 45162306a36Sopenharmony_ci len += sysfs_emit_at(buf, len, "%s", default_zone->name); 45262306a36Sopenharmony_ci goto out; 45362306a36Sopenharmony_ci } 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group, 45662306a36Sopenharmony_ci start_pfn, nr_pages); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci len += sysfs_emit_at(buf, len, "%s", default_zone->name); 45962306a36Sopenharmony_ci len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, 46062306a36Sopenharmony_ci MMOP_ONLINE_KERNEL, default_zone); 46162306a36Sopenharmony_ci len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages, 46262306a36Sopenharmony_ci MMOP_ONLINE_MOVABLE, default_zone); 46362306a36Sopenharmony_ciout: 46462306a36Sopenharmony_ci len += sysfs_emit_at(buf, len, "\n"); 46562306a36Sopenharmony_ci return len; 46662306a36Sopenharmony_ci} 46762306a36Sopenharmony_cistatic DEVICE_ATTR_RO(valid_zones); 46862306a36Sopenharmony_ci#endif 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_cistatic DEVICE_ATTR_RO(phys_index); 47162306a36Sopenharmony_cistatic DEVICE_ATTR_RW(state); 47262306a36Sopenharmony_cistatic DEVICE_ATTR_RO(phys_device); 47362306a36Sopenharmony_cistatic DEVICE_ATTR_RO(removable); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci/* 47662306a36Sopenharmony_ci * Show the memory block size (shared by all memory blocks). 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_cistatic ssize_t block_size_bytes_show(struct device *dev, 47962306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci return sysfs_emit(buf, "%lx\n", memory_block_size_bytes()); 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_cistatic DEVICE_ATTR_RO(block_size_bytes); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci/* 48762306a36Sopenharmony_ci * Memory auto online policy. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic ssize_t auto_online_blocks_show(struct device *dev, 49162306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci return sysfs_emit(buf, "%s\n", 49462306a36Sopenharmony_ci online_type_to_str[mhp_default_online_type]); 49562306a36Sopenharmony_ci} 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_cistatic ssize_t auto_online_blocks_store(struct device *dev, 49862306a36Sopenharmony_ci struct device_attribute *attr, 49962306a36Sopenharmony_ci const char *buf, size_t count) 50062306a36Sopenharmony_ci{ 50162306a36Sopenharmony_ci const int online_type = mhp_online_type_from_str(buf); 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci if (online_type < 0) 50462306a36Sopenharmony_ci return -EINVAL; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci mhp_default_online_type = online_type; 50762306a36Sopenharmony_ci return count; 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_cistatic DEVICE_ATTR_RW(auto_online_blocks); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci#ifdef CONFIG_CRASH_HOTPLUG 51362306a36Sopenharmony_ci#include <linux/kexec.h> 51462306a36Sopenharmony_cistatic ssize_t crash_hotplug_show(struct device *dev, 51562306a36Sopenharmony_ci struct device_attribute *attr, char *buf) 51662306a36Sopenharmony_ci{ 51762306a36Sopenharmony_ci return sysfs_emit(buf, "%d\n", crash_hotplug_memory_support()); 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_cistatic DEVICE_ATTR_RO(crash_hotplug); 52062306a36Sopenharmony_ci#endif 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci/* 52362306a36Sopenharmony_ci * Some architectures will have custom drivers to do this, and 52462306a36Sopenharmony_ci * will not need to do it from userspace. The fake hot-add code 52562306a36Sopenharmony_ci * as well as ppc64 will do all of their discovery in userspace 52662306a36Sopenharmony_ci * and will require this interface. 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_ci#ifdef CONFIG_ARCH_MEMORY_PROBE 52962306a36Sopenharmony_cistatic ssize_t probe_store(struct device *dev, struct device_attribute *attr, 53062306a36Sopenharmony_ci const char *buf, size_t count) 53162306a36Sopenharmony_ci{ 53262306a36Sopenharmony_ci u64 phys_addr; 53362306a36Sopenharmony_ci int nid, ret; 53462306a36Sopenharmony_ci unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci ret = kstrtoull(buf, 0, &phys_addr); 53762306a36Sopenharmony_ci if (ret) 53862306a36Sopenharmony_ci return ret; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 54162306a36Sopenharmony_ci return -EINVAL; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci ret = lock_device_hotplug_sysfs(); 54462306a36Sopenharmony_ci if (ret) 54562306a36Sopenharmony_ci return ret; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci nid = memory_add_physaddr_to_nid(phys_addr); 54862306a36Sopenharmony_ci ret = __add_memory(nid, phys_addr, 54962306a36Sopenharmony_ci MIN_MEMORY_BLOCK_SIZE * sections_per_block, 55062306a36Sopenharmony_ci MHP_NONE); 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci if (ret) 55362306a36Sopenharmony_ci goto out; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci ret = count; 55662306a36Sopenharmony_ciout: 55762306a36Sopenharmony_ci unlock_device_hotplug(); 55862306a36Sopenharmony_ci return ret; 55962306a36Sopenharmony_ci} 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_cistatic DEVICE_ATTR_WO(probe); 56262306a36Sopenharmony_ci#endif 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 56562306a36Sopenharmony_ci/* 56662306a36Sopenharmony_ci * Support for offlining pages of memory 56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci/* Soft offline a page */ 57062306a36Sopenharmony_cistatic ssize_t soft_offline_page_store(struct device *dev, 57162306a36Sopenharmony_ci struct device_attribute *attr, 57262306a36Sopenharmony_ci const char *buf, size_t count) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci int ret; 57562306a36Sopenharmony_ci u64 pfn; 57662306a36Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 57762306a36Sopenharmony_ci return -EPERM; 57862306a36Sopenharmony_ci if (kstrtoull(buf, 0, &pfn) < 0) 57962306a36Sopenharmony_ci return -EINVAL; 58062306a36Sopenharmony_ci pfn >>= PAGE_SHIFT; 58162306a36Sopenharmony_ci ret = soft_offline_page(pfn, 0); 58262306a36Sopenharmony_ci return ret == 0 ? count : ret; 58362306a36Sopenharmony_ci} 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci/* Forcibly offline a page, including killing processes. */ 58662306a36Sopenharmony_cistatic ssize_t hard_offline_page_store(struct device *dev, 58762306a36Sopenharmony_ci struct device_attribute *attr, 58862306a36Sopenharmony_ci const char *buf, size_t count) 58962306a36Sopenharmony_ci{ 59062306a36Sopenharmony_ci int ret; 59162306a36Sopenharmony_ci u64 pfn; 59262306a36Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 59362306a36Sopenharmony_ci return -EPERM; 59462306a36Sopenharmony_ci if (kstrtoull(buf, 0, &pfn) < 0) 59562306a36Sopenharmony_ci return -EINVAL; 59662306a36Sopenharmony_ci pfn >>= PAGE_SHIFT; 59762306a36Sopenharmony_ci ret = memory_failure(pfn, MF_SW_SIMULATED); 59862306a36Sopenharmony_ci if (ret == -EOPNOTSUPP) 59962306a36Sopenharmony_ci ret = 0; 60062306a36Sopenharmony_ci return ret ? ret : count; 60162306a36Sopenharmony_ci} 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_cistatic DEVICE_ATTR_WO(soft_offline_page); 60462306a36Sopenharmony_cistatic DEVICE_ATTR_WO(hard_offline_page); 60562306a36Sopenharmony_ci#endif 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci/* See phys_device_show(). */ 60862306a36Sopenharmony_ciint __weak arch_get_memory_phys_device(unsigned long start_pfn) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci return 0; 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci/* 61462306a36Sopenharmony_ci * A reference for the returned memory block device is acquired. 61562306a36Sopenharmony_ci * 61662306a36Sopenharmony_ci * Called under device_hotplug_lock. 61762306a36Sopenharmony_ci */ 61862306a36Sopenharmony_cistatic struct memory_block *find_memory_block_by_id(unsigned long block_id) 61962306a36Sopenharmony_ci{ 62062306a36Sopenharmony_ci struct memory_block *mem; 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci mem = xa_load(&memory_blocks, block_id); 62362306a36Sopenharmony_ci if (mem) 62462306a36Sopenharmony_ci get_device(&mem->dev); 62562306a36Sopenharmony_ci return mem; 62662306a36Sopenharmony_ci} 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci/* 62962306a36Sopenharmony_ci * Called under device_hotplug_lock. 63062306a36Sopenharmony_ci */ 63162306a36Sopenharmony_cistruct memory_block *find_memory_block(unsigned long section_nr) 63262306a36Sopenharmony_ci{ 63362306a36Sopenharmony_ci unsigned long block_id = memory_block_id(section_nr); 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci return find_memory_block_by_id(block_id); 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic struct attribute *memory_memblk_attrs[] = { 63962306a36Sopenharmony_ci &dev_attr_phys_index.attr, 64062306a36Sopenharmony_ci &dev_attr_state.attr, 64162306a36Sopenharmony_ci &dev_attr_phys_device.attr, 64262306a36Sopenharmony_ci &dev_attr_removable.attr, 64362306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE 64462306a36Sopenharmony_ci &dev_attr_valid_zones.attr, 64562306a36Sopenharmony_ci#endif 64662306a36Sopenharmony_ci NULL 64762306a36Sopenharmony_ci}; 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_cistatic const struct attribute_group memory_memblk_attr_group = { 65062306a36Sopenharmony_ci .attrs = memory_memblk_attrs, 65162306a36Sopenharmony_ci}; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_cistatic const struct attribute_group *memory_memblk_attr_groups[] = { 65462306a36Sopenharmony_ci &memory_memblk_attr_group, 65562306a36Sopenharmony_ci NULL, 65662306a36Sopenharmony_ci}; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_cistatic int __add_memory_block(struct memory_block *memory) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci int ret; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci memory->dev.bus = &memory_subsys; 66362306a36Sopenharmony_ci memory->dev.id = memory->start_section_nr / sections_per_block; 66462306a36Sopenharmony_ci memory->dev.release = memory_block_release; 66562306a36Sopenharmony_ci memory->dev.groups = memory_memblk_attr_groups; 66662306a36Sopenharmony_ci memory->dev.offline = memory->state == MEM_OFFLINE; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci ret = device_register(&memory->dev); 66962306a36Sopenharmony_ci if (ret) { 67062306a36Sopenharmony_ci put_device(&memory->dev); 67162306a36Sopenharmony_ci return ret; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, 67462306a36Sopenharmony_ci GFP_KERNEL)); 67562306a36Sopenharmony_ci if (ret) 67662306a36Sopenharmony_ci device_unregister(&memory->dev); 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci return ret; 67962306a36Sopenharmony_ci} 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_cistatic struct zone *early_node_zone_for_memory_block(struct memory_block *mem, 68262306a36Sopenharmony_ci int nid) 68362306a36Sopenharmony_ci{ 68462306a36Sopenharmony_ci const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 68562306a36Sopenharmony_ci const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 68662306a36Sopenharmony_ci struct zone *zone, *matching_zone = NULL; 68762306a36Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 68862306a36Sopenharmony_ci int i; 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci /* 69162306a36Sopenharmony_ci * This logic only works for early memory, when the applicable zones 69262306a36Sopenharmony_ci * already span the memory block. We don't expect overlapping zones on 69362306a36Sopenharmony_ci * a single node for early memory. So if we're told that some PFNs 69462306a36Sopenharmony_ci * of a node fall into this memory block, we can assume that all node 69562306a36Sopenharmony_ci * zones that intersect with the memory block are actually applicable. 69662306a36Sopenharmony_ci * No need to look at the memmap. 69762306a36Sopenharmony_ci */ 69862306a36Sopenharmony_ci for (i = 0; i < MAX_NR_ZONES; i++) { 69962306a36Sopenharmony_ci zone = pgdat->node_zones + i; 70062306a36Sopenharmony_ci if (!populated_zone(zone)) 70162306a36Sopenharmony_ci continue; 70262306a36Sopenharmony_ci if (!zone_intersects(zone, start_pfn, nr_pages)) 70362306a36Sopenharmony_ci continue; 70462306a36Sopenharmony_ci if (!matching_zone) { 70562306a36Sopenharmony_ci matching_zone = zone; 70662306a36Sopenharmony_ci continue; 70762306a36Sopenharmony_ci } 70862306a36Sopenharmony_ci /* Spans multiple zones ... */ 70962306a36Sopenharmony_ci matching_zone = NULL; 71062306a36Sopenharmony_ci break; 71162306a36Sopenharmony_ci } 71262306a36Sopenharmony_ci return matching_zone; 71362306a36Sopenharmony_ci} 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci#ifdef CONFIG_NUMA 71662306a36Sopenharmony_ci/** 71762306a36Sopenharmony_ci * memory_block_add_nid() - Indicate that system RAM falling into this memory 71862306a36Sopenharmony_ci * block device (partially) belongs to the given node. 71962306a36Sopenharmony_ci * @mem: The memory block device. 72062306a36Sopenharmony_ci * @nid: The node id. 72162306a36Sopenharmony_ci * @context: The memory initialization context. 72262306a36Sopenharmony_ci * 72362306a36Sopenharmony_ci * Indicate that system RAM falling into this memory block (partially) belongs 72462306a36Sopenharmony_ci * to the given node. If the context indicates ("early") that we are adding the 72562306a36Sopenharmony_ci * node during node device subsystem initialization, this will also properly 72662306a36Sopenharmony_ci * set/adjust mem->zone based on the zone ranges of the given node. 72762306a36Sopenharmony_ci */ 72862306a36Sopenharmony_civoid memory_block_add_nid(struct memory_block *mem, int nid, 72962306a36Sopenharmony_ci enum meminit_context context) 73062306a36Sopenharmony_ci{ 73162306a36Sopenharmony_ci if (context == MEMINIT_EARLY && mem->nid != nid) { 73262306a36Sopenharmony_ci /* 73362306a36Sopenharmony_ci * For early memory we have to determine the zone when setting 73462306a36Sopenharmony_ci * the node id and handle multiple nodes spanning a single 73562306a36Sopenharmony_ci * memory block by indicate via zone == NULL that we're not 73662306a36Sopenharmony_ci * dealing with a single zone. So if we're setting the node id 73762306a36Sopenharmony_ci * the first time, determine if there is a single zone. If we're 73862306a36Sopenharmony_ci * setting the node id a second time to a different node, 73962306a36Sopenharmony_ci * invalidate the single detected zone. 74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci if (mem->nid == NUMA_NO_NODE) 74262306a36Sopenharmony_ci mem->zone = early_node_zone_for_memory_block(mem, nid); 74362306a36Sopenharmony_ci else 74462306a36Sopenharmony_ci mem->zone = NULL; 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * If this memory block spans multiple nodes, we only indicate 74962306a36Sopenharmony_ci * the last processed node. If we span multiple nodes (not applicable 75062306a36Sopenharmony_ci * to hotplugged memory), zone == NULL will prohibit memory offlining 75162306a36Sopenharmony_ci * and consequently unplug. 75262306a36Sopenharmony_ci */ 75362306a36Sopenharmony_ci mem->nid = nid; 75462306a36Sopenharmony_ci} 75562306a36Sopenharmony_ci#endif 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_cistatic int add_memory_block(unsigned long block_id, unsigned long state, 75862306a36Sopenharmony_ci struct vmem_altmap *altmap, 75962306a36Sopenharmony_ci struct memory_group *group) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci struct memory_block *mem; 76262306a36Sopenharmony_ci int ret = 0; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci mem = find_memory_block_by_id(block_id); 76562306a36Sopenharmony_ci if (mem) { 76662306a36Sopenharmony_ci put_device(&mem->dev); 76762306a36Sopenharmony_ci return -EEXIST; 76862306a36Sopenharmony_ci } 76962306a36Sopenharmony_ci mem = kzalloc(sizeof(*mem), GFP_KERNEL); 77062306a36Sopenharmony_ci if (!mem) 77162306a36Sopenharmony_ci return -ENOMEM; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci mem->start_section_nr = block_id * sections_per_block; 77462306a36Sopenharmony_ci mem->state = state; 77562306a36Sopenharmony_ci mem->nid = NUMA_NO_NODE; 77662306a36Sopenharmony_ci mem->altmap = altmap; 77762306a36Sopenharmony_ci INIT_LIST_HEAD(&mem->group_next); 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci#ifndef CONFIG_NUMA 78062306a36Sopenharmony_ci if (state == MEM_ONLINE) 78162306a36Sopenharmony_ci /* 78262306a36Sopenharmony_ci * MEM_ONLINE at this point implies early memory. With NUMA, 78362306a36Sopenharmony_ci * we'll determine the zone when setting the node id via 78462306a36Sopenharmony_ci * memory_block_add_nid(). Memory hotplug updated the zone 78562306a36Sopenharmony_ci * manually when memory onlining/offlining succeeds. 78662306a36Sopenharmony_ci */ 78762306a36Sopenharmony_ci mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE); 78862306a36Sopenharmony_ci#endif /* CONFIG_NUMA */ 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci ret = __add_memory_block(mem); 79162306a36Sopenharmony_ci if (ret) 79262306a36Sopenharmony_ci return ret; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci if (group) { 79562306a36Sopenharmony_ci mem->group = group; 79662306a36Sopenharmony_ci list_add(&mem->group_next, &group->memory_blocks); 79762306a36Sopenharmony_ci } 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci return 0; 80062306a36Sopenharmony_ci} 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_cistatic int __init add_boot_memory_block(unsigned long base_section_nr) 80362306a36Sopenharmony_ci{ 80462306a36Sopenharmony_ci int section_count = 0; 80562306a36Sopenharmony_ci unsigned long nr; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 80862306a36Sopenharmony_ci nr++) 80962306a36Sopenharmony_ci if (present_section_nr(nr)) 81062306a36Sopenharmony_ci section_count++; 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci if (section_count == 0) 81362306a36Sopenharmony_ci return 0; 81462306a36Sopenharmony_ci return add_memory_block(memory_block_id(base_section_nr), 81562306a36Sopenharmony_ci MEM_ONLINE, NULL, NULL); 81662306a36Sopenharmony_ci} 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_cistatic int add_hotplug_memory_block(unsigned long block_id, 81962306a36Sopenharmony_ci struct vmem_altmap *altmap, 82062306a36Sopenharmony_ci struct memory_group *group) 82162306a36Sopenharmony_ci{ 82262306a36Sopenharmony_ci return add_memory_block(block_id, MEM_OFFLINE, altmap, group); 82362306a36Sopenharmony_ci} 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_cistatic void remove_memory_block(struct memory_block *memory) 82662306a36Sopenharmony_ci{ 82762306a36Sopenharmony_ci if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 82862306a36Sopenharmony_ci return; 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL); 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci if (memory->group) { 83362306a36Sopenharmony_ci list_del(&memory->group_next); 83462306a36Sopenharmony_ci memory->group = NULL; 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci /* drop the ref. we got via find_memory_block() */ 83862306a36Sopenharmony_ci put_device(&memory->dev); 83962306a36Sopenharmony_ci device_unregister(&memory->dev); 84062306a36Sopenharmony_ci} 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci/* 84362306a36Sopenharmony_ci * Create memory block devices for the given memory area. Start and size 84462306a36Sopenharmony_ci * have to be aligned to memory block granularity. Memory block devices 84562306a36Sopenharmony_ci * will be initialized as offline. 84662306a36Sopenharmony_ci * 84762306a36Sopenharmony_ci * Called under device_hotplug_lock. 84862306a36Sopenharmony_ci */ 84962306a36Sopenharmony_ciint create_memory_block_devices(unsigned long start, unsigned long size, 85062306a36Sopenharmony_ci struct vmem_altmap *altmap, 85162306a36Sopenharmony_ci struct memory_group *group) 85262306a36Sopenharmony_ci{ 85362306a36Sopenharmony_ci const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 85462306a36Sopenharmony_ci unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 85562306a36Sopenharmony_ci struct memory_block *mem; 85662306a36Sopenharmony_ci unsigned long block_id; 85762306a36Sopenharmony_ci int ret = 0; 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 86062306a36Sopenharmony_ci !IS_ALIGNED(size, memory_block_size_bytes()))) 86162306a36Sopenharmony_ci return -EINVAL; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci for (block_id = start_block_id; block_id != end_block_id; block_id++) { 86462306a36Sopenharmony_ci ret = add_hotplug_memory_block(block_id, altmap, group); 86562306a36Sopenharmony_ci if (ret) 86662306a36Sopenharmony_ci break; 86762306a36Sopenharmony_ci } 86862306a36Sopenharmony_ci if (ret) { 86962306a36Sopenharmony_ci end_block_id = block_id; 87062306a36Sopenharmony_ci for (block_id = start_block_id; block_id != end_block_id; 87162306a36Sopenharmony_ci block_id++) { 87262306a36Sopenharmony_ci mem = find_memory_block_by_id(block_id); 87362306a36Sopenharmony_ci if (WARN_ON_ONCE(!mem)) 87462306a36Sopenharmony_ci continue; 87562306a36Sopenharmony_ci remove_memory_block(mem); 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci return ret; 87962306a36Sopenharmony_ci} 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci/* 88262306a36Sopenharmony_ci * Remove memory block devices for the given memory area. Start and size 88362306a36Sopenharmony_ci * have to be aligned to memory block granularity. Memory block devices 88462306a36Sopenharmony_ci * have to be offline. 88562306a36Sopenharmony_ci * 88662306a36Sopenharmony_ci * Called under device_hotplug_lock. 88762306a36Sopenharmony_ci */ 88862306a36Sopenharmony_civoid remove_memory_block_devices(unsigned long start, unsigned long size) 88962306a36Sopenharmony_ci{ 89062306a36Sopenharmony_ci const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 89162306a36Sopenharmony_ci const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 89262306a36Sopenharmony_ci struct memory_block *mem; 89362306a36Sopenharmony_ci unsigned long block_id; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 89662306a36Sopenharmony_ci !IS_ALIGNED(size, memory_block_size_bytes()))) 89762306a36Sopenharmony_ci return; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci for (block_id = start_block_id; block_id != end_block_id; block_id++) { 90062306a36Sopenharmony_ci mem = find_memory_block_by_id(block_id); 90162306a36Sopenharmony_ci if (WARN_ON_ONCE(!mem)) 90262306a36Sopenharmony_ci continue; 90362306a36Sopenharmony_ci num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem)); 90462306a36Sopenharmony_ci unregister_memory_block_under_nodes(mem); 90562306a36Sopenharmony_ci remove_memory_block(mem); 90662306a36Sopenharmony_ci } 90762306a36Sopenharmony_ci} 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_cistatic struct attribute *memory_root_attrs[] = { 91062306a36Sopenharmony_ci#ifdef CONFIG_ARCH_MEMORY_PROBE 91162306a36Sopenharmony_ci &dev_attr_probe.attr, 91262306a36Sopenharmony_ci#endif 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 91562306a36Sopenharmony_ci &dev_attr_soft_offline_page.attr, 91662306a36Sopenharmony_ci &dev_attr_hard_offline_page.attr, 91762306a36Sopenharmony_ci#endif 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci &dev_attr_block_size_bytes.attr, 92062306a36Sopenharmony_ci &dev_attr_auto_online_blocks.attr, 92162306a36Sopenharmony_ci#ifdef CONFIG_CRASH_HOTPLUG 92262306a36Sopenharmony_ci &dev_attr_crash_hotplug.attr, 92362306a36Sopenharmony_ci#endif 92462306a36Sopenharmony_ci NULL 92562306a36Sopenharmony_ci}; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_cistatic const struct attribute_group memory_root_attr_group = { 92862306a36Sopenharmony_ci .attrs = memory_root_attrs, 92962306a36Sopenharmony_ci}; 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_cistatic const struct attribute_group *memory_root_attr_groups[] = { 93262306a36Sopenharmony_ci &memory_root_attr_group, 93362306a36Sopenharmony_ci NULL, 93462306a36Sopenharmony_ci}; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci/* 93762306a36Sopenharmony_ci * Initialize the sysfs support for memory devices. At the time this function 93862306a36Sopenharmony_ci * is called, we cannot have concurrent creation/deletion of memory block 93962306a36Sopenharmony_ci * devices, the device_hotplug_lock is not needed. 94062306a36Sopenharmony_ci */ 94162306a36Sopenharmony_civoid __init memory_dev_init(void) 94262306a36Sopenharmony_ci{ 94362306a36Sopenharmony_ci int ret; 94462306a36Sopenharmony_ci unsigned long block_sz, nr; 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci /* Validate the configured memory block size */ 94762306a36Sopenharmony_ci block_sz = memory_block_size_bytes(); 94862306a36Sopenharmony_ci if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) 94962306a36Sopenharmony_ci panic("Memory block size not suitable: 0x%lx\n", block_sz); 95062306a36Sopenharmony_ci sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 95362306a36Sopenharmony_ci if (ret) 95462306a36Sopenharmony_ci panic("%s() failed to register subsystem: %d\n", __func__, ret); 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci /* 95762306a36Sopenharmony_ci * Create entries for memory sections that were found 95862306a36Sopenharmony_ci * during boot and have been initialized 95962306a36Sopenharmony_ci */ 96062306a36Sopenharmony_ci for (nr = 0; nr <= __highest_present_section_nr; 96162306a36Sopenharmony_ci nr += sections_per_block) { 96262306a36Sopenharmony_ci ret = add_boot_memory_block(nr); 96362306a36Sopenharmony_ci if (ret) 96462306a36Sopenharmony_ci panic("%s() failed to add memory block: %d\n", __func__, 96562306a36Sopenharmony_ci ret); 96662306a36Sopenharmony_ci } 96762306a36Sopenharmony_ci} 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci/** 97062306a36Sopenharmony_ci * walk_memory_blocks - walk through all present memory blocks overlapped 97162306a36Sopenharmony_ci * by the range [start, start + size) 97262306a36Sopenharmony_ci * 97362306a36Sopenharmony_ci * @start: start address of the memory range 97462306a36Sopenharmony_ci * @size: size of the memory range 97562306a36Sopenharmony_ci * @arg: argument passed to func 97662306a36Sopenharmony_ci * @func: callback for each memory section walked 97762306a36Sopenharmony_ci * 97862306a36Sopenharmony_ci * This function walks through all present memory blocks overlapped by the 97962306a36Sopenharmony_ci * range [start, start + size), calling func on each memory block. 98062306a36Sopenharmony_ci * 98162306a36Sopenharmony_ci * In case func() returns an error, walking is aborted and the error is 98262306a36Sopenharmony_ci * returned. 98362306a36Sopenharmony_ci * 98462306a36Sopenharmony_ci * Called under device_hotplug_lock. 98562306a36Sopenharmony_ci */ 98662306a36Sopenharmony_ciint walk_memory_blocks(unsigned long start, unsigned long size, 98762306a36Sopenharmony_ci void *arg, walk_memory_blocks_func_t func) 98862306a36Sopenharmony_ci{ 98962306a36Sopenharmony_ci const unsigned long start_block_id = phys_to_block_id(start); 99062306a36Sopenharmony_ci const unsigned long end_block_id = phys_to_block_id(start + size - 1); 99162306a36Sopenharmony_ci struct memory_block *mem; 99262306a36Sopenharmony_ci unsigned long block_id; 99362306a36Sopenharmony_ci int ret = 0; 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci if (!size) 99662306a36Sopenharmony_ci return 0; 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 99962306a36Sopenharmony_ci mem = find_memory_block_by_id(block_id); 100062306a36Sopenharmony_ci if (!mem) 100162306a36Sopenharmony_ci continue; 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci ret = func(mem, arg); 100462306a36Sopenharmony_ci put_device(&mem->dev); 100562306a36Sopenharmony_ci if (ret) 100662306a36Sopenharmony_ci break; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci return ret; 100962306a36Sopenharmony_ci} 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_cistruct for_each_memory_block_cb_data { 101262306a36Sopenharmony_ci walk_memory_blocks_func_t func; 101362306a36Sopenharmony_ci void *arg; 101462306a36Sopenharmony_ci}; 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_cistatic int for_each_memory_block_cb(struct device *dev, void *data) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci struct memory_block *mem = to_memory_block(dev); 101962306a36Sopenharmony_ci struct for_each_memory_block_cb_data *cb_data = data; 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci return cb_data->func(mem, cb_data->arg); 102262306a36Sopenharmony_ci} 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci/** 102562306a36Sopenharmony_ci * for_each_memory_block - walk through all present memory blocks 102662306a36Sopenharmony_ci * 102762306a36Sopenharmony_ci * @arg: argument passed to func 102862306a36Sopenharmony_ci * @func: callback for each memory block walked 102962306a36Sopenharmony_ci * 103062306a36Sopenharmony_ci * This function walks through all present memory blocks, calling func on 103162306a36Sopenharmony_ci * each memory block. 103262306a36Sopenharmony_ci * 103362306a36Sopenharmony_ci * In case func() returns an error, walking is aborted and the error is 103462306a36Sopenharmony_ci * returned. 103562306a36Sopenharmony_ci */ 103662306a36Sopenharmony_ciint for_each_memory_block(void *arg, walk_memory_blocks_func_t func) 103762306a36Sopenharmony_ci{ 103862306a36Sopenharmony_ci struct for_each_memory_block_cb_data cb_data = { 103962306a36Sopenharmony_ci .func = func, 104062306a36Sopenharmony_ci .arg = arg, 104162306a36Sopenharmony_ci }; 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci return bus_for_each_dev(&memory_subsys, NULL, &cb_data, 104462306a36Sopenharmony_ci for_each_memory_block_cb); 104562306a36Sopenharmony_ci} 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci/* 104862306a36Sopenharmony_ci * This is an internal helper to unify allocation and initialization of 104962306a36Sopenharmony_ci * memory groups. Note that the passed memory group will be copied to a 105062306a36Sopenharmony_ci * dynamically allocated memory group. After this call, the passed 105162306a36Sopenharmony_ci * memory group should no longer be used. 105262306a36Sopenharmony_ci */ 105362306a36Sopenharmony_cistatic int memory_group_register(struct memory_group group) 105462306a36Sopenharmony_ci{ 105562306a36Sopenharmony_ci struct memory_group *new_group; 105662306a36Sopenharmony_ci uint32_t mgid; 105762306a36Sopenharmony_ci int ret; 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci if (!node_possible(group.nid)) 106062306a36Sopenharmony_ci return -EINVAL; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci new_group = kzalloc(sizeof(group), GFP_KERNEL); 106362306a36Sopenharmony_ci if (!new_group) 106462306a36Sopenharmony_ci return -ENOMEM; 106562306a36Sopenharmony_ci *new_group = group; 106662306a36Sopenharmony_ci INIT_LIST_HEAD(&new_group->memory_blocks); 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b, 106962306a36Sopenharmony_ci GFP_KERNEL); 107062306a36Sopenharmony_ci if (ret) { 107162306a36Sopenharmony_ci kfree(new_group); 107262306a36Sopenharmony_ci return ret; 107362306a36Sopenharmony_ci } else if (group.is_dynamic) { 107462306a36Sopenharmony_ci xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC); 107562306a36Sopenharmony_ci } 107662306a36Sopenharmony_ci return mgid; 107762306a36Sopenharmony_ci} 107862306a36Sopenharmony_ci 107962306a36Sopenharmony_ci/** 108062306a36Sopenharmony_ci * memory_group_register_static() - Register a static memory group. 108162306a36Sopenharmony_ci * @nid: The node id. 108262306a36Sopenharmony_ci * @max_pages: The maximum number of pages we'll have in this static memory 108362306a36Sopenharmony_ci * group. 108462306a36Sopenharmony_ci * 108562306a36Sopenharmony_ci * Register a new static memory group and return the memory group id. 108662306a36Sopenharmony_ci * All memory in the group belongs to a single unit, such as a DIMM. All 108762306a36Sopenharmony_ci * memory belonging to a static memory group is added in one go to be removed 108862306a36Sopenharmony_ci * in one go -- it's static. 108962306a36Sopenharmony_ci * 109062306a36Sopenharmony_ci * Returns an error if out of memory, if the node id is invalid, if no new 109162306a36Sopenharmony_ci * memory groups can be registered, or if max_pages is invalid (0). Otherwise, 109262306a36Sopenharmony_ci * returns the new memory group id. 109362306a36Sopenharmony_ci */ 109462306a36Sopenharmony_ciint memory_group_register_static(int nid, unsigned long max_pages) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci struct memory_group group = { 109762306a36Sopenharmony_ci .nid = nid, 109862306a36Sopenharmony_ci .s = { 109962306a36Sopenharmony_ci .max_pages = max_pages, 110062306a36Sopenharmony_ci }, 110162306a36Sopenharmony_ci }; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (!max_pages) 110462306a36Sopenharmony_ci return -EINVAL; 110562306a36Sopenharmony_ci return memory_group_register(group); 110662306a36Sopenharmony_ci} 110762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_register_static); 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci/** 111062306a36Sopenharmony_ci * memory_group_register_dynamic() - Register a dynamic memory group. 111162306a36Sopenharmony_ci * @nid: The node id. 111262306a36Sopenharmony_ci * @unit_pages: Unit in pages in which is memory added/removed in this dynamic 111362306a36Sopenharmony_ci * memory group. 111462306a36Sopenharmony_ci * 111562306a36Sopenharmony_ci * Register a new dynamic memory group and return the memory group id. 111662306a36Sopenharmony_ci * Memory within a dynamic memory group is added/removed dynamically 111762306a36Sopenharmony_ci * in unit_pages. 111862306a36Sopenharmony_ci * 111962306a36Sopenharmony_ci * Returns an error if out of memory, if the node id is invalid, if no new 112062306a36Sopenharmony_ci * memory groups can be registered, or if unit_pages is invalid (0, not a 112162306a36Sopenharmony_ci * power of two, smaller than a single memory block). Otherwise, returns the 112262306a36Sopenharmony_ci * new memory group id. 112362306a36Sopenharmony_ci */ 112462306a36Sopenharmony_ciint memory_group_register_dynamic(int nid, unsigned long unit_pages) 112562306a36Sopenharmony_ci{ 112662306a36Sopenharmony_ci struct memory_group group = { 112762306a36Sopenharmony_ci .nid = nid, 112862306a36Sopenharmony_ci .is_dynamic = true, 112962306a36Sopenharmony_ci .d = { 113062306a36Sopenharmony_ci .unit_pages = unit_pages, 113162306a36Sopenharmony_ci }, 113262306a36Sopenharmony_ci }; 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci if (!unit_pages || !is_power_of_2(unit_pages) || 113562306a36Sopenharmony_ci unit_pages < PHYS_PFN(memory_block_size_bytes())) 113662306a36Sopenharmony_ci return -EINVAL; 113762306a36Sopenharmony_ci return memory_group_register(group); 113862306a36Sopenharmony_ci} 113962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_register_dynamic); 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci/** 114262306a36Sopenharmony_ci * memory_group_unregister() - Unregister a memory group. 114362306a36Sopenharmony_ci * @mgid: the memory group id 114462306a36Sopenharmony_ci * 114562306a36Sopenharmony_ci * Unregister a memory group. If any memory block still belongs to this 114662306a36Sopenharmony_ci * memory group, unregistering will fail. 114762306a36Sopenharmony_ci * 114862306a36Sopenharmony_ci * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some 114962306a36Sopenharmony_ci * memory blocks still belong to this memory group and returns 0 if 115062306a36Sopenharmony_ci * unregistering succeeded. 115162306a36Sopenharmony_ci */ 115262306a36Sopenharmony_ciint memory_group_unregister(int mgid) 115362306a36Sopenharmony_ci{ 115462306a36Sopenharmony_ci struct memory_group *group; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci if (mgid < 0) 115762306a36Sopenharmony_ci return -EINVAL; 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci group = xa_load(&memory_groups, mgid); 116062306a36Sopenharmony_ci if (!group) 116162306a36Sopenharmony_ci return -EINVAL; 116262306a36Sopenharmony_ci if (!list_empty(&group->memory_blocks)) 116362306a36Sopenharmony_ci return -EBUSY; 116462306a36Sopenharmony_ci xa_erase(&memory_groups, mgid); 116562306a36Sopenharmony_ci kfree(group); 116662306a36Sopenharmony_ci return 0; 116762306a36Sopenharmony_ci} 116862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_unregister); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci/* 117162306a36Sopenharmony_ci * This is an internal helper only to be used in core memory hotplug code to 117262306a36Sopenharmony_ci * lookup a memory group. We don't care about locking, as we don't expect a 117362306a36Sopenharmony_ci * memory group to get unregistered while adding memory to it -- because 117462306a36Sopenharmony_ci * the group and the memory is managed by the same driver. 117562306a36Sopenharmony_ci */ 117662306a36Sopenharmony_cistruct memory_group *memory_group_find_by_id(int mgid) 117762306a36Sopenharmony_ci{ 117862306a36Sopenharmony_ci return xa_load(&memory_groups, mgid); 117962306a36Sopenharmony_ci} 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci/* 118262306a36Sopenharmony_ci * This is an internal helper only to be used in core memory hotplug code to 118362306a36Sopenharmony_ci * walk all dynamic memory groups excluding a given memory group, either 118462306a36Sopenharmony_ci * belonging to a specific node, or belonging to any node. 118562306a36Sopenharmony_ci */ 118662306a36Sopenharmony_ciint walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, 118762306a36Sopenharmony_ci struct memory_group *excluded, void *arg) 118862306a36Sopenharmony_ci{ 118962306a36Sopenharmony_ci struct memory_group *group; 119062306a36Sopenharmony_ci unsigned long index; 119162306a36Sopenharmony_ci int ret = 0; 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci xa_for_each_marked(&memory_groups, index, group, 119462306a36Sopenharmony_ci MEMORY_GROUP_MARK_DYNAMIC) { 119562306a36Sopenharmony_ci if (group == excluded) 119662306a36Sopenharmony_ci continue; 119762306a36Sopenharmony_ci#ifdef CONFIG_NUMA 119862306a36Sopenharmony_ci if (nid != NUMA_NO_NODE && group->nid != nid) 119962306a36Sopenharmony_ci continue; 120062306a36Sopenharmony_ci#endif /* CONFIG_NUMA */ 120162306a36Sopenharmony_ci ret = func(group, arg); 120262306a36Sopenharmony_ci if (ret) 120362306a36Sopenharmony_ci break; 120462306a36Sopenharmony_ci } 120562306a36Sopenharmony_ci return ret; 120662306a36Sopenharmony_ci} 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) 120962306a36Sopenharmony_civoid memblk_nr_poison_inc(unsigned long pfn) 121062306a36Sopenharmony_ci{ 121162306a36Sopenharmony_ci const unsigned long block_id = pfn_to_block_id(pfn); 121262306a36Sopenharmony_ci struct memory_block *mem = find_memory_block_by_id(block_id); 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci if (mem) 121562306a36Sopenharmony_ci atomic_long_inc(&mem->nr_hwpoison); 121662306a36Sopenharmony_ci} 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_civoid memblk_nr_poison_sub(unsigned long pfn, long i) 121962306a36Sopenharmony_ci{ 122062306a36Sopenharmony_ci const unsigned long block_id = pfn_to_block_id(pfn); 122162306a36Sopenharmony_ci struct memory_block *mem = find_memory_block_by_id(block_id); 122262306a36Sopenharmony_ci 122362306a36Sopenharmony_ci if (mem) 122462306a36Sopenharmony_ci atomic_long_sub(i, &mem->nr_hwpoison); 122562306a36Sopenharmony_ci} 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_cistatic unsigned long memblk_nr_poison(struct memory_block *mem) 122862306a36Sopenharmony_ci{ 122962306a36Sopenharmony_ci return atomic_long_read(&mem->nr_hwpoison); 123062306a36Sopenharmony_ci} 123162306a36Sopenharmony_ci#endif 1232