162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Memory subsystem support
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
662306a36Sopenharmony_ci *            Dave Hansen <haveblue@us.ibm.com>
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * This file provides the necessary infrastructure to represent
962306a36Sopenharmony_ci * a SPARSEMEM-memory-model system's physical memory in /sysfs.
1062306a36Sopenharmony_ci * All arch-independent code that assumes MEMORY_HOTPLUG requires
1162306a36Sopenharmony_ci * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
1262306a36Sopenharmony_ci */
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/module.h>
1562306a36Sopenharmony_ci#include <linux/init.h>
1662306a36Sopenharmony_ci#include <linux/topology.h>
1762306a36Sopenharmony_ci#include <linux/capability.h>
1862306a36Sopenharmony_ci#include <linux/device.h>
1962306a36Sopenharmony_ci#include <linux/memory.h>
2062306a36Sopenharmony_ci#include <linux/memory_hotplug.h>
2162306a36Sopenharmony_ci#include <linux/mm.h>
2262306a36Sopenharmony_ci#include <linux/stat.h>
2362306a36Sopenharmony_ci#include <linux/slab.h>
2462306a36Sopenharmony_ci#include <linux/xarray.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include <linux/atomic.h>
2762306a36Sopenharmony_ci#include <linux/uaccess.h>
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#define MEMORY_CLASS_NAME	"memory"
3062306a36Sopenharmony_ci
/*
 * Names of the memory online types, indexed by MMOP_* value.
 * mhp_online_type_from_str() performs the reverse lookup on this table and
 * auto_online_blocks_show() prints an entry from it.
 */
static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ciint mhp_online_type_from_str(const char *str)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	int i;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
4362306a36Sopenharmony_ci		if (sysfs_streq(str, online_type_to_str[i]))
4462306a36Sopenharmony_ci			return i;
4562306a36Sopenharmony_ci	}
4662306a36Sopenharmony_ci	return -EINVAL;
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ci
/*
 * Convert a pointer to the 'dev' member embedded in a struct memory_block
 * into a pointer to the containing memory_block.
 *
 * The macro parameter is deliberately not called 'dev': the preprocessor
 * would otherwise also replace the member name handed to container_of(),
 * and the macro would only expand correctly for an argument spelled
 * exactly 'dev'.
 */
#define to_memory_block(_dev) container_of(_dev, struct memory_block, dev)
5062306a36Sopenharmony_ci
/* Number of memory sections that make up one memory block. */
static int sections_per_block;

/* Map a section number to the ID of the memory block that contains it. */
static inline unsigned long memory_block_id(unsigned long section_nr)
{
	const unsigned long per_block = sections_per_block;

	return section_nr / per_block;
}
5762306a36Sopenharmony_ci
/* Map a page frame number to the ID of the memory block that contains it. */
static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	const unsigned long section_nr = pfn_to_section_nr(pfn);

	return memory_block_id(section_nr);
}
6262306a36Sopenharmony_ci
/* Map a physical address to the ID of the memory block that contains it. */
static inline unsigned long phys_to_block_id(unsigned long phys)
{
	const unsigned long pfn = PFN_DOWN(phys);

	return pfn_to_block_id(pfn);
}
6762306a36Sopenharmony_ci
/* Defined further down; declared here for the bus_type initializer. */
static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

/*
 * The "memory" subsystem bus. Its online/offline callbacks drive the state
 * transitions of memory block devices.
 */
static struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};
7762306a36Sopenharmony_ci
/*
 * Memory blocks are cached in a local XArray, indexed by memory block id,
 * to avoid a costly linear search for the corresponding device on the
 * subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

/*
 * Memory groups, indexed by memory group id (mgid).
 */
static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);
/* XArray mark used to tag dynamic memory groups in memory_groups. */
#define MEMORY_GROUP_MARK_DYNAMIC	XA_MARK_1

/* Notifier chain called through memory_notify(). */
static BLOCKING_NOTIFIER_HEAD(memory_chain);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ciint register_memory_notifier(struct notifier_block *nb)
9462306a36Sopenharmony_ci{
9562306a36Sopenharmony_ci	return blocking_notifier_chain_register(&memory_chain, nb);
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ciEXPORT_SYMBOL(register_memory_notifier);
9862306a36Sopenharmony_ci
/*
 * Remove @nb from the chain of callbacks invoked by memory_notify().
 * The result of blocking_notifier_chain_unregister() is not reported.
 */
void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_cistatic void memory_block_release(struct device *dev)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
10862306a36Sopenharmony_ci	/* Verify that the altmap is freed */
10962306a36Sopenharmony_ci	WARN_ON(mem->altmap);
11062306a36Sopenharmony_ci	kfree(mem);
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ciunsigned long __weak memory_block_size_bytes(void)
11462306a36Sopenharmony_ci{
11562306a36Sopenharmony_ci	return MIN_MEMORY_BLOCK_SIZE;
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_block_size_bytes);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci/* Show the memory block ID, relative to the memory block size */
12062306a36Sopenharmony_cistatic ssize_t phys_index_show(struct device *dev,
12162306a36Sopenharmony_ci			       struct device_attribute *attr, char *buf)
12262306a36Sopenharmony_ci{
12362306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr));
12662306a36Sopenharmony_ci}
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci/*
12962306a36Sopenharmony_ci * Legacy interface that we cannot remove. Always indicate "removable"
13062306a36Sopenharmony_ci * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
13162306a36Sopenharmony_ci */
13262306a36Sopenharmony_cistatic ssize_t removable_show(struct device *dev, struct device_attribute *attr,
13362306a36Sopenharmony_ci			      char *buf)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci/*
13962306a36Sopenharmony_ci * online, offline, going offline, etc.
14062306a36Sopenharmony_ci */
14162306a36Sopenharmony_cistatic ssize_t state_show(struct device *dev, struct device_attribute *attr,
14262306a36Sopenharmony_ci			  char *buf)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
14562306a36Sopenharmony_ci	const char *output;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/*
14862306a36Sopenharmony_ci	 * We can probably put these states in a nice little array
14962306a36Sopenharmony_ci	 * so that they're not open-coded
15062306a36Sopenharmony_ci	 */
15162306a36Sopenharmony_ci	switch (mem->state) {
15262306a36Sopenharmony_ci	case MEM_ONLINE:
15362306a36Sopenharmony_ci		output = "online";
15462306a36Sopenharmony_ci		break;
15562306a36Sopenharmony_ci	case MEM_OFFLINE:
15662306a36Sopenharmony_ci		output = "offline";
15762306a36Sopenharmony_ci		break;
15862306a36Sopenharmony_ci	case MEM_GOING_OFFLINE:
15962306a36Sopenharmony_ci		output = "going-offline";
16062306a36Sopenharmony_ci		break;
16162306a36Sopenharmony_ci	default:
16262306a36Sopenharmony_ci		WARN_ON(1);
16362306a36Sopenharmony_ci		return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	return sysfs_emit(buf, "%s\n", output);
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ciint memory_notify(unsigned long val, void *v)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	return blocking_notifier_call_chain(&memory_chain, val, v);
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
/*
 * memblk_nr_poison() is only declared here when both CONFIG_MEMORY_FAILURE
 * and CONFIG_MEMORY_HOTPLUG are set (its definition is not in this part of
 * the file). Otherwise a stub that always reports zero is used.
 */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
static unsigned long memblk_nr_poison(struct memory_block *mem);
#else
static inline unsigned long memblk_nr_poison(struct memory_block *mem)
{
	return 0;
}
#endif
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci/*
18462306a36Sopenharmony_ci * Must acquire mem_hotplug_lock in write mode.
18562306a36Sopenharmony_ci */
18662306a36Sopenharmony_cistatic int memory_block_online(struct memory_block *mem)
18762306a36Sopenharmony_ci{
18862306a36Sopenharmony_ci	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
18962306a36Sopenharmony_ci	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
19062306a36Sopenharmony_ci	unsigned long nr_vmemmap_pages = 0;
19162306a36Sopenharmony_ci	struct zone *zone;
19262306a36Sopenharmony_ci	int ret;
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	if (memblk_nr_poison(mem))
19562306a36Sopenharmony_ci		return -EHWPOISON;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
19862306a36Sopenharmony_ci				  start_pfn, nr_pages);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	/*
20162306a36Sopenharmony_ci	 * Although vmemmap pages have a different lifecycle than the pages
20262306a36Sopenharmony_ci	 * they describe (they remain until the memory is unplugged), doing
20362306a36Sopenharmony_ci	 * their initialization and accounting at memory onlining/offlining
20462306a36Sopenharmony_ci	 * stage helps to keep accounting easier to follow - e.g vmemmaps
20562306a36Sopenharmony_ci	 * belong to the same zone as the memory they backed.
20662306a36Sopenharmony_ci	 */
20762306a36Sopenharmony_ci	if (mem->altmap)
20862306a36Sopenharmony_ci		nr_vmemmap_pages = mem->altmap->free;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	mem_hotplug_begin();
21162306a36Sopenharmony_ci	if (nr_vmemmap_pages) {
21262306a36Sopenharmony_ci		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
21362306a36Sopenharmony_ci		if (ret)
21462306a36Sopenharmony_ci			goto out;
21562306a36Sopenharmony_ci	}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	ret = online_pages(start_pfn + nr_vmemmap_pages,
21862306a36Sopenharmony_ci			   nr_pages - nr_vmemmap_pages, zone, mem->group);
21962306a36Sopenharmony_ci	if (ret) {
22062306a36Sopenharmony_ci		if (nr_vmemmap_pages)
22162306a36Sopenharmony_ci			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
22262306a36Sopenharmony_ci		goto out;
22362306a36Sopenharmony_ci	}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	/*
22662306a36Sopenharmony_ci	 * Account once onlining succeeded. If the zone was unpopulated, it is
22762306a36Sopenharmony_ci	 * now already properly populated.
22862306a36Sopenharmony_ci	 */
22962306a36Sopenharmony_ci	if (nr_vmemmap_pages)
23062306a36Sopenharmony_ci		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
23162306a36Sopenharmony_ci					  nr_vmemmap_pages);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	mem->zone = zone;
23462306a36Sopenharmony_ciout:
23562306a36Sopenharmony_ci	mem_hotplug_done();
23662306a36Sopenharmony_ci	return ret;
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci/*
24062306a36Sopenharmony_ci * Must acquire mem_hotplug_lock in write mode.
24162306a36Sopenharmony_ci */
24262306a36Sopenharmony_cistatic int memory_block_offline(struct memory_block *mem)
24362306a36Sopenharmony_ci{
24462306a36Sopenharmony_ci	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
24562306a36Sopenharmony_ci	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
24662306a36Sopenharmony_ci	unsigned long nr_vmemmap_pages = 0;
24762306a36Sopenharmony_ci	int ret;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	if (!mem->zone)
25062306a36Sopenharmony_ci		return -EINVAL;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	/*
25362306a36Sopenharmony_ci	 * Unaccount before offlining, such that unpopulated zone and kthreads
25462306a36Sopenharmony_ci	 * can properly be torn down in offline_pages().
25562306a36Sopenharmony_ci	 */
25662306a36Sopenharmony_ci	if (mem->altmap)
25762306a36Sopenharmony_ci		nr_vmemmap_pages = mem->altmap->free;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	mem_hotplug_begin();
26062306a36Sopenharmony_ci	if (nr_vmemmap_pages)
26162306a36Sopenharmony_ci		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
26262306a36Sopenharmony_ci					  -nr_vmemmap_pages);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	ret = offline_pages(start_pfn + nr_vmemmap_pages,
26562306a36Sopenharmony_ci			    nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
26662306a36Sopenharmony_ci	if (ret) {
26762306a36Sopenharmony_ci		/* offline_pages() failed. Account back. */
26862306a36Sopenharmony_ci		if (nr_vmemmap_pages)
26962306a36Sopenharmony_ci			adjust_present_page_count(pfn_to_page(start_pfn),
27062306a36Sopenharmony_ci						  mem->group, nr_vmemmap_pages);
27162306a36Sopenharmony_ci		goto out;
27262306a36Sopenharmony_ci	}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	if (nr_vmemmap_pages)
27562306a36Sopenharmony_ci		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	mem->zone = NULL;
27862306a36Sopenharmony_ciout:
27962306a36Sopenharmony_ci	mem_hotplug_done();
28062306a36Sopenharmony_ci	return ret;
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci/*
28462306a36Sopenharmony_ci * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
28562306a36Sopenharmony_ci * OK to have direct references to sparsemem variables in here.
28662306a36Sopenharmony_ci */
28762306a36Sopenharmony_cistatic int
28862306a36Sopenharmony_cimemory_block_action(struct memory_block *mem, unsigned long action)
28962306a36Sopenharmony_ci{
29062306a36Sopenharmony_ci	int ret;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	switch (action) {
29362306a36Sopenharmony_ci	case MEM_ONLINE:
29462306a36Sopenharmony_ci		ret = memory_block_online(mem);
29562306a36Sopenharmony_ci		break;
29662306a36Sopenharmony_ci	case MEM_OFFLINE:
29762306a36Sopenharmony_ci		ret = memory_block_offline(mem);
29862306a36Sopenharmony_ci		break;
29962306a36Sopenharmony_ci	default:
30062306a36Sopenharmony_ci		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
30162306a36Sopenharmony_ci		     "%ld\n", __func__, mem->start_section_nr, action, action);
30262306a36Sopenharmony_ci		ret = -EINVAL;
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	return ret;
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistatic int memory_block_change_state(struct memory_block *mem,
30962306a36Sopenharmony_ci		unsigned long to_state, unsigned long from_state_req)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	int ret = 0;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	if (mem->state != from_state_req)
31462306a36Sopenharmony_ci		return -EINVAL;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if (to_state == MEM_OFFLINE)
31762306a36Sopenharmony_ci		mem->state = MEM_GOING_OFFLINE;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	ret = memory_block_action(mem, to_state);
32062306a36Sopenharmony_ci	mem->state = ret ? from_state_req : to_state;
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	return ret;
32362306a36Sopenharmony_ci}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci/* The device lock serializes operations on memory_subsys_[online|offline] */
32662306a36Sopenharmony_cistatic int memory_subsys_online(struct device *dev)
32762306a36Sopenharmony_ci{
32862306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
32962306a36Sopenharmony_ci	int ret;
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	if (mem->state == MEM_ONLINE)
33262306a36Sopenharmony_ci		return 0;
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	/*
33562306a36Sopenharmony_ci	 * When called via device_online() without configuring the online_type,
33662306a36Sopenharmony_ci	 * we want to default to MMOP_ONLINE.
33762306a36Sopenharmony_ci	 */
33862306a36Sopenharmony_ci	if (mem->online_type == MMOP_OFFLINE)
33962306a36Sopenharmony_ci		mem->online_type = MMOP_ONLINE;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
34262306a36Sopenharmony_ci	mem->online_type = MMOP_OFFLINE;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	return ret;
34562306a36Sopenharmony_ci}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_cistatic int memory_subsys_offline(struct device *dev)
34862306a36Sopenharmony_ci{
34962306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	if (mem->state == MEM_OFFLINE)
35262306a36Sopenharmony_ci		return 0;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_cistatic ssize_t state_store(struct device *dev, struct device_attribute *attr,
35862306a36Sopenharmony_ci			   const char *buf, size_t count)
35962306a36Sopenharmony_ci{
36062306a36Sopenharmony_ci	const int online_type = mhp_online_type_from_str(buf);
36162306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
36262306a36Sopenharmony_ci	int ret;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	if (online_type < 0)
36562306a36Sopenharmony_ci		return -EINVAL;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	ret = lock_device_hotplug_sysfs();
36862306a36Sopenharmony_ci	if (ret)
36962306a36Sopenharmony_ci		return ret;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	switch (online_type) {
37262306a36Sopenharmony_ci	case MMOP_ONLINE_KERNEL:
37362306a36Sopenharmony_ci	case MMOP_ONLINE_MOVABLE:
37462306a36Sopenharmony_ci	case MMOP_ONLINE:
37562306a36Sopenharmony_ci		/* mem->online_type is protected by device_hotplug_lock */
37662306a36Sopenharmony_ci		mem->online_type = online_type;
37762306a36Sopenharmony_ci		ret = device_online(&mem->dev);
37862306a36Sopenharmony_ci		break;
37962306a36Sopenharmony_ci	case MMOP_OFFLINE:
38062306a36Sopenharmony_ci		ret = device_offline(&mem->dev);
38162306a36Sopenharmony_ci		break;
38262306a36Sopenharmony_ci	default:
38362306a36Sopenharmony_ci		ret = -EINVAL; /* should never happen */
38462306a36Sopenharmony_ci	}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	unlock_device_hotplug();
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	if (ret < 0)
38962306a36Sopenharmony_ci		return ret;
39062306a36Sopenharmony_ci	if (ret)
39162306a36Sopenharmony_ci		return -EINVAL;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	return count;
39462306a36Sopenharmony_ci}
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci/*
39762306a36Sopenharmony_ci * Legacy interface that we cannot remove: s390x exposes the storage increment
39862306a36Sopenharmony_ci * covered by a memory block, allowing for identifying which memory blocks
39962306a36Sopenharmony_ci * comprise a storage increment. Since a memory block spans complete
40062306a36Sopenharmony_ci * storage increments nowadays, this interface is basically unused. Other
40162306a36Sopenharmony_ci * archs never exposed != 0.
40262306a36Sopenharmony_ci */
40362306a36Sopenharmony_cistatic ssize_t phys_device_show(struct device *dev,
40462306a36Sopenharmony_ci				struct device_attribute *attr, char *buf)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
40762306a36Sopenharmony_ci	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	return sysfs_emit(buf, "%d\n",
41062306a36Sopenharmony_ci			  arch_get_memory_phys_device(start_pfn));
41162306a36Sopenharmony_ci}
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE
41462306a36Sopenharmony_cistatic int print_allowed_zone(char *buf, int len, int nid,
41562306a36Sopenharmony_ci			      struct memory_group *group,
41662306a36Sopenharmony_ci			      unsigned long start_pfn, unsigned long nr_pages,
41762306a36Sopenharmony_ci			      int online_type, struct zone *default_zone)
41862306a36Sopenharmony_ci{
41962306a36Sopenharmony_ci	struct zone *zone;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
42262306a36Sopenharmony_ci	if (zone == default_zone)
42362306a36Sopenharmony_ci		return 0;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	return sysfs_emit_at(buf, len, " %s", zone->name);
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic ssize_t valid_zones_show(struct device *dev,
42962306a36Sopenharmony_ci				struct device_attribute *attr, char *buf)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
43262306a36Sopenharmony_ci	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
43362306a36Sopenharmony_ci	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
43462306a36Sopenharmony_ci	struct memory_group *group = mem->group;
43562306a36Sopenharmony_ci	struct zone *default_zone;
43662306a36Sopenharmony_ci	int nid = mem->nid;
43762306a36Sopenharmony_ci	int len = 0;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	/*
44062306a36Sopenharmony_ci	 * Check the existing zone. Make sure that we do that only on the
44162306a36Sopenharmony_ci	 * online nodes otherwise the page_zone is not reliable
44262306a36Sopenharmony_ci	 */
44362306a36Sopenharmony_ci	if (mem->state == MEM_ONLINE) {
44462306a36Sopenharmony_ci		/*
44562306a36Sopenharmony_ci		 * If !mem->zone, the memory block spans multiple zones and
44662306a36Sopenharmony_ci		 * cannot get offlined.
44762306a36Sopenharmony_ci		 */
44862306a36Sopenharmony_ci		default_zone = mem->zone;
44962306a36Sopenharmony_ci		if (!default_zone)
45062306a36Sopenharmony_ci			return sysfs_emit(buf, "%s\n", "none");
45162306a36Sopenharmony_ci		len += sysfs_emit_at(buf, len, "%s", default_zone->name);
45262306a36Sopenharmony_ci		goto out;
45362306a36Sopenharmony_ci	}
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
45662306a36Sopenharmony_ci					  start_pfn, nr_pages);
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	len += sysfs_emit_at(buf, len, "%s", default_zone->name);
45962306a36Sopenharmony_ci	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
46062306a36Sopenharmony_ci				  MMOP_ONLINE_KERNEL, default_zone);
46162306a36Sopenharmony_ci	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
46262306a36Sopenharmony_ci				  MMOP_ONLINE_MOVABLE, default_zone);
46362306a36Sopenharmony_ciout:
46462306a36Sopenharmony_ci	len += sysfs_emit_at(buf, len, "\n");
46562306a36Sopenharmony_ci	return len;
46662306a36Sopenharmony_ci}
46762306a36Sopenharmony_cistatic DEVICE_ATTR_RO(valid_zones);
46862306a36Sopenharmony_ci#endif
46962306a36Sopenharmony_ci
/* Per-memory-block attributes; listed in memory_memblk_attrs[]. */
static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci/*
47662306a36Sopenharmony_ci * Show the memory block size (shared by all memory blocks).
47762306a36Sopenharmony_ci */
47862306a36Sopenharmony_cistatic ssize_t block_size_bytes_show(struct device *dev,
47962306a36Sopenharmony_ci				     struct device_attribute *attr, char *buf)
48062306a36Sopenharmony_ci{
48162306a36Sopenharmony_ci	return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_cistatic DEVICE_ATTR_RO(block_size_bytes);
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci/*
48762306a36Sopenharmony_ci * Memory auto online policy.
48862306a36Sopenharmony_ci */
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_cistatic ssize_t auto_online_blocks_show(struct device *dev,
49162306a36Sopenharmony_ci				       struct device_attribute *attr, char *buf)
49262306a36Sopenharmony_ci{
49362306a36Sopenharmony_ci	return sysfs_emit(buf, "%s\n",
49462306a36Sopenharmony_ci			  online_type_to_str[mhp_default_online_type]);
49562306a36Sopenharmony_ci}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_cistatic ssize_t auto_online_blocks_store(struct device *dev,
49862306a36Sopenharmony_ci					struct device_attribute *attr,
49962306a36Sopenharmony_ci					const char *buf, size_t count)
50062306a36Sopenharmony_ci{
50162306a36Sopenharmony_ci	const int online_type = mhp_online_type_from_str(buf);
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci	if (online_type < 0)
50462306a36Sopenharmony_ci		return -EINVAL;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	mhp_default_online_type = online_type;
50762306a36Sopenharmony_ci	return count;
50862306a36Sopenharmony_ci}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_cistatic DEVICE_ATTR_RW(auto_online_blocks);
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci#ifdef CONFIG_CRASH_HOTPLUG
51362306a36Sopenharmony_ci#include <linux/kexec.h>
51462306a36Sopenharmony_cistatic ssize_t crash_hotplug_show(struct device *dev,
51562306a36Sopenharmony_ci				       struct device_attribute *attr, char *buf)
51662306a36Sopenharmony_ci{
51762306a36Sopenharmony_ci	return sysfs_emit(buf, "%d\n", crash_hotplug_memory_support());
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_cistatic DEVICE_ATTR_RO(crash_hotplug);
52062306a36Sopenharmony_ci#endif
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci/*
52362306a36Sopenharmony_ci * Some architectures will have custom drivers to do this, and
52462306a36Sopenharmony_ci * will not need to do it from userspace.  The fake hot-add code
52562306a36Sopenharmony_ci * as well as ppc64 will do all of their discovery in userspace
52662306a36Sopenharmony_ci * and will require this interface.
52762306a36Sopenharmony_ci */
52862306a36Sopenharmony_ci#ifdef CONFIG_ARCH_MEMORY_PROBE
52962306a36Sopenharmony_cistatic ssize_t probe_store(struct device *dev, struct device_attribute *attr,
53062306a36Sopenharmony_ci			   const char *buf, size_t count)
53162306a36Sopenharmony_ci{
53262306a36Sopenharmony_ci	u64 phys_addr;
53362306a36Sopenharmony_ci	int nid, ret;
53462306a36Sopenharmony_ci	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	ret = kstrtoull(buf, 0, &phys_addr);
53762306a36Sopenharmony_ci	if (ret)
53862306a36Sopenharmony_ci		return ret;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
54162306a36Sopenharmony_ci		return -EINVAL;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	ret = lock_device_hotplug_sysfs();
54462306a36Sopenharmony_ci	if (ret)
54562306a36Sopenharmony_ci		return ret;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	nid = memory_add_physaddr_to_nid(phys_addr);
54862306a36Sopenharmony_ci	ret = __add_memory(nid, phys_addr,
54962306a36Sopenharmony_ci			   MIN_MEMORY_BLOCK_SIZE * sections_per_block,
55062306a36Sopenharmony_ci			   MHP_NONE);
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	if (ret)
55362306a36Sopenharmony_ci		goto out;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci	ret = count;
55662306a36Sopenharmony_ciout:
55762306a36Sopenharmony_ci	unlock_device_hotplug();
55862306a36Sopenharmony_ci	return ret;
55962306a36Sopenharmony_ci}
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_cistatic DEVICE_ATTR_WO(probe);
56262306a36Sopenharmony_ci#endif
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE
56562306a36Sopenharmony_ci/*
56662306a36Sopenharmony_ci * Support for offlining pages of memory
56762306a36Sopenharmony_ci */
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci/* Soft offline a page */
57062306a36Sopenharmony_cistatic ssize_t soft_offline_page_store(struct device *dev,
57162306a36Sopenharmony_ci				       struct device_attribute *attr,
57262306a36Sopenharmony_ci				       const char *buf, size_t count)
57362306a36Sopenharmony_ci{
57462306a36Sopenharmony_ci	int ret;
57562306a36Sopenharmony_ci	u64 pfn;
57662306a36Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
57762306a36Sopenharmony_ci		return -EPERM;
57862306a36Sopenharmony_ci	if (kstrtoull(buf, 0, &pfn) < 0)
57962306a36Sopenharmony_ci		return -EINVAL;
58062306a36Sopenharmony_ci	pfn >>= PAGE_SHIFT;
58162306a36Sopenharmony_ci	ret = soft_offline_page(pfn, 0);
58262306a36Sopenharmony_ci	return ret == 0 ? count : ret;
58362306a36Sopenharmony_ci}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci/* Forcibly offline a page, including killing processes. */
58662306a36Sopenharmony_cistatic ssize_t hard_offline_page_store(struct device *dev,
58762306a36Sopenharmony_ci				       struct device_attribute *attr,
58862306a36Sopenharmony_ci				       const char *buf, size_t count)
58962306a36Sopenharmony_ci{
59062306a36Sopenharmony_ci	int ret;
59162306a36Sopenharmony_ci	u64 pfn;
59262306a36Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
59362306a36Sopenharmony_ci		return -EPERM;
59462306a36Sopenharmony_ci	if (kstrtoull(buf, 0, &pfn) < 0)
59562306a36Sopenharmony_ci		return -EINVAL;
59662306a36Sopenharmony_ci	pfn >>= PAGE_SHIFT;
59762306a36Sopenharmony_ci	ret = memory_failure(pfn, MF_SW_SIMULATED);
59862306a36Sopenharmony_ci	if (ret == -EOPNOTSUPP)
59962306a36Sopenharmony_ci		ret = 0;
60062306a36Sopenharmony_ci	return ret ? ret : count;
60162306a36Sopenharmony_ci}
60262306a36Sopenharmony_ci
/* Write-only attributes backed by the two *_offline_page_store() handlers. */
static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
60562306a36Sopenharmony_ci#endif
60662306a36Sopenharmony_ci
/*
 * Weak default for the value printed by phys_device_show(): always 0,
 * regardless of @start_pfn. See phys_device_show().
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci/*
61462306a36Sopenharmony_ci * A reference for the returned memory block device is acquired.
61562306a36Sopenharmony_ci *
61662306a36Sopenharmony_ci * Called under device_hotplug_lock.
61762306a36Sopenharmony_ci */
61862306a36Sopenharmony_cistatic struct memory_block *find_memory_block_by_id(unsigned long block_id)
61962306a36Sopenharmony_ci{
62062306a36Sopenharmony_ci	struct memory_block *mem;
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	mem = xa_load(&memory_blocks, block_id);
62362306a36Sopenharmony_ci	if (mem)
62462306a36Sopenharmony_ci		get_device(&mem->dev);
62562306a36Sopenharmony_ci	return mem;
62662306a36Sopenharmony_ci}
62762306a36Sopenharmony_ci
/*
 * Find the memory block containing section @section_nr; see
 * find_memory_block_by_id() for the returned reference.
 *
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(unsigned long section_nr)
{
	return find_memory_block_by_id(memory_block_id(section_nr));
}
63762306a36Sopenharmony_ci
/*
 * Attributes of each individual memory block device. The valid_zones
 * attribute is only included when CONFIG_MEMORY_HOTREMOVE is enabled.
 */
static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

/* Unnamed attribute group wrapping memory_memblk_attrs[]. */
static const struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

/*
 * NULL-terminated list of groups; installed as dev.groups of every memory
 * block device in __add_memory_block().
 */
static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_cistatic int __add_memory_block(struct memory_block *memory)
65962306a36Sopenharmony_ci{
66062306a36Sopenharmony_ci	int ret;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	memory->dev.bus = &memory_subsys;
66362306a36Sopenharmony_ci	memory->dev.id = memory->start_section_nr / sections_per_block;
66462306a36Sopenharmony_ci	memory->dev.release = memory_block_release;
66562306a36Sopenharmony_ci	memory->dev.groups = memory_memblk_attr_groups;
66662306a36Sopenharmony_ci	memory->dev.offline = memory->state == MEM_OFFLINE;
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	ret = device_register(&memory->dev);
66962306a36Sopenharmony_ci	if (ret) {
67062306a36Sopenharmony_ci		put_device(&memory->dev);
67162306a36Sopenharmony_ci		return ret;
67262306a36Sopenharmony_ci	}
67362306a36Sopenharmony_ci	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
67462306a36Sopenharmony_ci			      GFP_KERNEL));
67562306a36Sopenharmony_ci	if (ret)
67662306a36Sopenharmony_ci		device_unregister(&memory->dev);
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	return ret;
67962306a36Sopenharmony_ci}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_cistatic struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
68262306a36Sopenharmony_ci						     int nid)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
68562306a36Sopenharmony_ci	const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
68662306a36Sopenharmony_ci	struct zone *zone, *matching_zone = NULL;
68762306a36Sopenharmony_ci	pg_data_t *pgdat = NODE_DATA(nid);
68862306a36Sopenharmony_ci	int i;
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	/*
69162306a36Sopenharmony_ci	 * This logic only works for early memory, when the applicable zones
69262306a36Sopenharmony_ci	 * already span the memory block. We don't expect overlapping zones on
69362306a36Sopenharmony_ci	 * a single node for early memory. So if we're told that some PFNs
69462306a36Sopenharmony_ci	 * of a node fall into this memory block, we can assume that all node
69562306a36Sopenharmony_ci	 * zones that intersect with the memory block are actually applicable.
69662306a36Sopenharmony_ci	 * No need to look at the memmap.
69762306a36Sopenharmony_ci	 */
69862306a36Sopenharmony_ci	for (i = 0; i < MAX_NR_ZONES; i++) {
69962306a36Sopenharmony_ci		zone = pgdat->node_zones + i;
70062306a36Sopenharmony_ci		if (!populated_zone(zone))
70162306a36Sopenharmony_ci			continue;
70262306a36Sopenharmony_ci		if (!zone_intersects(zone, start_pfn, nr_pages))
70362306a36Sopenharmony_ci			continue;
70462306a36Sopenharmony_ci		if (!matching_zone) {
70562306a36Sopenharmony_ci			matching_zone = zone;
70662306a36Sopenharmony_ci			continue;
70762306a36Sopenharmony_ci		}
70862306a36Sopenharmony_ci		/* Spans multiple zones ... */
70962306a36Sopenharmony_ci		matching_zone = NULL;
71062306a36Sopenharmony_ci		break;
71162306a36Sopenharmony_ci	}
71262306a36Sopenharmony_ci	return matching_zone;
71362306a36Sopenharmony_ci}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci#ifdef CONFIG_NUMA
71662306a36Sopenharmony_ci/**
71762306a36Sopenharmony_ci * memory_block_add_nid() - Indicate that system RAM falling into this memory
71862306a36Sopenharmony_ci *			    block device (partially) belongs to the given node.
71962306a36Sopenharmony_ci * @mem: The memory block device.
72062306a36Sopenharmony_ci * @nid: The node id.
72162306a36Sopenharmony_ci * @context: The memory initialization context.
72262306a36Sopenharmony_ci *
72362306a36Sopenharmony_ci * Indicate that system RAM falling into this memory block (partially) belongs
72462306a36Sopenharmony_ci * to the given node. If the context indicates ("early") that we are adding the
72562306a36Sopenharmony_ci * node during node device subsystem initialization, this will also properly
72662306a36Sopenharmony_ci * set/adjust mem->zone based on the zone ranges of the given node.
72762306a36Sopenharmony_ci */
72862306a36Sopenharmony_civoid memory_block_add_nid(struct memory_block *mem, int nid,
72962306a36Sopenharmony_ci			  enum meminit_context context)
73062306a36Sopenharmony_ci{
73162306a36Sopenharmony_ci	if (context == MEMINIT_EARLY && mem->nid != nid) {
73262306a36Sopenharmony_ci		/*
73362306a36Sopenharmony_ci		 * For early memory we have to determine the zone when setting
73462306a36Sopenharmony_ci		 * the node id and handle multiple nodes spanning a single
73562306a36Sopenharmony_ci		 * memory block by indicate via zone == NULL that we're not
73662306a36Sopenharmony_ci		 * dealing with a single zone. So if we're setting the node id
73762306a36Sopenharmony_ci		 * the first time, determine if there is a single zone. If we're
73862306a36Sopenharmony_ci		 * setting the node id a second time to a different node,
73962306a36Sopenharmony_ci		 * invalidate the single detected zone.
74062306a36Sopenharmony_ci		 */
74162306a36Sopenharmony_ci		if (mem->nid == NUMA_NO_NODE)
74262306a36Sopenharmony_ci			mem->zone = early_node_zone_for_memory_block(mem, nid);
74362306a36Sopenharmony_ci		else
74462306a36Sopenharmony_ci			mem->zone = NULL;
74562306a36Sopenharmony_ci	}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	/*
74862306a36Sopenharmony_ci	 * If this memory block spans multiple nodes, we only indicate
74962306a36Sopenharmony_ci	 * the last processed node. If we span multiple nodes (not applicable
75062306a36Sopenharmony_ci	 * to hotplugged memory), zone == NULL will prohibit memory offlining
75162306a36Sopenharmony_ci	 * and consequently unplug.
75262306a36Sopenharmony_ci	 */
75362306a36Sopenharmony_ci	mem->nid = nid;
75462306a36Sopenharmony_ci}
75562306a36Sopenharmony_ci#endif
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_cistatic int add_memory_block(unsigned long block_id, unsigned long state,
75862306a36Sopenharmony_ci			    struct vmem_altmap *altmap,
75962306a36Sopenharmony_ci			    struct memory_group *group)
76062306a36Sopenharmony_ci{
76162306a36Sopenharmony_ci	struct memory_block *mem;
76262306a36Sopenharmony_ci	int ret = 0;
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	mem = find_memory_block_by_id(block_id);
76562306a36Sopenharmony_ci	if (mem) {
76662306a36Sopenharmony_ci		put_device(&mem->dev);
76762306a36Sopenharmony_ci		return -EEXIST;
76862306a36Sopenharmony_ci	}
76962306a36Sopenharmony_ci	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
77062306a36Sopenharmony_ci	if (!mem)
77162306a36Sopenharmony_ci		return -ENOMEM;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	mem->start_section_nr = block_id * sections_per_block;
77462306a36Sopenharmony_ci	mem->state = state;
77562306a36Sopenharmony_ci	mem->nid = NUMA_NO_NODE;
77662306a36Sopenharmony_ci	mem->altmap = altmap;
77762306a36Sopenharmony_ci	INIT_LIST_HEAD(&mem->group_next);
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci#ifndef CONFIG_NUMA
78062306a36Sopenharmony_ci	if (state == MEM_ONLINE)
78162306a36Sopenharmony_ci		/*
78262306a36Sopenharmony_ci		 * MEM_ONLINE at this point implies early memory. With NUMA,
78362306a36Sopenharmony_ci		 * we'll determine the zone when setting the node id via
78462306a36Sopenharmony_ci		 * memory_block_add_nid(). Memory hotplug updated the zone
78562306a36Sopenharmony_ci		 * manually when memory onlining/offlining succeeds.
78662306a36Sopenharmony_ci		 */
78762306a36Sopenharmony_ci		mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
78862306a36Sopenharmony_ci#endif /* CONFIG_NUMA */
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	ret = __add_memory_block(mem);
79162306a36Sopenharmony_ci	if (ret)
79262306a36Sopenharmony_ci		return ret;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	if (group) {
79562306a36Sopenharmony_ci		mem->group = group;
79662306a36Sopenharmony_ci		list_add(&mem->group_next, &group->memory_blocks);
79762306a36Sopenharmony_ci	}
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	return 0;
80062306a36Sopenharmony_ci}
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_cistatic int __init add_boot_memory_block(unsigned long base_section_nr)
80362306a36Sopenharmony_ci{
80462306a36Sopenharmony_ci	int section_count = 0;
80562306a36Sopenharmony_ci	unsigned long nr;
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
80862306a36Sopenharmony_ci	     nr++)
80962306a36Sopenharmony_ci		if (present_section_nr(nr))
81062306a36Sopenharmony_ci			section_count++;
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci	if (section_count == 0)
81362306a36Sopenharmony_ci		return 0;
81462306a36Sopenharmony_ci	return add_memory_block(memory_block_id(base_section_nr),
81562306a36Sopenharmony_ci				MEM_ONLINE, NULL,  NULL);
81662306a36Sopenharmony_ci}
81762306a36Sopenharmony_ci
/*
 * Create a memory block device for hotplugged memory: same as
 * add_memory_block(), with the initial state fixed to MEM_OFFLINE.
 * Returns the result of add_memory_block().
 */
static int add_hotplug_memory_block(unsigned long block_id,
				    struct vmem_altmap *altmap,
				    struct memory_group *group)
{
	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
}
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_cistatic void remove_memory_block(struct memory_block *memory)
82662306a36Sopenharmony_ci{
82762306a36Sopenharmony_ci	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
82862306a36Sopenharmony_ci		return;
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci	if (memory->group) {
83362306a36Sopenharmony_ci		list_del(&memory->group_next);
83462306a36Sopenharmony_ci		memory->group = NULL;
83562306a36Sopenharmony_ci	}
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci	/* drop the ref. we got via find_memory_block() */
83862306a36Sopenharmony_ci	put_device(&memory->dev);
83962306a36Sopenharmony_ci	device_unregister(&memory->dev);
84062306a36Sopenharmony_ci}
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci/*
84362306a36Sopenharmony_ci * Create memory block devices for the given memory area. Start and size
84462306a36Sopenharmony_ci * have to be aligned to memory block granularity. Memory block devices
84562306a36Sopenharmony_ci * will be initialized as offline.
84662306a36Sopenharmony_ci *
84762306a36Sopenharmony_ci * Called under device_hotplug_lock.
84862306a36Sopenharmony_ci */
84962306a36Sopenharmony_ciint create_memory_block_devices(unsigned long start, unsigned long size,
85062306a36Sopenharmony_ci				struct vmem_altmap *altmap,
85162306a36Sopenharmony_ci				struct memory_group *group)
85262306a36Sopenharmony_ci{
85362306a36Sopenharmony_ci	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
85462306a36Sopenharmony_ci	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
85562306a36Sopenharmony_ci	struct memory_block *mem;
85662306a36Sopenharmony_ci	unsigned long block_id;
85762306a36Sopenharmony_ci	int ret = 0;
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
86062306a36Sopenharmony_ci			 !IS_ALIGNED(size, memory_block_size_bytes())))
86162306a36Sopenharmony_ci		return -EINVAL;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
86462306a36Sopenharmony_ci		ret = add_hotplug_memory_block(block_id, altmap, group);
86562306a36Sopenharmony_ci		if (ret)
86662306a36Sopenharmony_ci			break;
86762306a36Sopenharmony_ci	}
86862306a36Sopenharmony_ci	if (ret) {
86962306a36Sopenharmony_ci		end_block_id = block_id;
87062306a36Sopenharmony_ci		for (block_id = start_block_id; block_id != end_block_id;
87162306a36Sopenharmony_ci		     block_id++) {
87262306a36Sopenharmony_ci			mem = find_memory_block_by_id(block_id);
87362306a36Sopenharmony_ci			if (WARN_ON_ONCE(!mem))
87462306a36Sopenharmony_ci				continue;
87562306a36Sopenharmony_ci			remove_memory_block(mem);
87662306a36Sopenharmony_ci		}
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci	return ret;
87962306a36Sopenharmony_ci}
88062306a36Sopenharmony_ci
/*
 * Remove the memory block devices of the memory area [start, start + size).
 * Both @start and @size have to be aligned to the memory block size;
 * otherwise a warning is triggered and nothing is removed. The memory block
 * devices have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long first_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_id = pfn_to_block_id(PFN_DOWN(start + size));
	unsigned long id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (id = first_id; id != end_id; id++) {
		struct memory_block *mem = find_memory_block_by_id(id);

		if (WARN_ON_ONCE(!mem))
			continue;

		/* Take the block's hwpoison count out of the global count. */
		num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
		unregister_memory_block_under_nodes(mem);
		remove_memory_block(mem);
	}
}
90862306a36Sopenharmony_ci
/*
 * Attributes of the memory subsystem itself (as opposed to the per-block
 * attributes). Which entries are present depends on the kernel
 * configuration: probe needs CONFIG_ARCH_MEMORY_PROBE, the page offlining
 * attributes need CONFIG_MEMORY_FAILURE and crash_hotplug needs
 * CONFIG_CRASH_HOTPLUG.
 */
static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
#ifdef CONFIG_CRASH_HOTPLUG
	&dev_attr_crash_hotplug.attr,
#endif
	NULL
};

/* Unnamed attribute group wrapping memory_root_attrs[]. */
static const struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

/*
 * NULL-terminated list of groups; passed to subsys_system_register() in
 * memory_dev_init().
 */
static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci/*
93762306a36Sopenharmony_ci * Initialize the sysfs support for memory devices. At the time this function
93862306a36Sopenharmony_ci * is called, we cannot have concurrent creation/deletion of memory block
93962306a36Sopenharmony_ci * devices, the device_hotplug_lock is not needed.
94062306a36Sopenharmony_ci */
94162306a36Sopenharmony_civoid __init memory_dev_init(void)
94262306a36Sopenharmony_ci{
94362306a36Sopenharmony_ci	int ret;
94462306a36Sopenharmony_ci	unsigned long block_sz, nr;
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci	/* Validate the configured memory block size */
94762306a36Sopenharmony_ci	block_sz = memory_block_size_bytes();
94862306a36Sopenharmony_ci	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
94962306a36Sopenharmony_ci		panic("Memory block size not suitable: 0x%lx\n", block_sz);
95062306a36Sopenharmony_ci	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
95362306a36Sopenharmony_ci	if (ret)
95462306a36Sopenharmony_ci		panic("%s() failed to register subsystem: %d\n", __func__, ret);
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	/*
95762306a36Sopenharmony_ci	 * Create entries for memory sections that were found
95862306a36Sopenharmony_ci	 * during boot and have been initialized
95962306a36Sopenharmony_ci	 */
96062306a36Sopenharmony_ci	for (nr = 0; nr <= __highest_present_section_nr;
96162306a36Sopenharmony_ci	     nr += sections_per_block) {
96262306a36Sopenharmony_ci		ret = add_boot_memory_block(nr);
96362306a36Sopenharmony_ci		if (ret)
96462306a36Sopenharmony_ci			panic("%s() failed to add memory block: %d\n", __func__,
96562306a36Sopenharmony_ci			      ret);
96662306a36Sopenharmony_ci	}
96762306a36Sopenharmony_ci}
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci/**
97062306a36Sopenharmony_ci * walk_memory_blocks - walk through all present memory blocks overlapped
97162306a36Sopenharmony_ci *			by the range [start, start + size)
97262306a36Sopenharmony_ci *
97362306a36Sopenharmony_ci * @start: start address of the memory range
97462306a36Sopenharmony_ci * @size: size of the memory range
97562306a36Sopenharmony_ci * @arg: argument passed to func
97662306a36Sopenharmony_ci * @func: callback for each memory section walked
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * This function walks through all present memory blocks overlapped by the
97962306a36Sopenharmony_ci * range [start, start + size), calling func on each memory block.
98062306a36Sopenharmony_ci *
98162306a36Sopenharmony_ci * In case func() returns an error, walking is aborted and the error is
98262306a36Sopenharmony_ci * returned.
98362306a36Sopenharmony_ci *
98462306a36Sopenharmony_ci * Called under device_hotplug_lock.
98562306a36Sopenharmony_ci */
98662306a36Sopenharmony_ciint walk_memory_blocks(unsigned long start, unsigned long size,
98762306a36Sopenharmony_ci		       void *arg, walk_memory_blocks_func_t func)
98862306a36Sopenharmony_ci{
98962306a36Sopenharmony_ci	const unsigned long start_block_id = phys_to_block_id(start);
99062306a36Sopenharmony_ci	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
99162306a36Sopenharmony_ci	struct memory_block *mem;
99262306a36Sopenharmony_ci	unsigned long block_id;
99362306a36Sopenharmony_ci	int ret = 0;
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci	if (!size)
99662306a36Sopenharmony_ci		return 0;
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
99962306a36Sopenharmony_ci		mem = find_memory_block_by_id(block_id);
100062306a36Sopenharmony_ci		if (!mem)
100162306a36Sopenharmony_ci			continue;
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci		ret = func(mem, arg);
100462306a36Sopenharmony_ci		put_device(&mem->dev);
100562306a36Sopenharmony_ci		if (ret)
100662306a36Sopenharmony_ci			break;
100762306a36Sopenharmony_ci	}
100862306a36Sopenharmony_ci	return ret;
100962306a36Sopenharmony_ci}
101062306a36Sopenharmony_ci
/*
 * Context handed through bus_for_each_dev() by for_each_memory_block():
 * the user callback and the opaque argument to pass to it.
 */
struct for_each_memory_block_cb_data {
	walk_memory_blocks_func_t func;
	void *arg;
};
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_cistatic int for_each_memory_block_cb(struct device *dev, void *data)
101762306a36Sopenharmony_ci{
101862306a36Sopenharmony_ci	struct memory_block *mem = to_memory_block(dev);
101962306a36Sopenharmony_ci	struct for_each_memory_block_cb_data *cb_data = data;
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci	return cb_data->func(mem, cb_data->arg);
102262306a36Sopenharmony_ci}
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci/**
102562306a36Sopenharmony_ci * for_each_memory_block - walk through all present memory blocks
102662306a36Sopenharmony_ci *
102762306a36Sopenharmony_ci * @arg: argument passed to func
102862306a36Sopenharmony_ci * @func: callback for each memory block walked
102962306a36Sopenharmony_ci *
103062306a36Sopenharmony_ci * This function walks through all present memory blocks, calling func on
103162306a36Sopenharmony_ci * each memory block.
103262306a36Sopenharmony_ci *
103362306a36Sopenharmony_ci * In case func() returns an error, walking is aborted and the error is
103462306a36Sopenharmony_ci * returned.
103562306a36Sopenharmony_ci */
103662306a36Sopenharmony_ciint for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
103762306a36Sopenharmony_ci{
103862306a36Sopenharmony_ci	struct for_each_memory_block_cb_data cb_data = {
103962306a36Sopenharmony_ci		.func = func,
104062306a36Sopenharmony_ci		.arg = arg,
104162306a36Sopenharmony_ci	};
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
104462306a36Sopenharmony_ci				for_each_memory_block_cb);
104562306a36Sopenharmony_ci}
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci/*
104862306a36Sopenharmony_ci * This is an internal helper to unify allocation and initialization of
104962306a36Sopenharmony_ci * memory groups. Note that the passed memory group will be copied to a
105062306a36Sopenharmony_ci * dynamically allocated memory group. After this call, the passed
105162306a36Sopenharmony_ci * memory group should no longer be used.
105262306a36Sopenharmony_ci */
105362306a36Sopenharmony_cistatic int memory_group_register(struct memory_group group)
105462306a36Sopenharmony_ci{
105562306a36Sopenharmony_ci	struct memory_group *new_group;
105662306a36Sopenharmony_ci	uint32_t mgid;
105762306a36Sopenharmony_ci	int ret;
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	if (!node_possible(group.nid))
106062306a36Sopenharmony_ci		return -EINVAL;
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci	new_group = kzalloc(sizeof(group), GFP_KERNEL);
106362306a36Sopenharmony_ci	if (!new_group)
106462306a36Sopenharmony_ci		return -ENOMEM;
106562306a36Sopenharmony_ci	*new_group = group;
106662306a36Sopenharmony_ci	INIT_LIST_HEAD(&new_group->memory_blocks);
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci	ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b,
106962306a36Sopenharmony_ci		       GFP_KERNEL);
107062306a36Sopenharmony_ci	if (ret) {
107162306a36Sopenharmony_ci		kfree(new_group);
107262306a36Sopenharmony_ci		return ret;
107362306a36Sopenharmony_ci	} else if (group.is_dynamic) {
107462306a36Sopenharmony_ci		xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC);
107562306a36Sopenharmony_ci	}
107662306a36Sopenharmony_ci	return mgid;
107762306a36Sopenharmony_ci}
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci/**
108062306a36Sopenharmony_ci * memory_group_register_static() - Register a static memory group.
108162306a36Sopenharmony_ci * @nid: The node id.
108262306a36Sopenharmony_ci * @max_pages: The maximum number of pages we'll have in this static memory
108362306a36Sopenharmony_ci *	       group.
108462306a36Sopenharmony_ci *
108562306a36Sopenharmony_ci * Register a new static memory group and return the memory group id.
108662306a36Sopenharmony_ci * All memory in the group belongs to a single unit, such as a DIMM. All
108762306a36Sopenharmony_ci * memory belonging to a static memory group is added in one go to be removed
108862306a36Sopenharmony_ci * in one go -- it's static.
108962306a36Sopenharmony_ci *
109062306a36Sopenharmony_ci * Returns an error if out of memory, if the node id is invalid, if no new
109162306a36Sopenharmony_ci * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
109262306a36Sopenharmony_ci * returns the new memory group id.
109362306a36Sopenharmony_ci */
109462306a36Sopenharmony_ciint memory_group_register_static(int nid, unsigned long max_pages)
109562306a36Sopenharmony_ci{
109662306a36Sopenharmony_ci	struct memory_group group = {
109762306a36Sopenharmony_ci		.nid = nid,
109862306a36Sopenharmony_ci		.s = {
109962306a36Sopenharmony_ci			.max_pages = max_pages,
110062306a36Sopenharmony_ci		},
110162306a36Sopenharmony_ci	};
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	if (!max_pages)
110462306a36Sopenharmony_ci		return -EINVAL;
110562306a36Sopenharmony_ci	return memory_group_register(group);
110662306a36Sopenharmony_ci}
110762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_register_static);
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci/**
111062306a36Sopenharmony_ci * memory_group_register_dynamic() - Register a dynamic memory group.
111162306a36Sopenharmony_ci * @nid: The node id.
111262306a36Sopenharmony_ci * @unit_pages: Unit in pages in which is memory added/removed in this dynamic
111362306a36Sopenharmony_ci *		memory group.
111462306a36Sopenharmony_ci *
111562306a36Sopenharmony_ci * Register a new dynamic memory group and return the memory group id.
111662306a36Sopenharmony_ci * Memory within a dynamic memory group is added/removed dynamically
111762306a36Sopenharmony_ci * in unit_pages.
111862306a36Sopenharmony_ci *
111962306a36Sopenharmony_ci * Returns an error if out of memory, if the node id is invalid, if no new
112062306a36Sopenharmony_ci * memory groups can be registered, or if unit_pages is invalid (0, not a
112162306a36Sopenharmony_ci * power of two, smaller than a single memory block). Otherwise, returns the
112262306a36Sopenharmony_ci * new memory group id.
112362306a36Sopenharmony_ci */
112462306a36Sopenharmony_ciint memory_group_register_dynamic(int nid, unsigned long unit_pages)
112562306a36Sopenharmony_ci{
112662306a36Sopenharmony_ci	struct memory_group group = {
112762306a36Sopenharmony_ci		.nid = nid,
112862306a36Sopenharmony_ci		.is_dynamic = true,
112962306a36Sopenharmony_ci		.d = {
113062306a36Sopenharmony_ci			.unit_pages = unit_pages,
113162306a36Sopenharmony_ci		},
113262306a36Sopenharmony_ci	};
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	if (!unit_pages || !is_power_of_2(unit_pages) ||
113562306a36Sopenharmony_ci	    unit_pages < PHYS_PFN(memory_block_size_bytes()))
113662306a36Sopenharmony_ci		return -EINVAL;
113762306a36Sopenharmony_ci	return memory_group_register(group);
113862306a36Sopenharmony_ci}
113962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_register_dynamic);
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci/**
114262306a36Sopenharmony_ci * memory_group_unregister() - Unregister a memory group.
114362306a36Sopenharmony_ci * @mgid: the memory group id
114462306a36Sopenharmony_ci *
114562306a36Sopenharmony_ci * Unregister a memory group. If any memory block still belongs to this
114662306a36Sopenharmony_ci * memory group, unregistering will fail.
114762306a36Sopenharmony_ci *
114862306a36Sopenharmony_ci * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
114962306a36Sopenharmony_ci * memory blocks still belong to this memory group and returns 0 if
115062306a36Sopenharmony_ci * unregistering succeeded.
115162306a36Sopenharmony_ci */
115262306a36Sopenharmony_ciint memory_group_unregister(int mgid)
115362306a36Sopenharmony_ci{
115462306a36Sopenharmony_ci	struct memory_group *group;
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci	if (mgid < 0)
115762306a36Sopenharmony_ci		return -EINVAL;
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ci	group = xa_load(&memory_groups, mgid);
116062306a36Sopenharmony_ci	if (!group)
116162306a36Sopenharmony_ci		return -EINVAL;
116262306a36Sopenharmony_ci	if (!list_empty(&group->memory_blocks))
116362306a36Sopenharmony_ci		return -EBUSY;
116462306a36Sopenharmony_ci	xa_erase(&memory_groups, mgid);
116562306a36Sopenharmony_ci	kfree(group);
116662306a36Sopenharmony_ci	return 0;
116762306a36Sopenharmony_ci}
116862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(memory_group_unregister);
116962306a36Sopenharmony_ci
/*
 * This is an internal helper only to be used in core memory hotplug code to
 * lookup a memory group. We don't care about locking, as we don't expect a
 * memory group to get unregistered while adding memory to it -- because
 * the group and the memory is managed by the same driver.
 *
 * Returns the entry stored under @mgid in the memory_groups xarray, which
 * is NULL if no group is registered under that id.
 */
struct memory_group *memory_group_find_by_id(int mgid)
{
	return xa_load(&memory_groups, mgid);
}
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci/*
118262306a36Sopenharmony_ci * This is an internal helper only to be used in core memory hotplug code to
118362306a36Sopenharmony_ci * walk all dynamic memory groups excluding a given memory group, either
118462306a36Sopenharmony_ci * belonging to a specific node, or belonging to any node.
118562306a36Sopenharmony_ci */
118662306a36Sopenharmony_ciint walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
118762306a36Sopenharmony_ci			       struct memory_group *excluded, void *arg)
118862306a36Sopenharmony_ci{
118962306a36Sopenharmony_ci	struct memory_group *group;
119062306a36Sopenharmony_ci	unsigned long index;
119162306a36Sopenharmony_ci	int ret = 0;
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_ci	xa_for_each_marked(&memory_groups, index, group,
119462306a36Sopenharmony_ci			   MEMORY_GROUP_MARK_DYNAMIC) {
119562306a36Sopenharmony_ci		if (group == excluded)
119662306a36Sopenharmony_ci			continue;
119762306a36Sopenharmony_ci#ifdef CONFIG_NUMA
119862306a36Sopenharmony_ci		if (nid != NUMA_NO_NODE && group->nid != nid)
119962306a36Sopenharmony_ci			continue;
120062306a36Sopenharmony_ci#endif /* CONFIG_NUMA */
120162306a36Sopenharmony_ci		ret = func(group, arg);
120262306a36Sopenharmony_ci		if (ret)
120362306a36Sopenharmony_ci			break;
120462306a36Sopenharmony_ci	}
120562306a36Sopenharmony_ci	return ret;
120662306a36Sopenharmony_ci}
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
120962306a36Sopenharmony_civoid memblk_nr_poison_inc(unsigned long pfn)
121062306a36Sopenharmony_ci{
121162306a36Sopenharmony_ci	const unsigned long block_id = pfn_to_block_id(pfn);
121262306a36Sopenharmony_ci	struct memory_block *mem = find_memory_block_by_id(block_id);
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci	if (mem)
121562306a36Sopenharmony_ci		atomic_long_inc(&mem->nr_hwpoison);
121662306a36Sopenharmony_ci}
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_civoid memblk_nr_poison_sub(unsigned long pfn, long i)
121962306a36Sopenharmony_ci{
122062306a36Sopenharmony_ci	const unsigned long block_id = pfn_to_block_id(pfn);
122162306a36Sopenharmony_ci	struct memory_block *mem = find_memory_block_by_id(block_id);
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	if (mem)
122462306a36Sopenharmony_ci		atomic_long_sub(i, &mem->nr_hwpoison);
122562306a36Sopenharmony_ci}
122662306a36Sopenharmony_ci
/* Return the current value of the hwpoison counter of memory block @mem. */
static unsigned long memblk_nr_poison(struct memory_block *mem)
{
	return atomic_long_read(&mem->nr_hwpoison);
}
123162306a36Sopenharmony_ci#endif
1232