18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/mm/memory_hotplug.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C)
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/stddef.h>
98c2ecf20Sopenharmony_ci#include <linux/mm.h>
108c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
118c2ecf20Sopenharmony_ci#include <linux/swap.h>
128c2ecf20Sopenharmony_ci#include <linux/interrupt.h>
138c2ecf20Sopenharmony_ci#include <linux/pagemap.h>
148c2ecf20Sopenharmony_ci#include <linux/compiler.h>
158c2ecf20Sopenharmony_ci#include <linux/export.h>
168c2ecf20Sopenharmony_ci#include <linux/pagevec.h>
178c2ecf20Sopenharmony_ci#include <linux/writeback.h>
188c2ecf20Sopenharmony_ci#include <linux/slab.h>
198c2ecf20Sopenharmony_ci#include <linux/sysctl.h>
208c2ecf20Sopenharmony_ci#include <linux/cpu.h>
218c2ecf20Sopenharmony_ci#include <linux/memory.h>
228c2ecf20Sopenharmony_ci#include <linux/memremap.h>
238c2ecf20Sopenharmony_ci#include <linux/memory_hotplug.h>
248c2ecf20Sopenharmony_ci#include <linux/highmem.h>
258c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
268c2ecf20Sopenharmony_ci#include <linux/ioport.h>
278c2ecf20Sopenharmony_ci#include <linux/delay.h>
288c2ecf20Sopenharmony_ci#include <linux/migrate.h>
298c2ecf20Sopenharmony_ci#include <linux/page-isolation.h>
308c2ecf20Sopenharmony_ci#include <linux/pfn.h>
318c2ecf20Sopenharmony_ci#include <linux/suspend.h>
328c2ecf20Sopenharmony_ci#include <linux/mm_inline.h>
338c2ecf20Sopenharmony_ci#include <linux/firmware-map.h>
348c2ecf20Sopenharmony_ci#include <linux/stop_machine.h>
358c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
368c2ecf20Sopenharmony_ci#include <linux/memblock.h>
378c2ecf20Sopenharmony_ci#include <linux/compaction.h>
388c2ecf20Sopenharmony_ci#include <linux/rmap.h>
398c2ecf20Sopenharmony_ci#include <linux/zswapd.h>
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci#include <asm/tlbflush.h>
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci#include "internal.h"
448c2ecf20Sopenharmony_ci#include "shuffle.h"
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci/*
478c2ecf20Sopenharmony_ci * online_page_callback contains pointer to current page onlining function.
488c2ecf20Sopenharmony_ci * Initially it is generic_online_page(). If it is required it could be
498c2ecf20Sopenharmony_ci * changed by calling set_online_page_callback() for callback registration
508c2ecf20Sopenharmony_ci * and restore_online_page_callback() for generic callback restore.
518c2ecf20Sopenharmony_ci */
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_cistatic online_page_callback_t online_page_callback = generic_online_page;
548c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(online_page_callback_lock);
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ciDEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_civoid get_online_mems(void)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	percpu_down_read(&mem_hotplug_lock);
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_civoid put_online_mems(void)
648c2ecf20Sopenharmony_ci{
658c2ecf20Sopenharmony_ci	percpu_up_read(&mem_hotplug_lock);
668c2ecf20Sopenharmony_ci}
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_cibool movable_node_enabled = false;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
718c2ecf20Sopenharmony_ciint memhp_default_online_type = MMOP_OFFLINE;
728c2ecf20Sopenharmony_ci#else
738c2ecf20Sopenharmony_ciint memhp_default_online_type = MMOP_ONLINE;
748c2ecf20Sopenharmony_ci#endif
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic int __init setup_memhp_default_state(char *str)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	const int online_type = memhp_online_type_from_str(str);
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	if (online_type >= 0)
818c2ecf20Sopenharmony_ci		memhp_default_online_type = online_type;
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	return 1;
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ci__setup("memhp_default_state=", setup_memhp_default_state);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_civoid mem_hotplug_begin(void)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	cpus_read_lock();
908c2ecf20Sopenharmony_ci	percpu_down_write(&mem_hotplug_lock);
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_civoid mem_hotplug_done(void)
948c2ecf20Sopenharmony_ci{
958c2ecf20Sopenharmony_ci	percpu_up_write(&mem_hotplug_lock);
968c2ecf20Sopenharmony_ci	cpus_read_unlock();
978c2ecf20Sopenharmony_ci}
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ciu64 max_mem_size = U64_MAX;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci/* add this memory to iomem resource */
1028c2ecf20Sopenharmony_cistatic struct resource *register_memory_resource(u64 start, u64 size,
1038c2ecf20Sopenharmony_ci						 const char *resource_name)
1048c2ecf20Sopenharmony_ci{
1058c2ecf20Sopenharmony_ci	struct resource *res;
1068c2ecf20Sopenharmony_ci	unsigned long flags =  IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	if (strcmp(resource_name, "System RAM"))
1098c2ecf20Sopenharmony_ci		flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	/*
1128c2ecf20Sopenharmony_ci	 * Make sure value parsed from 'mem=' only restricts memory adding
1138c2ecf20Sopenharmony_ci	 * while booting, so that memory hotplug won't be impacted. Please
1148c2ecf20Sopenharmony_ci	 * refer to document of 'mem=' in kernel-parameters.txt for more
1158c2ecf20Sopenharmony_ci	 * details.
1168c2ecf20Sopenharmony_ci	 */
1178c2ecf20Sopenharmony_ci	if (start + size > max_mem_size && system_state < SYSTEM_RUNNING)
1188c2ecf20Sopenharmony_ci		return ERR_PTR(-E2BIG);
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	/*
1218c2ecf20Sopenharmony_ci	 * Request ownership of the new memory range.  This might be
1228c2ecf20Sopenharmony_ci	 * a child of an existing resource that was present but
1238c2ecf20Sopenharmony_ci	 * not marked as busy.
1248c2ecf20Sopenharmony_ci	 */
1258c2ecf20Sopenharmony_ci	res = __request_region(&iomem_resource, start, size,
1268c2ecf20Sopenharmony_ci			       resource_name, flags);
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	if (!res) {
1298c2ecf20Sopenharmony_ci		pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
1308c2ecf20Sopenharmony_ci				start, start + size);
1318c2ecf20Sopenharmony_ci		return ERR_PTR(-EEXIST);
1328c2ecf20Sopenharmony_ci	}
1338c2ecf20Sopenharmony_ci	return res;
1348c2ecf20Sopenharmony_ci}
1358c2ecf20Sopenharmony_ci
/*
 * release_memory_resource - release and free a resource.
 * @res: resource to drop; NULL is accepted and ignored
 */
static void release_memory_resource(struct resource *res)
{
	if (res) {
		release_resource(res);
		kfree(res);
	}
}
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
1458c2ecf20Sopenharmony_civoid get_page_bootmem(unsigned long info,  struct page *page,
1468c2ecf20Sopenharmony_ci		      unsigned long type)
1478c2ecf20Sopenharmony_ci{
1488c2ecf20Sopenharmony_ci	page->freelist = (void *)type;
1498c2ecf20Sopenharmony_ci	SetPagePrivate(page);
1508c2ecf20Sopenharmony_ci	set_page_private(page, info);
1518c2ecf20Sopenharmony_ci	page_ref_inc(page);
1528c2ecf20Sopenharmony_ci}
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_civoid put_page_bootmem(struct page *page)
1558c2ecf20Sopenharmony_ci{
1568c2ecf20Sopenharmony_ci	unsigned long type;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	type = (unsigned long) page->freelist;
1598c2ecf20Sopenharmony_ci	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
1608c2ecf20Sopenharmony_ci	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	if (page_ref_dec_return(page) == 1) {
1638c2ecf20Sopenharmony_ci		page->freelist = NULL;
1648c2ecf20Sopenharmony_ci		ClearPagePrivate(page);
1658c2ecf20Sopenharmony_ci		set_page_private(page, 0);
1668c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&page->lru);
1678c2ecf20Sopenharmony_ci		free_reserved_page(page);
1688c2ecf20Sopenharmony_ci	}
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
1728c2ecf20Sopenharmony_ci#ifndef CONFIG_SPARSEMEM_VMEMMAP
1738c2ecf20Sopenharmony_cistatic void register_page_bootmem_info_section(unsigned long start_pfn)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	unsigned long mapsize, section_nr, i;
1768c2ecf20Sopenharmony_ci	struct mem_section *ms;
1778c2ecf20Sopenharmony_ci	struct page *page, *memmap;
1788c2ecf20Sopenharmony_ci	struct mem_section_usage *usage;
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	section_nr = pfn_to_section_nr(start_pfn);
1818c2ecf20Sopenharmony_ci	ms = __nr_to_section(section_nr);
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	/* Get section's memmap address */
1848c2ecf20Sopenharmony_ci	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	/*
1878c2ecf20Sopenharmony_ci	 * Get page for the memmap's phys address
1888c2ecf20Sopenharmony_ci	 * XXX: need more consideration for sparse_vmemmap...
1898c2ecf20Sopenharmony_ci	 */
1908c2ecf20Sopenharmony_ci	page = virt_to_page(memmap);
1918c2ecf20Sopenharmony_ci	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
1928c2ecf20Sopenharmony_ci	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	/* remember memmap's page */
1958c2ecf20Sopenharmony_ci	for (i = 0; i < mapsize; i++, page++)
1968c2ecf20Sopenharmony_ci		get_page_bootmem(section_nr, page, SECTION_INFO);
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	usage = ms->usage;
1998c2ecf20Sopenharmony_ci	page = virt_to_page(usage);
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	for (i = 0; i < mapsize; i++, page++)
2048c2ecf20Sopenharmony_ci		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci}
2078c2ecf20Sopenharmony_ci#else /* CONFIG_SPARSEMEM_VMEMMAP */
2088c2ecf20Sopenharmony_cistatic void register_page_bootmem_info_section(unsigned long start_pfn)
2098c2ecf20Sopenharmony_ci{
2108c2ecf20Sopenharmony_ci	unsigned long mapsize, section_nr, i;
2118c2ecf20Sopenharmony_ci	struct mem_section *ms;
2128c2ecf20Sopenharmony_ci	struct page *page, *memmap;
2138c2ecf20Sopenharmony_ci	struct mem_section_usage *usage;
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	section_nr = pfn_to_section_nr(start_pfn);
2168c2ecf20Sopenharmony_ci	ms = __nr_to_section(section_nr);
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	usage = ms->usage;
2238c2ecf20Sopenharmony_ci	page = virt_to_page(usage);
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT;
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci	for (i = 0; i < mapsize; i++, page++)
2288c2ecf20Sopenharmony_ci		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
2298c2ecf20Sopenharmony_ci}
2308c2ecf20Sopenharmony_ci#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_civoid __init register_page_bootmem_info_node(struct pglist_data *pgdat)
2338c2ecf20Sopenharmony_ci{
2348c2ecf20Sopenharmony_ci	unsigned long i, pfn, end_pfn, nr_pages;
2358c2ecf20Sopenharmony_ci	int node = pgdat->node_id;
2368c2ecf20Sopenharmony_ci	struct page *page;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
2398c2ecf20Sopenharmony_ci	page = virt_to_page(pgdat);
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	for (i = 0; i < nr_pages; i++, page++)
2428c2ecf20Sopenharmony_ci		get_page_bootmem(node, page, NODE_INFO);
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	pfn = pgdat->node_start_pfn;
2458c2ecf20Sopenharmony_ci	end_pfn = pgdat_end_pfn(pgdat);
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	/* register section info */
2488c2ecf20Sopenharmony_ci	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
2498c2ecf20Sopenharmony_ci		/*
2508c2ecf20Sopenharmony_ci		 * Some platforms can assign the same pfn to multiple nodes - on
2518c2ecf20Sopenharmony_ci		 * node0 as well as nodeN.  To avoid registering a pfn against
2528c2ecf20Sopenharmony_ci		 * multiple nodes we check that this pfn does not already
2538c2ecf20Sopenharmony_ci		 * reside in some other nodes.
2548c2ecf20Sopenharmony_ci		 */
2558c2ecf20Sopenharmony_ci		if (pfn_valid(pfn) && (early_pfn_to_nid(pfn) == node))
2568c2ecf20Sopenharmony_ci			register_page_bootmem_info_section(pfn);
2578c2ecf20Sopenharmony_ci	}
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
2628c2ecf20Sopenharmony_ci		const char *reason)
2638c2ecf20Sopenharmony_ci{
2648c2ecf20Sopenharmony_ci	/*
2658c2ecf20Sopenharmony_ci	 * Disallow all operations smaller than a sub-section and only
2668c2ecf20Sopenharmony_ci	 * allow operations smaller than a section for
2678c2ecf20Sopenharmony_ci	 * SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range()
2688c2ecf20Sopenharmony_ci	 * enforces a larger memory_block_size_bytes() granularity for
2698c2ecf20Sopenharmony_ci	 * memory that will be marked online, so this check should only
2708c2ecf20Sopenharmony_ci	 * fire for direct arch_{add,remove}_memory() users outside of
2718c2ecf20Sopenharmony_ci	 * add_memory_resource().
2728c2ecf20Sopenharmony_ci	 */
2738c2ecf20Sopenharmony_ci	unsigned long min_align;
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
2768c2ecf20Sopenharmony_ci		min_align = PAGES_PER_SUBSECTION;
2778c2ecf20Sopenharmony_ci	else
2788c2ecf20Sopenharmony_ci		min_align = PAGES_PER_SECTION;
2798c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(pfn, min_align)
2808c2ecf20Sopenharmony_ci			|| !IS_ALIGNED(nr_pages, min_align)) {
2818c2ecf20Sopenharmony_ci		WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n",
2828c2ecf20Sopenharmony_ci				reason, pfn, pfn + nr_pages - 1);
2838c2ecf20Sopenharmony_ci		return -EINVAL;
2848c2ecf20Sopenharmony_ci	}
2858c2ecf20Sopenharmony_ci	return 0;
2868c2ecf20Sopenharmony_ci}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_cistatic int check_hotplug_memory_addressable(unsigned long pfn,
2898c2ecf20Sopenharmony_ci					    unsigned long nr_pages)
2908c2ecf20Sopenharmony_ci{
2918c2ecf20Sopenharmony_ci	const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	if (max_addr >> MAX_PHYSMEM_BITS) {
2948c2ecf20Sopenharmony_ci		const u64 max_allowed = (1ull << (MAX_PHYSMEM_BITS + 1)) - 1;
2958c2ecf20Sopenharmony_ci		WARN(1,
2968c2ecf20Sopenharmony_ci		     "Hotplugged memory exceeds maximum addressable address, range=%#llx-%#llx, maximum=%#llx\n",
2978c2ecf20Sopenharmony_ci		     (u64)PFN_PHYS(pfn), max_addr, max_allowed);
2988c2ecf20Sopenharmony_ci		return -E2BIG;
2998c2ecf20Sopenharmony_ci	}
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	return 0;
3028c2ecf20Sopenharmony_ci}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci/*
3058c2ecf20Sopenharmony_ci * Reasonably generic function for adding memory.  It is
3068c2ecf20Sopenharmony_ci * expected that archs that support memory hotplug will
3078c2ecf20Sopenharmony_ci * call this function after deciding the zone to which to
3088c2ecf20Sopenharmony_ci * add the new pages.
3098c2ecf20Sopenharmony_ci */
3108c2ecf20Sopenharmony_ciint __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
3118c2ecf20Sopenharmony_ci		struct mhp_params *params)
3128c2ecf20Sopenharmony_ci{
3138c2ecf20Sopenharmony_ci	const unsigned long end_pfn = pfn + nr_pages;
3148c2ecf20Sopenharmony_ci	unsigned long cur_nr_pages;
3158c2ecf20Sopenharmony_ci	int err;
3168c2ecf20Sopenharmony_ci	struct vmem_altmap *altmap = params->altmap;
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!params->pgprot.pgprot))
3198c2ecf20Sopenharmony_ci		return -EINVAL;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	err = check_hotplug_memory_addressable(pfn, nr_pages);
3228c2ecf20Sopenharmony_ci	if (err)
3238c2ecf20Sopenharmony_ci		return err;
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	if (altmap) {
3268c2ecf20Sopenharmony_ci		/*
3278c2ecf20Sopenharmony_ci		 * Validate altmap is within bounds of the total request
3288c2ecf20Sopenharmony_ci		 */
3298c2ecf20Sopenharmony_ci		if (altmap->base_pfn != pfn
3308c2ecf20Sopenharmony_ci				|| vmem_altmap_offset(altmap) > nr_pages) {
3318c2ecf20Sopenharmony_ci			pr_warn_once("memory add fail, invalid altmap\n");
3328c2ecf20Sopenharmony_ci			return -EINVAL;
3338c2ecf20Sopenharmony_ci		}
3348c2ecf20Sopenharmony_ci		altmap->alloc = 0;
3358c2ecf20Sopenharmony_ci	}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	err = check_pfn_span(pfn, nr_pages, "add");
3388c2ecf20Sopenharmony_ci	if (err)
3398c2ecf20Sopenharmony_ci		return err;
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	for (; pfn < end_pfn; pfn += cur_nr_pages) {
3428c2ecf20Sopenharmony_ci		/* Select all remaining pages up to the next section boundary */
3438c2ecf20Sopenharmony_ci		cur_nr_pages = min(end_pfn - pfn,
3448c2ecf20Sopenharmony_ci				   SECTION_ALIGN_UP(pfn + 1) - pfn);
3458c2ecf20Sopenharmony_ci		err = sparse_add_section(nid, pfn, cur_nr_pages, altmap);
3468c2ecf20Sopenharmony_ci		if (err)
3478c2ecf20Sopenharmony_ci			break;
3488c2ecf20Sopenharmony_ci		cond_resched();
3498c2ecf20Sopenharmony_ci	}
3508c2ecf20Sopenharmony_ci	vmemmap_populate_print_last();
3518c2ecf20Sopenharmony_ci	return err;
3528c2ecf20Sopenharmony_ci}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
3558c2ecf20Sopenharmony_cistatic unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
3568c2ecf20Sopenharmony_ci				     unsigned long start_pfn,
3578c2ecf20Sopenharmony_ci				     unsigned long end_pfn)
3588c2ecf20Sopenharmony_ci{
3598c2ecf20Sopenharmony_ci	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) {
3608c2ecf20Sopenharmony_ci		if (unlikely(!pfn_to_online_page(start_pfn)))
3618c2ecf20Sopenharmony_ci			continue;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci		if (unlikely(pfn_to_nid(start_pfn) != nid))
3648c2ecf20Sopenharmony_ci			continue;
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci		if (zone != page_zone(pfn_to_page(start_pfn)))
3678c2ecf20Sopenharmony_ci			continue;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci		return start_pfn;
3708c2ecf20Sopenharmony_ci	}
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	return 0;
3738c2ecf20Sopenharmony_ci}
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
3768c2ecf20Sopenharmony_cistatic unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
3778c2ecf20Sopenharmony_ci				    unsigned long start_pfn,
3788c2ecf20Sopenharmony_ci				    unsigned long end_pfn)
3798c2ecf20Sopenharmony_ci{
3808c2ecf20Sopenharmony_ci	unsigned long pfn;
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	/* pfn is the end pfn of a memory section. */
3838c2ecf20Sopenharmony_ci	pfn = end_pfn - 1;
3848c2ecf20Sopenharmony_ci	for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) {
3858c2ecf20Sopenharmony_ci		if (unlikely(!pfn_to_online_page(pfn)))
3868c2ecf20Sopenharmony_ci			continue;
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci		if (unlikely(pfn_to_nid(pfn) != nid))
3898c2ecf20Sopenharmony_ci			continue;
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci		if (zone != page_zone(pfn_to_page(pfn)))
3928c2ecf20Sopenharmony_ci			continue;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci		return pfn;
3958c2ecf20Sopenharmony_ci	}
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	return 0;
3988c2ecf20Sopenharmony_ci}
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_cistatic void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
4018c2ecf20Sopenharmony_ci			     unsigned long end_pfn)
4028c2ecf20Sopenharmony_ci{
4038c2ecf20Sopenharmony_ci	unsigned long pfn;
4048c2ecf20Sopenharmony_ci	int nid = zone_to_nid(zone);
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci	zone_span_writelock(zone);
4078c2ecf20Sopenharmony_ci	if (zone->zone_start_pfn == start_pfn) {
4088c2ecf20Sopenharmony_ci		/*
4098c2ecf20Sopenharmony_ci		 * If the section is smallest section in the zone, it need
4108c2ecf20Sopenharmony_ci		 * shrink zone->zone_start_pfn and zone->zone_spanned_pages.
4118c2ecf20Sopenharmony_ci		 * In this case, we find second smallest valid mem_section
4128c2ecf20Sopenharmony_ci		 * for shrinking zone.
4138c2ecf20Sopenharmony_ci		 */
4148c2ecf20Sopenharmony_ci		pfn = find_smallest_section_pfn(nid, zone, end_pfn,
4158c2ecf20Sopenharmony_ci						zone_end_pfn(zone));
4168c2ecf20Sopenharmony_ci		if (pfn) {
4178c2ecf20Sopenharmony_ci			zone->spanned_pages = zone_end_pfn(zone) - pfn;
4188c2ecf20Sopenharmony_ci			zone->zone_start_pfn = pfn;
4198c2ecf20Sopenharmony_ci		} else {
4208c2ecf20Sopenharmony_ci			zone->zone_start_pfn = 0;
4218c2ecf20Sopenharmony_ci			zone->spanned_pages = 0;
4228c2ecf20Sopenharmony_ci		}
4238c2ecf20Sopenharmony_ci	} else if (zone_end_pfn(zone) == end_pfn) {
4248c2ecf20Sopenharmony_ci		/*
4258c2ecf20Sopenharmony_ci		 * If the section is biggest section in the zone, it need
4268c2ecf20Sopenharmony_ci		 * shrink zone->spanned_pages.
4278c2ecf20Sopenharmony_ci		 * In this case, we find second biggest valid mem_section for
4288c2ecf20Sopenharmony_ci		 * shrinking zone.
4298c2ecf20Sopenharmony_ci		 */
4308c2ecf20Sopenharmony_ci		pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
4318c2ecf20Sopenharmony_ci					       start_pfn);
4328c2ecf20Sopenharmony_ci		if (pfn)
4338c2ecf20Sopenharmony_ci			zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
4348c2ecf20Sopenharmony_ci		else {
4358c2ecf20Sopenharmony_ci			zone->zone_start_pfn = 0;
4368c2ecf20Sopenharmony_ci			zone->spanned_pages = 0;
4378c2ecf20Sopenharmony_ci		}
4388c2ecf20Sopenharmony_ci	}
4398c2ecf20Sopenharmony_ci	zone_span_writeunlock(zone);
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_cistatic void update_pgdat_span(struct pglist_data *pgdat)
4438c2ecf20Sopenharmony_ci{
4448c2ecf20Sopenharmony_ci	unsigned long node_start_pfn = 0, node_end_pfn = 0;
4458c2ecf20Sopenharmony_ci	struct zone *zone;
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	for (zone = pgdat->node_zones;
4488c2ecf20Sopenharmony_ci	     zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
4498c2ecf20Sopenharmony_ci		unsigned long zone_end_pfn = zone->zone_start_pfn +
4508c2ecf20Sopenharmony_ci					     zone->spanned_pages;
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci		/* No need to lock the zones, they can't change. */
4538c2ecf20Sopenharmony_ci		if (!zone->spanned_pages)
4548c2ecf20Sopenharmony_ci			continue;
4558c2ecf20Sopenharmony_ci		if (!node_end_pfn) {
4568c2ecf20Sopenharmony_ci			node_start_pfn = zone->zone_start_pfn;
4578c2ecf20Sopenharmony_ci			node_end_pfn = zone_end_pfn;
4588c2ecf20Sopenharmony_ci			continue;
4598c2ecf20Sopenharmony_ci		}
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci		if (zone_end_pfn > node_end_pfn)
4628c2ecf20Sopenharmony_ci			node_end_pfn = zone_end_pfn;
4638c2ecf20Sopenharmony_ci		if (zone->zone_start_pfn < node_start_pfn)
4648c2ecf20Sopenharmony_ci			node_start_pfn = zone->zone_start_pfn;
4658c2ecf20Sopenharmony_ci	}
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	pgdat->node_start_pfn = node_start_pfn;
4688c2ecf20Sopenharmony_ci	pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
4698c2ecf20Sopenharmony_ci}
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_civoid __ref remove_pfn_range_from_zone(struct zone *zone,
4728c2ecf20Sopenharmony_ci				      unsigned long start_pfn,
4738c2ecf20Sopenharmony_ci				      unsigned long nr_pages)
4748c2ecf20Sopenharmony_ci{
4758c2ecf20Sopenharmony_ci	const unsigned long end_pfn = start_pfn + nr_pages;
4768c2ecf20Sopenharmony_ci	struct pglist_data *pgdat = zone->zone_pgdat;
4778c2ecf20Sopenharmony_ci	unsigned long pfn, cur_nr_pages, flags;
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	/* Poison struct pages because they are now uninitialized again. */
4808c2ecf20Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
4818c2ecf20Sopenharmony_ci		cond_resched();
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci		/* Select all remaining pages up to the next section boundary */
4848c2ecf20Sopenharmony_ci		cur_nr_pages =
4858c2ecf20Sopenharmony_ci			min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
4868c2ecf20Sopenharmony_ci		page_init_poison(pfn_to_page(pfn),
4878c2ecf20Sopenharmony_ci				 sizeof(struct page) * cur_nr_pages);
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci#ifdef CONFIG_ZONE_DEVICE
4918c2ecf20Sopenharmony_ci	/*
4928c2ecf20Sopenharmony_ci	 * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
4938c2ecf20Sopenharmony_ci	 * we will not try to shrink the zones - which is okay as
4948c2ecf20Sopenharmony_ci	 * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
4958c2ecf20Sopenharmony_ci	 */
4968c2ecf20Sopenharmony_ci	if (zone_idx(zone) == ZONE_DEVICE)
4978c2ecf20Sopenharmony_ci		return;
4988c2ecf20Sopenharmony_ci#endif
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci	clear_zone_contiguous(zone);
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_ci	pgdat_resize_lock(zone->zone_pgdat, &flags);
5038c2ecf20Sopenharmony_ci	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
5048c2ecf20Sopenharmony_ci	update_pgdat_span(pgdat);
5058c2ecf20Sopenharmony_ci	pgdat_resize_unlock(zone->zone_pgdat, &flags);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	set_zone_contiguous(zone);
5088c2ecf20Sopenharmony_ci}
5098c2ecf20Sopenharmony_ci
/*
 * Remove the pages [pfn, pfn + nr_pages) from the section containing @pfn
 * via sparse_remove_section(). Warns once and does nothing when that
 * section is not valid.
 */
static void __remove_section(unsigned long pfn, unsigned long nr_pages,
			     unsigned long map_offset,
			     struct vmem_altmap *altmap)
{
	struct mem_section *ms = __pfn_to_section(pfn);

	if (!WARN_ON_ONCE(!valid_section(ms)))
		sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
}
5218c2ecf20Sopenharmony_ci
/**
 * __remove_pages() - remove sections of pages
 * @pfn: starting pageframe; alignment is validated by check_pfn_span()
 * @nr_pages: number of pages to remove; validated by check_pfn_span()
 * @altmap: alternative device page map or %NULL if default memmap is used
 *
 * Generic helper function to remove section mappings and sysfs entries
 * for the section of the memory we are removing. Caller needs to make
 * sure that pages are marked reserved and zones are adjust properly by
 * calling offline_pages().
 *
 * The range is processed in pieces that never cross a section boundary.
 * Only the first piece is removed with the altmap offset; all following
 * pieces use an offset of 0. A misaligned range is rejected without
 * removing anything.
 */
void __remove_pages(unsigned long pfn, unsigned long nr_pages,
		    struct vmem_altmap *altmap)
{
	const unsigned long end_pfn = pfn + nr_pages;
	unsigned long map_offset = vmem_altmap_offset(altmap);

	if (check_pfn_span(pfn, nr_pages, "remove"))
		return;

	while (pfn < end_pfn) {
		unsigned long chunk;

		cond_resched();

		/* Select all remaining pages up to the next section boundary */
		chunk = min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
		__remove_section(pfn, chunk, map_offset, altmap);

		map_offset = 0;
		pfn += chunk;
	}
}
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ciint set_online_page_callback(online_page_callback_t callback)
5568c2ecf20Sopenharmony_ci{
5578c2ecf20Sopenharmony_ci	int rc = -EINVAL;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	get_online_mems();
5608c2ecf20Sopenharmony_ci	mutex_lock(&online_page_callback_lock);
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	if (online_page_callback == generic_online_page) {
5638c2ecf20Sopenharmony_ci		online_page_callback = callback;
5648c2ecf20Sopenharmony_ci		rc = 0;
5658c2ecf20Sopenharmony_ci	}
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	mutex_unlock(&online_page_callback_lock);
5688c2ecf20Sopenharmony_ci	put_online_mems();
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	return rc;
5718c2ecf20Sopenharmony_ci}
5728c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(set_online_page_callback);
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ciint restore_online_page_callback(online_page_callback_t callback)
5758c2ecf20Sopenharmony_ci{
5768c2ecf20Sopenharmony_ci	int rc = -EINVAL;
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci	get_online_mems();
5798c2ecf20Sopenharmony_ci	mutex_lock(&online_page_callback_lock);
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci	if (online_page_callback == callback) {
5828c2ecf20Sopenharmony_ci		online_page_callback = generic_online_page;
5838c2ecf20Sopenharmony_ci		rc = 0;
5848c2ecf20Sopenharmony_ci	}
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	mutex_unlock(&online_page_callback_lock);
5878c2ecf20Sopenharmony_ci	put_online_mems();
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	return rc;
5908c2ecf20Sopenharmony_ci}
5918c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(restore_online_page_callback);
5928c2ecf20Sopenharmony_ci
/*
 * generic_online_page - default page onlining callback.
 * @page: first page of the block
 * @order: order of the block, i.e. it covers 1 << @order pages
 *
 * Frees the block through __free_pages_core() and adds its pages to the
 * total RAM counter (and to the highmem counter for a highmem page when
 * CONFIG_HIGHMEM is set).
 */
void generic_online_page(struct page *page, unsigned int order)
{
	const unsigned long nr_pages = 1UL << order;

	/*
	 * Freeing the page with debug_pagealloc enabled will try to unmap it,
	 * so we should map it first. This is better than introducing a special
	 * case in page freeing fast path.
	 */
	if (debug_pagealloc_enabled_static())
		kernel_map_pages(page, 1 << order, 1);

	__free_pages_core(page, order);
	totalram_pages_add(nr_pages);
#ifdef CONFIG_HIGHMEM
	if (PageHighMem(page))
		totalhigh_pages_add(nr_pages);
#endif
}
EXPORT_SYMBOL_GPL(generic_online_page);
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_cistatic void online_pages_range(unsigned long start_pfn, unsigned long nr_pages)
6128c2ecf20Sopenharmony_ci{
6138c2ecf20Sopenharmony_ci	const unsigned long end_pfn = start_pfn + nr_pages;
6148c2ecf20Sopenharmony_ci	unsigned long pfn;
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci	/*
6178c2ecf20Sopenharmony_ci	 * Online the pages in MAX_ORDER - 1 aligned chunks. The callback might
6188c2ecf20Sopenharmony_ci	 * decide to not expose all pages to the buddy (e.g., expose them
6198c2ecf20Sopenharmony_ci	 * later). We account all pages as being online and belonging to this
6208c2ecf20Sopenharmony_ci	 * zone ("present").
6218c2ecf20Sopenharmony_ci	 */
6228c2ecf20Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES)
6238c2ecf20Sopenharmony_ci		(*online_page_callback)(pfn_to_page(pfn), MAX_ORDER - 1);
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci	/* mark all involved sections as online */
6268c2ecf20Sopenharmony_ci	online_mem_sections(start_pfn, end_pfn);
6278c2ecf20Sopenharmony_ci}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci/* check which state of node_states will be changed when online memory */
6308c2ecf20Sopenharmony_cistatic void node_states_check_changes_online(unsigned long nr_pages,
6318c2ecf20Sopenharmony_ci	struct zone *zone, struct memory_notify *arg)
6328c2ecf20Sopenharmony_ci{
6338c2ecf20Sopenharmony_ci	int nid = zone_to_nid(zone);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	arg->status_change_nid = NUMA_NO_NODE;
6368c2ecf20Sopenharmony_ci	arg->status_change_nid_normal = NUMA_NO_NODE;
6378c2ecf20Sopenharmony_ci	arg->status_change_nid_high = NUMA_NO_NODE;
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	if (!node_state(nid, N_MEMORY))
6408c2ecf20Sopenharmony_ci		arg->status_change_nid = nid;
6418c2ecf20Sopenharmony_ci	if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY))
6428c2ecf20Sopenharmony_ci		arg->status_change_nid_normal = nid;
6438c2ecf20Sopenharmony_ci#ifdef CONFIG_HIGHMEM
6448c2ecf20Sopenharmony_ci	if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY))
6458c2ecf20Sopenharmony_ci		arg->status_change_nid_high = nid;
6468c2ecf20Sopenharmony_ci#endif
6478c2ecf20Sopenharmony_ci}
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_cistatic void node_states_set_node(int node, struct memory_notify *arg)
6508c2ecf20Sopenharmony_ci{
6518c2ecf20Sopenharmony_ci	if (arg->status_change_nid_normal >= 0)
6528c2ecf20Sopenharmony_ci		node_set_state(node, N_NORMAL_MEMORY);
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	if (arg->status_change_nid_high >= 0)
6558c2ecf20Sopenharmony_ci		node_set_state(node, N_HIGH_MEMORY);
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	if (arg->status_change_nid >= 0)
6588c2ecf20Sopenharmony_ci		node_set_state(node, N_MEMORY);
6598c2ecf20Sopenharmony_ci}
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_cistatic void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
6628c2ecf20Sopenharmony_ci		unsigned long nr_pages)
6638c2ecf20Sopenharmony_ci{
6648c2ecf20Sopenharmony_ci	unsigned long old_end_pfn = zone_end_pfn(zone);
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
6678c2ecf20Sopenharmony_ci		zone->zone_start_pfn = start_pfn;
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
6708c2ecf20Sopenharmony_ci}
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_cistatic void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
6738c2ecf20Sopenharmony_ci                                     unsigned long nr_pages)
6748c2ecf20Sopenharmony_ci{
6758c2ecf20Sopenharmony_ci	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
6788c2ecf20Sopenharmony_ci		pgdat->node_start_pfn = start_pfn;
6798c2ecf20Sopenharmony_ci
6808c2ecf20Sopenharmony_ci	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci}
6838c2ecf20Sopenharmony_ci/*
6848c2ecf20Sopenharmony_ci * Associate the pfn range with the given zone, initializing the memmaps
6858c2ecf20Sopenharmony_ci * and resizing the pgdat/zone data to span the added pages. After this
6868c2ecf20Sopenharmony_ci * call, all affected pages are PG_reserved.
6878c2ecf20Sopenharmony_ci *
6888c2ecf20Sopenharmony_ci * All aligned pageblocks are initialized to the specified migratetype
6898c2ecf20Sopenharmony_ci * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
6908c2ecf20Sopenharmony_ci * zone stats (e.g., nr_isolate_pageblock) are touched.
6918c2ecf20Sopenharmony_ci */
6928c2ecf20Sopenharmony_civoid __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
6938c2ecf20Sopenharmony_ci				  unsigned long nr_pages,
6948c2ecf20Sopenharmony_ci				  struct vmem_altmap *altmap, int migratetype)
6958c2ecf20Sopenharmony_ci{
6968c2ecf20Sopenharmony_ci	struct pglist_data *pgdat = zone->zone_pgdat;
6978c2ecf20Sopenharmony_ci	int nid = pgdat->node_id;
6988c2ecf20Sopenharmony_ci	unsigned long flags;
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	clear_zone_contiguous(zone);
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_ci	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
7038c2ecf20Sopenharmony_ci	pgdat_resize_lock(pgdat, &flags);
7048c2ecf20Sopenharmony_ci	zone_span_writelock(zone);
7058c2ecf20Sopenharmony_ci	if (zone_is_empty(zone))
7068c2ecf20Sopenharmony_ci		init_currently_empty_zone(zone, start_pfn, nr_pages);
7078c2ecf20Sopenharmony_ci	resize_zone_range(zone, start_pfn, nr_pages);
7088c2ecf20Sopenharmony_ci	zone_span_writeunlock(zone);
7098c2ecf20Sopenharmony_ci	resize_pgdat_range(pgdat, start_pfn, nr_pages);
7108c2ecf20Sopenharmony_ci	pgdat_resize_unlock(pgdat, &flags);
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	/*
7138c2ecf20Sopenharmony_ci	 * TODO now we have a visible range of pages which are not associated
7148c2ecf20Sopenharmony_ci	 * with their zone properly. Not nice but set_pfnblock_flags_mask
7158c2ecf20Sopenharmony_ci	 * expects the zone spans the pfn range. All the pages in the range
7168c2ecf20Sopenharmony_ci	 * are reserved so nobody should be touching them so we should be safe
7178c2ecf20Sopenharmony_ci	 */
7188c2ecf20Sopenharmony_ci	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
7198c2ecf20Sopenharmony_ci			 MEMINIT_HOTPLUG, altmap, migratetype);
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	set_zone_contiguous(zone);
7228c2ecf20Sopenharmony_ci}
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci/*
7258c2ecf20Sopenharmony_ci * Returns a default kernel memory zone for the given pfn range.
7268c2ecf20Sopenharmony_ci * If no kernel zone covers this pfn range it will automatically go
7278c2ecf20Sopenharmony_ci * to the ZONE_NORMAL.
7288c2ecf20Sopenharmony_ci */
7298c2ecf20Sopenharmony_cistatic struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
7308c2ecf20Sopenharmony_ci		unsigned long nr_pages)
7318c2ecf20Sopenharmony_ci{
7328c2ecf20Sopenharmony_ci	struct pglist_data *pgdat = NODE_DATA(nid);
7338c2ecf20Sopenharmony_ci	int zid;
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
7368c2ecf20Sopenharmony_ci		struct zone *zone = &pgdat->node_zones[zid];
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci		if (zone_intersects(zone, start_pfn, nr_pages))
7398c2ecf20Sopenharmony_ci			return zone;
7408c2ecf20Sopenharmony_ci	}
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	return &pgdat->node_zones[ZONE_NORMAL];
7438c2ecf20Sopenharmony_ci}
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_cistatic inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
7468c2ecf20Sopenharmony_ci		unsigned long nr_pages)
7478c2ecf20Sopenharmony_ci{
7488c2ecf20Sopenharmony_ci	struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
7498c2ecf20Sopenharmony_ci			nr_pages);
7508c2ecf20Sopenharmony_ci	struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
7518c2ecf20Sopenharmony_ci	bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
7528c2ecf20Sopenharmony_ci	bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	/*
7558c2ecf20Sopenharmony_ci	 * We inherit the existing zone in a simple case where zones do not
7568c2ecf20Sopenharmony_ci	 * overlap in the given range
7578c2ecf20Sopenharmony_ci	 */
7588c2ecf20Sopenharmony_ci	if (in_kernel ^ in_movable)
7598c2ecf20Sopenharmony_ci		return (in_kernel) ? kernel_zone : movable_zone;
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci	/*
7628c2ecf20Sopenharmony_ci	 * If the range doesn't belong to any zone or two zones overlap in the
7638c2ecf20Sopenharmony_ci	 * given range then we use movable zone only if movable_node is
7648c2ecf20Sopenharmony_ci	 * enabled because we always online to a kernel zone by default.
7658c2ecf20Sopenharmony_ci	 */
7668c2ecf20Sopenharmony_ci	return movable_node_enabled ? movable_zone : kernel_zone;
7678c2ecf20Sopenharmony_ci}
7688c2ecf20Sopenharmony_ci
7698c2ecf20Sopenharmony_cistruct zone *zone_for_pfn_range(int online_type, int nid,
7708c2ecf20Sopenharmony_ci		unsigned long start_pfn, unsigned long nr_pages)
7718c2ecf20Sopenharmony_ci{
7728c2ecf20Sopenharmony_ci	if (online_type == MMOP_ONLINE_KERNEL)
7738c2ecf20Sopenharmony_ci		return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	if (online_type == MMOP_ONLINE_MOVABLE)
7768c2ecf20Sopenharmony_ci		return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	return default_zone_for_pfn(nid, start_pfn, nr_pages);
7798c2ecf20Sopenharmony_ci}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ciint __ref online_pages(unsigned long pfn, unsigned long nr_pages,
7828c2ecf20Sopenharmony_ci		       int online_type, int nid)
7838c2ecf20Sopenharmony_ci{
7848c2ecf20Sopenharmony_ci	unsigned long flags;
7858c2ecf20Sopenharmony_ci	struct zone *zone;
7868c2ecf20Sopenharmony_ci	int need_zonelists_rebuild = 0;
7878c2ecf20Sopenharmony_ci	int ret;
7888c2ecf20Sopenharmony_ci	struct memory_notify arg;
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_ci	/* We can only online full sections (e.g., SECTION_IS_ONLINE) */
7918c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!nr_pages ||
7928c2ecf20Sopenharmony_ci			 !IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)))
7938c2ecf20Sopenharmony_ci		return -EINVAL;
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	mem_hotplug_begin();
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci	/* associate pfn range with the zone */
7988c2ecf20Sopenharmony_ci	zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
7998c2ecf20Sopenharmony_ci	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	arg.start_pfn = pfn;
8028c2ecf20Sopenharmony_ci	arg.nr_pages = nr_pages;
8038c2ecf20Sopenharmony_ci	node_states_check_changes_online(nr_pages, zone, &arg);
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	ret = memory_notify(MEM_GOING_ONLINE, &arg);
8068c2ecf20Sopenharmony_ci	ret = notifier_to_errno(ret);
8078c2ecf20Sopenharmony_ci	if (ret)
8088c2ecf20Sopenharmony_ci		goto failed_addition;
8098c2ecf20Sopenharmony_ci
8108c2ecf20Sopenharmony_ci	/*
8118c2ecf20Sopenharmony_ci	 * Fixup the number of isolated pageblocks before marking the sections
8128c2ecf20Sopenharmony_ci	 * onlining, such that undo_isolate_page_range() works correctly.
8138c2ecf20Sopenharmony_ci	 */
8148c2ecf20Sopenharmony_ci	spin_lock_irqsave(&zone->lock, flags);
8158c2ecf20Sopenharmony_ci	zone->nr_isolate_pageblock += nr_pages / pageblock_nr_pages;
8168c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&zone->lock, flags);
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	/*
8198c2ecf20Sopenharmony_ci	 * If this zone is not populated, then it is not in zonelist.
8208c2ecf20Sopenharmony_ci	 * This means the page allocator ignores this zone.
8218c2ecf20Sopenharmony_ci	 * So, zonelist must be updated after online.
8228c2ecf20Sopenharmony_ci	 */
8238c2ecf20Sopenharmony_ci	if (!populated_zone(zone)) {
8248c2ecf20Sopenharmony_ci		need_zonelists_rebuild = 1;
8258c2ecf20Sopenharmony_ci		setup_zone_pageset(zone);
8268c2ecf20Sopenharmony_ci	}
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci	online_pages_range(pfn, nr_pages);
8298c2ecf20Sopenharmony_ci	zone->present_pages += nr_pages;
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	pgdat_resize_lock(zone->zone_pgdat, &flags);
8328c2ecf20Sopenharmony_ci	zone->zone_pgdat->node_present_pages += nr_pages;
8338c2ecf20Sopenharmony_ci	pgdat_resize_unlock(zone->zone_pgdat, &flags);
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci	node_states_set_node(nid, &arg);
8368c2ecf20Sopenharmony_ci	if (need_zonelists_rebuild)
8378c2ecf20Sopenharmony_ci		build_all_zonelists(NULL);
8388c2ecf20Sopenharmony_ci	zone_pcp_update(zone);
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci	/* Basic onlining is complete, allow allocation of onlined pages. */
8418c2ecf20Sopenharmony_ci	undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_ci	/*
8448c2ecf20Sopenharmony_ci	 * Freshly onlined pages aren't shuffled (e.g., all pages are placed to
8458c2ecf20Sopenharmony_ci	 * the tail of the freelist when undoing isolation). Shuffle the whole
8468c2ecf20Sopenharmony_ci	 * zone to make sure the just onlined pages are properly distributed
8478c2ecf20Sopenharmony_ci	 * across the whole freelist - to create an initial shuffle.
8488c2ecf20Sopenharmony_ci	 */
8498c2ecf20Sopenharmony_ci	shuffle_zone(zone);
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	init_per_zone_wmark_min();
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci	kswapd_run(nid);
8548c2ecf20Sopenharmony_ci	kcompactd_run(nid);
8558c2ecf20Sopenharmony_ci#ifdef CONFIG_HYPERHOLD_ZSWAPD
8568c2ecf20Sopenharmony_ci	zswapd_run(nid);
8578c2ecf20Sopenharmony_ci#endif
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci	writeback_set_ratelimit();
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	memory_notify(MEM_ONLINE, &arg);
8628c2ecf20Sopenharmony_ci	mem_hotplug_done();
8638c2ecf20Sopenharmony_ci	return 0;
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_cifailed_addition:
8668c2ecf20Sopenharmony_ci	pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
8678c2ecf20Sopenharmony_ci		 (unsigned long long) pfn << PAGE_SHIFT,
8688c2ecf20Sopenharmony_ci		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
8698c2ecf20Sopenharmony_ci	memory_notify(MEM_CANCEL_ONLINE, &arg);
8708c2ecf20Sopenharmony_ci	remove_pfn_range_from_zone(zone, pfn, nr_pages);
8718c2ecf20Sopenharmony_ci	mem_hotplug_done();
8728c2ecf20Sopenharmony_ci	return ret;
8738c2ecf20Sopenharmony_ci}
8748c2ecf20Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_cistatic void reset_node_present_pages(pg_data_t *pgdat)
8778c2ecf20Sopenharmony_ci{
8788c2ecf20Sopenharmony_ci	struct zone *z;
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
8818c2ecf20Sopenharmony_ci		z->present_pages = 0;
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci	pgdat->node_present_pages = 0;
8848c2ecf20Sopenharmony_ci}
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
8878c2ecf20Sopenharmony_cistatic pg_data_t __ref *hotadd_new_pgdat(int nid)
8888c2ecf20Sopenharmony_ci{
8898c2ecf20Sopenharmony_ci	struct pglist_data *pgdat;
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_ci	pgdat = NODE_DATA(nid);
8928c2ecf20Sopenharmony_ci	if (!pgdat) {
8938c2ecf20Sopenharmony_ci		pgdat = arch_alloc_nodedata(nid);
8948c2ecf20Sopenharmony_ci		if (!pgdat)
8958c2ecf20Sopenharmony_ci			return NULL;
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_ci		pgdat->per_cpu_nodestats =
8988c2ecf20Sopenharmony_ci			alloc_percpu(struct per_cpu_nodestat);
8998c2ecf20Sopenharmony_ci		arch_refresh_nodedata(nid, pgdat);
9008c2ecf20Sopenharmony_ci	} else {
9018c2ecf20Sopenharmony_ci		int cpu;
9028c2ecf20Sopenharmony_ci		/*
9038c2ecf20Sopenharmony_ci		 * Reset the nr_zones, order and highest_zoneidx before reuse.
9048c2ecf20Sopenharmony_ci		 * Note that kswapd will init kswapd_highest_zoneidx properly
9058c2ecf20Sopenharmony_ci		 * when it starts in the near future.
9068c2ecf20Sopenharmony_ci		 */
9078c2ecf20Sopenharmony_ci		pgdat->nr_zones = 0;
9088c2ecf20Sopenharmony_ci		pgdat->kswapd_order = 0;
9098c2ecf20Sopenharmony_ci		pgdat->kswapd_highest_zoneidx = 0;
9108c2ecf20Sopenharmony_ci		for_each_online_cpu(cpu) {
9118c2ecf20Sopenharmony_ci			struct per_cpu_nodestat *p;
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci			p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
9148c2ecf20Sopenharmony_ci			memset(p, 0, sizeof(*p));
9158c2ecf20Sopenharmony_ci		}
9168c2ecf20Sopenharmony_ci	}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci	/* we can use NODE_DATA(nid) from here */
9198c2ecf20Sopenharmony_ci	pgdat->node_id = nid;
9208c2ecf20Sopenharmony_ci	pgdat->node_start_pfn = 0;
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci	/* init node's zones as empty zones, we don't have any present pages.*/
9238c2ecf20Sopenharmony_ci	free_area_init_core_hotplug(nid);
9248c2ecf20Sopenharmony_ci
9258c2ecf20Sopenharmony_ci	/*
9268c2ecf20Sopenharmony_ci	 * The node we allocated has no zone fallback lists. For avoiding
9278c2ecf20Sopenharmony_ci	 * to access not-initialized zonelist, build here.
9288c2ecf20Sopenharmony_ci	 */
9298c2ecf20Sopenharmony_ci	build_all_zonelists(pgdat);
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	/*
9328c2ecf20Sopenharmony_ci	 * When memory is hot-added, all the memory is in offline state. So
9338c2ecf20Sopenharmony_ci	 * clear all zones' present_pages because they will be updated in
9348c2ecf20Sopenharmony_ci	 * online_pages() and offline_pages().
9358c2ecf20Sopenharmony_ci	 */
9368c2ecf20Sopenharmony_ci	reset_node_managed_pages(pgdat);
9378c2ecf20Sopenharmony_ci	reset_node_present_pages(pgdat);
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	return pgdat;
9408c2ecf20Sopenharmony_ci}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_cistatic void rollback_node_hotadd(int nid)
9438c2ecf20Sopenharmony_ci{
9448c2ecf20Sopenharmony_ci	pg_data_t *pgdat = NODE_DATA(nid);
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_ci	arch_refresh_nodedata(nid, NULL);
9478c2ecf20Sopenharmony_ci	free_percpu(pgdat->per_cpu_nodestats);
9488c2ecf20Sopenharmony_ci	arch_free_nodedata(pgdat);
9498c2ecf20Sopenharmony_ci}
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_ci/**
9538c2ecf20Sopenharmony_ci * try_online_node - online a node if offlined
9548c2ecf20Sopenharmony_ci * @nid: the node ID
9558c2ecf20Sopenharmony_ci * @set_node_online: Whether we want to online the node
9568c2ecf20Sopenharmony_ci * called by cpu_up() to online a node without onlined memory.
9578c2ecf20Sopenharmony_ci *
9588c2ecf20Sopenharmony_ci * Returns:
9598c2ecf20Sopenharmony_ci * 1 -> a new node has been allocated
9608c2ecf20Sopenharmony_ci * 0 -> the node is already online
9618c2ecf20Sopenharmony_ci * -ENOMEM -> the node could not be allocated
9628c2ecf20Sopenharmony_ci */
9638c2ecf20Sopenharmony_cistatic int __try_online_node(int nid, bool set_node_online)
9648c2ecf20Sopenharmony_ci{
9658c2ecf20Sopenharmony_ci	pg_data_t *pgdat;
9668c2ecf20Sopenharmony_ci	int ret = 1;
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci	if (node_online(nid))
9698c2ecf20Sopenharmony_ci		return 0;
9708c2ecf20Sopenharmony_ci
9718c2ecf20Sopenharmony_ci	pgdat = hotadd_new_pgdat(nid);
9728c2ecf20Sopenharmony_ci	if (!pgdat) {
9738c2ecf20Sopenharmony_ci		pr_err("Cannot online node %d due to NULL pgdat\n", nid);
9748c2ecf20Sopenharmony_ci		ret = -ENOMEM;
9758c2ecf20Sopenharmony_ci		goto out;
9768c2ecf20Sopenharmony_ci	}
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	if (set_node_online) {
9798c2ecf20Sopenharmony_ci		node_set_online(nid);
9808c2ecf20Sopenharmony_ci		ret = register_one_node(nid);
9818c2ecf20Sopenharmony_ci		BUG_ON(ret);
9828c2ecf20Sopenharmony_ci	}
9838c2ecf20Sopenharmony_ciout:
9848c2ecf20Sopenharmony_ci	return ret;
9858c2ecf20Sopenharmony_ci}
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci/*
9888c2ecf20Sopenharmony_ci * Users of this function always want to online/register the node
9898c2ecf20Sopenharmony_ci */
9908c2ecf20Sopenharmony_ciint try_online_node(int nid)
9918c2ecf20Sopenharmony_ci{
9928c2ecf20Sopenharmony_ci	int ret;
9938c2ecf20Sopenharmony_ci
9948c2ecf20Sopenharmony_ci	mem_hotplug_begin();
9958c2ecf20Sopenharmony_ci	ret =  __try_online_node(nid, true);
9968c2ecf20Sopenharmony_ci	mem_hotplug_done();
9978c2ecf20Sopenharmony_ci	return ret;
9988c2ecf20Sopenharmony_ci}
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_cistatic int check_hotplug_memory_range(u64 start, u64 size)
10018c2ecf20Sopenharmony_ci{
10028c2ecf20Sopenharmony_ci	/* memory range must be block size aligned */
10038c2ecf20Sopenharmony_ci	if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) ||
10048c2ecf20Sopenharmony_ci	    !IS_ALIGNED(size, memory_block_size_bytes())) {
10058c2ecf20Sopenharmony_ci		pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx",
10068c2ecf20Sopenharmony_ci		       memory_block_size_bytes(), start, size);
10078c2ecf20Sopenharmony_ci		return -EINVAL;
10088c2ecf20Sopenharmony_ci	}
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci	return 0;
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_cistatic int online_memory_block(struct memory_block *mem, void *arg)
10148c2ecf20Sopenharmony_ci{
10158c2ecf20Sopenharmony_ci	mem->online_type = memhp_default_online_type;
10168c2ecf20Sopenharmony_ci	return device_online(&mem->dev);
10178c2ecf20Sopenharmony_ci}
10188c2ecf20Sopenharmony_ci
10198c2ecf20Sopenharmony_ci/*
10208c2ecf20Sopenharmony_ci * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
10218c2ecf20Sopenharmony_ci * and online/offline operations (triggered e.g. by sysfs).
10228c2ecf20Sopenharmony_ci *
10238c2ecf20Sopenharmony_ci * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
10248c2ecf20Sopenharmony_ci */
10258c2ecf20Sopenharmony_ciint __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
10268c2ecf20Sopenharmony_ci{
10278c2ecf20Sopenharmony_ci	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
10288c2ecf20Sopenharmony_ci	u64 start, size;
10298c2ecf20Sopenharmony_ci	bool new_node = false;
10308c2ecf20Sopenharmony_ci	int ret;
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci	start = res->start;
10338c2ecf20Sopenharmony_ci	size = resource_size(res);
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_ci	ret = check_hotplug_memory_range(start, size);
10368c2ecf20Sopenharmony_ci	if (ret)
10378c2ecf20Sopenharmony_ci		return ret;
10388c2ecf20Sopenharmony_ci
10398c2ecf20Sopenharmony_ci	if (!node_possible(nid)) {
10408c2ecf20Sopenharmony_ci		WARN(1, "node %d was absent from the node_possible_map\n", nid);
10418c2ecf20Sopenharmony_ci		return -EINVAL;
10428c2ecf20Sopenharmony_ci	}
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci	mem_hotplug_begin();
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
10478c2ecf20Sopenharmony_ci		memblock_add_node(start, size, nid);
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	ret = __try_online_node(nid, false);
10508c2ecf20Sopenharmony_ci	if (ret < 0)
10518c2ecf20Sopenharmony_ci		goto error;
10528c2ecf20Sopenharmony_ci	new_node = ret;
10538c2ecf20Sopenharmony_ci
10548c2ecf20Sopenharmony_ci	/* call arch's memory hotadd */
10558c2ecf20Sopenharmony_ci	ret = arch_add_memory(nid, start, size, &params);
10568c2ecf20Sopenharmony_ci	if (ret < 0)
10578c2ecf20Sopenharmony_ci		goto error;
10588c2ecf20Sopenharmony_ci
10598c2ecf20Sopenharmony_ci	/* create memory block devices after memory was added */
10608c2ecf20Sopenharmony_ci	ret = create_memory_block_devices(start, size);
10618c2ecf20Sopenharmony_ci	if (ret) {
10628c2ecf20Sopenharmony_ci		arch_remove_memory(nid, start, size, NULL);
10638c2ecf20Sopenharmony_ci		goto error;
10648c2ecf20Sopenharmony_ci	}
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	if (new_node) {
10678c2ecf20Sopenharmony_ci		/* If sysfs file of new node can't be created, cpu on the node
10688c2ecf20Sopenharmony_ci		 * can't be hot-added. There is no rollback way now.
10698c2ecf20Sopenharmony_ci		 * So, check by BUG_ON() to catch it reluctantly..
10708c2ecf20Sopenharmony_ci		 * We online node here. We can't roll back from here.
10718c2ecf20Sopenharmony_ci		 */
10728c2ecf20Sopenharmony_ci		node_set_online(nid);
10738c2ecf20Sopenharmony_ci		ret = __register_one_node(nid);
10748c2ecf20Sopenharmony_ci		BUG_ON(ret);
10758c2ecf20Sopenharmony_ci	}
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	/* link memory sections under this node.*/
10788c2ecf20Sopenharmony_ci	link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
10798c2ecf20Sopenharmony_ci			  MEMINIT_HOTPLUG);
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci	/* create new memmap entry */
10828c2ecf20Sopenharmony_ci	if (!strcmp(res->name, "System RAM"))
10838c2ecf20Sopenharmony_ci		firmware_map_add_hotplug(start, start + size, "System RAM");
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci	/* device_online() will take the lock when calling online_pages() */
10868c2ecf20Sopenharmony_ci	mem_hotplug_done();
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_ci	/*
10898c2ecf20Sopenharmony_ci	 * In case we're allowed to merge the resource, flag it and trigger
10908c2ecf20Sopenharmony_ci	 * merging now that adding succeeded.
10918c2ecf20Sopenharmony_ci	 */
10928c2ecf20Sopenharmony_ci	if (mhp_flags & MEMHP_MERGE_RESOURCE)
10938c2ecf20Sopenharmony_ci		merge_system_ram_resource(res);
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_ci	/* online pages if requested */
10968c2ecf20Sopenharmony_ci	if (memhp_default_online_type != MMOP_OFFLINE)
10978c2ecf20Sopenharmony_ci		walk_memory_blocks(start, size, NULL, online_memory_block);
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci	return ret;
11008c2ecf20Sopenharmony_cierror:
11018c2ecf20Sopenharmony_ci	/* rollback pgdat allocation and others */
11028c2ecf20Sopenharmony_ci	if (new_node)
11038c2ecf20Sopenharmony_ci		rollback_node_hotadd(nid);
11048c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
11058c2ecf20Sopenharmony_ci		memblock_remove(start, size);
11068c2ecf20Sopenharmony_ci	mem_hotplug_done();
11078c2ecf20Sopenharmony_ci	return ret;
11088c2ecf20Sopenharmony_ci}
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ci/* requires device_hotplug_lock, see add_memory_resource() */
11118c2ecf20Sopenharmony_ciint __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
11128c2ecf20Sopenharmony_ci{
11138c2ecf20Sopenharmony_ci	struct resource *res;
11148c2ecf20Sopenharmony_ci	int ret;
11158c2ecf20Sopenharmony_ci
11168c2ecf20Sopenharmony_ci	res = register_memory_resource(start, size, "System RAM");
11178c2ecf20Sopenharmony_ci	if (IS_ERR(res))
11188c2ecf20Sopenharmony_ci		return PTR_ERR(res);
11198c2ecf20Sopenharmony_ci
11208c2ecf20Sopenharmony_ci	ret = add_memory_resource(nid, res, mhp_flags);
11218c2ecf20Sopenharmony_ci	if (ret < 0)
11228c2ecf20Sopenharmony_ci		release_memory_resource(res);
11238c2ecf20Sopenharmony_ci	return ret;
11248c2ecf20Sopenharmony_ci}
11258c2ecf20Sopenharmony_ci
11268c2ecf20Sopenharmony_ciint add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
11278c2ecf20Sopenharmony_ci{
11288c2ecf20Sopenharmony_ci	int rc;
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	lock_device_hotplug();
11318c2ecf20Sopenharmony_ci	rc = __add_memory(nid, start, size, mhp_flags);
11328c2ecf20Sopenharmony_ci	unlock_device_hotplug();
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci	return rc;
11358c2ecf20Sopenharmony_ci}
11368c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(add_memory);
11378c2ecf20Sopenharmony_ci
11388c2ecf20Sopenharmony_ci/*
11398c2ecf20Sopenharmony_ci * Add special, driver-managed memory to the system as system RAM. Such
11408c2ecf20Sopenharmony_ci * memory is not exposed via the raw firmware-provided memmap as system
11418c2ecf20Sopenharmony_ci * RAM, instead, it is detected and added by a driver - during cold boot,
11428c2ecf20Sopenharmony_ci * after a reboot, and after kexec.
11438c2ecf20Sopenharmony_ci *
11448c2ecf20Sopenharmony_ci * Reasons why this memory should not be used for the initial memmap of a
11458c2ecf20Sopenharmony_ci * kexec kernel or for placing kexec images:
11468c2ecf20Sopenharmony_ci * - The booting kernel is in charge of determining how this memory will be
11478c2ecf20Sopenharmony_ci *   used (e.g., use persistent memory as system RAM)
11488c2ecf20Sopenharmony_ci * - Coordination with a hypervisor is required before this memory
11498c2ecf20Sopenharmony_ci *   can be used (e.g., inaccessible parts).
11508c2ecf20Sopenharmony_ci *
11518c2ecf20Sopenharmony_ci * For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided
11528c2ecf20Sopenharmony_ci * memory map") are created. Also, the created memory resource is flagged
11538c2ecf20Sopenharmony_ci * with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case
11548c2ecf20Sopenharmony_ci * this memory as well (esp., not place kexec images onto it).
11558c2ecf20Sopenharmony_ci *
11568c2ecf20Sopenharmony_ci * The resource_name (visible via /proc/iomem) has to have the format
11578c2ecf20Sopenharmony_ci * "System RAM ($DRIVER)".
11588c2ecf20Sopenharmony_ci */
11598c2ecf20Sopenharmony_ciint add_memory_driver_managed(int nid, u64 start, u64 size,
11608c2ecf20Sopenharmony_ci			      const char *resource_name, mhp_t mhp_flags)
11618c2ecf20Sopenharmony_ci{
11628c2ecf20Sopenharmony_ci	struct resource *res;
11638c2ecf20Sopenharmony_ci	int rc;
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci	if (!resource_name ||
11668c2ecf20Sopenharmony_ci	    strstr(resource_name, "System RAM (") != resource_name ||
11678c2ecf20Sopenharmony_ci	    resource_name[strlen(resource_name) - 1] != ')')
11688c2ecf20Sopenharmony_ci		return -EINVAL;
11698c2ecf20Sopenharmony_ci
11708c2ecf20Sopenharmony_ci	lock_device_hotplug();
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci	res = register_memory_resource(start, size, resource_name);
11738c2ecf20Sopenharmony_ci	if (IS_ERR(res)) {
11748c2ecf20Sopenharmony_ci		rc = PTR_ERR(res);
11758c2ecf20Sopenharmony_ci		goto out_unlock;
11768c2ecf20Sopenharmony_ci	}
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_ci	rc = add_memory_resource(nid, res, mhp_flags);
11798c2ecf20Sopenharmony_ci	if (rc < 0)
11808c2ecf20Sopenharmony_ci		release_memory_resource(res);
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_ciout_unlock:
11838c2ecf20Sopenharmony_ci	unlock_device_hotplug();
11848c2ecf20Sopenharmony_ci	return rc;
11858c2ecf20Sopenharmony_ci}
11868c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(add_memory_driver_managed);
11878c2ecf20Sopenharmony_ci
11888c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE
11898c2ecf20Sopenharmony_ci/*
11908c2ecf20Sopenharmony_ci * Confirm all pages in a range [start, end) belong to the same zone (skipping
11918c2ecf20Sopenharmony_ci * memory holes). When true, return the zone.
11928c2ecf20Sopenharmony_ci */
11938c2ecf20Sopenharmony_cistruct zone *test_pages_in_a_zone(unsigned long start_pfn,
11948c2ecf20Sopenharmony_ci				  unsigned long end_pfn)
11958c2ecf20Sopenharmony_ci{
11968c2ecf20Sopenharmony_ci	unsigned long pfn, sec_end_pfn;
11978c2ecf20Sopenharmony_ci	struct zone *zone = NULL;
11988c2ecf20Sopenharmony_ci	struct page *page;
11998c2ecf20Sopenharmony_ci	int i;
12008c2ecf20Sopenharmony_ci	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
12018c2ecf20Sopenharmony_ci	     pfn < end_pfn;
12028c2ecf20Sopenharmony_ci	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
12038c2ecf20Sopenharmony_ci		/* Make sure the memory section is present first */
12048c2ecf20Sopenharmony_ci		if (!present_section_nr(pfn_to_section_nr(pfn)))
12058c2ecf20Sopenharmony_ci			continue;
12068c2ecf20Sopenharmony_ci		for (; pfn < sec_end_pfn && pfn < end_pfn;
12078c2ecf20Sopenharmony_ci		     pfn += MAX_ORDER_NR_PAGES) {
12088c2ecf20Sopenharmony_ci			i = 0;
12098c2ecf20Sopenharmony_ci			/* This is just a CONFIG_HOLES_IN_ZONE check.*/
12108c2ecf20Sopenharmony_ci			while ((i < MAX_ORDER_NR_PAGES) &&
12118c2ecf20Sopenharmony_ci				!pfn_valid_within(pfn + i))
12128c2ecf20Sopenharmony_ci				i++;
12138c2ecf20Sopenharmony_ci			if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
12148c2ecf20Sopenharmony_ci				continue;
12158c2ecf20Sopenharmony_ci			/* Check if we got outside of the zone */
12168c2ecf20Sopenharmony_ci			if (zone && !zone_spans_pfn(zone, pfn + i))
12178c2ecf20Sopenharmony_ci				return NULL;
12188c2ecf20Sopenharmony_ci			page = pfn_to_page(pfn + i);
12198c2ecf20Sopenharmony_ci			if (zone && page_zone(page) != zone)
12208c2ecf20Sopenharmony_ci				return NULL;
12218c2ecf20Sopenharmony_ci			zone = page_zone(page);
12228c2ecf20Sopenharmony_ci		}
12238c2ecf20Sopenharmony_ci	}
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci	return zone;
12268c2ecf20Sopenharmony_ci}
12278c2ecf20Sopenharmony_ci
12288c2ecf20Sopenharmony_ci/*
12298c2ecf20Sopenharmony_ci * Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
12308c2ecf20Sopenharmony_ci * non-lru movable pages and hugepages). Will skip over most unmovable
12318c2ecf20Sopenharmony_ci * pages (esp., pages that can be skipped when offlining), but bail out on
12328c2ecf20Sopenharmony_ci * definitely unmovable pages.
12338c2ecf20Sopenharmony_ci *
12348c2ecf20Sopenharmony_ci * Returns:
12358c2ecf20Sopenharmony_ci *	0 in case a movable page is found and movable_pfn was updated.
12368c2ecf20Sopenharmony_ci *	-ENOENT in case no movable page was found.
12378c2ecf20Sopenharmony_ci *	-EBUSY in case a definitely unmovable page was found.
12388c2ecf20Sopenharmony_ci */
12398c2ecf20Sopenharmony_cistatic int scan_movable_pages(unsigned long start, unsigned long end,
12408c2ecf20Sopenharmony_ci			      unsigned long *movable_pfn)
12418c2ecf20Sopenharmony_ci{
12428c2ecf20Sopenharmony_ci	unsigned long pfn;
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci	for (pfn = start; pfn < end; pfn++) {
12458c2ecf20Sopenharmony_ci		struct page *page, *head;
12468c2ecf20Sopenharmony_ci		unsigned long skip;
12478c2ecf20Sopenharmony_ci
12488c2ecf20Sopenharmony_ci		if (!pfn_valid(pfn))
12498c2ecf20Sopenharmony_ci			continue;
12508c2ecf20Sopenharmony_ci		page = pfn_to_page(pfn);
12518c2ecf20Sopenharmony_ci		if (PageLRU(page))
12528c2ecf20Sopenharmony_ci			goto found;
12538c2ecf20Sopenharmony_ci		if (__PageMovable(page))
12548c2ecf20Sopenharmony_ci			goto found;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci		/*
12578c2ecf20Sopenharmony_ci		 * PageOffline() pages that are not marked __PageMovable() and
12588c2ecf20Sopenharmony_ci		 * have a reference count > 0 (after MEM_GOING_OFFLINE) are
12598c2ecf20Sopenharmony_ci		 * definitely unmovable. If their reference count would be 0,
12608c2ecf20Sopenharmony_ci		 * they could at least be skipped when offlining memory.
12618c2ecf20Sopenharmony_ci		 */
12628c2ecf20Sopenharmony_ci		if (PageOffline(page) && page_count(page))
12638c2ecf20Sopenharmony_ci			return -EBUSY;
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_ci		if (!PageHuge(page))
12668c2ecf20Sopenharmony_ci			continue;
12678c2ecf20Sopenharmony_ci		head = compound_head(page);
12688c2ecf20Sopenharmony_ci		if (page_huge_active(head))
12698c2ecf20Sopenharmony_ci			goto found;
12708c2ecf20Sopenharmony_ci		skip = compound_nr(head) - (pfn - page_to_pfn(head));
12718c2ecf20Sopenharmony_ci		pfn += skip - 1;
12728c2ecf20Sopenharmony_ci	}
12738c2ecf20Sopenharmony_ci	return -ENOENT;
12748c2ecf20Sopenharmony_cifound:
12758c2ecf20Sopenharmony_ci	*movable_pfn = pfn;
12768c2ecf20Sopenharmony_ci	return 0;
12778c2ecf20Sopenharmony_ci}
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_cistatic int
12808c2ecf20Sopenharmony_cido_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
12818c2ecf20Sopenharmony_ci{
12828c2ecf20Sopenharmony_ci	unsigned long pfn;
12838c2ecf20Sopenharmony_ci	struct page *page, *head;
12848c2ecf20Sopenharmony_ci	int ret = 0;
12858c2ecf20Sopenharmony_ci	LIST_HEAD(source);
12868c2ecf20Sopenharmony_ci	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
12878c2ecf20Sopenharmony_ci				      DEFAULT_RATELIMIT_BURST);
12888c2ecf20Sopenharmony_ci
12898c2ecf20Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
12908c2ecf20Sopenharmony_ci		if (!pfn_valid(pfn))
12918c2ecf20Sopenharmony_ci			continue;
12928c2ecf20Sopenharmony_ci		page = pfn_to_page(pfn);
12938c2ecf20Sopenharmony_ci		head = compound_head(page);
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci		if (PageHuge(page)) {
12968c2ecf20Sopenharmony_ci			pfn = page_to_pfn(head) + compound_nr(head) - 1;
12978c2ecf20Sopenharmony_ci			isolate_hugetlb(head, &source);
12988c2ecf20Sopenharmony_ci			continue;
12998c2ecf20Sopenharmony_ci		} else if (PageTransHuge(page))
13008c2ecf20Sopenharmony_ci			pfn = page_to_pfn(head) + thp_nr_pages(page) - 1;
13018c2ecf20Sopenharmony_ci
13028c2ecf20Sopenharmony_ci		/*
13038c2ecf20Sopenharmony_ci		 * HWPoison pages have elevated reference counts so the migration would
13048c2ecf20Sopenharmony_ci		 * fail on them. It also doesn't make any sense to migrate them in the
13058c2ecf20Sopenharmony_ci		 * first place. Still try to unmap such a page in case it is still mapped
13068c2ecf20Sopenharmony_ci		 * (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
13078c2ecf20Sopenharmony_ci		 * the unmap as the catch all safety net).
13088c2ecf20Sopenharmony_ci		 */
13098c2ecf20Sopenharmony_ci		if (PageHWPoison(page)) {
13108c2ecf20Sopenharmony_ci			if (WARN_ON(PageLRU(page)))
13118c2ecf20Sopenharmony_ci				isolate_lru_page(page);
13128c2ecf20Sopenharmony_ci			if (page_mapped(page))
13138c2ecf20Sopenharmony_ci				try_to_unmap(page, TTU_IGNORE_MLOCK);
13148c2ecf20Sopenharmony_ci			continue;
13158c2ecf20Sopenharmony_ci		}
13168c2ecf20Sopenharmony_ci
13178c2ecf20Sopenharmony_ci		if (!get_page_unless_zero(page))
13188c2ecf20Sopenharmony_ci			continue;
13198c2ecf20Sopenharmony_ci		/*
13208c2ecf20Sopenharmony_ci		 * We can skip free pages. And we can deal with pages on
13218c2ecf20Sopenharmony_ci		 * LRU and non-lru movable pages.
13228c2ecf20Sopenharmony_ci		 */
13238c2ecf20Sopenharmony_ci		if (PageLRU(page))
13248c2ecf20Sopenharmony_ci			ret = isolate_lru_page(page);
13258c2ecf20Sopenharmony_ci		else
13268c2ecf20Sopenharmony_ci			ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
13278c2ecf20Sopenharmony_ci		if (!ret) { /* Success */
13288c2ecf20Sopenharmony_ci			list_add_tail(&page->lru, &source);
13298c2ecf20Sopenharmony_ci			if (!__PageMovable(page))
13308c2ecf20Sopenharmony_ci				inc_node_page_state(page, NR_ISOLATED_ANON +
13318c2ecf20Sopenharmony_ci						    page_is_file_lru(page));
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci		} else {
13348c2ecf20Sopenharmony_ci			if (__ratelimit(&migrate_rs)) {
13358c2ecf20Sopenharmony_ci				pr_warn("failed to isolate pfn %lx\n", pfn);
13368c2ecf20Sopenharmony_ci				dump_page(page, "isolation failed");
13378c2ecf20Sopenharmony_ci			}
13388c2ecf20Sopenharmony_ci		}
13398c2ecf20Sopenharmony_ci		put_page(page);
13408c2ecf20Sopenharmony_ci	}
13418c2ecf20Sopenharmony_ci	if (!list_empty(&source)) {
13428c2ecf20Sopenharmony_ci		nodemask_t nmask = node_states[N_MEMORY];
13438c2ecf20Sopenharmony_ci		struct migration_target_control mtc = {
13448c2ecf20Sopenharmony_ci			.nmask = &nmask,
13458c2ecf20Sopenharmony_ci			.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
13468c2ecf20Sopenharmony_ci		};
13478c2ecf20Sopenharmony_ci
13488c2ecf20Sopenharmony_ci		/*
13498c2ecf20Sopenharmony_ci		 * We have checked that migration range is on a single zone so
13508c2ecf20Sopenharmony_ci		 * we can use the nid of the first page to all the others.
13518c2ecf20Sopenharmony_ci		 */
13528c2ecf20Sopenharmony_ci		mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci		/*
13558c2ecf20Sopenharmony_ci		 * try to allocate from a different node but reuse this node
13568c2ecf20Sopenharmony_ci		 * if there are no other online nodes to be used (e.g. we are
13578c2ecf20Sopenharmony_ci		 * offlining a part of the only existing node)
13588c2ecf20Sopenharmony_ci		 */
13598c2ecf20Sopenharmony_ci		node_clear(mtc.nid, nmask);
13608c2ecf20Sopenharmony_ci		if (nodes_empty(nmask))
13618c2ecf20Sopenharmony_ci			node_set(mtc.nid, nmask);
13628c2ecf20Sopenharmony_ci		ret = migrate_pages(&source, alloc_migration_target, NULL,
13638c2ecf20Sopenharmony_ci			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
13648c2ecf20Sopenharmony_ci		if (ret) {
13658c2ecf20Sopenharmony_ci			list_for_each_entry(page, &source, lru) {
13668c2ecf20Sopenharmony_ci				if (__ratelimit(&migrate_rs)) {
13678c2ecf20Sopenharmony_ci					pr_warn("migrating pfn %lx failed ret:%d\n",
13688c2ecf20Sopenharmony_ci						page_to_pfn(page), ret);
13698c2ecf20Sopenharmony_ci					dump_page(page, "migration failure");
13708c2ecf20Sopenharmony_ci				}
13718c2ecf20Sopenharmony_ci			}
13728c2ecf20Sopenharmony_ci			putback_movable_pages(&source);
13738c2ecf20Sopenharmony_ci		}
13748c2ecf20Sopenharmony_ci	}
13758c2ecf20Sopenharmony_ci
13768c2ecf20Sopenharmony_ci	return ret;
13778c2ecf20Sopenharmony_ci}
13788c2ecf20Sopenharmony_ci
/*
 * Handler for the "movable_node" early parameter: the mere presence of the
 * option sets movable_node_enabled; the argument string @p is not looked at.
 * Always returns 0.
 */
static int __init cmdline_parse_movable_node(char *p)
{
	movable_node_enabled = true;
	return 0;
}
early_param("movable_node", cmdline_parse_movable_node);
13858c2ecf20Sopenharmony_ci
13868c2ecf20Sopenharmony_ci/* check which state of node_states will be changed when offline memory */
13878c2ecf20Sopenharmony_cistatic void node_states_check_changes_offline(unsigned long nr_pages,
13888c2ecf20Sopenharmony_ci		struct zone *zone, struct memory_notify *arg)
13898c2ecf20Sopenharmony_ci{
13908c2ecf20Sopenharmony_ci	struct pglist_data *pgdat = zone->zone_pgdat;
13918c2ecf20Sopenharmony_ci	unsigned long present_pages = 0;
13928c2ecf20Sopenharmony_ci	enum zone_type zt;
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci	arg->status_change_nid = NUMA_NO_NODE;
13958c2ecf20Sopenharmony_ci	arg->status_change_nid_normal = NUMA_NO_NODE;
13968c2ecf20Sopenharmony_ci	arg->status_change_nid_high = NUMA_NO_NODE;
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci	/*
13998c2ecf20Sopenharmony_ci	 * Check whether node_states[N_NORMAL_MEMORY] will be changed.
14008c2ecf20Sopenharmony_ci	 * If the memory to be offline is within the range
14018c2ecf20Sopenharmony_ci	 * [0..ZONE_NORMAL], and it is the last present memory there,
14028c2ecf20Sopenharmony_ci	 * the zones in that range will become empty after the offlining,
14038c2ecf20Sopenharmony_ci	 * thus we can determine that we need to clear the node from
14048c2ecf20Sopenharmony_ci	 * node_states[N_NORMAL_MEMORY].
14058c2ecf20Sopenharmony_ci	 */
14068c2ecf20Sopenharmony_ci	for (zt = 0; zt <= ZONE_NORMAL; zt++)
14078c2ecf20Sopenharmony_ci		present_pages += pgdat->node_zones[zt].present_pages;
14088c2ecf20Sopenharmony_ci	if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
14098c2ecf20Sopenharmony_ci		arg->status_change_nid_normal = zone_to_nid(zone);
14108c2ecf20Sopenharmony_ci
14118c2ecf20Sopenharmony_ci#ifdef CONFIG_HIGHMEM
14128c2ecf20Sopenharmony_ci	/*
14138c2ecf20Sopenharmony_ci	 * node_states[N_HIGH_MEMORY] contains nodes which
14148c2ecf20Sopenharmony_ci	 * have normal memory or high memory.
14158c2ecf20Sopenharmony_ci	 * Here we add the present_pages belonging to ZONE_HIGHMEM.
14168c2ecf20Sopenharmony_ci	 * If the zone is within the range of [0..ZONE_HIGHMEM), and
14178c2ecf20Sopenharmony_ci	 * we determine that the zones in that range become empty,
14188c2ecf20Sopenharmony_ci	 * we need to clear the node for N_HIGH_MEMORY.
14198c2ecf20Sopenharmony_ci	 */
14208c2ecf20Sopenharmony_ci	present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages;
14218c2ecf20Sopenharmony_ci	if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages)
14228c2ecf20Sopenharmony_ci		arg->status_change_nid_high = zone_to_nid(zone);
14238c2ecf20Sopenharmony_ci#endif
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	/*
14268c2ecf20Sopenharmony_ci	 * We have accounted the pages from [0..ZONE_NORMAL), and
14278c2ecf20Sopenharmony_ci	 * in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM
14288c2ecf20Sopenharmony_ci	 * as well.
14298c2ecf20Sopenharmony_ci	 * Here we count the possible pages from ZONE_MOVABLE.
14308c2ecf20Sopenharmony_ci	 * If after having accounted all the pages, we see that the nr_pages
14318c2ecf20Sopenharmony_ci	 * to be offlined is over or equal to the accounted pages,
14328c2ecf20Sopenharmony_ci	 * we know that the node will become empty, and so, we can clear
14338c2ecf20Sopenharmony_ci	 * it for N_MEMORY as well.
14348c2ecf20Sopenharmony_ci	 */
14358c2ecf20Sopenharmony_ci	present_pages += pgdat->node_zones[ZONE_MOVABLE].present_pages;
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_ci	if (nr_pages >= present_pages)
14388c2ecf20Sopenharmony_ci		arg->status_change_nid = zone_to_nid(zone);
14398c2ecf20Sopenharmony_ci}
14408c2ecf20Sopenharmony_ci
/*
 * Clear the node states of @node that @arg flags as changing: a
 * status_change_nid* field >= 0 means the matching state gets cleared.
 * The fields are filled in by node_states_check_changes_offline().
 */
static void node_states_clear_node(int node, struct memory_notify *arg)
{
	if (arg->status_change_nid_normal >= 0)
		node_clear_state(node, N_NORMAL_MEMORY);

	if (arg->status_change_nid_high >= 0)
		node_clear_state(node, N_HIGH_MEMORY);

	if (arg->status_change_nid >= 0)
		node_clear_state(node, N_MEMORY);
}
14528c2ecf20Sopenharmony_ci
/*
 * Range-walk callback that accumulates page counts: adds @nr_pages to the
 * unsigned long counter @data points to. @start_pfn is unused. Always
 * returns 0.
 */
static int count_system_ram_pages_cb(unsigned long start_pfn,
				     unsigned long nr_pages, void *data)
{
	unsigned long *total = data;

	*total = *total + nr_pages;

	return 0;
}
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_ciint __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
14638c2ecf20Sopenharmony_ci{
14648c2ecf20Sopenharmony_ci	const unsigned long end_pfn = start_pfn + nr_pages;
14658c2ecf20Sopenharmony_ci	unsigned long pfn, system_ram_pages = 0;
14668c2ecf20Sopenharmony_ci	unsigned long flags;
14678c2ecf20Sopenharmony_ci	struct zone *zone;
14688c2ecf20Sopenharmony_ci	struct memory_notify arg;
14698c2ecf20Sopenharmony_ci	int ret, node;
14708c2ecf20Sopenharmony_ci	char *reason;
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_ci	/* We can only offline full sections (e.g., SECTION_IS_ONLINE) */
14738c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!nr_pages ||
14748c2ecf20Sopenharmony_ci			 !IS_ALIGNED(start_pfn | nr_pages, PAGES_PER_SECTION)))
14758c2ecf20Sopenharmony_ci		return -EINVAL;
14768c2ecf20Sopenharmony_ci
14778c2ecf20Sopenharmony_ci	mem_hotplug_begin();
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_ci	/*
14808c2ecf20Sopenharmony_ci	 * Don't allow to offline memory blocks that contain holes.
14818c2ecf20Sopenharmony_ci	 * Consequently, memory blocks with holes can never get onlined
14828c2ecf20Sopenharmony_ci	 * via the hotplug path - online_pages() - as hotplugged memory has
14838c2ecf20Sopenharmony_ci	 * no holes. This way, we e.g., don't have to worry about marking
14848c2ecf20Sopenharmony_ci	 * memory holes PG_reserved, don't need pfn_valid() checks, and can
14858c2ecf20Sopenharmony_ci	 * avoid using walk_system_ram_range() later.
14868c2ecf20Sopenharmony_ci	 */
14878c2ecf20Sopenharmony_ci	walk_system_ram_range(start_pfn, nr_pages, &system_ram_pages,
14888c2ecf20Sopenharmony_ci			      count_system_ram_pages_cb);
14898c2ecf20Sopenharmony_ci	if (system_ram_pages != nr_pages) {
14908c2ecf20Sopenharmony_ci		ret = -EINVAL;
14918c2ecf20Sopenharmony_ci		reason = "memory holes";
14928c2ecf20Sopenharmony_ci		goto failed_removal;
14938c2ecf20Sopenharmony_ci	}
14948c2ecf20Sopenharmony_ci
14958c2ecf20Sopenharmony_ci	/* This makes hotplug much easier...and readable.
14968c2ecf20Sopenharmony_ci	   we assume this for now. .*/
14978c2ecf20Sopenharmony_ci	zone = test_pages_in_a_zone(start_pfn, end_pfn);
14988c2ecf20Sopenharmony_ci	if (!zone) {
14998c2ecf20Sopenharmony_ci		ret = -EINVAL;
15008c2ecf20Sopenharmony_ci		reason = "multizone range";
15018c2ecf20Sopenharmony_ci		goto failed_removal;
15028c2ecf20Sopenharmony_ci	}
15038c2ecf20Sopenharmony_ci	node = zone_to_nid(zone);
15048c2ecf20Sopenharmony_ci
15058c2ecf20Sopenharmony_ci	/* set above range as isolated */
15068c2ecf20Sopenharmony_ci	ret = start_isolate_page_range(start_pfn, end_pfn,
15078c2ecf20Sopenharmony_ci				       MIGRATE_MOVABLE,
15088c2ecf20Sopenharmony_ci				       MEMORY_OFFLINE | REPORT_FAILURE);
15098c2ecf20Sopenharmony_ci	if (ret) {
15108c2ecf20Sopenharmony_ci		reason = "failure to isolate range";
15118c2ecf20Sopenharmony_ci		goto failed_removal;
15128c2ecf20Sopenharmony_ci	}
15138c2ecf20Sopenharmony_ci
15148c2ecf20Sopenharmony_ci	arg.start_pfn = start_pfn;
15158c2ecf20Sopenharmony_ci	arg.nr_pages = nr_pages;
15168c2ecf20Sopenharmony_ci	node_states_check_changes_offline(nr_pages, zone, &arg);
15178c2ecf20Sopenharmony_ci
15188c2ecf20Sopenharmony_ci	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
15198c2ecf20Sopenharmony_ci	ret = notifier_to_errno(ret);
15208c2ecf20Sopenharmony_ci	if (ret) {
15218c2ecf20Sopenharmony_ci		reason = "notifier failure";
15228c2ecf20Sopenharmony_ci		goto failed_removal_isolated;
15238c2ecf20Sopenharmony_ci	}
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_ci	do {
15268c2ecf20Sopenharmony_ci		pfn = start_pfn;
15278c2ecf20Sopenharmony_ci		do {
15288c2ecf20Sopenharmony_ci			if (signal_pending(current)) {
15298c2ecf20Sopenharmony_ci				ret = -EINTR;
15308c2ecf20Sopenharmony_ci				reason = "signal backoff";
15318c2ecf20Sopenharmony_ci				goto failed_removal_isolated;
15328c2ecf20Sopenharmony_ci			}
15338c2ecf20Sopenharmony_ci
15348c2ecf20Sopenharmony_ci			cond_resched();
15358c2ecf20Sopenharmony_ci			lru_add_drain_all();
15368c2ecf20Sopenharmony_ci
15378c2ecf20Sopenharmony_ci			ret = scan_movable_pages(pfn, end_pfn, &pfn);
15388c2ecf20Sopenharmony_ci			if (!ret) {
15398c2ecf20Sopenharmony_ci				/*
15408c2ecf20Sopenharmony_ci				 * TODO: fatal migration failures should bail
15418c2ecf20Sopenharmony_ci				 * out
15428c2ecf20Sopenharmony_ci				 */
15438c2ecf20Sopenharmony_ci				do_migrate_range(pfn, end_pfn);
15448c2ecf20Sopenharmony_ci			}
15458c2ecf20Sopenharmony_ci		} while (!ret);
15468c2ecf20Sopenharmony_ci
15478c2ecf20Sopenharmony_ci		if (ret != -ENOENT) {
15488c2ecf20Sopenharmony_ci			reason = "unmovable page";
15498c2ecf20Sopenharmony_ci			goto failed_removal_isolated;
15508c2ecf20Sopenharmony_ci		}
15518c2ecf20Sopenharmony_ci
15528c2ecf20Sopenharmony_ci		/*
15538c2ecf20Sopenharmony_ci		 * Dissolve free hugepages in the memory block before doing
15548c2ecf20Sopenharmony_ci		 * offlining actually in order to make hugetlbfs's object
15558c2ecf20Sopenharmony_ci		 * counting consistent.
15568c2ecf20Sopenharmony_ci		 */
15578c2ecf20Sopenharmony_ci		ret = dissolve_free_huge_pages(start_pfn, end_pfn);
15588c2ecf20Sopenharmony_ci		if (ret) {
15598c2ecf20Sopenharmony_ci			reason = "failure to dissolve huge pages";
15608c2ecf20Sopenharmony_ci			goto failed_removal_isolated;
15618c2ecf20Sopenharmony_ci		}
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci		/*
15648c2ecf20Sopenharmony_ci		 * per-cpu pages are drained in start_isolate_page_range, but if
15658c2ecf20Sopenharmony_ci		 * there are still pages that are not free, make sure that we
15668c2ecf20Sopenharmony_ci		 * drain again, because when we isolated range we might
15678c2ecf20Sopenharmony_ci		 * have raced with another thread that was adding pages to pcp
15688c2ecf20Sopenharmony_ci		 * list.
15698c2ecf20Sopenharmony_ci		 *
15708c2ecf20Sopenharmony_ci		 * Forward progress should be still guaranteed because
15718c2ecf20Sopenharmony_ci		 * pages on the pcp list can only belong to MOVABLE_ZONE
15728c2ecf20Sopenharmony_ci		 * because has_unmovable_pages explicitly checks for
15738c2ecf20Sopenharmony_ci		 * PageBuddy on freed pages on other zones.
15748c2ecf20Sopenharmony_ci		 */
15758c2ecf20Sopenharmony_ci		ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE);
15768c2ecf20Sopenharmony_ci		if (ret)
15778c2ecf20Sopenharmony_ci			drain_all_pages(zone);
15788c2ecf20Sopenharmony_ci	} while (ret);
15798c2ecf20Sopenharmony_ci
15808c2ecf20Sopenharmony_ci	/* Mark all sections offline and remove free pages from the buddy. */
15818c2ecf20Sopenharmony_ci	__offline_isolated_pages(start_pfn, end_pfn);
15828c2ecf20Sopenharmony_ci	pr_info("Offlined Pages %ld\n", nr_pages);
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_ci	/*
15858c2ecf20Sopenharmony_ci	 * The memory sections are marked offline, and the pageblock flags
15868c2ecf20Sopenharmony_ci	 * effectively stale; nobody should be touching them. Fixup the number
15878c2ecf20Sopenharmony_ci	 * of isolated pageblocks, memory onlining will properly revert this.
15888c2ecf20Sopenharmony_ci	 */
15898c2ecf20Sopenharmony_ci	spin_lock_irqsave(&zone->lock, flags);
15908c2ecf20Sopenharmony_ci	zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
15918c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&zone->lock, flags);
15928c2ecf20Sopenharmony_ci
15938c2ecf20Sopenharmony_ci	/* removal success */
15948c2ecf20Sopenharmony_ci	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
15958c2ecf20Sopenharmony_ci	zone->present_pages -= nr_pages;
15968c2ecf20Sopenharmony_ci
15978c2ecf20Sopenharmony_ci	pgdat_resize_lock(zone->zone_pgdat, &flags);
15988c2ecf20Sopenharmony_ci	zone->zone_pgdat->node_present_pages -= nr_pages;
15998c2ecf20Sopenharmony_ci	pgdat_resize_unlock(zone->zone_pgdat, &flags);
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci	init_per_zone_wmark_min();
16028c2ecf20Sopenharmony_ci
16038c2ecf20Sopenharmony_ci	if (!populated_zone(zone)) {
16048c2ecf20Sopenharmony_ci		zone_pcp_reset(zone);
16058c2ecf20Sopenharmony_ci		build_all_zonelists(NULL);
16068c2ecf20Sopenharmony_ci	} else
16078c2ecf20Sopenharmony_ci		zone_pcp_update(zone);
16088c2ecf20Sopenharmony_ci
16098c2ecf20Sopenharmony_ci	node_states_clear_node(node, &arg);
16108c2ecf20Sopenharmony_ci	if (arg.status_change_nid >= 0) {
16118c2ecf20Sopenharmony_ci		kswapd_stop(node);
16128c2ecf20Sopenharmony_ci		kcompactd_stop(node);
16138c2ecf20Sopenharmony_ci#ifdef CONFIG_HYPERHOLD_ZSWAPD
16148c2ecf20Sopenharmony_ci		zswapd_stop(node);
16158c2ecf20Sopenharmony_ci#endif
16168c2ecf20Sopenharmony_ci	}
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_ci	writeback_set_ratelimit();
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	memory_notify(MEM_OFFLINE, &arg);
16218c2ecf20Sopenharmony_ci	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
16228c2ecf20Sopenharmony_ci	mem_hotplug_done();
16238c2ecf20Sopenharmony_ci	return 0;
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_cifailed_removal_isolated:
16268c2ecf20Sopenharmony_ci	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
16278c2ecf20Sopenharmony_ci	memory_notify(MEM_CANCEL_OFFLINE, &arg);
16288c2ecf20Sopenharmony_cifailed_removal:
16298c2ecf20Sopenharmony_ci	pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
16308c2ecf20Sopenharmony_ci		 (unsigned long long) start_pfn << PAGE_SHIFT,
16318c2ecf20Sopenharmony_ci		 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
16328c2ecf20Sopenharmony_ci		 reason);
16338c2ecf20Sopenharmony_ci	/* pushback to free area */
16348c2ecf20Sopenharmony_ci	mem_hotplug_done();
16358c2ecf20Sopenharmony_ci	return ret;
16368c2ecf20Sopenharmony_ci}
16378c2ecf20Sopenharmony_ci
16388c2ecf20Sopenharmony_cistatic int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
16398c2ecf20Sopenharmony_ci{
16408c2ecf20Sopenharmony_ci	int ret = !is_memblock_offlined(mem);
16418c2ecf20Sopenharmony_ci
16428c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
16438c2ecf20Sopenharmony_ci		phys_addr_t beginpa, endpa;
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
16468c2ecf20Sopenharmony_ci		endpa = beginpa + memory_block_size_bytes() - 1;
16478c2ecf20Sopenharmony_ci		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
16488c2ecf20Sopenharmony_ci			&beginpa, &endpa);
16498c2ecf20Sopenharmony_ci
16508c2ecf20Sopenharmony_ci		return -EBUSY;
16518c2ecf20Sopenharmony_ci	}
16528c2ecf20Sopenharmony_ci	return 0;
16538c2ecf20Sopenharmony_ci}
16548c2ecf20Sopenharmony_ci
16558c2ecf20Sopenharmony_cistatic int check_cpu_on_node(pg_data_t *pgdat)
16568c2ecf20Sopenharmony_ci{
16578c2ecf20Sopenharmony_ci	int cpu;
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	for_each_present_cpu(cpu) {
16608c2ecf20Sopenharmony_ci		if (cpu_to_node(cpu) == pgdat->node_id)
16618c2ecf20Sopenharmony_ci			/*
16628c2ecf20Sopenharmony_ci			 * the cpu on this node isn't removed, and we can't
16638c2ecf20Sopenharmony_ci			 * offline this node.
16648c2ecf20Sopenharmony_ci			 */
16658c2ecf20Sopenharmony_ci			return -EBUSY;
16668c2ecf20Sopenharmony_ci	}
16678c2ecf20Sopenharmony_ci
16688c2ecf20Sopenharmony_ci	return 0;
16698c2ecf20Sopenharmony_ci}
16708c2ecf20Sopenharmony_ci
16718c2ecf20Sopenharmony_cistatic int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
16728c2ecf20Sopenharmony_ci{
16738c2ecf20Sopenharmony_ci	int nid = *(int *)arg;
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci	/*
16768c2ecf20Sopenharmony_ci	 * If a memory block belongs to multiple nodes, the stored nid is not
16778c2ecf20Sopenharmony_ci	 * reliable. However, such blocks are always online (e.g., cannot get
16788c2ecf20Sopenharmony_ci	 * offlined) and, therefore, are still spanned by the node.
16798c2ecf20Sopenharmony_ci	 */
16808c2ecf20Sopenharmony_ci	return mem->nid == nid ? -EEXIST : 0;
16818c2ecf20Sopenharmony_ci}
16828c2ecf20Sopenharmony_ci
16838c2ecf20Sopenharmony_ci/**
16848c2ecf20Sopenharmony_ci * try_offline_node
16858c2ecf20Sopenharmony_ci * @nid: the node ID
16868c2ecf20Sopenharmony_ci *
16878c2ecf20Sopenharmony_ci * Offline a node if all memory sections and cpus of the node are removed.
16888c2ecf20Sopenharmony_ci *
16898c2ecf20Sopenharmony_ci * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
16908c2ecf20Sopenharmony_ci * and online/offline operations before this call.
16918c2ecf20Sopenharmony_ci */
16928c2ecf20Sopenharmony_civoid try_offline_node(int nid)
16938c2ecf20Sopenharmony_ci{
16948c2ecf20Sopenharmony_ci	pg_data_t *pgdat = NODE_DATA(nid);
16958c2ecf20Sopenharmony_ci	int rc;
16968c2ecf20Sopenharmony_ci
16978c2ecf20Sopenharmony_ci	/*
16988c2ecf20Sopenharmony_ci	 * If the node still spans pages (especially ZONE_DEVICE), don't
16998c2ecf20Sopenharmony_ci	 * offline it. A node spans memory after move_pfn_range_to_zone(),
17008c2ecf20Sopenharmony_ci	 * e.g., after the memory block was onlined.
17018c2ecf20Sopenharmony_ci	 */
17028c2ecf20Sopenharmony_ci	if (pgdat->node_spanned_pages)
17038c2ecf20Sopenharmony_ci		return;
17048c2ecf20Sopenharmony_ci
17058c2ecf20Sopenharmony_ci	/*
17068c2ecf20Sopenharmony_ci	 * Especially offline memory blocks might not be spanned by the
17078c2ecf20Sopenharmony_ci	 * node. They will get spanned by the node once they get onlined.
17088c2ecf20Sopenharmony_ci	 * However, they link to the node in sysfs and can get onlined later.
17098c2ecf20Sopenharmony_ci	 */
17108c2ecf20Sopenharmony_ci	rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
17118c2ecf20Sopenharmony_ci	if (rc)
17128c2ecf20Sopenharmony_ci		return;
17138c2ecf20Sopenharmony_ci
17148c2ecf20Sopenharmony_ci	if (check_cpu_on_node(pgdat))
17158c2ecf20Sopenharmony_ci		return;
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_ci	/*
17188c2ecf20Sopenharmony_ci	 * all memory/cpu of this node are removed, we can offline this
17198c2ecf20Sopenharmony_ci	 * node now.
17208c2ecf20Sopenharmony_ci	 */
17218c2ecf20Sopenharmony_ci	node_set_offline(nid);
17228c2ecf20Sopenharmony_ci	unregister_one_node(nid);
17238c2ecf20Sopenharmony_ci}
17248c2ecf20Sopenharmony_ciEXPORT_SYMBOL(try_offline_node);
17258c2ecf20Sopenharmony_ci
17268c2ecf20Sopenharmony_cistatic int __ref try_remove_memory(int nid, u64 start, u64 size)
17278c2ecf20Sopenharmony_ci{
17288c2ecf20Sopenharmony_ci	int rc = 0;
17298c2ecf20Sopenharmony_ci
17308c2ecf20Sopenharmony_ci	BUG_ON(check_hotplug_memory_range(start, size));
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	/*
17338c2ecf20Sopenharmony_ci	 * All memory blocks must be offlined before removing memory.  Check
17348c2ecf20Sopenharmony_ci	 * whether all memory blocks in question are offline and return error
17358c2ecf20Sopenharmony_ci	 * if this is not the case.
17368c2ecf20Sopenharmony_ci	 */
17378c2ecf20Sopenharmony_ci	rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
17388c2ecf20Sopenharmony_ci	if (rc)
17398c2ecf20Sopenharmony_ci		return rc;
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	/* remove memmap entry */
17428c2ecf20Sopenharmony_ci	firmware_map_remove(start, start + size, "System RAM");
17438c2ecf20Sopenharmony_ci
17448c2ecf20Sopenharmony_ci	/*
17458c2ecf20Sopenharmony_ci	 * Memory block device removal under the device_hotplug_lock is
17468c2ecf20Sopenharmony_ci	 * a barrier against racing online attempts.
17478c2ecf20Sopenharmony_ci	 */
17488c2ecf20Sopenharmony_ci	remove_memory_block_devices(start, size);
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci	mem_hotplug_begin();
17518c2ecf20Sopenharmony_ci
17528c2ecf20Sopenharmony_ci	arch_remove_memory(nid, start, size, NULL);
17538c2ecf20Sopenharmony_ci
17548c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
17558c2ecf20Sopenharmony_ci		memblock_free(start, size);
17568c2ecf20Sopenharmony_ci		memblock_remove(start, size);
17578c2ecf20Sopenharmony_ci	}
17588c2ecf20Sopenharmony_ci
17598c2ecf20Sopenharmony_ci	release_mem_region_adjustable(start, size);
17608c2ecf20Sopenharmony_ci
17618c2ecf20Sopenharmony_ci	try_offline_node(nid);
17628c2ecf20Sopenharmony_ci
17638c2ecf20Sopenharmony_ci	mem_hotplug_done();
17648c2ecf20Sopenharmony_ci	return 0;
17658c2ecf20Sopenharmony_ci}
17668c2ecf20Sopenharmony_ci
/**
 * __remove_memory - remove a memory range, BUG() if that fails
 * @nid: the node ID
 * @start: physical address of the region to remove
 * @size: size of the region to remove
 *
 * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
 * and online/offline operations before this call, as required by
 * try_offline_node().
 */
void __remove_memory(int nid, u64 start, u64 size)
{

	/*
	 * trigger BUG() if some memory is not offlined prior to calling this
	 * function
	 */
	if (try_remove_memory(nid, start, size))
		BUG();
}
17878c2ecf20Sopenharmony_ci
17888c2ecf20Sopenharmony_ci/*
17898c2ecf20Sopenharmony_ci * Remove memory if every memory block is offline, otherwise return -EBUSY is
17908c2ecf20Sopenharmony_ci * some memory is not offline
17918c2ecf20Sopenharmony_ci */
17928c2ecf20Sopenharmony_ciint remove_memory(int nid, u64 start, u64 size)
17938c2ecf20Sopenharmony_ci{
17948c2ecf20Sopenharmony_ci	int rc;
17958c2ecf20Sopenharmony_ci
17968c2ecf20Sopenharmony_ci	lock_device_hotplug();
17978c2ecf20Sopenharmony_ci	rc  = try_remove_memory(nid, start, size);
17988c2ecf20Sopenharmony_ci	unlock_device_hotplug();
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_ci	return rc;
18018c2ecf20Sopenharmony_ci}
18028c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(remove_memory);
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_cistatic int try_offline_memory_block(struct memory_block *mem, void *arg)
18058c2ecf20Sopenharmony_ci{
18068c2ecf20Sopenharmony_ci	uint8_t online_type = MMOP_ONLINE_KERNEL;
18078c2ecf20Sopenharmony_ci	uint8_t **online_types = arg;
18088c2ecf20Sopenharmony_ci	struct page *page;
18098c2ecf20Sopenharmony_ci	int rc;
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_ci	/*
18128c2ecf20Sopenharmony_ci	 * Sense the online_type via the zone of the memory block. Offlining
18138c2ecf20Sopenharmony_ci	 * with multiple zones within one memory block will be rejected
18148c2ecf20Sopenharmony_ci	 * by offlining code ... so we don't care about that.
18158c2ecf20Sopenharmony_ci	 */
18168c2ecf20Sopenharmony_ci	page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
18178c2ecf20Sopenharmony_ci	if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
18188c2ecf20Sopenharmony_ci		online_type = MMOP_ONLINE_MOVABLE;
18198c2ecf20Sopenharmony_ci
18208c2ecf20Sopenharmony_ci	rc = device_offline(&mem->dev);
18218c2ecf20Sopenharmony_ci	/*
18228c2ecf20Sopenharmony_ci	 * Default is MMOP_OFFLINE - change it only if offlining succeeded,
18238c2ecf20Sopenharmony_ci	 * so try_reonline_memory_block() can do the right thing.
18248c2ecf20Sopenharmony_ci	 */
18258c2ecf20Sopenharmony_ci	if (!rc)
18268c2ecf20Sopenharmony_ci		**online_types = online_type;
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci	(*online_types)++;
18298c2ecf20Sopenharmony_ci	/* Ignore if already offline. */
18308c2ecf20Sopenharmony_ci	return rc < 0 ? rc : 0;
18318c2ecf20Sopenharmony_ci}
18328c2ecf20Sopenharmony_ci
18338c2ecf20Sopenharmony_cistatic int try_reonline_memory_block(struct memory_block *mem, void *arg)
18348c2ecf20Sopenharmony_ci{
18358c2ecf20Sopenharmony_ci	uint8_t **online_types = arg;
18368c2ecf20Sopenharmony_ci	int rc;
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	if (**online_types != MMOP_OFFLINE) {
18398c2ecf20Sopenharmony_ci		mem->online_type = **online_types;
18408c2ecf20Sopenharmony_ci		rc = device_online(&mem->dev);
18418c2ecf20Sopenharmony_ci		if (rc < 0)
18428c2ecf20Sopenharmony_ci			pr_warn("%s: Failed to re-online memory: %d",
18438c2ecf20Sopenharmony_ci				__func__, rc);
18448c2ecf20Sopenharmony_ci	}
18458c2ecf20Sopenharmony_ci
18468c2ecf20Sopenharmony_ci	/* Continue processing all remaining memory blocks. */
18478c2ecf20Sopenharmony_ci	(*online_types)++;
18488c2ecf20Sopenharmony_ci	return 0;
18498c2ecf20Sopenharmony_ci}
18508c2ecf20Sopenharmony_ci
18518c2ecf20Sopenharmony_ci/*
18528c2ecf20Sopenharmony_ci * Try to offline and remove memory. Might take a long time to finish in case
18538c2ecf20Sopenharmony_ci * memory is still in use. Primarily useful for memory devices that logically
18548c2ecf20Sopenharmony_ci * unplugged all memory (so it's no longer in use) and want to offline + remove
18558c2ecf20Sopenharmony_ci * that memory.
18568c2ecf20Sopenharmony_ci */
18578c2ecf20Sopenharmony_ciint offline_and_remove_memory(int nid, u64 start, u64 size)
18588c2ecf20Sopenharmony_ci{
18598c2ecf20Sopenharmony_ci	const unsigned long mb_count = size / memory_block_size_bytes();
18608c2ecf20Sopenharmony_ci	uint8_t *online_types, *tmp;
18618c2ecf20Sopenharmony_ci	int rc;
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci	if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
18648c2ecf20Sopenharmony_ci	    !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
18658c2ecf20Sopenharmony_ci		return -EINVAL;
18668c2ecf20Sopenharmony_ci
18678c2ecf20Sopenharmony_ci	/*
18688c2ecf20Sopenharmony_ci	 * We'll remember the old online type of each memory block, so we can
18698c2ecf20Sopenharmony_ci	 * try to revert whatever we did when offlining one memory block fails
18708c2ecf20Sopenharmony_ci	 * after offlining some others succeeded.
18718c2ecf20Sopenharmony_ci	 */
18728c2ecf20Sopenharmony_ci	online_types = kmalloc_array(mb_count, sizeof(*online_types),
18738c2ecf20Sopenharmony_ci				     GFP_KERNEL);
18748c2ecf20Sopenharmony_ci	if (!online_types)
18758c2ecf20Sopenharmony_ci		return -ENOMEM;
18768c2ecf20Sopenharmony_ci	/*
18778c2ecf20Sopenharmony_ci	 * Initialize all states to MMOP_OFFLINE, so when we abort processing in
18788c2ecf20Sopenharmony_ci	 * try_offline_memory_block(), we'll skip all unprocessed blocks in
18798c2ecf20Sopenharmony_ci	 * try_reonline_memory_block().
18808c2ecf20Sopenharmony_ci	 */
18818c2ecf20Sopenharmony_ci	memset(online_types, MMOP_OFFLINE, mb_count);
18828c2ecf20Sopenharmony_ci
18838c2ecf20Sopenharmony_ci	lock_device_hotplug();
18848c2ecf20Sopenharmony_ci
18858c2ecf20Sopenharmony_ci	tmp = online_types;
18868c2ecf20Sopenharmony_ci	rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_ci	/*
18898c2ecf20Sopenharmony_ci	 * In case we succeeded to offline all memory, remove it.
18908c2ecf20Sopenharmony_ci	 * This cannot fail as it cannot get onlined in the meantime.
18918c2ecf20Sopenharmony_ci	 */
18928c2ecf20Sopenharmony_ci	if (!rc) {
18938c2ecf20Sopenharmony_ci		rc = try_remove_memory(nid, start, size);
18948c2ecf20Sopenharmony_ci		if (rc)
18958c2ecf20Sopenharmony_ci			pr_err("%s: Failed to remove memory: %d", __func__, rc);
18968c2ecf20Sopenharmony_ci	}
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci	/*
18998c2ecf20Sopenharmony_ci	 * Rollback what we did. While memory onlining might theoretically fail
19008c2ecf20Sopenharmony_ci	 * (nacked by a notifier), it barely ever happens.
19018c2ecf20Sopenharmony_ci	 */
19028c2ecf20Sopenharmony_ci	if (rc) {
19038c2ecf20Sopenharmony_ci		tmp = online_types;
19048c2ecf20Sopenharmony_ci		walk_memory_blocks(start, size, &tmp,
19058c2ecf20Sopenharmony_ci				   try_reonline_memory_block);
19068c2ecf20Sopenharmony_ci	}
19078c2ecf20Sopenharmony_ci	unlock_device_hotplug();
19088c2ecf20Sopenharmony_ci
19098c2ecf20Sopenharmony_ci	kfree(online_types);
19108c2ecf20Sopenharmony_ci	return rc;
19118c2ecf20Sopenharmony_ci}
19128c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(offline_and_remove_memory);
19138c2ecf20Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTREMOVE */
1914