xref: /kernel/linux/linux-6.6/mm/sparse.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * sparse memory mappings.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci#include <linux/mm.h>
662306a36Sopenharmony_ci#include <linux/slab.h>
762306a36Sopenharmony_ci#include <linux/mmzone.h>
862306a36Sopenharmony_ci#include <linux/memblock.h>
962306a36Sopenharmony_ci#include <linux/compiler.h>
1062306a36Sopenharmony_ci#include <linux/highmem.h>
1162306a36Sopenharmony_ci#include <linux/export.h>
1262306a36Sopenharmony_ci#include <linux/spinlock.h>
1362306a36Sopenharmony_ci#include <linux/vmalloc.h>
1462306a36Sopenharmony_ci#include <linux/swap.h>
1562306a36Sopenharmony_ci#include <linux/swapops.h>
1662306a36Sopenharmony_ci#include <linux/bootmem_info.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include "internal.h"
1962306a36Sopenharmony_ci#include <asm/dma.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci/*
2262306a36Sopenharmony_ci * Permanent SPARSEMEM data:
2362306a36Sopenharmony_ci *
2462306a36Sopenharmony_ci * 1) mem_section	- memory sections, mem_map's for valid memory
2562306a36Sopenharmony_ci */
#ifdef CONFIG_SPARSEMEM_EXTREME
/* Root pointer array, allocated on first use (see memory_present()). */
struct mem_section **mem_section;
#else
/* Statically sized two-level table covering all possible sections. */
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);
3362306a36Sopenharmony_ci
#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
/* More than 256 nodes: a u8 cannot hold the node id, use u16. */
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

/* Look up the NUMA node of @page via its section number. */
int page_to_nid(const struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

/* Record the NUMA node owning section @section_nr. */
static void set_section_nid(unsigned long section_nr, int nid)
{
	section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
/* Node id is kept in page->flags; nothing to record per section. */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_EXTREME
6362306a36Sopenharmony_cistatic noinline struct mem_section __ref *sparse_index_alloc(int nid)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	struct mem_section *section = NULL;
6662306a36Sopenharmony_ci	unsigned long array_size = SECTIONS_PER_ROOT *
6762306a36Sopenharmony_ci				   sizeof(struct mem_section);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	if (slab_is_available()) {
7062306a36Sopenharmony_ci		section = kzalloc_node(array_size, GFP_KERNEL, nid);
7162306a36Sopenharmony_ci	} else {
7262306a36Sopenharmony_ci		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
7362306a36Sopenharmony_ci					      nid);
7462306a36Sopenharmony_ci		if (!section)
7562306a36Sopenharmony_ci			panic("%s: Failed to allocate %lu bytes nid=%d\n",
7662306a36Sopenharmony_ci			      __func__, array_size, nid);
7762306a36Sopenharmony_ci	}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	return section;
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_cistatic int __meminit sparse_index_init(unsigned long section_nr, int nid)
8362306a36Sopenharmony_ci{
8462306a36Sopenharmony_ci	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
8562306a36Sopenharmony_ci	struct mem_section *section;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	/*
8862306a36Sopenharmony_ci	 * An existing section is possible in the sub-section hotplug
8962306a36Sopenharmony_ci	 * case. First hot-add instantiates, follow-on hot-add reuses
9062306a36Sopenharmony_ci	 * the existing section.
9162306a36Sopenharmony_ci	 *
9262306a36Sopenharmony_ci	 * The mem_hotplug_lock resolves the apparent race below.
9362306a36Sopenharmony_ci	 */
9462306a36Sopenharmony_ci	if (mem_section[root])
9562306a36Sopenharmony_ci		return 0;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	section = sparse_index_alloc(nid);
9862306a36Sopenharmony_ci	if (!section)
9962306a36Sopenharmony_ci		return -ENOMEM;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	mem_section[root] = section;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	return 0;
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci#else /* !SPARSEMEM_EXTREME */
/* !SPARSEMEM_EXTREME: mem_section[] is static, nothing to allocate. */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
11062306a36Sopenharmony_ci#endif
11162306a36Sopenharmony_ci
/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node.  This keeps us from having to use another data structure.  The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
	return ((unsigned long)nid << SECTION_NID_SHIFT);
}

/* Recover the node id stashed by sparse_encode_early_nid(). */
static inline int sparse_early_nid(struct mem_section *section)
{
	return (section->section_mem_map >> SECTION_NID_SHIFT);
}
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci/* Validate the physical addressing limitations of the model */
12962306a36Sopenharmony_cistatic void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
13062306a36Sopenharmony_ci						unsigned long *end_pfn)
13162306a36Sopenharmony_ci{
13262306a36Sopenharmony_ci	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	/*
13562306a36Sopenharmony_ci	 * Sanity checks - do not allow an architecture to pass
13662306a36Sopenharmony_ci	 * in larger pfns than the maximum scope of sparsemem:
13762306a36Sopenharmony_ci	 */
13862306a36Sopenharmony_ci	if (*start_pfn > max_sparsemem_pfn) {
13962306a36Sopenharmony_ci		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
14062306a36Sopenharmony_ci			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
14162306a36Sopenharmony_ci			*start_pfn, *end_pfn, max_sparsemem_pfn);
14262306a36Sopenharmony_ci		WARN_ON_ONCE(1);
14362306a36Sopenharmony_ci		*start_pfn = max_sparsemem_pfn;
14462306a36Sopenharmony_ci		*end_pfn = max_sparsemem_pfn;
14562306a36Sopenharmony_ci	} else if (*end_pfn > max_sparsemem_pfn) {
14662306a36Sopenharmony_ci		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
14762306a36Sopenharmony_ci			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
14862306a36Sopenharmony_ci			*start_pfn, *end_pfn, max_sparsemem_pfn);
14962306a36Sopenharmony_ci		WARN_ON_ONCE(1);
15062306a36Sopenharmony_ci		*end_pfn = max_sparsemem_pfn;
15162306a36Sopenharmony_ci	}
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci/*
15562306a36Sopenharmony_ci * There are a number of times that we loop over NR_MEM_SECTIONS,
15662306a36Sopenharmony_ci * looking for section_present() on each.  But, when we have very
15762306a36Sopenharmony_ci * large physical address spaces, NR_MEM_SECTIONS can also be
15862306a36Sopenharmony_ci * very large which makes the loops quite long.
15962306a36Sopenharmony_ci *
16062306a36Sopenharmony_ci * Keeping track of this gives us an easy way to break out of
16162306a36Sopenharmony_ci * those loops early.
16262306a36Sopenharmony_ci */
16362306a36Sopenharmony_ciunsigned long __highest_present_section_nr;
16462306a36Sopenharmony_cistatic void __section_mark_present(struct mem_section *ms,
16562306a36Sopenharmony_ci		unsigned long section_nr)
16662306a36Sopenharmony_ci{
16762306a36Sopenharmony_ci	if (section_nr > __highest_present_section_nr)
16862306a36Sopenharmony_ci		__highest_present_section_nr = section_nr;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	ms->section_mem_map |= SECTION_MARKED_PRESENT;
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
/* Iterate @section_nr over every present section number >= @start. */
#define for_each_present_section_nr(start, section_nr)		\
	for (section_nr = next_present_section_nr(start-1);	\
	     section_nr != -1;								\
	     section_nr = next_present_section_nr(section_nr))

/* Lowest present section number, or -1 if none are present. */
static inline unsigned long first_present_section_nr(void)
{
	return next_present_section_nr(-1);
}
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Set the subsection bits covered by [pfn, pfn + nr_pages) in @map. */
static void subsection_mask_set(unsigned long *map, unsigned long pfn,
		unsigned long nr_pages)
{
	int first = subsection_map_index(pfn);
	int last = subsection_map_index(pfn + nr_pages - 1);

	bitmap_set(map, first, last - first + 1);
}
19262306a36Sopenharmony_ci
/*
 * Populate each affected section's subsection_map for the pfn range
 * [pfn, pfn + nr_pages), splitting the range at section boundaries.
 */
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
	int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
	unsigned long nr, start_sec = pfn_to_section_nr(pfn);

	if (!nr_pages)
		return;

	for (nr = start_sec; nr <= end_sec; nr++) {
		struct mem_section *ms;
		unsigned long pfns;

		/* Pages of the range that fall within section @nr. */
		pfns = min(nr_pages, PAGES_PER_SECTION
				- (pfn & ~PAGE_SECTION_MASK));
		ms = __nr_to_section(nr);
		subsection_mask_set(ms->usage->subsection_map, pfn, pfns);

		pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
				pfns, subsection_map_index(pfn),
				subsection_map_index(pfn + pfns - 1));

		/* Advance to the start of the next section. */
		pfn += pfns;
		nr_pages -= pfns;
	}
}
21862306a36Sopenharmony_ci#else
/* !SPARSEMEM_VMEMMAP: subsections are not tracked. */
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
}
22262306a36Sopenharmony_ci#endif
22362306a36Sopenharmony_ci
/* Record a memory area against a node. */
static void __init memory_present(int nid, unsigned long start, unsigned long end)
{
	unsigned long pfn;

#ifdef CONFIG_SPARSEMEM_EXTREME
	/* First caller allocates the array of root pointers. */
	if (unlikely(!mem_section)) {
		unsigned long size, align;

		size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
		align = 1 << (INTERNODE_CACHE_SHIFT);
		mem_section = memblock_alloc(size, align);
		if (!mem_section)
			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
			      __func__, size, align);
	}
#endif

	start &= PAGE_SECTION_MASK;
	mminit_validate_memmodel_limits(&start, &end);
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
		unsigned long section = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		sparse_index_init(section, nid);
		set_section_nid(section, nid);

		ms = __nr_to_section(section);
		if (!ms->section_mem_map) {
			/* Stash the nid until the real mem_map is stored. */
			ms->section_mem_map = sparse_encode_early_nid(nid) |
							SECTION_IS_ONLINE;
			__section_mark_present(ms, section);
		}
	}
}
25962306a36Sopenharmony_ci
/*
 * Mark all memblocks as present using memory_present().
 * This is a convenience function that is useful to mark all of the systems
 * memory as present during initialization.
 */
static void __init memblocks_present(void)
{
	unsigned long start, end;
	int i, nid;

	/* Walk every registered memblock pfn range and record it. */
	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
		memory_present(nid, start, end);
}
27362306a36Sopenharmony_ci
/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
	unsigned long coded_mem_map =
		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
	/* Low bits must remain free for the SECTION_* flag bits. */
	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
	return coded_mem_map;
}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
28962306a36Sopenharmony_ci/*
29062306a36Sopenharmony_ci * Decode mem_map from the coded memmap
29162306a36Sopenharmony_ci */
29262306a36Sopenharmony_cistruct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	/* mask off the extra low bits of information */
29562306a36Sopenharmony_ci	coded_mem_map &= SECTION_MAP_MASK;
29662306a36Sopenharmony_ci	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
29762306a36Sopenharmony_ci}
29862306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */
29962306a36Sopenharmony_ci
/*
 * Attach @mem_map and @usage to section @pnum: the previous encoding
 * inside SECTION_MAP_MASK is cleared, bits outside it are kept.
 */
static void __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map,
		struct mem_section_usage *usage, unsigned long flags)
{
	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
		| SECTION_HAS_MEM_MAP | flags;
	ms->usage = usage;
}
30962306a36Sopenharmony_ci
/* Bytes needed for one section's pageblock-flags bitmap. */
static unsigned long usemap_size(void)
{
	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

/* Total allocation size of one mem_section_usage, bitmap included. */
size_t mem_section_usage_size(void)
{
	return sizeof(struct mem_section_usage) + usemap_size();
}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE
/* Physical address of @pgdat (the static contig_page_data on !NUMA). */
static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
{
#ifndef CONFIG_NUMA
	VM_BUG_ON(pgdat != &contig_page_data);
	return __pa_symbol(&contig_page_data);
#else
	return __pa(pgdat);
#endif
}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_cistatic struct mem_section_usage * __init
33262306a36Sopenharmony_cisparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
33362306a36Sopenharmony_ci					 unsigned long size)
33462306a36Sopenharmony_ci{
33562306a36Sopenharmony_ci	struct mem_section_usage *usage;
33662306a36Sopenharmony_ci	unsigned long goal, limit;
33762306a36Sopenharmony_ci	int nid;
33862306a36Sopenharmony_ci	/*
33962306a36Sopenharmony_ci	 * A page may contain usemaps for other sections preventing the
34062306a36Sopenharmony_ci	 * page being freed and making a section unremovable while
34162306a36Sopenharmony_ci	 * other sections referencing the usemap remain active. Similarly,
34262306a36Sopenharmony_ci	 * a pgdat can prevent a section being removed. If section A
34362306a36Sopenharmony_ci	 * contains a pgdat and section B contains the usemap, both
34462306a36Sopenharmony_ci	 * sections become inter-dependent. This allocates usemaps
34562306a36Sopenharmony_ci	 * from the same section as the pgdat where possible to avoid
34662306a36Sopenharmony_ci	 * this problem.
34762306a36Sopenharmony_ci	 */
34862306a36Sopenharmony_ci	goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
34962306a36Sopenharmony_ci	limit = goal + (1UL << PA_SECTION_SHIFT);
35062306a36Sopenharmony_ci	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
35162306a36Sopenharmony_ciagain:
35262306a36Sopenharmony_ci	usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
35362306a36Sopenharmony_ci	if (!usage && limit) {
35462306a36Sopenharmony_ci		limit = 0;
35562306a36Sopenharmony_ci		goto again;
35662306a36Sopenharmony_ci	}
35762306a36Sopenharmony_ci	return usage;
35862306a36Sopenharmony_ci}
35962306a36Sopenharmony_ci
/*
 * Report when a node's usemap landed in a different section than its
 * pgdat, which makes those sections inter-dependent for hot-remove.
 */
static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
	unsigned long usemap_snr, pgdat_snr;
	static unsigned long old_usemap_snr;
	static unsigned long old_pgdat_snr;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

	/* First call */
	if (!old_usemap_snr) {
		old_usemap_snr = NR_MEM_SECTIONS;
		old_pgdat_snr = NR_MEM_SECTIONS;
	}

	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
	pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
	/* Same section: the desired co-location, nothing to report. */
	if (usemap_snr == pgdat_snr)
		return;

	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
		/* skip redundant message */
		return;

	old_usemap_snr = usemap_snr;
	old_pgdat_snr = pgdat_snr;

	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
	if (usemap_nid != nid) {
		pr_info("node %d must be removed before remove section %ld\n",
			nid, usemap_snr);
		return;
	}
	/*
	 * There is a circular dependency.
	 * Some platforms allow un-removable section because they will just
	 * gather other removable sections for dynamic partitioning.
	 * Just notify un-removable section's number here.
	 */
	pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
		usemap_snr, pgdat_snr, nid);
}
40262306a36Sopenharmony_ci#else
/* !MEMORY_HOTREMOVE: no co-location constraint, plain node allocation. */
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}

/* !MEMORY_HOTREMOVE: nothing to check. */
static void __init check_usemap_section_nr(int nid,
		struct mem_section_usage *usage)
{
}
41462306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTREMOVE */
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Per-section memmap size, PMD-aligned (vmemmap maps memmap to PMDs). */
static unsigned long __init section_map_size(void)
{
	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci#else
/* Per-section memmap size, page-aligned for the non-vmemmap case. */
static unsigned long __init section_map_size(void)
{
	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}
42762306a36Sopenharmony_ci
/*
 * Allocate the memmap for one section: try the pre-allocated sparse
 * buffer first, fall back to a fresh memblock allocation above
 * MAX_DMA_ADDRESS.  @altmap and @pgmap are unused in this (!VMEMMAP)
 * variant.
 */
struct page __init *__populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long size = section_map_size();
	struct page *map = sparse_buffer_alloc(size);
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);

	if (map)
		return map;

	map = memmap_alloc(size, size, addr, nid, false);
	if (!map)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
		      __func__, size, PAGE_SIZE, nid, &addr);

	return map;
}
44662306a36Sopenharmony_ci#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
44762306a36Sopenharmony_ci
/* Scratch buffer that early memmap allocations are carved from. */
static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;

/* Return @size bytes at the current buffer front to memblock. */
static inline void __meminit sparse_buffer_free(unsigned long size)
{
	WARN_ON(!sparsemap_buf || size == 0);
	memblock_free(sparsemap_buf, size);
}
45662306a36Sopenharmony_ci
/* Pre-allocate @size bytes on @nid to carve section memmaps from. */
static void __init sparse_buffer_init(unsigned long size, int nid)
{
	phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
	/*
	 * Pre-allocated buffer is mainly used by __populate_section_memmap
	 * and we want it to be properly aligned to the section size - this is
	 * especially the case for VMEMMAP which maps memmap to PMDs
	 */
	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
	sparsemap_buf_end = sparsemap_buf + size;
}
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_cistatic void __init sparse_buffer_fini(void)
47162306a36Sopenharmony_ci{
47262306a36Sopenharmony_ci	unsigned long size = sparsemap_buf_end - sparsemap_buf;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	if (sparsemap_buf && size > 0)
47562306a36Sopenharmony_ci		sparse_buffer_free(size);
47662306a36Sopenharmony_ci	sparsemap_buf = NULL;
47762306a36Sopenharmony_ci}
47862306a36Sopenharmony_ci
/*
 * Carve @size bytes, aligned to @size, out of the sparse buffer.
 * Returns NULL when no buffer exists or the aligned request does not
 * fit; the alignment gap at the front is handed back to memblock so
 * it is not leaked.
 */
void * __meminit sparse_buffer_alloc(unsigned long size)
{
	void *ptr = NULL;

	if (sparsemap_buf) {
		ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
		if (ptr + size > sparsemap_buf_end)
			ptr = NULL;
		else {
			/* Free redundant aligned space */
			if ((unsigned long)(ptr - sparsemap_buf) > 0)
				sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
			sparsemap_buf = ptr + size;
		}
	}
	return ptr;
}
49662306a36Sopenharmony_ci
/* Weak arch hook invoked after sparse_init(); default does nothing. */
void __weak __meminit vmemmap_populate_print_last(void)
{
}
50062306a36Sopenharmony_ci
/*
 * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
 * And number of present sections in this node is map_count.
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
				   unsigned long pnum_end,
				   unsigned long map_count)
{
	struct mem_section_usage *usage;
	unsigned long pnum;
	struct page *map;

	/* One contiguous usage array for all present sections on the node. */
	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
			mem_section_usage_size() * map_count);
	if (!usage) {
		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
		goto failed;
	}
	sparse_buffer_init(map_count * section_map_size(), nid);
	for_each_present_section_nr(pnum_begin, pnum) {
		unsigned long pfn = section_nr_to_pfn(pnum);

		if (pnum >= pnum_end)
			break;

		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
				nid, NULL, NULL);
		if (!map) {
			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
			       __func__, nid);
			/* From this section onward, mark not-present below. */
			pnum_begin = pnum;
			sparse_buffer_fini();
			goto failed;
		}
		check_usemap_section_nr(nid, usage);
		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
				SECTION_IS_EARLY);
		/* Advance to the next slot in the contiguous usage array. */
		usage = (void *) usage + mem_section_usage_size();
	}
	sparse_buffer_fini();
	return;
failed:
	/* We failed to allocate, mark all the following pnums as not present */
	for_each_present_section_nr(pnum_begin, pnum) {
		struct mem_section *ms;

		if (pnum >= pnum_end)
			break;
		ms = __nr_to_section(pnum);
		ms->section_mem_map = 0;
	}
}
55362306a36Sopenharmony_ci
/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
	unsigned long pnum_end, pnum_begin, map_count = 1;
	int nid_begin;

	memblocks_present();

	pnum_begin = first_present_section_nr();
	nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));

	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
	set_pageblock_order();

	/* Batch runs of present sections that share the same node. */
	for_each_present_section_nr(pnum_begin + 1, pnum_end) {
		int nid = sparse_early_nid(__nr_to_section(pnum_end));

		if (nid == nid_begin) {
			map_count++;
			continue;
		}
		/* Init node with sections in range [pnum_begin, pnum_end) */
		sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
		nid_begin = nid;
		pnum_begin = pnum_end;
		map_count = 1;
	}
	/* cover the last node */
	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
	vmemmap_populate_print_last();
}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci/* Mark all memory sections within the pfn range as online */
59262306a36Sopenharmony_civoid online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
59362306a36Sopenharmony_ci{
59462306a36Sopenharmony_ci	unsigned long pfn;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
59762306a36Sopenharmony_ci		unsigned long section_nr = pfn_to_section_nr(pfn);
59862306a36Sopenharmony_ci		struct mem_section *ms;
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci		/* onlining code should never touch invalid ranges */
60162306a36Sopenharmony_ci		if (WARN_ON(!valid_section_nr(section_nr)))
60262306a36Sopenharmony_ci			continue;
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci		ms = __nr_to_section(section_nr);
60562306a36Sopenharmony_ci		ms->section_mem_map |= SECTION_IS_ONLINE;
60662306a36Sopenharmony_ci	}
60762306a36Sopenharmony_ci}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci/* Mark all memory sections within the pfn range as offline */
61062306a36Sopenharmony_civoid offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
61162306a36Sopenharmony_ci{
61262306a36Sopenharmony_ci	unsigned long pfn;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
61562306a36Sopenharmony_ci		unsigned long section_nr = pfn_to_section_nr(pfn);
61662306a36Sopenharmony_ci		struct mem_section *ms;
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci		/*
61962306a36Sopenharmony_ci		 * TODO this needs some double checking. Offlining code makes
62062306a36Sopenharmony_ci		 * sure to check pfn_valid but those checks might be just bogus
62162306a36Sopenharmony_ci		 */
62262306a36Sopenharmony_ci		if (WARN_ON(!valid_section_nr(section_nr)))
62362306a36Sopenharmony_ci			continue;
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci		ms = __nr_to_section(section_nr);
62662306a36Sopenharmony_ci		ms->section_mem_map &= ~SECTION_IS_ONLINE;
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci}
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP
63162306a36Sopenharmony_cistatic struct page * __meminit populate_section_memmap(unsigned long pfn,
63262306a36Sopenharmony_ci		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
63362306a36Sopenharmony_ci		struct dev_pagemap *pgmap)
63462306a36Sopenharmony_ci{
63562306a36Sopenharmony_ci	return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
63662306a36Sopenharmony_ci}
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_cistatic void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
63962306a36Sopenharmony_ci		struct vmem_altmap *altmap)
64062306a36Sopenharmony_ci{
64162306a36Sopenharmony_ci	unsigned long start = (unsigned long) pfn_to_page(pfn);
64262306a36Sopenharmony_ci	unsigned long end = start + nr_pages * sizeof(struct page);
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	vmemmap_free(start, end, altmap);
64562306a36Sopenharmony_ci}
64662306a36Sopenharmony_cistatic void free_map_bootmem(struct page *memmap)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	unsigned long start = (unsigned long)memmap;
64962306a36Sopenharmony_ci	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	vmemmap_free(start, end, NULL);
65262306a36Sopenharmony_ci}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_cistatic int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
65562306a36Sopenharmony_ci{
65662306a36Sopenharmony_ci	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
65762306a36Sopenharmony_ci	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
65862306a36Sopenharmony_ci	struct mem_section *ms = __pfn_to_section(pfn);
65962306a36Sopenharmony_ci	unsigned long *subsection_map = ms->usage
66062306a36Sopenharmony_ci		? &ms->usage->subsection_map[0] : NULL;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	subsection_mask_set(map, pfn, nr_pages);
66362306a36Sopenharmony_ci	if (subsection_map)
66462306a36Sopenharmony_ci		bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
66762306a36Sopenharmony_ci				"section already deactivated (%#lx + %ld)\n",
66862306a36Sopenharmony_ci				pfn, nr_pages))
66962306a36Sopenharmony_ci		return -EINVAL;
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
67262306a36Sopenharmony_ci	return 0;
67362306a36Sopenharmony_ci}
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_cistatic bool is_subsection_map_empty(struct mem_section *ms)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	return bitmap_empty(&ms->usage->subsection_map[0],
67862306a36Sopenharmony_ci			    SUBSECTIONS_PER_SECTION);
67962306a36Sopenharmony_ci}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_cistatic int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
68262306a36Sopenharmony_ci{
68362306a36Sopenharmony_ci	struct mem_section *ms = __pfn_to_section(pfn);
68462306a36Sopenharmony_ci	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
68562306a36Sopenharmony_ci	unsigned long *subsection_map;
68662306a36Sopenharmony_ci	int rc = 0;
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	subsection_mask_set(map, pfn, nr_pages);
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	subsection_map = &ms->usage->subsection_map[0];
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
69362306a36Sopenharmony_ci		rc = -EINVAL;
69462306a36Sopenharmony_ci	else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
69562306a36Sopenharmony_ci		rc = -EEXIST;
69662306a36Sopenharmony_ci	else
69762306a36Sopenharmony_ci		bitmap_or(subsection_map, map, subsection_map,
69862306a36Sopenharmony_ci				SUBSECTIONS_PER_SECTION);
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci	return rc;
70162306a36Sopenharmony_ci}
70262306a36Sopenharmony_ci#else
70362306a36Sopenharmony_cistatic struct page * __meminit populate_section_memmap(unsigned long pfn,
70462306a36Sopenharmony_ci		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
70562306a36Sopenharmony_ci		struct dev_pagemap *pgmap)
70662306a36Sopenharmony_ci{
70762306a36Sopenharmony_ci	return kvmalloc_node(array_size(sizeof(struct page),
70862306a36Sopenharmony_ci					PAGES_PER_SECTION), GFP_KERNEL, nid);
70962306a36Sopenharmony_ci}
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_cistatic void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
71262306a36Sopenharmony_ci		struct vmem_altmap *altmap)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	kvfree(pfn_to_page(pfn));
71562306a36Sopenharmony_ci}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_cistatic void free_map_bootmem(struct page *memmap)
71862306a36Sopenharmony_ci{
71962306a36Sopenharmony_ci	unsigned long maps_section_nr, removing_section_nr, i;
72062306a36Sopenharmony_ci	unsigned long magic, nr_pages;
72162306a36Sopenharmony_ci	struct page *page = virt_to_page(memmap);
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
72462306a36Sopenharmony_ci		>> PAGE_SHIFT;
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++, page++) {
72762306a36Sopenharmony_ci		magic = page->index;
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci		BUG_ON(magic == NODE_INFO);
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
73262306a36Sopenharmony_ci		removing_section_nr = page_private(page);
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci		/*
73562306a36Sopenharmony_ci		 * When this function is called, the removing section is
73662306a36Sopenharmony_ci		 * logical offlined state. This means all pages are isolated
73762306a36Sopenharmony_ci		 * from page allocator. If removing section's memmap is placed
73862306a36Sopenharmony_ci		 * on the same section, it must not be freed.
73962306a36Sopenharmony_ci		 * If it is freed, page allocator may allocate it which will
74062306a36Sopenharmony_ci		 * be removed physically soon.
74162306a36Sopenharmony_ci		 */
74262306a36Sopenharmony_ci		if (maps_section_nr != removing_section_nr)
74362306a36Sopenharmony_ci			put_page_bootmem(page);
74462306a36Sopenharmony_ci	}
74562306a36Sopenharmony_ci}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_cistatic int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
74862306a36Sopenharmony_ci{
74962306a36Sopenharmony_ci	return 0;
75062306a36Sopenharmony_ci}
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_cistatic bool is_subsection_map_empty(struct mem_section *ms)
75362306a36Sopenharmony_ci{
75462306a36Sopenharmony_ci	return true;
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_cistatic int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
75862306a36Sopenharmony_ci{
75962306a36Sopenharmony_ci	return 0;
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci#endif /* CONFIG_SPARSEMEM_VMEMMAP */
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci/*
76462306a36Sopenharmony_ci * To deactivate a memory region, there are 3 cases to handle across
76562306a36Sopenharmony_ci * two configurations (SPARSEMEM_VMEMMAP={y,n}):
76662306a36Sopenharmony_ci *
76762306a36Sopenharmony_ci * 1. deactivation of a partial hot-added section (only possible in
76862306a36Sopenharmony_ci *    the SPARSEMEM_VMEMMAP=y case).
76962306a36Sopenharmony_ci *      a) section was present at memory init.
77062306a36Sopenharmony_ci *      b) section was hot-added post memory init.
77162306a36Sopenharmony_ci * 2. deactivation of a complete hot-added section.
77262306a36Sopenharmony_ci * 3. deactivation of a complete section from memory init.
77362306a36Sopenharmony_ci *
77462306a36Sopenharmony_ci * For 1, when subsection_map does not empty we will not be freeing the
77562306a36Sopenharmony_ci * usage map, but still need to free the vmemmap range.
77662306a36Sopenharmony_ci *
77762306a36Sopenharmony_ci * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified
77862306a36Sopenharmony_ci */
77962306a36Sopenharmony_cistatic void section_deactivate(unsigned long pfn, unsigned long nr_pages,
78062306a36Sopenharmony_ci		struct vmem_altmap *altmap)
78162306a36Sopenharmony_ci{
78262306a36Sopenharmony_ci	struct mem_section *ms = __pfn_to_section(pfn);
78362306a36Sopenharmony_ci	bool section_is_early = early_section(ms);
78462306a36Sopenharmony_ci	struct page *memmap = NULL;
78562306a36Sopenharmony_ci	bool empty;
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	if (clear_subsection_map(pfn, nr_pages))
78862306a36Sopenharmony_ci		return;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	empty = is_subsection_map_empty(ms);
79162306a36Sopenharmony_ci	if (empty) {
79262306a36Sopenharmony_ci		unsigned long section_nr = pfn_to_section_nr(pfn);
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci		/*
79562306a36Sopenharmony_ci		 * Mark the section invalid so that valid_section()
79662306a36Sopenharmony_ci		 * return false. This prevents code from dereferencing
79762306a36Sopenharmony_ci		 * ms->usage array.
79862306a36Sopenharmony_ci		 */
79962306a36Sopenharmony_ci		ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci		/*
80262306a36Sopenharmony_ci		 * When removing an early section, the usage map is kept (as the
80362306a36Sopenharmony_ci		 * usage maps of other sections fall into the same page). It
80462306a36Sopenharmony_ci		 * will be re-used when re-adding the section - which is then no
80562306a36Sopenharmony_ci		 * longer an early section. If the usage map is PageReserved, it
80662306a36Sopenharmony_ci		 * was allocated during boot.
80762306a36Sopenharmony_ci		 */
80862306a36Sopenharmony_ci		if (!PageReserved(virt_to_page(ms->usage))) {
80962306a36Sopenharmony_ci			kfree_rcu(ms->usage, rcu);
81062306a36Sopenharmony_ci			WRITE_ONCE(ms->usage, NULL);
81162306a36Sopenharmony_ci		}
81262306a36Sopenharmony_ci		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
81362306a36Sopenharmony_ci	}
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	/*
81662306a36Sopenharmony_ci	 * The memmap of early sections is always fully populated. See
81762306a36Sopenharmony_ci	 * section_activate() and pfn_valid() .
81862306a36Sopenharmony_ci	 */
81962306a36Sopenharmony_ci	if (!section_is_early)
82062306a36Sopenharmony_ci		depopulate_section_memmap(pfn, nr_pages, altmap);
82162306a36Sopenharmony_ci	else if (memmap)
82262306a36Sopenharmony_ci		free_map_bootmem(memmap);
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	if (empty)
82562306a36Sopenharmony_ci		ms->section_mem_map = (unsigned long)NULL;
82662306a36Sopenharmony_ci}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_cistatic struct page * __meminit section_activate(int nid, unsigned long pfn,
82962306a36Sopenharmony_ci		unsigned long nr_pages, struct vmem_altmap *altmap,
83062306a36Sopenharmony_ci		struct dev_pagemap *pgmap)
83162306a36Sopenharmony_ci{
83262306a36Sopenharmony_ci	struct mem_section *ms = __pfn_to_section(pfn);
83362306a36Sopenharmony_ci	struct mem_section_usage *usage = NULL;
83462306a36Sopenharmony_ci	struct page *memmap;
83562306a36Sopenharmony_ci	int rc;
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci	if (!ms->usage) {
83862306a36Sopenharmony_ci		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
83962306a36Sopenharmony_ci		if (!usage)
84062306a36Sopenharmony_ci			return ERR_PTR(-ENOMEM);
84162306a36Sopenharmony_ci		ms->usage = usage;
84262306a36Sopenharmony_ci	}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	rc = fill_subsection_map(pfn, nr_pages);
84562306a36Sopenharmony_ci	if (rc) {
84662306a36Sopenharmony_ci		if (usage)
84762306a36Sopenharmony_ci			ms->usage = NULL;
84862306a36Sopenharmony_ci		kfree(usage);
84962306a36Sopenharmony_ci		return ERR_PTR(rc);
85062306a36Sopenharmony_ci	}
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	/*
85362306a36Sopenharmony_ci	 * The early init code does not consider partially populated
85462306a36Sopenharmony_ci	 * initial sections, it simply assumes that memory will never be
85562306a36Sopenharmony_ci	 * referenced.  If we hot-add memory into such a section then we
85662306a36Sopenharmony_ci	 * do not need to populate the memmap and can simply reuse what
85762306a36Sopenharmony_ci	 * is already there.
85862306a36Sopenharmony_ci	 */
85962306a36Sopenharmony_ci	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
86062306a36Sopenharmony_ci		return pfn_to_page(pfn);
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
86362306a36Sopenharmony_ci	if (!memmap) {
86462306a36Sopenharmony_ci		section_deactivate(pfn, nr_pages, altmap);
86562306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
86662306a36Sopenharmony_ci	}
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci	return memmap;
86962306a36Sopenharmony_ci}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci/**
87262306a36Sopenharmony_ci * sparse_add_section - add a memory section, or populate an existing one
87362306a36Sopenharmony_ci * @nid: The node to add section on
87462306a36Sopenharmony_ci * @start_pfn: start pfn of the memory range
87562306a36Sopenharmony_ci * @nr_pages: number of pfns to add in the section
87662306a36Sopenharmony_ci * @altmap: alternate pfns to allocate the memmap backing store
87762306a36Sopenharmony_ci * @pgmap: alternate compound page geometry for devmap mappings
87862306a36Sopenharmony_ci *
87962306a36Sopenharmony_ci * This is only intended for hotplug.
88062306a36Sopenharmony_ci *
88162306a36Sopenharmony_ci * Note that only VMEMMAP supports sub-section aligned hotplug,
88262306a36Sopenharmony_ci * the proper alignment and size are gated by check_pfn_span().
88362306a36Sopenharmony_ci *
88462306a36Sopenharmony_ci *
88562306a36Sopenharmony_ci * Return:
88662306a36Sopenharmony_ci * * 0		- On success.
88762306a36Sopenharmony_ci * * -EEXIST	- Section has been present.
88862306a36Sopenharmony_ci * * -ENOMEM	- Out of memory.
88962306a36Sopenharmony_ci */
89062306a36Sopenharmony_ciint __meminit sparse_add_section(int nid, unsigned long start_pfn,
89162306a36Sopenharmony_ci		unsigned long nr_pages, struct vmem_altmap *altmap,
89262306a36Sopenharmony_ci		struct dev_pagemap *pgmap)
89362306a36Sopenharmony_ci{
89462306a36Sopenharmony_ci	unsigned long section_nr = pfn_to_section_nr(start_pfn);
89562306a36Sopenharmony_ci	struct mem_section *ms;
89662306a36Sopenharmony_ci	struct page *memmap;
89762306a36Sopenharmony_ci	int ret;
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	ret = sparse_index_init(section_nr, nid);
90062306a36Sopenharmony_ci	if (ret < 0)
90162306a36Sopenharmony_ci		return ret;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
90462306a36Sopenharmony_ci	if (IS_ERR(memmap))
90562306a36Sopenharmony_ci		return PTR_ERR(memmap);
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	/*
90862306a36Sopenharmony_ci	 * Poison uninitialized struct pages in order to catch invalid flags
90962306a36Sopenharmony_ci	 * combinations.
91062306a36Sopenharmony_ci	 */
91162306a36Sopenharmony_ci	page_init_poison(memmap, sizeof(struct page) * nr_pages);
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	ms = __nr_to_section(section_nr);
91462306a36Sopenharmony_ci	set_section_nid(section_nr, nid);
91562306a36Sopenharmony_ci	__section_mark_present(ms, section_nr);
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	/* Align memmap to section boundary in the subsection case */
91862306a36Sopenharmony_ci	if (section_nr_to_pfn(section_nr) != start_pfn)
91962306a36Sopenharmony_ci		memmap = pfn_to_page(section_nr_to_pfn(section_nr));
92062306a36Sopenharmony_ci	sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci	return 0;
92362306a36Sopenharmony_ci}
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_civoid sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
92662306a36Sopenharmony_ci			   struct vmem_altmap *altmap)
92762306a36Sopenharmony_ci{
92862306a36Sopenharmony_ci	struct mem_section *ms = __pfn_to_section(pfn);
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	if (WARN_ON_ONCE(!valid_section(ms)))
93162306a36Sopenharmony_ci		return;
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	section_deactivate(pfn, nr_pages, altmap);
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */
936