162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * sparse memory mappings. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci#include <linux/mm.h> 662306a36Sopenharmony_ci#include <linux/slab.h> 762306a36Sopenharmony_ci#include <linux/mmzone.h> 862306a36Sopenharmony_ci#include <linux/memblock.h> 962306a36Sopenharmony_ci#include <linux/compiler.h> 1062306a36Sopenharmony_ci#include <linux/highmem.h> 1162306a36Sopenharmony_ci#include <linux/export.h> 1262306a36Sopenharmony_ci#include <linux/spinlock.h> 1362306a36Sopenharmony_ci#include <linux/vmalloc.h> 1462306a36Sopenharmony_ci#include <linux/swap.h> 1562306a36Sopenharmony_ci#include <linux/swapops.h> 1662306a36Sopenharmony_ci#include <linux/bootmem_info.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include "internal.h" 1962306a36Sopenharmony_ci#include <asm/dma.h> 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci/* 2262306a36Sopenharmony_ci * Permanent SPARSEMEM data: 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * 1) mem_section - memory sections, mem_map's for valid memory 2562306a36Sopenharmony_ci */ 2662306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_EXTREME 2762306a36Sopenharmony_cistruct mem_section **mem_section; 2862306a36Sopenharmony_ci#else 2962306a36Sopenharmony_cistruct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] 3062306a36Sopenharmony_ci ____cacheline_internodealigned_in_smp; 3162306a36Sopenharmony_ci#endif 3262306a36Sopenharmony_ciEXPORT_SYMBOL(mem_section); 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#ifdef NODE_NOT_IN_PAGE_FLAGS 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * If we did not store the node number in the page then we have to 3762306a36Sopenharmony_ci * do a lookup in the section_to_node_table in order to find which 3862306a36Sopenharmony_ci * node the page belongs to. 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_ci#if MAX_NUMNODES <= 256 4162306a36Sopenharmony_cistatic u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; 4262306a36Sopenharmony_ci#else 4362306a36Sopenharmony_cistatic u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; 4462306a36Sopenharmony_ci#endif 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ciint page_to_nid(const struct page *page) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci return section_to_node_table[page_to_section(page)]; 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ciEXPORT_SYMBOL(page_to_nid); 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic void set_section_nid(unsigned long section_nr, int nid) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci section_to_node_table[section_nr] = nid; 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ci#else /* !NODE_NOT_IN_PAGE_FLAGS */ 5762306a36Sopenharmony_cistatic inline void set_section_nid(unsigned long section_nr, int nid) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci#endif 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_EXTREME 6362306a36Sopenharmony_cistatic noinline struct mem_section __ref *sparse_index_alloc(int nid) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci struct mem_section *section = NULL; 6662306a36Sopenharmony_ci unsigned long array_size = SECTIONS_PER_ROOT * 6762306a36Sopenharmony_ci sizeof(struct mem_section); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci if (slab_is_available()) { 7062306a36Sopenharmony_ci section = kzalloc_node(array_size, GFP_KERNEL, nid); 7162306a36Sopenharmony_ci } else { 7262306a36Sopenharmony_ci section = memblock_alloc_node(array_size, SMP_CACHE_BYTES, 7362306a36Sopenharmony_ci nid); 7462306a36Sopenharmony_ci if (!section) 7562306a36Sopenharmony_ci panic("%s: Failed to allocate %lu bytes nid=%d\n", 7662306a36Sopenharmony_ci __func__, array_size, nid); 7762306a36Sopenharmony_ci } 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci return section; 8062306a36Sopenharmony_ci} 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_cistatic int __meminit sparse_index_init(unsigned long section_nr, int nid) 8362306a36Sopenharmony_ci{ 8462306a36Sopenharmony_ci unsigned long root = SECTION_NR_TO_ROOT(section_nr); 8562306a36Sopenharmony_ci struct mem_section *section; 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci /* 8862306a36Sopenharmony_ci * An existing section is possible in the sub-section hotplug 8962306a36Sopenharmony_ci * case. First hot-add instantiates, follow-on hot-add reuses 9062306a36Sopenharmony_ci * the existing section. 9162306a36Sopenharmony_ci * 9262306a36Sopenharmony_ci * The mem_hotplug_lock resolves the apparent race below. 9362306a36Sopenharmony_ci */ 9462306a36Sopenharmony_ci if (mem_section[root]) 9562306a36Sopenharmony_ci return 0; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci section = sparse_index_alloc(nid); 9862306a36Sopenharmony_ci if (!section) 9962306a36Sopenharmony_ci return -ENOMEM; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci mem_section[root] = section; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci return 0; 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci#else /* !SPARSEMEM_EXTREME */ 10662306a36Sopenharmony_cistatic inline int sparse_index_init(unsigned long section_nr, int nid) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci return 0; 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci#endif 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci/* 11362306a36Sopenharmony_ci * During early boot, before section_mem_map is used for an actual 11462306a36Sopenharmony_ci * mem_map, we use section_mem_map to store the section's NUMA 11562306a36Sopenharmony_ci * node. This keeps us from having to use another data structure. The 11662306a36Sopenharmony_ci * node information is cleared just before we store the real mem_map. 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_cistatic inline unsigned long sparse_encode_early_nid(int nid) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci return ((unsigned long)nid << SECTION_NID_SHIFT); 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic inline int sparse_early_nid(struct mem_section *section) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci return (section->section_mem_map >> SECTION_NID_SHIFT); 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci/* Validate the physical addressing limitations of the model */ 12962306a36Sopenharmony_cistatic void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, 13062306a36Sopenharmony_ci unsigned long *end_pfn) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci /* 13562306a36Sopenharmony_ci * Sanity checks - do not allow an architecture to pass 13662306a36Sopenharmony_ci * in larger pfns than the maximum scope of sparsemem: 13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_ci if (*start_pfn > max_sparsemem_pfn) { 13962306a36Sopenharmony_ci mminit_dprintk(MMINIT_WARNING, "pfnvalidation", 14062306a36Sopenharmony_ci "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n", 14162306a36Sopenharmony_ci *start_pfn, *end_pfn, max_sparsemem_pfn); 14262306a36Sopenharmony_ci WARN_ON_ONCE(1); 14362306a36Sopenharmony_ci *start_pfn = max_sparsemem_pfn; 14462306a36Sopenharmony_ci *end_pfn = max_sparsemem_pfn; 14562306a36Sopenharmony_ci } else if (*end_pfn > max_sparsemem_pfn) { 14662306a36Sopenharmony_ci mminit_dprintk(MMINIT_WARNING, "pfnvalidation", 14762306a36Sopenharmony_ci "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n", 14862306a36Sopenharmony_ci *start_pfn, *end_pfn, max_sparsemem_pfn); 14962306a36Sopenharmony_ci WARN_ON_ONCE(1); 15062306a36Sopenharmony_ci *end_pfn = max_sparsemem_pfn; 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci/* 15562306a36Sopenharmony_ci * There are a number of times that we loop over NR_MEM_SECTIONS, 15662306a36Sopenharmony_ci * looking for section_present() on each. But, when we have very 15762306a36Sopenharmony_ci * large physical address spaces, NR_MEM_SECTIONS can also be 15862306a36Sopenharmony_ci * very large which makes the loops quite long. 15962306a36Sopenharmony_ci * 16062306a36Sopenharmony_ci * Keeping track of this gives us an easy way to break out of 16162306a36Sopenharmony_ci * those loops early. 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ciunsigned long __highest_present_section_nr; 16462306a36Sopenharmony_cistatic void __section_mark_present(struct mem_section *ms, 16562306a36Sopenharmony_ci unsigned long section_nr) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci if (section_nr > __highest_present_section_nr) 16862306a36Sopenharmony_ci __highest_present_section_nr = section_nr; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci ms->section_mem_map |= SECTION_MARKED_PRESENT; 17162306a36Sopenharmony_ci} 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci#define for_each_present_section_nr(start, section_nr) \ 17462306a36Sopenharmony_ci for (section_nr = next_present_section_nr(start-1); \ 17562306a36Sopenharmony_ci section_nr != -1; \ 17662306a36Sopenharmony_ci section_nr = next_present_section_nr(section_nr)) 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_cistatic inline unsigned long first_present_section_nr(void) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci return next_present_section_nr(-1); 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 18462306a36Sopenharmony_cistatic void subsection_mask_set(unsigned long *map, unsigned long pfn, 18562306a36Sopenharmony_ci unsigned long nr_pages) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci int idx = subsection_map_index(pfn); 18862306a36Sopenharmony_ci int end = subsection_map_index(pfn + nr_pages - 1); 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci bitmap_set(map, idx, end - idx + 1); 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_civoid __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci int end_sec = pfn_to_section_nr(pfn + nr_pages - 1); 19662306a36Sopenharmony_ci unsigned long nr, start_sec = pfn_to_section_nr(pfn); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci if (!nr_pages) 19962306a36Sopenharmony_ci return; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci for (nr = start_sec; nr <= end_sec; nr++) { 20262306a36Sopenharmony_ci struct mem_section *ms; 20362306a36Sopenharmony_ci unsigned long pfns; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci pfns = min(nr_pages, PAGES_PER_SECTION 20662306a36Sopenharmony_ci - (pfn & ~PAGE_SECTION_MASK)); 20762306a36Sopenharmony_ci ms = __nr_to_section(nr); 20862306a36Sopenharmony_ci subsection_mask_set(ms->usage->subsection_map, pfn, pfns); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, 21162306a36Sopenharmony_ci pfns, subsection_map_index(pfn), 21262306a36Sopenharmony_ci subsection_map_index(pfn + pfns - 1)); 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci pfn += pfns; 21562306a36Sopenharmony_ci nr_pages -= pfns; 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci} 21862306a36Sopenharmony_ci#else 21962306a36Sopenharmony_civoid __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) 22062306a36Sopenharmony_ci{ 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci#endif 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci/* Record a memory area against a node. */ 22562306a36Sopenharmony_cistatic void __init memory_present(int nid, unsigned long start, unsigned long end) 22662306a36Sopenharmony_ci{ 22762306a36Sopenharmony_ci unsigned long pfn; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_EXTREME 23062306a36Sopenharmony_ci if (unlikely(!mem_section)) { 23162306a36Sopenharmony_ci unsigned long size, align; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci size = sizeof(struct mem_section *) * NR_SECTION_ROOTS; 23462306a36Sopenharmony_ci align = 1 << (INTERNODE_CACHE_SHIFT); 23562306a36Sopenharmony_ci mem_section = memblock_alloc(size, align); 23662306a36Sopenharmony_ci if (!mem_section) 23762306a36Sopenharmony_ci panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 23862306a36Sopenharmony_ci __func__, size, align); 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci#endif 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci start &= PAGE_SECTION_MASK; 24362306a36Sopenharmony_ci mminit_validate_memmodel_limits(&start, &end); 24462306a36Sopenharmony_ci for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { 24562306a36Sopenharmony_ci unsigned long section = pfn_to_section_nr(pfn); 24662306a36Sopenharmony_ci struct mem_section *ms; 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci sparse_index_init(section, nid); 24962306a36Sopenharmony_ci set_section_nid(section, nid); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci ms = __nr_to_section(section); 25262306a36Sopenharmony_ci if (!ms->section_mem_map) { 25362306a36Sopenharmony_ci ms->section_mem_map = sparse_encode_early_nid(nid) | 25462306a36Sopenharmony_ci SECTION_IS_ONLINE; 25562306a36Sopenharmony_ci __section_mark_present(ms, section); 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci} 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci/* 26162306a36Sopenharmony_ci * Mark all memblocks as present using memory_present(). 26262306a36Sopenharmony_ci * This is a convenience function that is useful to mark all of the systems 26362306a36Sopenharmony_ci * memory as present during initialization. 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_cistatic void __init memblocks_present(void) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci unsigned long start, end; 26862306a36Sopenharmony_ci int i, nid; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) 27162306a36Sopenharmony_ci memory_present(nid, start, end); 27262306a36Sopenharmony_ci} 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci/* 27562306a36Sopenharmony_ci * Subtle, we encode the real pfn into the mem_map such that 27662306a36Sopenharmony_ci * the identity pfn - section_mem_map will return the actual 27762306a36Sopenharmony_ci * physical page frame number. 27862306a36Sopenharmony_ci */ 27962306a36Sopenharmony_cistatic unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci unsigned long coded_mem_map = 28262306a36Sopenharmony_ci (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); 28362306a36Sopenharmony_ci BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT); 28462306a36Sopenharmony_ci BUG_ON(coded_mem_map & ~SECTION_MAP_MASK); 28562306a36Sopenharmony_ci return coded_mem_map; 28662306a36Sopenharmony_ci} 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 28962306a36Sopenharmony_ci/* 29062306a36Sopenharmony_ci * Decode mem_map from the coded memmap 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_cistruct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci /* mask off the extra low bits of information */ 29562306a36Sopenharmony_ci coded_mem_map &= SECTION_MAP_MASK; 29662306a36Sopenharmony_ci return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); 29762306a36Sopenharmony_ci} 29862306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */ 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_cistatic void __meminit sparse_init_one_section(struct mem_section *ms, 30162306a36Sopenharmony_ci unsigned long pnum, struct page *mem_map, 30262306a36Sopenharmony_ci struct mem_section_usage *usage, unsigned long flags) 30362306a36Sopenharmony_ci{ 30462306a36Sopenharmony_ci ms->section_mem_map &= ~SECTION_MAP_MASK; 30562306a36Sopenharmony_ci ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) 30662306a36Sopenharmony_ci | SECTION_HAS_MEM_MAP | flags; 30762306a36Sopenharmony_ci ms->usage = usage; 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_cistatic unsigned long usemap_size(void) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); 31362306a36Sopenharmony_ci} 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_cisize_t mem_section_usage_size(void) 31662306a36Sopenharmony_ci{ 31762306a36Sopenharmony_ci return sizeof(struct mem_section_usage) + usemap_size(); 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTREMOVE 32162306a36Sopenharmony_cistatic inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci#ifndef CONFIG_NUMA 32462306a36Sopenharmony_ci VM_BUG_ON(pgdat != &contig_page_data); 32562306a36Sopenharmony_ci return __pa_symbol(&contig_page_data); 32662306a36Sopenharmony_ci#else 32762306a36Sopenharmony_ci return __pa(pgdat); 32862306a36Sopenharmony_ci#endif 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic struct mem_section_usage * __init 33262306a36Sopenharmony_cisparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 33362306a36Sopenharmony_ci unsigned long size) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci struct mem_section_usage *usage; 33662306a36Sopenharmony_ci unsigned long goal, limit; 33762306a36Sopenharmony_ci int nid; 33862306a36Sopenharmony_ci /* 33962306a36Sopenharmony_ci * A page may contain usemaps for other sections preventing the 34062306a36Sopenharmony_ci * page being freed and making a section unremovable while 34162306a36Sopenharmony_ci * other sections referencing the usemap remain active. Similarly, 34262306a36Sopenharmony_ci * a pgdat can prevent a section being removed. If section A 34362306a36Sopenharmony_ci * contains a pgdat and section B contains the usemap, both 34462306a36Sopenharmony_ci * sections become inter-dependent. This allocates usemaps 34562306a36Sopenharmony_ci * from the same section as the pgdat where possible to avoid 34662306a36Sopenharmony_ci * this problem. 34762306a36Sopenharmony_ci */ 34862306a36Sopenharmony_ci goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); 34962306a36Sopenharmony_ci limit = goal + (1UL << PA_SECTION_SHIFT); 35062306a36Sopenharmony_ci nid = early_pfn_to_nid(goal >> PAGE_SHIFT); 35162306a36Sopenharmony_ciagain: 35262306a36Sopenharmony_ci usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); 35362306a36Sopenharmony_ci if (!usage && limit) { 35462306a36Sopenharmony_ci limit = 0; 35562306a36Sopenharmony_ci goto again; 35662306a36Sopenharmony_ci } 35762306a36Sopenharmony_ci return usage; 35862306a36Sopenharmony_ci} 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_cistatic void __init check_usemap_section_nr(int nid, 36162306a36Sopenharmony_ci struct mem_section_usage *usage) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci unsigned long usemap_snr, pgdat_snr; 36462306a36Sopenharmony_ci static unsigned long old_usemap_snr; 36562306a36Sopenharmony_ci static unsigned long old_pgdat_snr; 36662306a36Sopenharmony_ci struct pglist_data *pgdat = NODE_DATA(nid); 36762306a36Sopenharmony_ci int usemap_nid; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci /* First call */ 37062306a36Sopenharmony_ci if (!old_usemap_snr) { 37162306a36Sopenharmony_ci old_usemap_snr = NR_MEM_SECTIONS; 37262306a36Sopenharmony_ci old_pgdat_snr = NR_MEM_SECTIONS; 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); 37662306a36Sopenharmony_ci pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); 37762306a36Sopenharmony_ci if (usemap_snr == pgdat_snr) 37862306a36Sopenharmony_ci return; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr) 38162306a36Sopenharmony_ci /* skip redundant message */ 38262306a36Sopenharmony_ci return; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci old_usemap_snr = usemap_snr; 38562306a36Sopenharmony_ci old_pgdat_snr = pgdat_snr; 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); 38862306a36Sopenharmony_ci if (usemap_nid != nid) { 38962306a36Sopenharmony_ci pr_info("node %d must be removed before remove section %ld\n", 39062306a36Sopenharmony_ci nid, usemap_snr); 39162306a36Sopenharmony_ci return; 39262306a36Sopenharmony_ci } 39362306a36Sopenharmony_ci /* 39462306a36Sopenharmony_ci * There is a circular dependency. 39562306a36Sopenharmony_ci * Some platforms allow un-removable section because they will just 39662306a36Sopenharmony_ci * gather other removable sections for dynamic partitioning. 39762306a36Sopenharmony_ci * Just notify un-removable section's number here. 39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_ci pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n", 40062306a36Sopenharmony_ci usemap_snr, pgdat_snr, nid); 40162306a36Sopenharmony_ci} 40262306a36Sopenharmony_ci#else 40362306a36Sopenharmony_cistatic struct mem_section_usage * __init 40462306a36Sopenharmony_cisparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 40562306a36Sopenharmony_ci unsigned long size) 40662306a36Sopenharmony_ci{ 40762306a36Sopenharmony_ci return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id); 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_cistatic void __init check_usemap_section_nr(int nid, 41162306a36Sopenharmony_ci struct mem_section_usage *usage) 41262306a36Sopenharmony_ci{ 41362306a36Sopenharmony_ci} 41462306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTREMOVE */ 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 41762306a36Sopenharmony_cistatic unsigned long __init section_map_size(void) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci#else 42362306a36Sopenharmony_cistatic unsigned long __init section_map_size(void) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); 42662306a36Sopenharmony_ci} 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_cistruct page __init *__populate_section_memmap(unsigned long pfn, 42962306a36Sopenharmony_ci unsigned long nr_pages, int nid, struct vmem_altmap *altmap, 43062306a36Sopenharmony_ci struct dev_pagemap *pgmap) 43162306a36Sopenharmony_ci{ 43262306a36Sopenharmony_ci unsigned long size = section_map_size(); 43362306a36Sopenharmony_ci struct page *map = sparse_buffer_alloc(size); 43462306a36Sopenharmony_ci phys_addr_t addr = __pa(MAX_DMA_ADDRESS); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci if (map) 43762306a36Sopenharmony_ci return map; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci map = memmap_alloc(size, size, addr, nid, false); 44062306a36Sopenharmony_ci if (!map) 44162306a36Sopenharmony_ci panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n", 44262306a36Sopenharmony_ci __func__, size, PAGE_SIZE, nid, &addr); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci return map; 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_cistatic void *sparsemap_buf __meminitdata; 44962306a36Sopenharmony_cistatic void *sparsemap_buf_end __meminitdata; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_cistatic inline void __meminit sparse_buffer_free(unsigned long size) 45262306a36Sopenharmony_ci{ 45362306a36Sopenharmony_ci WARN_ON(!sparsemap_buf || size == 0); 45462306a36Sopenharmony_ci memblock_free(sparsemap_buf, size); 45562306a36Sopenharmony_ci} 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_cistatic void __init sparse_buffer_init(unsigned long size, int nid) 45862306a36Sopenharmony_ci{ 45962306a36Sopenharmony_ci phys_addr_t addr = __pa(MAX_DMA_ADDRESS); 46062306a36Sopenharmony_ci WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */ 46162306a36Sopenharmony_ci /* 46262306a36Sopenharmony_ci * Pre-allocated buffer is mainly used by __populate_section_memmap 46362306a36Sopenharmony_ci * and we want it to be properly aligned to the section size - this is 46462306a36Sopenharmony_ci * especially the case for VMEMMAP which maps memmap to PMDs 46562306a36Sopenharmony_ci */ 46662306a36Sopenharmony_ci sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); 46762306a36Sopenharmony_ci sparsemap_buf_end = sparsemap_buf + size; 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_cistatic void __init sparse_buffer_fini(void) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci unsigned long size = sparsemap_buf_end - sparsemap_buf; 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci if (sparsemap_buf && size > 0) 47562306a36Sopenharmony_ci sparse_buffer_free(size); 47662306a36Sopenharmony_ci sparsemap_buf = NULL; 47762306a36Sopenharmony_ci} 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_civoid * __meminit sparse_buffer_alloc(unsigned long size) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci void *ptr = NULL; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if (sparsemap_buf) { 48462306a36Sopenharmony_ci ptr = (void *) roundup((unsigned long)sparsemap_buf, size); 48562306a36Sopenharmony_ci if (ptr + size > sparsemap_buf_end) 48662306a36Sopenharmony_ci ptr = NULL; 48762306a36Sopenharmony_ci else { 48862306a36Sopenharmony_ci /* Free redundant aligned space */ 48962306a36Sopenharmony_ci if ((unsigned long)(ptr - sparsemap_buf) > 0) 49062306a36Sopenharmony_ci sparse_buffer_free((unsigned long)(ptr - sparsemap_buf)); 49162306a36Sopenharmony_ci sparsemap_buf = ptr + size; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci } 49462306a36Sopenharmony_ci return ptr; 49562306a36Sopenharmony_ci} 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_civoid __weak __meminit vmemmap_populate_print_last(void) 49862306a36Sopenharmony_ci{ 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci/* 50262306a36Sopenharmony_ci * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end) 50362306a36Sopenharmony_ci * And number of present sections in this node is map_count. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_cistatic void __init sparse_init_nid(int nid, unsigned long pnum_begin, 50662306a36Sopenharmony_ci unsigned long pnum_end, 50762306a36Sopenharmony_ci unsigned long map_count) 50862306a36Sopenharmony_ci{ 50962306a36Sopenharmony_ci struct mem_section_usage *usage; 51062306a36Sopenharmony_ci unsigned long pnum; 51162306a36Sopenharmony_ci struct page *map; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), 51462306a36Sopenharmony_ci mem_section_usage_size() * map_count); 51562306a36Sopenharmony_ci if (!usage) { 51662306a36Sopenharmony_ci pr_err("%s: node[%d] usemap allocation failed", __func__, nid); 51762306a36Sopenharmony_ci goto failed; 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci sparse_buffer_init(map_count * section_map_size(), nid); 52062306a36Sopenharmony_ci for_each_present_section_nr(pnum_begin, pnum) { 52162306a36Sopenharmony_ci unsigned long pfn = section_nr_to_pfn(pnum); 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci if (pnum >= pnum_end) 52462306a36Sopenharmony_ci break; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci map = __populate_section_memmap(pfn, PAGES_PER_SECTION, 52762306a36Sopenharmony_ci nid, NULL, NULL); 52862306a36Sopenharmony_ci if (!map) { 52962306a36Sopenharmony_ci pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", 53062306a36Sopenharmony_ci __func__, nid); 53162306a36Sopenharmony_ci pnum_begin = pnum; 53262306a36Sopenharmony_ci sparse_buffer_fini(); 53362306a36Sopenharmony_ci goto failed; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci check_usemap_section_nr(nid, usage); 53662306a36Sopenharmony_ci sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, 53762306a36Sopenharmony_ci SECTION_IS_EARLY); 53862306a36Sopenharmony_ci usage = (void *) usage + mem_section_usage_size(); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci sparse_buffer_fini(); 54162306a36Sopenharmony_ci return; 54262306a36Sopenharmony_cifailed: 54362306a36Sopenharmony_ci /* We failed to allocate, mark all the following pnums as not present */ 54462306a36Sopenharmony_ci for_each_present_section_nr(pnum_begin, pnum) { 54562306a36Sopenharmony_ci struct mem_section *ms; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci if (pnum >= pnum_end) 54862306a36Sopenharmony_ci break; 54962306a36Sopenharmony_ci ms = __nr_to_section(pnum); 55062306a36Sopenharmony_ci ms->section_mem_map = 0; 55162306a36Sopenharmony_ci } 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci/* 55562306a36Sopenharmony_ci * Allocate the accumulated non-linear sections, allocate a mem_map 55662306a36Sopenharmony_ci * for each and record the physical to section mapping. 55762306a36Sopenharmony_ci */ 55862306a36Sopenharmony_civoid __init sparse_init(void) 55962306a36Sopenharmony_ci{ 56062306a36Sopenharmony_ci unsigned long pnum_end, pnum_begin, map_count = 1; 56162306a36Sopenharmony_ci int nid_begin; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci memblocks_present(); 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci pnum_begin = first_present_section_nr(); 56662306a36Sopenharmony_ci nid_begin = sparse_early_nid(__nr_to_section(pnum_begin)); 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ 56962306a36Sopenharmony_ci set_pageblock_order(); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci for_each_present_section_nr(pnum_begin + 1, pnum_end) { 57262306a36Sopenharmony_ci int nid = sparse_early_nid(__nr_to_section(pnum_end)); 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci if (nid == nid_begin) { 57562306a36Sopenharmony_ci map_count++; 57662306a36Sopenharmony_ci continue; 57762306a36Sopenharmony_ci } 57862306a36Sopenharmony_ci /* Init node with sections in range [pnum_begin, pnum_end) */ 57962306a36Sopenharmony_ci sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count); 58062306a36Sopenharmony_ci nid_begin = nid; 58162306a36Sopenharmony_ci pnum_begin = pnum_end; 58262306a36Sopenharmony_ci map_count = 1; 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci /* cover the last node */ 58562306a36Sopenharmony_ci sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count); 58662306a36Sopenharmony_ci vmemmap_populate_print_last(); 58762306a36Sopenharmony_ci} 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci/* Mark all memory sections within the pfn range as online */ 59262306a36Sopenharmony_civoid online_mem_sections(unsigned long start_pfn, unsigned long end_pfn) 59362306a36Sopenharmony_ci{ 59462306a36Sopenharmony_ci unsigned long pfn; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 59762306a36Sopenharmony_ci unsigned long section_nr = pfn_to_section_nr(pfn); 59862306a36Sopenharmony_ci struct mem_section *ms; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci /* onlining code should never touch invalid ranges */ 60162306a36Sopenharmony_ci if (WARN_ON(!valid_section_nr(section_nr))) 60262306a36Sopenharmony_ci continue; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci ms = __nr_to_section(section_nr); 60562306a36Sopenharmony_ci ms->section_mem_map |= SECTION_IS_ONLINE; 60662306a36Sopenharmony_ci } 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci/* Mark all memory sections within the pfn range as offline */ 61062306a36Sopenharmony_civoid offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci unsigned long pfn; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 61562306a36Sopenharmony_ci unsigned long section_nr = pfn_to_section_nr(pfn); 61662306a36Sopenharmony_ci struct mem_section *ms; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci /* 61962306a36Sopenharmony_ci * TODO this needs some double checking. Offlining code makes 62062306a36Sopenharmony_ci * sure to check pfn_valid but those checks might be just bogus 62162306a36Sopenharmony_ci */ 62262306a36Sopenharmony_ci if (WARN_ON(!valid_section_nr(section_nr))) 62362306a36Sopenharmony_ci continue; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci ms = __nr_to_section(section_nr); 62662306a36Sopenharmony_ci ms->section_mem_map &= ~SECTION_IS_ONLINE; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci} 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 63162306a36Sopenharmony_cistatic struct page * __meminit populate_section_memmap(unsigned long pfn, 63262306a36Sopenharmony_ci unsigned long nr_pages, int nid, struct vmem_altmap *altmap, 63362306a36Sopenharmony_ci struct dev_pagemap *pgmap) 63462306a36Sopenharmony_ci{ 63562306a36Sopenharmony_ci return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap); 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, 63962306a36Sopenharmony_ci struct vmem_altmap *altmap) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci unsigned long start = (unsigned long) pfn_to_page(pfn); 64262306a36Sopenharmony_ci unsigned long end = start + nr_pages * sizeof(struct page); 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci vmemmap_free(start, end, altmap); 64562306a36Sopenharmony_ci} 64662306a36Sopenharmony_cistatic void free_map_bootmem(struct page *memmap) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci unsigned long start = (unsigned long)memmap; 64962306a36Sopenharmony_ci unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci vmemmap_free(start, end, NULL); 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_cistatic int clear_subsection_map(unsigned long pfn, unsigned long nr_pages) 65562306a36Sopenharmony_ci{ 65662306a36Sopenharmony_ci DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; 65762306a36Sopenharmony_ci DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 }; 65862306a36Sopenharmony_ci struct mem_section *ms = __pfn_to_section(pfn); 65962306a36Sopenharmony_ci unsigned long *subsection_map = ms->usage 66062306a36Sopenharmony_ci ? &ms->usage->subsection_map[0] : NULL; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci subsection_mask_set(map, pfn, nr_pages); 66362306a36Sopenharmony_ci if (subsection_map) 66462306a36Sopenharmony_ci bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION); 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION), 66762306a36Sopenharmony_ci "section already deactivated (%#lx + %ld)\n", 66862306a36Sopenharmony_ci pfn, nr_pages)) 66962306a36Sopenharmony_ci return -EINVAL; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); 67262306a36Sopenharmony_ci return 0; 67362306a36Sopenharmony_ci} 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_cistatic bool is_subsection_map_empty(struct mem_section *ms) 67662306a36Sopenharmony_ci{ 67762306a36Sopenharmony_ci return bitmap_empty(&ms->usage->subsection_map[0], 67862306a36Sopenharmony_ci SUBSECTIONS_PER_SECTION); 67962306a36Sopenharmony_ci} 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_cistatic int fill_subsection_map(unsigned long pfn, unsigned long nr_pages) 68262306a36Sopenharmony_ci{ 68362306a36Sopenharmony_ci struct mem_section *ms = __pfn_to_section(pfn); 68462306a36Sopenharmony_ci DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; 68562306a36Sopenharmony_ci unsigned long *subsection_map; 68662306a36Sopenharmony_ci int rc = 0; 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci subsection_mask_set(map, pfn, nr_pages); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci subsection_map = &ms->usage->subsection_map[0]; 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci if (bitmap_empty(map, SUBSECTIONS_PER_SECTION)) 69362306a36Sopenharmony_ci rc = -EINVAL; 69462306a36Sopenharmony_ci else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION)) 69562306a36Sopenharmony_ci rc = -EEXIST; 69662306a36Sopenharmony_ci else 69762306a36Sopenharmony_ci bitmap_or(subsection_map, map, subsection_map, 69862306a36Sopenharmony_ci SUBSECTIONS_PER_SECTION); 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci return rc; 70162306a36Sopenharmony_ci} 70262306a36Sopenharmony_ci#else 70362306a36Sopenharmony_cistatic struct page * __meminit populate_section_memmap(unsigned long pfn, 70462306a36Sopenharmony_ci unsigned long nr_pages, int nid, struct vmem_altmap *altmap, 70562306a36Sopenharmony_ci struct dev_pagemap *pgmap) 70662306a36Sopenharmony_ci{ 70762306a36Sopenharmony_ci return kvmalloc_node(array_size(sizeof(struct page), 70862306a36Sopenharmony_ci PAGES_PER_SECTION), GFP_KERNEL, nid); 70962306a36Sopenharmony_ci} 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_cistatic void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, 71262306a36Sopenharmony_ci struct vmem_altmap *altmap) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci kvfree(pfn_to_page(pfn)); 71562306a36Sopenharmony_ci} 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_cistatic void free_map_bootmem(struct page *memmap) 71862306a36Sopenharmony_ci{ 71962306a36Sopenharmony_ci unsigned long maps_section_nr, removing_section_nr, i; 72062306a36Sopenharmony_ci unsigned long magic, nr_pages; 72162306a36Sopenharmony_ci struct page *page = virt_to_page(memmap); 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) 72462306a36Sopenharmony_ci >> PAGE_SHIFT; 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++, page++) { 72762306a36Sopenharmony_ci magic = page->index; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci BUG_ON(magic == NODE_INFO); 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci maps_section_nr = pfn_to_section_nr(page_to_pfn(page)); 73262306a36Sopenharmony_ci removing_section_nr = page_private(page); 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci /* 73562306a36Sopenharmony_ci * When this function is called, the removing section is 73662306a36Sopenharmony_ci * logical offlined state. This means all pages are isolated 73762306a36Sopenharmony_ci * from page allocator. If removing section's memmap is placed 73862306a36Sopenharmony_ci * on the same section, it must not be freed. 73962306a36Sopenharmony_ci * If it is freed, page allocator may allocate it which will 74062306a36Sopenharmony_ci * be removed physically soon. 74162306a36Sopenharmony_ci */ 74262306a36Sopenharmony_ci if (maps_section_nr != removing_section_nr) 74362306a36Sopenharmony_ci put_page_bootmem(page); 74462306a36Sopenharmony_ci } 74562306a36Sopenharmony_ci} 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_cistatic int clear_subsection_map(unsigned long pfn, unsigned long nr_pages) 74862306a36Sopenharmony_ci{ 74962306a36Sopenharmony_ci return 0; 75062306a36Sopenharmony_ci} 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_cistatic bool is_subsection_map_empty(struct mem_section *ms) 75362306a36Sopenharmony_ci{ 75462306a36Sopenharmony_ci return true; 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_cistatic int fill_subsection_map(unsigned long pfn, unsigned long nr_pages) 75862306a36Sopenharmony_ci{ 75962306a36Sopenharmony_ci return 0; 76062306a36Sopenharmony_ci} 76162306a36Sopenharmony_ci#endif /* CONFIG_SPARSEMEM_VMEMMAP */ 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci/* 76462306a36Sopenharmony_ci * To deactivate a memory region, there are 3 cases to handle across 76562306a36Sopenharmony_ci * two configurations (SPARSEMEM_VMEMMAP={y,n}): 76662306a36Sopenharmony_ci * 76762306a36Sopenharmony_ci * 1. deactivation of a partial hot-added section (only possible in 76862306a36Sopenharmony_ci * the SPARSEMEM_VMEMMAP=y case). 76962306a36Sopenharmony_ci * a) section was present at memory init. 77062306a36Sopenharmony_ci * b) section was hot-added post memory init. 77162306a36Sopenharmony_ci * 2. deactivation of a complete hot-added section. 77262306a36Sopenharmony_ci * 3. deactivation of a complete section from memory init. 77362306a36Sopenharmony_ci * 77462306a36Sopenharmony_ci * For 1, when subsection_map does not empty we will not be freeing the 77562306a36Sopenharmony_ci * usage map, but still need to free the vmemmap range. 77662306a36Sopenharmony_ci * 77762306a36Sopenharmony_ci * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified 77862306a36Sopenharmony_ci */ 77962306a36Sopenharmony_cistatic void section_deactivate(unsigned long pfn, unsigned long nr_pages, 78062306a36Sopenharmony_ci struct vmem_altmap *altmap) 78162306a36Sopenharmony_ci{ 78262306a36Sopenharmony_ci struct mem_section *ms = __pfn_to_section(pfn); 78362306a36Sopenharmony_ci bool section_is_early = early_section(ms); 78462306a36Sopenharmony_ci struct page *memmap = NULL; 78562306a36Sopenharmony_ci bool empty; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci if (clear_subsection_map(pfn, nr_pages)) 78862306a36Sopenharmony_ci return; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci empty = is_subsection_map_empty(ms); 79162306a36Sopenharmony_ci if (empty) { 79262306a36Sopenharmony_ci unsigned long section_nr = pfn_to_section_nr(pfn); 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci /* 79562306a36Sopenharmony_ci * Mark the section invalid so that valid_section() 79662306a36Sopenharmony_ci * return false. This prevents code from dereferencing 79762306a36Sopenharmony_ci * ms->usage array. 79862306a36Sopenharmony_ci */ 79962306a36Sopenharmony_ci ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci /* 80262306a36Sopenharmony_ci * When removing an early section, the usage map is kept (as the 80362306a36Sopenharmony_ci * usage maps of other sections fall into the same page). It 80462306a36Sopenharmony_ci * will be re-used when re-adding the section - which is then no 80562306a36Sopenharmony_ci * longer an early section. If the usage map is PageReserved, it 80662306a36Sopenharmony_ci * was allocated during boot. 80762306a36Sopenharmony_ci */ 80862306a36Sopenharmony_ci if (!PageReserved(virt_to_page(ms->usage))) { 80962306a36Sopenharmony_ci kfree_rcu(ms->usage, rcu); 81062306a36Sopenharmony_ci WRITE_ONCE(ms->usage, NULL); 81162306a36Sopenharmony_ci } 81262306a36Sopenharmony_ci memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); 81362306a36Sopenharmony_ci } 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci /* 81662306a36Sopenharmony_ci * The memmap of early sections is always fully populated. See 81762306a36Sopenharmony_ci * section_activate() and pfn_valid() . 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_ci if (!section_is_early) 82062306a36Sopenharmony_ci depopulate_section_memmap(pfn, nr_pages, altmap); 82162306a36Sopenharmony_ci else if (memmap) 82262306a36Sopenharmony_ci free_map_bootmem(memmap); 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci if (empty) 82562306a36Sopenharmony_ci ms->section_mem_map = (unsigned long)NULL; 82662306a36Sopenharmony_ci} 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_cistatic struct page * __meminit section_activate(int nid, unsigned long pfn, 82962306a36Sopenharmony_ci unsigned long nr_pages, struct vmem_altmap *altmap, 83062306a36Sopenharmony_ci struct dev_pagemap *pgmap) 83162306a36Sopenharmony_ci{ 83262306a36Sopenharmony_ci struct mem_section *ms = __pfn_to_section(pfn); 83362306a36Sopenharmony_ci struct mem_section_usage *usage = NULL; 83462306a36Sopenharmony_ci struct page *memmap; 83562306a36Sopenharmony_ci int rc; 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci if (!ms->usage) { 83862306a36Sopenharmony_ci usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); 83962306a36Sopenharmony_ci if (!usage) 84062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 84162306a36Sopenharmony_ci ms->usage = usage; 84262306a36Sopenharmony_ci } 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci rc = fill_subsection_map(pfn, nr_pages); 84562306a36Sopenharmony_ci if (rc) { 84662306a36Sopenharmony_ci if (usage) 84762306a36Sopenharmony_ci ms->usage = NULL; 84862306a36Sopenharmony_ci kfree(usage); 84962306a36Sopenharmony_ci return ERR_PTR(rc); 85062306a36Sopenharmony_ci } 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci /* 85362306a36Sopenharmony_ci * The early init code does not consider partially populated 85462306a36Sopenharmony_ci * initial sections, it simply assumes that memory will never be 85562306a36Sopenharmony_ci * referenced. If we hot-add memory into such a section then we 85662306a36Sopenharmony_ci * do not need to populate the memmap and can simply reuse what 85762306a36Sopenharmony_ci * is already there. 85862306a36Sopenharmony_ci */ 85962306a36Sopenharmony_ci if (nr_pages < PAGES_PER_SECTION && early_section(ms)) 86062306a36Sopenharmony_ci return pfn_to_page(pfn); 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap); 86362306a36Sopenharmony_ci if (!memmap) { 86462306a36Sopenharmony_ci section_deactivate(pfn, nr_pages, altmap); 86562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 86662306a36Sopenharmony_ci } 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci return memmap; 86962306a36Sopenharmony_ci} 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci/** 87262306a36Sopenharmony_ci * sparse_add_section - add a memory section, or populate an existing one 87362306a36Sopenharmony_ci * @nid: The node to add section on 87462306a36Sopenharmony_ci * @start_pfn: start pfn of the memory range 87562306a36Sopenharmony_ci * @nr_pages: number of pfns to add in the section 87662306a36Sopenharmony_ci * @altmap: alternate pfns to allocate the memmap backing store 87762306a36Sopenharmony_ci * @pgmap: alternate compound page geometry for devmap mappings 87862306a36Sopenharmony_ci * 87962306a36Sopenharmony_ci * This is only intended for hotplug. 88062306a36Sopenharmony_ci * 88162306a36Sopenharmony_ci * Note that only VMEMMAP supports sub-section aligned hotplug, 88262306a36Sopenharmony_ci * the proper alignment and size are gated by check_pfn_span(). 88362306a36Sopenharmony_ci * 88462306a36Sopenharmony_ci * 88562306a36Sopenharmony_ci * Return: 88662306a36Sopenharmony_ci * * 0 - On success. 88762306a36Sopenharmony_ci * * -EEXIST - Section has been present. 88862306a36Sopenharmony_ci * * -ENOMEM - Out of memory. 88962306a36Sopenharmony_ci */ 89062306a36Sopenharmony_ciint __meminit sparse_add_section(int nid, unsigned long start_pfn, 89162306a36Sopenharmony_ci unsigned long nr_pages, struct vmem_altmap *altmap, 89262306a36Sopenharmony_ci struct dev_pagemap *pgmap) 89362306a36Sopenharmony_ci{ 89462306a36Sopenharmony_ci unsigned long section_nr = pfn_to_section_nr(start_pfn); 89562306a36Sopenharmony_ci struct mem_section *ms; 89662306a36Sopenharmony_ci struct page *memmap; 89762306a36Sopenharmony_ci int ret; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci ret = sparse_index_init(section_nr, nid); 90062306a36Sopenharmony_ci if (ret < 0) 90162306a36Sopenharmony_ci return ret; 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap); 90462306a36Sopenharmony_ci if (IS_ERR(memmap)) 90562306a36Sopenharmony_ci return PTR_ERR(memmap); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci /* 90862306a36Sopenharmony_ci * Poison uninitialized struct pages in order to catch invalid flags 90962306a36Sopenharmony_ci * combinations. 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_ci page_init_poison(memmap, sizeof(struct page) * nr_pages); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci ms = __nr_to_section(section_nr); 91462306a36Sopenharmony_ci set_section_nid(section_nr, nid); 91562306a36Sopenharmony_ci __section_mark_present(ms, section_nr); 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci /* Align memmap to section boundary in the subsection case */ 91862306a36Sopenharmony_ci if (section_nr_to_pfn(section_nr) != start_pfn) 91962306a36Sopenharmony_ci memmap = pfn_to_page(section_nr_to_pfn(section_nr)); 92062306a36Sopenharmony_ci sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0); 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci return 0; 92362306a36Sopenharmony_ci} 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_civoid sparse_remove_section(unsigned long pfn, unsigned long nr_pages, 92662306a36Sopenharmony_ci struct vmem_altmap *altmap) 92762306a36Sopenharmony_ci{ 92862306a36Sopenharmony_ci struct mem_section *ms = __pfn_to_section(pfn); 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci if (WARN_ON_ONCE(!valid_section(ms))) 93162306a36Sopenharmony_ci return; 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci section_deactivate(pfn, nr_pages, altmap); 93462306a36Sopenharmony_ci} 93562306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */ 936