// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm_init.c - Memory initialisation verification and debugging
 *
 * Copyright 2008 IBM Corporation, 2008
 * Author Mel Gorman <mel@csn.ul.ie>
 *
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/export.h>
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/mman.h>
#include <linux/memblock.h>
#include <linux/page-isolation.h>
#include <linux/padata.h>
#include <linux/nmi.h>
#include <linux/buffer_head.h>
#include <linux/kmemleak.h>
#include <linux/kfence.h>
#include <linux/page_ext.h>
#include <linux/pti.h>
#include <linux/pgtable.h>
#include <linux/swap.h>
#include <linux/cma.h>
#include <linux/crash_dump.h>
#include "internal.h"
#include "slab.h"
#include "shuffle.h"

#include <asm/setup.h>

#ifdef CONFIG_DEBUG_MEMORY_INIT
int __meminitdata mminit_loglevel;

/* The zonelists are simply reported, validation is manual. */
void __init mminit_verify_zonelist(void)
{
	int nid;

	if (mminit_loglevel < MMINIT_VERIFY)
		return;

	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid);
		struct zone *zone;
		struct zoneref *z;
		struct zonelist *zonelist;
		int i, listid, zoneid;

		BUILD_BUG_ON(MAX_ZONELISTS > 2);
		for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) {

			/* Identify the zone and nodelist */
			zoneid = i % MAX_NR_ZONES;
			listid = i / MAX_NR_ZONES;
			zonelist = &pgdat->node_zonelists[listid];
			zone = &pgdat->node_zones[zoneid];
			if (!populated_zone(zone))
				continue;

			/* Print information about the zonelist */
			printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ",
				listid > 0 ? "thisnode" : "general", nid,
				zone->name);

			/* Iterate the zonelist */
			for_each_zone_zonelist(zone, z, zonelist, zoneid)
				pr_cont("%d:%s ", zone_to_nid(zone), zone->name);
			pr_cont("\n");
		}
	}
}
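/*
 * Sketch of the resulting output, assuming a hypothetical two-node x86-64
 * box with mminit_loglevel >= MMINIT_VERIFY (exact zones vary by config):
 *
 *   mminit::zonelist general 0:Normal = 0:Normal 0:DMA32 0:DMA 1:Normal
 *   mminit::zonelist thisnode 0:Normal = 0:Normal 0:DMA32 0:DMA
 *
 * The "general" zonelist may fall back to other nodes; the "thisnode"
 * list (ZONELIST_NOFALLBACK) is used for __GFP_THISNODE allocations.
 */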
"thisnode" : "general", nid, 6862306a36Sopenharmony_ci zone->name); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci /* Iterate the zonelist */ 7162306a36Sopenharmony_ci for_each_zone_zonelist(zone, z, zonelist, zoneid) 7262306a36Sopenharmony_ci pr_cont("%d:%s ", zone_to_nid(zone), zone->name); 7362306a36Sopenharmony_ci pr_cont("\n"); 7462306a36Sopenharmony_ci } 7562306a36Sopenharmony_ci } 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_civoid __init mminit_verify_pageflags_layout(void) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci int shift, width; 8162306a36Sopenharmony_ci unsigned long or_mask, add_mask; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci shift = BITS_PER_LONG; 8462306a36Sopenharmony_ci width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH 8562306a36Sopenharmony_ci - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH; 8662306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", 8762306a36Sopenharmony_ci "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n", 8862306a36Sopenharmony_ci SECTIONS_WIDTH, 8962306a36Sopenharmony_ci NODES_WIDTH, 9062306a36Sopenharmony_ci ZONES_WIDTH, 9162306a36Sopenharmony_ci LAST_CPUPID_WIDTH, 9262306a36Sopenharmony_ci KASAN_TAG_WIDTH, 9362306a36Sopenharmony_ci LRU_GEN_WIDTH, 9462306a36Sopenharmony_ci LRU_REFS_WIDTH, 9562306a36Sopenharmony_ci NR_PAGEFLAGS); 9662306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", 9762306a36Sopenharmony_ci "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n", 9862306a36Sopenharmony_ci SECTIONS_SHIFT, 9962306a36Sopenharmony_ci NODES_SHIFT, 10062306a36Sopenharmony_ci ZONES_SHIFT, 10162306a36Sopenharmony_ci LAST_CPUPID_SHIFT, 10262306a36Sopenharmony_ci KASAN_TAG_WIDTH); 10362306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts", 10462306a36Sopenharmony_ci "Section %lu Node %lu Zone %lu Lastcpupid %lu Kasantag %lu\n", 10562306a36Sopenharmony_ci (unsigned long)SECTIONS_PGSHIFT, 10662306a36Sopenharmony_ci (unsigned long)NODES_PGSHIFT, 10762306a36Sopenharmony_ci (unsigned long)ZONES_PGSHIFT, 10862306a36Sopenharmony_ci (unsigned long)LAST_CPUPID_PGSHIFT, 10962306a36Sopenharmony_ci (unsigned long)KASAN_TAG_PGSHIFT); 11062306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid", 11162306a36Sopenharmony_ci "Node/Zone ID: %lu -> %lu\n", 11262306a36Sopenharmony_ci (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT), 11362306a36Sopenharmony_ci (unsigned long)ZONEID_PGOFF); 11462306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage", 11562306a36Sopenharmony_ci "location: %d -> %d layout %d -> %d unused %d -> %d page-flags\n", 11662306a36Sopenharmony_ci shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0); 11762306a36Sopenharmony_ci#ifdef NODE_NOT_IN_PAGE_FLAGS 11862306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags", 11962306a36Sopenharmony_ci "Node not in page flags"); 12062306a36Sopenharmony_ci#endif 12162306a36Sopenharmony_ci#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS 12262306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags", 12362306a36Sopenharmony_ci "Last cpupid not in page flags"); 12462306a36Sopenharmony_ci#endif 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci if (SECTIONS_WIDTH) { 12762306a36Sopenharmony_ci shift -= SECTIONS_WIDTH; 12862306a36Sopenharmony_ci BUG_ON(shift != SECTIONS_PGSHIFT); 12962306a36Sopenharmony_ci } 13062306a36Sopenharmony_ci if (NODES_WIDTH) { 
static __init int set_mminit_loglevel(char *str)
{
	get_option(&str, &mminit_loglevel);
	return 0;
}
early_param("mminit_loglevel", set_mminit_loglevel);
#endif /* CONFIG_DEBUG_MEMORY_INIT */

struct kobject *mm_kobj;

#ifdef CONFIG_SMP
s32 vm_committed_as_batch = 32;

void mm_compute_batch(int overcommit_policy)
{
	u64 memsized_batch;
	s32 nr = num_present_cpus();
	s32 batch = max_t(s32, nr*2, 32);
	unsigned long ram_pages = totalram_pages();

	/*
	 * For policy OVERCOMMIT_NEVER, set batch size to 0.4% of
	 * (total memory/#cpus), and lift it to 25% for other policies
	 * to ease the possible lock contention for percpu_counter
	 * vm_committed_as, while the max limit is INT_MAX
	 */
	if (overcommit_policy == OVERCOMMIT_NEVER)
		memsized_batch = min_t(u64, ram_pages/nr/256, INT_MAX);
	else
		memsized_batch = min_t(u64, ram_pages/nr/4, INT_MAX);

	vm_committed_as_batch = max_t(s32, memsized_batch, batch);
}
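/*
 * Worked example with hypothetical numbers: 16GiB of RAM (4194304 4KiB
 * pages) and 8 CPUs gives 4194304/8/256 = 2048 pages per batch under
 * OVERCOMMIT_NEVER and 4194304/8/4 = 131072 otherwise, both well above
 * the max(nr*2, 32) floor.
 */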
static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
					unsigned long action, void *arg)
{
	switch (action) {
	case MEM_ONLINE:
	case MEM_OFFLINE:
		mm_compute_batch(sysctl_overcommit_memory);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static int __init mm_compute_batch_init(void)
{
	mm_compute_batch(sysctl_overcommit_memory);
	hotplug_memory_notifier(mm_compute_batch_notifier, MM_COMPUTE_BATCH_PRI);
	return 0;
}

__initcall(mm_compute_batch_init);

#endif

static int __init mm_sysfs_init(void)
{
	mm_kobj = kobject_create_and_add("mm", kernel_kobj);
	if (!mm_kobj)
		return -ENOMEM;

	return 0;
}
postcore_initcall(mm_sysfs_init);

static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata;
static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata;
static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata;

static unsigned long required_kernelcore __initdata;
static unsigned long required_kernelcore_percent __initdata;
static unsigned long required_movablecore __initdata;
static unsigned long required_movablecore_percent __initdata;

static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
static unsigned long dma_reserve __initdata;

static bool deferred_struct_pages __meminitdata;

static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);

static int __init cmdline_parse_core(char *p, unsigned long *core,
				     unsigned long *percent)
{
	unsigned long long coremem;
	char *endptr;

	if (!p)
		return -EINVAL;

	/* Value may be a percentage of total memory, otherwise bytes */
	coremem = simple_strtoull(p, &endptr, 0);
	if (*endptr == '%') {
		/* Paranoid check for percent values greater than 100 */
		WARN_ON(coremem > 100);

		*percent = coremem;
	} else {
		coremem = memparse(p, &p);
		/* Paranoid check that UL is enough for the coremem value */
		WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);

		*core = coremem >> PAGE_SHIFT;
		*percent = 0UL;
	}
	return 0;
}
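/*
 * For example (assuming 4KiB pages), "kernelcore=512M" stores 131072 in
 * *core and zeroes *percent, while "kernelcore=25%" stores 25 in
 * *percent and leaves *core untouched.
 */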
bool mirrored_kernelcore __initdata_memblock;

/*
 * kernelcore=size sets the amount of memory for use for allocations that
 * cannot be reclaimed or migrated.
 */
static int __init cmdline_parse_kernelcore(char *p)
{
	/* parse kernelcore=mirror */
	if (parse_option_str(p, "mirror")) {
		mirrored_kernelcore = true;
		return 0;
	}

	return cmdline_parse_core(p, &required_kernelcore,
				  &required_kernelcore_percent);
}
early_param("kernelcore", cmdline_parse_kernelcore);

/*
 * movablecore=size sets the amount of memory for use for allocations that
 * can be reclaimed or migrated.
 */
static int __init cmdline_parse_movablecore(char *p)
{
	return cmdline_parse_core(p, &required_movablecore,
				  &required_movablecore_percent);
}
early_param("movablecore", cmdline_parse_movablecore);

/*
 * early_calculate_totalpages()
 * Sum pages in active regions for movable zone.
 * Populate N_MEMORY for calculating usable_nodes.
 */
static unsigned long __init early_calculate_totalpages(void)
{
	unsigned long totalpages = 0;
	unsigned long start_pfn, end_pfn;
	int i, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		unsigned long pages = end_pfn - start_pfn;

		totalpages += pages;
		if (pages)
			node_set_state(nid, N_MEMORY);
	}
	return totalpages;
}

/*
 * This finds a zone that can be used for ZONE_MOVABLE pages. The
 * assumption is made that zones within a node are ordered in monotonically
 * increasing memory addresses so that the "highest" populated zone is used
 */
static void __init find_usable_zone_for_movable(void)
{
	int zone_index;
	for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
		if (zone_index == ZONE_MOVABLE)
			continue;

		if (arch_zone_highest_possible_pfn[zone_index] >
				arch_zone_lowest_possible_pfn[zone_index])
			break;
	}

	VM_BUG_ON(zone_index == -1);
	movable_zone = zone_index;
}
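/*
 * On a typical 64-bit configuration without highmem this selects
 * ZONE_NORMAL, i.e. ZONE_MOVABLE is carved out of the top of the
 * highest zone that actually has a usable PFN range.
 */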
/*
 * Find the PFN the Movable zone begins in each node. Kernel memory
 * is spread evenly between nodes as long as the nodes have enough
 * memory. When they don't, some nodes will have more kernelcore than
 * others.
 */
static void __init find_zone_movable_pfns_for_nodes(void)
{
	int i, nid;
	unsigned long usable_startpfn;
	unsigned long kernelcore_node, kernelcore_remaining;
	/* save the state before borrowing the nodemask */
	nodemask_t saved_node_state = node_states[N_MEMORY];
	unsigned long totalpages = early_calculate_totalpages();
	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
	struct memblock_region *r;

	/* Need to find movable_zone earlier when movable_node is specified. */
	find_usable_zone_for_movable();

	/*
	 * If movable_node is specified, ignore kernelcore and movablecore
	 * options.
	 */
	if (movable_node_is_enabled()) {
		for_each_mem_region(r) {
			if (!memblock_is_hotpluggable(r))
				continue;

			nid = memblock_get_region_node(r);

			usable_startpfn = PFN_DOWN(r->base);
			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
				min(usable_startpfn, zone_movable_pfn[nid]) :
				usable_startpfn;
		}

		goto out2;
	}

	/*
	 * If kernelcore=mirror is specified, ignore movablecore option
	 */
	if (mirrored_kernelcore) {
		bool mem_below_4gb_not_mirrored = false;

		if (!memblock_has_mirror()) {
			pr_warn("The system has no mirror memory, ignore kernelcore=mirror.\n");
			goto out;
		}

		if (is_kdump_kernel()) {
			pr_warn("The system is under kdump, ignore kernelcore=mirror.\n");
			goto out;
		}

		for_each_mem_region(r) {
			if (memblock_is_mirror(r))
				continue;

			nid = memblock_get_region_node(r);

			usable_startpfn = memblock_region_memory_base_pfn(r);

			if (usable_startpfn < PHYS_PFN(SZ_4G)) {
				mem_below_4gb_not_mirrored = true;
				continue;
			}

			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
				min(usable_startpfn, zone_movable_pfn[nid]) :
				usable_startpfn;
		}

		if (mem_below_4gb_not_mirrored)
			pr_warn("This configuration results in unmirrored kernel memory.\n");

		goto out2;
	}

	/*
	 * If kernelcore=nn% or movablecore=nn% was specified, calculate the
	 * amount of necessary memory.
	 */
	if (required_kernelcore_percent)
		required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
				      10000UL;
	if (required_movablecore_percent)
		required_movablecore = (totalpages * 100 * required_movablecore_percent) /
					10000UL;
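	/*
	 * The scaling above reduces to totalpages * percent / 100; e.g.
	 * "kernelcore=30%" on a hypothetical 1000000-page machine requests
	 * 300000 pages of kernelcore.
	 */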
	/*
	 * If movablecore= was specified, calculate what size of
	 * kernelcore that corresponds so that memory usable for
	 * any allocation type is evenly spread. If both kernelcore
	 * and movablecore are specified, then the value of kernelcore
	 * will be used for required_kernelcore if it's greater than
	 * what movablecore would have allowed.
	 */
	if (required_movablecore) {
		unsigned long corepages;

		/*
		 * Round-up so that ZONE_MOVABLE is at least as large as what
		 * was requested by the user
		 */
		required_movablecore =
			roundup(required_movablecore, MAX_ORDER_NR_PAGES);
		required_movablecore = min(totalpages, required_movablecore);
		corepages = totalpages - required_movablecore;

		required_kernelcore = max(required_kernelcore, corepages);
	}

	/*
	 * If kernelcore was not specified or kernelcore size is larger
	 * than totalpages, there is no ZONE_MOVABLE.
	 */
	if (!required_kernelcore || required_kernelcore >= totalpages)
		goto out;

	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
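	/*
	 * Example of the pass below, with hypothetical numbers: for
	 * required_kernelcore = 300000 pages spread over 3 nodes with
	 * memory, each node is first asked to host 100000 kernelcore
	 * pages. Nodes too small to do so keep what they can, and the
	 * shortfall is redistributed over the remaining nodes by the
	 * restart loop at the bottom of the function.
	 */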
restart:
	/* Spread kernelcore memory as evenly as possible throughout nodes */
	kernelcore_node = required_kernelcore / usable_nodes;
	for_each_node_state(nid, N_MEMORY) {
		unsigned long start_pfn, end_pfn;

		/*
		 * Recalculate kernelcore_node if the division per node
		 * now exceeds what is necessary to satisfy the requested
		 * amount of memory for the kernel
		 */
		if (required_kernelcore < kernelcore_node)
			kernelcore_node = required_kernelcore / usable_nodes;

		/*
		 * As the map is walked, we track how much memory is usable
		 * by the kernel using kernelcore_remaining. When it is
		 * 0, the rest of the node is usable by ZONE_MOVABLE
		 */
		kernelcore_remaining = kernelcore_node;

		/* Go through each range of PFNs within this node */
		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
			unsigned long size_pages;

			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
			if (start_pfn >= end_pfn)
				continue;

			/* Account for what is only usable for kernelcore */
			if (start_pfn < usable_startpfn) {
				unsigned long kernel_pages;
				kernel_pages = min(end_pfn, usable_startpfn)
								- start_pfn;

				kernelcore_remaining -= min(kernel_pages,
							kernelcore_remaining);
				required_kernelcore -= min(kernel_pages,
							required_kernelcore);

				/* Continue if range is now fully accounted */
				if (end_pfn <= usable_startpfn) {

					/*
					 * Push zone_movable_pfn to the end so
					 * that if we have to rebalance
					 * kernelcore across nodes, we will
					 * not double account here
					 */
					zone_movable_pfn[nid] = end_pfn;
					continue;
				}
				start_pfn = usable_startpfn;
			}

			/*
			 * The usable PFN range for ZONE_MOVABLE is from
			 * start_pfn->end_pfn. Calculate size_pages as the
			 * number of pages used as kernelcore
			 */
			size_pages = end_pfn - start_pfn;
			if (size_pages > kernelcore_remaining)
				size_pages = kernelcore_remaining;
			zone_movable_pfn[nid] = start_pfn + size_pages;

			/*
			 * Some kernelcore has been met, update counts and
			 * break if the kernelcore for this node has been
			 * satisfied
			 */
			required_kernelcore -= min(required_kernelcore,
								size_pages);
			kernelcore_remaining -= size_pages;
			if (!kernelcore_remaining)
				break;
		}
	}
	/*
	 * If there is still required_kernelcore, we do another pass with one
	 * less node in the count. This will push zone_movable_pfn[nid] further
	 * along on the nodes that still have memory until kernelcore is
	 * satisfied
	 */
	usable_nodes--;
	if (usable_nodes && required_kernelcore > usable_nodes)
		goto restart;

out2:
	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
	for (nid = 0; nid < MAX_NUMNODES; nid++) {
		unsigned long start_pfn, end_pfn;

		zone_movable_pfn[nid] =
			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		if (zone_movable_pfn[nid] >= end_pfn)
			zone_movable_pfn[nid] = 0;
	}

out:
	/* restore the node_state */
	node_states[N_MEMORY] = saved_node_state;
}

static void __meminit __init_single_page(struct page *page, unsigned long pfn,
				unsigned long zone, int nid)
{
	mm_zero_struct_page(page);
	set_page_links(page, zone, nid, pfn);
	init_page_count(page);
	page_mapcount_reset(page);
	page_cpupid_reset_last(page);
	page_kasan_tag_reset(page);

	INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
	/* The shift won't overflow because ZONE_NORMAL is below 4G. */
	if (!is_highmem_idx(zone))
		set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
}
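/*
 * After __init_single_page() the page has a refcount of one, an empty
 * lru list head and a cleared mapcount (page_mapcount_reset() stores -1).
 * Callers then layer state such as PageReserved or the pageblock
 * migratetype on top of this baseline.
 */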
#ifdef CONFIG_NUMA
/*
 * During memory init memblocks map pfns to nids. The search is expensive and
 * this caches recent lookups. The implementation of __early_pfn_to_nid
 * treats start/end as pfns.
 */
struct mminit_pfnnid_cache {
	unsigned long last_start;
	unsigned long last_end;
	int last_nid;
};

static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;

/*
 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
 */
static int __meminit __early_pfn_to_nid(unsigned long pfn,
					struct mminit_pfnnid_cache *state)
{
	unsigned long start_pfn, end_pfn;
	int nid;

	if (state->last_start <= pfn && pfn < state->last_end)
		return state->last_nid;

	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
	if (nid != NUMA_NO_NODE) {
		state->last_start = start_pfn;
		state->last_end = end_pfn;
		state->last_nid = nid;
	}

	return nid;
}

int __meminit early_pfn_to_nid(unsigned long pfn)
{
	static DEFINE_SPINLOCK(early_pfn_lock);
	int nid;

	spin_lock(&early_pfn_lock);
	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
	if (nid < 0)
		nid = first_online_node;
	spin_unlock(&early_pfn_lock);

	return nid;
}

int hashdist = HASHDIST_DEFAULT;

static int __init set_hashdist(char *str)
{
	if (!str)
		return 0;
	hashdist = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("hashdist=", set_hashdist);

static inline void fixup_hashdist(void)
{
	if (num_node_state(N_MEMORY) == 1)
		hashdist = 0;
}
#else
static inline void fixup_hashdist(void) {}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
{
	pgdat->first_deferred_pfn = ULONG_MAX;
}

/* Returns true if the struct page for the pfn is initialised */
static inline bool __meminit early_page_initialised(unsigned long pfn, int nid)
{
	if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn)
		return false;

	return true;
}

/*
 * Returns true when the remaining initialisation should be deferred until
 * later in the boot cycle when it can be parallelised.
 */
static bool __meminit
defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
	static unsigned long prev_end_pfn, nr_initialised;

	if (early_page_ext_enabled())
		return false;
	/*
	 * prev_end_pfn is a static that contains the end of the previous zone.
	 * No need to protect because called very early in boot before smp_init.
	 */
	if (prev_end_pfn != end_pfn) {
		prev_end_pfn = end_pfn;
		nr_initialised = 0;
	}

	/* Always populate low zones for address-constrained allocations */
	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
		return false;

	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
		return true;
	/*
	 * We start only with one section of pages, more pages are added as
	 * needed until the rest of deferred pages are initialized.
	 */
	nr_initialised++;
	if ((nr_initialised > PAGES_PER_SECTION) &&
	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
		NODE_DATA(nid)->first_deferred_pfn = pfn;
		return true;
	}
	return false;
}
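/*
 * Rough sense of scale: with 4KiB pages and 128MiB sections (e.g. x86-64,
 * where PAGES_PER_SECTION is 32768), the low zones plus at least one
 * section of the top zone are initialised eagerly here, and everything
 * past first_deferred_pfn is left to the parallel deferred-init threads
 * later in boot.
 */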
static void __meminit init_reserved_page(unsigned long pfn, int nid)
{
	pg_data_t *pgdat;
	int zid;

	if (early_page_initialised(pfn, nid))
		return;

	pgdat = NODE_DATA(nid);

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *zone = &pgdat->node_zones[zid];

		if (zone_spans_pfn(zone, pfn))
			break;
	}
	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
}
#else
static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}

static inline bool early_page_initialised(unsigned long pfn, int nid)
{
	return true;
}

static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
	return false;
}

static inline void init_reserved_page(unsigned long pfn, int nid)
{
}
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

/*
 * Initialised pages do not have PageReserved set. This function is
 * called for each range allocated by the bootmem allocator and
 * marks the pages PageReserved. The remaining valid pages are later
 * sent to the buddy page allocator.
 */
void __meminit reserve_bootmem_region(phys_addr_t start,
				      phys_addr_t end, int nid)
{
	unsigned long start_pfn = PFN_DOWN(start);
	unsigned long end_pfn = PFN_UP(end);

	for (; start_pfn < end_pfn; start_pfn++) {
		if (pfn_valid(start_pfn)) {
			struct page *page = pfn_to_page(start_pfn);

			init_reserved_page(start_pfn, nid);

			/* Avoid false-positive PageTail() */
			INIT_LIST_HEAD(&page->lru);

			/*
			 * no need for atomic set_bit because the struct
			 * page is not visible yet so nobody should
			 * access it yet.
			 */
			__SetPageReserved(page);
		}
	}
}
/* If zone is ZONE_MOVABLE but memory is mirrored, it is an overlapped init */
static bool __meminit
overlap_memmap_init(unsigned long zone, unsigned long *pfn)
{
	static struct memblock_region *r;

	if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
		if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
			for_each_mem_region(r) {
				if (*pfn < memblock_region_memory_end_pfn(r))
					break;
			}
		}
		if (*pfn >= memblock_region_memory_base_pfn(r) &&
		    memblock_is_mirror(r)) {
			*pfn = memblock_region_memory_end_pfn(r);
			return true;
		}
	}
	return false;
}
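/*
 * In other words, while initialising ZONE_MOVABLE under kernelcore=mirror,
 * pfns inside a mirrored memblock region are skipped here; those pages
 * are instead initialised as part of the kernel zones that overlap the
 * same range.
 */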
/*
 * Only struct pages that correspond to ranges defined by memblock.memory
 * are zeroed and initialized by going through __init_single_page() during
 * memmap_init_zone_range().
 *
 * But, there could be struct pages that correspond to holes in
 * memblock.memory. This can happen because of the following reasons:
 * - physical memory bank size is not necessarily an exact multiple of the
 *   arbitrary section size
 * - early reserved memory may not be listed in memblock.memory
 * - memory layouts defined with memmap= kernel parameter may not align
 *   nicely with memmap sections
 *
 * Explicitly initialize those struct pages so that:
 * - PG_Reserved is set
 * - zone and node links point to zone and node that span the page if the
 *   hole is in the middle of a zone
 * - zone and node links point to adjacent zone/node if the hole falls on
 *   the zone boundary; the pages in such holes will be prepended to the
 *   zone/node above the hole except for the trailing pages in the last
 *   section that will be appended to the zone/node below.
 */
static void __init init_unavailable_range(unsigned long spfn,
					  unsigned long epfn,
					  int zone, int node)
{
	unsigned long pfn;
	u64 pgcnt = 0;

	for (pfn = spfn; pfn < epfn; pfn++) {
		if (!pfn_valid(pageblock_start_pfn(pfn))) {
			pfn = pageblock_end_pfn(pfn) - 1;
			continue;
		}
		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
		__SetPageReserved(pfn_to_page(pfn));
		pgcnt++;
	}

	if (pgcnt)
		pr_info("On node %d, zone %s: %lld pages in unavailable ranges",
			node, zone_names[zone], pgcnt);
}
/*
 * Initially all pages are reserved - free ones are freed
 * up by memblock_free_all() once the early boot process is
 * done. Non-atomic initialization, single-pass.
 *
 * All aligned pageblocks are initialized to the specified migratetype
 * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
 * zone stats (e.g., nr_isolate_pageblock) are touched.
 */
void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, unsigned long zone_end_pfn,
		enum meminit_context context,
		struct vmem_altmap *altmap, int migratetype)
{
	unsigned long pfn, end_pfn = start_pfn + size;
	struct page *page;

	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;

#ifdef CONFIG_ZONE_DEVICE
	/*
	 * Honor reservation requested by the driver for this ZONE_DEVICE
	 * memory. We limit the total number of pages to initialize to just
	 * those that might contain the memory mapping. We will defer the
	 * ZONE_DEVICE page initialization until after we have released
	 * the hotplug lock.
	 */
	if (zone == ZONE_DEVICE) {
		if (!altmap)
			return;

		if (start_pfn == altmap->base_pfn)
			start_pfn += altmap->reserve;
		end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
	}
#endif

	for (pfn = start_pfn; pfn < end_pfn; ) {
		/*
		 * There can be holes in boot-time mem_map[]s handed to this
		 * function. They do not exist on hotplugged memory.
		 */
		if (context == MEMINIT_EARLY) {
			if (overlap_memmap_init(zone, &pfn))
				continue;
			if (defer_init(nid, pfn, zone_end_pfn)) {
				deferred_struct_pages = true;
				break;
			}
		}

		page = pfn_to_page(pfn);
		__init_single_page(page, pfn, zone, nid);
		if (context == MEMINIT_HOTPLUG)
			__SetPageReserved(page);

		/*
		 * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
		 * such that unmovable allocations won't be scattered all
		 * over the place during system boot.
		 */
		if (pageblock_aligned(pfn)) {
			set_pageblock_migratetype(page, migratetype);
			cond_resched();
		}
		pfn++;
	}
}
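/*
 * Note that the migratetype above is tracked at pageblock granularity:
 * with 4KiB pages and pageblock_order == 9 (typical on x86-64) one
 * migratetype covers a 2MiB / 512-page chunk rather than a single page.
 */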
static void __init memmap_init_zone_range(struct zone *zone,
					  unsigned long start_pfn,
					  unsigned long end_pfn,
					  unsigned long *hole_pfn)
{
	unsigned long zone_start_pfn = zone->zone_start_pfn;
	unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
	int nid = zone_to_nid(zone), zone_id = zone_idx(zone);

	start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
	end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);

	if (start_pfn >= end_pfn)
		return;

	memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
			  zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);

	if (*hole_pfn < start_pfn)
		init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);

	*hole_pfn = end_pfn;
}

static void __init memmap_init(void)
{
	unsigned long start_pfn, end_pfn;
	unsigned long hole_pfn = 0;
	int i, j, zone_id = 0, nid;

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
		struct pglist_data *node = NODE_DATA(nid);

		for (j = 0; j < MAX_NR_ZONES; j++) {
			struct zone *zone = node->node_zones + j;

			if (!populated_zone(zone))
				continue;

			memmap_init_zone_range(zone, start_pfn, end_pfn,
					       &hole_pfn);
			zone_id = j;
		}
	}

#ifdef CONFIG_SPARSEMEM
	/*
	 * Initialize the memory map for hole in the range [memory_end,
	 * section_end].
	 * Append the pages in this hole to the highest zone in the last
	 * node.
	 * The call to init_unavailable_range() is outside the ifdef to
	 * silence the compiler warning about zone_id set but not used;
	 * for FLATMEM it is a nop anyway.
	 */
	end_pfn = round_up(end_pfn, PAGES_PER_SECTION);
	if (hole_pfn < end_pfn)
#endif
		init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
}
#ifdef CONFIG_ZONE_DEVICE
static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
					  unsigned long zone_idx, int nid,
					  struct dev_pagemap *pgmap)
{

	__init_single_page(page, pfn, zone_idx, nid);

	/*
	 * Mark page reserved as it will need to wait for onlining
	 * phase for it to be fully associated with a zone.
	 *
	 * We can use the non-atomic __set_bit operation for setting
	 * the flag as we are still initializing the pages.
	 */
	__SetPageReserved(page);

	/*
	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
	 * and zone_device_data. It is a bug if a ZONE_DEVICE page is
	 * ever freed or placed on a driver-private list.
	 */
	page->pgmap = pgmap;
	page->zone_device_data = NULL;

	/*
	 * Mark the block movable so that blocks are reserved for
	 * movable at startup. This will force kernel allocations
	 * to reserve their blocks rather than leaking throughout
	 * the address space during boot when many long-lived
	 * kernel allocations are made.
	 *
	 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
	 * because this is done early in section_activate()
	 */
	if (pageblock_aligned(pfn)) {
		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
		cond_resched();
	}

	/*
	 * ZONE_DEVICE pages are released directly to the driver page allocator
	 * which will set the page count to 1 when allocating the page.
	 */
	if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
	    pgmap->type == MEMORY_DEVICE_COHERENT)
		set_page_count(page, 0);
}

/*
 * With compound page geometry and when struct pages are stored in ram most
 * tail pages are reused. Consequently, the amount of unique struct pages to
 * initialize is a lot smaller than the total amount of struct pages being
 * mapped. This is a paired / mild layering violation with explicit knowledge
 * of how the sparse_vmemmap internals handle compound pages in the lack
 * of an altmap. See vmemmap_populate_compound_pages().
 */
static inline unsigned long compound_nr_pages(struct vmem_altmap *altmap,
					      struct dev_pagemap *pgmap)
{
	if (!vmemmap_can_optimize(altmap, pgmap))
		return pgmap_vmemmap_nr(pgmap);

	return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
}
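/*
 * E.g. with 4KiB pages and a 64-byte struct page, one page of memmap
 * describes 64 pages, so the optimized case above touches only
 * VMEMMAP_RESERVE_NR * 64 unique struct pages per compound page instead
 * of the full pgmap_vmemmap_nr(pgmap).
 */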
static void __ref memmap_init_compound(struct page *head,
				       unsigned long head_pfn,
				       unsigned long zone_idx, int nid,
				       struct dev_pagemap *pgmap,
				       unsigned long nr_pages)
{
	unsigned long pfn, end_pfn = head_pfn + nr_pages;
	unsigned int order = pgmap->vmemmap_shift;

	__SetPageHead(head);
	for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
		struct page *page = pfn_to_page(pfn);

		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
		prep_compound_tail(head, pfn - head_pfn);
		set_page_count(page, 0);

		/*
		 * The first tail page stores important compound page info.
		 * Call prep_compound_head() after the first tail page has
		 * been initialized, to not have the data overwritten.
		 */
		if (pfn == head_pfn + 1)
			prep_compound_head(head, order);
	}
}
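/*
 * For instance, a device-dax mapping with 2MiB geometry (assuming 4KiB
 * base pages) has pgmap->vmemmap_shift == 9, so the loop above prepares
 * 511 tail pages for each 512-pfn compound page.
 */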
This preserves the assumption that
111462306a36Sopenharmony_ci * zones within a node are ordered by monotonically increasing memory addresses.
111562306a36Sopenharmony_ci */
111662306a36Sopenharmony_cistatic void __init adjust_zone_range_for_zone_movable(int nid,
111762306a36Sopenharmony_ci unsigned long zone_type,
111862306a36Sopenharmony_ci unsigned long node_end_pfn,
111962306a36Sopenharmony_ci unsigned long *zone_start_pfn,
112062306a36Sopenharmony_ci unsigned long *zone_end_pfn)
112162306a36Sopenharmony_ci{
112262306a36Sopenharmony_ci /* Only adjust if ZONE_MOVABLE is on this node */
112362306a36Sopenharmony_ci if (zone_movable_pfn[nid]) {
112462306a36Sopenharmony_ci /* Size ZONE_MOVABLE */
112562306a36Sopenharmony_ci if (zone_type == ZONE_MOVABLE) {
112662306a36Sopenharmony_ci *zone_start_pfn = zone_movable_pfn[nid];
112762306a36Sopenharmony_ci *zone_end_pfn = min(node_end_pfn,
112862306a36Sopenharmony_ci arch_zone_highest_possible_pfn[movable_zone]);
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci /* Adjust for ZONE_MOVABLE starting within this range */
113162306a36Sopenharmony_ci } else if (!mirrored_kernelcore &&
113262306a36Sopenharmony_ci *zone_start_pfn < zone_movable_pfn[nid] &&
113362306a36Sopenharmony_ci *zone_end_pfn > zone_movable_pfn[nid]) {
113462306a36Sopenharmony_ci *zone_end_pfn = zone_movable_pfn[nid];
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_ci /* Check if this whole range is within ZONE_MOVABLE */
113762306a36Sopenharmony_ci } else if (*zone_start_pfn >= zone_movable_pfn[nid])
113862306a36Sopenharmony_ci *zone_start_pfn = *zone_end_pfn;
113962306a36Sopenharmony_ci }
114062306a36Sopenharmony_ci}
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci/*
114362306a36Sopenharmony_ci * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
114462306a36Sopenharmony_ci * then all holes in the requested range will be accounted for.
114562306a36Sopenharmony_ci */
114662306a36Sopenharmony_ciunsigned long __init __absent_pages_in_range(int nid,
114762306a36Sopenharmony_ci unsigned long range_start_pfn,
114862306a36Sopenharmony_ci unsigned long range_end_pfn)
114962306a36Sopenharmony_ci{
115062306a36Sopenharmony_ci unsigned long nr_absent = range_end_pfn - range_start_pfn;
115162306a36Sopenharmony_ci unsigned long start_pfn, end_pfn;
115262306a36Sopenharmony_ci int i;
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
115562306a36Sopenharmony_ci start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
115662306a36Sopenharmony_ci end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
115762306a36Sopenharmony_ci nr_absent -= end_pfn - start_pfn;
115862306a36Sopenharmony_ci }
115962306a36Sopenharmony_ci return nr_absent;
116062306a36Sopenharmony_ci}
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci/**
116362306a36Sopenharmony_ci * absent_pages_in_range - Return number of page frames in holes within a range
116462306a36Sopenharmony_ci * @start_pfn: The start PFN to start searching for holes
116562306a36Sopenharmony_ci * @end_pfn: The end PFN to stop searching for holes
116662306a36Sopenharmony_ci *
116762306a36Sopenharmony_ci * Return: the number of page frames in memory holes within a range.
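 *
 * Illustrative example (hypothetical layout): if memory spans pfns
 * [0x1000, 0x8000) with one hole at [0x2000, 0x3000), then
 * absent_pages_in_range(0x1000, 0x8000) returns 0x1000.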
116862306a36Sopenharmony_ci */ 116962306a36Sopenharmony_ciunsigned long __init absent_pages_in_range(unsigned long start_pfn, 117062306a36Sopenharmony_ci unsigned long end_pfn) 117162306a36Sopenharmony_ci{ 117262306a36Sopenharmony_ci return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn); 117362306a36Sopenharmony_ci} 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci/* Return the number of page frames in holes in a zone on a node */ 117662306a36Sopenharmony_cistatic unsigned long __init zone_absent_pages_in_node(int nid, 117762306a36Sopenharmony_ci unsigned long zone_type, 117862306a36Sopenharmony_ci unsigned long zone_start_pfn, 117962306a36Sopenharmony_ci unsigned long zone_end_pfn) 118062306a36Sopenharmony_ci{ 118162306a36Sopenharmony_ci unsigned long nr_absent; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci /* zone is empty, we don't have any absent pages */ 118462306a36Sopenharmony_ci if (zone_start_pfn == zone_end_pfn) 118562306a36Sopenharmony_ci return 0; 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci /* 119062306a36Sopenharmony_ci * ZONE_MOVABLE handling. 119162306a36Sopenharmony_ci * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages 119262306a36Sopenharmony_ci * and vice versa. 119362306a36Sopenharmony_ci */ 119462306a36Sopenharmony_ci if (mirrored_kernelcore && zone_movable_pfn[nid]) { 119562306a36Sopenharmony_ci unsigned long start_pfn, end_pfn; 119662306a36Sopenharmony_ci struct memblock_region *r; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci for_each_mem_region(r) { 119962306a36Sopenharmony_ci start_pfn = clamp(memblock_region_memory_base_pfn(r), 120062306a36Sopenharmony_ci zone_start_pfn, zone_end_pfn); 120162306a36Sopenharmony_ci end_pfn = clamp(memblock_region_memory_end_pfn(r), 120262306a36Sopenharmony_ci zone_start_pfn, zone_end_pfn); 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci if (zone_type == ZONE_MOVABLE && 120562306a36Sopenharmony_ci memblock_is_mirror(r)) 120662306a36Sopenharmony_ci nr_absent += end_pfn - start_pfn; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci if (zone_type == ZONE_NORMAL && 120962306a36Sopenharmony_ci !memblock_is_mirror(r)) 121062306a36Sopenharmony_ci nr_absent += end_pfn - start_pfn; 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci } 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci return nr_absent; 121562306a36Sopenharmony_ci} 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci/* 121862306a36Sopenharmony_ci * Return the number of pages a zone spans in a node, including holes 121962306a36Sopenharmony_ci * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() 122062306a36Sopenharmony_ci */ 122162306a36Sopenharmony_cistatic unsigned long __init zone_spanned_pages_in_node(int nid, 122262306a36Sopenharmony_ci unsigned long zone_type, 122362306a36Sopenharmony_ci unsigned long node_start_pfn, 122462306a36Sopenharmony_ci unsigned long node_end_pfn, 122562306a36Sopenharmony_ci unsigned long *zone_start_pfn, 122662306a36Sopenharmony_ci unsigned long *zone_end_pfn) 122762306a36Sopenharmony_ci{ 122862306a36Sopenharmony_ci unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; 122962306a36Sopenharmony_ci unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci /* Get the start and end of the zone */ 123262306a36Sopenharmony_ci 
*zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); 123362306a36Sopenharmony_ci *zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); 123462306a36Sopenharmony_ci adjust_zone_range_for_zone_movable(nid, zone_type, node_end_pfn, 123562306a36Sopenharmony_ci zone_start_pfn, zone_end_pfn); 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_ci /* Check that this node has pages within the zone's required range */ 123862306a36Sopenharmony_ci if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn) 123962306a36Sopenharmony_ci return 0; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci /* Move the zone boundaries inside the node if necessary */ 124262306a36Sopenharmony_ci *zone_end_pfn = min(*zone_end_pfn, node_end_pfn); 124362306a36Sopenharmony_ci *zone_start_pfn = max(*zone_start_pfn, node_start_pfn); 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_ci /* Return the spanned pages */ 124662306a36Sopenharmony_ci return *zone_end_pfn - *zone_start_pfn; 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic void __init reset_memoryless_node_totalpages(struct pglist_data *pgdat) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci struct zone *z; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) { 125462306a36Sopenharmony_ci z->zone_start_pfn = 0; 125562306a36Sopenharmony_ci z->spanned_pages = 0; 125662306a36Sopenharmony_ci z->present_pages = 0; 125762306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_HOTPLUG) 125862306a36Sopenharmony_ci z->present_early_pages = 0; 125962306a36Sopenharmony_ci#endif 126062306a36Sopenharmony_ci } 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci pgdat->node_spanned_pages = 0; 126362306a36Sopenharmony_ci pgdat->node_present_pages = 0; 126462306a36Sopenharmony_ci pr_debug("On node %d totalpages: 0\n", pgdat->node_id); 126562306a36Sopenharmony_ci} 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_cistatic void __init calculate_node_totalpages(struct pglist_data *pgdat, 126862306a36Sopenharmony_ci unsigned long node_start_pfn, 126962306a36Sopenharmony_ci unsigned long node_end_pfn) 127062306a36Sopenharmony_ci{ 127162306a36Sopenharmony_ci unsigned long realtotalpages = 0, totalpages = 0; 127262306a36Sopenharmony_ci enum zone_type i; 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci for (i = 0; i < MAX_NR_ZONES; i++) { 127562306a36Sopenharmony_ci struct zone *zone = pgdat->node_zones + i; 127662306a36Sopenharmony_ci unsigned long zone_start_pfn, zone_end_pfn; 127762306a36Sopenharmony_ci unsigned long spanned, absent; 127862306a36Sopenharmony_ci unsigned long real_size; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci spanned = zone_spanned_pages_in_node(pgdat->node_id, i, 128162306a36Sopenharmony_ci node_start_pfn, 128262306a36Sopenharmony_ci node_end_pfn, 128362306a36Sopenharmony_ci &zone_start_pfn, 128462306a36Sopenharmony_ci &zone_end_pfn); 128562306a36Sopenharmony_ci absent = zone_absent_pages_in_node(pgdat->node_id, i, 128662306a36Sopenharmony_ci zone_start_pfn, 128762306a36Sopenharmony_ci zone_end_pfn); 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci real_size = spanned - absent; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci if (spanned) 129262306a36Sopenharmony_ci zone->zone_start_pfn = zone_start_pfn; 129362306a36Sopenharmony_ci else 129462306a36Sopenharmony_ci zone->zone_start_pfn = 0; 129562306a36Sopenharmony_ci zone->spanned_pages = spanned; 129662306a36Sopenharmony_ci 
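 /* present_pages excludes holes: real_size = spanned - absent */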
zone->present_pages = real_size;
129762306a36Sopenharmony_ci#if defined(CONFIG_MEMORY_HOTPLUG)
129862306a36Sopenharmony_ci zone->present_early_pages = real_size;
129962306a36Sopenharmony_ci#endif
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_ci totalpages += spanned;
130262306a36Sopenharmony_ci realtotalpages += real_size;
130362306a36Sopenharmony_ci }
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci pgdat->node_spanned_pages = totalpages;
130662306a36Sopenharmony_ci pgdat->node_present_pages = realtotalpages;
130762306a36Sopenharmony_ci pr_debug("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
130862306a36Sopenharmony_ci}
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_cistatic unsigned long __init calc_memmap_size(unsigned long spanned_pages,
131162306a36Sopenharmony_ci unsigned long present_pages)
131262306a36Sopenharmony_ci{
131362306a36Sopenharmony_ci unsigned long pages = spanned_pages;
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci /*
131662306a36Sopenharmony_ci * Provide a more accurate estimation if there are holes within
131762306a36Sopenharmony_ci * the zone and SPARSEMEM is in use. In that case each populated
131862306a36Sopenharmony_ci * memory region may cost us one or two extra memmap pages due to
131962306a36Sopenharmony_ci * alignment, because the memmap pages for each populated region
132062306a36Sopenharmony_ci * may not be naturally aligned on a page boundary.
132162306a36Sopenharmony_ci * So the (present_pages >> 4) heuristic is a tradeoff for that.
132262306a36Sopenharmony_ci */
132362306a36Sopenharmony_ci if (spanned_pages > present_pages + (present_pages >> 4) &&
132462306a36Sopenharmony_ci IS_ENABLED(CONFIG_SPARSEMEM))
132562306a36Sopenharmony_ci pages = present_pages;
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
132862306a36Sopenharmony_ci}
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE
133162306a36Sopenharmony_cistatic void pgdat_init_split_queue(struct pglist_data *pgdat)
133262306a36Sopenharmony_ci{
133362306a36Sopenharmony_ci struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci spin_lock_init(&ds_queue->split_queue_lock);
133662306a36Sopenharmony_ci INIT_LIST_HEAD(&ds_queue->split_queue);
133762306a36Sopenharmony_ci ds_queue->split_queue_len = 0;
133862306a36Sopenharmony_ci}
133962306a36Sopenharmony_ci#else
134062306a36Sopenharmony_cistatic void pgdat_init_split_queue(struct pglist_data *pgdat) {}
134162306a36Sopenharmony_ci#endif
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ci#ifdef CONFIG_COMPACTION
134462306a36Sopenharmony_cistatic void pgdat_init_kcompactd(struct pglist_data *pgdat)
134562306a36Sopenharmony_ci{
134662306a36Sopenharmony_ci init_waitqueue_head(&pgdat->kcompactd_wait);
134762306a36Sopenharmony_ci}
134862306a36Sopenharmony_ci#else
134962306a36Sopenharmony_cistatic void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
135062306a36Sopenharmony_ci#endif
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_cistatic void __meminit pgdat_init_internals(struct pglist_data *pgdat)
135362306a36Sopenharmony_ci{
135462306a36Sopenharmony_ci int i;
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ci pgdat_resize_init(pgdat);
135762306a36Sopenharmony_ci pgdat_kswapd_lock_init(pgdat);
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci pgdat_init_split_queue(pgdat);
136062306a36Sopenharmony_ci
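 /* Like the split queue init above, a no-op without CONFIG_COMPACTION. */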
pgdat_init_kcompactd(pgdat); 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci init_waitqueue_head(&pgdat->kswapd_wait); 136362306a36Sopenharmony_ci init_waitqueue_head(&pgdat->pfmemalloc_wait); 136462306a36Sopenharmony_ci#ifdef CONFIG_HYPERHOLD_ZSWAPD 136562306a36Sopenharmony_ci init_waitqueue_head(&pgdat->zswapd_wait); 136662306a36Sopenharmony_ci#endif 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_ci for (i = 0; i < NR_VMSCAN_THROTTLE; i++) 136962306a36Sopenharmony_ci init_waitqueue_head(&pgdat->reclaim_wait[i]); 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci pgdat_page_ext_init(pgdat); 137262306a36Sopenharmony_ci lruvec_init(&pgdat->__lruvec); 137362306a36Sopenharmony_ci#if defined(CONFIG_HYPERHOLD_FILE_LRU) && defined(CONFIG_MEMCG) 137462306a36Sopenharmony_ci pgdat->__lruvec.pgdat = pgdat; 137562306a36Sopenharmony_ci#endif 137662306a36Sopenharmony_ci} 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_cistatic void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, 137962306a36Sopenharmony_ci unsigned long remaining_pages) 138062306a36Sopenharmony_ci{ 138162306a36Sopenharmony_ci atomic_long_set(&zone->managed_pages, remaining_pages); 138262306a36Sopenharmony_ci zone_set_nid(zone, nid); 138362306a36Sopenharmony_ci zone->name = zone_names[idx]; 138462306a36Sopenharmony_ci zone->zone_pgdat = NODE_DATA(nid); 138562306a36Sopenharmony_ci spin_lock_init(&zone->lock); 138662306a36Sopenharmony_ci zone_seqlock_init(zone); 138762306a36Sopenharmony_ci zone_pcp_init(zone); 138862306a36Sopenharmony_ci} 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_cistatic void __meminit zone_init_free_lists(struct zone *zone) 139162306a36Sopenharmony_ci{ 139262306a36Sopenharmony_ci unsigned int order, t; 139362306a36Sopenharmony_ci for_each_migratetype_order(order, t) { 139462306a36Sopenharmony_ci INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); 139562306a36Sopenharmony_ci zone->free_area[order].nr_free = 0; 139662306a36Sopenharmony_ci } 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci#ifdef CONFIG_UNACCEPTED_MEMORY 139962306a36Sopenharmony_ci INIT_LIST_HEAD(&zone->unaccepted_pages); 140062306a36Sopenharmony_ci#endif 140162306a36Sopenharmony_ci} 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_civoid __meminit init_currently_empty_zone(struct zone *zone, 140462306a36Sopenharmony_ci unsigned long zone_start_pfn, 140562306a36Sopenharmony_ci unsigned long size) 140662306a36Sopenharmony_ci{ 140762306a36Sopenharmony_ci struct pglist_data *pgdat = zone->zone_pgdat; 140862306a36Sopenharmony_ci int zone_idx = zone_idx(zone) + 1; 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_ci if (zone_idx > pgdat->nr_zones) 141162306a36Sopenharmony_ci pgdat->nr_zones = zone_idx; 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_ci zone->zone_start_pfn = zone_start_pfn; 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci mminit_dprintk(MMINIT_TRACE, "memmap_init", 141662306a36Sopenharmony_ci "Initialising map node %d zone %lu pfns %lu -> %lu\n", 141762306a36Sopenharmony_ci pgdat->node_id, 141862306a36Sopenharmony_ci (unsigned long)zone_idx(zone), 141962306a36Sopenharmony_ci zone_start_pfn, (zone_start_pfn + size)); 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci zone_init_free_lists(zone); 142262306a36Sopenharmony_ci zone->initialized = 1; 142362306a36Sopenharmony_ci} 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci#ifndef CONFIG_SPARSEMEM 142662306a36Sopenharmony_ci/* 142762306a36Sopenharmony_ci * Calculate the size of the 
zone->pageblock_flags bitmap, rounded up to an unsigned long.
142862306a36Sopenharmony_ci * Start by making sure zonesize is a multiple of pageblock_order by rounding
142962306a36Sopenharmony_ci * up. Then use NR_PAGEBLOCK_BITS worth of bits per pageblock, round what
143062306a36Sopenharmony_ci * is now in bits up to the nearest long in bits, and finally return the
143162306a36Sopenharmony_ci * result in bytes.
143262306a36Sopenharmony_ci */
143362306a36Sopenharmony_cistatic unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
143462306a36Sopenharmony_ci{
143562306a36Sopenharmony_ci unsigned long usemapsize;
143662306a36Sopenharmony_ci
143762306a36Sopenharmony_ci zonesize += zone_start_pfn & (pageblock_nr_pages-1);
143862306a36Sopenharmony_ci usemapsize = roundup(zonesize, pageblock_nr_pages);
143962306a36Sopenharmony_ci usemapsize = usemapsize >> pageblock_order;
144062306a36Sopenharmony_ci usemapsize *= NR_PAGEBLOCK_BITS;
144162306a36Sopenharmony_ci usemapsize = roundup(usemapsize, BITS_PER_LONG);
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci return usemapsize / BITS_PER_BYTE;
144462306a36Sopenharmony_ci}
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_cistatic void __ref setup_usemap(struct zone *zone)
144762306a36Sopenharmony_ci{
144862306a36Sopenharmony_ci unsigned long usemapsize = usemap_size(zone->zone_start_pfn,
144962306a36Sopenharmony_ci zone->spanned_pages);
145062306a36Sopenharmony_ci zone->pageblock_flags = NULL;
145162306a36Sopenharmony_ci if (usemapsize) {
145262306a36Sopenharmony_ci zone->pageblock_flags =
145362306a36Sopenharmony_ci memblock_alloc_node(usemapsize, SMP_CACHE_BYTES,
145462306a36Sopenharmony_ci zone_to_nid(zone));
145562306a36Sopenharmony_ci if (!zone->pageblock_flags)
145662306a36Sopenharmony_ci panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
145762306a36Sopenharmony_ci usemapsize, zone->name, zone_to_nid(zone));
145862306a36Sopenharmony_ci }
145962306a36Sopenharmony_ci}
146062306a36Sopenharmony_ci#else
146162306a36Sopenharmony_cistatic inline void setup_usemap(struct zone *zone) {}
146262306a36Sopenharmony_ci#endif /* CONFIG_SPARSEMEM */
146362306a36Sopenharmony_ci
146462306a36Sopenharmony_ci#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
146762306a36Sopenharmony_civoid __init set_pageblock_order(void)
146862306a36Sopenharmony_ci{
146962306a36Sopenharmony_ci unsigned int order = MAX_ORDER;
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_ci /* Check that pageblock_nr_pages has not already been set up */
147262306a36Sopenharmony_ci if (pageblock_order)
147362306a36Sopenharmony_ci return;
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci /* Don't let pageblocks exceed the maximum allocation granularity. */
147662306a36Sopenharmony_ci if (HPAGE_SHIFT > PAGE_SHIFT && HUGETLB_PAGE_ORDER < order)
147762306a36Sopenharmony_ci order = HUGETLB_PAGE_ORDER;
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci /*
148062306a36Sopenharmony_ci * Assume the largest contiguous order of interest is a huge page.
148162306a36Sopenharmony_ci * This value may be variable depending on boot parameters on IA64 and
148262306a36Sopenharmony_ci * powerpc.
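 * (Illustration with assumed values: 4KiB base pages and a hypothetical
 * 1MiB default hugepage size give HUGETLB_PAGE_ORDER == 8, so
 * pageblock_order becomes 8; hugepage orders above MAX_ORDER are capped
 * at MAX_ORDER.)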
148362306a36Sopenharmony_ci */ 148462306a36Sopenharmony_ci pageblock_order = order; 148562306a36Sopenharmony_ci} 148662306a36Sopenharmony_ci#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ci/* 148962306a36Sopenharmony_ci * When CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is not set, set_pageblock_order() 149062306a36Sopenharmony_ci * is unused as pageblock_order is set at compile-time. See 149162306a36Sopenharmony_ci * include/linux/pageblock-flags.h for the values of pageblock_order based on 149262306a36Sopenharmony_ci * the kernel config 149362306a36Sopenharmony_ci */ 149462306a36Sopenharmony_civoid __init set_pageblock_order(void) 149562306a36Sopenharmony_ci{ 149662306a36Sopenharmony_ci} 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci/* 150162306a36Sopenharmony_ci * Set up the zone data structures 150262306a36Sopenharmony_ci * - init pgdat internals 150362306a36Sopenharmony_ci * - init all zones belonging to this node 150462306a36Sopenharmony_ci * 150562306a36Sopenharmony_ci * NOTE: this function is only called during memory hotplug 150662306a36Sopenharmony_ci */ 150762306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 150862306a36Sopenharmony_civoid __ref free_area_init_core_hotplug(struct pglist_data *pgdat) 150962306a36Sopenharmony_ci{ 151062306a36Sopenharmony_ci int nid = pgdat->node_id; 151162306a36Sopenharmony_ci enum zone_type z; 151262306a36Sopenharmony_ci int cpu; 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_ci pgdat_init_internals(pgdat); 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci if (pgdat->per_cpu_nodestats == &boot_nodestats) 151762306a36Sopenharmony_ci pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci /* 152062306a36Sopenharmony_ci * Reset the nr_zones, order and highest_zoneidx before reuse. 152162306a36Sopenharmony_ci * Note that kswapd will init kswapd_highest_zoneidx properly 152262306a36Sopenharmony_ci * when it starts in the near future. 152362306a36Sopenharmony_ci */ 152462306a36Sopenharmony_ci pgdat->nr_zones = 0; 152562306a36Sopenharmony_ci pgdat->kswapd_order = 0; 152662306a36Sopenharmony_ci pgdat->kswapd_highest_zoneidx = 0; 152762306a36Sopenharmony_ci pgdat->node_start_pfn = 0; 152862306a36Sopenharmony_ci pgdat->node_present_pages = 0; 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci for_each_online_cpu(cpu) { 153162306a36Sopenharmony_ci struct per_cpu_nodestat *p; 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); 153462306a36Sopenharmony_ci memset(p, 0, sizeof(*p)); 153562306a36Sopenharmony_ci } 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci /* 153862306a36Sopenharmony_ci * When memory is hot-added, all the memory is in offline state. So 153962306a36Sopenharmony_ci * clear all zones' present_pages and managed_pages because they will 154062306a36Sopenharmony_ci * be updated in online_pages() and offline_pages(). 
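 * This is also why zone_init_internals() below is passed 0 for the
 * number of managed pages.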
154162306a36Sopenharmony_ci */ 154262306a36Sopenharmony_ci for (z = 0; z < MAX_NR_ZONES; z++) { 154362306a36Sopenharmony_ci struct zone *zone = pgdat->node_zones + z; 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci zone->present_pages = 0; 154662306a36Sopenharmony_ci zone_init_internals(zone, z, nid, 0); 154762306a36Sopenharmony_ci } 154862306a36Sopenharmony_ci} 154962306a36Sopenharmony_ci#endif 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci/* 155262306a36Sopenharmony_ci * Set up the zone data structures: 155362306a36Sopenharmony_ci * - mark all pages reserved 155462306a36Sopenharmony_ci * - mark all memory queues empty 155562306a36Sopenharmony_ci * - clear the memory bitmaps 155662306a36Sopenharmony_ci * 155762306a36Sopenharmony_ci * NOTE: pgdat should get zeroed by caller. 155862306a36Sopenharmony_ci * NOTE: this function is only called during early init. 155962306a36Sopenharmony_ci */ 156062306a36Sopenharmony_cistatic void __init free_area_init_core(struct pglist_data *pgdat) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci enum zone_type j; 156362306a36Sopenharmony_ci int nid = pgdat->node_id; 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ci pgdat_init_internals(pgdat); 156662306a36Sopenharmony_ci pgdat->per_cpu_nodestats = &boot_nodestats; 156762306a36Sopenharmony_ci 156862306a36Sopenharmony_ci for (j = 0; j < MAX_NR_ZONES; j++) { 156962306a36Sopenharmony_ci struct zone *zone = pgdat->node_zones + j; 157062306a36Sopenharmony_ci unsigned long size, freesize, memmap_pages; 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci size = zone->spanned_pages; 157362306a36Sopenharmony_ci freesize = zone->present_pages; 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci /* 157662306a36Sopenharmony_ci * Adjust freesize so that it accounts for how much memory 157762306a36Sopenharmony_ci * is used by this zone for memmap. 
This affects the watermark 157862306a36Sopenharmony_ci * and per-cpu initialisations 157962306a36Sopenharmony_ci */ 158062306a36Sopenharmony_ci memmap_pages = calc_memmap_size(size, freesize); 158162306a36Sopenharmony_ci if (!is_highmem_idx(j)) { 158262306a36Sopenharmony_ci if (freesize >= memmap_pages) { 158362306a36Sopenharmony_ci freesize -= memmap_pages; 158462306a36Sopenharmony_ci if (memmap_pages) 158562306a36Sopenharmony_ci pr_debug(" %s zone: %lu pages used for memmap\n", 158662306a36Sopenharmony_ci zone_names[j], memmap_pages); 158762306a36Sopenharmony_ci } else 158862306a36Sopenharmony_ci pr_warn(" %s zone: %lu memmap pages exceeds freesize %lu\n", 158962306a36Sopenharmony_ci zone_names[j], memmap_pages, freesize); 159062306a36Sopenharmony_ci } 159162306a36Sopenharmony_ci 159262306a36Sopenharmony_ci /* Account for reserved pages */ 159362306a36Sopenharmony_ci if (j == 0 && freesize > dma_reserve) { 159462306a36Sopenharmony_ci freesize -= dma_reserve; 159562306a36Sopenharmony_ci pr_debug(" %s zone: %lu pages reserved\n", zone_names[0], dma_reserve); 159662306a36Sopenharmony_ci } 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci if (!is_highmem_idx(j)) 159962306a36Sopenharmony_ci nr_kernel_pages += freesize; 160062306a36Sopenharmony_ci /* Charge for highmem memmap if there are enough kernel pages */ 160162306a36Sopenharmony_ci else if (nr_kernel_pages > memmap_pages * 2) 160262306a36Sopenharmony_ci nr_kernel_pages -= memmap_pages; 160362306a36Sopenharmony_ci nr_all_pages += freesize; 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci /* 160662306a36Sopenharmony_ci * Set an approximate value for lowmem here, it will be adjusted 160762306a36Sopenharmony_ci * when the bootmem allocator frees pages into the buddy system. 160862306a36Sopenharmony_ci * And all highmem pages will be managed by the buddy system. 
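 * (For scale, assuming 4KiB pages and a 64-byte struct page, the memmap
 * charged above costs 1/64th of the zone, i.e. roughly 16MiB per 1GiB.)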
160962306a36Sopenharmony_ci */ 161062306a36Sopenharmony_ci zone_init_internals(zone, j, nid, freesize); 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci if (!size) 161362306a36Sopenharmony_ci continue; 161462306a36Sopenharmony_ci 161562306a36Sopenharmony_ci setup_usemap(zone); 161662306a36Sopenharmony_ci init_currently_empty_zone(zone, zone->zone_start_pfn, size); 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci} 161962306a36Sopenharmony_ci 162062306a36Sopenharmony_civoid __init *memmap_alloc(phys_addr_t size, phys_addr_t align, 162162306a36Sopenharmony_ci phys_addr_t min_addr, int nid, bool exact_nid) 162262306a36Sopenharmony_ci{ 162362306a36Sopenharmony_ci void *ptr; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci if (exact_nid) 162662306a36Sopenharmony_ci ptr = memblock_alloc_exact_nid_raw(size, align, min_addr, 162762306a36Sopenharmony_ci MEMBLOCK_ALLOC_ACCESSIBLE, 162862306a36Sopenharmony_ci nid); 162962306a36Sopenharmony_ci else 163062306a36Sopenharmony_ci ptr = memblock_alloc_try_nid_raw(size, align, min_addr, 163162306a36Sopenharmony_ci MEMBLOCK_ALLOC_ACCESSIBLE, 163262306a36Sopenharmony_ci nid); 163362306a36Sopenharmony_ci 163462306a36Sopenharmony_ci if (ptr && size > 0) 163562306a36Sopenharmony_ci page_init_poison(ptr, size); 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci return ptr; 163862306a36Sopenharmony_ci} 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci#ifdef CONFIG_FLATMEM 164162306a36Sopenharmony_cistatic void __init alloc_node_mem_map(struct pglist_data *pgdat) 164262306a36Sopenharmony_ci{ 164362306a36Sopenharmony_ci unsigned long __maybe_unused start = 0; 164462306a36Sopenharmony_ci unsigned long __maybe_unused offset = 0; 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci /* Skip empty nodes */ 164762306a36Sopenharmony_ci if (!pgdat->node_spanned_pages) 164862306a36Sopenharmony_ci return; 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); 165162306a36Sopenharmony_ci offset = pgdat->node_start_pfn - start; 165262306a36Sopenharmony_ci /* ia64 gets its own node_mem_map, before this, without bootmem */ 165362306a36Sopenharmony_ci if (!pgdat->node_mem_map) { 165462306a36Sopenharmony_ci unsigned long size, end; 165562306a36Sopenharmony_ci struct page *map; 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci /* 165862306a36Sopenharmony_ci * The zone's endpoints aren't required to be MAX_ORDER 165962306a36Sopenharmony_ci * aligned but the node_mem_map endpoints must be in order 166062306a36Sopenharmony_ci * for the buddy allocator to function correctly. 
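 * (e.g. assuming MAX_ORDER_NR_PAGES == 1024, a node starting at pfn 1000
 * gets start == 0 and offset == 1000 below.)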
166162306a36Sopenharmony_ci */ 166262306a36Sopenharmony_ci end = pgdat_end_pfn(pgdat); 166362306a36Sopenharmony_ci end = ALIGN(end, MAX_ORDER_NR_PAGES); 166462306a36Sopenharmony_ci size = (end - start) * sizeof(struct page); 166562306a36Sopenharmony_ci map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, 166662306a36Sopenharmony_ci pgdat->node_id, false); 166762306a36Sopenharmony_ci if (!map) 166862306a36Sopenharmony_ci panic("Failed to allocate %ld bytes for node %d memory map\n", 166962306a36Sopenharmony_ci size, pgdat->node_id); 167062306a36Sopenharmony_ci pgdat->node_mem_map = map + offset; 167162306a36Sopenharmony_ci } 167262306a36Sopenharmony_ci pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", 167362306a36Sopenharmony_ci __func__, pgdat->node_id, (unsigned long)pgdat, 167462306a36Sopenharmony_ci (unsigned long)pgdat->node_mem_map); 167562306a36Sopenharmony_ci#ifndef CONFIG_NUMA 167662306a36Sopenharmony_ci /* 167762306a36Sopenharmony_ci * With no DISCONTIG, the global mem_map is just set as node 0's 167862306a36Sopenharmony_ci */ 167962306a36Sopenharmony_ci if (pgdat == NODE_DATA(0)) { 168062306a36Sopenharmony_ci mem_map = NODE_DATA(0)->node_mem_map; 168162306a36Sopenharmony_ci if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 168262306a36Sopenharmony_ci mem_map -= offset; 168362306a36Sopenharmony_ci } 168462306a36Sopenharmony_ci#endif 168562306a36Sopenharmony_ci} 168662306a36Sopenharmony_ci#else 168762306a36Sopenharmony_cistatic inline void alloc_node_mem_map(struct pglist_data *pgdat) { } 168862306a36Sopenharmony_ci#endif /* CONFIG_FLATMEM */ 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci/** 169162306a36Sopenharmony_ci * get_pfn_range_for_nid - Return the start and end page frames for a node 169262306a36Sopenharmony_ci * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. 169362306a36Sopenharmony_ci * @start_pfn: Passed by reference. On return, it will have the node start_pfn. 169462306a36Sopenharmony_ci * @end_pfn: Passed by reference. On return, it will have the node end_pfn. 169562306a36Sopenharmony_ci * 169662306a36Sopenharmony_ci * It returns the start and end page frame of a node based on information 169762306a36Sopenharmony_ci * provided by memblock_set_node(). If called for a node 169862306a36Sopenharmony_ci * with no available memory, the start and end PFNs will be 0. 
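 *
 * Illustrative result (hypothetical layout): a node with memblock ranges
 * [0x1000, 0x2000) and [0x4000, 0x8000) yields *start_pfn == 0x1000 and
 * *end_pfn == 0x8000; the hole in between is not subtracted here.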
169962306a36Sopenharmony_ci */ 170062306a36Sopenharmony_civoid __init get_pfn_range_for_nid(unsigned int nid, 170162306a36Sopenharmony_ci unsigned long *start_pfn, unsigned long *end_pfn) 170262306a36Sopenharmony_ci{ 170362306a36Sopenharmony_ci unsigned long this_start_pfn, this_end_pfn; 170462306a36Sopenharmony_ci int i; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci *start_pfn = -1UL; 170762306a36Sopenharmony_ci *end_pfn = 0; 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) { 171062306a36Sopenharmony_ci *start_pfn = min(*start_pfn, this_start_pfn); 171162306a36Sopenharmony_ci *end_pfn = max(*end_pfn, this_end_pfn); 171262306a36Sopenharmony_ci } 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci if (*start_pfn == -1UL) 171562306a36Sopenharmony_ci *start_pfn = 0; 171662306a36Sopenharmony_ci} 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_cistatic void __init free_area_init_node(int nid) 171962306a36Sopenharmony_ci{ 172062306a36Sopenharmony_ci pg_data_t *pgdat = NODE_DATA(nid); 172162306a36Sopenharmony_ci unsigned long start_pfn = 0; 172262306a36Sopenharmony_ci unsigned long end_pfn = 0; 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ci /* pg_data_t should be reset to zero when it's allocated */ 172562306a36Sopenharmony_ci WARN_ON(pgdat->nr_zones || pgdat->kswapd_highest_zoneidx); 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci pgdat->node_id = nid; 173062306a36Sopenharmony_ci pgdat->node_start_pfn = start_pfn; 173162306a36Sopenharmony_ci pgdat->per_cpu_nodestats = NULL; 173262306a36Sopenharmony_ci 173362306a36Sopenharmony_ci if (start_pfn != end_pfn) { 173462306a36Sopenharmony_ci pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, 173562306a36Sopenharmony_ci (u64)start_pfn << PAGE_SHIFT, 173662306a36Sopenharmony_ci end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci calculate_node_totalpages(pgdat, start_pfn, end_pfn); 173962306a36Sopenharmony_ci } else { 174062306a36Sopenharmony_ci pr_info("Initmem setup node %d as memoryless\n", nid); 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci reset_memoryless_node_totalpages(pgdat); 174362306a36Sopenharmony_ci } 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_ci alloc_node_mem_map(pgdat); 174662306a36Sopenharmony_ci pgdat_set_deferred_range(pgdat); 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci free_area_init_core(pgdat); 174962306a36Sopenharmony_ci lru_gen_init_pgdat(pgdat); 175062306a36Sopenharmony_ci} 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci/* Any regular or high memory on that node ? 
*/ 175362306a36Sopenharmony_cistatic void __init check_for_memory(pg_data_t *pgdat) 175462306a36Sopenharmony_ci{ 175562306a36Sopenharmony_ci enum zone_type zone_type; 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { 175862306a36Sopenharmony_ci struct zone *zone = &pgdat->node_zones[zone_type]; 175962306a36Sopenharmony_ci if (populated_zone(zone)) { 176062306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_HIGHMEM)) 176162306a36Sopenharmony_ci node_set_state(pgdat->node_id, N_HIGH_MEMORY); 176262306a36Sopenharmony_ci if (zone_type <= ZONE_NORMAL) 176362306a36Sopenharmony_ci node_set_state(pgdat->node_id, N_NORMAL_MEMORY); 176462306a36Sopenharmony_ci break; 176562306a36Sopenharmony_ci } 176662306a36Sopenharmony_ci } 176762306a36Sopenharmony_ci} 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci#if MAX_NUMNODES > 1 177062306a36Sopenharmony_ci/* 177162306a36Sopenharmony_ci * Figure out the number of possible node ids. 177262306a36Sopenharmony_ci */ 177362306a36Sopenharmony_civoid __init setup_nr_node_ids(void) 177462306a36Sopenharmony_ci{ 177562306a36Sopenharmony_ci unsigned int highest; 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ci highest = find_last_bit(node_possible_map.bits, MAX_NUMNODES); 177862306a36Sopenharmony_ci nr_node_ids = highest + 1; 177962306a36Sopenharmony_ci} 178062306a36Sopenharmony_ci#endif 178162306a36Sopenharmony_ci 178262306a36Sopenharmony_ci/* 178362306a36Sopenharmony_ci * Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For 178462306a36Sopenharmony_ci * such cases we allow max_zone_pfn sorted in the descending order 178562306a36Sopenharmony_ci */ 178662306a36Sopenharmony_cistatic bool arch_has_descending_max_zone_pfns(void) 178762306a36Sopenharmony_ci{ 178862306a36Sopenharmony_ci return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); 178962306a36Sopenharmony_ci} 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci/** 179262306a36Sopenharmony_ci * free_area_init - Initialise all pg_data_t and zone data 179362306a36Sopenharmony_ci * @max_zone_pfn: an array of max PFNs for each zone 179462306a36Sopenharmony_ci * 179562306a36Sopenharmony_ci * This will call free_area_init_node() for each active node in the system. 179662306a36Sopenharmony_ci * Using the page ranges provided by memblock_set_node(), the size of each 179762306a36Sopenharmony_ci * zone in each node and their holes is calculated. If the maximum PFN 179862306a36Sopenharmony_ci * between two adjacent zones match, it is assumed that the zone is empty. 179962306a36Sopenharmony_ci * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed 180062306a36Sopenharmony_ci * that arch_max_dma32_pfn has no pages. It is also assumed that a zone 180162306a36Sopenharmony_ci * starts where the previous one ended. For example, ZONE_DMA32 starts 180262306a36Sopenharmony_ci * at arch_max_dma_pfn. 
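 *
 * Illustrative call (hypothetical 4GiB machine with 4KiB pages):
 * max_zone_pfn = { [ZONE_DMA] = 0x1000, [ZONE_DMA32] = 0x100000,
 * [ZONE_NORMAL] = 0x100000 } would size ZONE_NORMAL as empty, per the
 * rule above.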
180362306a36Sopenharmony_ci */ 180462306a36Sopenharmony_civoid __init free_area_init(unsigned long *max_zone_pfn) 180562306a36Sopenharmony_ci{ 180662306a36Sopenharmony_ci unsigned long start_pfn, end_pfn; 180762306a36Sopenharmony_ci int i, nid, zone; 180862306a36Sopenharmony_ci bool descending; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci /* Record where the zone boundaries are */ 181162306a36Sopenharmony_ci memset(arch_zone_lowest_possible_pfn, 0, 181262306a36Sopenharmony_ci sizeof(arch_zone_lowest_possible_pfn)); 181362306a36Sopenharmony_ci memset(arch_zone_highest_possible_pfn, 0, 181462306a36Sopenharmony_ci sizeof(arch_zone_highest_possible_pfn)); 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci start_pfn = PHYS_PFN(memblock_start_of_DRAM()); 181762306a36Sopenharmony_ci descending = arch_has_descending_max_zone_pfns(); 181862306a36Sopenharmony_ci 181962306a36Sopenharmony_ci for (i = 0; i < MAX_NR_ZONES; i++) { 182062306a36Sopenharmony_ci if (descending) 182162306a36Sopenharmony_ci zone = MAX_NR_ZONES - i - 1; 182262306a36Sopenharmony_ci else 182362306a36Sopenharmony_ci zone = i; 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci if (zone == ZONE_MOVABLE) 182662306a36Sopenharmony_ci continue; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci end_pfn = max(max_zone_pfn[zone], start_pfn); 182962306a36Sopenharmony_ci arch_zone_lowest_possible_pfn[zone] = start_pfn; 183062306a36Sopenharmony_ci arch_zone_highest_possible_pfn[zone] = end_pfn; 183162306a36Sopenharmony_ci 183262306a36Sopenharmony_ci start_pfn = end_pfn; 183362306a36Sopenharmony_ci } 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci /* Find the PFNs that ZONE_MOVABLE begins at in each node */ 183662306a36Sopenharmony_ci memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); 183762306a36Sopenharmony_ci find_zone_movable_pfns_for_nodes(); 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci /* Print out the zone ranges */ 184062306a36Sopenharmony_ci pr_info("Zone ranges:\n"); 184162306a36Sopenharmony_ci for (i = 0; i < MAX_NR_ZONES; i++) { 184262306a36Sopenharmony_ci if (i == ZONE_MOVABLE) 184362306a36Sopenharmony_ci continue; 184462306a36Sopenharmony_ci pr_info(" %-8s ", zone_names[i]); 184562306a36Sopenharmony_ci if (arch_zone_lowest_possible_pfn[i] == 184662306a36Sopenharmony_ci arch_zone_highest_possible_pfn[i]) 184762306a36Sopenharmony_ci pr_cont("empty\n"); 184862306a36Sopenharmony_ci else 184962306a36Sopenharmony_ci pr_cont("[mem %#018Lx-%#018Lx]\n", 185062306a36Sopenharmony_ci (u64)arch_zone_lowest_possible_pfn[i] 185162306a36Sopenharmony_ci << PAGE_SHIFT, 185262306a36Sopenharmony_ci ((u64)arch_zone_highest_possible_pfn[i] 185362306a36Sopenharmony_ci << PAGE_SHIFT) - 1); 185462306a36Sopenharmony_ci } 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci /* Print out the PFNs ZONE_MOVABLE begins at in each node */ 185762306a36Sopenharmony_ci pr_info("Movable zone start for each node\n"); 185862306a36Sopenharmony_ci for (i = 0; i < MAX_NUMNODES; i++) { 185962306a36Sopenharmony_ci if (zone_movable_pfn[i]) 186062306a36Sopenharmony_ci pr_info(" Node %d: %#018Lx\n", i, 186162306a36Sopenharmony_ci (u64)zone_movable_pfn[i] << PAGE_SHIFT); 186262306a36Sopenharmony_ci } 186362306a36Sopenharmony_ci 186462306a36Sopenharmony_ci /* 186562306a36Sopenharmony_ci * Print out the early node map, and initialize the 186662306a36Sopenharmony_ci * subsection-map relative to active online memory ranges to 186762306a36Sopenharmony_ci * enable future "sub-section" extensions of the memory map. 
186862306a36Sopenharmony_ci */ 186962306a36Sopenharmony_ci pr_info("Early memory node ranges\n"); 187062306a36Sopenharmony_ci for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { 187162306a36Sopenharmony_ci pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, 187262306a36Sopenharmony_ci (u64)start_pfn << PAGE_SHIFT, 187362306a36Sopenharmony_ci ((u64)end_pfn << PAGE_SHIFT) - 1); 187462306a36Sopenharmony_ci subsection_map_init(start_pfn, end_pfn - start_pfn); 187562306a36Sopenharmony_ci } 187662306a36Sopenharmony_ci 187762306a36Sopenharmony_ci /* Initialise every node */ 187862306a36Sopenharmony_ci mminit_verify_pageflags_layout(); 187962306a36Sopenharmony_ci setup_nr_node_ids(); 188062306a36Sopenharmony_ci set_pageblock_order(); 188162306a36Sopenharmony_ci 188262306a36Sopenharmony_ci for_each_node(nid) { 188362306a36Sopenharmony_ci pg_data_t *pgdat; 188462306a36Sopenharmony_ci 188562306a36Sopenharmony_ci if (!node_online(nid)) { 188662306a36Sopenharmony_ci pr_info("Initializing node %d as memoryless\n", nid); 188762306a36Sopenharmony_ci 188862306a36Sopenharmony_ci /* Allocator not initialized yet */ 188962306a36Sopenharmony_ci pgdat = arch_alloc_nodedata(nid); 189062306a36Sopenharmony_ci if (!pgdat) 189162306a36Sopenharmony_ci panic("Cannot allocate %zuB for node %d.\n", 189262306a36Sopenharmony_ci sizeof(*pgdat), nid); 189362306a36Sopenharmony_ci arch_refresh_nodedata(nid, pgdat); 189462306a36Sopenharmony_ci free_area_init_node(nid); 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci /* 189762306a36Sopenharmony_ci * We do not want to confuse userspace by sysfs 189862306a36Sopenharmony_ci * files/directories for node without any memory 189962306a36Sopenharmony_ci * attached to it, so this node is not marked as 190062306a36Sopenharmony_ci * N_MEMORY and not marked online so that no sysfs 190162306a36Sopenharmony_ci * hierarchy will be created via register_one_node for 190262306a36Sopenharmony_ci * it. The pgdat will get fully initialized by 190362306a36Sopenharmony_ci * hotadd_init_pgdat() when memory is hotplugged into 190462306a36Sopenharmony_ci * this node. 190562306a36Sopenharmony_ci */ 190662306a36Sopenharmony_ci continue; 190762306a36Sopenharmony_ci } 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_ci pgdat = NODE_DATA(nid); 191062306a36Sopenharmony_ci free_area_init_node(nid); 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci /* Any memory on that node */ 191362306a36Sopenharmony_ci if (pgdat->node_present_pages) 191462306a36Sopenharmony_ci node_set_state(nid, N_MEMORY); 191562306a36Sopenharmony_ci check_for_memory(pgdat); 191662306a36Sopenharmony_ci } 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci memmap_init(); 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ci /* disable hash distribution for systems with a single node */ 192162306a36Sopenharmony_ci fixup_hashdist(); 192262306a36Sopenharmony_ci} 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci/** 192562306a36Sopenharmony_ci * node_map_pfn_alignment - determine the maximum internode alignment 192662306a36Sopenharmony_ci * 192762306a36Sopenharmony_ci * This function should be called after node map is populated and sorted. 192862306a36Sopenharmony_ci * It calculates the maximum power of two alignment which can distinguish 192962306a36Sopenharmony_ci * all the nodes. 
193062306a36Sopenharmony_ci * 193162306a36Sopenharmony_ci * For example, if all nodes are 1GiB and aligned to 1GiB, the return value 193262306a36Sopenharmony_ci * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the 193362306a36Sopenharmony_ci * nodes are shifted by 256MiB, 256MiB. Note that if only the last node is 193462306a36Sopenharmony_ci * shifted, 1GiB is enough and this function will indicate so. 193562306a36Sopenharmony_ci * 193662306a36Sopenharmony_ci * This is used to test whether pfn -> nid mapping of the chosen memory 193762306a36Sopenharmony_ci * model has fine enough granularity to avoid incorrect mapping for the 193862306a36Sopenharmony_ci * populated node map. 193962306a36Sopenharmony_ci * 194062306a36Sopenharmony_ci * Return: the determined alignment in pfn's. 0 if there is no alignment 194162306a36Sopenharmony_ci * requirement (single node). 194262306a36Sopenharmony_ci */ 194362306a36Sopenharmony_ciunsigned long __init node_map_pfn_alignment(void) 194462306a36Sopenharmony_ci{ 194562306a36Sopenharmony_ci unsigned long accl_mask = 0, last_end = 0; 194662306a36Sopenharmony_ci unsigned long start, end, mask; 194762306a36Sopenharmony_ci int last_nid = NUMA_NO_NODE; 194862306a36Sopenharmony_ci int i, nid; 194962306a36Sopenharmony_ci 195062306a36Sopenharmony_ci for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { 195162306a36Sopenharmony_ci if (!start || last_nid < 0 || last_nid == nid) { 195262306a36Sopenharmony_ci last_nid = nid; 195362306a36Sopenharmony_ci last_end = end; 195462306a36Sopenharmony_ci continue; 195562306a36Sopenharmony_ci } 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_ci /* 195862306a36Sopenharmony_ci * Start with a mask granular enough to pin-point to the 195962306a36Sopenharmony_ci * start pfn and tick off bits one-by-one until it becomes 196062306a36Sopenharmony_ci * too coarse to separate the current node from the last. 
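 * (Worked example with assumed values: start == 0x8000, last_end ==
 * 0x6000. The initial mask is ~0x7fff; start & (mask << 1) is 0 and
 * last_end > 0, so the loop stops at once and this boundary remains
 * distinguishable at 0x8000-pfn granularity.)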
196162306a36Sopenharmony_ci */ 196262306a36Sopenharmony_ci mask = ~((1 << __ffs(start)) - 1); 196362306a36Sopenharmony_ci while (mask && last_end <= (start & (mask << 1))) 196462306a36Sopenharmony_ci mask <<= 1; 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci /* accumulate all internode masks */ 196762306a36Sopenharmony_ci accl_mask |= mask; 196862306a36Sopenharmony_ci } 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci /* convert mask to number of pages */ 197162306a36Sopenharmony_ci return ~accl_mask + 1; 197262306a36Sopenharmony_ci} 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 197562306a36Sopenharmony_cistatic void __init deferred_free_range(unsigned long pfn, 197662306a36Sopenharmony_ci unsigned long nr_pages) 197762306a36Sopenharmony_ci{ 197862306a36Sopenharmony_ci struct page *page; 197962306a36Sopenharmony_ci unsigned long i; 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci if (!nr_pages) 198262306a36Sopenharmony_ci return; 198362306a36Sopenharmony_ci 198462306a36Sopenharmony_ci page = pfn_to_page(pfn); 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci /* Free a large naturally-aligned chunk if possible */ 198762306a36Sopenharmony_ci if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) { 198862306a36Sopenharmony_ci for (i = 0; i < nr_pages; i += pageblock_nr_pages) 198962306a36Sopenharmony_ci set_pageblock_migratetype(page + i, MIGRATE_MOVABLE); 199062306a36Sopenharmony_ci __free_pages_core(page, MAX_ORDER); 199162306a36Sopenharmony_ci return; 199262306a36Sopenharmony_ci } 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci /* Accept chunks smaller than MAX_ORDER upfront */ 199562306a36Sopenharmony_ci accept_memory(PFN_PHYS(pfn), PFN_PHYS(pfn + nr_pages)); 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++, page++, pfn++) { 199862306a36Sopenharmony_ci if (pageblock_aligned(pfn)) 199962306a36Sopenharmony_ci set_pageblock_migratetype(page, MIGRATE_MOVABLE); 200062306a36Sopenharmony_ci __free_pages_core(page, 0); 200162306a36Sopenharmony_ci } 200262306a36Sopenharmony_ci} 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci/* Completion tracking for deferred_init_memmap() threads */ 200562306a36Sopenharmony_cistatic atomic_t pgdat_init_n_undone __initdata; 200662306a36Sopenharmony_cistatic __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp); 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_cistatic inline void __init pgdat_init_report_one_done(void) 200962306a36Sopenharmony_ci{ 201062306a36Sopenharmony_ci if (atomic_dec_and_test(&pgdat_init_n_undone)) 201162306a36Sopenharmony_ci complete(&pgdat_init_all_done_comp); 201262306a36Sopenharmony_ci} 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci/* 201562306a36Sopenharmony_ci * Returns true if page needs to be initialized or freed to buddy allocator. 201662306a36Sopenharmony_ci * 201762306a36Sopenharmony_ci * We check if a current MAX_ORDER block is valid by only checking the validity 201862306a36Sopenharmony_ci * of the head pfn. 201962306a36Sopenharmony_ci */ 202062306a36Sopenharmony_cistatic inline bool __init deferred_pfn_valid(unsigned long pfn) 202162306a36Sopenharmony_ci{ 202262306a36Sopenharmony_ci if (IS_MAX_ORDER_ALIGNED(pfn) && !pfn_valid(pfn)) 202362306a36Sopenharmony_ci return false; 202462306a36Sopenharmony_ci return true; 202562306a36Sopenharmony_ci} 202662306a36Sopenharmony_ci 202762306a36Sopenharmony_ci/* 202862306a36Sopenharmony_ci * Free pages to buddy allocator. 
Try to free aligned pages in 202962306a36Sopenharmony_ci * MAX_ORDER_NR_PAGES sizes. 203062306a36Sopenharmony_ci */ 203162306a36Sopenharmony_cistatic void __init deferred_free_pages(unsigned long pfn, 203262306a36Sopenharmony_ci unsigned long end_pfn) 203362306a36Sopenharmony_ci{ 203462306a36Sopenharmony_ci unsigned long nr_free = 0; 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci for (; pfn < end_pfn; pfn++) { 203762306a36Sopenharmony_ci if (!deferred_pfn_valid(pfn)) { 203862306a36Sopenharmony_ci deferred_free_range(pfn - nr_free, nr_free); 203962306a36Sopenharmony_ci nr_free = 0; 204062306a36Sopenharmony_ci } else if (IS_MAX_ORDER_ALIGNED(pfn)) { 204162306a36Sopenharmony_ci deferred_free_range(pfn - nr_free, nr_free); 204262306a36Sopenharmony_ci nr_free = 1; 204362306a36Sopenharmony_ci } else { 204462306a36Sopenharmony_ci nr_free++; 204562306a36Sopenharmony_ci } 204662306a36Sopenharmony_ci } 204762306a36Sopenharmony_ci /* Free the last block of pages to allocator */ 204862306a36Sopenharmony_ci deferred_free_range(pfn - nr_free, nr_free); 204962306a36Sopenharmony_ci} 205062306a36Sopenharmony_ci 205162306a36Sopenharmony_ci/* 205262306a36Sopenharmony_ci * Initialize struct pages. We minimize pfn page lookups and scheduler checks 205362306a36Sopenharmony_ci * by performing it only once every MAX_ORDER_NR_PAGES. 205462306a36Sopenharmony_ci * Return number of pages initialized. 205562306a36Sopenharmony_ci */ 205662306a36Sopenharmony_cistatic unsigned long __init deferred_init_pages(struct zone *zone, 205762306a36Sopenharmony_ci unsigned long pfn, 205862306a36Sopenharmony_ci unsigned long end_pfn) 205962306a36Sopenharmony_ci{ 206062306a36Sopenharmony_ci int nid = zone_to_nid(zone); 206162306a36Sopenharmony_ci unsigned long nr_pages = 0; 206262306a36Sopenharmony_ci int zid = zone_idx(zone); 206362306a36Sopenharmony_ci struct page *page = NULL; 206462306a36Sopenharmony_ci 206562306a36Sopenharmony_ci for (; pfn < end_pfn; pfn++) { 206662306a36Sopenharmony_ci if (!deferred_pfn_valid(pfn)) { 206762306a36Sopenharmony_ci page = NULL; 206862306a36Sopenharmony_ci continue; 206962306a36Sopenharmony_ci } else if (!page || IS_MAX_ORDER_ALIGNED(pfn)) { 207062306a36Sopenharmony_ci page = pfn_to_page(pfn); 207162306a36Sopenharmony_ci } else { 207262306a36Sopenharmony_ci page++; 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci __init_single_page(page, pfn, zid, nid); 207562306a36Sopenharmony_ci nr_pages++; 207662306a36Sopenharmony_ci } 207762306a36Sopenharmony_ci return (nr_pages); 207862306a36Sopenharmony_ci} 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_ci/* 208162306a36Sopenharmony_ci * This function is meant to pre-load the iterator for the zone init. 208262306a36Sopenharmony_ci * Specifically it walks through the ranges until we are caught up to the 208362306a36Sopenharmony_ci * first_init_pfn value and exits there. If we never encounter the value we 208462306a36Sopenharmony_ci * return false indicating there are no valid ranges left. 
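 * On success, *spfn/*epfn hold the first range still to be initialized
 * (clamped to first_init_pfn) and *i carries the iterator state that
 * deferred_init_maxorder() resumes from.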
208562306a36Sopenharmony_ci */ 208662306a36Sopenharmony_cistatic bool __init 208762306a36Sopenharmony_cideferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone, 208862306a36Sopenharmony_ci unsigned long *spfn, unsigned long *epfn, 208962306a36Sopenharmony_ci unsigned long first_init_pfn) 209062306a36Sopenharmony_ci{ 209162306a36Sopenharmony_ci u64 j; 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci /* 209462306a36Sopenharmony_ci * Start out by walking through the ranges in this zone that have 209562306a36Sopenharmony_ci * already been initialized. We don't need to do anything with them 209662306a36Sopenharmony_ci * so we just need to flush them out of the system. 209762306a36Sopenharmony_ci */ 209862306a36Sopenharmony_ci for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) { 209962306a36Sopenharmony_ci if (*epfn <= first_init_pfn) 210062306a36Sopenharmony_ci continue; 210162306a36Sopenharmony_ci if (*spfn < first_init_pfn) 210262306a36Sopenharmony_ci *spfn = first_init_pfn; 210362306a36Sopenharmony_ci *i = j; 210462306a36Sopenharmony_ci return true; 210562306a36Sopenharmony_ci } 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_ci return false; 210862306a36Sopenharmony_ci} 210962306a36Sopenharmony_ci 211062306a36Sopenharmony_ci/* 211162306a36Sopenharmony_ci * Initialize and free pages. We do it in two loops: first we initialize 211262306a36Sopenharmony_ci * struct page, then free to buddy allocator, because while we are 211362306a36Sopenharmony_ci * freeing pages we can access pages that are ahead (computing buddy 211462306a36Sopenharmony_ci * page in __free_one_page()). 211562306a36Sopenharmony_ci * 211662306a36Sopenharmony_ci * In order to try and keep some memory in the cache we have the loop 211762306a36Sopenharmony_ci * broken along max page order boundaries. This way we will not cause 211862306a36Sopenharmony_ci * any issues with the buddy page computation. 
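 * (e.g. assuming MAX_ORDER_NR_PAGES == 1024 and *start_pfn == 1000, mo_pfn
 * becomes 1024, so one call initializes and frees pfns 1000-1023, given the
 * range extends that far, and leaves *start_pfn at 1024.)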
211962306a36Sopenharmony_ci */ 212062306a36Sopenharmony_cistatic unsigned long __init 212162306a36Sopenharmony_cideferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn, 212262306a36Sopenharmony_ci unsigned long *end_pfn) 212362306a36Sopenharmony_ci{ 212462306a36Sopenharmony_ci unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES); 212562306a36Sopenharmony_ci unsigned long spfn = *start_pfn, epfn = *end_pfn; 212662306a36Sopenharmony_ci unsigned long nr_pages = 0; 212762306a36Sopenharmony_ci u64 j = *i; 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_ci /* First we loop through and initialize the page values */ 213062306a36Sopenharmony_ci for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) { 213162306a36Sopenharmony_ci unsigned long t; 213262306a36Sopenharmony_ci 213362306a36Sopenharmony_ci if (mo_pfn <= *start_pfn) 213462306a36Sopenharmony_ci break; 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_ci t = min(mo_pfn, *end_pfn); 213762306a36Sopenharmony_ci nr_pages += deferred_init_pages(zone, *start_pfn, t); 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci if (mo_pfn < *end_pfn) { 214062306a36Sopenharmony_ci *start_pfn = mo_pfn; 214162306a36Sopenharmony_ci break; 214262306a36Sopenharmony_ci } 214362306a36Sopenharmony_ci } 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci /* Reset values and now loop through freeing pages as needed */ 214662306a36Sopenharmony_ci swap(j, *i); 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) { 214962306a36Sopenharmony_ci unsigned long t; 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ci if (mo_pfn <= spfn) 215262306a36Sopenharmony_ci break; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci t = min(mo_pfn, epfn); 215562306a36Sopenharmony_ci deferred_free_pages(spfn, t); 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci if (mo_pfn <= epfn) 215862306a36Sopenharmony_ci break; 215962306a36Sopenharmony_ci } 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci return nr_pages; 216262306a36Sopenharmony_ci} 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_cistatic void __init 216562306a36Sopenharmony_cideferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 216662306a36Sopenharmony_ci void *arg) 216762306a36Sopenharmony_ci{ 216862306a36Sopenharmony_ci unsigned long spfn, epfn; 216962306a36Sopenharmony_ci struct zone *zone = arg; 217062306a36Sopenharmony_ci u64 i; 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn); 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci /* 217562306a36Sopenharmony_ci * Initialize and free pages in MAX_ORDER sized increments so that we 217662306a36Sopenharmony_ci * can avoid introducing any issues with the buddy allocator. 217762306a36Sopenharmony_ci */ 217862306a36Sopenharmony_ci while (spfn < end_pfn) { 217962306a36Sopenharmony_ci deferred_init_maxorder(&i, zone, &spfn, &epfn); 218062306a36Sopenharmony_ci cond_resched(); 218162306a36Sopenharmony_ci } 218262306a36Sopenharmony_ci} 218362306a36Sopenharmony_ci 218462306a36Sopenharmony_ci/* An arch may override for more concurrency. 
/* An arch may override for more concurrency. */
__weak int __init
deferred_page_init_max_threads(const struct cpumask *node_cpumask)
{
	return 1;
}

/* Initialise remaining memory on a node */
static int __init deferred_init_memmap(void *data)
{
	pg_data_t *pgdat = data;
	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
	unsigned long spfn = 0, epfn = 0;
	unsigned long first_init_pfn, flags;
	unsigned long start = jiffies;
	struct zone *zone;
	int zid, max_threads;
	u64 i;

	/* Bind the memory initialisation thread to a local node if possible */
	if (!cpumask_empty(cpumask))
		set_cpus_allowed_ptr(current, cpumask);

	pgdat_resize_lock(pgdat, &flags);
	first_init_pfn = pgdat->first_deferred_pfn;
	if (first_init_pfn == ULONG_MAX) {
		pgdat_resize_unlock(pgdat, &flags);
		pgdat_init_report_one_done();
		return 0;
	}

	/* Sanity check boundaries */
	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
	pgdat->first_deferred_pfn = ULONG_MAX;

	/*
	 * Once we unlock here, the zone cannot be grown anymore. Thus, if an
	 * interrupt thread must allocate memory this early in boot, the zone
	 * must be pre-grown before deferred page initialisation starts.
	 */
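	/*
	 * Editor's note: until this thread finishes, boot-time allocations
	 * that need more pages than have been initialised so far fall back
	 * to deferred_grow_zone() below; the deferred_pages static key that
	 * gates that path is disabled in page_alloc_init_late() once all
	 * nodes have completed.
	 */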
	pgdat_resize_unlock(pgdat, &flags);

	/* Only the highest zone is deferred, so find it */
	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		zone = pgdat->node_zones + zid;
		if (first_init_pfn < zone_end_pfn(zone))
			break;
	}

	/* If the zone is empty, somebody else may have cleared out the zone */
	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
						 first_init_pfn))
		goto zone_empty;

	max_threads = deferred_page_init_max_threads(cpumask);

	while (spfn < epfn) {
		unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
		struct padata_mt_job job = {
			.thread_fn = deferred_init_memmap_chunk,
			.fn_arg = zone,
			.start = spfn,
			.size = epfn_align - spfn,
			.align = PAGES_PER_SECTION,
			.min_chunk = PAGES_PER_SECTION,
			.max_threads = max_threads,
		};

		padata_do_multithreaded(&job);
		deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
						    epfn_align);
	}
zone_empty:
	/* Sanity check that the next zone really is unpopulated */
	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));

	pr_info("node %d deferred pages initialised in %ums\n",
		pgdat->node_id, jiffies_to_msecs(jiffies - start));

	pgdat_init_report_one_done();
	return 0;
}
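/*
 * Worked example (editor's sketch): on a node whose remaining deferred
 * range spans four sections and where deferred_page_init_max_threads()
 * returns 4, the padata job above covers [spfn, epfn_align) with
 * align == min_chunk == PAGES_PER_SECTION, so up to four workers each
 * initialise one section-sized chunk concurrently; the outer loop then
 * re-resolves the iterator at epfn_align and continues.
 */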
/*
 * If this zone has deferred pages, try to grow it by initializing enough
 * deferred pages to satisfy the allocation specified by order, rounded up to
 * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
 * of SECTION_SIZE bytes by initializing struct pages in increments of
 * PAGES_PER_SECTION * sizeof(struct page) bytes.
 *
 * Return true when zone was grown, otherwise return false. We return true even
 * when we grow less than requested, to let the caller decide if there are
 * enough pages to satisfy the allocation.
 *
 * Note: We use noinline because this function is needed only during boot, and
 * it is called from a __ref function _deferred_grow_zone. This way we are
 * making sure that it is not inlined into the permanent text section.
 */
bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
{
	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
	pg_data_t *pgdat = zone->zone_pgdat;
	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
	unsigned long spfn, epfn, flags;
	unsigned long nr_pages = 0;
	u64 i;

	/* Only the last zone may have deferred pages */
	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
		return false;

	pgdat_resize_lock(pgdat, &flags);

	/*
	 * If someone grew this zone while we were waiting for the spinlock,
	 * return true, as there might be enough pages already.
	 */
	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
		pgdat_resize_unlock(pgdat, &flags);
		return true;
	}

	/* If the zone is empty, somebody else may have cleared out the zone */
	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
						 first_deferred_pfn)) {
		pgdat->first_deferred_pfn = ULONG_MAX;
		pgdat_resize_unlock(pgdat, &flags);
		/* Retry only once. */
		return first_deferred_pfn != ULONG_MAX;
	}

	/*
	 * Initialize and free pages in MAX_ORDER sized increments so
	 * that we can avoid introducing any issues with the buddy
	 * allocator.
	 */
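	/*
	 * Worked example (editor's sketch): an order-9 request on an arch
	 * with PAGES_PER_SECTION == 32768 yields nr_pages_needed =
	 * ALIGN(512, 32768) = 32768, so the loop below keeps initialising
	 * MAX_ORDER chunks until at least one whole section has been added
	 * and spfn has crossed a section boundary.
	 */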
	while (spfn < epfn) {
		/* Update our first deferred PFN for this section */
		first_deferred_pfn = spfn;

		nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
		touch_nmi_watchdog();

		/* We should only stop along section boundaries */
		if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
			continue;

		/* If our quota has been met we can stop here */
		if (nr_pages >= nr_pages_needed)
			break;
	}

	pgdat->first_deferred_pfn = spfn;
	pgdat_resize_unlock(pgdat, &flags);

	return nr_pages > 0;
}

#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

#ifdef CONFIG_CMA
void __init init_cma_reserved_pageblock(struct page *page)
{
	unsigned i = pageblock_nr_pages;
	struct page *p = page;

	do {
		__ClearPageReserved(p);
		set_page_count(p, 0);
	} while (++p, --i);

	set_pageblock_migratetype(page, MIGRATE_CMA);
	set_page_refcounted(page);
	__free_pages(page, pageblock_order);

	adjust_managed_page_count(page, pageblock_nr_pages);
	page_zone(page)->cma_pages += pageblock_nr_pages;
}
#endif

void set_zone_contiguous(struct zone *zone)
{
	unsigned long block_start_pfn = zone->zone_start_pfn;
	unsigned long block_end_pfn;

	block_end_pfn = pageblock_end_pfn(block_start_pfn);
	for (; block_start_pfn < zone_end_pfn(zone);
	     block_start_pfn = block_end_pfn,
	     block_end_pfn += pageblock_nr_pages) {

		block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));

		if (!__pageblock_pfn_to_page(block_start_pfn,
					     block_end_pfn, zone))
			return;
		cond_resched();
	}

	/* We confirmed that there is no hole */
	zone->contiguous = true;
}

void __init page_alloc_init_late(void)
{
	struct zone *zone;
	int nid;

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT

	/* There will be num_node_state(N_MEMORY) threads */
	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
	for_each_node_state(nid, N_MEMORY) {
		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
	}

	/* Block until all are initialised */
	wait_for_completion(&pgdat_init_all_done_comp);

	/*
	 * We initialized the rest of the deferred pages. Permanently disable
	 * on-demand struct page initialization.
	 */
	static_branch_disable(&deferred_pages);

	/* Reinit limits that are based on free pages after the kernel is up */
	files_maxfiles_init();
#endif

	buffer_init();

	/* Discard memblock private memory */
	memblock_discard();

	for_each_node_state(nid, N_MEMORY)
		shuffle_free_memory(NODE_DATA(nid));

	for_each_populated_zone(zone)
		set_zone_contiguous(zone);

	/* Initialize page ext after all struct pages are initialized. */
	if (deferred_struct_pages)
		page_ext_init();

	page_alloc_sysctl_init();
}

#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
/*
 * Returns the number of pages that the arch has reserved but
 * is not known to alloc_large_system_hash().
 */
static unsigned long __init arch_reserved_kernel_pages(void)
{
	return 0;
}
#endif

/*
 * Adaptive scale is meant to reduce sizes of hash tables on large memory
 * machines. As memory size is increased the scale is also increased, but at
 * a slower pace. Starting from ADAPT_SCALE_BASE (64G), every time memory
 * quadruples the scale is increased by one, which means the size of the hash
 * table only doubles, instead of quadrupling as well. Because 32-bit systems
 * cannot have large physical memory, where this scaling makes sense, it is
 * disabled on such platforms.
 */
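/*
 * Worked example (editor's sketch): on a 64-bit machine with 1TB of
 * memory and high_limit == 0, the loop below walks 64G -> 256G -> 1T,
 * bumping scale twice, so the resulting table is 4x (not 16x) the size
 * it would get on a 64G machine.
 */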
#if __BITS_PER_LONG > 32
#define ADAPT_SCALE_BASE	(64ul << 30)
#define ADAPT_SCALE_SHIFT	2
#define ADAPT_SCALE_NPAGES	(ADAPT_SCALE_BASE >> PAGE_SHIFT)
#endif

/*
 * Allocate a large system hash table from bootmem.
 * - it is assumed that the hash table must contain an exact power-of-2
 *   quantity of entries
 * - limit is the number of hash buckets, not the total allocation size
 */
void *__init alloc_large_system_hash(const char *tablename,
				     unsigned long bucketsize,
				     unsigned long numentries,
				     int scale,
				     int flags,
				     unsigned int *_hash_shift,
				     unsigned int *_hash_mask,
				     unsigned long low_limit,
				     unsigned long high_limit)
{
	unsigned long long max = high_limit;
	unsigned long log2qty, size;
	void *table;
	gfp_t gfp_flags;
	bool virt;
	bool huge;

	/* allow the kernel cmdline to have a say */
	if (!numentries) {
		/* round applicable memory size up to nearest megabyte */
		numentries = nr_kernel_pages;
		numentries -= arch_reserved_kernel_pages();

		/* It isn't necessary when PAGE_SIZE >= 1MB */
		if (PAGE_SIZE < SZ_1M)
			numentries = round_up(numentries, SZ_1M / PAGE_SIZE);

#if __BITS_PER_LONG > 32
		if (!high_limit) {
			unsigned long adapt;

			for (adapt = ADAPT_SCALE_NPAGES; adapt < numentries;
			     adapt <<= ADAPT_SCALE_SHIFT)
				scale++;
		}
#endif

		/* limit to 1 bucket per 2^scale bytes of low memory */
		if (scale > PAGE_SHIFT)
			numentries >>= (scale - PAGE_SHIFT);
		else
			numentries <<= (PAGE_SHIFT - scale);

		if (unlikely((numentries * bucketsize) < PAGE_SIZE))
			numentries = PAGE_SIZE / bucketsize;
	}
	numentries = roundup_pow_of_two(numentries);

	/* limit allocation size to 1/16 total memory by default */
	if (max == 0) {
		max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
		do_div(max, bucketsize);
	}
	max = min(max, 0x80000000ULL);
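	/*
	 * Worked example (editor's sketch): with 16GB of low memory,
	 * PAGE_SHIFT == 12 and scale == 14, numentries starts at ~4M
	 * pages and is shifted right by (14 - 12), giving ~1M buckets,
	 * i.e. one bucket per 16KB of low memory; the result is then
	 * clamped to [low_limit, max] below.
	 */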
	if (numentries < low_limit)
		numentries = low_limit;
	if (numentries > max)
		numentries = max;

	log2qty = ilog2(numentries);

	gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
	do {
		virt = false;
		size = bucketsize << log2qty;
		if (flags & HASH_EARLY) {
			if (flags & HASH_ZERO)
				table = memblock_alloc(size, SMP_CACHE_BYTES);
			else
				table = memblock_alloc_raw(size,
							   SMP_CACHE_BYTES);
		} else if (get_order(size) > MAX_ORDER || hashdist) {
			table = vmalloc_huge(size, gfp_flags);
			virt = true;
			if (table)
				huge = is_vm_area_hugepages(table);
		} else {
			/*
			 * If bucketsize is not a power-of-two, we may free
			 * some pages at the end of the hash table, which
			 * alloc_pages_exact() automatically does.
			 */
			table = alloc_pages_exact(size, gfp_flags);
			kmemleak_alloc(table, size, 1, gfp_flags);
		}
	} while (!table && size > PAGE_SIZE && --log2qty);

	if (!table)
		panic("Failed to allocate %s hash table\n", tablename);

	pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
		tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
		virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");

	if (_hash_shift)
		*_hash_shift = log2qty;
	if (_hash_mask)
		*_hash_mask = (1 << log2qty) - 1;

	return table;
}
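/*
 * Usage sketch (editor's illustration; the identifiers mirror the style
 * of the inode-cache setup in fs/inode.c and are not defined here):
 *
 *	inode_hashtable = alloc_large_system_hash("Inode-cache",
 *					sizeof(struct hlist_head),
 *					ihash_entries, 14, HASH_ZERO,
 *					&i_hash_shift, &i_hash_mask, 0, 0);
 *
 * scale == 14 asks for roughly one bucket per 16KB of low memory, and
 * HASH_ZERO returns the table pre-zeroed.
 */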
/**
 * set_dma_reserve - set the specified number of pages reserved in the first zone
 * @new_dma_reserve: The number of pages to mark reserved
 *
 * The per-cpu batchsize and zone watermarks are determined by managed_pages.
 * In the DMA zone, a significant percentage may be consumed by the kernel
 * image and other unfreeable allocations, which can skew the watermarks
 * badly. This function may optionally be used to account for unfreeable
 * pages in the first zone (e.g., ZONE_DMA). The effect will be lower
 * watermarks and a smaller per-cpu batchsize.
 */
void __init set_dma_reserve(unsigned long new_dma_reserve)
{
	dma_reserve = new_dma_reserve;
}

void __init memblock_free_pages(struct page *page, unsigned long pfn,
				unsigned int order)
{
	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
		int nid = early_pfn_to_nid(pfn);

		if (!early_page_initialised(pfn, nid))
			return;
	}

	if (!kmsan_memblock_free_pages(page, order)) {
		/* KMSAN will take care of these pages. */
		return;
	}
	__free_pages_core(page, order);
}

DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
EXPORT_SYMBOL(init_on_alloc);

DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);

static bool _init_on_alloc_enabled_early __read_mostly
				= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
{
	return kstrtobool(buf, &_init_on_alloc_enabled_early);
}
early_param("init_on_alloc", early_init_on_alloc);

static bool _init_on_free_enabled_early __read_mostly
				= IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON);
static int __init early_init_on_free(char *buf)
{
	return kstrtobool(buf, &_init_on_free_enabled_early);
}
early_param("init_on_free", early_init_on_free);

DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);

/*
 * Enable static keys related to various memory debugging and hardening
 * options. Some override others, and they depend on early params that are
 * evaluated in order of appearance. So we need to first gather the full
 * picture of what was enabled, and then make decisions.
 */
static void __init mem_debugging_and_hardening_init(void)
{
	bool page_poisoning_requested = false;
	bool want_check_pages = false;

#ifdef CONFIG_PAGE_POISONING
	/*
	 * Page poisoning is debug page alloc for some arches. If either
	 * of those options is enabled, enable poisoning.
	 */
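	/*
	 * Editor's note (illustrative): booting with
	 * "page_poisoning=on init_on_alloc=1" ends up here with both
	 * requests recorded; the block below then lets poisoning win, and
	 * init_on_alloc/init_on_free are forced off with a message.
	 */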
	if (page_poisoning_enabled() ||
	    (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
	     debug_pagealloc_enabled())) {
		static_branch_enable(&_page_poisoning_enabled);
		page_poisoning_requested = true;
		want_check_pages = true;
	}
#endif

	if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
	    page_poisoning_requested) {
		pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
			"will take precedence over init_on_alloc and init_on_free\n");
		_init_on_alloc_enabled_early = false;
		_init_on_free_enabled_early = false;
	}

	if (_init_on_alloc_enabled_early) {
		want_check_pages = true;
		static_branch_enable(&init_on_alloc);
	} else {
		static_branch_disable(&init_on_alloc);
	}

	if (_init_on_free_enabled_early) {
		want_check_pages = true;
		static_branch_enable(&init_on_free);
	} else {
		static_branch_disable(&init_on_free);
	}

	if (IS_ENABLED(CONFIG_KMSAN) &&
	    (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
		pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");

#ifdef CONFIG_DEBUG_PAGEALLOC
	if (debug_pagealloc_enabled()) {
		want_check_pages = true;
		static_branch_enable(&_debug_pagealloc_enabled);

		if (debug_guardpage_minorder())
			static_branch_enable(&_debug_guardpage_enabled);
	}
#endif

	/*
	 * Any page debugging or hardening option also enables sanity checking
	 * of struct pages being allocated or freed. With CONFIG_DEBUG_VM it's
	 * enabled already.
	 */
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && want_check_pages)
		static_branch_enable(&check_pages_enabled);
}
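/*
 * Editor's note: on a kernel built with CONFIG_INIT_STACK_ALL_ZERO and
 * booted with init_on_alloc=1, report_meminit() below would print a line
 * of the form (values illustrative):
 *
 *	mem auto-init: stack:all(zero), heap alloc:on, heap free:off
 */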
/* Report memory auto-initialization states for this boot. */
static void __init report_meminit(void)
{
	const char *stack;

	if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN))
		stack = "all(pattern)";
	else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO))
		stack = "all(zero)";
	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
		stack = "byref_all(zero)";
	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
		stack = "byref(zero)";
	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
		stack = "__user(zero)";
	else
		stack = "off";

	pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
		stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
		want_init_on_free() ? "on" : "off");
	if (want_init_on_free())
		pr_info("mem auto-init: clearing system memory may take some time...\n");
}

static void __init mem_init_print_info(void)
{
	unsigned long physpages, codesize, datasize, rosize, bss_size;
	unsigned long init_code_size, init_data_size;

	physpages = get_num_physpages();
	codesize = _etext - _stext;
	datasize = _edata - _sdata;
	rosize = __end_rodata - __start_rodata;
	bss_size = __bss_stop - __bss_start;
	init_data_size = __init_end - __init_begin;
	init_code_size = _einittext - _sinittext;

	/*
	 * Detect special cases and adjust section sizes accordingly:
	 * 1) .init.* may be embedded into .data sections
	 * 2) .init.text.* may be out of [__init_begin, __init_end],
	 *    please refer to arch/tile/kernel/vmlinux.lds.S.
	 * 3) .rodata.* may be embedded into .text or .data sections.
	 */
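	/*
	 * Editor's note: adj_init_size(start, end, size, pos, adj) below
	 * subtracts adj from size when pos lies inside [start, end). For
	 * example, if _sinittext falls inside [_stext, _etext), the init
	 * code was counted in codesize, so init_code_size is subtracted
	 * from it again.
	 */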
#define adj_init_size(start, end, size, pos, adj) \
	do { \
		if (&start[0] <= &pos[0] && &pos[0] < &end[0] && size > adj) \
			size -= adj; \
	} while (0)

	adj_init_size(__init_begin, __init_end, init_data_size,
		      _sinittext, init_code_size);
	adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
	adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
	adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
	adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);

#undef adj_init_size

	pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
#ifdef CONFIG_HIGHMEM
		", %luK highmem"
#endif
		")\n",
		K(nr_free_pages()), K(physpages),
		codesize / SZ_1K, datasize / SZ_1K, rosize / SZ_1K,
		(init_data_size + init_code_size) / SZ_1K, bss_size / SZ_1K,
		K(physpages - totalram_pages() - totalcma_pages),
		K(totalcma_pages)
#ifdef CONFIG_HIGHMEM
		, K(totalhigh_pages())
#endif
		);
}

/*
 * Set up kernel memory allocators
 */
void __init mm_core_init(void)
{
	/* Initializations relying on SMP setup */
	build_all_zonelists(NULL);
	page_alloc_init_cpuhp();

	/*
	 * page_ext requires contiguous pages,
	 * bigger than MAX_ORDER unless SPARSEMEM.
	 */
	page_ext_init_flatmem();
	mem_debugging_and_hardening_init();
	kfence_alloc_pool_and_metadata();
	report_meminit();
	kmsan_init_shadow();
	stack_depot_early_init();
	mem_init();
	mem_init_print_info();
	kmem_cache_init();
	/*
	 * page_owner must be initialized after buddy is ready, and also after
	 * slab is ready so that stack_depot_init() works properly.
	 */
	page_ext_init_flatmem_late();
	kmemleak_init();
	ptlock_cache_init();
	pgtable_cache_init();
	debug_objects_mem_init();
	vmalloc_init();
	/* If page_ext init was not deferred, do it now that vmap is fully up */
	if (!deferred_struct_pages)
		page_ext_init();
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
	/* Should be run after espfix64 is set up. */
	pti_init();
	kmsan_init_runtime();
	mm_cache_init();
}