162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. 462306a36Sopenharmony_ci * Copyright (c) 2001 Intel Corp. 562306a36Sopenharmony_ci * Copyright (c) 2001 Tony Luck <tony.luck@intel.com> 662306a36Sopenharmony_ci * Copyright (c) 2002 NEC Corp. 762306a36Sopenharmony_ci * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> 862306a36Sopenharmony_ci * Copyright (c) 2004 Silicon Graphics, Inc 962306a36Sopenharmony_ci * Russ Anderson <rja@sgi.com> 1062306a36Sopenharmony_ci * Jesse Barnes <jbarnes@sgi.com> 1162306a36Sopenharmony_ci * Jack Steiner <steiner@sgi.com> 1262306a36Sopenharmony_ci */ 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/* 1562306a36Sopenharmony_ci * Platform initialization for Discontig Memory 1662306a36Sopenharmony_ci */ 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <linux/kernel.h> 1962306a36Sopenharmony_ci#include <linux/mm.h> 2062306a36Sopenharmony_ci#include <linux/nmi.h> 2162306a36Sopenharmony_ci#include <linux/swap.h> 2262306a36Sopenharmony_ci#include <linux/memblock.h> 2362306a36Sopenharmony_ci#include <linux/acpi.h> 2462306a36Sopenharmony_ci#include <linux/efi.h> 2562306a36Sopenharmony_ci#include <linux/nodemask.h> 2662306a36Sopenharmony_ci#include <linux/slab.h> 2762306a36Sopenharmony_ci#include <asm/efi.h> 2862306a36Sopenharmony_ci#include <asm/tlb.h> 2962306a36Sopenharmony_ci#include <asm/meminit.h> 3062306a36Sopenharmony_ci#include <asm/numa.h> 3162306a36Sopenharmony_ci#include <asm/sections.h> 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci/* 3462306a36Sopenharmony_ci * Track per-node information needed to setup the boot memory allocator, the 3562306a36Sopenharmony_ci * per-node areas, and the real VM. 3662306a36Sopenharmony_ci */ 3762306a36Sopenharmony_cistruct early_node_data { 3862306a36Sopenharmony_ci struct ia64_node_data *node_data; 3962306a36Sopenharmony_ci unsigned long pernode_addr; 4062306a36Sopenharmony_ci unsigned long pernode_size; 4162306a36Sopenharmony_ci unsigned long min_pfn; 4262306a36Sopenharmony_ci unsigned long max_pfn; 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cistatic struct early_node_data mem_data[MAX_NUMNODES] __initdata; 4662306a36Sopenharmony_cistatic nodemask_t memory_less_mask __initdata; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cipg_data_t *pgdat_list[MAX_NUMNODES]; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci/* 5162306a36Sopenharmony_ci * To prevent cache aliasing effects, align per-node structures so that they 5262306a36Sopenharmony_ci * start at addresses that are strided by node number. 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_ci#define MAX_NODE_ALIGN_OFFSET (32 * 1024 * 1024) 5562306a36Sopenharmony_ci#define NODEDATA_ALIGN(addr, node) \ 5662306a36Sopenharmony_ci ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + \ 5762306a36Sopenharmony_ci (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci/** 6062306a36Sopenharmony_ci * build_node_maps - callback to setup mem_data structs for each node 6162306a36Sopenharmony_ci * @start: physical start of range 6262306a36Sopenharmony_ci * @len: length of range 6362306a36Sopenharmony_ci * @node: node where this range resides 6462306a36Sopenharmony_ci * 6562306a36Sopenharmony_ci * Detect extents of each piece of memory that we wish to 6662306a36Sopenharmony_ci * treat as a virtually contiguous block (i.e. each node). Each such block 6762306a36Sopenharmony_ci * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down 6862306a36Sopenharmony_ci * if necessary. Any non-existent pages will simply be part of the virtual 6962306a36Sopenharmony_ci * memmap. 7062306a36Sopenharmony_ci */ 7162306a36Sopenharmony_cistatic int __init build_node_maps(unsigned long start, unsigned long len, 7262306a36Sopenharmony_ci int node) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci unsigned long spfn, epfn, end = start + len; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; 7762306a36Sopenharmony_ci spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci if (!mem_data[node].min_pfn) { 8062306a36Sopenharmony_ci mem_data[node].min_pfn = spfn; 8162306a36Sopenharmony_ci mem_data[node].max_pfn = epfn; 8262306a36Sopenharmony_ci } else { 8362306a36Sopenharmony_ci mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn); 8462306a36Sopenharmony_ci mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn); 8562306a36Sopenharmony_ci } 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci return 0; 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/** 9162306a36Sopenharmony_ci * early_nr_cpus_node - return number of cpus on a given node 9262306a36Sopenharmony_ci * @node: node to check 9362306a36Sopenharmony_ci * 9462306a36Sopenharmony_ci * Count the number of cpus on @node. We can't use nr_cpus_node() yet because 9562306a36Sopenharmony_ci * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been 9662306a36Sopenharmony_ci * called yet. Note that node 0 will also count all non-existent cpus. 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_cistatic int early_nr_cpus_node(int node) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci int cpu, n = 0; 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci for_each_possible_early_cpu(cpu) 10362306a36Sopenharmony_ci if (node == node_cpuid[cpu].nid) 10462306a36Sopenharmony_ci n++; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci return n; 10762306a36Sopenharmony_ci} 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci/** 11062306a36Sopenharmony_ci * compute_pernodesize - compute size of pernode data 11162306a36Sopenharmony_ci * @node: the node id. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_cistatic unsigned long compute_pernodesize(int node) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci unsigned long pernodesize = 0, cpus; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci cpus = early_nr_cpus_node(node); 11862306a36Sopenharmony_ci pernodesize += PERCPU_PAGE_SIZE * cpus; 11962306a36Sopenharmony_ci pernodesize += node * L1_CACHE_BYTES; 12062306a36Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); 12162306a36Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); 12262306a36Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); 12362306a36Sopenharmony_ci pernodesize = PAGE_ALIGN(pernodesize); 12462306a36Sopenharmony_ci return pernodesize; 12562306a36Sopenharmony_ci} 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci/** 12862306a36Sopenharmony_ci * per_cpu_node_setup - setup per-cpu areas on each node 12962306a36Sopenharmony_ci * @cpu_data: per-cpu area on this node 13062306a36Sopenharmony_ci * @node: node to setup 13162306a36Sopenharmony_ci * 13262306a36Sopenharmony_ci * Copy the static per-cpu data into the region we just set aside and then 13362306a36Sopenharmony_ci * setup __per_cpu_offset for each CPU on this node. Return a pointer to 13462306a36Sopenharmony_ci * the end of the area. 13562306a36Sopenharmony_ci */ 13662306a36Sopenharmony_cistatic void *per_cpu_node_setup(void *cpu_data, int node) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci#ifdef CONFIG_SMP 13962306a36Sopenharmony_ci int cpu; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci for_each_possible_early_cpu(cpu) { 14262306a36Sopenharmony_ci void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (node != node_cpuid[cpu].nid) 14562306a36Sopenharmony_ci continue; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); 14862306a36Sopenharmony_ci __per_cpu_offset[cpu] = (char *)__va(cpu_data) - 14962306a36Sopenharmony_ci __per_cpu_start; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* 15262306a36Sopenharmony_ci * percpu area for cpu0 is moved from the __init area 15362306a36Sopenharmony_ci * which is setup by head.S and used till this point. 15462306a36Sopenharmony_ci * Update ar.k3. This move is ensures that percpu 15562306a36Sopenharmony_ci * area for cpu0 is on the correct node and its 15662306a36Sopenharmony_ci * virtual address isn't insanely far from other 15762306a36Sopenharmony_ci * percpu areas which is important for congruent 15862306a36Sopenharmony_ci * percpu allocator. 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_ci if (cpu == 0) 16162306a36Sopenharmony_ci ia64_set_kr(IA64_KR_PER_CPU_DATA, 16262306a36Sopenharmony_ci (unsigned long)cpu_data - 16362306a36Sopenharmony_ci (unsigned long)__per_cpu_start); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci cpu_data += PERCPU_PAGE_SIZE; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci#endif 16862306a36Sopenharmony_ci return cpu_data; 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci#ifdef CONFIG_SMP 17262306a36Sopenharmony_ci/** 17362306a36Sopenharmony_ci * setup_per_cpu_areas - setup percpu areas 17462306a36Sopenharmony_ci * 17562306a36Sopenharmony_ci * Arch code has already allocated and initialized percpu areas. All 17662306a36Sopenharmony_ci * this function has to do is to teach the determined layout to the 17762306a36Sopenharmony_ci * dynamic percpu allocator, which happens to be more complex than 17862306a36Sopenharmony_ci * creating whole new ones using helpers. 17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_civoid __init setup_per_cpu_areas(void) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci struct pcpu_alloc_info *ai; 18362306a36Sopenharmony_ci struct pcpu_group_info *gi; 18462306a36Sopenharmony_ci unsigned int *cpu_map; 18562306a36Sopenharmony_ci void *base; 18662306a36Sopenharmony_ci unsigned long base_offset; 18762306a36Sopenharmony_ci unsigned int cpu; 18862306a36Sopenharmony_ci ssize_t static_size, reserved_size, dyn_size; 18962306a36Sopenharmony_ci int node, prev_node, unit, nr_units; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); 19262306a36Sopenharmony_ci if (!ai) 19362306a36Sopenharmony_ci panic("failed to allocate pcpu_alloc_info"); 19462306a36Sopenharmony_ci cpu_map = ai->groups[0].cpu_map; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci /* determine base */ 19762306a36Sopenharmony_ci base = (void *)ULONG_MAX; 19862306a36Sopenharmony_ci for_each_possible_cpu(cpu) 19962306a36Sopenharmony_ci base = min(base, 20062306a36Sopenharmony_ci (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); 20162306a36Sopenharmony_ci base_offset = (void *)__per_cpu_start - base; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci /* build cpu_map, units are grouped by node */ 20462306a36Sopenharmony_ci unit = 0; 20562306a36Sopenharmony_ci for_each_node(node) 20662306a36Sopenharmony_ci for_each_possible_cpu(cpu) 20762306a36Sopenharmony_ci if (node == node_cpuid[cpu].nid) 20862306a36Sopenharmony_ci cpu_map[unit++] = cpu; 20962306a36Sopenharmony_ci nr_units = unit; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci /* set basic parameters */ 21262306a36Sopenharmony_ci static_size = __per_cpu_end - __per_cpu_start; 21362306a36Sopenharmony_ci reserved_size = PERCPU_MODULE_RESERVE; 21462306a36Sopenharmony_ci dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; 21562306a36Sopenharmony_ci if (dyn_size < 0) 21662306a36Sopenharmony_ci panic("percpu area overflow static=%zd reserved=%zd\n", 21762306a36Sopenharmony_ci static_size, reserved_size); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci ai->static_size = static_size; 22062306a36Sopenharmony_ci ai->reserved_size = reserved_size; 22162306a36Sopenharmony_ci ai->dyn_size = dyn_size; 22262306a36Sopenharmony_ci ai->unit_size = PERCPU_PAGE_SIZE; 22362306a36Sopenharmony_ci ai->atom_size = PAGE_SIZE; 22462306a36Sopenharmony_ci ai->alloc_size = PERCPU_PAGE_SIZE; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci /* 22762306a36Sopenharmony_ci * CPUs are put into groups according to node. Walk cpu_map 22862306a36Sopenharmony_ci * and create new groups at node boundaries. 22962306a36Sopenharmony_ci */ 23062306a36Sopenharmony_ci prev_node = NUMA_NO_NODE; 23162306a36Sopenharmony_ci ai->nr_groups = 0; 23262306a36Sopenharmony_ci for (unit = 0; unit < nr_units; unit++) { 23362306a36Sopenharmony_ci cpu = cpu_map[unit]; 23462306a36Sopenharmony_ci node = node_cpuid[cpu].nid; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci if (node == prev_node) { 23762306a36Sopenharmony_ci gi->nr_units++; 23862306a36Sopenharmony_ci continue; 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci prev_node = node; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci gi = &ai->groups[ai->nr_groups++]; 24362306a36Sopenharmony_ci gi->nr_units = 1; 24462306a36Sopenharmony_ci gi->base_offset = __per_cpu_offset[cpu] + base_offset; 24562306a36Sopenharmony_ci gi->cpu_map = &cpu_map[unit]; 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci pcpu_setup_first_chunk(ai, base); 24962306a36Sopenharmony_ci pcpu_free_alloc_info(ai); 25062306a36Sopenharmony_ci} 25162306a36Sopenharmony_ci#endif 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci/** 25462306a36Sopenharmony_ci * fill_pernode - initialize pernode data. 25562306a36Sopenharmony_ci * @node: the node id. 25662306a36Sopenharmony_ci * @pernode: physical address of pernode data 25762306a36Sopenharmony_ci * @pernodesize: size of the pernode data 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_cistatic void __init fill_pernode(int node, unsigned long pernode, 26062306a36Sopenharmony_ci unsigned long pernodesize) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci void *cpu_data; 26362306a36Sopenharmony_ci int cpus = early_nr_cpus_node(node); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci mem_data[node].pernode_addr = pernode; 26662306a36Sopenharmony_ci mem_data[node].pernode_size = pernodesize; 26762306a36Sopenharmony_ci memset(__va(pernode), 0, pernodesize); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci cpu_data = (void *)pernode; 27062306a36Sopenharmony_ci pernode += PERCPU_PAGE_SIZE * cpus; 27162306a36Sopenharmony_ci pernode += node * L1_CACHE_BYTES; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci pgdat_list[node] = __va(pernode); 27462306a36Sopenharmony_ci pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci mem_data[node].node_data = __va(pernode); 27762306a36Sopenharmony_ci pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); 27862306a36Sopenharmony_ci pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci cpu_data = per_cpu_node_setup(cpu_data, node); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci return; 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci/** 28662306a36Sopenharmony_ci * find_pernode_space - allocate memory for memory map and per-node structures 28762306a36Sopenharmony_ci * @start: physical start of range 28862306a36Sopenharmony_ci * @len: length of range 28962306a36Sopenharmony_ci * @node: node where this range resides 29062306a36Sopenharmony_ci * 29162306a36Sopenharmony_ci * This routine reserves space for the per-cpu data struct, the list of 29262306a36Sopenharmony_ci * pg_data_ts and the per-node data struct. Each node will have something like 29362306a36Sopenharmony_ci * the following in the first chunk of addr. space large enough to hold it. 29462306a36Sopenharmony_ci * 29562306a36Sopenharmony_ci * ________________________ 29662306a36Sopenharmony_ci * | | 29762306a36Sopenharmony_ci * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first 29862306a36Sopenharmony_ci * | PERCPU_PAGE_SIZE * | start and length big enough 29962306a36Sopenharmony_ci * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. 30062306a36Sopenharmony_ci * |------------------------| 30162306a36Sopenharmony_ci * | local pg_data_t * | 30262306a36Sopenharmony_ci * |------------------------| 30362306a36Sopenharmony_ci * | local ia64_node_data | 30462306a36Sopenharmony_ci * |------------------------| 30562306a36Sopenharmony_ci * | ??? | 30662306a36Sopenharmony_ci * |________________________| 30762306a36Sopenharmony_ci * 30862306a36Sopenharmony_ci * Once this space has been set aside, the bootmem maps are initialized. We 30962306a36Sopenharmony_ci * could probably move the allocation of the per-cpu and ia64_node_data space 31062306a36Sopenharmony_ci * outside of this function and use alloc_bootmem_node(), but doing it here 31162306a36Sopenharmony_ci * is straightforward and we get the alignments we want so... 31262306a36Sopenharmony_ci */ 31362306a36Sopenharmony_cistatic int __init find_pernode_space(unsigned long start, unsigned long len, 31462306a36Sopenharmony_ci int node) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci unsigned long spfn, epfn; 31762306a36Sopenharmony_ci unsigned long pernodesize = 0, pernode; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci spfn = start >> PAGE_SHIFT; 32062306a36Sopenharmony_ci epfn = (start + len) >> PAGE_SHIFT; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci /* 32362306a36Sopenharmony_ci * Make sure this memory falls within this node's usable memory 32462306a36Sopenharmony_ci * since we may have thrown some away in build_maps(). 32562306a36Sopenharmony_ci */ 32662306a36Sopenharmony_ci if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn) 32762306a36Sopenharmony_ci return 0; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci /* Don't setup this node's local space twice... */ 33062306a36Sopenharmony_ci if (mem_data[node].pernode_addr) 33162306a36Sopenharmony_ci return 0; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci /* 33462306a36Sopenharmony_ci * Calculate total size needed, incl. what's necessary 33562306a36Sopenharmony_ci * for good alignment and alias prevention. 33662306a36Sopenharmony_ci */ 33762306a36Sopenharmony_ci pernodesize = compute_pernodesize(node); 33862306a36Sopenharmony_ci pernode = NODEDATA_ALIGN(start, node); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci /* Is this range big enough for what we want to store here? */ 34162306a36Sopenharmony_ci if (start + len > (pernode + pernodesize)) 34262306a36Sopenharmony_ci fill_pernode(node, pernode, pernodesize); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci return 0; 34562306a36Sopenharmony_ci} 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci/** 34862306a36Sopenharmony_ci * reserve_pernode_space - reserve memory for per-node space 34962306a36Sopenharmony_ci * 35062306a36Sopenharmony_ci * Reserve the space used by the bootmem maps & per-node space in the boot 35162306a36Sopenharmony_ci * allocator so that when we actually create the real mem maps we don't 35262306a36Sopenharmony_ci * use their memory. 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_cistatic void __init reserve_pernode_space(void) 35562306a36Sopenharmony_ci{ 35662306a36Sopenharmony_ci unsigned long base, size; 35762306a36Sopenharmony_ci int node; 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci for_each_online_node(node) { 36062306a36Sopenharmony_ci if (node_isset(node, memory_less_mask)) 36162306a36Sopenharmony_ci continue; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci /* Now the per-node space */ 36462306a36Sopenharmony_ci size = mem_data[node].pernode_size; 36562306a36Sopenharmony_ci base = __pa(mem_data[node].pernode_addr); 36662306a36Sopenharmony_ci memblock_reserve(base, size); 36762306a36Sopenharmony_ci } 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_cistatic void scatter_node_data(void) 37162306a36Sopenharmony_ci{ 37262306a36Sopenharmony_ci pg_data_t **dst; 37362306a36Sopenharmony_ci int node; 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci /* 37662306a36Sopenharmony_ci * for_each_online_node() can't be used at here. 37762306a36Sopenharmony_ci * node_online_map is not set for hot-added nodes at this time, 37862306a36Sopenharmony_ci * because we are halfway through initialization of the new node's 37962306a36Sopenharmony_ci * structures. If for_each_online_node() is used, a new node's 38062306a36Sopenharmony_ci * pg_data_ptrs will be not initialized. Instead of using it, 38162306a36Sopenharmony_ci * pgdat_list[] is checked. 38262306a36Sopenharmony_ci */ 38362306a36Sopenharmony_ci for_each_node(node) { 38462306a36Sopenharmony_ci if (pgdat_list[node]) { 38562306a36Sopenharmony_ci dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; 38662306a36Sopenharmony_ci memcpy(dst, pgdat_list, sizeof(pgdat_list)); 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci } 38962306a36Sopenharmony_ci} 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci/** 39262306a36Sopenharmony_ci * initialize_pernode_data - fixup per-cpu & per-node pointers 39362306a36Sopenharmony_ci * 39462306a36Sopenharmony_ci * Each node's per-node area has a copy of the global pg_data_t list, so 39562306a36Sopenharmony_ci * we copy that to each node here, as well as setting the per-cpu pointer 39662306a36Sopenharmony_ci * to the local node data structure. 39762306a36Sopenharmony_ci */ 39862306a36Sopenharmony_cistatic void __init initialize_pernode_data(void) 39962306a36Sopenharmony_ci{ 40062306a36Sopenharmony_ci int cpu, node; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci scatter_node_data(); 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci#ifdef CONFIG_SMP 40562306a36Sopenharmony_ci /* Set the node_data pointer for each per-cpu struct */ 40662306a36Sopenharmony_ci for_each_possible_early_cpu(cpu) { 40762306a36Sopenharmony_ci node = node_cpuid[cpu].nid; 40862306a36Sopenharmony_ci per_cpu(ia64_cpu_info, cpu).node_data = 40962306a36Sopenharmony_ci mem_data[node].node_data; 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci#else 41262306a36Sopenharmony_ci { 41362306a36Sopenharmony_ci struct cpuinfo_ia64 *cpu0_cpu_info; 41462306a36Sopenharmony_ci cpu = 0; 41562306a36Sopenharmony_ci node = node_cpuid[cpu].nid; 41662306a36Sopenharmony_ci cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + 41762306a36Sopenharmony_ci ((char *)&ia64_cpu_info - __per_cpu_start)); 41862306a36Sopenharmony_ci cpu0_cpu_info->node_data = mem_data[node].node_data; 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci#endif /* CONFIG_SMP */ 42162306a36Sopenharmony_ci} 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci/** 42462306a36Sopenharmony_ci * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit 42562306a36Sopenharmony_ci * node but fall back to any other node when __alloc_bootmem_node fails 42662306a36Sopenharmony_ci * for best. 42762306a36Sopenharmony_ci * @nid: node id 42862306a36Sopenharmony_ci * @pernodesize: size of this node's pernode data 42962306a36Sopenharmony_ci */ 43062306a36Sopenharmony_cistatic void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) 43162306a36Sopenharmony_ci{ 43262306a36Sopenharmony_ci void *ptr = NULL; 43362306a36Sopenharmony_ci u8 best = 0xff; 43462306a36Sopenharmony_ci int bestnode = NUMA_NO_NODE, node, anynode = 0; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci for_each_online_node(node) { 43762306a36Sopenharmony_ci if (node_isset(node, memory_less_mask)) 43862306a36Sopenharmony_ci continue; 43962306a36Sopenharmony_ci else if (node_distance(nid, node) < best) { 44062306a36Sopenharmony_ci best = node_distance(nid, node); 44162306a36Sopenharmony_ci bestnode = node; 44262306a36Sopenharmony_ci } 44362306a36Sopenharmony_ci anynode = node; 44462306a36Sopenharmony_ci } 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci if (bestnode == NUMA_NO_NODE) 44762306a36Sopenharmony_ci bestnode = anynode; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE, 45062306a36Sopenharmony_ci __pa(MAX_DMA_ADDRESS), 45162306a36Sopenharmony_ci MEMBLOCK_ALLOC_ACCESSIBLE, 45262306a36Sopenharmony_ci bestnode); 45362306a36Sopenharmony_ci if (!ptr) 45462306a36Sopenharmony_ci panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n", 45562306a36Sopenharmony_ci __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode, 45662306a36Sopenharmony_ci __pa(MAX_DMA_ADDRESS)); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci return ptr; 45962306a36Sopenharmony_ci} 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci/** 46262306a36Sopenharmony_ci * memory_less_nodes - allocate and initialize CPU only nodes pernode 46362306a36Sopenharmony_ci * information. 46462306a36Sopenharmony_ci */ 46562306a36Sopenharmony_cistatic void __init memory_less_nodes(void) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci unsigned long pernodesize; 46862306a36Sopenharmony_ci void *pernode; 46962306a36Sopenharmony_ci int node; 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci for_each_node_mask(node, memory_less_mask) { 47262306a36Sopenharmony_ci pernodesize = compute_pernodesize(node); 47362306a36Sopenharmony_ci pernode = memory_less_node_alloc(node, pernodesize); 47462306a36Sopenharmony_ci fill_pernode(node, __pa(pernode), pernodesize); 47562306a36Sopenharmony_ci } 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci return; 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci/** 48162306a36Sopenharmony_ci * find_memory - walk the EFI memory map and setup the bootmem allocator 48262306a36Sopenharmony_ci * 48362306a36Sopenharmony_ci * Called early in boot to setup the bootmem allocator, and to 48462306a36Sopenharmony_ci * allocate the per-cpu and per-node structures. 48562306a36Sopenharmony_ci */ 48662306a36Sopenharmony_civoid __init find_memory(void) 48762306a36Sopenharmony_ci{ 48862306a36Sopenharmony_ci int node; 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci reserve_memory(); 49162306a36Sopenharmony_ci efi_memmap_walk(filter_memory, register_active_ranges); 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci if (num_online_nodes() == 0) { 49462306a36Sopenharmony_ci printk(KERN_ERR "node info missing!\n"); 49562306a36Sopenharmony_ci node_set_online(0); 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci nodes_or(memory_less_mask, memory_less_mask, node_online_map); 49962306a36Sopenharmony_ci min_low_pfn = -1; 50062306a36Sopenharmony_ci max_low_pfn = 0; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci /* These actually end up getting called by call_pernode_memory() */ 50362306a36Sopenharmony_ci efi_memmap_walk(filter_rsvd_memory, build_node_maps); 50462306a36Sopenharmony_ci efi_memmap_walk(filter_rsvd_memory, find_pernode_space); 50562306a36Sopenharmony_ci efi_memmap_walk(find_max_min_low_pfn, NULL); 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci for_each_online_node(node) 50862306a36Sopenharmony_ci if (mem_data[node].min_pfn) 50962306a36Sopenharmony_ci node_clear(node, memory_less_mask); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci reserve_pernode_space(); 51262306a36Sopenharmony_ci memory_less_nodes(); 51362306a36Sopenharmony_ci initialize_pernode_data(); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci max_pfn = max_low_pfn; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci find_initrd(); 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci#ifdef CONFIG_SMP 52162306a36Sopenharmony_ci/** 52262306a36Sopenharmony_ci * per_cpu_init - setup per-cpu variables 52362306a36Sopenharmony_ci * 52462306a36Sopenharmony_ci * find_pernode_space() does most of this already, we just need to set 52562306a36Sopenharmony_ci * local_per_cpu_offset 52662306a36Sopenharmony_ci */ 52762306a36Sopenharmony_civoid *per_cpu_init(void) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci int cpu; 53062306a36Sopenharmony_ci static int first_time = 1; 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci if (first_time) { 53362306a36Sopenharmony_ci first_time = 0; 53462306a36Sopenharmony_ci for_each_possible_early_cpu(cpu) 53562306a36Sopenharmony_ci per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; 53662306a36Sopenharmony_ci } 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci#endif /* CONFIG_SMP */ 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci/** 54362306a36Sopenharmony_ci * call_pernode_memory - use SRAT to call callback functions with node info 54462306a36Sopenharmony_ci * @start: physical start of range 54562306a36Sopenharmony_ci * @len: length of range 54662306a36Sopenharmony_ci * @arg: function to call for each range 54762306a36Sopenharmony_ci * 54862306a36Sopenharmony_ci * efi_memmap_walk() knows nothing about layout of memory across nodes. Find 54962306a36Sopenharmony_ci * out to which node a block of memory belongs. Ignore memory that we cannot 55062306a36Sopenharmony_ci * identify, and split blocks that run across multiple nodes. 55162306a36Sopenharmony_ci * 55262306a36Sopenharmony_ci * Take this opportunity to round the start address up and the end address 55362306a36Sopenharmony_ci * down to page boundaries. 55462306a36Sopenharmony_ci */ 55562306a36Sopenharmony_civoid call_pernode_memory(unsigned long start, unsigned long len, void *arg) 55662306a36Sopenharmony_ci{ 55762306a36Sopenharmony_ci unsigned long rs, re, end = start + len; 55862306a36Sopenharmony_ci void (*func)(unsigned long, unsigned long, int); 55962306a36Sopenharmony_ci int i; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci start = PAGE_ALIGN(start); 56262306a36Sopenharmony_ci end &= PAGE_MASK; 56362306a36Sopenharmony_ci if (start >= end) 56462306a36Sopenharmony_ci return; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci func = arg; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci if (!num_node_memblks) { 56962306a36Sopenharmony_ci /* No SRAT table, so assume one node (node 0) */ 57062306a36Sopenharmony_ci if (start < end) 57162306a36Sopenharmony_ci (*func)(start, end - start, 0); 57262306a36Sopenharmony_ci return; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci for (i = 0; i < num_node_memblks; i++) { 57662306a36Sopenharmony_ci rs = max(start, node_memblk[i].start_paddr); 57762306a36Sopenharmony_ci re = min(end, node_memblk[i].start_paddr + 57862306a36Sopenharmony_ci node_memblk[i].size); 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (rs < re) 58162306a36Sopenharmony_ci (*func)(rs, re - rs, node_memblk[i].nid); 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci if (re == end) 58462306a36Sopenharmony_ci break; 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci/** 58962306a36Sopenharmony_ci * paging_init - setup page tables 59062306a36Sopenharmony_ci * 59162306a36Sopenharmony_ci * paging_init() sets up the page tables for each node of the system and frees 59262306a36Sopenharmony_ci * the bootmem allocator memory for general use. 59362306a36Sopenharmony_ci */ 59462306a36Sopenharmony_civoid __init paging_init(void) 59562306a36Sopenharmony_ci{ 59662306a36Sopenharmony_ci unsigned long max_dma; 59762306a36Sopenharmony_ci unsigned long max_zone_pfns[MAX_NR_ZONES]; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci sparse_init(); 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 60462306a36Sopenharmony_ci max_zone_pfns[ZONE_DMA32] = max_dma; 60562306a36Sopenharmony_ci max_zone_pfns[ZONE_NORMAL] = max_low_pfn; 60662306a36Sopenharmony_ci free_area_init(max_zone_pfns); 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); 60962306a36Sopenharmony_ci} 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_cipg_data_t * __init arch_alloc_nodedata(int nid) 61262306a36Sopenharmony_ci{ 61362306a36Sopenharmony_ci unsigned long size = compute_pernodesize(nid); 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci return memblock_alloc(size, SMP_CACHE_BYTES); 61662306a36Sopenharmony_ci} 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_civoid arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) 61962306a36Sopenharmony_ci{ 62062306a36Sopenharmony_ci pgdat_list[update_node] = update_pgdat; 62162306a36Sopenharmony_ci scatter_node_data(); 62262306a36Sopenharmony_ci} 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci#ifdef CONFIG_SPARSEMEM_VMEMMAP 62562306a36Sopenharmony_ciint __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 62662306a36Sopenharmony_ci struct vmem_altmap *altmap) 62762306a36Sopenharmony_ci{ 62862306a36Sopenharmony_ci return vmemmap_populate_basepages(start, end, node, NULL); 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_civoid vmemmap_free(unsigned long start, unsigned long end, 63262306a36Sopenharmony_ci struct vmem_altmap *altmap) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci} 63562306a36Sopenharmony_ci#endif 636