18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. 48c2ecf20Sopenharmony_ci * Copyright (c) 2001 Intel Corp. 58c2ecf20Sopenharmony_ci * Copyright (c) 2001 Tony Luck <tony.luck@intel.com> 68c2ecf20Sopenharmony_ci * Copyright (c) 2002 NEC Corp. 78c2ecf20Sopenharmony_ci * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com> 88c2ecf20Sopenharmony_ci * Copyright (c) 2004 Silicon Graphics, Inc 98c2ecf20Sopenharmony_ci * Russ Anderson <rja@sgi.com> 108c2ecf20Sopenharmony_ci * Jesse Barnes <jbarnes@sgi.com> 118c2ecf20Sopenharmony_ci * Jack Steiner <steiner@sgi.com> 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci/* 158c2ecf20Sopenharmony_ci * Platform initialization for Discontig Memory 168c2ecf20Sopenharmony_ci */ 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci#include <linux/kernel.h> 198c2ecf20Sopenharmony_ci#include <linux/mm.h> 208c2ecf20Sopenharmony_ci#include <linux/nmi.h> 218c2ecf20Sopenharmony_ci#include <linux/swap.h> 228c2ecf20Sopenharmony_ci#include <linux/memblock.h> 238c2ecf20Sopenharmony_ci#include <linux/acpi.h> 248c2ecf20Sopenharmony_ci#include <linux/efi.h> 258c2ecf20Sopenharmony_ci#include <linux/nodemask.h> 268c2ecf20Sopenharmony_ci#include <linux/slab.h> 278c2ecf20Sopenharmony_ci#include <asm/tlb.h> 288c2ecf20Sopenharmony_ci#include <asm/meminit.h> 298c2ecf20Sopenharmony_ci#include <asm/numa.h> 308c2ecf20Sopenharmony_ci#include <asm/sections.h> 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci/* 338c2ecf20Sopenharmony_ci * Track per-node information needed to setup the boot memory allocator, the 348c2ecf20Sopenharmony_ci * per-node areas, and the real VM. 
 */
struct early_node_data {
	struct ia64_node_data *node_data;	/* node-local data area, set up in fill_pernode() */
	unsigned long pernode_addr;		/* physical base of this node's pernode area (0 = not placed yet) */
	unsigned long pernode_size;		/* size of the pernode area in bytes */
	unsigned long min_pfn;			/* lowest granule-rounded pfn seen on this node (0 = no memory seen) */
	unsigned long max_pfn;			/* highest granule-rounded pfn seen on this node */
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;	/* nodes that have CPUs but no local memory */

/* One pg_data_t pointer per node; scatter_node_data() copies this table into each node. */
pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)						\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) + 				\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))

/**
 * build_node_maps - callback to setup mem_data structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Detect extents of each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
				  int node)
{
	unsigned long spfn, epfn, end = start + len;

	/* Round out to granule boundaries: down for the start, up for the end. */
	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

	if (!mem_data[node].min_pfn) {
		/* First range seen for this node. */
		mem_data[node].min_pfn = spfn;
		mem_data[node].max_pfn = epfn;
	} else {
		/* Widen the node's span to cover this range as well. */
		mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
		mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
	}

	return 0;
}

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() yet because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
968c2ecf20Sopenharmony_ci */ 978c2ecf20Sopenharmony_cistatic int early_nr_cpus_node(int node) 988c2ecf20Sopenharmony_ci{ 998c2ecf20Sopenharmony_ci int cpu, n = 0; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci for_each_possible_early_cpu(cpu) 1028c2ecf20Sopenharmony_ci if (node == node_cpuid[cpu].nid) 1038c2ecf20Sopenharmony_ci n++; 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci return n; 1068c2ecf20Sopenharmony_ci} 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci/** 1098c2ecf20Sopenharmony_ci * compute_pernodesize - compute size of pernode data 1108c2ecf20Sopenharmony_ci * @node: the node id. 1118c2ecf20Sopenharmony_ci */ 1128c2ecf20Sopenharmony_cistatic unsigned long compute_pernodesize(int node) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci unsigned long pernodesize = 0, cpus; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci cpus = early_nr_cpus_node(node); 1178c2ecf20Sopenharmony_ci pernodesize += PERCPU_PAGE_SIZE * cpus; 1188c2ecf20Sopenharmony_ci pernodesize += node * L1_CACHE_BYTES; 1198c2ecf20Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); 1208c2ecf20Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); 1218c2ecf20Sopenharmony_ci pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); 1228c2ecf20Sopenharmony_ci pernodesize = PAGE_ALIGN(pernodesize); 1238c2ecf20Sopenharmony_ci return pernodesize; 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci/** 1278c2ecf20Sopenharmony_ci * per_cpu_node_setup - setup per-cpu areas on each node 1288c2ecf20Sopenharmony_ci * @cpu_data: per-cpu area on this node 1298c2ecf20Sopenharmony_ci * @node: node to setup 1308c2ecf20Sopenharmony_ci * 1318c2ecf20Sopenharmony_ci * Copy the static per-cpu data into the region we just set aside and then 1328c2ecf20Sopenharmony_ci * setup __per_cpu_offset for each CPU on this node. Return a pointer to 1338c2ecf20Sopenharmony_ci * the end of the area. 
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		/* cpu0's data lives in the __cpu0_per_cpu init area set up by head.S. */
		void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

		if (node != node_cpuid[cpu].nid)
			continue;

		memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
			__per_cpu_start;

		/*
		 * percpu area for cpu0 is moved from the __init area
		 * which is setup by head.S and used till this point.
		 * Update ar.k3.  This move ensures that percpu
		 * area for cpu0 is on the correct node and its
		 * virtual address isn't insanely far from other
		 * percpu areas which is important for congruent
		 * percpu allocator.
		 */
		if (cpu == 0)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		/* Each CPU gets a full PERCPU_PAGE_SIZE slot in the pernode area. */
		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
	struct pcpu_alloc_info *ai;
	struct pcpu_group_info *gi;
	unsigned int *cpu_map;
	void *base;
	unsigned long base_offset;
	unsigned int cpu;
	ssize_t static_size, reserved_size, dyn_size;
	int node, prev_node, unit, nr_units;

	ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
	if (!ai)
		panic("failed to allocate pcpu_alloc_info");
	cpu_map = ai->groups[0].cpu_map;

	/* determine base: the lowest of all existing per-cpu area addresses */
	base = (void *)ULONG_MAX;
	for_each_possible_cpu(cpu)
		base = min(base,
			   (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
	base_offset = (void *)__per_cpu_start - base;

	/* build cpu_map, units are grouped by node */
	unit = 0;
	for_each_node(node)
		for_each_possible_cpu(cpu)
			if (node == node_cpuid[cpu].nid)
				cpu_map[unit++] = cpu;
	nr_units = unit;

	/* set basic parameters */
	static_size = __per_cpu_end - __per_cpu_start;
	reserved_size = PERCPU_MODULE_RESERVE;
	/* whatever is left of the PERCPU_PAGE_SIZE unit is dynamically allocatable */
	dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
	if (dyn_size < 0)
		panic("percpu area overflow static=%zd reserved=%zd\n",
		      static_size, reserved_size);

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = PERCPU_PAGE_SIZE;
	ai->atom_size = PAGE_SIZE;
	ai->alloc_size = PERCPU_PAGE_SIZE;

	/*
	 * CPUs are put into groups according to node.  Walk cpu_map
	 * and create new groups at node boundaries.
	 */
	prev_node = NUMA_NO_NODE;
	ai->nr_groups = 0;
	for (unit = 0; unit < nr_units; unit++) {
		cpu = cpu_map[unit];
		node = node_cpuid[cpu].nid;

		if (node == prev_node) {
			/* gi is valid here: the first unit always opens a group below */
			gi->nr_units++;
			continue;
		}
		prev_node = node;

		/* start a new group for this node */
		gi = &ai->groups[ai->nr_groups++];
		gi->nr_units = 1;
		gi->base_offset = __per_cpu_offset[cpu] + base_offset;
		gi->cpu_map = &cpu_map[unit];
	}

	pcpu_setup_first_chunk(ai, base);
	pcpu_free_alloc_info(ai);
}
#endif

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 *
 * Carve the pernode area at @pernode into its pieces: the per-cpu areas,
 * the node-strided anti-aliasing pad, the node's pg_data_t and its
 * ia64_node_data, then let per_cpu_node_setup() populate the per-cpu part.
 * The layout here must stay in sync with compute_pernodesize().
 */
static void __init fill_pernode(int node, unsigned long pernode,
	unsigned long pernodesize)
{
	void *cpu_data;
	int cpus = early_nr_cpus_node(node);

	mem_data[node].pernode_addr = pernode;
	mem_data[node].pernode_size = pernodesize;
	memset(__va(pernode), 0, pernodesize);

	/* Per-cpu areas sit at the very start of the pernode space. */
	cpu_data = (void *)pernode;
	pernode += PERCPU_PAGE_SIZE * cpus;
	pernode += node * L1_CACHE_BYTES;	/* stride by node to avoid cache aliasing */

	pgdat_list[node] = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	mem_data[node].node_data = __va(pernode);
	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

	cpu_data = per_cpu_node_setup(cpu_data, node);

	return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts
 and the per-node data struct.  Each node will have something like
 * the following in the first chunk of addr. space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
				     int node)
{
	unsigned long spfn, epfn;
	unsigned long pernodesize = 0, pernode;

	spfn = start >> PAGE_SHIFT;
	epfn = (start + len) >> PAGE_SHIFT;

	/*
	 * Make sure this memory falls within this node's usable memory
	 * since we may have thrown some away in build_maps().
	 */
	if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
		return 0;

	/* Don't setup this node's local space twice... */
	if (mem_data[node].pernode_addr)
		return 0;

	/*
	 * Calculate total size needed, incl. what's necessary
	 * for good alignment and alias prevention.
	 */
	pernodesize = compute_pernodesize(node);
	pernode = NODEDATA_ALIGN(start, node);

	/* Is this range big enough for what we want to store here? */
	if (start + len > (pernode + pernodesize))
		fill_pernode(node, pernode, pernodesize);

	/* Always return 0 so efi_memmap_walk() keeps walking. */
	return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
	unsigned long base, size;
	int node;

	for_each_online_node(node) {
		/* Memoryless nodes get their pernode area from memory_less_nodes(). */
		if (node_isset(node, memory_less_mask))
			continue;

		/* Now the per-node space */
		size = mem_data[node].pernode_size;
		base = __pa(mem_data[node].pernode_addr);
		memblock_reserve(base, size);
	}
}

static void scatter_node_data(void)
{
	pg_data_t **dst;
	int node;

	/*
	 * for_each_online_node() can't be used at here.
	 * node_online_map is not set for hot-added nodes at this time,
	 * because we are halfway through initialization of the new node's
	 * structures.
 If for_each_online_node() is used, a new node's
	 * pg_data_ptrs will be not initialized. Instead of using it,
	 * pgdat_list[] is checked.
	 */
	for_each_node(node) {
		if (pgdat_list[node]) {
			/* Give this node a full copy of the global pgdat table. */
			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
			memcpy(dst, pgdat_list, sizeof(pgdat_list));
		}
	}
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.
 */
static void __init initialize_pernode_data(void)
{
	int cpu, node;

	scatter_node_data();

#ifdef CONFIG_SMP
	/* Set the node_data pointer for each per-cpu struct */
	for_each_possible_early_cpu(cpu) {
		node = node_cpuid[cpu].nid;
		per_cpu(ia64_cpu_info, cpu).node_data =
			mem_data[node].node_data;
	}
#else
	{
		struct cpuinfo_ia64 *cpu0_cpu_info;
		cpu = 0;
		node = node_cpuid[cpu].nid;
		/*
		 * !SMP: per_cpu_node_setup() compiles to a no-op, so the
		 * per-cpu data was never relocated; locate cpu0's cpuinfo
		 * inside __phys_per_cpu_start directly.
		 */
		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
			((char *)&ia64_cpu_info - __per_cpu_start));
		cpu0_cpu_info->node_data = mem_data[node].node_data;
	}
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate memory on the best NUMA
 * (SLIT-distance) node, but fall back to any other node when the best
 * node has no memory either.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
	void *ptr = NULL;
	u8 best = 0xff;
	int bestnode = NUMA_NO_NODE, node, anynode = 0;

	/* Pick the closest node that actually has memory. */
	for_each_online_node(node) {
		if (node_isset(node, memory_less_mask))
			continue;
		else if (node_distance(nid, node) < best) {
			best = node_distance(nid, node);
			bestnode = node;
		}
		anynode = node;
	}

	if (bestnode == NUMA_NO_NODE)
		bestnode = anynode;

	ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
				     __pa(MAX_DMA_ADDRESS),
				     MEMBLOCK_ALLOC_ACCESSIBLE,
				     bestnode);
	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n",
		      __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
		      __pa(MAX_DMA_ADDRESS));

	return ptr;
}

/**
 * memory_less_nodes - allocate and initialize CPU only nodes pernode
 * information.
 */
static void __init memory_less_nodes(void)
{
	unsigned long pernodesize;
	void *pernode;
	int node;

	for_each_node_mask(node, memory_less_mask) {
		pernodesize = compute_pernodesize(node);
		pernode = memory_less_node_alloc(node, pernodesize);
		/* fill_pernode() expects a physical address. */
		fill_pernode(node, __pa(pernode), pernodesize);
	}

	return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
	int node;

	reserve_memory();
	efi_memmap_walk(filter_memory, register_active_ranges);

	if (num_online_nodes() == 0) {
		printk(KERN_ERR "node info missing!\n");
		node_set_online(0);
	}

	/* Start by assuming every online node is memoryless... */
	nodes_or(memory_less_mask, memory_less_mask, node_online_map);
	min_low_pfn = -1;
	max_low_pfn = 0;

	/* These actually end up getting called by call_pernode_memory() */
	efi_memmap_walk(filter_rsvd_memory, build_node_maps);
	efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
	efi_memmap_walk(find_max_min_low_pfn, NULL);

	/* ...then clear every node on which build_node_maps() saw memory. */
	for_each_online_node(node)
		if (mem_data[node].min_pfn)
			node_clear(node, memory_less_mask);

	reserve_pernode_space();
	memory_less_nodes();
	initialize_pernode_data();

	max_pfn = max_low_pfn;

	find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already, we just need to set
 * local_per_cpu_offset
 */
void *per_cpu_init(void)
{
	int cpu;
	static int first_time = 1;

	/* Publish the per-cpu offsets exactly once (first caller wins). */
	if (first_time) {
		first_time = 0;
		for_each_possible_early_cpu(cpu)
			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
	}

	/* Return the calling CPU's own per-cpu area. */
	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
5538c2ecf20Sopenharmony_ci */ 5548c2ecf20Sopenharmony_civoid call_pernode_memory(unsigned long start, unsigned long len, void *arg) 5558c2ecf20Sopenharmony_ci{ 5568c2ecf20Sopenharmony_ci unsigned long rs, re, end = start + len; 5578c2ecf20Sopenharmony_ci void (*func)(unsigned long, unsigned long, int); 5588c2ecf20Sopenharmony_ci int i; 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci start = PAGE_ALIGN(start); 5618c2ecf20Sopenharmony_ci end &= PAGE_MASK; 5628c2ecf20Sopenharmony_ci if (start >= end) 5638c2ecf20Sopenharmony_ci return; 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci func = arg; 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_ci if (!num_node_memblks) { 5688c2ecf20Sopenharmony_ci /* No SRAT table, so assume one node (node 0) */ 5698c2ecf20Sopenharmony_ci if (start < end) 5708c2ecf20Sopenharmony_ci (*func)(start, end - start, 0); 5718c2ecf20Sopenharmony_ci return; 5728c2ecf20Sopenharmony_ci } 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci for (i = 0; i < num_node_memblks; i++) { 5758c2ecf20Sopenharmony_ci rs = max(start, node_memblk[i].start_paddr); 5768c2ecf20Sopenharmony_ci re = min(end, node_memblk[i].start_paddr + 5778c2ecf20Sopenharmony_ci node_memblk[i].size); 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci if (rs < re) 5808c2ecf20Sopenharmony_ci (*func)(rs, re - rs, node_memblk[i].nid); 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci if (re == end) 5838c2ecf20Sopenharmony_ci break; 5848c2ecf20Sopenharmony_ci } 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci/** 5888c2ecf20Sopenharmony_ci * paging_init - setup page tables 5898c2ecf20Sopenharmony_ci * 5908c2ecf20Sopenharmony_ci * paging_init() sets up the page tables for each node of the system and frees 5918c2ecf20Sopenharmony_ci * the bootmem allocator memory for general use. 
 */
void __init paging_init(void)
{
	unsigned long max_dma;
	unsigned long pfn_offset = 0;
	unsigned long max_pfn = 0;	/* NOTE: shadows the global max_pfn set in find_memory() */
	int node;
	unsigned long max_zone_pfns[MAX_NR_ZONES];

	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

	sparse_init();

#ifdef CONFIG_VIRTUAL_MEM_MAP
	/* Carve the virtual mem_map out of the top of the vmalloc area. */
	VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
		sizeof(struct page));
	vmem_map = (struct page *) VMALLOC_END;
	efi_memmap_walk(create_mem_map_page_table, NULL);
	printk("Virtual mem_map starts at 0x%p\n", vmem_map);
#endif

	for_each_online_node(node) {
		pfn_offset = mem_data[node].min_pfn;

#ifdef CONFIG_VIRTUAL_MEM_MAP
		NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
#endif
		if (mem_data[node].max_pfn > max_pfn)
			max_pfn = mem_data[node].max_pfn;
	}

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA32
	max_zone_pfns[ZONE_DMA32] = max_dma;
#endif
	max_zone_pfns[ZONE_NORMAL] = max_pfn;
	free_area_init(max_zone_pfns);

	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * arch_alloc_nodedata - allocate a pernode-sized area for a hot-added node.
 * Uses the regular slab allocator since boot is long over at this point.
 */
pg_data_t *arch_alloc_nodedata(int nid)
{
	unsigned long size = compute_pernodesize(nid);

	return kzalloc(size, GFP_KERNEL);
}

/* arch_free_nodedata - release an area obtained from arch_alloc_nodedata(). */
void arch_free_nodedata(pg_data_t *pgdat)
{
	kfree(pgdat);
}

/*
 * arch_refresh_nodedata - install a hot-added node's pgdat and re-broadcast
 * the pgdat_list table to every node's local copy.
 */
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
	pgdat_list[update_node] = update_pgdat;
	scatter_node_data();
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Populate the vmemmap with ordinary base pages (no huge-page mappings). */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	return vmemmap_populate_basepages(start, end, node, NULL);
}

/* Nothing to tear down: base-page vmemmap is never freed on this arch. */
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
}
#endif