18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * pSeries NUMA support
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "numa: " fmt
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/threads.h>
108c2ecf20Sopenharmony_ci#include <linux/memblock.h>
118c2ecf20Sopenharmony_ci#include <linux/init.h>
128c2ecf20Sopenharmony_ci#include <linux/mm.h>
138c2ecf20Sopenharmony_ci#include <linux/mmzone.h>
148c2ecf20Sopenharmony_ci#include <linux/export.h>
158c2ecf20Sopenharmony_ci#include <linux/nodemask.h>
168c2ecf20Sopenharmony_ci#include <linux/cpu.h>
178c2ecf20Sopenharmony_ci#include <linux/notifier.h>
188c2ecf20Sopenharmony_ci#include <linux/of.h>
198c2ecf20Sopenharmony_ci#include <linux/pfn.h>
208c2ecf20Sopenharmony_ci#include <linux/cpuset.h>
218c2ecf20Sopenharmony_ci#include <linux/node.h>
228c2ecf20Sopenharmony_ci#include <linux/stop_machine.h>
238c2ecf20Sopenharmony_ci#include <linux/proc_fs.h>
248c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
258c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
268c2ecf20Sopenharmony_ci#include <linux/slab.h>
278c2ecf20Sopenharmony_ci#include <asm/cputhreads.h>
288c2ecf20Sopenharmony_ci#include <asm/sparsemem.h>
298c2ecf20Sopenharmony_ci#include <asm/prom.h>
308c2ecf20Sopenharmony_ci#include <asm/smp.h>
318c2ecf20Sopenharmony_ci#include <asm/topology.h>
328c2ecf20Sopenharmony_ci#include <asm/firmware.h>
338c2ecf20Sopenharmony_ci#include <asm/paca.h>
348c2ecf20Sopenharmony_ci#include <asm/hvcall.h>
358c2ecf20Sopenharmony_ci#include <asm/setup.h>
368c2ecf20Sopenharmony_ci#include <asm/vdso.h>
378c2ecf20Sopenharmony_ci#include <asm/drmem.h>
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_cistatic int numa_enabled = 1;
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cistatic char *cmdline __initdata;
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_cistatic int numa_debug;
448c2ecf20Sopenharmony_ci#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ciint numa_cpu_lookup_table[NR_CPUS];
478c2ecf20Sopenharmony_cicpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
488c2ecf20Sopenharmony_cistruct pglist_data *node_data[MAX_NUMNODES];
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ciEXPORT_SYMBOL(numa_cpu_lookup_table);
518c2ecf20Sopenharmony_ciEXPORT_SYMBOL(node_to_cpumask_map);
528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(node_data);
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic int primary_domain_index;
558c2ecf20Sopenharmony_cistatic int n_mem_addr_cells, n_mem_size_cells;
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci#define FORM0_AFFINITY 0
588c2ecf20Sopenharmony_ci#define FORM1_AFFINITY 1
598c2ecf20Sopenharmony_ci#define FORM2_AFFINITY 2
608c2ecf20Sopenharmony_cistatic int affinity_form;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci#define MAX_DISTANCE_REF_POINTS 4
638c2ecf20Sopenharmony_cistatic int distance_ref_points_depth;
648c2ecf20Sopenharmony_cistatic const __be32 *distance_ref_points;
658c2ecf20Sopenharmony_cistatic int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
668c2ecf20Sopenharmony_cistatic int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
678c2ecf20Sopenharmony_ci	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
688c2ecf20Sopenharmony_ci};
698c2ecf20Sopenharmony_cistatic int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci/*
728c2ecf20Sopenharmony_ci * Allocate node_to_cpumask_map based on number of available nodes
738c2ecf20Sopenharmony_ci * Requires node_possible_map to be valid.
748c2ecf20Sopenharmony_ci *
758c2ecf20Sopenharmony_ci * Note: cpumask_of_node() is not valid until after this is done.
768c2ecf20Sopenharmony_ci */
778c2ecf20Sopenharmony_cistatic void __init setup_node_to_cpumask_map(void)
788c2ecf20Sopenharmony_ci{
798c2ecf20Sopenharmony_ci	unsigned int node;
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci	/* setup nr_node_ids if not done yet */
828c2ecf20Sopenharmony_ci	if (nr_node_ids == MAX_NUMNODES)
838c2ecf20Sopenharmony_ci		setup_nr_node_ids();
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	/* allocate the map */
868c2ecf20Sopenharmony_ci	for_each_node(node)
878c2ecf20Sopenharmony_ci		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	/* cpumask_of_node() will now work */
908c2ecf20Sopenharmony_ci	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_cistatic int __init fake_numa_create_new_node(unsigned long end_pfn,
948c2ecf20Sopenharmony_ci						unsigned int *nid)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	unsigned long long mem;
978c2ecf20Sopenharmony_ci	char *p = cmdline;
988c2ecf20Sopenharmony_ci	static unsigned int fake_nid;
998c2ecf20Sopenharmony_ci	static unsigned long long curr_boundary;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	/*
1028c2ecf20Sopenharmony_ci	 * Modify node id, iff we started creating NUMA nodes
1038c2ecf20Sopenharmony_ci	 * We want to continue from where we left of the last time
1048c2ecf20Sopenharmony_ci	 */
1058c2ecf20Sopenharmony_ci	if (fake_nid)
1068c2ecf20Sopenharmony_ci		*nid = fake_nid;
1078c2ecf20Sopenharmony_ci	/*
1088c2ecf20Sopenharmony_ci	 * In case there are no more arguments to parse, the
1098c2ecf20Sopenharmony_ci	 * node_id should be the same as the last fake node id
1108c2ecf20Sopenharmony_ci	 * (we've handled this above).
1118c2ecf20Sopenharmony_ci	 */
1128c2ecf20Sopenharmony_ci	if (!p)
1138c2ecf20Sopenharmony_ci		return 0;
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	mem = memparse(p, &p);
1168c2ecf20Sopenharmony_ci	if (!mem)
1178c2ecf20Sopenharmony_ci		return 0;
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	if (mem < curr_boundary)
1208c2ecf20Sopenharmony_ci		return 0;
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	curr_boundary = mem;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	if ((end_pfn << PAGE_SHIFT) > mem) {
1258c2ecf20Sopenharmony_ci		/*
1268c2ecf20Sopenharmony_ci		 * Skip commas and spaces
1278c2ecf20Sopenharmony_ci		 */
1288c2ecf20Sopenharmony_ci		while (*p == ',' || *p == ' ' || *p == '\t')
1298c2ecf20Sopenharmony_ci			p++;
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci		cmdline = p;
1328c2ecf20Sopenharmony_ci		fake_nid++;
1338c2ecf20Sopenharmony_ci		*nid = fake_nid;
1348c2ecf20Sopenharmony_ci		dbg("created new fake_node with id %d\n", fake_nid);
1358c2ecf20Sopenharmony_ci		return 1;
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci	return 0;
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic void reset_numa_cpu_lookup_table(void)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	unsigned int cpu;
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu)
1458c2ecf20Sopenharmony_ci		numa_cpu_lookup_table[cpu] = -1;
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_civoid map_cpu_to_node(int cpu, int node)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	update_numa_cpu_lookup_table(cpu, node);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	dbg("adding cpu %d to node %d\n", cpu, node);
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
1558c2ecf20Sopenharmony_ci		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
1598c2ecf20Sopenharmony_civoid unmap_cpu_from_node(unsigned long cpu)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	int node = numa_cpu_lookup_table[cpu];
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	dbg("removing cpu %lu from node %d\n", cpu, node);
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
1668c2ecf20Sopenharmony_ci		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
1678c2ecf20Sopenharmony_ci	} else {
1688c2ecf20Sopenharmony_ci		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
1698c2ecf20Sopenharmony_ci		       cpu, node);
1708c2ecf20Sopenharmony_ci	}
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic int __associativity_to_nid(const __be32 *associativity,
1758c2ecf20Sopenharmony_ci				  int max_array_sz)
1768c2ecf20Sopenharmony_ci{
1778c2ecf20Sopenharmony_ci	int nid;
1788c2ecf20Sopenharmony_ci	/*
1798c2ecf20Sopenharmony_ci	 * primary_domain_index is 1 based array index.
1808c2ecf20Sopenharmony_ci	 */
1818c2ecf20Sopenharmony_ci	int index = primary_domain_index  - 1;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	if (!numa_enabled || index >= max_array_sz)
1848c2ecf20Sopenharmony_ci		return NUMA_NO_NODE;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	nid = of_read_number(&associativity[index], 1);
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	/* POWER4 LPAR uses 0xffff as invalid node */
1898c2ecf20Sopenharmony_ci	if (nid == 0xffff || nid >= nr_node_ids)
1908c2ecf20Sopenharmony_ci		nid = NUMA_NO_NODE;
1918c2ecf20Sopenharmony_ci	return nid;
1928c2ecf20Sopenharmony_ci}
1938c2ecf20Sopenharmony_ci/*
1948c2ecf20Sopenharmony_ci * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
1958c2ecf20Sopenharmony_ci * info is found.
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_cistatic int associativity_to_nid(const __be32 *associativity)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	int array_sz = of_read_number(associativity, 1);
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	/* Skip the first element in the associativity array */
2028c2ecf20Sopenharmony_ci	return __associativity_to_nid((associativity + 1), array_sz);
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_cistatic int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
2068c2ecf20Sopenharmony_ci{
2078c2ecf20Sopenharmony_ci	int dist;
2088c2ecf20Sopenharmony_ci	int node1, node2;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	node1 = associativity_to_nid(cpu1_assoc);
2118c2ecf20Sopenharmony_ci	node2 = associativity_to_nid(cpu2_assoc);
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	dist = numa_distance_table[node1][node2];
2148c2ecf20Sopenharmony_ci	if (dist <= LOCAL_DISTANCE)
2158c2ecf20Sopenharmony_ci		return 0;
2168c2ecf20Sopenharmony_ci	else if (dist <= REMOTE_DISTANCE)
2178c2ecf20Sopenharmony_ci		return 1;
2188c2ecf20Sopenharmony_ci	else
2198c2ecf20Sopenharmony_ci		return 2;
2208c2ecf20Sopenharmony_ci}
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cistatic int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
2238c2ecf20Sopenharmony_ci{
2248c2ecf20Sopenharmony_ci	int dist = 0;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	int i, index;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	for (i = 0; i < distance_ref_points_depth; i++) {
2298c2ecf20Sopenharmony_ci		index = be32_to_cpu(distance_ref_points[i]);
2308c2ecf20Sopenharmony_ci		if (cpu1_assoc[index] == cpu2_assoc[index])
2318c2ecf20Sopenharmony_ci			break;
2328c2ecf20Sopenharmony_ci		dist++;
2338c2ecf20Sopenharmony_ci	}
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	return dist;
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ciint cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
2398c2ecf20Sopenharmony_ci{
2408c2ecf20Sopenharmony_ci	/* We should not get called with FORM0 */
2418c2ecf20Sopenharmony_ci	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
2428c2ecf20Sopenharmony_ci	if (affinity_form == FORM1_AFFINITY)
2438c2ecf20Sopenharmony_ci		return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
2448c2ecf20Sopenharmony_ci	return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
2458c2ecf20Sopenharmony_ci}
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci/* must hold reference to node during call */
2488c2ecf20Sopenharmony_cistatic const __be32 *of_get_associativity(struct device_node *dev)
2498c2ecf20Sopenharmony_ci{
2508c2ecf20Sopenharmony_ci	return of_get_property(dev, "ibm,associativity", NULL);
2518c2ecf20Sopenharmony_ci}
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ciint __node_distance(int a, int b)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	int i;
2568c2ecf20Sopenharmony_ci	int distance = LOCAL_DISTANCE;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	if (affinity_form == FORM2_AFFINITY)
2598c2ecf20Sopenharmony_ci		return numa_distance_table[a][b];
2608c2ecf20Sopenharmony_ci	else if (affinity_form == FORM0_AFFINITY)
2618c2ecf20Sopenharmony_ci		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	for (i = 0; i < distance_ref_points_depth; i++) {
2648c2ecf20Sopenharmony_ci		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
2658c2ecf20Sopenharmony_ci			break;
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci		/* Double the distance for each NUMA level */
2688c2ecf20Sopenharmony_ci		distance *= 2;
2698c2ecf20Sopenharmony_ci	}
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	return distance;
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__node_distance);
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci/* Returns the nid associated with the given device tree node,
2768c2ecf20Sopenharmony_ci * or -1 if not found.
2778c2ecf20Sopenharmony_ci */
2788c2ecf20Sopenharmony_cistatic int of_node_to_nid_single(struct device_node *device)
2798c2ecf20Sopenharmony_ci{
2808c2ecf20Sopenharmony_ci	int nid = NUMA_NO_NODE;
2818c2ecf20Sopenharmony_ci	const __be32 *tmp;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	tmp = of_get_associativity(device);
2848c2ecf20Sopenharmony_ci	if (tmp)
2858c2ecf20Sopenharmony_ci		nid = associativity_to_nid(tmp);
2868c2ecf20Sopenharmony_ci	return nid;
2878c2ecf20Sopenharmony_ci}
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci/* Walk the device tree upwards, looking for an associativity id */
2908c2ecf20Sopenharmony_ciint of_node_to_nid(struct device_node *device)
2918c2ecf20Sopenharmony_ci{
2928c2ecf20Sopenharmony_ci	int nid = NUMA_NO_NODE;
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci	of_node_get(device);
2958c2ecf20Sopenharmony_ci	while (device) {
2968c2ecf20Sopenharmony_ci		nid = of_node_to_nid_single(device);
2978c2ecf20Sopenharmony_ci		if (nid != -1)
2988c2ecf20Sopenharmony_ci			break;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci		device = of_get_next_parent(device);
3018c2ecf20Sopenharmony_ci	}
3028c2ecf20Sopenharmony_ci	of_node_put(device);
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	return nid;
3058c2ecf20Sopenharmony_ci}
3068c2ecf20Sopenharmony_ciEXPORT_SYMBOL(of_node_to_nid);
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_cistatic void __initialize_form1_numa_distance(const __be32 *associativity,
3098c2ecf20Sopenharmony_ci					     int max_array_sz)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	int i, nid;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	if (affinity_form != FORM1_AFFINITY)
3148c2ecf20Sopenharmony_ci		return;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	nid = __associativity_to_nid(associativity, max_array_sz);
3178c2ecf20Sopenharmony_ci	if (nid != NUMA_NO_NODE) {
3188c2ecf20Sopenharmony_ci		for (i = 0; i < distance_ref_points_depth; i++) {
3198c2ecf20Sopenharmony_ci			const __be32 *entry;
3208c2ecf20Sopenharmony_ci			int index = be32_to_cpu(distance_ref_points[i]) - 1;
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci			/*
3238c2ecf20Sopenharmony_ci			 * broken hierarchy, return with broken distance table
3248c2ecf20Sopenharmony_ci			 */
3258c2ecf20Sopenharmony_ci			if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
3268c2ecf20Sopenharmony_ci				return;
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci			entry = &associativity[index];
3298c2ecf20Sopenharmony_ci			distance_lookup_table[nid][i] = of_read_number(entry, 1);
3308c2ecf20Sopenharmony_ci		}
3318c2ecf20Sopenharmony_ci	}
3328c2ecf20Sopenharmony_ci}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_cistatic void initialize_form1_numa_distance(const __be32 *associativity)
3358c2ecf20Sopenharmony_ci{
3368c2ecf20Sopenharmony_ci	int array_sz;
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	array_sz = of_read_number(associativity, 1);
3398c2ecf20Sopenharmony_ci	/* Skip the first element in the associativity array */
3408c2ecf20Sopenharmony_ci	__initialize_form1_numa_distance(associativity + 1, array_sz);
3418c2ecf20Sopenharmony_ci}
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci/*
3448c2ecf20Sopenharmony_ci * Used to update distance information w.r.t newly added node.
3458c2ecf20Sopenharmony_ci */
3468c2ecf20Sopenharmony_civoid update_numa_distance(struct device_node *node)
3478c2ecf20Sopenharmony_ci{
3488c2ecf20Sopenharmony_ci	int nid;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	if (affinity_form == FORM0_AFFINITY)
3518c2ecf20Sopenharmony_ci		return;
3528c2ecf20Sopenharmony_ci	else if (affinity_form == FORM1_AFFINITY) {
3538c2ecf20Sopenharmony_ci		const __be32 *associativity;
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci		associativity = of_get_associativity(node);
3568c2ecf20Sopenharmony_ci		if (!associativity)
3578c2ecf20Sopenharmony_ci			return;
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci		initialize_form1_numa_distance(associativity);
3608c2ecf20Sopenharmony_ci		return;
3618c2ecf20Sopenharmony_ci	}
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	/* FORM2 affinity  */
3648c2ecf20Sopenharmony_ci	nid = of_node_to_nid_single(node);
3658c2ecf20Sopenharmony_ci	if (nid == NUMA_NO_NODE)
3668c2ecf20Sopenharmony_ci		return;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	/*
3698c2ecf20Sopenharmony_ci	 * With FORM2 we expect NUMA distance of all possible NUMA
3708c2ecf20Sopenharmony_ci	 * nodes to be provided during boot.
3718c2ecf20Sopenharmony_ci	 */
3728c2ecf20Sopenharmony_ci	WARN(numa_distance_table[nid][nid] == -1,
3738c2ecf20Sopenharmony_ci	     "NUMA distance details for node %d not provided\n", nid);
3748c2ecf20Sopenharmony_ci}
3758c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(update_numa_distance);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci/*
3788c2ecf20Sopenharmony_ci * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
3798c2ecf20Sopenharmony_ci * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
3808c2ecf20Sopenharmony_ci */
3818c2ecf20Sopenharmony_cistatic void initialize_form2_numa_distance_lookup_table(void)
3828c2ecf20Sopenharmony_ci{
3838c2ecf20Sopenharmony_ci	int i, j;
3848c2ecf20Sopenharmony_ci	struct device_node *root;
3858c2ecf20Sopenharmony_ci	const __u8 *numa_dist_table;
3868c2ecf20Sopenharmony_ci	const __be32 *numa_lookup_index;
3878c2ecf20Sopenharmony_ci	int numa_dist_table_length;
3888c2ecf20Sopenharmony_ci	int max_numa_index, distance_index;
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL))
3918c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/ibm,opal");
3928c2ecf20Sopenharmony_ci	else
3938c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/rtas");
3948c2ecf20Sopenharmony_ci	if (!root)
3958c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/");
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
3988c2ecf20Sopenharmony_ci	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	/* first element of the array is the size and is encode-int */
4018c2ecf20Sopenharmony_ci	numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
4028c2ecf20Sopenharmony_ci	numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
4038c2ecf20Sopenharmony_ci	/* Skip the size which is encoded int */
4048c2ecf20Sopenharmony_ci	numa_dist_table += sizeof(__be32);
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci	pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
4078c2ecf20Sopenharmony_ci		 numa_dist_table_length, max_numa_index);
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	for (i = 0; i < max_numa_index; i++)
4108c2ecf20Sopenharmony_ci		/* +1 skip the max_numa_index in the property */
4118c2ecf20Sopenharmony_ci		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	if (numa_dist_table_length != max_numa_index * max_numa_index) {
4158c2ecf20Sopenharmony_ci		WARN(1, "Wrong NUMA distance information\n");
4168c2ecf20Sopenharmony_ci		/* consider everybody else just remote. */
4178c2ecf20Sopenharmony_ci		for (i = 0;  i < max_numa_index; i++) {
4188c2ecf20Sopenharmony_ci			for (j = 0; j < max_numa_index; j++) {
4198c2ecf20Sopenharmony_ci				int nodeA = numa_id_index_table[i];
4208c2ecf20Sopenharmony_ci				int nodeB = numa_id_index_table[j];
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci				if (nodeA == nodeB)
4238c2ecf20Sopenharmony_ci					numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
4248c2ecf20Sopenharmony_ci				else
4258c2ecf20Sopenharmony_ci					numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
4268c2ecf20Sopenharmony_ci			}
4278c2ecf20Sopenharmony_ci		}
4288c2ecf20Sopenharmony_ci	}
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	distance_index = 0;
4318c2ecf20Sopenharmony_ci	for (i = 0;  i < max_numa_index; i++) {
4328c2ecf20Sopenharmony_ci		for (j = 0; j < max_numa_index; j++) {
4338c2ecf20Sopenharmony_ci			int nodeA = numa_id_index_table[i];
4348c2ecf20Sopenharmony_ci			int nodeB = numa_id_index_table[j];
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci			numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
4378c2ecf20Sopenharmony_ci			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
4388c2ecf20Sopenharmony_ci		}
4398c2ecf20Sopenharmony_ci	}
4408c2ecf20Sopenharmony_ci	of_node_put(root);
4418c2ecf20Sopenharmony_ci}
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_cistatic int __init find_primary_domain_index(void)
4448c2ecf20Sopenharmony_ci{
4458c2ecf20Sopenharmony_ci	int index;
4468c2ecf20Sopenharmony_ci	struct device_node *root;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	/*
4498c2ecf20Sopenharmony_ci	 * Check for which form of affinity.
4508c2ecf20Sopenharmony_ci	 */
4518c2ecf20Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL)) {
4528c2ecf20Sopenharmony_ci		affinity_form = FORM1_AFFINITY;
4538c2ecf20Sopenharmony_ci	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
4548c2ecf20Sopenharmony_ci		dbg("Using form 2 affinity\n");
4558c2ecf20Sopenharmony_ci		affinity_form = FORM2_AFFINITY;
4568c2ecf20Sopenharmony_ci	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
4578c2ecf20Sopenharmony_ci		dbg("Using form 1 affinity\n");
4588c2ecf20Sopenharmony_ci		affinity_form = FORM1_AFFINITY;
4598c2ecf20Sopenharmony_ci	} else
4608c2ecf20Sopenharmony_ci		affinity_form = FORM0_AFFINITY;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL))
4638c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/ibm,opal");
4648c2ecf20Sopenharmony_ci	else
4658c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/rtas");
4668c2ecf20Sopenharmony_ci	if (!root)
4678c2ecf20Sopenharmony_ci		root = of_find_node_by_path("/");
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	/*
4708c2ecf20Sopenharmony_ci	 * This property is a set of 32-bit integers, each representing
4718c2ecf20Sopenharmony_ci	 * an index into the ibm,associativity nodes.
4728c2ecf20Sopenharmony_ci	 *
4738c2ecf20Sopenharmony_ci	 * With form 0 affinity the first integer is for an SMP configuration
4748c2ecf20Sopenharmony_ci	 * (should be all 0's) and the second is for a normal NUMA
4758c2ecf20Sopenharmony_ci	 * configuration. We have only one level of NUMA.
4768c2ecf20Sopenharmony_ci	 *
4778c2ecf20Sopenharmony_ci	 * With form 1 affinity the first integer is the most significant
4788c2ecf20Sopenharmony_ci	 * NUMA boundary and the following are progressively less significant
4798c2ecf20Sopenharmony_ci	 * boundaries. There can be more than one level of NUMA.
4808c2ecf20Sopenharmony_ci	 */
4818c2ecf20Sopenharmony_ci	distance_ref_points = of_get_property(root,
4828c2ecf20Sopenharmony_ci					"ibm,associativity-reference-points",
4838c2ecf20Sopenharmony_ci					&distance_ref_points_depth);
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci	if (!distance_ref_points) {
4868c2ecf20Sopenharmony_ci		dbg("NUMA: ibm,associativity-reference-points not found.\n");
4878c2ecf20Sopenharmony_ci		goto err;
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	distance_ref_points_depth /= sizeof(int);
4918c2ecf20Sopenharmony_ci	if (affinity_form == FORM0_AFFINITY) {
4928c2ecf20Sopenharmony_ci		if (distance_ref_points_depth < 2) {
4938c2ecf20Sopenharmony_ci			printk(KERN_WARNING "NUMA: "
4948c2ecf20Sopenharmony_ci			       "short ibm,associativity-reference-points\n");
4958c2ecf20Sopenharmony_ci			goto err;
4968c2ecf20Sopenharmony_ci		}
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_ci		index = of_read_number(&distance_ref_points[1], 1);
4998c2ecf20Sopenharmony_ci	} else {
5008c2ecf20Sopenharmony_ci		/*
5018c2ecf20Sopenharmony_ci		 * Both FORM1 and FORM2 affinity find the primary domain details
5028c2ecf20Sopenharmony_ci		 * at the same offset.
5038c2ecf20Sopenharmony_ci		 */
5048c2ecf20Sopenharmony_ci		index = of_read_number(distance_ref_points, 1);
5058c2ecf20Sopenharmony_ci	}
5068c2ecf20Sopenharmony_ci	/*
5078c2ecf20Sopenharmony_ci	 * Warn and cap if the hardware supports more than
5088c2ecf20Sopenharmony_ci	 * MAX_DISTANCE_REF_POINTS domains.
5098c2ecf20Sopenharmony_ci	 */
5108c2ecf20Sopenharmony_ci	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
5118c2ecf20Sopenharmony_ci		printk(KERN_WARNING "NUMA: distance array capped at "
5128c2ecf20Sopenharmony_ci			"%d entries\n", MAX_DISTANCE_REF_POINTS);
5138c2ecf20Sopenharmony_ci		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
5148c2ecf20Sopenharmony_ci	}
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	of_node_put(root);
5178c2ecf20Sopenharmony_ci	return index;
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_cierr:
5208c2ecf20Sopenharmony_ci	of_node_put(root);
5218c2ecf20Sopenharmony_ci	return -1;
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_cistatic void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
5258c2ecf20Sopenharmony_ci{
5268c2ecf20Sopenharmony_ci	struct device_node *memory = NULL;
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	memory = of_find_node_by_type(memory, "memory");
5298c2ecf20Sopenharmony_ci	if (!memory)
5308c2ecf20Sopenharmony_ci		panic("numa.c: No memory nodes found!");
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	*n_addr_cells = of_n_addr_cells(memory);
5338c2ecf20Sopenharmony_ci	*n_size_cells = of_n_size_cells(memory);
5348c2ecf20Sopenharmony_ci	of_node_put(memory);
5358c2ecf20Sopenharmony_ci}
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_cistatic unsigned long read_n_cells(int n, const __be32 **buf)
5388c2ecf20Sopenharmony_ci{
5398c2ecf20Sopenharmony_ci	unsigned long result = 0;
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci	while (n--) {
5428c2ecf20Sopenharmony_ci		result = (result << 32) | of_read_number(*buf, 1);
5438c2ecf20Sopenharmony_ci		(*buf)++;
5448c2ecf20Sopenharmony_ci	}
5458c2ecf20Sopenharmony_ci	return result;
5468c2ecf20Sopenharmony_ci}
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_cistruct assoc_arrays {
5498c2ecf20Sopenharmony_ci	u32	n_arrays;
5508c2ecf20Sopenharmony_ci	u32	array_sz;
5518c2ecf20Sopenharmony_ci	const __be32 *arrays;
5528c2ecf20Sopenharmony_ci};
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci/*
5558c2ecf20Sopenharmony_ci * Retrieve and validate the list of associativity arrays for drconf
5568c2ecf20Sopenharmony_ci * memory from the ibm,associativity-lookup-arrays property of the
5578c2ecf20Sopenharmony_ci * device tree..
5588c2ecf20Sopenharmony_ci *
5598c2ecf20Sopenharmony_ci * The layout of the ibm,associativity-lookup-arrays property is a number N
5608c2ecf20Sopenharmony_ci * indicating the number of associativity arrays, followed by a number M
5618c2ecf20Sopenharmony_ci * indicating the size of each associativity array, followed by a list
5628c2ecf20Sopenharmony_ci * of N associativity arrays.
5638c2ecf20Sopenharmony_ci */
5648c2ecf20Sopenharmony_cistatic int of_get_assoc_arrays(struct assoc_arrays *aa)
5658c2ecf20Sopenharmony_ci{
5668c2ecf20Sopenharmony_ci	struct device_node *memory;
5678c2ecf20Sopenharmony_ci	const __be32 *prop;
5688c2ecf20Sopenharmony_ci	u32 len;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
5718c2ecf20Sopenharmony_ci	if (!memory)
5728c2ecf20Sopenharmony_ci		return -1;
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
5758c2ecf20Sopenharmony_ci	if (!prop || len < 2 * sizeof(unsigned int)) {
5768c2ecf20Sopenharmony_ci		of_node_put(memory);
5778c2ecf20Sopenharmony_ci		return -1;
5788c2ecf20Sopenharmony_ci	}
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	aa->n_arrays = of_read_number(prop++, 1);
5818c2ecf20Sopenharmony_ci	aa->array_sz = of_read_number(prop++, 1);
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	of_node_put(memory);
5848c2ecf20Sopenharmony_ci
5858c2ecf20Sopenharmony_ci	/* Now that we know the number of arrays and size of each array,
5868c2ecf20Sopenharmony_ci	 * revalidate the size of the property read in.
5878c2ecf20Sopenharmony_ci	 */
5888c2ecf20Sopenharmony_ci	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
5898c2ecf20Sopenharmony_ci		return -1;
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	aa->arrays = prop;
5928c2ecf20Sopenharmony_ci	return 0;
5938c2ecf20Sopenharmony_ci}
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_cistatic int get_nid_and_numa_distance(struct drmem_lmb *lmb)
5968c2ecf20Sopenharmony_ci{
5978c2ecf20Sopenharmony_ci	struct assoc_arrays aa = { .arrays = NULL };
5988c2ecf20Sopenharmony_ci	int default_nid = NUMA_NO_NODE;
5998c2ecf20Sopenharmony_ci	int nid = default_nid;
6008c2ecf20Sopenharmony_ci	int rc, index;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	if ((primary_domain_index < 0) || !numa_enabled)
6038c2ecf20Sopenharmony_ci		return default_nid;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	rc = of_get_assoc_arrays(&aa);
6068c2ecf20Sopenharmony_ci	if (rc)
6078c2ecf20Sopenharmony_ci		return default_nid;
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	if (primary_domain_index <= aa.array_sz &&
6108c2ecf20Sopenharmony_ci	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
6118c2ecf20Sopenharmony_ci		const __be32 *associativity;
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci		index = lmb->aa_index * aa.array_sz;
6148c2ecf20Sopenharmony_ci		associativity = &aa.arrays[index];
6158c2ecf20Sopenharmony_ci		nid = __associativity_to_nid(associativity, aa.array_sz);
6168c2ecf20Sopenharmony_ci		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
6178c2ecf20Sopenharmony_ci			/*
6188c2ecf20Sopenharmony_ci			 * lookup array associativity entries have
6198c2ecf20Sopenharmony_ci			 * no length of the array as the first element.
6208c2ecf20Sopenharmony_ci			 */
6218c2ecf20Sopenharmony_ci			__initialize_form1_numa_distance(associativity, aa.array_sz);
6228c2ecf20Sopenharmony_ci		}
6238c2ecf20Sopenharmony_ci	}
6248c2ecf20Sopenharmony_ci	return nid;
6258c2ecf20Sopenharmony_ci}
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci/*
6288c2ecf20Sopenharmony_ci * This is like of_node_to_nid_single() for memory represented in the
6298c2ecf20Sopenharmony_ci * ibm,dynamic-reconfiguration-memory node.
6308c2ecf20Sopenharmony_ci */
6318c2ecf20Sopenharmony_ciint of_drconf_to_nid_single(struct drmem_lmb *lmb)
6328c2ecf20Sopenharmony_ci{
6338c2ecf20Sopenharmony_ci	struct assoc_arrays aa = { .arrays = NULL };
6348c2ecf20Sopenharmony_ci	int default_nid = NUMA_NO_NODE;
6358c2ecf20Sopenharmony_ci	int nid = default_nid;
6368c2ecf20Sopenharmony_ci	int rc, index;
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	if ((primary_domain_index < 0) || !numa_enabled)
6398c2ecf20Sopenharmony_ci		return default_nid;
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_ci	rc = of_get_assoc_arrays(&aa);
6428c2ecf20Sopenharmony_ci	if (rc)
6438c2ecf20Sopenharmony_ci		return default_nid;
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_ci	if (primary_domain_index <= aa.array_sz &&
6468c2ecf20Sopenharmony_ci	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
6478c2ecf20Sopenharmony_ci		const __be32 *associativity;
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci		index = lmb->aa_index * aa.array_sz;
6508c2ecf20Sopenharmony_ci		associativity = &aa.arrays[index];
6518c2ecf20Sopenharmony_ci		nid = __associativity_to_nid(associativity, aa.array_sz);
6528c2ecf20Sopenharmony_ci	}
6538c2ecf20Sopenharmony_ci	return nid;
6548c2ecf20Sopenharmony_ci}
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_SPLPAR
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_cistatic int __vphn_get_associativity(long lcpu, __be32 *associativity)
6598c2ecf20Sopenharmony_ci{
6608c2ecf20Sopenharmony_ci	long rc, hwid;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	/*
6638c2ecf20Sopenharmony_ci	 * On a shared lpar, device tree will not have node associativity.
6648c2ecf20Sopenharmony_ci	 * At this time lppaca, or its __old_status field may not be
6658c2ecf20Sopenharmony_ci	 * updated. Hence kernel cannot detect if its on a shared lpar. So
6668c2ecf20Sopenharmony_ci	 * request an explicit associativity irrespective of whether the
6678c2ecf20Sopenharmony_ci	 * lpar is shared or dedicated. Use the device tree property as a
6688c2ecf20Sopenharmony_ci	 * fallback. cpu_to_phys_id is only valid between
6698c2ecf20Sopenharmony_ci	 * smp_setup_cpu_maps() and smp_setup_pacas().
6708c2ecf20Sopenharmony_ci	 */
6718c2ecf20Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_VPHN)) {
6728c2ecf20Sopenharmony_ci		if (cpu_to_phys_id)
6738c2ecf20Sopenharmony_ci			hwid = cpu_to_phys_id[lcpu];
6748c2ecf20Sopenharmony_ci		else
6758c2ecf20Sopenharmony_ci			hwid = get_hard_smp_processor_id(lcpu);
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
6788c2ecf20Sopenharmony_ci		if (rc == H_SUCCESS)
6798c2ecf20Sopenharmony_ci			return 0;
6808c2ecf20Sopenharmony_ci	}
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	return -1;
6838c2ecf20Sopenharmony_ci}
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_cistatic int vphn_get_nid(long lcpu)
6868c2ecf20Sopenharmony_ci{
6878c2ecf20Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	if (!__vphn_get_associativity(lcpu, associativity))
6918c2ecf20Sopenharmony_ci		return associativity_to_nid(associativity);
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	return NUMA_NO_NODE;
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci}
6968c2ecf20Sopenharmony_ci#else
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_cistatic int __vphn_get_associativity(long lcpu, __be32 *associativity)
6998c2ecf20Sopenharmony_ci{
7008c2ecf20Sopenharmony_ci	return -1;
7018c2ecf20Sopenharmony_ci}
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_cistatic int vphn_get_nid(long unused)
7048c2ecf20Sopenharmony_ci{
7058c2ecf20Sopenharmony_ci	return NUMA_NO_NODE;
7068c2ecf20Sopenharmony_ci}
7078c2ecf20Sopenharmony_ci#endif  /* CONFIG_PPC_SPLPAR */
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci/*
7108c2ecf20Sopenharmony_ci * Figure out to which domain a cpu belongs and stick it there.
7118c2ecf20Sopenharmony_ci * Return the id of the domain used.
7128c2ecf20Sopenharmony_ci */
7138c2ecf20Sopenharmony_cistatic int numa_setup_cpu(unsigned long lcpu)
7148c2ecf20Sopenharmony_ci{
7158c2ecf20Sopenharmony_ci	struct device_node *cpu;
7168c2ecf20Sopenharmony_ci	int fcpu = cpu_first_thread_sibling(lcpu);
7178c2ecf20Sopenharmony_ci	int nid = NUMA_NO_NODE;
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci	if (!cpu_present(lcpu)) {
7208c2ecf20Sopenharmony_ci		set_cpu_numa_node(lcpu, first_online_node);
7218c2ecf20Sopenharmony_ci		return first_online_node;
7228c2ecf20Sopenharmony_ci	}
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	/*
7258c2ecf20Sopenharmony_ci	 * If a valid cpu-to-node mapping is already available, use it
7268c2ecf20Sopenharmony_ci	 * directly instead of querying the firmware, since it represents
7278c2ecf20Sopenharmony_ci	 * the most recent mapping notified to us by the platform (eg: VPHN).
7288c2ecf20Sopenharmony_ci	 * Since cpu_to_node binding remains the same for all threads in the
7298c2ecf20Sopenharmony_ci	 * core. If a valid cpu-to-node mapping is already available, for
7308c2ecf20Sopenharmony_ci	 * the first thread in the core, use it.
7318c2ecf20Sopenharmony_ci	 */
7328c2ecf20Sopenharmony_ci	nid = numa_cpu_lookup_table[fcpu];
7338c2ecf20Sopenharmony_ci	if (nid >= 0) {
7348c2ecf20Sopenharmony_ci		map_cpu_to_node(lcpu, nid);
7358c2ecf20Sopenharmony_ci		return nid;
7368c2ecf20Sopenharmony_ci	}
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci	nid = vphn_get_nid(lcpu);
7398c2ecf20Sopenharmony_ci	if (nid != NUMA_NO_NODE)
7408c2ecf20Sopenharmony_ci		goto out_present;
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	cpu = of_get_cpu_node(lcpu, NULL);
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	if (!cpu) {
7458c2ecf20Sopenharmony_ci		WARN_ON(1);
7468c2ecf20Sopenharmony_ci		if (cpu_present(lcpu))
7478c2ecf20Sopenharmony_ci			goto out_present;
7488c2ecf20Sopenharmony_ci		else
7498c2ecf20Sopenharmony_ci			goto out;
7508c2ecf20Sopenharmony_ci	}
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	nid = of_node_to_nid_single(cpu);
7538c2ecf20Sopenharmony_ci	of_node_put(cpu);
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ciout_present:
7568c2ecf20Sopenharmony_ci	if (nid < 0 || !node_possible(nid))
7578c2ecf20Sopenharmony_ci		nid = first_online_node;
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	/*
7608c2ecf20Sopenharmony_ci	 * Update for the first thread of the core. All threads of a core
7618c2ecf20Sopenharmony_ci	 * have to be part of the same node. This not only avoids querying
7628c2ecf20Sopenharmony_ci	 * for every other thread in the core, but always avoids a case
7638c2ecf20Sopenharmony_ci	 * where virtual node associativity change causes subsequent threads
7648c2ecf20Sopenharmony_ci	 * of a core to be associated with different nid. However if first
7658c2ecf20Sopenharmony_ci	 * thread is already online, expect it to have a valid mapping.
7668c2ecf20Sopenharmony_ci	 */
7678c2ecf20Sopenharmony_ci	if (fcpu != lcpu) {
7688c2ecf20Sopenharmony_ci		WARN_ON(cpu_online(fcpu));
7698c2ecf20Sopenharmony_ci		map_cpu_to_node(fcpu, nid);
7708c2ecf20Sopenharmony_ci	}
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	map_cpu_to_node(lcpu, nid);
7738c2ecf20Sopenharmony_ciout:
7748c2ecf20Sopenharmony_ci	return nid;
7758c2ecf20Sopenharmony_ci}
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_cistatic void verify_cpu_node_mapping(int cpu, int node)
7788c2ecf20Sopenharmony_ci{
7798c2ecf20Sopenharmony_ci	int base, sibling, i;
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci	/* Verify that all the threads in the core belong to the same node */
7828c2ecf20Sopenharmony_ci	base = cpu_first_thread_sibling(cpu);
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_ci	for (i = 0; i < threads_per_core; i++) {
7858c2ecf20Sopenharmony_ci		sibling = base + i;
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci		if (sibling == cpu || cpu_is_offline(sibling))
7888c2ecf20Sopenharmony_ci			continue;
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_ci		if (cpu_to_node(sibling) != node) {
7918c2ecf20Sopenharmony_ci			WARN(1, "CPU thread siblings %d and %d don't belong"
7928c2ecf20Sopenharmony_ci				" to the same node!\n", cpu, sibling);
7938c2ecf20Sopenharmony_ci			break;
7948c2ecf20Sopenharmony_ci		}
7958c2ecf20Sopenharmony_ci	}
7968c2ecf20Sopenharmony_ci}
7978c2ecf20Sopenharmony_ci
/*
 * Bind @cpu to its node and check the binding against its thread
 * siblings. Always returns 0.
 *
 * Must run before sched domains notifier.
 */
static int ppc_numa_cpu_prepare(unsigned int cpu)
{
	verify_cpu_node_mapping(cpu, numa_setup_cpu(cpu));
	return 0;
}
8078c2ecf20Sopenharmony_ci
/*
 * Counterpart of ppc_numa_cpu_prepare(); the two are registered together
 * in initmem_init(). Nothing is undone here: @cpu is ignored and 0 is
 * always returned.
 */
static int ppc_numa_cpu_dead(unsigned int cpu)
{
	return 0;
}
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci/*
8148c2ecf20Sopenharmony_ci * Check and possibly modify a memory region to enforce the memory limit.
8158c2ecf20Sopenharmony_ci *
8168c2ecf20Sopenharmony_ci * Returns the size the region should have to enforce the memory limit.
8178c2ecf20Sopenharmony_ci * This will either be the original value of size, a truncated value,
8188c2ecf20Sopenharmony_ci * or zero. If the returned value of size is 0 the region should be
8198c2ecf20Sopenharmony_ci * discarded as it lies wholly above the memory limit.
8208c2ecf20Sopenharmony_ci */
8218c2ecf20Sopenharmony_cistatic unsigned long __init numa_enforce_memory_limit(unsigned long start,
8228c2ecf20Sopenharmony_ci						      unsigned long size)
8238c2ecf20Sopenharmony_ci{
8248c2ecf20Sopenharmony_ci	/*
8258c2ecf20Sopenharmony_ci	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
8268c2ecf20Sopenharmony_ci	 * we've already adjusted it for the limit and it takes care of
8278c2ecf20Sopenharmony_ci	 * having memory holes below the limit.  Also, in the case of
8288c2ecf20Sopenharmony_ci	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
8298c2ecf20Sopenharmony_ci	 */
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	if (start + size <= memblock_end_of_DRAM())
8328c2ecf20Sopenharmony_ci		return size;
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci	if (start >= memblock_end_of_DRAM())
8358c2ecf20Sopenharmony_ci		return 0;
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	return memblock_end_of_DRAM() - start;
8388c2ecf20Sopenharmony_ci}
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci/*
8418c2ecf20Sopenharmony_ci * Reads the counter for a given entry in
8428c2ecf20Sopenharmony_ci * linux,drconf-usable-memory property
8438c2ecf20Sopenharmony_ci */
8448c2ecf20Sopenharmony_cistatic inline int __init read_usm_ranges(const __be32 **usm)
8458c2ecf20Sopenharmony_ci{
8468c2ecf20Sopenharmony_ci	/*
8478c2ecf20Sopenharmony_ci	 * For each lmb in ibm,dynamic-memory a corresponding
8488c2ecf20Sopenharmony_ci	 * entry in linux,drconf-usable-memory property contains
8498c2ecf20Sopenharmony_ci	 * a counter followed by that many (base, size) duple.
8508c2ecf20Sopenharmony_ci	 * read the counter from linux,drconf-usable-memory
8518c2ecf20Sopenharmony_ci	 */
8528c2ecf20Sopenharmony_ci	return read_n_cells(n_mem_size_cells, usm);
8538c2ecf20Sopenharmony_ci}
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci/*
8568c2ecf20Sopenharmony_ci * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
8578c2ecf20Sopenharmony_ci * node.  This assumes n_mem_{addr,size}_cells have been set.
8588c2ecf20Sopenharmony_ci */
8598c2ecf20Sopenharmony_cistatic int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
8608c2ecf20Sopenharmony_ci					const __be32 **usm,
8618c2ecf20Sopenharmony_ci					void *data)
8628c2ecf20Sopenharmony_ci{
8638c2ecf20Sopenharmony_ci	unsigned int ranges, is_kexec_kdump = 0;
8648c2ecf20Sopenharmony_ci	unsigned long base, size, sz;
8658c2ecf20Sopenharmony_ci	int nid;
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_ci	/*
8688c2ecf20Sopenharmony_ci	 * Skip this block if the reserved bit is set in flags (0x80)
8698c2ecf20Sopenharmony_ci	 * or if the block is not assigned to this partition (0x8)
8708c2ecf20Sopenharmony_ci	 */
8718c2ecf20Sopenharmony_ci	if ((lmb->flags & DRCONF_MEM_RESERVED)
8728c2ecf20Sopenharmony_ci	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
8738c2ecf20Sopenharmony_ci		return 0;
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci	if (*usm)
8768c2ecf20Sopenharmony_ci		is_kexec_kdump = 1;
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	base = lmb->base_addr;
8798c2ecf20Sopenharmony_ci	size = drmem_lmb_size();
8808c2ecf20Sopenharmony_ci	ranges = 1;
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_ci	if (is_kexec_kdump) {
8838c2ecf20Sopenharmony_ci		ranges = read_usm_ranges(usm);
8848c2ecf20Sopenharmony_ci		if (!ranges) /* there are no (base, size) duple */
8858c2ecf20Sopenharmony_ci			return 0;
8868c2ecf20Sopenharmony_ci	}
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci	do {
8898c2ecf20Sopenharmony_ci		if (is_kexec_kdump) {
8908c2ecf20Sopenharmony_ci			base = read_n_cells(n_mem_addr_cells, usm);
8918c2ecf20Sopenharmony_ci			size = read_n_cells(n_mem_size_cells, usm);
8928c2ecf20Sopenharmony_ci		}
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci		nid = get_nid_and_numa_distance(lmb);
8958c2ecf20Sopenharmony_ci		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
8968c2ecf20Sopenharmony_ci					  &nid);
8978c2ecf20Sopenharmony_ci		node_set_online(nid);
8988c2ecf20Sopenharmony_ci		sz = numa_enforce_memory_limit(base, size);
8998c2ecf20Sopenharmony_ci		if (sz)
9008c2ecf20Sopenharmony_ci			memblock_set_node(base, sz, &memblock.memory, nid);
9018c2ecf20Sopenharmony_ci	} while (--ranges);
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_ci	return 0;
9048c2ecf20Sopenharmony_ci}
9058c2ecf20Sopenharmony_ci
9068c2ecf20Sopenharmony_cistatic int __init parse_numa_properties(void)
9078c2ecf20Sopenharmony_ci{
9088c2ecf20Sopenharmony_ci	struct device_node *memory;
9098c2ecf20Sopenharmony_ci	int default_nid = 0;
9108c2ecf20Sopenharmony_ci	unsigned long i;
9118c2ecf20Sopenharmony_ci	const __be32 *associativity;
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci	if (numa_enabled == 0) {
9148c2ecf20Sopenharmony_ci		printk(KERN_WARNING "NUMA disabled by user\n");
9158c2ecf20Sopenharmony_ci		return -1;
9168c2ecf20Sopenharmony_ci	}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci	primary_domain_index = find_primary_domain_index();
9198c2ecf20Sopenharmony_ci
9208c2ecf20Sopenharmony_ci	if (primary_domain_index < 0) {
9218c2ecf20Sopenharmony_ci		/*
9228c2ecf20Sopenharmony_ci		 * if we fail to parse primary_domain_index from device tree
9238c2ecf20Sopenharmony_ci		 * mark the numa disabled, boot with numa disabled.
9248c2ecf20Sopenharmony_ci		 */
9258c2ecf20Sopenharmony_ci		numa_enabled = false;
9268c2ecf20Sopenharmony_ci		return primary_domain_index;
9278c2ecf20Sopenharmony_ci	}
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_ci	/*
9328c2ecf20Sopenharmony_ci	 * If it is FORM2 initialize the distance table here.
9338c2ecf20Sopenharmony_ci	 */
9348c2ecf20Sopenharmony_ci	if (affinity_form == FORM2_AFFINITY)
9358c2ecf20Sopenharmony_ci		initialize_form2_numa_distance_lookup_table();
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	/*
9388c2ecf20Sopenharmony_ci	 * Even though we connect cpus to numa domains later in SMP
9398c2ecf20Sopenharmony_ci	 * init, we need to know the node ids now. This is because
9408c2ecf20Sopenharmony_ci	 * each node to be onlined must have NODE_DATA etc backing it.
9418c2ecf20Sopenharmony_ci	 */
9428c2ecf20Sopenharmony_ci	for_each_present_cpu(i) {
9438c2ecf20Sopenharmony_ci		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
9448c2ecf20Sopenharmony_ci		struct device_node *cpu;
9458c2ecf20Sopenharmony_ci		int nid = NUMA_NO_NODE;
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
9488c2ecf20Sopenharmony_ci
9498c2ecf20Sopenharmony_ci		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
9508c2ecf20Sopenharmony_ci			nid = associativity_to_nid(vphn_assoc);
9518c2ecf20Sopenharmony_ci			initialize_form1_numa_distance(vphn_assoc);
9528c2ecf20Sopenharmony_ci		} else {
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci			/*
9558c2ecf20Sopenharmony_ci			 * Don't fall back to default_nid yet -- we will plug
9568c2ecf20Sopenharmony_ci			 * cpus into nodes once the memory scan has discovered
9578c2ecf20Sopenharmony_ci			 * the topology.
9588c2ecf20Sopenharmony_ci			 */
9598c2ecf20Sopenharmony_ci			cpu = of_get_cpu_node(i, NULL);
9608c2ecf20Sopenharmony_ci			BUG_ON(!cpu);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci			associativity = of_get_associativity(cpu);
9638c2ecf20Sopenharmony_ci			if (associativity) {
9648c2ecf20Sopenharmony_ci				nid = associativity_to_nid(associativity);
9658c2ecf20Sopenharmony_ci				initialize_form1_numa_distance(associativity);
9668c2ecf20Sopenharmony_ci			}
9678c2ecf20Sopenharmony_ci			of_node_put(cpu);
9688c2ecf20Sopenharmony_ci		}
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci		/* node_set_online() is an UB if 'nid' is negative */
9718c2ecf20Sopenharmony_ci		if (likely(nid >= 0))
9728c2ecf20Sopenharmony_ci			node_set_online(nid);
9738c2ecf20Sopenharmony_ci	}
9748c2ecf20Sopenharmony_ci
9758c2ecf20Sopenharmony_ci	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
9768c2ecf20Sopenharmony_ci
9778c2ecf20Sopenharmony_ci	for_each_node_by_type(memory, "memory") {
9788c2ecf20Sopenharmony_ci		unsigned long start;
9798c2ecf20Sopenharmony_ci		unsigned long size;
9808c2ecf20Sopenharmony_ci		int nid;
9818c2ecf20Sopenharmony_ci		int ranges;
9828c2ecf20Sopenharmony_ci		const __be32 *memcell_buf;
9838c2ecf20Sopenharmony_ci		unsigned int len;
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_ci		memcell_buf = of_get_property(memory,
9868c2ecf20Sopenharmony_ci			"linux,usable-memory", &len);
9878c2ecf20Sopenharmony_ci		if (!memcell_buf || len <= 0)
9888c2ecf20Sopenharmony_ci			memcell_buf = of_get_property(memory, "reg", &len);
9898c2ecf20Sopenharmony_ci		if (!memcell_buf || len <= 0)
9908c2ecf20Sopenharmony_ci			continue;
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci		/* ranges in cell */
9938c2ecf20Sopenharmony_ci		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
9948c2ecf20Sopenharmony_cinew_range:
9958c2ecf20Sopenharmony_ci		/* these are order-sensitive, and modify the buffer pointer */
9968c2ecf20Sopenharmony_ci		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
9978c2ecf20Sopenharmony_ci		size = read_n_cells(n_mem_size_cells, &memcell_buf);
9988c2ecf20Sopenharmony_ci
9998c2ecf20Sopenharmony_ci		/*
10008c2ecf20Sopenharmony_ci		 * Assumption: either all memory nodes or none will
10018c2ecf20Sopenharmony_ci		 * have associativity properties.  If none, then
10028c2ecf20Sopenharmony_ci		 * everything goes to default_nid.
10038c2ecf20Sopenharmony_ci		 */
10048c2ecf20Sopenharmony_ci		associativity = of_get_associativity(memory);
10058c2ecf20Sopenharmony_ci		if (associativity) {
10068c2ecf20Sopenharmony_ci			nid = associativity_to_nid(associativity);
10078c2ecf20Sopenharmony_ci			initialize_form1_numa_distance(associativity);
10088c2ecf20Sopenharmony_ci		} else
10098c2ecf20Sopenharmony_ci			nid = default_nid;
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ci		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
10128c2ecf20Sopenharmony_ci		node_set_online(nid);
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci		size = numa_enforce_memory_limit(start, size);
10158c2ecf20Sopenharmony_ci		if (size)
10168c2ecf20Sopenharmony_ci			memblock_set_node(start, size, &memblock.memory, nid);
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci		if (--ranges)
10198c2ecf20Sopenharmony_ci			goto new_range;
10208c2ecf20Sopenharmony_ci	}
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	/*
10238c2ecf20Sopenharmony_ci	 * Now do the same thing for each MEMBLOCK listed in the
10248c2ecf20Sopenharmony_ci	 * ibm,dynamic-memory property in the
10258c2ecf20Sopenharmony_ci	 * ibm,dynamic-reconfiguration-memory node.
10268c2ecf20Sopenharmony_ci	 */
10278c2ecf20Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
10288c2ecf20Sopenharmony_ci	if (memory) {
10298c2ecf20Sopenharmony_ci		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
10308c2ecf20Sopenharmony_ci		of_node_put(memory);
10318c2ecf20Sopenharmony_ci	}
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_ci	return 0;
10348c2ecf20Sopenharmony_ci}
10358c2ecf20Sopenharmony_ci
10368c2ecf20Sopenharmony_cistatic void __init setup_nonnuma(void)
10378c2ecf20Sopenharmony_ci{
10388c2ecf20Sopenharmony_ci	unsigned long top_of_ram = memblock_end_of_DRAM();
10398c2ecf20Sopenharmony_ci	unsigned long total_ram = memblock_phys_mem_size();
10408c2ecf20Sopenharmony_ci	unsigned long start_pfn, end_pfn;
10418c2ecf20Sopenharmony_ci	unsigned int nid = 0;
10428c2ecf20Sopenharmony_ci	int i;
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
10458c2ecf20Sopenharmony_ci	       top_of_ram, total_ram);
10468c2ecf20Sopenharmony_ci	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
10478c2ecf20Sopenharmony_ci	       (top_of_ram - total_ram) >> 20);
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
10508c2ecf20Sopenharmony_ci		fake_numa_create_new_node(end_pfn, &nid);
10518c2ecf20Sopenharmony_ci		memblock_set_node(PFN_PHYS(start_pfn),
10528c2ecf20Sopenharmony_ci				  PFN_PHYS(end_pfn - start_pfn),
10538c2ecf20Sopenharmony_ci				  &memblock.memory, nid);
10548c2ecf20Sopenharmony_ci		node_set_online(nid);
10558c2ecf20Sopenharmony_ci	}
10568c2ecf20Sopenharmony_ci}
10578c2ecf20Sopenharmony_ci
10588c2ecf20Sopenharmony_civoid __init dump_numa_cpu_topology(void)
10598c2ecf20Sopenharmony_ci{
10608c2ecf20Sopenharmony_ci	unsigned int node;
10618c2ecf20Sopenharmony_ci	unsigned int cpu, count;
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci	if (!numa_enabled)
10648c2ecf20Sopenharmony_ci		return;
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	for_each_online_node(node) {
10678c2ecf20Sopenharmony_ci		pr_info("Node %d CPUs:", node);
10688c2ecf20Sopenharmony_ci
10698c2ecf20Sopenharmony_ci		count = 0;
10708c2ecf20Sopenharmony_ci		/*
10718c2ecf20Sopenharmony_ci		 * If we used a CPU iterator here we would miss printing
10728c2ecf20Sopenharmony_ci		 * the holes in the cpumap.
10738c2ecf20Sopenharmony_ci		 */
10748c2ecf20Sopenharmony_ci		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
10758c2ecf20Sopenharmony_ci			if (cpumask_test_cpu(cpu,
10768c2ecf20Sopenharmony_ci					node_to_cpumask_map[node])) {
10778c2ecf20Sopenharmony_ci				if (count == 0)
10788c2ecf20Sopenharmony_ci					pr_cont(" %u", cpu);
10798c2ecf20Sopenharmony_ci				++count;
10808c2ecf20Sopenharmony_ci			} else {
10818c2ecf20Sopenharmony_ci				if (count > 1)
10828c2ecf20Sopenharmony_ci					pr_cont("-%u", cpu - 1);
10838c2ecf20Sopenharmony_ci				count = 0;
10848c2ecf20Sopenharmony_ci			}
10858c2ecf20Sopenharmony_ci		}
10868c2ecf20Sopenharmony_ci
10878c2ecf20Sopenharmony_ci		if (count > 1)
10888c2ecf20Sopenharmony_ci			pr_cont("-%u", nr_cpu_ids - 1);
10898c2ecf20Sopenharmony_ci		pr_cont("\n");
10908c2ecf20Sopenharmony_ci	}
10918c2ecf20Sopenharmony_ci}
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci/* Initialize NODE_DATA for a node on the local memory */
10948c2ecf20Sopenharmony_cistatic void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
10958c2ecf20Sopenharmony_ci{
10968c2ecf20Sopenharmony_ci	u64 spanned_pages = end_pfn - start_pfn;
10978c2ecf20Sopenharmony_ci	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
10988c2ecf20Sopenharmony_ci	u64 nd_pa;
10998c2ecf20Sopenharmony_ci	void *nd;
11008c2ecf20Sopenharmony_ci	int tnid;
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
11038c2ecf20Sopenharmony_ci	if (!nd_pa)
11048c2ecf20Sopenharmony_ci		panic("Cannot allocate %zu bytes for node %d data\n",
11058c2ecf20Sopenharmony_ci		      nd_size, nid);
11068c2ecf20Sopenharmony_ci
11078c2ecf20Sopenharmony_ci	nd = __va(nd_pa);
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci	/* report and initialize */
11108c2ecf20Sopenharmony_ci	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
11118c2ecf20Sopenharmony_ci		nd_pa, nd_pa + nd_size - 1);
11128c2ecf20Sopenharmony_ci	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
11138c2ecf20Sopenharmony_ci	if (tnid != nid)
11148c2ecf20Sopenharmony_ci		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
11158c2ecf20Sopenharmony_ci
11168c2ecf20Sopenharmony_ci	node_data[nid] = nd;
11178c2ecf20Sopenharmony_ci	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
11188c2ecf20Sopenharmony_ci	NODE_DATA(nid)->node_id = nid;
11198c2ecf20Sopenharmony_ci	NODE_DATA(nid)->node_start_pfn = start_pfn;
11208c2ecf20Sopenharmony_ci	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
11218c2ecf20Sopenharmony_ci}
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_cistatic void __init find_possible_nodes(void)
11248c2ecf20Sopenharmony_ci{
11258c2ecf20Sopenharmony_ci	struct device_node *rtas;
11268c2ecf20Sopenharmony_ci	const __be32 *domains = NULL;
11278c2ecf20Sopenharmony_ci	int prop_length, max_nodes;
11288c2ecf20Sopenharmony_ci	u32 i;
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	if (!numa_enabled)
11318c2ecf20Sopenharmony_ci		return;
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci	rtas = of_find_node_by_path("/rtas");
11348c2ecf20Sopenharmony_ci	if (!rtas)
11358c2ecf20Sopenharmony_ci		return;
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci	/*
11388c2ecf20Sopenharmony_ci	 * ibm,current-associativity-domains is a fairly recent property. If
11398c2ecf20Sopenharmony_ci	 * it doesn't exist, then fallback on ibm,max-associativity-domains.
11408c2ecf20Sopenharmony_ci	 * Current denotes what the platform can support compared to max
11418c2ecf20Sopenharmony_ci	 * which denotes what the Hypervisor can support.
11428c2ecf20Sopenharmony_ci	 *
11438c2ecf20Sopenharmony_ci	 * If the LPAR is migratable, new nodes might be activated after a LPM,
11448c2ecf20Sopenharmony_ci	 * so we should consider the max number in that case.
11458c2ecf20Sopenharmony_ci	 */
11468c2ecf20Sopenharmony_ci	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
11478c2ecf20Sopenharmony_ci		domains = of_get_property(rtas,
11488c2ecf20Sopenharmony_ci					  "ibm,current-associativity-domains",
11498c2ecf20Sopenharmony_ci					  &prop_length);
11508c2ecf20Sopenharmony_ci	if (!domains) {
11518c2ecf20Sopenharmony_ci		domains = of_get_property(rtas, "ibm,max-associativity-domains",
11528c2ecf20Sopenharmony_ci					&prop_length);
11538c2ecf20Sopenharmony_ci		if (!domains)
11548c2ecf20Sopenharmony_ci			goto out;
11558c2ecf20Sopenharmony_ci	}
11568c2ecf20Sopenharmony_ci
11578c2ecf20Sopenharmony_ci	max_nodes = of_read_number(&domains[primary_domain_index], 1);
11588c2ecf20Sopenharmony_ci	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_ci	for (i = 0; i < max_nodes; i++) {
11618c2ecf20Sopenharmony_ci		if (!node_possible(i))
11628c2ecf20Sopenharmony_ci			node_set(i, node_possible_map);
11638c2ecf20Sopenharmony_ci	}
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci	prop_length /= sizeof(int);
11668c2ecf20Sopenharmony_ci	if (prop_length > primary_domain_index + 2)
11678c2ecf20Sopenharmony_ci		coregroup_enabled = 1;
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ciout:
11708c2ecf20Sopenharmony_ci	of_node_put(rtas);
11718c2ecf20Sopenharmony_ci}
11728c2ecf20Sopenharmony_ci
11738c2ecf20Sopenharmony_civoid __init mem_topology_setup(void)
11748c2ecf20Sopenharmony_ci{
11758c2ecf20Sopenharmony_ci	int cpu;
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci	/*
11788c2ecf20Sopenharmony_ci	 * Linux/mm assumes node 0 to be online at boot. However this is not
11798c2ecf20Sopenharmony_ci	 * true on PowerPC, where node 0 is similar to any other node, it
11808c2ecf20Sopenharmony_ci	 * could be cpuless, memoryless node. So force node 0 to be offline
11818c2ecf20Sopenharmony_ci	 * for now. This will prevent cpuless, memoryless node 0 showing up
11828c2ecf20Sopenharmony_ci	 * unnecessarily as online. If a node has cpus or memory that need
11838c2ecf20Sopenharmony_ci	 * to be online, then node will anyway be marked online.
11848c2ecf20Sopenharmony_ci	 */
11858c2ecf20Sopenharmony_ci	node_set_offline(0);
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci	if (parse_numa_properties())
11888c2ecf20Sopenharmony_ci		setup_nonnuma();
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci	/*
11918c2ecf20Sopenharmony_ci	 * Modify the set of possible NUMA nodes to reflect information
11928c2ecf20Sopenharmony_ci	 * available about the set of online nodes, and the set of nodes
11938c2ecf20Sopenharmony_ci	 * that we expect to make use of for this platform's affinity
11948c2ecf20Sopenharmony_ci	 * calculations.
11958c2ecf20Sopenharmony_ci	 */
11968c2ecf20Sopenharmony_ci	nodes_and(node_possible_map, node_possible_map, node_online_map);
11978c2ecf20Sopenharmony_ci
11988c2ecf20Sopenharmony_ci	find_possible_nodes();
11998c2ecf20Sopenharmony_ci
12008c2ecf20Sopenharmony_ci	setup_node_to_cpumask_map();
12018c2ecf20Sopenharmony_ci
12028c2ecf20Sopenharmony_ci	reset_numa_cpu_lookup_table();
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu) {
12058c2ecf20Sopenharmony_ci		/*
12068c2ecf20Sopenharmony_ci		 * Powerpc with CONFIG_NUMA always used to have a node 0,
12078c2ecf20Sopenharmony_ci		 * even if it was memoryless or cpuless. For all cpus that
12088c2ecf20Sopenharmony_ci		 * are possible but not present, cpu_to_node() would point
12098c2ecf20Sopenharmony_ci		 * to node 0. To remove a cpuless, memoryless dummy node,
12108c2ecf20Sopenharmony_ci		 * powerpc need to make sure all possible but not present
12118c2ecf20Sopenharmony_ci		 * cpu_to_node are set to a proper node.
12128c2ecf20Sopenharmony_ci		 */
12138c2ecf20Sopenharmony_ci		numa_setup_cpu(cpu);
12148c2ecf20Sopenharmony_ci	}
12158c2ecf20Sopenharmony_ci}
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_civoid __init initmem_init(void)
12188c2ecf20Sopenharmony_ci{
12198c2ecf20Sopenharmony_ci	int nid;
12208c2ecf20Sopenharmony_ci
12218c2ecf20Sopenharmony_ci	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
12228c2ecf20Sopenharmony_ci	max_pfn = max_low_pfn;
12238c2ecf20Sopenharmony_ci
12248c2ecf20Sopenharmony_ci	memblock_dump_all();
12258c2ecf20Sopenharmony_ci
12268c2ecf20Sopenharmony_ci	for_each_online_node(nid) {
12278c2ecf20Sopenharmony_ci		unsigned long start_pfn, end_pfn;
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_ci		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
12308c2ecf20Sopenharmony_ci		setup_node_data(nid, start_pfn, end_pfn);
12318c2ecf20Sopenharmony_ci	}
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_ci	sparse_init();
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci	/*
12368c2ecf20Sopenharmony_ci	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
12378c2ecf20Sopenharmony_ci	 * even before we online them, so that we can use cpu_to_{node,mem}
12388c2ecf20Sopenharmony_ci	 * early in boot, cf. smp_prepare_cpus().
12398c2ecf20Sopenharmony_ci	 * _nocalls() + manual invocation is used because cpuhp is not yet
12408c2ecf20Sopenharmony_ci	 * initialized for the boot CPU.
12418c2ecf20Sopenharmony_ci	 */
12428c2ecf20Sopenharmony_ci	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
12438c2ecf20Sopenharmony_ci				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
12448c2ecf20Sopenharmony_ci}
12458c2ecf20Sopenharmony_ci
12468c2ecf20Sopenharmony_cistatic int __init early_numa(char *p)
12478c2ecf20Sopenharmony_ci{
12488c2ecf20Sopenharmony_ci	if (!p)
12498c2ecf20Sopenharmony_ci		return 0;
12508c2ecf20Sopenharmony_ci
12518c2ecf20Sopenharmony_ci	if (strstr(p, "off"))
12528c2ecf20Sopenharmony_ci		numa_enabled = 0;
12538c2ecf20Sopenharmony_ci
12548c2ecf20Sopenharmony_ci	if (strstr(p, "debug"))
12558c2ecf20Sopenharmony_ci		numa_debug = 1;
12568c2ecf20Sopenharmony_ci
12578c2ecf20Sopenharmony_ci	p = strstr(p, "fake=");
12588c2ecf20Sopenharmony_ci	if (p)
12598c2ecf20Sopenharmony_ci		cmdline = p + strlen("fake=");
12608c2ecf20Sopenharmony_ci
12618c2ecf20Sopenharmony_ci	return 0;
12628c2ecf20Sopenharmony_ci}
12638c2ecf20Sopenharmony_ciearly_param("numa", early_numa);
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
12668c2ecf20Sopenharmony_ci/*
12678c2ecf20Sopenharmony_ci * Find the node associated with a hot added memory section for
12688c2ecf20Sopenharmony_ci * memory represented in the device tree by the property
12698c2ecf20Sopenharmony_ci * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
12708c2ecf20Sopenharmony_ci */
12718c2ecf20Sopenharmony_cistatic int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
12728c2ecf20Sopenharmony_ci{
12738c2ecf20Sopenharmony_ci	struct drmem_lmb *lmb;
12748c2ecf20Sopenharmony_ci	unsigned long lmb_size;
12758c2ecf20Sopenharmony_ci	int nid = NUMA_NO_NODE;
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci	lmb_size = drmem_lmb_size();
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_ci	for_each_drmem_lmb(lmb) {
12808c2ecf20Sopenharmony_ci		/* skip this block if it is reserved or not assigned to
12818c2ecf20Sopenharmony_ci		 * this partition */
12828c2ecf20Sopenharmony_ci		if ((lmb->flags & DRCONF_MEM_RESERVED)
12838c2ecf20Sopenharmony_ci		    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
12848c2ecf20Sopenharmony_ci			continue;
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_ci		if ((scn_addr < lmb->base_addr)
12878c2ecf20Sopenharmony_ci		    || (scn_addr >= (lmb->base_addr + lmb_size)))
12888c2ecf20Sopenharmony_ci			continue;
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_ci		nid = of_drconf_to_nid_single(lmb);
12918c2ecf20Sopenharmony_ci		break;
12928c2ecf20Sopenharmony_ci	}
12938c2ecf20Sopenharmony_ci
12948c2ecf20Sopenharmony_ci	return nid;
12958c2ecf20Sopenharmony_ci}
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci/*
12988c2ecf20Sopenharmony_ci * Find the node associated with a hot added memory section for memory
12998c2ecf20Sopenharmony_ci * represented in the device tree as a node (i.e. memory@XXXX) for
13008c2ecf20Sopenharmony_ci * each memblock.
13018c2ecf20Sopenharmony_ci */
13028c2ecf20Sopenharmony_cistatic int hot_add_node_scn_to_nid(unsigned long scn_addr)
13038c2ecf20Sopenharmony_ci{
13048c2ecf20Sopenharmony_ci	struct device_node *memory;
13058c2ecf20Sopenharmony_ci	int nid = NUMA_NO_NODE;
13068c2ecf20Sopenharmony_ci
13078c2ecf20Sopenharmony_ci	for_each_node_by_type(memory, "memory") {
13088c2ecf20Sopenharmony_ci		unsigned long start, size;
13098c2ecf20Sopenharmony_ci		int ranges;
13108c2ecf20Sopenharmony_ci		const __be32 *memcell_buf;
13118c2ecf20Sopenharmony_ci		unsigned int len;
13128c2ecf20Sopenharmony_ci
13138c2ecf20Sopenharmony_ci		memcell_buf = of_get_property(memory, "reg", &len);
13148c2ecf20Sopenharmony_ci		if (!memcell_buf || len <= 0)
13158c2ecf20Sopenharmony_ci			continue;
13168c2ecf20Sopenharmony_ci
13178c2ecf20Sopenharmony_ci		/* ranges in cell */
13188c2ecf20Sopenharmony_ci		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
13198c2ecf20Sopenharmony_ci
13208c2ecf20Sopenharmony_ci		while (ranges--) {
13218c2ecf20Sopenharmony_ci			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
13228c2ecf20Sopenharmony_ci			size = read_n_cells(n_mem_size_cells, &memcell_buf);
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci			if ((scn_addr < start) || (scn_addr >= (start + size)))
13258c2ecf20Sopenharmony_ci				continue;
13268c2ecf20Sopenharmony_ci
13278c2ecf20Sopenharmony_ci			nid = of_node_to_nid_single(memory);
13288c2ecf20Sopenharmony_ci			break;
13298c2ecf20Sopenharmony_ci		}
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ci		if (nid >= 0)
13328c2ecf20Sopenharmony_ci			break;
13338c2ecf20Sopenharmony_ci	}
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci	of_node_put(memory);
13368c2ecf20Sopenharmony_ci
13378c2ecf20Sopenharmony_ci	return nid;
13388c2ecf20Sopenharmony_ci}
13398c2ecf20Sopenharmony_ci
13408c2ecf20Sopenharmony_ci/*
13418c2ecf20Sopenharmony_ci * Find the node associated with a hot added memory section.  Section
13428c2ecf20Sopenharmony_ci * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
13438c2ecf20Sopenharmony_ci * sections are fully contained within a single MEMBLOCK.
13448c2ecf20Sopenharmony_ci */
13458c2ecf20Sopenharmony_ciint hot_add_scn_to_nid(unsigned long scn_addr)
13468c2ecf20Sopenharmony_ci{
13478c2ecf20Sopenharmony_ci	struct device_node *memory = NULL;
13488c2ecf20Sopenharmony_ci	int nid;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	if (!numa_enabled)
13518c2ecf20Sopenharmony_ci		return first_online_node;
13528c2ecf20Sopenharmony_ci
13538c2ecf20Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
13548c2ecf20Sopenharmony_ci	if (memory) {
13558c2ecf20Sopenharmony_ci		nid = hot_add_drconf_scn_to_nid(scn_addr);
13568c2ecf20Sopenharmony_ci		of_node_put(memory);
13578c2ecf20Sopenharmony_ci	} else {
13588c2ecf20Sopenharmony_ci		nid = hot_add_node_scn_to_nid(scn_addr);
13598c2ecf20Sopenharmony_ci	}
13608c2ecf20Sopenharmony_ci
13618c2ecf20Sopenharmony_ci	if (nid < 0 || !node_possible(nid))
13628c2ecf20Sopenharmony_ci		nid = first_online_node;
13638c2ecf20Sopenharmony_ci
13648c2ecf20Sopenharmony_ci	return nid;
13658c2ecf20Sopenharmony_ci}
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_cistatic u64 hot_add_drconf_memory_max(void)
13688c2ecf20Sopenharmony_ci{
13698c2ecf20Sopenharmony_ci	struct device_node *memory = NULL;
13708c2ecf20Sopenharmony_ci	struct device_node *dn = NULL;
13718c2ecf20Sopenharmony_ci	const __be64 *lrdr = NULL;
13728c2ecf20Sopenharmony_ci
13738c2ecf20Sopenharmony_ci	dn = of_find_node_by_path("/rtas");
13748c2ecf20Sopenharmony_ci	if (dn) {
13758c2ecf20Sopenharmony_ci		lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
13768c2ecf20Sopenharmony_ci		of_node_put(dn);
13778c2ecf20Sopenharmony_ci		if (lrdr)
13788c2ecf20Sopenharmony_ci			return be64_to_cpup(lrdr);
13798c2ecf20Sopenharmony_ci	}
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
13828c2ecf20Sopenharmony_ci	if (memory) {
13838c2ecf20Sopenharmony_ci		of_node_put(memory);
13848c2ecf20Sopenharmony_ci		return drmem_lmb_memory_max();
13858c2ecf20Sopenharmony_ci	}
13868c2ecf20Sopenharmony_ci	return 0;
13878c2ecf20Sopenharmony_ci}
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci/*
13908c2ecf20Sopenharmony_ci * memory_hotplug_max - return max address of memory that may be added
13918c2ecf20Sopenharmony_ci *
13928c2ecf20Sopenharmony_ci * This is currently only used on systems that support drconfig memory
13938c2ecf20Sopenharmony_ci * hotplug.
13948c2ecf20Sopenharmony_ci */
13958c2ecf20Sopenharmony_ciu64 memory_hotplug_max(void)
13968c2ecf20Sopenharmony_ci{
13978c2ecf20Sopenharmony_ci        return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
13988c2ecf20Sopenharmony_ci}
13998c2ecf20Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_ci/* Virtual Processor Home Node (VPHN) support */
14028c2ecf20Sopenharmony_ci#ifdef CONFIG_PPC_SPLPAR
/*
 * Set to 1 by topology_update_init() (a device_initcall). While it is
 * still 0, find_and_online_cpu_nid() does not attempt try_online_node().
 */
static int topology_inited;
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci/*
14068c2ecf20Sopenharmony_ci * Retrieve the new associativity information for a virtual processor's
14078c2ecf20Sopenharmony_ci * home node.
14088c2ecf20Sopenharmony_ci */
14098c2ecf20Sopenharmony_cistatic long vphn_get_associativity(unsigned long cpu,
14108c2ecf20Sopenharmony_ci					__be32 *associativity)
14118c2ecf20Sopenharmony_ci{
14128c2ecf20Sopenharmony_ci	long rc;
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci	rc = hcall_vphn(get_hard_smp_processor_id(cpu),
14158c2ecf20Sopenharmony_ci				VPHN_FLAG_VCPU, associativity);
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	switch (rc) {
14188c2ecf20Sopenharmony_ci	case H_SUCCESS:
14198c2ecf20Sopenharmony_ci		dbg("VPHN hcall succeeded. Reset polling...\n");
14208c2ecf20Sopenharmony_ci		goto out;
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	case H_FUNCTION:
14238c2ecf20Sopenharmony_ci		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
14248c2ecf20Sopenharmony_ci		break;
14258c2ecf20Sopenharmony_ci	case H_HARDWARE:
14268c2ecf20Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
14278c2ecf20Sopenharmony_ci			"preventing VPHN. Disabling polling...\n");
14288c2ecf20Sopenharmony_ci		break;
14298c2ecf20Sopenharmony_ci	case H_PARAMETER:
14308c2ecf20Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
14318c2ecf20Sopenharmony_ci			"Disabling polling...\n");
14328c2ecf20Sopenharmony_ci		break;
14338c2ecf20Sopenharmony_ci	default:
14348c2ecf20Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
14358c2ecf20Sopenharmony_ci			, rc);
14368c2ecf20Sopenharmony_ci		break;
14378c2ecf20Sopenharmony_ci	}
14388c2ecf20Sopenharmony_ciout:
14398c2ecf20Sopenharmony_ci	return rc;
14408c2ecf20Sopenharmony_ci}
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ciint find_and_online_cpu_nid(int cpu)
14438c2ecf20Sopenharmony_ci{
14448c2ecf20Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
14458c2ecf20Sopenharmony_ci	int new_nid;
14468c2ecf20Sopenharmony_ci
14478c2ecf20Sopenharmony_ci	/* Use associativity from first thread for all siblings */
14488c2ecf20Sopenharmony_ci	if (vphn_get_associativity(cpu, associativity))
14498c2ecf20Sopenharmony_ci		return cpu_to_node(cpu);
14508c2ecf20Sopenharmony_ci
14518c2ecf20Sopenharmony_ci	new_nid = associativity_to_nid(associativity);
14528c2ecf20Sopenharmony_ci	if (new_nid < 0 || !node_possible(new_nid))
14538c2ecf20Sopenharmony_ci		new_nid = first_online_node;
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_ci	if (NODE_DATA(new_nid) == NULL) {
14568c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
14578c2ecf20Sopenharmony_ci		/*
14588c2ecf20Sopenharmony_ci		 * Need to ensure that NODE_DATA is initialized for a node from
14598c2ecf20Sopenharmony_ci		 * available memory (see memblock_alloc_try_nid). If unable to
14608c2ecf20Sopenharmony_ci		 * init the node, then default to nearest node that has memory
14618c2ecf20Sopenharmony_ci		 * installed. Skip onlining a node if the subsystems are not
14628c2ecf20Sopenharmony_ci		 * yet initialized.
14638c2ecf20Sopenharmony_ci		 */
14648c2ecf20Sopenharmony_ci		if (!topology_inited || try_online_node(new_nid))
14658c2ecf20Sopenharmony_ci			new_nid = first_online_node;
14668c2ecf20Sopenharmony_ci#else
14678c2ecf20Sopenharmony_ci		/*
14688c2ecf20Sopenharmony_ci		 * Default to using the nearest node that has memory installed.
14698c2ecf20Sopenharmony_ci		 * Otherwise, it would be necessary to patch the kernel MM code
14708c2ecf20Sopenharmony_ci		 * to deal with more memoryless-node error conditions.
14718c2ecf20Sopenharmony_ci		 */
14728c2ecf20Sopenharmony_ci		new_nid = first_online_node;
14738c2ecf20Sopenharmony_ci#endif
14748c2ecf20Sopenharmony_ci	}
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
14778c2ecf20Sopenharmony_ci		cpu, new_nid);
14788c2ecf20Sopenharmony_ci	return new_nid;
14798c2ecf20Sopenharmony_ci}
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ciint cpu_to_coregroup_id(int cpu)
14828c2ecf20Sopenharmony_ci{
14838c2ecf20Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
14848c2ecf20Sopenharmony_ci	int index;
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci	if (cpu < 0 || cpu > nr_cpu_ids)
14878c2ecf20Sopenharmony_ci		return -1;
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_ci	if (!coregroup_enabled)
14908c2ecf20Sopenharmony_ci		goto out;
14918c2ecf20Sopenharmony_ci
14928c2ecf20Sopenharmony_ci	if (!firmware_has_feature(FW_FEATURE_VPHN))
14938c2ecf20Sopenharmony_ci		goto out;
14948c2ecf20Sopenharmony_ci
14958c2ecf20Sopenharmony_ci	if (vphn_get_associativity(cpu, associativity))
14968c2ecf20Sopenharmony_ci		goto out;
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci	index = of_read_number(associativity, 1);
14998c2ecf20Sopenharmony_ci	if (index > primary_domain_index + 1)
15008c2ecf20Sopenharmony_ci		return of_read_number(&associativity[index - 1], 1);
15018c2ecf20Sopenharmony_ci
15028c2ecf20Sopenharmony_ciout:
15038c2ecf20Sopenharmony_ci	return cpu_to_core_id(cpu);
15048c2ecf20Sopenharmony_ci}
15058c2ecf20Sopenharmony_ci
15068c2ecf20Sopenharmony_cistatic int topology_update_init(void)
15078c2ecf20Sopenharmony_ci{
15088c2ecf20Sopenharmony_ci	topology_inited = 1;
15098c2ecf20Sopenharmony_ci	return 0;
15108c2ecf20Sopenharmony_ci}
15118c2ecf20Sopenharmony_cidevice_initcall(topology_update_init);
15128c2ecf20Sopenharmony_ci#endif /* CONFIG_PPC_SPLPAR */
1513