xref: /kernel/linux/linux-6.6/arch/powerpc/mm/numa.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * pSeries NUMA support
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci#define pr_fmt(fmt) "numa: " fmt
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <linux/threads.h>
1062306a36Sopenharmony_ci#include <linux/memblock.h>
1162306a36Sopenharmony_ci#include <linux/init.h>
1262306a36Sopenharmony_ci#include <linux/mm.h>
1362306a36Sopenharmony_ci#include <linux/mmzone.h>
1462306a36Sopenharmony_ci#include <linux/export.h>
1562306a36Sopenharmony_ci#include <linux/nodemask.h>
1662306a36Sopenharmony_ci#include <linux/cpu.h>
1762306a36Sopenharmony_ci#include <linux/notifier.h>
1862306a36Sopenharmony_ci#include <linux/of.h>
1962306a36Sopenharmony_ci#include <linux/of_address.h>
2062306a36Sopenharmony_ci#include <linux/pfn.h>
2162306a36Sopenharmony_ci#include <linux/cpuset.h>
2262306a36Sopenharmony_ci#include <linux/node.h>
2362306a36Sopenharmony_ci#include <linux/stop_machine.h>
2462306a36Sopenharmony_ci#include <linux/proc_fs.h>
2562306a36Sopenharmony_ci#include <linux/seq_file.h>
2662306a36Sopenharmony_ci#include <linux/uaccess.h>
2762306a36Sopenharmony_ci#include <linux/slab.h>
2862306a36Sopenharmony_ci#include <asm/cputhreads.h>
2962306a36Sopenharmony_ci#include <asm/sparsemem.h>
3062306a36Sopenharmony_ci#include <asm/smp.h>
3162306a36Sopenharmony_ci#include <asm/topology.h>
3262306a36Sopenharmony_ci#include <asm/firmware.h>
3362306a36Sopenharmony_ci#include <asm/paca.h>
3462306a36Sopenharmony_ci#include <asm/hvcall.h>
3562306a36Sopenharmony_ci#include <asm/setup.h>
3662306a36Sopenharmony_ci#include <asm/vdso.h>
3762306a36Sopenharmony_ci#include <asm/vphn.h>
3862306a36Sopenharmony_ci#include <asm/drmem.h>
3962306a36Sopenharmony_ci
/*
 * Non-zero while NUMA handling is enabled. When zero, the associativity
 * lookups in this file return NUMA_NO_NODE.
 */
static int numa_enabled = 1;

/*
 * Not-yet-consumed part of the fake node boundary list parsed by
 * fake_numa_create_new_node(); NULL means nothing is left to parse.
 */
static char *cmdline __initdata;

/* Node id of each CPU; reset_numa_cpu_lookup_table() sets every entry to -1. */
int numa_cpu_lookup_table[NR_CPUS];
/* Per-node mask of the CPUs attached to that node, see map_cpu_to_node(). */
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
/* NOTE(review): presumably the per-node pglist_data; filled in outside this section. */
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
EXPORT_SYMBOL(node_data);

/*
 * 1-based position, inside an associativity array, of the entry that is
 * used as the node id (see __associativity_to_nid()).
 */
static int primary_domain_index;
/* NOTE(review): presumably the memory #address-cells/#size-cells from get_n_mem_cells() - confirm. */
static int n_mem_addr_cells, n_mem_size_cells;

/* Associativity form in use, selected by find_primary_domain_index(). */
#define FORM0_AFFINITY 0
#define FORM1_AFFINITY 1
#define FORM2_AFFINITY 2
static int affinity_form;

/* Upper bound on the number of reference points kept per node. */
#define MAX_DISTANCE_REF_POINTS 4
/* Number of usable entries in distance_ref_points, capped at the bound above. */
static int distance_ref_points_depth;
/* Value of the ibm,associativity-reference-points property. */
static const __be32 *distance_ref_points;
/* FORM1: per node, the associativity value found at each reference point. */
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
/* FORM2: node to node distances; -1 marks a pair with no distance recorded. */
static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
};
/* FORM2: domain id for each position of ibm,numa-lookup-index-table. */
static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci/*
7062306a36Sopenharmony_ci * Allocate node_to_cpumask_map based on number of available nodes
7162306a36Sopenharmony_ci * Requires node_possible_map to be valid.
7262306a36Sopenharmony_ci *
7362306a36Sopenharmony_ci * Note: cpumask_of_node() is not valid until after this is done.
7462306a36Sopenharmony_ci */
7562306a36Sopenharmony_cistatic void __init setup_node_to_cpumask_map(void)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	unsigned int node;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	/* setup nr_node_ids if not done yet */
8062306a36Sopenharmony_ci	if (nr_node_ids == MAX_NUMNODES)
8162306a36Sopenharmony_ci		setup_nr_node_ids();
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	/* allocate the map */
8462306a36Sopenharmony_ci	for_each_node(node)
8562306a36Sopenharmony_ci		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	/* cpumask_of_node() will now work */
8862306a36Sopenharmony_ci	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_cistatic int __init fake_numa_create_new_node(unsigned long end_pfn,
9262306a36Sopenharmony_ci						unsigned int *nid)
9362306a36Sopenharmony_ci{
9462306a36Sopenharmony_ci	unsigned long long mem;
9562306a36Sopenharmony_ci	char *p = cmdline;
9662306a36Sopenharmony_ci	static unsigned int fake_nid;
9762306a36Sopenharmony_ci	static unsigned long long curr_boundary;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	/*
10062306a36Sopenharmony_ci	 * Modify node id, iff we started creating NUMA nodes
10162306a36Sopenharmony_ci	 * We want to continue from where we left of the last time
10262306a36Sopenharmony_ci	 */
10362306a36Sopenharmony_ci	if (fake_nid)
10462306a36Sopenharmony_ci		*nid = fake_nid;
10562306a36Sopenharmony_ci	/*
10662306a36Sopenharmony_ci	 * In case there are no more arguments to parse, the
10762306a36Sopenharmony_ci	 * node_id should be the same as the last fake node id
10862306a36Sopenharmony_ci	 * (we've handled this above).
10962306a36Sopenharmony_ci	 */
11062306a36Sopenharmony_ci	if (!p)
11162306a36Sopenharmony_ci		return 0;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	mem = memparse(p, &p);
11462306a36Sopenharmony_ci	if (!mem)
11562306a36Sopenharmony_ci		return 0;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (mem < curr_boundary)
11862306a36Sopenharmony_ci		return 0;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	curr_boundary = mem;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if ((end_pfn << PAGE_SHIFT) > mem) {
12362306a36Sopenharmony_ci		/*
12462306a36Sopenharmony_ci		 * Skip commas and spaces
12562306a36Sopenharmony_ci		 */
12662306a36Sopenharmony_ci		while (*p == ',' || *p == ' ' || *p == '\t')
12762306a36Sopenharmony_ci			p++;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci		cmdline = p;
13062306a36Sopenharmony_ci		fake_nid++;
13162306a36Sopenharmony_ci		*nid = fake_nid;
13262306a36Sopenharmony_ci		pr_debug("created new fake_node with id %d\n", fake_nid);
13362306a36Sopenharmony_ci		return 1;
13462306a36Sopenharmony_ci	}
13562306a36Sopenharmony_ci	return 0;
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_cistatic void __init reset_numa_cpu_lookup_table(void)
13962306a36Sopenharmony_ci{
14062306a36Sopenharmony_ci	unsigned int cpu;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
14362306a36Sopenharmony_ci		numa_cpu_lookup_table[cpu] = -1;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_civoid map_cpu_to_node(int cpu, int node)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	update_numa_cpu_lookup_table(cpu, node);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) {
15162306a36Sopenharmony_ci		pr_debug("adding cpu %d to node %d\n", cpu, node);
15262306a36Sopenharmony_ci		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
15362306a36Sopenharmony_ci	}
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
15762306a36Sopenharmony_civoid unmap_cpu_from_node(unsigned long cpu)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	int node = numa_cpu_lookup_table[cpu];
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
16262306a36Sopenharmony_ci		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
16362306a36Sopenharmony_ci		pr_debug("removing cpu %lu from node %d\n", cpu, node);
16462306a36Sopenharmony_ci	} else {
16562306a36Sopenharmony_ci		pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node);
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic int __associativity_to_nid(const __be32 *associativity,
17162306a36Sopenharmony_ci				  int max_array_sz)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	int nid;
17462306a36Sopenharmony_ci	/*
17562306a36Sopenharmony_ci	 * primary_domain_index is 1 based array index.
17662306a36Sopenharmony_ci	 */
17762306a36Sopenharmony_ci	int index = primary_domain_index  - 1;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	if (!numa_enabled || index >= max_array_sz)
18062306a36Sopenharmony_ci		return NUMA_NO_NODE;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	nid = of_read_number(&associativity[index], 1);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	/* POWER4 LPAR uses 0xffff as invalid node */
18562306a36Sopenharmony_ci	if (nid == 0xffff || nid >= nr_node_ids)
18662306a36Sopenharmony_ci		nid = NUMA_NO_NODE;
18762306a36Sopenharmony_ci	return nid;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci/*
19062306a36Sopenharmony_ci * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
19162306a36Sopenharmony_ci * info is found.
19262306a36Sopenharmony_ci */
19362306a36Sopenharmony_cistatic int associativity_to_nid(const __be32 *associativity)
19462306a36Sopenharmony_ci{
19562306a36Sopenharmony_ci	int array_sz = of_read_number(associativity, 1);
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	/* Skip the first element in the associativity array */
19862306a36Sopenharmony_ci	return __associativity_to_nid((associativity + 1), array_sz);
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_cistatic int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	int dist;
20462306a36Sopenharmony_ci	int node1, node2;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	node1 = associativity_to_nid(cpu1_assoc);
20762306a36Sopenharmony_ci	node2 = associativity_to_nid(cpu2_assoc);
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	dist = numa_distance_table[node1][node2];
21062306a36Sopenharmony_ci	if (dist <= LOCAL_DISTANCE)
21162306a36Sopenharmony_ci		return 0;
21262306a36Sopenharmony_ci	else if (dist <= REMOTE_DISTANCE)
21362306a36Sopenharmony_ci		return 1;
21462306a36Sopenharmony_ci	else
21562306a36Sopenharmony_ci		return 2;
21662306a36Sopenharmony_ci}
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_cistatic int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	int dist = 0;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	int i, index;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	for (i = 0; i < distance_ref_points_depth; i++) {
22562306a36Sopenharmony_ci		index = be32_to_cpu(distance_ref_points[i]);
22662306a36Sopenharmony_ci		if (cpu1_assoc[index] == cpu2_assoc[index])
22762306a36Sopenharmony_ci			break;
22862306a36Sopenharmony_ci		dist++;
22962306a36Sopenharmony_ci	}
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	return dist;
23262306a36Sopenharmony_ci}
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ciint cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
23562306a36Sopenharmony_ci{
23662306a36Sopenharmony_ci	/* We should not get called with FORM0 */
23762306a36Sopenharmony_ci	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
23862306a36Sopenharmony_ci	if (affinity_form == FORM1_AFFINITY)
23962306a36Sopenharmony_ci		return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
24062306a36Sopenharmony_ci	return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci/* must hold reference to node during call */
24462306a36Sopenharmony_cistatic const __be32 *of_get_associativity(struct device_node *dev)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	return of_get_property(dev, "ibm,associativity", NULL);
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ciint __node_distance(int a, int b)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	int i;
25262306a36Sopenharmony_ci	int distance = LOCAL_DISTANCE;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	if (affinity_form == FORM2_AFFINITY)
25562306a36Sopenharmony_ci		return numa_distance_table[a][b];
25662306a36Sopenharmony_ci	else if (affinity_form == FORM0_AFFINITY)
25762306a36Sopenharmony_ci		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	for (i = 0; i < distance_ref_points_depth; i++) {
26062306a36Sopenharmony_ci		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
26162306a36Sopenharmony_ci			break;
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci		/* Double the distance for each NUMA level */
26462306a36Sopenharmony_ci		distance *= 2;
26562306a36Sopenharmony_ci	}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	return distance;
26862306a36Sopenharmony_ci}
26962306a36Sopenharmony_ciEXPORT_SYMBOL(__node_distance);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci/* Returns the nid associated with the given device tree node,
27262306a36Sopenharmony_ci * or -1 if not found.
27362306a36Sopenharmony_ci */
27462306a36Sopenharmony_cistatic int of_node_to_nid_single(struct device_node *device)
27562306a36Sopenharmony_ci{
27662306a36Sopenharmony_ci	int nid = NUMA_NO_NODE;
27762306a36Sopenharmony_ci	const __be32 *tmp;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	tmp = of_get_associativity(device);
28062306a36Sopenharmony_ci	if (tmp)
28162306a36Sopenharmony_ci		nid = associativity_to_nid(tmp);
28262306a36Sopenharmony_ci	return nid;
28362306a36Sopenharmony_ci}
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci/* Walk the device tree upwards, looking for an associativity id */
28662306a36Sopenharmony_ciint of_node_to_nid(struct device_node *device)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	int nid = NUMA_NO_NODE;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	of_node_get(device);
29162306a36Sopenharmony_ci	while (device) {
29262306a36Sopenharmony_ci		nid = of_node_to_nid_single(device);
29362306a36Sopenharmony_ci		if (nid != -1)
29462306a36Sopenharmony_ci			break;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		device = of_get_next_parent(device);
29762306a36Sopenharmony_ci	}
29862306a36Sopenharmony_ci	of_node_put(device);
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	return nid;
30162306a36Sopenharmony_ci}
30262306a36Sopenharmony_ciEXPORT_SYMBOL(of_node_to_nid);
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_cistatic void __initialize_form1_numa_distance(const __be32 *associativity,
30562306a36Sopenharmony_ci					     int max_array_sz)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	int i, nid;
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	if (affinity_form != FORM1_AFFINITY)
31062306a36Sopenharmony_ci		return;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	nid = __associativity_to_nid(associativity, max_array_sz);
31362306a36Sopenharmony_ci	if (nid != NUMA_NO_NODE) {
31462306a36Sopenharmony_ci		for (i = 0; i < distance_ref_points_depth; i++) {
31562306a36Sopenharmony_ci			const __be32 *entry;
31662306a36Sopenharmony_ci			int index = be32_to_cpu(distance_ref_points[i]) - 1;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci			/*
31962306a36Sopenharmony_ci			 * broken hierarchy, return with broken distance table
32062306a36Sopenharmony_ci			 */
32162306a36Sopenharmony_ci			if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
32262306a36Sopenharmony_ci				return;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci			entry = &associativity[index];
32562306a36Sopenharmony_ci			distance_lookup_table[nid][i] = of_read_number(entry, 1);
32662306a36Sopenharmony_ci		}
32762306a36Sopenharmony_ci	}
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_cistatic void initialize_form1_numa_distance(const __be32 *associativity)
33162306a36Sopenharmony_ci{
33262306a36Sopenharmony_ci	int array_sz;
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	array_sz = of_read_number(associativity, 1);
33562306a36Sopenharmony_ci	/* Skip the first element in the associativity array */
33662306a36Sopenharmony_ci	__initialize_form1_numa_distance(associativity + 1, array_sz);
33762306a36Sopenharmony_ci}
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci/*
34062306a36Sopenharmony_ci * Used to update distance information w.r.t newly added node.
34162306a36Sopenharmony_ci */
34262306a36Sopenharmony_civoid update_numa_distance(struct device_node *node)
34362306a36Sopenharmony_ci{
34462306a36Sopenharmony_ci	int nid;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	if (affinity_form == FORM0_AFFINITY)
34762306a36Sopenharmony_ci		return;
34862306a36Sopenharmony_ci	else if (affinity_form == FORM1_AFFINITY) {
34962306a36Sopenharmony_ci		const __be32 *associativity;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci		associativity = of_get_associativity(node);
35262306a36Sopenharmony_ci		if (!associativity)
35362306a36Sopenharmony_ci			return;
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci		initialize_form1_numa_distance(associativity);
35662306a36Sopenharmony_ci		return;
35762306a36Sopenharmony_ci	}
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	/* FORM2 affinity  */
36062306a36Sopenharmony_ci	nid = of_node_to_nid_single(node);
36162306a36Sopenharmony_ci	if (nid == NUMA_NO_NODE)
36262306a36Sopenharmony_ci		return;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	/*
36562306a36Sopenharmony_ci	 * With FORM2 we expect NUMA distance of all possible NUMA
36662306a36Sopenharmony_ci	 * nodes to be provided during boot.
36762306a36Sopenharmony_ci	 */
36862306a36Sopenharmony_ci	WARN(numa_distance_table[nid][nid] == -1,
36962306a36Sopenharmony_ci	     "NUMA distance details for node %d not provided\n", nid);
37062306a36Sopenharmony_ci}
37162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(update_numa_distance);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci/*
37462306a36Sopenharmony_ci * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
37562306a36Sopenharmony_ci * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
37662306a36Sopenharmony_ci */
37762306a36Sopenharmony_cistatic void __init initialize_form2_numa_distance_lookup_table(void)
37862306a36Sopenharmony_ci{
37962306a36Sopenharmony_ci	int i, j;
38062306a36Sopenharmony_ci	struct device_node *root;
38162306a36Sopenharmony_ci	const __u8 *form2_distances;
38262306a36Sopenharmony_ci	const __be32 *numa_lookup_index;
38362306a36Sopenharmony_ci	int form2_distances_length;
38462306a36Sopenharmony_ci	int max_numa_index, distance_index;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL))
38762306a36Sopenharmony_ci		root = of_find_node_by_path("/ibm,opal");
38862306a36Sopenharmony_ci	else
38962306a36Sopenharmony_ci		root = of_find_node_by_path("/rtas");
39062306a36Sopenharmony_ci	if (!root)
39162306a36Sopenharmony_ci		root = of_find_node_by_path("/");
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
39462306a36Sopenharmony_ci	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	/* first element of the array is the size and is encode-int */
39762306a36Sopenharmony_ci	form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL);
39862306a36Sopenharmony_ci	form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1);
39962306a36Sopenharmony_ci	/* Skip the size which is encoded int */
40062306a36Sopenharmony_ci	form2_distances += sizeof(__be32);
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
40362306a36Sopenharmony_ci		 form2_distances_length, max_numa_index);
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	for (i = 0; i < max_numa_index; i++)
40662306a36Sopenharmony_ci		/* +1 skip the max_numa_index in the property */
40762306a36Sopenharmony_ci		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci	if (form2_distances_length != max_numa_index * max_numa_index) {
41162306a36Sopenharmony_ci		WARN(1, "Wrong NUMA distance information\n");
41262306a36Sopenharmony_ci		form2_distances = NULL; // don't use it
41362306a36Sopenharmony_ci	}
41462306a36Sopenharmony_ci	distance_index = 0;
41562306a36Sopenharmony_ci	for (i = 0;  i < max_numa_index; i++) {
41662306a36Sopenharmony_ci		for (j = 0; j < max_numa_index; j++) {
41762306a36Sopenharmony_ci			int nodeA = numa_id_index_table[i];
41862306a36Sopenharmony_ci			int nodeB = numa_id_index_table[j];
41962306a36Sopenharmony_ci			int dist;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci			if (form2_distances)
42262306a36Sopenharmony_ci				dist = form2_distances[distance_index++];
42362306a36Sopenharmony_ci			else if (nodeA == nodeB)
42462306a36Sopenharmony_ci				dist = LOCAL_DISTANCE;
42562306a36Sopenharmony_ci			else
42662306a36Sopenharmony_ci				dist = REMOTE_DISTANCE;
42762306a36Sopenharmony_ci			numa_distance_table[nodeA][nodeB] = dist;
42862306a36Sopenharmony_ci			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
42962306a36Sopenharmony_ci		}
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	of_node_put(root);
43362306a36Sopenharmony_ci}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_cistatic int __init find_primary_domain_index(void)
43662306a36Sopenharmony_ci{
43762306a36Sopenharmony_ci	int index;
43862306a36Sopenharmony_ci	struct device_node *root;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	/*
44162306a36Sopenharmony_ci	 * Check for which form of affinity.
44262306a36Sopenharmony_ci	 */
44362306a36Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL)) {
44462306a36Sopenharmony_ci		affinity_form = FORM1_AFFINITY;
44562306a36Sopenharmony_ci	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
44662306a36Sopenharmony_ci		pr_debug("Using form 2 affinity\n");
44762306a36Sopenharmony_ci		affinity_form = FORM2_AFFINITY;
44862306a36Sopenharmony_ci	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
44962306a36Sopenharmony_ci		pr_debug("Using form 1 affinity\n");
45062306a36Sopenharmony_ci		affinity_form = FORM1_AFFINITY;
45162306a36Sopenharmony_ci	} else
45262306a36Sopenharmony_ci		affinity_form = FORM0_AFFINITY;
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_OPAL))
45562306a36Sopenharmony_ci		root = of_find_node_by_path("/ibm,opal");
45662306a36Sopenharmony_ci	else
45762306a36Sopenharmony_ci		root = of_find_node_by_path("/rtas");
45862306a36Sopenharmony_ci	if (!root)
45962306a36Sopenharmony_ci		root = of_find_node_by_path("/");
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	/*
46262306a36Sopenharmony_ci	 * This property is a set of 32-bit integers, each representing
46362306a36Sopenharmony_ci	 * an index into the ibm,associativity nodes.
46462306a36Sopenharmony_ci	 *
46562306a36Sopenharmony_ci	 * With form 0 affinity the first integer is for an SMP configuration
46662306a36Sopenharmony_ci	 * (should be all 0's) and the second is for a normal NUMA
46762306a36Sopenharmony_ci	 * configuration. We have only one level of NUMA.
46862306a36Sopenharmony_ci	 *
46962306a36Sopenharmony_ci	 * With form 1 affinity the first integer is the most significant
47062306a36Sopenharmony_ci	 * NUMA boundary and the following are progressively less significant
47162306a36Sopenharmony_ci	 * boundaries. There can be more than one level of NUMA.
47262306a36Sopenharmony_ci	 */
47362306a36Sopenharmony_ci	distance_ref_points = of_get_property(root,
47462306a36Sopenharmony_ci					"ibm,associativity-reference-points",
47562306a36Sopenharmony_ci					&distance_ref_points_depth);
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	if (!distance_ref_points) {
47862306a36Sopenharmony_ci		pr_debug("ibm,associativity-reference-points not found.\n");
47962306a36Sopenharmony_ci		goto err;
48062306a36Sopenharmony_ci	}
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	distance_ref_points_depth /= sizeof(int);
48362306a36Sopenharmony_ci	if (affinity_form == FORM0_AFFINITY) {
48462306a36Sopenharmony_ci		if (distance_ref_points_depth < 2) {
48562306a36Sopenharmony_ci			pr_warn("short ibm,associativity-reference-points\n");
48662306a36Sopenharmony_ci			goto err;
48762306a36Sopenharmony_ci		}
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci		index = of_read_number(&distance_ref_points[1], 1);
49062306a36Sopenharmony_ci	} else {
49162306a36Sopenharmony_ci		/*
49262306a36Sopenharmony_ci		 * Both FORM1 and FORM2 affinity find the primary domain details
49362306a36Sopenharmony_ci		 * at the same offset.
49462306a36Sopenharmony_ci		 */
49562306a36Sopenharmony_ci		index = of_read_number(distance_ref_points, 1);
49662306a36Sopenharmony_ci	}
49762306a36Sopenharmony_ci	/*
49862306a36Sopenharmony_ci	 * Warn and cap if the hardware supports more than
49962306a36Sopenharmony_ci	 * MAX_DISTANCE_REF_POINTS domains.
50062306a36Sopenharmony_ci	 */
50162306a36Sopenharmony_ci	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
50262306a36Sopenharmony_ci		pr_warn("distance array capped at %d entries\n",
50362306a36Sopenharmony_ci			MAX_DISTANCE_REF_POINTS);
50462306a36Sopenharmony_ci		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	of_node_put(root);
50862306a36Sopenharmony_ci	return index;
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_cierr:
51162306a36Sopenharmony_ci	of_node_put(root);
51262306a36Sopenharmony_ci	return -1;
51362306a36Sopenharmony_ci}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_cistatic void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
51662306a36Sopenharmony_ci{
51762306a36Sopenharmony_ci	struct device_node *memory = NULL;
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	memory = of_find_node_by_type(memory, "memory");
52062306a36Sopenharmony_ci	if (!memory)
52162306a36Sopenharmony_ci		panic("numa.c: No memory nodes found!");
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	*n_addr_cells = of_n_addr_cells(memory);
52462306a36Sopenharmony_ci	*n_size_cells = of_n_size_cells(memory);
52562306a36Sopenharmony_ci	of_node_put(memory);
52662306a36Sopenharmony_ci}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_cistatic unsigned long read_n_cells(int n, const __be32 **buf)
52962306a36Sopenharmony_ci{
53062306a36Sopenharmony_ci	unsigned long result = 0;
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	while (n--) {
53362306a36Sopenharmony_ci		result = (result << 32) | of_read_number(*buf, 1);
53462306a36Sopenharmony_ci		(*buf)++;
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci	return result;
53762306a36Sopenharmony_ci}
53862306a36Sopenharmony_ci
/*
 * Decoded header of the ibm,associativity-lookup-arrays property plus a
 * pointer to its payload, as filled in by of_get_assoc_arrays().
 */
struct assoc_arrays {
	u32	n_arrays;	/* number of associativity arrays (N) */
	u32	array_sz;	/* number of cells in each array (M) */
	const __be32 *arrays;	/* first cell of the first array */
};
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci/*
54662306a36Sopenharmony_ci * Retrieve and validate the list of associativity arrays for drconf
54762306a36Sopenharmony_ci * memory from the ibm,associativity-lookup-arrays property of the
54862306a36Sopenharmony_ci * device tree..
54962306a36Sopenharmony_ci *
55062306a36Sopenharmony_ci * The layout of the ibm,associativity-lookup-arrays property is a number N
55162306a36Sopenharmony_ci * indicating the number of associativity arrays, followed by a number M
55262306a36Sopenharmony_ci * indicating the size of each associativity array, followed by a list
55362306a36Sopenharmony_ci * of N associativity arrays.
55462306a36Sopenharmony_ci */
55562306a36Sopenharmony_cistatic int of_get_assoc_arrays(struct assoc_arrays *aa)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	struct device_node *memory;
55862306a36Sopenharmony_ci	const __be32 *prop;
55962306a36Sopenharmony_ci	u32 len;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
56262306a36Sopenharmony_ci	if (!memory)
56362306a36Sopenharmony_ci		return -1;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
56662306a36Sopenharmony_ci	if (!prop || len < 2 * sizeof(unsigned int)) {
56762306a36Sopenharmony_ci		of_node_put(memory);
56862306a36Sopenharmony_ci		return -1;
56962306a36Sopenharmony_ci	}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	aa->n_arrays = of_read_number(prop++, 1);
57262306a36Sopenharmony_ci	aa->array_sz = of_read_number(prop++, 1);
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	of_node_put(memory);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	/* Now that we know the number of arrays and size of each array,
57762306a36Sopenharmony_ci	 * revalidate the size of the property read in.
57862306a36Sopenharmony_ci	 */
57962306a36Sopenharmony_ci	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
58062306a36Sopenharmony_ci		return -1;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	aa->arrays = prop;
58362306a36Sopenharmony_ci	return 0;
58462306a36Sopenharmony_ci}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_cistatic int __init get_nid_and_numa_distance(struct drmem_lmb *lmb)
58762306a36Sopenharmony_ci{
58862306a36Sopenharmony_ci	struct assoc_arrays aa = { .arrays = NULL };
58962306a36Sopenharmony_ci	int default_nid = NUMA_NO_NODE;
59062306a36Sopenharmony_ci	int nid = default_nid;
59162306a36Sopenharmony_ci	int rc, index;
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	if ((primary_domain_index < 0) || !numa_enabled)
59462306a36Sopenharmony_ci		return default_nid;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	rc = of_get_assoc_arrays(&aa);
59762306a36Sopenharmony_ci	if (rc)
59862306a36Sopenharmony_ci		return default_nid;
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (primary_domain_index <= aa.array_sz &&
60162306a36Sopenharmony_ci	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
60262306a36Sopenharmony_ci		const __be32 *associativity;
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci		index = lmb->aa_index * aa.array_sz;
60562306a36Sopenharmony_ci		associativity = &aa.arrays[index];
60662306a36Sopenharmony_ci		nid = __associativity_to_nid(associativity, aa.array_sz);
60762306a36Sopenharmony_ci		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
60862306a36Sopenharmony_ci			/*
60962306a36Sopenharmony_ci			 * lookup array associativity entries have
61062306a36Sopenharmony_ci			 * no length of the array as the first element.
61162306a36Sopenharmony_ci			 */
61262306a36Sopenharmony_ci			__initialize_form1_numa_distance(associativity, aa.array_sz);
61362306a36Sopenharmony_ci		}
61462306a36Sopenharmony_ci	}
61562306a36Sopenharmony_ci	return nid;
61662306a36Sopenharmony_ci}
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci/*
61962306a36Sopenharmony_ci * This is like of_node_to_nid_single() for memory represented in the
62062306a36Sopenharmony_ci * ibm,dynamic-reconfiguration-memory node.
62162306a36Sopenharmony_ci */
62262306a36Sopenharmony_ciint of_drconf_to_nid_single(struct drmem_lmb *lmb)
62362306a36Sopenharmony_ci{
62462306a36Sopenharmony_ci	struct assoc_arrays aa = { .arrays = NULL };
62562306a36Sopenharmony_ci	int default_nid = NUMA_NO_NODE;
62662306a36Sopenharmony_ci	int nid = default_nid;
62762306a36Sopenharmony_ci	int rc, index;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	if ((primary_domain_index < 0) || !numa_enabled)
63062306a36Sopenharmony_ci		return default_nid;
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	rc = of_get_assoc_arrays(&aa);
63362306a36Sopenharmony_ci	if (rc)
63462306a36Sopenharmony_ci		return default_nid;
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	if (primary_domain_index <= aa.array_sz &&
63762306a36Sopenharmony_ci	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
63862306a36Sopenharmony_ci		const __be32 *associativity;
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci		index = lmb->aa_index * aa.array_sz;
64162306a36Sopenharmony_ci		associativity = &aa.arrays[index];
64262306a36Sopenharmony_ci		nid = __associativity_to_nid(associativity, aa.array_sz);
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci	return nid;
64562306a36Sopenharmony_ci}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci#ifdef CONFIG_PPC_SPLPAR
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_cistatic int __vphn_get_associativity(long lcpu, __be32 *associativity)
65062306a36Sopenharmony_ci{
65162306a36Sopenharmony_ci	long rc, hwid;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	/*
65462306a36Sopenharmony_ci	 * On a shared lpar, device tree will not have node associativity.
65562306a36Sopenharmony_ci	 * At this time lppaca, or its __old_status field may not be
65662306a36Sopenharmony_ci	 * updated. Hence kernel cannot detect if its on a shared lpar. So
65762306a36Sopenharmony_ci	 * request an explicit associativity irrespective of whether the
65862306a36Sopenharmony_ci	 * lpar is shared or dedicated. Use the device tree property as a
65962306a36Sopenharmony_ci	 * fallback. cpu_to_phys_id is only valid between
66062306a36Sopenharmony_ci	 * smp_setup_cpu_maps() and smp_setup_pacas().
66162306a36Sopenharmony_ci	 */
66262306a36Sopenharmony_ci	if (firmware_has_feature(FW_FEATURE_VPHN)) {
66362306a36Sopenharmony_ci		if (cpu_to_phys_id)
66462306a36Sopenharmony_ci			hwid = cpu_to_phys_id[lcpu];
66562306a36Sopenharmony_ci		else
66662306a36Sopenharmony_ci			hwid = get_hard_smp_processor_id(lcpu);
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
66962306a36Sopenharmony_ci		if (rc == H_SUCCESS)
67062306a36Sopenharmony_ci			return 0;
67162306a36Sopenharmony_ci	}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	return -1;
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_cistatic int vphn_get_nid(long lcpu)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	if (!__vphn_get_associativity(lcpu, associativity))
68262306a36Sopenharmony_ci		return associativity_to_nid(associativity);
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	return NUMA_NO_NODE;
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci#else
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_cistatic int __vphn_get_associativity(long lcpu, __be32 *associativity)
69062306a36Sopenharmony_ci{
69162306a36Sopenharmony_ci	return -1;
69262306a36Sopenharmony_ci}
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_cistatic int vphn_get_nid(long unused)
69562306a36Sopenharmony_ci{
69662306a36Sopenharmony_ci	return NUMA_NO_NODE;
69762306a36Sopenharmony_ci}
69862306a36Sopenharmony_ci#endif  /* CONFIG_PPC_SPLPAR */
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci/*
70162306a36Sopenharmony_ci * Figure out to which domain a cpu belongs and stick it there.
70262306a36Sopenharmony_ci * Return the id of the domain used.
70362306a36Sopenharmony_ci */
70462306a36Sopenharmony_cistatic int numa_setup_cpu(unsigned long lcpu)
70562306a36Sopenharmony_ci{
70662306a36Sopenharmony_ci	struct device_node *cpu;
70762306a36Sopenharmony_ci	int fcpu = cpu_first_thread_sibling(lcpu);
70862306a36Sopenharmony_ci	int nid = NUMA_NO_NODE;
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	if (!cpu_present(lcpu)) {
71162306a36Sopenharmony_ci		set_cpu_numa_node(lcpu, first_online_node);
71262306a36Sopenharmony_ci		return first_online_node;
71362306a36Sopenharmony_ci	}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	/*
71662306a36Sopenharmony_ci	 * If a valid cpu-to-node mapping is already available, use it
71762306a36Sopenharmony_ci	 * directly instead of querying the firmware, since it represents
71862306a36Sopenharmony_ci	 * the most recent mapping notified to us by the platform (eg: VPHN).
71962306a36Sopenharmony_ci	 * Since cpu_to_node binding remains the same for all threads in the
72062306a36Sopenharmony_ci	 * core. If a valid cpu-to-node mapping is already available, for
72162306a36Sopenharmony_ci	 * the first thread in the core, use it.
72262306a36Sopenharmony_ci	 */
72362306a36Sopenharmony_ci	nid = numa_cpu_lookup_table[fcpu];
72462306a36Sopenharmony_ci	if (nid >= 0) {
72562306a36Sopenharmony_ci		map_cpu_to_node(lcpu, nid);
72662306a36Sopenharmony_ci		return nid;
72762306a36Sopenharmony_ci	}
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	nid = vphn_get_nid(lcpu);
73062306a36Sopenharmony_ci	if (nid != NUMA_NO_NODE)
73162306a36Sopenharmony_ci		goto out_present;
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	cpu = of_get_cpu_node(lcpu, NULL);
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	if (!cpu) {
73662306a36Sopenharmony_ci		WARN_ON(1);
73762306a36Sopenharmony_ci		if (cpu_present(lcpu))
73862306a36Sopenharmony_ci			goto out_present;
73962306a36Sopenharmony_ci		else
74062306a36Sopenharmony_ci			goto out;
74162306a36Sopenharmony_ci	}
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	nid = of_node_to_nid_single(cpu);
74462306a36Sopenharmony_ci	of_node_put(cpu);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ciout_present:
74762306a36Sopenharmony_ci	if (nid < 0 || !node_possible(nid))
74862306a36Sopenharmony_ci		nid = first_online_node;
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	/*
75162306a36Sopenharmony_ci	 * Update for the first thread of the core. All threads of a core
75262306a36Sopenharmony_ci	 * have to be part of the same node. This not only avoids querying
75362306a36Sopenharmony_ci	 * for every other thread in the core, but always avoids a case
75462306a36Sopenharmony_ci	 * where virtual node associativity change causes subsequent threads
75562306a36Sopenharmony_ci	 * of a core to be associated with different nid. However if first
75662306a36Sopenharmony_ci	 * thread is already online, expect it to have a valid mapping.
75762306a36Sopenharmony_ci	 */
75862306a36Sopenharmony_ci	if (fcpu != lcpu) {
75962306a36Sopenharmony_ci		WARN_ON(cpu_online(fcpu));
76062306a36Sopenharmony_ci		map_cpu_to_node(fcpu, nid);
76162306a36Sopenharmony_ci	}
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	map_cpu_to_node(lcpu, nid);
76462306a36Sopenharmony_ciout:
76562306a36Sopenharmony_ci	return nid;
76662306a36Sopenharmony_ci}
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_cistatic void verify_cpu_node_mapping(int cpu, int node)
76962306a36Sopenharmony_ci{
77062306a36Sopenharmony_ci	int base, sibling, i;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	/* Verify that all the threads in the core belong to the same node */
77362306a36Sopenharmony_ci	base = cpu_first_thread_sibling(cpu);
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	for (i = 0; i < threads_per_core; i++) {
77662306a36Sopenharmony_ci		sibling = base + i;
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci		if (sibling == cpu || cpu_is_offline(sibling))
77962306a36Sopenharmony_ci			continue;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci		if (cpu_to_node(sibling) != node) {
78262306a36Sopenharmony_ci			WARN(1, "CPU thread siblings %d and %d don't belong"
78362306a36Sopenharmony_ci				" to the same node!\n", cpu, sibling);
78462306a36Sopenharmony_ci			break;
78562306a36Sopenharmony_ci		}
78662306a36Sopenharmony_ci	}
78762306a36Sopenharmony_ci}
78862306a36Sopenharmony_ci
/*
 * CPU hotplug "prepare" callback: assign @cpu to its node and check that
 * its online core siblings agree. Always returns 0.
 *
 * Must run before sched domains notifier.
 */
static int ppc_numa_cpu_prepare(unsigned int cpu)
{
	const int node = numa_setup_cpu(cpu);

	verify_cpu_node_mapping(cpu, node);

	return 0;
}
79862306a36Sopenharmony_ci
/*
 * CPU hotplug teardown counterpart of ppc_numa_cpu_prepare(): nothing is
 * undone here; it always returns 0.
 */
static int ppc_numa_cpu_dead(unsigned int cpu)
{
	return 0;
}
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci/*
80562306a36Sopenharmony_ci * Check and possibly modify a memory region to enforce the memory limit.
80662306a36Sopenharmony_ci *
80762306a36Sopenharmony_ci * Returns the size the region should have to enforce the memory limit.
80862306a36Sopenharmony_ci * This will either be the original value of size, a truncated value,
80962306a36Sopenharmony_ci * or zero. If the returned value of size is 0 the region should be
81062306a36Sopenharmony_ci * discarded as it lies wholly above the memory limit.
81162306a36Sopenharmony_ci */
81262306a36Sopenharmony_cistatic unsigned long __init numa_enforce_memory_limit(unsigned long start,
81362306a36Sopenharmony_ci						      unsigned long size)
81462306a36Sopenharmony_ci{
81562306a36Sopenharmony_ci	/*
81662306a36Sopenharmony_ci	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
81762306a36Sopenharmony_ci	 * we've already adjusted it for the limit and it takes care of
81862306a36Sopenharmony_ci	 * having memory holes below the limit.  Also, in the case of
81962306a36Sopenharmony_ci	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
82062306a36Sopenharmony_ci	 */
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	if (start + size <= memblock_end_of_DRAM())
82362306a36Sopenharmony_ci		return size;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	if (start >= memblock_end_of_DRAM())
82662306a36Sopenharmony_ci		return 0;
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	return memblock_end_of_DRAM() - start;
82962306a36Sopenharmony_ci}
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci/*
83262306a36Sopenharmony_ci * Reads the counter for a given entry in
83362306a36Sopenharmony_ci * linux,drconf-usable-memory property
83462306a36Sopenharmony_ci */
83562306a36Sopenharmony_cistatic inline int __init read_usm_ranges(const __be32 **usm)
83662306a36Sopenharmony_ci{
83762306a36Sopenharmony_ci	/*
83862306a36Sopenharmony_ci	 * For each lmb in ibm,dynamic-memory a corresponding
83962306a36Sopenharmony_ci	 * entry in linux,drconf-usable-memory property contains
84062306a36Sopenharmony_ci	 * a counter followed by that many (base, size) duple.
84162306a36Sopenharmony_ci	 * read the counter from linux,drconf-usable-memory
84262306a36Sopenharmony_ci	 */
84362306a36Sopenharmony_ci	return read_n_cells(n_mem_size_cells, usm);
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci/*
84762306a36Sopenharmony_ci * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
84862306a36Sopenharmony_ci * node.  This assumes n_mem_{addr,size}_cells have been set.
84962306a36Sopenharmony_ci */
85062306a36Sopenharmony_cistatic int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
85162306a36Sopenharmony_ci					const __be32 **usm,
85262306a36Sopenharmony_ci					void *data)
85362306a36Sopenharmony_ci{
85462306a36Sopenharmony_ci	unsigned int ranges, is_kexec_kdump = 0;
85562306a36Sopenharmony_ci	unsigned long base, size, sz;
85662306a36Sopenharmony_ci	int nid;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	/*
85962306a36Sopenharmony_ci	 * Skip this block if the reserved bit is set in flags (0x80)
86062306a36Sopenharmony_ci	 * or if the block is not assigned to this partition (0x8)
86162306a36Sopenharmony_ci	 */
86262306a36Sopenharmony_ci	if ((lmb->flags & DRCONF_MEM_RESERVED)
86362306a36Sopenharmony_ci	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
86462306a36Sopenharmony_ci		return 0;
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	if (*usm)
86762306a36Sopenharmony_ci		is_kexec_kdump = 1;
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	base = lmb->base_addr;
87062306a36Sopenharmony_ci	size = drmem_lmb_size();
87162306a36Sopenharmony_ci	ranges = 1;
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	if (is_kexec_kdump) {
87462306a36Sopenharmony_ci		ranges = read_usm_ranges(usm);
87562306a36Sopenharmony_ci		if (!ranges) /* there are no (base, size) duple */
87662306a36Sopenharmony_ci			return 0;
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	do {
88062306a36Sopenharmony_ci		if (is_kexec_kdump) {
88162306a36Sopenharmony_ci			base = read_n_cells(n_mem_addr_cells, usm);
88262306a36Sopenharmony_ci			size = read_n_cells(n_mem_size_cells, usm);
88362306a36Sopenharmony_ci		}
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci		nid = get_nid_and_numa_distance(lmb);
88662306a36Sopenharmony_ci		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
88762306a36Sopenharmony_ci					  &nid);
88862306a36Sopenharmony_ci		node_set_online(nid);
88962306a36Sopenharmony_ci		sz = numa_enforce_memory_limit(base, size);
89062306a36Sopenharmony_ci		if (sz)
89162306a36Sopenharmony_ci			memblock_set_node(base, sz, &memblock.memory, nid);
89262306a36Sopenharmony_ci	} while (--ranges);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	return 0;
89562306a36Sopenharmony_ci}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cistatic int __init parse_numa_properties(void)
89862306a36Sopenharmony_ci{
89962306a36Sopenharmony_ci	struct device_node *memory;
90062306a36Sopenharmony_ci	int default_nid = 0;
90162306a36Sopenharmony_ci	unsigned long i;
90262306a36Sopenharmony_ci	const __be32 *associativity;
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	if (numa_enabled == 0) {
90562306a36Sopenharmony_ci		pr_warn("disabled by user\n");
90662306a36Sopenharmony_ci		return -1;
90762306a36Sopenharmony_ci	}
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	primary_domain_index = find_primary_domain_index();
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	if (primary_domain_index < 0) {
91262306a36Sopenharmony_ci		/*
91362306a36Sopenharmony_ci		 * if we fail to parse primary_domain_index from device tree
91462306a36Sopenharmony_ci		 * mark the numa disabled, boot with numa disabled.
91562306a36Sopenharmony_ci		 */
91662306a36Sopenharmony_ci		numa_enabled = false;
91762306a36Sopenharmony_ci		return primary_domain_index;
91862306a36Sopenharmony_ci	}
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci	pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index);
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci	/*
92362306a36Sopenharmony_ci	 * If it is FORM2 initialize the distance table here.
92462306a36Sopenharmony_ci	 */
92562306a36Sopenharmony_ci	if (affinity_form == FORM2_AFFINITY)
92662306a36Sopenharmony_ci		initialize_form2_numa_distance_lookup_table();
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	/*
92962306a36Sopenharmony_ci	 * Even though we connect cpus to numa domains later in SMP
93062306a36Sopenharmony_ci	 * init, we need to know the node ids now. This is because
93162306a36Sopenharmony_ci	 * each node to be onlined must have NODE_DATA etc backing it.
93262306a36Sopenharmony_ci	 */
93362306a36Sopenharmony_ci	for_each_present_cpu(i) {
93462306a36Sopenharmony_ci		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
93562306a36Sopenharmony_ci		struct device_node *cpu;
93662306a36Sopenharmony_ci		int nid = NUMA_NO_NODE;
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
94162306a36Sopenharmony_ci			nid = associativity_to_nid(vphn_assoc);
94262306a36Sopenharmony_ci			initialize_form1_numa_distance(vphn_assoc);
94362306a36Sopenharmony_ci		} else {
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci			/*
94662306a36Sopenharmony_ci			 * Don't fall back to default_nid yet -- we will plug
94762306a36Sopenharmony_ci			 * cpus into nodes once the memory scan has discovered
94862306a36Sopenharmony_ci			 * the topology.
94962306a36Sopenharmony_ci			 */
95062306a36Sopenharmony_ci			cpu = of_get_cpu_node(i, NULL);
95162306a36Sopenharmony_ci			BUG_ON(!cpu);
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_ci			associativity = of_get_associativity(cpu);
95462306a36Sopenharmony_ci			if (associativity) {
95562306a36Sopenharmony_ci				nid = associativity_to_nid(associativity);
95662306a36Sopenharmony_ci				initialize_form1_numa_distance(associativity);
95762306a36Sopenharmony_ci			}
95862306a36Sopenharmony_ci			of_node_put(cpu);
95962306a36Sopenharmony_ci		}
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci		/* node_set_online() is an UB if 'nid' is negative */
96262306a36Sopenharmony_ci		if (likely(nid >= 0))
96362306a36Sopenharmony_ci			node_set_online(nid);
96462306a36Sopenharmony_ci	}
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	for_each_node_by_type(memory, "memory") {
96962306a36Sopenharmony_ci		unsigned long start;
97062306a36Sopenharmony_ci		unsigned long size;
97162306a36Sopenharmony_ci		int nid;
97262306a36Sopenharmony_ci		int ranges;
97362306a36Sopenharmony_ci		const __be32 *memcell_buf;
97462306a36Sopenharmony_ci		unsigned int len;
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci		memcell_buf = of_get_property(memory,
97762306a36Sopenharmony_ci			"linux,usable-memory", &len);
97862306a36Sopenharmony_ci		if (!memcell_buf || len <= 0)
97962306a36Sopenharmony_ci			memcell_buf = of_get_property(memory, "reg", &len);
98062306a36Sopenharmony_ci		if (!memcell_buf || len <= 0)
98162306a36Sopenharmony_ci			continue;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci		/* ranges in cell */
98462306a36Sopenharmony_ci		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
98562306a36Sopenharmony_cinew_range:
98662306a36Sopenharmony_ci		/* these are order-sensitive, and modify the buffer pointer */
98762306a36Sopenharmony_ci		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
98862306a36Sopenharmony_ci		size = read_n_cells(n_mem_size_cells, &memcell_buf);
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci		/*
99162306a36Sopenharmony_ci		 * Assumption: either all memory nodes or none will
99262306a36Sopenharmony_ci		 * have associativity properties.  If none, then
99362306a36Sopenharmony_ci		 * everything goes to default_nid.
99462306a36Sopenharmony_ci		 */
99562306a36Sopenharmony_ci		associativity = of_get_associativity(memory);
99662306a36Sopenharmony_ci		if (associativity) {
99762306a36Sopenharmony_ci			nid = associativity_to_nid(associativity);
99862306a36Sopenharmony_ci			initialize_form1_numa_distance(associativity);
99962306a36Sopenharmony_ci		} else
100062306a36Sopenharmony_ci			nid = default_nid;
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
100362306a36Sopenharmony_ci		node_set_online(nid);
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci		size = numa_enforce_memory_limit(start, size);
100662306a36Sopenharmony_ci		if (size)
100762306a36Sopenharmony_ci			memblock_set_node(start, size, &memblock.memory, nid);
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci		if (--ranges)
101062306a36Sopenharmony_ci			goto new_range;
101162306a36Sopenharmony_ci	}
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	/*
101462306a36Sopenharmony_ci	 * Now do the same thing for each MEMBLOCK listed in the
101562306a36Sopenharmony_ci	 * ibm,dynamic-memory property in the
101662306a36Sopenharmony_ci	 * ibm,dynamic-reconfiguration-memory node.
101762306a36Sopenharmony_ci	 */
101862306a36Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
101962306a36Sopenharmony_ci	if (memory) {
102062306a36Sopenharmony_ci		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
102162306a36Sopenharmony_ci		of_node_put(memory);
102262306a36Sopenharmony_ci	}
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	return 0;
102562306a36Sopenharmony_ci}
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_cistatic void __init setup_nonnuma(void)
102862306a36Sopenharmony_ci{
102962306a36Sopenharmony_ci	unsigned long top_of_ram = memblock_end_of_DRAM();
103062306a36Sopenharmony_ci	unsigned long total_ram = memblock_phys_mem_size();
103162306a36Sopenharmony_ci	unsigned long start_pfn, end_pfn;
103262306a36Sopenharmony_ci	unsigned int nid = 0;
103362306a36Sopenharmony_ci	int i;
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci	pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram);
103662306a36Sopenharmony_ci	pr_debug("Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20);
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ci	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
103962306a36Sopenharmony_ci		fake_numa_create_new_node(end_pfn, &nid);
104062306a36Sopenharmony_ci		memblock_set_node(PFN_PHYS(start_pfn),
104162306a36Sopenharmony_ci				  PFN_PHYS(end_pfn - start_pfn),
104262306a36Sopenharmony_ci				  &memblock.memory, nid);
104362306a36Sopenharmony_ci		node_set_online(nid);
104462306a36Sopenharmony_ci	}
104562306a36Sopenharmony_ci}
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_civoid __init dump_numa_cpu_topology(void)
104862306a36Sopenharmony_ci{
104962306a36Sopenharmony_ci	unsigned int node;
105062306a36Sopenharmony_ci	unsigned int cpu, count;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	if (!numa_enabled)
105362306a36Sopenharmony_ci		return;
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci	for_each_online_node(node) {
105662306a36Sopenharmony_ci		pr_info("Node %d CPUs:", node);
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci		count = 0;
105962306a36Sopenharmony_ci		/*
106062306a36Sopenharmony_ci		 * If we used a CPU iterator here we would miss printing
106162306a36Sopenharmony_ci		 * the holes in the cpumap.
106262306a36Sopenharmony_ci		 */
106362306a36Sopenharmony_ci		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
106462306a36Sopenharmony_ci			if (cpumask_test_cpu(cpu,
106562306a36Sopenharmony_ci					node_to_cpumask_map[node])) {
106662306a36Sopenharmony_ci				if (count == 0)
106762306a36Sopenharmony_ci					pr_cont(" %u", cpu);
106862306a36Sopenharmony_ci				++count;
106962306a36Sopenharmony_ci			} else {
107062306a36Sopenharmony_ci				if (count > 1)
107162306a36Sopenharmony_ci					pr_cont("-%u", cpu - 1);
107262306a36Sopenharmony_ci				count = 0;
107362306a36Sopenharmony_ci			}
107462306a36Sopenharmony_ci		}
107562306a36Sopenharmony_ci
107662306a36Sopenharmony_ci		if (count > 1)
107762306a36Sopenharmony_ci			pr_cont("-%u", nr_cpu_ids - 1);
107862306a36Sopenharmony_ci		pr_cont("\n");
107962306a36Sopenharmony_ci	}
108062306a36Sopenharmony_ci}
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci/* Initialize NODE_DATA for a node on the local memory */
108362306a36Sopenharmony_cistatic void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
108462306a36Sopenharmony_ci{
108562306a36Sopenharmony_ci	u64 spanned_pages = end_pfn - start_pfn;
108662306a36Sopenharmony_ci	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
108762306a36Sopenharmony_ci	u64 nd_pa;
108862306a36Sopenharmony_ci	void *nd;
108962306a36Sopenharmony_ci	int tnid;
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
109262306a36Sopenharmony_ci	if (!nd_pa)
109362306a36Sopenharmony_ci		panic("Cannot allocate %zu bytes for node %d data\n",
109462306a36Sopenharmony_ci		      nd_size, nid);
109562306a36Sopenharmony_ci
109662306a36Sopenharmony_ci	nd = __va(nd_pa);
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	/* report and initialize */
109962306a36Sopenharmony_ci	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
110062306a36Sopenharmony_ci		nd_pa, nd_pa + nd_size - 1);
110162306a36Sopenharmony_ci	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
110262306a36Sopenharmony_ci	if (tnid != nid)
110362306a36Sopenharmony_ci		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	node_data[nid] = nd;
110662306a36Sopenharmony_ci	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
110762306a36Sopenharmony_ci	NODE_DATA(nid)->node_id = nid;
110862306a36Sopenharmony_ci	NODE_DATA(nid)->node_start_pfn = start_pfn;
110962306a36Sopenharmony_ci	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
111062306a36Sopenharmony_ci}
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_cistatic void __init find_possible_nodes(void)
111362306a36Sopenharmony_ci{
111462306a36Sopenharmony_ci	struct device_node *rtas;
111562306a36Sopenharmony_ci	const __be32 *domains = NULL;
111662306a36Sopenharmony_ci	int prop_length, max_nodes;
111762306a36Sopenharmony_ci	u32 i;
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_ci	if (!numa_enabled)
112062306a36Sopenharmony_ci		return;
112162306a36Sopenharmony_ci
112262306a36Sopenharmony_ci	rtas = of_find_node_by_path("/rtas");
112362306a36Sopenharmony_ci	if (!rtas)
112462306a36Sopenharmony_ci		return;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	/*
112762306a36Sopenharmony_ci	 * ibm,current-associativity-domains is a fairly recent property. If
112862306a36Sopenharmony_ci	 * it doesn't exist, then fallback on ibm,max-associativity-domains.
112962306a36Sopenharmony_ci	 * Current denotes what the platform can support compared to max
113062306a36Sopenharmony_ci	 * which denotes what the Hypervisor can support.
113162306a36Sopenharmony_ci	 *
113262306a36Sopenharmony_ci	 * If the LPAR is migratable, new nodes might be activated after a LPM,
113362306a36Sopenharmony_ci	 * so we should consider the max number in that case.
113462306a36Sopenharmony_ci	 */
113562306a36Sopenharmony_ci	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
113662306a36Sopenharmony_ci		domains = of_get_property(rtas,
113762306a36Sopenharmony_ci					  "ibm,current-associativity-domains",
113862306a36Sopenharmony_ci					  &prop_length);
113962306a36Sopenharmony_ci	if (!domains) {
114062306a36Sopenharmony_ci		domains = of_get_property(rtas, "ibm,max-associativity-domains",
114162306a36Sopenharmony_ci					&prop_length);
114262306a36Sopenharmony_ci		if (!domains)
114362306a36Sopenharmony_ci			goto out;
114462306a36Sopenharmony_ci	}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci	max_nodes = of_read_number(&domains[primary_domain_index], 1);
114762306a36Sopenharmony_ci	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	for (i = 0; i < max_nodes; i++) {
115062306a36Sopenharmony_ci		if (!node_possible(i))
115162306a36Sopenharmony_ci			node_set(i, node_possible_map);
115262306a36Sopenharmony_ci	}
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci	prop_length /= sizeof(int);
115562306a36Sopenharmony_ci	if (prop_length > primary_domain_index + 2)
115662306a36Sopenharmony_ci		coregroup_enabled = 1;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ciout:
115962306a36Sopenharmony_ci	of_node_put(rtas);
116062306a36Sopenharmony_ci}
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_civoid __init mem_topology_setup(void)
116362306a36Sopenharmony_ci{
116462306a36Sopenharmony_ci	int cpu;
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
116762306a36Sopenharmony_ci	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	/*
117062306a36Sopenharmony_ci	 * Linux/mm assumes node 0 to be online at boot. However this is not
117162306a36Sopenharmony_ci	 * true on PowerPC, where node 0 is similar to any other node, it
117262306a36Sopenharmony_ci	 * could be cpuless, memoryless node. So force node 0 to be offline
117362306a36Sopenharmony_ci	 * for now. This will prevent cpuless, memoryless node 0 showing up
117462306a36Sopenharmony_ci	 * unnecessarily as online. If a node has cpus or memory that need
117562306a36Sopenharmony_ci	 * to be online, then node will anyway be marked online.
117662306a36Sopenharmony_ci	 */
117762306a36Sopenharmony_ci	node_set_offline(0);
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci	if (parse_numa_properties())
118062306a36Sopenharmony_ci		setup_nonnuma();
118162306a36Sopenharmony_ci
118262306a36Sopenharmony_ci	/*
118362306a36Sopenharmony_ci	 * Modify the set of possible NUMA nodes to reflect information
118462306a36Sopenharmony_ci	 * available about the set of online nodes, and the set of nodes
118562306a36Sopenharmony_ci	 * that we expect to make use of for this platform's affinity
118662306a36Sopenharmony_ci	 * calculations.
118762306a36Sopenharmony_ci	 */
118862306a36Sopenharmony_ci	nodes_and(node_possible_map, node_possible_map, node_online_map);
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci	find_possible_nodes();
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	setup_node_to_cpumask_map();
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	reset_numa_cpu_lookup_table();
119562306a36Sopenharmony_ci
119662306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
119762306a36Sopenharmony_ci		/*
119862306a36Sopenharmony_ci		 * Powerpc with CONFIG_NUMA always used to have a node 0,
119962306a36Sopenharmony_ci		 * even if it was memoryless or cpuless. For all cpus that
120062306a36Sopenharmony_ci		 * are possible but not present, cpu_to_node() would point
120162306a36Sopenharmony_ci		 * to node 0. To remove a cpuless, memoryless dummy node,
120262306a36Sopenharmony_ci		 * powerpc need to make sure all possible but not present
120362306a36Sopenharmony_ci		 * cpu_to_node are set to a proper node.
120462306a36Sopenharmony_ci		 */
120562306a36Sopenharmony_ci		numa_setup_cpu(cpu);
120662306a36Sopenharmony_ci	}
120762306a36Sopenharmony_ci}
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_civoid __init initmem_init(void)
121062306a36Sopenharmony_ci{
121162306a36Sopenharmony_ci	int nid;
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci	memblock_dump_all();
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	for_each_online_node(nid) {
121662306a36Sopenharmony_ci		unsigned long start_pfn, end_pfn;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
121962306a36Sopenharmony_ci		setup_node_data(nid, start_pfn, end_pfn);
122062306a36Sopenharmony_ci	}
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	sparse_init();
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	/*
122562306a36Sopenharmony_ci	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
122662306a36Sopenharmony_ci	 * even before we online them, so that we can use cpu_to_{node,mem}
122762306a36Sopenharmony_ci	 * early in boot, cf. smp_prepare_cpus().
122862306a36Sopenharmony_ci	 * _nocalls() + manual invocation is used because cpuhp is not yet
122962306a36Sopenharmony_ci	 * initialized for the boot CPU.
123062306a36Sopenharmony_ci	 */
123162306a36Sopenharmony_ci	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
123262306a36Sopenharmony_ci				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
123362306a36Sopenharmony_ci}
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_cistatic int __init early_numa(char *p)
123662306a36Sopenharmony_ci{
123762306a36Sopenharmony_ci	if (!p)
123862306a36Sopenharmony_ci		return 0;
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci	if (strstr(p, "off"))
124162306a36Sopenharmony_ci		numa_enabled = 0;
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	p = strstr(p, "fake=");
124462306a36Sopenharmony_ci	if (p)
124562306a36Sopenharmony_ci		cmdline = p + strlen("fake=");
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	return 0;
124862306a36Sopenharmony_ci}
124962306a36Sopenharmony_ciearly_param("numa", early_numa);
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG
125262306a36Sopenharmony_ci/*
125362306a36Sopenharmony_ci * Find the node associated with a hot added memory section for
125462306a36Sopenharmony_ci * memory represented in the device tree by the property
125562306a36Sopenharmony_ci * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
125662306a36Sopenharmony_ci */
125762306a36Sopenharmony_cistatic int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
125862306a36Sopenharmony_ci{
125962306a36Sopenharmony_ci	struct drmem_lmb *lmb;
126062306a36Sopenharmony_ci	unsigned long lmb_size;
126162306a36Sopenharmony_ci	int nid = NUMA_NO_NODE;
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	lmb_size = drmem_lmb_size();
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	for_each_drmem_lmb(lmb) {
126662306a36Sopenharmony_ci		/* skip this block if it is reserved or not assigned to
126762306a36Sopenharmony_ci		 * this partition */
126862306a36Sopenharmony_ci		if ((lmb->flags & DRCONF_MEM_RESERVED)
126962306a36Sopenharmony_ci		    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
127062306a36Sopenharmony_ci			continue;
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci		if ((scn_addr < lmb->base_addr)
127362306a36Sopenharmony_ci		    || (scn_addr >= (lmb->base_addr + lmb_size)))
127462306a36Sopenharmony_ci			continue;
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci		nid = of_drconf_to_nid_single(lmb);
127762306a36Sopenharmony_ci		break;
127862306a36Sopenharmony_ci	}
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	return nid;
128162306a36Sopenharmony_ci}
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ci/*
128462306a36Sopenharmony_ci * Find the node associated with a hot added memory section for memory
128562306a36Sopenharmony_ci * represented in the device tree as a node (i.e. memory@XXXX) for
128662306a36Sopenharmony_ci * each memblock.
128762306a36Sopenharmony_ci */
128862306a36Sopenharmony_cistatic int hot_add_node_scn_to_nid(unsigned long scn_addr)
128962306a36Sopenharmony_ci{
129062306a36Sopenharmony_ci	struct device_node *memory;
129162306a36Sopenharmony_ci	int nid = NUMA_NO_NODE;
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_ci	for_each_node_by_type(memory, "memory") {
129462306a36Sopenharmony_ci		int i = 0;
129562306a36Sopenharmony_ci
129662306a36Sopenharmony_ci		while (1) {
129762306a36Sopenharmony_ci			struct resource res;
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci			if (of_address_to_resource(memory, i++, &res))
130062306a36Sopenharmony_ci				break;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci			if ((scn_addr < res.start) || (scn_addr > res.end))
130362306a36Sopenharmony_ci				continue;
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci			nid = of_node_to_nid_single(memory);
130662306a36Sopenharmony_ci			break;
130762306a36Sopenharmony_ci		}
130862306a36Sopenharmony_ci
130962306a36Sopenharmony_ci		if (nid >= 0)
131062306a36Sopenharmony_ci			break;
131162306a36Sopenharmony_ci	}
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	of_node_put(memory);
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	return nid;
131662306a36Sopenharmony_ci}
131762306a36Sopenharmony_ci
131862306a36Sopenharmony_ci/*
131962306a36Sopenharmony_ci * Find the node associated with a hot added memory section.  Section
132062306a36Sopenharmony_ci * corresponds to a SPARSEMEM section, not an MEMBLOCK.  It is assumed that
132162306a36Sopenharmony_ci * sections are fully contained within a single MEMBLOCK.
132262306a36Sopenharmony_ci */
132362306a36Sopenharmony_ciint hot_add_scn_to_nid(unsigned long scn_addr)
132462306a36Sopenharmony_ci{
132562306a36Sopenharmony_ci	struct device_node *memory = NULL;
132662306a36Sopenharmony_ci	int nid;
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci	if (!numa_enabled)
132962306a36Sopenharmony_ci		return first_online_node;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
133262306a36Sopenharmony_ci	if (memory) {
133362306a36Sopenharmony_ci		nid = hot_add_drconf_scn_to_nid(scn_addr);
133462306a36Sopenharmony_ci		of_node_put(memory);
133562306a36Sopenharmony_ci	} else {
133662306a36Sopenharmony_ci		nid = hot_add_node_scn_to_nid(scn_addr);
133762306a36Sopenharmony_ci	}
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	if (nid < 0 || !node_possible(nid))
134062306a36Sopenharmony_ci		nid = first_online_node;
134162306a36Sopenharmony_ci
134262306a36Sopenharmony_ci	return nid;
134362306a36Sopenharmony_ci}
134462306a36Sopenharmony_ci
134562306a36Sopenharmony_cistatic u64 hot_add_drconf_memory_max(void)
134662306a36Sopenharmony_ci{
134762306a36Sopenharmony_ci	struct device_node *memory = NULL;
134862306a36Sopenharmony_ci	struct device_node *dn = NULL;
134962306a36Sopenharmony_ci	const __be64 *lrdr = NULL;
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	dn = of_find_node_by_path("/rtas");
135262306a36Sopenharmony_ci	if (dn) {
135362306a36Sopenharmony_ci		lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
135462306a36Sopenharmony_ci		of_node_put(dn);
135562306a36Sopenharmony_ci		if (lrdr)
135662306a36Sopenharmony_ci			return be64_to_cpup(lrdr);
135762306a36Sopenharmony_ci	}
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
136062306a36Sopenharmony_ci	if (memory) {
136162306a36Sopenharmony_ci		of_node_put(memory);
136262306a36Sopenharmony_ci		return drmem_lmb_memory_max();
136362306a36Sopenharmony_ci	}
136462306a36Sopenharmony_ci	return 0;
136562306a36Sopenharmony_ci}
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_ci/*
136862306a36Sopenharmony_ci * memory_hotplug_max - return max address of memory that may be added
136962306a36Sopenharmony_ci *
137062306a36Sopenharmony_ci * This is currently only used on systems that support drconfig memory
137162306a36Sopenharmony_ci * hotplug.
137262306a36Sopenharmony_ci */
137362306a36Sopenharmony_ciu64 memory_hotplug_max(void)
137462306a36Sopenharmony_ci{
137562306a36Sopenharmony_ci        return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
137662306a36Sopenharmony_ci}
137762306a36Sopenharmony_ci#endif /* CONFIG_MEMORY_HOTPLUG */
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci/* Virtual Processor Home Node (VPHN) support */
138062306a36Sopenharmony_ci#ifdef CONFIG_PPC_SPLPAR
/* Zero until topology_update_init() runs as a device_initcall, then 1. */
static int topology_inited;
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci/*
138462306a36Sopenharmony_ci * Retrieve the new associativity information for a virtual processor's
138562306a36Sopenharmony_ci * home node.
138662306a36Sopenharmony_ci */
138762306a36Sopenharmony_cistatic long vphn_get_associativity(unsigned long cpu,
138862306a36Sopenharmony_ci					__be32 *associativity)
138962306a36Sopenharmony_ci{
139062306a36Sopenharmony_ci	long rc;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	rc = hcall_vphn(get_hard_smp_processor_id(cpu),
139362306a36Sopenharmony_ci				VPHN_FLAG_VCPU, associativity);
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	switch (rc) {
139662306a36Sopenharmony_ci	case H_SUCCESS:
139762306a36Sopenharmony_ci		pr_debug("VPHN hcall succeeded. Reset polling...\n");
139862306a36Sopenharmony_ci		goto out;
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	case H_FUNCTION:
140162306a36Sopenharmony_ci		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
140262306a36Sopenharmony_ci		break;
140362306a36Sopenharmony_ci	case H_HARDWARE:
140462306a36Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
140562306a36Sopenharmony_ci			"preventing VPHN. Disabling polling...\n");
140662306a36Sopenharmony_ci		break;
140762306a36Sopenharmony_ci	case H_PARAMETER:
140862306a36Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
140962306a36Sopenharmony_ci			"Disabling polling...\n");
141062306a36Sopenharmony_ci		break;
141162306a36Sopenharmony_ci	default:
141262306a36Sopenharmony_ci		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
141362306a36Sopenharmony_ci			, rc);
141462306a36Sopenharmony_ci		break;
141562306a36Sopenharmony_ci	}
141662306a36Sopenharmony_ciout:
141762306a36Sopenharmony_ci	return rc;
141862306a36Sopenharmony_ci}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_civoid find_and_update_cpu_nid(int cpu)
142162306a36Sopenharmony_ci{
142262306a36Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
142362306a36Sopenharmony_ci	int new_nid;
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_ci	/* Use associativity from first thread for all siblings */
142662306a36Sopenharmony_ci	if (vphn_get_associativity(cpu, associativity))
142762306a36Sopenharmony_ci		return;
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	/* Do not have previous associativity, so find it now. */
143062306a36Sopenharmony_ci	new_nid = associativity_to_nid(associativity);
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci	if (new_nid < 0 || !node_possible(new_nid))
143362306a36Sopenharmony_ci		new_nid = first_online_node;
143462306a36Sopenharmony_ci	else
143562306a36Sopenharmony_ci		// Associate node <-> cpu, so cpu_up() calls
143662306a36Sopenharmony_ci		// try_online_node() on the right node.
143762306a36Sopenharmony_ci		set_cpu_numa_node(cpu, new_nid);
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ciint cpu_to_coregroup_id(int cpu)
144362306a36Sopenharmony_ci{
144462306a36Sopenharmony_ci	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
144562306a36Sopenharmony_ci	int index;
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	if (cpu < 0 || cpu > nr_cpu_ids)
144862306a36Sopenharmony_ci		return -1;
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	if (!coregroup_enabled)
145162306a36Sopenharmony_ci		goto out;
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci	if (!firmware_has_feature(FW_FEATURE_VPHN))
145462306a36Sopenharmony_ci		goto out;
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ci	if (vphn_get_associativity(cpu, associativity))
145762306a36Sopenharmony_ci		goto out;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	index = of_read_number(associativity, 1);
146062306a36Sopenharmony_ci	if (index > primary_domain_index + 1)
146162306a36Sopenharmony_ci		return of_read_number(&associativity[index - 1], 1);
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ciout:
146462306a36Sopenharmony_ci	return cpu_to_core_id(cpu);
146562306a36Sopenharmony_ci}
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_cistatic int topology_update_init(void)
146862306a36Sopenharmony_ci{
146962306a36Sopenharmony_ci	topology_inited = 1;
147062306a36Sopenharmony_ci	return 0;
147162306a36Sopenharmony_ci}
147262306a36Sopenharmony_cidevice_initcall(topology_update_init);
147362306a36Sopenharmony_ci#endif /* CONFIG_PPC_SPLPAR */
1474