// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 */
#define pr_fmt(fmt) "numa: " fmt

#include <linux/threads.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/cpuset.h>
#include <linux/node.h>
#include <linux/stop_machine.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <asm/cputhreads.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
#include <asm/drmem.h>

static int numa_enabled = 1;

static char *cmdline __initdata;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
EXPORT_SYMBOL(node_data);

static int primary_domain_index;
static int n_mem_addr_cells, n_mem_size_cells;

#define FORM0_AFFINITY 0
#define FORM1_AFFINITY 1
#define FORM2_AFFINITY 2
static int affinity_form;

#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const __be32 *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
	[0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
};
static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES)
		setup_nr_node_ids();

	/* allocate the map */
	for_each_node(node)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
}

static int __init fake_numa_create_new_node(unsigned long end_pfn,
					    unsigned int *nid)
{
	unsigned long long mem;
	char *p = cmdline;
	static unsigned int fake_nid;
	static unsigned long long curr_boundary;

	/*
	 * Modify node id, iff we started creating NUMA nodes
	 * We want to continue from where we left off the last time
	 */
	if (fake_nid)
		*nid = fake_nid;
	/*
	 * In case there are no more arguments to parse, the
	 * node_id should be the same as the last fake node id
	 * (we've handled this above).
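	 *
	 * The boundary list parsed below comes from the "numa=fake="
	 * command line option (see early_numa()). As a rough, hypothetical
	 * example, "numa=fake=1G,4G" would place memory below 1GB in fake
	 * node 0, memory between 1GB and 4GB in fake node 1, and the
	 * remainder in fake node 2 (at memblock-range granularity).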
	 */
	if (!p)
		return 0;

	mem = memparse(p, &p);
	if (!mem)
		return 0;

	if (mem < curr_boundary)
		return 0;

	curr_boundary = mem;

	if ((end_pfn << PAGE_SHIFT) > mem) {
		/*
		 * Skip commas and spaces
		 */
		while (*p == ',' || *p == ' ' || *p == '\t')
			p++;

		cmdline = p;
		fake_nid++;
		*nid = fake_nid;
		dbg("created new fake_node with id %d\n", fake_nid);
		return 1;
	}
	return 0;
}

static void reset_numa_cpu_lookup_table(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu)
		numa_cpu_lookup_table[cpu] = -1;
}

void map_cpu_to_node(int cpu, int node)
{
	update_numa_cpu_lookup_table(cpu, node);

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}

#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */

static int __associativity_to_nid(const __be32 *associativity,
				  int max_array_sz)
{
	int nid;
	/*
	 * primary_domain_index is a 1-based array index.
	 */
	int index = primary_domain_index - 1;

	if (!numa_enabled || index >= max_array_sz)
		return NUMA_NO_NODE;

	nid = of_read_number(&associativity[index], 1);

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= nr_node_ids)
		nid = NUMA_NO_NODE;
	return nid;
}
/*
 * Returns nid in the range [0..nr_node_ids - 1], or -1 if no useful NUMA
 * info is found.
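 *
 * The first cell of the "ibm,associativity" property is the number of
 * domains that follow. As a purely illustrative example, with
 * primary_domain_index == 4 a property value of { 4, 0, 0, 0, 1 } would
 * resolve to nid 1 (the 4th domain after the length cell).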
 */
static int associativity_to_nid(const __be32 *associativity)
{
	int array_sz = of_read_number(associativity, 1);

	/* Skip the first element in the associativity array */
	return __associativity_to_nid((associativity + 1), array_sz);
}

static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
	int dist;
	int node1, node2;

	node1 = associativity_to_nid(cpu1_assoc);
	node2 = associativity_to_nid(cpu2_assoc);

	dist = numa_distance_table[node1][node2];
	if (dist <= LOCAL_DISTANCE)
		return 0;
	else if (dist <= REMOTE_DISTANCE)
		return 1;
	else
		return 2;
}

static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
	int dist = 0;
	int i, index;

	for (i = 0; i < distance_ref_points_depth; i++) {
		index = be32_to_cpu(distance_ref_points[i]);
		if (cpu1_assoc[index] == cpu2_assoc[index])
			break;
		dist++;
	}

	return dist;
}

int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
	/* We should not get called with FORM0 */
	VM_WARN_ON(affinity_form == FORM0_AFFINITY);
	if (affinity_form == FORM1_AFFINITY)
		return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
	return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
}

/* must hold reference to node during call */
static const __be32 *of_get_associativity(struct device_node *dev)
{
	return of_get_property(dev, "ibm,associativity", NULL);
}

int __node_distance(int a, int b)
{
	int i;
	int distance = LOCAL_DISTANCE;

	if (affinity_form == FORM2_AFFINITY)
		return numa_distance_table[a][b];
	else if (affinity_form == FORM0_AFFINITY)
		return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);

	for (i = 0; i < distance_ref_points_depth; i++) {
		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
			break;

		/* Double the distance for each NUMA level */
		distance *= 2;
	}

	return distance;
}
EXPORT_SYMBOL(__node_distance);

/* Returns the nid associated with the given device tree node,
 * or -1 if not found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = NUMA_NO_NODE;
	const __be32 *tmp;

	tmp = of_get_associativity(device);
	if (tmp)
		nid = associativity_to_nid(tmp);
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	int nid = NUMA_NO_NODE;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		device = of_get_next_parent(device);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL(of_node_to_nid);

static void __initialize_form1_numa_distance(const __be32 *associativity,
					     int max_array_sz)
{
	int i, nid;

	if (affinity_form != FORM1_AFFINITY)
		return;

	nid = __associativity_to_nid(associativity, max_array_sz);
	if (nid != NUMA_NO_NODE) {
		for (i = 0; i < distance_ref_points_depth; i++) {
			const __be32 *entry;
			int index = be32_to_cpu(distance_ref_points[i]) - 1;

			/*
			 * broken hierarchy, return with broken distance table
			 */
			if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
				return;

			entry = &associativity[index];
			distance_lookup_table[nid][i] = of_read_number(entry, 1);
		}
	}
}

static void initialize_form1_numa_distance(const __be32 *associativity)
{
	int array_sz;

	array_sz = of_read_number(associativity, 1);
	/* Skip the first element in the associativity array */
	__initialize_form1_numa_distance(associativity + 1, array_sz);
}

/*
 * Used to update distance information w.r.t. a newly added node.
 */
void update_numa_distance(struct device_node *node)
{
	int nid;

	if (affinity_form == FORM0_AFFINITY)
		return;
	else if (affinity_form == FORM1_AFFINITY) {
		const __be32 *associativity;

		associativity = of_get_associativity(node);
		if (!associativity)
			return;

		initialize_form1_numa_distance(associativity);
		return;
	}

	/* FORM2 affinity */
	nid = of_node_to_nid_single(node);
	if (nid == NUMA_NO_NODE)
		return;

	/*
	 * With FORM2 we expect NUMA distance of all possible NUMA
	 * nodes to be provided during boot.
	 */
	WARN(numa_distance_table[nid][nid] == -1,
	     "NUMA distance details for node %d not provided\n", nid);
}
EXPORT_SYMBOL_GPL(update_numa_distance);

/*
 * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
 * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
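 *
 * As a small, hypothetical two-node example (node ids 0 and 4):
 *   ibm,numa-lookup-index-table = { 2, 0, 4 }
 *   ibm,numa-distance-table     = { 4, 10, 40, 40, 10 }
 * where the leading 4 is the element count and the remaining one-byte
 * distances are read in row-major order through the lookup table, giving
 * dist[0][0] = dist[4][4] = 10 and dist[0][4] = dist[4][0] = 40.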
 */
static void initialize_form2_numa_distance_lookup_table(void)
{
	int i, j;
	struct device_node *root;
	const __u8 *numa_dist_table;
	const __be32 *numa_lookup_index;
	int numa_dist_table_length;
	int max_numa_index, distance_index;

	if (firmware_has_feature(FW_FEATURE_OPAL))
		root = of_find_node_by_path("/ibm,opal");
	else
		root = of_find_node_by_path("/rtas");
	if (!root)
		root = of_find_node_by_path("/");

	numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
	max_numa_index = of_read_number(&numa_lookup_index[0], 1);

	/* first element of the array is the size and is encode-int */
	numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
	numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
	/* Skip the size, which is an encode-int */
	numa_dist_table += sizeof(__be32);

	pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
		 numa_dist_table_length, max_numa_index);

	for (i = 0; i < max_numa_index; i++)
		/* +1 skip the max_numa_index in the property */
		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);

	if (numa_dist_table_length != max_numa_index * max_numa_index) {
		WARN(1, "Wrong NUMA distance information\n");
		/* consider everybody else just remote. */
		for (i = 0; i < max_numa_index; i++) {
			for (j = 0; j < max_numa_index; j++) {
				int nodeA = numa_id_index_table[i];
				int nodeB = numa_id_index_table[j];

				if (nodeA == nodeB)
					numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
				else
					numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
			}
		}
		/* don't parse a mis-sized distance table below */
		goto out;
	}

	distance_index = 0;
	for (i = 0; i < max_numa_index; i++) {
		for (j = 0; j < max_numa_index; j++) {
			int nodeA = numa_id_index_table[i];
			int nodeB = numa_id_index_table[j];

			numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
		}
	}
out:
	of_node_put(root);
}

static int __init find_primary_domain_index(void)
{
	int index;
	struct device_node *root;

	/*
	 * Check for which form of affinity.
	 */
	if (firmware_has_feature(FW_FEATURE_OPAL)) {
		affinity_form = FORM1_AFFINITY;
	} else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
		dbg("Using form 2 affinity\n");
		affinity_form = FORM2_AFFINITY;
	} else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
		dbg("Using form 1 affinity\n");
		affinity_form = FORM1_AFFINITY;
	} else
		affinity_form = FORM0_AFFINITY;

	if (firmware_has_feature(FW_FEATURE_OPAL))
		root = of_find_node_by_path("/ibm,opal");
	else
		root = of_find_node_by_path("/rtas");
	if (!root)
		root = of_find_node_by_path("/");

	/*
	 * This property is a set of 32-bit integers, each representing
	 * an index into the ibm,associativity nodes.
	 *
	 * With form 0 affinity the first integer is for an SMP configuration
	 * (should be all 0's) and the second is for a normal NUMA
	 * configuration. We have only one level of NUMA.
	 *
	 * With form 1 affinity the first integer is the most significant
	 * NUMA boundary and the following are progressively less significant
	 * boundaries. There can be more than one level of NUMA.
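	 *
	 * For instance, a hypothetical form 1 value of
	 * ibm,associativity-reference-points = <0x4 0x2> would make domain
	 * index 4 the primary (node-defining) domain and index 2 a
	 * secondary boundary, so primary_domain_index ends up as 4.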
	 */
	distance_ref_points = of_get_property(root,
					      "ibm,associativity-reference-points",
					      &distance_ref_points_depth);

	if (!distance_ref_points) {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		goto err;
	}

	distance_ref_points_depth /= sizeof(int);
	if (affinity_form == FORM0_AFFINITY) {
		if (distance_ref_points_depth < 2) {
			printk(KERN_WARNING "NUMA: "
			       "short ibm,associativity-reference-points\n");
			goto err;
		}

		index = of_read_number(&distance_ref_points[1], 1);
	} else {
		/*
		 * Both FORM1 and FORM2 affinity find the primary domain details
		 * at the same offset.
		 */
		index = of_read_number(distance_ref_points, 1);
	}

	/*
	 * Warn and cap if the hardware supports more than
	 * MAX_DISTANCE_REF_POINTS domains.
	 */
	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
		printk(KERN_WARNING "NUMA: distance array capped at "
		       "%d entries\n", MAX_DISTANCE_REF_POINTS);
		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
	}

	of_node_put(root);
	return index;

err:
	of_node_put(root);
	return -1;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}

static unsigned long read_n_cells(int n, const __be32 **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | of_read_number(*buf, 1);
		(*buf)++;
	}
	return result;
}

struct assoc_arrays {
	u32 n_arrays;
	u32 array_sz;
	const __be32 *arrays;
};

/*
 * Retrieve and validate the list of associativity arrays for drconf
 * memory from the ibm,associativity-lookup-arrays property of the
 * device tree.
 *
 * The layout of the ibm,associativity-lookup-arrays property is a number N
 * indicating the number of associativity arrays, followed by a number M
 * indicating the size of each associativity array, followed by a list
 * of N associativity arrays.
 */
static int of_get_assoc_arrays(struct assoc_arrays *aa)
{
	struct device_node *memory;
	const __be32 *prop;
	u32 len;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (!memory)
		return -1;

	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	if (!prop || len < 2 * sizeof(unsigned int)) {
		of_node_put(memory);
		return -1;
	}

	aa->n_arrays = of_read_number(prop++, 1);
	aa->array_sz = of_read_number(prop++, 1);

	of_node_put(memory);

	/* Now that we know the number of arrays and size of each array,
	 * revalidate the size of the property read in.
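	 *
	 * For example (hypothetical values), with n_arrays = 2 and
	 * array_sz = 4 the property must span at least
	 * (2 * 4 + 2) * sizeof(unsigned int) = 40 bytes.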
	 */
	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
		return -1;

	aa->arrays = prop;
	return 0;
}

static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
{
	struct assoc_arrays aa = { .arrays = NULL };
	int default_nid = NUMA_NO_NODE;
	int nid = default_nid;
	int rc, index;

	if ((primary_domain_index < 0) || !numa_enabled)
		return default_nid;

	rc = of_get_assoc_arrays(&aa);
	if (rc)
		return default_nid;

	if (primary_domain_index <= aa.array_sz &&
	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
		const __be32 *associativity;

		index = lmb->aa_index * aa.array_sz;
		associativity = &aa.arrays[index];
		nid = __associativity_to_nid(associativity, aa.array_sz);
		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
			/*
			 * lookup array associativity entries have
			 * no length of the array as the first element.
			 */
			__initialize_form1_numa_distance(associativity, aa.array_sz);
		}
	}
	return nid;
}

/*
 * This is like of_node_to_nid_single() for memory represented in the
 * ibm,dynamic-reconfiguration-memory node.
 */
int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
	struct assoc_arrays aa = { .arrays = NULL };
	int default_nid = NUMA_NO_NODE;
	int nid = default_nid;
	int rc, index;

	if ((primary_domain_index < 0) || !numa_enabled)
		return default_nid;

	rc = of_get_assoc_arrays(&aa);
	if (rc)
		return default_nid;

	if (primary_domain_index <= aa.array_sz &&
	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
		const __be32 *associativity;

		index = lmb->aa_index * aa.array_sz;
		associativity = &aa.arrays[index];
		nid = __associativity_to_nid(associativity, aa.array_sz);
	}
	return nid;
}

#ifdef CONFIG_PPC_SPLPAR

static int __vphn_get_associativity(long lcpu, __be32 *associativity)
{
	long rc, hwid;

	/*
	 * On a shared LPAR, the device tree will not have node associativity.
	 * At this time lppaca, or its __old_status field, may not be
	 * updated. Hence the kernel cannot detect if it's on a shared LPAR.
	 * So request an explicit associativity irrespective of whether the
	 * LPAR is shared or dedicated. Use the device tree property as a
	 * fallback. cpu_to_phys_id is only valid between
	 * smp_setup_cpu_maps() and smp_setup_pacas().
	 */
	if (firmware_has_feature(FW_FEATURE_VPHN)) {
		if (cpu_to_phys_id)
			hwid = cpu_to_phys_id[lcpu];
		else
			hwid = get_hard_smp_processor_id(lcpu);

		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
		if (rc == H_SUCCESS)
			return 0;
	}

	return -1;
}

static int vphn_get_nid(long lcpu)
{
	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};

	if (!__vphn_get_associativity(lcpu, associativity))
		return associativity_to_nid(associativity);

	return NUMA_NO_NODE;
}
#else

static int __vphn_get_associativity(long lcpu, __be32 *associativity)
{
	return -1;
}

static int vphn_get_nid(long unused)
{
	return NUMA_NO_NODE;
}
#endif /* CONFIG_PPC_SPLPAR */

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
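 *
 * For a present cpu, the node is resolved in this order: an existing
 * mapping for the first thread of the core, then VPHN (where available),
 * then the cpu node's ibm,associativity property; if none of those yields
 * a usable node, fall back to first_online_node.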
 */
static int numa_setup_cpu(unsigned long lcpu)
{
	struct device_node *cpu;
	int fcpu = cpu_first_thread_sibling(lcpu);
	int nid = NUMA_NO_NODE;

	if (!cpu_present(lcpu)) {
		set_cpu_numa_node(lcpu, first_online_node);
		return first_online_node;
	}

	/*
	 * If a valid cpu-to-node mapping is already available, use it
	 * directly instead of querying the firmware, since it represents
	 * the most recent mapping notified to us by the platform (e.g. VPHN).
	 * The cpu_to_node binding is the same for all threads in a core,
	 * so if a valid cpu-to-node mapping is already available for the
	 * first thread in the core, use it.
	 */
	nid = numa_cpu_lookup_table[fcpu];
	if (nid >= 0) {
		map_cpu_to_node(lcpu, nid);
		return nid;
	}

	nid = vphn_get_nid(lcpu);
	if (nid != NUMA_NO_NODE)
		goto out_present;

	cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		if (cpu_present(lcpu))
			goto out_present;
		else
			goto out;
	}

	nid = of_node_to_nid_single(cpu);
	of_node_put(cpu);

out_present:
	if (nid < 0 || !node_possible(nid))
		nid = first_online_node;

	/*
	 * Update for the first thread of the core. All threads of a core
	 * have to be part of the same node. This not only avoids querying
	 * for every other thread in the core, but also avoids a case
	 * where a virtual node associativity change causes subsequent threads
	 * of a core to be associated with different nids. However, if the
	 * first thread is already online, expect it to have a valid mapping.
	 */
	if (fcpu != lcpu) {
		WARN_ON(cpu_online(fcpu));
		map_cpu_to_node(fcpu, nid);
	}

	map_cpu_to_node(lcpu, nid);
out:
	return nid;
}

static void verify_cpu_node_mapping(int cpu, int node)
{
	int base, sibling, i;

	/* Verify that all the threads in the core belong to the same node */
	base = cpu_first_thread_sibling(cpu);

	for (i = 0; i < threads_per_core; i++) {
		sibling = base + i;

		if (sibling == cpu || cpu_is_offline(sibling))
			continue;

		if (cpu_to_node(sibling) != node) {
			WARN(1, "CPU thread siblings %d and %d don't belong"
				" to the same node!\n", cpu, sibling);
			break;
		}
	}
}

/* Must run before sched domains notifier. */
static int ppc_numa_cpu_prepare(unsigned int cpu)
{
	int nid;

	nid = numa_setup_cpu(cpu);
	verify_cpu_node_mapping(cpu, nid);
	return 0;
}

static int ppc_numa_cpu_dead(unsigned int cpu)
{
	return 0;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit. Also, in the case of
	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
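	 *
	 * As a rough example, if DRAM ends at 4GB, a region starting at
	 * 3.5GB with a size of 1GB would be truncated to 512MB, and a
	 * region starting at or above 4GB would be dropped entirely.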
	 */

	if (start + size <= memblock_end_of_DRAM())
		return size;

	if (start >= memblock_end_of_DRAM())
		return 0;

	return memblock_end_of_DRAM() - start;
}

/*
 * Reads the counter for a given entry in
 * linux,drconf-usable-memory property
 */
static inline int __init read_usm_ranges(const __be32 **usm)
{
	/*
	 * For each lmb in ibm,dynamic-memory a corresponding
	 * entry in linux,drconf-usable-memory property contains
	 * a counter followed by that many (base, size) tuples.
	 * Read the counter from linux,drconf-usable-memory.
	 */
	return read_n_cells(n_mem_size_cells, usm);
}

/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node. This assumes n_mem_{addr,size}_cells have been set.
 */
static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
				       const __be32 **usm,
				       void *data)
{
	unsigned int ranges, is_kexec_kdump = 0;
	unsigned long base, size, sz;
	int nid;

	/*
	 * Skip this block if the reserved bit is set in flags (0x80)
	 * or if the block is not assigned to this partition (0x8)
	 */
	if ((lmb->flags & DRCONF_MEM_RESERVED)
	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
		return 0;

	if (*usm)
		is_kexec_kdump = 1;

	base = lmb->base_addr;
	size = drmem_lmb_size();
	ranges = 1;

	if (is_kexec_kdump) {
		ranges = read_usm_ranges(usm);
		if (!ranges) /* there are no (base, size) tuples */
			return 0;
	}

	do {
		if (is_kexec_kdump) {
			base = read_n_cells(n_mem_addr_cells, usm);
			size = read_n_cells(n_mem_size_cells, usm);
		}

		nid = get_nid_and_numa_distance(lmb);
		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
					  &nid);
		node_set_online(nid);
		sz = numa_enforce_memory_limit(base, size);
		if (sz)
			memblock_set_node(base, sz, &memblock.memory, nid);
	} while (--ranges);

	return 0;
}

static int __init parse_numa_properties(void)
{
	struct device_node *memory;
	int default_nid = 0;
	unsigned long i;
	const __be32 *associativity;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	primary_domain_index = find_primary_domain_index();

	if (primary_domain_index < 0) {
		/*
		 * If we fail to parse primary_domain_index from the device
		 * tree, mark NUMA as disabled and boot with NUMA disabled.
		 */
		numa_enabled = false;
		return primary_domain_index;
	}

	dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);

	/*
	 * If it is FORM2, initialize the distance table here.
	 */
	if (affinity_form == FORM2_AFFINITY)
		initialize_form2_numa_distance_lookup_table();

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
		struct device_node *cpu;
		int nid = NUMA_NO_NODE;

		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));

		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
			nid = associativity_to_nid(vphn_assoc);
			initialize_form1_numa_distance(vphn_assoc);
		} else {

			/*
			 * Don't fall back to default_nid yet -- we will plug
			 * cpus into nodes once the memory scan has discovered
			 * the topology.
			 */
			cpu = of_get_cpu_node(i, NULL);
			BUG_ON(!cpu);

			associativity = of_get_associativity(cpu);
			if (associativity) {
				nid = associativity_to_nid(associativity);
				initialize_form1_numa_distance(associativity);
			}
			of_node_put(cpu);
		}

		/* node_set_online() is UB if 'nid' is negative */
		if (likely(nid >= 0))
			node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);

	for_each_node_by_type(memory, "memory") {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		const __be32 *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory,
				"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cells */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties. If none, then
		 * everything goes to default_nid.
		 */
		associativity = of_get_associativity(memory);
		if (associativity) {
			nid = associativity_to_nid(associativity);
			initialize_form1_numa_distance(associativity);
		} else
			nid = default_nid;

		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
		node_set_online(nid);

		size = numa_enforce_memory_limit(start, size);
		if (size)
			memblock_set_node(start, size, &memblock.memory, nid);

		if (--ranges)
			goto new_range;
	}

	/*
	 * Now do the same thing for each MEMBLOCK listed in the
	 * ibm,dynamic-memory property in the
	 * ibm,dynamic-reconfiguration-memory node.
	 */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
		of_node_put(memory);
	}

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = memblock_end_of_DRAM();
	unsigned long total_ram = memblock_phys_mem_size();
	unsigned long start_pfn, end_pfn;
	unsigned int nid = 0;
	int i;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
		fake_numa_create_new_node(end_pfn, &nid);
		memblock_set_node(PFN_PHYS(start_pfn),
				  PFN_PHYS(end_pfn - start_pfn),
				  &memblock.memory, nid);
		node_set_online(nid);
	}
}

void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (!numa_enabled)
		return;

	for_each_online_node(node) {
		pr_info("Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
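		 *
		 * The output is a set of cpu ranges per node, along the
		 * lines of a hypothetical "Node 0 CPUs: 0-7 16-23".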
		 */
		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
			if (cpumask_test_cpu(cpu,
					node_to_cpumask_map[node])) {
				if (count == 0)
					pr_cont(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					pr_cont("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			pr_cont("-%u", nr_cpu_ids - 1);
		pr_cont("\n");
	}
}

/* Initialize NODE_DATA for a node on the local memory */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
	u64 spanned_pages = end_pfn - start_pfn;
	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
	u64 nd_pa;
	void *nd;
	int tnid;

	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
	if (!nd_pa)
		panic("Cannot allocate %zu bytes for node %d data\n",
		      nd_size, nid);

	nd = __va(nd_pa);

	/* report and initialize */
	pr_info("  NODE_DATA [mem %#010Lx-%#010Lx]\n",
		nd_pa, nd_pa + nd_size - 1);
	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
	if (tnid != nid)
		pr_info("    NODE_DATA(%d) on node %d\n", nid, tnid);

	node_data[nid] = nd;
	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
	NODE_DATA(nid)->node_id = nid;
	NODE_DATA(nid)->node_start_pfn = start_pfn;
	NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}

static void __init find_possible_nodes(void)
{
	struct device_node *rtas;
	const __be32 *domains = NULL;
	int prop_length, max_nodes;
	u32 i;

	if (!numa_enabled)
		return;

	rtas = of_find_node_by_path("/rtas");
	if (!rtas)
		return;

	/*
	 * ibm,current-associativity-domains is a fairly recent property. If
	 * it doesn't exist, then fall back to ibm,max-associativity-domains.
	 * Current denotes what the platform can support compared to max
	 * which denotes what the Hypervisor can support.
	 *
	 * If the LPAR is migratable, new nodes might be activated after an
	 * LPM, so we should consider the max number in that case.
	 */
	if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
		domains = of_get_property(rtas,
					  "ibm,current-associativity-domains",
					  &prop_length);
	if (!domains) {
		domains = of_get_property(rtas, "ibm,max-associativity-domains",
					  &prop_length);
		if (!domains)
			goto out;
	}

	max_nodes = of_read_number(&domains[primary_domain_index], 1);
	pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);

	for (i = 0; i < max_nodes; i++) {
		if (!node_possible(i))
			node_set(i, node_possible_map);
	}

	prop_length /= sizeof(int);
	if (prop_length > primary_domain_index + 2)
		coregroup_enabled = 1;

out:
	of_node_put(rtas);
}

void __init mem_topology_setup(void)
{
	int cpu;

	/*
	 * Linux/mm assumes node 0 to be online at boot. However this is not
	 * true on PowerPC, where node 0 is like any other node and could be
	 * a cpuless, memoryless node. So force node 0 to be offline for now.
	 * This prevents a cpuless, memoryless node 0 from showing up
	 * unnecessarily as online. If a node has cpus or memory that need
	 * to be online, then the node will be marked online anyway.
	 */
	node_set_offline(0);

	if (parse_numa_properties())
		setup_nonnuma();

	/*
	 * Modify the set of possible NUMA nodes to reflect information
	 * available about the set of online nodes, and the set of nodes
	 * that we expect to make use of for this platform's affinity
	 * calculations.
	 */
	nodes_and(node_possible_map, node_possible_map, node_online_map);

	find_possible_nodes();

	setup_node_to_cpumask_map();

	reset_numa_cpu_lookup_table();

	for_each_possible_cpu(cpu) {
		/*
		 * Powerpc with CONFIG_NUMA always used to have a node 0,
		 * even if it was memoryless or cpuless. For all cpus that
		 * are possible but not present, cpu_to_node() would point
		 * to node 0. To remove a cpuless, memoryless dummy node,
		 * powerpc needs to make sure all possible but not present
		 * cpu_to_node entries are set to a proper node.
		 */
		numa_setup_cpu(cpu);
	}
}

void __init initmem_init(void)
{
	int nid;

	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	memblock_dump_all();

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		setup_node_data(nid, start_pfn, end_pfn);
	}

	sparse_init();

	/*
	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
	 * even before we online them, so that we can use cpu_to_{node,mem}
	 * early in boot, cf. smp_prepare_cpus().
	 * _nocalls() + manual invocation is used because cpuhp is not yet
	 * initialized for the boot CPU.
	 */
	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
}

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	p = strstr(p, "fake=");
	if (p)
		cmdline = p + strlen("fake=");

	return 0;
}
early_param("numa", early_numa);

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section for
 * memory represented in the device tree by the property
 * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
 */
static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
	struct drmem_lmb *lmb;
	unsigned long lmb_size;
	int nid = NUMA_NO_NODE;

	lmb_size = drmem_lmb_size();

	for_each_drmem_lmb(lmb) {
		/* skip this block if it is reserved or not assigned to
		 * this partition */
		if ((lmb->flags & DRCONF_MEM_RESERVED)
		    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
			continue;

		if ((scn_addr < lmb->base_addr)
		    || (scn_addr >= (lmb->base_addr + lmb_size)))
			continue;

		nid = of_drconf_to_nid_single(lmb);
		break;
	}

	return nid;
}

/*
 * Find the node associated with a hot added memory section for memory
 * represented in the device tree as a node (i.e. memory@XXXX) for
 * each memblock.
 */
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory;
	int nid = NUMA_NO_NODE;

	for_each_node_by_type(memory, "memory") {
		unsigned long start, size;
		int ranges;
		const __be32 *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cells */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

		while (ranges--) {
			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
			size = read_n_cells(n_mem_size_cells, &memcell_buf);

			if ((scn_addr < start) || (scn_addr >= (start + size)))
				continue;

			nid = of_node_to_nid_single(memory);
			break;
		}

		if (nid >= 0)
			break;
	}

	of_node_put(memory);

	return nid;
}

/*
 * Find the node associated with a hot added memory section. Section
 * corresponds to a SPARSEMEM section, not a MEMBLOCK. It is assumed that
 * sections are fully contained within a single MEMBLOCK.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	int nid;

	if (!numa_enabled)
		return first_online_node;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		nid = hot_add_drconf_scn_to_nid(scn_addr);
		of_node_put(memory);
	} else {
		nid = hot_add_node_scn_to_nid(scn_addr);
	}

	if (nid < 0 || !node_possible(nid))
		nid = first_online_node;

	return nid;
}

static u64 hot_add_drconf_memory_max(void)
{
	struct device_node *memory = NULL;
	struct device_node *dn = NULL;
	const __be64 *lrdr = NULL;

	dn = of_find_node_by_path("/rtas");
	if (dn) {
		lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
		of_node_put(dn);
		if (lrdr)
			return be64_to_cpup(lrdr);
	}

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		of_node_put(memory);
		return drmem_lmb_memory_max();
	}
	return 0;
}

/*
 * memory_hotplug_max - return max address of memory that may be added
 *
 * This is currently only used on systems that support drconfig memory
 * hotplug.
 */
u64 memory_hotplug_max(void)
{
	return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
}
#endif /* CONFIG_MEMORY_HOTPLUG */

/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
static int topology_inited;

/*
 * Retrieve the new associativity information for a virtual processor's
 * home node.
 */
static long vphn_get_associativity(unsigned long cpu,
				   __be32 *associativity)
{
	long rc;

	rc = hcall_vphn(get_hard_smp_processor_id(cpu),
			VPHN_FLAG_VCPU, associativity);

	switch (rc) {
	case H_SUCCESS:
		dbg("VPHN hcall succeeded. Reset polling...\n");
		goto out;

	case H_FUNCTION:
		pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
		break;
	case H_HARDWARE:
		pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
				"preventing VPHN. Disabling polling...\n");
		break;
	case H_PARAMETER:
		pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
				"Disabling polling...\n");
		break;
	default:
		pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n",
				rc);
		break;
	}
out:
	return rc;
}

int find_and_online_cpu_nid(int cpu)
{
	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
	int new_nid;

	/* Use associativity from first thread for all siblings */
	if (vphn_get_associativity(cpu, associativity))
		return cpu_to_node(cpu);

	new_nid = associativity_to_nid(associativity);
	if (new_nid < 0 || !node_possible(new_nid))
		new_nid = first_online_node;

	if (NODE_DATA(new_nid) == NULL) {
#ifdef CONFIG_MEMORY_HOTPLUG
		/*
		 * Need to ensure that NODE_DATA is initialized for a node from
		 * available memory (see memblock_alloc_try_nid). If unable to
		 * init the node, then default to nearest node that has memory
		 * installed. Skip onlining a node if the subsystems are not
		 * yet initialized.
		 */
		if (!topology_inited || try_online_node(new_nid))
			new_nid = first_online_node;
#else
		/*
		 * Default to using the nearest node that has memory installed.
		 * Otherwise, it would be necessary to patch the kernel MM code
		 * to deal with more memoryless-node error conditions.
		 */
		new_nid = first_online_node;
#endif
	}

	pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__,
		 cpu, new_nid);
	return new_nid;
}

int cpu_to_coregroup_id(int cpu)
{
	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
	int index;

	if (cpu < 0 || cpu > nr_cpu_ids)
		return -1;

	if (!coregroup_enabled)
		goto out;

	if (!firmware_has_feature(FW_FEATURE_VPHN))
		goto out;

	if (vphn_get_associativity(cpu, associativity))
		goto out;

	index = of_read_number(associativity, 1);
	if (index > primary_domain_index + 1)
		return of_read_number(&associativity[index - 1], 1);

out:
	return cpu_to_core_id(cpu);
}

static int topology_update_init(void)
{
	topology_inited = 1;
	return 0;
}
device_initcall(topology_update_init);
#endif /* CONFIG_PPC_SPLPAR */