1// SPDX-License-Identifier: GPL-2.0 2/* 3 * CPU subsystem support 4 */ 5 6#include <linux/kernel.h> 7#include <linux/module.h> 8#include <linux/init.h> 9#include <linux/sched.h> 10#include <linux/cpu.h> 11#include <linux/topology.h> 12#include <linux/device.h> 13#include <linux/node.h> 14#include <linux/gfp.h> 15#include <linux/slab.h> 16#include <linux/percpu.h> 17#include <linux/acpi.h> 18#include <linux/of.h> 19#include <linux/cpufeature.h> 20#include <linux/tick.h> 21#include <linux/pm_qos.h> 22#include <linux/sched/isolation.h> 23 24#include "base.h" 25 26static DEFINE_PER_CPU(struct device *, cpu_sys_devices); 27 28static int cpu_subsys_match(struct device *dev, struct device_driver *drv) 29{ 30 /* ACPI style match is the only one that may succeed. */ 31 if (acpi_driver_match_device(dev, drv)) 32 return 1; 33 34 return 0; 35} 36 37#ifdef CONFIG_HOTPLUG_CPU 38static void change_cpu_under_node(struct cpu *cpu, 39 unsigned int from_nid, unsigned int to_nid) 40{ 41 int cpuid = cpu->dev.id; 42 unregister_cpu_under_node(cpuid, from_nid); 43 register_cpu_under_node(cpuid, to_nid); 44 cpu->node_id = to_nid; 45} 46 47static int cpu_subsys_online(struct device *dev) 48{ 49 struct cpu *cpu = container_of(dev, struct cpu, dev); 50 int cpuid = dev->id; 51 int from_nid, to_nid; 52 int ret; 53 54 from_nid = cpu_to_node(cpuid); 55 if (from_nid == NUMA_NO_NODE) 56 return -ENODEV; 57 58 ret = cpu_device_up(dev); 59 /* 60 * When hot adding memory to memoryless node and enabling a cpu 61 * on the node, node number of the cpu may internally change. 62 */ 63 to_nid = cpu_to_node(cpuid); 64 if (from_nid != to_nid) 65 change_cpu_under_node(cpu, from_nid, to_nid); 66 67 return ret; 68} 69 70static int cpu_subsys_offline(struct device *dev) 71{ 72 return cpu_device_down(dev); 73} 74 75void unregister_cpu(struct cpu *cpu) 76{ 77 int logical_cpu = cpu->dev.id; 78 79 unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); 80 81 device_unregister(&cpu->dev); 82 per_cpu(cpu_sys_devices, logical_cpu) = NULL; 83 return; 84} 85 86#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE 87static ssize_t cpu_probe_store(struct device *dev, 88 struct device_attribute *attr, 89 const char *buf, 90 size_t count) 91{ 92 ssize_t cnt; 93 int ret; 94 95 ret = lock_device_hotplug_sysfs(); 96 if (ret) 97 return ret; 98 99 cnt = arch_cpu_probe(buf, count); 100 101 unlock_device_hotplug(); 102 return cnt; 103} 104 105static ssize_t cpu_release_store(struct device *dev, 106 struct device_attribute *attr, 107 const char *buf, 108 size_t count) 109{ 110 ssize_t cnt; 111 int ret; 112 113 ret = lock_device_hotplug_sysfs(); 114 if (ret) 115 return ret; 116 117 cnt = arch_cpu_release(buf, count); 118 119 unlock_device_hotplug(); 120 return cnt; 121} 122 123static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); 124static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); 125#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ 126#endif /* CONFIG_HOTPLUG_CPU */ 127 128struct bus_type cpu_subsys = { 129 .name = "cpu", 130 .dev_name = "cpu", 131 .match = cpu_subsys_match, 132#ifdef CONFIG_HOTPLUG_CPU 133 .online = cpu_subsys_online, 134 .offline = cpu_subsys_offline, 135#endif 136}; 137EXPORT_SYMBOL_GPL(cpu_subsys); 138 139#ifdef CONFIG_KEXEC 140#include <linux/kexec.h> 141 142static ssize_t crash_notes_show(struct device *dev, 143 struct device_attribute *attr, 144 char *buf) 145{ 146 struct cpu *cpu = container_of(dev, struct cpu, dev); 147 unsigned long long addr; 148 int cpunum; 149 150 cpunum = cpu->dev.id; 151 152 /* 153 * Might be reading other cpu's data based on which cpu read thread 154 * has been scheduled. But cpu data (memory) is allocated once during 155 * boot up and this data does not change there after. Hence this 156 * operation should be safe. No locking required. 157 */ 158 addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); 159 160 return sysfs_emit(buf, "%llx\n", addr); 161} 162static DEVICE_ATTR_ADMIN_RO(crash_notes); 163 164static ssize_t crash_notes_size_show(struct device *dev, 165 struct device_attribute *attr, 166 char *buf) 167{ 168 return sysfs_emit(buf, "%zu\n", sizeof(note_buf_t)); 169} 170static DEVICE_ATTR_ADMIN_RO(crash_notes_size); 171 172static struct attribute *crash_note_cpu_attrs[] = { 173 &dev_attr_crash_notes.attr, 174 &dev_attr_crash_notes_size.attr, 175 NULL 176}; 177 178static struct attribute_group crash_note_cpu_attr_group = { 179 .attrs = crash_note_cpu_attrs, 180}; 181#endif 182 183#ifdef CONFIG_CPU_ISOLATION_OPT 184static ssize_t isolate_show(struct device *dev, 185 struct device_attribute *attr, char *buf) 186{ 187 struct cpu *cpu = container_of(dev, struct cpu, dev); 188 ssize_t rc; 189 int cpuid = cpu->dev.id; 190 unsigned int isolated = cpu_isolated(cpuid); 191 192 rc = sysfs_emit(buf, "%d\n", isolated); 193 194 return rc; 195} 196 197static DEVICE_ATTR_RO(isolate); 198 199static struct attribute *cpu_isolated_attrs[] = { 200 &dev_attr_isolate.attr, 201 NULL 202}; 203 204static struct attribute_group cpu_isolated_attr_group = { 205 .attrs = cpu_isolated_attrs, 206}; 207#endif 208 209static const struct attribute_group *common_cpu_attr_groups[] = { 210#ifdef CONFIG_KEXEC 211 &crash_note_cpu_attr_group, 212#endif 213#ifdef CONFIG_CPU_ISOLATION_OPT 214 &cpu_isolated_attr_group, 215#endif 216 NULL 217}; 218 219static const struct attribute_group *hotplugable_cpu_attr_groups[] = { 220#ifdef CONFIG_KEXEC 221 &crash_note_cpu_attr_group, 222#endif 223#ifdef CONFIG_CPU_ISOLATION_OPT 224 &cpu_isolated_attr_group, 225#endif 226 NULL 227}; 228 229/* 230 * Print cpu online, possible, present, and system maps 231 */ 232 233struct cpu_attr { 234 struct device_attribute attr; 235 const struct cpumask *const map; 236}; 237 238static ssize_t show_cpus_attr(struct device *dev, 239 struct device_attribute *attr, 240 char *buf) 241{ 242 struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr); 243 244 return cpumap_print_to_pagebuf(true, buf, ca->map); 245} 246 247#define _CPU_ATTR(name, map) \ 248 { __ATTR(name, 0444, show_cpus_attr, NULL), map } 249 250/* Keep in sync with cpu_subsys_attrs */ 251static struct cpu_attr cpu_attrs[] = { 252 _CPU_ATTR(online, &__cpu_online_mask), 253 _CPU_ATTR(possible, &__cpu_possible_mask), 254 _CPU_ATTR(present, &__cpu_present_mask), 255#ifdef CONFIG_CPU_ISOLATION_OPT 256 _CPU_ATTR(core_ctl_isolated, &__cpu_isolated_mask), 257#endif 258}; 259 260/* 261 * Print values for NR_CPUS and offlined cpus 262 */ 263static ssize_t print_cpus_kernel_max(struct device *dev, 264 struct device_attribute *attr, char *buf) 265{ 266 return sysfs_emit(buf, "%d\n", NR_CPUS - 1); 267} 268static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); 269 270/* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */ 271unsigned int total_cpus; 272 273static ssize_t print_cpus_offline(struct device *dev, 274 struct device_attribute *attr, char *buf) 275{ 276 int len = 0; 277 cpumask_var_t offline; 278 279 /* display offline cpus < nr_cpu_ids */ 280 if (!alloc_cpumask_var(&offline, GFP_KERNEL)) 281 return -ENOMEM; 282 cpumask_andnot(offline, cpu_possible_mask, cpu_online_mask); 283 len += sysfs_emit_at(buf, len, "%*pbl", cpumask_pr_args(offline)); 284 free_cpumask_var(offline); 285 286 /* display offline cpus >= nr_cpu_ids */ 287 if (total_cpus && nr_cpu_ids < total_cpus) { 288 len += sysfs_emit_at(buf, len, ","); 289 290 if (nr_cpu_ids == total_cpus-1) 291 len += sysfs_emit_at(buf, len, "%u", nr_cpu_ids); 292 else 293 len += sysfs_emit_at(buf, len, "%u-%d", 294 nr_cpu_ids, total_cpus - 1); 295 } 296 297 len += sysfs_emit_at(buf, len, "\n"); 298 299 return len; 300} 301static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL); 302 303static ssize_t print_cpus_isolated(struct device *dev, 304 struct device_attribute *attr, char *buf) 305{ 306 int len; 307 cpumask_var_t isolated; 308 309 if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) 310 return -ENOMEM; 311 312 cpumask_andnot(isolated, cpu_possible_mask, 313 housekeeping_cpumask(HK_FLAG_DOMAIN)); 314 len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated)); 315 316 free_cpumask_var(isolated); 317 318 return len; 319} 320static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL); 321 322#ifdef CONFIG_NO_HZ_FULL 323static ssize_t print_cpus_nohz_full(struct device *dev, 324 struct device_attribute *attr, char *buf) 325{ 326 return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask)); 327} 328static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); 329#endif 330 331static void cpu_device_release(struct device *dev) 332{ 333 /* 334 * This is an empty function to prevent the driver core from spitting a 335 * warning at us. Yes, I know this is directly opposite of what the 336 * documentation for the driver core and kobjects say, and the author 337 * of this code has already been publically ridiculed for doing 338 * something as foolish as this. However, at this point in time, it is 339 * the only way to handle the issue of statically allocated cpu 340 * devices. The different architectures will have their cpu device 341 * code reworked to properly handle this in the near future, so this 342 * function will then be changed to correctly free up the memory held 343 * by the cpu device. 344 * 345 * Never copy this way of doing things, or you too will be made fun of 346 * on the linux-kernel list, you have been warned. 347 */ 348} 349 350#ifdef CONFIG_GENERIC_CPU_AUTOPROBE 351static ssize_t print_cpu_modalias(struct device *dev, 352 struct device_attribute *attr, 353 char *buf) 354{ 355 int len = 0; 356 u32 i; 357 358 len += sysfs_emit_at(buf, len, 359 "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", 360 CPU_FEATURE_TYPEVAL); 361 362 for (i = 0; i < MAX_CPU_FEATURES; i++) 363 if (cpu_have_feature(i)) { 364 if (len + sizeof(",XXXX\n") >= PAGE_SIZE) { 365 WARN(1, "CPU features overflow page\n"); 366 break; 367 } 368 len += sysfs_emit_at(buf, len, ",%04X", i); 369 } 370 len += sysfs_emit_at(buf, len, "\n"); 371 return len; 372} 373 374static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) 375{ 376 char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); 377 if (buf) { 378 print_cpu_modalias(NULL, NULL, buf); 379 add_uevent_var(env, "MODALIAS=%s", buf); 380 kfree(buf); 381 } 382 return 0; 383} 384#endif 385 386/* 387 * register_cpu - Setup a sysfs device for a CPU. 388 * @cpu - cpu->hotpluggable field set to 1 will generate a control file in 389 * sysfs for this CPU. 390 * @num - CPU number to use when creating the device. 391 * 392 * Initialize and register the CPU device. 393 */ 394int register_cpu(struct cpu *cpu, int num) 395{ 396 int error; 397 398 cpu->node_id = cpu_to_node(num); 399 memset(&cpu->dev, 0x00, sizeof(struct device)); 400 cpu->dev.id = num; 401 cpu->dev.bus = &cpu_subsys; 402 cpu->dev.release = cpu_device_release; 403 cpu->dev.offline_disabled = !cpu->hotpluggable; 404 cpu->dev.offline = !cpu_online(num); 405 cpu->dev.of_node = of_get_cpu_node(num, NULL); 406#ifdef CONFIG_GENERIC_CPU_AUTOPROBE 407 cpu->dev.bus->uevent = cpu_uevent; 408#endif 409 cpu->dev.groups = common_cpu_attr_groups; 410 if (cpu->hotpluggable) 411 cpu->dev.groups = hotplugable_cpu_attr_groups; 412 error = device_register(&cpu->dev); 413 if (error) { 414 put_device(&cpu->dev); 415 return error; 416 } 417 418 per_cpu(cpu_sys_devices, num) = &cpu->dev; 419 register_cpu_under_node(num, cpu_to_node(num)); 420 dev_pm_qos_expose_latency_limit(&cpu->dev, 421 PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); 422 423 return 0; 424} 425 426struct device *get_cpu_device(unsigned cpu) 427{ 428 if (cpu < nr_cpu_ids && cpu_possible(cpu)) 429 return per_cpu(cpu_sys_devices, cpu); 430 else 431 return NULL; 432} 433EXPORT_SYMBOL_GPL(get_cpu_device); 434 435static void device_create_release(struct device *dev) 436{ 437 kfree(dev); 438} 439 440__printf(4, 0) 441static struct device * 442__cpu_device_create(struct device *parent, void *drvdata, 443 const struct attribute_group **groups, 444 const char *fmt, va_list args) 445{ 446 struct device *dev = NULL; 447 int retval = -ENODEV; 448 449 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 450 if (!dev) { 451 retval = -ENOMEM; 452 goto error; 453 } 454 455 device_initialize(dev); 456 dev->parent = parent; 457 dev->groups = groups; 458 dev->release = device_create_release; 459 device_set_pm_not_required(dev); 460 dev_set_drvdata(dev, drvdata); 461 462 retval = kobject_set_name_vargs(&dev->kobj, fmt, args); 463 if (retval) 464 goto error; 465 466 retval = device_add(dev); 467 if (retval) 468 goto error; 469 470 return dev; 471 472error: 473 put_device(dev); 474 return ERR_PTR(retval); 475} 476 477struct device *cpu_device_create(struct device *parent, void *drvdata, 478 const struct attribute_group **groups, 479 const char *fmt, ...) 480{ 481 va_list vargs; 482 struct device *dev; 483 484 va_start(vargs, fmt); 485 dev = __cpu_device_create(parent, drvdata, groups, fmt, vargs); 486 va_end(vargs); 487 return dev; 488} 489EXPORT_SYMBOL_GPL(cpu_device_create); 490 491#ifdef CONFIG_GENERIC_CPU_AUTOPROBE 492static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); 493#endif 494 495static struct attribute *cpu_root_attrs[] = { 496#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE 497 &dev_attr_probe.attr, 498 &dev_attr_release.attr, 499#endif 500 &cpu_attrs[0].attr.attr, 501 &cpu_attrs[1].attr.attr, 502 &cpu_attrs[2].attr.attr, 503#ifdef CONFIG_CPU_ISOLATION_OPT 504 &cpu_attrs[3].attr.attr, 505#endif 506 &dev_attr_kernel_max.attr, 507 &dev_attr_offline.attr, 508 &dev_attr_isolated.attr, 509#ifdef CONFIG_NO_HZ_FULL 510 &dev_attr_nohz_full.attr, 511#endif 512#ifdef CONFIG_GENERIC_CPU_AUTOPROBE 513 &dev_attr_modalias.attr, 514#endif 515 NULL 516}; 517 518static struct attribute_group cpu_root_attr_group = { 519 .attrs = cpu_root_attrs, 520}; 521 522static const struct attribute_group *cpu_root_attr_groups[] = { 523 &cpu_root_attr_group, 524 NULL, 525}; 526 527bool cpu_is_hotpluggable(unsigned cpu) 528{ 529 struct device *dev = get_cpu_device(cpu); 530 return dev && container_of(dev, struct cpu, dev)->hotpluggable 531 && tick_nohz_cpu_hotpluggable(cpu); 532} 533EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); 534 535#ifdef CONFIG_GENERIC_CPU_DEVICES 536static DEFINE_PER_CPU(struct cpu, cpu_devices); 537#endif 538 539static void __init cpu_dev_register_generic(void) 540{ 541#ifdef CONFIG_GENERIC_CPU_DEVICES 542 int i; 543 544 for_each_possible_cpu(i) { 545 if (register_cpu(&per_cpu(cpu_devices, i), i)) 546 panic("Failed to register CPU device"); 547 } 548#endif 549} 550 551#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES 552 553ssize_t __weak cpu_show_meltdown(struct device *dev, 554 struct device_attribute *attr, char *buf) 555{ 556 return sysfs_emit(buf, "Not affected\n"); 557} 558 559ssize_t __weak cpu_show_spectre_v1(struct device *dev, 560 struct device_attribute *attr, char *buf) 561{ 562 return sysfs_emit(buf, "Not affected\n"); 563} 564 565ssize_t __weak cpu_show_spectre_v2(struct device *dev, 566 struct device_attribute *attr, char *buf) 567{ 568 return sysfs_emit(buf, "Not affected\n"); 569} 570 571ssize_t __weak cpu_show_spec_store_bypass(struct device *dev, 572 struct device_attribute *attr, char *buf) 573{ 574 return sysfs_emit(buf, "Not affected\n"); 575} 576 577ssize_t __weak cpu_show_l1tf(struct device *dev, 578 struct device_attribute *attr, char *buf) 579{ 580 return sysfs_emit(buf, "Not affected\n"); 581} 582 583ssize_t __weak cpu_show_mds(struct device *dev, 584 struct device_attribute *attr, char *buf) 585{ 586 return sysfs_emit(buf, "Not affected\n"); 587} 588 589ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, 590 struct device_attribute *attr, 591 char *buf) 592{ 593 return sysfs_emit(buf, "Not affected\n"); 594} 595 596ssize_t __weak cpu_show_itlb_multihit(struct device *dev, 597 struct device_attribute *attr, char *buf) 598{ 599 return sysfs_emit(buf, "Not affected\n"); 600} 601 602ssize_t __weak cpu_show_srbds(struct device *dev, 603 struct device_attribute *attr, char *buf) 604{ 605 return sysfs_emit(buf, "Not affected\n"); 606} 607 608ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, 609 struct device_attribute *attr, char *buf) 610{ 611 return sysfs_emit(buf, "Not affected\n"); 612} 613 614ssize_t __weak cpu_show_retbleed(struct device *dev, 615 struct device_attribute *attr, char *buf) 616{ 617 return sysfs_emit(buf, "Not affected\n"); 618} 619 620ssize_t __weak cpu_show_gds(struct device *dev, 621 struct device_attribute *attr, char *buf) 622{ 623 return sysfs_emit(buf, "Not affected\n"); 624} 625 626ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, 627 struct device_attribute *attr, char *buf) 628{ 629 return sysfs_emit(buf, "Not affected\n"); 630} 631 632ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev, 633 struct device_attribute *attr, char *buf) 634{ 635 return sysfs_emit(buf, "Not affected\n"); 636} 637 638static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); 639static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); 640static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); 641static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); 642static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); 643static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); 644static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); 645static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); 646static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); 647static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); 648static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); 649static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); 650static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); 651static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); 652 653static struct attribute *cpu_root_vulnerabilities_attrs[] = { 654 &dev_attr_meltdown.attr, 655 &dev_attr_spectre_v1.attr, 656 &dev_attr_spectre_v2.attr, 657 &dev_attr_spec_store_bypass.attr, 658 &dev_attr_l1tf.attr, 659 &dev_attr_mds.attr, 660 &dev_attr_tsx_async_abort.attr, 661 &dev_attr_itlb_multihit.attr, 662 &dev_attr_srbds.attr, 663 &dev_attr_mmio_stale_data.attr, 664 &dev_attr_retbleed.attr, 665 &dev_attr_gather_data_sampling.attr, 666 &dev_attr_spec_rstack_overflow.attr, 667 &dev_attr_reg_file_data_sampling.attr, 668 NULL 669}; 670 671static const struct attribute_group cpu_root_vulnerabilities_group = { 672 .name = "vulnerabilities", 673 .attrs = cpu_root_vulnerabilities_attrs, 674}; 675 676static void __init cpu_register_vulnerabilities(void) 677{ 678 if (sysfs_create_group(&cpu_subsys.dev_root->kobj, 679 &cpu_root_vulnerabilities_group)) 680 pr_err("Unable to register CPU vulnerabilities\n"); 681} 682 683#else 684static inline void cpu_register_vulnerabilities(void) { } 685#endif 686 687void __init cpu_dev_init(void) 688{ 689 if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) 690 panic("Failed to register CPU subsystem"); 691 692 cpu_dev_register_generic(); 693 cpu_register_vulnerabilities(); 694} 695