// SPDX-License-Identifier: GPL-2.0
/*
 * CPU subsystem support
 *
 * drivers/base/cpu.c (Linux 5.10, revision 8c2ecf20)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/topology.h>
#include <linux/device.h>
#include <linux/node.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/acpi.h>
#include <linux/of.h>
#include <linux/cpufeature.h>
#include <linux/tick.h>
#include <linux/pm_qos.h>
#include <linux/sched/isolation.h>

#include "base.h"

static DEFINE_PER_CPU(struct device *, cpu_sys_devices);

static int cpu_subsys_match(struct device *dev, struct device_driver *drv)
{
	/* ACPI style match is the only one that may succeed. */
	if (acpi_driver_match_device(dev, drv))
		return 1;

	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static void change_cpu_under_node(struct cpu *cpu,
			unsigned int from_nid, unsigned int to_nid)
{
	int cpuid = cpu->dev.id;

	unregister_cpu_under_node(cpuid, from_nid);
	register_cpu_under_node(cpuid, to_nid);
	cpu->node_id = to_nid;
}

static int cpu_subsys_online(struct device *dev)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);
	int cpuid = dev->id;
	int from_nid, to_nid;
	int ret;

	from_nid = cpu_to_node(cpuid);
	if (from_nid == NUMA_NO_NODE)
		return -ENODEV;

	ret = cpu_device_up(dev);
	/*
	 * When memory is hot-added to a memoryless node and a CPU on that
	 * node is then brought online, the CPU's node number may change
	 * internally, so re-read it once the CPU is up.
	 */
	to_nid = cpu_to_node(cpuid);
	if (from_nid != to_nid)
		change_cpu_under_node(cpu, from_nid, to_nid);

	return ret;
}

static int cpu_subsys_offline(struct device *dev)
{
	return cpu_device_down(dev);
}
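
/*
 * These ->online/->offline callbacks back each CPU's "online" sysfs
 * attribute; e.g. "echo 0 > /sys/devices/system/cpu/cpu1/online" is
 * routed to cpu_subsys_offline() by the driver core.
 */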

void unregister_cpu(struct cpu *cpu)
{
	int logical_cpu = cpu->dev.id;

	unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu));

	device_unregister(&cpu->dev);
	per_cpu(cpu_sys_devices, logical_cpu) = NULL;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
static ssize_t cpu_probe_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf,
			       size_t count)
{
	ssize_t cnt;
	int ret;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	cnt = arch_cpu_probe(buf, count);

	unlock_device_hotplug();
	return cnt;
}

static ssize_t cpu_release_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	ssize_t cnt;
	int ret;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	cnt = arch_cpu_release(buf, count);

	unlock_device_hotplug();
	return cnt;
}

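/*
 * On architectures that implement arch_cpu_probe()/arch_cpu_release()
 * (e.g. powerpc/pseries), the attributes below appear as
 * /sys/devices/system/cpu/probe and /sys/devices/system/cpu/release.
 */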
static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
#endif /* CONFIG_HOTPLUG_CPU */

struct bus_type cpu_subsys = {
	.name = "cpu",
	.dev_name = "cpu",
	.match = cpu_subsys_match,
#ifdef CONFIG_HOTPLUG_CPU
	.online = cpu_subsys_online,
	.offline = cpu_subsys_offline,
#endif
};
EXPORT_SYMBOL_GPL(cpu_subsys);
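
/*
 * cpu_subsys is registered with subsys_system_register() in cpu_dev_init()
 * below; that call creates the /sys/devices/system/cpu hierarchy that the
 * per-CPU devices and the root attributes in this file live under.
 */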

#ifdef CONFIG_KEXEC
#include <linux/kexec.h>

static ssize_t crash_notes_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);
	unsigned long long addr;
	int cpunum;

	cpunum = cpu->dev.id;

	/*
	 * We might be reading another CPU's data, depending on which CPU
	 * the reading thread was scheduled on.  But per-cpu data (memory)
	 * is allocated once at boot and never changes thereafter, so the
	 * read is safe and no locking is required.
	 */
	addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));

	return sysfs_emit(buf, "%llx\n", addr);
}
static DEVICE_ATTR_ADMIN_RO(crash_notes);

static ssize_t crash_notes_size_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	return sysfs_emit(buf, "%zu\n", sizeof(note_buf_t));
}
static DEVICE_ATTR_ADMIN_RO(crash_notes_size);
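
/*
 * Illustrative reads (values are hypothetical):
 *
 *	# cat /sys/devices/system/cpu/cpu0/crash_notes
 *	203f2b40
 *	# cat /sys/devices/system/cpu/cpu0/crash_notes_size
 *	424
 */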

static struct attribute *crash_note_cpu_attrs[] = {
	&dev_attr_crash_notes.attr,
	&dev_attr_crash_notes_size.attr,
	NULL
};

static struct attribute_group crash_note_cpu_attr_group = {
	.attrs = crash_note_cpu_attrs,
};
#endif

#ifdef CONFIG_CPU_ISOLATION_OPT
static ssize_t isolate_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);
	int cpuid = cpu->dev.id;

	return sysfs_emit(buf, "%d\n", cpu_isolated(cpuid));
}

static DEVICE_ATTR_RO(isolate);

static struct attribute *cpu_isolated_attrs[] = {
	&dev_attr_isolate.attr,
	NULL
};

static struct attribute_group cpu_isolated_attr_group = {
	.attrs = cpu_isolated_attrs,
};
#endif

static const struct attribute_group *common_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
	&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_CPU_ISOLATION_OPT
	&cpu_isolated_attr_group,
#endif
	NULL
};

static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
#ifdef CONFIG_KEXEC
	&crash_note_cpu_attr_group,
#endif
#ifdef CONFIG_CPU_ISOLATION_OPT
	&cpu_isolated_attr_group,
#endif
	NULL
};

/*
 * Print the CPU online, possible, and present masks
 * (plus the isolated mask with CONFIG_CPU_ISOLATION_OPT).
 */

struct cpu_attr {
	struct device_attribute attr;
	const struct cpumask *const map;
};

static ssize_t show_cpus_attr(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct cpu_attr *ca = container_of(attr, struct cpu_attr, attr);

	return cpumap_print_to_pagebuf(true, buf, ca->map);
}

#define _CPU_ATTR(name, map) \
	{ __ATTR(name, 0444, show_cpus_attr, NULL), map }

/* Keep in sync with cpu_subsys_attrs */
static struct cpu_attr cpu_attrs[] = {
	_CPU_ATTR(online, &__cpu_online_mask),
	_CPU_ATTR(possible, &__cpu_possible_mask),
	_CPU_ATTR(present, &__cpu_present_mask),
#ifdef CONFIG_CPU_ISOLATION_OPT
	_CPU_ATTR(core_ctl_isolated, &__cpu_isolated_mask),
#endif
};
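
/*
 * Each entry above becomes a cpulist file under /sys/devices/system/cpu/
 * (online, possible, present, and core_ctl_isolated with
 * CONFIG_CPU_ISOLATION_OPT); e.g. on a hypothetical 8-CPU system:
 *
 *	# cat /sys/devices/system/cpu/online
 *	0-7
 */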

/*
 * Print values for NR_CPUS and offlined cpus
 */
static ssize_t print_cpus_kernel_max(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", NR_CPUS - 1);
}
static DEVICE_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);

/* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
unsigned int total_cpus;

static ssize_t print_cpus_offline(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int len = 0;
	cpumask_var_t offline;

	/* display offline cpus < nr_cpu_ids */
	if (!alloc_cpumask_var(&offline, GFP_KERNEL))
		return -ENOMEM;
	cpumask_andnot(offline, cpu_possible_mask, cpu_online_mask);
	len += sysfs_emit_at(buf, len, "%*pbl", cpumask_pr_args(offline));
	free_cpumask_var(offline);

	/* display offline cpus >= nr_cpu_ids */
	if (total_cpus && nr_cpu_ids < total_cpus) {
		len += sysfs_emit_at(buf, len, ",");

		if (nr_cpu_ids == total_cpus - 1)
			len += sysfs_emit_at(buf, len, "%u", nr_cpu_ids);
		else
			len += sysfs_emit_at(buf, len, "%u-%d",
					     nr_cpu_ids, total_cpus - 1);
	}

	len += sysfs_emit_at(buf, len, "\n");

	return len;
}
static DEVICE_ATTR(offline, 0444, print_cpus_offline, NULL);
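
/*
 * Example (hypothetical): a machine with 8 possible CPUs booted with
 * nr_cpus=4, with cpu2 subsequently taken down, would show:
 *
 *	# cat /sys/devices/system/cpu/offline
 *	2,4-7
 */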

static ssize_t print_cpus_isolated(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	int len;
	cpumask_var_t isolated;

	if (!alloc_cpumask_var(&isolated, GFP_KERNEL))
		return -ENOMEM;

	cpumask_andnot(isolated, cpu_possible_mask,
		       housekeeping_cpumask(HK_FLAG_DOMAIN));
	len = sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(isolated));

	free_cpumask_var(isolated);

	return len;
}
static DEVICE_ATTR(isolated, 0444, print_cpus_isolated, NULL);

#ifdef CONFIG_NO_HZ_FULL
static ssize_t print_cpus_nohz_full(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(tick_nohz_full_mask));
}
static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL);
#endif

static void cpu_device_release(struct device *dev)
{
	/*
	 * This is an empty function to prevent the driver core from spitting a
	 * warning at us.  Yes, I know this is directly opposite of what the
	 * documentation for the driver core and kobjects say, and the author
	 * of this code has already been publicly ridiculed for doing
	 * something as foolish as this.  However, at this point in time, it is
	 * the only way to handle the issue of statically allocated cpu
	 * devices.  The different architectures will have their cpu device
	 * code reworked to properly handle this in the near future, so this
	 * function will then be changed to correctly free up the memory held
	 * by the cpu device.
	 *
	 * Never copy this way of doing things, or you too will be made fun of
	 * on the linux-kernel list, you have been warned.
	 */
}

#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
static ssize_t print_cpu_modalias(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	int len = 0;
	u32 i;

	len += sysfs_emit_at(buf, len,
			     "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
			     CPU_FEATURE_TYPEVAL);

	for (i = 0; i < MAX_CPU_FEATURES; i++) {
		if (cpu_have_feature(i)) {
			if (len + sizeof(",XXXX\n") >= PAGE_SIZE) {
				WARN(1, "CPU features overflow page\n");
				break;
			}
			len += sysfs_emit_at(buf, len, ",%04X", i);
		}
	}
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
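
/*
 * The emitted string looks like (feature numbers illustrative):
 *
 *	cpu:type:x86,ven0000fam0006mod003E:feature:,0002,0008,0094
 *
 * which udev matches against module aliases generated from
 * MODULE_DEVICE_TABLE(cpu, ...).
 */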

static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (buf) {
		print_cpu_modalias(NULL, NULL, buf);
		add_uevent_var(env, "MODALIAS=%s", buf);
		kfree(buf);
	}
	return 0;
}
#endif

/**
 * register_cpu - Setup a sysfs device for a CPU.
 * @cpu: If cpu->hotpluggable is set to 1, an "online" control file is
 *	 generated in sysfs for this CPU.
 * @num: CPU number to use when creating the device.
 *
 * Initialize and register the CPU device.
 */
int register_cpu(struct cpu *cpu, int num)
{
	int error;

	cpu->node_id = cpu_to_node(num);
	memset(&cpu->dev, 0x00, sizeof(struct device));
	cpu->dev.id = num;
	cpu->dev.bus = &cpu_subsys;
	cpu->dev.release = cpu_device_release;
	cpu->dev.offline_disabled = !cpu->hotpluggable;
	cpu->dev.offline = !cpu_online(num);
	cpu->dev.of_node = of_get_cpu_node(num, NULL);
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
	cpu->dev.bus->uevent = cpu_uevent;
#endif
	cpu->dev.groups = common_cpu_attr_groups;
	if (cpu->hotpluggable)
		cpu->dev.groups = hotplugable_cpu_attr_groups;
	error = device_register(&cpu->dev);
	if (error) {
		put_device(&cpu->dev);
		return error;
	}

	per_cpu(cpu_sys_devices, num) = &cpu->dev;
	register_cpu_under_node(num, cpu_to_node(num));
	dev_pm_qos_expose_latency_limit(&cpu->dev,
					PM_QOS_RESUME_LATENCY_NO_CONSTRAINT);

	return 0;
}

struct device *get_cpu_device(unsigned int cpu)
{
	if (cpu < nr_cpu_ids && cpu_possible(cpu))
		return per_cpu(cpu_sys_devices, cpu);
	else
		return NULL;
}
EXPORT_SYMBOL_GPL(get_cpu_device);
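
/*
 * Sketch of typical use (hypothetical arch registration code):
 *
 *	static DEFINE_PER_CPU(struct cpu, arch_cpu_devices);
 *
 *	int __init arch_register_cpus(void)
 *	{
 *		int i, ret;
 *
 *		for_each_present_cpu(i) {
 *			per_cpu(arch_cpu_devices, i).hotpluggable = 1;
 *			ret = register_cpu(&per_cpu(arch_cpu_devices, i), i);
 *			if (ret)
 *				return ret;
 *		}
 *		return 0;
 *	}
 *
 * after which get_cpu_device(i) returns &per_cpu(arch_cpu_devices, i).dev.
 */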

static void device_create_release(struct device *dev)
{
	kfree(dev);
}

__printf(4, 0)
static struct device *
__cpu_device_create(struct device *parent, void *drvdata,
		    const struct attribute_group **groups,
		    const char *fmt, va_list args)
{
	struct device *dev = NULL;
	int retval = -ENODEV;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev) {
		retval = -ENOMEM;
		goto error;
	}

	device_initialize(dev);
	dev->parent = parent;
	dev->groups = groups;
	dev->release = device_create_release;
	device_set_pm_not_required(dev);
	dev_set_drvdata(dev, drvdata);

	retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
	if (retval)
		goto error;

	retval = device_add(dev);
	if (retval)
		goto error;

	return dev;

error:
	put_device(dev);
	return ERR_PTR(retval);
}

struct device *cpu_device_create(struct device *parent, void *drvdata,
				 const struct attribute_group **groups,
				 const char *fmt, ...)
{
	va_list vargs;
	struct device *dev;

	va_start(vargs, fmt);
	dev = __cpu_device_create(parent, drvdata, groups, fmt, vargs);
	va_end(vargs);
	return dev;
}
EXPORT_SYMBOL_GPL(cpu_device_create);
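
/*
 * Sketch of a caller (hypothetical): hang a named child device off a CPU,
 * the way cacheinfo-style code attaches per-CPU children:
 *
 *	struct device *cpu_dev = get_cpu_device(cpu);
 *	struct device *child;
 *
 *	child = cpu_device_create(cpu_dev, NULL, NULL, "cache");
 *	if (IS_ERR(child))
 *		return PTR_ERR(child);
 */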

#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL);
#endif

static struct attribute *cpu_root_attrs[] = {
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	&dev_attr_probe.attr,
	&dev_attr_release.attr,
#endif
	&cpu_attrs[0].attr.attr,
	&cpu_attrs[1].attr.attr,
	&cpu_attrs[2].attr.attr,
#ifdef CONFIG_CPU_ISOLATION_OPT
	&cpu_attrs[3].attr.attr,
#endif
	&dev_attr_kernel_max.attr,
	&dev_attr_offline.attr,
	&dev_attr_isolated.attr,
#ifdef CONFIG_NO_HZ_FULL
	&dev_attr_nohz_full.attr,
#endif
#ifdef CONFIG_GENERIC_CPU_AUTOPROBE
	&dev_attr_modalias.attr,
#endif
	NULL
};

static struct attribute_group cpu_root_attr_group = {
	.attrs = cpu_root_attrs,
};

static const struct attribute_group *cpu_root_attr_groups[] = {
	&cpu_root_attr_group,
	NULL,
};

bool cpu_is_hotpluggable(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	return dev && container_of(dev, struct cpu, dev)->hotpluggable
		&& tick_nohz_cpu_hotpluggable(cpu);
}
EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);

#ifdef CONFIG_GENERIC_CPU_DEVICES
static DEFINE_PER_CPU(struct cpu, cpu_devices);
#endif

static void __init cpu_dev_register_generic(void)
{
#ifdef CONFIG_GENERIC_CPU_DEVICES
	int i;

	for_each_possible_cpu(i) {
		if (register_cpu(&per_cpu(cpu_devices, i), i))
			panic("Failed to register CPU device");
	}
#endif
}

#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES

ssize_t __weak cpu_show_meltdown(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_spectre_v1(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_spectre_v2(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
					  struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_l1tf(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_mds(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_srbds(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_retbleed(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_gds(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev,
					     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

ssize_t __weak cpu_show_reg_file_data_sampling(struct device *dev,
					       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "Not affected\n");
}

637
638static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
639static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
640static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
641static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
642static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
643static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
644static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
645static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
646static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
647static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
648static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
649static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
650static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
651static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
652
653static struct attribute *cpu_root_vulnerabilities_attrs[] = {
654	&dev_attr_meltdown.attr,
655	&dev_attr_spectre_v1.attr,
656	&dev_attr_spectre_v2.attr,
657	&dev_attr_spec_store_bypass.attr,
658	&dev_attr_l1tf.attr,
659	&dev_attr_mds.attr,
660	&dev_attr_tsx_async_abort.attr,
661	&dev_attr_itlb_multihit.attr,
662	&dev_attr_srbds.attr,
663	&dev_attr_mmio_stale_data.attr,
664	&dev_attr_retbleed.attr,
665	&dev_attr_gather_data_sampling.attr,
666	&dev_attr_spec_rstack_overflow.attr,
667	&dev_attr_reg_file_data_sampling.attr,
668	NULL
669};
670
671static const struct attribute_group cpu_root_vulnerabilities_group = {
672	.name  = "vulnerabilities",
673	.attrs = cpu_root_vulnerabilities_attrs,
674};
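
/*
 * This group is created as /sys/devices/system/cpu/vulnerabilities/ by
 * cpu_register_vulnerabilities() below; e.g. (output illustrative):
 *
 *	# cat /sys/devices/system/cpu/vulnerabilities/meltdown
 *	Not affected
 */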

static void __init cpu_register_vulnerabilities(void)
{
	if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
			       &cpu_root_vulnerabilities_group))
		pr_err("Unable to register CPU vulnerabilities\n");
}

#else
static inline void cpu_register_vulnerabilities(void) { }
#endif

void __init cpu_dev_init(void)
{
	if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
		panic("Failed to register CPU subsystem");

	cpu_dev_register_generic();
	cpu_register_vulnerabilities();
}