// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>

#include "dev.h"
#include "net-sysfs.h"

#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
	return dev->reg_state <= NETREG_REGISTERED;
}

/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	read_lock(&dev_base_lock);
	if (dev_isalive(ndev))
		ret = (*format)(ndev, buf);
	read_unlock(&dev_base_lock);

	return ret;
}

/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, dev->field);		\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}									\

#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)

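/*
 * Illustrative only (not part of the original file): NETDEVICE_SHOW_RW(mtu,
 * fmt_dec) expands to roughly
 *
 *	static ssize_t format_mtu(const struct net_device *dev, char *buf)
 *	{
 *		return sysfs_emit(buf, "%d\n", dev->mtu);
 *	}
 *	static ssize_t mtu_show(struct device *dev,
 *				struct device_attribute *attr, char *buf)
 *	{
 *		return netdev_show(dev, attr, buf, format_mtu);
 *	}
 *	static DEVICE_ATTR_RW(mtu);
 *
 * i.e. every simple field attribute funnels through netdev_show() above.
 */
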
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len,
			    int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		goto err;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	rtnl_unlock();
 err:
	return ret;
}

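/*
 * A sketch of the write path (assuming an eth0 whose driver accepts the new
 * value):
 *
 *	# echo 9000 > /sys/class/net/eth0/mtu
 *
 * lands in mtu_store() -> netdev_store() -> change_mtu() -> dev_set_mtu(),
 * with the RTNL held around the device callback. A writer lacking
 * CAP_NET_ADMIN in the owning user namespace gets -EPERM.
 */
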
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);

static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);

static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
	return sysfs_emit(buf, fmt_dec, dev->name_assign_type);
}

static ssize_t name_assign_type_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (ndev->name_assign_type != NET_NAME_UNKNOWN)
		ret = netdev_show(dev, attr, buf, format_name_assign_type);

	return ret;
}
static DEVICE_ATTR_RO(name_assign_type);

/* use same locking rules as GIFHWADDR ioctl's */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	read_lock(&dev_base_lock);
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
	read_unlock(&dev_base_lock);
	return ret;
}
static DEVICE_ATTR_RO(address);

static ssize_t broadcast_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	if (dev_isalive(ndev))
		return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
	return -EINVAL;
}
static DEVICE_ATTR_RO(broadcast);

static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
	if (!netif_running(dev))
		return -EINVAL;
	return dev_change_carrier(dev, (bool)new_carrier);
}

static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);

	/* The check is also done in change_carrier; this helps returning early
	 * without hitting the trylock/restart in netdev_store.
	 */
	if (!netdev->netdev_ops->ndo_change_carrier)
		return -EOPNOTSUPP;

	return netdev_store(dev, attr, buf, len, change_carrier);
}

static ssize_t carrier_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RW(carrier);

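/*
 * Example (sketch): on a driver that implements ndo_change_carrier, such as
 * dummy, the carrier can be forced from user space while the interface is up:
 *
 *	# echo 0 > /sys/class/net/dummy0/carrier
 *	# cat /sys/class/net/dummy0/carrier
 *	0
 *
 * Both the read and the write return -EINVAL while the interface is down.
 */
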
static ssize_t speed_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev) && netif_device_present(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd))
			ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(speed);

static ssize_t duplex_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
			const char *duplex;

			switch (cmd.base.duplex) {
			case DUPLEX_HALF:
				duplex = "half";
				break;
			case DUPLEX_FULL:
				duplex = "full";
				break;
			default:
				duplex = "unknown";
				break;
			}
			ret = sysfs_emit(buf, "%s\n", duplex);
		}
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(duplex);

static ssize_t testing_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(testing);

static ssize_t dormant_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(dormant);

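/*
 * Keep in sync with the IF_OPER_* values in <uapi/linux/if.h>; the array is
 * indexed directly by netdev->operstate (IF_OPER_UNKNOWN == 0 ... IF_OPER_UP).
 */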
static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};

static ssize_t operstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	unsigned char operstate;

	read_lock(&dev_base_lock);
	operstate = netdev->operstate;
	if (!netif_running(netdev))
		operstate = IF_OPER_DOWN;
	read_unlock(&dev_base_lock);

	if (operstate >= ARRAY_SIZE(operstates))
		return -EINVAL; /* should not happen */

	return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);

static ssize_t carrier_changes_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec,
			  atomic_read(&netdev->carrier_up_count) +
			  atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);

static ssize_t carrier_up_count_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);

static ssize_t carrier_down_count_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);

/* read-write attributes */

static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}

static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);

static int change_flags(struct net_device *dev, unsigned long new_flags)
{
	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}

static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW_RW(flags, fmt_hex);

static ssize_t tx_queue_len_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	WRITE_ONCE(dev->gro_flush_timeout, val);
	return 0;
}

static ssize_t gro_flush_timeout_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);

static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
	WRITE_ONCE(dev->napi_defer_hard_irqs, val);
	return 0;
}

static ssize_t napi_defer_hard_irqs_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);

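/*
 * Example (sketch): the two knobs above are typically tuned together to trade
 * interrupt rate for latency; gro_flush_timeout is in nanoseconds:
 *
 *	# echo 20000 > /sys/class/net/eth0/gro_flush_timeout
 *	# echo 2 > /sys/class/net/eth0/napi_defer_hard_irqs
 *
 * With both non-zero, napi_complete_done() can keep device interrupts masked
 * and let the gro_flush_timeout hrtimer re-schedule polling for up to
 * napi_defer_hard_irqs periods before re-arming the IRQ.
 */
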
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	size_t count = len;
	ssize_t ret = 0;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ignore trailing newline */
	if (len > 0 && buf[len - 1] == '\n')
		--count;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		ret = dev_set_alias(netdev, buf, count);
		if (ret < 0)
			goto err;
		ret = len;
		netdev_state_change(netdev);
	}
err:
	rtnl_unlock();

	return ret;
}

static ssize_t ifalias_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	char tmp[IFALIASZ];
	ssize_t ret = 0;

	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
	if (ret > 0)
		ret = sysfs_emit(buf, "%s\n", tmp);
	return ret;
}
static DEVICE_ATTR_RW(ifalias);

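/*
 * Example: # echo "uplink to core-1" > /sys/class/net/eth0/ifalias
 * The trailing newline added by echo is dropped before dev_set_alias(), and
 * a netlink state-change notification is emitted on success.
 */
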
static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);

static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
	return dev_change_proto_down(dev, (bool)proto_down);
}

static ssize_t proto_down_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);

static ssize_t phys_port_id_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The check is also done in dev_get_phys_port_id; this helps returning
	 * early without hitting the trylock/restart below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_id)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		struct netdev_phys_item_id ppid;

		ret = dev_get_phys_port_id(netdev, &ppid);
		if (!ret)
			ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static ssize_t phys_port_name_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_phys_port_name; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_name &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		char name[IFNAMSIZ];

		ret = dev_get_phys_port_name(netdev, name, sizeof(name));
		if (!ret)
			ret = sysfs_emit(buf, "%s\n", name);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_port_parent_id; this helps
	 * returning early without hitting the trylock/restart below. This works
	 * because recurse is false when calling dev_get_port_parent_id.
	 */
	if (!netdev->netdev_ops->ndo_get_port_parent_id &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		struct netdev_phys_item_id ppid = { };

		ret = dev_get_port_parent_id(netdev, &ppid, false);
		if (!ret)
			ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);

static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, netdev->threaded);

	rtnl_unlock();
	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = dev_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

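/*
 * Example: # echo 1 > /sys/class/net/eth0/threaded
 * moves the device's NAPI processing into dedicated kernel threads (visible
 * as "napi/<dev>-<id>" in ps); only 0 and 1 are accepted, and devices with
 * no NAPI instances report -EOPNOTSUPP.
 */
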
static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);

/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	read_lock(&dev_base_lock);
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)

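/*
 * Illustrative only: NETSTAT_ENTRY(rx_packets) generates rx_packets_show(),
 * which reads the u64 at offsetof(struct rtnl_link_stats64, rx_packets) from
 * dev_get_stats() and backs /sys/class/net/<iface>/statistics/rx_packets.
 */
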
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);

static struct attribute *netstat_attrs[] __ro_after_init = {
	&dev_attr_rx_packets.attr,
	&dev_attr_tx_packets.attr,
	&dev_attr_rx_bytes.attr,
	&dev_attr_tx_bytes.attr,
	&dev_attr_rx_errors.attr,
	&dev_attr_tx_errors.attr,
	&dev_attr_rx_dropped.attr,
	&dev_attr_tx_dropped.attr,
	&dev_attr_multicast.attr,
	&dev_attr_collisions.attr,
	&dev_attr_rx_length_errors.attr,
	&dev_attr_rx_over_errors.attr,
	&dev_attr_rx_crc_errors.attr,
	&dev_attr_rx_frame_errors.attr,
	&dev_attr_rx_fifo_errors.attr,
	&dev_attr_rx_missed_errors.attr,
	&dev_attr_tx_aborted_errors.attr,
	&dev_attr_tx_carrier_errors.attr,
	&dev_attr_tx_fifo_errors.attr,
	&dev_attr_tx_heartbeat_errors.attr,
	&dev_attr_tx_window_errors.attr,
	&dev_attr_rx_compressed.attr,
	&dev_attr_tx_compressed.attr,
	&dev_attr_rx_nohandler.attr,
	NULL
};

static const struct attribute_group netstat_group = {
	.name  = "statistics",
	.attrs  = netstat_attrs,
};

static struct attribute *wireless_attrs[] = {
	NULL
};

static const struct attribute_group wireless_group = {
	.name = "wireless",
	.attrs = wireless_attrs,
};

static bool wireless_group_needed(struct net_device *ndev)
{
#if IS_ENABLED(CONFIG_CFG80211)
	if (ndev->ieee80211_ptr)
		return true;
#endif
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
	if (ndev->wireless_handlers)
		return true;
#endif
	return false;
}

#else /* CONFIG_SYSFS */
#define net_class_groups	NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_SYSFS
#define to_rx_queue_attr(_attr) \
	container_of(_attr, struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
				  char *buf)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
				   const char *buf, size_t count)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
	.show = rx_queue_attr_show,
	.store = rx_queue_attr_store,
};

#ifdef CONFIG_RPS
static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
	struct rps_map *map;
	cpumask_var_t mask;
	int i, len;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	map = rcu_dereference(queue->rps_map);
	if (map)
		for (i = 0; i < map->len; i++)
			cpumask_set_cpu(map->cpus[i], mask);

	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	rcu_read_unlock();
	free_cpumask_var(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
					cpumask_var_t mask)
{
	static DEFINE_MUTEX(rps_map_mutex);
	struct rps_map *old_map, *map;
	int cpu, i;

	map = kzalloc(max_t(unsigned int,
			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	i = 0;
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		map->cpus[i++] = cpu;

	if (i) {
		map->len = i;
	} else {
		kfree(map);
		map = NULL;
	}

	mutex_lock(&rps_map_mutex);
	old_map = rcu_dereference_protected(queue->rps_map,
					    mutex_is_locked(&rps_map_mutex));
	rcu_assign_pointer(queue->rps_map, map);

	if (map)
		static_branch_inc(&rps_needed);
	if (old_map)
		static_branch_dec(&rps_needed);

	mutex_unlock(&rps_map_mutex);

	if (old_map)
		kfree_rcu(old_map, rcu);
	return 0;
}

int rps_cpumask_housekeeping(struct cpumask *mask)
{
	if (!cpumask_empty(mask)) {
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
		if (cpumask_empty(mask))
			return -EINVAL;
	}
	return 0;
}

static ssize_t store_rps_map(struct netdev_rx_queue *queue,
			     const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err)
		goto out;

	err = rps_cpumask_housekeeping(mask);
	if (err)
		goto out;

	err = netdev_rx_queue_set_rps_mask(queue, mask);

out:
	free_cpumask_var(mask);
	return err ? : len;
}

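/*
 * Example (sketch): steer receive processing for rx queue 0 onto CPUs 0-3:
 *
 *	# echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
 *
 * The parsed mask is intersected with the housekeeping CPUs above, so
 * isolated CPUs are silently dropped; a mask consisting only of isolated
 * CPUs fails with -EINVAL.
 */
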
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					   char *buf)
{
	struct rps_dev_flow_table *flow_table;
	unsigned long val = 0;

	rcu_read_lock();
	flow_table = rcu_dereference(queue->rps_flow_table);
	if (flow_table)
		val = (unsigned long)flow_table->mask + 1;
	rcu_read_unlock();

	return sysfs_emit(buf, "%lu\n", val);
}

static void rps_dev_flow_table_release(struct rcu_head *rcu)
{
	struct rps_dev_flow_table *table = container_of(rcu,
	    struct rps_dev_flow_table, rcu);
	vfree(table);
}

static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					    const char *buf, size_t len)
{
	unsigned long mask, count;
	struct rps_dev_flow_table *table, *old_table;
	static DEFINE_SPINLOCK(rps_dev_flow_lock);
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &count);
	if (rc < 0)
		return rc;

	if (count) {
		mask = count - 1;
		/* mask = roundup_pow_of_two(count) - 1;
		 * without overflows...
		 */
		while ((mask | (mask >> 1)) != mask)
			mask |= (mask >> 1);
		/* On 64 bit arches, must check mask fits in table->mask (u32),
		 * and on 32bit arches, must check
		 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
		 */
#if BITS_PER_LONG > 32
		if (mask > (unsigned long)(u32)mask)
			return -EINVAL;
#else
		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
				/ sizeof(struct rps_dev_flow)) {
			/* Enforce a limit to prevent overflow */
			return -EINVAL;
		}
#endif
		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
		if (!table)
			return -ENOMEM;

		table->mask = mask;
		for (count = 0; count <= mask; count++)
			table->flows[count].cpu = RPS_NO_CPU;
	} else {
		table = NULL;
	}

	spin_lock(&rps_dev_flow_lock);
	old_table = rcu_dereference_protected(queue->rps_flow_table,
					      lockdep_is_held(&rps_dev_flow_lock));
	rcu_assign_pointer(queue->rps_flow_table, table);
	spin_unlock(&rps_dev_flow_lock);

	if (old_table)
		call_rcu(&old_table->rcu, rps_dev_flow_table_release);

	return len;
}

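/*
 * Example: # echo 4096 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 * The count is rounded up so that the mask is a power of two minus one
 * (e.g. a request for 5000 yields an 8192-entry table); writing 0 frees
 * the per-queue flow table after an RCU grace period.
 */
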
static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);

static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
	= __ATTR(rps_flow_cnt, 0644,
		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */

static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
	&rps_cpus_attribute.attr,
	&rps_dev_flow_table_cnt_attribute.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(rx_queue_default);

static void rx_queue_release(struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
#ifdef CONFIG_RPS
	struct rps_map *map;
	struct rps_dev_flow_table *flow_table;

	map = rcu_dereference_protected(queue->rps_map, 1);
	if (map) {
		RCU_INIT_POINTER(queue->rps_map, NULL);
		kfree_rcu(map, rcu);
	}

	flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
	if (flow_table) {
		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
	}
#endif

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *rx_queue_namespace(const struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->ns_type)
		ns = dev->class->namespace(dev);

	return ns;
}

static void rx_queue_get_ownership(const struct kobject *kobj,
				   kuid_t *uid, kgid_t *gid)
{
	const struct net *net = rx_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type rx_queue_ktype = {
	.sysfs_ops = &rx_queue_sysfs_ops,
	.release = rx_queue_release,
	.default_groups = rx_queue_default_groups,
	.namespace = rx_queue_namespace,
	.get_ownership = rx_queue_get_ownership,
};

static int rx_queue_default_mask(struct net_device *dev,
				 struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
	struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);

	if (rps_default_mask && !cpumask_empty(rps_default_mask))
		return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
#endif
	return 0;
}

static int rx_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* A later kobject_put() will trigger rx_queue_release(), which
	 * decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
				     "rx-%u", index);
	if (error)
		goto err;

	if (dev->sysfs_rx_queue_group) {
		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
		if (error)
			goto err;
	}

	error = rx_queue_default_mask(dev, queue);
	if (error)
		goto err;

	kobject_uevent(kobj, KOBJ_ADD);

	return error;

err:
	kobject_put(kobj);
	return error;
}

static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
				 kgid_t kgid)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (dev->sysfs_rx_queue_group)
		error = sysfs_group_change_owner(
			kobj, dev->sysfs_rx_queue_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */

int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct kobject *kobj = &dev->_rx[i].kobj;

		if (!refcount_read(&dev_net(dev)->ns.count))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}

static int net_rx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = 0; i < num; i++) {
		error = rx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif
}

#ifdef CONFIG_SYSFS
/*
 * netdev_queue sysfs structures and functions.
 */
struct netdev_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct netdev_queue *queue, char *buf);
	ssize_t (*store)(struct netdev_queue *queue,
			 const char *buf, size_t len);
};
#define to_netdev_queue_attr(_attr) \
	container_of(_attr, struct netdev_queue_attribute, attr)

#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)

static ssize_t netdev_queue_attr_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t netdev_queue_attr_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops netdev_queue_sysfs_ops = {
	.show = netdev_queue_attr_show,
	.store = netdev_queue_attr_store,
};

static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
	unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);

	return sysfs_emit(buf, fmt_ulong, trans_timeout);
}

static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
	struct net_device *dev = queue->dev;
	unsigned int i;

	i = queue - dev->_tx;
	BUG_ON(i >= dev->num_tx_queues);

	return i;
}

static ssize_t traffic_class_show(struct netdev_queue *queue,
				  char *buf)
{
	struct net_device *dev = queue->dev;
	int num_tc, tc;
	int index;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!rtnl_trylock())
		return restart_syscall();

	index = get_netdev_queue_index(queue);

	/* If queue belongs to subordinate dev use its TC mapping */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	num_tc = dev->num_tc;
	tc = netdev_txq_to_tc(dev, index);

	rtnl_unlock();

	if (tc < 0)
		return -EINVAL;

	/* We can report the traffic class one of two ways:
	 * Subordinate device traffic classes are reported with the traffic
	 * class first, and then the subordinate class so for example TC0 on
	 * subordinate device 2 will be reported as "0-2". If the queue
	 * belongs to the root device it will be reported with just the
	 * traffic class, so just "0" for TC 0 for example.
	 */
	return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
			    sysfs_emit(buf, "%d\n", tc);
}

#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct netdev_queue *queue,
			       char *buf)
{
	return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}

static ssize_t tx_maxrate_store(struct netdev_queue *queue,
				const char *buf, size_t len)
{
	struct net_device *dev = queue->dev;
	int err, index = get_netdev_queue_index(queue);
	u32 rate = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The check is also done later; this helps returning early without
	 * hitting the trylock/restart below.
	 */
	if (!dev->netdev_ops->ndo_set_tx_maxrate)
		return -EOPNOTSUPP;

	err = kstrtou32(buf, 10, &rate);
	if (err < 0)
		return err;

	if (!rtnl_trylock())
		return restart_syscall();

	err = -EOPNOTSUPP;
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);

	rtnl_unlock();
	if (!err) {
		queue->tx_maxrate = rate;
		return len;
	}
	return err;
}

static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
	= __ATTR_RW(tx_maxrate);
#endif

static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
	= __ATTR_RO(tx_timeout);

static struct netdev_queue_attribute queue_traffic_class __ro_after_init
	= __ATTR_RO(traffic_class);

#ifdef CONFIG_BQL
/*
 * Byte queue limits sysfs structures and functions.
 */
static ssize_t bql_show(char *buf, unsigned int value)
{
	return sysfs_emit(buf, "%u\n", value);
}

static ssize_t bql_set(const char *buf, const size_t count,
		       unsigned int *pvalue)
{
	unsigned int value;
	int err;

	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
		value = DQL_MAX_LIMIT;
	} else {
		err = kstrtouint(buf, 10, &value);
		if (err < 0)
			return err;
		if (value > DQL_MAX_LIMIT)
			return -EINVAL;
	}

	*pvalue = value;

	return count;
}

static ssize_t bql_show_hold_time(struct netdev_queue *queue,
				  char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}

static ssize_t bql_set_hold_time(struct netdev_queue *queue,
				 const char *buf, size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	dql->slack_hold_time = msecs_to_jiffies(value);

	return len;
}

static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
	= __ATTR(hold_time, 0644,
		 bql_show_hold_time, bql_set_hold_time);

static ssize_t bql_show_inflight(struct netdev_queue *queue,
				 char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}

static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
	__ATTR(inflight, 0444, bql_show_inflight, NULL);

#define BQL_ATTR(NAME, FIELD)						\
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
				 char *buf)				\
{									\
	return bql_show(buf, queue->dql.FIELD);				\
}									\
									\
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
				const char *buf, size_t len)		\
{									\
	return bql_set(buf, len, &queue->dql.FIELD);			\
}									\
									\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
	= __ATTR(NAME, 0644,				\
		 bql_show_ ## NAME, bql_set_ ## NAME)

BQL_ATTR(limit, limit);
BQL_ATTR(limit_max, max_limit);
BQL_ATTR(limit_min, min_limit);

static struct attribute *dql_attrs[] __ro_after_init = {
	&bql_limit_attribute.attr,
	&bql_limit_max_attribute.attr,
	&bql_limit_min_attribute.attr,
	&bql_hold_time_attribute.attr,
	&bql_inflight_attribute.attr,
	NULL
};

static const struct attribute_group dql_group = {
	.name  = "byte_queue_limits",
	.attrs  = dql_attrs,
};
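
/*
 * Example (sketch): cap the bytes a queue may have queued but not yet
 * completed, or restore the default upper bound:
 *
 *	# echo 262144 > /sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit_max
 *	# echo max > /sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit_max
 *
 * "max" maps to DQL_MAX_LIMIT in bql_set() above.
 */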
#endif /* CONFIG_BQL */

#ifdef CONFIG_XPS
static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
			      int tc, char *buf, enum xps_map_type type)
{
	struct xps_dev_maps *dev_maps;
	unsigned long *mask;
	unsigned int nr_ids;
	int j, len;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps[type]);

	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
	 * when dev_maps hasn't been allocated yet, to be backward compatible.
	 */
	nr_ids = dev_maps ? dev_maps->nr_ids :
		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);

	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
	if (!mask) {
		rcu_read_unlock();
		return -ENOMEM;
	}

	if (!dev_maps || tc >= dev_maps->num_tc)
		goto out_no_maps;

	for (j = 0; j < nr_ids; j++) {
		int i, tci = j * dev_maps->num_tc + tc;
		struct xps_map *map;

		map = rcu_dereference(dev_maps->attr_map[tci]);
		if (!map)
			continue;

		for (i = map->len; i--;) {
			if (map->queues[i] == index) {
				__set_bit(j, mask);
				break;
			}
		}
	}
out_no_maps:
	rcu_read_unlock();

	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
	bitmap_free(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int len, tc;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	index = get_netdev_queue_index(queue);

	if (!rtnl_trylock())
		return restart_syscall();

	/* If queue belongs to subordinate dev use its map */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	tc = netdev_txq_to_tc(dev, index);
	if (tc < 0) {
		rtnl_unlock();
		return -EINVAL;
	}

	/* Make sure the subordinate device can't be freed */
	get_device(&dev->dev);
	rtnl_unlock();

	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);

	put_device(&dev->dev);
	return len;
}

static ssize_t xps_cpus_store(struct netdev_queue *queue,
			      const char *buf, size_t len)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	cpumask_var_t mask;
	int err;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	if (!rtnl_trylock()) {
		free_cpumask_var(mask);
		return restart_syscall();
	}

	err = netif_set_xps_queue(dev, mask, index);
	rtnl_unlock();

	free_cpumask_var(mask);

	return err ? : len;
}

static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
	= __ATTR_RW(xps_cpus);

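/*
 * Example (sketch): let only CPUs 2-3 select tx queue 1 for their transmits:
 *
 *	# echo c > /sys/class/net/eth0/queues/tx-1/xps_cpus
 *
 * The mask is per transmit queue and only meaningful on multiqueue devices;
 * single-queue devices report -ENOENT.
 */
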
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int tc;

	index = get_netdev_queue_index(queue);

	if (!rtnl_trylock())
		return restart_syscall();

	tc = netdev_txq_to_tc(dev, index);
	rtnl_unlock();
	if (tc < 0)
		return -EINVAL;

	return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
}

static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	struct net *net = dev_net(dev);
	unsigned long *mask;
	unsigned int index;
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	if (!rtnl_trylock()) {
		bitmap_free(mask);
		return restart_syscall();
	}

	cpus_read_lock();
	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
	cpus_read_unlock();

	rtnl_unlock();

	bitmap_free(mask);
	return err ? : len;
}

static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
	= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */

static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
	&queue_trans_timeout.attr,
	&queue_traffic_class.attr,
#ifdef CONFIG_XPS
	&xps_cpus_attribute.attr,
	&xps_rxqs_attribute.attr,
	&queue_tx_maxrate.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(netdev_queue_default);

static void netdev_queue_release(struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *netdev_queue_namespace(const struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->ns_type)
		ns = dev->class->namespace(dev);

	return ns;
}

static void netdev_queue_get_ownership(const struct kobject *kobj,
				       kuid_t *uid, kgid_t *gid)
{
	const struct net *net = netdev_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type netdev_queue_ktype = {
	.sysfs_ops = &netdev_queue_sysfs_ops,
	.release = netdev_queue_release,
	.default_groups = netdev_queue_default_groups,
	.namespace = netdev_queue_namespace,
	.get_ownership = netdev_queue_get_ownership,
};

static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_queue *queue = dev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* A later kobject_put() will trigger netdev_queue_release(), which
	 * decreases the dev refcount: take that reference here.
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
				     "tx-%u", index);
	if (error)
		goto err;

#ifdef CONFIG_BQL
	error = sysfs_create_group(kobj, &dql_group);
	if (error)
		goto err;
#endif

	kobject_uevent(kobj, KOBJ_ADD);
	return 0;

err:
	kobject_put(kobj);
	return error;
}

static int tx_queue_change_owner(struct net_device *ndev, int index,
				 kuid_t kuid, kgid_t kgid)
{
	struct netdev_queue *queue = ndev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

#ifdef CONFIG_BQL
	error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);
#endif
	return error;
}
#endif /* CONFIG_SYSFS */

int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!refcount_read(&dev_net(dev)->ns.count))
			queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
		sysfs_remove_group(&queue->kobj, &dql_group);
#endif
		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static void *net_grab_current_ns(void)
{
	struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (ns)
		refcount_inc(&ns->passive);
#endif
	return ns;
}

static const void *net_initial_ns(void)
{
	return &init_net;
}

static const void *net_netlink_ns(struct sock *sk)
{
	return sock_net(sk);
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what rtnetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}

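/*
 * Resulting uevent environment (illustrative): INTERFACE=eth0 and IFINDEX=2,
 * alongside the core ACTION/DEVPATH/SUBSYSTEM=net variables, which is what
 * udev rules typically match on.
 */
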
/*
 *	netdev_release -- destroy and free a dead device.
 *	Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	netdev_freemem(dev);
}

static const void *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return dev_net(dev);
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

static struct class net_class __ro_after_init = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding with the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
#endif

/* Delete sysfs entries but hold kobject reference until after all
 * netdev references are gone.
 */
void netdev_unregister_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;

	if (!refcount_read(&dev_net(ndev)->ns.count))
		dev_set_uevent_suppress(dev, 1);

	kobject_get(&dev->kobj);

	remove_queue_kobjects(ndev);

	pm_runtime_set_memalloc_noio(dev, false);

	device_del(dev);
}

/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device-specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const void *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const void *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);

int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}
