xref: /kernel/linux/linux-6.6/net/ipv4/devinet.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	NET3	IP device support routines.
4 *
5 *	Derived from the IP parts of dev.c 1.0.19
6 * 		Authors:	Ross Biro
7 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9 *
10 *	Additional Authors:
11 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 *	Changes:
15 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16 *					lists.
17 *		Cyrus Durgin:		updated for kmod
18 *		Matthias Andree:	in devinet_ioctl, compare label and
19 *					address (4.4BSD alias style support),
20 *					fall back to comparing just the label
21 *					if no match found.
22 */
23
24
25#include <linux/uaccess.h>
26#include <linux/bitops.h>
27#include <linux/capability.h>
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/kernel.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/mm.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/in.h>
37#include <linux/errno.h>
38#include <linux/interrupt.h>
39#include <linux/if_addr.h>
40#include <linux/if_ether.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/etherdevice.h>
44#include <linux/skbuff.h>
45#include <linux/init.h>
46#include <linux/notifier.h>
47#include <linux/inetdevice.h>
48#include <linux/igmp.h>
49#include <linux/slab.h>
50#include <linux/hash.h>
51#ifdef CONFIG_SYSCTL
52#include <linux/sysctl.h>
53#endif
54#include <linux/kmod.h>
55#include <linux/netconf.h>
56
57#include <net/arp.h>
58#include <net/ip.h>
59#include <net/route.h>
60#include <net/ip_fib.h>
61#include <net/rtnetlink.h>
62#include <net/net_namespace.h>
63#include <net/addrconf.h>
64
65#define IPV6ONLY_FLAGS	\
66		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79	},
80};
81
82static struct ipv4_devconf ipv4_devconf_dflt = {
83	.data = {
84		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92	},
93};
94
95#define IPV4_DEVCONF_DFLT(net, attr) \
96	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97
98static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99	[IFA_LOCAL]     	= { .type = NLA_U32 },
100	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104	[IFA_FLAGS]		= { .type = NLA_U32 },
105	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107	[IFA_PROTO]		= { .type = NLA_U8 },
108};
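/* For reference, a typical userspace request such as
 * "ip addr add 192.0.2.1/24 broadcast 192.0.2.255 dev eth0 label eth0:1"
 * arrives as an RTM_NEWADDR message carrying IFA_LOCAL/IFA_ADDRESS,
 * IFA_BROADCAST and IFA_LABEL attributes, which are parsed against this
 * policy.
 */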
109
110struct inet_fill_args {
111	u32 portid;
112	u32 seq;
113	int event;
114	unsigned int flags;
115	int netnsid;
116	int ifindex;
117};
118
119#define IN4_ADDR_HSIZE_SHIFT	8
120#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121
122static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
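/* The ifaddr hash table: addresses are hashed on (netns, ifa_local) so that
 * inet_lookup_ifaddr_rcu() can resolve a local address without scanning
 * every device.
 */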
124static u32 inet_addr_hash(const struct net *net, __be32 addr)
125{
126	u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129}
130
131static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132{
133	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135	ASSERT_RTNL();
136	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137}
138
139static void inet_hash_remove(struct in_ifaddr *ifa)
140{
141	ASSERT_RTNL();
142	hlist_del_init_rcu(&ifa->hash);
143}
144
145/**
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
150 *
151 * If a caller uses devref=false, it should be protected by RCU or RTNL.
152 */
153struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154{
155	struct net_device *result = NULL;
156	struct in_ifaddr *ifa;
157
158	rcu_read_lock();
159	ifa = inet_lookup_ifaddr_rcu(net, addr);
160	if (!ifa) {
161		struct flowi4 fl4 = { .daddr = addr };
162		struct fib_result res = { 0 };
163		struct fib_table *local;
164
165		/* Fall back to the FIB local table so that communication
166		 * over loopback subnets works.
167		 */
168		local = fib_get_table(net, RT_TABLE_LOCAL);
169		if (local &&
170		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171		    res.type == RTN_LOCAL)
172			result = FIB_RES_DEV(res);
173	} else {
174		result = ifa->ifa_dev->dev;
175	}
176	if (result && devref)
177		dev_hold(result);
178	rcu_read_unlock();
179	return result;
180}
181EXPORT_SYMBOL(__ip_dev_find);
182
183/* called under RCU lock */
184struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185{
186	u32 hash = inet_addr_hash(net, addr);
187	struct in_ifaddr *ifa;
188
189	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190		if (ifa->ifa_local == addr &&
191		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192			return ifa;
193
194	return NULL;
195}
196
197static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201static void inet_del_ifa(struct in_device *in_dev,
202			 struct in_ifaddr __rcu **ifap,
203			 int destroy);
204#ifdef CONFIG_SYSCTL
205static int devinet_sysctl_register(struct in_device *idev);
206static void devinet_sysctl_unregister(struct in_device *idev);
207#else
208static int devinet_sysctl_register(struct in_device *idev)
209{
210	return 0;
211}
212static void devinet_sysctl_unregister(struct in_device *idev)
213{
214}
215#endif
216
217/* Allocation and RCU-deferred freeing of ifaddr structures. */
218
219static struct in_ifaddr *inet_alloc_ifa(void)
220{
221	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222}
223
224static void inet_rcu_free_ifa(struct rcu_head *head)
225{
226	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227	if (ifa->ifa_dev)
228		in_dev_put(ifa->ifa_dev);
229	kfree(ifa);
230}
231
232static void inet_free_ifa(struct in_ifaddr *ifa)
233{
234	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235}
236
237static void in_dev_free_rcu(struct rcu_head *head)
238{
239	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240
241	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242	kfree(idev);
243}
244
245void in_dev_finish_destroy(struct in_device *idev)
246{
247	struct net_device *dev = idev->dev;
248
249	WARN_ON(idev->ifa_list);
250	WARN_ON(idev->mc_list);
251#ifdef NET_REFCNT_DEBUG
252	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253#endif
254	netdev_put(dev, &idev->dev_tracker);
255	if (!idev->dead)
256		pr_err("Freeing alive in_device %p\n", idev);
257	else
258		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259}
260EXPORT_SYMBOL(in_dev_finish_destroy);
261
262static struct in_device *inetdev_init(struct net_device *dev)
263{
264	struct in_device *in_dev;
265	int err = -ENOMEM;
266
267	ASSERT_RTNL();
268
269	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270	if (!in_dev)
271		goto out;
272	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273			sizeof(in_dev->cnf));
274	in_dev->cnf.sysctl = NULL;
275	in_dev->dev = dev;
276	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277	if (!in_dev->arp_parms)
278		goto out_kfree;
279	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280		dev_disable_lro(dev);
281	/* Reference in_dev->dev */
282	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283	/* Account for reference dev->ip_ptr (below) */
284	refcount_set(&in_dev->refcnt, 1);
285
286	err = devinet_sysctl_register(in_dev);
287	if (err) {
288		in_dev->dead = 1;
289		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290		in_dev_put(in_dev);
291		in_dev = NULL;
292		goto out;
293	}
294	ip_mc_init_dev(in_dev);
295	if (dev->flags & IFF_UP)
296		ip_mc_up(in_dev);
297
298	/* we can receive as soon as ip_ptr is set -- do this last */
299	rcu_assign_pointer(dev->ip_ptr, in_dev);
300out:
301	return in_dev ?: ERR_PTR(err);
302out_kfree:
303	kfree(in_dev);
304	in_dev = NULL;
305	goto out;
306}
307
308static void inetdev_destroy(struct in_device *in_dev)
309{
310	struct net_device *dev;
311	struct in_ifaddr *ifa;
312
313	ASSERT_RTNL();
314
315	dev = in_dev->dev;
316
317	in_dev->dead = 1;
318
319	ip_mc_destroy_dev(in_dev);
320
321	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323		inet_free_ifa(ifa);
324	}
325
326	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327
328	devinet_sysctl_unregister(in_dev);
329	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330	arp_ifdown(dev);
331
332	in_dev_put(in_dev);
333}
334
335int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336{
337	const struct in_ifaddr *ifa;
338
339	rcu_read_lock();
340	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341		if (inet_ifa_match(a, ifa)) {
342			if (!b || inet_ifa_match(b, ifa)) {
343				rcu_read_unlock();
344				return 1;
345			}
346		}
347	}
348	rcu_read_unlock();
349	return 0;
350}
351
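/* Unlink the address at *ifap from its device. Deleting a primary address
 * also deletes its secondaries, unless promote_secondaries is enabled, in
 * which case the first matching secondary is promoted to primary.
 */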
352static void __inet_del_ifa(struct in_device *in_dev,
353			   struct in_ifaddr __rcu **ifap,
354			   int destroy, struct nlmsghdr *nlh, u32 portid)
355{
356	struct in_ifaddr *promote = NULL;
357	struct in_ifaddr *ifa, *ifa1;
358	struct in_ifaddr __rcu **last_prim;
359	struct in_ifaddr *prev_prom = NULL;
360	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361
362	ASSERT_RTNL();
363
364	ifa1 = rtnl_dereference(*ifap);
365	last_prim = ifap;
366	if (in_dev->dead)
367		goto no_promotions;
368
369	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
370	 * unless alias promotion is set.
371	 */
372
373	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375
376		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378			    ifa1->ifa_scope <= ifa->ifa_scope)
379				last_prim = &ifa->ifa_next;
380
381			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382			    ifa1->ifa_mask != ifa->ifa_mask ||
383			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384				ifap1 = &ifa->ifa_next;
385				prev_prom = ifa;
386				continue;
387			}
388
389			if (!do_promote) {
390				inet_hash_remove(ifa);
391				*ifap1 = ifa->ifa_next;
392
393				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394				blocking_notifier_call_chain(&inetaddr_chain,
395						NETDEV_DOWN, ifa);
396				inet_free_ifa(ifa);
397			} else {
398				promote = ifa;
399				break;
400			}
401		}
402	}
403
404	/* On promotion all secondaries from the subnet change
405	 * their primary IP, so we must remove all their routes silently
406	 * and later add them back with the new prefsrc. Do this
407	 * while all addresses are still on the device list.
408	 */
409	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410		if (ifa1->ifa_mask == ifa->ifa_mask &&
411		    inet_ifa_match(ifa1->ifa_address, ifa))
412			fib_del_ifaddr(ifa, ifa1);
413	}
414
415no_promotions:
416	/* 2. Unlink it */
417
418	*ifap = ifa1->ifa_next;
419	inet_hash_remove(ifa1);
420
421	/* 3. Announce address deletion */
422
423	/* Send the message first, then call the notifier.
424	   At first sight, the FIB update triggered by the notifier
425	   will refer to an already deleted ifaddr, which could confuse
426	   netlink listeners. It does not: gated sees that the route
427	   was deleted and, if it still thinks the ifaddr is valid,
428	   it will try to restore the deleted routes... Grr.
429	   So this order is correct.
430	 */
431	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433
434	if (promote) {
435		struct in_ifaddr *next_sec;
436
437		next_sec = rtnl_dereference(promote->ifa_next);
438		if (prev_prom) {
439			struct in_ifaddr *last_sec;
440
441			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442
443			last_sec = rtnl_dereference(*last_prim);
444			rcu_assign_pointer(promote->ifa_next, last_sec);
445			rcu_assign_pointer(*last_prim, promote);
446		}
447
448		promote->ifa_flags &= ~IFA_F_SECONDARY;
449		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450		blocking_notifier_call_chain(&inetaddr_chain,
451				NETDEV_UP, promote);
452		for (ifa = next_sec; ifa;
453		     ifa = rtnl_dereference(ifa->ifa_next)) {
454			if (ifa1->ifa_mask != ifa->ifa_mask ||
455			    !inet_ifa_match(ifa1->ifa_address, ifa))
456					continue;
457			fib_add_ifaddr(ifa);
458		}
459
460	}
461	if (destroy)
462		inet_free_ifa(ifa1);
463}
464
465static void inet_del_ifa(struct in_device *in_dev,
466			 struct in_ifaddr __rcu **ifap,
467			 int destroy)
468{
469	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470}
471
472static void check_lifetime(struct work_struct *work);
473
474static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475
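/* Insert ifa into its device's address list, classifying it as primary or
 * IFA_F_SECONDARY, giving registered validators a chance to veto it, and
 * announcing it via netlink and the inetaddr notifier chain.
 */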
476static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477			     u32 portid, struct netlink_ext_ack *extack)
478{
479	struct in_ifaddr __rcu **last_primary, **ifap;
480	struct in_device *in_dev = ifa->ifa_dev;
481	struct in_validator_info ivi;
482	struct in_ifaddr *ifa1;
483	int ret;
484
485	ASSERT_RTNL();
486
487	if (!ifa->ifa_local) {
488		inet_free_ifa(ifa);
489		return 0;
490	}
491
492	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493	last_primary = &in_dev->ifa_list;
494
495	/* Don't set IPv6-only flags on IPv4 addresses */
496	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497
498	ifap = &in_dev->ifa_list;
499	ifa1 = rtnl_dereference(*ifap);
500
501	while (ifa1) {
502		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503		    ifa->ifa_scope <= ifa1->ifa_scope)
504			last_primary = &ifa1->ifa_next;
505		if (ifa1->ifa_mask == ifa->ifa_mask &&
506		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507			if (ifa1->ifa_local == ifa->ifa_local) {
508				inet_free_ifa(ifa);
509				return -EEXIST;
510			}
511			if (ifa1->ifa_scope != ifa->ifa_scope) {
512				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513				inet_free_ifa(ifa);
514				return -EINVAL;
515			}
516			ifa->ifa_flags |= IFA_F_SECONDARY;
517		}
518
519		ifap = &ifa1->ifa_next;
520		ifa1 = rtnl_dereference(*ifap);
521	}
522
523	/* Allow any devices that wish to register ifaddr validators to weigh
524	 * in now, before changes are committed.  The rtnl lock is serializing
525	 * access here, so the state should not change between a validator call
526	 * and a final notify on commit.  This isn't invoked on promotion under
527	 * the assumption that validators are checking the address itself, and
528	 * not the flags.
529	 */
530	ivi.ivi_addr = ifa->ifa_address;
531	ivi.ivi_dev = ifa->ifa_dev;
532	ivi.extack = extack;
533	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534					   NETDEV_UP, &ivi);
535	ret = notifier_to_errno(ret);
536	if (ret) {
537		inet_free_ifa(ifa);
538		return ret;
539	}
540
541	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542		ifap = last_primary;
543
544	rcu_assign_pointer(ifa->ifa_next, *ifap);
545	rcu_assign_pointer(*ifap, ifa);
546
547	inet_hash_insert(dev_net(in_dev->dev), ifa);
548
549	cancel_delayed_work(&check_lifetime_work);
550	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551
552	/* Send the message first, then call the notifier.
553	   The notifier will trigger a FIB update, so that
554	   netlink listeners will know about the new ifaddr. */
555	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557
558	return 0;
559}
560
561static int inet_insert_ifa(struct in_ifaddr *ifa)
562{
563	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564}
565
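/* Attach a freshly built ifaddr to dev's in_device (taking a reference on
 * the in_device if the ifa is not yet bound to it), force host scope for
 * loopback addresses, and insert it.
 */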
566static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567{
568	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569
570	ASSERT_RTNL();
571
572	if (!in_dev) {
573		inet_free_ifa(ifa);
574		return -ENOBUFS;
575	}
576	ipv4_devconf_setall(in_dev);
577	neigh_parms_data_state_setall(in_dev->arp_parms);
578	if (ifa->ifa_dev != in_dev) {
579		WARN_ON(ifa->ifa_dev);
580		in_dev_hold(in_dev);
581		ifa->ifa_dev = in_dev;
582	}
583	if (ipv4_is_loopback(ifa->ifa_local))
584		ifa->ifa_scope = RT_SCOPE_HOST;
585	return inet_insert_ifa(ifa);
586}
587
588/* Caller must hold RCU or RTNL:
589 * we don't take a reference on the found in_device.
590 */
591struct in_device *inetdev_by_index(struct net *net, int ifindex)
592{
593	struct net_device *dev;
594	struct in_device *in_dev = NULL;
595
596	rcu_read_lock();
597	dev = dev_get_by_index_rcu(net, ifindex);
598	if (dev)
599		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600	rcu_read_unlock();
601	return in_dev;
602}
603EXPORT_SYMBOL(inetdev_by_index);
604
605/* Called only under the RTNL semaphore. No locks. */
606
607struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608				    __be32 mask)
609{
610	struct in_ifaddr *ifa;
611
612	ASSERT_RTNL();
613
614	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616			return ifa;
617	}
618	return NULL;
619}
620
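/* Join or leave the multicast group given by ifa->ifa_address on ifa's
 * device, using the per-netns mc_autojoin_sk socket; without
 * CONFIG_IP_MULTICAST this reports -EOPNOTSUPP.
 */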
621static int ip_mc_autojoin_config(struct net *net, bool join,
622				 const struct in_ifaddr *ifa)
623{
624#if defined(CONFIG_IP_MULTICAST)
625	struct ip_mreqn mreq = {
626		.imr_multiaddr.s_addr = ifa->ifa_address,
627		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628	};
629	struct sock *sk = net->ipv4.mc_autojoin_sk;
630	int ret;
631
632	ASSERT_RTNL();
633
634	lock_sock(sk);
635	if (join)
636		ret = ip_mc_join_group(sk, &mreq);
637	else
638		ret = ip_mc_leave_group(sk, &mreq);
639	release_sock(sk);
640
641	return ret;
642#else
643	return -EOPNOTSUPP;
644#endif
645}
646
647static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648			    struct netlink_ext_ack *extack)
649{
650	struct net *net = sock_net(skb->sk);
651	struct in_ifaddr __rcu **ifap;
652	struct nlattr *tb[IFA_MAX+1];
653	struct in_device *in_dev;
654	struct ifaddrmsg *ifm;
655	struct in_ifaddr *ifa;
656	int err;
657
658	ASSERT_RTNL();
659
660	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661				     ifa_ipv4_policy, extack);
662	if (err < 0)
663		goto errout;
664
665	ifm = nlmsg_data(nlh);
666	in_dev = inetdev_by_index(net, ifm->ifa_index);
667	if (!in_dev) {
668		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669		err = -ENODEV;
670		goto errout;
671	}
672
673	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674	     ifap = &ifa->ifa_next) {
675		if (tb[IFA_LOCAL] &&
676		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677			continue;
678
679		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680			continue;
681
682		if (tb[IFA_ADDRESS] &&
683		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685			continue;
686
687		if (ipv4_is_multicast(ifa->ifa_address))
688			ip_mc_autojoin_config(net, false, ifa);
689		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690		return 0;
691	}
692
693	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694	err = -EADDRNOTAVAIL;
695errout:
696	return err;
697}
698
699#define INFINITY_LIFE_TIME	0xFFFFFFFF
700
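/* Delayed work that walks the ifaddr hash: addresses whose valid lifetime
 * has expired are deleted, addresses past their preferred lifetime are
 * marked IFA_F_DEPRECATED, and the work re-arms itself for the next
 * deadline.
 */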
701static void check_lifetime(struct work_struct *work)
702{
703	unsigned long now, next, next_sec, next_sched;
704	struct in_ifaddr *ifa;
705	struct hlist_node *n;
706	int i;
707
708	now = jiffies;
709	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710
711	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712		bool change_needed = false;
713
714		rcu_read_lock();
715		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716			unsigned long age;
717
718			if (ifa->ifa_flags & IFA_F_PERMANENT)
719				continue;
720
721			/* We try to batch several events at once. */
722			age = (now - ifa->ifa_tstamp +
723			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
724
725			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
726			    age >= ifa->ifa_valid_lft) {
727				change_needed = true;
728			} else if (ifa->ifa_preferred_lft ==
729				   INFINITY_LIFE_TIME) {
730				continue;
731			} else if (age >= ifa->ifa_preferred_lft) {
732				if (time_before(ifa->ifa_tstamp +
733						ifa->ifa_valid_lft * HZ, next))
734					next = ifa->ifa_tstamp +
735					       ifa->ifa_valid_lft * HZ;
736
737				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
738					change_needed = true;
739			} else if (time_before(ifa->ifa_tstamp +
740					       ifa->ifa_preferred_lft * HZ,
741					       next)) {
742				next = ifa->ifa_tstamp +
743				       ifa->ifa_preferred_lft * HZ;
744			}
745		}
746		rcu_read_unlock();
747		if (!change_needed)
748			continue;
749		rtnl_lock();
750		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
751			unsigned long age;
752
753			if (ifa->ifa_flags & IFA_F_PERMANENT)
754				continue;
755
756			/* We try to batch several events at once. */
757			age = (now - ifa->ifa_tstamp +
758			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
759
760			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
761			    age >= ifa->ifa_valid_lft) {
762				struct in_ifaddr __rcu **ifap;
763				struct in_ifaddr *tmp;
764
765				ifap = &ifa->ifa_dev->ifa_list;
766				tmp = rtnl_dereference(*ifap);
767				while (tmp) {
768					if (tmp == ifa) {
769						inet_del_ifa(ifa->ifa_dev,
770							     ifap, 1);
771						break;
772					}
773					ifap = &tmp->ifa_next;
774					tmp = rtnl_dereference(*ifap);
775				}
776			} else if (ifa->ifa_preferred_lft !=
777				   INFINITY_LIFE_TIME &&
778				   age >= ifa->ifa_preferred_lft &&
779				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
780				ifa->ifa_flags |= IFA_F_DEPRECATED;
781				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
782			}
783		}
784		rtnl_unlock();
785	}
786
787	next_sec = round_jiffies_up(next);
788	next_sched = next;
789
790	/* If rounded timeout is accurate enough, accept it. */
791	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
792		next_sched = next_sec;
793
794	now = jiffies;
795	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
796	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
797		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
798
799	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
800			next_sched - now);
801}
802
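/* Convert the netlink valid/preferred lifetimes into jiffies on the ifa:
 * an infinite valid lifetime becomes IFA_F_PERMANENT, a zero preferred
 * lifetime marks the address IFA_F_DEPRECATED, and tstamp/cstamp are
 * updated.
 */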
803static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
804			     __u32 prefered_lft)
805{
806	unsigned long timeout;
807
808	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
809
810	timeout = addrconf_timeout_fixup(valid_lft, HZ);
811	if (addrconf_finite_timeout(timeout))
812		ifa->ifa_valid_lft = timeout;
813	else
814		ifa->ifa_flags |= IFA_F_PERMANENT;
815
816	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
817	if (addrconf_finite_timeout(timeout)) {
818		if (timeout == 0)
819			ifa->ifa_flags |= IFA_F_DEPRECATED;
820		ifa->ifa_preferred_lft = timeout;
821	}
822	ifa->ifa_tstamp = jiffies;
823	if (!ifa->ifa_cstamp)
824		ifa->ifa_cstamp = ifa->ifa_tstamp;
825}
826
827static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
828				       __u32 *pvalid_lft, __u32 *pprefered_lft,
829				       struct netlink_ext_ack *extack)
830{
831	struct nlattr *tb[IFA_MAX+1];
832	struct in_ifaddr *ifa;
833	struct ifaddrmsg *ifm;
834	struct net_device *dev;
835	struct in_device *in_dev;
836	int err;
837
838	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
839				     ifa_ipv4_policy, extack);
840	if (err < 0)
841		goto errout;
842
843	ifm = nlmsg_data(nlh);
844	err = -EINVAL;
845
846	if (ifm->ifa_prefixlen > 32) {
847		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
848		goto errout;
849	}
850
851	if (!tb[IFA_LOCAL]) {
852		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
853		goto errout;
854	}
855
856	dev = __dev_get_by_index(net, ifm->ifa_index);
857	err = -ENODEV;
858	if (!dev) {
859		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
860		goto errout;
861	}
862
863	in_dev = __in_dev_get_rtnl(dev);
864	err = -ENOBUFS;
865	if (!in_dev)
866		goto errout;
867
868	ifa = inet_alloc_ifa();
869	if (!ifa)
870		/*
871		 * A potential in_dev allocation can be left alive; it stays
872		 * assigned to its device and is destroyed with it.
873		 */
874		goto errout;
875
876	ipv4_devconf_setall(in_dev);
877	neigh_parms_data_state_setall(in_dev->arp_parms);
878	in_dev_hold(in_dev);
879
880	if (!tb[IFA_ADDRESS])
881		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
882
883	INIT_HLIST_NODE(&ifa->hash);
884	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
885	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
886	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
887					 ifm->ifa_flags;
888	ifa->ifa_scope = ifm->ifa_scope;
889	ifa->ifa_dev = in_dev;
890
891	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
892	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
893
894	if (tb[IFA_BROADCAST])
895		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
896
897	if (tb[IFA_LABEL])
898		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
899	else
900		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
901
902	if (tb[IFA_RT_PRIORITY])
903		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
904
905	if (tb[IFA_PROTO])
906		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
907
908	if (tb[IFA_CACHEINFO]) {
909		struct ifa_cacheinfo *ci;
910
911		ci = nla_data(tb[IFA_CACHEINFO]);
912		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
913			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
914			err = -EINVAL;
915			goto errout_free;
916		}
917		*pvalid_lft = ci->ifa_valid;
918		*pprefered_lft = ci->ifa_prefered;
919	}
920
921	return ifa;
922
923errout_free:
924	inet_free_ifa(ifa);
925errout:
926	return ERR_PTR(err);
927}
928
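/* Find an address already configured on the same device with the same mask,
 * subnet and local address; RTM_NEWADDR uses this to distinguish a new
 * address from a replace of an existing one.
 */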
929static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
930{
931	struct in_device *in_dev = ifa->ifa_dev;
932	struct in_ifaddr *ifa1;
933
934	if (!ifa->ifa_local)
935		return NULL;
936
937	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
938		if (ifa1->ifa_mask == ifa->ifa_mask &&
939		    inet_ifa_match(ifa1->ifa_address, ifa) &&
940		    ifa1->ifa_local == ifa->ifa_local)
941			return ifa1;
942	}
943	return NULL;
944}
945
946static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
947			    struct netlink_ext_ack *extack)
948{
949	struct net *net = sock_net(skb->sk);
950	struct in_ifaddr *ifa;
951	struct in_ifaddr *ifa_existing;
952	__u32 valid_lft = INFINITY_LIFE_TIME;
953	__u32 prefered_lft = INFINITY_LIFE_TIME;
954
955	ASSERT_RTNL();
956
957	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
958	if (IS_ERR(ifa))
959		return PTR_ERR(ifa);
960
961	ifa_existing = find_matching_ifa(ifa);
962	if (!ifa_existing) {
963		/* It would be best to check for !NLM_F_CREATE here but
964		 * userspace already relies on not having to provide this.
965		 */
966		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
967		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
968			int ret = ip_mc_autojoin_config(net, true, ifa);
969
970			if (ret < 0) {
971				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
972				inet_free_ifa(ifa);
973				return ret;
974			}
975		}
976		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
977					 extack);
978	} else {
979		u32 new_metric = ifa->ifa_rt_priority;
980		u8 new_proto = ifa->ifa_proto;
981
982		inet_free_ifa(ifa);
983
984		if (nlh->nlmsg_flags & NLM_F_EXCL ||
985		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
986			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
987			return -EEXIST;
988		}
989		ifa = ifa_existing;
990
991		if (ifa->ifa_rt_priority != new_metric) {
992			fib_modify_prefix_metric(ifa, new_metric);
993			ifa->ifa_rt_priority = new_metric;
994		}
995
996		ifa->ifa_proto = new_proto;
997
998		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
999		cancel_delayed_work(&check_lifetime_work);
1000		queue_delayed_work(system_power_efficient_wq,
1001				&check_lifetime_work, 0);
1002		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1003	}
1004	return 0;
1005}
1006
1007/*
1008 *	Determine a default prefix length, based on the classful IP address.
1009 */
1010
1011static int inet_abc_len(__be32 addr)
1012{
1013	int rc = -1;	/* Something else, probably a multicast. */
1014
1015	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1016		rc = 0;
1017	else {
1018		__u32 haddr = ntohl(addr);
1019		if (IN_CLASSA(haddr))
1020			rc = 8;
1021		else if (IN_CLASSB(haddr))
1022			rc = 16;
1023		else if (IN_CLASSC(haddr))
1024			rc = 24;
1025		else if (IN_CLASSE(haddr))
1026			rc = 32;
1027	}
1028
1029	return rc;
1030}
1031
1032
1033int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1034{
1035	struct sockaddr_in sin_orig;
1036	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1037	struct in_ifaddr __rcu **ifap = NULL;
1038	struct in_device *in_dev;
1039	struct in_ifaddr *ifa = NULL;
1040	struct net_device *dev;
1041	char *colon;
1042	int ret = -EFAULT;
1043	int tryaddrmatch = 0;
1044
1045	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1046
1047	/* save original address for comparison */
1048	memcpy(&sin_orig, sin, sizeof(*sin));
1049
1050	colon = strchr(ifr->ifr_name, ':');
1051	if (colon)
1052		*colon = 0;
1053
1054	dev_load(net, ifr->ifr_name);
1055
1056	switch (cmd) {
1057	case SIOCGIFADDR:	/* Get interface address */
1058	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1059	case SIOCGIFDSTADDR:	/* Get the destination address */
1060	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1061		/* Note that these ioctls will not sleep,
1062		   so we do not impose a lock.
1063		   One day we will be forced to put a shared lock here (I mean SMP).
1064		 */
1065		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1066		memset(sin, 0, sizeof(*sin));
1067		sin->sin_family = AF_INET;
1068		break;
1069
1070	case SIOCSIFFLAGS:
1071		ret = -EPERM;
1072		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1073			goto out;
1074		break;
1075	case SIOCSIFADDR:	/* Set interface address (and family) */
1076	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1077	case SIOCSIFDSTADDR:	/* Set the destination address */
1078	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1079		ret = -EPERM;
1080		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1081			goto out;
1082		ret = -EINVAL;
1083		if (sin->sin_family != AF_INET)
1084			goto out;
1085		break;
1086	default:
1087		ret = -EINVAL;
1088		goto out;
1089	}
1090
1091	rtnl_lock();
1092
1093	ret = -ENODEV;
1094	dev = __dev_get_by_name(net, ifr->ifr_name);
1095	if (!dev)
1096		goto done;
1097
1098	if (colon)
1099		*colon = ':';
1100
1101	in_dev = __in_dev_get_rtnl(dev);
1102	if (in_dev) {
1103		if (tryaddrmatch) {
1104			/* Matthias Andree */
1105			/* compare label and address (4.4BSD style) */
1106			/* note: we only do this for a limited set of ioctls
1107			   and only if the original address family was AF_INET.
1108			   This is checked above. */
1109
1110			for (ifap = &in_dev->ifa_list;
1111			     (ifa = rtnl_dereference(*ifap)) != NULL;
1112			     ifap = &ifa->ifa_next) {
1113				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1114				    sin_orig.sin_addr.s_addr ==
1115							ifa->ifa_local) {
1116					break; /* found */
1117				}
1118			}
1119		}
1120		/* We didn't get a match; maybe the application is
1121		   4.3BSD-style and passed in junk, so we fall back to
1122		   comparing just the label. */
1123		if (!ifa) {
1124			for (ifap = &in_dev->ifa_list;
1125			     (ifa = rtnl_dereference(*ifap)) != NULL;
1126			     ifap = &ifa->ifa_next)
1127				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1128					break;
1129		}
1130	}
1131
1132	ret = -EADDRNOTAVAIL;
1133	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1134		goto done;
1135
1136	switch (cmd) {
1137	case SIOCGIFADDR:	/* Get interface address */
1138		ret = 0;
1139		sin->sin_addr.s_addr = ifa->ifa_local;
1140		break;
1141
1142	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1143		ret = 0;
1144		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1145		break;
1146
1147	case SIOCGIFDSTADDR:	/* Get the destination address */
1148		ret = 0;
1149		sin->sin_addr.s_addr = ifa->ifa_address;
1150		break;
1151
1152	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1153		ret = 0;
1154		sin->sin_addr.s_addr = ifa->ifa_mask;
1155		break;
1156
1157	case SIOCSIFFLAGS:
1158		if (colon) {
1159			ret = -EADDRNOTAVAIL;
1160			if (!ifa)
1161				break;
1162			ret = 0;
1163			if (!(ifr->ifr_flags & IFF_UP))
1164				inet_del_ifa(in_dev, ifap, 1);
1165			break;
1166		}
1167		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1168		break;
1169
1170	case SIOCSIFADDR:	/* Set interface address (and family) */
1171		ret = -EINVAL;
1172		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1173			break;
1174
1175		if (!ifa) {
1176			ret = -ENOBUFS;
1177			ifa = inet_alloc_ifa();
1178			if (!ifa)
1179				break;
1180			INIT_HLIST_NODE(&ifa->hash);
1181			if (colon)
1182				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1183			else
1184				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1185		} else {
1186			ret = 0;
1187			if (ifa->ifa_local == sin->sin_addr.s_addr)
1188				break;
1189			inet_del_ifa(in_dev, ifap, 0);
1190			ifa->ifa_broadcast = 0;
1191			ifa->ifa_scope = 0;
1192		}
1193
1194		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1195
1196		if (!(dev->flags & IFF_POINTOPOINT)) {
1197			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1198			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1199			if ((dev->flags & IFF_BROADCAST) &&
1200			    ifa->ifa_prefixlen < 31)
1201				ifa->ifa_broadcast = ifa->ifa_address |
1202						     ~ifa->ifa_mask;
1203		} else {
1204			ifa->ifa_prefixlen = 32;
1205			ifa->ifa_mask = inet_make_mask(32);
1206		}
1207		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1208		ret = inet_set_ifa(dev, ifa);
1209		break;
1210
1211	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1212		ret = 0;
1213		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1214			inet_del_ifa(in_dev, ifap, 0);
1215			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1216			inet_insert_ifa(ifa);
1217		}
1218		break;
1219
1220	case SIOCSIFDSTADDR:	/* Set the destination address */
1221		ret = 0;
1222		if (ifa->ifa_address == sin->sin_addr.s_addr)
1223			break;
1224		ret = -EINVAL;
1225		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1226			break;
1227		ret = 0;
1228		inet_del_ifa(in_dev, ifap, 0);
1229		ifa->ifa_address = sin->sin_addr.s_addr;
1230		inet_insert_ifa(ifa);
1231		break;
1232
1233	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1234
1235		/*
1236		 *	The mask we set must be legal.
1237		 */
1238		ret = -EINVAL;
1239		if (bad_mask(sin->sin_addr.s_addr, 0))
1240			break;
1241		ret = 0;
1242		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1243			__be32 old_mask = ifa->ifa_mask;
1244			inet_del_ifa(in_dev, ifap, 0);
1245			ifa->ifa_mask = sin->sin_addr.s_addr;
1246			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1247
1248			/* If the current broadcast address matches
1249			 * the current netmask, recalculate
1250			 * the broadcast address. Otherwise it's a
1251			 * funny address, so don't touch it since
1252			 * the user seems to know what (s)he's doing...
1253			 */
1254			if ((dev->flags & IFF_BROADCAST) &&
1255			    (ifa->ifa_prefixlen < 31) &&
1256			    (ifa->ifa_broadcast ==
1257			     (ifa->ifa_local|~old_mask))) {
1258				ifa->ifa_broadcast = (ifa->ifa_local |
1259						      ~sin->sin_addr.s_addr);
1260			}
1261			inet_insert_ifa(ifa);
1262		}
1263		break;
1264	}
1265done:
1266	rtnl_unlock();
1267out:
1268	return ret;
1269}
1270
1271int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1272{
1273	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1274	const struct in_ifaddr *ifa;
1275	struct ifreq ifr;
1276	int done = 0;
1277
1278	if (WARN_ON(size > sizeof(struct ifreq)))
1279		goto out;
1280
1281	if (!in_dev)
1282		goto out;
1283
1284	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1285		if (!buf) {
1286			done += size;
1287			continue;
1288		}
1289		if (len < size)
1290			break;
1291		memset(&ifr, 0, sizeof(struct ifreq));
1292		strcpy(ifr.ifr_name, ifa->ifa_label);
1293
1294		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1295		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1296								ifa->ifa_local;
1297
1298		if (copy_to_user(buf + done, &ifr, size)) {
1299			done = -EFAULT;
1300			break;
1301		}
1302		len  -= size;
1303		done += size;
1304	}
1305out:
1306	return done;
1307}
1308
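/* Return the first primary (non-secondary) address on in_dev that is not
 * link-scoped and whose scope satisfies the requested scope, or 0 if none.
 */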
1309static __be32 in_dev_select_addr(const struct in_device *in_dev,
1310				 int scope)
1311{
1312	const struct in_ifaddr *ifa;
1313
1314	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1315		if (ifa->ifa_flags & IFA_F_SECONDARY)
1316			continue;
1317		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1318		    ifa->ifa_scope <= scope)
1319			return ifa->ifa_local;
1320	}
1321
1322	return 0;
1323}
1324
1325__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1326{
1327	const struct in_ifaddr *ifa;
1328	__be32 addr = 0;
1329	unsigned char localnet_scope = RT_SCOPE_HOST;
1330	struct in_device *in_dev;
1331	struct net *net = dev_net(dev);
1332	int master_idx;
1333
1334	rcu_read_lock();
1335	in_dev = __in_dev_get_rcu(dev);
1336	if (!in_dev)
1337		goto no_in_dev;
1338
1339	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1340		localnet_scope = RT_SCOPE_LINK;
1341
1342	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1343		if (ifa->ifa_flags & IFA_F_SECONDARY)
1344			continue;
1345		if (min(ifa->ifa_scope, localnet_scope) > scope)
1346			continue;
1347		if (!dst || inet_ifa_match(dst, ifa)) {
1348			addr = ifa->ifa_local;
1349			break;
1350		}
1351		if (!addr)
1352			addr = ifa->ifa_local;
1353	}
1354
1355	if (addr)
1356		goto out_unlock;
1357no_in_dev:
1358	master_idx = l3mdev_master_ifindex_rcu(dev);
1359
1360	/* For VRFs, the VRF device takes the place of the loopback device,
1361	 * with addresses on it being preferred.  Note in such cases the
1362	 * loopback device will be among the devices that fail the master_idx
1363	 * equality check in the loop below.
1364	 */
1365	if (master_idx &&
1366	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1367	    (in_dev = __in_dev_get_rcu(dev))) {
1368		addr = in_dev_select_addr(in_dev, scope);
1369		if (addr)
1370			goto out_unlock;
1371	}
1372
1373	/* Non-loopback addresses configured on the loopback device should be
1374	   preferred in this case. It is important that lo is the first
1375	   interface in the dev_base list.
1376	 */
1377	for_each_netdev_rcu(net, dev) {
1378		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1379			continue;
1380
1381		in_dev = __in_dev_get_rcu(dev);
1382		if (!in_dev)
1383			continue;
1384
1385		addr = in_dev_select_addr(in_dev, scope);
1386		if (addr)
1387			goto out_unlock;
1388	}
1389out_unlock:
1390	rcu_read_unlock();
1391	return addr;
1392}
1393EXPORT_SYMBOL(inet_select_addr);
1394
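/* Helper for inet_confirm_addr(): scan one in_device for a local address
 * matching the dst/local/scope constraints, returning it or 0.
 */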
1395static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1396			      __be32 local, int scope)
1397{
1398	unsigned char localnet_scope = RT_SCOPE_HOST;
1399	const struct in_ifaddr *ifa;
1400	__be32 addr = 0;
1401	int same = 0;
1402
1403	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1404		localnet_scope = RT_SCOPE_LINK;
1405
1406	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1407		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1408
1409		if (!addr &&
1410		    (local == ifa->ifa_local || !local) &&
1411		    min_scope <= scope) {
1412			addr = ifa->ifa_local;
1413			if (same)
1414				break;
1415		}
1416		if (!same) {
1417			same = (!local || inet_ifa_match(local, ifa)) &&
1418				(!dst || inet_ifa_match(dst, ifa));
1419			if (same && addr) {
1420				if (local || !dst)
1421					break;
1422				/* Is the selected addr in the dst subnet? */
1423				if (inet_ifa_match(addr, ifa))
1424					break;
1425				/* If not, can we use the new local src? */
1426				if (min_scope <= scope) {
1427					addr = ifa->ifa_local;
1428					break;
1429				}
1430				/* search for large dst subnet for addr */
1431				same = 0;
1432			}
1433		}
1434	}
1435
1436	return same ? addr : 0;
1437}
1438
1439/*
1440 * Confirm that local IP address exists using wildcards:
1441 * - net: netns to check, cannot be NULL
1442 * - in_dev: only on this interface, NULL=any interface
1443 * - dst: only in the same subnet as dst, 0=any dst
1444 * - local: address, 0=autoselect the local address
1445 * - scope: maximum allowed scope value for the local address
1446 */
1447__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1448			 __be32 dst, __be32 local, int scope)
1449{
1450	__be32 addr = 0;
1451	struct net_device *dev;
1452
1453	if (in_dev)
1454		return confirm_addr_indev(in_dev, dst, local, scope);
1455
1456	rcu_read_lock();
1457	for_each_netdev_rcu(net, dev) {
1458		in_dev = __in_dev_get_rcu(dev);
1459		if (in_dev) {
1460			addr = confirm_addr_indev(in_dev, dst, local, scope);
1461			if (addr)
1462				break;
1463		}
1464	}
1465	rcu_read_unlock();
1466
1467	return addr;
1468}
1469EXPORT_SYMBOL(inet_confirm_addr);
1470
1471/*
1472 *	Device notifier
1473 */
1474
1475int register_inetaddr_notifier(struct notifier_block *nb)
1476{
1477	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1478}
1479EXPORT_SYMBOL(register_inetaddr_notifier);
1480
1481int unregister_inetaddr_notifier(struct notifier_block *nb)
1482{
1483	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1484}
1485EXPORT_SYMBOL(unregister_inetaddr_notifier);
1486
1487int register_inetaddr_validator_notifier(struct notifier_block *nb)
1488{
1489	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1490}
1491EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1492
1493int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1494{
1495	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1496	    nb);
1497}
1498EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1499
1500/* Rename ifa_labels for a device name change. Make some effort to preserve
1501 * existing alias numbering and to create unique labels if possible.
1502 */
1503static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1504{
1505	struct in_ifaddr *ifa;
1506	int named = 0;
1507
1508	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1509		char old[IFNAMSIZ], *dot;
1510
1511		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1512		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513		if (named++ == 0)
1514			goto skip;
1515		dot = strchr(old, ':');
1516		if (!dot) {
1517			sprintf(old, ":%d", named);
1518			dot = old;
1519		}
1520		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1521			strcat(ifa->ifa_label, dot);
1522		else
1523			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1524skip:
1525		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1526	}
1527}
1528
1529static void inetdev_send_gratuitous_arp(struct net_device *dev,
1530					struct in_device *in_dev)
1531
1532{
1533	const struct in_ifaddr *ifa;
1534
1535	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1536		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1537			 ifa->ifa_local, dev,
1538			 ifa->ifa_local, NULL,
1539			 dev->dev_addr, NULL);
1540	}
1541}
1542
1543/* Called only under RTNL semaphore */
1544
1545static int inetdev_event(struct notifier_block *this, unsigned long event,
1546			 void *ptr)
1547{
1548	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1549	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1550
1551	ASSERT_RTNL();
1552
1553	if (!in_dev) {
1554		if (event == NETDEV_REGISTER) {
1555			in_dev = inetdev_init(dev);
1556			if (IS_ERR(in_dev))
1557				return notifier_from_errno(PTR_ERR(in_dev));
1558			if (dev->flags & IFF_LOOPBACK) {
1559				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1560				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1561			}
1562		} else if (event == NETDEV_CHANGEMTU) {
1563			/* Re-enabling IP */
1564			if (inetdev_valid_mtu(dev->mtu))
1565				in_dev = inetdev_init(dev);
1566		}
1567		goto out;
1568	}
1569
1570	switch (event) {
1571	case NETDEV_REGISTER:
1572		pr_debug("%s: bug\n", __func__);
1573		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1574		break;
1575	case NETDEV_UP:
1576		if (!inetdev_valid_mtu(dev->mtu))
1577			break;
1578		if (dev->flags & IFF_LOOPBACK) {
1579			struct in_ifaddr *ifa = inet_alloc_ifa();
1580
1581			if (ifa) {
1582				INIT_HLIST_NODE(&ifa->hash);
1583				ifa->ifa_local =
1584				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1585				ifa->ifa_prefixlen = 8;
1586				ifa->ifa_mask = inet_make_mask(8);
1587				in_dev_hold(in_dev);
1588				ifa->ifa_dev = in_dev;
1589				ifa->ifa_scope = RT_SCOPE_HOST;
1590				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1591				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1592						 INFINITY_LIFE_TIME);
1593				ipv4_devconf_setall(in_dev);
1594				neigh_parms_data_state_setall(in_dev->arp_parms);
1595				inet_insert_ifa(ifa);
1596			}
1597		}
1598		ip_mc_up(in_dev);
1599		fallthrough;
1600	case NETDEV_CHANGEADDR:
1601		if (!IN_DEV_ARP_NOTIFY(in_dev))
1602			break;
1603		fallthrough;
1604	case NETDEV_NOTIFY_PEERS:
1605		/* Send gratuitous ARP to notify of link change */
1606		inetdev_send_gratuitous_arp(dev, in_dev);
1607		break;
1608	case NETDEV_DOWN:
1609		ip_mc_down(in_dev);
1610		break;
1611	case NETDEV_PRE_TYPE_CHANGE:
1612		ip_mc_unmap(in_dev);
1613		break;
1614	case NETDEV_POST_TYPE_CHANGE:
1615		ip_mc_remap(in_dev);
1616		break;
1617	case NETDEV_CHANGEMTU:
1618		if (inetdev_valid_mtu(dev->mtu))
1619			break;
1620		/* disable IP when the MTU is too small */
1621		fallthrough;
1622	case NETDEV_UNREGISTER:
1623		inetdev_destroy(in_dev);
1624		break;
1625	case NETDEV_CHANGENAME:
1626		/* Do not notify about the label change; this event is
1627		 * not interesting to applications using netlink.
1628		 */
1629		inetdev_changename(dev, in_dev);
1630
1631		devinet_sysctl_unregister(in_dev);
1632		devinet_sysctl_register(in_dev);
1633		break;
1634	}
1635out:
1636	return NOTIFY_DONE;
1637}
1638
1639static struct notifier_block ip_netdev_notifier = {
1640	.notifier_call = inetdev_event,
1641};
1642
1643static size_t inet_nlmsg_size(void)
1644{
1645	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1646	       + nla_total_size(4) /* IFA_ADDRESS */
1647	       + nla_total_size(4) /* IFA_LOCAL */
1648	       + nla_total_size(4) /* IFA_BROADCAST */
1649	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1650	       + nla_total_size(4)  /* IFA_FLAGS */
1651	       + nla_total_size(1)  /* IFA_PROTO */
1652	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1653	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1654}
1655
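/* Convert a jiffies timestamp into hundredths of a second since boot, the
 * unit used by struct ifa_cacheinfo.
 */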
1656static inline u32 cstamp_delta(unsigned long cstamp)
1657{
1658	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1659}
1660
1661static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1662			 unsigned long tstamp, u32 preferred, u32 valid)
1663{
1664	struct ifa_cacheinfo ci;
1665
1666	ci.cstamp = cstamp_delta(cstamp);
1667	ci.tstamp = cstamp_delta(tstamp);
1668	ci.ifa_prefered = preferred;
1669	ci.ifa_valid = valid;
1670
1671	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1672}
1673
1674static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1675			    struct inet_fill_args *args)
1676{
1677	struct ifaddrmsg *ifm;
1678	struct nlmsghdr  *nlh;
1679	u32 preferred, valid;
1680
1681	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1682			args->flags);
1683	if (!nlh)
1684		return -EMSGSIZE;
1685
1686	ifm = nlmsg_data(nlh);
1687	ifm->ifa_family = AF_INET;
1688	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1689	ifm->ifa_flags = ifa->ifa_flags;
1690	ifm->ifa_scope = ifa->ifa_scope;
1691	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1692
1693	if (args->netnsid >= 0 &&
1694	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1695		goto nla_put_failure;
1696
1697	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1698		preferred = ifa->ifa_preferred_lft;
1699		valid = ifa->ifa_valid_lft;
1700		if (preferred != INFINITY_LIFE_TIME) {
1701			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1702
1703			if (preferred > tval)
1704				preferred -= tval;
1705			else
1706				preferred = 0;
1707			if (valid != INFINITY_LIFE_TIME) {
1708				if (valid > tval)
1709					valid -= tval;
1710				else
1711					valid = 0;
1712			}
1713		}
1714	} else {
1715		preferred = INFINITY_LIFE_TIME;
1716		valid = INFINITY_LIFE_TIME;
1717	}
1718	if ((ifa->ifa_address &&
1719	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1720	    (ifa->ifa_local &&
1721	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1722	    (ifa->ifa_broadcast &&
1723	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1724	    (ifa->ifa_label[0] &&
1725	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1726	    (ifa->ifa_proto &&
1727	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1728	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1729	    (ifa->ifa_rt_priority &&
1730	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1731	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1732			  preferred, valid))
1733		goto nla_put_failure;
1734
1735	nlmsg_end(skb, nlh);
1736	return 0;
1737
1738nla_put_failure:
1739	nlmsg_cancel(skb, nlh);
1740	return -EMSGSIZE;
1741}
1742
1743static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1744				      struct inet_fill_args *fillargs,
1745				      struct net **tgt_net, struct sock *sk,
1746				      struct netlink_callback *cb)
1747{
1748	struct netlink_ext_ack *extack = cb->extack;
1749	struct nlattr *tb[IFA_MAX+1];
1750	struct ifaddrmsg *ifm;
1751	int err, i;
1752
1753	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1754		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1755		return -EINVAL;
1756	}
1757
1758	ifm = nlmsg_data(nlh);
1759	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1760		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1761		return -EINVAL;
1762	}
1763
1764	fillargs->ifindex = ifm->ifa_index;
1765	if (fillargs->ifindex) {
1766		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1767		fillargs->flags |= NLM_F_DUMP_FILTERED;
1768	}
1769
1770	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1771					    ifa_ipv4_policy, extack);
1772	if (err < 0)
1773		return err;
1774
1775	for (i = 0; i <= IFA_MAX; ++i) {
1776		if (!tb[i])
1777			continue;
1778
1779		if (i == IFA_TARGET_NETNSID) {
1780			struct net *net;
1781
1782			fillargs->netnsid = nla_get_s32(tb[i]);
1783
1784			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1785			if (IS_ERR(net)) {
1786				fillargs->netnsid = -1;
1787				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1788				return PTR_ERR(net);
1789			}
1790			*tgt_net = net;
1791		} else {
1792			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1793			return -EINVAL;
1794		}
1795	}
1796
1797	return 0;
1798}
1799
1800static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1801			    struct netlink_callback *cb, int s_ip_idx,
1802			    struct inet_fill_args *fillargs)
1803{
1804	struct in_ifaddr *ifa;
1805	int ip_idx = 0;
1806	int err;
1807
1808	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1809		if (ip_idx < s_ip_idx) {
1810			ip_idx++;
1811			continue;
1812		}
1813		err = inet_fill_ifaddr(skb, ifa, fillargs);
1814		if (err < 0)
1815			goto done;
1816
1817		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1818		ip_idx++;
1819	}
1820	err = 0;
1821
1822done:
1823	cb->args[2] = ip_idx;
1824
1825	return err;
1826}
1827
1828/* Combine dev_addr_genid and dev_base_seq to detect changes.
1829 */
1830static u32 inet_base_seq(const struct net *net)
1831{
1832	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1833		  net->dev_base_seq;
1834
1835	/* Must not return 0 (see nl_dump_check_consistent()).
1836	 * Choose a value far away from 0.
1837	 */
1838	if (!res)
1839		res = 0x80000000;
1840	return res;
1841}
1842
1843static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1844{
1845	const struct nlmsghdr *nlh = cb->nlh;
1846	struct inet_fill_args fillargs = {
1847		.portid = NETLINK_CB(cb->skb).portid,
1848		.seq = nlh->nlmsg_seq,
1849		.event = RTM_NEWADDR,
1850		.flags = NLM_F_MULTI,
1851		.netnsid = -1,
1852	};
1853	struct net *net = sock_net(skb->sk);
1854	struct net *tgt_net = net;
1855	int h, s_h;
1856	int idx, s_idx;
1857	int s_ip_idx;
1858	struct net_device *dev;
1859	struct in_device *in_dev;
1860	struct hlist_head *head;
1861	int err = 0;
1862
1863	s_h = cb->args[0];
1864	s_idx = idx = cb->args[1];
1865	s_ip_idx = cb->args[2];
1866
1867	if (cb->strict_check) {
1868		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1869						 skb->sk, cb);
1870		if (err < 0)
1871			goto put_tgt_net;
1872
1873		err = 0;
1874		if (fillargs.ifindex) {
1875			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1876			if (!dev) {
1877				err = -ENODEV;
1878				goto put_tgt_net;
1879			}
1880
1881			in_dev = __in_dev_get_rtnl(dev);
1882			if (in_dev) {
1883				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1884						       &fillargs);
1885			}
1886			goto put_tgt_net;
1887		}
1888	}
1889
1890	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1891		idx = 0;
1892		head = &tgt_net->dev_index_head[h];
1893		rcu_read_lock();
1894		cb->seq = inet_base_seq(tgt_net);
1895		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1896			if (idx < s_idx)
1897				goto cont;
1898			if (h > s_h || idx > s_idx)
1899				s_ip_idx = 0;
1900			in_dev = __in_dev_get_rcu(dev);
1901			if (!in_dev)
1902				goto cont;
1903
1904			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1905					       &fillargs);
1906			if (err < 0) {
1907				rcu_read_unlock();
1908				goto done;
1909			}
1910cont:
1911			idx++;
1912		}
1913		rcu_read_unlock();
1914	}
1915
1916done:
1917	cb->args[0] = h;
1918	cb->args[1] = idx;
1919put_tgt_net:
1920	if (fillargs.netnsid >= 0)
1921		put_net(tgt_net);
1922
1923	return skb->len ? : err;
1924}
1925
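/* Build and broadcast an RTM_NEWADDR/RTM_DELADDR notification for ifa to
 * the RTNLGRP_IPV4_IFADDR netlink group.
 */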
1926static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1927		      u32 portid)
1928{
1929	struct inet_fill_args fillargs = {
1930		.portid = portid,
1931		.seq = nlh ? nlh->nlmsg_seq : 0,
1932		.event = event,
1933		.flags = 0,
1934		.netnsid = -1,
1935	};
1936	struct sk_buff *skb;
1937	int err = -ENOBUFS;
1938	struct net *net;
1939
1940	net = dev_net(ifa->ifa_dev->dev);
1941	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1942	if (!skb)
1943		goto errout;
1944
1945	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1946	if (err < 0) {
1947		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1948		WARN_ON(err == -EMSGSIZE);
1949		kfree_skb(skb);
1950		goto errout;
1951	}
1952	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1953	return;
1954errout:
1955	if (err < 0)
1956		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1957}
1958
1959static size_t inet_get_link_af_size(const struct net_device *dev,
1960				    u32 ext_filter_mask)
1961{
1962	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1963
1964	if (!in_dev)
1965		return 0;
1966
1967	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1968}
1969
1970static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1971			     u32 ext_filter_mask)
1972{
1973	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1974	struct nlattr *nla;
1975	int i;
1976
1977	if (!in_dev)
1978		return -ENODATA;
1979
1980	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1981	if (!nla)
1982		return -EMSGSIZE;
1983
1984	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1985		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1986
1987	return 0;
1988}
1989
1990static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1991	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1992};
1993
1994static int inet_validate_link_af(const struct net_device *dev,
1995				 const struct nlattr *nla,
1996				 struct netlink_ext_ack *extack)
1997{
1998	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1999	int err, rem;
2000
2001	if (dev && !__in_dev_get_rtnl(dev))
2002		return -EAFNOSUPPORT;
2003
2004	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2005					  inet_af_policy, extack);
2006	if (err < 0)
2007		return err;
2008
2009	if (tb[IFLA_INET_CONF]) {
2010		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2011			int cfgid = nla_type(a);
2012
2013			if (nla_len(a) < 4)
2014				return -EINVAL;
2015
2016			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2017				return -EINVAL;
2018		}
2019	}
2020
2021	return 0;
2022}
2023
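/* Apply the already-validated IFLA_INET_CONF options to the device's
 * ipv4_devconf.
 */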
2024static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2025			    struct netlink_ext_ack *extack)
2026{
2027	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2028	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2029	int rem;
2030
2031	if (!in_dev)
2032		return -EAFNOSUPPORT;
2033
2034	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2035		return -EINVAL;
2036
2037	if (tb[IFLA_INET_CONF]) {
2038		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2039			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2040	}
2041
2042	return 0;
2043}
2044
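/* Size of an RTM_NEWNETCONF message carrying attribute @type, or every
 * supported attribute when @type is NETCONFA_ALL.
 */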
2045static int inet_netconf_msgsize_devconf(int type)
2046{
2047	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2048		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2049	bool all = false;
2050
2051	if (type == NETCONFA_ALL)
2052		all = true;
2053
2054	if (all || type == NETCONFA_FORWARDING)
2055		size += nla_total_size(4);
2056	if (all || type == NETCONFA_RP_FILTER)
2057		size += nla_total_size(4);
2058	if (all || type == NETCONFA_MC_FORWARDING)
2059		size += nla_total_size(4);
2060	if (all || type == NETCONFA_BC_FORWARDING)
2061		size += nla_total_size(4);
2062	if (all || type == NETCONFA_PROXY_NEIGH)
2063		size += nla_total_size(4);
2064	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2065		size += nla_total_size(4);
2066
2067	return size;
2068}
2069
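/* Put one netconf message for @devconf into @skb.  @type selects a
 * single NETCONFA_* attribute or NETCONFA_ALL; a NULL @devconf emits
 * only the header and ifindex (used for RTM_DELNETCONF).
 */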
2070static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2071				     struct ipv4_devconf *devconf, u32 portid,
2072				     u32 seq, int event, unsigned int flags,
2073				     int type)
2074{
2075	struct nlmsghdr *nlh;
2076	struct netconfmsg *ncm;
2077	bool all = false;
2078
2079	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2080			flags);
2081	if (!nlh)
2082		return -EMSGSIZE;
2083
2084	if (type == NETCONFA_ALL)
2085		all = true;
2086
2087	ncm = nlmsg_data(nlh);
2088	ncm->ncm_family = AF_INET;
2089
2090	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2091		goto nla_put_failure;
2092
2093	if (!devconf)
2094		goto out;
2095
2096	if ((all || type == NETCONFA_FORWARDING) &&
2097	    nla_put_s32(skb, NETCONFA_FORWARDING,
2098			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2099		goto nla_put_failure;
2100	if ((all || type == NETCONFA_RP_FILTER) &&
2101	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2102			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2103		goto nla_put_failure;
2104	if ((all || type == NETCONFA_MC_FORWARDING) &&
2105	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2106			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2107		goto nla_put_failure;
2108	if ((all || type == NETCONFA_BC_FORWARDING) &&
2109	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2110			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2111		goto nla_put_failure;
2112	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2113	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2114			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2115		goto nla_put_failure;
2116	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2117	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2118			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2119		goto nla_put_failure;
2120
2121out:
2122	nlmsg_end(skb, nlh);
2123	return 0;
2124
2125nla_put_failure:
2126	nlmsg_cancel(skb, nlh);
2127	return -EMSGSIZE;
2128}
2129
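/* Notify RTNLGRP_IPV4_NETCONF listeners that attribute @type changed
 * for @ifindex (a device, NETCONFA_IFINDEX_ALL or
 * NETCONFA_IFINDEX_DEFAULT).
 */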
2130void inet_netconf_notify_devconf(struct net *net, int event, int type,
2131				 int ifindex, struct ipv4_devconf *devconf)
2132{
2133	struct sk_buff *skb;
2134	int err = -ENOBUFS;
2135
2136	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2137	if (!skb)
2138		goto errout;
2139
2140	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2141					event, 0, type);
2142	if (err < 0) {
2143		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2144		WARN_ON(err == -EMSGSIZE);
2145		kfree_skb(skb);
2146		goto errout;
2147	}
2148	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2149	return;
2150errout:
2151	if (err < 0)
2152		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2153}
2154
2155static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2156	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2157	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2158	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2159	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2160	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2161};
2162
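/* Parse an RTM_GETNETCONF request; with strict checking enabled, reject
 * any attribute other than NETCONFA_IFINDEX.
 */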
2163static int inet_netconf_valid_get_req(struct sk_buff *skb,
2164				      const struct nlmsghdr *nlh,
2165				      struct nlattr **tb,
2166				      struct netlink_ext_ack *extack)
2167{
2168	int i, err;
2169
2170	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2171		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2172		return -EINVAL;
2173	}
2174
2175	if (!netlink_strict_get_check(skb))
2176		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2177					      tb, NETCONFA_MAX,
2178					      devconf_ipv4_policy, extack);
2179
2180	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2181					    tb, NETCONFA_MAX,
2182					    devconf_ipv4_policy, extack);
2183	if (err)
2184		return err;
2185
2186	for (i = 0; i <= NETCONFA_MAX; i++) {
2187		if (!tb[i])
2188			continue;
2189
2190		switch (i) {
2191		case NETCONFA_IFINDEX:
2192			break;
2193		default:
2194			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2195			return -EINVAL;
2196		}
2197	}
2198
2199	return 0;
2200}
2201
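/* RTM_GETNETCONF handler: look up the requested configuration ("all",
 * "default" or a specific device) and unicast a NETCONFA_ALL snapshot
 * back to the requester.
 */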
2202static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2203				    struct nlmsghdr *nlh,
2204				    struct netlink_ext_ack *extack)
2205{
2206	struct net *net = sock_net(in_skb->sk);
2207	struct nlattr *tb[NETCONFA_MAX+1];
2208	struct sk_buff *skb;
2209	struct ipv4_devconf *devconf;
2210	struct in_device *in_dev;
2211	struct net_device *dev;
2212	int ifindex;
2213	int err;
2214
2215	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2216	if (err)
2217		goto errout;
2218
2219	err = -EINVAL;
2220	if (!tb[NETCONFA_IFINDEX])
2221		goto errout;
2222
2223	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2224	switch (ifindex) {
2225	case NETCONFA_IFINDEX_ALL:
2226		devconf = net->ipv4.devconf_all;
2227		break;
2228	case NETCONFA_IFINDEX_DEFAULT:
2229		devconf = net->ipv4.devconf_dflt;
2230		break;
2231	default:
2232		dev = __dev_get_by_index(net, ifindex);
2233		if (!dev)
2234			goto errout;
2235		in_dev = __in_dev_get_rtnl(dev);
2236		if (!in_dev)
2237			goto errout;
2238		devconf = &in_dev->cnf;
2239		break;
2240	}
2241
2242	err = -ENOBUFS;
2243	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2244	if (!skb)
2245		goto errout;
2246
2247	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2248					NETLINK_CB(in_skb).portid,
2249					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2250					NETCONFA_ALL);
2251	if (err < 0) {
2252		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2253		WARN_ON(err == -EMSGSIZE);
2254		kfree_skb(skb);
2255		goto errout;
2256	}
2257	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2258errout:
2259	return err;
2260}
2261
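/* RTM_GETNETCONF dump handler: walk the per-net device hash and emit one
 * netconf message per in_device, followed by entries for the "all" and
 * "default" configurations.  cb->args[] holds the resume point between
 * calls.
 */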
2262static int inet_netconf_dump_devconf(struct sk_buff *skb,
2263				     struct netlink_callback *cb)
2264{
2265	const struct nlmsghdr *nlh = cb->nlh;
2266	struct net *net = sock_net(skb->sk);
2267	int h, s_h;
2268	int idx, s_idx;
2269	struct net_device *dev;
2270	struct in_device *in_dev;
2271	struct hlist_head *head;
2272
2273	if (cb->strict_check) {
2274		struct netlink_ext_ack *extack = cb->extack;
2275		struct netconfmsg *ncm;
2276
2277		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2278			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2279			return -EINVAL;
2280		}
2281
2282		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2283			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2284			return -EINVAL;
2285		}
2286	}
2287
2288	s_h = cb->args[0];
2289	s_idx = idx = cb->args[1];
2290
2291	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2292		idx = 0;
2293		head = &net->dev_index_head[h];
2294		rcu_read_lock();
2295		cb->seq = inet_base_seq(net);
2296		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2297			if (idx < s_idx)
2298				goto cont;
2299			in_dev = __in_dev_get_rcu(dev);
2300			if (!in_dev)
2301				goto cont;
2302
2303			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2304						      &in_dev->cnf,
2305						      NETLINK_CB(cb->skb).portid,
2306						      nlh->nlmsg_seq,
2307						      RTM_NEWNETCONF,
2308						      NLM_F_MULTI,
2309						      NETCONFA_ALL) < 0) {
2310				rcu_read_unlock();
2311				goto done;
2312			}
2313			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2314cont:
2315			idx++;
2316		}
2317		rcu_read_unlock();
2318	}
2319	if (h == NETDEV_HASHENTRIES) {
2320		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2321					      net->ipv4.devconf_all,
2322					      NETLINK_CB(cb->skb).portid,
2323					      nlh->nlmsg_seq,
2324					      RTM_NEWNETCONF, NLM_F_MULTI,
2325					      NETCONFA_ALL) < 0)
2326			goto done;
2327		else
2328			h++;
2329	}
2330	if (h == NETDEV_HASHENTRIES + 1) {
2331		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2332					      net->ipv4.devconf_dflt,
2333					      NETLINK_CB(cb->skb).portid,
2334					      nlh->nlmsg_seq,
2335					      RTM_NEWNETCONF, NLM_F_MULTI,
2336					      NETCONFA_ALL) < 0)
2337			goto done;
2338		else
2339			h++;
2340	}
2341done:
2342	cb->args[0] = h;
2343	cb->args[1] = idx;
2344
2345	return skb->len;
2346}
2347
2348#ifdef CONFIG_SYSCTL
2349
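/* Propagate a change of conf/default/<option i> to every device that has
 * not explicitly overridden option @i (its state bit is still clear).
 */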
2350static void devinet_copy_dflt_conf(struct net *net, int i)
2351{
2352	struct net_device *dev;
2353
2354	rcu_read_lock();
2355	for_each_netdev_rcu(net, dev) {
2356		struct in_device *in_dev;
2357
2358		in_dev = __in_dev_get_rcu(dev);
2359		if (in_dev && !test_bit(i, in_dev->cnf.state))
2360			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2361	}
2362	rcu_read_unlock();
2363}
2364
2365/* called with RTNL locked */
2366static void inet_forward_change(struct net *net)
2367{
2368	struct net_device *dev;
2369	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2370
2371	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2372	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2373	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2374				    NETCONFA_FORWARDING,
2375				    NETCONFA_IFINDEX_ALL,
2376				    net->ipv4.devconf_all);
2377	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2378				    NETCONFA_FORWARDING,
2379				    NETCONFA_IFINDEX_DEFAULT,
2380				    net->ipv4.devconf_dflt);
2381
2382	for_each_netdev(net, dev) {
2383		struct in_device *in_dev;
2384
2385		if (on)
2386			dev_disable_lro(dev);
2387
2388		in_dev = __in_dev_get_rtnl(dev);
2389		if (in_dev) {
2390			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2391			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2392						    NETCONFA_FORWARDING,
2393						    dev->ifindex, &in_dev->cnf);
2394		}
2395	}
2396}
2397
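/* Map an ipv4_devconf back to the ifindex reported in netconf
 * notifications: the "all"/"default" pseudo indices or the owning
 * device's ifindex.
 */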
2398static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2399{
2400	if (cnf == net->ipv4.devconf_dflt)
2401		return NETCONFA_IFINDEX_DEFAULT;
2402	else if (cnf == net->ipv4.devconf_all)
2403		return NETCONFA_IFINDEX_ALL;
2404	else {
2405		struct in_device *idev
2406			= container_of(cnf, struct in_device, cnf);
2407		return idev->dev->ifindex;
2408	}
2409}
2410
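/* Generic handler for the per-interface sysctls: mark the option as
 * explicitly set, copy "default" changes to devices that have not
 * overridden them, flush the routing cache when bc_forwarding changes or
 * accept_local/route_localnet are disabled, and send netconf
 * notifications for rp_filter, proxy_arp and
 * ignore_routes_with_linkdown.
 */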
2411static int devinet_conf_proc(struct ctl_table *ctl, int write,
2412			     void *buffer, size_t *lenp, loff_t *ppos)
2413{
2414	int old_value = *(int *)ctl->data;
2415	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2416	int new_value = *(int *)ctl->data;
2417
2418	if (write) {
2419		struct ipv4_devconf *cnf = ctl->extra1;
2420		struct net *net = ctl->extra2;
2421		int i = (int *)ctl->data - cnf->data;
2422		int ifindex;
2423
2424		set_bit(i, cnf->state);
2425
2426		if (cnf == net->ipv4.devconf_dflt)
2427			devinet_copy_dflt_conf(net, i);
2428		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2429		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2430			if ((new_value == 0) && (old_value != 0))
2431				rt_cache_flush(net);
2432
2433		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2434		    new_value != old_value)
2435			rt_cache_flush(net);
2436
2437		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2438		    new_value != old_value) {
2439			ifindex = devinet_conf_ifindex(net, cnf);
2440			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2441						    NETCONFA_RP_FILTER,
2442						    ifindex, cnf);
2443		}
2444		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2445		    new_value != old_value) {
2446			ifindex = devinet_conf_ifindex(net, cnf);
2447			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2448						    NETCONFA_PROXY_NEIGH,
2449						    ifindex, cnf);
2450		}
2451		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2452		    new_value != old_value) {
2453			ifindex = devinet_conf_ifindex(net, cnf);
2454			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2455						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2456						    ifindex, cnf);
2457		}
2458	}
2459
2460	return ret;
2461}
2462
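/* Handler for the "forwarding" sysctls (including ip_forward): writes
 * require CAP_NET_ADMIN and take the RTNL lock, restarting the syscall
 * rather than blocking.  Changing the "all" value propagates to every
 * device, enabling forwarding disables LRO on the affected devices, and
 * any change other than to the "default" value flushes the routing
 * cache.
 */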
2463static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2464				  void *buffer, size_t *lenp, loff_t *ppos)
2465{
2466	int *valp = ctl->data;
2467	int val = *valp;
2468	loff_t pos = *ppos;
2469	struct net *net = ctl->extra2;
2470	int ret;
2471
2472	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2473		return -EPERM;
2474
2475	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2476
2477	if (write && *valp != val) {
2478		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2479			if (!rtnl_trylock()) {
2480				/* Restore the original values before restarting */
2481				*valp = val;
2482				*ppos = pos;
2483				return restart_syscall();
2484			}
2485			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2486				inet_forward_change(net);
2487			} else {
2488				struct ipv4_devconf *cnf = ctl->extra1;
2489				struct in_device *idev =
2490					container_of(cnf, struct in_device, cnf);
2491				if (*valp)
2492					dev_disable_lro(idev->dev);
2493				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2494							    NETCONFA_FORWARDING,
2495							    idev->dev->ifindex,
2496							    cnf);
2497			}
2498			rtnl_unlock();
2499			rt_cache_flush(net);
2500		} else
2501			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2502						    NETCONFA_FORWARDING,
2503						    NETCONFA_IFINDEX_DEFAULT,
2504						    net->ipv4.devconf_dflt);
2505	}
2506
2507	return ret;
2508}
2509
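/* proc_dointvec() wrapper that flushes the routing cache whenever the
 * value actually changes.
 */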
2510static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2511				void *buffer, size_t *lenp, loff_t *ppos)
2512{
2513	int *valp = ctl->data;
2514	int val = *valp;
2515	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2516	struct net *net = ctl->extra2;
2517
2518	if (write && *valp != val)
2519		rt_cache_flush(net);
2520
2521	return ret;
2522}
2523
2524#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2525	{ \
2526		.procname	= name, \
2527		.data		= ipv4_devconf.data + \
2528				  IPV4_DEVCONF_ ## attr - 1, \
2529		.maxlen		= sizeof(int), \
2530		.mode		= mval, \
2531		.proc_handler	= proc, \
2532		.extra1		= &ipv4_devconf, \
2533	}
2534
2535#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2536	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2537
2538#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2539	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2540
2541#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2542	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2543
2544#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2545	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2546
2547static struct devinet_sysctl_table {
2548	struct ctl_table_header *sysctl_header;
2549	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2550} devinet_sysctl = {
2551	.devinet_vars = {
2552		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2553					     devinet_sysctl_forward),
2554		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2555		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2556
2557		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2558		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2559		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2560		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2561		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2562		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2563					"accept_source_route"),
2564		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2565		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2566		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2567		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2568		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2569		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2570		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2571		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2572		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2573		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2574		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2575		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2576		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2577					"arp_evict_nocarrier"),
2578		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2579		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2580					"force_igmp_version"),
2581		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2582					"igmpv2_unsolicited_report_interval"),
2583		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2584					"igmpv3_unsolicited_report_interval"),
2585		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2586					"ignore_routes_with_linkdown"),
2587		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2588					"drop_gratuitous_arp"),
2589
2590		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2591		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2592		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2593					      "promote_secondaries"),
2594		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2595					      "route_localnet"),
2596		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2597					      "drop_unicast_in_l2_multicast"),
2598	},
2599};
2600
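/* Duplicate the devinet sysctl template, point each entry at @p and the
 * owning netns, register the table under net/ipv4/conf/<dev_name> and
 * announce the configuration via netconf.  Returns -ENOMEM on failure.
 */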
2601static int __devinet_sysctl_register(struct net *net, char *dev_name,
2602				     int ifindex, struct ipv4_devconf *p)
2603{
2604	int i;
2605	struct devinet_sysctl_table *t;
2606	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2607
2608	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2609	if (!t)
2610		goto out;
2611
2612	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2613		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2614		t->devinet_vars[i].extra1 = p;
2615		t->devinet_vars[i].extra2 = net;
2616	}
2617
2618	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2619
2620	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2621	if (!t->sysctl_header)
2622		goto free;
2623
2624	p->sysctl = t;
2625
2626	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2627				    ifindex, p);
2628	return 0;
2629
2630free:
2631	kfree(t);
2632out:
2633	return -ENOMEM;
2634}
2635
2636static void __devinet_sysctl_unregister(struct net *net,
2637					struct ipv4_devconf *cnf, int ifindex)
2638{
2639	struct devinet_sysctl_table *t = cnf->sysctl;
2640
2641	if (t) {
2642		cnf->sysctl = NULL;
2643		unregister_net_sysctl_table(t->sysctl_header);
2644		kfree(t);
2645	}
2646
2647	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2648}
2649
2650static int devinet_sysctl_register(struct in_device *idev)
2651{
2652	int err;
2653
2654	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2655		return -EINVAL;
2656
2657	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2658	if (err)
2659		return err;
2660	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2661					idev->dev->ifindex, &idev->cnf);
2662	if (err)
2663		neigh_sysctl_unregister(idev->arp_parms);
2664	return err;
2665}
2666
2667static void devinet_sysctl_unregister(struct in_device *idev)
2668{
2669	struct net *net = dev_net(idev->dev);
2670
2671	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2672	neigh_sysctl_unregister(idev->arp_parms);
2673}
2674
2675static struct ctl_table ctl_forward_entry[] = {
2676	{
2677		.procname	= "ip_forward",
2678		.data		= &ipv4_devconf.data[
2679					IPV4_DEVCONF_FORWARDING - 1],
2680		.maxlen		= sizeof(int),
2681		.mode		= 0644,
2682		.proc_handler	= devinet_sysctl_forward,
2683		.extra1		= &ipv4_devconf,
2684		.extra2		= &init_net,
2685	},
2686	{ },
2687};
2688#endif
2689
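/* Per-namespace setup: allocate the "all" and "default" configurations
 * (seeded from init_net, the creating namespace or the compiled-in
 * defaults, depending on net_inherit_devconf()), then register their
 * sysctl trees and the net/ipv4/ip_forward entry.
 */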
2690static __net_init int devinet_init_net(struct net *net)
2691{
2692	int err;
2693	struct ipv4_devconf *all, *dflt;
2694#ifdef CONFIG_SYSCTL
2695	struct ctl_table *tbl;
2696	struct ctl_table_header *forw_hdr;
2697#endif
2698
2699	err = -ENOMEM;
2700	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2701	if (!all)
2702		goto err_alloc_all;
2703
2704	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2705	if (!dflt)
2706		goto err_alloc_dflt;
2707
2708#ifdef CONFIG_SYSCTL
2709	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2710	if (!tbl)
2711		goto err_alloc_ctl;
2712
2713	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2714	tbl[0].extra1 = all;
2715	tbl[0].extra2 = net;
2716#endif
2717
2718	if (!net_eq(net, &init_net)) {
2719		switch (net_inherit_devconf()) {
2720		case 3:
2721			/* copy from the current netns */
2722			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2723			       sizeof(ipv4_devconf));
2724			memcpy(dflt,
2725			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2726			       sizeof(ipv4_devconf_dflt));
2727			break;
2728		case 0:
2729		case 1:
2730			/* copy from init_net */
2731			memcpy(all, init_net.ipv4.devconf_all,
2732			       sizeof(ipv4_devconf));
2733			memcpy(dflt, init_net.ipv4.devconf_dflt,
2734			       sizeof(ipv4_devconf_dflt));
2735			break;
2736		case 2:
2737			/* use compiled values */
2738			break;
2739		}
2740	}
2741
2742#ifdef CONFIG_SYSCTL
2743	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2744	if (err < 0)
2745		goto err_reg_all;
2746
2747	err = __devinet_sysctl_register(net, "default",
2748					NETCONFA_IFINDEX_DEFAULT, dflt);
2749	if (err < 0)
2750		goto err_reg_dflt;
2751
2752	err = -ENOMEM;
2753	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2754					  ARRAY_SIZE(ctl_forward_entry));
2755	if (!forw_hdr)
2756		goto err_reg_ctl;
2757	net->ipv4.forw_hdr = forw_hdr;
2758#endif
2759
2760	net->ipv4.devconf_all = all;
2761	net->ipv4.devconf_dflt = dflt;
2762	return 0;
2763
2764#ifdef CONFIG_SYSCTL
2765err_reg_ctl:
2766	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2767err_reg_dflt:
2768	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2769err_reg_all:
2770	kfree(tbl);
2771err_alloc_ctl:
2772#endif
2773	kfree(dflt);
2774err_alloc_dflt:
2775	kfree(all);
2776err_alloc_all:
2777	return err;
2778}
2779
2780static __net_exit void devinet_exit_net(struct net *net)
2781{
2782#ifdef CONFIG_SYSCTL
2783	struct ctl_table *tbl;
2784
2785	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2786	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2787	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2788				    NETCONFA_IFINDEX_DEFAULT);
2789	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2790				    NETCONFA_IFINDEX_ALL);
2791	kfree(tbl);
2792#endif
2793	kfree(net->ipv4.devconf_dflt);
2794	kfree(net->ipv4.devconf_all);
2795}
2796
2797static __net_initdata struct pernet_operations devinet_ops = {
2798	.init = devinet_init_net,
2799	.exit = devinet_exit_net,
2800};
2801
2802static struct rtnl_af_ops inet_af_ops __read_mostly = {
2803	.family		  = AF_INET,
2804	.fill_link_af	  = inet_fill_link_af,
2805	.get_link_af_size = inet_get_link_af_size,
2806	.validate_link_af = inet_validate_link_af,
2807	.set_link_af	  = inet_set_link_af,
2808};
2809
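/* Subsystem initialisation: set up the address hash, register the pernet
 * operations, the netdevice notifier, the periodic address lifetime
 * check, the AF_INET link ops and the RTM_*ADDR / RTM_GETNETCONF
 * rtnetlink handlers.
 */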
2810void __init devinet_init(void)
2811{
2812	int i;
2813
2814	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2815		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2816
2817	register_pernet_subsys(&devinet_ops);
2818	register_netdevice_notifier(&ip_netdev_notifier);
2819
2820	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2821
2822	rtnl_af_register(&inet_af_ops);
2823
2824	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2825	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2826	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2827	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2828		      inet_netconf_dump_devconf, 0);
2829}
2830