1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
3 */
4
5#include "ipvlan.h"
6
7static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
8				struct netlink_ext_ack *extack)
9{
10	struct ipvl_dev *ipvlan;
11	unsigned int flags;
12	int err;
13
14	ASSERT_RTNL();
15	if (port->mode != nval) {
16		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
17			flags = ipvlan->dev->flags;
18			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
19				err = dev_change_flags(ipvlan->dev,
20						       flags | IFF_NOARP,
21						       extack);
22			} else {
23				err = dev_change_flags(ipvlan->dev,
24						       flags & ~IFF_NOARP,
25						       extack);
26			}
27			if (unlikely(err))
28				goto fail;
29		}
30		if (nval == IPVLAN_MODE_L3S) {
31			/* New mode is L3S */
32			err = ipvlan_l3s_register(port);
33			if (err)
34				goto fail;
35		} else if (port->mode == IPVLAN_MODE_L3S) {
36			/* Old mode was L3S */
37			ipvlan_l3s_unregister(port);
38		}
39		port->mode = nval;
40	}
41	return 0;
42
43fail:
44	/* Undo the flags changes that have been done so far. */
45	list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
46		flags = ipvlan->dev->flags;
47		if (port->mode == IPVLAN_MODE_L3 ||
48		    port->mode == IPVLAN_MODE_L3S)
49			dev_change_flags(ipvlan->dev, flags | IFF_NOARP,
50					 NULL);
51		else
52			dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP,
53					 NULL);
54	}
55
56	return err;
57}
58
59static int ipvlan_port_create(struct net_device *dev)
60{
61	struct ipvl_port *port;
62	int err, idx;
63
64	port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
65	if (!port)
66		return -ENOMEM;
67
68	write_pnet(&port->pnet, dev_net(dev));
69	port->dev = dev;
70	port->mode = IPVLAN_MODE_L3;
71	INIT_LIST_HEAD(&port->ipvlans);
72	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
73		INIT_HLIST_HEAD(&port->hlhead[idx]);
74
75	skb_queue_head_init(&port->backlog);
76	INIT_WORK(&port->wq, ipvlan_process_multicast);
77	ida_init(&port->ida);
78	port->dev_id_start = 1;
79
80	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
81	if (err)
82		goto err;
83
84	return 0;
85
86err:
87	kfree(port);
88	return err;
89}
90
91static void ipvlan_port_destroy(struct net_device *dev)
92{
93	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
94	struct sk_buff *skb;
95
96	if (port->mode == IPVLAN_MODE_L3S)
97		ipvlan_l3s_unregister(port);
98	netdev_rx_handler_unregister(dev);
99	cancel_work_sync(&port->wq);
100	while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
101		if (skb->dev)
102			dev_put(skb->dev);
103		kfree_skb(skb);
104	}
105	ida_destroy(&port->ida);
106	kfree(port);
107}
108
/* Offload features that are unconditionally enabled: part of
 * IPVLAN_ALWAYS_ON and also ORed into dev->vlan_features by ipvlan_init().
 */
#define IPVLAN_ALWAYS_ON_OFLOADS \
	(NETIF_F_SG | NETIF_F_HW_CSUM | \
	 NETIF_F_GSO_ROBUST | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL)

/* Features forced on for every ipvlan device by ipvlan_init() and
 * ipvlan_fix_features().
 */
#define IPVLAN_ALWAYS_ON \
	(IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED)

/* Mask applied to the lower device's features in ipvlan_init() and used to
 * bound the result of ipvlan_fix_features().
 */
#define IPVLAN_FEATURES \
	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
	 NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \
	 NETIF_F_GRO | NETIF_F_RXCSUM | \
	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)

	/* NETIF_F_GSO_ENCAP_ALL and NETIF_F_GSO_SOFTWARE are newly added;
	 * they appear in IPVLAN_ALWAYS_ON_OFLOADS.
	 */

/* Link-state bits that ipvlan_init() copies from the lower device. */
#define IPVLAN_STATE_MASK \
	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
126
127static int ipvlan_init(struct net_device *dev)
128{
129	struct ipvl_dev *ipvlan = netdev_priv(dev);
130	struct net_device *phy_dev = ipvlan->phy_dev;
131	struct ipvl_port *port;
132	int err;
133
134	dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
135		     (phy_dev->state & IPVLAN_STATE_MASK);
136	dev->features = phy_dev->features & IPVLAN_FEATURES;
137	dev->features |= IPVLAN_ALWAYS_ON;
138	dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES;
139	dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS;
140	dev->hw_enc_features |= dev->features;
141	dev->gso_max_size = phy_dev->gso_max_size;
142	dev->gso_max_segs = phy_dev->gso_max_segs;
143	dev->hard_header_len = phy_dev->hard_header_len;
144
145	netdev_lockdep_set_classes(dev);
146
147	ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
148	if (!ipvlan->pcpu_stats)
149		return -ENOMEM;
150
151	if (!netif_is_ipvlan_port(phy_dev)) {
152		err = ipvlan_port_create(phy_dev);
153		if (err < 0) {
154			free_percpu(ipvlan->pcpu_stats);
155			return err;
156		}
157	}
158	port = ipvlan_port_get_rtnl(phy_dev);
159	port->count += 1;
160	return 0;
161}
162
163static void ipvlan_uninit(struct net_device *dev)
164{
165	struct ipvl_dev *ipvlan = netdev_priv(dev);
166	struct net_device *phy_dev = ipvlan->phy_dev;
167	struct ipvl_port *port;
168
169	free_percpu(ipvlan->pcpu_stats);
170
171	port = ipvlan_port_get_rtnl(phy_dev);
172	port->count -= 1;
173	if (!port->count)
174		ipvlan_port_destroy(port->dev);
175}
176
177static int ipvlan_open(struct net_device *dev)
178{
179	struct ipvl_dev *ipvlan = netdev_priv(dev);
180	struct ipvl_addr *addr;
181
182	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
183	    ipvlan->port->mode == IPVLAN_MODE_L3S)
184		dev->flags |= IFF_NOARP;
185	else
186		dev->flags &= ~IFF_NOARP;
187
188	rcu_read_lock();
189	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
190		ipvlan_ht_addr_add(ipvlan, addr);
191	rcu_read_unlock();
192
193	return 0;
194}
195
196static int ipvlan_stop(struct net_device *dev)
197{
198	struct ipvl_dev *ipvlan = netdev_priv(dev);
199	struct net_device *phy_dev = ipvlan->phy_dev;
200	struct ipvl_addr *addr;
201
202	dev_uc_unsync(phy_dev, dev);
203	dev_mc_unsync(phy_dev, dev);
204
205	rcu_read_lock();
206	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
207		ipvlan_ht_addr_del(addr);
208	rcu_read_unlock();
209
210	return 0;
211}
212
213static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
214				     struct net_device *dev)
215{
216	const struct ipvl_dev *ipvlan = netdev_priv(dev);
217	int skblen = skb->len;
218	int ret;
219
220	ret = ipvlan_queue_xmit(skb, dev);
221	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
222		struct ipvl_pcpu_stats *pcptr;
223
224		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
225
226		u64_stats_update_begin(&pcptr->syncp);
227		pcptr->tx_pkts++;
228		pcptr->tx_bytes += skblen;
229		u64_stats_update_end(&pcptr->syncp);
230	} else {
231		this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
232	}
233	return ret;
234}
235
236static netdev_features_t ipvlan_fix_features(struct net_device *dev,
237					     netdev_features_t features)
238{
239	struct ipvl_dev *ipvlan = netdev_priv(dev);
240
241	features |= NETIF_F_ALL_FOR_ALL;
242	features &= (ipvlan->sfeatures | ~IPVLAN_FEATURES);
243	features = netdev_increment_features(ipvlan->phy_dev->features,
244					     features, features);
245	features |= IPVLAN_ALWAYS_ON;
246	features &= (IPVLAN_FEATURES | IPVLAN_ALWAYS_ON);
247
248	return features;
249}
250
251static void ipvlan_change_rx_flags(struct net_device *dev, int change)
252{
253	struct ipvl_dev *ipvlan = netdev_priv(dev);
254	struct net_device *phy_dev = ipvlan->phy_dev;
255
256	if (change & IFF_ALLMULTI)
257		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
258}
259
260static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
261{
262	struct ipvl_dev *ipvlan = netdev_priv(dev);
263
264	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
265		bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
266	} else {
267		struct netdev_hw_addr *ha;
268		DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);
269
270		bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
271		netdev_for_each_mc_addr(ha, dev)
272			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
273
274		/* Turn-on broadcast bit irrespective of address family,
275		 * since broadcast is deferred to a work-queue, hence no
276		 * impact on fast-path processing.
277		 */
278		__set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
279
280		bitmap_copy(ipvlan->mac_filters, mc_filters,
281			    IPVLAN_MAC_FILTER_SIZE);
282	}
283	dev_uc_sync(ipvlan->phy_dev, dev);
284	dev_mc_sync(ipvlan->phy_dev, dev);
285}
286
287static void ipvlan_get_stats64(struct net_device *dev,
288			       struct rtnl_link_stats64 *s)
289{
290	struct ipvl_dev *ipvlan = netdev_priv(dev);
291
292	if (ipvlan->pcpu_stats) {
293		struct ipvl_pcpu_stats *pcptr;
294		u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
295		u32 rx_errs = 0, tx_drps = 0;
296		u32 strt;
297		int idx;
298
299		for_each_possible_cpu(idx) {
300			pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
301			do {
302				strt= u64_stats_fetch_begin_irq(&pcptr->syncp);
303				rx_pkts = pcptr->rx_pkts;
304				rx_bytes = pcptr->rx_bytes;
305				rx_mcast = pcptr->rx_mcast;
306				tx_pkts = pcptr->tx_pkts;
307				tx_bytes = pcptr->tx_bytes;
308			} while (u64_stats_fetch_retry_irq(&pcptr->syncp,
309							   strt));
310
311			s->rx_packets += rx_pkts;
312			s->rx_bytes += rx_bytes;
313			s->multicast += rx_mcast;
314			s->tx_packets += tx_pkts;
315			s->tx_bytes += tx_bytes;
316
317			/* u32 values are updated without syncp protection. */
318			rx_errs += pcptr->rx_errs;
319			tx_drps += pcptr->tx_drps;
320		}
321		s->rx_errors = rx_errs;
322		s->rx_dropped = rx_errs;
323		s->tx_dropped = tx_drps;
324	}
325	s->tx_errors = DEV_STATS_READ(dev, tx_errors);
326}
327
328static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
329{
330	struct ipvl_dev *ipvlan = netdev_priv(dev);
331	struct net_device *phy_dev = ipvlan->phy_dev;
332
333	return vlan_vid_add(phy_dev, proto, vid);
334}
335
336static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
337				   u16 vid)
338{
339	struct ipvl_dev *ipvlan = netdev_priv(dev);
340	struct net_device *phy_dev = ipvlan->phy_dev;
341
342	vlan_vid_del(phy_dev, proto, vid);
343	return 0;
344}
345
346static int ipvlan_get_iflink(const struct net_device *dev)
347{
348	struct ipvl_dev *ipvlan = netdev_priv(dev);
349
350	return ipvlan->phy_dev->ifindex;
351}
352
/* net_device_ops installed by ipvlan_link_setup(). netif_is_ipvlan()
 * recognises an ipvlan device by comparing dev->netdev_ops against the
 * address of this table.
 */
static const struct net_device_ops ipvlan_netdev_ops = {
	.ndo_init		= ipvlan_init,
	.ndo_uninit		= ipvlan_uninit,
	.ndo_open		= ipvlan_open,
	.ndo_stop		= ipvlan_stop,
	.ndo_start_xmit		= ipvlan_start_xmit,
	.ndo_fix_features	= ipvlan_fix_features,
	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
	.ndo_get_stats64	= ipvlan_get_stats64,
	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ipvlan_vlan_rx_kill_vid,
	.ndo_get_iflink		= ipvlan_get_iflink,
};
367
368static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
369			      unsigned short type, const void *daddr,
370			      const void *saddr, unsigned len)
371{
372	const struct ipvl_dev *ipvlan = netdev_priv(dev);
373	struct net_device *phy_dev = ipvlan->phy_dev;
374
375	/* TODO Probably use a different field than dev_addr so that the
376	 * mac-address on the virtual device is portable and can be carried
377	 * while the packets use the mac-addr on the physical device.
378	 */
379	return dev_hard_header(skb, phy_dev, type, daddr,
380			       saddr ? : phy_dev->dev_addr, len);
381}
382
/* header_ops installed by ipvlan_link_setup(): header creation goes through
 * ipvlan_hard_header(), the remaining callbacks are the eth_header_*
 * helpers.
 */
static const struct header_ops ipvlan_header_ops = {
	.create  	= ipvlan_hard_header,
	.parse		= eth_header_parse,
	.cache		= eth_header_cache,
	.cache_update	= eth_header_cache_update,
};
389
390static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
391{
392	ipvlan->dev->mtu = dev->mtu;
393}
394
395static bool netif_is_ipvlan(const struct net_device *dev)
396{
397	/* both ipvlan and ipvtap devices use the same netdev_ops */
398	return dev->netdev_ops == &ipvlan_netdev_ops;
399}
400
401static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
402					     struct ethtool_link_ksettings *cmd)
403{
404	const struct ipvl_dev *ipvlan = netdev_priv(dev);
405
406	return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
407}
408
409static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
410				       struct ethtool_drvinfo *drvinfo)
411{
412	strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
413	strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
414}
415
416static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
417{
418	const struct ipvl_dev *ipvlan = netdev_priv(dev);
419
420	return ipvlan->msg_enable;
421}
422
423static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
424{
425	struct ipvl_dev *ipvlan = netdev_priv(dev);
426
427	ipvlan->msg_enable = value;
428}
429
/* ethtool_ops installed by ipvlan_link_setup(). Link settings are read from
 * the lower device; driver info and message level come from this driver.
 */
static const struct ethtool_ops ipvlan_ethtool_ops = {
	.get_link	= ethtool_op_get_link,
	.get_link_ksettings	= ipvlan_ethtool_get_link_ksettings,
	.get_drvinfo	= ipvlan_ethtool_get_drvinfo,
	.get_msglevel	= ipvlan_ethtool_get_msglevel,
	.set_msglevel	= ipvlan_ethtool_set_msglevel,
};
437
438static int ipvlan_nl_changelink(struct net_device *dev,
439				struct nlattr *tb[], struct nlattr *data[],
440				struct netlink_ext_ack *extack)
441{
442	struct ipvl_dev *ipvlan = netdev_priv(dev);
443	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
444	int err = 0;
445
446	if (!data)
447		return 0;
448	if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
449		return -EPERM;
450
451	if (data[IFLA_IPVLAN_MODE]) {
452		u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
453
454		err = ipvlan_set_port_mode(port, nmode, extack);
455	}
456
457	if (!err && data[IFLA_IPVLAN_FLAGS]) {
458		u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
459
460		if (flags & IPVLAN_F_PRIVATE)
461			ipvlan_mark_private(port);
462		else
463			ipvlan_clear_private(port);
464
465		if (flags & IPVLAN_F_VEPA)
466			ipvlan_mark_vepa(port);
467		else
468			ipvlan_clear_vepa(port);
469	}
470
471	return err;
472}
473
474static size_t ipvlan_nl_getsize(const struct net_device *dev)
475{
476	return (0
477		+ nla_total_size(2) /* IFLA_IPVLAN_MODE */
478		+ nla_total_size(2) /* IFLA_IPVLAN_FLAGS */
479		);
480}
481
482static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[],
483			      struct netlink_ext_ack *extack)
484{
485	if (!data)
486		return 0;
487
488	if (data[IFLA_IPVLAN_MODE]) {
489		u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
490
491		if (mode >= IPVLAN_MODE_MAX)
492			return -EINVAL;
493	}
494	if (data[IFLA_IPVLAN_FLAGS]) {
495		u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
496
497		/* Only two bits are used at this moment. */
498		if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
499			return -EINVAL;
500		/* Also both flags can't be active at the same time. */
501		if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) ==
502		    (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA))
503			return -EINVAL;
504	}
505
506	return 0;
507}
508
509static int ipvlan_nl_fillinfo(struct sk_buff *skb,
510			      const struct net_device *dev)
511{
512	struct ipvl_dev *ipvlan = netdev_priv(dev);
513	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
514	int ret = -EINVAL;
515
516	if (!port)
517		goto err;
518
519	ret = -EMSGSIZE;
520	if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
521		goto err;
522	if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags))
523		goto err;
524
525	return 0;
526
527err:
528	return ret;
529}
530
531int ipvlan_link_new(struct net *src_net, struct net_device *dev,
532		    struct nlattr *tb[], struct nlattr *data[],
533		    struct netlink_ext_ack *extack)
534{
535	struct ipvl_dev *ipvlan = netdev_priv(dev);
536	struct ipvl_port *port;
537	struct net_device *phy_dev;
538	int err;
539	u16 mode = IPVLAN_MODE_L3;
540
541	if (!tb[IFLA_LINK])
542		return -EINVAL;
543
544	phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
545	if (!phy_dev)
546		return -ENODEV;
547
548	if (netif_is_ipvlan(phy_dev)) {
549		struct ipvl_dev *tmp = netdev_priv(phy_dev);
550
551		phy_dev = tmp->phy_dev;
552		if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
553			return -EPERM;
554	} else if (!netif_is_ipvlan_port(phy_dev)) {
555		/* Exit early if the underlying link is invalid or busy */
556		if (phy_dev->type != ARPHRD_ETHER ||
557		    phy_dev->flags & IFF_LOOPBACK) {
558			netdev_err(phy_dev,
559				   "Master is either lo or non-ether device\n");
560			return -EINVAL;
561		}
562
563		if (netdev_is_rx_handler_busy(phy_dev)) {
564			netdev_err(phy_dev, "Device is already in use.\n");
565			return -EBUSY;
566		}
567	}
568
569	ipvlan->phy_dev = phy_dev;
570	ipvlan->dev = dev;
571	ipvlan->sfeatures = IPVLAN_FEATURES;
572	if (!tb[IFLA_MTU])
573		ipvlan_adjust_mtu(ipvlan, phy_dev);
574	INIT_LIST_HEAD(&ipvlan->addrs);
575	spin_lock_init(&ipvlan->addrs_lock);
576
577	/* TODO Probably put random address here to be presented to the
578	 * world but keep using the physical-dev address for the outgoing
579	 * packets.
580	 */
581	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
582
583	dev->priv_flags |= IFF_NO_RX_HANDLER;
584
585	err = register_netdevice(dev);
586	if (err < 0)
587		return err;
588
589	/* ipvlan_init() would have created the port, if required */
590	port = ipvlan_port_get_rtnl(phy_dev);
591	ipvlan->port = port;
592
593	/* If the port-id base is at the MAX value, then wrap it around and
594	 * begin from 0x1 again. This may be due to a busy system where lots
595	 * of slaves are getting created and deleted.
596	 */
597	if (port->dev_id_start == 0xFFFE)
598		port->dev_id_start = 0x1;
599
600	/* Since L2 address is shared among all IPvlan slaves including
601	 * master, use unique 16 bit dev-ids to diffentiate among them.
602	 * Assign IDs between 0x1 and 0xFFFE (used by the master) to each
603	 * slave link [see addrconf_ifid_eui48()].
604	 */
605	err = ida_simple_get(&port->ida, port->dev_id_start, 0xFFFE,
606			     GFP_KERNEL);
607	if (err < 0)
608		err = ida_simple_get(&port->ida, 0x1, port->dev_id_start,
609				     GFP_KERNEL);
610	if (err < 0)
611		goto unregister_netdev;
612	dev->dev_id = err;
613
614	/* Increment id-base to the next slot for the future assignment */
615	port->dev_id_start = err + 1;
616
617	err = netdev_upper_dev_link(phy_dev, dev, extack);
618	if (err)
619		goto remove_ida;
620
621	/* Flags are per port and latest update overrides. User has
622	 * to be consistent in setting it just like the mode attribute.
623	 */
624	if (data && data[IFLA_IPVLAN_FLAGS])
625		port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
626
627	if (data && data[IFLA_IPVLAN_MODE])
628		mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
629
630	err = ipvlan_set_port_mode(port, mode, extack);
631	if (err)
632		goto unlink_netdev;
633
634	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
635	netif_stacked_transfer_operstate(phy_dev, dev);
636	return 0;
637
638unlink_netdev:
639	netdev_upper_dev_unlink(phy_dev, dev);
640remove_ida:
641	ida_simple_remove(&port->ida, dev->dev_id);
642unregister_netdev:
643	unregister_netdevice(dev);
644	return err;
645}
646EXPORT_SYMBOL_GPL(ipvlan_link_new);
647
648void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
649{
650	struct ipvl_dev *ipvlan = netdev_priv(dev);
651	struct ipvl_addr *addr, *next;
652
653	spin_lock_bh(&ipvlan->addrs_lock);
654	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
655		ipvlan_ht_addr_del(addr);
656		list_del_rcu(&addr->anode);
657		kfree_rcu(addr, rcu);
658	}
659	spin_unlock_bh(&ipvlan->addrs_lock);
660
661	ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
662	list_del_rcu(&ipvlan->pnode);
663	unregister_netdevice_queue(dev, head);
664	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
665}
666EXPORT_SYMBOL_GPL(ipvlan_link_delete);
667
668void ipvlan_link_setup(struct net_device *dev)
669{
670	ether_setup(dev);
671
672	dev->max_mtu = ETH_MAX_MTU;
673	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
674	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
675	dev->netdev_ops = &ipvlan_netdev_ops;
676	dev->needs_free_netdev = true;
677	dev->header_ops = &ipvlan_header_ops;
678	dev->ethtool_ops = &ipvlan_ethtool_ops;
679}
680EXPORT_SYMBOL_GPL(ipvlan_link_setup);
681
/* Netlink attribute policy installed by ipvlan_link_register(): both the
 * mode and the flags attribute are u16.
 */
static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
{
	[IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
	[IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 },
};
687
688static struct net *ipvlan_get_link_net(const struct net_device *dev)
689{
690	struct ipvl_dev *ipvlan = netdev_priv(dev);
691
692	return dev_net(ipvlan->phy_dev);
693}
694
/* rtnl_link_ops for the "ipvlan" link kind. Only the callbacks set here are
 * static; get_size, policy, validate, fill_info, changelink and maxtype are
 * filled in by ipvlan_link_register() before registration.
 */
static struct rtnl_link_ops ipvlan_link_ops = {
	.kind		= "ipvlan",
	.priv_size	= sizeof(struct ipvl_dev),

	.setup		= ipvlan_link_setup,
	.newlink	= ipvlan_link_new,
	.dellink	= ipvlan_link_delete,
	.get_link_net   = ipvlan_get_link_net,
};
704
705int ipvlan_link_register(struct rtnl_link_ops *ops)
706{
707	ops->get_size	= ipvlan_nl_getsize;
708	ops->policy	= ipvlan_nl_policy;
709	ops->validate	= ipvlan_nl_validate;
710	ops->fill_info	= ipvlan_nl_fillinfo;
711	ops->changelink = ipvlan_nl_changelink;
712	ops->maxtype	= IFLA_IPVLAN_MAX;
713	return rtnl_link_register(ops);
714}
715EXPORT_SYMBOL_GPL(ipvlan_link_register);
716
717static int ipvlan_device_event(struct notifier_block *unused,
718			       unsigned long event, void *ptr)
719{
720	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
721	struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
722	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
723	struct ipvl_dev *ipvlan, *next;
724	struct ipvl_port *port;
725	LIST_HEAD(lst_kill);
726	int err;
727
728	if (!netif_is_ipvlan_port(dev))
729		return NOTIFY_DONE;
730
731	port = ipvlan_port_get_rtnl(dev);
732
733	switch (event) {
734	case NETDEV_CHANGE:
735		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
736			netif_stacked_transfer_operstate(ipvlan->phy_dev,
737							 ipvlan->dev);
738		break;
739
740	case NETDEV_REGISTER: {
741		struct net *oldnet, *newnet = dev_net(dev);
742
743		oldnet = read_pnet(&port->pnet);
744		if (net_eq(newnet, oldnet))
745			break;
746
747		write_pnet(&port->pnet, newnet);
748
749		if (port->mode == IPVLAN_MODE_L3S)
750			ipvlan_migrate_l3s_hook(oldnet, newnet);
751		break;
752	}
753	case NETDEV_UNREGISTER:
754		if (dev->reg_state != NETREG_UNREGISTERING)
755			break;
756
757		list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
758			ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
759							    &lst_kill);
760		unregister_netdevice_many(&lst_kill);
761		break;
762
763	case NETDEV_FEAT_CHANGE:
764		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
765			ipvlan->dev->gso_max_size = dev->gso_max_size;
766			ipvlan->dev->gso_max_segs = dev->gso_max_segs;
767			netdev_update_features(ipvlan->dev);
768		}
769		break;
770
771	case NETDEV_CHANGEMTU:
772		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
773			ipvlan_adjust_mtu(ipvlan, dev);
774		break;
775
776	case NETDEV_PRE_CHANGEADDR:
777		prechaddr_info = ptr;
778		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
779			err = dev_pre_changeaddr_notify(ipvlan->dev,
780						    prechaddr_info->dev_addr,
781						    extack);
782			if (err)
783				return notifier_from_errno(err);
784		}
785		break;
786
787	case NETDEV_CHANGEADDR:
788		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
789			ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
790			call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
791		}
792		break;
793
794	case NETDEV_PRE_TYPE_CHANGE:
795		/* Forbid underlying device to change its type. */
796		return NOTIFY_BAD;
797	}
798	return NOTIFY_DONE;
799}
800
801/* the caller must held the addrs lock */
802static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
803{
804	struct ipvl_addr *addr;
805
806	addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
807	if (!addr)
808		return -ENOMEM;
809
810	addr->master = ipvlan;
811	if (!is_v6) {
812		memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
813		addr->atype = IPVL_IPV4;
814#if IS_ENABLED(CONFIG_IPV6)
815	} else {
816		memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
817		addr->atype = IPVL_IPV6;
818#endif
819	}
820
821	list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
822
823	/* If the interface is not up, the address will be added to the hash
824	 * list by ipvlan_open.
825	 */
826	if (netif_running(ipvlan->dev))
827		ipvlan_ht_addr_add(ipvlan, addr);
828
829	return 0;
830}
831
832static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
833{
834	struct ipvl_addr *addr;
835
836	spin_lock_bh(&ipvlan->addrs_lock);
837	addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
838	if (!addr) {
839		spin_unlock_bh(&ipvlan->addrs_lock);
840		return;
841	}
842
843	ipvlan_ht_addr_del(addr);
844	list_del_rcu(&addr->anode);
845	spin_unlock_bh(&ipvlan->addrs_lock);
846	kfree_rcu(addr, rcu);
847}
848
849static bool ipvlan_is_valid_dev(const struct net_device *dev)
850{
851	struct ipvl_dev *ipvlan = netdev_priv(dev);
852
853	if (!netif_is_ipvlan(dev))
854		return false;
855
856	if (!ipvlan || !ipvlan->port)
857		return false;
858
859	return true;
860}
861
862#if IS_ENABLED(CONFIG_IPV6)
863static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
864{
865	int ret = -EINVAL;
866
867	spin_lock_bh(&ipvlan->addrs_lock);
868	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
869		netif_err(ipvlan, ifup, ipvlan->dev,
870			  "Failed to add IPv6=%pI6c addr for %s intf\n",
871			  ip6_addr, ipvlan->dev->name);
872	else
873		ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
874	spin_unlock_bh(&ipvlan->addrs_lock);
875	return ret;
876}
877
878static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
879{
880	return ipvlan_del_addr(ipvlan, ip6_addr, true);
881}
882
883static int ipvlan_addr6_event(struct notifier_block *unused,
884			      unsigned long event, void *ptr)
885{
886	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
887	struct net_device *dev = (struct net_device *)if6->idev->dev;
888	struct ipvl_dev *ipvlan = netdev_priv(dev);
889
890	if (!ipvlan_is_valid_dev(dev))
891		return NOTIFY_DONE;
892
893	switch (event) {
894	case NETDEV_UP:
895		if (ipvlan_add_addr6(ipvlan, &if6->addr))
896			return NOTIFY_BAD;
897		break;
898
899	case NETDEV_DOWN:
900		ipvlan_del_addr6(ipvlan, &if6->addr);
901		break;
902	}
903
904	return NOTIFY_OK;
905}
906
907static int ipvlan_addr6_validator_event(struct notifier_block *unused,
908					unsigned long event, void *ptr)
909{
910	struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
911	struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
912	struct ipvl_dev *ipvlan = netdev_priv(dev);
913
914	if (!ipvlan_is_valid_dev(dev))
915		return NOTIFY_DONE;
916
917	switch (event) {
918	case NETDEV_UP:
919		if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
920			NL_SET_ERR_MSG(i6vi->extack,
921				       "Address already assigned to an ipvlan device");
922			return notifier_from_errno(-EADDRINUSE);
923		}
924		break;
925	}
926
927	return NOTIFY_OK;
928}
929#endif
930
931static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
932{
933	int ret = -EINVAL;
934
935	spin_lock_bh(&ipvlan->addrs_lock);
936	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
937		netif_err(ipvlan, ifup, ipvlan->dev,
938			  "Failed to add IPv4=%pI4 on %s intf.\n",
939			  ip4_addr, ipvlan->dev->name);
940	else
941		ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
942	spin_unlock_bh(&ipvlan->addrs_lock);
943	return ret;
944}
945
946static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
947{
948	return ipvlan_del_addr(ipvlan, ip4_addr, false);
949}
950
951static int ipvlan_addr4_event(struct notifier_block *unused,
952			      unsigned long event, void *ptr)
953{
954	struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
955	struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
956	struct ipvl_dev *ipvlan = netdev_priv(dev);
957	struct in_addr ip4_addr;
958
959	if (!ipvlan_is_valid_dev(dev))
960		return NOTIFY_DONE;
961
962	switch (event) {
963	case NETDEV_UP:
964		ip4_addr.s_addr = if4->ifa_address;
965		if (ipvlan_add_addr4(ipvlan, &ip4_addr))
966			return NOTIFY_BAD;
967		break;
968
969	case NETDEV_DOWN:
970		ip4_addr.s_addr = if4->ifa_address;
971		ipvlan_del_addr4(ipvlan, &ip4_addr);
972		break;
973	}
974
975	return NOTIFY_OK;
976}
977
978static int ipvlan_addr4_validator_event(struct notifier_block *unused,
979					unsigned long event, void *ptr)
980{
981	struct in_validator_info *ivi = (struct in_validator_info *)ptr;
982	struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
983	struct ipvl_dev *ipvlan = netdev_priv(dev);
984
985	if (!ipvlan_is_valid_dev(dev))
986		return NOTIFY_DONE;
987
988	switch (event) {
989	case NETDEV_UP:
990		if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
991			NL_SET_ERR_MSG(ivi->extack,
992				       "Address already assigned to an ipvlan device");
993			return notifier_from_errno(-EADDRINUSE);
994		}
995		break;
996	}
997
998	return NOTIFY_OK;
999}
1000
/* Notifier blocks registered in ipvlan_init_module() and unregistered in
 * ipvlan_cleanup_module().
 */

/* IPv4 address add/remove events. */
static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr4_event,
};

/* IPv4 address validation (duplicate check before an address is added). */
static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr4_validator_event,
};

/* Events on the lower (port) device. */
static struct notifier_block ipvlan_notifier_block __read_mostly = {
	.notifier_call = ipvlan_device_event,
};

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 address add/remove events. */
static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr6_event,
};

/* IPv6 address validation (duplicate check before an address is added). */
static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr6_validator_event,
};
#endif
1022
1023static int __init ipvlan_init_module(void)
1024{
1025	int err;
1026
1027	ipvlan_init_secret();
1028	register_netdevice_notifier(&ipvlan_notifier_block);
1029#if IS_ENABLED(CONFIG_IPV6)
1030	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1031	register_inet6addr_validator_notifier(
1032	    &ipvlan_addr6_vtor_notifier_block);
1033#endif
1034	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1035	register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
1036
1037	err = ipvlan_l3s_init();
1038	if (err < 0)
1039		goto error;
1040
1041	err = ipvlan_link_register(&ipvlan_link_ops);
1042	if (err < 0) {
1043		ipvlan_l3s_cleanup();
1044		goto error;
1045	}
1046
1047	return 0;
1048error:
1049	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1050	unregister_inetaddr_validator_notifier(
1051	    &ipvlan_addr4_vtor_notifier_block);
1052#if IS_ENABLED(CONFIG_IPV6)
1053	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1054	unregister_inet6addr_validator_notifier(
1055	    &ipvlan_addr6_vtor_notifier_block);
1056#endif
1057	unregister_netdevice_notifier(&ipvlan_notifier_block);
1058	return err;
1059}
1060
1061static void __exit ipvlan_cleanup_module(void)
1062{
1063	rtnl_link_unregister(&ipvlan_link_ops);
1064	ipvlan_l3s_cleanup();
1065	unregister_netdevice_notifier(&ipvlan_notifier_block);
1066	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
1067	unregister_inetaddr_validator_notifier(
1068	    &ipvlan_addr4_vtor_notifier_block);
1069#if IS_ENABLED(CONFIG_IPV6)
1070	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
1071	unregister_inet6addr_validator_notifier(
1072	    &ipvlan_addr6_vtor_notifier_block);
1073#endif
1074}
1075
/* Module entry and exit points. */
module_init(ipvlan_init_module);
module_exit(ipvlan_cleanup_module);

/* Module metadata; the rtnl link alias names the "ipvlan" link kind. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
MODULE_ALIAS_RTNL_LINK("ipvlan");
1083