// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <linux/rhashtable.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
#include <net/ip_tunnels.h>

#include <linux/nospec.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting the vif table, the mrt cache and mroute socket state.
   Note that changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct net_device *dev, struct sk_buff *skb,
			   struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
				lockdep_rtnl_is_held() || \
				list_empty(&net->ipv6.mr6_tables))

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv6.mr6_tables)
		return NULL;
	return ret;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi6_to_flowi(flp6));

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = ip6mr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	rtnl_lock();
	ip6mr_free_table(mrt);
	rtnl_unlock();
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}

bool ip6mr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
}
EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_mr_table_iter(struct net *net,
					    struct mr_table *mrt)
{
	if (!mrt)
		return net->ipv6.mrt6;
	return NULL;
}

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);
	net->ipv6.mrt6 = mrt;
	return 0;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}

static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
			    struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ip6mr_rules_seq_read(struct net *net)
{
	return 0;
}
#endif

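/* rhashtable obj_cmpfn: must return 0 on a match, nonzero otherwise.
 * Both the multicast group and the origin address have to match for
 * two entries to be considered equal.
 */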
static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing:
 * /proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */
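/* Example output of the vif table (the numbers are illustrative only):
 *
 *   $ cat /proc/net/ip6_mr_vif
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *    0 eth0           9540      87      1200      10 00000
 *    1 pim6reg           0       0         0       0 00001
 */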

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

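/* Handle an incoming PIM Register message: validate the PIM header and
 * checksum, make sure the encapsulated packet really is IPv6 multicast,
 * then strip the outer headers and feed the inner packet back into the
 * stack through the pim6reg register vif.
 */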
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto tx_err;

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;

tx_err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int call_ip6mr_vif_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct vif_device *vif,
					  mifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     vif, vif_index, tb_id,
				     &net->ipv6.ipmr_seq);
}

static int call_ip6mr_mfc_entry_notifiers(struct net *net,
					  enum fib_event_type event_type,
					  struct mfc6_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
}

/* Delete a VIF entry */
static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	if (VIF_EXISTS(mrt, vifi))
		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
					       FIB_EVENT_VIF_DEL, v, vifi,
					       mrt->id);

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_dec(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


797

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill the oifs list. Called with mrt_lock held for writing. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is the vif busy? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		atomic_inc(&in6_dev->cnf.mc_forwarding);
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
				       v, vifi, mrt->id);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	c->_c.free = ip6mr_cache_free_rcu;
	refcount_set(&c->_c.mfc_un.res.refcount, 1);
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */
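/* (Netlink RTM_GETROUTE requests that were queued on an unresolved entry
 * by the ip6mr_get_route() path carry a dummy IPv6 header whose version
 * field is 0; that is how they are told apart from real queued packets
 * here and in ip6mr_destroy_unres() above.)
 */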

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb->dev, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */
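/* (The report reaches the daemon as a struct mrt6msg of type
 * MRT6MSG_NOCACHE, MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT queued on the
 * mroute socket, and is also mirrored to netlink listeners via
 * mrt6msg_netlink_event() below.)
 */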

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		__skb_pull(skb, skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets a locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb, struct net_device *dev)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		c = ip6mr_cache_alloc_unres();
		if (!c) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		if (dev) {
			skb->dev = dev;
			skb->skb_iif = dev->ifindex;
		}
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static unsigned int ip6mr_seq_read(struct net *net)
{
	ASSERT_RTNL();

	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}

static int ip6mr_dump(struct net *net, struct notifier_block *nb,
		      struct netlink_ext_ack *extack)
{
	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
		       ip6mr_mr_table_iter, &mrt_lock, extack);
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.fib_seq_read	= ip6mr_seq_read,
	.fib_dump	= ip6mr_dump,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_notifier_init(struct net *net)
{
	struct fib_notifier_ops *ops;

	net->ipv6.ipmr_seq = 0;

	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	net->ipv6.ip6mr_notifier_ops = ops;

	return 0;
}

static void __net_exit ip6mr_notifier_exit(struct net *net)
{
	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
	net->ipv6.ip6mr_notifier_ops = NULL;
}

/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_notifier_init(net);
	if (err)
		return err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
			sizeof(struct mr_vif_iter)))
		goto proc_vif_fail;
	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
			sizeof(struct mr_mfc_iter)))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
	ip6mr_notifier_exit(net);
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
	ip6mr_notifier_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
					       c, mrt->id);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
				       c, mrt->id);
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
				continue;
			mif6_delete(mrt, i, 0, &list);
		}
		unregister_netdevice_many(&list);
	}

	/* Wipe the cache */
	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
			list_del_rcu(&c->list);
			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
						       FIB_EVENT_ENTRY_DEL,
						       (struct mfc6_cache *)c, mrt->id);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
			mr_cache_put(c);
		}
	}

	if (flags & MRT6_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
						  RTM_DELROUTE);
				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		sock_set_flag(sk, SOCK_RCU_FREE);
		atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			/* Note that mroute_sk had SOCK_RCU_FREE set,
			 * so the RCU grace period before sk freeing
			 * is guaranteed by sk_destruct()
			 */
			atomic_dec(&net->ipv6.devconf_all->mc_forwarding);
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
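
/* A minimal userspace sketch of driving this interface (illustrative
 * only, not part of the kernel; error handling omitted).  A routing
 * daemon such as pim6sd becomes the mrouter by taking MRT6_INIT on an
 * ICMPv6 raw socket, then adds mifs and (S,G) cache entries:
 *
 *	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int on = 1;
 *
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));
 *
 *	struct mif6ctl mif = { .mif6c_mifi = 0,
 *			       .mif6c_pifi = if_nametoindex("eth0") };
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 *	struct mf6cctl mfc = { .mf6cc_parent = 0 };	 iif = mif 0
 *	inet_pton(AF_INET6, "2001:db8::1", &mfc.mf6cc_origin.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::1", &mfc.mf6cc_mcastgrp.sin6_addr);
 *	IF_SET(1, &mfc.mf6cc_ifset);	 oif = mif 1, added as above
 *	setsockopt(fd, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc));
 *
 * Upcalls (struct mrt6msg: MRT6MSG_NOCACHE etc.) are then read from the
 * same socket, and MRT6_DONE tears everything down.
 */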

int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			  unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_sockptr(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	case MRT6_FLUSH:
	{
		int flags;

		if (optlen != sizeof(flags))
			return -EINVAL;
		if (copy_from_sockptr(&flags, optval, sizeof(flags)))
			return -EFAULT;
		rtnl_lock();
		mroute_clean_tables(mrt, flags);
		rtnl_unlock();
		return 0;
	}

	/*
	 *	Control PIM assert (enabling PIM also enables asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (copy_from_sockptr(&v, optval, sizeof(v)))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		mrt = ip6mr_new_table(net, v);
		if (IS_ERR(mrt))
			ret = PTR_ERR(mrt);
		else
			raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces: if the mrouter runs a multicast
	 * application, that application should receive packets no matter
	 * which interface it is joined on.
	 * If we did not do this, the application would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not an mrouter) cannot join on more than one interface - that
	 * would result in receiving duplicate packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

2078static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2079{
2080	int ct;
2081
2082	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2083		if (mrt->vif_table[ct].dev == dev)
2084			break;
2085	}
2086	return ct;
2087}
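/* Note: a linear scan bounded by mrt->maxvif; returns the vif index
 * owning @dev, or -1 if the device has no vif, which is why callers
 * test for `vif >= 0`.
 */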
2088
2089static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2090			   struct net_device *dev, struct sk_buff *skb,
2091			   struct mfc6_cache *c)
2092{
2093	int psend = -1;
2094	int vif, ct;
2095	int true_vifi = ip6mr_find_vif(mrt, dev);
2096
2097	vif = c->_c.mfc_parent;
2098	c->_c.mfc_un.res.pkt++;
2099	c->_c.mfc_un.res.bytes += skb->len;
2100	c->_c.mfc_un.res.lastuse = jiffies;
2101
2102	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2103		struct mfc6_cache *cache_proxy;
2104
2105		/* For an (*,G) entry, we only check that the incoming
2106		 * interface is part of the static tree.
2107		 */
2108		rcu_read_lock();
2109		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2110		if (cache_proxy &&
2111		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2112			rcu_read_unlock();
2113			goto forward;
2114		}
2115		rcu_read_unlock();
2116	}
2117
2118	/*
2119	 * Wrong interface: drop packet and (maybe) send PIM assert.
2120	 */
2121	if (mrt->vif_table[vif].dev != dev) {
2122		c->_c.mfc_un.res.wrong_if++;
2123
2124		if (true_vifi >= 0 && mrt->mroute_do_assert &&
2125		    /* PIM-SM uses asserts when switching from the RPT to the
2126		       SPT, so we cannot insist that the packet arrived on an
2127		       oif. That is unfortunate, but the alternative would be
2128		       moving a fairly large chunk of pimd into the kernel. --ANK
2129		     */
2130		    (mrt->mroute_do_pim ||
2131		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2132		    time_after(jiffies,
2133			       c->_c.mfc_un.res.last_assert +
2134			       MFC_ASSERT_THRESH)) {
2135			c->_c.mfc_un.res.last_assert = jiffies;
2136			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2137		}
2138		goto dont_forward;
2139	}
2140
2141forward:
2142	mrt->vif_table[vif].pkt_in++;
2143	mrt->vif_table[vif].bytes_in += skb->len;
2144
2145	/*
2146	 *	Forward the frame
2147	 */
2148	if (ipv6_addr_any(&c->mf6c_origin) &&
2149	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
2150		if (true_vifi >= 0 &&
2151		    true_vifi != c->_c.mfc_parent &&
2152		    ipv6_hdr(skb)->hop_limit >
2153				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2154			/* It's an (*,*) entry and the packet is not coming from
2155			 * the upstream: forward the packet to the upstream
2156			 * only.
2157			 */
2158			psend = c->_c.mfc_parent;
2159			goto last_forward;
2160		}
2161		goto dont_forward;
2162	}
2163	for (ct = c->_c.mfc_un.res.maxvif - 1;
2164	     ct >= c->_c.mfc_un.res.minvif; ct--) {
2165		/* For (*,G) entry, don't forward to the incoming interface */
2166		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2167		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2168			if (psend != -1) {
2169				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2170				if (skb2)
2171					ip6mr_forward2(net, mrt, skb2, psend);
2172			}
2173			psend = ct;
2174		}
2175	}
2176last_forward:
2177	if (psend != -1) {
2178		ip6mr_forward2(net, mrt, skb, psend);
2179		return;
2180	}
2181
2182dont_forward:
2183	kfree_skb(skb);
2184}
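/* The oif walk above uses the classic "clone for every interface except
 * the last" pattern: the original skb is consumed by the final transmit,
 * saving one copy. A stripped-down sketch (with hypothetical
 * for_each_matching_oif() and xmit() helpers):
 *
 *	int prev = -1;
 *
 *	for_each_matching_oif(ct) {
 *		if (prev != -1) {
 *			struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);
 *
 *			if (copy)
 *				xmit(copy, prev);
 *		}
 *		prev = ct;
 *	}
 *	if (prev != -1)
 *		xmit(skb, prev);	(original skb, no extra copy)
 *	else
 *		kfree_skb(skb);
 */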
2185
2186
2187/*
2188 *	Multicast packets for forwarding arrive here
2189 */
2190
2191int ip6_mr_input(struct sk_buff *skb)
2192{
2193	struct mfc6_cache *cache;
2194	struct net *net = dev_net(skb->dev);
2195	struct mr_table *mrt;
2196	struct flowi6 fl6 = {
2197		.flowi6_iif	= skb->dev->ifindex,
2198		.flowi6_mark	= skb->mark,
2199	};
2200	int err;
2201	struct net_device *dev;
2202
2203	/* For VRFs, the skb->dev passed in is the master device. Look up
2204	 * the slave interface that actually has a vif associated with it.
2205	 */
2206	dev = skb->dev;
2207	if (netif_is_l3_master(skb->dev)) {
2208		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2209		if (!dev) {
2210			kfree_skb(skb);
2211			return -ENODEV;
2212		}
2213	}
2214
2215	err = ip6mr_fib_lookup(net, &fl6, &mrt);
2216	if (err < 0) {
2217		kfree_skb(skb);
2218		return err;
2219	}
2220
2221	read_lock(&mrt_lock);
2222	cache = ip6mr_cache_find(mrt,
2223				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2224	if (!cache) {
2225		int vif = ip6mr_find_vif(mrt, dev);
2226
2227		if (vif >= 0)
2228			cache = ip6mr_cache_find_any(mrt,
2229						     &ipv6_hdr(skb)->daddr,
2230						     vif);
2231	}
2232
2233	/*
2234	 *	No usable cache entry
2235	 */
2236	if (!cache) {
2237		int vif;
2238
2239		vif = ip6mr_find_vif(mrt, dev);
2240		if (vif >= 0) {
2241			int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2242			read_unlock(&mrt_lock);
2243
2244			return err;
2245		}
2246		read_unlock(&mrt_lock);
2247		kfree_skb(skb);
2248		return -ENODEV;
2249	}
2250
2251	ip6_mr_forward(net, mrt, dev, skb, cache);
2252
2253	read_unlock(&mrt_lock);
2254
2255	return 0;
2256}
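/* ip6_mr_input() is reached from ip6_mc_input() once mc_forwarding has
 * been enabled (MRT6_INIT turns it on), i.e. only while a multicast
 * routing daemon is active.
 */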
2257
2258int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2259		    u32 portid)
2260{
2261	int err;
2262	struct mr_table *mrt;
2263	struct mfc6_cache *cache;
2264	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2265
2266	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2267	if (!mrt)
2268		return -ENOENT;
2269
2270	read_lock(&mrt_lock);
2271	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2272	if (!cache && skb->dev) {
2273		int vif = ip6mr_find_vif(mrt, skb->dev);
2274
2275		if (vif >= 0)
2276			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2277						     vif);
2278	}
2279
2280	if (!cache) {
2281		struct sk_buff *skb2;
2282		struct ipv6hdr *iph;
2283		struct net_device *dev;
2284		int vif;
2285
2286		dev = skb->dev;
2287		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2288			read_unlock(&mrt_lock);
2289			return -ENODEV;
2290		}
2291
2292		/* XXX: is a header-only skb really sufficient here? */
2293		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2294		if (!skb2) {
2295			read_unlock(&mrt_lock);
2296			return -ENOMEM;
2297		}
2298
2299		NETLINK_CB(skb2).portid = portid;
2300		skb_reset_transport_header(skb2);
2301
2302		skb_put(skb2, sizeof(struct ipv6hdr));
2303		skb_reset_network_header(skb2);
2304
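		/* Build a header-only skb: version 0 can never be a real
		 * IPv6 packet, and ip6mr_cache_resolve() uses that to tell
		 * this queued RTM_GETROUTE request apart from genuine
		 * queued packets once the route resolves.
		 */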
2305		iph = ipv6_hdr(skb2);
2306		iph->version = 0;
2307		iph->priority = 0;
2308		iph->flow_lbl[0] = 0;
2309		iph->flow_lbl[1] = 0;
2310		iph->flow_lbl[2] = 0;
2311		iph->payload_len = 0;
2312		iph->nexthdr = IPPROTO_NONE;
2313		iph->hop_limit = 0;
2314		iph->saddr = rt->rt6i_src.addr;
2315		iph->daddr = rt->rt6i_dst.addr;
2316
2317		err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2318		read_unlock(&mrt_lock);
2319
2320		return err;
2321	}
2322
2323	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2324	read_unlock(&mrt_lock);
2325	return err;
2326}
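/* ip6mr_get_route() is the RTM_GETROUTE hook for multicast: rt6_fill_node()
 * in net/ipv6/route.c calls it instead of emitting a normal next hop when
 * the destination of the looked-up route is a multicast address.
 */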
2327
2328static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2329			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2330			     int flags)
2331{
2332	struct nlmsghdr *nlh;
2333	struct rtmsg *rtm;
2334	int err;
2335
2336	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2337	if (!nlh)
2338		return -EMSGSIZE;
2339
2340	rtm = nlmsg_data(nlh);
2341	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2342	rtm->rtm_dst_len  = 128;
2343	rtm->rtm_src_len  = 128;
2344	rtm->rtm_tos      = 0;
2345	rtm->rtm_table    = mrt->id;
2346	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2347		goto nla_put_failure;
2348	rtm->rtm_type = RTN_MULTICAST;
2349	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2350	if (c->_c.mfc_flags & MFC_STATIC)
2351		rtm->rtm_protocol = RTPROT_STATIC;
2352	else
2353		rtm->rtm_protocol = RTPROT_MROUTED;
2354	rtm->rtm_flags    = 0;
2355
2356	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2357	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2358		goto nla_put_failure;
2359	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2360	/* do not break the dump if cache is unresolved */
2361	if (err < 0 && err != -ENOENT)
2362		goto nla_put_failure;
2363
2364	nlmsg_end(skb, nlh);
2365	return 0;
2366
2367nla_put_failure:
2368	nlmsg_cancel(skb, nlh);
2369	return -EMSGSIZE;
2370}
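/* The message built here is what `ip -6 mroute show` renders: an
 * RTM_NEWROUTE with rtm_family RTNL_FAMILY_IP6MR, /128 source and group
 * carried in RTA_SRC/RTA_DST, and the oif list and counters appended by
 * mr_fill_mroute() as RTA_MULTIPATH and RTA_MFC_STATS.
 */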
2371
2372static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2373			      u32 portid, u32 seq, struct mr_mfc *c,
2374			      int cmd, int flags)
2375{
2376	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2377				 cmd, flags);
2378}
2379
2380static int mr6_msgsize(bool unresolved, int maxvif)
2381{
2382	size_t len =
2383		NLMSG_ALIGN(sizeof(struct rtmsg))
2384		+ nla_total_size(4)	/* RTA_TABLE */
2385		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
2386		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
2387		;
2388
2389	if (!unresolved)
2390		len = len
2391		      + nla_total_size(4)	/* RTA_IIF */
2392		      + nla_total_size(0)	/* RTA_MULTIPATH */
2393		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2394						/* RTA_MFC_STATS */
2395		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2396		;
2397
2398	return len;
2399}
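/* Worked example of the estimate above: for an unresolved entry the
 * payload is NLMSG_ALIGN(sizeof(struct rtmsg)) plus three attributes,
 * i.e. roughly 12 + 8 + 20 + 20 = 60 bytes (4-byte attribute headers,
 * 16-byte in6_addrs), before nlmsg_new() adds the netlink header.
 */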
2400
2401static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2402			      int cmd)
2403{
2404	struct net *net = read_pnet(&mrt->net);
2405	struct sk_buff *skb;
2406	int err = -ENOBUFS;
2407
2408	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2409			GFP_ATOMIC);
2410	if (!skb)
2411		goto errout;
2412
2413	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2414	if (err < 0)
2415		goto errout;
2416
2417	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2418	return;
2419
2420errout:
2421	kfree_skb(skb);
2422	if (err < 0)
2423		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2424}
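/* A user-space sketch (not part of this file) of consuming these
 * notifications: join RTNLGRP_IPV6_MROUTE on an rtnetlink socket and
 * read RTM_NEWROUTE/RTM_DELROUTE messages whose rtm_family is
 * RTNL_FAMILY_IP6MR:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	int grp = RTNLGRP_IPV6_MROUTE;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 *
 * recv() on `fd` then yields the skbs sent by rtnl_notify() above.
 */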
2425
2426static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2427{
2428	size_t len =
2429		NLMSG_ALIGN(sizeof(struct rtgenmsg))
2430		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
2431		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
2432					/* IP6MRA_CREPORT_SRC_ADDR */
2433		+ nla_total_size(sizeof(struct in6_addr))
2434					/* IP6MRA_CREPORT_DST_ADDR */
2435		+ nla_total_size(sizeof(struct in6_addr))
2436					/* IP6MRA_CREPORT_PKT */
2437		+ nla_total_size(payloadlen)
2438		;
2439
2440	return len;
2441}
2442
2443static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2444{
2445	struct net *net = read_pnet(&mrt->net);
2446	struct nlmsghdr *nlh;
2447	struct rtgenmsg *rtgenm;
2448	struct mrt6msg *msg;
2449	struct sk_buff *skb;
2450	struct nlattr *nla;
2451	int payloadlen;
2452
2453	payloadlen = pkt->len - sizeof(struct mrt6msg);
2454	msg = (struct mrt6msg *)skb_transport_header(pkt);
2455
2456	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2457	if (!skb)
2458		goto errout;
2459
2460	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2461			sizeof(struct rtgenmsg), 0);
2462	if (!nlh)
2463		goto errout;
2464	rtgenm = nlmsg_data(nlh);
2465	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2466	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2467	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2468	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2469			     &msg->im6_src) ||
2470	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2471			     &msg->im6_dst))
2472		goto nla_put_failure;
2473
2474	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2475	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2476				  nla_data(nla), payloadlen))
2477		goto nla_put_failure;
2478
2479	nlmsg_end(skb, nlh);
2480
2481	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2482	return;
2483
2484nla_put_failure:
2485	nlmsg_cancel(skb, nlh);
2486errout:
2487	kfree_skb(skb);
2488	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2489}
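/* This mirrors the mrt6msg that ip6mr_cache_report() queues on the
 * mroute socket: daemons that prefer netlink can join
 * RTNLGRP_IPV6_MROUTE_R (as in the sketch above, with grp changed) and
 * receive the same cache reports as RTM_NEWCACHEREPORT messages.
 */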
2490
2491static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2492{
2493	const struct nlmsghdr *nlh = cb->nlh;
2494	struct fib_dump_filter filter = {};
2495	int err;
2496
2497	if (cb->strict_check) {
2498		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2499					    &filter, cb);
2500		if (err < 0)
2501			return err;
2502	}
2503
2504	if (filter.table_id) {
2505		struct mr_table *mrt;
2506
2507		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2508		if (!mrt) {
2509			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2510				return skb->len;
2511
2512			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2513			return -ENOENT;
2514		}
2515		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2516				    &mfc_unres_lock, &filter);
2517		return skb->len ? : err;
2518	}
2519
2520	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2521				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2522}
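/* The table_id branch above is what a filtered dump such as
 * `ip -6 mroute show table 100` exercises (hypothetical table number);
 * an unfiltered dump walks every table via ip6mr_mr_table_iter().
 */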
2523