xref: /kernel/linux/linux-5.10/net/ipv6/icmp.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	Internet Control Message Protocol (ICMPv6)
4 *	Linux INET6 implementation
5 *
6 *	Authors:
7 *	Pedro Roque		<roque@di.fc.ul.pt>
8 *
9 *	Based on net/ipv4/icmp.c
10 *
11 *	RFC 1885
12 */
13
14/*
15 *	Changes:
16 *
17 *	Andi Kleen		:	exception handling
18 *	Andi Kleen			add rate limits. never reply to a icmp.
19 *					add more length checks and other fixes.
20 *	yoshfuji		:	ensure to sent parameter problem for
21 *					fragments.
22 *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
23 *	Randy Dunlap and
24 *	YOSHIFUJI Hideaki @USAGI:	Per-interface statistics support
25 *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26 */
27
28#define pr_fmt(fmt) "IPv6: " fmt
29
30#include <linux/module.h>
31#include <linux/errno.h>
32#include <linux/types.h>
33#include <linux/socket.h>
34#include <linux/in.h>
35#include <linux/kernel.h>
36#include <linux/sockios.h>
37#include <linux/net.h>
38#include <linux/skbuff.h>
39#include <linux/init.h>
40#include <linux/netfilter.h>
41#include <linux/slab.h>
42
43#ifdef CONFIG_SYSCTL
44#include <linux/sysctl.h>
45#endif
46
47#include <linux/inet.h>
48#include <linux/netdevice.h>
49#include <linux/icmpv6.h>
50
51#include <net/ip.h>
52#include <net/sock.h>
53
54#include <net/ipv6.h>
55#include <net/ip6_checksum.h>
56#include <net/ping.h>
57#include <net/protocol.h>
58#include <net/raw.h>
59#include <net/rawv6.h>
60#include <net/transp_v6.h>
61#include <net/ip6_route.h>
62#include <net/addrconf.h>
63#include <net/icmp.h>
64#include <net/xfrm.h>
65#include <net/inet_common.h>
66#include <net/dsfield.h>
67#include <net/l3mdev.h>
68
69#include <linux/uaccess.h>
70
71/*
72 *	The ICMP socket(s). This is the most convenient way to flow control
73 *	our ICMP output as well as maintain a clean interface throughout
74 *	all layers. All Socketless IP sends will soon be gone.
75 *
76 *	On SMP we have one ICMP socket per-cpu.
77 */
78static struct sock *icmpv6_sk(struct net *net)
79{
80	return this_cpu_read(*net->ipv6.icmp_sk);
81}
82
/* inet6 error handler for IPPROTO_ICMPV6 itself: applies PMTU and
 * redirect notifications, then relays echo-related errors to the ping
 * socket layer.  @offset is the start of the inner ICMPv6 header within
 * @skb.  Always returns 0.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net(skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));

	/* Only error messages (info bit clear) are forwarded to ping_err(). */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}
102
static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record for IPPROTO_ICMPV6 in the inet6 protocol table. */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
110
111/* Called with BH disabled */
112static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
113{
114	struct sock *sk;
115
116	sk = icmpv6_sk(net);
117	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
118		/* This can happen if the output path (f.e. SIT or
119		 * ip6ip6 tunnel) signals dst_link_failure() for an
120		 * outgoing ICMP6 packet.
121		 */
122		return NULL;
123	}
124	return sk;
125}
126
/* Release the per-cpu ICMPv6 socket taken by icmpv6_xmit_lock(). */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
	spin_unlock(&sk->sk_lock.slock);
}
131
132/*
133 * Figure out, may we reply to this packet with icmp error.
134 *
135 * We do not reply, if:
136 *	- it was icmp error message.
137 *	- it is truncated, so that it is known, that protocol is ICMPV6
138 *	  (i.e. in the middle of some exthdr)
139 *
140 *	--ANK (980726)
141 */
142
/* Return true when @skb must not be answered with an ICMPv6 error:
 * either it is truncated inside the extension-header chain, or it is
 * itself an ICMPv6 error message (see comment block above).
 */
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first byte after the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			ptr+offsetof(struct icmp6hdr, icmp6_type),
			sizeof(_type), &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
172
173static bool icmpv6_mask_allow(struct net *net, int type)
174{
175	if (type > ICMPV6_MSG_MAX)
176		return true;
177
178	/* Limit if icmp type is set in ratemask. */
179	if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
180		return true;
181
182	return false;
183}
184
185static bool icmpv6_global_allow(struct net *net, int type)
186{
187	if (icmpv6_mask_allow(net, type))
188		return true;
189
190	if (icmp_global_allow())
191		return true;
192
193	return false;
194}
195
196/*
197 * Check the ICMP output rate limit
198 */
/* Per-destination rate limit gate for ICMPv6 output.  Looks up the
 * output route for @fl6 and consults the inet_peer token bucket for
 * the destination.  Loopback destinations are always allowed.
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = (struct rt6_info *)dst;
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* create=1: allocate a peer entry if none exists yet. */
		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	dst_release(dst);
	return res;
}
237
238static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
239				  struct flowi6 *fl6)
240{
241	struct net *net = sock_net(sk);
242	struct dst_entry *dst;
243	bool res = false;
244
245	dst = ip6_route_output(net, sk, fl6);
246	if (!dst->error) {
247		struct rt6_info *rt = (struct rt6_info *)dst;
248		struct in6_addr prefsrc;
249
250		rt6_get_prefsrc(rt, &prefsrc);
251		res = !ipv6_addr_any(&prefsrc);
252	}
253	dst_release(dst);
254	return res;
255}
256
257/*
258 *	an inline helper for the "simple" if statement below
259 *	checks if parameter problem report is caused by an
260 *	unrecognized IPv6 option that has the Option Type
261 *	highest-order two bits set to 10
262 */
263
264static bool opt_unrec(struct sk_buff *skb, __u32 offset)
265{
266	u8 _optval, *op;
267
268	offset += skb_network_offset(skb);
269	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
270	if (!op)
271		return true;
272	return (*op & 0xC0) == 0x80;
273}
274
/* Copy the prepared ICMPv6 header @thdr into the queued frames on
 * @sk's write queue, compute the ICMPv6 checksum over @len payload
 * bytes plus the pseudo-header from @fl6, and transmit the frames.
 * No-op if the write queue is empty.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	/* Checksum field must be zero while the sum is computed. */
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single frame: fold the header into its existing csum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: accumulate every frame's csum first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
312
/* Cursor passed to icmpv6_getfrag(): the packet being quoted, the
 * offset within it where copying starts, and the outgoing ICMPv6 type.
 */
struct icmpv6_msg {
	struct sk_buff	*skb;
	int		offset;
	uint8_t		type;
};
318
/* ip6_append_data() getfrag callback: copy @len bytes of the quoted
 * packet into the frame under construction while accumulating the
 * checksum.  For error messages, also attach the original conntrack
 * entry so the reply is associated with the offending flow.
 * Always returns 0 (success).
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
332
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6: if the offending packet carried a Home Address
 * destination option (offset recorded in @opt->dsthao on input), swap
 * the care-of address in the IPv6 source field with the home address
 * from the HAO, so the error is built against the home address.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	struct in6_addr tmp;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			tmp = iph->saddr;
			iph->saddr = hao->addr;
			hao->addr = tmp;
		}
	}
}
#else
/* MIPv6 disabled: nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
355
/* Resolve the output route for an outgoing ICMPv6 error described by
 * @fl6, applying xfrm (IPsec) policy.  If the plain xfrm lookup is
 * denied with -EPERM, retry with a reverse-decoded flow from the
 * offending packet (XFRM_LOOKUP_ICMP) so errors can still traverse
 * the relevant SAs.  Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast.
	 */
	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
		/* dst unchanged by xfrm: fall through to the ICMP retry. */
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;	/* denied: retry with reverse flow */
		else
			return dst;
	}

	/* Build fl2 from the embedded (offending) packet, reversed. */
	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* Fall back to the original (non-ICMP) route if we still hold one. */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
419
/* Resolve the device an ICMPv6 reply to @skb should be attributed to,
 * unwrapping loopback and L3 master (VRF) devices via the attached
 * route when needed.
 */
static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}
441
/* Interface index to use for replies to @skb; see icmp6_dev(). */
static int icmp6_iif(const struct sk_buff *skb)
{
	return icmp6_dev(skb)->ifindex;
}
446
447/*
448 *	Send an ICMP message in response to a packet in error
449 */
/* Send an ICMPv6 error message in response to the packet in error @skb.
 * @type, @code: ICMPv6 error type/code.
 * @info: type-dependent field (MTU for PKT_TOOBIG, pointer for PARAMPROB).
 * @force_saddr: optional source address override (used by tunnels).
 * @parm: input-path control block of @skb (IP6CB or a saved copy).
 * Silently drops the request when RFC 4443 eligibility rules or rate
 * limits forbid a reply.
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity: the IPv6 header must lie fully inside the skb data. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	/* If the original destination is one of our own addresses (or an
	 * anycast we answer for), reuse it as the reply source address.
	 */
	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		/* Errors about multicast / not-to-us packets are only
		 * permitted for PKT_TOOBIG and unrecognized-option
		 * Parameter Problems (RFC 4443 2.4(e)).
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	/* Per-destination token-bucket ratelimit. */
	if (!icmpv6_xrlim_allow(sk, type, &fl6))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	ipcm6_init_sk(&ipc6, np);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Clamp the quoted packet so the reply fits in IPV6_MIN_MTU.
	 * NOTE(review): min_t() compares as unsigned int, so a negative
	 * len would be clamped to the positive limit before the check
	 * below can fire — presumably len cannot be negative here given
	 * the header checks above; verify before relying on the branch.
	 */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, (struct rt6_info *)dst,
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		/* Fill in the ICMPv6 header and checksum, then transmit. */
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);
641
/* Slightly more convenient version of icmp6_send: emit a Parameter
 * Problem pointing at offset @pos, then consume @skb — callers must
 * not touch the skb afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb(skb);
}
649
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * (or ICMPV6_TIME_EXCEED/ICMPV6_EXC_HOPLIMIT for ICMP_TIME_EXCEEDED)
 * if sufficient data bytes are available.  Used when an ICMPv4 error
 * arrives for an IPv6-in-IPv4 tunnel packet.
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 * @data_len: RFC 4884 original-datagram length; zeroed unless it is a
 *  multiple of 8 in [128, skb->len].
 * Returns 0 on success, 1 when the packet is too short or allocation
 * fails.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* Need a writable copy when we will shift data around (RFC 4884). */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	/* Strip the tunnel header(s) so the inner IPv6 packet is quoted. */
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Report the ICMPv4 sender as an IPv4-mapped IPv6 source. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
715
/* Answer an Echo Request in @skb with an Echo Reply, honouring the
 * echo_ignore_multicast / echo_ignore_anycast / anycast_src_echo_reply
 * sysctls and both the global and per-destination rate limits.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	bool acast;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return;

	/* For non-unicast destinations let routing pick the reply source,
	 * unless answering from anycast addresses is explicitly enabled.
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	/* Reply reuses the request header (id/seq) with the type changed. */
	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = np->mcast_oif;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = ICMPV6_ECHO_REPLY;

	ipcm6_init_sk(&ipc6, np);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
813
/* Deliver a received ICMPv6 error to the upper-layer protocol the
 * quoted packet belongs to: skip the embedded IPv6 header and any
 * extension headers, then invoke the protocol's err_handler and notify
 * matching raw sockets.  @skb->data points at the embedded IPv6 header.
 */
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
{
	const struct inet6_protocol *ipprot;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0)
			goto out;
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	if (!pskb_may_pull(skb, inner_offset+8))
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
857
858/*
859 *	Handle icmp messages
860 */
861
/* Main ICMPv6 input handler: validate xfrm policy and checksum, then
 * dispatch on message type (echo, errors, NDISC, MLD, ...).  Always
 * consumes @skb and returns 0.
 */
static int icmpv6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;
	bool success = false;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Only ICMP-typed states may bypass the normal policy check. */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP))
			goto drop_no_count;

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the embedded
		 * packet so the reverse policy check sees the inner flow.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
			goto drop_no_count;

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		success = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		/* pskb_may_pull may have reallocated; refetch the header. */
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		break;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		break;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		/* Known types handled elsewhere (or intentionally ignored). */
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (success)
		consume_skb(skb);
	else
		kfree_skb(skb);

	return 0;

csum_error:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb(skb);
	return 0;
}
1000
1001void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
1002		      u8 type,
1003		      const struct in6_addr *saddr,
1004		      const struct in6_addr *daddr,
1005		      int oif)
1006{
1007	memset(fl6, 0, sizeof(*fl6));
1008	fl6->saddr = *saddr;
1009	fl6->daddr = *daddr;
1010	fl6->flowi6_proto	= IPPROTO_ICMPV6;
1011	fl6->fl6_icmp_type	= type;
1012	fl6->fl6_icmp_code	= 0;
1013	fl6->flowi6_oif		= oif;
1014	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1015}
1016
1017static void __net_exit icmpv6_sk_exit(struct net *net)
1018{
1019	int i;
1020
1021	for_each_possible_cpu(i)
1022		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
1023	free_percpu(net->ipv6.icmp_sk);
1024}
1025
/* Allocate one ICMPv6 control socket per possible CPU for @net.
 * Returns 0 on success or a negative errno; on failure all sockets
 * created so far are destroyed via icmpv6_sk_exit().
 */
static int __net_init icmpv6_sk_init(struct net *net)
{
	struct sock *sk;
	int err, i;

	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
	if (!net->ipv6.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			goto fail;
		}

		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}
	return 0;

 fail:
	/* icmpv6_sk_exit() skips NULL slots, so partial setup is safe. */
	icmpv6_sk_exit(net);
	return err;
}
1057
/* Per-netns setup/teardown of the ICMPv6 control sockets. */
static struct pernet_operations icmpv6_sk_ops = {
	.init = icmpv6_sk_init,
	.exit = icmpv6_sk_exit,
};
1062
/* Boot-time registration: pernet sockets, the IPPROTO_ICMPV6 protocol
 * handler, and the icmp sender hook.  Unwinds in reverse order on
 * failure.  Returns 0 or a negative errno.
 */
int __init icmpv6_init(void)
{
	int err;

	err = register_pernet_subsys(&icmpv6_sk_ops);
	if (err < 0)
		return err;

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	unregister_pernet_subsys(&icmpv6_sk_ops);
	return err;
}
1087
/* Module teardown: unregister in reverse order of icmpv6_init(). */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(icmp6_send);
	unregister_pernet_subsys(&icmpv6_sk_ops);
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
}
1094
1095
/* Mapping from ICMPV6_DEST_UNREACH codes (table index) to the errno
 * reported to the socket and whether the error is fatal for the
 * connection; consumed by icmpv6_err_convert().
 */
static const struct icmp6_err {
	int err;	/* errno delivered to the socket */
	int fatal;	/* non-zero: abort the connection */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1129
1130int icmpv6_err_convert(u8 type, u8 code, int *err)
1131{
1132	int fatal = 0;
1133
1134	*err = EPROTO;
1135
1136	switch (type) {
1137	case ICMPV6_DEST_UNREACH:
1138		fatal = 1;
1139		if (code < ARRAY_SIZE(tab_unreach)) {
1140			*err  = tab_unreach[code].err;
1141			fatal = tab_unreach[code].fatal;
1142		}
1143		break;
1144
1145	case ICMPV6_PKT_TOOBIG:
1146		*err = EMSGSIZE;
1147		break;
1148
1149	case ICMPV6_PARAMPROB:
1150		*err = EPROTO;
1151		fatal = 1;
1152		break;
1153
1154	case ICMPV6_TIME_EXCEED:
1155		*err = EHOSTUNREACH;
1156		break;
1157	}
1158
1159	return fatal;
1160}
1161EXPORT_SYMBOL(icmpv6_err_convert);
1162
#ifdef CONFIG_SYSCTL
/* Template for the per-netns net.ipv6.icmp sysctl table; the .data
 * pointers are rewired to the owning netns in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler = proc_do_large_bitmap,
	},
	{ },	/* sentinel */
};
1202
/* Duplicate the sysctl template for @net and point each entry's .data
 * at the netns-local field.  Index order must match the template.
 * Returns NULL on allocation failure (caller handles it).
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
	}
	return table;
}
1220#endif
1221