xref: /kernel/linux/linux-6.6/net/ipv6/udp.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	UDP over IPv6
4 *	Linux INET6 implementation
5 *
6 *	Authors:
7 *	Pedro Roque		<roque@di.fc.ul.pt>
8 *
9 *	Based on linux/ipv4/udp.c
10 *
11 *	Fixes:
12 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
13 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
14 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
15 *					a single port at the same time.
16 *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
17 *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
18 */
19
20#include <linux/bpf-cgroup.h>
21#include <linux/errno.h>
22#include <linux/types.h>
23#include <linux/socket.h>
24#include <linux/sockios.h>
25#include <linux/net.h>
26#include <linux/in6.h>
27#include <linux/netdevice.h>
28#include <linux/if_arp.h>
29#include <linux/ipv6.h>
30#include <linux/icmpv6.h>
31#include <linux/init.h>
32#include <linux/module.h>
33#include <linux/skbuff.h>
34#include <linux/slab.h>
35#include <linux/uaccess.h>
36#include <linux/indirect_call_wrapper.h>
37
38#include <net/addrconf.h>
39#include <net/ndisc.h>
40#include <net/protocol.h>
41#include <net/transp_v6.h>
42#include <net/ip6_route.h>
43#include <net/raw.h>
44#include <net/seg6.h>
45#include <net/tcp_states.h>
46#include <net/ip6_checksum.h>
47#include <net/ip6_tunnel.h>
48#include <trace/events/udp.h>
49#include <net/xfrm.h>
50#include <net/inet_hashtables.h>
51#include <net/inet6_hashtables.h>
52#include <net/busy_poll.h>
53#include <net/sock_reuseport.h>
54#include <net/gro.h>
55
56#include <linux/proc_fs.h>
57#include <linux/seq_file.h>
58#include <trace/events/skb.h>
59#include "udp_impl.h"
60
/*
 * sk_destruct callback installed by udpv6_init_sock(): runs the shared UDP
 * destructor helper first, then the generic IPv6 socket destructor.
 */
static void udpv6_destruct_sock(struct sock *sk)
{
	udp_destruct_common(sk);
	inet6_sock_destruct(sk);
}
66
67int udpv6_init_sock(struct sock *sk)
68{
69	udp_lib_init_sock(sk);
70	sk->sk_destruct = udpv6_destruct_sock;
71	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
72	return 0;
73}
74
75INDIRECT_CALLABLE_SCOPE
76u32 udp6_ehashfn(const struct net *net,
77		 const struct in6_addr *laddr,
78		 const u16 lport,
79		 const struct in6_addr *faddr,
80		 const __be16 fport)
81{
82	static u32 udp6_ehash_secret __read_mostly;
83	static u32 udp_ipv6_hash_secret __read_mostly;
84
85	u32 lhash, fhash;
86
87	net_get_random_once(&udp6_ehash_secret,
88			    sizeof(udp6_ehash_secret));
89	net_get_random_once(&udp_ipv6_hash_secret,
90			    sizeof(udp_ipv6_hash_secret));
91
92	lhash = (__force u32)laddr->s6_addr32[3];
93	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
94
95	return __inet6_ehashfn(lhash, lport, fhash, fport,
96			       udp6_ehash_secret + net_hash_mix(net));
97}
98
99int udp_v6_get_port(struct sock *sk, unsigned short snum)
100{
101	unsigned int hash2_nulladdr =
102		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
103	unsigned int hash2_partial =
104		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
105
106	/* precompute partial secondary hash */
107	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
108	return udp_lib_get_port(sk, snum, hash2_nulladdr);
109}
110
111void udp_v6_rehash(struct sock *sk)
112{
113	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
114					  &sk->sk_v6_rcv_saddr,
115					  inet_sk(sk)->inet_num);
116
117	udp_lib_rehash(sk, new_hash);
118}
119
120static int compute_score(struct sock *sk, struct net *net,
121			 const struct in6_addr *saddr, __be16 sport,
122			 const struct in6_addr *daddr, unsigned short hnum,
123			 int dif, int sdif)
124{
125	int bound_dev_if, score;
126	struct inet_sock *inet;
127	bool dev_match;
128
129	if (!net_eq(sock_net(sk), net) ||
130	    udp_sk(sk)->udp_port_hash != hnum ||
131	    sk->sk_family != PF_INET6)
132		return -1;
133
134	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
135		return -1;
136
137	score = 0;
138	inet = inet_sk(sk);
139
140	if (inet->inet_dport) {
141		if (inet->inet_dport != sport)
142			return -1;
143		score++;
144	}
145
146	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
147		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
148			return -1;
149		score++;
150	}
151
152	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
153	dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
154	if (!dev_match)
155		return -1;
156	if (bound_dev_if)
157		score++;
158
159	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
160		score++;
161
162	return score;
163}
164
165/* called with rcu_read_lock() */
166static struct sock *udp6_lib_lookup2(struct net *net,
167		const struct in6_addr *saddr, __be16 sport,
168		const struct in6_addr *daddr, unsigned int hnum,
169		int dif, int sdif, struct udp_hslot *hslot2,
170		struct sk_buff *skb)
171{
172	struct sock *sk, *result;
173	int score, badness;
174
175	result = NULL;
176	badness = -1;
177	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
178		score = compute_score(sk, net, saddr, sport,
179				      daddr, hnum, dif, sdif);
180		if (score > badness) {
181			badness = score;
182
183			if (sk->sk_state == TCP_ESTABLISHED) {
184				result = sk;
185				continue;
186			}
187
188			result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
189							saddr, sport, daddr, hnum, udp6_ehashfn);
190			if (!result) {
191				result = sk;
192				continue;
193			}
194
195			/* Fall back to scoring if group has connections */
196			if (!reuseport_has_conns(sk))
197				return result;
198
199			/* Reuseport logic returned an error, keep original score. */
200			if (IS_ERR(result))
201				continue;
202
203			badness = compute_score(sk, net, saddr, sport,
204						daddr, hnum, dif, sdif);
205		}
206	}
207	return result;
208}
209
210/* rcu_read_lock() must be held */
211struct sock *__udp6_lib_lookup(struct net *net,
212			       const struct in6_addr *saddr, __be16 sport,
213			       const struct in6_addr *daddr, __be16 dport,
214			       int dif, int sdif, struct udp_table *udptable,
215			       struct sk_buff *skb)
216{
217	unsigned short hnum = ntohs(dport);
218	unsigned int hash2, slot2;
219	struct udp_hslot *hslot2;
220	struct sock *result, *sk;
221
222	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
223	slot2 = hash2 & udptable->mask;
224	hslot2 = &udptable->hash2[slot2];
225
226	/* Lookup connected or non-wildcard sockets */
227	result = udp6_lib_lookup2(net, saddr, sport,
228				  daddr, hnum, dif, sdif,
229				  hslot2, skb);
230	if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
231		goto done;
232
233	/* Lookup redirect from BPF */
234	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
235	    udptable == net->ipv4.udp_table) {
236		sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
237						saddr, sport, daddr, hnum, dif,
238						udp6_ehashfn);
239		if (sk) {
240			result = sk;
241			goto done;
242		}
243	}
244
245	/* Got non-wildcard socket or error on first lookup */
246	if (result)
247		goto done;
248
249	/* Lookup wildcard sockets */
250	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
251	slot2 = hash2 & udptable->mask;
252	hslot2 = &udptable->hash2[slot2];
253
254	result = udp6_lib_lookup2(net, saddr, sport,
255				  &in6addr_any, hnum, dif, sdif,
256				  hslot2, skb);
257done:
258	if (IS_ERR(result))
259		return NULL;
260	return result;
261}
262EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
263
264static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
265					  __be16 sport, __be16 dport,
266					  struct udp_table *udptable)
267{
268	const struct ipv6hdr *iph = ipv6_hdr(skb);
269
270	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
271				 &iph->daddr, dport, inet6_iif(skb),
272				 inet6_sdif(skb), udptable, skb);
273}
274
275struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
276				 __be16 sport, __be16 dport)
277{
278	const struct ipv6hdr *iph = ipv6_hdr(skb);
279	struct net *net = dev_net(skb->dev);
280	int iif, sdif;
281
282	inet6_get_iif_sdif(skb, &iif, &sdif);
283
284	return __udp6_lib_lookup(net, &iph->saddr, sport,
285				 &iph->daddr, dport, iif,
286				 sdif, net->ipv4.udp_table, NULL);
287}
288
289/* Must be called under rcu_read_lock().
290 * Does increment socket refcount.
291 */
292#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
293struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
294			     const struct in6_addr *daddr, __be16 dport, int dif)
295{
296	struct sock *sk;
297
298	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
299				dif, 0, net->ipv4.udp_table, NULL);
300	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
301		sk = NULL;
302	return sk;
303}
304EXPORT_SYMBOL_GPL(udp6_lib_lookup);
305#endif
306
307/* do not use the scratch area len for jumbogram: their length execeeds the
308 * scratch area space; note that the IP6CB flags is still in the first
309 * cacheline, so checking for jumbograms is cheap
310 */
311static int udp6_skb_len(struct sk_buff *skb)
312{
313	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
314}
315
316/*
317 *	This should be easy, if there is something there we
318 *	return it, otherwise we block.
319 */
320
321int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
322		  int flags, int *addr_len)
323{
324	struct ipv6_pinfo *np = inet6_sk(sk);
325	struct inet_sock *inet = inet_sk(sk);
326	struct sk_buff *skb;
327	unsigned int ulen, copied;
328	int off, err, peeking = flags & MSG_PEEK;
329	int is_udplite = IS_UDPLITE(sk);
330	struct udp_mib __percpu *mib;
331	bool checksum_valid = false;
332	int is_udp4;
333
334	if (flags & MSG_ERRQUEUE)
335		return ipv6_recv_error(sk, msg, len, addr_len);
336
337	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
338		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
339
340try_again:
341	off = sk_peek_offset(sk, flags);
342	skb = __skb_recv_udp(sk, flags, &off, &err);
343	if (!skb)
344		return err;
345
346	ulen = udp6_skb_len(skb);
347	copied = len;
348	if (copied > ulen - off)
349		copied = ulen - off;
350	else if (copied < ulen)
351		msg->msg_flags |= MSG_TRUNC;
352
353	is_udp4 = (skb->protocol == htons(ETH_P_IP));
354	mib = __UDPX_MIB(sk, is_udp4);
355
356	/*
357	 * If checksum is needed at all, try to do it while copying the
358	 * data.  If the data is truncated, or if we only want a partial
359	 * coverage checksum (UDP-Lite), do it before the copy.
360	 */
361
362	if (copied < ulen || peeking ||
363	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
364		checksum_valid = udp_skb_csum_unnecessary(skb) ||
365				!__udp_lib_checksum_complete(skb);
366		if (!checksum_valid)
367			goto csum_copy_err;
368	}
369
370	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
371		if (udp_skb_is_linear(skb))
372			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
373		else
374			err = skb_copy_datagram_msg(skb, off, msg, copied);
375	} else {
376		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
377		if (err == -EINVAL)
378			goto csum_copy_err;
379	}
380	if (unlikely(err)) {
381		if (!peeking) {
382			atomic_inc(&sk->sk_drops);
383			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
384		}
385		kfree_skb(skb);
386		return err;
387	}
388	if (!peeking)
389		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
390
391	sock_recv_cmsgs(msg, sk, skb);
392
393	/* Copy the address. */
394	if (msg->msg_name) {
395		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
396		sin6->sin6_family = AF_INET6;
397		sin6->sin6_port = udp_hdr(skb)->source;
398		sin6->sin6_flowinfo = 0;
399
400		if (is_udp4) {
401			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
402					       &sin6->sin6_addr);
403			sin6->sin6_scope_id = 0;
404		} else {
405			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
406			sin6->sin6_scope_id =
407				ipv6_iface_scope_id(&sin6->sin6_addr,
408						    inet6_iif(skb));
409		}
410		*addr_len = sizeof(*sin6);
411
412		BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
413						      (struct sockaddr *)sin6,
414						      addr_len);
415	}
416
417	if (udp_test_bit(GRO_ENABLED, sk))
418		udp_cmsg_recv(msg, sk, skb);
419
420	if (np->rxopt.all)
421		ip6_datagram_recv_common_ctl(sk, msg, skb);
422
423	if (is_udp4) {
424		if (inet_cmsg_flags(inet))
425			ip_cmsg_recv_offset(msg, sk, skb,
426					    sizeof(struct udphdr), off);
427	} else {
428		if (np->rxopt.all)
429			ip6_datagram_recv_specific_ctl(sk, msg, skb);
430	}
431
432	err = copied;
433	if (flags & MSG_TRUNC)
434		err = ulen;
435
436	skb_consume_udp(sk, skb, peeking ? -err : err);
437	return err;
438
439csum_copy_err:
440	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
441				 udp_skb_destructor)) {
442		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
443		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
444	}
445	kfree_skb(skb);
446
447	/* starting over for a new packet, but check if we need to yield */
448	cond_resched();
449	msg->msg_flags &= ~MSG_TRUNC;
450	goto try_again;
451}
452
453DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
454void udpv6_encap_enable(void)
455{
456	static_branch_inc(&udpv6_encap_needed_key);
457}
458EXPORT_SYMBOL(udpv6_encap_enable);
459
460/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
461 * through error handlers in encapsulations looking for a match.
462 */
463static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
464				      struct inet6_skb_parm *opt,
465				      u8 type, u8 code, int offset, __be32 info)
466{
467	int i;
468
469	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
470		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
471			       u8 type, u8 code, int offset, __be32 info);
472		const struct ip6_tnl_encap_ops *encap;
473
474		encap = rcu_dereference(ip6tun_encaps[i]);
475		if (!encap)
476			continue;
477		handler = encap->err_handler;
478		if (handler && !handler(skb, opt, type, code, offset, info))
479			return 0;
480	}
481
482	return -ENOENT;
483}
484
485/* Try to match ICMP errors to UDP tunnels by looking up a socket without
486 * reversing source and destination port: this will match tunnels that force the
487 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
488 * lwtunnels might actually break this assumption by being configured with
489 * different destination ports on endpoints, in this case we won't be able to
490 * trace ICMP messages back to them.
491 *
492 * If this doesn't match any socket, probe tunnels with arbitrary destination
493 * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
494 * we've sent packets to won't necessarily match the local destination port.
495 *
496 * Then ask the tunnel implementation to match the error against a valid
497 * association.
498 *
499 * Return an error if we can't find a match, the socket if we need further
500 * processing, zero otherwise.
501 */
502static struct sock *__udp6_lib_err_encap(struct net *net,
503					 const struct ipv6hdr *hdr, int offset,
504					 struct udphdr *uh,
505					 struct udp_table *udptable,
506					 struct sock *sk,
507					 struct sk_buff *skb,
508					 struct inet6_skb_parm *opt,
509					 u8 type, u8 code, __be32 info)
510{
511	int (*lookup)(struct sock *sk, struct sk_buff *skb);
512	int network_offset, transport_offset;
513	struct udp_sock *up;
514
515	network_offset = skb_network_offset(skb);
516	transport_offset = skb_transport_offset(skb);
517
518	/* Network header needs to point to the outer IPv6 header inside ICMP */
519	skb_reset_network_header(skb);
520
521	/* Transport header needs to point to the UDP header */
522	skb_set_transport_header(skb, offset);
523
524	if (sk) {
525		up = udp_sk(sk);
526
527		lookup = READ_ONCE(up->encap_err_lookup);
528		if (lookup && lookup(sk, skb))
529			sk = NULL;
530
531		goto out;
532	}
533
534	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
535			       &hdr->saddr, uh->dest,
536			       inet6_iif(skb), 0, udptable, skb);
537	if (sk) {
538		up = udp_sk(sk);
539
540		lookup = READ_ONCE(up->encap_err_lookup);
541		if (!lookup || lookup(sk, skb))
542			sk = NULL;
543	}
544
545out:
546	if (!sk) {
547		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
548							offset, info));
549	}
550
551	skb_set_transport_header(skb, transport_offset);
552	skb_set_network_header(skb, network_offset);
553
554	return sk;
555}
556
557int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
558		   u8 type, u8 code, int offset, __be32 info,
559		   struct udp_table *udptable)
560{
561	struct ipv6_pinfo *np;
562	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
563	const struct in6_addr *saddr = &hdr->saddr;
564	const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr;
565	struct udphdr *uh = (struct udphdr *)(skb->data+offset);
566	bool tunnel = false;
567	struct sock *sk;
568	int harderr;
569	int err;
570	struct net *net = dev_net(skb->dev);
571
572	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
573			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
574
575	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
576		/* No socket for error: try tunnels before discarding */
577		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
578			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
579						  udptable, sk, skb,
580						  opt, type, code, info);
581			if (!sk)
582				return 0;
583		} else
584			sk = ERR_PTR(-ENOENT);
585
586		if (IS_ERR(sk)) {
587			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
588					  ICMP6_MIB_INERRORS);
589			return PTR_ERR(sk);
590		}
591
592		tunnel = true;
593	}
594
595	harderr = icmpv6_err_convert(type, code, &err);
596	np = inet6_sk(sk);
597
598	if (type == ICMPV6_PKT_TOOBIG) {
599		if (!ip6_sk_accept_pmtu(sk))
600			goto out;
601		ip6_sk_update_pmtu(skb, sk, info);
602		if (np->pmtudisc != IPV6_PMTUDISC_DONT)
603			harderr = 1;
604	}
605	if (type == NDISC_REDIRECT) {
606		if (tunnel) {
607			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
608				     READ_ONCE(sk->sk_mark), sk->sk_uid);
609		} else {
610			ip6_sk_redirect(skb, sk);
611		}
612		goto out;
613	}
614
615	/* Tunnels don't have an application socket: don't pass errors back */
616	if (tunnel) {
617		if (udp_sk(sk)->encap_err_rcv)
618			udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest,
619						  ntohl(info), (u8 *)(uh+1));
620		goto out;
621	}
622
623	if (!np->recverr) {
624		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
625			goto out;
626	} else {
627		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
628	}
629
630	sk->sk_err = err;
631	sk_error_report(sk);
632out:
633	return 0;
634}
635
636static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
637{
638	int rc;
639
640	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
641		sock_rps_save_rxhash(sk, skb);
642		sk_mark_napi_id(sk, skb);
643		sk_incoming_cpu_update(sk);
644	} else {
645		sk_mark_napi_id_once(sk, skb);
646	}
647
648	rc = __udp_enqueue_schedule_skb(sk, skb);
649	if (rc < 0) {
650		int is_udplite = IS_UDPLITE(sk);
651		enum skb_drop_reason drop_reason;
652
653		/* Note that an ENOMEM error is charged twice */
654		if (rc == -ENOMEM) {
655			UDP6_INC_STATS(sock_net(sk),
656					 UDP_MIB_RCVBUFERRORS, is_udplite);
657			drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
658		} else {
659			UDP6_INC_STATS(sock_net(sk),
660				       UDP_MIB_MEMERRORS, is_udplite);
661			drop_reason = SKB_DROP_REASON_PROTO_MEM;
662		}
663		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
664		kfree_skb_reason(skb, drop_reason);
665		trace_udp_fail_queue_rcv_skb(rc, sk);
666		return -1;
667	}
668
669	return 0;
670}
671
672static __inline__ int udpv6_err(struct sk_buff *skb,
673				struct inet6_skb_parm *opt, u8 type,
674				u8 code, int offset, __be32 info)
675{
676	return __udp6_lib_err(skb, opt, type, code, offset, info,
677			      dev_net(skb->dev)->ipv4.udp_table);
678}
679
680static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
681{
682	enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
683	struct udp_sock *up = udp_sk(sk);
684	int is_udplite = IS_UDPLITE(sk);
685
686	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
687		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
688		goto drop;
689	}
690	nf_reset_ct(skb);
691
692	if (static_branch_unlikely(&udpv6_encap_needed_key) &&
693	    READ_ONCE(up->encap_type)) {
694		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
695
696		/*
697		 * This is an encapsulation socket so pass the skb to
698		 * the socket's udp_encap_rcv() hook. Otherwise, just
699		 * fall through and pass this up the UDP socket.
700		 * up->encap_rcv() returns the following value:
701		 * =0 if skb was successfully passed to the encap
702		 *    handler or was discarded by it.
703		 * >0 if skb should be passed on to UDP.
704		 * <0 if skb should be resubmitted as proto -N
705		 */
706
707		/* if we're overly short, let UDP handle it */
708		encap_rcv = READ_ONCE(up->encap_rcv);
709		if (encap_rcv) {
710			int ret;
711
712			/* Verify checksum before giving to encap */
713			if (udp_lib_checksum_complete(skb))
714				goto csum_error;
715
716			ret = encap_rcv(sk, skb);
717			if (ret <= 0) {
718				__UDP6_INC_STATS(sock_net(sk),
719						 UDP_MIB_INDATAGRAMS,
720						 is_udplite);
721				return -ret;
722			}
723		}
724
725		/* FALLTHROUGH -- it's a UDP Packet */
726	}
727
728	/*
729	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
730	 */
731	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
732		u16 pcrlen = READ_ONCE(up->pcrlen);
733
734		if (pcrlen == 0) {          /* full coverage was set  */
735			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
736					    UDP_SKB_CB(skb)->cscov, skb->len);
737			goto drop;
738		}
739		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
740			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
741					    UDP_SKB_CB(skb)->cscov, pcrlen);
742			goto drop;
743		}
744	}
745
746	prefetch(&sk->sk_rmem_alloc);
747	if (rcu_access_pointer(sk->sk_filter) &&
748	    udp_lib_checksum_complete(skb))
749		goto csum_error;
750
751	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
752		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
753		goto drop;
754	}
755
756	udp_csum_pull_header(skb);
757
758	skb_dst_drop(skb);
759
760	return __udpv6_queue_rcv_skb(sk, skb);
761
762csum_error:
763	drop_reason = SKB_DROP_REASON_UDP_CSUM;
764	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
765drop:
766	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
767	atomic_inc(&sk->sk_drops);
768	kfree_skb_reason(skb, drop_reason);
769	return -1;
770}
771
772static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
773{
774	struct sk_buff *next, *segs;
775	int ret;
776
777	if (likely(!udp_unexpected_gso(sk, skb)))
778		return udpv6_queue_rcv_one_skb(sk, skb);
779
780	__skb_push(skb, -skb_mac_offset(skb));
781	segs = udp_rcv_segment(sk, skb, false);
782	skb_list_walk_safe(segs, skb, next) {
783		__skb_pull(skb, skb_transport_offset(skb));
784
785		udp_post_segment_fix_csum(skb);
786		ret = udpv6_queue_rcv_one_skb(sk, skb);
787		if (ret > 0)
788			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
789						 true);
790	}
791	return 0;
792}
793
794static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk,
795				   __be16 loc_port, const struct in6_addr *loc_addr,
796				   __be16 rmt_port, const struct in6_addr *rmt_addr,
797				   int dif, int sdif, unsigned short hnum)
798{
799	const struct inet_sock *inet = inet_sk(sk);
800
801	if (!net_eq(sock_net(sk), net))
802		return false;
803
804	if (udp_sk(sk)->udp_port_hash != hnum ||
805	    sk->sk_family != PF_INET6 ||
806	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
807	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
808		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
809	    !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
810	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
811		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
812		return false;
813	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
814		return false;
815	return true;
816}
817
818static void udp6_csum_zero_error(struct sk_buff *skb)
819{
820	/* RFC 2460 section 8.1 says that we SHOULD log
821	 * this error. Well, it is reasonable.
822	 */
823	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
824			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
825			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
826}
827
828/*
829 * Note: called only from the BH handler context,
830 * so we don't need to lock the hashes.
831 */
832static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
833		const struct in6_addr *saddr, const struct in6_addr *daddr,
834		struct udp_table *udptable, int proto)
835{
836	struct sock *sk, *first = NULL;
837	const struct udphdr *uh = udp_hdr(skb);
838	unsigned short hnum = ntohs(uh->dest);
839	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
840	unsigned int offset = offsetof(typeof(*sk), sk_node);
841	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
842	int dif = inet6_iif(skb);
843	int sdif = inet6_sdif(skb);
844	struct hlist_node *node;
845	struct sk_buff *nskb;
846
847	if (use_hash2) {
848		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
849			    udptable->mask;
850		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
851start_lookup:
852		hslot = &udptable->hash2[hash2];
853		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
854	}
855
856	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
857		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
858					    uh->source, saddr, dif, sdif,
859					    hnum))
860			continue;
861		/* If zero checksum and no_check is not on for
862		 * the socket then skip it.
863		 */
864		if (!uh->check && !udp_get_no_check6_rx(sk))
865			continue;
866		if (!first) {
867			first = sk;
868			continue;
869		}
870		nskb = skb_clone(skb, GFP_ATOMIC);
871		if (unlikely(!nskb)) {
872			atomic_inc(&sk->sk_drops);
873			__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
874					 IS_UDPLITE(sk));
875			__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
876					 IS_UDPLITE(sk));
877			continue;
878		}
879
880		if (udpv6_queue_rcv_skb(sk, nskb) > 0)
881			consume_skb(nskb);
882	}
883
884	/* Also lookup *:port if we are using hash2 and haven't done so yet. */
885	if (use_hash2 && hash2 != hash2_any) {
886		hash2 = hash2_any;
887		goto start_lookup;
888	}
889
890	if (first) {
891		if (udpv6_queue_rcv_skb(first, skb) > 0)
892			consume_skb(skb);
893	} else {
894		kfree_skb(skb);
895		__UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
896				 proto == IPPROTO_UDPLITE);
897	}
898	return 0;
899}
900
901static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
902{
903	if (udp_sk_rx_dst_set(sk, dst)) {
904		const struct rt6_info *rt = (const struct rt6_info *)dst;
905
906		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
907	}
908}
909
910/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
911 * return code conversion for ip layer consumption
912 */
913static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
914				struct udphdr *uh)
915{
916	int ret;
917
918	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
919		skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);
920
921	ret = udpv6_queue_rcv_skb(sk, skb);
922
923	/* a return value > 0 means to resubmit the input */
924	if (ret > 0)
925		return ret;
926	return 0;
927}
928
929int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
930		   int proto)
931{
932	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
933	const struct in6_addr *saddr, *daddr;
934	struct net *net = dev_net(skb->dev);
935	struct udphdr *uh;
936	struct sock *sk;
937	bool refcounted;
938	u32 ulen = 0;
939
940	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
941		goto discard;
942
943	saddr = &ipv6_hdr(skb)->saddr;
944	daddr = &ipv6_hdr(skb)->daddr;
945	uh = udp_hdr(skb);
946
947	ulen = ntohs(uh->len);
948	if (ulen > skb->len)
949		goto short_packet;
950
951	if (proto == IPPROTO_UDP) {
952		/* UDP validates ulen. */
953
954		/* Check for jumbo payload */
955		if (ulen == 0)
956			ulen = skb->len;
957
958		if (ulen < sizeof(*uh))
959			goto short_packet;
960
961		if (ulen < skb->len) {
962			if (pskb_trim_rcsum(skb, ulen))
963				goto short_packet;
964			saddr = &ipv6_hdr(skb)->saddr;
965			daddr = &ipv6_hdr(skb)->daddr;
966			uh = udp_hdr(skb);
967		}
968	}
969
970	if (udp6_csum_init(skb, uh, proto))
971		goto csum_error;
972
973	/* Check if the socket is already available, e.g. due to early demux */
974	sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
975			      &refcounted, udp6_ehashfn);
976	if (IS_ERR(sk))
977		goto no_sk;
978
979	if (sk) {
980		struct dst_entry *dst = skb_dst(skb);
981		int ret;
982
983		if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
984			udp6_sk_rx_dst_set(sk, dst);
985
986		if (!uh->check && !udp_get_no_check6_rx(sk)) {
987			if (refcounted)
988				sock_put(sk);
989			goto report_csum_error;
990		}
991
992		ret = udp6_unicast_rcv_skb(sk, skb, uh);
993		if (refcounted)
994			sock_put(sk);
995		return ret;
996	}
997
998	/*
999	 *	Multicast receive code
1000	 */
1001	if (ipv6_addr_is_multicast(daddr))
1002		return __udp6_lib_mcast_deliver(net, skb,
1003				saddr, daddr, udptable, proto);
1004
1005	/* Unicast */
1006	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
1007	if (sk) {
1008		if (!uh->check && !udp_get_no_check6_rx(sk))
1009			goto report_csum_error;
1010		return udp6_unicast_rcv_skb(sk, skb, uh);
1011	}
1012no_sk:
1013	reason = SKB_DROP_REASON_NO_SOCKET;
1014
1015	if (!uh->check)
1016		goto report_csum_error;
1017
1018	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1019		goto discard;
1020	nf_reset_ct(skb);
1021
1022	if (udp_lib_checksum_complete(skb))
1023		goto csum_error;
1024
1025	__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1026	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
1027
1028	kfree_skb_reason(skb, reason);
1029	return 0;
1030
1031short_packet:
1032	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
1033		reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1034	net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
1035			    proto == IPPROTO_UDPLITE ? "-Lite" : "",
1036			    saddr, ntohs(uh->source),
1037			    ulen, skb->len,
1038			    daddr, ntohs(uh->dest));
1039	goto discard;
1040
1041report_csum_error:
1042	udp6_csum_zero_error(skb);
1043csum_error:
1044	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
1045		reason = SKB_DROP_REASON_UDP_CSUM;
1046	__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
1047discard:
1048	__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1049	kfree_skb_reason(skb, reason);
1050	return 0;
1051}
1052
1053
1054static struct sock *__udp6_lib_demux_lookup(struct net *net,
1055			__be16 loc_port, const struct in6_addr *loc_addr,
1056			__be16 rmt_port, const struct in6_addr *rmt_addr,
1057			int dif, int sdif)
1058{
1059	struct udp_table *udptable = net->ipv4.udp_table;
1060	unsigned short hnum = ntohs(loc_port);
1061	unsigned int hash2, slot2;
1062	struct udp_hslot *hslot2;
1063	__portpair ports;
1064	struct sock *sk;
1065
1066	hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
1067	slot2 = hash2 & udptable->mask;
1068	hslot2 = &udptable->hash2[slot2];
1069	ports = INET_COMBINED_PORTS(rmt_port, hnum);
1070
1071	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
1072		if (sk->sk_state == TCP_ESTABLISHED &&
1073		    inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
1074			return sk;
1075		/* Only check first socket in chain */
1076		break;
1077	}
1078	return NULL;
1079}
1080
1081void udp_v6_early_demux(struct sk_buff *skb)
1082{
1083	struct net *net = dev_net(skb->dev);
1084	const struct udphdr *uh;
1085	struct sock *sk;
1086	struct dst_entry *dst;
1087	int dif = skb->dev->ifindex;
1088	int sdif = inet6_sdif(skb);
1089
1090	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
1091	    sizeof(struct udphdr)))
1092		return;
1093
1094	uh = udp_hdr(skb);
1095
1096	if (skb->pkt_type == PACKET_HOST)
1097		sk = __udp6_lib_demux_lookup(net, uh->dest,
1098					     &ipv6_hdr(skb)->daddr,
1099					     uh->source, &ipv6_hdr(skb)->saddr,
1100					     dif, sdif);
1101	else
1102		return;
1103
1104	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
1105		return;
1106
1107	skb->sk = sk;
1108	skb->destructor = sock_efree;
1109	dst = rcu_dereference(sk->sk_rx_dst);
1110
1111	if (dst)
1112		dst = dst_check(dst, sk->sk_rx_dst_cookie);
1113	if (dst) {
1114		/* set noref for now.
1115		 * any place which wants to hold dst has to call
1116		 * dst_hold_safe()
1117		 */
1118		skb_dst_set_noref(skb, dst);
1119	}
1120}
1121
1122INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
1123{
1124	return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP);
1125}
1126
1127/*
1128 * Throw away all pending data and cancel the corking. Socket is locked.
1129 */
1130static void udp_v6_flush_pending_frames(struct sock *sk)
1131{
1132	struct udp_sock *up = udp_sk(sk);
1133
1134	if (up->pending == AF_INET)
1135		udp_flush_pending_frames(sk);
1136	else if (up->pending) {
1137		up->len = 0;
1138		WRITE_ONCE(up->pending, 0);
1139		ip6_flush_pending_frames(sk);
1140	}
1141}
1142
1143static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
1144			     int addr_len)
1145{
1146	if (addr_len < offsetofend(struct sockaddr, sa_family))
1147		return -EINVAL;
1148	/* The following checks are replicated from __ip6_datagram_connect()
1149	 * and intended to prevent BPF program called below from accessing
1150	 * bytes that are out of the bound specified by user in addr_len.
1151	 */
1152	if (uaddr->sa_family == AF_INET) {
1153		if (ipv6_only_sock(sk))
1154			return -EAFNOSUPPORT;
1155		return udp_pre_connect(sk, uaddr, addr_len);
1156	}
1157
1158	if (addr_len < SIN6_LEN_RFC2133)
1159		return -EINVAL;
1160
1161	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
1162}
1163
1164/**
1165 *	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
1166 *	@sk:	socket we are sending on
1167 *	@skb:	sk_buff containing the filled-in UDP header
1168 *		(checksum field must be zeroed out)
1169 *	@saddr: source address
1170 *	@daddr: destination address
1171 *	@len:	length of packet
1172 */
1173static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
1174				 const struct in6_addr *saddr,
1175				 const struct in6_addr *daddr, int len)
1176{
1177	unsigned int offset;
1178	struct udphdr *uh = udp_hdr(skb);
1179	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
1180	__wsum csum = 0;
1181
1182	if (!frags) {
1183		/* Only one fragment on the socket.  */
1184		skb->csum_start = skb_transport_header(skb) - skb->head;
1185		skb->csum_offset = offsetof(struct udphdr, check);
1186		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
1187	} else {
1188		/*
1189		 * HW-checksum won't work as there are two or more
1190		 * fragments on the socket so that all csums of sk_buffs
1191		 * should be together
1192		 */
1193		offset = skb_transport_offset(skb);
1194		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
1195		csum = skb->csum;
1196
1197		skb->ip_summed = CHECKSUM_NONE;
1198
1199		do {
1200			csum = csum_add(csum, frags->csum);
1201		} while ((frags = frags->next));
1202
1203		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
1204					    csum);
1205		if (uh->check == 0)
1206			uh->check = CSUM_MANGLED_0;
1207	}
1208}
1209
1210/*
1211 *	Sending
1212 */
1213
1214static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
1215			   struct inet_cork *cork)
1216{
1217	struct sock *sk = skb->sk;
1218	struct udphdr *uh;
1219	int err = 0;
1220	int is_udplite = IS_UDPLITE(sk);
1221	__wsum csum = 0;
1222	int offset = skb_transport_offset(skb);
1223	int len = skb->len - offset;
1224	int datalen = len - sizeof(*uh);
1225
1226	/*
1227	 * Create a UDP header
1228	 */
1229	uh = udp_hdr(skb);
1230	uh->source = fl6->fl6_sport;
1231	uh->dest = fl6->fl6_dport;
1232	uh->len = htons(len);
1233	uh->check = 0;
1234
1235	if (cork->gso_size) {
1236		const int hlen = skb_network_header_len(skb) +
1237				 sizeof(struct udphdr);
1238
1239		if (hlen + cork->gso_size > cork->fragsize) {
1240			kfree_skb(skb);
1241			return -EINVAL;
1242		}
1243		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
1244			kfree_skb(skb);
1245			return -EINVAL;
1246		}
1247		if (udp_get_no_check6_tx(sk)) {
1248			kfree_skb(skb);
1249			return -EINVAL;
1250		}
1251		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
1252		    dst_xfrm(skb_dst(skb))) {
1253			kfree_skb(skb);
1254			return -EIO;
1255		}
1256
1257		if (datalen > cork->gso_size) {
1258			skb_shinfo(skb)->gso_size = cork->gso_size;
1259			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
1260			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
1261								 cork->gso_size);
1262		}
1263		goto csum_partial;
1264	}
1265
1266	if (is_udplite)
1267		csum = udplite_csum(skb);
1268	else if (udp_get_no_check6_tx(sk)) {   /* UDP csum disabled */
1269		skb->ip_summed = CHECKSUM_NONE;
1270		goto send;
1271	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
1272csum_partial:
1273		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
1274		goto send;
1275	} else
1276		csum = udp_csum(skb);
1277
1278	/* add protocol-dependent pseudo-header */
1279	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
1280				    len, fl6->flowi6_proto, csum);
1281	if (uh->check == 0)
1282		uh->check = CSUM_MANGLED_0;
1283
1284send:
1285	err = ip6_send_skb(skb);
1286	if (err) {
1287		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
1288			UDP6_INC_STATS(sock_net(sk),
1289				       UDP_MIB_SNDBUFERRORS, is_udplite);
1290			err = 0;
1291		}
1292	} else {
1293		UDP6_INC_STATS(sock_net(sk),
1294			       UDP_MIB_OUTDATAGRAMS, is_udplite);
1295	}
1296	return err;
1297}
1298
1299static int udp_v6_push_pending_frames(struct sock *sk)
1300{
1301	struct sk_buff *skb;
1302	struct udp_sock  *up = udp_sk(sk);
1303	int err = 0;
1304
1305	if (up->pending == AF_INET)
1306		return udp_push_pending_frames(sk);
1307
1308	skb = ip6_finish_skb(sk);
1309	if (!skb)
1310		goto out;
1311
1312	err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
1313			      &inet_sk(sk)->cork.base);
1314out:
1315	up->len = 0;
1316	WRITE_ONCE(up->pending, 0);
1317	return err;
1318}
1319
/*
 * udpv6_sendmsg - send a datagram, or append to a corked one, on a UDPv6 socket
 * @sk:  sending socket
 * @msg: user message; msg_name optionally carries the destination,
 *       msg_control optional ancillary data
 * @len: number of payload bytes
 *
 * Outline of the paths taken below:
 *  - An AF_INET destination, or an IPv4-mapped IPv6 destination, is
 *    forwarded to udp_sendmsg() (-ENETUNREACH on an IPv6-only socket).
 *  - If data is already pending as AF_INET6, the new data is appended
 *    under the socket lock; pending AF_INET data goes to udp_sendmsg().
 *  - Otherwise a flow is built from the destination, socket state and
 *    ancillary data, a route is looked up, and the datagram is either
 *    built and sent without the socket lock (no corking requested) or
 *    queued with ip6_append_data() under the socket lock.
 *
 * On the IPv6 paths the return value is @len on success or a negative
 * errno; the IPv4 paths return what udp_sendmsg() returns.
 */
1320int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1321{
1322	struct ipv6_txoptions opt_space;
1323	struct udp_sock *up = udp_sk(sk);
1324	struct inet_sock *inet = inet_sk(sk);
1325	struct ipv6_pinfo *np = inet6_sk(sk);
1326	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
1327	struct in6_addr *daddr, *final_p, final;
1328	struct ipv6_txoptions *opt = NULL;
1329	struct ipv6_txoptions *opt_to_free = NULL;
1330	struct ip6_flowlabel *flowlabel = NULL;
1331	struct inet_cork_full cork;
1332	struct flowi6 *fl6 = &cork.fl.u.ip6;
1333	struct dst_entry *dst;
1334	struct ipcm6_cookie ipc6;
1335	int addr_len = msg->msg_namelen;
1336	bool connected = false;
1337	int ulen = len;
1338	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
1339	int err;
1340	int is_udplite = IS_UDPLITE(sk);
1341	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
1342
1343	ipcm6_init(&ipc6);
1344	ipc6.gso_size = READ_ONCE(up->gso_size);
1345	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
1346	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
1347
1348	/* destination address check */
1349	if (sin6) {
1350		if (addr_len < offsetof(struct sockaddr, sa_data))
1351			return -EINVAL;
1352
1353		switch (sin6->sin6_family) {
1354		case AF_INET6:
1355			if (addr_len < SIN6_LEN_RFC2133)
1356				return -EINVAL;
1357			daddr = &sin6->sin6_addr;
1358			if (ipv6_addr_any(daddr) &&
1359			    ipv6_addr_v4mapped(&np->saddr))
1360				ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
1361						       daddr);
1362			break;
1363		case AF_INET:
1364			goto do_udp_sendmsg;
1365		case AF_UNSPEC:
			/* Treated exactly as if no address had been passed. */
1366			msg->msg_name = sin6 = NULL;
1367			msg->msg_namelen = addr_len = 0;
1368			daddr = NULL;
1369			break;
1370		default:
1371			return -EINVAL;
1372		}
1373	} else if (!READ_ONCE(up->pending)) {
		/* No address and nothing corked: the socket must be connected. */
1374		if (sk->sk_state != TCP_ESTABLISHED)
1375			return -EDESTADDRREQ;
1376		daddr = &sk->sk_v6_daddr;
1377	} else
1378		daddr = NULL;
1379
1380	if (daddr) {
1381		if (ipv6_addr_v4mapped(daddr)) {
			/*
			 * IPv4-mapped destination: build an on-stack AF_INET
			 * address, let udp_sendmsg() handle the send, then
			 * restore the caller's msg_name/msg_namelen.
			 */
1382			struct sockaddr_in sin;
1383			sin.sin_family = AF_INET;
1384			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
1385			sin.sin_addr.s_addr = daddr->s6_addr32[3];
1386			msg->msg_name = &sin;
1387			msg->msg_namelen = sizeof(sin);
1388do_udp_sendmsg:
1389			err = ipv6_only_sock(sk) ?
1390				-ENETUNREACH : udp_sendmsg(sk, msg, len);
1391			msg->msg_name = sin6;
1392			msg->msg_namelen = addr_len;
1393			return err;
1394		}
1395	}
1396
1397	/* Rough check on arithmetic overflow,
1398	   better check is made in ip6_append_data().
1399	   */
1400	if (len > INT_MAX - sizeof(struct udphdr))
1401		return -EMSGSIZE;
1402
1403	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
1404	if (READ_ONCE(up->pending)) {
1405		if (READ_ONCE(up->pending) == AF_INET)
1406			return udp_sendmsg(sk, msg, len);
1407		/*
1408		 * There are pending frames.
1409		 * The socket lock must be held while it's corked.
1410		 */
1411		lock_sock(sk);
		/* Re-check under the lock; the first test was lockless. */
1412		if (likely(up->pending)) {
1413			if (unlikely(up->pending != AF_INET6)) {
1414				release_sock(sk);
1415				return -EAFNOSUPPORT;
1416			}
1417			dst = NULL;
1418			goto do_append_data;
1419		}
1420		release_sock(sk);
1421	}
	/*
	 * Not appending to a corked datagram: this call starts a new one,
	 * so the UDP header is accounted for in ulen.
	 */
1422	ulen += sizeof(struct udphdr);
1423
1424	memset(fl6, 0, sizeof(*fl6));
1425
1426	if (sin6) {
1427		if (sin6->sin6_port == 0)
1428			return -EINVAL;
1429
1430		fl6->fl6_dport = sin6->sin6_port;
1431		daddr = &sin6->sin6_addr;
1432
1433		if (np->sndflow) {
1434			fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
1435			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
1436				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
1437				if (IS_ERR(flowlabel))
1438					return -EINVAL;
1439			}
1440		}
1441
1442		/*
1443		 * Otherwise it will be difficult to maintain
1444		 * sk->sk_dst_cache.
1445		 */
1446		if (sk->sk_state == TCP_ESTABLISHED &&
1447		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
1448			daddr = &sk->sk_v6_daddr;
1449
1450		if (addr_len >= sizeof(struct sockaddr_in6) &&
1451		    sin6->sin6_scope_id &&
1452		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
1453			fl6->flowi6_oif = sin6->sin6_scope_id;
1454	} else {
1455		if (sk->sk_state != TCP_ESTABLISHED)
1456			return -EDESTADDRREQ;
1457
1458		fl6->fl6_dport = inet->inet_dport;
1459		daddr = &sk->sk_v6_daddr;
1460		fl6->flowlabel = np->flow_label;
1461		connected = true;
1462	}
1463
	/* Output interface fallbacks: bound device, then sticky pktinfo. */
1464	if (!fl6->flowi6_oif)
1465		fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
1466
1467	if (!fl6->flowi6_oif)
1468		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
1469
1470	fl6->flowi6_uid = sk->sk_uid;
1471
	/*
	 * Ancillary data: per-call options are parsed into opt_space.
	 * Any control message clears 'connected'.
	 */
1472	if (msg->msg_controllen) {
1473		opt = &opt_space;
1474		memset(opt, 0, sizeof(struct ipv6_txoptions));
1475		opt->tot_len = sizeof(*opt);
1476		ipc6.opt = opt;
1477
1478		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
1479		if (err > 0)
1480			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
1481						    &ipc6);
1482		if (err < 0) {
1483			fl6_sock_release(flowlabel);
1484			return err;
1485		}
1486		if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
1487			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
1488			if (IS_ERR(flowlabel))
1489				return -EINVAL;
1490		}
1491		if (!(opt->opt_nflen|opt->opt_flen))
1492			opt = NULL;
1493		connected = false;
1494	}
	/* No per-call options: use the socket's, released at the end. */
1495	if (!opt) {
1496		opt = txopt_get(np);
1497		opt_to_free = opt;
1498	}
1499	if (flowlabel)
1500		opt = fl6_merge_options(&opt_space, flowlabel, opt);
1501	opt = ipv6_fixup_options(&opt_space, opt);
1502	ipc6.opt = opt;
1503
1504	fl6->flowi6_proto = sk->sk_protocol;
1505	fl6->flowi6_mark = ipc6.sockc.mark;
1506	fl6->daddr = *daddr;
1507	if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
1508		fl6->saddr = np->saddr;
1509	fl6->fl6_sport = inet->inet_sport;
1510
	/*
	 * Unconnected sends are passed to the cgroup BPF sendmsg program;
	 * the destination is re-read from sin6 afterwards.
	 */
1511	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
1512		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
1513					   (struct sockaddr *)sin6,
1514					   &addr_len,
1515					   &fl6->saddr);
1516		if (err)
1517			goto out_no_dst;
1518		if (sin6) {
1519			if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
1520				/* BPF program rewrote IPv6-only by IPv4-mapped
1521				 * IPv6. It's currently unsupported.
1522				 */
1523				err = -ENOTSUPP;
1524				goto out_no_dst;
1525			}
1526			if (sin6->sin6_port == 0) {
1527				/* BPF program set invalid port. Reject it. */
1528				err = -EINVAL;
1529				goto out_no_dst;
1530			}
1531			fl6->fl6_dport = sin6->sin6_port;
1532			fl6->daddr = sin6->sin6_addr;
1533		}
1534	}
1535
1536	if (ipv6_addr_any(&fl6->daddr))
1537		fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
1538
1539	final_p = fl6_update_dst(fl6, opt, &final);
1540	if (final_p)
1541		connected = false;
1542
1543	if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
1544		fl6->flowi6_oif = np->mcast_oif;
1545		connected = false;
1546	} else if (!fl6->flowi6_oif)
1547		fl6->flowi6_oif = np->ucast_oif;
1548
1549	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1550
1551	if (ipc6.tclass < 0)
1552		ipc6.tclass = np->tclass;
1553
1554	fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
1555
1556	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
1557	if (IS_ERR(dst)) {
1558		err = PTR_ERR(dst);
1559		dst = NULL;
1560		goto out;
1561	}
1562
1563	if (ipc6.hlimit < 0)
1564		ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
1565
1566	if (msg->msg_flags&MSG_CONFIRM)
1567		goto do_confirm;
1568back_from_confirm:
1569
1570	/* Lockless fast path for the non-corking case */
1571	if (!corkreq) {
1572		struct sk_buff *skb;
1573
1574		skb = ip6_make_skb(sk, getfrag, msg, ulen,
1575				   sizeof(struct udphdr), &ipc6,
1576				   (struct rt6_info *)dst,
1577				   msg->msg_flags, &cork);
		/*
		 * skb is an error pointer, NULL or a real buffer.  For NULL
		 * PTR_ERR() yields 0, so that case is reported as success.
		 */
1578		err = PTR_ERR(skb);
1579		if (!IS_ERR_OR_NULL(skb))
1580			err = udp_v6_send_skb(skb, fl6, &cork.base);
1581		/* ip6_make_skb steals dst reference */
1582		goto out_no_dst;
1583	}
1584
1585	lock_sock(sk);
1586	if (unlikely(up->pending)) {
1587		/* The socket is already corked while preparing it. */
1588		/* ... which is an evident application bug. --ANK */
1589		release_sock(sk);
1590
1591		net_dbg_ratelimited("udp cork app bug 2\n");
1592		err = -EINVAL;
1593		goto out;
1594	}
1595
1596	WRITE_ONCE(up->pending, AF_INET6);
1597
1598do_append_data:
1599	if (ipc6.dontfrag < 0)
1600		ipc6.dontfrag = np->dontfrag;
1601	up->len += ulen;
1602	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
1603			      &ipc6, fl6, (struct rt6_info *)dst,
1604			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
1605	if (err)
1606		udp_v6_flush_pending_frames(sk);
1607	else if (!corkreq)
1608		err = udp_v6_push_pending_frames(sk);
	/* Corking was requested but nothing is queued: clear the marker. */
1609	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
1610		WRITE_ONCE(up->pending, 0);
1611
	/*
	 * A positive value is mapped through net_xmit_errno() when the
	 * socket has recverr set and is otherwise turned into success.
	 */
1612	if (err > 0)
1613		err = np->recverr ? net_xmit_errno(err) : 0;
1614	release_sock(sk);
1615
1616out:
1617	dst_release(dst);
1618out_no_dst:
1619	fl6_sock_release(flowlabel);
1620	txopt_put(opt_to_free);
1621	if (!err)
1622		return len;
1623	/*
1624	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
1625	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
1626	 * we don't have a good statistic (IpOutDiscards but it can be too many
1627	 * things).  We could add another new stat but at least for now that
1628	 * seems like overkill.
1629	 */
1630	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1631		UDP6_INC_STATS(sock_net(sk),
1632			       UDP_MIB_SNDBUFERRORS, is_udplite);
1633	}
1634	return err;
1635
	/*
	 * MSG_CONFIRM: the neighbour is confirmed only when MSG_PROBE is
	 * also set.  A zero-length MSG_PROBE request stops here with
	 * success; everything else resumes the normal send path.
	 */
1636do_confirm:
1637	if (msg->msg_flags & MSG_PROBE)
1638		dst_confirm_neigh(dst, &fl6->daddr);
1639	if (!(msg->msg_flags&MSG_PROBE) || len)
1640		goto back_from_confirm;
1641	err = 0;
1642	goto out;
1643}
1644EXPORT_SYMBOL(udpv6_sendmsg);
1645
1646static void udpv6_splice_eof(struct socket *sock)
1647{
1648	struct sock *sk = sock->sk;
1649	struct udp_sock *up = udp_sk(sk);
1650
1651	if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
1652		return;
1653
1654	lock_sock(sk);
1655	if (up->pending && !udp_test_bit(CORK, sk))
1656		udp_v6_push_pending_frames(sk);
1657	release_sock(sk);
1658}
1659
1660void udpv6_destroy_sock(struct sock *sk)
1661{
1662	struct udp_sock *up = udp_sk(sk);
1663	lock_sock(sk);
1664
1665	/* protects from races with udp_abort() */
1666	sock_set_flag(sk, SOCK_DEAD);
1667	udp_v6_flush_pending_frames(sk);
1668	release_sock(sk);
1669
1670	if (static_branch_unlikely(&udpv6_encap_needed_key)) {
1671		if (up->encap_type) {
1672			void (*encap_destroy)(struct sock *sk);
1673			encap_destroy = READ_ONCE(up->encap_destroy);
1674			if (encap_destroy)
1675				encap_destroy(sk);
1676		}
1677		if (udp_test_bit(ENCAP_ENABLED, sk)) {
1678			static_branch_dec(&udpv6_encap_needed_key);
1679			udp_encap_disable();
1680		}
1681	}
1682}
1683
1684/*
1685 *	Socket option code for UDP
1686 */
1687int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
1688		     unsigned int optlen)
1689{
1690	if (level == SOL_UDP  ||  level == SOL_UDPLITE || level == SOL_SOCKET)
1691		return udp_lib_setsockopt(sk, level, optname,
1692					  optval, optlen,
1693					  udp_v6_push_pending_frames);
1694	return ipv6_setsockopt(sk, level, optname, optval, optlen);
1695}
1696
1697int udpv6_getsockopt(struct sock *sk, int level, int optname,
1698		     char __user *optval, int __user *optlen)
1699{
1700	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
1701		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
1702	return ipv6_getsockopt(sk, level, optname, optval, optlen);
1703}
1704
1705static const struct inet6_protocol udpv6_protocol = {
1706	.handler	=	udpv6_rcv,
1707	.err_handler	=	udpv6_err,
1708	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1709};
1710
1711/* ------------------------------------------------------------------------ */
1712#ifdef CONFIG_PROC_FS
1713int udp6_seq_show(struct seq_file *seq, void *v)
1714{
1715	if (v == SEQ_START_TOKEN) {
1716		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
1717	} else {
1718		int bucket = ((struct udp_iter_state *)seq->private)->bucket;
1719		const struct inet_sock *inet = inet_sk((const struct sock *)v);
1720		__u16 srcp = ntohs(inet->inet_sport);
1721		__u16 destp = ntohs(inet->inet_dport);
1722		__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
1723					  udp_rqueue_get(v), bucket);
1724	}
1725	return 0;
1726}
1727
1728const struct seq_operations udp6_seq_ops = {
1729	.start		= udp_seq_start,
1730	.next		= udp_seq_next,
1731	.stop		= udp_seq_stop,
1732	.show		= udp6_seq_show,
1733};
1734EXPORT_SYMBOL(udp6_seq_ops);
1735
1736static struct udp_seq_afinfo udp6_seq_afinfo = {
1737	.family		= AF_INET6,
1738	.udp_table	= NULL,
1739};
1740
1741int __net_init udp6_proc_init(struct net *net)
1742{
1743	if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
1744			sizeof(struct udp_iter_state), &udp6_seq_afinfo))
1745		return -ENOMEM;
1746	return 0;
1747}
1748
/* Remove the "udp6" entry from the namespace's proc_net directory. */
1749void udp6_proc_exit(struct net *net)
1750{
1751	remove_proc_entry("udp6", net->proc_net);
1752}
1753#endif /* CONFIG_PROC_FS */
1754
1755/* ------------------------------------------------------------------------ */
1756
1757struct proto udpv6_prot = {
1758	.name			= "UDPv6",
1759	.owner			= THIS_MODULE,
1760	.close			= udp_lib_close,
1761	.pre_connect		= udpv6_pre_connect,
1762	.connect		= ip6_datagram_connect,
1763	.disconnect		= udp_disconnect,
1764	.ioctl			= udp_ioctl,
1765	.init			= udpv6_init_sock,
1766	.destroy		= udpv6_destroy_sock,
1767	.setsockopt		= udpv6_setsockopt,
1768	.getsockopt		= udpv6_getsockopt,
1769	.sendmsg		= udpv6_sendmsg,
1770	.recvmsg		= udpv6_recvmsg,
1771	.splice_eof		= udpv6_splice_eof,
1772	.release_cb		= ip6_datagram_release_cb,
1773	.hash			= udp_lib_hash,
1774	.unhash			= udp_lib_unhash,
1775	.rehash			= udp_v6_rehash,
1776	.get_port		= udp_v6_get_port,
1777	.put_port		= udp_lib_unhash,
1778#ifdef CONFIG_BPF_SYSCALL
1779	.psock_update_sk_prot	= udp_bpf_update_proto,
1780#endif
1781
1782	.memory_allocated	= &udp_memory_allocated,
1783	.per_cpu_fw_alloc	= &udp_memory_per_cpu_fw_alloc,
1784
1785	.sysctl_mem		= sysctl_udp_mem,
1786	.sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
1787	.sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
1788	.obj_size		= sizeof(struct udp6_sock),
1789	.ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
1790	.h.udp_table		= NULL,
1791	.diag_destroy		= udp_abort,
1792};
1793
1794static struct inet_protosw udpv6_protosw = {
1795	.type =      SOCK_DGRAM,
1796	.protocol =  IPPROTO_UDP,
1797	.prot =      &udpv6_prot,
1798	.ops =       &inet6_dgram_ops,
1799	.flags =     INET_PROTOSW_PERMANENT,
1800};
1801
1802int __init udpv6_init(void)
1803{
1804	int ret;
1805
1806	ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
1807	if (ret)
1808		goto out;
1809
1810	ret = inet6_register_protosw(&udpv6_protosw);
1811	if (ret)
1812		goto out_udpv6_protocol;
1813out:
1814	return ret;
1815
1816out_udpv6_protocol:
1817	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1818	goto out;
1819}
1820
/*
 * Reverse of udpv6_init(): drop the socket-switch entry first, then the
 * IPPROTO_UDP receive handler.
 */
1821void udpv6_exit(void)
1822{
1823	inet6_unregister_protosw(&udpv6_protosw);
1824	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
1825}
1826