xref: /kernel/linux/linux-5.10/net/ipv6/tcp_ipv6.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	TCP over IPv6
4 *	Linux INET6 implementation
5 *
6 *	Authors:
7 *	Pedro Roque		<roque@di.fc.ul.pt>
8 *
9 *	Based on:
10 *	linux/net/ipv4/tcp.c
11 *	linux/net/ipv4/tcp_input.c
12 *	linux/net/ipv4/tcp_output.c
13 *
14 *	Fixes:
15 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
18 *					a single port at the same time.
19 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20 */
21
22#include <linux/bottom_half.h>
23#include <linux/module.h>
24#include <linux/errno.h>
25#include <linux/types.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/net.h>
29#include <linux/jiffies.h>
30#include <linux/in.h>
31#include <linux/in6.h>
32#include <linux/netdevice.h>
33#include <linux/init.h>
34#include <linux/jhash.h>
35#include <linux/ipsec.h>
36#include <linux/times.h>
37#include <linux/slab.h>
38#include <linux/uaccess.h>
39#include <linux/ipv6.h>
40#include <linux/icmpv6.h>
41#include <linux/random.h>
42#include <linux/indirect_call_wrapper.h>
43
44#include <net/tcp.h>
45#include <net/ndisc.h>
46#include <net/inet6_hashtables.h>
47#include <net/inet6_connection_sock.h>
48#include <net/ipv6.h>
49#include <net/transp_v6.h>
50#include <net/addrconf.h>
51#include <net/ip6_route.h>
52#include <net/ip6_checksum.h>
53#include <net/inet_ecn.h>
54#include <net/protocol.h>
55#include <net/xfrm.h>
56#include <net/snmp.h>
57#include <net/dsfield.h>
58#include <net/timewait_sock.h>
59#include <net/inet_common.h>
60#include <net/secure_seq.h>
61#include <net/busy_poll.h>
62
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
65
66#include <crypto/hash.h>
67#include <linux/scatterlist.h>
68
69#include <trace/events/tcp.h>
70
71static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73				      struct request_sock *req);
74
75static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77static const struct inet_connection_sock_af_ops ipv6_mapped;
78const struct inet_connection_sock_af_ops ipv6_specific;
79#ifdef CONFIG_TCP_MD5SIG
80static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82#else
83static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84						   const struct in6_addr *addr,
85						   int l3index)
86{
87	return NULL;
88}
89#endif
90
91/* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in the TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allows compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
96static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97{
98	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101}
102
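/* Cache the inbound route (and its cookie) on the socket so that the
 * established fast path in tcp_v6_do_rcv() can validate and reuse it
 * instead of doing a routing lookup for every received packet.
 */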
103static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104{
105	struct dst_entry *dst = skb_dst(skb);
106
107	if (dst && dst_hold_safe(dst)) {
108		const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110		rcu_assign_pointer(sk->sk_rx_dst, dst);
111		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113	}
114}
115
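/* The initial sequence number and the timestamp offset are derived from
 * the {saddr, daddr, sport, dport} tuple with keyed hashes
 * (secure_tcpv6_seq()/secure_tcpv6_ts_off()), making them hard to
 * predict for off-path attackers.
 */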
116static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117{
118	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119				ipv6_hdr(skb)->saddr.s6_addr32,
120				tcp_hdr(skb)->dest,
121				tcp_hdr(skb)->source);
122}
123
124static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125{
126	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127				   ipv6_hdr(skb)->saddr.s6_addr32);
128}
129
130static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131			      int addr_len)
132{
133	/* This check is replicated from tcp_v6_connect() and intended to
134	 * prevent BPF program called below from accessing bytes that are out
135	 * of the bound specified by user in addr_len.
136	 */
137	if (addr_len < SIN6_LEN_RFC2133)
138		return -EINVAL;
139
140	sock_owned_by_me(sk);
141
142	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143}
144
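/* connect() for an IPv6 TCP socket: validate the destination, fall back
 * to tcp_v4_connect() for v4-mapped destinations, resolve a route and
 * source address, pick a local port via inet6_hash_connect() and finally
 * send the SYN with tcp_connect().
 */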
145static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146			  int addr_len)
147{
148	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149	struct inet_sock *inet = inet_sk(sk);
150	struct inet_connection_sock *icsk = inet_csk(sk);
151	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152	struct tcp_sock *tp = tcp_sk(sk);
153	struct in6_addr *saddr = NULL, *final_p, final;
154	struct ipv6_txoptions *opt;
155	struct flowi6 fl6;
156	struct dst_entry *dst;
157	int addr_type;
158	int err;
159	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161	if (addr_len < SIN6_LEN_RFC2133)
162		return -EINVAL;
163
164	if (usin->sin6_family != AF_INET6)
165		return -EAFNOSUPPORT;
166
167	memset(&fl6, 0, sizeof(fl6));
168
169	if (np->sndflow) {
170		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
171		IP6_ECN_flow_init(fl6.flowlabel);
172		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
173			struct ip6_flowlabel *flowlabel;
174			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175			if (IS_ERR(flowlabel))
176				return -EINVAL;
177			fl6_sock_release(flowlabel);
178		}
179	}
180
181	/*
182	 *	connect() to INADDR_ANY means loopback (BSD'ism).
183	 */
184
185	if (ipv6_addr_any(&usin->sin6_addr)) {
186		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188					       &usin->sin6_addr);
189		else
190			usin->sin6_addr = in6addr_loopback;
191	}
192
193	addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195	if (addr_type & IPV6_ADDR_MULTICAST)
196		return -ENETUNREACH;
197
198	if (addr_type & IPV6_ADDR_LINKLOCAL) {
199		if (addr_len >= sizeof(struct sockaddr_in6) &&
200		    usin->sin6_scope_id) {
201			/* If interface is set while binding, indices
202			 * must coincide.
203			 */
204			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205				return -EINVAL;
206
207			sk->sk_bound_dev_if = usin->sin6_scope_id;
208		}
209
210		/* Connect to link-local address requires an interface */
211		if (!sk->sk_bound_dev_if)
212			return -EINVAL;
213	}
214
215	if (tp->rx_opt.ts_recent_stamp &&
216	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217		tp->rx_opt.ts_recent = 0;
218		tp->rx_opt.ts_recent_stamp = 0;
219		WRITE_ONCE(tp->write_seq, 0);
220	}
221
222	sk->sk_v6_daddr = usin->sin6_addr;
223	np->flow_label = fl6.flowlabel;
224
225	/*
226	 *	TCP over IPv4
227	 */
228
229	if (addr_type & IPV6_ADDR_MAPPED) {
230		u32 exthdrlen = icsk->icsk_ext_hdr_len;
231		struct sockaddr_in sin;
232
233		if (__ipv6_only_sock(sk))
234			return -ENETUNREACH;
235
236		sin.sin_family = AF_INET;
237		sin.sin_port = usin->sin6_port;
238		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
241		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
242		if (sk_is_mptcp(sk))
243			mptcpv6_handle_mapped(sk, true);
244		sk->sk_backlog_rcv = tcp_v4_do_rcv;
245#ifdef CONFIG_TCP_MD5SIG
246		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
247#endif
248
249		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
250
251		if (err) {
252			icsk->icsk_ext_hdr_len = exthdrlen;
253			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
254			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
255			if (sk_is_mptcp(sk))
256				mptcpv6_handle_mapped(sk, false);
257			sk->sk_backlog_rcv = tcp_v6_do_rcv;
258#ifdef CONFIG_TCP_MD5SIG
259			tp->af_specific = &tcp_sock_ipv6_specific;
260#endif
261			goto failure;
262		}
263		np->saddr = sk->sk_v6_rcv_saddr;
264
265		return err;
266	}
267
268	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
269		saddr = &sk->sk_v6_rcv_saddr;
270
271	fl6.flowi6_proto = IPPROTO_TCP;
272	fl6.daddr = sk->sk_v6_daddr;
273	fl6.saddr = saddr ? *saddr : np->saddr;
274	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
275	fl6.flowi6_oif = sk->sk_bound_dev_if;
276	fl6.flowi6_mark = sk->sk_mark;
277	fl6.fl6_dport = usin->sin6_port;
278	fl6.fl6_sport = inet->inet_sport;
279	fl6.flowi6_uid = sk->sk_uid;
280
281	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282	final_p = fl6_update_dst(&fl6, opt, &final);
283
284	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
285
286	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
287	if (IS_ERR(dst)) {
288		err = PTR_ERR(dst);
289		goto failure;
290	}
291
292	if (!saddr) {
293		saddr = &fl6.saddr;
294		sk->sk_v6_rcv_saddr = *saddr;
295	}
296
297	/* set the source address */
298	np->saddr = *saddr;
299	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
300
301	sk->sk_gso_type = SKB_GSO_TCPV6;
302	ip6_dst_store(sk, dst, NULL, NULL);
303
304	icsk->icsk_ext_hdr_len = 0;
305	if (opt)
306		icsk->icsk_ext_hdr_len = opt->opt_flen +
307					 opt->opt_nflen;
308
309	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
310
311	inet->inet_dport = usin->sin6_port;
312
313	tcp_set_state(sk, TCP_SYN_SENT);
314	err = inet6_hash_connect(tcp_death_row, sk);
315	if (err)
316		goto late_failure;
317
318	sk_set_txhash(sk);
319
320	if (likely(!tp->repair)) {
321		if (!tp->write_seq)
322			WRITE_ONCE(tp->write_seq,
323				   secure_tcpv6_seq(np->saddr.s6_addr32,
324						    sk->sk_v6_daddr.s6_addr32,
325						    inet->inet_sport,
326						    inet->inet_dport));
327		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
328						   np->saddr.s6_addr32,
329						   sk->sk_v6_daddr.s6_addr32);
330	}
331
332	if (tcp_fastopen_defer_connect(sk, &err))
333		return err;
334	if (err)
335		goto late_failure;
336
337	err = tcp_connect(sk);
338	if (err)
339		goto late_failure;
340
341	return 0;
342
343late_failure:
344	tcp_set_state(sk, TCP_CLOSE);
345	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
346		inet_reset_saddr(sk);
347failure:
348	inet->inet_dport = 0;
349	sk->sk_route_caps = 0;
350	return err;
351}
352
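/* Apply a PMTU reduction previously recorded in tp->mtu_info by
 * tcp_v6_err(); runs either directly from the ICMPv6 handler or deferred
 * via TCP_MTU_REDUCED_DEFERRED when the socket was owned by user space.
 */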
353static void tcp_v6_mtu_reduced(struct sock *sk)
354{
355	struct dst_entry *dst;
356	u32 mtu;
357
358	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359		return;
360
361	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362
363	/* Drop requests trying to increase our current mss.
364	 * Check done in __ip6_rt_update_pmtu() is too late.
365	 */
366	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367		return;
368
369	dst = inet6_csk_update_pmtu(sk, mtu);
370	if (!dst)
371		return;
372
373	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374		tcp_sync_mss(sk, dst_mtu(dst));
375		tcp_simple_retransmit(sk);
376	}
377}
378
379static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
380		u8 type, u8 code, int offset, __be32 info)
381{
382	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
383	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
384	struct net *net = dev_net(skb->dev);
385	struct request_sock *fastopen;
386	struct ipv6_pinfo *np;
387	struct tcp_sock *tp;
388	__u32 seq, snd_una;
389	struct sock *sk;
390	bool fatal;
391	int err;
392
393	sk = __inet6_lookup_established(net, &tcp_hashinfo,
394					&hdr->daddr, th->dest,
395					&hdr->saddr, ntohs(th->source),
396					skb->dev->ifindex, inet6_sdif(skb));
397
398	if (!sk) {
399		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
400				  ICMP6_MIB_INERRORS);
401		return -ENOENT;
402	}
403
404	if (sk->sk_state == TCP_TIME_WAIT) {
405		inet_twsk_put(inet_twsk(sk));
406		return 0;
407	}
408	seq = ntohl(th->seq);
409	fatal = icmpv6_err_convert(type, code, &err);
410	if (sk->sk_state == TCP_NEW_SYN_RECV) {
411		tcp_req_err(sk, seq, fatal);
412		return 0;
413	}
414
415	bh_lock_sock(sk);
416	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
417		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
418
419	if (sk->sk_state == TCP_CLOSE)
420		goto out;
421
422	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
423		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
424		goto out;
425	}
426
427	tp = tcp_sk(sk);
428	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
429	fastopen = rcu_dereference(tp->fastopen_rsk);
430	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
431	if (sk->sk_state != TCP_LISTEN &&
432	    !between(seq, snd_una, tp->snd_nxt)) {
433		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
434		goto out;
435	}
436
437	np = tcp_inet6_sk(sk);
438
439	if (type == NDISC_REDIRECT) {
440		if (!sock_owned_by_user(sk)) {
441			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
442
443			if (dst)
444				dst->ops->redirect(dst, sk, skb);
445		}
446		goto out;
447	}
448
449	if (type == ICMPV6_PKT_TOOBIG) {
450		u32 mtu = ntohl(info);
451
452		/* We are not interested in TCP_LISTEN and open_requests
453		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
454		 * they should go through unfragmented).
455		 */
456		if (sk->sk_state == TCP_LISTEN)
457			goto out;
458
459		if (!ip6_sk_accept_pmtu(sk))
460			goto out;
461
462		if (mtu < IPV6_MIN_MTU)
463			goto out;
464
465		WRITE_ONCE(tp->mtu_info, mtu);
466
467		if (!sock_owned_by_user(sk))
468			tcp_v6_mtu_reduced(sk);
469		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
470					   &sk->sk_tsq_flags))
471			sock_hold(sk);
472		goto out;
473	}
474
475
476	/* Might be for a request_sock */
477	switch (sk->sk_state) {
478	case TCP_SYN_SENT:
479	case TCP_SYN_RECV:
480		/* Only in fast or simultaneous open. If a fast open socket is
481		 * already accepted it is treated as a connected one below.
482		 */
483		if (fastopen && !fastopen->sk)
484			break;
485
486		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
487
488		if (!sock_owned_by_user(sk)) {
489			sk->sk_err = err;
490			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
491
492			tcp_done(sk);
493		} else
494			sk->sk_err_soft = err;
495		goto out;
496	case TCP_LISTEN:
497		break;
498	default:
499		/* check if this ICMP message allows revert of backoff.
500		 * (see RFC 6069)
501		 */
502		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
503		    code == ICMPV6_NOROUTE)
504			tcp_ld_RTO_revert(sk, seq);
505	}
506
507	if (!sock_owned_by_user(sk) && np->recverr) {
508		sk->sk_err = err;
509		sk->sk_error_report(sk);
510	} else
511		sk->sk_err_soft = err;
512
513out:
514	bh_unlock_sock(sk);
515	sock_put(sk);
516	return 0;
517}
518
519
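/* Build and transmit a SYN-ACK for a request socket. A route is looked
 * up here if the caller did not provide one, and the traffic class may
 * reflect the incoming SYN's DSCP when sysctl_tcp_reflect_tos is set.
 */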
520static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
521			      struct flowi *fl,
522			      struct request_sock *req,
523			      struct tcp_fastopen_cookie *foc,
524			      enum tcp_synack_type synack_type,
525			      struct sk_buff *syn_skb)
526{
527	struct inet_request_sock *ireq = inet_rsk(req);
528	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
529	struct ipv6_txoptions *opt;
530	struct flowi6 *fl6 = &fl->u.ip6;
531	struct sk_buff *skb;
532	int err = -ENOMEM;
533	u8 tclass;
534
535	/* First, grab a route. */
536	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
537					       IPPROTO_TCP)) == NULL)
538		goto done;
539
540	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
541
542	if (skb) {
543		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
544				    &ireq->ir_v6_rmt_addr);
545
546		fl6->daddr = ireq->ir_v6_rmt_addr;
547		if (np->repflow && ireq->pktopts)
548			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
549
550		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
551				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
552				(np->tclass & INET_ECN_MASK) :
553				np->tclass;
554
555		if (!INET_ECN_is_capable(tclass) &&
556		    tcp_bpf_ca_needs_ecn((struct sock *)req))
557			tclass |= INET_ECN_ECT_0;
558
559		rcu_read_lock();
560		opt = ireq->ipv6_opt;
561		if (!opt)
562			opt = rcu_dereference(np->opt);
563		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
564			       tclass, sk->sk_priority);
565		rcu_read_unlock();
566		err = net_xmit_eval(err);
567	}
568
569done:
570	return err;
571}
572
573
574static void tcp_v6_reqsk_destructor(struct request_sock *req)
575{
576	kfree(inet_rsk(req)->ipv6_opt);
577	kfree_skb(inet_rsk(req)->pktopts);
578}
579
580#ifdef CONFIG_TCP_MD5SIG
581static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
582						   const struct in6_addr *addr,
583						   int l3index)
584{
585	return tcp_md5_do_lookup(sk, l3index,
586				 (union tcp_md5_addr *)addr, AF_INET6);
587}
588
589static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
590						const struct sock *addr_sk)
591{
592	int l3index;
593
594	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
595						 addr_sk->sk_bound_dev_if);
596	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
597				    l3index);
598}
599
600static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
601				 sockptr_t optval, int optlen)
602{
603	struct tcp_md5sig cmd;
604	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
605	int l3index = 0;
606	u8 prefixlen;
607
608	if (optlen < sizeof(cmd))
609		return -EINVAL;
610
611	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612		return -EFAULT;
613
614	if (sin6->sin6_family != AF_INET6)
615		return -EINVAL;
616
617	if (optname == TCP_MD5SIG_EXT &&
618	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
619		prefixlen = cmd.tcpm_prefixlen;
620		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
621					prefixlen > 32))
622			return -EINVAL;
623	} else {
624		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
625	}
626
627	if (optname == TCP_MD5SIG_EXT &&
628	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
629		struct net_device *dev;
630
631		rcu_read_lock();
632		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
633		if (dev && netif_is_l3_master(dev))
634			l3index = dev->ifindex;
635		rcu_read_unlock();
636
637		/* ok to reference set/not set outside of rcu;
638		 * right now device MUST be an L3 master
639		 */
640		if (!dev || !l3index)
641			return -EINVAL;
642	}
643
644	if (!cmd.tcpm_keylen) {
645		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
646			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
647					      AF_INET, prefixlen,
648					      l3index);
649		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
650				      AF_INET6, prefixlen, l3index);
651	}
652
653	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
654		return -EINVAL;
655
656	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
657		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
658				      AF_INET, prefixlen, l3index,
659				      cmd.tcpm_key, cmd.tcpm_keylen,
660				      GFP_KERNEL);
661
662	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
663			      AF_INET6, prefixlen, l3index,
664			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
665}
666
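/* Feed the IPv6 pseudo-header plus the TCP header (with the checksum
 * field zeroed) into the MD5 transform, as required for TCP MD5
 * signatures (RFC 2385).
 */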
667static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
668				   const struct in6_addr *daddr,
669				   const struct in6_addr *saddr,
670				   const struct tcphdr *th, int nbytes)
671{
672	struct tcp6_pseudohdr *bp;
673	struct scatterlist sg;
674	struct tcphdr *_th;
675
676	bp = hp->scratch;
677	/* 1. TCP pseudo-header (RFC2460) */
678	bp->saddr = *saddr;
679	bp->daddr = *daddr;
680	bp->protocol = cpu_to_be32(IPPROTO_TCP);
681	bp->len = cpu_to_be32(nbytes);
682
683	_th = (struct tcphdr *)(bp + 1);
684	memcpy(_th, th, sizeof(*th));
685	_th->check = 0;
686
687	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
688	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
689				sizeof(*bp) + sizeof(*th));
690	return crypto_ahash_update(hp->md5_req);
691}
692
693static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
694			       const struct in6_addr *daddr, struct in6_addr *saddr,
695			       const struct tcphdr *th)
696{
697	struct tcp_md5sig_pool *hp;
698	struct ahash_request *req;
699
700	hp = tcp_get_md5sig_pool();
701	if (!hp)
702		goto clear_hash_noput;
703	req = hp->md5_req;
704
705	if (crypto_ahash_init(req))
706		goto clear_hash;
707	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
708		goto clear_hash;
709	if (tcp_md5_hash_key(hp, key))
710		goto clear_hash;
711	ahash_request_set_crypt(req, NULL, md5_hash, 0);
712	if (crypto_ahash_final(req))
713		goto clear_hash;
714
715	tcp_put_md5sig_pool();
716	return 0;
717
718clear_hash:
719	tcp_put_md5sig_pool();
720clear_hash_noput:
721	memset(md5_hash, 0, 16);
722	return 1;
723}
724
725static int tcp_v6_md5_hash_skb(char *md5_hash,
726			       const struct tcp_md5sig_key *key,
727			       const struct sock *sk,
728			       const struct sk_buff *skb)
729{
730	const struct in6_addr *saddr, *daddr;
731	struct tcp_md5sig_pool *hp;
732	struct ahash_request *req;
733	const struct tcphdr *th = tcp_hdr(skb);
734
735	if (sk) { /* valid for establish/request sockets */
736		saddr = &sk->sk_v6_rcv_saddr;
737		daddr = &sk->sk_v6_daddr;
738	} else {
739		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
740		saddr = &ip6h->saddr;
741		daddr = &ip6h->daddr;
742	}
743
744	hp = tcp_get_md5sig_pool();
745	if (!hp)
746		goto clear_hash_noput;
747	req = hp->md5_req;
748
749	if (crypto_ahash_init(req))
750		goto clear_hash;
751
752	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
753		goto clear_hash;
754	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
755		goto clear_hash;
756	if (tcp_md5_hash_key(hp, key))
757		goto clear_hash;
758	ahash_request_set_crypt(req, NULL, md5_hash, 0);
759	if (crypto_ahash_final(req))
760		goto clear_hash;
761
762	tcp_put_md5sig_pool();
763	return 0;
764
765clear_hash:
766	tcp_put_md5sig_pool();
767clear_hash_noput:
768	memset(md5_hash, 0, 16);
769	return 1;
770}
771
772#endif
773
774static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
775				    const struct sk_buff *skb,
776				    int dif, int sdif)
777{
778#ifdef CONFIG_TCP_MD5SIG
779	const __u8 *hash_location = NULL;
780	struct tcp_md5sig_key *hash_expected;
781	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
782	const struct tcphdr *th = tcp_hdr(skb);
783	int genhash, l3index;
784	u8 newhash[16];
785
786	/* If sdif is set, the packet ingressed via a device
787	 * in an L3 domain, and dif is set to the l3mdev.
788	 */
789	l3index = sdif ? dif : 0;
790
791	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
792	hash_location = tcp_parse_md5sig_option(th);
793
794	/* We've parsed the options - do we have a hash? */
795	if (!hash_expected && !hash_location)
796		return false;
797
798	if (hash_expected && !hash_location) {
799		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
800		return true;
801	}
802
803	if (!hash_expected && hash_location) {
804		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
805		return true;
806	}
807
808	/* check the signature */
809	genhash = tcp_v6_md5_hash_skb(newhash,
810				      hash_expected,
811				      NULL, skb);
812
813	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
814		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
815		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
816				     genhash ? "failed" : "mismatch",
817				     &ip6h->saddr, ntohs(th->source),
818				     &ip6h->daddr, ntohs(th->dest), l3index);
819		return true;
820	}
821#endif
822	return false;
823}
824
825static void tcp_v6_init_req(struct request_sock *req,
826			    const struct sock *sk_listener,
827			    struct sk_buff *skb)
828{
829	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
830	struct inet_request_sock *ireq = inet_rsk(req);
831	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
832
833	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
834	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
835
836	/* So that link locals have meaning */
837	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
838	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
839		ireq->ir_iif = tcp_v6_iif(skb);
840
841	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
842	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
843	     np->rxopt.bits.rxinfo ||
844	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
845	     np->rxopt.bits.rxohlim || np->repflow)) {
846		refcount_inc(&skb->users);
847		ireq->pktopts = skb;
848	}
849}
850
851static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
852					  struct flowi *fl,
853					  const struct request_sock *req)
854{
855	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
856}
857
858struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
859	.family		=	AF_INET6,
860	.obj_size	=	sizeof(struct tcp6_request_sock),
861	.rtx_syn_ack	=	tcp_rtx_synack,
862	.send_ack	=	tcp_v6_reqsk_send_ack,
863	.destructor	=	tcp_v6_reqsk_destructor,
864	.send_reset	=	tcp_v6_send_reset,
865	.syn_ack_timeout =	tcp_syn_ack_timeout,
866};
867
868const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
869	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
870				sizeof(struct ipv6hdr),
871#ifdef CONFIG_TCP_MD5SIG
872	.req_md5_lookup	=	tcp_v6_md5_lookup,
873	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
874#endif
875	.init_req	=	tcp_v6_init_req,
876#ifdef CONFIG_SYN_COOKIES
877	.cookie_init_seq =	cookie_v6_init_sequence,
878#endif
879	.route_req	=	tcp_v6_route_req,
880	.init_seq	=	tcp_v6_init_seq,
881	.init_ts_off	=	tcp_v6_init_ts_off,
882	.send_synack	=	tcp_v6_send_synack,
883};
884
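/* Build and send a bare ACK or RST (no payload) on the per-netns control
 * socket, mirroring the addresses and ports of the packet that triggered
 * it. Used by tcp_v6_send_reset() and tcp_v6_send_ack().
 */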
885static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
886				 u32 ack, u32 win, u32 tsval, u32 tsecr,
887				 int oif, struct tcp_md5sig_key *key, int rst,
888				 u8 tclass, __be32 label, u32 priority)
889{
890	const struct tcphdr *th = tcp_hdr(skb);
891	struct tcphdr *t1;
892	struct sk_buff *buff;
893	struct flowi6 fl6;
894	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
895	struct sock *ctl_sk = net->ipv6.tcp_sk;
896	unsigned int tot_len = sizeof(struct tcphdr);
897	struct dst_entry *dst;
898	__be32 *topt;
899	__u32 mark = 0;
900
901	if (tsecr)
902		tot_len += TCPOLEN_TSTAMP_ALIGNED;
903#ifdef CONFIG_TCP_MD5SIG
904	if (key)
905		tot_len += TCPOLEN_MD5SIG_ALIGNED;
906#endif
907
908	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
909			 GFP_ATOMIC);
910	if (!buff)
911		return;
912
913	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
914
915	t1 = skb_push(buff, tot_len);
916	skb_reset_transport_header(buff);
917
918	/* Swap the send and the receive. */
919	memset(t1, 0, sizeof(*t1));
920	t1->dest = th->source;
921	t1->source = th->dest;
922	t1->doff = tot_len / 4;
923	t1->seq = htonl(seq);
924	t1->ack_seq = htonl(ack);
925	t1->ack = !rst || !th->ack;
926	t1->rst = rst;
927	t1->window = htons(win);
928
929	topt = (__be32 *)(t1 + 1);
930
931	if (tsecr) {
932		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
933				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
934		*topt++ = htonl(tsval);
935		*topt++ = htonl(tsecr);
936	}
937
938#ifdef CONFIG_TCP_MD5SIG
939	if (key) {
940		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
941				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
942		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
943				    &ipv6_hdr(skb)->saddr,
944				    &ipv6_hdr(skb)->daddr, t1);
945	}
946#endif
947
948	memset(&fl6, 0, sizeof(fl6));
949	fl6.daddr = ipv6_hdr(skb)->saddr;
950	fl6.saddr = ipv6_hdr(skb)->daddr;
951	fl6.flowlabel = label;
952
953	buff->ip_summed = CHECKSUM_PARTIAL;
954	buff->csum = 0;
955
956	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
957
958	fl6.flowi6_proto = IPPROTO_TCP;
959	if (rt6_need_strict(&fl6.daddr) && !oif)
960		fl6.flowi6_oif = tcp_v6_iif(skb);
961	else {
962		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
963			oif = skb->skb_iif;
964
965		fl6.flowi6_oif = oif;
966	}
967
968	if (sk) {
969		if (sk->sk_state == TCP_TIME_WAIT) {
970			mark = inet_twsk(sk)->tw_mark;
971			/* autoflowlabel relies on buff->hash */
972			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
973				     PKT_HASH_TYPE_L4);
974		} else {
975			mark = sk->sk_mark;
976		}
977		buff->tstamp = tcp_transmit_time(sk);
978	}
979	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
980	fl6.fl6_dport = t1->dest;
981	fl6.fl6_sport = t1->source;
982	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
983	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
984
985	/* Pass a socket to ip6_dst_lookup_flow() even when it is for a RST;
986	 * the underlying function will use it to retrieve the network
987	 * namespace.
988	 */
989	if (sk && sk->sk_state != TCP_TIME_WAIT)
990		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
991	else
992		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
993	if (!IS_ERR(dst)) {
994		skb_dst_set(buff, dst);
995		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
996			 tclass & ~INET_ECN_MASK, priority);
997		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
998		if (rst)
999			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
1000		return;
1001	}
1002
1003	kfree_skb(buff);
1004}
1005
1006static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1007{
1008	const struct tcphdr *th = tcp_hdr(skb);
1009	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1010	u32 seq = 0, ack_seq = 0;
1011	struct tcp_md5sig_key *key = NULL;
1012#ifdef CONFIG_TCP_MD5SIG
1013	const __u8 *hash_location = NULL;
1014	unsigned char newhash[16];
1015	int genhash;
1016	struct sock *sk1 = NULL;
1017#endif
1018	__be32 label = 0;
1019	u32 priority = 0;
1020	struct net *net;
1021	int oif = 0;
1022
1023	if (th->rst)
1024		return;
1025
1026	/* If sk not NULL, it means we did a successful lookup and incoming
1027	 * route had to be correct. prequeue might have dropped our dst.
1028	 */
1029	if (!sk && !ipv6_unicast_destination(skb))
1030		return;
1031
1032	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1033#ifdef CONFIG_TCP_MD5SIG
1034	rcu_read_lock();
1035	hash_location = tcp_parse_md5sig_option(th);
1036	if (sk && sk_fullsock(sk)) {
1037		int l3index;
1038
1039		/* If sdif is set, the packet ingressed via a device
1040		 * in an L3 domain, and inet_iif is set to it.
1041		 */
1042		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1043		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1044	} else if (hash_location) {
1045		int dif = tcp_v6_iif_l3_slave(skb);
1046		int sdif = tcp_v6_sdif(skb);
1047		int l3index;
1048
1049		/*
1050		 * The active side is lost. Try to find the listening socket via
1051		 * the source port, and then find the MD5 key via that socket.
1052		 * We do not lose security here:
1053		 * the incoming packet is checked against the MD5 hash of the
1054		 * key we found, and no RST is generated if the hash doesn't match.
1055		 */
1056		sk1 = inet6_lookup_listener(net,
1057					   &tcp_hashinfo, NULL, 0,
1058					   &ipv6h->saddr,
1059					   th->source, &ipv6h->daddr,
1060					   ntohs(th->source), dif, sdif);
1061		if (!sk1)
1062			goto out;
1063
1064		/* If sdif is set, the packet ingressed via a device
1065		 * in an L3 domain, and dif is set to it.
1066		 */
1067		l3index = tcp_v6_sdif(skb) ? dif : 0;
1068
1069		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1070		if (!key)
1071			goto out;
1072
1073		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1074		if (genhash || memcmp(hash_location, newhash, 16) != 0)
1075			goto out;
1076	}
1077#endif
1078
1079	if (th->ack)
1080		seq = ntohl(th->ack_seq);
1081	else
1082		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1083			  (th->doff << 2);
1084
1085	if (sk) {
1086		oif = sk->sk_bound_dev_if;
1087		if (sk_fullsock(sk)) {
1088			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1089
1090			trace_tcp_send_reset(sk, skb);
1091			if (np->repflow)
1092				label = ip6_flowlabel(ipv6h);
1093			priority = sk->sk_priority;
1094		}
1095		if (sk->sk_state == TCP_TIME_WAIT) {
1096			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1097			priority = inet_twsk(sk)->tw_priority;
1098		}
1099	} else {
1100		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1101			label = ip6_flowlabel(ipv6h);
1102	}
1103
1104	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1105			     ipv6_get_dsfield(ipv6h), label, priority);
1106
1107#ifdef CONFIG_TCP_MD5SIG
1108out:
1109	rcu_read_unlock();
1110#endif
1111}
1112
1113static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1114			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1115			    struct tcp_md5sig_key *key, u8 tclass,
1116			    __be32 label, u32 priority)
1117{
1118	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1119			     tclass, label, priority);
1120}
1121
1122static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1123{
1124	struct inet_timewait_sock *tw = inet_twsk(sk);
1125	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1126
1127	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1128			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1129			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1130			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1131			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1132
1133	inet_twsk_put(tw);
1134}
1135
1136static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1137				  struct request_sock *req)
1138{
1139	int l3index;
1140
1141	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1142
1143	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1144	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1145	 */
1146	/* RFC 7323 2.3
1147	 * The window field (SEG.WND) of every outgoing segment, with the
1148	 * exception of <SYN> segments, MUST be right-shifted by
1149	 * Rcv.Wind.Shift bits:
1150	 */
1151	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1152			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1153			tcp_rsk(req)->rcv_nxt,
1154			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1155			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1156			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1157			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1158			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1159}
1160
1161
1162static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1163{
1164#ifdef CONFIG_SYN_COOKIES
1165	const struct tcphdr *th = tcp_hdr(skb);
1166
1167	if (!th->syn)
1168		sk = cookie_v6_check(sk, skb);
1169#endif
1170	return sk;
1171}
1172
1173u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1174			 struct tcphdr *th, u32 *cookie)
1175{
1176	u16 mss = 0;
1177#ifdef CONFIG_SYN_COOKIES
1178	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1179				    &tcp_request_sock_ipv6_ops, sk, th);
1180	if (mss) {
1181		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1182		tcp_synq_overflow(sk);
1183	}
1184#endif
1185	return mss;
1186}
1187
1188static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1189{
1190	if (skb->protocol == htons(ETH_P_IP))
1191		return tcp_v4_conn_request(sk, skb);
1192
1193	if (!ipv6_unicast_destination(skb))
1194		goto drop;
1195
1196	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1197		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1198		return 0;
1199	}
1200
1201	return tcp_conn_request(&tcp6_request_sock_ops,
1202				&tcp_request_sock_ipv6_ops, sk, skb);
1203
1204drop:
1205	tcp_listendrop(sk);
1206	return 0; /* don't send reset */
1207}
1208
1209static void tcp_v6_restore_cb(struct sk_buff *skb)
1210{
1211	/* We need to move header back to the beginning if xfrm6_policy_check()
1212	 * and tcp_v6_fill_cb() are going to be called again.
1213	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1214	 */
1215	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1216		sizeof(struct inet6_skb_parm));
1217}
1218
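/* Create the child socket once the handshake completes. For a v4-mapped
 * peer the IPv4 code does the work and only the af_ops are fixed up
 * afterwards; otherwise a full IPv6 child is set up here, including
 * cloned IPv6 options and an optional MD5 key copy.
 */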
1219static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1220					 struct request_sock *req,
1221					 struct dst_entry *dst,
1222					 struct request_sock *req_unhash,
1223					 bool *own_req)
1224{
1225	struct inet_request_sock *ireq;
1226	struct ipv6_pinfo *newnp;
1227	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1228	struct ipv6_txoptions *opt;
1229	struct inet_sock *newinet;
1230	bool found_dup_sk = false;
1231	struct tcp_sock *newtp;
1232	struct sock *newsk;
1233#ifdef CONFIG_TCP_MD5SIG
1234	struct tcp_md5sig_key *key;
1235	int l3index;
1236#endif
1237	struct flowi6 fl6;
1238
1239	if (skb->protocol == htons(ETH_P_IP)) {
1240		/*
1241		 *	v6 mapped
1242		 */
1243
1244		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1245					     req_unhash, own_req);
1246
1247		if (!newsk)
1248			return NULL;
1249
1250		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1251
1252		newinet = inet_sk(newsk);
1253		newnp = tcp_inet6_sk(newsk);
1254		newtp = tcp_sk(newsk);
1255
1256		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1257
1258		newnp->saddr = newsk->sk_v6_rcv_saddr;
1259
1260		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1261		if (sk_is_mptcp(newsk))
1262			mptcpv6_handle_mapped(newsk, true);
1263		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1264#ifdef CONFIG_TCP_MD5SIG
1265		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1266#endif
1267
1268		newnp->ipv6_mc_list = NULL;
1269		newnp->ipv6_ac_list = NULL;
1270		newnp->ipv6_fl_list = NULL;
1271		newnp->pktoptions  = NULL;
1272		newnp->opt	   = NULL;
1273		newnp->mcast_oif   = inet_iif(skb);
1274		newnp->mcast_hops  = ip_hdr(skb)->ttl;
1275		newnp->rcv_flowinfo = 0;
1276		if (np->repflow)
1277			newnp->flow_label = 0;
1278
1279		/*
1280		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1281		 * here, tcp_create_openreq_child now does this for us, see the comment in
1282		 * that function for the gory details. -acme
1283		 */
1284
1285		/* This is a tricky place. Until this moment the IPv4 tcp code
1286		   worked with the IPv6 icsk.icsk_af_ops.
1287		   Sync it now.
1288		 */
1289		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1290
1291		return newsk;
1292	}
1293
1294	ireq = inet_rsk(req);
1295
1296	if (sk_acceptq_is_full(sk))
1297		goto out_overflow;
1298
1299	if (!dst) {
1300		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1301		if (!dst)
1302			goto out;
1303	}
1304
1305	newsk = tcp_create_openreq_child(sk, req, skb);
1306	if (!newsk)
1307		goto out_nonewsk;
1308
1309	/*
1310	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1311	 * count here, tcp_create_openreq_child now does this for us, see the
1312	 * comment in that function for the gory details. -acme
1313	 */
1314
1315	newsk->sk_gso_type = SKB_GSO_TCPV6;
1316	ip6_dst_store(newsk, dst, NULL, NULL);
1317	inet6_sk_rx_dst_set(newsk, skb);
1318
1319	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1320
1321	newtp = tcp_sk(newsk);
1322	newinet = inet_sk(newsk);
1323	newnp = tcp_inet6_sk(newsk);
1324
1325	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1326
1327	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1328	newnp->saddr = ireq->ir_v6_loc_addr;
1329	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1330	newsk->sk_bound_dev_if = ireq->ir_iif;
1331
1332	/* Now IPv6 options...
1333
1334	   First: no IPv4 options.
1335	 */
1336	newinet->inet_opt = NULL;
1337	newnp->ipv6_mc_list = NULL;
1338	newnp->ipv6_ac_list = NULL;
1339	newnp->ipv6_fl_list = NULL;
1340
1341	/* Clone RX bits */
1342	newnp->rxopt.all = np->rxopt.all;
1343
1344	newnp->pktoptions = NULL;
1345	newnp->opt	  = NULL;
1346	newnp->mcast_oif  = tcp_v6_iif(skb);
1347	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1348	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1349	if (np->repflow)
1350		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1351
1352	/* Set ToS of the new socket based upon the value of incoming SYN.
1353	 * ECT bits are set later in tcp_init_transfer().
1354	 */
1355	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1356		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1357
1358	/* Clone native IPv6 options from listening socket (if any)
1359
1360	   Yes, keeping a reference count would be much more clever,
1361	   but we do one more thing here: reattach optmem
1362	   to newsk.
1363	 */
1364	opt = ireq->ipv6_opt;
1365	if (!opt)
1366		opt = rcu_dereference(np->opt);
1367	if (opt) {
1368		opt = ipv6_dup_options(newsk, opt);
1369		RCU_INIT_POINTER(newnp->opt, opt);
1370	}
1371	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1372	if (opt)
1373		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1374						    opt->opt_flen;
1375
1376	tcp_ca_openreq_child(newsk, dst);
1377
1378	tcp_sync_mss(newsk, dst_mtu(dst));
1379	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1380
1381	tcp_initialize_rcv_mss(newsk);
1382
1383	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1384	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1385
1386#ifdef CONFIG_TCP_MD5SIG
1387	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1388
1389	/* Copy over the MD5 key from the original socket */
1390	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1391	if (key) {
1392		/* We're using one, so create a matching key
1393		 * on the newsk structure. If we fail to get
1394		 * memory, then we end up not copying the key
1395		 * across. Shucks.
1396		 */
1397		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1398			       AF_INET6, 128, l3index, key->key, key->keylen,
1399			       sk_gfp_mask(sk, GFP_ATOMIC));
1400	}
1401#endif
1402
1403	if (__inet_inherit_port(sk, newsk) < 0) {
1404		inet_csk_prepare_forced_close(newsk);
1405		tcp_done(newsk);
1406		goto out;
1407	}
1408	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1409				       &found_dup_sk);
1410	if (*own_req) {
1411		tcp_move_syn(newtp, req);
1412
1413		/* Clone pktoptions received with SYN, if we own the req */
1414		if (ireq->pktopts) {
1415			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1416			consume_skb(ireq->pktopts);
1417			ireq->pktopts = NULL;
1418			if (newnp->pktoptions)
1419				tcp_v6_restore_cb(newnp->pktoptions);
1420		}
1421	} else {
1422		if (!req_unhash && found_dup_sk) {
1423			/* This code path should be executed only in the
1424			 * syncookie case
1425			 */
1426			bh_unlock_sock(newsk);
1427			sock_put(newsk);
1428			newsk = NULL;
1429		}
1430	}
1431
1432	return newsk;
1433
1434out_overflow:
1435	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1436out_nonewsk:
1437	dst_release(dst);
1438out:
1439	tcp_listendrop(sk);
1440	return NULL;
1441}
1442
1443/* The socket must have its spinlock held when we get
1444 * here, unless it is a TCP_LISTEN socket.
1445 *
1446 * We have a potential double-lock case here, so even when
1447 * doing backlog processing we use the BH locking scheme.
1448 * This is because we cannot sleep with the original spinlock
1449 * held.
1450 */
1451static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1452{
1453	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1454	struct sk_buff *opt_skb = NULL;
1455	struct tcp_sock *tp;
1456
1457	/* Imagine: socket is IPv6. IPv4 packet arrives,
1458	   goes to the IPv4 receive handler and is backlogged.
1459	   From backlog it always goes here. Kerboom...
1460	   Fortunately, tcp_rcv_established and rcv_established
1461	   handle them correctly, but that is not the case with
1462	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1463	 */
1464
1465	if (skb->protocol == htons(ETH_P_IP))
1466		return tcp_v4_do_rcv(sk, skb);
1467
1468	/*
1469	 *	socket locking is here for SMP purposes as backlog rcv
1470	 *	is currently called with bh processing disabled.
1471	 */
1472
1473	/* Do Stevens' IPV6_PKTOPTIONS.
1474
1475	   Yes, guys, it is the only place in our code where we
1476	   may do this without affecting IPv4.
1477	   The rest of the code is protocol independent,
1478	   and I do not like the idea of uglifying IPv4.
1479
1480	   Actually, the whole idea behind IPV6_PKTOPTIONS
1481	   does not look very well thought out. For now we latch
1482	   the options received in the last packet enqueued
1483	   by tcp. Feel free to propose a better solution.
1484					       --ANK (980728)
1485	 */
1486	if (np->rxopt.all)
1487		opt_skb = skb_clone_and_charge_r(skb, sk);
1488
1489	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1490		struct dst_entry *dst;
1491
1492		dst = rcu_dereference_protected(sk->sk_rx_dst,
1493						lockdep_sock_is_held(sk));
1494
1495		sock_rps_save_rxhash(sk, skb);
1496		sk_mark_napi_id(sk, skb);
1497		if (dst) {
1498			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1499			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1500				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1501				dst_release(dst);
1502			}
1503		}
1504
1505		tcp_rcv_established(sk, skb);
1506		if (opt_skb)
1507			goto ipv6_pktoptions;
1508		return 0;
1509	}
1510
1511	if (tcp_checksum_complete(skb))
1512		goto csum_err;
1513
1514	if (sk->sk_state == TCP_LISTEN) {
1515		struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1516
1517		if (!nsk)
1518			goto discard;
1519
1520		if (nsk != sk) {
1521			if (tcp_child_process(sk, nsk, skb))
1522				goto reset;
1523			if (opt_skb)
1524				__kfree_skb(opt_skb);
1525			return 0;
1526		}
1527	} else
1528		sock_rps_save_rxhash(sk, skb);
1529
1530	if (tcp_rcv_state_process(sk, skb))
1531		goto reset;
1532	if (opt_skb)
1533		goto ipv6_pktoptions;
1534	return 0;
1535
1536reset:
1537	tcp_v6_send_reset(sk, skb);
1538discard:
1539	if (opt_skb)
1540		__kfree_skb(opt_skb);
1541	kfree_skb(skb);
1542	return 0;
1543csum_err:
1544	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1545	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1546	goto discard;
1547
1548
1549ipv6_pktoptions:
1550	/* Do you ask, what is it?
1551
1552	   1. skb was enqueued by tcp.
1553	   2. skb is added to tail of read queue, rather than out of order.
1554	   3. socket is not in passive state.
1555	   4. Finally, it really contains options, which user wants to receive.
1556	 */
1557	tp = tcp_sk(sk);
1558	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1559	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1560		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1561			np->mcast_oif = tcp_v6_iif(opt_skb);
1562		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1563			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1564		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1565			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1566		if (np->repflow)
1567			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1568		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1569			tcp_v6_restore_cb(opt_skb);
1570			opt_skb = xchg(&np->pktoptions, opt_skb);
1571		} else {
1572			__kfree_skb(opt_skb);
1573			opt_skb = xchg(&np->pktoptions, NULL);
1574		}
1575	}
1576
1577	kfree_skb(opt_skb);
1578	return 0;
1579}
1580
1581static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582			   const struct tcphdr *th)
1583{
1584	/* This is tricky: we move IP6CB at its correct location into
1585	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1586	 * _decode_session6() uses IP6CB().
1587	 * barrier() makes sure compiler won't play aliasing games.
1588	 */
1589	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1590		sizeof(struct inet6_skb_parm));
1591	barrier();
1592
1593	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1594	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595				    skb->len - th->doff*4);
1596	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600	TCP_SKB_CB(skb)->sacked = 0;
1601	TCP_SKB_CB(skb)->has_rxtstamp =
1602			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1603}
1604
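/* Main receive entry point for TCP over IPv6: validate the header and
 * checksum, look up the owning socket, handle TCP_NEW_SYN_RECV and
 * TCP_TIME_WAIT specially, then deliver via tcp_v6_do_rcv() or queue the
 * skb to the socket backlog.
 */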
1605INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1606{
1607	struct sk_buff *skb_to_free;
1608	int sdif = inet6_sdif(skb);
1609	int dif = inet6_iif(skb);
1610	const struct tcphdr *th;
1611	const struct ipv6hdr *hdr;
1612	bool refcounted;
1613	struct sock *sk;
1614	int ret;
1615	struct net *net = dev_net(skb->dev);
1616
1617	if (skb->pkt_type != PACKET_HOST)
1618		goto discard_it;
1619
1620	/*
1621	 *	Count it even if it's bad.
1622	 */
1623	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1624
1625	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1626		goto discard_it;
1627
1628	th = (const struct tcphdr *)skb->data;
1629
1630	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1631		goto bad_packet;
1632	if (!pskb_may_pull(skb, th->doff*4))
1633		goto discard_it;
1634
1635	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1636		goto csum_error;
1637
1638	th = (const struct tcphdr *)skb->data;
1639	hdr = ipv6_hdr(skb);
1640
1641lookup:
1642	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1643				th->source, th->dest, inet6_iif(skb), sdif,
1644				&refcounted);
1645	if (!sk)
1646		goto no_tcp_socket;
1647
1648process:
1649	if (sk->sk_state == TCP_TIME_WAIT)
1650		goto do_time_wait;
1651
1652	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1653		struct request_sock *req = inet_reqsk(sk);
1654		bool req_stolen = false;
1655		struct sock *nsk;
1656
1657		sk = req->rsk_listener;
1658		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1659			sk_drops_add(sk, skb);
1660			reqsk_put(req);
1661			goto discard_it;
1662		}
1663		if (tcp_checksum_complete(skb)) {
1664			reqsk_put(req);
1665			goto csum_error;
1666		}
1667		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1668			inet_csk_reqsk_queue_drop_and_put(sk, req);
1669			goto lookup;
1670		}
1671		sock_hold(sk);
1672		refcounted = true;
1673		nsk = NULL;
1674		if (!tcp_filter(sk, skb)) {
1675			th = (const struct tcphdr *)skb->data;
1676			hdr = ipv6_hdr(skb);
1677			tcp_v6_fill_cb(skb, hdr, th);
1678			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1679		}
1680		if (!nsk) {
1681			reqsk_put(req);
1682			if (req_stolen) {
1683				/* Another cpu got exclusive access to req
1684				 * and created a full blown socket.
1685				 * Try to feed this packet to this socket
1686				 * instead of discarding it.
1687				 */
1688				tcp_v6_restore_cb(skb);
1689				sock_put(sk);
1690				goto lookup;
1691			}
1692			goto discard_and_relse;
1693		}
1694		if (nsk == sk) {
1695			reqsk_put(req);
1696			tcp_v6_restore_cb(skb);
1697		} else if (tcp_child_process(sk, nsk, skb)) {
1698			tcp_v6_send_reset(nsk, skb);
1699			goto discard_and_relse;
1700		} else {
1701			sock_put(sk);
1702			return 0;
1703		}
1704	}
1705	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1706		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1707		goto discard_and_relse;
1708	}
1709
1710	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1711		goto discard_and_relse;
1712
1713	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1714		goto discard_and_relse;
1715
1716	if (tcp_filter(sk, skb))
1717		goto discard_and_relse;
1718	th = (const struct tcphdr *)skb->data;
1719	hdr = ipv6_hdr(skb);
1720	tcp_v6_fill_cb(skb, hdr, th);
1721
1722	skb->dev = NULL;
1723
1724	if (sk->sk_state == TCP_LISTEN) {
1725		ret = tcp_v6_do_rcv(sk, skb);
1726		goto put_and_return;
1727	}
1728
1729	sk_incoming_cpu_update(sk);
1730
1731	bh_lock_sock_nested(sk);
1732	tcp_segs_in(tcp_sk(sk), skb);
1733	ret = 0;
1734	if (!sock_owned_by_user(sk)) {
1735		skb_to_free = sk->sk_rx_skb_cache;
1736		sk->sk_rx_skb_cache = NULL;
1737		ret = tcp_v6_do_rcv(sk, skb);
1738	} else {
1739		if (tcp_add_backlog(sk, skb))
1740			goto discard_and_relse;
1741		skb_to_free = NULL;
1742	}
1743	bh_unlock_sock(sk);
1744	if (skb_to_free)
1745		__kfree_skb(skb_to_free);
1746put_and_return:
1747	if (refcounted)
1748		sock_put(sk);
1749	return ret ? -1 : 0;
1750
1751no_tcp_socket:
1752	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1753		goto discard_it;
1754
1755	tcp_v6_fill_cb(skb, hdr, th);
1756
1757	if (tcp_checksum_complete(skb)) {
1758csum_error:
1759		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1760bad_packet:
1761		__TCP_INC_STATS(net, TCP_MIB_INERRS);
1762	} else {
1763		tcp_v6_send_reset(NULL, skb);
1764	}
1765
1766discard_it:
1767	kfree_skb(skb);
1768	return 0;
1769
1770discard_and_relse:
1771	sk_drops_add(sk, skb);
1772	if (refcounted)
1773		sock_put(sk);
1774	goto discard_it;
1775
1776do_time_wait:
1777	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1778		inet_twsk_put(inet_twsk(sk));
1779		goto discard_it;
1780	}
1781
1782	tcp_v6_fill_cb(skb, hdr, th);
1783
1784	if (tcp_checksum_complete(skb)) {
1785		inet_twsk_put(inet_twsk(sk));
1786		goto csum_error;
1787	}
1788
1789	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1790	case TCP_TW_SYN:
1791	{
1792		struct sock *sk2;
1793
1794		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1795					    skb, __tcp_hdrlen(th),
1796					    &ipv6_hdr(skb)->saddr, th->source,
1797					    &ipv6_hdr(skb)->daddr,
1798					    ntohs(th->dest),
1799					    tcp_v6_iif_l3_slave(skb),
1800					    sdif);
1801		if (sk2) {
1802			struct inet_timewait_sock *tw = inet_twsk(sk);
1803			inet_twsk_deschedule_put(tw);
1804			sk = sk2;
1805			tcp_v6_restore_cb(skb);
1806			refcounted = false;
1807			goto process;
1808		}
1809	}
1810		/* to ACK */
1811		fallthrough;
1812	case TCP_TW_ACK:
1813		tcp_v6_timewait_ack(sk, skb);
1814		break;
1815	case TCP_TW_RST:
1816		tcp_v6_send_reset(sk, skb);
1817		inet_twsk_deschedule_put(inet_twsk(sk));
1818		goto discard_it;
1819	case TCP_TW_SUCCESS:
1820		;
1821	}
1822	goto discard_it;
1823}
1824
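/* Early demux: look up an established socket directly from the IP
 * receive path so that the dst cached on that socket can be attached to
 * the skb before routing.
 */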
1825void tcp_v6_early_demux(struct sk_buff *skb)
1826{
1827	const struct ipv6hdr *hdr;
1828	const struct tcphdr *th;
1829	struct sock *sk;
1830
1831	if (skb->pkt_type != PACKET_HOST)
1832		return;
1833
1834	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1835		return;
1836
1837	hdr = ipv6_hdr(skb);
1838	th = tcp_hdr(skb);
1839
1840	if (th->doff < sizeof(struct tcphdr) / 4)
1841		return;
1842
1843	/* Note: We use inet6_iif() here, not tcp_v6_iif() */
1844	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1845					&hdr->saddr, th->source,
1846					&hdr->daddr, ntohs(th->dest),
1847					inet6_iif(skb), inet6_sdif(skb));
1848	if (sk) {
1849		skb->sk = sk;
1850		skb->destructor = sock_edemux;
1851		if (sk_fullsock(sk)) {
1852			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1853
1854			if (dst)
1855				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1856			if (dst &&
1857			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1858				skb_dst_set_noref(skb, dst);
1859		}
1860	}
1861}
1862
1863static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1864	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
1865	.twsk_unique	= tcp_twsk_unique,
1866	.twsk_destructor = tcp_twsk_destructor,
1867};
1868
1869INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1870{
1871	struct ipv6_pinfo *np = inet6_sk(sk);
1872
1873	__tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1874}
1875
1876const struct inet_connection_sock_af_ops ipv6_specific = {
1877	.queue_xmit	   = inet6_csk_xmit,
1878	.send_check	   = tcp_v6_send_check,
1879	.rebuild_header	   = inet6_sk_rebuild_header,
1880	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
1881	.conn_request	   = tcp_v6_conn_request,
1882	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1883	.net_header_len	   = sizeof(struct ipv6hdr),
1884	.net_frag_header_len = sizeof(struct frag_hdr),
1885	.setsockopt	   = ipv6_setsockopt,
1886	.getsockopt	   = ipv6_getsockopt,
1887	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1888	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1889	.mtu_reduced	   = tcp_v6_mtu_reduced,
1890};
1891
1892#ifdef CONFIG_TCP_MD5SIG
1893static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1894	.md5_lookup	=	tcp_v6_md5_lookup,
1895	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
1896	.md5_parse	=	tcp_v6_parse_md5_keys,
1897};
1898#endif
1899
1900/*
1901 *	TCP over IPv4 via INET6 API
1902 */
1903static const struct inet_connection_sock_af_ops ipv6_mapped = {
1904	.queue_xmit	   = ip_queue_xmit,
1905	.send_check	   = tcp_v4_send_check,
1906	.rebuild_header	   = inet_sk_rebuild_header,
1907	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
1908	.conn_request	   = tcp_v6_conn_request,
1909	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
1910	.net_header_len	   = sizeof(struct iphdr),
1911	.setsockopt	   = ipv6_setsockopt,
1912	.getsockopt	   = ipv6_getsockopt,
1913	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
1914	.sockaddr_len	   = sizeof(struct sockaddr_in6),
1915	.mtu_reduced	   = tcp_v4_mtu_reduced,
1916};
1917
1918#ifdef CONFIG_TCP_MD5SIG
1919static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1920	.md5_lookup	=	tcp_v4_md5_lookup,
1921	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1922	.md5_parse	=	tcp_v6_parse_md5_keys,
1923};
1924#endif
1925
1926	/* NOTE: A lot of fields are set to zero explicitly by the call to
1927	 *       sk_alloc(), so they need not be initialized again here.
1928	 */
1929static int tcp_v6_init_sock(struct sock *sk)
1930{
1931	struct inet_connection_sock *icsk = inet_csk(sk);
1932
1933	tcp_init_sock(sk);
1934
1935	icsk->icsk_af_ops = &ipv6_specific;
1936
1937#ifdef CONFIG_TCP_MD5SIG
1938	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1939#endif
1940
1941	return 0;
1942}
1943
1944#ifdef CONFIG_PROC_FS
1945/* Proc filesystem TCPv6 sock list dumping. */
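/* Format one /proc/net/tcp6 row for a pending connection request, shown
 * in state TCP_SYN_RECV.  Only the request-expiry timer can be pending,
 * and a request sock has no inode or refcount of its own to report, so
 * those columns are printed as fixed values.
 */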
1946static void get_openreq6(struct seq_file *seq,
1947			 const struct request_sock *req, int i)
1948{
1949	long ttd = req->rsk_timer.expires - jiffies;
1950	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1951	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1952
1953	if (ttd < 0)
1954		ttd = 0;
1955
1956	seq_printf(seq,
1957		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1958		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1959		   i,
1960		   src->s6_addr32[0], src->s6_addr32[1],
1961		   src->s6_addr32[2], src->s6_addr32[3],
1962		   inet_rsk(req)->ir_num,
1963		   dest->s6_addr32[0], dest->s6_addr32[1],
1964		   dest->s6_addr32[2], dest->s6_addr32[3],
1965		   ntohs(inet_rsk(req)->ir_rmt_port),
1966		   TCP_SYN_RECV,
1967		   0, 0, /* could print option size, but that is af dependent. */
1968		   1,   /* timers active (only the expire timer) */
1969		   jiffies_to_clock_t(ttd),
1970		   req->num_timeout,
1971		   from_kuid_munged(seq_user_ns(seq),
1972				    sock_i_uid(req->rsk_listener)),
1973		   0,  /* non standard timer */
1974		   0, /* open_requests have no inode */
1975		   0, req);
1976}
1977
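/* Format one /proc/net/tcp6 row for a full socket.  The tr/tm->when pair
 * encodes the pending timer: 1 retransmit/loss-probe/RACK, 4 zero-window
 * probe, 2 keepalive, 0 none.  rx_queue is the accept backlog for
 * listeners; otherwise it is rcv_nxt - copied_seq, read without the
 * socket lock, so a transient negative value is clamped to zero.
 */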
1978static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1979{
1980	const struct in6_addr *dest, *src;
1981	__u16 destp, srcp;
1982	int timer_active;
1983	unsigned long timer_expires;
1984	const struct inet_sock *inet = inet_sk(sp);
1985	const struct tcp_sock *tp = tcp_sk(sp);
1986	const struct inet_connection_sock *icsk = inet_csk(sp);
1987	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1988	int rx_queue;
1989	int state;
1990
1991	dest  = &sp->sk_v6_daddr;
1992	src   = &sp->sk_v6_rcv_saddr;
1993	destp = ntohs(inet->inet_dport);
1994	srcp  = ntohs(inet->inet_sport);
1995
1996	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1997	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1998	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1999		timer_active	= 1;
2000		timer_expires	= icsk->icsk_timeout;
2001	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2002		timer_active	= 4;
2003		timer_expires	= icsk->icsk_timeout;
2004	} else if (timer_pending(&sp->sk_timer)) {
2005		timer_active	= 2;
2006		timer_expires	= sp->sk_timer.expires;
2007	} else {
2008		timer_active	= 0;
2009		timer_expires = jiffies;
2010	}
2011
2012	state = inet_sk_state_load(sp);
2013	if (state == TCP_LISTEN)
2014		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2015	else
2016		/* Because we don't lock the socket,
2017		 * we might find a transient negative value.
2018		 */
2019		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2020				      READ_ONCE(tp->copied_seq), 0);
2021
2022	seq_printf(seq,
2023		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2024		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2025		   i,
2026		   src->s6_addr32[0], src->s6_addr32[1],
2027		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2028		   dest->s6_addr32[0], dest->s6_addr32[1],
2029		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2030		   state,
2031		   READ_ONCE(tp->write_seq) - tp->snd_una,
2032		   rx_queue,
2033		   timer_active,
2034		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2035		   icsk->icsk_retransmits,
2036		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2037		   icsk->icsk_probes_out,
2038		   sock_i_ino(sp),
2039		   refcount_read(&sp->sk_refcnt), sp,
2040		   jiffies_to_clock_t(icsk->icsk_rto),
2041		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2042		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2043		   tp->snd_cwnd,
2044		   state == TCP_LISTEN ?
2045			fastopenq->max_qlen :
2046			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2047		   );
2048}
2049
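/* Format one /proc/net/tcp6 row for a TIME_WAIT (or FIN_WAIT2) minisock.
 * A timewait socket keeps no queues, uid or inode, so those columns are
 * printed as zero; only the remaining timewait timer is reported.
 */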
2050static void get_timewait6_sock(struct seq_file *seq,
2051			       struct inet_timewait_sock *tw, int i)
2052{
2053	long delta = tw->tw_timer.expires - jiffies;
2054	const struct in6_addr *dest, *src;
2055	__u16 destp, srcp;
2056
2057	dest = &tw->tw_v6_daddr;
2058	src  = &tw->tw_v6_rcv_saddr;
2059	destp = ntohs(tw->tw_dport);
2060	srcp  = ntohs(tw->tw_sport);
2061
2062	seq_printf(seq,
2063		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2064		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2065		   i,
2066		   src->s6_addr32[0], src->s6_addr32[1],
2067		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2068		   dest->s6_addr32[0], dest->s6_addr32[1],
2069		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2070		   tw->tw_substate, 0, 0,
2071		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2072		   refcount_read(&tw->tw_refcnt), tw);
2073}
2074
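/* seq_file ->show() callback: emit the header line for the start token,
 * then dispatch on the entry type - timewait, request (TCP_NEW_SYN_RECV)
 * or full socket - to the matching formatter above.
 */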
2075static int tcp6_seq_show(struct seq_file *seq, void *v)
2076{
2077	struct tcp_iter_state *st;
2078	struct sock *sk = v;
2079
2080	if (v == SEQ_START_TOKEN) {
2081		seq_puts(seq,
2082			 "  sl  "
2083			 "local_address                         "
2084			 "remote_address                        "
2085			 "st tx_queue rx_queue tr tm->when retrnsmt"
2086			 "   uid  timeout inode\n");
2087		goto out;
2088	}
2089	st = seq->private;
2090
2091	if (sk->sk_state == TCP_TIME_WAIT)
2092		get_timewait6_sock(seq, v, st->num);
2093	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2094		get_openreq6(seq, v, st->num);
2095	else
2096		get_tcp6_sock(seq, v, st->num);
2097out:
2098	return 0;
2099}
2100
2101static const struct seq_operations tcp6_seq_ops = {
2102	.show		= tcp6_seq_show,
2103	.start		= tcp_seq_start,
2104	.next		= tcp_seq_next,
2105	.stop		= tcp_seq_stop,
2106};
2107
2108static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2109	.family		= AF_INET6,
2110};
2111
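/* Create and remove the per-namespace /proc/net/tcp6 entry, backed by the
 * generic TCP seq_file iterator restricted to AF_INET6 sockets.
 */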
2112int __net_init tcp6_proc_init(struct net *net)
2113{
2114	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2115			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2116		return -ENOMEM;
2117	return 0;
2118}
2119
2120void tcp6_proc_exit(struct net *net)
2121{
2122	remove_proc_entry("tcp6", net->proc_net);
2123}
2124#endif
2125
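/* The struct proto behind every AF_INET6 TCP socket.  Most hooks are the
 * ones shared with IPv4 TCP; only the address-family specific pieces
 * (connect, backlog receive, hashing, request/timewait ops, object size)
 * differ from the IPv4 tcp_prot.
 */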
2126struct proto tcpv6_prot = {
2127	.name			= "TCPv6",
2128	.owner			= THIS_MODULE,
2129	.close			= tcp_close,
2130	.pre_connect		= tcp_v6_pre_connect,
2131	.connect		= tcp_v6_connect,
2132	.disconnect		= tcp_disconnect,
2133	.accept			= inet_csk_accept,
2134	.ioctl			= tcp_ioctl,
2135	.init			= tcp_v6_init_sock,
2136	.destroy		= tcp_v4_destroy_sock,
2137	.shutdown		= tcp_shutdown,
2138	.setsockopt		= tcp_setsockopt,
2139	.getsockopt		= tcp_getsockopt,
2140	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2141	.keepalive		= tcp_set_keepalive,
2142	.recvmsg		= tcp_recvmsg,
2143	.sendmsg		= tcp_sendmsg,
2144	.sendpage		= tcp_sendpage,
2145	.backlog_rcv		= tcp_v6_do_rcv,
2146	.release_cb		= tcp_release_cb,
2147	.hash			= inet6_hash,
2148	.unhash			= inet_unhash,
2149	.get_port		= inet_csk_get_port,
2150	.enter_memory_pressure	= tcp_enter_memory_pressure,
2151	.leave_memory_pressure	= tcp_leave_memory_pressure,
2152	.stream_memory_free	= tcp_stream_memory_free,
2153	.sockets_allocated	= &tcp_sockets_allocated,
2154	.memory_allocated	= &tcp_memory_allocated,
2155	.memory_pressure	= &tcp_memory_pressure,
2156	.orphan_count		= &tcp_orphan_count,
2157	.sysctl_mem		= sysctl_tcp_mem,
2158	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2159	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2160	.max_header		= MAX_TCP_HEADER,
2161	.obj_size		= sizeof(struct tcp6_sock),
2162	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2163	.twsk_prot		= &tcp6_timewait_sock_ops,
2164	.rsk_prot		= &tcp6_request_sock_ops,
2165	.h.hashinfo		= &tcp_hashinfo,
2166	.no_autobind		= true,
2167	.diag_destroy		= tcp_abort,
2168};
2169EXPORT_SYMBOL_GPL(tcpv6_prot);
2170
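/* Receive-side glue: tcp_v6_rcv() handles incoming IPPROTO_TCP segments
 * and tcp_v6_err() handles ICMPv6 errors reported against them.  The
 * protosw below wires SOCK_STREAM/IPPROTO_TCP sockets to tcpv6_prot and
 * the inet6 stream ops.
 */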
2171static const struct inet6_protocol tcpv6_protocol = {
2172	.handler	=	tcp_v6_rcv,
2173	.err_handler	=	tcp_v6_err,
2174	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2175};
2176
2177static struct inet_protosw tcpv6_protosw = {
2178	.type		=	SOCK_STREAM,
2179	.protocol	=	IPPROTO_TCP,
2180	.prot		=	&tcpv6_prot,
2181	.ops		=	&inet6_stream_ops,
2182	.flags		=	INET_PROTOSW_PERMANENT |
2183				INET_PROTOSW_ICSK,
2184};
2185
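/* Per-namespace setup: create the control socket (net->ipv6.tcp_sk) used
 * to send resets and ACKs that are not associated with any local socket,
 * and tear it down again on namespace exit.  The batched exit purges any
 * IPv6 timewait sockets still held by the dying namespaces.
 */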
2186static int __net_init tcpv6_net_init(struct net *net)
2187{
2188	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2189				    SOCK_RAW, IPPROTO_TCP, net);
2190}
2191
2192static void __net_exit tcpv6_net_exit(struct net *net)
2193{
2194	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2195}
2196
2197static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2198{
2199	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2200}
2201
2202static struct pernet_operations tcpv6_net_ops = {
2203	.init	    = tcpv6_net_init,
2204	.exit	    = tcpv6_net_exit,
2205	.exit_batch = tcpv6_net_exit_batch,
2206};
2207
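/* Boot-time initialisation: register the protocol handler, the stream
 * protosw, the per-namespace ops and finally MPTCP's IPv6 support,
 * unwinding the earlier registrations in reverse order if a later step
 * fails.
 */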
2208int __init tcpv6_init(void)
2209{
2210	int ret;
2211
2212	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2213	if (ret)
2214		goto out;
2215
2216	/* register the AF_INET6 stream protosw */
2217	ret = inet6_register_protosw(&tcpv6_protosw);
2218	if (ret)
2219		goto out_tcpv6_protocol;
2220
2221	ret = register_pernet_subsys(&tcpv6_net_ops);
2222	if (ret)
2223		goto out_tcpv6_protosw;
2224
2225	ret = mptcpv6_init();
2226	if (ret)
2227		goto out_tcpv6_pernet_subsys;
2228
2229out:
2230	return ret;
2231
2232out_tcpv6_pernet_subsys:
2233	unregister_pernet_subsys(&tcpv6_net_ops);
2234out_tcpv6_protosw:
2235	inet6_unregister_protosw(&tcpv6_protosw);
2236out_tcpv6_protocol:
2237	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2238	goto out;
2239}
2240
2241void tcpv6_exit(void)
2242{
2243	unregister_pernet_subsys(&tcpv6_net_ops);
2244	inet6_unregister_protosw(&tcpv6_protosw);
2245	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2246}
2247