// SPDX-License-Identifier: GPL-2.0-only
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <net/tcp.h>
2362306a36Sopenharmony_ci#include <net/xfrm.h>
2462306a36Sopenharmony_ci#include <net/busy_poll.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	if (seq == s_win)
2962306a36Sopenharmony_ci		return true;
3062306a36Sopenharmony_ci	if (after(end_seq, s_win) && before(seq, e_win))
3162306a36Sopenharmony_ci		return true;
3262306a36Sopenharmony_ci	return seq == e_win && seq == end_seq;
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic enum tcp_tw_status
3662306a36Sopenharmony_citcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
3762306a36Sopenharmony_ci				  const struct sk_buff *skb, int mib_idx)
3862306a36Sopenharmony_ci{
3962306a36Sopenharmony_ci	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx,
4262306a36Sopenharmony_ci				  &tcptw->tw_last_oow_ack_time)) {
4362306a36Sopenharmony_ci		/* Send ACK. Note, we do not put the bucket,
4462306a36Sopenharmony_ci		 * it will be released by caller.
4562306a36Sopenharmony_ci		 */
4662306a36Sopenharmony_ci		return TCP_TW_ACK;
4762306a36Sopenharmony_ci	}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	/* We are rate-limiting, so just release the tw sock and drop skb. */
5062306a36Sopenharmony_ci	inet_twsk_put(tw);
5162306a36Sopenharmony_ci	return TCP_TW_SUCCESS;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci/*
5562306a36Sopenharmony_ci * * Main purpose of TIME-WAIT state is to close connection gracefully,
5662306a36Sopenharmony_ci *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
5762306a36Sopenharmony_ci *   (and, probably, tail of data) and one or more our ACKs are lost.
5862306a36Sopenharmony_ci * * What is TIME-WAIT timeout? It is associated with maximal packet
5962306a36Sopenharmony_ci *   lifetime in the internet, which results in wrong conclusion, that
6062306a36Sopenharmony_ci *   it is set to catch "old duplicate segments" wandering out of their path.
6162306a36Sopenharmony_ci *   It is not quite correct. This timeout is calculated so that it exceeds
6262306a36Sopenharmony_ci *   maximal retransmission timeout enough to allow to lose one (or more)
6362306a36Sopenharmony_ci *   segments sent by peer and our ACKs. This time may be calculated from RTO.
6462306a36Sopenharmony_ci * * When TIME-WAIT socket receives RST, it means that another end
6562306a36Sopenharmony_ci *   finally closed and we are allowed to kill TIME-WAIT too.
6662306a36Sopenharmony_ci * * Second purpose of TIME-WAIT is catching old duplicate segments.
6762306a36Sopenharmony_ci *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
6862306a36Sopenharmony_ci *   with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
6962306a36Sopenharmony_ci * * If we invented some more clever way to catch duplicates
7062306a36Sopenharmony_ci *   (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
7362306a36Sopenharmony_ci * When you compare it to RFCs, please, read section SEGMENT ARRIVES
7462306a36Sopenharmony_ci * from the very beginning.
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * NOTE. With recycling (and later with fin-wait-2) TW bucket
7762306a36Sopenharmony_ci * is _not_ stateless. It means, that strictly speaking we must
7862306a36Sopenharmony_ci * spinlock it. I do not want! Well, probability of misbehaviour
7962306a36Sopenharmony_ci * is ridiculously low and, seems, we could use some mb() tricks
8062306a36Sopenharmony_ci * to avoid misread sequence numbers, states etc.  --ANK
8162306a36Sopenharmony_ci *
8262306a36Sopenharmony_ci * We don't need to initialize tmp_out.sack_ok as we don't use the results
8362306a36Sopenharmony_ci */
8462306a36Sopenharmony_cienum tcp_tw_status
8562306a36Sopenharmony_citcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
8662306a36Sopenharmony_ci			   const struct tcphdr *th)
8762306a36Sopenharmony_ci{
8862306a36Sopenharmony_ci	struct tcp_options_received tmp_opt;
8962306a36Sopenharmony_ci	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
9062306a36Sopenharmony_ci	bool paws_reject = false;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	tmp_opt.saw_tstamp = 0;
9362306a36Sopenharmony_ci	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
9462306a36Sopenharmony_ci		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci		if (tmp_opt.saw_tstamp) {
9762306a36Sopenharmony_ci			if (tmp_opt.rcv_tsecr)
9862306a36Sopenharmony_ci				tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
9962306a36Sopenharmony_ci			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
10062306a36Sopenharmony_ci			tmp_opt.ts_recent_stamp	= tcptw->tw_ts_recent_stamp;
10162306a36Sopenharmony_ci			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
10262306a36Sopenharmony_ci		}
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	if (tw->tw_substate == TCP_FIN_WAIT2) {
10662306a36Sopenharmony_ci		/* Just repeat all the checks of tcp_rcv_state_process() */
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci		/* Out of window, send ACK */
10962306a36Sopenharmony_ci		if (paws_reject ||
11062306a36Sopenharmony_ci		    !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
11162306a36Sopenharmony_ci				   tcptw->tw_rcv_nxt,
11262306a36Sopenharmony_ci				   tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
11362306a36Sopenharmony_ci			return tcp_timewait_check_oow_rate_limit(
11462306a36Sopenharmony_ci				tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci		if (th->rst)
11762306a36Sopenharmony_ci			goto kill;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
12062306a36Sopenharmony_ci			return TCP_TW_RST;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci		/* Dup ACK? */
12362306a36Sopenharmony_ci		if (!th->ack ||
12462306a36Sopenharmony_ci		    !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
12562306a36Sopenharmony_ci		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
12662306a36Sopenharmony_ci			inet_twsk_put(tw);
12762306a36Sopenharmony_ci			return TCP_TW_SUCCESS;
12862306a36Sopenharmony_ci		}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci		/* New data or FIN. If new data arrive after half-duplex close,
13162306a36Sopenharmony_ci		 * reset.
13262306a36Sopenharmony_ci		 */
13362306a36Sopenharmony_ci		if (!th->fin ||
13462306a36Sopenharmony_ci		    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
13562306a36Sopenharmony_ci			return TCP_TW_RST;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci		/* FIN arrived, enter true time-wait state. */
13862306a36Sopenharmony_ci		tw->tw_substate	  = TCP_TIME_WAIT;
13962306a36Sopenharmony_ci		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
14062306a36Sopenharmony_ci		if (tmp_opt.saw_tstamp) {
14162306a36Sopenharmony_ci			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
14262306a36Sopenharmony_ci			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
14362306a36Sopenharmony_ci		}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
14662306a36Sopenharmony_ci		return TCP_TW_ACK;
14762306a36Sopenharmony_ci	}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	/*
15062306a36Sopenharmony_ci	 *	Now real TIME-WAIT state.
15162306a36Sopenharmony_ci	 *
15262306a36Sopenharmony_ci	 *	RFC 1122:
15362306a36Sopenharmony_ci	 *	"When a connection is [...] on TIME-WAIT state [...]
15462306a36Sopenharmony_ci	 *	[a TCP] MAY accept a new SYN from the remote TCP to
15562306a36Sopenharmony_ci	 *	reopen the connection directly, if it:
15662306a36Sopenharmony_ci	 *
15762306a36Sopenharmony_ci	 *	(1)  assigns its initial sequence number for the new
15862306a36Sopenharmony_ci	 *	connection to be larger than the largest sequence
15962306a36Sopenharmony_ci	 *	number it used on the previous connection incarnation,
16062306a36Sopenharmony_ci	 *	and
16162306a36Sopenharmony_ci	 *
16262306a36Sopenharmony_ci	 *	(2)  returns to TIME-WAIT state if the SYN turns out
16362306a36Sopenharmony_ci	 *	to be an old duplicate".
16462306a36Sopenharmony_ci	 */
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	if (!paws_reject &&
16762306a36Sopenharmony_ci	    (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
16862306a36Sopenharmony_ci	     (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
16962306a36Sopenharmony_ci		/* In window segment, it may be only reset or bare ack. */
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci		if (th->rst) {
17262306a36Sopenharmony_ci			/* This is TIME_WAIT assassination, in two flavors.
17362306a36Sopenharmony_ci			 * Oh well... nobody has a sufficient solution to this
17462306a36Sopenharmony_ci			 * protocol bug yet.
17562306a36Sopenharmony_ci			 */
17662306a36Sopenharmony_ci			if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
17762306a36Sopenharmony_cikill:
17862306a36Sopenharmony_ci				inet_twsk_deschedule_put(tw);
17962306a36Sopenharmony_ci				return TCP_TW_SUCCESS;
18062306a36Sopenharmony_ci			}
18162306a36Sopenharmony_ci		} else {
18262306a36Sopenharmony_ci			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
18362306a36Sopenharmony_ci		}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci		if (tmp_opt.saw_tstamp) {
18662306a36Sopenharmony_ci			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
18762306a36Sopenharmony_ci			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
18862306a36Sopenharmony_ci		}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci		inet_twsk_put(tw);
19162306a36Sopenharmony_ci		return TCP_TW_SUCCESS;
19262306a36Sopenharmony_ci	}
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	/* Out of window segment.
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	   All the segments are ACKed immediately.
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	   The only exception is new SYN. We accept it, if it is
19962306a36Sopenharmony_ci	   not old duplicate and we are not in danger to be killed
20062306a36Sopenharmony_ci	   by delayed old duplicates. RFC check is that it has
20162306a36Sopenharmony_ci	   newer sequence number works at rates <40Mbit/sec.
20262306a36Sopenharmony_ci	   However, if paws works, it is reliable AND even more,
20362306a36Sopenharmony_ci	   we even may relax silly seq space cutoff.
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	   RED-PEN: we violate main RFC requirement, if this SYN will appear
20662306a36Sopenharmony_ci	   old duplicate (i.e. we receive RST in reply to SYN-ACK),
20762306a36Sopenharmony_ci	   we must return socket to time-wait state. It is not good,
20862306a36Sopenharmony_ci	   but not fatal yet.
20962306a36Sopenharmony_ci	 */
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	if (th->syn && !th->rst && !th->ack && !paws_reject &&
21262306a36Sopenharmony_ci	    (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
21362306a36Sopenharmony_ci	     (tmp_opt.saw_tstamp &&
21462306a36Sopenharmony_ci	      (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
21562306a36Sopenharmony_ci		u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
21662306a36Sopenharmony_ci		if (isn == 0)
21762306a36Sopenharmony_ci			isn++;
21862306a36Sopenharmony_ci		TCP_SKB_CB(skb)->tcp_tw_isn = isn;
21962306a36Sopenharmony_ci		return TCP_TW_SYN;
22062306a36Sopenharmony_ci	}
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	if (paws_reject)
22362306a36Sopenharmony_ci		__NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	if (!th->rst) {
22662306a36Sopenharmony_ci		/* In this case we must reset the TIMEWAIT timer.
22762306a36Sopenharmony_ci		 *
22862306a36Sopenharmony_ci		 * If it is ACKless SYN it may be both old duplicate
22962306a36Sopenharmony_ci		 * and new good SYN with random sequence number <rcv_nxt.
23062306a36Sopenharmony_ci		 * Do not reschedule in the last case.
23162306a36Sopenharmony_ci		 */
23262306a36Sopenharmony_ci		if (paws_reject || th->ack)
23362306a36Sopenharmony_ci			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci		return tcp_timewait_check_oow_rate_limit(
23662306a36Sopenharmony_ci			tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
23762306a36Sopenharmony_ci	}
23862306a36Sopenharmony_ci	inet_twsk_put(tw);
23962306a36Sopenharmony_ci	return TCP_TW_SUCCESS;
24062306a36Sopenharmony_ci}
24162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_timewait_state_process);
24262306a36Sopenharmony_ci
/* Protocol-specific timewait setup; only does work with CONFIG_TCP_MD5SIG.
 *
 * The timewait bucket does not have the key DB from the sock structure,
 * so a quick copy of the MD5 key in use (if there is one) is attached to
 * it, giving the timewait ACK generating code access to the key.
 * tw_md5_key is left NULL when MD5 is not in use, when no key is found,
 * or when any step of the setup fails.
 */
static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw)
{
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *src;

	tcptw->tw_md5_key = NULL;
	if (!static_branch_unlikely(&tcp_md5_needed.key))
		return;

	src = tcp_sk(sk)->af_specific->md5_lookup(sk, sk);
	if (!src)
		return;

	tcptw->tw_md5_key = kmemdup(src, sizeof(*src), GFP_ATOMIC);
	if (!tcptw->tw_md5_key)
		return;

	/* Both steps must succeed for the copied key to be kept. */
	if (tcp_alloc_md5sig_pool() &&
	    static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key))
		return;

	/* Unexpected failure: warn once and discard the copy. */
	WARN_ON_ONCE(1);
	kfree(tcptw->tw_md5_key);
	tcptw->tw_md5_key = NULL;
#endif
}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci/*
27862306a36Sopenharmony_ci * Move a socket to time-wait or dead fin-wait-2 state.
27962306a36Sopenharmony_ci */
28062306a36Sopenharmony_civoid tcp_time_wait(struct sock *sk, int state, int timeo)
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
28362306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
28462306a36Sopenharmony_ci	struct net *net = sock_net(sk);
28562306a36Sopenharmony_ci	struct inet_timewait_sock *tw;
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	if (tw) {
29062306a36Sopenharmony_ci		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
29162306a36Sopenharmony_ci		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci		tw->tw_transparent	= inet_test_bit(TRANSPARENT, sk);
29462306a36Sopenharmony_ci		tw->tw_mark		= sk->sk_mark;
29562306a36Sopenharmony_ci		tw->tw_priority		= sk->sk_priority;
29662306a36Sopenharmony_ci		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
29762306a36Sopenharmony_ci		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
29862306a36Sopenharmony_ci		tcptw->tw_snd_nxt	= tp->snd_nxt;
29962306a36Sopenharmony_ci		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
30062306a36Sopenharmony_ci		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
30162306a36Sopenharmony_ci		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
30262306a36Sopenharmony_ci		tcptw->tw_ts_offset	= tp->tsoffset;
30362306a36Sopenharmony_ci		tcptw->tw_last_oow_ack_time = 0;
30462306a36Sopenharmony_ci		tcptw->tw_tx_delay	= tp->tcp_tx_delay;
30562306a36Sopenharmony_ci		tw->tw_txhash		= sk->sk_txhash;
30662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
30762306a36Sopenharmony_ci		if (tw->tw_family == PF_INET6) {
30862306a36Sopenharmony_ci			struct ipv6_pinfo *np = inet6_sk(sk);
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci			tw->tw_v6_daddr = sk->sk_v6_daddr;
31162306a36Sopenharmony_ci			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
31262306a36Sopenharmony_ci			tw->tw_tclass = np->tclass;
31362306a36Sopenharmony_ci			tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
31462306a36Sopenharmony_ci			tw->tw_ipv6only = sk->sk_ipv6only;
31562306a36Sopenharmony_ci		}
31662306a36Sopenharmony_ci#endif
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci		tcp_time_wait_init(sk, tcptw);
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci		/* Get the TIME_WAIT timeout firing. */
32162306a36Sopenharmony_ci		if (timeo < rto)
32262306a36Sopenharmony_ci			timeo = rto;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci		if (state == TCP_TIME_WAIT)
32562306a36Sopenharmony_ci			timeo = TCP_TIMEWAIT_LEN;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci		/* tw_timer is pinned, so we need to make sure BH are disabled
32862306a36Sopenharmony_ci		 * in following section, otherwise timer handler could run before
32962306a36Sopenharmony_ci		 * we complete the initialization.
33062306a36Sopenharmony_ci		 */
33162306a36Sopenharmony_ci		local_bh_disable();
33262306a36Sopenharmony_ci		inet_twsk_schedule(tw, timeo);
33362306a36Sopenharmony_ci		/* Linkage updates.
33462306a36Sopenharmony_ci		 * Note that access to tw after this point is illegal.
33562306a36Sopenharmony_ci		 */
33662306a36Sopenharmony_ci		inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo);
33762306a36Sopenharmony_ci		local_bh_enable();
33862306a36Sopenharmony_ci	} else {
33962306a36Sopenharmony_ci		/* Sorry, if we're out of memory, just CLOSE this
34062306a36Sopenharmony_ci		 * socket up.  We've got bigger problems than
34162306a36Sopenharmony_ci		 * non-graceful socket closings.
34262306a36Sopenharmony_ci		 */
34362306a36Sopenharmony_ci		NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW);
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	tcp_update_metrics(sk);
34762306a36Sopenharmony_ci	tcp_done(sk);
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_time_wait);
35062306a36Sopenharmony_ci
/* Timewait socket destructor.  With CONFIG_TCP_MD5SIG, and only while the
 * tcp_md5_needed static branch is enabled, frees the MD5 key copy attached
 * to the bucket (via RCU) and drops the matching static-branch count.
 */
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_timewait_sock *twsk;

	if (!static_branch_unlikely(&tcp_md5_needed.key))
		return;

	twsk = tcp_twsk(sk);
	if (!twsk->tw_md5_key)
		return;

	kfree_rcu(twsk->tw_md5_key, rcu);
	static_branch_slow_dec_deferred(&tcp_md5_needed);
#endif
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_civoid tcp_twsk_purge(struct list_head *net_exit_list, int family)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	bool purged_once = false;
36962306a36Sopenharmony_ci	struct net *net;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	list_for_each_entry(net, net_exit_list, exit_list) {
37262306a36Sopenharmony_ci		if (net->ipv4.tcp_death_row.hashinfo->pernet) {
37362306a36Sopenharmony_ci			/* Even if tw_refcount == 1, we must clean up kernel reqsk */
37462306a36Sopenharmony_ci			inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
37562306a36Sopenharmony_ci		} else if (!purged_once) {
37662306a36Sopenharmony_ci			inet_twsk_purge(&tcp_hashinfo, family);
37762306a36Sopenharmony_ci			purged_once = true;
37862306a36Sopenharmony_ci		}
37962306a36Sopenharmony_ci	}
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_twsk_purge);
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci/* Warning : This function is called without sk_listener being locked.
38462306a36Sopenharmony_ci * Be sure to read socket fields once, as their value could change under us.
38562306a36Sopenharmony_ci */
38662306a36Sopenharmony_civoid tcp_openreq_init_rwin(struct request_sock *req,
38762306a36Sopenharmony_ci			   const struct sock *sk_listener,
38862306a36Sopenharmony_ci			   const struct dst_entry *dst)
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci	struct inet_request_sock *ireq = inet_rsk(req);
39162306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk_listener);
39262306a36Sopenharmony_ci	int full_space = tcp_full_space(sk_listener);
39362306a36Sopenharmony_ci	u32 window_clamp;
39462306a36Sopenharmony_ci	__u8 rcv_wscale;
39562306a36Sopenharmony_ci	u32 rcv_wnd;
39662306a36Sopenharmony_ci	int mss;
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
39962306a36Sopenharmony_ci	window_clamp = READ_ONCE(tp->window_clamp);
40062306a36Sopenharmony_ci	/* Set this up on the first call only */
40162306a36Sopenharmony_ci	req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	/* limit the window selection if the user enforce a smaller rx buffer */
40462306a36Sopenharmony_ci	if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK &&
40562306a36Sopenharmony_ci	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
40662306a36Sopenharmony_ci		req->rsk_window_clamp = full_space;
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req);
40962306a36Sopenharmony_ci	if (rcv_wnd == 0)
41062306a36Sopenharmony_ci		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
41162306a36Sopenharmony_ci	else if (full_space < rcv_wnd * mss)
41262306a36Sopenharmony_ci		full_space = rcv_wnd * mss;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	/* tcp_full_space because it is guaranteed to be the first packet */
41562306a36Sopenharmony_ci	tcp_select_initial_window(sk_listener, full_space,
41662306a36Sopenharmony_ci		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
41762306a36Sopenharmony_ci		&req->rsk_rcv_wnd,
41862306a36Sopenharmony_ci		&req->rsk_window_clamp,
41962306a36Sopenharmony_ci		ireq->wscale_ok,
42062306a36Sopenharmony_ci		&rcv_wscale,
42162306a36Sopenharmony_ci		rcv_wnd);
42262306a36Sopenharmony_ci	ireq->rcv_wscale = rcv_wscale;
42362306a36Sopenharmony_ci}
42462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_openreq_init_rwin);
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_cistatic void tcp_ecn_openreq_child(struct tcp_sock *tp,
42762306a36Sopenharmony_ci				  const struct request_sock *req)
42862306a36Sopenharmony_ci{
42962306a36Sopenharmony_ci	tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_civoid tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
43362306a36Sopenharmony_ci{
43462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
43562306a36Sopenharmony_ci	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
43662306a36Sopenharmony_ci	bool ca_got_dst = false;
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	if (ca_key != TCP_CA_UNSPEC) {
43962306a36Sopenharmony_ci		const struct tcp_congestion_ops *ca;
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci		rcu_read_lock();
44262306a36Sopenharmony_ci		ca = tcp_ca_find_key(ca_key);
44362306a36Sopenharmony_ci		if (likely(ca && bpf_try_module_get(ca, ca->owner))) {
44462306a36Sopenharmony_ci			icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst);
44562306a36Sopenharmony_ci			icsk->icsk_ca_ops = ca;
44662306a36Sopenharmony_ci			ca_got_dst = true;
44762306a36Sopenharmony_ci		}
44862306a36Sopenharmony_ci		rcu_read_unlock();
44962306a36Sopenharmony_ci	}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	/* If no valid choice made yet, assign current system default ca. */
45262306a36Sopenharmony_ci	if (!ca_got_dst &&
45362306a36Sopenharmony_ci	    (!icsk->icsk_ca_setsockopt ||
45462306a36Sopenharmony_ci	     !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner)))
45562306a36Sopenharmony_ci		tcp_assign_congestion_control(sk);
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	tcp_set_ca_state(sk, TCP_CA_Open);
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_cistatic void smc_check_reset_syn_req(const struct tcp_sock *oldtp,
46262306a36Sopenharmony_ci				    struct request_sock *req,
46362306a36Sopenharmony_ci				    struct tcp_sock *newtp)
46462306a36Sopenharmony_ci{
46562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_SMC)
46662306a36Sopenharmony_ci	struct inet_request_sock *ireq;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	if (static_branch_unlikely(&tcp_have_smc)) {
46962306a36Sopenharmony_ci		ireq = inet_rsk(req);
47062306a36Sopenharmony_ci		if (oldtp->syn_smc && !ireq->smc_ok)
47162306a36Sopenharmony_ci			newtp->syn_smc = 0;
47262306a36Sopenharmony_ci	}
47362306a36Sopenharmony_ci#endif
47462306a36Sopenharmony_ci}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci/* This is not only more efficient than what we used to do, it eliminates
47762306a36Sopenharmony_ci * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
47862306a36Sopenharmony_ci *
47962306a36Sopenharmony_ci * Actually, we could lots of memory writes here. tp of listening
48062306a36Sopenharmony_ci * socket contains all necessary default parameters.
48162306a36Sopenharmony_ci */
48262306a36Sopenharmony_cistruct sock *tcp_create_openreq_child(const struct sock *sk,
48362306a36Sopenharmony_ci				      struct request_sock *req,
48462306a36Sopenharmony_ci				      struct sk_buff *skb)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
48762306a36Sopenharmony_ci	const struct inet_request_sock *ireq = inet_rsk(req);
48862306a36Sopenharmony_ci	struct tcp_request_sock *treq = tcp_rsk(req);
48962306a36Sopenharmony_ci	struct inet_connection_sock *newicsk;
49062306a36Sopenharmony_ci	const struct tcp_sock *oldtp;
49162306a36Sopenharmony_ci	struct tcp_sock *newtp;
49262306a36Sopenharmony_ci	u32 seq;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	if (!newsk)
49562306a36Sopenharmony_ci		return NULL;
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	newicsk = inet_csk(newsk);
49862306a36Sopenharmony_ci	newtp = tcp_sk(newsk);
49962306a36Sopenharmony_ci	oldtp = tcp_sk(sk);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	smc_check_reset_syn_req(oldtp, req, newtp);
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci	/* Now setup tcp_sock */
50462306a36Sopenharmony_ci	newtp->pred_flags = 0;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	seq = treq->rcv_isn + 1;
50762306a36Sopenharmony_ci	newtp->rcv_wup = seq;
50862306a36Sopenharmony_ci	WRITE_ONCE(newtp->copied_seq, seq);
50962306a36Sopenharmony_ci	WRITE_ONCE(newtp->rcv_nxt, seq);
51062306a36Sopenharmony_ci	newtp->segs_in = 1;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	seq = treq->snt_isn + 1;
51362306a36Sopenharmony_ci	newtp->snd_sml = newtp->snd_una = seq;
51462306a36Sopenharmony_ci	WRITE_ONCE(newtp->snd_nxt, seq);
51562306a36Sopenharmony_ci	newtp->snd_up = seq;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	INIT_LIST_HEAD(&newtp->tsq_node);
51862306a36Sopenharmony_ci	INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	tcp_init_wl(newtp, treq->rcv_isn);
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
52362306a36Sopenharmony_ci	newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	newtp->lsndtime = tcp_jiffies32;
52662306a36Sopenharmony_ci	newsk->sk_txhash = READ_ONCE(treq->txhash);
52762306a36Sopenharmony_ci	newtp->total_retrans = req->num_retrans;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	tcp_init_xmit_timers(newsk);
53062306a36Sopenharmony_ci	WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	if (sock_flag(newsk, SOCK_KEEPOPEN))
53362306a36Sopenharmony_ci		inet_csk_reset_keepalive_timer(newsk,
53462306a36Sopenharmony_ci					       keepalive_time_when(newtp));
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
53762306a36Sopenharmony_ci	newtp->rx_opt.sack_ok = ireq->sack_ok;
53862306a36Sopenharmony_ci	newtp->window_clamp = req->rsk_window_clamp;
53962306a36Sopenharmony_ci	newtp->rcv_ssthresh = req->rsk_rcv_wnd;
54062306a36Sopenharmony_ci	newtp->rcv_wnd = req->rsk_rcv_wnd;
54162306a36Sopenharmony_ci	newtp->rx_opt.wscale_ok = ireq->wscale_ok;
54262306a36Sopenharmony_ci	if (newtp->rx_opt.wscale_ok) {
54362306a36Sopenharmony_ci		newtp->rx_opt.snd_wscale = ireq->snd_wscale;
54462306a36Sopenharmony_ci		newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
54562306a36Sopenharmony_ci	} else {
54662306a36Sopenharmony_ci		newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
54762306a36Sopenharmony_ci		newtp->window_clamp = min(newtp->window_clamp, 65535U);
54862306a36Sopenharmony_ci	}
54962306a36Sopenharmony_ci	newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale;
55062306a36Sopenharmony_ci	newtp->max_window = newtp->snd_wnd;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	if (newtp->rx_opt.tstamp_ok) {
55362306a36Sopenharmony_ci		newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
55462306a36Sopenharmony_ci		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
55562306a36Sopenharmony_ci		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
55662306a36Sopenharmony_ci	} else {
55762306a36Sopenharmony_ci		newtp->rx_opt.ts_recent_stamp = 0;
55862306a36Sopenharmony_ci		newtp->tcp_header_len = sizeof(struct tcphdr);
55962306a36Sopenharmony_ci	}
56062306a36Sopenharmony_ci	if (req->num_timeout) {
56162306a36Sopenharmony_ci		newtp->undo_marker = treq->snt_isn;
56262306a36Sopenharmony_ci		newtp->retrans_stamp = div_u64(treq->snt_synack,
56362306a36Sopenharmony_ci					       USEC_PER_SEC / TCP_TS_HZ);
56462306a36Sopenharmony_ci	}
56562306a36Sopenharmony_ci	newtp->tsoffset = treq->ts_off;
56662306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
56762306a36Sopenharmony_ci	newtp->md5sig_info = NULL;	/*XXX*/
56862306a36Sopenharmony_ci#endif
56962306a36Sopenharmony_ci	if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
57062306a36Sopenharmony_ci		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
57162306a36Sopenharmony_ci	newtp->rx_opt.mss_clamp = req->mss;
57262306a36Sopenharmony_ci	tcp_ecn_openreq_child(newtp, req);
57362306a36Sopenharmony_ci	newtp->fastopen_req = NULL;
57462306a36Sopenharmony_ci	RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	newtp->bpf_chg_cc_inprogress = 0;
57762306a36Sopenharmony_ci	tcp_bpf_clone(sk, newsk);
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	return newsk;
58262306a36Sopenharmony_ci}
58362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_create_openreq_child);
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci/*
58662306a36Sopenharmony_ci * Process an incoming packet for SYN_RECV sockets represented as a
58762306a36Sopenharmony_ci * request_sock. Normally sk is the listener socket but for TFO it
58862306a36Sopenharmony_ci * points to the child socket.
58962306a36Sopenharmony_ci *
59062306a36Sopenharmony_ci * XXX (TFO) - The current impl contains a special check for ack
59162306a36Sopenharmony_ci * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
59262306a36Sopenharmony_ci *
59362306a36Sopenharmony_ci * We don't need to initialize tmp_opt.sack_ok as we don't use the results.
59462306a36Sopenharmony_ci *
59562306a36Sopenharmony_ci * Note: If @fastopen is true, this can be called from process context.
59662306a36Sopenharmony_ci *       Otherwise, this is from BH context.
 *
 * @sk:         listener socket, or the child socket when @fastopen is true
 * @skb:        the received segment
 * @req:        the request_sock the segment was matched to
 * @fastopen:   true when @req belongs to a Fast Open child
 * @req_stolen: output flag. It is written on two paths only: after a child
 *              has been created and is passed to
 *              inet_csk_complete_hashdance() (set to !own_req), and on the
 *              non-Fast-Open embryonic reset path (set to true when this
 *              call did not unlink @req). All other paths leave it
 *              untouched.
 *
 * Return: NULL when the segment is fully handled or dropped here (pure
 * retransmitted SYN, out-of-window or PAWS-rejected segment, segment
 * without ACK, bare ACK dropped under TCP_DEFER_ACCEPT, child creation
 * failure, embryonic reset). @sk when a non-Fast-Open segment carries an
 * ACK different from snt_isn + 1, or when a Fast Open segment passed all
 * checks. Otherwise the child socket, either directly or as returned by
 * inet_csk_complete_hashdance().
59762306a36Sopenharmony_ci */
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_cistruct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
60062306a36Sopenharmony_ci			   struct request_sock *req,
60162306a36Sopenharmony_ci			   bool fastopen, bool *req_stolen)
60262306a36Sopenharmony_ci{
60362306a36Sopenharmony_ci	struct tcp_options_received tmp_opt;
60462306a36Sopenharmony_ci	struct sock *child;
60562306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
60662306a36Sopenharmony_ci	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
60762306a36Sopenharmony_ci	bool paws_reject = false;
60862306a36Sopenharmony_ci	bool own_req;
60962306a36Sopenharmony_ci
	/* Only saw_tstamp is preset; options are parsed just when the header
	 * is longer than the fixed struct tcphdr.
	 */
61062306a36Sopenharmony_ci	tmp_opt.saw_tstamp = 0;
61162306a36Sopenharmony_ci	if (th->doff > (sizeof(struct tcphdr)>>2)) {
61262306a36Sopenharmony_ci		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci		if (tmp_opt.saw_tstamp) {
61562306a36Sopenharmony_ci			tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
			/* Strip this request's timestamp offset from a non-zero echo. */
61662306a36Sopenharmony_ci			if (tmp_opt.rcv_tsecr)
61762306a36Sopenharmony_ci				tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
61862306a36Sopenharmony_ci			/* We do not store the true stamp, but it is not required:
61962306a36Sopenharmony_ci			 * it can be estimated (approximately)
62062306a36Sopenharmony_ci			 * from other data.
62162306a36Sopenharmony_ci			 */
62262306a36Sopenharmony_ci			tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ;
62362306a36Sopenharmony_ci			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
62462306a36Sopenharmony_ci		}
62562306a36Sopenharmony_ci	}
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	/* Check for pure retransmitted SYN. */
62862306a36Sopenharmony_ci	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
62962306a36Sopenharmony_ci	    flg == TCP_FLAG_SYN &&
63062306a36Sopenharmony_ci	    !paws_reject) {
63162306a36Sopenharmony_ci		/*
63262306a36Sopenharmony_ci		 * RFC793 draws (Incorrectly! It was fixed in RFC1122)
63362306a36Sopenharmony_ci		 * this case on figure 6 and figure 8, but formal
63462306a36Sopenharmony_ci		 * protocol description says NOTHING.
63562306a36Sopenharmony_ci		 * To be more exact, it says that we should send ACK,
63662306a36Sopenharmony_ci		 * because this segment (at least, if it has no data)
63762306a36Sopenharmony_ci		 * is out of window.
63862306a36Sopenharmony_ci		 *
63962306a36Sopenharmony_ci		 *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
64062306a36Sopenharmony_ci		 *  describe SYN-RECV state. All the description
64162306a36Sopenharmony_ci		 *  is wrong, we cannot trust it and should
64262306a36Sopenharmony_ci		 *  rely only on common sense and implementation
64362306a36Sopenharmony_ci		 *  experience.
64462306a36Sopenharmony_ci		 *
64562306a36Sopenharmony_ci		 * Enforce "SYN-ACK" according to figure 8, figure 6
64662306a36Sopenharmony_ci		 * of RFC793, fixed by RFC1122.
64762306a36Sopenharmony_ci		 *
64862306a36Sopenharmony_ci		 * Note that even if there is new data in the SYN packet
64962306a36Sopenharmony_ci		 * it will be thrown away too.
65062306a36Sopenharmony_ci		 *
65162306a36Sopenharmony_ci		 * Reset timer after retransmitting SYNACK, similar to
65262306a36Sopenharmony_ci		 * the idea of fast retransmit in recovery.
65362306a36Sopenharmony_ci		 */
65462306a36Sopenharmony_ci		if (!tcp_oow_rate_limited(sock_net(sk), skb,
65562306a36Sopenharmony_ci					  LINUX_MIB_TCPACKSKIPPEDSYNRECV,
65662306a36Sopenharmony_ci					  &tcp_rsk(req)->last_oow_ack_time) &&
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci		    !inet_rtx_syn_ack(sk, req)) {
65962306a36Sopenharmony_ci			unsigned long expires = jiffies;
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci			expires += reqsk_timeout(req, TCP_RTO_MAX);
			/* Non-TFO: push back the request timer, but only if
			 * it is still pending. TFO: just record the new
			 * expiry in rsk_timer.
			 */
66262306a36Sopenharmony_ci			if (!fastopen)
66362306a36Sopenharmony_ci				mod_timer_pending(&req->rsk_timer, expires);
66462306a36Sopenharmony_ci			else
66562306a36Sopenharmony_ci				req->rsk_timer.expires = expires;
66662306a36Sopenharmony_ci		}
66762306a36Sopenharmony_ci		return NULL;
66862306a36Sopenharmony_ci	}
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	/* Further reproduces section "SEGMENT ARRIVES"
67162306a36Sopenharmony_ci	   for state SYN-RECEIVED of RFC793.
67262306a36Sopenharmony_ci	   It is broken, however, it does not work only
67362306a36Sopenharmony_ci	   when SYNs are crossed.
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci	   You would think that SYN crossing is impossible here, since
67662306a36Sopenharmony_ci	   we should have a SYN_SENT socket (from connect()) on our end,
67762306a36Sopenharmony_ci	   but this is not true if the crossed SYNs were sent to both
67862306a36Sopenharmony_ci	   ends by a malicious third party.  We must defend against this,
67962306a36Sopenharmony_ci	   and to do that we first verify the ACK (as per RFC793, page
68062306a36Sopenharmony_ci	   36) and reset if it is invalid.  Is this a true full defense?
68162306a36Sopenharmony_ci	   To convince ourselves, let us consider a way in which the ACK
68262306a36Sopenharmony_ci	   test can still pass in this 'malicious crossed SYNs' case.
68362306a36Sopenharmony_ci	   Malicious sender sends identical SYNs (and thus identical sequence
68462306a36Sopenharmony_ci	   numbers) to both A and B:
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci		A: gets SYN, seq=7
68762306a36Sopenharmony_ci		B: gets SYN, seq=7
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	   By our good fortune, both A and B select the same initial
69062306a36Sopenharmony_ci	   send sequence number of seven :-)
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci		A: sends SYN|ACK, seq=7, ack_seq=8
69362306a36Sopenharmony_ci		B: sends SYN|ACK, seq=7, ack_seq=8
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	   So we are now A eating this SYN|ACK, ACK test passes.  So
69662306a36Sopenharmony_ci	   does sequence test, SYN is truncated, and thus we consider
69762306a36Sopenharmony_ci	   it a bare ACK.
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
70062306a36Sopenharmony_ci	   bare ACK.  Otherwise, we create an established connection.  Both
70162306a36Sopenharmony_ci	   ends (listening sockets) accept the new incoming connection and try
70262306a36Sopenharmony_ci	   to talk to each other. 8-)
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	   Note: This case is both harmless, and rare.  Possibility is about the
70562306a36Sopenharmony_ci	   same as us discovering intelligent life on another planet tomorrow.
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	   But generally, we should (RFC lies!) accept ACK
70862306a36Sopenharmony_ci	   from SYNACK both here and in tcp_rcv_state_process().
70962306a36Sopenharmony_ci	   tcp_rcv_state_process() does not, hence we do not either.
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	   Note that the case is absolutely generic:
71262306a36Sopenharmony_ci	   we cannot optimize anything here without
71362306a36Sopenharmony_ci	   violating protocol. All the checks must be made
71462306a36Sopenharmony_ci	   before attempting to create the socket.
71562306a36Sopenharmony_ci	 */
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
71862306a36Sopenharmony_ci	 *                  and the incoming segment acknowledges something not yet
71962306a36Sopenharmony_ci	 *                  sent (the segment carries an unacceptable ACK) ...
72062306a36Sopenharmony_ci	 *                  a reset is sent."
72162306a36Sopenharmony_ci	 *
72262306a36Sopenharmony_ci	 * Invalid ACK: reset will be sent by listening socket.
72362306a36Sopenharmony_ci	 * Note that the ACK validity check for a Fast Open socket is done
72462306a36Sopenharmony_ci	 * elsewhere and is checked directly against the child socket rather
72562306a36Sopenharmony_ci	 * than req because user data may have been sent out.
72662306a36Sopenharmony_ci	 */
72762306a36Sopenharmony_ci	if ((flg & TCP_FLAG_ACK) && !fastopen &&
72862306a36Sopenharmony_ci	    (TCP_SKB_CB(skb)->ack_seq !=
72962306a36Sopenharmony_ci	     tcp_rsk(req)->snt_isn + 1))
73062306a36Sopenharmony_ci		return sk;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	/* Also, it would not be a bad idea to check rcv_tsecr, which
73362306a36Sopenharmony_ci	 * is essentially an ACK extension; too early or too late values
73462306a36Sopenharmony_ci	 * should cause a reset in unsynchronized states.
73562306a36Sopenharmony_ci	 */
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	/* RFC793: "first check sequence number". */
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
74062306a36Sopenharmony_ci					  tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
74162306a36Sopenharmony_ci		/* Out of window: send ACK and drop. */
74262306a36Sopenharmony_ci		if (!(flg & TCP_FLAG_RST) &&
74362306a36Sopenharmony_ci		    !tcp_oow_rate_limited(sock_net(sk), skb,
74462306a36Sopenharmony_ci					  LINUX_MIB_TCPACKSKIPPEDSYNRECV,
74562306a36Sopenharmony_ci					  &tcp_rsk(req)->last_oow_ack_time))
74662306a36Sopenharmony_ci			req->rsk_ops->send_ack(sk, skb, req);
74762306a36Sopenharmony_ci		if (paws_reject)
74862306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
74962306a36Sopenharmony_ci		return NULL;
75062306a36Sopenharmony_ci	}
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	/* In sequence, PAWS is OK. */
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	/* TODO: We probably should defer ts_recent change once
75562306a36Sopenharmony_ci	 * we take ownership of @req.
75662306a36Sopenharmony_ci	 */
75762306a36Sopenharmony_ci	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
75862306a36Sopenharmony_ci		WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
76162306a36Sopenharmony_ci		/* Truncate SYN, it is out of window starting
76262306a36Sopenharmony_ci		   at tcp_rsk(req)->rcv_isn + 1. */
76362306a36Sopenharmony_ci		flg &= ~TCP_FLAG_SYN;
76462306a36Sopenharmony_ci	}
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	/* RFC793: "second check the RST bit" and
76762306a36Sopenharmony_ci	 *	   "fourth, check the SYN bit"
76862306a36Sopenharmony_ci	 */
76962306a36Sopenharmony_ci	if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
77062306a36Sopenharmony_ci		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
77162306a36Sopenharmony_ci		goto embryonic_reset;
77262306a36Sopenharmony_ci	}
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci	/* ACK sequence verified above, just make sure ACK is
77562306a36Sopenharmony_ci	 * set.  If ACK not set, just silently drop the packet.
77662306a36Sopenharmony_ci	 *
77762306a36Sopenharmony_ci	 * XXX (TFO) - if we ever allow "data after SYN", the
77862306a36Sopenharmony_ci	 * following check needs to be removed.
77962306a36Sopenharmony_ci	 */
78062306a36Sopenharmony_ci	if (!(flg & TCP_FLAG_ACK))
78162306a36Sopenharmony_ci		return NULL;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	/* For Fast Open no more processing is needed (sk is the
78462306a36Sopenharmony_ci	 * child socket).
78562306a36Sopenharmony_ci	 */
78662306a36Sopenharmony_ci	if (fastopen)
78762306a36Sopenharmony_ci		return sk;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
79062306a36Sopenharmony_ci	if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) &&
79162306a36Sopenharmony_ci	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
79262306a36Sopenharmony_ci		inet_rsk(req)->acked = 1;
79362306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
79462306a36Sopenharmony_ci		return NULL;
79562306a36Sopenharmony_ci	}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	/* OK, ACK is valid, create big socket and
79862306a36Sopenharmony_ci	 * feed this segment to it. It will repeat all
79962306a36Sopenharmony_ci	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
80062306a36Sopenharmony_ci	 * ESTABLISHED STATE. If it is dropped after the
80162306a36Sopenharmony_ci	 * socket is created, expect trouble.
80262306a36Sopenharmony_ci	 */
80362306a36Sopenharmony_ci	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
80462306a36Sopenharmony_ci							 req, &own_req);
80562306a36Sopenharmony_ci	if (!child)
80662306a36Sopenharmony_ci		goto listen_overflow;
80762306a36Sopenharmony_ci
	/* We own @req and rsk_drop_req() asks for it to be dropped: take it
	 * off its listener's accept queue and hand the child back directly,
	 * without inet_csk_complete_hashdance(). *req_stolen is not written
	 * on this path.
	 */
80862306a36Sopenharmony_ci	if (own_req && rsk_drop_req(req)) {
80962306a36Sopenharmony_ci		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
81062306a36Sopenharmony_ci		inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
81162306a36Sopenharmony_ci		return child;
81262306a36Sopenharmony_ci	}
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci	sock_rps_save_rxhash(child, skb);
81562306a36Sopenharmony_ci	tcp_synack_rtt_meas(child, req);
	/* Tell the caller when syn_recv_sock() did not give us ownership of @req. */
81662306a36Sopenharmony_ci	*req_stolen = !own_req;
81762306a36Sopenharmony_ci	return inet_csk_complete_hashdance(sk, child, req, own_req);
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_cilisten_overflow:
	/* syn_recv_sock() returned no child. A listener other than the one
	 * @req is attached to is accounted as a failed request migration.
	 */
82062306a36Sopenharmony_ci	if (sk != req->rsk_listener)
82162306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
82262306a36Sopenharmony_ci
	/* Unless sysctl_tcp_abort_on_overflow is set, only mark the request
	 * as acked and drop the segment; otherwise fall through to the reset.
	 */
82362306a36Sopenharmony_ci	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
82462306a36Sopenharmony_ci		inet_rsk(req)->acked = 1;
82562306a36Sopenharmony_ci		return NULL;
82662306a36Sopenharmony_ci	}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ciembryonic_reset:
82962306a36Sopenharmony_ci	if (!(flg & TCP_FLAG_RST)) {
83062306a36Sopenharmony_ci		/* Received a bad SYN pkt - for TFO we try not to reset
83162306a36Sopenharmony_ci		 * the local connection unless it's really necessary to
83262306a36Sopenharmony_ci		 * avoid becoming vulnerable to outside attack aiming at
83362306a36Sopenharmony_ci		 * resetting legit local connections.
83462306a36Sopenharmony_ci		 */
83562306a36Sopenharmony_ci		req->rsk_ops->send_reset(sk, skb);
83662306a36Sopenharmony_ci	} else if (fastopen) { /* received a valid RST pkt */
83762306a36Sopenharmony_ci		reqsk_fastopen_remove(sk, req, true);
83862306a36Sopenharmony_ci		tcp_reset(sk, skb);
83962306a36Sopenharmony_ci	}
	/* Non-TFO: drop @req from the queue. The reset is counted only when
	 * this call did the unlinking; otherwise report @req as stolen.
	 */
84062306a36Sopenharmony_ci	if (!fastopen) {
84162306a36Sopenharmony_ci		bool unlinked = inet_csk_reqsk_queue_drop(sk, req);
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci		if (unlinked)
84462306a36Sopenharmony_ci			__NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
84562306a36Sopenharmony_ci		*req_stolen = !unlinked;
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci	return NULL;
84862306a36Sopenharmony_ci}
84962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_check_req);
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci/*
85262306a36Sopenharmony_ci * Queue segment on the new socket if the new socket is active,
85362306a36Sopenharmony_ci * otherwise we just shortcircuit this and continue with
85462306a36Sopenharmony_ci * the new socket.
85562306a36Sopenharmony_ci *
85662306a36Sopenharmony_ci * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
85762306a36Sopenharmony_ci * when entering. But other states are possible due to a race condition
85862306a36Sopenharmony_ci * where after __inet_lookup_established() fails but before the listener
85962306a36Sopenharmony_ci * lock is obtained, other packets cause the same connection to
86062306a36Sopenharmony_ci * be created.
 *
 * @parent: socket whose sk_data_ready() is invoked when @child leaves
 *          TCP_SYN_RECV while processing @skb
 * @child:  socket the segment is delivered to; its sk_lock.slock is
 *          released (see the __releases() annotation) and one reference
 *          is dropped with sock_put() before returning
 * @skb:    the segment to process or to add to @child's backlog
 *
 * Return: the result of tcp_rcv_state_process() when @child is not owned
 * by a user context, 0 when the segment was put on the backlog instead.
86162306a36Sopenharmony_ci */
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ciint tcp_child_process(struct sock *parent, struct sock *child,
86462306a36Sopenharmony_ci		      struct sk_buff *skb)
86562306a36Sopenharmony_ci	__releases(&((child)->sk_lock.slock))
86662306a36Sopenharmony_ci{
86762306a36Sopenharmony_ci	int ret = 0;
	/* Snapshot taken before processing, to detect a state change below. */
86862306a36Sopenharmony_ci	int state = child->sk_state;
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_ci	/* record sk_napi_id and sk_rx_queue_mapping of child. */
87162306a36Sopenharmony_ci	sk_mark_napi_id_set(child, skb);
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci	tcp_segs_in(tcp_sk(child), skb);
87462306a36Sopenharmony_ci	if (!sock_owned_by_user(child)) {
87562306a36Sopenharmony_ci		ret = tcp_rcv_state_process(child, skb);
87662306a36Sopenharmony_ci		/* Wakeup parent, send SIGIO */
87762306a36Sopenharmony_ci		if (state == TCP_SYN_RECV && child->sk_state != state)
87862306a36Sopenharmony_ci			parent->sk_data_ready(parent);
87962306a36Sopenharmony_ci	} else {
88062306a36Sopenharmony_ci		/* Alas, it is possible again, because we do lookup
88162306a36Sopenharmony_ci		 * in main socket hash table and lock on listening
88262306a36Sopenharmony_ci		 * socket does not protect us any more.
88362306a36Sopenharmony_ci		 */
88462306a36Sopenharmony_ci		__sk_add_backlog(child, skb);
88562306a36Sopenharmony_ci	}
88662306a36Sopenharmony_ci
	/* Release the child's spinlock and drop one reference on it. */
88762306a36Sopenharmony_ci	bh_unlock_sock(child);
88862306a36Sopenharmony_ci	sock_put(child);
88962306a36Sopenharmony_ci	return ret;
89062306a36Sopenharmony_ci}
89162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_child_process);
892