// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:
 *		Pedro Roque	:	Fast Retransmit/Recovery.
 *					Two receive queues.
 *					Retransmit queue handled by TCP.
 *					Better retransmit timer handling.
 *					New congestion avoidance.
 *					Header prediction.
 *					Variable renaming.
 *
 *		Eric		:	Fast Retransmit.
 *		Randy Scott	:	MSS option defines.
 *		Eric Schenk	:	Fixes to slow start algorithm.
 *		Eric Schenk	:	Yet another double ACK bug.
 *		Eric Schenk	:	Delayed ACK bug fixes.
 *		Eric Schenk	:	Floyd style fast retrans war avoidance.
 *		David S. Miller	:	Don't allow zero congestion window.
 *		Eric Schenk	:	Fix retransmitter so that it sends
 *					next packet on ack of previous packet.
 *		Andi Kleen	:	Moved open_request checking here
 *					and process RSTs for open_requests.
 *		Andi Kleen	:	Better prune_queue, and other fixes.
 *		Andrey Savochkin:	Fix RTT measurements in the presence of
 *					timestamps.
 *		Andrey Savochkin:	Check sequence numbers correctly when
 *					removing SACKs due to in sequence incoming
 *					data segments.
 *		Andi Kleen:		Make sure we never ack data there is not
 *					enough room for. Also make this condition
 *					a fatal error if it might still happen.
 *		Andi Kleen:		Add tcp_measure_rcv_mss to make
 *					connections with MSS<min(MTU,ann. MSS)
 *					work without delayed acks.
 *		Andi Kleen:		Process packets with PSH set in the
 *					fast path.
 *		J Hadi Salim:		ECN support
 *		Andrei Gurtov,
 *		Pasi Sarolahti,
 *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
 *					engine. Lots of bugs were found.
 *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
#define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */
#define FLAG_DSACK_TLP		0x20000 /* DSACK for tail loss probe */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq))
{
	icsk->icsk_clean_acked = cad;
	static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
	icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
	static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

#ifdef CONFIG_CGROUP_BPF
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
		BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
				       BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
	struct bpf_sock_ops_kern sock_ops;

	if (likely(!unknown_opt && !parse_all_opt))
		return;

	/* The skb will be handled in the
	 * bpf_skops_established() or
	 * bpf_skops_write_hdr_opt().
	 */
	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_SYN_SENT:
	case TCP_LISTEN:
		return;
	}

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
	struct bpf_sock_ops_kern sock_ops;

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = bpf_op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
	if (skb)
		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}
#else
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
}
#endif

static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}

/* Adapt the MSS value used to make delayed ack decisions to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		/* Note: divides are still a bit expensive.
		 * For the moment, only adjust scaling_ratio
		 * when we update icsk_ack.rcv_mss.
		 */
		if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
			u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;

			do_div(val, skb->truesize);
			tcp_sk(sk)->scaling_ratio = val ? val : 1;
		}
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);
		/* Account for possibly-removed options */
		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
		/* If the skb has a len of exactly 1*MSS and has the PSH bit
		 * set then it is likely the end of an application write. So
		 * more data may not be arriving soon, and yet the data sender
		 * may be waiting for an ACK if cwnd-bound or using TX zero
		 * copy. So we set ICSK_ACK_PUSHED here so that
		 * tcp_cleanup_rbuf() will send an ACK immediately if the app
		 * reads all of the data and is not ping-pong. If len > MSS
		 * then this logic does not matter (and does not hurt) because
		 * tcp_cleanup_rbuf() will always ACK immediately if the app
		 * reads data and there is more than an MSS of unACKed data.
		 */
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	} else {
		/* Otherwise, we make a more careful check, taking into
		 * account that the SACK block size is variable.
		 *
		 * "len" is the invariant segment length, including the
		 * TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, the packet should be
		     * full sized, provided the peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows us
		     * to handle super-low MTU links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Also subtract the invariant portion (if the peer
			 * is RFC compliant): the TCP header plus the fixed
			 * timestamp option length.
			 * The resulting "len" is the MSS, free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}
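
/* Worked example (illustrative only; assumes TCP_RMEM_TO_WIN_SCALE == 8):
 * the scaling_ratio computed above is roughly skb->len * 256 / skb->truesize.
 * For a hypothetical skb with len = 1448 and truesize = 2304:
 *
 *	val = (1448 << 8) / 2304 = 370688 / 2304 = 160
 *
 * i.e. about 160/256 = 62% of each allocated byte is usable payload, which
 * later scales rcvbuf-to-window conversions (tcp_win_from_space() and
 * friends) accordingly.
 */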

static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	quickacks = min(quickacks, max_quickacks);
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = quickacks;
}
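
/* Worked example (illustrative): with rcv_wnd = 65535 and rcv_mss = 1448,
 * quickacks = 65535 / (2 * 1448) = 22, which is then clamped by the caller's
 * max_quickacks (e.g. TCP_MAX_QUICKACKS, 16 in current kernels), so at most
 * that many ACKs are sent without delay.
 */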

static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_incr_quickack(sk, max_quickacks);
	inet_csk_exit_pingpong_mode(sk);
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */

static bool tcp_in_quickack_mode(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);

	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}

static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr) {
		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;

		/* If the sender is telling us it has entered CWR, then its
		 * cwnd may be very low (even just 1 packet), so we should ACK
		 * immediately.
		 */
		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}

static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* Funny extension: if ECT is not set on a segment,
		 * and we have already seen ECT on a previous segment,
		 * it is probably a retransmit.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode(sk, 2);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			/* Better not delay acks, sender can have a very low cwnd */
			tcp_enter_quickack_mode(sk, 2);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(sk, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when the connection enters the established state.
 */

static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	int sndmem, per_mss;
	u32 nr_segs;

	/* Worst case is non GSO/TSO: each frame consumes one skb
	 * and skb->head is kmalloced from a power-of-two area of memory
	 */
	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	/* Fast Recovery (RFC 5681 3.2):
	 * Cubic needs a 1.7 factor, rounded to 2 to include
	 * extra cushion (the application might react slowly to EPOLLOUT)
	 */
	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
	sndmem *= nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		WRITE_ONCE(sk->sk_sndbuf,
			   min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
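
/* Rough worked example (illustrative; exact values depend on the kernel
 * config): with mss_cache = 1448, adding MAX_TCP_HEADER and the
 * skb_shared_info overhead typically pushes per_mss past 2048, so
 * roundup_pow_of_two() yields 4096; adding the aligned sizeof(struct
 * sk_buff) gives roughly 4352 bytes per segment. With nr_segs =
 * TCP_INIT_CWND (10) and the factor-of-2 cushion, sndmem comes to about
 * 2 * 10 * 4352 = ~87 KB, subject to the tcp_wmem[2] cap.
 */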

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * All of tcp_full_space() is split into two parts: the "network" buffer,
 * allocated forward and advertised in the receiver window (tp->rcv_wnd),
 * and the "application buffer", required to isolate scheduling/application
 * latencies from the network.
 * window_clamp is the maximal advertised window. It can be less than
 * tcp_full_space(), in which case tcp_full_space() - window_clamp
 * is reserved for the "application" buffer. The smaller window_clamp is,
 * the smoother our behaviour from the network's viewpoint, but the lower
 * the throughput and the higher the connection's sensitivity to losses. 8)
 *
 * rcv_ssthresh is a stricter window_clamp used in the "slow start"
 * phase to predict the further behaviour of this connection.
 * It is used for two goals:
 * - to enforce header prediction at the sender, even when the application
 *   requires some significant "application buffer". This is check #1.
 * - to prevent pruning of the receive queue because of misprediction
 *   of the receiver window. This is check #2.
 *
 * The scheme does not work when the sender sends good segments opening
 * the window and then starts to feed us spaghetti. But it should work
 * in common situations. Otherwise, we have to rely on queue collapsing.
 */

/* Slow part of check#2. */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
			     unsigned int skbtruesize)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
	int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
 * can play nice with us, as sk_buff and skb->head might be either
 * freed or shared with up to MAX_SKB_FRAGS segments.
 * Only give a boost to drivers using page frag(s) to hold the frame(s),
 * and if no payload was pulled in skb->head before reaching us.
 */
static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
{
	u32 truesize = skb->truesize;

	if (adjust && !skb_headlen(skb)) {
		truesize -= SKB_TRUESIZE(skb_end_offset(skb));
		/* paranoid check, some drivers might be buggy */
		if (unlikely((int)truesize < (int)skb->len))
			truesize = skb->truesize;
	}
	return truesize;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
			    bool adjust)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int room;

	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;

	if (room <= 0)
		return;

	/* Check #1 */
	if (!tcp_under_memory_pressure(sk)) {
		unsigned int truesize = truesize_adjust(adjust, skb);
		int incr;

		/* Check #2. Increase window, if skb with such overhead
		 * will fit to rcvbuf in future.
		 */
		if (tcp_win_from_space(sk, truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb, truesize);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh += min(room, incr);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	} else {
		/* Under pressure:
		 * Adjust rcv_ssthresh according to reserved mem
		 */
		tcp_adjust_rcv_ssthresh(sk);
	}
}

/* 3. Try to fix up everything. This is done immediately after the
 *    connection enters the established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
	int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tcp_mstamp_refresh(tp);
	tp->rcvq_space.time = tp->tcp_mstamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
				    (u32)TCP_INIT_CWND * tp->advmss);
}

/* 4. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);
	int rmem2;

	icsk->icsk_ack.quick = 0;
	rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);

	if (sk->sk_rcvbuf < rmem2 &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		WRITE_ONCE(sk->sk_rcvbuf,
			   min(atomic_read(&sk->sk_rmem_alloc), rmem2));
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We have no direct information about the MSS.
 * It's better to underestimate RCV_MSS rather than overestimate it.
 * Overestimations make us ACK less frequently than needed.
 * Underestimations are easier to detect and fix by tcp_measure_rcv_mss().
 */
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 * <https://public.lanl.gov/radiant/pubs.html#DRS>
 *
 * More detail on this code can be found at
 * <http://staff.psc.edu/jheffner/>,
 * though this reference is out of date.  A new paper
 * is pending.
 */
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt_us;
	long m = sample;

	if (new_sample != 0) {
		/* If we smoothed over larger samples in the non-timestamp
		 * case, we could grossly overestimate the RTT, especially
		 * with chatty applications or bulk-transfer apps that
		 * stall on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out;
		 * otherwise, with timestamps disabled, convergence takes
		 * too long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	tp->rcv_rtt_est.rtt_us = new_sample;
}
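
/* Worked example (illustrative): rcv_rtt_est.rtt_us stores 8 * RTT. With a
 * previous estimate of 80000 (i.e. 10 ms) and a new sample m = 12000 us:
 *
 *   !win_dep: m -= 80000 >> 3  ->  m = 2000; new_sample = 82000 (10.25 ms),
 *             i.e. the usual 7/8 * old + 1/8 * new smoothing.
 *   win_dep:  only a minimum is tracked: min(80000, 12000 << 3) = 80000.
 */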

static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	u32 delta_us;

	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
	if (!delta_us)
		delta_us = 1;
	tcp_rcv_rtt_update(tp, delta_us, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tp->tcp_mstamp;
}

static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
		return;
	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;

	if (TCP_SKB_CB(skb)->end_seq -
	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
		u32 delta_us;

		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
			if (!delta)
				delta = 1;
			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
			tcp_rcv_rtt_update(tp, delta_us, 0);
		}
	}
}

/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 copied;
	int time;

	trace_tcp_rcv_space_adjust(sk);

	tcp_mstamp_refresh(tp);
	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
		return;

	/* Number of bytes copied to user in last RTT */
	copied = tp->copied_seq - tp->rcvq_space.seq;
	if (copied <= tp->rcvq_space.space)
		goto new_measure;

	/* A bit of theory:
	 * copied = bytes received in previous RTT, our base window
	 * To cope with packet losses, we need a 2x factor.
	 * To cope with slow start, and the sender growing its cwnd by 100 %
	 * every RTT, we need a 4x factor, because the ACK we are sending
	 * now is for the next RTT, not the current one:
	 * <prev RTT . ><current RTT .. ><next RTT .... >
	 */

	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
		u64 rcvwin, grow;
		int rcvbuf;

		/* minimal window to cope with packet losses, assuming
		 * steady state. Add some cushion because of small variations.
		 */
		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;

		/* Accommodate the sender's rate increase (eg. slow start) */
		grow = rcvwin * (copied - tp->rcvq_space.space);
		do_div(grow, tp->rcvq_space.space);
		rcvwin += (grow << 1);

		rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
		if (rcvbuf > sk->sk_rcvbuf) {
			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

			/* Make the window clamp follow along.  */
			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
		}
	}
	tp->rcvq_space.space = copied;

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tp->tcp_mstamp;
}
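
/* Worked example (illustrative): suppose copied = 100000 bytes in the last
 * RTT, the previous rcvq_space.space = 50000 and advmss = 1448:
 *
 *	rcvwin = 2 * 100000 + 16 * 1448            = 223168
 *	grow   = 223168 * (100000 - 50000) / 50000 = 223168
 *	rcvwin += 2 * grow                        -> 669504
 *
 * which is then converted to an rcvbuf size via tcp_space_from_win() and
 * clamped by tcp_rmem[2].
 */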

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCPs do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACKs the
 * sender will sit on its end and only queue most of its data, because
 * it can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, it increments snd_cwnd and transmits more of its
 * queue.  -DaveM
 */
static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now;

	inet_csk_schedule_ack(sk);

	tcp_measure_rcv_mss(sk, skb);

	tcp_rcv_rtt_measure(tp);

	now = tcp_jiffies32;

	if (!icsk->icsk_ack.ato) {
		/* The _first_ data packet received, initialize
		 * delayed ACK engine.
		 */
		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		int m = now - icsk->icsk_ack.lrcvtime;

		if (m <= TCP_ATO_MIN / 2) {
			/* The fastest case is the first. */
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
		} else if (m < icsk->icsk_ack.ato) {
			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
			if (icsk->icsk_ack.ato > icsk->icsk_rto)
				icsk->icsk_ack.ato = icsk->icsk_rto;
		} else if (m > icsk->icsk_rto) {
			/* Too long gap. Apparently sender failed to
			 * restart window, so that we send ACKs quickly.
			 */
			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
		}
	}
	icsk->icsk_ack.lrcvtime = now;

	tcp_ecn_check_ce(sk, skb);

	if (skb->len >= 128)
		tcp_grow_window(sk, skb, true);
}

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
{
	struct tcp_sock *tp = tcp_sk(sk);
	long m = mrtt_us; /* RTT */
	u32 srtt = tp->srtt_us;

	/*	The following amusing code comes from Jacobson's
	 *	article in SIGCOMM '88.  Note that rtt and mdev
	 *	are scaled versions of rtt and mean deviation.
	 *	This is designed to be as fast as possible.
	 *	m stands for "measurement".
	 *
	 *	In a 1990 paper the rto value was changed to:
	 *	RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO when it should be decreased,
	 * increase it too slowly when it should be increased quickly,
	 * decrease it too quickly, etc. I guess in BSD RTO takes ONE
	 * value, so that it absolutely does not matter how to
	 * _calculate_ it. It seems it was a trap that VJ failed to
	 * avoid. 8)
	 */
	if (srtt != 0) {
		m -= (srtt >> 3);	/* m is now error in rtt est */
		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
		}
		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (tp->mdev_us > tp->mdev_max_us) {
			tp->mdev_max_us = tp->mdev_us;
			if (tp->mdev_max_us > tp->rttvar_us)
				tp->rttvar_us = tp->mdev_max_us;
		}
		if (after(tp->snd_una, tp->rtt_seq)) {
			if (tp->mdev_max_us < tp->rttvar_us)
				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
			tp->rtt_seq = tp->snd_nxt;
			tp->mdev_max_us = tcp_rto_min_us(sk);

			tcp_bpf_rtt(sk);
		}
	} else {
		/* no previous measure. */
		srtt = m << 3;		/* take the measured time to be rtt */
		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
		tp->mdev_max_us = tp->rttvar_us;
		tp->rtt_seq = tp->snd_nxt;

		tcp_bpf_rtt(sk);
	}
	tp->srtt_us = max(1U, srtt);
}
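
/* Worked example (illustrative): srtt_us stores 8 * srtt. With a previous
 * srtt of 100 ms (srtt_us = 800000) and a new measurement m = 120000 us:
 *
 *	m -= srtt >> 3;		m = 120000 - 100000 = 20000
 *	srtt += m;		srtt_us = 820000, i.e. 102.5 ms
 *
 * matching srtt' = 7/8 * 100 ms + 1/8 * 120 ms. mdev_us is kept scaled by 4
 * and follows the analogous mdev' = 3/4 * mdev + 1/4 * |error| update.
 */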

static void tcp_update_pacing_rate(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u64 rate;

	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);

	/* current rate is (cwnd * mss) / srtt
	 * In Slow Start [1], set sk_pacing_rate to 200 % of the current rate.
	 * In Congestion Avoidance phase, set it to 120 % of the current rate.
	 *
	 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching the
	 *	 end of slow start and should slow down.
	 */
	if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
	else
		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);

	rate *= max(tcp_snd_cwnd(tp), tp->packets_out);

	if (likely(tp->srtt_us))
		do_div(rate, tp->srtt_us);

	/* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
	 * without any lock. We want to make sure the compiler won't store
	 * intermediate values in this location.
	 */
	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
					     sk->sk_max_pacing_rate));
}
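
/* Worked example (illustrative, assuming the default pacing_ca_ratio of
 * 120): mss_cache = 1448, cwnd = 10, srtt_us = 800000 (100 ms << 3):
 *
 *	rate = 1448 * 80000 * 120 * 10 / 800000 = 173760 bytes/sec
 *
 * i.e. 1.2 * (cwnd * mss) / srtt. The /100 hidden in the constant cancels
 * the percentage ratio, and the << 3 cancels the srtt scaling.
 */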

/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static void tcp_set_rto(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	/* Old crap is replaced with new one. 8)
	 *
	 * More seriously:
	 * 1. If rtt variance happened to be less than 50 msec, it is a
	 *    hallucination. It cannot be less due to utterly erratic ACK
	 *    generation made at least by solaris and freebsd. "Erratic ACKs"
	 *    have _nothing_ to do with delayed acks, because at cwnd>2 the
	 *    true delack timeout is invisible. Actually, Linux-2.4 also
	 *    generates erratic ACKs in some circumstances.
	 */
	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);

	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
	 *    with a correct one. That is exactly what we pretend to do.
	 */

	/* NOTE: clamping at TCP_RTO_MIN is not required, the current algo
	 * guarantees that rto is higher.
	 */
	tcp_bound_rto(sk);
}

__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
{
	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

	if (!cwnd)
		cwnd = TCP_INIT_CWND;
	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_cistruct tcp_sacktag_state {
98762306a36Sopenharmony_ci	/* Timestamps for earliest and latest never-retransmitted segment
98862306a36Sopenharmony_ci	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
98962306a36Sopenharmony_ci	 * but congestion control should still get an accurate delay signal.
99062306a36Sopenharmony_ci	 */
99162306a36Sopenharmony_ci	u64	first_sackt;
99262306a36Sopenharmony_ci	u64	last_sackt;
99362306a36Sopenharmony_ci	u32	reord;
99462306a36Sopenharmony_ci	u32	sack_delivered;
99562306a36Sopenharmony_ci	int	flag;
99662306a36Sopenharmony_ci	unsigned int mss_now;
99762306a36Sopenharmony_ci	struct rate_sample *rate;
99862306a36Sopenharmony_ci};
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci/* Take a notice that peer is sending D-SACKs. Skip update of data delivery
100162306a36Sopenharmony_ci * and spurious retransmission information if this DSACK is unlikely caused by
100262306a36Sopenharmony_ci * sender's action:
100362306a36Sopenharmony_ci * - DSACKed sequence range is larger than maximum receiver's window.
100462306a36Sopenharmony_ci * - Total no. of DSACKed segments exceed the total no. of retransmitted segs.
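 *
 * Illustrative example (hypothetical numbers): a D-SACK covering 2896
 * bytes with mss_cache = 1448 below is counted as dup_segs = 2; if
 * dsack_dups would then exceed total_retrans, the D-SACK is deemed
 * dubious and we return 0 so the caller ignores it.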
 */
static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
			  u32 end_seq, struct tcp_sacktag_state *state)
{
	u32 seq_len, dup_segs = 1;

	if (!before(start_seq, end_seq))
		return 0;

	seq_len = end_seq - start_seq;
	/* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */
	if (seq_len > tp->max_window)
		return 0;
	if (seq_len > tp->mss_cache)
		dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
	else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
		state->flag |= FLAG_DSACK_TLP;

	tp->dsack_dups += dup_segs;
	/* Skip the DSACK if dup segs weren't retransmitted by sender */
	if (tp->dsack_dups > tp->total_retrans)
		return 0;

	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
	/* We increase the RACK ordering window in rounds where we receive
	 * DSACKs that may have been due to reordering causing RACK to trigger
	 * a spurious fast recovery. Thus RACK ignores DSACKs that happen
	 * without having seen reordering, or that match TLP probes (TLP
	 * is timer-driven, not triggered by RACK).
	 */
	if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
		tp->rack.dsack_seen = 1;

	state->flag |= FLAG_DSACKING_ACK;
	/* A spurious retransmission is delivered */
	state->sack_delivered += dup_segs;

	return dup_segs;
}

/* It's reordering when a higher sequence was delivered (i.e. sacked) before
 * some lower, never-retransmitted sequence ("low_seq"). The maximum reordering
 * distance is approximated in full-mss packet distance ("reordering").
 */
static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
				      const int ts)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const u32 mss = tp->mss_cache;
	u32 fack, metric;

	fack = tcp_highest_sack_seq(tp);
	if (!before(low_seq, fack))
		return;

	metric = fack - low_seq;
	if ((metric > tp->reordering * mss) && mss) {
#if FASTRETRANS_DEBUG > 1
		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
			 tp->reordering,
			 0,
			 tp->sacked_out,
			 tp->undo_marker ? tp->undo_retrans : 0);
#endif
		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
				       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
	}

	/* This exciting event is worth remembering. 8) */
	tp->reord_seen++;
	NET_INC_STATS(sock_net(sk),
		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
}

/* This must be called before lost_out or retrans_out are updated
 * on a new loss, because we want to know if all skbs previously
 * known to be lost have already been retransmitted, indicating
 * that this newly lost skb is our next skb to retransmit.
 */
static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
	if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
	    (tp->retransmit_skb_hint &&
	     before(TCP_SKB_CB(skb)->seq,
		    TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
		tp->retransmit_skb_hint = skb;
}

/* Sum the number of packets on the wire we have marked as lost, and
 * notify the congestion control module that the given skb was marked lost.
 */
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
{
	tp->lost += tcp_skb_pcount(skb);
}

void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
	__u8 sacked = TCP_SKB_CB(skb)->sacked;
	struct tcp_sock *tp = tcp_sk(sk);

	if (sacked & TCPCB_SACKED_ACKED)
		return;

	tcp_verify_retransmit_hint(tp, skb);
	if (sacked & TCPCB_LOST) {
		if (sacked & TCPCB_SACKED_RETRANS) {
			/* Account for retransmits that are lost again */
			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
			tp->retrans_out -= tcp_skb_pcount(skb);
			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
				      tcp_skb_pcount(skb));
			tcp_notify_skb_loss_event(tp, skb);
		}
	} else {
		tp->lost_out += tcp_skb_pcount(skb);
		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
		tcp_notify_skb_loss_event(tp, skb);
	}
}

/* Updates the delivered and delivered_ce counts */
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
				bool ece_ack)
{
	tp->delivered += delivered;
	if (ece_ack)
		tp->delivered_ce += delivered;
}

/* This procedure tags the retransmission queue when SACKs arrive.
 *
 * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
 * Packets in queue with these bits set are counted in variables
 * sacked_out, retrans_out and lost_out, correspondingly.
 *
 * Valid combinations are:
 * Tag  InFlight	Description
 * 0	1		- orig segment is in flight.
 * S	0		- nothing flies, orig reached receiver.
 * L	0		- nothing flies, orig lost by net.
 * R	2		- both orig and retransmit are in flight.
 * L|R	1		- orig is lost, retransmit is in flight.
 * S|R  1		- orig reached receiver, retrans is still in flight.
 * (L|S|R is logically valid, it could occur when L|R is sacked,
 *  but it is equivalent to plain S and code short-circuits it to S.
 *  L|S is logically invalid, it would mean -1 packet in flight 8))
 *
 * These 6 states form a finite state machine, controlled by the following events:
 * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
 * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
 * 3. Loss detection event of two flavors:
 *	A. Scoreboard estimator decided the packet is lost.
 *	   A'. Reno "three dupacks" marks head of queue lost.
 *	B. SACK arrives sacking SND.NXT at the moment, when the
 *	   segment was retransmitted.
 * 4. D-SACK added new rule: D-SACK changes any tag to S.
 *
 * It is pleasant to note that the state diagram turns out to be commutative,
 * so we need not be bothered by the order of our actions
 * when multiple events arrive simultaneously. (see the function below).
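 *
 * Worked example (illustrative): an orig segment in flight (tag 0) that
 * the scoreboard marks lost becomes L; retransmitting it yields L|R; a
 * SACK covering it then turns the tag into plain S (the nominal L|S|R
 * outcome is short-circuited to S, as noted above).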
 *
 * Reordering detection.
 * --------------------
 * The reordering metric is the maximal distance by which a packet can be
 * displaced in the packet stream. With SACKs we can estimate it:
 *
 * 1. SACK fills an old hole and the corresponding segment was never
 *    retransmitted -> reordering. Alas, we cannot use it
 *    when a segment was retransmitted.
 * 2. The last flaw is solved with D-SACK. A D-SACK arrives
 *    for a retransmitted and already SACKed segment -> reordering.
 * Neither of these heuristics is used in the Loss state, where we cannot
 * account for retransmits accurately.
 *
 * SACK block validation.
 * ----------------------
 *
 * SACK block range validation checks that the received SACK block fits to
 * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT.
 * Note that SND.UNA is not included in the range even though it would be
 * valid, because a SACK block starting there means that the receiver is
 * rather inconsistent with itself: it reports SACK reneging when it should
 * advance SND.UNA. Such a SACK block is, however, perfectly valid in light
 * of RFC 2018, which explicitly states that "SACK block MUST reflect the
 * newest segment.  Even if the newest segment is going to be discarded ...",
 * not that it looks very clever in case of the head skb. Due to potential
 * receiver-driven attacks, we choose to avoid an immediate walk of the
 * write queue on reneging and defer the head skb's loss recovery to the
 * standard loss recovery procedure that will eventually trigger (nothing
 * forbids us doing this).
 *
 * This also implements a guard against start_seq wrap-around. The problem
 * lies in the fact that though start_seq (s) is before end_seq (i.e., not
 * reversed), there's no guarantee that it will be before snd_nxt (n). The
 * problem happens when start_seq resides between end_seq wrap (e_w) and
 * snd_nxt wrap (s_w):
 *
 *         <- outs wnd ->                          <- wrapzone ->
 *         u     e      n                         u_w   e_w  s n_w
 *         |     |      |                          |     |   |  |
 * |<------------+------+----- TCP seqno space --------------+---------->|
 * ...-- <2^31 ->|                                           |<--------...
 * ...---- >2^31 ------>|                                    |<--------...
 *
 * The current code wouldn't be vulnerable, but it's still better to discard
 * such crazy SACK blocks. Doing this check for start_seq alone closes the
 * somewhat similar case (end_seq after snd_nxt wrap), as the earlier
 * reversed check makes the snd_nxt wrap -> snd_una region "well defined",
 * i.e., equal to the ideal case (infinite seqno space without wrap-caused
 * issues).
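 *
 * Example (illustrative, hypothetical numbers): with snd_nxt = 0x00001000
 * shortly after a seqno wrap, a block with start_seq = 0x7fffffff and
 * end_seq = 0xa0000000 is not reversed and passes the end_seq check, yet
 * start_seq lies in the "wrapzone"; the !before(start_seq, tp->snd_nxt)
 * test below rejects it.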
 *
 * With D-SACK the lower bound is extended to cover the sequence space below
 * SND.UNA down to undo_marker, which is the last point of interest. Yet
 * again, a D-SACK block must not go across snd_una (for the same reason as
 * for the normal SACK blocks, explained above). But there all simplicity
 * ends: TCP might receive valid D-SACKs below that. As long as they reside
 * fully below undo_marker they do not affect behavior in any way and can
 * therefore be safely ignored. In rare cases (which are more or less
 * theoretical ones), the D-SACK will nicely cross that boundary due to skb
 * fragmentation and packet reordering past skb's retransmission. To consider
 * them correctly, the acceptable range must be extended even more, though
 * the exact amount is rather hard to quantify. However, tp->max_window can
 * be used as an exaggerated estimate.
 */
static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
				   u32 start_seq, u32 end_seq)
{
	/* Too far in future, or reversed (interpretation is ambiguous) */
	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
		return false;

	/* Nasty start_seq wrap-around check (see comments above) */
	if (!before(start_seq, tp->snd_nxt))
		return false;

	/* In outstanding window? ...This is valid exit for D-SACKs too.
	 * start_seq == snd_una is nonsensical (see comments above)
	 */
	if (after(start_seq, tp->snd_una))
		return true;

	if (!is_dsack || !tp->undo_marker)
		return false;

	/* ...Then it's D-SACK, and must reside below snd_una completely */
	if (after(end_seq, tp->snd_una))
		return false;

	if (!before(start_seq, tp->undo_marker))
		return true;

	/* Too old */
	if (!after(end_seq, tp->undo_marker))
		return false;

	/* Undo_marker boundary crossing (overestimates a lot). Known already:
	 *   start_seq < undo_marker and end_seq >= undo_marker.
	 */
	return !before(start_seq, end_seq - tp->max_window);
}

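/* Detect whether the first SACK block carries a D-SACK: either it sits
 * below the cumulative ACK, or (per RFC 2883) it is fully contained in
 * the second SACK block. On a valid, non-dubious D-SACK this updates the
 * undo accounting and returns true.
 */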
static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
			    struct tcp_sack_block_wire *sp, int num_sacks,
			    u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
	u32 dup_segs;

	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
	} else if (num_sacks > 1) {
		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

		if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
			return false;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
	} else {
		return false;
	}

	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
	if (!dup_segs) {	/* Skip dubious DSACK */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
		return false;
	}

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);

	/* D-SACK for already forgotten data... Do dumb counting. */
	if (tp->undo_marker && tp->undo_retrans > 0 &&
	    !after(end_seq_0, prior_snd_una) &&
	    after(end_seq_0, tp->undo_marker))
		tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);

	return true;
}

/* Check if skb is fully within the SACK block. In the presence of GSO skbs,
 * the incoming SACK may not exactly match, but we can find a smaller,
 * MSS-aligned portion of it that does. Therefore we might need to fragment,
 * which may fail and creates some hassle (the caller must handle the
 * error-return case).
 *
 * FIXME: this could be merged into the shift decision code
 */
static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
				  u32 start_seq, u32 end_seq)
{
	int err;
	bool in_sack;
	unsigned int pkt_len;
	unsigned int mss;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
		mss = tcp_skb_mss(skb);
		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);

		if (!in_sack) {
			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				pkt_len = mss;
		} else {
			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
			if (pkt_len < mss)
				return -EINVAL;
		}

		/* Round if necessary so that SACKs cover only full MSSes
		 * and/or the remaining small portion (if present)
		 */
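		/* Example (illustrative): with mss = 1000 and pkt_len = 2500,
		 * !in_sack rounds up to pkt_len = 3000, while in_sack rounds
		 * down to 2000, keeping the fragment MSS-aligned.
		 */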
		if (pkt_len > mss) {
			unsigned int new_len = (pkt_len / mss) * mss;
			if (!in_sack && new_len < pkt_len)
				new_len += mss;
			pkt_len = new_len;
		}

		if (pkt_len >= skb->len && !in_sack)
			return 0;

		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
				   pkt_len, mss, GFP_ATOMIC);
		if (err < 0)
			return err;
	}

	return in_sack;
}

/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
static u8 tcp_sacktag_one(struct sock *sk,
			  struct tcp_sacktag_state *state, u8 sacked,
			  u32 start_seq, u32 end_seq,
			  int dup_sack, int pcount,
			  u64 xmit_time)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Account D-SACK for retransmitted packet. */
	if (dup_sack && (sacked & TCPCB_RETRANS)) {
		if (tp->undo_marker && tp->undo_retrans > 0 &&
		    after(end_seq, tp->undo_marker))
			tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
		if ((sacked & TCPCB_SACKED_ACKED) &&
		    before(start_seq, state->reord))
			state->reord = start_seq;
	}

	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
	if (!after(end_seq, tp->snd_una))
		return sacked;

	if (!(sacked & TCPCB_SACKED_ACKED)) {
		tcp_rack_advance(tp, sacked, end_seq, xmit_time);

		if (sacked & TCPCB_SACKED_RETRANS) {
			/* If the segment is not tagged as lost,
			 * we do not clear RETRANS, believing
			 * that the retransmission is still in flight.
			 */
			if (sacked & TCPCB_LOST) {
				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
				tp->lost_out -= pcount;
				tp->retrans_out -= pcount;
			}
		} else {
			if (!(sacked & TCPCB_RETRANS)) {
				/* New SACK for a non-retransmitted frame,
				 * which was in a hole. It is reordering.
				 */
				if (before(start_seq,
					   tcp_highest_sack_seq(tp)) &&
				    before(start_seq, state->reord))
					state->reord = start_seq;

				if (!after(end_seq, tp->high_seq))
					state->flag |= FLAG_ORIG_SACK_ACKED;
				if (state->first_sackt == 0)
					state->first_sackt = xmit_time;
				state->last_sackt = xmit_time;
			}

			if (sacked & TCPCB_LOST) {
				sacked &= ~TCPCB_LOST;
				tp->lost_out -= pcount;
			}
		}

		sacked |= TCPCB_SACKED_ACKED;
		state->flag |= FLAG_DATA_SACKED;
		tp->sacked_out += pcount;
		/* Out-of-order packets delivered */
		state->sack_delivered += pcount;

		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
		if (tp->lost_skb_hint &&
		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
			tp->lost_cnt_hint += pcount;
	}

	/* D-SACK. We can detect redundant retransmission in S|R and plain R
	 * frames and clear it. undo_retrans is decreased above; L|R frames
	 * are accounted above as well.
	 */
	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
		sacked &= ~TCPCB_SACKED_RETRANS;
		tp->retrans_out -= pcount;
	}

	return sacked;
}

/* Shift newly-SACKed bytes from this skb to the immediately previous
 * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
 */
static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
			    struct sk_buff *skb,
			    struct tcp_sacktag_state *state,
			    unsigned int pcount, int shifted, int mss,
			    bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */

	BUG_ON(!pcount);

	/* Adjust counters and hints for the newly sacked sequence
	 * range but discard the return value since prev is already
	 * marked. We must tag the range first because the seq
	 * advancement below implicitly advances
	 * tcp_highest_sack_seq() when skb is highest_sack.
	 */
	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
			start_seq, end_seq, dup_sack, pcount,
			tcp_skb_timestamp_us(skb));
	tcp_rate_skb_delivered(sk, skb, state->rate);

	if (skb == tp->lost_skb_hint)
		tp->lost_cnt_hint += pcount;

	TCP_SKB_CB(prev)->end_seq += shifted;
	TCP_SKB_CB(skb)->seq += shifted;

	tcp_skb_pcount_add(prev, pcount);
	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
	tcp_skb_pcount_add(skb, -pcount);

	/* When we're adding to gso_segs == 1, gso_size will be zero;
	 * in theory this shouldn't be necessary, but as long as DSACK
	 * code can come after this skb later on, it's better to keep
	 * gso_size set to something.
	 */
	if (!TCP_SKB_CB(prev)->tcp_gso_size)
		TCP_SKB_CB(prev)->tcp_gso_size = mss;

	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
	if (tcp_skb_pcount(skb) <= 1)
		TCP_SKB_CB(skb)->tcp_gso_size = 0;

	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);

	if (skb->len > 0) {
		BUG_ON(!tcp_skb_pcount(skb));
		NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
		return false;
	}

	/* Whole SKB was eaten :-) */

	if (skb == tp->retransmit_skb_hint)
		tp->retransmit_skb_hint = prev;
	if (skb == tp->lost_skb_hint) {
		tp->lost_skb_hint = prev;
		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
	}

	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
	TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		TCP_SKB_CB(prev)->end_seq++;

	if (skb == tcp_highest_sack(sk))
		tcp_advance_highest_sack(sk, skb);

	tcp_skb_collapse_tstamp(prev, skb);
	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;

	tcp_rtx_queue_unlink_and_free(skb, sk);

	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);

	return true;
}

/* I wish gso_size had a bit saner initialization than something-or-zero,
 * which complicates things.
 */
static int tcp_skb_seglen(const struct sk_buff *skb)
{
	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
}

/* Shifting pages past the head area doesn't work */
static int skb_can_shift(const struct sk_buff *skb)
{
	return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
		  int pcount, int shiftlen)
{
	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE).
	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
	 * to make sure we are not storing more than 65535 * 8 bytes
	 * per skb, even if the current MSS is bigger.
	 */
	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
		return 0;
	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
		return 0;
	return skb_shift(to, from, shiftlen);
}

/* Try collapsing SACK blocks spanning across multiple skbs to a single
 * skb.
 */
static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
					  struct tcp_sacktag_state *state,
					  u32 start_seq, u32 end_seq,
					  bool dup_sack)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *prev;
	int mss;
	int pcount = 0;
	int len;
	int in_sack;

	/* Normally R but no L won't result in plain S */
	if (!dup_sack &&
	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
		goto fallback;
	if (!skb_can_shift(skb))
		goto fallback;
	/* This frame is about to be dropped (was ACKed). */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
		goto fallback;

	/* Can only happen with delayed DSACK + discard craziness */
	prev = skb_rb_prev(skb);
	if (!prev)
		goto fallback;

	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
		goto fallback;

	if (!tcp_skb_can_collapse(prev, skb))
		goto fallback;

	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);

	if (in_sack) {
		len = skb->len;
		pcount = tcp_skb_pcount(skb);
		mss = tcp_skb_seglen(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;
	} else {
		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
			goto noop;
		/* CHECKME: Is this the non-MSS split case only? Btw, this
		 * will cause skipped skbs due to the advancing loop; the
		 * original has that feature too.
		 */
		if (tcp_skb_pcount(skb) <= 1)
			goto noop;

		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
		if (!in_sack) {
			/* TODO: head merge to next could be attempted here
			 * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
			 * though it might not be worth the additional hassle
			 *
			 * ...we can probably just fall back to what was done
			 * previously. We could try merging non-SACKed ones
			 * as well, but it probably isn't going to pay off
			 * because later SACKs might again split them, and
			 * it would make skb timestamp tracking a considerably
			 * harder problem.
			 */
			goto fallback;
		}

		len = end_seq - TCP_SKB_CB(skb)->seq;
		BUG_ON(len < 0);
		BUG_ON(len > skb->len);

		/* MSS boundaries should be honoured or else pcount will
		 * break severely, even though honouring them makes things
		 * a bit trickier. Optimize the common case to avoid most
		 * of the divides.
		 */
		mss = tcp_skb_mss(skb);

		/* TODO: Fix DSACKs to not fragment already SACKed and we can
		 * drop this restriction as unnecessary
		 */
		if (mss != tcp_skb_seglen(prev))
			goto fallback;

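		/* Example (illustrative): len = 4000 and mss = 1500 fall
		 * into the last branch below: pcount = 2 and len is trimmed
		 * to 3000 so that only whole MSSes are shifted.
		 */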
		if (len == mss) {
			pcount = 1;
		} else if (len < mss) {
			goto noop;
		} else {
			pcount = len / mss;
			len = pcount * mss;
		}
	}

	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
		goto fallback;

	if (!tcp_skb_shift(prev, skb, pcount, len))
		goto fallback;
	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
		goto out;

	/* A filled hole allows collapsing with the next skb as well; this is
	 * very useful when a hole-on-every-nth-skb pattern occurs.
	 */
	skb = skb_rb_next(prev);
	if (!skb)
		goto out;

	if (!skb_can_shift(skb) ||
	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
	    (mss != tcp_skb_seglen(skb)))
		goto out;

	if (!tcp_skb_can_collapse(prev, skb))
		goto out;
	len = skb->len;
	pcount = tcp_skb_pcount(skb);
	if (tcp_skb_shift(prev, skb, pcount, len))
		tcp_shifted_skb(sk, prev, skb, state, pcount,
				len, mss, 0);

out:
	return prev;

noop:
	return skb;

fallback:
	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
	return NULL;
}

static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
					struct tcp_sack_block *next_dup,
					struct tcp_sacktag_state *state,
					u32 start_seq, u32 end_seq,
					bool dup_sack_in)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *tmp;

	skb_rbtree_walk_from(skb) {
		int in_sack = 0;
		bool dup_sack = dup_sack_in;

		/* queue is in-order => we can short-circuit the walk early */
		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
			break;

		if (next_dup &&
		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
			in_sack = tcp_match_skb_to_sack(sk, skb,
							next_dup->start_seq,
							next_dup->end_seq);
			if (in_sack > 0)
				dup_sack = true;
		}

		/* The skb reference here is a bit tricky to get right, since
		 * shifting can eat and free both this skb and the next,
		 * so not even the _safe variant of the loop is enough.
		 */
		if (in_sack <= 0) {
			tmp = tcp_shift_skb_data(sk, skb, state,
						 start_seq, end_seq, dup_sack);
			if (tmp) {
				if (tmp != skb) {
					skb = tmp;
					continue;
				}

				in_sack = 0;
			} else {
				in_sack = tcp_match_skb_to_sack(sk, skb,
								start_seq,
								end_seq);
			}
		}

		if (unlikely(in_sack < 0))
			break;

		if (in_sack) {
			TCP_SKB_CB(skb)->sacked =
				tcp_sacktag_one(sk,
						state,
						TCP_SKB_CB(skb)->sacked,
						TCP_SKB_CB(skb)->seq,
						TCP_SKB_CB(skb)->end_seq,
						dup_sack,
						tcp_skb_pcount(skb),
						tcp_skb_timestamp_us(skb));
			tcp_rate_skb_delivered(sk, skb, state->rate);
			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
				list_del_init(&skb->tcp_tsorted_anchor);

			if (!before(TCP_SKB_CB(skb)->seq,
				    tcp_highest_sack_seq(tp)))
				tcp_advance_highest_sack(sk, skb);
		}
	}
	return skb;
}
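/* Find the skb in the rtx queue whose [seq, end_seq) range contains @seq,
 * or NULL if no such skb exists (a summary of the rb-tree walk below).
 */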
static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
{
	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
	struct sk_buff *skb;

	while (*p) {
		parent = *p;
		skb = rb_to_skb(parent);
		if (before(seq, TCP_SKB_CB(skb)->seq)) {
			p = &parent->rb_left;
			continue;
		}
		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
			p = &parent->rb_right;
			continue;
		}
		return skb;
	}
	return NULL;
}

static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
					u32 skip_to_seq)
{
	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
		return skb;

	return tcp_sacktag_bsearch(sk, skip_to_seq);
}

static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
						struct sock *sk,
						struct tcp_sack_block *next_dup,
						struct tcp_sacktag_state *state,
						u32 skip_to_seq)
{
	if (!next_dup)
		return skb;

	if (before(next_dup->start_seq, skip_to_seq)) {
		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
		skb = tcp_sacktag_walk(skb, sk, NULL, state,
				       next_dup->start_seq, next_dup->end_seq,
				       1);
	}

	return skb;
}

static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
{
	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
}

static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
			u32 prior_snd_una, struct tcp_sacktag_state *state)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const unsigned char *ptr = (skb_transport_header(ack_skb) +
				    TCP_SKB_CB(ack_skb)->sacked);
	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
	struct tcp_sack_block sp[TCP_NUM_SACKS];
	struct tcp_sack_block *cache;
	struct sk_buff *skb;
	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
	int used_sacks;
	bool found_dup_sack = false;
	int i, j;
	int first_sack_index;

	state->flag = 0;
	state->reord = tp->snd_nxt;

	if (!tp->sacked_out)
		tcp_highest_sack_reset(sk);

	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
					 num_sacks, prior_snd_una, state);

	/* Eliminate too old ACKs, but take into
	 * account more or less fresh ones; they can
	 * still contain valid SACK info.
	 */
	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
		return 0;

	if (!tp->packets_out)
		goto out;

	used_sacks = 0;
	first_sack_index = 0;
	for (i = 0; i < num_sacks; i++) {
		bool dup_sack = !i && found_dup_sack;

		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

		if (!tcp_is_sackblock_valid(tp, dup_sack,
					    sp[used_sacks].start_seq,
					    sp[used_sacks].end_seq)) {
			int mib_idx;

			if (dup_sack) {
				if (!tp->undo_marker)
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
				else
					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
			} else {
				/* Don't count old blocks caused by ACK reordering */
				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
				    !after(sp[used_sacks].end_seq, tp->snd_una))
					continue;
				mib_idx = LINUX_MIB_TCPSACKDISCARD;
			}

			NET_INC_STATS(sock_net(sk), mib_idx);
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		/* Ignore very old stuff early */
		if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
			if (i == 0)
				first_sack_index = -1;
			continue;
		}

		used_sacks++;
	}
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci	/* order SACK blocks to allow in order walk of the retrans queue */
190462306a36Sopenharmony_ci	for (i = used_sacks - 1; i > 0; i--) {
190562306a36Sopenharmony_ci		for (j = 0; j < i; j++) {
190662306a36Sopenharmony_ci			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
190762306a36Sopenharmony_ci				swap(sp[j], sp[j + 1]);
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci				/* Track where the first SACK block goes to */
191062306a36Sopenharmony_ci				if (j == first_sack_index)
191162306a36Sopenharmony_ci					first_sack_index = j + 1;
191262306a36Sopenharmony_ci			}
191362306a36Sopenharmony_ci		}
191462306a36Sopenharmony_ci	}
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci	state->mss_now = tcp_current_mss(sk);
191762306a36Sopenharmony_ci	skb = NULL;
191862306a36Sopenharmony_ci	i = 0;
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_ci	if (!tp->sacked_out) {
192162306a36Sopenharmony_ci		/* It's already past, so skip checking against it */
192262306a36Sopenharmony_ci		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
192362306a36Sopenharmony_ci	} else {
192462306a36Sopenharmony_ci		cache = tp->recv_sack_cache;
192562306a36Sopenharmony_ci		/* Skip empty blocks in at head of the cache */
192662306a36Sopenharmony_ci		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
192762306a36Sopenharmony_ci		       !cache->end_seq)
192862306a36Sopenharmony_ci			cache++;
192962306a36Sopenharmony_ci	}
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_ci	while (i < used_sacks) {
193262306a36Sopenharmony_ci		u32 start_seq = sp[i].start_seq;
193362306a36Sopenharmony_ci		u32 end_seq = sp[i].end_seq;
193462306a36Sopenharmony_ci		bool dup_sack = (found_dup_sack && (i == first_sack_index));
193562306a36Sopenharmony_ci		struct tcp_sack_block *next_dup = NULL;
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci		if (found_dup_sack && ((i + 1) == first_sack_index))
193862306a36Sopenharmony_ci			next_dup = &sp[i + 1];
193962306a36Sopenharmony_ci
194062306a36Sopenharmony_ci		/* Skip too early cached blocks */
194162306a36Sopenharmony_ci		while (tcp_sack_cache_ok(tp, cache) &&
194262306a36Sopenharmony_ci		       !before(start_seq, cache->end_seq))
194362306a36Sopenharmony_ci			cache++;
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci		/* Can skip some work by looking recv_sack_cache? */
194662306a36Sopenharmony_ci		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
194762306a36Sopenharmony_ci		    after(end_seq, cache->start_seq)) {
194862306a36Sopenharmony_ci
194962306a36Sopenharmony_ci			/* Head todo? */
195062306a36Sopenharmony_ci			if (before(start_seq, cache->start_seq)) {
195162306a36Sopenharmony_ci				skb = tcp_sacktag_skip(skb, sk, start_seq);
195262306a36Sopenharmony_ci				skb = tcp_sacktag_walk(skb, sk, next_dup,
195362306a36Sopenharmony_ci						       state,
195462306a36Sopenharmony_ci						       start_seq,
195562306a36Sopenharmony_ci						       cache->start_seq,
195662306a36Sopenharmony_ci						       dup_sack);
195762306a36Sopenharmony_ci			}
195862306a36Sopenharmony_ci
195962306a36Sopenharmony_ci			/* Rest of the block already fully processed? */
196062306a36Sopenharmony_ci			if (!after(end_seq, cache->end_seq))
196162306a36Sopenharmony_ci				goto advance_sp;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
196462306a36Sopenharmony_ci						       state,
196562306a36Sopenharmony_ci						       cache->end_seq);
196662306a36Sopenharmony_ci
196762306a36Sopenharmony_ci			/* ...tail remains todo... */
196862306a36Sopenharmony_ci			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
196962306a36Sopenharmony_ci				/* ...but better entrypoint exists! */
197062306a36Sopenharmony_ci				skb = tcp_highest_sack(sk);
197162306a36Sopenharmony_ci				if (!skb)
197262306a36Sopenharmony_ci					break;
197362306a36Sopenharmony_ci				cache++;
197462306a36Sopenharmony_ci				goto walk;
197562306a36Sopenharmony_ci			}
197662306a36Sopenharmony_ci
197762306a36Sopenharmony_ci			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
197862306a36Sopenharmony_ci			/* Check overlap against next cached too (past this one already) */
197962306a36Sopenharmony_ci			cache++;
198062306a36Sopenharmony_ci			continue;
198162306a36Sopenharmony_ci		}
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ci		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
198462306a36Sopenharmony_ci			skb = tcp_highest_sack(sk);
198562306a36Sopenharmony_ci			if (!skb)
198662306a36Sopenharmony_ci				break;
198762306a36Sopenharmony_ci		}
198862306a36Sopenharmony_ci		skb = tcp_sacktag_skip(skb, sk, start_seq);
198962306a36Sopenharmony_ci
199062306a36Sopenharmony_ciwalk:
199162306a36Sopenharmony_ci		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
199262306a36Sopenharmony_ci				       start_seq, end_seq, dup_sack);
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ciadvance_sp:
199562306a36Sopenharmony_ci		i++;
199662306a36Sopenharmony_ci	}
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_ci	/* Zero out the unused head entries of the SACK cache so we can skip them next time */
199962306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
200062306a36Sopenharmony_ci		tp->recv_sack_cache[i].start_seq = 0;
200162306a36Sopenharmony_ci		tp->recv_sack_cache[i].end_seq = 0;
200262306a36Sopenharmony_ci	}
200362306a36Sopenharmony_ci	for (j = 0; j < used_sacks; j++)
200462306a36Sopenharmony_ci		tp->recv_sack_cache[i++] = sp[j];
200562306a36Sopenharmony_ci
200662306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
200762306a36Sopenharmony_ci		tcp_check_sack_reordering(sk, state->reord, 0);
200862306a36Sopenharmony_ci
200962306a36Sopenharmony_ci	tcp_verify_left_out(tp);
201062306a36Sopenharmony_ciout:
201162306a36Sopenharmony_ci
201262306a36Sopenharmony_ci#if FASTRETRANS_DEBUG > 0
201362306a36Sopenharmony_ci	WARN_ON((int)tp->sacked_out < 0);
201462306a36Sopenharmony_ci	WARN_ON((int)tp->lost_out < 0);
201562306a36Sopenharmony_ci	WARN_ON((int)tp->retrans_out < 0);
201662306a36Sopenharmony_ci	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
201762306a36Sopenharmony_ci#endif
201862306a36Sopenharmony_ci	return state->flag;
201962306a36Sopenharmony_ci}
202062306a36Sopenharmony_ci
202162306a36Sopenharmony_ci/* Limits sacked_out so that its sum with lost_out is never larger than
202262306a36Sopenharmony_ci * packets_out. Returns false if no sacked_out adjustment was necessary.
202362306a36Sopenharmony_ci */
202462306a36Sopenharmony_cistatic bool tcp_limit_reno_sacked(struct tcp_sock *tp)
202562306a36Sopenharmony_ci{
202662306a36Sopenharmony_ci	u32 holes;
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci	holes = max(tp->lost_out, 1U);
202962306a36Sopenharmony_ci	holes = min(holes, tp->packets_out);
203062306a36Sopenharmony_ci
203162306a36Sopenharmony_ci	if ((tp->sacked_out + holes) > tp->packets_out) {
203262306a36Sopenharmony_ci		tp->sacked_out = tp->packets_out - holes;
203362306a36Sopenharmony_ci		return true;
203462306a36Sopenharmony_ci	}
203562306a36Sopenharmony_ci	return false;
203662306a36Sopenharmony_ci}
203762306a36Sopenharmony_ci
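/* Worked example of the clamp above (illustrative only, not kernel logic):
 * with packets_out = 10, lost_out = 3 and sacked_out = 9, holes is
 * min(max(3, 1), 10) = 3, and since 9 + 3 > 10 the count is clamped to
 * sacked_out = 10 - 3 = 7 and true is returned. A minimal sketch of the
 * same arithmetic, assuming plain u32 counters (the example_ name is
 * hypothetical):
 */
static inline bool example_limit_reno_sacked(u32 *sacked_out, u32 lost_out,
					     u32 packets_out)
{
	u32 holes = min(max(lost_out, 1U), packets_out);

	if (*sacked_out + holes > packets_out) {
		*sacked_out = packets_out - holes;
		return true;
	}
	return false;
}
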
203862306a36Sopenharmony_ci/* If we receive more dupacks than we expected when counting segments
203962306a36Sopenharmony_ci * under the assumption of no reordering, interpret this as reordering.
204062306a36Sopenharmony_ci * The only other explanation would be a bug in the receiver's TCP.
204162306a36Sopenharmony_ci */
204262306a36Sopenharmony_cistatic void tcp_check_reno_reordering(struct sock *sk, const int addend)
204362306a36Sopenharmony_ci{
204462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
204562306a36Sopenharmony_ci
204662306a36Sopenharmony_ci	if (!tcp_limit_reno_sacked(tp))
204762306a36Sopenharmony_ci		return;
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_ci	tp->reordering = min_t(u32, tp->packets_out + addend,
205062306a36Sopenharmony_ci			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
205162306a36Sopenharmony_ci	tp->reord_seen++;
205262306a36Sopenharmony_ci	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
205362306a36Sopenharmony_ci}
205462306a36Sopenharmony_ci
205562306a36Sopenharmony_ci/* Emulate SACKs for a SACKless connection: account for a new dupack. */
205662306a36Sopenharmony_ci
205762306a36Sopenharmony_cistatic void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
205862306a36Sopenharmony_ci{
205962306a36Sopenharmony_ci	if (num_dupack) {
206062306a36Sopenharmony_ci		struct tcp_sock *tp = tcp_sk(sk);
206162306a36Sopenharmony_ci		u32 prior_sacked = tp->sacked_out;
206262306a36Sopenharmony_ci		s32 delivered;
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci		tp->sacked_out += num_dupack;
206562306a36Sopenharmony_ci		tcp_check_reno_reordering(sk, 0);
206662306a36Sopenharmony_ci		delivered = tp->sacked_out - prior_sacked;
206762306a36Sopenharmony_ci		if (delivered > 0)
206862306a36Sopenharmony_ci			tcp_count_delivered(tp, delivered, ece_ack);
206962306a36Sopenharmony_ci		tcp_verify_left_out(tp);
207062306a36Sopenharmony_ci	}
207162306a36Sopenharmony_ci}
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_ci/* Account for an ACK that ACKs some data during the Reno Recovery phase. */
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_cistatic void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
207662306a36Sopenharmony_ci{
207762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_ci	if (acked > 0) {
208062306a36Sopenharmony_ci		/* One ACK acked the hole; the rest eat duplicate ACKs. */
208162306a36Sopenharmony_ci		tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
208262306a36Sopenharmony_ci				    ece_ack);
208362306a36Sopenharmony_ci		if (acked - 1 >= tp->sacked_out)
208462306a36Sopenharmony_ci			tp->sacked_out = 0;
208562306a36Sopenharmony_ci		else
208662306a36Sopenharmony_ci			tp->sacked_out -= acked - 1;
208762306a36Sopenharmony_ci	}
208862306a36Sopenharmony_ci	tcp_check_reno_reordering(sk, acked);
208962306a36Sopenharmony_ci	tcp_verify_left_out(tp);
209062306a36Sopenharmony_ci}
209162306a36Sopenharmony_ci
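/* Worked example of the accounting above (illustrative): if a cumulative
 * ACK covers acked = 5 segments while sacked_out = 3 dupACK-emulated
 * "SACKs" are outstanding, max(5 - 3, 1) = 2 segments count as newly
 * delivered, and since acked - 1 = 4 >= 3 the emulated SACK count is
 * reset to zero.
 */
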
209262306a36Sopenharmony_cistatic inline void tcp_reset_reno_sack(struct tcp_sock *tp)
209362306a36Sopenharmony_ci{
209462306a36Sopenharmony_ci	tp->sacked_out = 0;
209562306a36Sopenharmony_ci}
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_civoid tcp_clear_retrans(struct tcp_sock *tp)
209862306a36Sopenharmony_ci{
209962306a36Sopenharmony_ci	tp->retrans_out = 0;
210062306a36Sopenharmony_ci	tp->lost_out = 0;
210162306a36Sopenharmony_ci	tp->undo_marker = 0;
210262306a36Sopenharmony_ci	tp->undo_retrans = -1;
210362306a36Sopenharmony_ci	tp->sacked_out = 0;
210462306a36Sopenharmony_ci}
210562306a36Sopenharmony_ci
210662306a36Sopenharmony_cistatic inline void tcp_init_undo(struct tcp_sock *tp)
210762306a36Sopenharmony_ci{
210862306a36Sopenharmony_ci	tp->undo_marker = tp->snd_una;
210962306a36Sopenharmony_ci	/* Retransmissions still in flight may cause DSACKs later. */
211062306a36Sopenharmony_ci	tp->undo_retrans = tp->retrans_out ? : -1;
211162306a36Sopenharmony_ci}
211262306a36Sopenharmony_ci
211362306a36Sopenharmony_cistatic bool tcp_is_rack(const struct sock *sk)
211462306a36Sopenharmony_ci{
211562306a36Sopenharmony_ci	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
211662306a36Sopenharmony_ci		TCP_RACK_LOSS_DETECTION;
211762306a36Sopenharmony_ci}
211862306a36Sopenharmony_ci
211962306a36Sopenharmony_ci/* If we detect SACK reneging, forget all SACK information
212062306a36Sopenharmony_ci * and reset the tags completely; otherwise preserve the SACKs. If the
212162306a36Sopenharmony_ci * receiver dropped its ofo queue, we will learn this via reneging detection.
212262306a36Sopenharmony_ci */
212362306a36Sopenharmony_cistatic void tcp_timeout_mark_lost(struct sock *sk)
212462306a36Sopenharmony_ci{
212562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
212662306a36Sopenharmony_ci	struct sk_buff *skb, *head;
212762306a36Sopenharmony_ci	bool is_reneg;			/* is receiver reneging on SACKs? */
212862306a36Sopenharmony_ci
212962306a36Sopenharmony_ci	head = tcp_rtx_queue_head(sk);
213062306a36Sopenharmony_ci	is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
213162306a36Sopenharmony_ci	if (is_reneg) {
213262306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
213362306a36Sopenharmony_ci		tp->sacked_out = 0;
213462306a36Sopenharmony_ci		/* Mark SACK reneging until we recover from this loss event. */
213562306a36Sopenharmony_ci		tp->is_sack_reneg = 1;
213662306a36Sopenharmony_ci	} else if (tcp_is_reno(tp)) {
213762306a36Sopenharmony_ci		tcp_reset_reno_sack(tp);
213862306a36Sopenharmony_ci	}
213962306a36Sopenharmony_ci
214062306a36Sopenharmony_ci	skb = head;
214162306a36Sopenharmony_ci	skb_rbtree_walk_from(skb) {
214262306a36Sopenharmony_ci		if (is_reneg)
214362306a36Sopenharmony_ci			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
214462306a36Sopenharmony_ci		else if (tcp_is_rack(sk) && skb != head &&
214562306a36Sopenharmony_ci			 tcp_rack_skb_timeout(tp, skb, 0) > 0)
214662306a36Sopenharmony_ci			continue; /* Don't mark recently sent ones lost yet */
214762306a36Sopenharmony_ci		tcp_mark_skb_lost(sk, skb);
214862306a36Sopenharmony_ci	}
214962306a36Sopenharmony_ci	tcp_verify_left_out(tp);
215062306a36Sopenharmony_ci	tcp_clear_all_retrans_hints(tp);
215162306a36Sopenharmony_ci}
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_ci/* Enter Loss state. */
215462306a36Sopenharmony_civoid tcp_enter_loss(struct sock *sk)
215562306a36Sopenharmony_ci{
215662306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
215762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
215862306a36Sopenharmony_ci	struct net *net = sock_net(sk);
215962306a36Sopenharmony_ci	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
216062306a36Sopenharmony_ci	u8 reordering;
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci	tcp_timeout_mark_lost(sk);
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci	/* Reduce ssthresh if it has not yet been made inside this window. */
216562306a36Sopenharmony_ci	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
216662306a36Sopenharmony_ci	    !after(tp->high_seq, tp->snd_una) ||
216762306a36Sopenharmony_ci	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
216862306a36Sopenharmony_ci		tp->prior_ssthresh = tcp_current_ssthresh(sk);
216962306a36Sopenharmony_ci		tp->prior_cwnd = tcp_snd_cwnd(tp);
217062306a36Sopenharmony_ci		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
217162306a36Sopenharmony_ci		tcp_ca_event(sk, CA_EVENT_LOSS);
217262306a36Sopenharmony_ci		tcp_init_undo(tp);
217362306a36Sopenharmony_ci	}
217462306a36Sopenharmony_ci	tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
217562306a36Sopenharmony_ci	tp->snd_cwnd_cnt   = 0;
217662306a36Sopenharmony_ci	tp->snd_cwnd_stamp = tcp_jiffies32;
217762306a36Sopenharmony_ci
217862306a36Sopenharmony_ci	/* A timeout in the Disorder state after receiving substantial DUPACKs
217962306a36Sopenharmony_ci	 * suggests that the degree of reordering was over-estimated.
218062306a36Sopenharmony_ci	 */
218162306a36Sopenharmony_ci	reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
218262306a36Sopenharmony_ci	if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
218362306a36Sopenharmony_ci	    tp->sacked_out >= reordering)
218462306a36Sopenharmony_ci		tp->reordering = min_t(unsigned int, tp->reordering,
218562306a36Sopenharmony_ci				       reordering);
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_ci	tcp_set_ca_state(sk, TCP_CA_Loss);
218862306a36Sopenharmony_ci	tp->high_seq = tp->snd_nxt;
218962306a36Sopenharmony_ci	tcp_ecn_queue_cwr(tp);
219062306a36Sopenharmony_ci
219162306a36Sopenharmony_ci	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
219262306a36Sopenharmony_ci	 * loss recovery is underway except recurring timeout(s) on
219362306a36Sopenharmony_ci * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing.
219462306a36Sopenharmony_ci	 */
219562306a36Sopenharmony_ci	tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
219662306a36Sopenharmony_ci		   (new_recovery || icsk->icsk_retransmits) &&
219762306a36Sopenharmony_ci		   !inet_csk(sk)->icsk_mtup.probe_size;
219862306a36Sopenharmony_ci}
219962306a36Sopenharmony_ci
220062306a36Sopenharmony_ci/* If an ACK arrived pointing to a remembered SACK, it means that our
220162306a36Sopenharmony_ci * remembered SACKs do not reflect the real state of the receiver, i.e.
220262306a36Sopenharmony_ci * the receiver _host_ is heavily congested (or buggy).
220362306a36Sopenharmony_ci *
220462306a36Sopenharmony_ci * To avoid big spurious retransmission bursts due to transient SACK
220562306a36Sopenharmony_ci * scoreboard oddities that look like reneging, we give the receiver a
220662306a36Sopenharmony_ci * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
220762306a36Sopenharmony_ci * restore sanity to the SACK scoreboard. If the apparent reneging
220862306a36Sopenharmony_ci * persists until this RTO then we'll clear the SACK scoreboard.
220962306a36Sopenharmony_ci */
221062306a36Sopenharmony_cistatic bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
221162306a36Sopenharmony_ci{
221262306a36Sopenharmony_ci	if (*ack_flag & FLAG_SACK_RENEGING &&
221362306a36Sopenharmony_ci	    *ack_flag & FLAG_SND_UNA_ADVANCED) {
221462306a36Sopenharmony_ci		struct tcp_sock *tp = tcp_sk(sk);
221562306a36Sopenharmony_ci		unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
221662306a36Sopenharmony_ci					  msecs_to_jiffies(10));
221762306a36Sopenharmony_ci
221862306a36Sopenharmony_ci		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
221962306a36Sopenharmony_ci					  delay, TCP_RTO_MAX);
222062306a36Sopenharmony_ci		*ack_flag &= ~FLAG_SET_XMIT_TIMER;
222162306a36Sopenharmony_ci		return true;
222262306a36Sopenharmony_ci	}
222362306a36Sopenharmony_ci	return false;
222462306a36Sopenharmony_ci}
222562306a36Sopenharmony_ci
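/* Illustrative arithmetic for the grace delay above (assumption:
 * tp->srtt_us stores 8x the smoothed RTT in usec, as elsewhere in TCP):
 * for a 40 ms RTT, srtt_us = 320000 and srtt_us >> 4 = 20000 usec, so
 * the timer is armed for max(20 ms, 10 ms) = 20 ms. A hypothetical
 * helper computing that delay:
 */
static inline unsigned long example_reneg_delay(u32 srtt_us)
{
	return max(usecs_to_jiffies(srtt_us >> 4), msecs_to_jiffies(10));
}
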
222662306a36Sopenharmony_ci/* Heuristics to calculate the number of duplicate ACKs. There is no
222762306a36Sopenharmony_ci * dupACK counter when SACK is enabled (without SACK, sacked_out is
222862306a36Sopenharmony_ci * used for that purpose).
222962306a36Sopenharmony_ci *
223062306a36Sopenharmony_ci * With reordering, holes may still be in flight, so RFC3517 recovery
223162306a36Sopenharmony_ci * uses pure sacked_out (total number of SACKed segments) even though
223262306a36Sopenharmony_ci * it violates the RFC, which counts duplicate ACKs. The two are often
223362306a36Sopenharmony_ci * equal, but they differ when e.g. out-of-window ACKs or packet
223462306a36Sopenharmony_ci * duplication occur. Since neither of those is caused by loss, TCP
223562306a36Sopenharmony_ci * should really ignore them.
223662306a36Sopenharmony_ci */
223762306a36Sopenharmony_cistatic inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
223862306a36Sopenharmony_ci{
223962306a36Sopenharmony_ci	return tp->sacked_out + 1;
224062306a36Sopenharmony_ci}
224162306a36Sopenharmony_ci
224262306a36Sopenharmony_ci/* Linux NewReno/SACK/ECN state machine.
224362306a36Sopenharmony_ci * --------------------------------------
224462306a36Sopenharmony_ci *
224562306a36Sopenharmony_ci * "Open"	Normal state, no dubious events, fast path.
224662306a36Sopenharmony_ci * "Disorder"   In all respects it is "Open",
224762306a36Sopenharmony_ci *		but requires a bit more attention. It is entered when
224862306a36Sopenharmony_ci *		we see some SACKs or dupacks. It is split off from "Open"
224962306a36Sopenharmony_ci *		mainly to move some processing from the fast path to the slow one.
225062306a36Sopenharmony_ci * "CWR"	CWND was reduced due to some Congestion Notification event.
225162306a36Sopenharmony_ci *		It can be ECN, ICMP source quench, local device congestion.
225262306a36Sopenharmony_ci * "Recovery"	CWND was reduced, we are fast-retransmitting.
225362306a36Sopenharmony_ci * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
225462306a36Sopenharmony_ci *
225562306a36Sopenharmony_ci * tcp_fastretrans_alert() is entered:
225662306a36Sopenharmony_ci * - each incoming ACK, if state is not "Open"
225762306a36Sopenharmony_ci * - when arrived ACK is unusual, namely:
225862306a36Sopenharmony_ci *	* SACK
225962306a36Sopenharmony_ci *	* Duplicate ACK.
226062306a36Sopenharmony_ci *	* ECN ECE.
226162306a36Sopenharmony_ci *
226262306a36Sopenharmony_ci * Counting packets in flight is pretty simple.
226362306a36Sopenharmony_ci *
226462306a36Sopenharmony_ci *	in_flight = packets_out - left_out + retrans_out
226562306a36Sopenharmony_ci *
226662306a36Sopenharmony_ci *	packets_out is SND.NXT-SND.UNA counted in packets.
226762306a36Sopenharmony_ci *
226862306a36Sopenharmony_ci *	retrans_out is number of retransmitted segments.
226962306a36Sopenharmony_ci *
227062306a36Sopenharmony_ci *	left_out is number of segments left network, but not ACKed yet.
227162306a36Sopenharmony_ci *
227262306a36Sopenharmony_ci *		left_out = sacked_out + lost_out
227362306a36Sopenharmony_ci *
227462306a36Sopenharmony_ci *     sacked_out: Packets which arrived at the receiver out of order
227562306a36Sopenharmony_ci *		   and hence were not cumulatively ACKed. With SACKs this
227662306a36Sopenharmony_ci *		   number is simply the amount of SACKed data. Even without
227762306a36Sopenharmony_ci *		   SACKs it is easy to give a pretty reliable estimate of
227862306a36Sopenharmony_ci *		   this number by counting duplicate ACKs.
227962306a36Sopenharmony_ci *
228062306a36Sopenharmony_ci *       lost_out: Packets lost by the network. TCP has no explicit
228162306a36Sopenharmony_ci *		   "loss notification" feedback from the network (for now),
228262306a36Sopenharmony_ci *		   which means that this number can only be _guessed_.
228362306a36Sopenharmony_ci *		   Actually, it is the heuristic used to predict losses
228462306a36Sopenharmony_ci *		   that distinguishes the different algorithms.
228562306a36Sopenharmony_ci *
228662306a36Sopenharmony_ci *	E.g. after an RTO, when the whole queue is considered lost,
228762306a36Sopenharmony_ci *	lost_out = packets_out and in_flight = retrans_out.
228862306a36Sopenharmony_ci *
228962306a36Sopenharmony_ci *		Essentially, we have now a few algorithms detecting
229062306a36Sopenharmony_ci *		lost packets.
229162306a36Sopenharmony_ci *
229262306a36Sopenharmony_ci *		If the receiver supports SACK:
229362306a36Sopenharmony_ci *
229462306a36Sopenharmony_ci *		RFC6675/3517: This is the conventional algorithm. A packet is
229562306a36Sopenharmony_ci *		considered lost if the number of higher-sequence packets
229662306a36Sopenharmony_ci *		SACKed is greater than or equal to the DUPACK threshold
229762306a36Sopenharmony_ci *		(reordering). This is implemented in tcp_mark_head_lost and
229862306a36Sopenharmony_ci *		tcp_update_scoreboard.
229962306a36Sopenharmony_ci *
230062306a36Sopenharmony_ci *		RACK (draft-ietf-tcpm-rack-01): a newer algorithm (2017-)
230162306a36Sopenharmony_ci *		that checks timing instead of counting DUPACKs.
230262306a36Sopenharmony_ci *		Essentially a packet is considered lost if it's not S/ACKed
230362306a36Sopenharmony_ci *		after RTT + reordering_window, where both metrics are
230462306a36Sopenharmony_ci *		dynamically measured and adjusted. This is implemented in
230562306a36Sopenharmony_ci *		tcp_rack_mark_lost.
230662306a36Sopenharmony_ci *
230762306a36Sopenharmony_ci *		If the receiver does not support SACK:
230862306a36Sopenharmony_ci *
230962306a36Sopenharmony_ci *		NewReno (RFC6582): in Recovery we assume that one segment
231062306a36Sopenharmony_ci *		is lost (classic Reno). While we are in Recovery, each
231162306a36Sopenharmony_ci *		arriving partial ACK makes us assume that one more packet
231262306a36Sopenharmony_ci *		is lost (NewReno). These heuristics are the same in NewReno
231362306a36Sopenharmony_ci *		and SACK.
231462306a36Sopenharmony_ci *
231562306a36Sopenharmony_ci * The really tricky (and carefully tuned) part of the algorithm
231662306a36Sopenharmony_ci * is hidden in the functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
231762306a36Sopenharmony_ci * The first determines the moment _when_ we should reduce CWND and,
231862306a36Sopenharmony_ci * hence, slow down forward transmission. In fact, it determines the moment
231962306a36Sopenharmony_ci * when we decide that a hole is caused by loss rather than by reordering.
232062306a36Sopenharmony_ci *
232162306a36Sopenharmony_ci * tcp_xmit_retransmit_queue() decides _what_ we should retransmit to fill
232262306a36Sopenharmony_ci * the holes caused by lost packets.
232362306a36Sopenharmony_ci *
232462306a36Sopenharmony_ci * And the most logically complicated part of algorithm is undo
232562306a36Sopenharmony_ci * heuristics. We detect false retransmits due to both too early
232662306a36Sopenharmony_ci * fast retransmit (reordering) and underestimated RTO, analyzing
232762306a36Sopenharmony_ci * timestamps and D-SACKs. When we detect that some segments were
232862306a36Sopenharmony_ci * retransmitted by mistake and CWND reduction was wrong, we undo
232962306a36Sopenharmony_ci * window reduction and abort recovery phase. This logic is hidden
233062306a36Sopenharmony_ci * inside several functions named tcp_try_undo_<something>.
233162306a36Sopenharmony_ci */
233262306a36Sopenharmony_ci
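/* A minimal sketch of the bookkeeping described above (illustrative
 * only; the real counters live in struct tcp_sock and are maintained
 * incrementally). E.g. after an RTO that marks the whole queue lost,
 * lost_out = packets_out and the function below returns retrans_out.
 */
static inline u32 example_packets_in_flight(u32 packets_out, u32 sacked_out,
					    u32 lost_out, u32 retrans_out)
{
	u32 left_out = sacked_out + lost_out;	/* left the network, not ACKed */

	return packets_out - left_out + retrans_out;
}
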
233362306a36Sopenharmony_ci/* This function decides when we should leave the Disorder state
233462306a36Sopenharmony_ci * and enter the Recovery phase, reducing the congestion window.
233562306a36Sopenharmony_ci *
233662306a36Sopenharmony_ci * Main question: may we further continue forward transmission
233762306a36Sopenharmony_ci * with the same cwnd?
233862306a36Sopenharmony_ci */
233962306a36Sopenharmony_cistatic bool tcp_time_to_recover(struct sock *sk, int flag)
234062306a36Sopenharmony_ci{
234162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	/* Trick#1: The loss is proven. */
234462306a36Sopenharmony_ci	if (tp->lost_out)
234562306a36Sopenharmony_ci		return true;
234662306a36Sopenharmony_ci
234762306a36Sopenharmony_ci	/* Not-A-Trick#2 : Classic rule... */
234862306a36Sopenharmony_ci	if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
234962306a36Sopenharmony_ci		return true;
235062306a36Sopenharmony_ci
235162306a36Sopenharmony_ci	return false;
235262306a36Sopenharmony_ci}
235362306a36Sopenharmony_ci
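/* Example of the classic rule above (illustrative, non-RACK SACK flow
 * with tp->reordering = 3): once three segments have been SACKed,
 * tcp_dupack_heuristics() returns sacked_out + 1 = 4 > 3, so
 * tcp_time_to_recover() signals Recovery even before any segment has
 * been explicitly marked lost.
 */
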
235462306a36Sopenharmony_ci/* Detect loss in event "A" above by marking the head of the queue as lost.
235562306a36Sopenharmony_ci * For RFC3517 SACK, a segment is considered lost if it
235662306a36Sopenharmony_ci * has at least tp->reordering SACKed segments above it; "packets" refers to
235762306a36Sopenharmony_ci * the maximum number of SACKed segments to pass before reaching this limit.
235862306a36Sopenharmony_ci */
235962306a36Sopenharmony_cistatic void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
236062306a36Sopenharmony_ci{
236162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
236262306a36Sopenharmony_ci	struct sk_buff *skb;
236362306a36Sopenharmony_ci	int cnt;
236462306a36Sopenharmony_ci	/* Use SACK to deduce losses of new sequences sent during recovery */
236562306a36Sopenharmony_ci	const u32 loss_high = tp->snd_nxt;
236662306a36Sopenharmony_ci
236762306a36Sopenharmony_ci	WARN_ON(packets > tp->packets_out);
236862306a36Sopenharmony_ci	skb = tp->lost_skb_hint;
236962306a36Sopenharmony_ci	if (skb) {
237062306a36Sopenharmony_ci		/* Head already handled? */
237162306a36Sopenharmony_ci		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
237262306a36Sopenharmony_ci			return;
237362306a36Sopenharmony_ci		cnt = tp->lost_cnt_hint;
237462306a36Sopenharmony_ci	} else {
237562306a36Sopenharmony_ci		skb = tcp_rtx_queue_head(sk);
237662306a36Sopenharmony_ci		cnt = 0;
237762306a36Sopenharmony_ci	}
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_ci	skb_rbtree_walk_from(skb) {
238062306a36Sopenharmony_ci		/* TODO: updating the hints on every walked skb is not the
238162306a36Sopenharmony_ci		 * most efficient way to do this; do it better. */
238262306a36Sopenharmony_ci		tp->lost_skb_hint = skb;
238362306a36Sopenharmony_ci		tp->lost_cnt_hint = cnt;
238462306a36Sopenharmony_ci
238562306a36Sopenharmony_ci		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
238662306a36Sopenharmony_ci			break;
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
238962306a36Sopenharmony_ci			cnt += tcp_skb_pcount(skb);
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci		if (cnt > packets)
239262306a36Sopenharmony_ci			break;
239362306a36Sopenharmony_ci
239462306a36Sopenharmony_ci		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
239562306a36Sopenharmony_ci			tcp_mark_skb_lost(sk, skb);
239662306a36Sopenharmony_ci
239762306a36Sopenharmony_ci		if (mark_head)
239862306a36Sopenharmony_ci			break;
239962306a36Sopenharmony_ci	}
240062306a36Sopenharmony_ci	tcp_verify_left_out(tp);
240162306a36Sopenharmony_ci}
240262306a36Sopenharmony_ci
240362306a36Sopenharmony_ci/* Account for newly detected lost packet(s) */
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_cistatic void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
240662306a36Sopenharmony_ci{
240762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
240862306a36Sopenharmony_ci
240962306a36Sopenharmony_ci	if (tcp_is_sack(tp)) {
241062306a36Sopenharmony_ci		int sacked_upto = tp->sacked_out - tp->reordering;
241162306a36Sopenharmony_ci		if (sacked_upto >= 0)
241262306a36Sopenharmony_ci			tcp_mark_head_lost(sk, sacked_upto, 0);
241362306a36Sopenharmony_ci		else if (fast_rexmit)
241462306a36Sopenharmony_ci			tcp_mark_head_lost(sk, 1, 1);
241562306a36Sopenharmony_ci	}
241662306a36Sopenharmony_ci}
241762306a36Sopenharmony_ci
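/* Worked example for the SACK branch above (illustrative): with
 * tp->sacked_out = 7 and tp->reordering = 3, sacked_upto = 4, so
 * tcp_mark_head_lost() marks head segments lost until four SACKed
 * segments have been passed, matching the RFC3517 rule of at least
 * tp->reordering SACKed segments above a lost one.
 */
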
241862306a36Sopenharmony_cistatic bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
241962306a36Sopenharmony_ci{
242062306a36Sopenharmony_ci	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
242162306a36Sopenharmony_ci	       before(tp->rx_opt.rcv_tsecr, when);
242262306a36Sopenharmony_ci}
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci/* The skb was spuriously retransmitted if the returned timestamp echo
242562306a36Sopenharmony_ci * reply is prior to the skb's transmission time
242662306a36Sopenharmony_ci */
242762306a36Sopenharmony_cistatic bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
242862306a36Sopenharmony_ci				     const struct sk_buff *skb)
242962306a36Sopenharmony_ci{
243062306a36Sopenharmony_ci	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
243162306a36Sopenharmony_ci	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
243262306a36Sopenharmony_ci}
243362306a36Sopenharmony_ci
243462306a36Sopenharmony_ci/* Nothing was retransmitted, or the returned timestamp is earlier
243562306a36Sopenharmony_ci * than the timestamp of the first retransmission.
243662306a36Sopenharmony_ci */
243762306a36Sopenharmony_cistatic inline bool tcp_packet_delayed(const struct tcp_sock *tp)
243862306a36Sopenharmony_ci{
243962306a36Sopenharmony_ci	return tp->retrans_stamp &&
244062306a36Sopenharmony_ci	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
244162306a36Sopenharmony_ci}
244262306a36Sopenharmony_ci
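/* Hypothetical distillation of the two timestamp tests above (the
 * example_ name and flat u32 parameters are illustrative assumptions):
 * a retransmission was needless if the peer echoed a timestamp older
 * than the (re)transmission time, i.e. the original copy arrived.
 */
static inline bool example_spurious_by_tsecr(u32 rcv_tsecr, u32 xmit_stamp)
{
	return rcv_tsecr && before(rcv_tsecr, xmit_stamp);
}
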
244362306a36Sopenharmony_ci/* Undo procedures. */
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_ci/* We can clear retrans_stamp when there are no retransmissions in the
244662306a36Sopenharmony_ci * window. It would seem that it is trivially available for us in
244762306a36Sopenharmony_ci * tp->retrans_out; however, that kind of assumption doesn't consider
244862306a36Sopenharmony_ci * what will happen if errors occur when sending a retransmission for the
244962306a36Sopenharmony_ci * second time. ...It could be that such a segment has only
245062306a36Sopenharmony_ci * TCPCB_EVER_RETRANS set at the present time. It seems that checking
245162306a36Sopenharmony_ci * the head skb is enough except for some reneging corner cases that
245262306a36Sopenharmony_ci * are not worth the effort.
245362306a36Sopenharmony_ci *
245462306a36Sopenharmony_ci * Main reason for all this complexity is the fact that connection dying
245562306a36Sopenharmony_ci * time now depends on the validity of the retrans_stamp, in particular,
245662306a36Sopenharmony_ci * that successive retransmissions of a segment must not advance
245762306a36Sopenharmony_ci * retrans_stamp under any conditions.
245862306a36Sopenharmony_ci */
245962306a36Sopenharmony_cistatic bool tcp_any_retrans_done(const struct sock *sk)
246062306a36Sopenharmony_ci{
246162306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
246262306a36Sopenharmony_ci	struct sk_buff *skb;
246362306a36Sopenharmony_ci
246462306a36Sopenharmony_ci	if (tp->retrans_out)
246562306a36Sopenharmony_ci		return true;
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_ci	skb = tcp_rtx_queue_head(sk);
246862306a36Sopenharmony_ci	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
246962306a36Sopenharmony_ci		return true;
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci	return false;
247262306a36Sopenharmony_ci}
247362306a36Sopenharmony_ci
247462306a36Sopenharmony_cistatic void DBGUNDO(struct sock *sk, const char *msg)
247562306a36Sopenharmony_ci{
247662306a36Sopenharmony_ci#if FASTRETRANS_DEBUG > 1
247762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
247862306a36Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
247962306a36Sopenharmony_ci
248062306a36Sopenharmony_ci	if (sk->sk_family == AF_INET) {
248162306a36Sopenharmony_ci		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
248262306a36Sopenharmony_ci			 msg,
248362306a36Sopenharmony_ci			 &inet->inet_daddr, ntohs(inet->inet_dport),
248462306a36Sopenharmony_ci			 tcp_snd_cwnd(tp), tcp_left_out(tp),
248562306a36Sopenharmony_ci			 tp->snd_ssthresh, tp->prior_ssthresh,
248662306a36Sopenharmony_ci			 tp->packets_out);
248762306a36Sopenharmony_ci	}
248862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
248962306a36Sopenharmony_ci	else if (sk->sk_family == AF_INET6) {
249062306a36Sopenharmony_ci		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
249162306a36Sopenharmony_ci			 msg,
249262306a36Sopenharmony_ci			 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
249362306a36Sopenharmony_ci			 tcp_snd_cwnd(tp), tcp_left_out(tp),
249462306a36Sopenharmony_ci			 tp->snd_ssthresh, tp->prior_ssthresh,
249562306a36Sopenharmony_ci			 tp->packets_out);
249662306a36Sopenharmony_ci	}
249762306a36Sopenharmony_ci#endif
249862306a36Sopenharmony_ci#endif
249962306a36Sopenharmony_ci}
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_cistatic void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
250262306a36Sopenharmony_ci{
250362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
250462306a36Sopenharmony_ci
250562306a36Sopenharmony_ci	if (unmark_loss) {
250662306a36Sopenharmony_ci		struct sk_buff *skb;
250762306a36Sopenharmony_ci
250862306a36Sopenharmony_ci		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
250962306a36Sopenharmony_ci			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
251062306a36Sopenharmony_ci		}
251162306a36Sopenharmony_ci		tp->lost_out = 0;
251262306a36Sopenharmony_ci		tcp_clear_all_retrans_hints(tp);
251362306a36Sopenharmony_ci	}
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci	if (tp->prior_ssthresh) {
251662306a36Sopenharmony_ci		const struct inet_connection_sock *icsk = inet_csk(sk);
251762306a36Sopenharmony_ci
251862306a36Sopenharmony_ci		tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
251962306a36Sopenharmony_ci
252062306a36Sopenharmony_ci		if (tp->prior_ssthresh > tp->snd_ssthresh) {
252162306a36Sopenharmony_ci			tp->snd_ssthresh = tp->prior_ssthresh;
252262306a36Sopenharmony_ci			tcp_ecn_withdraw_cwr(tp);
252362306a36Sopenharmony_ci		}
252462306a36Sopenharmony_ci	}
252562306a36Sopenharmony_ci	tp->snd_cwnd_stamp = tcp_jiffies32;
252662306a36Sopenharmony_ci	tp->undo_marker = 0;
252762306a36Sopenharmony_ci	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
252862306a36Sopenharmony_ci}
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_cistatic inline bool tcp_may_undo(const struct tcp_sock *tp)
253162306a36Sopenharmony_ci{
253262306a36Sopenharmony_ci	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
253362306a36Sopenharmony_ci}
253462306a36Sopenharmony_ci
253562306a36Sopenharmony_cistatic bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
253662306a36Sopenharmony_ci{
253762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
253862306a36Sopenharmony_ci
253962306a36Sopenharmony_ci	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
254062306a36Sopenharmony_ci		/* Hold the old state until something *above* high_seq
254162306a36Sopenharmony_ci		 * is ACKed. For Reno this is a MUST to prevent false
254262306a36Sopenharmony_ci		 * fast retransmits (RFC2582). SACK TCP is safe. */
254362306a36Sopenharmony_ci		if (!tcp_any_retrans_done(sk))
254462306a36Sopenharmony_ci			tp->retrans_stamp = 0;
254562306a36Sopenharmony_ci		return true;
254662306a36Sopenharmony_ci	}
254762306a36Sopenharmony_ci	return false;
254862306a36Sopenharmony_ci}
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_ci/* Try to fully undo the cwnd reduction when the whole recovery proves spurious. */
255162306a36Sopenharmony_cistatic bool tcp_try_undo_recovery(struct sock *sk)
255262306a36Sopenharmony_ci{
255362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
255462306a36Sopenharmony_ci
255562306a36Sopenharmony_ci	if (tcp_may_undo(tp)) {
255662306a36Sopenharmony_ci		int mib_idx;
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci		/* Happy end! We did not retransmit anything
255962306a36Sopenharmony_ci		 * or our original transmission succeeded.
256062306a36Sopenharmony_ci		 */
256162306a36Sopenharmony_ci		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
256262306a36Sopenharmony_ci		tcp_undo_cwnd_reduction(sk, false);
256362306a36Sopenharmony_ci		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
256462306a36Sopenharmony_ci			mib_idx = LINUX_MIB_TCPLOSSUNDO;
256562306a36Sopenharmony_ci		else
256662306a36Sopenharmony_ci			mib_idx = LINUX_MIB_TCPFULLUNDO;
256762306a36Sopenharmony_ci
256862306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), mib_idx);
256962306a36Sopenharmony_ci	} else if (tp->rack.reo_wnd_persist) {
257062306a36Sopenharmony_ci		tp->rack.reo_wnd_persist--;
257162306a36Sopenharmony_ci	}
257262306a36Sopenharmony_ci	if (tcp_is_non_sack_preventing_reopen(sk))
257362306a36Sopenharmony_ci		return true;
257462306a36Sopenharmony_ci	tcp_set_ca_state(sk, TCP_CA_Open);
257562306a36Sopenharmony_ci	tp->is_sack_reneg = 0;
257662306a36Sopenharmony_ci	return false;
257762306a36Sopenharmony_ci}
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
258062306a36Sopenharmony_cistatic bool tcp_try_undo_dsack(struct sock *sk)
258162306a36Sopenharmony_ci{
258262306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci	if (tp->undo_marker && !tp->undo_retrans) {
258562306a36Sopenharmony_ci		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
258662306a36Sopenharmony_ci					       tp->rack.reo_wnd_persist + 1);
258762306a36Sopenharmony_ci		DBGUNDO(sk, "D-SACK");
258862306a36Sopenharmony_ci		tcp_undo_cwnd_reduction(sk, false);
258962306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
259062306a36Sopenharmony_ci		return true;
259162306a36Sopenharmony_ci	}
259262306a36Sopenharmony_ci	return false;
259362306a36Sopenharmony_ci}
259462306a36Sopenharmony_ci
259562306a36Sopenharmony_ci/* Undo during loss recovery after partial ACK or using F-RTO. */
259662306a36Sopenharmony_cistatic bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
259762306a36Sopenharmony_ci{
259862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
259962306a36Sopenharmony_ci
260062306a36Sopenharmony_ci	if (frto_undo || tcp_may_undo(tp)) {
260162306a36Sopenharmony_ci		tcp_undo_cwnd_reduction(sk, true);
260262306a36Sopenharmony_ci
260362306a36Sopenharmony_ci		DBGUNDO(sk, "partial loss");
260462306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
260562306a36Sopenharmony_ci		if (frto_undo)
260662306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk),
260762306a36Sopenharmony_ci					LINUX_MIB_TCPSPURIOUSRTOS);
260862306a36Sopenharmony_ci		inet_csk(sk)->icsk_retransmits = 0;
260962306a36Sopenharmony_ci		if (tcp_is_non_sack_preventing_reopen(sk))
261062306a36Sopenharmony_ci			return true;
261162306a36Sopenharmony_ci		if (frto_undo || tcp_is_sack(tp)) {
261262306a36Sopenharmony_ci			tcp_set_ca_state(sk, TCP_CA_Open);
261362306a36Sopenharmony_ci			tp->is_sack_reneg = 0;
261462306a36Sopenharmony_ci		}
261562306a36Sopenharmony_ci		return true;
261662306a36Sopenharmony_ci	}
261762306a36Sopenharmony_ci	return false;
261862306a36Sopenharmony_ci}
261962306a36Sopenharmony_ci
262062306a36Sopenharmony_ci/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
262162306a36Sopenharmony_ci * It computes the number of packets to send (sndcnt) based on packets newly
262262306a36Sopenharmony_ci * delivered:
262362306a36Sopenharmony_ci *   1) If the packets in flight is larger than ssthresh, PRR spreads the
262462306a36Sopenharmony_ci *	cwnd reductions across a full RTT.
262562306a36Sopenharmony_ci *   2) Otherwise PRR uses packet conservation to send as much as delivered.
262662306a36Sopenharmony_ci *      But when SND_UNA is acked without further losses, it slow-starts
262762306a36Sopenharmony_ci *      cwnd up to ssthresh to speed up the recovery.
262862306a36Sopenharmony_ci */
262962306a36Sopenharmony_cistatic void tcp_init_cwnd_reduction(struct sock *sk)
263062306a36Sopenharmony_ci{
263162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci	tp->high_seq = tp->snd_nxt;
263462306a36Sopenharmony_ci	tp->tlp_high_seq = 0;
263562306a36Sopenharmony_ci	tp->snd_cwnd_cnt = 0;
263662306a36Sopenharmony_ci	tp->prior_cwnd = tcp_snd_cwnd(tp);
263762306a36Sopenharmony_ci	tp->prr_delivered = 0;
263862306a36Sopenharmony_ci	tp->prr_out = 0;
263962306a36Sopenharmony_ci	tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
264062306a36Sopenharmony_ci	tcp_ecn_queue_cwr(tp);
264162306a36Sopenharmony_ci}
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_civoid tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag)
264462306a36Sopenharmony_ci{
264562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
264662306a36Sopenharmony_ci	int sndcnt = 0;
264762306a36Sopenharmony_ci	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
264862306a36Sopenharmony_ci
264962306a36Sopenharmony_ci	if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
265062306a36Sopenharmony_ci		return;
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	tp->prr_delivered += newly_acked_sacked;
265362306a36Sopenharmony_ci	if (delta < 0) {
265462306a36Sopenharmony_ci		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
265562306a36Sopenharmony_ci			       tp->prior_cwnd - 1;
265662306a36Sopenharmony_ci		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
265762306a36Sopenharmony_ci	} else {
265862306a36Sopenharmony_ci		sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
265962306a36Sopenharmony_ci			       newly_acked_sacked);
266062306a36Sopenharmony_ci		if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
266162306a36Sopenharmony_ci			sndcnt++;
266262306a36Sopenharmony_ci		sndcnt = min(delta, sndcnt);
266362306a36Sopenharmony_ci	}
266462306a36Sopenharmony_ci	/* Force a fast retransmit upon entering fast recovery */
266562306a36Sopenharmony_ci	sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
266662306a36Sopenharmony_ci	tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
266762306a36Sopenharmony_ci}
266862306a36Sopenharmony_ci
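/* Self-contained sketch of the PRR sndcnt computation above
 * (illustrative; mirrors the variable roles in tcp_cwnd_reduction).
 * E.g. ssthresh = 7, prior_cwnd = 10, prr_delivered = 2, prr_out = 0
 * and in_flight = 9 give delta < 0, so
 * sndcnt = DIV_ROUND_UP(7 * 2, 10) - 0 = 2: the reduction is spread
 * across the RTT in proportion to the newly delivered packets.
 */
static inline int example_prr_sndcnt(u32 ssthresh, u32 prior_cwnd,
				     u32 prr_delivered, u32 prr_out,
				     u32 in_flight, int newly_acked_sacked)
{
	int delta = ssthresh - in_flight;
	int sndcnt;

	if (delta < 0)		/* Still above ssthresh: reduce proportionally */
		sndcnt = div_u64((u64)ssthresh * prr_delivered +
				 prior_cwnd - 1, prior_cwnd) - prr_out;
	else			/* At/below ssthresh: packet conservation */
		sndcnt = min_t(int, delta,
			       max_t(int, prr_delivered - prr_out,
				     newly_acked_sacked));
	/* Force a fast retransmit upon entering fast recovery */
	return max(sndcnt, (prr_out ? 0 : 1));
}
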
266962306a36Sopenharmony_cistatic inline void tcp_end_cwnd_reduction(struct sock *sk)
267062306a36Sopenharmony_ci{
267162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
267262306a36Sopenharmony_ci
267362306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_ops->cong_control)
267462306a36Sopenharmony_ci		return;
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ci	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
267762306a36Sopenharmony_ci	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
267862306a36Sopenharmony_ci	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
267962306a36Sopenharmony_ci		tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
268062306a36Sopenharmony_ci		tp->snd_cwnd_stamp = tcp_jiffies32;
268162306a36Sopenharmony_ci	}
268262306a36Sopenharmony_ci	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
268362306a36Sopenharmony_ci}
268462306a36Sopenharmony_ci
268562306a36Sopenharmony_ci/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
268662306a36Sopenharmony_civoid tcp_enter_cwr(struct sock *sk)
268762306a36Sopenharmony_ci{
268862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
268962306a36Sopenharmony_ci
269062306a36Sopenharmony_ci	tp->prior_ssthresh = 0;
269162306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
269262306a36Sopenharmony_ci		tp->undo_marker = 0;
269362306a36Sopenharmony_ci		tcp_init_cwnd_reduction(sk);
269462306a36Sopenharmony_ci		tcp_set_ca_state(sk, TCP_CA_CWR);
269562306a36Sopenharmony_ci	}
269662306a36Sopenharmony_ci}
269762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_enter_cwr);
269862306a36Sopenharmony_ci
269962306a36Sopenharmony_cistatic void tcp_try_keep_open(struct sock *sk)
270062306a36Sopenharmony_ci{
270162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
270262306a36Sopenharmony_ci	int state = TCP_CA_Open;
270362306a36Sopenharmony_ci
270462306a36Sopenharmony_ci	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
270562306a36Sopenharmony_ci		state = TCP_CA_Disorder;
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_state != state) {
270862306a36Sopenharmony_ci		tcp_set_ca_state(sk, state);
270962306a36Sopenharmony_ci		tp->high_seq = tp->snd_nxt;
271062306a36Sopenharmony_ci	}
271162306a36Sopenharmony_ci}
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_cistatic void tcp_try_to_open(struct sock *sk, int flag)
271462306a36Sopenharmony_ci{
271562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ci	tcp_verify_left_out(tp);
271862306a36Sopenharmony_ci
271962306a36Sopenharmony_ci	if (!tcp_any_retrans_done(sk))
272062306a36Sopenharmony_ci		tp->retrans_stamp = 0;
272162306a36Sopenharmony_ci
272262306a36Sopenharmony_ci	if (flag & FLAG_ECE)
272362306a36Sopenharmony_ci		tcp_enter_cwr(sk);
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
272662306a36Sopenharmony_ci		tcp_try_keep_open(sk);
272762306a36Sopenharmony_ci	}
272862306a36Sopenharmony_ci}
272962306a36Sopenharmony_ci
273062306a36Sopenharmony_cistatic void tcp_mtup_probe_failed(struct sock *sk)
273162306a36Sopenharmony_ci{
273262306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
273362306a36Sopenharmony_ci
273462306a36Sopenharmony_ci	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
273562306a36Sopenharmony_ci	icsk->icsk_mtup.probe_size = 0;
273662306a36Sopenharmony_ci	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
273762306a36Sopenharmony_ci}
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_cistatic void tcp_mtup_probe_success(struct sock *sk)
274062306a36Sopenharmony_ci{
274162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
274262306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
274362306a36Sopenharmony_ci	u64 val;
274462306a36Sopenharmony_ci
274562306a36Sopenharmony_ci	tp->prior_ssthresh = tcp_current_ssthresh(sk);
274662306a36Sopenharmony_ci
274762306a36Sopenharmony_ci	val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
274862306a36Sopenharmony_ci	do_div(val, icsk->icsk_mtup.probe_size);
274962306a36Sopenharmony_ci	DEBUG_NET_WARN_ON_ONCE((u32)val != val);
275062306a36Sopenharmony_ci	tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
275162306a36Sopenharmony_ci
275262306a36Sopenharmony_ci	tp->snd_cwnd_cnt = 0;
275362306a36Sopenharmony_ci	tp->snd_cwnd_stamp = tcp_jiffies32;
275462306a36Sopenharmony_ci	tp->snd_ssthresh = tcp_current_ssthresh(sk);
275562306a36Sopenharmony_ci
275662306a36Sopenharmony_ci	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
275762306a36Sopenharmony_ci	icsk->icsk_mtup.probe_size = 0;
275862306a36Sopenharmony_ci	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
275962306a36Sopenharmony_ci	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
276062306a36Sopenharmony_ci}
276162306a36Sopenharmony_ci
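/* Illustrative rescaling done above (hypothetical numbers): if an MTU
 * probe raises the path MTU from 1000 to 1500 bytes while cwnd is 30
 * packets, the same 30000 bytes in flight now need only
 * 30 * 1000 / 1500 = 20 larger packets, so cwnd is rescaled to keep the
 * byte volume roughly constant. A minimal sketch of that division:
 */
static inline u32 example_rescale_cwnd(u32 cwnd, u32 old_mtu, u32 new_mtu)
{
	u64 val = (u64)cwnd * old_mtu;

	do_div(val, new_mtu);		/* val now holds the quotient */
	return max_t(u32, 1U, val);
}
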
276262306a36Sopenharmony_ci/* Do a simple retransmit without using the backoff mechanisms in
276362306a36Sopenharmony_ci * tcp_timer. This is used for path MTU discovery.
276462306a36Sopenharmony_ci * The socket is already locked here.
276562306a36Sopenharmony_ci */
276662306a36Sopenharmony_civoid tcp_simple_retransmit(struct sock *sk)
276762306a36Sopenharmony_ci{
276862306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
276962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
277062306a36Sopenharmony_ci	struct sk_buff *skb;
277162306a36Sopenharmony_ci	int mss;
277262306a36Sopenharmony_ci
277362306a36Sopenharmony_ci	/* A fastopen SYN request is stored as two separate packets within
277462306a36Sopenharmony_ci	 * the retransmit queue, this is done by tcp_send_syn_data().
277562306a36Sopenharmony_ci	 * As a result simply checking the MSS of the frames in the queue
277662306a36Sopenharmony_ci	 * will not work for the SYN packet.
277762306a36Sopenharmony_ci	 *
277862306a36Sopenharmony_ci * Being here is an indication of a path MTU issue, so we can
277962306a36Sopenharmony_ci	 * assume that the fastopen SYN was lost and just mark all the
278062306a36Sopenharmony_ci	 * frames in the retransmit queue as lost. We will use an MSS of
278162306a36Sopenharmony_ci	 * -1 to mark all frames as lost, otherwise compute the current MSS.
278262306a36Sopenharmony_ci	 */
278362306a36Sopenharmony_ci	if (tp->syn_data && sk->sk_state == TCP_SYN_SENT)
278462306a36Sopenharmony_ci		mss = -1;
278562306a36Sopenharmony_ci	else
278662306a36Sopenharmony_ci		mss = tcp_current_mss(sk);
278762306a36Sopenharmony_ci
278862306a36Sopenharmony_ci	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
278962306a36Sopenharmony_ci		if (tcp_skb_seglen(skb) > mss)
279062306a36Sopenharmony_ci			tcp_mark_skb_lost(sk, skb);
279162306a36Sopenharmony_ci	}
279262306a36Sopenharmony_ci
279362306a36Sopenharmony_ci	tcp_clear_retrans_hints_partial(tp);
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci	if (!tp->lost_out)
279662306a36Sopenharmony_ci		return;
279762306a36Sopenharmony_ci
279862306a36Sopenharmony_ci	if (tcp_is_reno(tp))
279962306a36Sopenharmony_ci		tcp_limit_reno_sacked(tp);
280062306a36Sopenharmony_ci
280162306a36Sopenharmony_ci	tcp_verify_left_out(tp);
280262306a36Sopenharmony_ci
280362306a36Sopenharmony_ci	/* Don't muck with the congestion window here.
280462306a36Sopenharmony_ci	 * Reason is that we do not increase amount of _data_
280562306a36Sopenharmony_ci	 * in network, but units changed and effective
280662306a36Sopenharmony_ci	 * cwnd/ssthresh really reduced now.
280762306a36Sopenharmony_ci	 */
280862306a36Sopenharmony_ci	if (icsk->icsk_ca_state != TCP_CA_Loss) {
280962306a36Sopenharmony_ci		tp->high_seq = tp->snd_nxt;
281062306a36Sopenharmony_ci		tp->snd_ssthresh = tcp_current_ssthresh(sk);
281162306a36Sopenharmony_ci		tp->prior_ssthresh = 0;
281262306a36Sopenharmony_ci		tp->undo_marker = 0;
281362306a36Sopenharmony_ci		tcp_set_ca_state(sk, TCP_CA_Loss);
281462306a36Sopenharmony_ci	}
281562306a36Sopenharmony_ci	tcp_xmit_retransmit_queue(sk);
281662306a36Sopenharmony_ci}
281762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_simple_retransmit);
281862306a36Sopenharmony_ci
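/* Example of the MSS test above (illustrative): if an ICMP
 * fragmentation-needed message drops the effective MSS to 1200 bytes,
 * every rtx-queue skb whose segment length exceeds 1200 can no longer
 * fit the path; it is marked lost and later re-segmented and
 * retransmitted without any RTO backoff.
 */
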
281962306a36Sopenharmony_civoid tcp_enter_recovery(struct sock *sk, bool ece_ack)
282062306a36Sopenharmony_ci{
282162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
282262306a36Sopenharmony_ci	int mib_idx;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci	if (tcp_is_reno(tp))
282562306a36Sopenharmony_ci		mib_idx = LINUX_MIB_TCPRENORECOVERY;
282662306a36Sopenharmony_ci	else
282762306a36Sopenharmony_ci		mib_idx = LINUX_MIB_TCPSACKRECOVERY;
282862306a36Sopenharmony_ci
282962306a36Sopenharmony_ci	NET_INC_STATS(sock_net(sk), mib_idx);
283062306a36Sopenharmony_ci
283162306a36Sopenharmony_ci	tp->prior_ssthresh = 0;
283262306a36Sopenharmony_ci	tcp_init_undo(tp);
283362306a36Sopenharmony_ci
283462306a36Sopenharmony_ci	if (!tcp_in_cwnd_reduction(sk)) {
283562306a36Sopenharmony_ci		if (!ece_ack)
283662306a36Sopenharmony_ci			tp->prior_ssthresh = tcp_current_ssthresh(sk);
283762306a36Sopenharmony_ci		tcp_init_cwnd_reduction(sk);
283862306a36Sopenharmony_ci	}
283962306a36Sopenharmony_ci	tcp_set_ca_state(sk, TCP_CA_Recovery);
284062306a36Sopenharmony_ci}
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci/* Process an ACK in the CA_Loss state. Move to CA_Open if the lost data are
284362306a36Sopenharmony_ci * recovered or proven spurious. Otherwise retransmit more on partial ACKs.
284462306a36Sopenharmony_ci */
284562306a36Sopenharmony_cistatic void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
284662306a36Sopenharmony_ci			     int *rexmit)
284762306a36Sopenharmony_ci{
284862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
284962306a36Sopenharmony_ci	bool recovered = !before(tp->snd_una, tp->high_seq);
285062306a36Sopenharmony_ci
285162306a36Sopenharmony_ci	if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
285262306a36Sopenharmony_ci	    tcp_try_undo_loss(sk, false))
285362306a36Sopenharmony_ci		return;
285462306a36Sopenharmony_ci
285562306a36Sopenharmony_ci	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
285662306a36Sopenharmony_ci		/* Step 3.b. A timeout is spurious if not all data are
285762306a36Sopenharmony_ci		 * lost, i.e., never-retransmitted data are (s)acked.
285862306a36Sopenharmony_ci		 */
285962306a36Sopenharmony_ci		if ((flag & FLAG_ORIG_SACK_ACKED) &&
286062306a36Sopenharmony_ci		    tcp_try_undo_loss(sk, true))
286162306a36Sopenharmony_ci			return;
286262306a36Sopenharmony_ci
286362306a36Sopenharmony_ci		if (after(tp->snd_nxt, tp->high_seq)) {
286462306a36Sopenharmony_ci			if (flag & FLAG_DATA_SACKED || num_dupack)
286562306a36Sopenharmony_ci				tp->frto = 0; /* Step 3.a. loss was real */
286662306a36Sopenharmony_ci		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
286762306a36Sopenharmony_ci			tp->high_seq = tp->snd_nxt;
286862306a36Sopenharmony_ci			/* Step 2.b. Try send new data (but deferred until cwnd
286962306a36Sopenharmony_ci			 * is updated in tcp_ack()). Otherwise fall back to
287062306a36Sopenharmony_ci			 * the conventional recovery.
287162306a36Sopenharmony_ci			 */
287262306a36Sopenharmony_ci			if (!tcp_write_queue_empty(sk) &&
287362306a36Sopenharmony_ci			    after(tcp_wnd_end(tp), tp->snd_nxt)) {
287462306a36Sopenharmony_ci				*rexmit = REXMIT_NEW;
287562306a36Sopenharmony_ci				return;
287662306a36Sopenharmony_ci			}
287762306a36Sopenharmony_ci			tp->frto = 0;
287862306a36Sopenharmony_ci		}
287962306a36Sopenharmony_ci	}
288062306a36Sopenharmony_ci
288162306a36Sopenharmony_ci	if (recovered) {
288262306a36Sopenharmony_ci		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
288362306a36Sopenharmony_ci		tcp_try_undo_recovery(sk);
288462306a36Sopenharmony_ci		return;
288562306a36Sopenharmony_ci	}
288662306a36Sopenharmony_ci	if (tcp_is_reno(tp)) {
288762306a36Sopenharmony_ci		/* A Reno DUPACK means new data in F-RTO step 2.b above are
288862306a36Sopenharmony_ci		 * delivered. Lower inflight to clock out (re)transmissions.
288962306a36Sopenharmony_ci		 */
289062306a36Sopenharmony_ci		if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
289162306a36Sopenharmony_ci			tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
289262306a36Sopenharmony_ci		else if (flag & FLAG_SND_UNA_ADVANCED)
289362306a36Sopenharmony_ci			tcp_reset_reno_sack(tp);
289462306a36Sopenharmony_ci	}
289562306a36Sopenharmony_ci	*rexmit = REXMIT_LOST;
289662306a36Sopenharmony_ci}
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_cistatic bool tcp_force_fast_retransmit(struct sock *sk)
289962306a36Sopenharmony_ci{
290062306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
290162306a36Sopenharmony_ci
290262306a36Sopenharmony_ci	return after(tcp_highest_sack_seq(tp),
290362306a36Sopenharmony_ci		     tp->snd_una + tp->reordering * tp->mss_cache);
290462306a36Sopenharmony_ci}
290562306a36Sopenharmony_ci
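/* Worked threshold example (illustrative): with snd_una = 1000,
 * tp->mss_cache = 100 and tp->reordering = 3, a fast retransmit is
 * forced once the highest SACKed sequence is beyond
 * 1000 + 3 * 100 = 1300, i.e. the SACKed data spans more than the
 * presumed reordering window.
 */
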
290662306a36Sopenharmony_ci/* Undo during fast recovery after partial ACK. */
290762306a36Sopenharmony_cistatic bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
290862306a36Sopenharmony_ci				 bool *do_lost)
290962306a36Sopenharmony_ci{
291062306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_ci	if (tp->undo_marker && tcp_packet_delayed(tp)) {
291362306a36Sopenharmony_ci		/* Plain luck! The hole was filled with a delayed
291462306a36Sopenharmony_ci		 * packet, rather than with a retransmit. Check reordering.
291562306a36Sopenharmony_ci		 */
291662306a36Sopenharmony_ci		tcp_check_sack_reordering(sk, prior_snd_una, 1);
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci		/* We are getting evidence that the reordering degree is higher
291962306a36Sopenharmony_ci		 * than we realized. If there are no retransmits out then we
292062306a36Sopenharmony_ci		 * can undo. Otherwise we clock out new packets but do not
292162306a36Sopenharmony_ci		 * mark more packets lost or retransmit more.
292262306a36Sopenharmony_ci		 */
292362306a36Sopenharmony_ci		if (tp->retrans_out)
292462306a36Sopenharmony_ci			return true;
292562306a36Sopenharmony_ci
292662306a36Sopenharmony_ci		if (!tcp_any_retrans_done(sk))
292762306a36Sopenharmony_ci			tp->retrans_stamp = 0;
292862306a36Sopenharmony_ci
292962306a36Sopenharmony_ci		DBGUNDO(sk, "partial recovery");
293062306a36Sopenharmony_ci		tcp_undo_cwnd_reduction(sk, true);
293162306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
293262306a36Sopenharmony_ci		tcp_try_keep_open(sk);
293362306a36Sopenharmony_ci	} else {
293462306a36Sopenharmony_ci		/* Partial ACK arrived. Force fast retransmit. */
293562306a36Sopenharmony_ci		*do_lost = tcp_force_fast_retransmit(sk);
293662306a36Sopenharmony_ci	}
293762306a36Sopenharmony_ci	return false;
293862306a36Sopenharmony_ci}
293962306a36Sopenharmony_ci
294062306a36Sopenharmony_cistatic void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
294162306a36Sopenharmony_ci{
294262306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
294362306a36Sopenharmony_ci
294462306a36Sopenharmony_ci	if (tcp_rtx_queue_empty(sk))
294562306a36Sopenharmony_ci		return;
294662306a36Sopenharmony_ci
294762306a36Sopenharmony_ci	if (unlikely(tcp_is_reno(tp))) {
294862306a36Sopenharmony_ci		tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
294962306a36Sopenharmony_ci	} else if (tcp_is_rack(sk)) {
295062306a36Sopenharmony_ci		u32 prior_retrans = tp->retrans_out;
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_ci		if (tcp_rack_mark_lost(sk))
295362306a36Sopenharmony_ci			*ack_flag &= ~FLAG_SET_XMIT_TIMER;
295462306a36Sopenharmony_ci		if (prior_retrans > tp->retrans_out)
295562306a36Sopenharmony_ci			*ack_flag |= FLAG_LOST_RETRANS;
295662306a36Sopenharmony_ci	}
295762306a36Sopenharmony_ci}
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci/* Process an event which can update packets-in-flight non-trivially.
296062306a36Sopenharmony_ci * The main goal of this function is to calculate a new estimate for left_out,
296162306a36Sopenharmony_ci * taking into account both packets sitting in the receiver's buffer and
296262306a36Sopenharmony_ci * packets lost by the network.
296362306a36Sopenharmony_ci *
296462306a36Sopenharmony_ci * Besides that it updates the congestion state when packet loss or ECN
296562306a36Sopenharmony_ci * is detected. But it does not reduce the cwnd; that is done by the
296662306a36Sopenharmony_ci * congestion control later.
296762306a36Sopenharmony_ci *
296862306a36Sopenharmony_ci * It does _not_ decide what to send; that is done in
296962306a36Sopenharmony_ci * tcp_xmit_retransmit_queue().
297062306a36Sopenharmony_ci */
297162306a36Sopenharmony_cistatic void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
297262306a36Sopenharmony_ci				  int num_dupack, int *ack_flag, int *rexmit)
297362306a36Sopenharmony_ci{
297462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
297562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
297662306a36Sopenharmony_ci	int fast_rexmit = 0, flag = *ack_flag;
297762306a36Sopenharmony_ci	bool ece_ack = flag & FLAG_ECE;
297862306a36Sopenharmony_ci	bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
297962306a36Sopenharmony_ci				      tcp_force_fast_retransmit(sk));
298062306a36Sopenharmony_ci
298162306a36Sopenharmony_ci	if (!tp->packets_out && tp->sacked_out)
298262306a36Sopenharmony_ci		tp->sacked_out = 0;
298362306a36Sopenharmony_ci
298462306a36Sopenharmony_ci	/* Now state machine starts.
298562306a36Sopenharmony_ci	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
298662306a36Sopenharmony_ci	if (ece_ack)
298762306a36Sopenharmony_ci		tp->prior_ssthresh = 0;
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci	/* B. In all the states check for reneging SACKs. */
299062306a36Sopenharmony_ci	if (tcp_check_sack_reneging(sk, ack_flag))
299162306a36Sopenharmony_ci		return;
299262306a36Sopenharmony_ci
299362306a36Sopenharmony_ci	/* C. Check consistency of the current state. */
299462306a36Sopenharmony_ci	tcp_verify_left_out(tp);
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_ci	/* D. Check state exit conditions. State can be terminated
299762306a36Sopenharmony_ci	 *    when high_seq is ACKed. */
299862306a36Sopenharmony_ci	if (icsk->icsk_ca_state == TCP_CA_Open) {
299962306a36Sopenharmony_ci		WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
300062306a36Sopenharmony_ci		tp->retrans_stamp = 0;
300162306a36Sopenharmony_ci	} else if (!before(tp->snd_una, tp->high_seq)) {
300262306a36Sopenharmony_ci		switch (icsk->icsk_ca_state) {
300362306a36Sopenharmony_ci		case TCP_CA_CWR:
300462306a36Sopenharmony_ci			/* CWR is to be held something *above* high_seq
300562306a36Sopenharmony_ci			 * is ACKed for CWR bit to reach receiver. */
300662306a36Sopenharmony_ci			if (tp->snd_una != tp->high_seq) {
300762306a36Sopenharmony_ci				tcp_end_cwnd_reduction(sk);
300862306a36Sopenharmony_ci				tcp_set_ca_state(sk, TCP_CA_Open);
300962306a36Sopenharmony_ci			}
301062306a36Sopenharmony_ci			break;
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci		case TCP_CA_Recovery:
301362306a36Sopenharmony_ci			if (tcp_is_reno(tp))
301462306a36Sopenharmony_ci				tcp_reset_reno_sack(tp);
301562306a36Sopenharmony_ci			if (tcp_try_undo_recovery(sk))
301662306a36Sopenharmony_ci				return;
301762306a36Sopenharmony_ci			tcp_end_cwnd_reduction(sk);
301862306a36Sopenharmony_ci			break;
301962306a36Sopenharmony_ci		}
302062306a36Sopenharmony_ci	}
302162306a36Sopenharmony_ci
302262306a36Sopenharmony_ci	/* E. Process state. */
302362306a36Sopenharmony_ci	switch (icsk->icsk_ca_state) {
302462306a36Sopenharmony_ci	case TCP_CA_Recovery:
302562306a36Sopenharmony_ci		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
302662306a36Sopenharmony_ci			if (tcp_is_reno(tp))
302762306a36Sopenharmony_ci				tcp_add_reno_sack(sk, num_dupack, ece_ack);
302862306a36Sopenharmony_ci		} else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
302962306a36Sopenharmony_ci			return;
303062306a36Sopenharmony_ci
303162306a36Sopenharmony_ci		if (tcp_try_undo_dsack(sk))
303262306a36Sopenharmony_ci			tcp_try_keep_open(sk);
303362306a36Sopenharmony_ci
303462306a36Sopenharmony_ci		tcp_identify_packet_loss(sk, ack_flag);
303562306a36Sopenharmony_ci		if (icsk->icsk_ca_state != TCP_CA_Recovery) {
303662306a36Sopenharmony_ci			if (!tcp_time_to_recover(sk, flag))
303762306a36Sopenharmony_ci				return;
303862306a36Sopenharmony_ci			/* Undo reverts the recovery state. If loss is evident,
303962306a36Sopenharmony_ci			 * starts a new recovery (e.g. reordering then loss);
304062306a36Sopenharmony_ci			 */
304162306a36Sopenharmony_ci			tcp_enter_recovery(sk, ece_ack);
304262306a36Sopenharmony_ci		}
304362306a36Sopenharmony_ci		break;
304462306a36Sopenharmony_ci	case TCP_CA_Loss:
304562306a36Sopenharmony_ci		tcp_process_loss(sk, flag, num_dupack, rexmit);
304662306a36Sopenharmony_ci		tcp_identify_packet_loss(sk, ack_flag);
304762306a36Sopenharmony_ci		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
304862306a36Sopenharmony_ci		      (*ack_flag & FLAG_LOST_RETRANS)))
304962306a36Sopenharmony_ci			return;
305062306a36Sopenharmony_ci		/* Change state if cwnd is undone or retransmits are lost */
305162306a36Sopenharmony_ci		fallthrough;
305262306a36Sopenharmony_ci	default:
305362306a36Sopenharmony_ci		if (tcp_is_reno(tp)) {
305462306a36Sopenharmony_ci			if (flag & FLAG_SND_UNA_ADVANCED)
305562306a36Sopenharmony_ci				tcp_reset_reno_sack(tp);
305662306a36Sopenharmony_ci			tcp_add_reno_sack(sk, num_dupack, ece_ack);
305762306a36Sopenharmony_ci		}
305862306a36Sopenharmony_ci
305962306a36Sopenharmony_ci		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
306062306a36Sopenharmony_ci			tcp_try_undo_dsack(sk);
306162306a36Sopenharmony_ci
306262306a36Sopenharmony_ci		tcp_identify_packet_loss(sk, ack_flag);
306362306a36Sopenharmony_ci		if (!tcp_time_to_recover(sk, flag)) {
306462306a36Sopenharmony_ci			tcp_try_to_open(sk, flag);
306562306a36Sopenharmony_ci			return;
306662306a36Sopenharmony_ci		}
306762306a36Sopenharmony_ci
306862306a36Sopenharmony_ci		/* MTU probe failure: don't reduce cwnd */
306962306a36Sopenharmony_ci		if (icsk->icsk_ca_state < TCP_CA_CWR &&
307062306a36Sopenharmony_ci		    icsk->icsk_mtup.probe_size &&
307162306a36Sopenharmony_ci		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
307262306a36Sopenharmony_ci			tcp_mtup_probe_failed(sk);
307362306a36Sopenharmony_ci			/* Restores the reduction we did in tcp_mtup_probe() */
307462306a36Sopenharmony_ci			tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
307562306a36Sopenharmony_ci			tcp_simple_retransmit(sk);
307662306a36Sopenharmony_ci			return;
307762306a36Sopenharmony_ci		}
307862306a36Sopenharmony_ci
307962306a36Sopenharmony_ci		/* Otherwise enter Recovery state */
308062306a36Sopenharmony_ci		tcp_enter_recovery(sk, ece_ack);
308162306a36Sopenharmony_ci		fast_rexmit = 1;
308262306a36Sopenharmony_ci	}
308362306a36Sopenharmony_ci
308462306a36Sopenharmony_ci	if (!tcp_is_rack(sk) && do_lost)
308562306a36Sopenharmony_ci		tcp_update_scoreboard(sk, fast_rexmit);
308662306a36Sopenharmony_ci	*rexmit = REXMIT_LOST;
308762306a36Sopenharmony_ci}

static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
	u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
	struct tcp_sock *tp = tcp_sk(sk);

	if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
		/* If the remote keeps returning delayed ACKs, eventually
		 * the min filter would pick it up and overestimate the
		 * prop. delay when it expires. Skip suspected delayed ACKs.
		 */
		return;
	}
	minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
			   rtt_us ? : jiffies_to_usecs(1));
}
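
/* Illustrative sketch, not part of the build: what minmax_running_min()
 * above does conceptually. tp->rtt_min tracks the minimum RTT seen over a
 * sliding window of @wlen ticks; a sample displaces the current minimum
 * either because it is lower or because the old minimum has aged out.
 * This invented single-slot model omits the multi-slot filter the kernel
 * actually uses.
 */
#if 0
struct min_sketch {
	u32 val;	/* current windowed minimum, in usec */
	u32 stamp;	/* tcp_jiffies32 when it was recorded */
};

static void min_sketch_update(struct min_sketch *m, u32 win,
			      u32 now, u32 sample)
{
	/* Take the sample if it is a new minimum, or if the old
	 * minimum has aged out of the window.
	 */
	if (sample <= m->val || (u32)(now - m->stamp) > win)
		*m = (struct min_sketch) { .val = sample, .stamp = now };
}
#endif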

static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
			       long seq_rtt_us, long sack_rtt_us,
			       long ca_rtt_us, struct rate_sample *rs)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
	 * broken middle-boxes or peers may corrupt TS-ECR fields. But
	 * Karn's algorithm forbids taking RTT if some retransmitted data
	 * is acked (RFC6298).
	 */
	if (seq_rtt_us < 0)
		seq_rtt_us = sack_rtt_us;

	/* RTTM Rule: A TSecr value received in a segment is used to
	 * update the averaged RTT measurement only if the segment
	 * acknowledges some new data, i.e., only if it advances the
	 * left edge of the send window.
	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
	 */
	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
	    flag & FLAG_ACKED) {
		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;

		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
			if (!delta)
				delta = 1;
			seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
			ca_rtt_us = seq_rtt_us;
		}
	}
	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
	if (seq_rtt_us < 0)
		return false;

	/* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
	 * always taken together with ACK, SACK, or TS-opts. Any negative
	 * values will be skipped with the seq_rtt_us < 0 check above.
	 */
	tcp_update_rtt_min(sk, ca_rtt_us, flag);
	tcp_rtt_estimator(sk, seq_rtt_us);
	tcp_set_rto(sk);

	/* RFC6298: only reset backoff on valid RTT measurement. */
	inet_csk(sk)->icsk_backoff = 0;
	return true;
}
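
/* Worked example for the TS-ECR fallback above, assuming TCP_TS_HZ == 1000
 * (one timestamp tick per millisecond): if tcp_time_stamp() reads 70030
 * and rcv_tsecr is 70000, then delta = 30 ticks and
 * seq_rtt_us = 30 * (USEC_PER_SEC / TCP_TS_HZ) = 30 * 1000 = 30000 usec,
 * i.e. a 30 ms RTT. A delta of zero is rounded up to one tick so that a
 * valid measurement is never reported as zero.
 */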

/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
	struct rate_sample rs;
	long rtt_us = -1L;

	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
		rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);

	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
}


static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
	tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
}

/* Restart timer after forward progress on connection.
 * RFC2988 recommends restarting the timer to now+rto.
 */
void tcp_rearm_rto(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/* If the retrans timer is currently being used by Fast Open
	 * for SYN-ACK retrans purposes, stay put.
	 */
	if (rcu_access_pointer(tp->fastopen_rsk))
		return;

	if (!tp->packets_out) {
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
	} else {
		u32 rto = inet_csk(sk)->icsk_rto;
		/* Offset the time elapsed after installing regular RTO */
		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
			s64 delta_us = tcp_rto_delta_us(sk);
			/* delta_us may not be positive if the socket is locked
			 * when the retrans timer fires and is rescheduled.
			 */
			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
		}
		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
				     TCP_RTO_MAX);
	}
}
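
/* Illustrative sketch, not part of the build: the offsetting above. If a
 * loss-probe or reordering timer already consumed part of the RTO period,
 * the freshly armed RTO covers only the remainder, clamped to a minimum of
 * one microsecond. This assumes tcp_rto_delta_us() returns
 * "head transmit time + RTO - now"; the helper name is invented.
 */
#if 0
static s64 rto_remaining_us(s64 head_xmit_us, s64 rto_us, s64 now_us)
{
	s64 delta = head_xmit_us + rto_us - now_us;

	return delta > 0 ? delta : 1;	/* never arm a zero/negative timer */
}
#endif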

/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
static void tcp_set_xmit_timer(struct sock *sk)
{
	if (!tcp_schedule_loss_probe(sk, true))
		tcp_rearm_rto(sk);
}

/* If we get here, the whole TSO packet has not been acked. */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 packets_acked;

	BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));

	packets_acked = tcp_skb_pcount(skb);
	if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
		return 0;
	packets_acked -= tcp_skb_pcount(skb);

	if (packets_acked) {
		BUG_ON(tcp_skb_pcount(skb) == 0);
		BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
	}

	return packets_acked;
}
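
/* Worked example for the partial-TSO accounting above: suppose an skb
 * covers bytes 1000..4000 as three 1000-byte segments (pcount == 3) and
 * snd_una advances to 2000. tcp_trim_head() cuts off the first 1000
 * bytes, the skb then counts two segments, and the function reports
 * 3 - 2 = 1 packet acked.
 */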

static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
			   const struct sk_buff *ack_skb, u32 prior_snd_una)
{
	const struct skb_shared_info *shinfo;

	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
	if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
		return;

	shinfo = skb_shinfo(skb);
	if (!before(shinfo->tskey, prior_snd_una) &&
	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
		tcp_skb_tsorted_save(skb) {
			__skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
		} tcp_skb_tsorted_restore(skb);
	}
}

/* Remove acknowledged frames from the retransmission queue. If our packet
 * is before the ack sequence we can discard it as it's confirmed to have
 * arrived at the other end.
 */
static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
			       u32 prior_fack, u32 prior_snd_una,
			       struct tcp_sacktag_state *sack, bool ece_ack)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u64 first_ackt, last_ackt;
	struct tcp_sock *tp = tcp_sk(sk);
	u32 prior_sacked = tp->sacked_out;
	u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
	struct sk_buff *skb, *next;
	bool fully_acked = true;
	long sack_rtt_us = -1L;
	long seq_rtt_us = -1L;
	long ca_rtt_us = -1L;
	u32 pkts_acked = 0;
	bool rtt_update;
	int flag = 0;

	first_ackt = 0;

	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		const u32 start_seq = scb->seq;
		u8 sacked = scb->sacked;
		u32 acked_pcount;

		/* Determine how many packets and what bytes were acked,
		 * TSO or otherwise.
		 */
		if (after(scb->end_seq, tp->snd_una)) {
			if (tcp_skb_pcount(skb) == 1 ||
			    !after(tp->snd_una, scb->seq))
				break;

			acked_pcount = tcp_tso_acked(sk, skb);
			if (!acked_pcount)
				break;
			fully_acked = false;
		} else {
			acked_pcount = tcp_skb_pcount(skb);
		}

		if (unlikely(sacked & TCPCB_RETRANS)) {
			if (sacked & TCPCB_SACKED_RETRANS)
				tp->retrans_out -= acked_pcount;
			flag |= FLAG_RETRANS_DATA_ACKED;
		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
			last_ackt = tcp_skb_timestamp_us(skb);
			WARN_ON_ONCE(last_ackt == 0);
			if (!first_ackt)
				first_ackt = last_ackt;

			if (before(start_seq, reord))
				reord = start_seq;
			if (!after(scb->end_seq, tp->high_seq))
				flag |= FLAG_ORIG_SACK_ACKED;
		}

		if (sacked & TCPCB_SACKED_ACKED) {
			tp->sacked_out -= acked_pcount;
		} else if (tcp_is_sack(tp)) {
			tcp_count_delivered(tp, acked_pcount, ece_ack);
			if (!tcp_skb_spurious_retrans(tp, skb))
				tcp_rack_advance(tp, sacked, scb->end_seq,
						 tcp_skb_timestamp_us(skb));
		}
		if (sacked & TCPCB_LOST)
			tp->lost_out -= acked_pcount;

		tp->packets_out -= acked_pcount;
		pkts_acked += acked_pcount;
		tcp_rate_skb_delivered(sk, skb, sack->rate);

		/* Initial outgoing SYNs get put onto the write_queue
		 * just like anything else we transmit.  It is not
		 * true data, and if we misinform our callers that
		 * this ACK acks real data, we will erroneously exit
		 * connection startup slow start one packet too
		 * quickly.  This is severely frowned upon behavior.
		 */
		if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
			flag |= FLAG_DATA_ACKED;
		} else {
			flag |= FLAG_SYN_ACKED;
			tp->retrans_stamp = 0;
		}

		if (!fully_acked)
			break;

		tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);

		next = skb_rb_next(skb);
		if (unlikely(skb == tp->retransmit_skb_hint))
			tp->retransmit_skb_hint = NULL;
		if (unlikely(skb == tp->lost_skb_hint))
			tp->lost_skb_hint = NULL;
		tcp_highest_sack_replace(sk, skb, next);
		tcp_rtx_queue_unlink_and_free(skb, sk);
	}

	if (!skb)
		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);

	if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
		tp->snd_up = tp->snd_una;

	if (skb) {
		tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
			flag |= FLAG_SACK_RENEGING;
	}

	if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
		seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);

		if (pkts_acked == 1 && fully_acked && !prior_sacked &&
		    (tp->snd_una - prior_snd_una) < tp->mss_cache &&
		    sack->rate->prior_delivered + 1 == tp->delivered &&
		    !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
			/* Conservatively mark a delayed ACK. It's typically
			 * from a lone runt packet over the round trip to
			 * a receiver w/o out-of-order or CE events.
			 */
			flag |= FLAG_ACK_MAYBE_DELAYED;
		}
	}
	if (sack->first_sackt) {
		sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
	}
	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
					ca_rtt_us, sack->rate);

	if (flag & FLAG_ACKED) {
		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
		if (unlikely(icsk->icsk_mtup.probe_size &&
			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
			tcp_mtup_probe_success(sk);
		}

		if (tcp_is_reno(tp)) {
			tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);

			/* If any of the cumulatively ACKed segments was
			 * retransmitted, the non-SACK case cannot confirm that
			 * progress was due to original transmission due to
			 * lack of TCPCB_SACKED_ACKED bits even if some of
			 * the packets may have been never retransmitted.
			 */
			if (flag & FLAG_RETRANS_DATA_ACKED)
				flag &= ~FLAG_ORIG_SACK_ACKED;
		} else {
			int delta;

			/* Non-retransmitted hole got filled? That's reordering */
			if (before(reord, prior_fack))
				tcp_check_sack_reordering(sk, reord, 0);

			delta = prior_sacked - tp->sacked_out;
			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
		}
	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
						    tcp_skb_timestamp_us(skb))) {
		/* Do not re-arm RTO if the sack RTT is measured from data sent
		 * after when the head was last (re)transmitted. Otherwise the
		 * timeout may continue to extend in loss recovery.
		 */
		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
	}

	if (icsk->icsk_ca_ops->pkts_acked) {
		struct ack_sample sample = { .pkts_acked = pkts_acked,
					     .rtt_us = sack->rate->rtt_us };

		sample.in_flight = tp->mss_cache *
			(tp->delivered - sack->rate->prior_delivered);
		icsk->icsk_ca_ops->pkts_acked(sk, &sample);
	}

#if FASTRETRANS_DEBUG > 0
	WARN_ON((int)tp->sacked_out < 0);
	WARN_ON((int)tp->lost_out < 0);
	WARN_ON((int)tp->retrans_out < 0);
	if (!tp->packets_out && tcp_is_sack(tp)) {
		icsk = inet_csk(sk);
		if (tp->lost_out) {
			pr_debug("Leak l=%u %d\n",
				 tp->lost_out, icsk->icsk_ca_state);
			tp->lost_out = 0;
		}
		if (tp->sacked_out) {
			pr_debug("Leak s=%u %d\n",
				 tp->sacked_out, icsk->icsk_ca_state);
			tp->sacked_out = 0;
		}
		if (tp->retrans_out) {
			pr_debug("Leak r=%u %d\n",
				 tp->retrans_out, icsk->icsk_ca_state);
			tp->retrans_out = 0;
		}
	}
#endif
	return flag;
}
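
/* Illustrative sketch, not part of the build: the "maybe delayed ACK"
 * test above, pulled out as a predicate with invented parameter names.
 * All of the conditions must hold before the RTT sample may be excluded
 * from the min filter by tcp_update_rtt_min().
 */
#if 0
static bool ack_maybe_delayed_sketch(u32 pkts_acked, bool fully_acked,
				     u32 prior_sacked, u32 bytes_acked,
				     u32 mss, bool delivered_exactly_one,
				     bool ca_alert_or_syn)
{
	/* A lone, fully acked runt packet with no SACKs, no congestion
	 * alert, and exactly one newly delivered segment.
	 */
	return pkts_acked == 1 && fully_acked && !prior_sacked &&
	       bytes_acked < mss && delivered_exactly_one &&
	       !ca_alert_or_syn;
}
#endif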

static void tcp_ack_probe(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *head = tcp_send_head(sk);
	const struct tcp_sock *tp = tcp_sk(sk);

	/* Has a usable window opened? */
	if (!head)
		return;
	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
		icsk->icsk_backoff = 0;
		icsk->icsk_probes_tstamp = 0;
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
		/* The socket must be woken up by a subsequent
		 * tcp_data_snd_check(); this function is not for general use.
		 */
	} else {
		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);

		when = tcp_clamp_probe0_to_user_timeout(sk, when);
		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
	}
}

static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
{
	return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}

/* Decide whether to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
	/* If reordering is high then always grow cwnd whenever data is
	 * delivered regardless of its ordering. Otherwise stay conservative
	 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
	 * new SACK or ECE mark may first advance cwnd here and later reduce
	 * cwnd in tcp_fastretrans_alert() based on more states.
	 */
	if (tcp_sk(sk)->reordering >
	    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
		return flag & FLAG_FORWARD_PROGRESS;

	return flag & FLAG_DATA_ACKED;
}

/* The "ultimate" congestion control function that aims to replace the rigid
 * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
 * It's called toward the end of processing an ACK with precise rate
 * information. All transmissions and retransmissions are delayed until
 * afterwards.
 */
static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
			     int flag, const struct rate_sample *rs)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->cong_control) {
		icsk->icsk_ca_ops->cong_control(sk, rs);
		return;
	}

	if (tcp_in_cwnd_reduction(sk)) {
		/* Reduce cwnd if state mandates */
		tcp_cwnd_reduction(sk, acked_sacked, rs->losses, flag);
	} else if (tcp_may_raise_cwnd(sk, flag)) {
		/* Advance cwnd if state allows */
		tcp_cong_avoid(sk, ack, acked_sacked);
	}
	tcp_update_pacing_rate(sk);
}

/* Check that a window update is acceptable.
 * The function assumes that snd_una<=ack<=snd_next.
 */
static inline bool tcp_may_update_window(const struct tcp_sock *tp,
					const u32 ack, const u32 ack_seq,
					const u32 nwin)
{
	return	after(ack, tp->snd_una) ||
		after(ack_seq, tp->snd_wl1) ||
		(ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
}
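
/* Worked example for the predicate above (all comparisons mod 2^32):
 * with snd_una = 1000, snd_wl1 = 500 and snd_wnd = 8192, an ACK with
 * ack = 1500 is accepted by the first clause (it acks new data); an ACK
 * with ack = 1000, ack_seq = 700 is accepted by the second clause (it
 * carries newer data than the last window update); and an ACK with
 * ack = 1000, ack_seq = 500, nwin = 16384 is accepted by the third
 * clause (same segment, larger window, or a shrink to a zero window).
 * A stale duplicate fails all three.
 */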

/* If we update tp->snd_una, also update tp->bytes_acked */
static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
{
	u32 delta = ack - tp->snd_una;

	sock_owned_by_me((struct sock *)tp);
	tp->bytes_acked += delta;
	tp->snd_una = ack;
}

/* If we update tp->rcv_nxt, also update tp->bytes_received */
static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
{
	u32 delta = seq - tp->rcv_nxt;

	sock_owned_by_me((struct sock *)tp);
	tp->bytes_received += delta;
	WRITE_ONCE(tp->rcv_nxt, seq);
}

/* Update our send window.
 *
 * The window update algorithm described in RFC793/RFC1122 (and used in
 * linux-2.2 and in FreeBSD; NetBSD's is even worse) is wrong.
 */
static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
				 u32 ack_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int flag = 0;
	u32 nwin = ntohs(tcp_hdr(skb)->window);

	if (likely(!tcp_hdr(skb)->syn))
		nwin <<= tp->rx_opt.snd_wscale;

	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
		flag |= FLAG_WIN_UPDATE;
		tcp_update_wl(tp, ack_seq);

		if (tp->snd_wnd != nwin) {
			tp->snd_wnd = nwin;

			/* Note: this is the only place where the fast path
			 * is recovered for the sending side.
			 */
			tp->pred_flags = 0;
			tcp_fast_path_check(sk);

			if (!tcp_write_queue_empty(sk))
				tcp_slow_start_after_idle_check(sk);

			if (nwin > tp->max_window) {
				tp->max_window = nwin;
				tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
			}
		}
	}

	tcp_snd_una_update(tp, ack);

	return flag;
}
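
/* Worked example for the scaling above: the raw 16-bit window field can
 * carry at most 65535, so with snd_wscale == 7 an advertised value of
 * 512 becomes 512 << 7 = 65536 bytes of usable window. SYN segments are
 * exempt because the scale factor itself is only negotiated during the
 * SYN exchange (RFC 7323).
 */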

static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
				   u32 *last_oow_ack_time)
{
	/* Paired with the WRITE_ONCE() in this function. */
	u32 val = READ_ONCE(*last_oow_ack_time);

	if (val) {
		s32 elapsed = (s32)(tcp_jiffies32 - val);

		if (0 <= elapsed &&
		    elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
			NET_INC_STATS(net, mib_idx);
			return true;	/* rate-limited: don't send yet! */
		}
	}

	/* Paired with the prior READ_ONCE() and with itself,
	 * as we might be lockless.
	 */
	WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);

	return false;	/* not rate-limited: go ahead, send dupack now! */
}
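
/* Illustrative sketch, not part of the build: the rate limiter above
 * collapses to "at most one response per sysctl interval", with the
 * signed subtraction making jiffies wraparound harmless. The helper
 * name and parameters are invented for exposition.
 */
#if 0
static bool oow_rate_limited_sketch(u32 now, u32 *last, s32 min_interval)
{
	s32 elapsed = (s32)(now - *last);

	if (*last && elapsed >= 0 && elapsed < min_interval)
		return true;	/* suppress this response */

	*last = now;		/* allowed; remember when we responded */
	return false;
}
#endif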

/* Return true if we're currently rate-limiting out-of-window ACKs and
 * thus shouldn't send a dupack right now. We rate-limit dupacks in
 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
 * attacks that send repeated SYNs or ACKs for the same connection. To
 * do this, we do not send a duplicate SYNACK or ACK if the remote
 * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
 */
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
			  int mib_idx, u32 *last_oow_ack_time)
{
	/* Data packets without SYNs are not likely part of an ACK loop. */
	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
	    !tcp_hdr(skb)->syn)
		return false;

	return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
}

/* RFC 5961 7 [ACK Throttling] */
static void tcp_send_challenge_ack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	u32 count, now, ack_limit;

	/* First check our per-socket dupack rate limit. */
	if (__tcp_oow_rate_limited(net,
				   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
				   &tp->last_oow_ack_time))
		return;

	ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
	if (ack_limit == INT_MAX)
		goto send_ack;

	/* Then check host-wide RFC 5961 rate limit. */
	now = jiffies / HZ;
	if (now != READ_ONCE(net->ipv4.tcp_challenge_timestamp)) {
		u32 half = (ack_limit + 1) >> 1;

		WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now);
		WRITE_ONCE(net->ipv4.tcp_challenge_count,
			   get_random_u32_inclusive(half, ack_limit + half - 1));
	}
	count = READ_ONCE(net->ipv4.tcp_challenge_count);
	if (count > 0) {
		WRITE_ONCE(net->ipv4.tcp_challenge_count, count - 1);
send_ack:
		NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
		tcp_send_ack(sk);
	}
}
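
/* Worked example for the global budget above: with ack_limit == 1000,
 * half == 500, so each new one-second epoch draws a fresh budget
 * uniformly from [500, 1499]. Randomizing the budget keeps an off-path
 * attacker from inferring connection state by counting challenge ACKs
 * (the concern behind CVE-2016-5696). Every challenge ACK sent
 * decrements the budget; at zero, further challenges are silently
 * dropped until the next epoch.
 */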

static void tcp_store_ts_recent(struct tcp_sock *tp)
{
	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
}

static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
{
	if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
		/* PAWS bug workaround wrt. ACK frames; the extra PAWS
		 * discard check below makes sure this can only happen
		 * for pure ACK frames.  -DaveM
		 *
		 * Not only that: it also occurs for expired timestamps.
		 */

		if (tcp_paws_check(&tp->rx_opt, 0))
			tcp_store_ts_recent(tp);
	}
}

/* This routine deals with acks during a TLP episode and ends an episode by
 * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
 */
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (before(ack, tp->tlp_high_seq))
		return;

	if (!tp->tlp_retrans) {
		/* TLP of new data has been acknowledged */
		tp->tlp_high_seq = 0;
	} else if (flag & FLAG_DSACK_TLP) {
		/* This DSACK means original and TLP probe arrived; no loss */
		tp->tlp_high_seq = 0;
	} else if (after(ack, tp->tlp_high_seq)) {
		/* ACK advances: there was a loss, so reduce cwnd. Reset
		 * tlp_high_seq in tcp_init_cwnd_reduction()
		 */
		tcp_init_cwnd_reduction(sk);
		tcp_set_ca_state(sk, TCP_CA_CWR);
		tcp_end_cwnd_reduction(sk);
		tcp_try_keep_open(sk);
		NET_INC_STATS(sock_net(sk),
				LINUX_MIB_TCPLOSSPROBERECOVERY);
	} else if (!(flag & (FLAG_SND_UNA_ADVANCED |
			     FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
		/* Pure dupack: original and TLP probe arrived; no loss */
		tp->tlp_high_seq = 0;
	}
}

static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->in_ack_event)
		icsk->icsk_ca_ops->in_ack_event(sk, flags);
}

/* Congestion control has updated the cwnd already. So if we're in
 * loss recovery then now we do any new sends (for FRTO) or
 * retransmits (for CA_Loss or CA_Recovery) that make sense.
 */
static void tcp_xmit_recovery(struct sock *sk, int rexmit)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
		return;

	if (unlikely(rexmit == REXMIT_NEW)) {
		__tcp_push_pending_frames(sk, tcp_current_mss(sk),
					  TCP_NAGLE_OFF);
		if (after(tp->snd_nxt, tp->high_seq))
			return;
		tp->frto = 0;
	}
	tcp_xmit_retransmit_queue(sk);
}

/* Returns the number of packets newly acked or sacked by the current ACK */
static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
{
	const struct net *net = sock_net(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 delivered;

	delivered = tp->delivered - prior_delivered;
	NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
	if (flag & FLAG_ECE)
		NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);

	return delivered;
}
378262306a36Sopenharmony_ci
378362306a36Sopenharmony_ci/* This routine deals with incoming acks, but not outgoing ones. */
378462306a36Sopenharmony_cistatic int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
378562306a36Sopenharmony_ci{
378662306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
378762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
378862306a36Sopenharmony_ci	struct tcp_sacktag_state sack_state;
378962306a36Sopenharmony_ci	struct rate_sample rs = { .prior_delivered = 0 };
379062306a36Sopenharmony_ci	u32 prior_snd_una = tp->snd_una;
379162306a36Sopenharmony_ci	bool is_sack_reneg = tp->is_sack_reneg;
379262306a36Sopenharmony_ci	u32 ack_seq = TCP_SKB_CB(skb)->seq;
379362306a36Sopenharmony_ci	u32 ack = TCP_SKB_CB(skb)->ack_seq;
379462306a36Sopenharmony_ci	int num_dupack = 0;
379562306a36Sopenharmony_ci	int prior_packets = tp->packets_out;
379662306a36Sopenharmony_ci	u32 delivered = tp->delivered;
379762306a36Sopenharmony_ci	u32 lost = tp->lost;
379862306a36Sopenharmony_ci	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
379962306a36Sopenharmony_ci	u32 prior_fack;
380062306a36Sopenharmony_ci
380162306a36Sopenharmony_ci	sack_state.first_sackt = 0;
380262306a36Sopenharmony_ci	sack_state.rate = &rs;
380362306a36Sopenharmony_ci	sack_state.sack_delivered = 0;
380462306a36Sopenharmony_ci
380562306a36Sopenharmony_ci	/* We very likely will need to access rtx queue. */
380662306a36Sopenharmony_ci	prefetch(sk->tcp_rtx_queue.rb_node);
380762306a36Sopenharmony_ci
380862306a36Sopenharmony_ci	/* If the ack is older than previous acks
380962306a36Sopenharmony_ci	 * then we can probably ignore it.
381062306a36Sopenharmony_ci	 */
381162306a36Sopenharmony_ci	if (before(ack, prior_snd_una)) {
381262306a36Sopenharmony_ci		u32 max_window;
381362306a36Sopenharmony_ci
381462306a36Sopenharmony_ci		/* do not accept ACK for bytes we never sent. */
381562306a36Sopenharmony_ci		max_window = min_t(u64, tp->max_window, tp->bytes_acked);
381662306a36Sopenharmony_ci		/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
381762306a36Sopenharmony_ci		if (before(ack, prior_snd_una - max_window)) {
381862306a36Sopenharmony_ci			if (!(flag & FLAG_NO_CHALLENGE_ACK))
381962306a36Sopenharmony_ci				tcp_send_challenge_ack(sk);
382062306a36Sopenharmony_ci			return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
382162306a36Sopenharmony_ci		}
382262306a36Sopenharmony_ci		goto old_ack;
382362306a36Sopenharmony_ci	}
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci	/* If the ack includes data we haven't sent yet, discard
382662306a36Sopenharmony_ci	 * this segment (RFC793 Section 3.9).
382762306a36Sopenharmony_ci	 */
382862306a36Sopenharmony_ci	if (after(ack, tp->snd_nxt))
382962306a36Sopenharmony_ci		return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
383062306a36Sopenharmony_ci
383162306a36Sopenharmony_ci	if (after(ack, prior_snd_una)) {
383262306a36Sopenharmony_ci		flag |= FLAG_SND_UNA_ADVANCED;
383362306a36Sopenharmony_ci		icsk->icsk_retransmits = 0;
383462306a36Sopenharmony_ci
383562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_TLS_DEVICE)
383662306a36Sopenharmony_ci		if (static_branch_unlikely(&clean_acked_data_enabled.key))
383762306a36Sopenharmony_ci			if (icsk->icsk_clean_acked)
383862306a36Sopenharmony_ci				icsk->icsk_clean_acked(sk, ack);
383962306a36Sopenharmony_ci#endif
384062306a36Sopenharmony_ci	}
384162306a36Sopenharmony_ci
384262306a36Sopenharmony_ci	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
384362306a36Sopenharmony_ci	rs.prior_in_flight = tcp_packets_in_flight(tp);
384462306a36Sopenharmony_ci
384562306a36Sopenharmony_ci	/* ts_recent update must be made after we are sure that the packet
384662306a36Sopenharmony_ci	 * is in window.
384762306a36Sopenharmony_ci	 */
384862306a36Sopenharmony_ci	if (flag & FLAG_UPDATE_TS_RECENT)
384962306a36Sopenharmony_ci		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
385062306a36Sopenharmony_ci
385162306a36Sopenharmony_ci	if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
385262306a36Sopenharmony_ci	    FLAG_SND_UNA_ADVANCED) {
385362306a36Sopenharmony_ci		/* Window is constant, pure forward advance.
385462306a36Sopenharmony_ci		 * No more checks are required.
385562306a36Sopenharmony_ci		 * Note, we use the fact that SND.UNA>=SND.WL2.
385662306a36Sopenharmony_ci		 */
385762306a36Sopenharmony_ci		tcp_update_wl(tp, ack_seq);
385862306a36Sopenharmony_ci		tcp_snd_una_update(tp, ack);
385962306a36Sopenharmony_ci		flag |= FLAG_WIN_UPDATE;
386062306a36Sopenharmony_ci
386162306a36Sopenharmony_ci		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
386262306a36Sopenharmony_ci
386362306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
386462306a36Sopenharmony_ci	} else {
386562306a36Sopenharmony_ci		u32 ack_ev_flags = CA_ACK_SLOWPATH;
386662306a36Sopenharmony_ci
386762306a36Sopenharmony_ci		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
386862306a36Sopenharmony_ci			flag |= FLAG_DATA;
386962306a36Sopenharmony_ci		else
387062306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
387162306a36Sopenharmony_ci
387262306a36Sopenharmony_ci		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
387362306a36Sopenharmony_ci
387462306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->sacked)
387562306a36Sopenharmony_ci			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
387662306a36Sopenharmony_ci							&sack_state);
387762306a36Sopenharmony_ci
387862306a36Sopenharmony_ci		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
387962306a36Sopenharmony_ci			flag |= FLAG_ECE;
388062306a36Sopenharmony_ci			ack_ev_flags |= CA_ACK_ECE;
388162306a36Sopenharmony_ci		}
388262306a36Sopenharmony_ci
388362306a36Sopenharmony_ci		if (sack_state.sack_delivered)
388462306a36Sopenharmony_ci			tcp_count_delivered(tp, sack_state.sack_delivered,
388562306a36Sopenharmony_ci					    flag & FLAG_ECE);
388662306a36Sopenharmony_ci
388762306a36Sopenharmony_ci		if (flag & FLAG_WIN_UPDATE)
388862306a36Sopenharmony_ci			ack_ev_flags |= CA_ACK_WIN_UPDATE;
388962306a36Sopenharmony_ci
389062306a36Sopenharmony_ci		tcp_in_ack_event(sk, ack_ev_flags);
389162306a36Sopenharmony_ci	}
389262306a36Sopenharmony_ci
389362306a36Sopenharmony_ci	/* This is a deviation from RFC3168 since it states that:
389462306a36Sopenharmony_ci	 * "When the TCP data sender is ready to set the CWR bit after reducing
389562306a36Sopenharmony_ci	 * the congestion window, it SHOULD set the CWR bit only on the first
389662306a36Sopenharmony_ci	 * new data packet that it transmits."
389762306a36Sopenharmony_ci	 * We accept CWR on pure ACKs to be more robust
389862306a36Sopenharmony_ci	 * with widely-deployed TCP implementations that do this.
389962306a36Sopenharmony_ci	 */
390062306a36Sopenharmony_ci	tcp_ecn_accept_cwr(sk, skb);
390162306a36Sopenharmony_ci
390262306a36Sopenharmony_ci	/* We passed data and got it acked, remove any soft error
390362306a36Sopenharmony_ci	 * log. Something worked...
390462306a36Sopenharmony_ci	 */
	WRITE_ONCE(sk->sk_err_soft, 0);
	icsk->icsk_probes_out = 0;
	tp->rcv_tstamp = tcp_jiffies32;
	if (!prior_packets)
		goto no_queue;

	/* See if we can take anything off of the retransmit queue. */
	flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
				    &sack_state, flag & FLAG_ECE);

	tcp_rack_update_reo_wnd(sk, &rs);

	if (tp->tlp_high_seq)
		tcp_process_tlp_ack(sk, ack, flag);

	if (tcp_ack_is_dubious(sk, flag)) {
		if (!(flag & (FLAG_SND_UNA_ADVANCED |
			      FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
			num_dupack = 1;
			/* Consider if pure acks were aggregated in tcp_add_backlog() */
			if (!(flag & FLAG_DATA))
				num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
		}
		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
				      &rexmit);
	}

	/* If needed, reset the TLP/RTO timer when RACK didn't set it. */
	if (flag & FLAG_SET_XMIT_TIMER)
		tcp_set_xmit_timer(sk);

	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
		sk_dst_confirm(sk);

	delivered = tcp_newly_delivered(sk, delivered, flag);
	lost = tp->lost - lost;			/* freshly marked lost */
	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
	tcp_xmit_recovery(sk, rexmit);
	return 1;

no_queue:
	/* If data was DSACKed, see if we can undo a cwnd reduction. */
	if (flag & FLAG_DSACKING_ACK) {
		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
				      &rexmit);
		tcp_newly_delivered(sk, delivered, flag);
	}
	/* If this ack opens up a zero window, clear backoff.  It was
	 * being used to time the probes, and is probably far higher than
	 * it needs to be for normal retransmission.
	 */
	tcp_ack_probe(sk);

	if (tp->tlp_high_seq)
		tcp_process_tlp_ack(sk, ack, flag);
	return 1;

old_ack:
	/* If data was SACKed, tag it and see if we should send more data.
	 * If data was DSACKed, see if we can undo a cwnd reduction.
	 */
	if (TCP_SKB_CB(skb)->sacked) {
		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
						&sack_state);
		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
				      &rexmit);
		tcp_newly_delivered(sk, delivered, flag);
		tcp_xmit_recovery(sk, rexmit);
	}

	return 0;
}
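
/* Informal summary of the ACK processing above (a reading aid, not a
 * normative spec): once snd_una has advanced we (1) clean the rtx queue
 * and collect RTT/SACK state, (2) let RACK update its reordering window,
 * (3) resolve any pending TLP probe, (4) run the fast-retransmit alert
 * machinery when the ACK looks dubious, (5) re-arm the TLP/RTO timer if
 * RACK did not, and (6) feed the newly delivered/lost counters into rate
 * sampling and congestion control before kicking any needed
 * retransmissions via tcp_xmit_recovery().
 */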

static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
				      bool syn, struct tcp_fastopen_cookie *foc,
				      bool exp_opt)
{
	/* Valid only in SYN or SYN-ACK with an even length.  */
	if (!foc || !syn || len < 0 || (len & 1))
		return;

	if (len >= TCP_FASTOPEN_COOKIE_MIN &&
	    len <= TCP_FASTOPEN_COOKIE_MAX)
		memcpy(foc->val, cookie, len);
	else if (len != 0)
		len = -1;
	foc->len = len;
	foc->exp = exp_opt;
}
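
/* A quick worked example of the cookie-length handling above, assuming
 * the usual TCP_FASTOPEN_COOKIE_MIN/MAX bounds of 4 and 16 bytes:
 *
 *   len == 0                  -> foc->len = 0 (cookie requested, none echoed)
 *   len == 8 (even, in range) -> cookie copied, foc->len = 8
 *   len == 7 (odd)            -> rejected early, foc left untouched
 *   len == 2 (too short)      -> foc->len = -1 (remembered as malformed)
 */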

static bool smc_parse_options(const struct tcphdr *th,
			      struct tcp_options_received *opt_rx,
			      const unsigned char *ptr,
			      int opsize)
{
#if IS_ENABLED(CONFIG_SMC)
	if (static_branch_unlikely(&tcp_have_smc)) {
		if (th->syn && !(opsize & 1) &&
		    opsize >= TCPOLEN_EXP_SMC_BASE &&
		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
			opt_rx->smc_ok = 1;
			return true;
		}
	}
#endif
	return false;
}

/* Try to parse the MSS option from the TCP header. Return 0 on failure,
 * the clamped value on success.
 */
u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{
	const unsigned char *ptr = (const unsigned char *)(th + 1);
	int length = (th->doff * 4) - sizeof(struct tcphdr);
	u16 mss = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return mss;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return mss;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return mss;
			if (opsize > length)
				return mss;	/* fail on partial options */
			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
				u16 in_mss = get_unaligned_be16(ptr);

				if (in_mss) {
					if (user_mss && user_mss < in_mss)
						in_mss = user_mss;
					mss = in_mss;
				}
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return mss;
}
EXPORT_SYMBOL_GPL(tcp_parse_mss_option);
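
/* Wire-format sketch for the option walk above (illustrative only): an
 * MSS option is kind=2, length=4, followed by a 16-bit value, e.g. for
 * MSS 1460:
 *
 *   02 04 05 b4
 *
 * The default arm advances ptr/length by the advertised opsize, so an
 * option block such as  01 01 02 04 05 b4  (NOP, NOP, MSS) yields 1460,
 * while user_mss (when set and smaller) clamps the result.
 */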

/* Look for TCP options. Normally only called on SYN and SYNACK packets.
 * But this can also be called on packets in the established flow when
 * the fast version below fails.
 */
void tcp_parse_options(const struct net *net,
		       const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx, int estab,
		       struct tcp_fastopen_cookie *foc)
{
	const unsigned char *ptr;
	const struct tcphdr *th = tcp_hdr(skb);
	int length = (th->doff * 4) - sizeof(struct tcphdr);

	ptr = (const unsigned char *)(th + 1);
	opt_rx->saw_tstamp = 0;
	opt_rx->saw_unknown = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */
			switch (opcode) {
			case TCPOPT_MSS:
				if (opsize == TCPOLEN_MSS && th->syn && !estab) {
					u16 in_mss = get_unaligned_be16(ptr);

					if (in_mss) {
						if (opt_rx->user_mss &&
						    opt_rx->user_mss < in_mss)
							in_mss = opt_rx->user_mss;
						opt_rx->mss_clamp = in_mss;
					}
				}
				break;
			case TCPOPT_WINDOW:
				if (opsize == TCPOLEN_WINDOW && th->syn &&
				    !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
					__u8 snd_wscale = *(__u8 *)ptr;

					opt_rx->wscale_ok = 1;
					if (snd_wscale > TCP_MAX_WSCALE) {
						net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
								     __func__,
								     snd_wscale,
								     TCP_MAX_WSCALE);
						snd_wscale = TCP_MAX_WSCALE;
					}
					opt_rx->snd_wscale = snd_wscale;
				}
				break;
			case TCPOPT_TIMESTAMP:
				if ((opsize == TCPOLEN_TIMESTAMP) &&
				    ((estab && opt_rx->tstamp_ok) ||
				     (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
					opt_rx->saw_tstamp = 1;
					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
				}
				break;
			case TCPOPT_SACK_PERM:
				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
				    !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
					opt_rx->sack_ok = TCP_SACK_SEEN;
					tcp_sack_reset(opt_rx);
				}
				break;

			case TCPOPT_SACK:
				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
				    !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
				    opt_rx->sack_ok) {
					TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
				}
				break;
#ifdef CONFIG_TCP_MD5SIG
			case TCPOPT_MD5SIG:
				/* The MD5 Hash has already been
				 * checked (see tcp_v{4,6}_rcv()).
				 */
				break;
#endif
			case TCPOPT_FASTOPEN:
				tcp_parse_fastopen_option(
					opsize - TCPOLEN_FASTOPEN_BASE,
					ptr, th->syn, foc, false);
				break;

			case TCPOPT_EXP:
				/* Fast Open option shares code 254 using a
				 * 16-bit magic number.
				 */
				if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
				    get_unaligned_be16(ptr) ==
				    TCPOPT_FASTOPEN_MAGIC) {
					tcp_parse_fastopen_option(opsize -
						TCPOLEN_EXP_FASTOPEN_BASE,
						ptr + 2, th->syn, foc, true);
					break;
				}

				if (smc_parse_options(th, opt_rx, ptr, opsize))
					break;

				opt_rx->saw_unknown = 1;
				break;

			default:
				opt_rx->saw_unknown = 1;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
EXPORT_SYMBOL(tcp_parse_options);
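
/* Illustrative SYN option block for the parser above (not taken from a
 * real trace): 20 bytes of options advertising MSS 1460, SACK-permitted,
 * timestamps and window scale 7, padded with a NOP:
 *
 *   02 04 05 b4              MSS = 1460       -> mss_clamp
 *   04 02                    SACK permitted   -> sack_ok
 *   08 0a <tsval> <tsecr>    timestamps       -> saw_tstamp/rcv_tsval
 *   01                       NOP (padding)
 *   03 03 07                 window scale = 7 -> snd_wscale
 */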

static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
{
	const __be32 *ptr = (const __be32 *)(th + 1);

	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
		tp->rx_opt.saw_tstamp = 1;
		++ptr;
		tp->rx_opt.rcv_tsval = ntohl(*ptr);
		++ptr;
		if (*ptr)
			tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
		else
			tp->rx_opt.rcv_tsecr = 0;
		return true;
	}
	return false;
}
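
/* The single 32-bit compare above matches the canonical aligned
 * timestamp block (12 bytes, TCPOLEN_TSTAMP_ALIGNED):
 *
 *   01 01 08 0a  <tsval:4>  <tsecr:4>
 *   NOP NOP kind=8 len=10
 *
 * Any other layout falls back to the generic option walk.
 */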

/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
static bool tcp_fast_parse_options(const struct net *net,
				   const struct sk_buff *skb,
				   const struct tcphdr *th, struct tcp_sock *tp)
{
	/* In the spirit of fast parsing, compare doff directly to constant
	 * values.  Because equality is used, short doff can be ignored here.
	 */
	if (th->doff == (sizeof(*th) / 4)) {
		tp->rx_opt.saw_tstamp = 0;
		return false;
	} else if (tp->rx_opt.tstamp_ok &&
		   th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
		if (tcp_parse_aligned_timestamp(tp, th))
			return true;
	}

	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;

	return true;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * Parse the MD5 Signature option
 */
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
{
	int length = (th->doff << 2) - sizeof(*th);
	const u8 *ptr = (const u8 *)(th + 1);

	/* If there is not enough data remaining, we can short-circuit */
	while (length >= TCPOLEN_MD5SIG) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return NULL;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return NULL;
			if (opcode == TCPOPT_MD5SIG)
				return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
		}
		ptr += opsize - 2;
		length -= opsize;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_parse_md5sig_option);
#endif

/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
 *
 * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
 * it can pass through the stack. So, the following predicate verifies that
 * this segment is not used for anything but congestion avoidance or
 * fast retransmit. Moreover, we even are able to eliminate most of such
 * second order effects, if we apply some small "replay" window (~RTO)
 * to timestamp space.
 *
 * All these measures still do not guarantee that we reject wrapped ACKs
 * on networks with high bandwidth, when sequence space is recycled quickly,
 * but it guarantees that such events will be very rare and do not affect
 * the connection seriously. This doesn't look nice, but alas, PAWS is really
 * a buggy extension.
 *
 * [ Later note. Even worse! It is buggy for segments _with_ data. RFC
 * states that events when retransmit arrives after original data are rare.
 * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
 * the biggest problem on large power networks even with minor reordering.
 * OK, let's give it a small replay window. If the peer clock is even 1 Hz,
 * it is safe up to a bandwidth of 18 Gigabit/sec. 8) ]
 */

static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;

	return (/* 1. Pure ACK with correct sequence number. */
		(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&

		/* 2. ... and duplicate ACK. */
		ack == tp->snd_una &&

		/* 3. ... and does not update window. */
		!tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&

		/* 4. ... and sits in the replay window. */
		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
}
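
/* Rough numbers for check 4 above, as a sanity aid: icsk_rto is in
 * jiffies, and (rto * 1024) / HZ converts it to roughly milliseconds.
 * With HZ=1000 and a 200 ms RTO, an ACK whose tsval lags ts_recent by up
 * to ~204 peer timestamp ticks (about one RTO at a 1 kHz peer clock) is
 * still tolerated as a harmless duplicate.
 */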

static inline bool tcp_paws_discard(const struct sock *sk,
				    const struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
	       !tcp_disordered_ack(sk, skb);
}

/* Check segment sequence number for validity.
 *
 * Segment controls are considered valid if the segment
 * fits into the window after truncation to the window. Acceptability
 * of data (and SYN, FIN, of course) is checked separately.
 * See tcp_data_queue(), for example.
 *
 * Also, controls (RST is the main one) are accepted using RCV.WUP instead
 * of RCV.NXT. The peer may not yet have advanced his SND.UNA when we
 * delayed the ACK, so that his SND.UNA <= our RCV.WUP.
 * (borrowed from freebsd)
 */

static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
					 u32 seq, u32 end_seq)
{
	if (before(end_seq, tp->rcv_wup))
		return SKB_DROP_REASON_TCP_OLD_SEQUENCE;

	if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
		return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;

	return SKB_NOT_DROPPED_YET;
}
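
/* Reminder on the before()/after() helpers used here: they compare
 * 32-bit sequence numbers modulo 2^32 via a signed subtraction, e.g.
 *
 *	static inline bool before(__u32 seq1, __u32 seq2)
 *	{
 *		return (__s32)(seq1 - seq2) < 0;
 *	}
 *
 * so a window that straddles the 4 GB wrap (say rcv_nxt = 0xffffff00)
 * is still handled correctly as long as the two values are within 2^31
 * of each other.
 */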

/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
	trace_tcp_receive_reset(sk);

	/* mptcp can't tell us to ignore reset pkts,
	 * so just ignore the return value of mptcp_incoming_options().
	 */
	if (sk_is_mptcp(sk))
		mptcp_incoming_options(sk, skb);

	/* We want the right error as BSD sees it (and indeed as we do). */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
		WRITE_ONCE(sk->sk_err, ECONNREFUSED);
		break;
	case TCP_CLOSE_WAIT:
		WRITE_ONCE(sk->sk_err, EPIPE);
		break;
	case TCP_CLOSE:
		return;
	default:
		WRITE_ONCE(sk->sk_err, ECONNRESET);
	}
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();

	tcp_write_queue_purge(sk);
	tcp_done(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		sk_error_report(sk);
}

/*
 *	Process the FIN bit. This now behaves as it is supposed to work
 *	and the FIN takes effect only when it is validly part of the
 *	sequence space, not earlier, while we still have holes.
 *
 *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally, CLOSE; we never enter
 *	TIME-WAIT)
 *
 *	If we are in FINWAIT-1, a received FIN indicates simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
void tcp_fin(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	inet_csk_schedule_ack(sk);

	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
	sock_set_flag(sk, SOCK_DONE);

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_ESTABLISHED:
		/* Move to CLOSE_WAIT */
		tcp_set_state(sk, TCP_CLOSE_WAIT);
		inet_csk_enter_pingpong_mode(sk);
		break;

	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
		/* Received a retransmission of the FIN, do
		 * nothing.
		 */
		break;
	case TCP_LAST_ACK:
		/* RFC793: Remain in the LAST-ACK state. */
		break;

	case TCP_FIN_WAIT1:
		/* This case occurs when a simultaneous close
		 * happens; we must ack the received FIN and
		 * enter the CLOSING state.
		 */
		tcp_send_ack(sk);
		tcp_set_state(sk, TCP_CLOSING);
		break;
	case TCP_FIN_WAIT2:
		/* Received a FIN -- send ACK and enter TIME_WAIT. */
		tcp_send_ack(sk);
		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
		break;
	default:
		/* Only TCP_LISTEN and TCP_CLOSE are left; in these
		 * cases we should never reach this piece of code.
		 */
		pr_err("%s: Impossible, sk->sk_state=%d\n",
		       __func__, sk->sk_state);
		break;
	}

	/* It _is_ possible that we have something out-of-order _after_ the
	 * FIN. Probably, we should reset in this case. For now drop them.
	 */
	skb_rbtree_purge(&tp->out_of_order_queue);
	if (tcp_is_sack(tp))
		tcp_sack_reset(&tp->rx_opt);

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);

		/* Do not send POLL_HUP for half duplex close. */
		if (sk->sk_shutdown == SHUTDOWN_MASK ||
		    sk->sk_state == TCP_CLOSE)
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
		else
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	}
}

static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
				   u32 end_seq)
{
	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
		if (before(seq, sp->start_seq))
			sp->start_seq = seq;
		if (after(end_seq, sp->end_seq))
			sp->end_seq = end_seq;
		return true;
	}
	return false;
}
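
/* Worked example for the overlap test above: with an existing block
 * [100, 200) and a new range [150, 250), neither endpoint is strictly
 * beyond the other block, so the ranges touch and the block grows to
 * [100, 250). A disjoint range such as [300, 350) fails the test and
 * the caller must start a new SACK block instead.
 */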

static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
		int mib_idx;

		if (before(seq, tp->rcv_nxt))
			mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
		else
			mib_idx = LINUX_MIB_TCPDSACKOFOSENT;

		NET_INC_STATS(sock_net(sk), mib_idx);

		tp->rx_opt.dsack = 1;
		tp->duplicate_sack[0].start_seq = seq;
		tp->duplicate_sack[0].end_seq = end_seq;
	}
}

static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->rx_opt.dsack)
		tcp_dsack_set(sk, seq, end_seq);
	else
		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}

static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
{
	/* When the ACK path fails or drops most ACKs, the sender would
	 * timeout and spuriously retransmit the same segment repeatedly.
	 * The receiver remembers and reflects via DSACKs. Leverage the
	 * DSACK state and change the txhash to re-route speculatively.
	 */
	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
	    sk_rethink_txhash(sk))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
}

static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);

		if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
			u32 end_seq = TCP_SKB_CB(skb)->end_seq;

			tcp_rcv_spurious_retrans(sk, skb);
			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
				end_seq = tp->rcv_nxt;
			tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
		}
	}

	tcp_send_ack(sk);
}

/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
{
	int this_sack;
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	struct tcp_sack_block *swalk = sp + 1;

	/* See if the recent change to the first SACK eats into
	 * or hits the sequence space of other SACK blocks; if so, coalesce.
	 */
	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
			int i;

			/* Zap SWALK by moving every further SACK up by one slot.
			 * Decrease num_sacks.
			 */
			tp->rx_opt.num_sacks--;
			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
				sp[i] = sp[i + 1];
			continue;
		}
		this_sack++;
		swalk++;
	}
}

void tcp_sack_compress_send_ack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->compressed_ack)
		return;

	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
		__sock_put(sk);

	/* Since we have to send one ack finally,
	 * subtract one from tp->compressed_ack to keep
	 * LINUX_MIB_TCPACKCOMPRESSED accurate.
	 */
	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
		      tp->compressed_ack - 1);

	tp->compressed_ack = 0;
	tcp_send_ack(sk);
}

/* Reasonable number of SACK blocks to include in the TCP SACK option.
 * The max is 4, but this becomes 3 if TCP timestamps are there.
 * Given that SACK packets might be lost, be conservative and use 2.
 */
#define TCP_SACK_BLOCKS_EXPECTED 2

static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int cur_sacks = tp->rx_opt.num_sacks;
	int this_sack;

	if (!cur_sacks)
		goto new_sack;

	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
		if (tcp_sack_extend(sp, seq, end_seq)) {
			if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
				tcp_sack_compress_send_ack(sk);
			/* Rotate this_sack to the first one. */
			for (; this_sack > 0; this_sack--, sp--)
				swap(*sp, *(sp - 1));
			if (cur_sacks > 1)
				tcp_sack_maybe_coalesce(tp);
			return;
		}
	}

	if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
		tcp_sack_compress_send_ack(sk);

	/* Could not find an adjacent existing SACK, build a new one,
	 * put it at the front, and shift everyone else down.  We
	 * always know there is at least one SACK present already here.
	 *
	 * If the sack array is full, forget about the last one.
	 */
	if (this_sack >= TCP_NUM_SACKS) {
		this_sack--;
		tp->rx_opt.num_sacks--;
		sp--;
	}
	for (; this_sack > 0; this_sack--, sp--)
		*sp = *(sp - 1);

new_sack:
	/* Build the new head SACK, and we're done. */
	sp->start_seq = seq;
	sp->end_seq = end_seq;
	tp->rx_opt.num_sacks++;
}
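
/* The "rotate to the front" dance above implements the RFC 2018 rule
 * that the first SACK block must describe the most recently received
 * segment. E.g. with blocks {[300,400), [100,200)} and a new arrival
 * [200,250): the second block grows to [100,250) and is swapped to the
 * front, giving {[100,250), [300,400)}. Once a later arrival closes the
 * [250,300) gap, tcp_sack_maybe_coalesce() collapses everything into a
 * single [100,400) block.
 */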

/* RCV.NXT advances; some SACKs should be eaten. */

static void tcp_sack_remove(struct tcp_sock *tp)
{
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int num_sacks = tp->rx_opt.num_sacks;
	int this_sack;

	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
		tp->rx_opt.num_sacks = 0;
		return;
	}

	for (this_sack = 0; this_sack < num_sacks;) {
		/* Check if the start of the sack is covered by RCV.NXT. */
		if (!before(tp->rcv_nxt, sp->start_seq)) {
			int i;

			/* RCV.NXT must cover all the block! */
			WARN_ON(before(tp->rcv_nxt, sp->end_seq));

			/* Zap this SACK by moving forward any other SACKs. */
			for (i = this_sack + 1; i < num_sacks; i++)
				tp->selective_acks[i - 1] = tp->selective_acks[i];
			num_sacks--;
			continue;
		}
		this_sack++;
		sp++;
	}
	tp->rx_opt.num_sacks = num_sacks;
}

/**
 * tcp_try_coalesce - try to merge skb to prior one
 * @sk: socket
 * @to: prior buffer
 * @from: buffer to add in queue
 * @fragstolen: pointer to boolean
 *
 * Before queueing skb @from after @to, try to merge them
 * to reduce overall memory use and queue lengths, if cost is small.
 * Packets in ofo or receive queues can stay a long time.
 * Better try to coalesce them right now to avoid future collapses.
 * Returns true if caller should free @from instead of queueing it
 */
static bool tcp_try_coalesce(struct sock *sk,
			     struct sk_buff *to,
			     struct sk_buff *from,
			     bool *fragstolen)
{
	int delta;

	*fragstolen = false;

	/* It's possible this segment overlaps with a prior segment in the queue */
	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
		return false;

	if (!mptcp_skb_can_collapse(to, from))
		return false;

#ifdef CONFIG_TLS_DEVICE
	if (from->decrypted != to->decrypted)
		return false;
#endif

	if (!skb_try_coalesce(to, from, fragstolen, &delta))
		return false;

	atomic_add(delta, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, delta);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;

	if (TCP_SKB_CB(from)->has_rxtstamp) {
		TCP_SKB_CB(to)->has_rxtstamp = true;
		to->tstamp = from->tstamp;
		skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
	}

	return true;
}

static bool tcp_ooo_try_coalesce(struct sock *sk,
				 struct sk_buff *to,
				 struct sk_buff *from,
				 bool *fragstolen)
{
	bool res = tcp_try_coalesce(sk, to, from, fragstolen);

	/* In case tcp_drop_reason() is called later, update to->gso_segs */
	if (res) {
		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
			       max_t(u16, 1, skb_shinfo(from)->gso_segs);

		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
	}
	return res;
}

static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
			    enum skb_drop_reason reason)
{
	sk_drops_add(sk, skb);
	kfree_skb_reason(skb, reason);
}

/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
static void tcp_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 dsack_high = tp->rcv_nxt;
	bool fin, fragstolen, eaten;
	struct sk_buff *skb, *tail;
	struct rb_node *p;

	p = rb_first(&tp->out_of_order_queue);
	while (p) {
		skb = rb_to_skb(p);
		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;

		if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
			__u32 dsack = dsack_high;

			if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
				dsack_high = TCP_SKB_CB(skb)->end_seq;
			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
		}
		p = rb_next(p);
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);

		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
			tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
			continue;
		}

		tail = skb_peek_tail(&sk->sk_receive_queue);
		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
		if (!eaten)
			__skb_queue_tail(&sk->sk_receive_queue, skb);
		else
			kfree_skb_partial(skb, fragstolen);

		if (unlikely(fin)) {
			tcp_fin(sk);
			/* tcp_fin() purges tp->out_of_order_queue,
			 * so we must end this loop right now.
			 */
			break;
		}
	}
}
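
/* Example walk for the drain loop above: suppose rcv_nxt is 100 on
 * entry and the ofo queue holds a single skb covering [80,150). The
 * skb starts below rcv_nxt, so the duplicated span [80,100) is
 * reported via tcp_dsack_extend(); the skb still carries new data
 * (end_seq 150 is beyond rcv_nxt), so it is moved to the receive
 * queue and rcv_nxt advances to 150. An skb ending at or below
 * rcv_nxt would instead be dropped with SKB_DROP_REASON_TCP_OFO_DROP.
 */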

static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);

static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
				 unsigned int size)
{
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    !sk_rmem_schedule(sk, skb, size)) {

		if (tcp_prune_queue(sk, skb) < 0)
			return -1;

		while (!sk_rmem_schedule(sk, skb, size)) {
			if (!tcp_prune_ofo_queue(sk, skb))
				return -1;
		}
	}
	return 0;
}

static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct rb_node **p, *parent;
	struct sk_buff *skb1;
	u32 seq, end_seq;
	bool fragstolen;

	tcp_ecn_check_ce(sk, skb);

	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
		sk->sk_data_ready(sk);
		tcp_drop_reason(sk, skb, SKB_DROP_REASON_PROTO_MEM);
		return;
	}

	/* Disable header prediction. */
	tp->pred_flags = 0;
	inet_csk_schedule_ack(sk);

	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
	seq = TCP_SKB_CB(skb)->seq;
	end_seq = TCP_SKB_CB(skb)->end_seq;

	p = &tp->out_of_order_queue.rb_node;
	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
		/* Initial out of order segment, build 1 SACK. */
		if (tcp_is_sack(tp)) {
			tp->rx_opt.num_sacks = 1;
			tp->selective_acks[0].start_seq = seq;
			tp->selective_acks[0].end_seq = end_seq;
		}
		rb_link_node(&skb->rbnode, NULL, p);
		rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
		tp->ooo_last_skb = skb;
		goto end;
	}

	/* In the typical case, we are adding an skb to the end of the list.
	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
	 */
	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
				 skb, &fragstolen)) {
coalesce_done:
		/* For non-SACK flows, do not grow the window to force a DUPACK
		 * and trigger fast retransmit.
		 */
		if (tcp_is_sack(tp))
			tcp_grow_window(sk, skb, true);
		kfree_skb_partial(skb, fragstolen);
		skb = NULL;
		goto add_sack;
	}
	/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
	if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
		parent = &tp->ooo_last_skb->rbnode;
		p = &parent->rb_right;
		goto insert;
	}

	/* Find place to insert this segment. Handle overlaps on the way. */
	parent = NULL;
	while (*p) {
		parent = *p;
		skb1 = rb_to_skb(parent);
		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
			p = &parent->rb_left;
			continue;
		}
		if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
				/* All the bits are present. Drop. */
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPOFOMERGE);
				tcp_drop_reason(sk, skb,
						SKB_DROP_REASON_TCP_OFOMERGE);
				skb = NULL;
				tcp_dsack_set(sk, seq, end_seq);
				goto add_sack;
			}
			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
				/* Partial overlap. */
				tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
			} else {
				/* skb's seq == skb1's seq and skb covers skb1.
				 * Replace skb1 with skb.
				 */
				rb_replace_node(&skb1->rbnode, &skb->rbnode,
						&tp->out_of_order_queue);
				tcp_dsack_extend(sk,
						 TCP_SKB_CB(skb1)->seq,
						 TCP_SKB_CB(skb1)->end_seq);
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPOFOMERGE);
				tcp_drop_reason(sk, skb1,
						SKB_DROP_REASON_TCP_OFOMERGE);
				goto merge_right;
			}
		} else if (tcp_ooo_try_coalesce(sk, skb1,
						skb, &fragstolen)) {
			goto coalesce_done;
		}
		p = &parent->rb_right;
	}
insert:
	/* Insert segment into RB tree. */
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);

merge_right:
	/* Remove other segments covered by skb. */
	while ((skb1 = skb_rb_next(skb)) != NULL) {
		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
			break;
		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
					 end_seq);
			break;
		}
		rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
				 TCP_SKB_CB(skb1)->end_seq);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
		tcp_drop_reason(sk, skb1, SKB_DROP_REASON_TCP_OFOMERGE);
	}
	/* If there is no skb after us, we are the last skb! */
	if (!skb1)
		tp->ooo_last_skb = skb;

add_sack:
	if (tcp_is_sack(tp))
		tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
	if (skb) {
		/* For non-SACK flows, do not grow the window to force a DUPACK
		 * and trigger fast retransmit.
		 */
		if (tcp_is_sack(tp))
			tcp_grow_window(sk, skb, false);
		skb_condense(skb);
		skb_set_owner_r(skb, sk);
	}
}
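
/* Insertion sketch for the rbtree logic above: the ofo queue is keyed
 * by seq, with ooo_last_skb caching the rightmost node so in-order
 * tail appends (the common case) skip the O(log N) descent entirely.
 * Overlap handling during the descent favours the newest data: a fully
 * covered new skb is dropped with a DSACK, while an skb that covers an
 * existing node replaces it in the tree via rb_replace_node().
 */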

static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
				      bool *fragstolen)
{
	int eaten;
	struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);

	eaten = (tail &&
		 tcp_try_coalesce(sk, tail,
				  skb, fragstolen)) ? 1 : 0;
	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
	if (!eaten) {
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
	}
	return eaten;
}

int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct sk_buff *skb;
	int err = -ENOMEM;
	int data_len = 0;
	bool fragstolen;

	if (size == 0)
		return 0;

	if (size > PAGE_SIZE) {
		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);

		data_len = npages << PAGE_SHIFT;
		size = data_len + (size & ~PAGE_MASK);
	}
	skb = alloc_skb_with_frags(size - data_len, data_len,
				   PAGE_ALLOC_COSTLY_ORDER,
				   &err, sk->sk_allocation);
	if (!skb)
		goto err;

	skb_put(skb, size - data_len);
	skb->data_len = data_len;
	skb->len = size;

	if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
		goto err_free;
	}

	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
	if (err)
		goto err_free;

	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;

	if (tcp_queue_rcv(sk, skb, &fragstolen)) {
		WARN_ON_ONCE(fragstolen); /* should not happen */
		__kfree_skb(skb);
	}
	return size;

err_free:
	kfree_skb(skb);
err:
	return err;
}
503562306a36Sopenharmony_ci
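/* Worked example for the sizing logic in tcp_send_rcvq() (added note; the
 * numbers assume 4 KiB pages, i.e. PAGE_SIZE = 4096, PAGE_SHIFT = 12):
 * for size = 10000 bytes,
 *
 *	npages   = min(10000 >> 12, MAX_SKB_FRAGS) = 2
 *	data_len = 2 << 12                         = 8192
 *	size     = 8192 + (10000 & 4095)           = 10000
 *
 * so alloc_skb_with_frags() is asked for a 1808-byte linear part plus
 * 8192 bytes in page fragments. For very large requests, npages is capped
 * at MAX_SKB_FRAGS and the effective size shrinks accordingly; the function
 * returns the number of bytes actually queued.
 */
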
void tcp_data_ready(struct sock *sk)
{
	if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
		sk->sk_data_ready(sk);
}

static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	enum skb_drop_reason reason;
	bool fragstolen;
	int eaten;

	/* If a subflow has been reset, the packet should not continue
	 * to be processed; drop it.
	 */
	if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
		__kfree_skb(skb);
		return;
	}

	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
		__kfree_skb(skb);
		return;
	}
	skb_dst_drop(skb);
	__skb_pull(skb, tcp_hdr(skb)->doff * 4);

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	tp->rx_opt.dsack = 0;

	/*  Queue data for delivery to the user.
	 *  Packets in sequence go to the receive queue.
	 *  Out of sequence packets go to the out_of_order_queue.
	 */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0) {
			reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
			goto out_of_window;
		}

		/* Ok. In sequence. In window. */
queue_and_out:
		if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
			/* TODO: maybe rate-limit these WIN 0 ACKs? */
			inet_csk(sk)->icsk_ack.pending |=
					(ICSK_ACK_NOMEM | ICSK_ACK_NOW);
			inet_csk_schedule_ack(sk);
			sk->sk_data_ready(sk);

			if (skb_queue_len(&sk->sk_receive_queue)) {
				reason = SKB_DROP_REASON_PROTO_MEM;
				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
				goto drop;
			}
			sk_forced_mem_schedule(sk, skb->truesize);
		}

		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
		if (skb->len)
			tcp_event_data_recv(sk, skb);
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			tcp_fin(sk);

		if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
			tcp_ofo_queue(sk);

			/* RFC 5681 sec 4.2: we SHOULD send an immediate ACK
			 * when a gap in the queue is filled.
			 */
			if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
				inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
		}

		if (tp->rx_opt.num_sacks)
			tcp_sack_remove(tp);

		tcp_fast_path_check(sk);

		if (eaten > 0)
			kfree_skb_partial(skb, fragstolen);
		if (!sock_flag(sk, SOCK_DEAD))
			tcp_data_ready(sk);
		return;
	}

	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		tcp_rcv_spurious_retrans(sk, skb);
		/* A retransmit, the 2nd most common case. Force an immediate ack. */
		reason = SKB_DROP_REASON_TCP_OLD_DATA;
		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

out_of_window:
		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
		inet_csk_schedule_ack(sk);
drop:
		tcp_drop_reason(sk, skb, reason);
		return;
	}

	/* Out of window. E.g. a zero window probe. */
	if (!before(TCP_SKB_CB(skb)->seq,
		    tp->rcv_nxt + tcp_receive_window(tp))) {
		reason = SKB_DROP_REASON_TCP_OVERWINDOW;
		goto out_of_window;
	}

	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		/* Partial packet, seq < rcv_nxt < end_seq */
		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);

		/* If the window is closed, drop the tail of the packet. But
		 * only after remembering a D-SACK for its head on the
		 * previous line.
		 */
		if (!tcp_receive_window(tp)) {
			reason = SKB_DROP_REASON_TCP_ZEROWINDOW;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
			goto out_of_window;
		}
		goto queue_and_out;
	}

	tcp_data_queue_ofo(sk, skb);
}

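/* Reference sketch of the sequence-space cases handled by tcp_data_queue()
 * (added summary with example numbers, assuming rcv_nxt = 1000 and a
 * 500-byte receive window):
 *
 *	seq == 1000                in sequence   -> receive queue
 *	end_seq <= 1000            old data      -> D-SACK + immediate ACK
 *	seq >= 1500                over window   -> drop + immediate ACK
 *	seq < 1000 < end_seq       partial       -> D-SACK head, then queue
 *	1000 < seq < 1500          out of order  -> tcp_data_queue_ofo()
 */
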
static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
{
	if (list)
		return !skb_queue_is_last(list, skb) ? skb->next : NULL;

	return skb_rb_next(skb);
}

static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
					struct sk_buff_head *list,
					struct rb_root *root)
{
	struct sk_buff *next = tcp_skb_next(skb, list);

	if (list)
		__skb_unlink(skb, list);
	else
		rb_erase(&skb->rbnode, root);

	__kfree_skb(skb);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);

	return next;
}

/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sk_buff *skb1;

	while (*p) {
		parent = *p;
		skb1 = rb_to_skb(parent);
		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, root);
}

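/* Added note: the ordering above uses before(), which compares sequence
 * numbers modulo 2^32 via signed arithmetic, so insertion stays correct
 * across sequence wraparound. For example:
 *
 *	before(0xfffffff0, 0x10)
 *		== ((s32)(0xfffffff0 - 0x10) < 0)
 *		== ((s32)0xffffffe0 < 0)	-> true
 *
 * i.e. a segment just below the wrap point sorts before one just above it.
 * Equal keys go to the right subtree, so duplicates are tolerated.
 */
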
/* Collapse a contiguous sequence of skbs, head..tail, covering sequence
 * numbers start..end.
 *
 * If tail is NULL, this means collapse until the end of the queue.
 *
 * Segments with FIN/SYN are not collapsed (only because this
 * simplifies the code).
 */
static void
tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
	     struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
{
	struct sk_buff *skb = head, *n;
	struct sk_buff_head tmp;
	bool end_of_skbs;

	/* First, check that the queue is collapsible and find
	 * the point where collapsing can be useful.
	 */
restart:
	for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
		n = tcp_skb_next(skb, list);

		/* No new bits? It is possible on the ofo queue. */
		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
			skb = tcp_collapse_one(sk, skb, list, root);
			if (!skb)
				break;
			goto restart;
		}

		/* The first skb to collapse is:
		 * - not SYN/FIN, and
		 * - bloated or contains data before "start", or
		 *   overlaps the next one and MPTCP allows collapsing.
		 */
		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start))) {
			end_of_skbs = false;
			break;
		}

		if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
		    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
			end_of_skbs = false;
			break;
		}

		/* Decided to skip this one, advance the start seq. */
		start = TCP_SKB_CB(skb)->end_seq;
	}
	if (end_of_skbs ||
	    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
		return;

	__skb_queue_head_init(&tmp);

	while (before(start, end)) {
		int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
		struct sk_buff *nskb;

		nskb = alloc_skb(copy, GFP_ATOMIC);
		if (!nskb)
			break;

		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
#ifdef CONFIG_TLS_DEVICE
		nskb->decrypted = skb->decrypted;
#endif
		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
		if (list)
			__skb_queue_before(list, skb, nskb);
		else
			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
		skb_set_owner_r(nskb, sk);
		mptcp_skb_ext_move(nskb, skb);

		/* Copy data, releasing collapsed skbs. */
		while (copy > 0) {
			int offset = start - TCP_SKB_CB(skb)->seq;
			int size = TCP_SKB_CB(skb)->end_seq - start;

			BUG_ON(offset < 0);
			if (size > 0) {
				size = min(copy, size);
				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
					BUG();
				TCP_SKB_CB(nskb)->end_seq += size;
				copy -= size;
				start += size;
			}
			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
				skb = tcp_collapse_one(sk, skb, list, root);
				if (!skb ||
				    skb == tail ||
				    !mptcp_skb_can_collapse(nskb, skb) ||
				    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
					goto end;
#ifdef CONFIG_TLS_DEVICE
				if (skb->decrypted != nskb->decrypted)
					goto end;
#endif
			}
		}
	}
end:
	skb_queue_walk_safe(&tmp, skb, n)
		tcp_rbtree_insert(root, skb);
}

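/* Worked example for tcp_collapse() (added note): suppose three bloated
 * skbs carry sequence ranges [100,200), [200,300) and [300,400), each with
 * truesize far larger than its 100 bytes of payload. The copy loop above
 * repacks those 300 bytes into freshly allocated skbs of at most
 * SKB_MAX_ORDER(0, 0) bytes each (roughly one page minus the skb overhead),
 * releasing the bloated originals via tcp_collapse_one() as soon as their
 * bytes are consumed. Memory drops from three oversized buffers to a single
 * compact one, while the byte stream [100,400) is preserved.
 */
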
/* Collapse the ofo queue. Algorithm: select a contiguous sequence of skbs
 * and tcp_collapse() them until the whole queue is collapsed.
 */
static void tcp_collapse_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 range_truesize, sum_tiny = 0;
	struct sk_buff *skb, *head;
	u32 start, end;

	skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
	if (!skb) {
		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
		return;
	}
	start = TCP_SKB_CB(skb)->seq;
	end = TCP_SKB_CB(skb)->end_seq;
	range_truesize = skb->truesize;

	for (head = skb;;) {
		skb = skb_rb_next(skb);

		/* A range is terminated when we see a gap or when
		 * we are at the queue end.
		 */
		if (!skb ||
		    after(TCP_SKB_CB(skb)->seq, end) ||
		    before(TCP_SKB_CB(skb)->end_seq, start)) {
			/* Do not attempt collapsing tiny skbs */
			if (range_truesize != head->truesize ||
			    end - start >= SKB_WITH_OVERHEAD(PAGE_SIZE)) {
				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
					     head, skb, start, end);
			} else {
				sum_tiny += range_truesize;
				if (sum_tiny > sk->sk_rcvbuf >> 3)
					return;
			}
			goto new_range;
		}

		range_truesize += skb->truesize;
		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
			start = TCP_SKB_CB(skb)->seq;
		if (after(TCP_SKB_CB(skb)->end_seq, end))
			end = TCP_SKB_CB(skb)->end_seq;
	}
}

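/* Range-merging example for tcp_collapse_ofo_queue() (added note): with ofo
 * skbs covering [100,200), [150,300) and [300,400), the loop above extends a
 * single range to start = 100, end = 400, since each skb either overlaps or
 * abuts the running range. A following skb at [450,500) satisfies
 * after(450, 400), so the current range is flushed to tcp_collapse() and a
 * new range begins at 450. Single-skb ranges spanning less than roughly one
 * page are not worth collapsing; they are skipped, but only up to an
 * sk_rcvbuf/8 budget of such "tiny" ranges.
 */
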
/*
 * Clean the out-of-order queue to make room.
 * We drop packets with high sequence numbers in order to:
 * 1) Give holes a chance to be filled.
 *    This means we do not drop packets from the ooo queue if their sequence
 *    is before the incoming packet's sequence.
 * 2) Not add too much latency if thousands of packets sit there.
 *    (But if the application shrinks SO_RCVBUF, we could still end up
 *     freeing the whole queue here.)
 * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
 *
 * Return true if the queue has shrunk.
 */
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct rb_node *node, *prev;
	bool pruned = false;
	int goal;

	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
		return false;

	goal = sk->sk_rcvbuf >> 3;
	node = &tp->ooo_last_skb->rbnode;

	do {
		struct sk_buff *skb = rb_to_skb(node);

		/* If the incoming skb would land last in the ofo queue, stop pruning. */
		if (after(TCP_SKB_CB(in_skb)->seq, TCP_SKB_CB(skb)->seq))
			break;
		pruned = true;
		prev = rb_prev(node);
		rb_erase(node, &tp->out_of_order_queue);
		goal -= skb->truesize;
		tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
		tp->ooo_last_skb = rb_to_skb(prev);
		if (!prev || goal <= 0) {
			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
			    !tcp_under_memory_pressure(sk))
				break;
			goal = sk->sk_rcvbuf >> 3;
		}
		node = prev;
	} while (node);

	if (pruned) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
		/* Reset SACK state.  A conforming SACK implementation will
		 * do the same at a timeout based retransmit.  When a connection
		 * is in a sad state like this, we care only about integrity
		 * of the connection, not performance.
		 */
		if (tp->rx_opt.sack_ok)
			tcp_sack_reset(&tp->rx_opt);
	}
	return pruned;
}

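/* Arithmetic note for tcp_prune_ofo_queue() (added): goal = sk_rcvbuf >> 3
 * is the 12.5 % quota referenced in the comment above. With
 * sk_rcvbuf = 262144 (256 KiB), goal starts at 32768 bytes of truesize, and
 * pruning walks backwards from ooo_last_skb (the highest sequence), stopping
 * early once the incoming skb would sort last anyway, or once the quota is
 * met and the socket is back under both its sk_rcvbuf and memory-pressure
 * limits.
 */
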
/* Reduce allocated memory if we can, trying to get
 * the socket within its memory limits again.
 *
 * Return less than zero if we should start dropping frames
 * until the socket owning process reads some of the data
 * to stabilize the situation.
 */
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		tcp_clamp_window(sk);
	else if (tcp_under_memory_pressure(sk))
		tcp_adjust_rcv_ssthresh(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	tcp_collapse_ofo_queue(sk);
	if (!skb_queue_empty(&sk->sk_receive_queue))
		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
			     skb_peek(&sk->sk_receive_queue),
			     NULL,
			     tp->copied_seq, tp->rcv_nxt);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* Collapsing did not help, destructive actions follow.
	 * This should never occur.
	 */

	tcp_prune_ofo_queue(sk, in_skb);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* If we are really being abused, tell the caller to silently
	 * drop receive data on the floor.  It will get retransmitted
	 * and hopefully then we'll have sufficient space.
	 */
	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);

	/* Massive buffer overcommit. */
	tp->pred_flags = 0;
	return -1;
}

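/* Escalation summary for tcp_prune_queue() (added note): the function tries
 * progressively more destructive remedies, returning 0 as soon as
 * sk_rmem_alloc fits within sk_rcvbuf again:
 *
 *	1. clamp the advertised window / reduce rcv_ssthresh
 *	2. collapse the ofo queue, then the receive queue
 *	3. prune (drop) ofo data outright
 *	4. give up: return -1, and disable header prediction so every
 *	   subsequent segment takes the slow path until the reader catches up
 */
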
static bool tcp_should_expand_sndbuf(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If the user specified a specific send buffer setting, do
	 * not modify it.
	 */
	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
		return false;

	/* If we are under global TCP memory pressure, do not expand.  */
	if (tcp_under_memory_pressure(sk)) {
		int unused_mem = sk_unused_reserved_mem(sk);

		/* Adjust sndbuf according to reserved mem. But make sure
		 * it never goes below SOCK_MIN_SNDBUF.
		 * See sk_stream_moderate_sndbuf() for more details.
		 */
		if (unused_mem > SOCK_MIN_SNDBUF)
			WRITE_ONCE(sk->sk_sndbuf, unused_mem);

		return false;
	}

	/* If we are under soft global TCP memory pressure, do not expand.  */
	if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
		return false;

	/* If we filled the congestion window, do not expand.  */
	if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
		return false;

	return true;
}

static void tcp_new_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_should_expand_sndbuf(sk)) {
		tcp_sndbuf_expand(sk);
		tp->snd_cwnd_stamp = tcp_jiffies32;
	}

	INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}

/* The caller made space either from:
 * 1) Freeing skbs in the rtx queues (after tp->snd_una has advanced)
 * 2) Sending skbs from the output queue (and thus advancing tp->snd_nxt)
 *
 * We might be able to generate EPOLLOUT to the application if:
 * 1) Space consumed in the output/rtx queues is below sk->sk_sndbuf/2
 * 2) The notsent amount (tp->write_seq - tp->snd_nxt) became
 *    small enough that tcp_stream_memory_free() decides it
 *    is time to generate EPOLLOUT.
 */
void tcp_check_space(struct sock *sk)
{
	/* pairs with tcp_poll() */
	smp_mb();
	if (sk->sk_socket &&
	    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		tcp_new_space(sk);
		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
			tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
	}
}

static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);
	tcp_check_space(sk);
}

/*
 * Check if sending an ack is needed.
 */
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long rtt, delay;

	    /* More than one full frame received... */
	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
	     /* ... and the right edge of the window advances far enough.
	      * (tcp_recvmsg() will send an ACK otherwise.)
	      * If the application uses SO_RCVLOWAT, we want to send an ACK
	      * now if we have not received enough bytes to satisfy the
	      * condition.
	      */
	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
	    /* We ACK each frame or... */
	    tcp_in_quickack_mode(sk) ||
	    /* Protocol state mandates a one-time immediate ACK */
	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
send_now:
		tcp_send_ack(sk);
		return;
	}

	if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
		tcp_send_delayed_ack(sk);
		return;
	}

	if (!tcp_is_sack(tp) ||
	    tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
		goto send_now;

	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
		tp->dup_ack_counter = 0;
	}
	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
		tp->dup_ack_counter++;
		goto send_now;
	}
	tp->compressed_ack++;
	if (hrtimer_is_queued(&tp->compressed_ack_timer))
		return;

	/* Compressed ack timer: 5 % of the RTT, but no more than tcp_comp_sack_delay_ns. */

	rtt = tp->rcv_rtt_est.rtt_us;
	if (tp->srtt_us && tp->srtt_us < rtt)
		rtt = tp->srtt_us;

	delay = min_t(unsigned long,
		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
		      rtt * (NSEC_PER_USEC >> 3)/20);
	sock_hold(sk);
	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
			       HRTIMER_MODE_REL_PINNED_SOFT);
}

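/* Worked example for the compressed-ack delay above (added note, assuming
 * the rtt_us estimates are kept left-shifted by 3 as elsewhere in this
 * file): for an actual RTT of 40 ms, rtt = 40000 << 3 = 320000, and
 *
 *	rtt * (NSEC_PER_USEC >> 3) / 20
 *		= 320000 * (1000 >> 3) / 20
 *		= 320000 * 125 / 20
 *		= 2000000 ns = 2 ms
 *
 * i.e. exactly 5 % of the RTT: the >> 3 cancels the fixed-point shift and
 * the / 20 takes the 5 %. The result is then clamped by the
 * tcp_comp_sack_delay_ns sysctl.
 */
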
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
		return;
	}
	__tcp_ack_snd_check(sk, 1);
}

/*
 *	This routine is only called when we have urgent data
 *	signaled. It's the 'slow' part of tcp_urg. It could be
 *	moved inline now, as tcp_urg is only called from one
 *	place. We handle URGent data wrong. We have to - as
 *	BSD still doesn't use the correction from RFC961.
 *	For 1003.1g we should support a new option TCP_STDURG to permit
 *	either form (or just set the sysctl tcp_stdurg).
 */

static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 ptr = ntohs(th->urg_ptr);

	if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
		ptr--;
	ptr += ntohl(th->seq);

	/* Ignore urgent data that we've already seen and read. */
	if (after(tp->copied_seq, ptr))
		return;

	/* Do not replay the urg ptr.
	 *
	 * NOTE: an interesting situation not covered by the specs.
	 * A misbehaving sender may send an urg ptr pointing to a segment
	 * which we already have in the ofo queue. We are not able to fetch
	 * such data and will stay in TCP_URG_NOTYET until it is eaten
	 * by recvmsg(). It seems we are not obliged to handle such wicked
	 * situations. But it is worth thinking about the possibility of some
	 * DoSes using some hypothetical application-level deadlock.
	 */
	if (before(ptr, tp->rcv_nxt))
		return;

	/* Do we already have a newer (or duplicate) urgent pointer? */
	if (tp->urg_data && !after(ptr, tp->urg_seq))
		return;

	/* Tell the world about our new urgent pointer. */
	sk_send_sigurg(sk);

	/* We may be adding urgent data when the last byte read was
	 * urgent. To do this requires some care. We cannot just ignore
	 * tp->copied_seq since we would read the last urgent byte again
	 * as data, nor can we alter copied_seq until this data arrives
	 * or we break the semantics of SIOCATMARK (and thus sockatmark()).
	 *
	 * NOTE. Double Dutch. Rendering to plain English: the author of the
	 * comment above did something like send("A", MSG_OOB); send("B", MSG_OOB);
	 * and expected that both A and B disappear from the stream. This is
	 * _wrong_. Though this happens in BSD with high probability, it is
	 * occasional. Any application relying on this is buggy. Note also that
	 * the fix "works" only in this artificial test. Insert some normal data
	 * between A and B and we will diverge from BSD again. Verdict: it is
	 * better to remove this, to trap buggy users.
	 */
	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
		tp->copied_seq++;
		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			__kfree_skb(skb);
		}
	}

	WRITE_ONCE(tp->urg_data, TCP_URG_NOTYET);
	WRITE_ONCE(tp->urg_seq, ptr);

	/* Disable header prediction. */
	tp->pred_flags = 0;
}

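/* Urgent-pointer arithmetic example for tcp_check_urg() (added note): with
 * th->seq = 1000 and th->urg_ptr = 5, the default (BSD-compatible) path
 * computes ptr = 1000 + 5 - 1 = 1004, treating the urgent byte as the one
 * just before where the raw pointer lands; with sysctl tcp_stdurg = 1 the
 * pointer is used as-is, giving ptr = 1005. Userspace view of the test case
 * mocked in the Double Dutch comment (illustrative sketch only):
 *
 *	send(fd, "A", 1, MSG_OOB);
 *	send(fd, "B", 1, MSG_OOB);	// B's urgent pointer supersedes A's
 *	...
 *	recv(fd, buf, 1, MSG_OOB);	// yields "B"; "A" stays in-band
 */
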
/* This is the 'fast' part of urgent handling. */
static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Check if we get a new urgent pointer - normally not. */
	if (unlikely(th->urg))
		tcp_check_urg(sk, th);

	/* Do we wait for any urgent data? - normally not... */
	if (unlikely(tp->urg_data == TCP_URG_NOTYET)) {
		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
			  th->syn;

		/* Is the urgent pointer pointing into this packet? */
		if (ptr < skb->len) {
			u8 tmp;
			if (skb_copy_bits(skb, ptr, &tmp, 1))
				BUG();
			WRITE_ONCE(tp->urg_data, TCP_URG_VALID | tmp);
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_data_ready(sk);
		}
	}
}

/* Accept an RST for rcv_nxt - 1 after a FIN.
 * When TCP connections are abruptly terminated from Mac OSX (via ^C), a
 * FIN is sent followed by an RST packet. The RST is sent with the same
 * sequence number as the FIN, and thus according to RFC 5961 a challenge
 * ACK should be sent. However, Mac OSX rate limits replies to challenge
 * ACKs on the closed socket. In addition, middleboxes can drop either the
 * challenge ACK or a subsequent RST.
 */
static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
			(1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
					       TCPF_CLOSING));
}

/* Does PAWS and seqno based validation of an incoming segment; flags play
 * a significant role here.
 */
static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
				  const struct tcphdr *th, int syn_inerr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	SKB_DR(reason);

	/* RFC1323: H1. Apply PAWS check first. */
	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
	    tp->rx_opt.saw_tstamp &&
	    tcp_paws_discard(sk, skb)) {
		if (!th->rst) {
			if (unlikely(th->syn))
				goto syn_challenge;
			NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
			if (!tcp_oow_rate_limited(sock_net(sk), skb,
						  LINUX_MIB_TCPACKSKIPPEDPAWS,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
			SKB_DR_SET(reason, TCP_RFC7323_PAWS);
			goto discard;
		}
		/* Reset is accepted even if it did not pass PAWS. */
	}

	/* Step 1: check sequence number */
	reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
	if (reason) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST
		 * bit is set, if so drop the segment and return)".
		 */
		if (!th->rst) {
			if (th->syn)
				goto syn_challenge;
			if (!tcp_oow_rate_limited(sock_net(sk), skb,
						  LINUX_MIB_TCPACKSKIPPEDSEQ,
						  &tp->last_oow_ack_time))
				tcp_send_dupack(sk, skb);
		} else if (tcp_reset_check(sk, skb)) {
			goto reset;
		}
		goto discard;
	}

	/* Step 2: check RST bit */
	if (th->rst) {
		/* RFC 5961 3.2 (extended to also match against (RCV.NXT - 1)
		 * after a FIN, and against the right-most SACK block if
		 * available):
		 * If the seq num matches RCV.NXT, or (RCV.NXT - 1) after a FIN,
		 * or the right-most SACK block,
		 * then
		 *     RESET the connection
		 * else
		 *     Send a challenge ACK
		 */
		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
		    tcp_reset_check(sk, skb))
			goto reset;

		if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
			struct tcp_sack_block *sp = &tp->selective_acks[0];
			int max_sack = sp[0].end_seq;
			int this_sack;

			for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
			     ++this_sack) {
				max_sack = after(sp[this_sack].end_seq,
						 max_sack) ?
					sp[this_sack].end_seq : max_sack;
			}

			if (TCP_SKB_CB(skb)->seq == max_sack)
				goto reset;
		}

		/* Disable TFO if the RST is out-of-order
		 * and no data has been received
		 * for the current active TFO socket
		 */
		if (tp->syn_fastopen && !tp->data_segs_in &&
		    sk->sk_state == TCP_ESTABLISHED)
			tcp_fastopen_active_disable(sk);
		tcp_send_challenge_ack(sk);
		SKB_DR_SET(reason, TCP_RESET);
		goto discard;
	}

	/* step 3: check security and precedence [ignored] */

	/* step 4: Check for a SYN
	 * RFC 5961 4.2 : Send a challenge ack
	 */
	if (th->syn) {
syn_challenge:
		if (syn_inerr)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
		tcp_send_challenge_ack(sk);
		SKB_DR_SET(reason, TCP_INVALID_SYN);
		goto discard;
	}

	bpf_skops_parse_hdr(sk, skb);

	return true;

discard:
	tcp_drop_reason(sk, skb, reason);
	return false;

reset:
	tcp_reset(sk, skb);
	__kfree_skb(skb);
	return false;
}

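/* RFC 5961 decision table for the RST handling above (added summary, with
 * example numbers for rcv_nxt = 1000 and a 500-byte window):
 *
 *	RST seq == 1000 (== RCV.NXT)         -> reset the connection
 *	RST seq == 999 after a FIN           -> reset (tcp_reset_check())
 *	RST seq in (1000, 1500), not exact   -> challenge ACK, keep socket
 *	SYN anywhere in the window           -> challenge ACK + SYNCHALLENGE
 *
 * The challenge ACK forces a legitimate peer to re-send the RST with the
 * exact sequence number, which a blind attacker cannot do.
 */
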
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	disabled when:
 *	- A zero window was announced by us - zero window probing
 *	  is only handled properly in the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left.
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags).
 *	- Data is sent in both directions. The fast path only supports pure
 *	  senders or pure receivers (this means either the sequence number
 *	  or the ack value must stay constant).
 *	- An unexpected TCP option arrived.
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	const struct tcphdr *th = (const struct tcphdr *)skb->data;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int len = skb->len;

	/* TCP congestion window tracking */
	trace_tcp_probe(sk, skb);

	tcp_mstamp_refresh(tp);
	if (unlikely(!rcu_access_pointer(sk->sk_rx_dst)))
		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
	/*
	 *	Header prediction.
	 *	The code loosely follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into the socket queue
	 *	on a device interrupt, to call the tcp receive function
	 *	in receive process context and do the checksum and copy
	 *	to user space there. smart...
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing...
	 *	We do checksum and copy also but from device to kernel.
	 */

	tp->rx_opt.saw_tstamp = 0;

	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made.
	 *	'S' will always be tp->tcp_header_len >> 2.
	 *	'?' will be 0 for the fast path; otherwise pred_flags is 0 to
	 *	turn it off (when there are holes in the receive
	 *	space, for instance).
	 *	The PSH flag is ignored.
	 */
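
	/* Worked example of pred_flags (added note; the layout described
	 * above matches what __tcp_fast_path_on() builds): with timestamps
	 * enabled, tcp_header_len = 32, so S = 32 >> 2 = 0x8. For
	 * snd_wnd = 0x1234 the predicted header word is
	 *
	 *	0x8010 << 16 | 0x1234 = 0x80101234
	 *
	 * i.e. doff = 8 (32-byte header), only the ACK flag set, and an
	 * unchanged window. Any arriving segment whose 4th header word
	 * differs (extra flags, new options, different doff) fails the
	 * comparison below and takes the slow path.
	 */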

	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
		int tcp_header_len = tp->tcp_header_len;

		/* Timestamp header prediction: tcp_header_len
		 * is automatically equal to th->doff*4 due to the pred_flags
		 * match.
		 */

		/* Check timestamp */
		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
			/* No? Slow path! */
			if (!tcp_parse_aligned_timestamp(tp, th))
				goto slow_path;

			/* If PAWS failed, check it more carefully in slow path */
			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
				goto slow_path;

			/* DO NOT update ts_recent here: if the checksum fails
			 * and the timestamp was the corrupted part, it would
			 * result in a hung connection, since we would drop
			 * all future packets due to the PAWS test.
			 */
		}

		if (len <= tcp_header_len) {
			/* Bulk data transfer: sender */
			if (len == tcp_header_len) {
				/* Predicted packet is in window by definition.
				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
				 * Hence, check seq<=rcv_wup reduces to:
				 */
				if (tcp_header_len ==
				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
				    tp->rcv_nxt == tp->rcv_wup)
					tcp_store_ts_recent(tp);

				/* We know that such packets are checksummed
				 * on entry.
				 */
				tcp_ack(sk, skb, 0);
				__kfree_skb(skb);
				tcp_data_snd_check(sk);
				/* When receiving a pure ack in the fast path,
				 * update the last ts ecr directly instead of
				 * calling tcp_rcv_rtt_measure_ts()
				 */
				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
				return;
			} else { /* Header too small */
				reason = SKB_DROP_REASON_PKT_TOO_SMALL;
				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
				goto discard;
			}
		} else {
			int eaten = 0;
			bool fragstolen = false;

			if (tcp_checksum_complete(skb))
				goto csum_error;

			if ((int)skb->truesize > sk->sk_forward_alloc)
				goto step5;

			/* Predicted packet is in window by definition.
			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
			 * Hence, check seq<=rcv_wup reduces to:
			 */
			if (tcp_header_len ==
			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
			    tp->rcv_nxt == tp->rcv_wup)
				tcp_store_ts_recent(tp);

			tcp_rcv_rtt_measure_ts(sk, skb);

			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);

			/* Bulk data transfer: receiver */
			skb_dst_drop(skb);
			__skb_pull(skb, tcp_header_len);
			eaten = tcp_queue_rcv(sk, skb, &fragstolen);

			tcp_event_data_recv(sk, skb);

			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
				/* Well, only one small jumplet in fast path... */
				tcp_ack(sk, skb, FLAG_DATA);
				tcp_data_snd_check(sk);
				if (!inet_csk_ack_scheduled(sk))
					goto no_ack;
			} else {
				tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
			}

			__tcp_ack_snd_check(sk, 0);
no_ack:
			if (eaten)
				kfree_skb_partial(skb, fragstolen);
			tcp_data_ready(sk);
			return;
		}
	}
602962306a36Sopenharmony_ci
603062306a36Sopenharmony_cislow_path:
603162306a36Sopenharmony_ci	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
603262306a36Sopenharmony_ci		goto csum_error;
603362306a36Sopenharmony_ci
603462306a36Sopenharmony_ci	if (!th->ack && !th->rst && !th->syn) {
603562306a36Sopenharmony_ci		reason = SKB_DROP_REASON_TCP_FLAGS;
603662306a36Sopenharmony_ci		goto discard;
603762306a36Sopenharmony_ci	}
603862306a36Sopenharmony_ci
603962306a36Sopenharmony_ci	/*
604062306a36Sopenharmony_ci	 *	Standard slow path.
604162306a36Sopenharmony_ci	 */
604262306a36Sopenharmony_ci
604362306a36Sopenharmony_ci	if (!tcp_validate_incoming(sk, skb, th, 1))
604462306a36Sopenharmony_ci		return;
604562306a36Sopenharmony_ci
604662306a36Sopenharmony_cistep5:
604762306a36Sopenharmony_ci	reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
604862306a36Sopenharmony_ci	if ((int)reason < 0) {
604962306a36Sopenharmony_ci		reason = -reason;
605062306a36Sopenharmony_ci		goto discard;
605162306a36Sopenharmony_ci	}
605262306a36Sopenharmony_ci	tcp_rcv_rtt_measure_ts(sk, skb);
605362306a36Sopenharmony_ci
605462306a36Sopenharmony_ci	/* Process urgent data. */
605562306a36Sopenharmony_ci	tcp_urg(sk, skb, th);
605662306a36Sopenharmony_ci
605762306a36Sopenharmony_ci	/* step 7: process the segment text */
605862306a36Sopenharmony_ci	tcp_data_queue(sk, skb);
605962306a36Sopenharmony_ci
606062306a36Sopenharmony_ci	tcp_data_snd_check(sk);
606162306a36Sopenharmony_ci	tcp_ack_snd_check(sk);
606262306a36Sopenharmony_ci	return;
606362306a36Sopenharmony_ci
606462306a36Sopenharmony_cicsum_error:
606562306a36Sopenharmony_ci	reason = SKB_DROP_REASON_TCP_CSUM;
606662306a36Sopenharmony_ci	trace_tcp_bad_csum(skb);
606762306a36Sopenharmony_ci	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
606862306a36Sopenharmony_ci	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
606962306a36Sopenharmony_ci
607062306a36Sopenharmony_cidiscard:
607162306a36Sopenharmony_ci	tcp_drop_reason(sk, skb, reason);
607262306a36Sopenharmony_ci}
607362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_rcv_established);
607462306a36Sopenharmony_ci
607562306a36Sopenharmony_civoid tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
607662306a36Sopenharmony_ci{
607762306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
607862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
607962306a36Sopenharmony_ci
608062306a36Sopenharmony_ci	tcp_mtup_init(sk);
608162306a36Sopenharmony_ci	icsk->icsk_af_ops->rebuild_header(sk);
608262306a36Sopenharmony_ci	tcp_init_metrics(sk);
608362306a36Sopenharmony_ci
608462306a36Sopenharmony_ci	/* Initialize the congestion window to start the transfer.
608562306a36Sopenharmony_ci	 * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
608662306a36Sopenharmony_ci	 * retransmitted. In light of RFC6298's more aggressive 1 sec
608762306a36Sopenharmony_ci	 * initRTO, we only reset cwnd when more than one SYN/SYN-ACK
608862306a36Sopenharmony_ci	 * retransmission has occurred.
608962306a36Sopenharmony_ci	 */
609062306a36Sopenharmony_ci	if (tp->total_retrans > 1 && tp->undo_marker)
609162306a36Sopenharmony_ci		tcp_snd_cwnd_set(tp, 1);
609262306a36Sopenharmony_ci	else
609362306a36Sopenharmony_ci		tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
609462306a36Sopenharmony_ci	tp->snd_cwnd_stamp = tcp_jiffies32;
609562306a36Sopenharmony_ci
609662306a36Sopenharmony_ci	bpf_skops_established(sk, bpf_op, skb);
609762306a36Sopenharmony_ci	/* Initialize congestion control unless BPF initialized it already: */
609862306a36Sopenharmony_ci	if (!icsk->icsk_ca_initialized)
609962306a36Sopenharmony_ci		tcp_init_congestion_control(sk);
610062306a36Sopenharmony_ci	tcp_init_buffer_space(sk);
610162306a36Sopenharmony_ci}
610262306a36Sopenharmony_ci
610362306a36Sopenharmony_civoid tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
610462306a36Sopenharmony_ci{
610562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
610662306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
610762306a36Sopenharmony_ci
610862306a36Sopenharmony_ci	tcp_set_state(sk, TCP_ESTABLISHED);
610962306a36Sopenharmony_ci	icsk->icsk_ack.lrcvtime = tcp_jiffies32;
611062306a36Sopenharmony_ci
611162306a36Sopenharmony_ci	if (skb) {
611262306a36Sopenharmony_ci		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
611362306a36Sopenharmony_ci		security_inet_conn_established(sk, skb);
611462306a36Sopenharmony_ci		sk_mark_napi_id(sk, skb);
611562306a36Sopenharmony_ci	}
611662306a36Sopenharmony_ci
611762306a36Sopenharmony_ci	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
611862306a36Sopenharmony_ci
611962306a36Sopenharmony_ci	/* Prevent spurious tcp_cwnd_restart() on first data
612062306a36Sopenharmony_ci	 * packet.
612162306a36Sopenharmony_ci	 */
612262306a36Sopenharmony_ci	tp->lsndtime = tcp_jiffies32;
612362306a36Sopenharmony_ci
612462306a36Sopenharmony_ci	if (sock_flag(sk, SOCK_KEEPOPEN))
612562306a36Sopenharmony_ci		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
612662306a36Sopenharmony_ci
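	/* Without window scaling the fast path can be enabled right
	 * away; otherwise keep header prediction off (pred_flags == 0)
	 * until the peer's scaled window has been seen.
	 */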
612762306a36Sopenharmony_ci	if (!tp->rx_opt.snd_wscale)
612862306a36Sopenharmony_ci		__tcp_fast_path_on(tp, tp->snd_wnd);
612962306a36Sopenharmony_ci	else
613062306a36Sopenharmony_ci		tp->pred_flags = 0;
613162306a36Sopenharmony_ci}
613262306a36Sopenharmony_ci
613362306a36Sopenharmony_cistatic bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
613462306a36Sopenharmony_ci				    struct tcp_fastopen_cookie *cookie)
613562306a36Sopenharmony_ci{
613662306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
613762306a36Sopenharmony_ci	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
613862306a36Sopenharmony_ci	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
613962306a36Sopenharmony_ci	bool syn_drop = false;
614062306a36Sopenharmony_ci
614162306a36Sopenharmony_ci	if (mss == tp->rx_opt.user_mss) {
614262306a36Sopenharmony_ci		struct tcp_options_received opt;
614362306a36Sopenharmony_ci
614462306a36Sopenharmony_ci		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
614562306a36Sopenharmony_ci		tcp_clear_options(&opt);
614662306a36Sopenharmony_ci		opt.user_mss = opt.mss_clamp = 0;
614762306a36Sopenharmony_ci		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
614862306a36Sopenharmony_ci		mss = opt.mss_clamp;
614962306a36Sopenharmony_ci	}
615062306a36Sopenharmony_ci
615162306a36Sopenharmony_ci	if (!tp->syn_fastopen) {
615262306a36Sopenharmony_ci		/* Ignore an unsolicited cookie */
615362306a36Sopenharmony_ci		cookie->len = -1;
615462306a36Sopenharmony_ci	} else if (tp->total_retrans) {
615562306a36Sopenharmony_ci		/* SYN timed out and the SYN-ACK neither has a cookie nor
615662306a36Sopenharmony_ci		 * acknowledges data. Presumably the remote received only
615762306a36Sopenharmony_ci		 * the retransmitted (regular) SYNs: either the original
615862306a36Sopenharmony_ci		 * SYN-data or the corresponding SYN-ACK was dropped.
615962306a36Sopenharmony_ci		 */
616062306a36Sopenharmony_ci		syn_drop = (cookie->len < 0 && data);
616162306a36Sopenharmony_ci	} else if (cookie->len < 0 && !tp->syn_data) {
616262306a36Sopenharmony_ci		/* We requested a cookie but didn't get it. If we did not use
616362306a36Sopenharmony_ci		 * the (old) exp opt format, try it next time (try_exp=1).
616462306a36Sopenharmony_ci		 * Otherwise go back to using the RFC7413 opt (try_exp=2).
616562306a36Sopenharmony_ci		 */
616662306a36Sopenharmony_ci		try_exp = tp->syn_fastopen_exp ? 2 : 1;
616762306a36Sopenharmony_ci	}
616862306a36Sopenharmony_ci
616962306a36Sopenharmony_ci	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
617062306a36Sopenharmony_ci
617162306a36Sopenharmony_ci	if (data) { /* Retransmit unacked data in SYN */
617262306a36Sopenharmony_ci		if (tp->total_retrans)
617362306a36Sopenharmony_ci			tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
617462306a36Sopenharmony_ci		else
617562306a36Sopenharmony_ci			tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
617662306a36Sopenharmony_ci		skb_rbtree_walk_from(data)
617762306a36Sopenharmony_ci			 tcp_mark_skb_lost(sk, data);
617862306a36Sopenharmony_ci		tcp_xmit_retransmit_queue(sk);
617962306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk),
618062306a36Sopenharmony_ci				LINUX_MIB_TCPFASTOPENACTIVEFAIL);
618162306a36Sopenharmony_ci		return true;
618262306a36Sopenharmony_ci	}
618362306a36Sopenharmony_ci	tp->syn_data_acked = tp->syn_data;
618462306a36Sopenharmony_ci	if (tp->syn_data_acked) {
618562306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
618662306a36Sopenharmony_ci		/* SYN-data is counted as two separate packets in tcp_ack() */
618762306a36Sopenharmony_ci		if (tp->delivered > 1)
618862306a36Sopenharmony_ci			--tp->delivered;
618962306a36Sopenharmony_ci	}
619062306a36Sopenharmony_ci
619162306a36Sopenharmony_ci	tcp_fastopen_add_skb(sk, synack);
619262306a36Sopenharmony_ci
619362306a36Sopenharmony_ci	return false;
619462306a36Sopenharmony_ci}
619562306a36Sopenharmony_ci
619662306a36Sopenharmony_cistatic void smc_check_reset_syn(struct tcp_sock *tp)
619762306a36Sopenharmony_ci{
619862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_SMC)
619962306a36Sopenharmony_ci	if (static_branch_unlikely(&tcp_have_smc)) {
620062306a36Sopenharmony_ci		if (tp->syn_smc && !tp->rx_opt.smc_ok)
620162306a36Sopenharmony_ci			tp->syn_smc = 0;
620262306a36Sopenharmony_ci	}
620362306a36Sopenharmony_ci#endif
620462306a36Sopenharmony_ci}
620562306a36Sopenharmony_ci
620662306a36Sopenharmony_cistatic void tcp_try_undo_spurious_syn(struct sock *sk)
620762306a36Sopenharmony_ci{
620862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
620962306a36Sopenharmony_ci	u32 syn_stamp;
621062306a36Sopenharmony_ci
621162306a36Sopenharmony_ci	/* undo_marker is set when SYN or SYNACK times out. The timeout is
621262306a36Sopenharmony_ci	 * spurious if the ACK's timestamp option echo value matches the
621362306a36Sopenharmony_ci	 * original SYN timestamp.
621462306a36Sopenharmony_ci	 */
621562306a36Sopenharmony_ci	syn_stamp = tp->retrans_stamp;
621662306a36Sopenharmony_ci	if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
621762306a36Sopenharmony_ci	    syn_stamp == tp->rx_opt.rcv_tsecr)
621862306a36Sopenharmony_ci		tp->undo_marker = 0;
621962306a36Sopenharmony_ci}
622062306a36Sopenharmony_ci
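/* Handle a segment received in SYN_SENT state.
 *
 * Return values (consumed by tcp_rcv_state_process()):
 *   >0 - propagate to the caller, which sends a reset,
 *    0 - the skb was consumed (freed or queued) here,
 *   <0 - the caller runs step 6 onward by hand and frees the skb.
 */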
622162306a36Sopenharmony_cistatic int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
622262306a36Sopenharmony_ci					 const struct tcphdr *th)
622362306a36Sopenharmony_ci{
622462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
622562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
622662306a36Sopenharmony_ci	struct tcp_fastopen_cookie foc = { .len = -1 };
622762306a36Sopenharmony_ci	int saved_clamp = tp->rx_opt.mss_clamp;
622862306a36Sopenharmony_ci	bool fastopen_fail;
622962306a36Sopenharmony_ci	SKB_DR(reason);
623062306a36Sopenharmony_ci
623162306a36Sopenharmony_ci	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
623262306a36Sopenharmony_ci	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
623362306a36Sopenharmony_ci		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
623462306a36Sopenharmony_ci
623562306a36Sopenharmony_ci	if (th->ack) {
623662306a36Sopenharmony_ci		/* rfc793:
623762306a36Sopenharmony_ci		 * "If the state is SYN-SENT then
623862306a36Sopenharmony_ci		 *    first check the ACK bit
623962306a36Sopenharmony_ci		 *      If the ACK bit is set
624062306a36Sopenharmony_ci		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
624162306a36Sopenharmony_ci		 *        a reset (unless the RST bit is set, if so drop
624262306a36Sopenharmony_ci		 *        the segment and return)"
624362306a36Sopenharmony_ci		 */
624462306a36Sopenharmony_ci		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
624562306a36Sopenharmony_ci		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
624662306a36Sopenharmony_ci			/* Previous FIN/ACK or RST/ACK might be ignored. */
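			/* Re-arm the SYN retransmit timer with a minimal
			 * timeout, presumably so that the SYN is retried
			 * quickly if the reset we answer with gets lost.
			 */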
624762306a36Sopenharmony_ci			if (icsk->icsk_retransmits == 0)
624862306a36Sopenharmony_ci				inet_csk_reset_xmit_timer(sk,
624962306a36Sopenharmony_ci						ICSK_TIME_RETRANS,
625062306a36Sopenharmony_ci						TCP_TIMEOUT_MIN, TCP_RTO_MAX);
625162306a36Sopenharmony_ci			goto reset_and_undo;
625262306a36Sopenharmony_ci		}
625362306a36Sopenharmony_ci
625462306a36Sopenharmony_ci		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
625562306a36Sopenharmony_ci		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
625662306a36Sopenharmony_ci			     tcp_time_stamp(tp))) {
625762306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk),
625862306a36Sopenharmony_ci					LINUX_MIB_PAWSACTIVEREJECTED);
625962306a36Sopenharmony_ci			goto reset_and_undo;
626062306a36Sopenharmony_ci		}
626162306a36Sopenharmony_ci
626262306a36Sopenharmony_ci		/* Now ACK is acceptable.
626362306a36Sopenharmony_ci		 *
626462306a36Sopenharmony_ci		 * "If the RST bit is set
626562306a36Sopenharmony_ci		 *    If the ACK was acceptable then signal the user "error:
626662306a36Sopenharmony_ci		 *    connection reset", drop the segment, enter CLOSED state,
626762306a36Sopenharmony_ci		 *    delete TCB, and return."
626862306a36Sopenharmony_ci		 */
626962306a36Sopenharmony_ci
627062306a36Sopenharmony_ci		if (th->rst) {
627162306a36Sopenharmony_ci			tcp_reset(sk, skb);
627262306a36Sopenharmony_ciconsume:
627362306a36Sopenharmony_ci			__kfree_skb(skb);
627462306a36Sopenharmony_ci			return 0;
627562306a36Sopenharmony_ci		}
627662306a36Sopenharmony_ci
627762306a36Sopenharmony_ci		/* rfc793:
627862306a36Sopenharmony_ci		 *   "fifth, if neither of the SYN or RST bits is set then
627962306a36Sopenharmony_ci		 *    drop the segment and return."
628062306a36Sopenharmony_ci		 *
628162306a36Sopenharmony_ci		 *    See note below!
628262306a36Sopenharmony_ci		 *                                        --ANK(990513)
628362306a36Sopenharmony_ci		 */
628462306a36Sopenharmony_ci		if (!th->syn) {
628562306a36Sopenharmony_ci			SKB_DR_SET(reason, TCP_FLAGS);
628662306a36Sopenharmony_ci			goto discard_and_undo;
628762306a36Sopenharmony_ci		}
628862306a36Sopenharmony_ci		/* rfc793:
628962306a36Sopenharmony_ci		 *   "If the SYN bit is on ...
629062306a36Sopenharmony_ci		 *    are acceptable then ...
629162306a36Sopenharmony_ci		 *    (our SYN has been ACKed), change the connection
629262306a36Sopenharmony_ci		 *    state to ESTABLISHED..."
629362306a36Sopenharmony_ci		 */
629462306a36Sopenharmony_ci
629562306a36Sopenharmony_ci		tcp_ecn_rcv_synack(tp, th);
629662306a36Sopenharmony_ci
629762306a36Sopenharmony_ci		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
629862306a36Sopenharmony_ci		tcp_try_undo_spurious_syn(sk);
629962306a36Sopenharmony_ci		tcp_ack(sk, skb, FLAG_SLOWPATH);
630062306a36Sopenharmony_ci
630162306a36Sopenharmony_ci		/* OK, it's good. Set up sequence numbers and
630262306a36Sopenharmony_ci		 * move to established.
630362306a36Sopenharmony_ci		 */
630462306a36Sopenharmony_ci		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
630562306a36Sopenharmony_ci		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
630662306a36Sopenharmony_ci
630762306a36Sopenharmony_ci		/* RFC1323: The window in SYN & SYN/ACK segments is
630862306a36Sopenharmony_ci		 * never scaled.
630962306a36Sopenharmony_ci		 */
631062306a36Sopenharmony_ci		tp->snd_wnd = ntohs(th->window);
631162306a36Sopenharmony_ci
631262306a36Sopenharmony_ci		if (!tp->rx_opt.wscale_ok) {
631362306a36Sopenharmony_ci			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
631462306a36Sopenharmony_ci			tp->window_clamp = min(tp->window_clamp, 65535U);
631562306a36Sopenharmony_ci		}
631662306a36Sopenharmony_ci
631762306a36Sopenharmony_ci		if (tp->rx_opt.saw_tstamp) {
631862306a36Sopenharmony_ci			tp->rx_opt.tstamp_ok	   = 1;
631962306a36Sopenharmony_ci			tp->tcp_header_len =
632062306a36Sopenharmony_ci				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
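			/* The timestamp option occupies 12 bytes of payload
			 * space in every segment, so shrink the advertised
			 * MSS accordingly.
			 */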
632162306a36Sopenharmony_ci			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
632262306a36Sopenharmony_ci			tcp_store_ts_recent(tp);
632362306a36Sopenharmony_ci		} else {
632462306a36Sopenharmony_ci			tp->tcp_header_len = sizeof(struct tcphdr);
632562306a36Sopenharmony_ci		}
632662306a36Sopenharmony_ci
632762306a36Sopenharmony_ci		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
632862306a36Sopenharmony_ci		tcp_initialize_rcv_mss(sk);
632962306a36Sopenharmony_ci
633062306a36Sopenharmony_ci		/* Remember, tcp_poll() does not lock the socket!
633162306a36Sopenharmony_ci		 * Change state from SYN-SENT only after copied_seq
633262306a36Sopenharmony_ci		 * is initialized. */
633362306a36Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
633462306a36Sopenharmony_ci
633562306a36Sopenharmony_ci		smc_check_reset_syn(tp);
633662306a36Sopenharmony_ci
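		/* Order the writes above against the state change below:
		 * lockless readers such as tcp_poll() check sk_state
		 * before reading copied_seq.
		 */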
633762306a36Sopenharmony_ci		smp_mb();
633862306a36Sopenharmony_ci
633962306a36Sopenharmony_ci		tcp_finish_connect(sk, skb);
634062306a36Sopenharmony_ci
634162306a36Sopenharmony_ci		fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
634262306a36Sopenharmony_ci				tcp_rcv_fastopen_synack(sk, skb, &foc);
634362306a36Sopenharmony_ci
634462306a36Sopenharmony_ci		if (!sock_flag(sk, SOCK_DEAD)) {
634562306a36Sopenharmony_ci			sk->sk_state_change(sk);
634662306a36Sopenharmony_ci			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
634762306a36Sopenharmony_ci		}
634862306a36Sopenharmony_ci		if (fastopen_fail)
634962306a36Sopenharmony_ci			return -1;
635062306a36Sopenharmony_ci		if (sk->sk_write_pending ||
635162306a36Sopenharmony_ci		    READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) ||
635262306a36Sopenharmony_ci		    inet_csk_in_pingpong_mode(sk)) {
635362306a36Sopenharmony_ci			/* Save one ACK. Data will be ready after
635462306a36Sopenharmony_ci			 * several ticks, if write_pending is set.
635562306a36Sopenharmony_ci			 *
635662306a36Sopenharmony_ci			 * It may be deleted, but with this feature tcpdumps
635762306a36Sopenharmony_ci			 * look so _wonderfully_ clever that I was not able
635862306a36Sopenharmony_ci			 * to resist the temptation 8)     --ANK
635962306a36Sopenharmony_ci			 */
636062306a36Sopenharmony_ci			inet_csk_schedule_ack(sk);
636162306a36Sopenharmony_ci			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
636262306a36Sopenharmony_ci			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
636362306a36Sopenharmony_ci						  TCP_DELACK_MAX, TCP_RTO_MAX);
636462306a36Sopenharmony_ci			goto consume;
636562306a36Sopenharmony_ci		}
636662306a36Sopenharmony_ci		tcp_send_ack(sk);
636762306a36Sopenharmony_ci		return -1;
636862306a36Sopenharmony_ci	}
636962306a36Sopenharmony_ci
637062306a36Sopenharmony_ci	/* No ACK in the segment */
637162306a36Sopenharmony_ci
637262306a36Sopenharmony_ci	if (th->rst) {
637362306a36Sopenharmony_ci		/* rfc793:
637462306a36Sopenharmony_ci		 * "If the RST bit is set
637562306a36Sopenharmony_ci		 *
637662306a36Sopenharmony_ci		 *      Otherwise (no ACK) drop the segment and return."
637762306a36Sopenharmony_ci		 */
637862306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_RESET);
637962306a36Sopenharmony_ci		goto discard_and_undo;
638062306a36Sopenharmony_ci	}
638162306a36Sopenharmony_ci
638262306a36Sopenharmony_ci	/* PAWS check. */
638362306a36Sopenharmony_ci	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
638462306a36Sopenharmony_ci	    tcp_paws_reject(&tp->rx_opt, 0)) {
638562306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_RFC7323_PAWS);
638662306a36Sopenharmony_ci		goto discard_and_undo;
638762306a36Sopenharmony_ci	}
638862306a36Sopenharmony_ci	if (th->syn) {
638962306a36Sopenharmony_ci		/* We see a SYN without an ACK. It is an attempt at
639062306a36Sopenharmony_ci		 * simultaneous connect with crossed SYNs.
639162306a36Sopenharmony_ci		 * In particular, it can be a connect to self.
639262306a36Sopenharmony_ci		 */
639362306a36Sopenharmony_ci		tcp_set_state(sk, TCP_SYN_RECV);
639462306a36Sopenharmony_ci
639562306a36Sopenharmony_ci		if (tp->rx_opt.saw_tstamp) {
639662306a36Sopenharmony_ci			tp->rx_opt.tstamp_ok = 1;
639762306a36Sopenharmony_ci			tcp_store_ts_recent(tp);
639862306a36Sopenharmony_ci			tp->tcp_header_len =
639962306a36Sopenharmony_ci				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
640062306a36Sopenharmony_ci		} else {
640162306a36Sopenharmony_ci			tp->tcp_header_len = sizeof(struct tcphdr);
640262306a36Sopenharmony_ci		}
640362306a36Sopenharmony_ci
640462306a36Sopenharmony_ci		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
640562306a36Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
640662306a36Sopenharmony_ci		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
640762306a36Sopenharmony_ci
640862306a36Sopenharmony_ci		/* RFC1323: The window in SYN & SYN/ACK segments is
640962306a36Sopenharmony_ci		 * never scaled.
641062306a36Sopenharmony_ci		 */
641162306a36Sopenharmony_ci		tp->snd_wnd    = ntohs(th->window);
641262306a36Sopenharmony_ci		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
641362306a36Sopenharmony_ci		tp->max_window = tp->snd_wnd;
641462306a36Sopenharmony_ci
641562306a36Sopenharmony_ci		tcp_ecn_rcv_syn(tp, th);
641662306a36Sopenharmony_ci
641762306a36Sopenharmony_ci		tcp_mtup_init(sk);
641862306a36Sopenharmony_ci		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
641962306a36Sopenharmony_ci		tcp_initialize_rcv_mss(sk);
642062306a36Sopenharmony_ci
642162306a36Sopenharmony_ci		tcp_send_synack(sk);
642262306a36Sopenharmony_ci#if 0
642362306a36Sopenharmony_ci		/* Note, we could accept data and URG from this segment.
642462306a36Sopenharmony_ci		 * There is no obstacle to doing so (except that we must
642562306a36Sopenharmony_ci		 * either change tcp_recvmsg() to prevent it from returning data
642662306a36Sopenharmony_ci		 * before the 3WHS completes per RFC793, or employ TCP Fast Open).
642762306a36Sopenharmony_ci		 *
642862306a36Sopenharmony_ci		 * However, if we sometimes ignore data in ACKless segments,
642962306a36Sopenharmony_ci		 * we have no reason to accept it at other times.
643062306a36Sopenharmony_ci		 * Also, the code doing this in step6 of tcp_rcv_state_process
643162306a36Sopenharmony_ci		 * seems not to be flawless. So, discard the packet for sanity.
643262306a36Sopenharmony_ci		 * Uncomment this return to process the data.
643362306a36Sopenharmony_ci		 */
643462306a36Sopenharmony_ci		return -1;
643562306a36Sopenharmony_ci#else
643662306a36Sopenharmony_ci		goto consume;
643762306a36Sopenharmony_ci#endif
643862306a36Sopenharmony_ci	}
643962306a36Sopenharmony_ci	/* "fifth, if neither of the SYN or RST bits is set then
644062306a36Sopenharmony_ci	 * drop the segment and return."
644162306a36Sopenharmony_ci	 */
644262306a36Sopenharmony_ci
644362306a36Sopenharmony_cidiscard_and_undo:
644462306a36Sopenharmony_ci	tcp_clear_options(&tp->rx_opt);
644562306a36Sopenharmony_ci	tp->rx_opt.mss_clamp = saved_clamp;
644662306a36Sopenharmony_ci	tcp_drop_reason(sk, skb, reason);
644762306a36Sopenharmony_ci	return 0;
644862306a36Sopenharmony_ci
644962306a36Sopenharmony_cireset_and_undo:
645062306a36Sopenharmony_ci	tcp_clear_options(&tp->rx_opt);
645162306a36Sopenharmony_ci	tp->rx_opt.mss_clamp = saved_clamp;
645262306a36Sopenharmony_ci	return 1;
645362306a36Sopenharmony_ci}
645462306a36Sopenharmony_ci
645562306a36Sopenharmony_cistatic void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
645662306a36Sopenharmony_ci{
645762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
645862306a36Sopenharmony_ci	struct request_sock *req;
645962306a36Sopenharmony_ci
646062306a36Sopenharmony_ci	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
646162306a36Sopenharmony_ci	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
646262306a36Sopenharmony_ci	 */
646362306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
646462306a36Sopenharmony_ci		tcp_try_undo_recovery(sk);
646562306a36Sopenharmony_ci
646662306a36Sopenharmony_ci	/* Reset rtx states to prevent spurious retransmits_timed_out() */
646762306a36Sopenharmony_ci	tp->retrans_stamp = 0;
646862306a36Sopenharmony_ci	inet_csk(sk)->icsk_retransmits = 0;
646962306a36Sopenharmony_ci
647062306a36Sopenharmony_ci	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
647162306a36Sopenharmony_ci	 * we no longer need req so release it.
647262306a36Sopenharmony_ci	 */
647362306a36Sopenharmony_ci	req = rcu_dereference_protected(tp->fastopen_rsk,
647462306a36Sopenharmony_ci					lockdep_sock_is_held(sk));
647562306a36Sopenharmony_ci	reqsk_fastopen_remove(sk, req, false);
647662306a36Sopenharmony_ci
647762306a36Sopenharmony_ci	/* Re-arm the timer because data may have been sent out.
647862306a36Sopenharmony_ci	 * This is similar to the regular data transmission case
647962306a36Sopenharmony_ci	 * when new data has just been ack'ed.
648062306a36Sopenharmony_ci	 *
648162306a36Sopenharmony_ci	 * (TFO) - we could try to be more aggressive and
648262306a36Sopenharmony_ci	 * retransmit any data sooner based on when it
648362306a36Sopenharmony_ci	 * was sent out.
648462306a36Sopenharmony_ci	 */
648562306a36Sopenharmony_ci	tcp_rearm_rto(sk);
648662306a36Sopenharmony_ci}
648762306a36Sopenharmony_ci
648862306a36Sopenharmony_ci/*
648962306a36Sopenharmony_ci *	This function implements the receiving procedure of RFC 793 for
649062306a36Sopenharmony_ci *	all states except ESTABLISHED and TIME_WAIT.
649162306a36Sopenharmony_ci *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
649262306a36Sopenharmony_ci *	address independent.
649362306a36Sopenharmony_ci */
649462306a36Sopenharmony_ci
649562306a36Sopenharmony_ciint tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
649662306a36Sopenharmony_ci{
649762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
649862306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
649962306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
650062306a36Sopenharmony_ci	struct request_sock *req;
650162306a36Sopenharmony_ci	int queued = 0;
650262306a36Sopenharmony_ci	bool acceptable;
650362306a36Sopenharmony_ci	SKB_DR(reason);
650462306a36Sopenharmony_ci
650562306a36Sopenharmony_ci	switch (sk->sk_state) {
650662306a36Sopenharmony_ci	case TCP_CLOSE:
650762306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_CLOSE);
650862306a36Sopenharmony_ci		goto discard;
650962306a36Sopenharmony_ci
651062306a36Sopenharmony_ci	case TCP_LISTEN:
651162306a36Sopenharmony_ci		if (th->ack)
651262306a36Sopenharmony_ci			return 1;
651362306a36Sopenharmony_ci
651462306a36Sopenharmony_ci		if (th->rst) {
651562306a36Sopenharmony_ci			SKB_DR_SET(reason, TCP_RESET);
651662306a36Sopenharmony_ci			goto discard;
651762306a36Sopenharmony_ci		}
651862306a36Sopenharmony_ci		if (th->syn) {
651962306a36Sopenharmony_ci			if (th->fin) {
652062306a36Sopenharmony_ci				SKB_DR_SET(reason, TCP_FLAGS);
652162306a36Sopenharmony_ci				goto discard;
652262306a36Sopenharmony_ci			}
652362306a36Sopenharmony_ci			/* It is possible that we process SYN packets from the backlog,
652462306a36Sopenharmony_ci			 * so we need to make sure BHs are disabled and the RCU read lock is held here.
652562306a36Sopenharmony_ci			 */
652662306a36Sopenharmony_ci			rcu_read_lock();
652762306a36Sopenharmony_ci			local_bh_disable();
652862306a36Sopenharmony_ci			acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
652962306a36Sopenharmony_ci			local_bh_enable();
653062306a36Sopenharmony_ci			rcu_read_unlock();
653162306a36Sopenharmony_ci
653262306a36Sopenharmony_ci			if (!acceptable)
653362306a36Sopenharmony_ci				return 1;
653462306a36Sopenharmony_ci			consume_skb(skb);
653562306a36Sopenharmony_ci			return 0;
653662306a36Sopenharmony_ci		}
653762306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_FLAGS);
653862306a36Sopenharmony_ci		goto discard;
653962306a36Sopenharmony_ci
654062306a36Sopenharmony_ci	case TCP_SYN_SENT:
654162306a36Sopenharmony_ci		tp->rx_opt.saw_tstamp = 0;
654262306a36Sopenharmony_ci		tcp_mstamp_refresh(tp);
654362306a36Sopenharmony_ci		queued = tcp_rcv_synsent_state_process(sk, skb, th);
654462306a36Sopenharmony_ci		if (queued >= 0)
654562306a36Sopenharmony_ci			return queued;
654662306a36Sopenharmony_ci
654762306a36Sopenharmony_ci		/* Do step6 onward by hand. */
654862306a36Sopenharmony_ci		tcp_urg(sk, skb, th);
654962306a36Sopenharmony_ci		__kfree_skb(skb);
655062306a36Sopenharmony_ci		tcp_data_snd_check(sk);
655162306a36Sopenharmony_ci		return 0;
655262306a36Sopenharmony_ci	}
655362306a36Sopenharmony_ci
655462306a36Sopenharmony_ci	tcp_mstamp_refresh(tp);
655562306a36Sopenharmony_ci	tp->rx_opt.saw_tstamp = 0;
655662306a36Sopenharmony_ci	req = rcu_dereference_protected(tp->fastopen_rsk,
655762306a36Sopenharmony_ci					lockdep_sock_is_held(sk));
655862306a36Sopenharmony_ci	if (req) {
655962306a36Sopenharmony_ci		bool req_stolen;
656062306a36Sopenharmony_ci
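		/* The fastopen request is only released when leaving
		 * SYN_RECV or FIN_WAIT1 (see
		 * tcp_rcv_synrecv_state_fastopen()), so no other states
		 * are expected here.
		 */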
656162306a36Sopenharmony_ci		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
656262306a36Sopenharmony_ci		    sk->sk_state != TCP_FIN_WAIT1);
656362306a36Sopenharmony_ci
656462306a36Sopenharmony_ci		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
656562306a36Sopenharmony_ci			SKB_DR_SET(reason, TCP_FASTOPEN);
656662306a36Sopenharmony_ci			goto discard;
656762306a36Sopenharmony_ci		}
656862306a36Sopenharmony_ci	}
656962306a36Sopenharmony_ci
657062306a36Sopenharmony_ci	if (!th->ack && !th->rst && !th->syn) {
657162306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_FLAGS);
657262306a36Sopenharmony_ci		goto discard;
657362306a36Sopenharmony_ci	}
657462306a36Sopenharmony_ci	if (!tcp_validate_incoming(sk, skb, th, 0))
657562306a36Sopenharmony_ci		return 0;
657662306a36Sopenharmony_ci
657762306a36Sopenharmony_ci	/* step 5: check the ACK field */
657862306a36Sopenharmony_ci	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
657962306a36Sopenharmony_ci				      FLAG_UPDATE_TS_RECENT |
658062306a36Sopenharmony_ci				      FLAG_NO_CHALLENGE_ACK) > 0;
658162306a36Sopenharmony_ci
658262306a36Sopenharmony_ci	if (!acceptable) {
658362306a36Sopenharmony_ci		if (sk->sk_state == TCP_SYN_RECV)
658462306a36Sopenharmony_ci			return 1;	/* send one RST */
658562306a36Sopenharmony_ci		tcp_send_challenge_ack(sk);
658662306a36Sopenharmony_ci		SKB_DR_SET(reason, TCP_OLD_ACK);
658762306a36Sopenharmony_ci		goto discard;
658862306a36Sopenharmony_ci	}
658962306a36Sopenharmony_ci	switch (sk->sk_state) {
659062306a36Sopenharmony_ci	case TCP_SYN_RECV:
659162306a36Sopenharmony_ci		tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
659262306a36Sopenharmony_ci		if (!tp->srtt_us)
659362306a36Sopenharmony_ci			tcp_synack_rtt_meas(sk, req);
659462306a36Sopenharmony_ci
659562306a36Sopenharmony_ci		if (req) {
659662306a36Sopenharmony_ci			tcp_rcv_synrecv_state_fastopen(sk);
659762306a36Sopenharmony_ci		} else {
659862306a36Sopenharmony_ci			tcp_try_undo_spurious_syn(sk);
659962306a36Sopenharmony_ci			tp->retrans_stamp = 0;
660062306a36Sopenharmony_ci			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
660162306a36Sopenharmony_ci					  skb);
660262306a36Sopenharmony_ci			WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
660362306a36Sopenharmony_ci		}
660462306a36Sopenharmony_ci		smp_mb();
660562306a36Sopenharmony_ci		tcp_set_state(sk, TCP_ESTABLISHED);
660662306a36Sopenharmony_ci		sk->sk_state_change(sk);
660762306a36Sopenharmony_ci
660862306a36Sopenharmony_ci		/* Note that this wakeup is only for the marginal crossed-SYN case.
660962306a36Sopenharmony_ci		 * Passively opened sockets are not woken up, because
661062306a36Sopenharmony_ci		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
661162306a36Sopenharmony_ci		 */
661262306a36Sopenharmony_ci		if (sk->sk_socket)
661362306a36Sopenharmony_ci			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
661462306a36Sopenharmony_ci
661562306a36Sopenharmony_ci		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
661662306a36Sopenharmony_ci		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
661762306a36Sopenharmony_ci		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
661862306a36Sopenharmony_ci
661962306a36Sopenharmony_ci		if (tp->rx_opt.tstamp_ok)
662062306a36Sopenharmony_ci			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
662162306a36Sopenharmony_ci
662262306a36Sopenharmony_ci		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
662362306a36Sopenharmony_ci			tcp_update_pacing_rate(sk);
662462306a36Sopenharmony_ci
662562306a36Sopenharmony_ci		/* Prevent spurious tcp_cwnd_restart() on first data packet */
662662306a36Sopenharmony_ci		tp->lsndtime = tcp_jiffies32;
662762306a36Sopenharmony_ci
662862306a36Sopenharmony_ci		tcp_initialize_rcv_mss(sk);
662962306a36Sopenharmony_ci		tcp_fast_path_on(tp);
663062306a36Sopenharmony_ci		break;
663162306a36Sopenharmony_ci
663262306a36Sopenharmony_ci	case TCP_FIN_WAIT1: {
663362306a36Sopenharmony_ci		int tmo;
663462306a36Sopenharmony_ci
663562306a36Sopenharmony_ci		if (req)
663662306a36Sopenharmony_ci			tcp_rcv_synrecv_state_fastopen(sk);
663762306a36Sopenharmony_ci
663862306a36Sopenharmony_ci		if (tp->snd_una != tp->write_seq)
663962306a36Sopenharmony_ci			break;
664062306a36Sopenharmony_ci
664162306a36Sopenharmony_ci		tcp_set_state(sk, TCP_FIN_WAIT2);
664262306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN);
664362306a36Sopenharmony_ci
664462306a36Sopenharmony_ci		sk_dst_confirm(sk);
664562306a36Sopenharmony_ci
664662306a36Sopenharmony_ci		if (!sock_flag(sk, SOCK_DEAD)) {
664762306a36Sopenharmony_ci			/* Wake up lingering close() */
664862306a36Sopenharmony_ci			sk->sk_state_change(sk);
664962306a36Sopenharmony_ci			break;
665062306a36Sopenharmony_ci		}
665162306a36Sopenharmony_ci
665262306a36Sopenharmony_ci		if (READ_ONCE(tp->linger2) < 0) {
665362306a36Sopenharmony_ci			tcp_done(sk);
665462306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
665562306a36Sopenharmony_ci			return 1;
665662306a36Sopenharmony_ci		}
665762306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
665862306a36Sopenharmony_ci		    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
665962306a36Sopenharmony_ci			/* Receive out of order FIN after close() */
666062306a36Sopenharmony_ci			if (tp->syn_fastopen && th->fin)
666162306a36Sopenharmony_ci				tcp_fastopen_active_disable(sk);
666262306a36Sopenharmony_ci			tcp_done(sk);
666362306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
666462306a36Sopenharmony_ci			return 1;
666562306a36Sopenharmony_ci		}
666662306a36Sopenharmony_ci
666762306a36Sopenharmony_ci		tmo = tcp_fin_time(sk);
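		/* tmo is the FIN_WAIT2 lifetime. If it exceeds the
		 * TIME-WAIT length, wait out the excess on the keepalive
		 * timer first; otherwise let the timewait code handle
		 * the whole remainder now.
		 */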
666862306a36Sopenharmony_ci		if (tmo > TCP_TIMEWAIT_LEN) {
666962306a36Sopenharmony_ci			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
667062306a36Sopenharmony_ci		} else if (th->fin || sock_owned_by_user(sk)) {
667162306a36Sopenharmony_ci			/* Bad case. We could lose such a FIN otherwise.
667262306a36Sopenharmony_ci			 * It is not a big problem, but it looks confusing
667362306a36Sopenharmony_ci			 * and is not such a rare event. We can still lose it now,
667462306a36Sopenharmony_ci			 * if it spins in bh_lock_sock(), but that is a really
667562306a36Sopenharmony_ci			 * marginal case.
667662306a36Sopenharmony_ci			 */
667762306a36Sopenharmony_ci			inet_csk_reset_keepalive_timer(sk, tmo);
667862306a36Sopenharmony_ci		} else {
667962306a36Sopenharmony_ci			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
668062306a36Sopenharmony_ci			goto consume;
668162306a36Sopenharmony_ci		}
668262306a36Sopenharmony_ci		break;
668362306a36Sopenharmony_ci	}
668462306a36Sopenharmony_ci
668562306a36Sopenharmony_ci	case TCP_CLOSING:
668662306a36Sopenharmony_ci		if (tp->snd_una == tp->write_seq) {
668762306a36Sopenharmony_ci			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
668862306a36Sopenharmony_ci			goto consume;
668962306a36Sopenharmony_ci		}
669062306a36Sopenharmony_ci		break;
669162306a36Sopenharmony_ci
669262306a36Sopenharmony_ci	case TCP_LAST_ACK:
669362306a36Sopenharmony_ci		if (tp->snd_una == tp->write_seq) {
669462306a36Sopenharmony_ci			tcp_update_metrics(sk);
669562306a36Sopenharmony_ci			tcp_done(sk);
669662306a36Sopenharmony_ci			goto consume;
669762306a36Sopenharmony_ci		}
669862306a36Sopenharmony_ci		break;
669962306a36Sopenharmony_ci	}
670062306a36Sopenharmony_ci
670162306a36Sopenharmony_ci	/* step 6: check the URG bit */
670262306a36Sopenharmony_ci	tcp_urg(sk, skb, th);
670362306a36Sopenharmony_ci
670462306a36Sopenharmony_ci	/* step 7: process the segment text */
670562306a36Sopenharmony_ci	switch (sk->sk_state) {
670662306a36Sopenharmony_ci	case TCP_CLOSE_WAIT:
670762306a36Sopenharmony_ci	case TCP_CLOSING:
670862306a36Sopenharmony_ci	case TCP_LAST_ACK:
670962306a36Sopenharmony_ci		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
671062306a36Sopenharmony_ci			/* If a subflow has been reset, the packet should not
671162306a36Sopenharmony_ci			 * continue to be processed; drop the packet.
671262306a36Sopenharmony_ci			 */
671362306a36Sopenharmony_ci			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
671462306a36Sopenharmony_ci				goto discard;
671562306a36Sopenharmony_ci			break;
671662306a36Sopenharmony_ci		}
671762306a36Sopenharmony_ci		fallthrough;
671862306a36Sopenharmony_ci	case TCP_FIN_WAIT1:
671962306a36Sopenharmony_ci	case TCP_FIN_WAIT2:
672062306a36Sopenharmony_ci		/* RFC 793 says to queue data in these states,
672162306a36Sopenharmony_ci		 * RFC 1122 says we MUST send a reset.
672262306a36Sopenharmony_ci		 * BSD 4.4 also does reset.
672362306a36Sopenharmony_ci		 */
672462306a36Sopenharmony_ci		if (sk->sk_shutdown & RCV_SHUTDOWN) {
672562306a36Sopenharmony_ci			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
672662306a36Sopenharmony_ci			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
672762306a36Sopenharmony_ci				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
672862306a36Sopenharmony_ci				tcp_reset(sk, skb);
672962306a36Sopenharmony_ci				return 1;
673062306a36Sopenharmony_ci			}
673162306a36Sopenharmony_ci		}
673262306a36Sopenharmony_ci		fallthrough;
673362306a36Sopenharmony_ci	case TCP_ESTABLISHED:
673462306a36Sopenharmony_ci		tcp_data_queue(sk, skb);
673562306a36Sopenharmony_ci		queued = 1;
673662306a36Sopenharmony_ci		break;
673762306a36Sopenharmony_ci	}
673862306a36Sopenharmony_ci
673962306a36Sopenharmony_ci	/* tcp_data could move socket to TIME-WAIT */
674062306a36Sopenharmony_ci	if (sk->sk_state != TCP_CLOSE) {
674162306a36Sopenharmony_ci		tcp_data_snd_check(sk);
674262306a36Sopenharmony_ci		tcp_ack_snd_check(sk);
674362306a36Sopenharmony_ci	}
674462306a36Sopenharmony_ci
674562306a36Sopenharmony_ci	if (!queued) {
674662306a36Sopenharmony_cidiscard:
674762306a36Sopenharmony_ci		tcp_drop_reason(sk, skb, reason);
674862306a36Sopenharmony_ci	}
674962306a36Sopenharmony_ci	return 0;
675062306a36Sopenharmony_ci
675162306a36Sopenharmony_ciconsume:
675262306a36Sopenharmony_ci	__kfree_skb(skb);
675362306a36Sopenharmony_ci	return 0;
675462306a36Sopenharmony_ci}
675562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_rcv_state_process);
675662306a36Sopenharmony_ci
675762306a36Sopenharmony_cistatic inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
675862306a36Sopenharmony_ci{
675962306a36Sopenharmony_ci	struct inet_request_sock *ireq = inet_rsk(req);
676062306a36Sopenharmony_ci
676162306a36Sopenharmony_ci	if (family == AF_INET)
676262306a36Sopenharmony_ci		net_dbg_ratelimited("drop open request from %pI4/%u\n",
676362306a36Sopenharmony_ci				    &ireq->ir_rmt_addr, port);
676462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
676562306a36Sopenharmony_ci	else if (family == AF_INET6)
676662306a36Sopenharmony_ci		net_dbg_ratelimited("drop open request from %pI6/%u\n",
676762306a36Sopenharmony_ci				    &ireq->ir_v6_rmt_addr, port);
676862306a36Sopenharmony_ci#endif
676962306a36Sopenharmony_ci}
677062306a36Sopenharmony_ci
677162306a36Sopenharmony_ci/* RFC3168, section 6.1.1: SYN packets must not have ECT/ECN bits set.
677262306a36Sopenharmony_ci *
677362306a36Sopenharmony_ci * If we receive a SYN packet with these bits set, it means a
677462306a36Sopenharmony_ci * network is playing bad games with TOS bits. In order to
677562306a36Sopenharmony_ci * avoid possible false congestion notifications, we disable
677662306a36Sopenharmony_ci * TCP ECN negotiation.
677762306a36Sopenharmony_ci *
677862306a36Sopenharmony_ci * Exception: tcp_ca wants ECN. This is required for DCTCP
677962306a36Sopenharmony_ci * congestion control: Linux DCTCP asserts ECT on all packets,
678062306a36Sopenharmony_ci * including SYN, which is the most optimal solution; however,
678162306a36Sopenharmony_ci * others, such as FreeBSD, do not.
678262306a36Sopenharmony_ci *
678362306a36Sopenharmony_ci * Exception: At least one of the reserved bits of the TCP header (th->res1) is
678462306a36Sopenharmony_ci * set, indicating the use of a future TCP extension (such as AccECN). See
678562306a36Sopenharmony_ci * RFC8311 §4.3 which updates RFC3168 to allow the development of such
678662306a36Sopenharmony_ci * extensions.
678762306a36Sopenharmony_ci */
678862306a36Sopenharmony_cistatic void tcp_ecn_create_request(struct request_sock *req,
678962306a36Sopenharmony_ci				   const struct sk_buff *skb,
679062306a36Sopenharmony_ci				   const struct sock *listen_sk,
679162306a36Sopenharmony_ci				   const struct dst_entry *dst)
679262306a36Sopenharmony_ci{
679362306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
679462306a36Sopenharmony_ci	const struct net *net = sock_net(listen_sk);
679562306a36Sopenharmony_ci	bool th_ecn = th->ece && th->cwr;
679662306a36Sopenharmony_ci	bool ect, ecn_ok;
679762306a36Sopenharmony_ci	u32 ecn_ok_dst;
679862306a36Sopenharmony_ci
679962306a36Sopenharmony_ci	if (!th_ecn)
680062306a36Sopenharmony_ci		return;
680162306a36Sopenharmony_ci
680262306a36Sopenharmony_ci	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
680362306a36Sopenharmony_ci	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
680462306a36Sopenharmony_ci	ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
680562306a36Sopenharmony_ci
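	/* Negotiate ECN if the SYN arrived ECN-clean (or uses the
	 * reserved bits, see RFC8311) and ECN is enabled by sysctl or
	 * route, or if the congestion control of this socket needs ECN.
	 */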
680662306a36Sopenharmony_ci	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
680762306a36Sopenharmony_ci	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
680862306a36Sopenharmony_ci	    tcp_bpf_ca_needs_ecn((struct sock *)req))
680962306a36Sopenharmony_ci		inet_rsk(req)->ecn_ok = 1;
681062306a36Sopenharmony_ci}
681162306a36Sopenharmony_ci
681262306a36Sopenharmony_cistatic void tcp_openreq_init(struct request_sock *req,
681362306a36Sopenharmony_ci			     const struct tcp_options_received *rx_opt,
681462306a36Sopenharmony_ci			     struct sk_buff *skb, const struct sock *sk)
681562306a36Sopenharmony_ci{
681662306a36Sopenharmony_ci	struct inet_request_sock *ireq = inet_rsk(req);
681762306a36Sopenharmony_ci
681862306a36Sopenharmony_ci	req->rsk_rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
681962306a36Sopenharmony_ci	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
682062306a36Sopenharmony_ci	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
682162306a36Sopenharmony_ci	tcp_rsk(req)->snt_synack = 0;
682262306a36Sopenharmony_ci	tcp_rsk(req)->last_oow_ack_time = 0;
682362306a36Sopenharmony_ci	req->mss = rx_opt->mss_clamp;
682462306a36Sopenharmony_ci	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
682562306a36Sopenharmony_ci	ireq->tstamp_ok = rx_opt->tstamp_ok;
682662306a36Sopenharmony_ci	ireq->sack_ok = rx_opt->sack_ok;
682762306a36Sopenharmony_ci	ireq->snd_wscale = rx_opt->snd_wscale;
682862306a36Sopenharmony_ci	ireq->wscale_ok = rx_opt->wscale_ok;
682962306a36Sopenharmony_ci	ireq->acked = 0;
683062306a36Sopenharmony_ci	ireq->ecn_ok = 0;
683162306a36Sopenharmony_ci	ireq->ir_rmt_port = tcp_hdr(skb)->source;
683262306a36Sopenharmony_ci	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
683362306a36Sopenharmony_ci	ireq->ir_mark = inet_request_mark(sk, skb);
683462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_SMC)
683562306a36Sopenharmony_ci	ireq->smc_ok = rx_opt->smc_ok && !(tcp_sk(sk)->smc_hs_congested &&
683662306a36Sopenharmony_ci			tcp_sk(sk)->smc_hs_congested(sk));
683762306a36Sopenharmony_ci#endif
683862306a36Sopenharmony_ci}
683962306a36Sopenharmony_ci
684062306a36Sopenharmony_cistruct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
684162306a36Sopenharmony_ci				      struct sock *sk_listener,
684262306a36Sopenharmony_ci				      bool attach_listener)
684362306a36Sopenharmony_ci{
684462306a36Sopenharmony_ci	struct request_sock *req = reqsk_alloc(ops, sk_listener,
684562306a36Sopenharmony_ci					       attach_listener);
684662306a36Sopenharmony_ci
684762306a36Sopenharmony_ci	if (req) {
684862306a36Sopenharmony_ci		struct inet_request_sock *ireq = inet_rsk(req);
684962306a36Sopenharmony_ci
685062306a36Sopenharmony_ci		ireq->ireq_opt = NULL;
685162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
685262306a36Sopenharmony_ci		ireq->pktopts = NULL;
685362306a36Sopenharmony_ci#endif
685462306a36Sopenharmony_ci		atomic64_set(&ireq->ir_cookie, 0);
685562306a36Sopenharmony_ci		ireq->ireq_state = TCP_NEW_SYN_RECV;
685662306a36Sopenharmony_ci		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
685762306a36Sopenharmony_ci		ireq->ireq_family = sk_listener->sk_family;
685862306a36Sopenharmony_ci		req->timeout = TCP_TIMEOUT_INIT;
685962306a36Sopenharmony_ci	}
686062306a36Sopenharmony_ci
686162306a36Sopenharmony_ci	return req;
686262306a36Sopenharmony_ci}
686362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_reqsk_alloc);
686462306a36Sopenharmony_ci
686562306a36Sopenharmony_ci/*
686662306a36Sopenharmony_ci * Return true if a syncookie should be sent
686762306a36Sopenharmony_ci */
686862306a36Sopenharmony_cistatic bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
686962306a36Sopenharmony_ci{
687062306a36Sopenharmony_ci	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
687162306a36Sopenharmony_ci	const char *msg = "Dropping request";
687262306a36Sopenharmony_ci	struct net *net = sock_net(sk);
687362306a36Sopenharmony_ci	bool want_cookie = false;
687462306a36Sopenharmony_ci	u8 syncookies;
687562306a36Sopenharmony_ci
687662306a36Sopenharmony_ci	syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
687762306a36Sopenharmony_ci
687862306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES
687962306a36Sopenharmony_ci	if (syncookies) {
688062306a36Sopenharmony_ci		msg = "Sending cookies";
688162306a36Sopenharmony_ci		want_cookie = true;
688262306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
688362306a36Sopenharmony_ci	} else
688462306a36Sopenharmony_ci#endif
688562306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
688662306a36Sopenharmony_ci
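	/* Warn at most once per listener, and never when syncookies
	 * are administratively forced on (sysctl_tcp_syncookies == 2).
	 */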
688762306a36Sopenharmony_ci	if (!READ_ONCE(queue->synflood_warned) && syncookies != 2 &&
688862306a36Sopenharmony_ci	    xchg(&queue->synflood_warned, 1) == 0) {
688962306a36Sopenharmony_ci		if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) {
689062306a36Sopenharmony_ci			net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n",
689162306a36Sopenharmony_ci					proto, inet6_rcv_saddr(sk),
689262306a36Sopenharmony_ci					sk->sk_num, msg);
689362306a36Sopenharmony_ci		} else {
689462306a36Sopenharmony_ci			net_info_ratelimited("%s: Possible SYN flooding on port %pI4:%u. %s.\n",
689562306a36Sopenharmony_ci					proto, &sk->sk_rcv_saddr,
689662306a36Sopenharmony_ci					sk->sk_num, msg);
689762306a36Sopenharmony_ci		}
689862306a36Sopenharmony_ci	}
689962306a36Sopenharmony_ci
690062306a36Sopenharmony_ci	return want_cookie;
690162306a36Sopenharmony_ci}
690262306a36Sopenharmony_ci
690362306a36Sopenharmony_cistatic void tcp_reqsk_record_syn(const struct sock *sk,
690462306a36Sopenharmony_ci				 struct request_sock *req,
690562306a36Sopenharmony_ci				 const struct sk_buff *skb)
690662306a36Sopenharmony_ci{
690762306a36Sopenharmony_ci	if (tcp_sk(sk)->save_syn) {
690862306a36Sopenharmony_ci		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
690962306a36Sopenharmony_ci		struct saved_syn *saved_syn;
691062306a36Sopenharmony_ci		u32 mac_hdrlen;
691162306a36Sopenharmony_ci		void *base;
691262306a36Sopenharmony_ci
691362306a36Sopenharmony_ci		if (tcp_sk(sk)->save_syn == 2) {  /* Save full header. */
691462306a36Sopenharmony_ci			base = skb_mac_header(skb);
691562306a36Sopenharmony_ci			mac_hdrlen = skb_mac_header_len(skb);
691662306a36Sopenharmony_ci			len += mac_hdrlen;
691762306a36Sopenharmony_ci		} else {
691862306a36Sopenharmony_ci			base = skb_network_header(skb);
691962306a36Sopenharmony_ci			mac_hdrlen = 0;
692062306a36Sopenharmony_ci		}
692162306a36Sopenharmony_ci
692262306a36Sopenharmony_ci		saved_syn = kmalloc(struct_size(saved_syn, data, len),
692362306a36Sopenharmony_ci				    GFP_ATOMIC);
692462306a36Sopenharmony_ci		if (saved_syn) {
692562306a36Sopenharmony_ci			saved_syn->mac_hdrlen = mac_hdrlen;
692662306a36Sopenharmony_ci			saved_syn->network_hdrlen = skb_network_header_len(skb);
692762306a36Sopenharmony_ci			saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
692862306a36Sopenharmony_ci			memcpy(saved_syn->data, base, len);
692962306a36Sopenharmony_ci			req->saved_syn = saved_syn;
693062306a36Sopenharmony_ci		}
693162306a36Sopenharmony_ci	}
693262306a36Sopenharmony_ci}
693362306a36Sopenharmony_ci
693462306a36Sopenharmony_ci/* If a SYN cookie is required and supported, returns a clamped MSS value to be
693562306a36Sopenharmony_ci * used for SYN cookie generation.
693662306a36Sopenharmony_ci */
693762306a36Sopenharmony_ciu16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
693862306a36Sopenharmony_ci			  const struct tcp_request_sock_ops *af_ops,
693962306a36Sopenharmony_ci			  struct sock *sk, struct tcphdr *th)
694062306a36Sopenharmony_ci{
694162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
694262306a36Sopenharmony_ci	u16 mss;
694362306a36Sopenharmony_ci
694462306a36Sopenharmony_ci	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
694562306a36Sopenharmony_ci	    !inet_csk_reqsk_queue_is_full(sk))
694662306a36Sopenharmony_ci		return 0;
694762306a36Sopenharmony_ci
694862306a36Sopenharmony_ci	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
694962306a36Sopenharmony_ci		return 0;
695062306a36Sopenharmony_ci
695162306a36Sopenharmony_ci	if (sk_acceptq_is_full(sk)) {
695262306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
695362306a36Sopenharmony_ci		return 0;
695462306a36Sopenharmony_ci	}
695562306a36Sopenharmony_ci
695662306a36Sopenharmony_ci	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
695762306a36Sopenharmony_ci	if (!mss)
695862306a36Sopenharmony_ci		mss = af_ops->mss_clamp;
695962306a36Sopenharmony_ci
696062306a36Sopenharmony_ci	return mss;
696162306a36Sopenharmony_ci}
696262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
696362306a36Sopenharmony_ci
696462306a36Sopenharmony_ciint tcp_conn_request(struct request_sock_ops *rsk_ops,
696562306a36Sopenharmony_ci		     const struct tcp_request_sock_ops *af_ops,
696662306a36Sopenharmony_ci		     struct sock *sk, struct sk_buff *skb)
696762306a36Sopenharmony_ci{
696862306a36Sopenharmony_ci	struct tcp_fastopen_cookie foc = { .len = -1 };
696962306a36Sopenharmony_ci	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
697062306a36Sopenharmony_ci	struct tcp_options_received tmp_opt;
697162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
697262306a36Sopenharmony_ci	struct net *net = sock_net(sk);
697362306a36Sopenharmony_ci	struct sock *fastopen_sk = NULL;
697462306a36Sopenharmony_ci	struct request_sock *req;
697562306a36Sopenharmony_ci	bool want_cookie = false;
697662306a36Sopenharmony_ci	struct dst_entry *dst;
697762306a36Sopenharmony_ci	struct flowi fl;
697862306a36Sopenharmony_ci	u8 syncookies;
697962306a36Sopenharmony_ci
698062306a36Sopenharmony_ci	syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
698162306a36Sopenharmony_ci
698262306a36Sopenharmony_ci	/* TW buckets are converted to open requests without
698362306a36Sopenharmony_ci	 * limitation: they conserve resources and the peer is
698462306a36Sopenharmony_ci	 * evidently a real one.
698562306a36Sopenharmony_ci	 */
698662306a36Sopenharmony_ci	if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
698762306a36Sopenharmony_ci		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
698862306a36Sopenharmony_ci		if (!want_cookie)
698962306a36Sopenharmony_ci			goto drop;
699062306a36Sopenharmony_ci	}
699162306a36Sopenharmony_ci
699262306a36Sopenharmony_ci	if (sk_acceptq_is_full(sk)) {
699362306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
699462306a36Sopenharmony_ci		goto drop;
699562306a36Sopenharmony_ci	}
699662306a36Sopenharmony_ci
699762306a36Sopenharmony_ci	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
699862306a36Sopenharmony_ci	if (!req)
699962306a36Sopenharmony_ci		goto drop;
700062306a36Sopenharmony_ci
700162306a36Sopenharmony_ci	req->syncookie = want_cookie;
700262306a36Sopenharmony_ci	tcp_rsk(req)->af_specific = af_ops;
700362306a36Sopenharmony_ci	tcp_rsk(req)->ts_off = 0;
700462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_MPTCP)
700562306a36Sopenharmony_ci	tcp_rsk(req)->is_mptcp = 0;
700662306a36Sopenharmony_ci#endif
700762306a36Sopenharmony_ci
700862306a36Sopenharmony_ci	tcp_clear_options(&tmp_opt);
700962306a36Sopenharmony_ci	tmp_opt.mss_clamp = af_ops->mss_clamp;
701062306a36Sopenharmony_ci	tmp_opt.user_mss  = tp->rx_opt.user_mss;
701162306a36Sopenharmony_ci	tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
701262306a36Sopenharmony_ci			  want_cookie ? NULL : &foc);
701362306a36Sopenharmony_ci
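	/* A syncookie can only encode the negotiated options when the
	 * client supplied a timestamp; without one, advertise no options.
	 */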
701462306a36Sopenharmony_ci	if (want_cookie && !tmp_opt.saw_tstamp)
701562306a36Sopenharmony_ci		tcp_clear_options(&tmp_opt);
701662306a36Sopenharmony_ci
701762306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_SMC) && want_cookie)
701862306a36Sopenharmony_ci		tmp_opt.smc_ok = 0;
701962306a36Sopenharmony_ci
702062306a36Sopenharmony_ci	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
702162306a36Sopenharmony_ci	tcp_openreq_init(req, &tmp_opt, skb, sk);
702262306a36Sopenharmony_ci	inet_rsk(req)->no_srccheck = inet_test_bit(TRANSPARENT, sk);
702362306a36Sopenharmony_ci
702462306a36Sopenharmony_ci	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
702562306a36Sopenharmony_ci	inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
702662306a36Sopenharmony_ci
702762306a36Sopenharmony_ci	dst = af_ops->route_req(sk, skb, &fl, req);
702862306a36Sopenharmony_ci	if (!dst)
702962306a36Sopenharmony_ci		goto drop_and_free;
703062306a36Sopenharmony_ci
703162306a36Sopenharmony_ci	if (tmp_opt.tstamp_ok)
703262306a36Sopenharmony_ci		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
703362306a36Sopenharmony_ci
703462306a36Sopenharmony_ci	if (!want_cookie && !isn) {
703562306a36Sopenharmony_ci		int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
703662306a36Sopenharmony_ci
703762306a36Sopenharmony_ci		/* Kill the following clause if you dislike this behavior. */
703862306a36Sopenharmony_ci		if (!syncookies &&
703962306a36Sopenharmony_ci		    (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
704062306a36Sopenharmony_ci		     (max_syn_backlog >> 2)) &&
704162306a36Sopenharmony_ci		    !tcp_peer_is_proven(req, dst)) {
704262306a36Sopenharmony_ci			/* Without syncookies, the last quarter of the
704362306a36Sopenharmony_ci			 * backlog is reserved for destinations proven
704462306a36Sopenharmony_ci			 * to be alive.
704562306a36Sopenharmony_ci			 * This means that we continue to communicate
704662306a36Sopenharmony_ci			 * with destinations already remembered at
704762306a36Sopenharmony_ci			 * the moment the synflood started.
704862306a36Sopenharmony_ci			 */
704962306a36Sopenharmony_ci			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
705062306a36Sopenharmony_ci				    rsk_ops->family);
705162306a36Sopenharmony_ci			goto drop_and_release;
705262306a36Sopenharmony_ci		}
705362306a36Sopenharmony_ci
705462306a36Sopenharmony_ci		isn = af_ops->init_seq(skb);
705562306a36Sopenharmony_ci	}
705662306a36Sopenharmony_ci
705762306a36Sopenharmony_ci	tcp_ecn_create_request(req, skb, sk, dst);
705862306a36Sopenharmony_ci
705962306a36Sopenharmony_ci	if (want_cookie) {
706062306a36Sopenharmony_ci		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
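		/* Without timestamps the cookie cannot record ECN support
		 * either, so do not negotiate it.
		 */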
706162306a36Sopenharmony_ci		if (!tmp_opt.tstamp_ok)
706262306a36Sopenharmony_ci			inet_rsk(req)->ecn_ok = 0;
706362306a36Sopenharmony_ci	}
706462306a36Sopenharmony_ci
706562306a36Sopenharmony_ci	tcp_rsk(req)->snt_isn = isn;
706662306a36Sopenharmony_ci	tcp_rsk(req)->txhash = net_tx_rndhash();
706762306a36Sopenharmony_ci	tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
706862306a36Sopenharmony_ci	tcp_openreq_init_rwin(req, sk, dst);
706962306a36Sopenharmony_ci	sk_rx_queue_set(req_to_sk(req), skb);
707062306a36Sopenharmony_ci	if (!want_cookie) {
707162306a36Sopenharmony_ci		tcp_reqsk_record_syn(sk, req, skb);
707262306a36Sopenharmony_ci		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
707362306a36Sopenharmony_ci	}
707462306a36Sopenharmony_ci	if (fastopen_sk) {
707562306a36Sopenharmony_ci		af_ops->send_synack(fastopen_sk, dst, &fl, req,
707662306a36Sopenharmony_ci				    &foc, TCP_SYNACK_FASTOPEN, skb);
707762306a36Sopenharmony_ci		/* Add the child socket directly into the accept queue */
707862306a36Sopenharmony_ci		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
707962306a36Sopenharmony_ci			reqsk_fastopen_remove(fastopen_sk, req, false);
708062306a36Sopenharmony_ci			bh_unlock_sock(fastopen_sk);
708162306a36Sopenharmony_ci			sock_put(fastopen_sk);
708262306a36Sopenharmony_ci			goto drop_and_free;
708362306a36Sopenharmony_ci		}
708462306a36Sopenharmony_ci		sk->sk_data_ready(sk);
708562306a36Sopenharmony_ci		bh_unlock_sock(fastopen_sk);
708662306a36Sopenharmony_ci		sock_put(fastopen_sk);
708762306a36Sopenharmony_ci	} else {
708862306a36Sopenharmony_ci		tcp_rsk(req)->tfo_listener = false;
708962306a36Sopenharmony_ci		if (!want_cookie) {
709062306a36Sopenharmony_ci			req->timeout = tcp_timeout_init((struct sock *)req);
709162306a36Sopenharmony_ci			inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
709262306a36Sopenharmony_ci		}
709362306a36Sopenharmony_ci		af_ops->send_synack(sk, dst, &fl, req, &foc,
709462306a36Sopenharmony_ci				    !want_cookie ? TCP_SYNACK_NORMAL :
709562306a36Sopenharmony_ci						   TCP_SYNACK_COOKIE,
709662306a36Sopenharmony_ci				    skb);
709762306a36Sopenharmony_ci		if (want_cookie) {
709862306a36Sopenharmony_ci			reqsk_free(req);
709962306a36Sopenharmony_ci			return 0;
710062306a36Sopenharmony_ci		}
710162306a36Sopenharmony_ci	}
710262306a36Sopenharmony_ci	reqsk_put(req);
710362306a36Sopenharmony_ci	return 0;
710462306a36Sopenharmony_ci
710562306a36Sopenharmony_cidrop_and_release:
710662306a36Sopenharmony_ci	dst_release(dst);
710762306a36Sopenharmony_cidrop_and_free:
710862306a36Sopenharmony_ci	__reqsk_free(req);
710962306a36Sopenharmony_cidrop:
711062306a36Sopenharmony_ci	tcp_listendrop(sk);
711162306a36Sopenharmony_ci	return 0;
711262306a36Sopenharmony_ci}
711362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_conn_request);