162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
462306a36Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
562306a36Sopenharmony_ci *		interface as the means of communication with the user level.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci *		Implementation of the Transmission Control Protocol(TCP).
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci *		IPv4 specific functions
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci *		code split from:
1262306a36Sopenharmony_ci *		linux/ipv4/tcp.c
1362306a36Sopenharmony_ci *		linux/ipv4/tcp_input.c
1462306a36Sopenharmony_ci *		linux/ipv4/tcp_output.c
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci *		See tcp.c for author information
1762306a36Sopenharmony_ci */
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci/*
2062306a36Sopenharmony_ci * Changes:
2162306a36Sopenharmony_ci *		David S. Miller	:	New socket lookup architecture.
2262306a36Sopenharmony_ci *					This code is dedicated to John Dyson.
2362306a36Sopenharmony_ci *		David S. Miller :	Change semantics of established hash,
2462306a36Sopenharmony_ci *					half is devoted to TIME_WAIT sockets
2562306a36Sopenharmony_ci *					and the rest go in the other half.
2662306a36Sopenharmony_ci *		Andi Kleen :		Add support for syncookies and fixed
2762306a36Sopenharmony_ci *					some bugs: ip options weren't passed to
2862306a36Sopenharmony_ci *					the TCP layer, missed a check for an
2962306a36Sopenharmony_ci *					ACK bit.
3062306a36Sopenharmony_ci *		Andi Kleen :		Implemented fast path mtu discovery.
3162306a36Sopenharmony_ci *	     				Fixed many serious bugs in the
3262306a36Sopenharmony_ci *					request_sock handling and moved
3362306a36Sopenharmony_ci *					most of it into the af independent code.
3462306a36Sopenharmony_ci *					Added tail drop and some other bugfixes.
3562306a36Sopenharmony_ci *					Added new listen semantics.
3662306a36Sopenharmony_ci *		Mike McLagan	:	Routing by source
3762306a36Sopenharmony_ci *	Juan Jose Ciarlante:		ip_dynaddr bits
3862306a36Sopenharmony_ci *		Andi Kleen:		various fixes.
3962306a36Sopenharmony_ci *	Vitaly E. Lavrov	:	Transparent proxy revived after year
4062306a36Sopenharmony_ci *					coma.
4162306a36Sopenharmony_ci *	Andi Kleen		:	Fix new listen.
4262306a36Sopenharmony_ci *	Andi Kleen		:	Fix accept error reporting.
4362306a36Sopenharmony_ci *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
4462306a36Sopenharmony_ci *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
4562306a36Sopenharmony_ci *					a single port at the same time.
4662306a36Sopenharmony_ci */
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci#define pr_fmt(fmt) "TCP: " fmt
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci#include <linux/bottom_half.h>
5162306a36Sopenharmony_ci#include <linux/types.h>
5262306a36Sopenharmony_ci#include <linux/fcntl.h>
5362306a36Sopenharmony_ci#include <linux/module.h>
5462306a36Sopenharmony_ci#include <linux/random.h>
5562306a36Sopenharmony_ci#include <linux/cache.h>
5662306a36Sopenharmony_ci#include <linux/jhash.h>
5762306a36Sopenharmony_ci#include <linux/init.h>
5862306a36Sopenharmony_ci#include <linux/times.h>
5962306a36Sopenharmony_ci#include <linux/slab.h>
6062306a36Sopenharmony_ci#include <linux/sched.h>
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci#include <net/net_namespace.h>
6362306a36Sopenharmony_ci#include <net/icmp.h>
6462306a36Sopenharmony_ci#include <net/inet_hashtables.h>
6562306a36Sopenharmony_ci#include <net/tcp.h>
6662306a36Sopenharmony_ci#include <net/transp_v6.h>
6762306a36Sopenharmony_ci#include <net/ipv6.h>
6862306a36Sopenharmony_ci#include <net/inet_common.h>
6962306a36Sopenharmony_ci#include <net/timewait_sock.h>
7062306a36Sopenharmony_ci#include <net/xfrm.h>
7162306a36Sopenharmony_ci#include <net/secure_seq.h>
7262306a36Sopenharmony_ci#include <net/busy_poll.h>
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci#include <linux/inet.h>
7562306a36Sopenharmony_ci#include <linux/ipv6.h>
7662306a36Sopenharmony_ci#include <linux/stddef.h>
7762306a36Sopenharmony_ci#include <linux/proc_fs.h>
7862306a36Sopenharmony_ci#include <linux/seq_file.h>
7962306a36Sopenharmony_ci#include <linux/inetdevice.h>
8062306a36Sopenharmony_ci#include <linux/btf_ids.h>
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci#include <crypto/hash.h>
8362306a36Sopenharmony_ci#include <linux/scatterlist.h>
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci#include <trace/events/tcp.h>
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
8862306a36Sopenharmony_cistatic int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
8962306a36Sopenharmony_ci			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
9062306a36Sopenharmony_ci#endif
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_cistruct inet_hashinfo tcp_hashinfo;
9362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_hashinfo);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk);
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_cistatic u32 tcp_v4_init_seq(const struct sk_buff *skb)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	return secure_tcp_seq(ip_hdr(skb)->daddr,
10062306a36Sopenharmony_ci			      ip_hdr(skb)->saddr,
10162306a36Sopenharmony_ci			      tcp_hdr(skb)->dest,
10262306a36Sopenharmony_ci			      tcp_hdr(skb)->source);
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_cistatic u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ciint tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
11362306a36Sopenharmony_ci	const struct inet_timewait_sock *tw = inet_twsk(sktw);
11462306a36Sopenharmony_ci	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
11562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (reuse == 2) {
11862306a36Sopenharmony_ci		/* Still does not detect *everything* that goes through
11962306a36Sopenharmony_ci		 * lo, since we require a loopback src or dst address
12062306a36Sopenharmony_ci		 * or direct binding to 'lo' interface.
12162306a36Sopenharmony_ci		 */
12262306a36Sopenharmony_ci		bool loopback = false;
12362306a36Sopenharmony_ci		if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
12462306a36Sopenharmony_ci			loopback = true;
12562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
12662306a36Sopenharmony_ci		if (tw->tw_family == AF_INET6) {
12762306a36Sopenharmony_ci			if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
12862306a36Sopenharmony_ci			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
12962306a36Sopenharmony_ci			    ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
13062306a36Sopenharmony_ci			    ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
13162306a36Sopenharmony_ci				loopback = true;
13262306a36Sopenharmony_ci		} else
13362306a36Sopenharmony_ci#endif
13462306a36Sopenharmony_ci		{
13562306a36Sopenharmony_ci			if (ipv4_is_loopback(tw->tw_daddr) ||
13662306a36Sopenharmony_ci			    ipv4_is_loopback(tw->tw_rcv_saddr))
13762306a36Sopenharmony_ci				loopback = true;
13862306a36Sopenharmony_ci		}
13962306a36Sopenharmony_ci		if (!loopback)
14062306a36Sopenharmony_ci			reuse = 0;
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	/* With PAWS, it is safe from the viewpoint
14462306a36Sopenharmony_ci	   of data integrity. Even without PAWS it is safe provided sequence
14562306a36Sopenharmony_ci	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	   Actually, the idea is close to VJ's one, only timestamp cache is
14862306a36Sopenharmony_ci	   held not per host, but per port pair and TW bucket is used as state
14962306a36Sopenharmony_ci	   holder.
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	   If TW bucket has been already destroyed we fall back to VJ's scheme
15262306a36Sopenharmony_ci	   and use initial timestamp retrieved from peer table.
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	if (tcptw->tw_ts_recent_stamp &&
15562306a36Sopenharmony_ci	    (!twp || (reuse && time_after32(ktime_get_seconds(),
15662306a36Sopenharmony_ci					    tcptw->tw_ts_recent_stamp)))) {
15762306a36Sopenharmony_ci		/* In case of repair and re-using TIME-WAIT sockets we still
15862306a36Sopenharmony_ci		 * want to be sure that it is safe as above but honor the
15962306a36Sopenharmony_ci		 * sequence numbers and time stamps set as part of the repair
16062306a36Sopenharmony_ci		 * process.
16162306a36Sopenharmony_ci		 *
16262306a36Sopenharmony_ci		 * Without this check re-using a TIME-WAIT socket with TCP
16362306a36Sopenharmony_ci		 * repair would accumulate a -1 on the repair assigned
16462306a36Sopenharmony_ci		 * sequence number. The first time it is reused the sequence
16562306a36Sopenharmony_ci		 * is -1, the second time -2, etc. This fixes that issue
16662306a36Sopenharmony_ci		 * without appearing to create any others.
16762306a36Sopenharmony_ci		 */
16862306a36Sopenharmony_ci		if (likely(!tp->repair)) {
16962306a36Sopenharmony_ci			u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci			if (!seq)
17262306a36Sopenharmony_ci				seq = 1;
17362306a36Sopenharmony_ci			WRITE_ONCE(tp->write_seq, seq);
17462306a36Sopenharmony_ci			tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
17562306a36Sopenharmony_ci			tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
17662306a36Sopenharmony_ci		}
17762306a36Sopenharmony_ci		sock_hold(sktw);
17862306a36Sopenharmony_ci		return 1;
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	return 0;
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_twsk_unique);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_cistatic int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
18662306a36Sopenharmony_ci			      int addr_len)
18762306a36Sopenharmony_ci{
18862306a36Sopenharmony_ci	/* This check is replicated from tcp_v4_connect() and intended to
18962306a36Sopenharmony_ci	 * prevent BPF program called below from accessing bytes that are out
19062306a36Sopenharmony_ci	 * of the bound specified by user in addr_len.
19162306a36Sopenharmony_ci	 */
19262306a36Sopenharmony_ci	if (addr_len < sizeof(struct sockaddr_in))
19362306a36Sopenharmony_ci		return -EINVAL;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	sock_owned_by_me(sk);
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len);
19862306a36Sopenharmony_ci}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci/* This will initiate an outgoing connection. */
20162306a36Sopenharmony_ciint tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
20462306a36Sopenharmony_ci	struct inet_timewait_death_row *tcp_death_row;
20562306a36Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
20662306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
20762306a36Sopenharmony_ci	struct ip_options_rcu *inet_opt;
20862306a36Sopenharmony_ci	struct net *net = sock_net(sk);
20962306a36Sopenharmony_ci	__be16 orig_sport, orig_dport;
21062306a36Sopenharmony_ci	__be32 daddr, nexthop;
21162306a36Sopenharmony_ci	struct flowi4 *fl4;
21262306a36Sopenharmony_ci	struct rtable *rt;
21362306a36Sopenharmony_ci	int err;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	if (addr_len < sizeof(struct sockaddr_in))
21662306a36Sopenharmony_ci		return -EINVAL;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	if (usin->sin_family != AF_INET)
21962306a36Sopenharmony_ci		return -EAFNOSUPPORT;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	nexthop = daddr = usin->sin_addr.s_addr;
22262306a36Sopenharmony_ci	inet_opt = rcu_dereference_protected(inet->inet_opt,
22362306a36Sopenharmony_ci					     lockdep_sock_is_held(sk));
22462306a36Sopenharmony_ci	if (inet_opt && inet_opt->opt.srr) {
22562306a36Sopenharmony_ci		if (!daddr)
22662306a36Sopenharmony_ci			return -EINVAL;
22762306a36Sopenharmony_ci		nexthop = inet_opt->opt.faddr;
22862306a36Sopenharmony_ci	}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	orig_sport = inet->inet_sport;
23162306a36Sopenharmony_ci	orig_dport = usin->sin_port;
23262306a36Sopenharmony_ci	fl4 = &inet->cork.fl.u.ip4;
23362306a36Sopenharmony_ci	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
23462306a36Sopenharmony_ci			      sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport,
23562306a36Sopenharmony_ci			      orig_dport, sk);
23662306a36Sopenharmony_ci	if (IS_ERR(rt)) {
23762306a36Sopenharmony_ci		err = PTR_ERR(rt);
23862306a36Sopenharmony_ci		if (err == -ENETUNREACH)
23962306a36Sopenharmony_ci			IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
24062306a36Sopenharmony_ci		return err;
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
24462306a36Sopenharmony_ci		ip_rt_put(rt);
24562306a36Sopenharmony_ci		return -ENETUNREACH;
24662306a36Sopenharmony_ci	}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	if (!inet_opt || !inet_opt->opt.srr)
24962306a36Sopenharmony_ci		daddr = fl4->daddr;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	if (!inet->inet_saddr) {
25462306a36Sopenharmony_ci		err = inet_bhash2_update_saddr(sk,  &fl4->saddr, AF_INET);
25562306a36Sopenharmony_ci		if (err) {
25662306a36Sopenharmony_ci			ip_rt_put(rt);
25762306a36Sopenharmony_ci			return err;
25862306a36Sopenharmony_ci		}
25962306a36Sopenharmony_ci	} else {
26062306a36Sopenharmony_ci		sk_rcv_saddr_set(sk, inet->inet_saddr);
26162306a36Sopenharmony_ci	}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
26462306a36Sopenharmony_ci		/* Reset inherited state */
26562306a36Sopenharmony_ci		tp->rx_opt.ts_recent	   = 0;
26662306a36Sopenharmony_ci		tp->rx_opt.ts_recent_stamp = 0;
26762306a36Sopenharmony_ci		if (likely(!tp->repair))
26862306a36Sopenharmony_ci			WRITE_ONCE(tp->write_seq, 0);
26962306a36Sopenharmony_ci	}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	inet->inet_dport = usin->sin_port;
27262306a36Sopenharmony_ci	sk_daddr_set(sk, daddr);
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	inet_csk(sk)->icsk_ext_hdr_len = 0;
27562306a36Sopenharmony_ci	if (inet_opt)
27662306a36Sopenharmony_ci		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	/* Socket identity is still unknown (sport may be zero).
28162306a36Sopenharmony_ci	 * However we set state to SYN-SENT and not releasing socket
28262306a36Sopenharmony_ci	 * lock select source port, enter ourselves into the hash tables and
28362306a36Sopenharmony_ci	 * complete initialization after this.
28462306a36Sopenharmony_ci	 */
28562306a36Sopenharmony_ci	tcp_set_state(sk, TCP_SYN_SENT);
28662306a36Sopenharmony_ci	err = inet_hash_connect(tcp_death_row, sk);
28762306a36Sopenharmony_ci	if (err)
28862306a36Sopenharmony_ci		goto failure;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	sk_set_txhash(sk);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
29362306a36Sopenharmony_ci			       inet->inet_sport, inet->inet_dport, sk);
29462306a36Sopenharmony_ci	if (IS_ERR(rt)) {
29562306a36Sopenharmony_ci		err = PTR_ERR(rt);
29662306a36Sopenharmony_ci		rt = NULL;
29762306a36Sopenharmony_ci		goto failure;
29862306a36Sopenharmony_ci	}
29962306a36Sopenharmony_ci	/* OK, now commit destination to socket.  */
30062306a36Sopenharmony_ci	sk->sk_gso_type = SKB_GSO_TCPV4;
30162306a36Sopenharmony_ci	sk_setup_caps(sk, &rt->dst);
30262306a36Sopenharmony_ci	rt = NULL;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	if (likely(!tp->repair)) {
30562306a36Sopenharmony_ci		if (!tp->write_seq)
30662306a36Sopenharmony_ci			WRITE_ONCE(tp->write_seq,
30762306a36Sopenharmony_ci				   secure_tcp_seq(inet->inet_saddr,
30862306a36Sopenharmony_ci						  inet->inet_daddr,
30962306a36Sopenharmony_ci						  inet->inet_sport,
31062306a36Sopenharmony_ci						  usin->sin_port));
31162306a36Sopenharmony_ci		WRITE_ONCE(tp->tsoffset,
31262306a36Sopenharmony_ci			   secure_tcp_ts_off(net, inet->inet_saddr,
31362306a36Sopenharmony_ci					     inet->inet_daddr));
31462306a36Sopenharmony_ci	}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	atomic_set(&inet->inet_id, get_random_u16());
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	if (tcp_fastopen_defer_connect(sk, &err))
31962306a36Sopenharmony_ci		return err;
32062306a36Sopenharmony_ci	if (err)
32162306a36Sopenharmony_ci		goto failure;
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	err = tcp_connect(sk);
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	if (err)
32662306a36Sopenharmony_ci		goto failure;
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	return 0;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_cifailure:
33162306a36Sopenharmony_ci	/*
33262306a36Sopenharmony_ci	 * This unhashes the socket and releases the local port,
33362306a36Sopenharmony_ci	 * if necessary.
33462306a36Sopenharmony_ci	 */
33562306a36Sopenharmony_ci	tcp_set_state(sk, TCP_CLOSE);
33662306a36Sopenharmony_ci	inet_bhash2_reset_saddr(sk);
33762306a36Sopenharmony_ci	ip_rt_put(rt);
33862306a36Sopenharmony_ci	sk->sk_route_caps = 0;
33962306a36Sopenharmony_ci	inet->inet_dport = 0;
34062306a36Sopenharmony_ci	return err;
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_connect);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci/*
34562306a36Sopenharmony_ci * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
34662306a36Sopenharmony_ci * It can be called through tcp_release_cb() if socket was owned by user
34762306a36Sopenharmony_ci * at the time tcp_v4_err() was called to handle ICMP message.
34862306a36Sopenharmony_ci */
34962306a36Sopenharmony_civoid tcp_v4_mtu_reduced(struct sock *sk)
35062306a36Sopenharmony_ci{
35162306a36Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
35262306a36Sopenharmony_ci	struct dst_entry *dst;
35362306a36Sopenharmony_ci	u32 mtu;
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
35662306a36Sopenharmony_ci		return;
35762306a36Sopenharmony_ci	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
35862306a36Sopenharmony_ci	dst = inet_csk_update_pmtu(sk, mtu);
35962306a36Sopenharmony_ci	if (!dst)
36062306a36Sopenharmony_ci		return;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	/* Something is about to be wrong... Remember soft error
36362306a36Sopenharmony_ci	 * for the case, if this connection will not able to recover.
36462306a36Sopenharmony_ci	 */
36562306a36Sopenharmony_ci	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
36662306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	mtu = dst_mtu(dst);
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
37162306a36Sopenharmony_ci	    ip_sk_accept_pmtu(sk) &&
37262306a36Sopenharmony_ci	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
37362306a36Sopenharmony_ci		tcp_sync_mss(sk, mtu);
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci		/* Resend the TCP packet because it's
37662306a36Sopenharmony_ci		 * clear that the old packet has been
37762306a36Sopenharmony_ci		 * dropped. This is the new "fast" path mtu
37862306a36Sopenharmony_ci		 * discovery.
37962306a36Sopenharmony_ci		 */
38062306a36Sopenharmony_ci		tcp_simple_retransmit(sk);
38162306a36Sopenharmony_ci	} /* else let the usual retransmit timer handle it */
38262306a36Sopenharmony_ci}
38362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_mtu_reduced);
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_cistatic void do_redirect(struct sk_buff *skb, struct sock *sk)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	struct dst_entry *dst = __sk_dst_check(sk, 0);
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_ci	if (dst)
39062306a36Sopenharmony_ci		dst->ops->redirect(dst, sk, skb);
39162306a36Sopenharmony_ci}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
39562306a36Sopenharmony_civoid tcp_req_err(struct sock *sk, u32 seq, bool abort)
39662306a36Sopenharmony_ci{
39762306a36Sopenharmony_ci	struct request_sock *req = inet_reqsk(sk);
39862306a36Sopenharmony_ci	struct net *net = sock_net(sk);
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	/* ICMPs are not backlogged, hence we cannot get
40162306a36Sopenharmony_ci	 * an established socket here.
40262306a36Sopenharmony_ci	 */
40362306a36Sopenharmony_ci	if (seq != tcp_rsk(req)->snt_isn) {
40462306a36Sopenharmony_ci		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
40562306a36Sopenharmony_ci	} else if (abort) {
40662306a36Sopenharmony_ci		/*
40762306a36Sopenharmony_ci		 * Still in SYN_RECV, just remove it silently.
40862306a36Sopenharmony_ci		 * There is no good way to pass the error to the newly
40962306a36Sopenharmony_ci		 * created socket, and POSIX does not want network
41062306a36Sopenharmony_ci		 * errors returned from accept().
41162306a36Sopenharmony_ci		 */
41262306a36Sopenharmony_ci		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
41362306a36Sopenharmony_ci		tcp_listendrop(req->rsk_listener);
41462306a36Sopenharmony_ci	}
41562306a36Sopenharmony_ci	reqsk_put(req);
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_req_err);
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci/* TCP-LD (RFC 6069) logic */
42062306a36Sopenharmony_civoid tcp_ld_RTO_revert(struct sock *sk, u32 seq)
42162306a36Sopenharmony_ci{
42262306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
42362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
42462306a36Sopenharmony_ci	struct sk_buff *skb;
42562306a36Sopenharmony_ci	s32 remaining;
42662306a36Sopenharmony_ci	u32 delta_us;
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	if (sock_owned_by_user(sk))
42962306a36Sopenharmony_ci		return;
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
43262306a36Sopenharmony_ci	    !icsk->icsk_backoff)
43362306a36Sopenharmony_ci		return;
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	skb = tcp_rtx_queue_head(sk);
43662306a36Sopenharmony_ci	if (WARN_ON_ONCE(!skb))
43762306a36Sopenharmony_ci		return;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	icsk->icsk_backoff--;
44062306a36Sopenharmony_ci	icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT;
44162306a36Sopenharmony_ci	icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	tcp_mstamp_refresh(tp);
44462306a36Sopenharmony_ci	delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
44562306a36Sopenharmony_ci	remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us);
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	if (remaining > 0) {
44862306a36Sopenharmony_ci		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
44962306a36Sopenharmony_ci					  remaining, TCP_RTO_MAX);
45062306a36Sopenharmony_ci	} else {
45162306a36Sopenharmony_ci		/* RTO revert clocked out retransmission.
45262306a36Sopenharmony_ci		 * Will retransmit now.
45362306a36Sopenharmony_ci		 */
45462306a36Sopenharmony_ci		tcp_retransmit_timer(sk);
45562306a36Sopenharmony_ci	}
45662306a36Sopenharmony_ci}
45762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_ld_RTO_revert);
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci/*
46062306a36Sopenharmony_ci * This routine is called by the ICMP module when it gets some
46162306a36Sopenharmony_ci * sort of error condition.  If err < 0 then the socket should
46262306a36Sopenharmony_ci * be closed and the error returned to the user.  If err > 0
46362306a36Sopenharmony_ci * it's just the icmp type << 8 | icmp code.  After adjustment
46462306a36Sopenharmony_ci * header points to the first 8 bytes of the tcp header.  We need
46562306a36Sopenharmony_ci * to find the appropriate port.
46662306a36Sopenharmony_ci *
46762306a36Sopenharmony_ci * The locking strategy used here is very "optimistic". When
46862306a36Sopenharmony_ci * someone else accesses the socket the ICMP is just dropped
46962306a36Sopenharmony_ci * and for some paths there is no check at all.
47062306a36Sopenharmony_ci * A more general error queue to queue errors for later handling
47162306a36Sopenharmony_ci * is probably better.
47262306a36Sopenharmony_ci *
47362306a36Sopenharmony_ci */
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ciint tcp_v4_err(struct sk_buff *skb, u32 info)
47662306a36Sopenharmony_ci{
47762306a36Sopenharmony_ci	const struct iphdr *iph = (const struct iphdr *)skb->data;
47862306a36Sopenharmony_ci	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
47962306a36Sopenharmony_ci	struct tcp_sock *tp;
48062306a36Sopenharmony_ci	const int type = icmp_hdr(skb)->type;
48162306a36Sopenharmony_ci	const int code = icmp_hdr(skb)->code;
48262306a36Sopenharmony_ci	struct sock *sk;
48362306a36Sopenharmony_ci	struct request_sock *fastopen;
48462306a36Sopenharmony_ci	u32 seq, snd_una;
48562306a36Sopenharmony_ci	int err;
48662306a36Sopenharmony_ci	struct net *net = dev_net(skb->dev);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
48962306a36Sopenharmony_ci				       iph->daddr, th->dest, iph->saddr,
49062306a36Sopenharmony_ci				       ntohs(th->source), inet_iif(skb), 0);
49162306a36Sopenharmony_ci	if (!sk) {
49262306a36Sopenharmony_ci		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
49362306a36Sopenharmony_ci		return -ENOENT;
49462306a36Sopenharmony_ci	}
49562306a36Sopenharmony_ci	if (sk->sk_state == TCP_TIME_WAIT) {
49662306a36Sopenharmony_ci		inet_twsk_put(inet_twsk(sk));
49762306a36Sopenharmony_ci		return 0;
49862306a36Sopenharmony_ci	}
49962306a36Sopenharmony_ci	seq = ntohl(th->seq);
50062306a36Sopenharmony_ci	if (sk->sk_state == TCP_NEW_SYN_RECV) {
50162306a36Sopenharmony_ci		tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
50262306a36Sopenharmony_ci				     type == ICMP_TIME_EXCEEDED ||
50362306a36Sopenharmony_ci				     (type == ICMP_DEST_UNREACH &&
50462306a36Sopenharmony_ci				      (code == ICMP_NET_UNREACH ||
50562306a36Sopenharmony_ci				       code == ICMP_HOST_UNREACH)));
50662306a36Sopenharmony_ci		return 0;
50762306a36Sopenharmony_ci	}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	bh_lock_sock(sk);
51062306a36Sopenharmony_ci	/* If too many ICMPs get dropped on busy
51162306a36Sopenharmony_ci	 * servers this needs to be solved differently.
51262306a36Sopenharmony_ci	 * We do take care of PMTU discovery (RFC1191) special case :
51362306a36Sopenharmony_ci	 * we can receive locally generated ICMP messages while socket is held.
51462306a36Sopenharmony_ci	 */
51562306a36Sopenharmony_ci	if (sock_owned_by_user(sk)) {
51662306a36Sopenharmony_ci		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
51762306a36Sopenharmony_ci			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
51862306a36Sopenharmony_ci	}
51962306a36Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE)
52062306a36Sopenharmony_ci		goto out;
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	if (static_branch_unlikely(&ip4_min_ttl)) {
52362306a36Sopenharmony_ci		/* min_ttl can be changed concurrently from do_ip_setsockopt() */
52462306a36Sopenharmony_ci		if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
52562306a36Sopenharmony_ci			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
52662306a36Sopenharmony_ci			goto out;
52762306a36Sopenharmony_ci		}
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	tp = tcp_sk(sk);
53162306a36Sopenharmony_ci	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
53262306a36Sopenharmony_ci	fastopen = rcu_dereference(tp->fastopen_rsk);
53362306a36Sopenharmony_ci	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
53462306a36Sopenharmony_ci	if (sk->sk_state != TCP_LISTEN &&
53562306a36Sopenharmony_ci	    !between(seq, snd_una, tp->snd_nxt)) {
53662306a36Sopenharmony_ci		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
53762306a36Sopenharmony_ci		goto out;
53862306a36Sopenharmony_ci	}
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	switch (type) {
54162306a36Sopenharmony_ci	case ICMP_REDIRECT:
54262306a36Sopenharmony_ci		if (!sock_owned_by_user(sk))
54362306a36Sopenharmony_ci			do_redirect(skb, sk);
54462306a36Sopenharmony_ci		goto out;
54562306a36Sopenharmony_ci	case ICMP_SOURCE_QUENCH:
54662306a36Sopenharmony_ci		/* Just silently ignore these. */
54762306a36Sopenharmony_ci		goto out;
54862306a36Sopenharmony_ci	case ICMP_PARAMETERPROB:
54962306a36Sopenharmony_ci		err = EPROTO;
55062306a36Sopenharmony_ci		break;
55162306a36Sopenharmony_ci	case ICMP_DEST_UNREACH:
55262306a36Sopenharmony_ci		if (code > NR_ICMP_UNREACH)
55362306a36Sopenharmony_ci			goto out;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
55662306a36Sopenharmony_ci			/* We are not interested in TCP_LISTEN and open_requests
55762306a36Sopenharmony_ci			 * (SYN-ACKs send out by Linux are always <576bytes so
55862306a36Sopenharmony_ci			 * they should go through unfragmented).
55962306a36Sopenharmony_ci			 */
56062306a36Sopenharmony_ci			if (sk->sk_state == TCP_LISTEN)
56162306a36Sopenharmony_ci				goto out;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci			WRITE_ONCE(tp->mtu_info, info);
56462306a36Sopenharmony_ci			if (!sock_owned_by_user(sk)) {
56562306a36Sopenharmony_ci				tcp_v4_mtu_reduced(sk);
56662306a36Sopenharmony_ci			} else {
56762306a36Sopenharmony_ci				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
56862306a36Sopenharmony_ci					sock_hold(sk);
56962306a36Sopenharmony_ci			}
57062306a36Sopenharmony_ci			goto out;
57162306a36Sopenharmony_ci		}
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci		err = icmp_err_convert[code].errno;
57462306a36Sopenharmony_ci		/* check if this ICMP message allows revert of backoff.
57562306a36Sopenharmony_ci		 * (see RFC 6069)
57662306a36Sopenharmony_ci		 */
57762306a36Sopenharmony_ci		if (!fastopen &&
57862306a36Sopenharmony_ci		    (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH))
57962306a36Sopenharmony_ci			tcp_ld_RTO_revert(sk, seq);
58062306a36Sopenharmony_ci		break;
58162306a36Sopenharmony_ci	case ICMP_TIME_EXCEEDED:
58262306a36Sopenharmony_ci		err = EHOSTUNREACH;
58362306a36Sopenharmony_ci		break;
58462306a36Sopenharmony_ci	default:
58562306a36Sopenharmony_ci		goto out;
58662306a36Sopenharmony_ci	}
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci	switch (sk->sk_state) {
58962306a36Sopenharmony_ci	case TCP_SYN_SENT:
59062306a36Sopenharmony_ci	case TCP_SYN_RECV:
59162306a36Sopenharmony_ci		/* Only in fast or simultaneous open. If a fast open socket is
59262306a36Sopenharmony_ci		 * already accepted it is treated as a connected one below.
59362306a36Sopenharmony_ci		 */
59462306a36Sopenharmony_ci		if (fastopen && !fastopen->sk)
59562306a36Sopenharmony_ci			break;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci		ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci		if (!sock_owned_by_user(sk)) {
60062306a36Sopenharmony_ci			WRITE_ONCE(sk->sk_err, err);
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci			sk_error_report(sk);
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci			tcp_done(sk);
60562306a36Sopenharmony_ci		} else {
60662306a36Sopenharmony_ci			WRITE_ONCE(sk->sk_err_soft, err);
60762306a36Sopenharmony_ci		}
60862306a36Sopenharmony_ci		goto out;
60962306a36Sopenharmony_ci	}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	/* If we've already connected we will keep trying
61262306a36Sopenharmony_ci	 * until we time out, or the user gives up.
61362306a36Sopenharmony_ci	 *
61462306a36Sopenharmony_ci	 * rfc1122 4.2.3.9 allows to consider as hard errors
61562306a36Sopenharmony_ci	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
61662306a36Sopenharmony_ci	 * but it is obsoleted by pmtu discovery).
61762306a36Sopenharmony_ci	 *
61862306a36Sopenharmony_ci	 * Note, that in modern internet, where routing is unreliable
61962306a36Sopenharmony_ci	 * and in each dark corner broken firewalls sit, sending random
62062306a36Sopenharmony_ci	 * errors ordered by their masters even this two messages finally lose
62162306a36Sopenharmony_ci	 * their original sense (even Linux sends invalid PORT_UNREACHs)
62262306a36Sopenharmony_ci	 *
62362306a36Sopenharmony_ci	 * Now we are in compliance with RFCs.
62462306a36Sopenharmony_ci	 *							--ANK (980905)
62562306a36Sopenharmony_ci	 */
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	if (!sock_owned_by_user(sk) &&
62862306a36Sopenharmony_ci	    inet_test_bit(RECVERR, sk)) {
62962306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err, err);
63062306a36Sopenharmony_ci		sk_error_report(sk);
63162306a36Sopenharmony_ci	} else	{ /* Only an error on timeout */
63262306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err_soft, err);
63362306a36Sopenharmony_ci	}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ciout:
63662306a36Sopenharmony_ci	bh_unlock_sock(sk);
63762306a36Sopenharmony_ci	sock_put(sk);
63862306a36Sopenharmony_ci	return 0;
63962306a36Sopenharmony_ci}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_civoid __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
64262306a36Sopenharmony_ci{
64362306a36Sopenharmony_ci	struct tcphdr *th = tcp_hdr(skb);
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
64662306a36Sopenharmony_ci	skb->csum_start = skb_transport_header(skb) - skb->head;
64762306a36Sopenharmony_ci	skb->csum_offset = offsetof(struct tcphdr, check);
64862306a36Sopenharmony_ci}
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci/* This routine computes an IPv4 TCP checksum. */
65162306a36Sopenharmony_civoid tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	const struct inet_sock *inet = inet_sk(sk);
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_send_check);
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci/*
66062306a36Sopenharmony_ci *	This routine will send an RST to the other tcp.
66162306a36Sopenharmony_ci *
66262306a36Sopenharmony_ci *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
66362306a36Sopenharmony_ci *		      for reset.
66462306a36Sopenharmony_ci *	Answer: if a packet caused RST, it is not for a socket
66562306a36Sopenharmony_ci *		existing in our system, if it is matched to a socket,
66662306a36Sopenharmony_ci *		it is just duplicate segment or bug in other side's TCP.
66762306a36Sopenharmony_ci *		So that we build reply only basing on parameters
66862306a36Sopenharmony_ci *		arrived with segment.
66962306a36Sopenharmony_ci *	Exception: precedence violation. We do not implement it in any case.
67062306a36Sopenharmony_ci */
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
67362306a36Sopenharmony_ci#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED
67462306a36Sopenharmony_ci#else
67562306a36Sopenharmony_ci#define OPTION_BYTES sizeof(__be32)
67662306a36Sopenharmony_ci#endif
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_cistatic void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
67962306a36Sopenharmony_ci{
68062306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
68162306a36Sopenharmony_ci	struct {
68262306a36Sopenharmony_ci		struct tcphdr th;
68362306a36Sopenharmony_ci		__be32 opt[OPTION_BYTES / sizeof(__be32)];
68462306a36Sopenharmony_ci	} rep;
68562306a36Sopenharmony_ci	struct ip_reply_arg arg;
68662306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
68762306a36Sopenharmony_ci	struct tcp_md5sig_key *key = NULL;
68862306a36Sopenharmony_ci	const __u8 *hash_location = NULL;
68962306a36Sopenharmony_ci	unsigned char newhash[16];
69062306a36Sopenharmony_ci	int genhash;
69162306a36Sopenharmony_ci	struct sock *sk1 = NULL;
69262306a36Sopenharmony_ci#endif
69362306a36Sopenharmony_ci	u64 transmit_time = 0;
69462306a36Sopenharmony_ci	struct sock *ctl_sk;
69562306a36Sopenharmony_ci	struct net *net;
69662306a36Sopenharmony_ci	u32 txhash = 0;
69762306a36Sopenharmony_ci
69862306a36Sopenharmony_ci	/* Never send a reset in response to a reset. */
69962306a36Sopenharmony_ci	if (th->rst)
70062306a36Sopenharmony_ci		return;
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	/* If sk not NULL, it means we did a successful lookup and incoming
70362306a36Sopenharmony_ci	 * route had to be correct. prequeue might have dropped our dst.
70462306a36Sopenharmony_ci	 */
70562306a36Sopenharmony_ci	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
70662306a36Sopenharmony_ci		return;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	/* Swap the send and the receive. */
70962306a36Sopenharmony_ci	memset(&rep, 0, sizeof(rep));
71062306a36Sopenharmony_ci	rep.th.dest   = th->source;
71162306a36Sopenharmony_ci	rep.th.source = th->dest;
71262306a36Sopenharmony_ci	rep.th.doff   = sizeof(struct tcphdr) / 4;
71362306a36Sopenharmony_ci	rep.th.rst    = 1;
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	if (th->ack) {
71662306a36Sopenharmony_ci		rep.th.seq = th->ack_seq;
71762306a36Sopenharmony_ci	} else {
71862306a36Sopenharmony_ci		rep.th.ack = 1;
71962306a36Sopenharmony_ci		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
72062306a36Sopenharmony_ci				       skb->len - (th->doff << 2));
72162306a36Sopenharmony_ci	}
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	memset(&arg, 0, sizeof(arg));
72462306a36Sopenharmony_ci	arg.iov[0].iov_base = (unsigned char *)&rep;
72562306a36Sopenharmony_ci	arg.iov[0].iov_len  = sizeof(rep.th);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
72862306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
72962306a36Sopenharmony_ci	rcu_read_lock();
73062306a36Sopenharmony_ci	hash_location = tcp_parse_md5sig_option(th);
73162306a36Sopenharmony_ci	if (sk && sk_fullsock(sk)) {
73262306a36Sopenharmony_ci		const union tcp_md5_addr *addr;
73362306a36Sopenharmony_ci		int l3index;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci		/* sdif set, means packet ingressed via a device
73662306a36Sopenharmony_ci		 * in an L3 domain and inet_iif is set to it.
73762306a36Sopenharmony_ci		 */
73862306a36Sopenharmony_ci		l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
73962306a36Sopenharmony_ci		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
74062306a36Sopenharmony_ci		key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
74162306a36Sopenharmony_ci	} else if (hash_location) {
74262306a36Sopenharmony_ci		const union tcp_md5_addr *addr;
74362306a36Sopenharmony_ci		int sdif = tcp_v4_sdif(skb);
74462306a36Sopenharmony_ci		int dif = inet_iif(skb);
74562306a36Sopenharmony_ci		int l3index;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci		/*
74862306a36Sopenharmony_ci		 * active side is lost. Try to find listening socket through
74962306a36Sopenharmony_ci		 * source port, and then find md5 key through listening socket.
75062306a36Sopenharmony_ci		 * we are not loose security here:
75162306a36Sopenharmony_ci		 * Incoming packet is checked with md5 hash with finding key,
75262306a36Sopenharmony_ci		 * no RST generated if md5 hash doesn't match.
75362306a36Sopenharmony_ci		 */
75462306a36Sopenharmony_ci		sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
75562306a36Sopenharmony_ci					     NULL, 0, ip_hdr(skb)->saddr,
75662306a36Sopenharmony_ci					     th->source, ip_hdr(skb)->daddr,
75762306a36Sopenharmony_ci					     ntohs(th->source), dif, sdif);
75862306a36Sopenharmony_ci		/* don't send rst if it can't find key */
75962306a36Sopenharmony_ci		if (!sk1)
76062306a36Sopenharmony_ci			goto out;
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci		/* sdif set, means packet ingressed via a device
76362306a36Sopenharmony_ci		 * in an L3 domain and dif is set to it.
76462306a36Sopenharmony_ci		 */
76562306a36Sopenharmony_ci		l3index = sdif ? dif : 0;
76662306a36Sopenharmony_ci		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
76762306a36Sopenharmony_ci		key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);
76862306a36Sopenharmony_ci		if (!key)
76962306a36Sopenharmony_ci			goto out;
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
77362306a36Sopenharmony_ci		if (genhash || memcmp(hash_location, newhash, 16) != 0)
77462306a36Sopenharmony_ci			goto out;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	}
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	if (key) {
77962306a36Sopenharmony_ci		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
78062306a36Sopenharmony_ci				   (TCPOPT_NOP << 16) |
78162306a36Sopenharmony_ci				   (TCPOPT_MD5SIG << 8) |
78262306a36Sopenharmony_ci				   TCPOLEN_MD5SIG);
78362306a36Sopenharmony_ci		/* Update length and the length the header thinks exists */
78462306a36Sopenharmony_ci		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
78562306a36Sopenharmony_ci		rep.th.doff = arg.iov[0].iov_len / 4;
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
78862306a36Sopenharmony_ci				     key, ip_hdr(skb)->saddr,
78962306a36Sopenharmony_ci				     ip_hdr(skb)->daddr, &rep.th);
79062306a36Sopenharmony_ci	}
79162306a36Sopenharmony_ci#endif
79262306a36Sopenharmony_ci	/* Can't co-exist with TCPMD5, hence check rep.opt[0] */
79362306a36Sopenharmony_ci	if (rep.opt[0] == 0) {
79462306a36Sopenharmony_ci		__be32 mrst = mptcp_reset_option(skb);
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci		if (mrst) {
79762306a36Sopenharmony_ci			rep.opt[0] = mrst;
79862306a36Sopenharmony_ci			arg.iov[0].iov_len += sizeof(mrst);
79962306a36Sopenharmony_ci			rep.th.doff = arg.iov[0].iov_len / 4;
80062306a36Sopenharmony_ci		}
80162306a36Sopenharmony_ci	}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
80462306a36Sopenharmony_ci				      ip_hdr(skb)->saddr, /* XXX */
80562306a36Sopenharmony_ci				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
80662306a36Sopenharmony_ci	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
80762306a36Sopenharmony_ci	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	/* When socket is gone, all binding information is lost.
81062306a36Sopenharmony_ci	 * routing might fail in this case. No choice here, if we choose to force
81162306a36Sopenharmony_ci	 * input interface, we will misroute in case of asymmetric route.
81262306a36Sopenharmony_ci	 */
81362306a36Sopenharmony_ci	if (sk) {
81462306a36Sopenharmony_ci		arg.bound_dev_if = sk->sk_bound_dev_if;
81562306a36Sopenharmony_ci		if (sk_fullsock(sk))
81662306a36Sopenharmony_ci			trace_tcp_send_reset(sk, skb);
81762306a36Sopenharmony_ci	}
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
82062306a36Sopenharmony_ci		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	arg.tos = ip_hdr(skb)->tos;
82362306a36Sopenharmony_ci	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
82462306a36Sopenharmony_ci	local_bh_disable();
82562306a36Sopenharmony_ci	ctl_sk = this_cpu_read(ipv4_tcp_sk);
82662306a36Sopenharmony_ci	sock_net_set(ctl_sk, net);
82762306a36Sopenharmony_ci	if (sk) {
82862306a36Sopenharmony_ci		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
82962306a36Sopenharmony_ci				   inet_twsk(sk)->tw_mark : sk->sk_mark;
83062306a36Sopenharmony_ci		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
83162306a36Sopenharmony_ci				   inet_twsk(sk)->tw_priority : sk->sk_priority;
83262306a36Sopenharmony_ci		transmit_time = tcp_transmit_time(sk);
83362306a36Sopenharmony_ci		xfrm_sk_clone_policy(ctl_sk, sk);
83462306a36Sopenharmony_ci		txhash = (sk->sk_state == TCP_TIME_WAIT) ?
83562306a36Sopenharmony_ci			 inet_twsk(sk)->tw_txhash : sk->sk_txhash;
83662306a36Sopenharmony_ci	} else {
83762306a36Sopenharmony_ci		ctl_sk->sk_mark = 0;
83862306a36Sopenharmony_ci		ctl_sk->sk_priority = 0;
83962306a36Sopenharmony_ci	}
84062306a36Sopenharmony_ci	ip_send_unicast_reply(ctl_sk,
84162306a36Sopenharmony_ci			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
84262306a36Sopenharmony_ci			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
84362306a36Sopenharmony_ci			      &arg, arg.iov[0].iov_len,
84462306a36Sopenharmony_ci			      transmit_time, txhash);
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	xfrm_sk_free_policy(ctl_sk);
84762306a36Sopenharmony_ci	sock_net_set(ctl_sk, &init_net);
84862306a36Sopenharmony_ci	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
84962306a36Sopenharmony_ci	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
85062306a36Sopenharmony_ci	local_bh_enable();
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
85362306a36Sopenharmony_ciout:
85462306a36Sopenharmony_ci	rcu_read_unlock();
85562306a36Sopenharmony_ci#endif
85662306a36Sopenharmony_ci}
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
85962306a36Sopenharmony_ci   outside socket context is ugly, certainly. What can I do?
86062306a36Sopenharmony_ci */
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_cistatic void tcp_v4_send_ack(const struct sock *sk,
86362306a36Sopenharmony_ci			    struct sk_buff *skb, u32 seq, u32 ack,
86462306a36Sopenharmony_ci			    u32 win, u32 tsval, u32 tsecr, int oif,
86562306a36Sopenharmony_ci			    struct tcp_md5sig_key *key,
86662306a36Sopenharmony_ci			    int reply_flags, u8 tos, u32 txhash)
86762306a36Sopenharmony_ci{
86862306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
86962306a36Sopenharmony_ci	struct {
87062306a36Sopenharmony_ci		struct tcphdr th;
87162306a36Sopenharmony_ci		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
87262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
87362306a36Sopenharmony_ci			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
87462306a36Sopenharmony_ci#endif
87562306a36Sopenharmony_ci			];
87662306a36Sopenharmony_ci	} rep;
87762306a36Sopenharmony_ci	struct net *net = sock_net(sk);
87862306a36Sopenharmony_ci	struct ip_reply_arg arg;
87962306a36Sopenharmony_ci	struct sock *ctl_sk;
88062306a36Sopenharmony_ci	u64 transmit_time;
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	memset(&rep.th, 0, sizeof(struct tcphdr));
88362306a36Sopenharmony_ci	memset(&arg, 0, sizeof(arg));
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	arg.iov[0].iov_base = (unsigned char *)&rep;
88662306a36Sopenharmony_ci	arg.iov[0].iov_len  = sizeof(rep.th);
88762306a36Sopenharmony_ci	if (tsecr) {
88862306a36Sopenharmony_ci		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
88962306a36Sopenharmony_ci				   (TCPOPT_TIMESTAMP << 8) |
89062306a36Sopenharmony_ci				   TCPOLEN_TIMESTAMP);
89162306a36Sopenharmony_ci		rep.opt[1] = htonl(tsval);
89262306a36Sopenharmony_ci		rep.opt[2] = htonl(tsecr);
89362306a36Sopenharmony_ci		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
89462306a36Sopenharmony_ci	}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci	/* Swap the send and the receive. */
89762306a36Sopenharmony_ci	rep.th.dest    = th->source;
89862306a36Sopenharmony_ci	rep.th.source  = th->dest;
89962306a36Sopenharmony_ci	rep.th.doff    = arg.iov[0].iov_len / 4;
90062306a36Sopenharmony_ci	rep.th.seq     = htonl(seq);
90162306a36Sopenharmony_ci	rep.th.ack_seq = htonl(ack);
90262306a36Sopenharmony_ci	rep.th.ack     = 1;
90362306a36Sopenharmony_ci	rep.th.window  = htons(win);
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
90662306a36Sopenharmony_ci	if (key) {
90762306a36Sopenharmony_ci		int offset = (tsecr) ? 3 : 0;
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
91062306a36Sopenharmony_ci					  (TCPOPT_NOP << 16) |
91162306a36Sopenharmony_ci					  (TCPOPT_MD5SIG << 8) |
91262306a36Sopenharmony_ci					  TCPOLEN_MD5SIG);
91362306a36Sopenharmony_ci		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
91462306a36Sopenharmony_ci		rep.th.doff = arg.iov[0].iov_len/4;
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
91762306a36Sopenharmony_ci				    key, ip_hdr(skb)->saddr,
91862306a36Sopenharmony_ci				    ip_hdr(skb)->daddr, &rep.th);
91962306a36Sopenharmony_ci	}
92062306a36Sopenharmony_ci#endif
92162306a36Sopenharmony_ci	arg.flags = reply_flags;
92262306a36Sopenharmony_ci	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
92362306a36Sopenharmony_ci				      ip_hdr(skb)->saddr, /* XXX */
92462306a36Sopenharmony_ci				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
92562306a36Sopenharmony_ci	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
92662306a36Sopenharmony_ci	if (oif)
92762306a36Sopenharmony_ci		arg.bound_dev_if = oif;
92862306a36Sopenharmony_ci	arg.tos = tos;
92962306a36Sopenharmony_ci	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
93062306a36Sopenharmony_ci	local_bh_disable();
93162306a36Sopenharmony_ci	ctl_sk = this_cpu_read(ipv4_tcp_sk);
93262306a36Sopenharmony_ci	sock_net_set(ctl_sk, net);
93362306a36Sopenharmony_ci	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
93462306a36Sopenharmony_ci			   inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
93562306a36Sopenharmony_ci	ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
93662306a36Sopenharmony_ci			   inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
93762306a36Sopenharmony_ci	transmit_time = tcp_transmit_time(sk);
93862306a36Sopenharmony_ci	ip_send_unicast_reply(ctl_sk,
93962306a36Sopenharmony_ci			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
94062306a36Sopenharmony_ci			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
94162306a36Sopenharmony_ci			      &arg, arg.iov[0].iov_len,
94262306a36Sopenharmony_ci			      transmit_time, txhash);
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci	sock_net_set(ctl_sk, &init_net);
94562306a36Sopenharmony_ci	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
94662306a36Sopenharmony_ci	local_bh_enable();
94762306a36Sopenharmony_ci}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_cistatic void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
95062306a36Sopenharmony_ci{
95162306a36Sopenharmony_ci	struct inet_timewait_sock *tw = inet_twsk(sk);
95262306a36Sopenharmony_ci	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	tcp_v4_send_ack(sk, skb,
95562306a36Sopenharmony_ci			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
95662306a36Sopenharmony_ci			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
95762306a36Sopenharmony_ci			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
95862306a36Sopenharmony_ci			tcptw->tw_ts_recent,
95962306a36Sopenharmony_ci			tw->tw_bound_dev_if,
96062306a36Sopenharmony_ci			tcp_twsk_md5_key(tcptw),
96162306a36Sopenharmony_ci			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
96262306a36Sopenharmony_ci			tw->tw_tos,
96362306a36Sopenharmony_ci			tw->tw_txhash
96462306a36Sopenharmony_ci			);
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	inet_twsk_put(tw);
96762306a36Sopenharmony_ci}
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_cistatic void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
97062306a36Sopenharmony_ci				  struct request_sock *req)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	const union tcp_md5_addr *addr;
97362306a36Sopenharmony_ci	int l3index;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
97662306a36Sopenharmony_ci	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
97762306a36Sopenharmony_ci	 */
97862306a36Sopenharmony_ci	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
97962306a36Sopenharmony_ci					     tcp_sk(sk)->snd_nxt;
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	/* RFC 7323 2.3
98262306a36Sopenharmony_ci	 * The window field (SEG.WND) of every outgoing segment, with the
98362306a36Sopenharmony_ci	 * exception of <SYN> segments, MUST be right-shifted by
98462306a36Sopenharmony_ci	 * Rcv.Wind.Shift bits:
98562306a36Sopenharmony_ci	 */
98662306a36Sopenharmony_ci	addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
98762306a36Sopenharmony_ci	l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
98862306a36Sopenharmony_ci	tcp_v4_send_ack(sk, skb, seq,
98962306a36Sopenharmony_ci			tcp_rsk(req)->rcv_nxt,
99062306a36Sopenharmony_ci			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
99162306a36Sopenharmony_ci			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
99262306a36Sopenharmony_ci			READ_ONCE(req->ts_recent),
99362306a36Sopenharmony_ci			0,
99462306a36Sopenharmony_ci			tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
99562306a36Sopenharmony_ci			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
99662306a36Sopenharmony_ci			ip_hdr(skb)->tos,
99762306a36Sopenharmony_ci			READ_ONCE(tcp_rsk(req)->txhash));
99862306a36Sopenharmony_ci}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci/*
100162306a36Sopenharmony_ci *	Send a SYN-ACK after having received a SYN.
100262306a36Sopenharmony_ci *	This still operates on a request_sock only, not on a big
100362306a36Sopenharmony_ci *	socket.
100462306a36Sopenharmony_ci */
100562306a36Sopenharmony_cistatic int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
100662306a36Sopenharmony_ci			      struct flowi *fl,
100762306a36Sopenharmony_ci			      struct request_sock *req,
100862306a36Sopenharmony_ci			      struct tcp_fastopen_cookie *foc,
100962306a36Sopenharmony_ci			      enum tcp_synack_type synack_type,
101062306a36Sopenharmony_ci			      struct sk_buff *syn_skb)
101162306a36Sopenharmony_ci{
101262306a36Sopenharmony_ci	const struct inet_request_sock *ireq = inet_rsk(req);
101362306a36Sopenharmony_ci	struct flowi4 fl4;
101462306a36Sopenharmony_ci	int err = -1;
101562306a36Sopenharmony_ci	struct sk_buff *skb;
101662306a36Sopenharmony_ci	u8 tos;
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	/* First, grab a route. */
101962306a36Sopenharmony_ci	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
102062306a36Sopenharmony_ci		return -1;
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	if (skb) {
102562306a36Sopenharmony_ci		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci		tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
102862306a36Sopenharmony_ci				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
102962306a36Sopenharmony_ci				(inet_sk(sk)->tos & INET_ECN_MASK) :
103062306a36Sopenharmony_ci				inet_sk(sk)->tos;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci		if (!INET_ECN_is_capable(tos) &&
103362306a36Sopenharmony_ci		    tcp_bpf_ca_needs_ecn((struct sock *)req))
103462306a36Sopenharmony_ci			tos |= INET_ECN_ECT_0;
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci		rcu_read_lock();
103762306a36Sopenharmony_ci		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
103862306a36Sopenharmony_ci					    ireq->ir_rmt_addr,
103962306a36Sopenharmony_ci					    rcu_dereference(ireq->ireq_opt),
104062306a36Sopenharmony_ci					    tos);
104162306a36Sopenharmony_ci		rcu_read_unlock();
104262306a36Sopenharmony_ci		err = net_xmit_eval(err);
104362306a36Sopenharmony_ci	}
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	return err;
104662306a36Sopenharmony_ci}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci/*
104962306a36Sopenharmony_ci *	IPv4 request_sock destructor.
105062306a36Sopenharmony_ci */
105162306a36Sopenharmony_cistatic void tcp_v4_reqsk_destructor(struct request_sock *req)
105262306a36Sopenharmony_ci{
105362306a36Sopenharmony_ci	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
105462306a36Sopenharmony_ci}
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Deferred static key, initially false, exported for use outside this file.
 * NOTE(review): presumably switched on once an MD5 key is configured so the
 * MD5 paths can be skipped otherwise - confirm against its users.
 */
DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ);
EXPORT_SYMBOL(tcp_md5_needed);
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_cistatic bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
106762306a36Sopenharmony_ci{
106862306a36Sopenharmony_ci	if (!old)
106962306a36Sopenharmony_ci		return true;
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci	/* l3index always overrides non-l3index */
107262306a36Sopenharmony_ci	if (old->l3index && new->l3index == 0)
107362306a36Sopenharmony_ci		return false;
107462306a36Sopenharmony_ci	if (old->l3index == 0 && new->l3index)
107562306a36Sopenharmony_ci		return true;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	return old->prefixlen < new->prefixlen;
107862306a36Sopenharmony_ci}
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci/* Find the Key structure for an address.  */
108162306a36Sopenharmony_cistruct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
108262306a36Sopenharmony_ci					   const union tcp_md5_addr *addr,
108362306a36Sopenharmony_ci					   int family)
108462306a36Sopenharmony_ci{
108562306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
108662306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
108762306a36Sopenharmony_ci	const struct tcp_md5sig_info *md5sig;
108862306a36Sopenharmony_ci	__be32 mask;
108962306a36Sopenharmony_ci	struct tcp_md5sig_key *best_match = NULL;
109062306a36Sopenharmony_ci	bool match;
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci	/* caller either holds rcu_read_lock() or socket lock */
109362306a36Sopenharmony_ci	md5sig = rcu_dereference_check(tp->md5sig_info,
109462306a36Sopenharmony_ci				       lockdep_sock_is_held(sk));
109562306a36Sopenharmony_ci	if (!md5sig)
109662306a36Sopenharmony_ci		return NULL;
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	hlist_for_each_entry_rcu(key, &md5sig->head, node,
109962306a36Sopenharmony_ci				 lockdep_sock_is_held(sk)) {
110062306a36Sopenharmony_ci		if (key->family != family)
110162306a36Sopenharmony_ci			continue;
110262306a36Sopenharmony_ci		if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index)
110362306a36Sopenharmony_ci			continue;
110462306a36Sopenharmony_ci		if (family == AF_INET) {
110562306a36Sopenharmony_ci			mask = inet_make_mask(key->prefixlen);
110662306a36Sopenharmony_ci			match = (key->addr.a4.s_addr & mask) ==
110762306a36Sopenharmony_ci				(addr->a4.s_addr & mask);
110862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
110962306a36Sopenharmony_ci		} else if (family == AF_INET6) {
111062306a36Sopenharmony_ci			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
111162306a36Sopenharmony_ci						  key->prefixlen);
111262306a36Sopenharmony_ci#endif
111362306a36Sopenharmony_ci		} else {
111462306a36Sopenharmony_ci			match = false;
111562306a36Sopenharmony_ci		}
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci		if (match && better_md5_match(best_match, key))
111862306a36Sopenharmony_ci			best_match = key;
111962306a36Sopenharmony_ci	}
112062306a36Sopenharmony_ci	return best_match;
112162306a36Sopenharmony_ci}
112262306a36Sopenharmony_ciEXPORT_SYMBOL(__tcp_md5_do_lookup);
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_cistatic struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
112562306a36Sopenharmony_ci						      const union tcp_md5_addr *addr,
112662306a36Sopenharmony_ci						      int family, u8 prefixlen,
112762306a36Sopenharmony_ci						      int l3index, u8 flags)
112862306a36Sopenharmony_ci{
112962306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
113062306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
113162306a36Sopenharmony_ci	unsigned int size = sizeof(struct in_addr);
113262306a36Sopenharmony_ci	const struct tcp_md5sig_info *md5sig;
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	/* caller either holds rcu_read_lock() or socket lock */
113562306a36Sopenharmony_ci	md5sig = rcu_dereference_check(tp->md5sig_info,
113662306a36Sopenharmony_ci				       lockdep_sock_is_held(sk));
113762306a36Sopenharmony_ci	if (!md5sig)
113862306a36Sopenharmony_ci		return NULL;
113962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
114062306a36Sopenharmony_ci	if (family == AF_INET6)
114162306a36Sopenharmony_ci		size = sizeof(struct in6_addr);
114262306a36Sopenharmony_ci#endif
114362306a36Sopenharmony_ci	hlist_for_each_entry_rcu(key, &md5sig->head, node,
114462306a36Sopenharmony_ci				 lockdep_sock_is_held(sk)) {
114562306a36Sopenharmony_ci		if (key->family != family)
114662306a36Sopenharmony_ci			continue;
114762306a36Sopenharmony_ci		if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX))
114862306a36Sopenharmony_ci			continue;
114962306a36Sopenharmony_ci		if (key->l3index != l3index)
115062306a36Sopenharmony_ci			continue;
115162306a36Sopenharmony_ci		if (!memcmp(&key->addr, addr, size) &&
115262306a36Sopenharmony_ci		    key->prefixlen == prefixlen)
115362306a36Sopenharmony_ci			return key;
115462306a36Sopenharmony_ci	}
115562306a36Sopenharmony_ci	return NULL;
115662306a36Sopenharmony_ci}
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_cistruct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
115962306a36Sopenharmony_ci					 const struct sock *addr_sk)
116062306a36Sopenharmony_ci{
116162306a36Sopenharmony_ci	const union tcp_md5_addr *addr;
116262306a36Sopenharmony_ci	int l3index;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
116562306a36Sopenharmony_ci						 addr_sk->sk_bound_dev_if);
116662306a36Sopenharmony_ci	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
116762306a36Sopenharmony_ci	return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
116862306a36Sopenharmony_ci}
116962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_md5_lookup);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_cistatic int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp)
117262306a36Sopenharmony_ci{
117362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
117462306a36Sopenharmony_ci	struct tcp_md5sig_info *md5sig;
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	md5sig = kmalloc(sizeof(*md5sig), gfp);
117762306a36Sopenharmony_ci	if (!md5sig)
117862306a36Sopenharmony_ci		return -ENOMEM;
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci	sk_gso_disable(sk);
118162306a36Sopenharmony_ci	INIT_HLIST_HEAD(&md5sig->head);
118262306a36Sopenharmony_ci	rcu_assign_pointer(tp->md5sig_info, md5sig);
118362306a36Sopenharmony_ci	return 0;
118462306a36Sopenharmony_ci}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci/* This can be called on a newly created socket, from other files */
118762306a36Sopenharmony_cistatic int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
118862306a36Sopenharmony_ci			    int family, u8 prefixlen, int l3index, u8 flags,
118962306a36Sopenharmony_ci			    const u8 *newkey, u8 newkeylen, gfp_t gfp)
119062306a36Sopenharmony_ci{
119162306a36Sopenharmony_ci	/* Add Key to the list */
119262306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
119362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
119462306a36Sopenharmony_ci	struct tcp_md5sig_info *md5sig;
119562306a36Sopenharmony_ci
119662306a36Sopenharmony_ci	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
119762306a36Sopenharmony_ci	if (key) {
119862306a36Sopenharmony_ci		/* Pre-existing entry - just update that one.
119962306a36Sopenharmony_ci		 * Note that the key might be used concurrently.
120062306a36Sopenharmony_ci		 * data_race() is telling kcsan that we do not care of
120162306a36Sopenharmony_ci		 * key mismatches, since changing MD5 key on live flows
120262306a36Sopenharmony_ci		 * can lead to packet drops.
120362306a36Sopenharmony_ci		 */
120462306a36Sopenharmony_ci		data_race(memcpy(key->key, newkey, newkeylen));
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_ci		/* Pairs with READ_ONCE() in tcp_md5_hash_key().
120762306a36Sopenharmony_ci		 * Also note that a reader could catch new key->keylen value
120862306a36Sopenharmony_ci		 * but old key->key[], this is the reason we use __GFP_ZERO
120962306a36Sopenharmony_ci		 * at sock_kmalloc() time below these lines.
121062306a36Sopenharmony_ci		 */
121162306a36Sopenharmony_ci		WRITE_ONCE(key->keylen, newkeylen);
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci		return 0;
121462306a36Sopenharmony_ci	}
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	md5sig = rcu_dereference_protected(tp->md5sig_info,
121762306a36Sopenharmony_ci					   lockdep_sock_is_held(sk));
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci	key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
122062306a36Sopenharmony_ci	if (!key)
122162306a36Sopenharmony_ci		return -ENOMEM;
122262306a36Sopenharmony_ci	if (!tcp_alloc_md5sig_pool()) {
122362306a36Sopenharmony_ci		sock_kfree_s(sk, key, sizeof(*key));
122462306a36Sopenharmony_ci		return -ENOMEM;
122562306a36Sopenharmony_ci	}
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	memcpy(key->key, newkey, newkeylen);
122862306a36Sopenharmony_ci	key->keylen = newkeylen;
122962306a36Sopenharmony_ci	key->family = family;
123062306a36Sopenharmony_ci	key->prefixlen = prefixlen;
123162306a36Sopenharmony_ci	key->l3index = l3index;
123262306a36Sopenharmony_ci	key->flags = flags;
123362306a36Sopenharmony_ci	memcpy(&key->addr, addr,
123462306a36Sopenharmony_ci	       (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? sizeof(struct in6_addr) :
123562306a36Sopenharmony_ci								 sizeof(struct in_addr));
123662306a36Sopenharmony_ci	hlist_add_head_rcu(&key->node, &md5sig->head);
123762306a36Sopenharmony_ci	return 0;
123862306a36Sopenharmony_ci}
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ciint tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
124162306a36Sopenharmony_ci		   int family, u8 prefixlen, int l3index, u8 flags,
124262306a36Sopenharmony_ci		   const u8 *newkey, u8 newkeylen)
124362306a36Sopenharmony_ci{
124462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
124562306a36Sopenharmony_ci
124662306a36Sopenharmony_ci	if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
124762306a36Sopenharmony_ci		if (tcp_md5sig_info_add(sk, GFP_KERNEL))
124862306a36Sopenharmony_ci			return -ENOMEM;
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_ci		if (!static_branch_inc(&tcp_md5_needed.key)) {
125162306a36Sopenharmony_ci			struct tcp_md5sig_info *md5sig;
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_ci			md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
125462306a36Sopenharmony_ci			rcu_assign_pointer(tp->md5sig_info, NULL);
125562306a36Sopenharmony_ci			kfree_rcu(md5sig, rcu);
125662306a36Sopenharmony_ci			return -EUSERS;
125762306a36Sopenharmony_ci		}
125862306a36Sopenharmony_ci	}
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags,
126162306a36Sopenharmony_ci				newkey, newkeylen, GFP_KERNEL);
126262306a36Sopenharmony_ci}
126362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_do_add);
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ciint tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
126662306a36Sopenharmony_ci		     int family, u8 prefixlen, int l3index,
126762306a36Sopenharmony_ci		     struct tcp_md5sig_key *key)
126862306a36Sopenharmony_ci{
126962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci	if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
127262306a36Sopenharmony_ci		if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC)))
127362306a36Sopenharmony_ci			return -ENOMEM;
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci		if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) {
127662306a36Sopenharmony_ci			struct tcp_md5sig_info *md5sig;
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci			md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
127962306a36Sopenharmony_ci			net_warn_ratelimited("Too many TCP-MD5 keys in the system\n");
128062306a36Sopenharmony_ci			rcu_assign_pointer(tp->md5sig_info, NULL);
128162306a36Sopenharmony_ci			kfree_rcu(md5sig, rcu);
128262306a36Sopenharmony_ci			return -EUSERS;
128362306a36Sopenharmony_ci		}
128462306a36Sopenharmony_ci	}
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci	return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index,
128762306a36Sopenharmony_ci				key->flags, key->key, key->keylen,
128862306a36Sopenharmony_ci				sk_gfp_mask(sk, GFP_ATOMIC));
128962306a36Sopenharmony_ci}
129062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_key_copy);
129162306a36Sopenharmony_ci
129262306a36Sopenharmony_ciint tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
129362306a36Sopenharmony_ci		   u8 prefixlen, int l3index, u8 flags)
129462306a36Sopenharmony_ci{
129562306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
129862306a36Sopenharmony_ci	if (!key)
129962306a36Sopenharmony_ci		return -ENOENT;
130062306a36Sopenharmony_ci	hlist_del_rcu(&key->node);
130162306a36Sopenharmony_ci	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
130262306a36Sopenharmony_ci	kfree_rcu(key, rcu);
130362306a36Sopenharmony_ci	return 0;
130462306a36Sopenharmony_ci}
130562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_do_del);
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_cistatic void tcp_clear_md5_list(struct sock *sk)
130862306a36Sopenharmony_ci{
130962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
131062306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
131162306a36Sopenharmony_ci	struct hlist_node *n;
131262306a36Sopenharmony_ci	struct tcp_md5sig_info *md5sig;
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
131762306a36Sopenharmony_ci		hlist_del_rcu(&key->node);
131862306a36Sopenharmony_ci		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
131962306a36Sopenharmony_ci		kfree_rcu(key, rcu);
132062306a36Sopenharmony_ci	}
132162306a36Sopenharmony_ci}
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_cistatic int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
132462306a36Sopenharmony_ci				 sockptr_t optval, int optlen)
132562306a36Sopenharmony_ci{
132662306a36Sopenharmony_ci	struct tcp_md5sig cmd;
132762306a36Sopenharmony_ci	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
132862306a36Sopenharmony_ci	const union tcp_md5_addr *addr;
132962306a36Sopenharmony_ci	u8 prefixlen = 32;
133062306a36Sopenharmony_ci	int l3index = 0;
133162306a36Sopenharmony_ci	u8 flags;
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_ci	if (optlen < sizeof(cmd))
133462306a36Sopenharmony_ci		return -EINVAL;
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ci	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
133762306a36Sopenharmony_ci		return -EFAULT;
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	if (sin->sin_family != AF_INET)
134062306a36Sopenharmony_ci		return -EINVAL;
134162306a36Sopenharmony_ci
134262306a36Sopenharmony_ci	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci	if (optname == TCP_MD5SIG_EXT &&
134562306a36Sopenharmony_ci	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
134662306a36Sopenharmony_ci		prefixlen = cmd.tcpm_prefixlen;
134762306a36Sopenharmony_ci		if (prefixlen > 32)
134862306a36Sopenharmony_ci			return -EINVAL;
134962306a36Sopenharmony_ci	}
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
135262306a36Sopenharmony_ci	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
135362306a36Sopenharmony_ci		struct net_device *dev;
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ci		rcu_read_lock();
135662306a36Sopenharmony_ci		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
135762306a36Sopenharmony_ci		if (dev && netif_is_l3_master(dev))
135862306a36Sopenharmony_ci			l3index = dev->ifindex;
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci		rcu_read_unlock();
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci		/* ok to reference set/not set outside of rcu;
136362306a36Sopenharmony_ci		 * right now device MUST be an L3 master
136462306a36Sopenharmony_ci		 */
136562306a36Sopenharmony_ci		if (!dev || !l3index)
136662306a36Sopenharmony_ci			return -EINVAL;
136762306a36Sopenharmony_ci	}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	if (!cmd.tcpm_keylen)
137262306a36Sopenharmony_ci		return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags);
137362306a36Sopenharmony_ci
137462306a36Sopenharmony_ci	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
137562306a36Sopenharmony_ci		return -EINVAL;
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_ci	return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
137862306a36Sopenharmony_ci			      cmd.tcpm_key, cmd.tcpm_keylen);
137962306a36Sopenharmony_ci}
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_cistatic int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
138262306a36Sopenharmony_ci				   __be32 daddr, __be32 saddr,
138362306a36Sopenharmony_ci				   const struct tcphdr *th, int nbytes)
138462306a36Sopenharmony_ci{
138562306a36Sopenharmony_ci	struct tcp4_pseudohdr *bp;
138662306a36Sopenharmony_ci	struct scatterlist sg;
138762306a36Sopenharmony_ci	struct tcphdr *_th;
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	bp = hp->scratch;
139062306a36Sopenharmony_ci	bp->saddr = saddr;
139162306a36Sopenharmony_ci	bp->daddr = daddr;
139262306a36Sopenharmony_ci	bp->pad = 0;
139362306a36Sopenharmony_ci	bp->protocol = IPPROTO_TCP;
139462306a36Sopenharmony_ci	bp->len = cpu_to_be16(nbytes);
139562306a36Sopenharmony_ci
139662306a36Sopenharmony_ci	_th = (struct tcphdr *)(bp + 1);
139762306a36Sopenharmony_ci	memcpy(_th, th, sizeof(*th));
139862306a36Sopenharmony_ci	_th->check = 0;
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
140162306a36Sopenharmony_ci	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
140262306a36Sopenharmony_ci				sizeof(*bp) + sizeof(*th));
140362306a36Sopenharmony_ci	return crypto_ahash_update(hp->md5_req);
140462306a36Sopenharmony_ci}
140562306a36Sopenharmony_ci
140662306a36Sopenharmony_cistatic int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
140762306a36Sopenharmony_ci			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
140862306a36Sopenharmony_ci{
140962306a36Sopenharmony_ci	struct tcp_md5sig_pool *hp;
141062306a36Sopenharmony_ci	struct ahash_request *req;
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci	hp = tcp_get_md5sig_pool();
141362306a36Sopenharmony_ci	if (!hp)
141462306a36Sopenharmony_ci		goto clear_hash_noput;
141562306a36Sopenharmony_ci	req = hp->md5_req;
141662306a36Sopenharmony_ci
141762306a36Sopenharmony_ci	if (crypto_ahash_init(req))
141862306a36Sopenharmony_ci		goto clear_hash;
141962306a36Sopenharmony_ci	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
142062306a36Sopenharmony_ci		goto clear_hash;
142162306a36Sopenharmony_ci	if (tcp_md5_hash_key(hp, key))
142262306a36Sopenharmony_ci		goto clear_hash;
142362306a36Sopenharmony_ci	ahash_request_set_crypt(req, NULL, md5_hash, 0);
142462306a36Sopenharmony_ci	if (crypto_ahash_final(req))
142562306a36Sopenharmony_ci		goto clear_hash;
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci	tcp_put_md5sig_pool();
142862306a36Sopenharmony_ci	return 0;
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ciclear_hash:
143162306a36Sopenharmony_ci	tcp_put_md5sig_pool();
143262306a36Sopenharmony_ciclear_hash_noput:
143362306a36Sopenharmony_ci	memset(md5_hash, 0, 16);
143462306a36Sopenharmony_ci	return 1;
143562306a36Sopenharmony_ci}
143662306a36Sopenharmony_ci
143762306a36Sopenharmony_ciint tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
143862306a36Sopenharmony_ci			const struct sock *sk,
143962306a36Sopenharmony_ci			const struct sk_buff *skb)
144062306a36Sopenharmony_ci{
144162306a36Sopenharmony_ci	struct tcp_md5sig_pool *hp;
144262306a36Sopenharmony_ci	struct ahash_request *req;
144362306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
144462306a36Sopenharmony_ci	__be32 saddr, daddr;
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_ci	if (sk) { /* valid for establish/request sockets */
144762306a36Sopenharmony_ci		saddr = sk->sk_rcv_saddr;
144862306a36Sopenharmony_ci		daddr = sk->sk_daddr;
144962306a36Sopenharmony_ci	} else {
145062306a36Sopenharmony_ci		const struct iphdr *iph = ip_hdr(skb);
145162306a36Sopenharmony_ci		saddr = iph->saddr;
145262306a36Sopenharmony_ci		daddr = iph->daddr;
145362306a36Sopenharmony_ci	}
145462306a36Sopenharmony_ci
145562306a36Sopenharmony_ci	hp = tcp_get_md5sig_pool();
145662306a36Sopenharmony_ci	if (!hp)
145762306a36Sopenharmony_ci		goto clear_hash_noput;
145862306a36Sopenharmony_ci	req = hp->md5_req;
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci	if (crypto_ahash_init(req))
146162306a36Sopenharmony_ci		goto clear_hash;
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
146462306a36Sopenharmony_ci		goto clear_hash;
146562306a36Sopenharmony_ci	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
146662306a36Sopenharmony_ci		goto clear_hash;
146762306a36Sopenharmony_ci	if (tcp_md5_hash_key(hp, key))
146862306a36Sopenharmony_ci		goto clear_hash;
146962306a36Sopenharmony_ci	ahash_request_set_crypt(req, NULL, md5_hash, 0);
147062306a36Sopenharmony_ci	if (crypto_ahash_final(req))
147162306a36Sopenharmony_ci		goto clear_hash;
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci	tcp_put_md5sig_pool();
147462306a36Sopenharmony_ci	return 0;
147562306a36Sopenharmony_ci
147662306a36Sopenharmony_ciclear_hash:
147762306a36Sopenharmony_ci	tcp_put_md5sig_pool();
147862306a36Sopenharmony_ciclear_hash_noput:
147962306a36Sopenharmony_ci	memset(md5_hash, 0, 16);
148062306a36Sopenharmony_ci	return 1;
148162306a36Sopenharmony_ci}
148262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_md5_hash_skb);
148362306a36Sopenharmony_ci
148462306a36Sopenharmony_ci#endif
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_cistatic void tcp_v4_init_req(struct request_sock *req,
148762306a36Sopenharmony_ci			    const struct sock *sk_listener,
148862306a36Sopenharmony_ci			    struct sk_buff *skb)
148962306a36Sopenharmony_ci{
149062306a36Sopenharmony_ci	struct inet_request_sock *ireq = inet_rsk(req);
149162306a36Sopenharmony_ci	struct net *net = sock_net(sk_listener);
149262306a36Sopenharmony_ci
149362306a36Sopenharmony_ci	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
149462306a36Sopenharmony_ci	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
149562306a36Sopenharmony_ci	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
149662306a36Sopenharmony_ci}
149762306a36Sopenharmony_ci
149862306a36Sopenharmony_cistatic struct dst_entry *tcp_v4_route_req(const struct sock *sk,
149962306a36Sopenharmony_ci					  struct sk_buff *skb,
150062306a36Sopenharmony_ci					  struct flowi *fl,
150162306a36Sopenharmony_ci					  struct request_sock *req)
150262306a36Sopenharmony_ci{
150362306a36Sopenharmony_ci	tcp_v4_init_req(req, sk, skb);
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	if (security_inet_conn_request(sk, skb, req))
150662306a36Sopenharmony_ci		return NULL;
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci	return inet_csk_route_req(sk, &fl->u.ip4, req);
150962306a36Sopenharmony_ci}
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_cistruct request_sock_ops tcp_request_sock_ops __read_mostly = {
151262306a36Sopenharmony_ci	.family		=	PF_INET,
151362306a36Sopenharmony_ci	.obj_size	=	sizeof(struct tcp_request_sock),
151462306a36Sopenharmony_ci	.rtx_syn_ack	=	tcp_rtx_synack,
151562306a36Sopenharmony_ci	.send_ack	=	tcp_v4_reqsk_send_ack,
151662306a36Sopenharmony_ci	.destructor	=	tcp_v4_reqsk_destructor,
151762306a36Sopenharmony_ci	.send_reset	=	tcp_v4_send_reset,
151862306a36Sopenharmony_ci	.syn_ack_timeout =	tcp_syn_ack_timeout,
151962306a36Sopenharmony_ci};
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_ciconst struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
152262306a36Sopenharmony_ci	.mss_clamp	=	TCP_MSS_DEFAULT,
152362306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
152462306a36Sopenharmony_ci	.req_md5_lookup	=	tcp_v4_md5_lookup,
152562306a36Sopenharmony_ci	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
152662306a36Sopenharmony_ci#endif
152762306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES
152862306a36Sopenharmony_ci	.cookie_init_seq =	cookie_v4_init_sequence,
152962306a36Sopenharmony_ci#endif
153062306a36Sopenharmony_ci	.route_req	=	tcp_v4_route_req,
153162306a36Sopenharmony_ci	.init_seq	=	tcp_v4_init_seq,
153262306a36Sopenharmony_ci	.init_ts_off	=	tcp_v4_init_ts_off,
153362306a36Sopenharmony_ci	.send_synack	=	tcp_v4_send_synack,
153462306a36Sopenharmony_ci};
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ciint tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
153762306a36Sopenharmony_ci{
153862306a36Sopenharmony_ci	/* Never answer to SYNs send to broadcast or multicast */
153962306a36Sopenharmony_ci	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
154062306a36Sopenharmony_ci		goto drop;
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	return tcp_conn_request(&tcp_request_sock_ops,
154362306a36Sopenharmony_ci				&tcp_request_sock_ipv4_ops, sk, skb);
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_cidrop:
154662306a36Sopenharmony_ci	tcp_listendrop(sk);
154762306a36Sopenharmony_ci	return 0;
154862306a36Sopenharmony_ci}
154962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_conn_request);
155062306a36Sopenharmony_ci
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci/*
155362306a36Sopenharmony_ci * The three way handshake has completed - we got a valid synack -
155462306a36Sopenharmony_ci * now create the new socket.
155562306a36Sopenharmony_ci */
155662306a36Sopenharmony_cistruct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
155762306a36Sopenharmony_ci				  struct request_sock *req,
155862306a36Sopenharmony_ci				  struct dst_entry *dst,
155962306a36Sopenharmony_ci				  struct request_sock *req_unhash,
156062306a36Sopenharmony_ci				  bool *own_req)
156162306a36Sopenharmony_ci{
156262306a36Sopenharmony_ci	struct inet_request_sock *ireq;
156362306a36Sopenharmony_ci	bool found_dup_sk = false;
156462306a36Sopenharmony_ci	struct inet_sock *newinet;
156562306a36Sopenharmony_ci	struct tcp_sock *newtp;
156662306a36Sopenharmony_ci	struct sock *newsk;
156762306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
156862306a36Sopenharmony_ci	const union tcp_md5_addr *addr;
156962306a36Sopenharmony_ci	struct tcp_md5sig_key *key;
157062306a36Sopenharmony_ci	int l3index;
157162306a36Sopenharmony_ci#endif
157262306a36Sopenharmony_ci	struct ip_options_rcu *inet_opt;
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci	if (sk_acceptq_is_full(sk))
157562306a36Sopenharmony_ci		goto exit_overflow;
157662306a36Sopenharmony_ci
157762306a36Sopenharmony_ci	newsk = tcp_create_openreq_child(sk, req, skb);
157862306a36Sopenharmony_ci	if (!newsk)
157962306a36Sopenharmony_ci		goto exit_nonewsk;
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_ci	newsk->sk_gso_type = SKB_GSO_TCPV4;
158262306a36Sopenharmony_ci	inet_sk_rx_dst_set(newsk, skb);
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_ci	newtp		      = tcp_sk(newsk);
158562306a36Sopenharmony_ci	newinet		      = inet_sk(newsk);
158662306a36Sopenharmony_ci	ireq		      = inet_rsk(req);
158762306a36Sopenharmony_ci	sk_daddr_set(newsk, ireq->ir_rmt_addr);
158862306a36Sopenharmony_ci	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
158962306a36Sopenharmony_ci	newsk->sk_bound_dev_if = ireq->ir_iif;
159062306a36Sopenharmony_ci	newinet->inet_saddr   = ireq->ir_loc_addr;
159162306a36Sopenharmony_ci	inet_opt	      = rcu_dereference(ireq->ireq_opt);
159262306a36Sopenharmony_ci	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
159362306a36Sopenharmony_ci	newinet->mc_index     = inet_iif(skb);
159462306a36Sopenharmony_ci	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
159562306a36Sopenharmony_ci	newinet->rcv_tos      = ip_hdr(skb)->tos;
159662306a36Sopenharmony_ci	inet_csk(newsk)->icsk_ext_hdr_len = 0;
159762306a36Sopenharmony_ci	if (inet_opt)
159862306a36Sopenharmony_ci		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
159962306a36Sopenharmony_ci	atomic_set(&newinet->inet_id, get_random_u16());
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_ci	/* Set ToS of the new socket based upon the value of incoming SYN.
160262306a36Sopenharmony_ci	 * ECT bits are set later in tcp_init_transfer().
160362306a36Sopenharmony_ci	 */
160462306a36Sopenharmony_ci	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
160562306a36Sopenharmony_ci		newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
160662306a36Sopenharmony_ci
160762306a36Sopenharmony_ci	if (!dst) {
160862306a36Sopenharmony_ci		dst = inet_csk_route_child_sock(sk, newsk, req);
160962306a36Sopenharmony_ci		if (!dst)
161062306a36Sopenharmony_ci			goto put_and_exit;
161162306a36Sopenharmony_ci	} else {
161262306a36Sopenharmony_ci		/* syncookie case : see end of cookie_v4_check() */
161362306a36Sopenharmony_ci	}
161462306a36Sopenharmony_ci	sk_setup_caps(newsk, dst);
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	tcp_ca_openreq_child(newsk, dst);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	tcp_sync_mss(newsk, dst_mtu(dst));
161962306a36Sopenharmony_ci	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_ci	tcp_initialize_rcv_mss(newsk);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
162462306a36Sopenharmony_ci	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
162562306a36Sopenharmony_ci	/* Copy over the MD5 key from the original socket */
162662306a36Sopenharmony_ci	addr = (union tcp_md5_addr *)&newinet->inet_daddr;
162762306a36Sopenharmony_ci	key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
162862306a36Sopenharmony_ci	if (key) {
162962306a36Sopenharmony_ci		if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key))
163062306a36Sopenharmony_ci			goto put_and_exit;
163162306a36Sopenharmony_ci		sk_gso_disable(newsk);
163262306a36Sopenharmony_ci	}
163362306a36Sopenharmony_ci#endif
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	if (__inet_inherit_port(sk, newsk) < 0)
163662306a36Sopenharmony_ci		goto put_and_exit;
163762306a36Sopenharmony_ci	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
163862306a36Sopenharmony_ci				       &found_dup_sk);
163962306a36Sopenharmony_ci	if (likely(*own_req)) {
164062306a36Sopenharmony_ci		tcp_move_syn(newtp, req);
164162306a36Sopenharmony_ci		ireq->ireq_opt = NULL;
164262306a36Sopenharmony_ci	} else {
164362306a36Sopenharmony_ci		newinet->inet_opt = NULL;
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci		if (!req_unhash && found_dup_sk) {
164662306a36Sopenharmony_ci			/* This code path should only be executed in the
164762306a36Sopenharmony_ci			 * syncookie case only
164862306a36Sopenharmony_ci			 */
164962306a36Sopenharmony_ci			bh_unlock_sock(newsk);
165062306a36Sopenharmony_ci			sock_put(newsk);
165162306a36Sopenharmony_ci			newsk = NULL;
165262306a36Sopenharmony_ci		}
165362306a36Sopenharmony_ci	}
165462306a36Sopenharmony_ci	return newsk;
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ciexit_overflow:
165762306a36Sopenharmony_ci	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
165862306a36Sopenharmony_ciexit_nonewsk:
165962306a36Sopenharmony_ci	dst_release(dst);
166062306a36Sopenharmony_ciexit:
166162306a36Sopenharmony_ci	tcp_listendrop(sk);
166262306a36Sopenharmony_ci	return NULL;
166362306a36Sopenharmony_ciput_and_exit:
166462306a36Sopenharmony_ci	newinet->inet_opt = NULL;
166562306a36Sopenharmony_ci	inet_csk_prepare_forced_close(newsk);
166662306a36Sopenharmony_ci	tcp_done(newsk);
166762306a36Sopenharmony_ci	goto exit;
166862306a36Sopenharmony_ci}
166962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_syn_recv_sock);
167062306a36Sopenharmony_ci
/* With CONFIG_SYN_COOKIES, pass non-SYN segments through cookie_v4_check()
 * and return its result; SYN segments, and all segments when syncookies are
 * compiled out, return @sk unchanged.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v4_check(sk, skb);
#endif
	return sk;
}
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_ciu16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
168362306a36Sopenharmony_ci			 struct tcphdr *th, u32 *cookie)
168462306a36Sopenharmony_ci{
168562306a36Sopenharmony_ci	u16 mss = 0;
168662306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES
168762306a36Sopenharmony_ci	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
168862306a36Sopenharmony_ci				    &tcp_request_sock_ipv4_ops, sk, th);
168962306a36Sopenharmony_ci	if (mss) {
169062306a36Sopenharmony_ci		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
169162306a36Sopenharmony_ci		tcp_synq_overflow(sk);
169262306a36Sopenharmony_ci	}
169362306a36Sopenharmony_ci#endif
169462306a36Sopenharmony_ci	return mss;
169562306a36Sopenharmony_ci}
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ciINDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
169862306a36Sopenharmony_ci							   u32));
169962306a36Sopenharmony_ci/* The socket must have it's spinlock held when we get
170062306a36Sopenharmony_ci * here, unless it is a TCP_LISTEN socket.
170162306a36Sopenharmony_ci *
170262306a36Sopenharmony_ci * We have a potential double-lock case here, so even when
170362306a36Sopenharmony_ci * doing backlog processing we use the BH locking scheme.
170462306a36Sopenharmony_ci * This is because we cannot sleep with the original spinlock
170562306a36Sopenharmony_ci * held.
170662306a36Sopenharmony_ci */
170762306a36Sopenharmony_ciint tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
170862306a36Sopenharmony_ci{
170962306a36Sopenharmony_ci	enum skb_drop_reason reason;
171062306a36Sopenharmony_ci	struct sock *rsk;
171162306a36Sopenharmony_ci
171262306a36Sopenharmony_ci	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
171362306a36Sopenharmony_ci		struct dst_entry *dst;
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci		dst = rcu_dereference_protected(sk->sk_rx_dst,
171662306a36Sopenharmony_ci						lockdep_sock_is_held(sk));
171762306a36Sopenharmony_ci
171862306a36Sopenharmony_ci		sock_rps_save_rxhash(sk, skb);
171962306a36Sopenharmony_ci		sk_mark_napi_id(sk, skb);
172062306a36Sopenharmony_ci		if (dst) {
172162306a36Sopenharmony_ci			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
172262306a36Sopenharmony_ci			    !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
172362306a36Sopenharmony_ci					     dst, 0)) {
172462306a36Sopenharmony_ci				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
172562306a36Sopenharmony_ci				dst_release(dst);
172662306a36Sopenharmony_ci			}
172762306a36Sopenharmony_ci		}
172862306a36Sopenharmony_ci		tcp_rcv_established(sk, skb);
172962306a36Sopenharmony_ci		return 0;
173062306a36Sopenharmony_ci	}
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	reason = SKB_DROP_REASON_NOT_SPECIFIED;
173362306a36Sopenharmony_ci	if (tcp_checksum_complete(skb))
173462306a36Sopenharmony_ci		goto csum_err;
173562306a36Sopenharmony_ci
173662306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN) {
173762306a36Sopenharmony_ci		struct sock *nsk = tcp_v4_cookie_check(sk, skb);
173862306a36Sopenharmony_ci
173962306a36Sopenharmony_ci		if (!nsk)
174062306a36Sopenharmony_ci			goto discard;
174162306a36Sopenharmony_ci		if (nsk != sk) {
174262306a36Sopenharmony_ci			if (tcp_child_process(sk, nsk, skb)) {
174362306a36Sopenharmony_ci				rsk = nsk;
174462306a36Sopenharmony_ci				goto reset;
174562306a36Sopenharmony_ci			}
174662306a36Sopenharmony_ci			return 0;
174762306a36Sopenharmony_ci		}
174862306a36Sopenharmony_ci	} else
174962306a36Sopenharmony_ci		sock_rps_save_rxhash(sk, skb);
175062306a36Sopenharmony_ci
175162306a36Sopenharmony_ci	if (tcp_rcv_state_process(sk, skb)) {
175262306a36Sopenharmony_ci		rsk = sk;
175362306a36Sopenharmony_ci		goto reset;
175462306a36Sopenharmony_ci	}
175562306a36Sopenharmony_ci	return 0;
175662306a36Sopenharmony_ci
175762306a36Sopenharmony_cireset:
175862306a36Sopenharmony_ci	tcp_v4_send_reset(rsk, skb);
175962306a36Sopenharmony_cidiscard:
176062306a36Sopenharmony_ci	kfree_skb_reason(skb, reason);
176162306a36Sopenharmony_ci	/* Be careful here. If this function gets more complicated and
176262306a36Sopenharmony_ci	 * gcc suffers from register pressure on the x86, sk (in %ebx)
176362306a36Sopenharmony_ci	 * might be destroyed here. This current version compiles correctly,
176462306a36Sopenharmony_ci	 * but you have been warned.
176562306a36Sopenharmony_ci	 */
176662306a36Sopenharmony_ci	return 0;
176762306a36Sopenharmony_ci
176862306a36Sopenharmony_cicsum_err:
176962306a36Sopenharmony_ci	reason = SKB_DROP_REASON_TCP_CSUM;
177062306a36Sopenharmony_ci	trace_tcp_bad_csum(skb);
177162306a36Sopenharmony_ci	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
177262306a36Sopenharmony_ci	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
177362306a36Sopenharmony_ci	goto discard;
177462306a36Sopenharmony_ci}
177562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_do_rcv);
177662306a36Sopenharmony_ci
177762306a36Sopenharmony_ciint tcp_v4_early_demux(struct sk_buff *skb)
177862306a36Sopenharmony_ci{
177962306a36Sopenharmony_ci	struct net *net = dev_net(skb->dev);
178062306a36Sopenharmony_ci	const struct iphdr *iph;
178162306a36Sopenharmony_ci	const struct tcphdr *th;
178262306a36Sopenharmony_ci	struct sock *sk;
178362306a36Sopenharmony_ci
178462306a36Sopenharmony_ci	if (skb->pkt_type != PACKET_HOST)
178562306a36Sopenharmony_ci		return 0;
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
178862306a36Sopenharmony_ci		return 0;
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ci	iph = ip_hdr(skb);
179162306a36Sopenharmony_ci	th = tcp_hdr(skb);
179262306a36Sopenharmony_ci
179362306a36Sopenharmony_ci	if (th->doff < sizeof(struct tcphdr) / 4)
179462306a36Sopenharmony_ci		return 0;
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
179762306a36Sopenharmony_ci				       iph->saddr, th->source,
179862306a36Sopenharmony_ci				       iph->daddr, ntohs(th->dest),
179962306a36Sopenharmony_ci				       skb->skb_iif, inet_sdif(skb));
180062306a36Sopenharmony_ci	if (sk) {
180162306a36Sopenharmony_ci		skb->sk = sk;
180262306a36Sopenharmony_ci		skb->destructor = sock_edemux;
180362306a36Sopenharmony_ci		if (sk_fullsock(sk)) {
180462306a36Sopenharmony_ci			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
180562306a36Sopenharmony_ci
180662306a36Sopenharmony_ci			if (dst)
180762306a36Sopenharmony_ci				dst = dst_check(dst, 0);
180862306a36Sopenharmony_ci			if (dst &&
180962306a36Sopenharmony_ci			    sk->sk_rx_dst_ifindex == skb->skb_iif)
181062306a36Sopenharmony_ci				skb_dst_set_noref(skb, dst);
181162306a36Sopenharmony_ci		}
181262306a36Sopenharmony_ci	}
181362306a36Sopenharmony_ci	return 0;
181462306a36Sopenharmony_ci}
181562306a36Sopenharmony_ci
/*
 * tcp_add_backlog() - queue @skb on the backlog of @sk, merging it into the
 * current backlog tail when the two segments are compatible.
 *
 * tcp_v4_rcv() calls this with the socket spinlock held, when the socket is
 * owned by user context.
 *
 * @sk:     destination socket
 * @skb:    incoming TCP segment; skb->data points at the TCP header
 * @reason: receives the drop reason when true is returned
 *
 * Returns false when @skb was coalesced into the tail or appended to the
 * backlog.  Returns true when the caller has to drop @skb (bad checksum, or
 * sk_add_backlog() refused it); on that path bh_unlock_sock() has already
 * been called on @sk here, so the caller must not unlock again.
 */
181662306a36Sopenharmony_cibool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
181762306a36Sopenharmony_ci		     enum skb_drop_reason *reason)
181862306a36Sopenharmony_ci{
181962306a36Sopenharmony_ci	u32 limit, tail_gso_size, tail_gso_segs;
182062306a36Sopenharmony_ci	struct skb_shared_info *shinfo;
182162306a36Sopenharmony_ci	const struct tcphdr *th;
182262306a36Sopenharmony_ci	struct tcphdr *thtail;
182362306a36Sopenharmony_ci	struct sk_buff *tail;
182462306a36Sopenharmony_ci	unsigned int hdrlen;
182562306a36Sopenharmony_ci	bool fragstolen;
182662306a36Sopenharmony_ci	u32 gso_segs;
182762306a36Sopenharmony_ci	u32 gso_size;
182862306a36Sopenharmony_ci	int delta;
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
183162306a36Sopenharmony_ci	 * we can fix skb->truesize to its real value to avoid future drops.
183262306a36Sopenharmony_ci	 * This is valid because skb is not yet charged to the socket.
183362306a36Sopenharmony_ci	 * It has been noticed pure SACK packets were sometimes dropped
183462306a36Sopenharmony_ci	 * (if cooked by drivers without copybreak feature).
183562306a36Sopenharmony_ci	 */
183662306a36Sopenharmony_ci	skb_condense(skb);
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	skb_dst_drop(skb);
183962306a36Sopenharmony_ci
	/* Checksum failure: unlock here and let the caller drop the skb. */
184062306a36Sopenharmony_ci	if (unlikely(tcp_checksum_complete(skb))) {
184162306a36Sopenharmony_ci		bh_unlock_sock(sk);
184262306a36Sopenharmony_ci		trace_tcp_bad_csum(skb);
184362306a36Sopenharmony_ci		*reason = SKB_DROP_REASON_TCP_CSUM;
184462306a36Sopenharmony_ci		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
184562306a36Sopenharmony_ci		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
184662306a36Sopenharmony_ci		return true;
184762306a36Sopenharmony_ci	}
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_ci	/* Attempt coalescing to last skb in backlog, even if we are
185062306a36Sopenharmony_ci	 * above the limits.
185162306a36Sopenharmony_ci	 * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
185262306a36Sopenharmony_ci	 */
185362306a36Sopenharmony_ci	th = (const struct tcphdr *)skb->data;
185462306a36Sopenharmony_ci	hdrlen = th->doff * 4;
185562306a36Sopenharmony_ci
185662306a36Sopenharmony_ci	tail = sk->sk_backlog.tail;
185762306a36Sopenharmony_ci	if (!tail)
185862306a36Sopenharmony_ci		goto no_coalesce;
185962306a36Sopenharmony_ci	thtail = (struct tcphdr *)tail->data;
186062306a36Sopenharmony_ci
	/* Coalesce only if skb directly follows tail in sequence space, the
	 * DS field matches, neither segment carries SYN/RST/URG, both carry
	 * ACK, ECE/CWR agree, the decrypted state (CONFIG_TLS_DEVICE) and
	 * MPTCP collapse check agree, and the TCP header length and the
	 * option bytes after the fixed header are identical.
	 */
186162306a36Sopenharmony_ci	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
186262306a36Sopenharmony_ci	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
186362306a36Sopenharmony_ci	    ((TCP_SKB_CB(tail)->tcp_flags |
186462306a36Sopenharmony_ci	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
186562306a36Sopenharmony_ci	    !((TCP_SKB_CB(tail)->tcp_flags &
186662306a36Sopenharmony_ci	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
186762306a36Sopenharmony_ci	    ((TCP_SKB_CB(tail)->tcp_flags ^
186862306a36Sopenharmony_ci	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
186962306a36Sopenharmony_ci#ifdef CONFIG_TLS_DEVICE
187062306a36Sopenharmony_ci	    tail->decrypted != skb->decrypted ||
187162306a36Sopenharmony_ci#endif
187262306a36Sopenharmony_ci	    !mptcp_skb_can_collapse(tail, skb) ||
187362306a36Sopenharmony_ci	    thtail->doff != th->doff ||
187462306a36Sopenharmony_ci	    memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
187562306a36Sopenharmony_ci		goto no_coalesce;
187662306a36Sopenharmony_ci
	/* Strip the TCP header so only payload is offered for merging; the
	 * __skb_push() below restores it if coalescing fails.
	 */
187762306a36Sopenharmony_ci	__skb_pull(skb, hdrlen);
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_ci	shinfo = skb_shinfo(skb);
188062306a36Sopenharmony_ci	gso_size = shinfo->gso_size ?: skb->len;
188162306a36Sopenharmony_ci	gso_segs = shinfo->gso_segs ?: 1;
188262306a36Sopenharmony_ci
	/* shinfo refers to tail from here on, so the gso_* stores after a
	 * successful coalesce update tail, not skb.
	 */
188362306a36Sopenharmony_ci	shinfo = skb_shinfo(tail);
188462306a36Sopenharmony_ci	tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
188562306a36Sopenharmony_ci	tail_gso_segs = shinfo->gso_segs ?: 1;
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
188862306a36Sopenharmony_ci		TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
188962306a36Sopenharmony_ci
		/* Take ack_seq and window from skb unless its ACK is older. */
189062306a36Sopenharmony_ci		if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) {
189162306a36Sopenharmony_ci			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
189262306a36Sopenharmony_ci			thtail->window = th->window;
189362306a36Sopenharmony_ci		}
189462306a36Sopenharmony_ci
189562306a36Sopenharmony_ci		/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
189662306a36Sopenharmony_ci		 * thtail->fin, so that the fast path in tcp_rcv_established()
189762306a36Sopenharmony_ci		 * is not entered if we append a packet with a FIN.
189862306a36Sopenharmony_ci		 * SYN, RST, URG are not present.
189962306a36Sopenharmony_ci		 * ACK is set on both packets.
190062306a36Sopenharmony_ci		 * PSH : we do not really care in TCP stack,
190162306a36Sopenharmony_ci		 *       at least for 'GRO' packets.
190262306a36Sopenharmony_ci		 */
190362306a36Sopenharmony_ci		thtail->fin |= th->fin;
190462306a36Sopenharmony_ci		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
190562306a36Sopenharmony_ci
190662306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->has_rxtstamp) {
190762306a36Sopenharmony_ci			TCP_SKB_CB(tail)->has_rxtstamp = true;
190862306a36Sopenharmony_ci			tail->tstamp = skb->tstamp;
190962306a36Sopenharmony_ci			skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
191062306a36Sopenharmony_ci		}
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci		/* Not as strict as GRO. We only need to carry mss max value */
191362306a36Sopenharmony_ci		shinfo->gso_size = max(gso_size, tail_gso_size);
191462306a36Sopenharmony_ci		shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci		sk->sk_backlog.len += delta;
191762306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk),
191862306a36Sopenharmony_ci				LINUX_MIB_TCPBACKLOGCOALESCE);
191962306a36Sopenharmony_ci		kfree_skb_partial(skb, fragstolen);
192062306a36Sopenharmony_ci		return false;
192162306a36Sopenharmony_ci	}
	/* Coalescing failed: put the TCP header back before queueing. */
192262306a36Sopenharmony_ci	__skb_push(skb, hdrlen);
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_cino_coalesce:
192562306a36Sopenharmony_ci	limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	/* Only socket owner can try to collapse/prune rx queues
192862306a36Sopenharmony_ci	 * to reduce memory overhead, so add a little headroom here.
192962306a36Sopenharmony_ci	 * Few sockets backlog are possibly concurrently non empty.
193062306a36Sopenharmony_ci	 */
193162306a36Sopenharmony_ci	limit += 64 * 1024;
193262306a36Sopenharmony_ci
	/* Backlog refused the skb: unlock here and let the caller drop it. */
193362306a36Sopenharmony_ci	if (unlikely(sk_add_backlog(sk, skb, limit))) {
193462306a36Sopenharmony_ci		bh_unlock_sock(sk);
193562306a36Sopenharmony_ci		*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
193662306a36Sopenharmony_ci		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
193762306a36Sopenharmony_ci		return true;
193862306a36Sopenharmony_ci	}
193962306a36Sopenharmony_ci	return false;
194062306a36Sopenharmony_ci}
194162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_add_backlog);
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ciint tcp_filter(struct sock *sk, struct sk_buff *skb)
194462306a36Sopenharmony_ci{
194562306a36Sopenharmony_ci	struct tcphdr *th = (struct tcphdr *)skb->data;
194662306a36Sopenharmony_ci
194762306a36Sopenharmony_ci	return sk_filter_trim_cap(sk, skb, th->doff * 4);
194862306a36Sopenharmony_ci}
194962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_filter);
195062306a36Sopenharmony_ci
195162306a36Sopenharmony_cistatic void tcp_v4_restore_cb(struct sk_buff *skb)
195262306a36Sopenharmony_ci{
195362306a36Sopenharmony_ci	memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
195462306a36Sopenharmony_ci		sizeof(struct inet_skb_parm));
195562306a36Sopenharmony_ci}
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_cistatic void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
195862306a36Sopenharmony_ci			   const struct tcphdr *th)
195962306a36Sopenharmony_ci{
196062306a36Sopenharmony_ci	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
196162306a36Sopenharmony_ci	 * barrier() makes sure compiler wont play fool^Waliasing games.
196262306a36Sopenharmony_ci	 */
196362306a36Sopenharmony_ci	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
196462306a36Sopenharmony_ci		sizeof(struct inet_skb_parm));
196562306a36Sopenharmony_ci	barrier();
196662306a36Sopenharmony_ci
196762306a36Sopenharmony_ci	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
196862306a36Sopenharmony_ci	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
196962306a36Sopenharmony_ci				    skb->len - th->doff * 4);
197062306a36Sopenharmony_ci	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
197162306a36Sopenharmony_ci	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
197262306a36Sopenharmony_ci	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
197362306a36Sopenharmony_ci	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
197462306a36Sopenharmony_ci	TCP_SKB_CB(skb)->sacked	 = 0;
197562306a36Sopenharmony_ci	TCP_SKB_CB(skb)->has_rxtstamp =
197662306a36Sopenharmony_ci			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
197762306a36Sopenharmony_ci}
197862306a36Sopenharmony_ci
197962306a36Sopenharmony_ci/*
198062306a36Sopenharmony_ci *	From tcp_input.c
198162306a36Sopenharmony_ci */
198262306a36Sopenharmony_ci
/*
 * tcp_v4_rcv() - receive entry point for one IPv4 TCP segment.
 *
 * Checks the TCP header length, initialises checksum validation and looks up
 * the owning socket.  The segment is then either processed directly with
 * tcp_v4_do_rcv(), queued with tcp_add_backlog() when the socket is owned by
 * user context, or handled by the request-socket (TCP_NEW_SYN_RECV) and
 * TIME_WAIT code below.
 *
 * Returns the result of tcp_v4_do_rcv() when that was called from here, and
 * 0 on every other path, including all drop paths.
 */
198362306a36Sopenharmony_ciint tcp_v4_rcv(struct sk_buff *skb)
198462306a36Sopenharmony_ci{
198562306a36Sopenharmony_ci	struct net *net = dev_net(skb->dev);
198662306a36Sopenharmony_ci	enum skb_drop_reason drop_reason;
198762306a36Sopenharmony_ci	int sdif = inet_sdif(skb);
198862306a36Sopenharmony_ci	int dif = inet_iif(skb);
198962306a36Sopenharmony_ci	const struct iphdr *iph;
199062306a36Sopenharmony_ci	const struct tcphdr *th;
199162306a36Sopenharmony_ci	bool refcounted;
199262306a36Sopenharmony_ci	struct sock *sk;
199362306a36Sopenharmony_ci	int ret;
199462306a36Sopenharmony_ci
199562306a36Sopenharmony_ci	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
199662306a36Sopenharmony_ci	if (skb->pkt_type != PACKET_HOST)
199762306a36Sopenharmony_ci		goto discard_it;
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci	/* Count it even if it's bad */
200062306a36Sopenharmony_ci	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
200362306a36Sopenharmony_ci		goto discard_it;
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	th = (const struct tcphdr *)skb->data;
200662306a36Sopenharmony_ci
200762306a36Sopenharmony_ci	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
200862306a36Sopenharmony_ci		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
200962306a36Sopenharmony_ci		goto bad_packet;
201062306a36Sopenharmony_ci	}
201162306a36Sopenharmony_ci	if (!pskb_may_pull(skb, th->doff * 4))
201262306a36Sopenharmony_ci		goto discard_it;
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci	/* An explanation is required here, I think.
201562306a36Sopenharmony_ci	 * Packet length and doff are validated by header prediction,
201662306a36Sopenharmony_ci	 * provided case of th->doff==0 is eliminated.
201762306a36Sopenharmony_ci	 * So, we defer the checks. */
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_ci	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
202062306a36Sopenharmony_ci		goto csum_error;
202162306a36Sopenharmony_ci
	/* NOTE(review): th is re-read from skb->data here, presumably because
	 * the pulls above may have moved the header - confirm.
	 */
202262306a36Sopenharmony_ci	th = (const struct tcphdr *)skb->data;
202362306a36Sopenharmony_ci	iph = ip_hdr(skb);
	/* Re-entered from the TCP_NEW_SYN_RECV handling below when the socket
	 * has to be looked up again.  refcounted is filled in by the lookup
	 * and decides whether the exit paths call sock_put().
	 */
202462306a36Sopenharmony_cilookup:
202562306a36Sopenharmony_ci	sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo,
202662306a36Sopenharmony_ci			       skb, __tcp_hdrlen(th), th->source,
202762306a36Sopenharmony_ci			       th->dest, sdif, &refcounted);
202862306a36Sopenharmony_ci	if (!sk)
202962306a36Sopenharmony_ci		goto no_tcp_socket;
203062306a36Sopenharmony_ci
	/* Also the restart point after a TIME_WAIT socket was replaced by a
	 * listener (see TCP_TW_SYN below).
	 */
203162306a36Sopenharmony_ciprocess:
203262306a36Sopenharmony_ci	if (sk->sk_state == TCP_TIME_WAIT)
203362306a36Sopenharmony_ci		goto do_time_wait;
203462306a36Sopenharmony_ci
203562306a36Sopenharmony_ci	if (sk->sk_state == TCP_NEW_SYN_RECV) {
203662306a36Sopenharmony_ci		struct request_sock *req = inet_reqsk(sk);
203762306a36Sopenharmony_ci		bool req_stolen = false;
203862306a36Sopenharmony_ci		struct sock *nsk;
203962306a36Sopenharmony_ci
		/* sk now refers to the request's listener; req itself is
		 * released with reqsk_put() on the exits below.
		 */
204062306a36Sopenharmony_ci		sk = req->rsk_listener;
204162306a36Sopenharmony_ci		if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
204262306a36Sopenharmony_ci			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
204362306a36Sopenharmony_ci		else
204462306a36Sopenharmony_ci			drop_reason = tcp_inbound_md5_hash(sk, skb,
204562306a36Sopenharmony_ci						   &iph->saddr, &iph->daddr,
204662306a36Sopenharmony_ci						   AF_INET, dif, sdif);
204762306a36Sopenharmony_ci		if (unlikely(drop_reason)) {
204862306a36Sopenharmony_ci			sk_drops_add(sk, skb);
204962306a36Sopenharmony_ci			reqsk_put(req);
205062306a36Sopenharmony_ci			goto discard_it;
205162306a36Sopenharmony_ci		}
205262306a36Sopenharmony_ci		if (tcp_checksum_complete(skb)) {
205362306a36Sopenharmony_ci			reqsk_put(req);
205462306a36Sopenharmony_ci			goto csum_error;
205562306a36Sopenharmony_ci		}
205662306a36Sopenharmony_ci		if (unlikely(sk->sk_state != TCP_LISTEN)) {
205762306a36Sopenharmony_ci			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
205862306a36Sopenharmony_ci			if (!nsk) {
205962306a36Sopenharmony_ci				inet_csk_reqsk_queue_drop_and_put(sk, req);
206062306a36Sopenharmony_ci				goto lookup;
206162306a36Sopenharmony_ci			}
206262306a36Sopenharmony_ci			sk = nsk;
206362306a36Sopenharmony_ci			/* reuseport_migrate_sock() has already held one sk_refcnt
206462306a36Sopenharmony_ci			 * before returning.
206562306a36Sopenharmony_ci			 */
206662306a36Sopenharmony_ci		} else {
206762306a36Sopenharmony_ci			/* We own a reference on the listener, increase it again
206862306a36Sopenharmony_ci			 * as we might lose it too soon.
206962306a36Sopenharmony_ci			 */
207062306a36Sopenharmony_ci			sock_hold(sk);
207162306a36Sopenharmony_ci		}
		/* Either branch above leaves a reference held on sk. */
207262306a36Sopenharmony_ci		refcounted = true;
207362306a36Sopenharmony_ci		nsk = NULL;
207462306a36Sopenharmony_ci		if (!tcp_filter(sk, skb)) {
207562306a36Sopenharmony_ci			th = (const struct tcphdr *)skb->data;
207662306a36Sopenharmony_ci			iph = ip_hdr(skb);
207762306a36Sopenharmony_ci			tcp_v4_fill_cb(skb, iph, th);
207862306a36Sopenharmony_ci			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
207962306a36Sopenharmony_ci		} else {
208062306a36Sopenharmony_ci			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
208162306a36Sopenharmony_ci		}
208262306a36Sopenharmony_ci		if (!nsk) {
208362306a36Sopenharmony_ci			reqsk_put(req);
208462306a36Sopenharmony_ci			if (req_stolen) {
208562306a36Sopenharmony_ci				/* Another cpu got exclusive access to req
208662306a36Sopenharmony_ci				 * and created a full blown socket.
208762306a36Sopenharmony_ci				 * Try to feed this packet to this socket
208862306a36Sopenharmony_ci				 * instead of discarding it.
208962306a36Sopenharmony_ci				 */
209062306a36Sopenharmony_ci				tcp_v4_restore_cb(skb);
209162306a36Sopenharmony_ci				sock_put(sk);
209262306a36Sopenharmony_ci				goto lookup;
209362306a36Sopenharmony_ci			}
209462306a36Sopenharmony_ci			goto discard_and_relse;
209562306a36Sopenharmony_ci		}
209662306a36Sopenharmony_ci		nf_reset_ct(skb);
		/* nsk == sk: continue below on the listener itself, with the
		 * IP control block restored.  Otherwise the segment goes to
		 * the child socket nsk and this function is done with it.
		 */
209762306a36Sopenharmony_ci		if (nsk == sk) {
209862306a36Sopenharmony_ci			reqsk_put(req);
209962306a36Sopenharmony_ci			tcp_v4_restore_cb(skb);
210062306a36Sopenharmony_ci		} else if (tcp_child_process(sk, nsk, skb)) {
210162306a36Sopenharmony_ci			tcp_v4_send_reset(nsk, skb);
210262306a36Sopenharmony_ci			goto discard_and_relse;
210362306a36Sopenharmony_ci		} else {
210462306a36Sopenharmony_ci			sock_put(sk);
210562306a36Sopenharmony_ci			return 0;
210662306a36Sopenharmony_ci		}
210762306a36Sopenharmony_ci	}
210862306a36Sopenharmony_ci
210962306a36Sopenharmony_ci	if (static_branch_unlikely(&ip4_min_ttl)) {
211062306a36Sopenharmony_ci		/* min_ttl can be changed concurrently from do_ip_setsockopt() */
211162306a36Sopenharmony_ci		if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
211262306a36Sopenharmony_ci			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
211362306a36Sopenharmony_ci			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
211462306a36Sopenharmony_ci			goto discard_and_relse;
211562306a36Sopenharmony_ci		}
211662306a36Sopenharmony_ci	}
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
211962306a36Sopenharmony_ci		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
212062306a36Sopenharmony_ci		goto discard_and_relse;
212162306a36Sopenharmony_ci	}
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci	drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr,
212462306a36Sopenharmony_ci					   &iph->daddr, AF_INET, dif, sdif);
212562306a36Sopenharmony_ci	if (drop_reason)
212662306a36Sopenharmony_ci		goto discard_and_relse;
212762306a36Sopenharmony_ci
212862306a36Sopenharmony_ci	nf_reset_ct(skb);
212962306a36Sopenharmony_ci
213062306a36Sopenharmony_ci	if (tcp_filter(sk, skb)) {
213162306a36Sopenharmony_ci		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
213262306a36Sopenharmony_ci		goto discard_and_relse;
213362306a36Sopenharmony_ci	}
213462306a36Sopenharmony_ci	th = (const struct tcphdr *)skb->data;
213562306a36Sopenharmony_ci	iph = ip_hdr(skb);
213662306a36Sopenharmony_ci	tcp_v4_fill_cb(skb, iph, th);
213762306a36Sopenharmony_ci
213862306a36Sopenharmony_ci	skb->dev = NULL;
213962306a36Sopenharmony_ci
	/* Listeners are handed to tcp_v4_do_rcv() without bh_lock_sock(). */
214062306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN) {
214162306a36Sopenharmony_ci		ret = tcp_v4_do_rcv(sk, skb);
214262306a36Sopenharmony_ci		goto put_and_return;
214362306a36Sopenharmony_ci	}
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	sk_incoming_cpu_update(sk);
214662306a36Sopenharmony_ci
214762306a36Sopenharmony_ci	bh_lock_sock_nested(sk);
214862306a36Sopenharmony_ci	tcp_segs_in(tcp_sk(sk), skb);
214962306a36Sopenharmony_ci	ret = 0;
215062306a36Sopenharmony_ci	if (!sock_owned_by_user(sk)) {
215162306a36Sopenharmony_ci		ret = tcp_v4_do_rcv(sk, skb);
215262306a36Sopenharmony_ci	} else {
		/* tcp_add_backlog() unlocks sk itself before returning true,
		 * so this jump deliberately skips the bh_unlock_sock() below.
		 */
215362306a36Sopenharmony_ci		if (tcp_add_backlog(sk, skb, &drop_reason))
215462306a36Sopenharmony_ci			goto discard_and_relse;
215562306a36Sopenharmony_ci	}
215662306a36Sopenharmony_ci	bh_unlock_sock(sk);
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ciput_and_return:
215962306a36Sopenharmony_ci	if (refcounted)
216062306a36Sopenharmony_ci		sock_put(sk);
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci	return ret;
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_cino_tcp_socket:
216562306a36Sopenharmony_ci	drop_reason = SKB_DROP_REASON_NO_SOCKET;
216662306a36Sopenharmony_ci	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
216762306a36Sopenharmony_ci		goto discard_it;
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci	tcp_v4_fill_cb(skb, iph, th);
217062306a36Sopenharmony_ci
	/* csum_error and bad_packet are jump targets inside this if-body:
	 * csum_error falls through to bad_packet, so a checksum failure bumps
	 * both counters, and neither path reaches the reset in the else arm.
	 */
217162306a36Sopenharmony_ci	if (tcp_checksum_complete(skb)) {
217262306a36Sopenharmony_cicsum_error:
217362306a36Sopenharmony_ci		drop_reason = SKB_DROP_REASON_TCP_CSUM;
217462306a36Sopenharmony_ci		trace_tcp_bad_csum(skb);
217562306a36Sopenharmony_ci		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
217662306a36Sopenharmony_cibad_packet:
217762306a36Sopenharmony_ci		__TCP_INC_STATS(net, TCP_MIB_INERRS);
217862306a36Sopenharmony_ci	} else {
217962306a36Sopenharmony_ci		tcp_v4_send_reset(NULL, skb);
218062306a36Sopenharmony_ci	}
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_cidiscard_it:
218362306a36Sopenharmony_ci	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
218462306a36Sopenharmony_ci	/* Discard frame. */
218562306a36Sopenharmony_ci	kfree_skb_reason(skb, drop_reason);
218662306a36Sopenharmony_ci	return 0;
218762306a36Sopenharmony_ci
	/* Drop path for segments matched to a socket: account the drop on sk
	 * and release the reference if one is held.
	 */
218862306a36Sopenharmony_cidiscard_and_relse:
218962306a36Sopenharmony_ci	sk_drops_add(sk, skb);
219062306a36Sopenharmony_ci	if (refcounted)
219162306a36Sopenharmony_ci		sock_put(sk);
219262306a36Sopenharmony_ci	goto discard_it;
219362306a36Sopenharmony_ci
219462306a36Sopenharmony_cido_time_wait:
219562306a36Sopenharmony_ci	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
219662306a36Sopenharmony_ci		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
219762306a36Sopenharmony_ci		inet_twsk_put(inet_twsk(sk));
219862306a36Sopenharmony_ci		goto discard_it;
219962306a36Sopenharmony_ci	}
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci	tcp_v4_fill_cb(skb, iph, th);
220262306a36Sopenharmony_ci
220362306a36Sopenharmony_ci	if (tcp_checksum_complete(skb)) {
220462306a36Sopenharmony_ci		inet_twsk_put(inet_twsk(sk));
220562306a36Sopenharmony_ci		goto csum_error;
220662306a36Sopenharmony_ci	}
220762306a36Sopenharmony_ci	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
220862306a36Sopenharmony_ci	case TCP_TW_SYN: {
220962306a36Sopenharmony_ci		struct sock *sk2 = inet_lookup_listener(net,
221062306a36Sopenharmony_ci							net->ipv4.tcp_death_row.hashinfo,
221162306a36Sopenharmony_ci							skb, __tcp_hdrlen(th),
221262306a36Sopenharmony_ci							iph->saddr, th->source,
221362306a36Sopenharmony_ci							iph->daddr, th->dest,
221462306a36Sopenharmony_ci							inet_iif(skb),
221562306a36Sopenharmony_ci							sdif);
		/* A listener exists: retire the time-wait socket and restart
		 * at process: on the listener.  refcounted is cleared, so the
		 * exit paths will not sock_put() the listener.
		 */
221662306a36Sopenharmony_ci		if (sk2) {
221762306a36Sopenharmony_ci			inet_twsk_deschedule_put(inet_twsk(sk));
221862306a36Sopenharmony_ci			sk = sk2;
221962306a36Sopenharmony_ci			tcp_v4_restore_cb(skb);
222062306a36Sopenharmony_ci			refcounted = false;
222162306a36Sopenharmony_ci			goto process;
222262306a36Sopenharmony_ci		}
222362306a36Sopenharmony_ci	}
222462306a36Sopenharmony_ci		/* to ACK */
222562306a36Sopenharmony_ci		fallthrough;
222662306a36Sopenharmony_ci	case TCP_TW_ACK:
222762306a36Sopenharmony_ci		tcp_v4_timewait_ack(sk, skb);
222862306a36Sopenharmony_ci		break;
222962306a36Sopenharmony_ci	case TCP_TW_RST:
223062306a36Sopenharmony_ci		tcp_v4_send_reset(sk, skb);
223162306a36Sopenharmony_ci		inet_twsk_deschedule_put(inet_twsk(sk));
223262306a36Sopenharmony_ci		goto discard_it;
223362306a36Sopenharmony_ci	case TCP_TW_SUCCESS:;
223462306a36Sopenharmony_ci	}
223562306a36Sopenharmony_ci	goto discard_it;
223662306a36Sopenharmony_ci}
223762306a36Sopenharmony_ci
/* Time-wait socket operations for TCP: the size of the time-wait object and
 * the callbacks used for uniqueness checks and destruction.
 */
223862306a36Sopenharmony_cistatic struct timewait_sock_ops tcp_timewait_sock_ops = {
223962306a36Sopenharmony_ci	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
224062306a36Sopenharmony_ci	.twsk_unique	= tcp_twsk_unique,
224162306a36Sopenharmony_ci	.twsk_destructor= tcp_twsk_destructor,
224262306a36Sopenharmony_ci};
224362306a36Sopenharmony_ci
224462306a36Sopenharmony_civoid inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
224562306a36Sopenharmony_ci{
224662306a36Sopenharmony_ci	struct dst_entry *dst = skb_dst(skb);
224762306a36Sopenharmony_ci
224862306a36Sopenharmony_ci	if (dst && dst_hold_safe(dst)) {
224962306a36Sopenharmony_ci		rcu_assign_pointer(sk->sk_rx_dst, dst);
225062306a36Sopenharmony_ci		sk->sk_rx_dst_ifindex = skb->skb_iif;
225162306a36Sopenharmony_ci	}
225262306a36Sopenharmony_ci}
225362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_sk_rx_dst_set);
225462306a36Sopenharmony_ci
/* Address-family specific connection-socket operations for TCP over IPv4.
 * tcp_v4_init_sock() installs this table as icsk_af_ops.
 */
225562306a36Sopenharmony_ciconst struct inet_connection_sock_af_ops ipv4_specific = {
225662306a36Sopenharmony_ci	.queue_xmit	   = ip_queue_xmit,
225762306a36Sopenharmony_ci	.send_check	   = tcp_v4_send_check,
225862306a36Sopenharmony_ci	.rebuild_header	   = inet_sk_rebuild_header,
225962306a36Sopenharmony_ci	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
226062306a36Sopenharmony_ci	.conn_request	   = tcp_v4_conn_request,
226162306a36Sopenharmony_ci	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
226262306a36Sopenharmony_ci	.net_header_len	   = sizeof(struct iphdr),
226362306a36Sopenharmony_ci	.setsockopt	   = ip_setsockopt,
226462306a36Sopenharmony_ci	.getsockopt	   = ip_getsockopt,
226562306a36Sopenharmony_ci	.addr2sockaddr	   = inet_csk_addr2sockaddr,
226662306a36Sopenharmony_ci	.sockaddr_len	   = sizeof(struct sockaddr_in),
226762306a36Sopenharmony_ci	.mtu_reduced	   = tcp_v4_mtu_reduced,
226862306a36Sopenharmony_ci};
226962306a36Sopenharmony_ciEXPORT_SYMBOL(ipv4_specific);
227062306a36Sopenharmony_ci
227162306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 signature hooks for IPv4 sockets (lookup, hash calculation and key
 * parsing).  Only built with CONFIG_TCP_MD5SIG; tcp_v4_init_sock() installs
 * the table as af_specific.
 */
227262306a36Sopenharmony_cistatic const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
227362306a36Sopenharmony_ci	.md5_lookup		= tcp_v4_md5_lookup,
227462306a36Sopenharmony_ci	.calc_md5_hash		= tcp_v4_md5_hash_skb,
227562306a36Sopenharmony_ci	.md5_parse		= tcp_v4_parse_md5_keys,
227662306a36Sopenharmony_ci};
227762306a36Sopenharmony_ci#endif
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci/* NOTE: A lot of things set to zero explicitly by call to
228062306a36Sopenharmony_ci *       sk_alloc() so need not be done here.
228162306a36Sopenharmony_ci */
228262306a36Sopenharmony_cistatic int tcp_v4_init_sock(struct sock *sk)
228362306a36Sopenharmony_ci{
228462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_ci	tcp_init_sock(sk);
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	icsk->icsk_af_ops = &ipv4_specific;
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
229162306a36Sopenharmony_ci	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
229262306a36Sopenharmony_ci#endif
229362306a36Sopenharmony_ci
229462306a36Sopenharmony_ci	return 0;
229562306a36Sopenharmony_ci}
229662306a36Sopenharmony_ci
/*
 * tcp_v4_destroy_sock() - release the TCP state attached to @sk.
 *
 * In order: clears the xmit timers, cleans up congestion control and ULP
 * state, purges the write queue and the out-of-order queue, frees the MD5
 * key list (CONFIG_TCP_MD5SIG only), releases the bound port, frees
 * Fast Open and saved-SYN state, and finally decrements the
 * allocated-sockets count.
 */
229762306a36Sopenharmony_civoid tcp_v4_destroy_sock(struct sock *sk)
229862306a36Sopenharmony_ci{
229962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
230062306a36Sopenharmony_ci
230162306a36Sopenharmony_ci	trace_tcp_destroy_sock(sk);
230262306a36Sopenharmony_ci
230362306a36Sopenharmony_ci	tcp_clear_xmit_timers(sk);
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci	tcp_cleanup_congestion_control(sk);
230662306a36Sopenharmony_ci
230762306a36Sopenharmony_ci	tcp_cleanup_ulp(sk);
230862306a36Sopenharmony_ci
230962306a36Sopenharmony_ci	/* Clean up the write buffer. */
231062306a36Sopenharmony_ci	tcp_write_queue_purge(sk);
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_ci	/* Check if we want to disable active TFO */
231362306a36Sopenharmony_ci	tcp_fastopen_active_disable_ofo_check(sk);
231462306a36Sopenharmony_ci
231562306a36Sopenharmony_ci	/* Cleans up our, hopefully empty, out_of_order_queue. */
231662306a36Sopenharmony_ci	skb_rbtree_purge(&tp->out_of_order_queue);
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
231962306a36Sopenharmony_ci	/* Clean up the MD5 key list, if any */
232062306a36Sopenharmony_ci	if (tp->md5sig_info) {
232162306a36Sopenharmony_ci		tcp_clear_md5_list(sk);
232262306a36Sopenharmony_ci		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
232362306a36Sopenharmony_ci		tp->md5sig_info = NULL;
232462306a36Sopenharmony_ci		static_branch_slow_dec_deferred(&tcp_md5_needed);
232562306a36Sopenharmony_ci	}
232662306a36Sopenharmony_ci#endif
232762306a36Sopenharmony_ci
232862306a36Sopenharmony_ci	/* Clean up a referenced TCP bind bucket. */
232962306a36Sopenharmony_ci	if (inet_csk(sk)->icsk_bind_hash)
233062306a36Sopenharmony_ci		inet_put_port(sk);
233162306a36Sopenharmony_ci
	/* A Fast Open request socket must no longer be attached here. */
233262306a36Sopenharmony_ci	BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
233362306a36Sopenharmony_ci
233462306a36Sopenharmony_ci	/* If socket is aborted during connect operation */
233562306a36Sopenharmony_ci	tcp_free_fastopen_req(tp);
233662306a36Sopenharmony_ci	tcp_fastopen_destroy_cipher(sk);
233762306a36Sopenharmony_ci	tcp_saved_syn_free(tp);
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci	sk_sockets_allocated_dec(sk);
234062306a36Sopenharmony_ci}
234162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_destroy_sock);
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS
234462306a36Sopenharmony_ci/* Proc filesystem TCP sock list dumping. */
234562306a36Sopenharmony_ci
234662306a36Sopenharmony_cistatic unsigned short seq_file_family(const struct seq_file *seq);
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_cistatic bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
234962306a36Sopenharmony_ci{
235062306a36Sopenharmony_ci	unsigned short family = seq_file_family(seq);
235162306a36Sopenharmony_ci
235262306a36Sopenharmony_ci	/* AF_UNSPEC is used as a match all */
235362306a36Sopenharmony_ci	return ((family == AF_UNSPEC || family == sk->sk_family) &&
235462306a36Sopenharmony_ci		net_eq(sock_net(sk), seq_file_net(seq)));
235562306a36Sopenharmony_ci}
235662306a36Sopenharmony_ci
235762306a36Sopenharmony_ci/* Find a non empty bucket (starting from st->bucket)
235862306a36Sopenharmony_ci * and return the first sk from it.
 *
 * st->offset is reset to 0.  When a matching socket is returned, the lock of
 * bucket st->bucket is still held; listening_get_next() drops it once that
 * bucket is exhausted.  Returns NULL, with no lock held, when no remaining
 * bucket contains a socket accepted by seq_sk_match().
235962306a36Sopenharmony_ci */
236062306a36Sopenharmony_cistatic void *listening_get_first(struct seq_file *seq)
236162306a36Sopenharmony_ci{
236262306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
236362306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
236462306a36Sopenharmony_ci
236562306a36Sopenharmony_ci	st->offset = 0;
236662306a36Sopenharmony_ci	for (; st->bucket <= hinfo->lhash2_mask; st->bucket++) {
236762306a36Sopenharmony_ci		struct inet_listen_hashbucket *ilb2;
236862306a36Sopenharmony_ci		struct hlist_nulls_node *node;
236962306a36Sopenharmony_ci		struct sock *sk;
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_ci		ilb2 = &hinfo->lhash2[st->bucket];
		/* Skip empty buckets without taking their lock. */
237262306a36Sopenharmony_ci		if (hlist_nulls_empty(&ilb2->nulls_head))
237362306a36Sopenharmony_ci			continue;
237462306a36Sopenharmony_ci
237562306a36Sopenharmony_ci		spin_lock(&ilb2->lock);
237662306a36Sopenharmony_ci		sk_nulls_for_each(sk, node, &ilb2->nulls_head) {
			/* Intentionally returns with ilb2->lock held. */
237762306a36Sopenharmony_ci			if (seq_sk_match(seq, sk))
237862306a36Sopenharmony_ci				return sk;
237962306a36Sopenharmony_ci		}
238062306a36Sopenharmony_ci		spin_unlock(&ilb2->lock);
238162306a36Sopenharmony_ci	}
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci	return NULL;
238462306a36Sopenharmony_ci}
238562306a36Sopenharmony_ci
238662306a36Sopenharmony_ci/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
238762306a36Sopenharmony_ci * If "cur" is the last one in the st->bucket,
238862306a36Sopenharmony_ci * call listening_get_first() to return the first sk of the next
238962306a36Sopenharmony_ci * non empty bucket.
 *
 * Relies on the lock of bucket st->bucket being held on entry (it is taken
 * in listening_get_first()); the lock is released here only when the walk
 * leaves that bucket.  st->num and st->offset are incremented on every call.
239062306a36Sopenharmony_ci */
239162306a36Sopenharmony_cistatic void *listening_get_next(struct seq_file *seq, void *cur)
239262306a36Sopenharmony_ci{
239362306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
239462306a36Sopenharmony_ci	struct inet_listen_hashbucket *ilb2;
239562306a36Sopenharmony_ci	struct hlist_nulls_node *node;
239662306a36Sopenharmony_ci	struct inet_hashinfo *hinfo;
239762306a36Sopenharmony_ci	struct sock *sk = cur;
239862306a36Sopenharmony_ci
239962306a36Sopenharmony_ci	++st->num;
240062306a36Sopenharmony_ci	++st->offset;
240162306a36Sopenharmony_ci
	/* Continue in the current bucket, still under its lock. */
240262306a36Sopenharmony_ci	sk = sk_nulls_next(sk);
240362306a36Sopenharmony_ci	sk_nulls_for_each_from(sk, node) {
240462306a36Sopenharmony_ci		if (seq_sk_match(seq, sk))
240562306a36Sopenharmony_ci			return sk;
240662306a36Sopenharmony_ci	}
240762306a36Sopenharmony_ci
	/* Bucket exhausted: drop its lock and move on to the next bucket. */
240862306a36Sopenharmony_ci	hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
240962306a36Sopenharmony_ci	ilb2 = &hinfo->lhash2[st->bucket];
241062306a36Sopenharmony_ci	spin_unlock(&ilb2->lock);
241162306a36Sopenharmony_ci	++st->bucket;
241262306a36Sopenharmony_ci	return listening_get_first(seq);
241362306a36Sopenharmony_ci}
241462306a36Sopenharmony_ci
241562306a36Sopenharmony_cistatic void *listening_get_idx(struct seq_file *seq, loff_t *pos)
241662306a36Sopenharmony_ci{
241762306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
241862306a36Sopenharmony_ci	void *rc;
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci	st->bucket = 0;
242162306a36Sopenharmony_ci	st->offset = 0;
242262306a36Sopenharmony_ci	rc = listening_get_first(seq);
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci	while (rc && *pos) {
242562306a36Sopenharmony_ci		rc = listening_get_next(seq, rc);
242662306a36Sopenharmony_ci		--*pos;
242762306a36Sopenharmony_ci	}
242862306a36Sopenharmony_ci	return rc;
242962306a36Sopenharmony_ci}
243062306a36Sopenharmony_ci
243162306a36Sopenharmony_cistatic inline bool empty_bucket(struct inet_hashinfo *hinfo,
243262306a36Sopenharmony_ci				const struct tcp_iter_state *st)
243362306a36Sopenharmony_ci{
243462306a36Sopenharmony_ci	return hlist_nulls_empty(&hinfo->ehash[st->bucket].chain);
243562306a36Sopenharmony_ci}
243662306a36Sopenharmony_ci
243762306a36Sopenharmony_ci/*
243862306a36Sopenharmony_ci * Get first established socket starting from bucket given in st->bucket.
243962306a36Sopenharmony_ci * If st->bucket is zero, the very first socket in the hash is returned.
244062306a36Sopenharmony_ci */
244162306a36Sopenharmony_cistatic void *established_get_first(struct seq_file *seq)
244262306a36Sopenharmony_ci{
244362306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
244462306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
244562306a36Sopenharmony_ci
244662306a36Sopenharmony_ci	st->offset = 0;
244762306a36Sopenharmony_ci	for (; st->bucket <= hinfo->ehash_mask; ++st->bucket) {
244862306a36Sopenharmony_ci		struct sock *sk;
244962306a36Sopenharmony_ci		struct hlist_nulls_node *node;
245062306a36Sopenharmony_ci		spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
245162306a36Sopenharmony_ci
245262306a36Sopenharmony_ci		cond_resched();
245362306a36Sopenharmony_ci
245462306a36Sopenharmony_ci		/* Lockless fast path for the common case of empty buckets */
245562306a36Sopenharmony_ci		if (empty_bucket(hinfo, st))
245662306a36Sopenharmony_ci			continue;
245762306a36Sopenharmony_ci
245862306a36Sopenharmony_ci		spin_lock_bh(lock);
245962306a36Sopenharmony_ci		sk_nulls_for_each(sk, node, &hinfo->ehash[st->bucket].chain) {
246062306a36Sopenharmony_ci			if (seq_sk_match(seq, sk))
246162306a36Sopenharmony_ci				return sk;
246262306a36Sopenharmony_ci		}
246362306a36Sopenharmony_ci		spin_unlock_bh(lock);
246462306a36Sopenharmony_ci	}
246562306a36Sopenharmony_ci
246662306a36Sopenharmony_ci	return NULL;
246762306a36Sopenharmony_ci}
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_cistatic void *established_get_next(struct seq_file *seq, void *cur)
247062306a36Sopenharmony_ci{
247162306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
247262306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
247362306a36Sopenharmony_ci	struct hlist_nulls_node *node;
247462306a36Sopenharmony_ci	struct sock *sk = cur;
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci	++st->num;
247762306a36Sopenharmony_ci	++st->offset;
247862306a36Sopenharmony_ci
247962306a36Sopenharmony_ci	sk = sk_nulls_next(sk);
248062306a36Sopenharmony_ci
248162306a36Sopenharmony_ci	sk_nulls_for_each_from(sk, node) {
248262306a36Sopenharmony_ci		if (seq_sk_match(seq, sk))
248362306a36Sopenharmony_ci			return sk;
248462306a36Sopenharmony_ci	}
248562306a36Sopenharmony_ci
248662306a36Sopenharmony_ci	spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
248762306a36Sopenharmony_ci	++st->bucket;
248862306a36Sopenharmony_ci	return established_get_first(seq);
248962306a36Sopenharmony_ci}
249062306a36Sopenharmony_ci
249162306a36Sopenharmony_cistatic void *established_get_idx(struct seq_file *seq, loff_t pos)
249262306a36Sopenharmony_ci{
249362306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
249462306a36Sopenharmony_ci	void *rc;
249562306a36Sopenharmony_ci
249662306a36Sopenharmony_ci	st->bucket = 0;
249762306a36Sopenharmony_ci	rc = established_get_first(seq);
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci	while (rc && pos) {
250062306a36Sopenharmony_ci		rc = established_get_next(seq, rc);
250162306a36Sopenharmony_ci		--pos;
250262306a36Sopenharmony_ci	}
250362306a36Sopenharmony_ci	return rc;
250462306a36Sopenharmony_ci}
250562306a36Sopenharmony_ci
250662306a36Sopenharmony_cistatic void *tcp_get_idx(struct seq_file *seq, loff_t pos)
250762306a36Sopenharmony_ci{
250862306a36Sopenharmony_ci	void *rc;
250962306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_ci	st->state = TCP_SEQ_STATE_LISTENING;
251262306a36Sopenharmony_ci	rc	  = listening_get_idx(seq, &pos);
251362306a36Sopenharmony_ci
251462306a36Sopenharmony_ci	if (!rc) {
251562306a36Sopenharmony_ci		st->state = TCP_SEQ_STATE_ESTABLISHED;
251662306a36Sopenharmony_ci		rc	  = established_get_idx(seq, pos);
251762306a36Sopenharmony_ci	}
251862306a36Sopenharmony_ci
251962306a36Sopenharmony_ci	return rc;
252062306a36Sopenharmony_ci}
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_cistatic void *tcp_seek_last_pos(struct seq_file *seq)
252362306a36Sopenharmony_ci{
252462306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
252562306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
252662306a36Sopenharmony_ci	int bucket = st->bucket;
252762306a36Sopenharmony_ci	int offset = st->offset;
252862306a36Sopenharmony_ci	int orig_num = st->num;
252962306a36Sopenharmony_ci	void *rc = NULL;
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci	switch (st->state) {
253262306a36Sopenharmony_ci	case TCP_SEQ_STATE_LISTENING:
253362306a36Sopenharmony_ci		if (st->bucket > hinfo->lhash2_mask)
253462306a36Sopenharmony_ci			break;
253562306a36Sopenharmony_ci		rc = listening_get_first(seq);
253662306a36Sopenharmony_ci		while (offset-- && rc && bucket == st->bucket)
253762306a36Sopenharmony_ci			rc = listening_get_next(seq, rc);
253862306a36Sopenharmony_ci		if (rc)
253962306a36Sopenharmony_ci			break;
254062306a36Sopenharmony_ci		st->bucket = 0;
254162306a36Sopenharmony_ci		st->state = TCP_SEQ_STATE_ESTABLISHED;
254262306a36Sopenharmony_ci		fallthrough;
254362306a36Sopenharmony_ci	case TCP_SEQ_STATE_ESTABLISHED:
254462306a36Sopenharmony_ci		if (st->bucket > hinfo->ehash_mask)
254562306a36Sopenharmony_ci			break;
254662306a36Sopenharmony_ci		rc = established_get_first(seq);
254762306a36Sopenharmony_ci		while (offset-- && rc && bucket == st->bucket)
254862306a36Sopenharmony_ci			rc = established_get_next(seq, rc);
254962306a36Sopenharmony_ci	}
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	st->num = orig_num;
255262306a36Sopenharmony_ci
255362306a36Sopenharmony_ci	return rc;
255462306a36Sopenharmony_ci}
255562306a36Sopenharmony_ci
255662306a36Sopenharmony_civoid *tcp_seq_start(struct seq_file *seq, loff_t *pos)
255762306a36Sopenharmony_ci{
255862306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
255962306a36Sopenharmony_ci	void *rc;
256062306a36Sopenharmony_ci
256162306a36Sopenharmony_ci	if (*pos && *pos == st->last_pos) {
256262306a36Sopenharmony_ci		rc = tcp_seek_last_pos(seq);
256362306a36Sopenharmony_ci		if (rc)
256462306a36Sopenharmony_ci			goto out;
256562306a36Sopenharmony_ci	}
256662306a36Sopenharmony_ci
256762306a36Sopenharmony_ci	st->state = TCP_SEQ_STATE_LISTENING;
256862306a36Sopenharmony_ci	st->num = 0;
256962306a36Sopenharmony_ci	st->bucket = 0;
257062306a36Sopenharmony_ci	st->offset = 0;
257162306a36Sopenharmony_ci	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
257262306a36Sopenharmony_ci
257362306a36Sopenharmony_ciout:
257462306a36Sopenharmony_ci	st->last_pos = *pos;
257562306a36Sopenharmony_ci	return rc;
257662306a36Sopenharmony_ci}
257762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_start);
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_civoid *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
258062306a36Sopenharmony_ci{
258162306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
258262306a36Sopenharmony_ci	void *rc = NULL;
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci	if (v == SEQ_START_TOKEN) {
258562306a36Sopenharmony_ci		rc = tcp_get_idx(seq, 0);
258662306a36Sopenharmony_ci		goto out;
258762306a36Sopenharmony_ci	}
258862306a36Sopenharmony_ci
258962306a36Sopenharmony_ci	switch (st->state) {
259062306a36Sopenharmony_ci	case TCP_SEQ_STATE_LISTENING:
259162306a36Sopenharmony_ci		rc = listening_get_next(seq, v);
259262306a36Sopenharmony_ci		if (!rc) {
259362306a36Sopenharmony_ci			st->state = TCP_SEQ_STATE_ESTABLISHED;
259462306a36Sopenharmony_ci			st->bucket = 0;
259562306a36Sopenharmony_ci			st->offset = 0;
259662306a36Sopenharmony_ci			rc	  = established_get_first(seq);
259762306a36Sopenharmony_ci		}
259862306a36Sopenharmony_ci		break;
259962306a36Sopenharmony_ci	case TCP_SEQ_STATE_ESTABLISHED:
260062306a36Sopenharmony_ci		rc = established_get_next(seq, v);
260162306a36Sopenharmony_ci		break;
260262306a36Sopenharmony_ci	}
260362306a36Sopenharmony_ciout:
260462306a36Sopenharmony_ci	++*pos;
260562306a36Sopenharmony_ci	st->last_pos = *pos;
260662306a36Sopenharmony_ci	return rc;
260762306a36Sopenharmony_ci}
260862306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_next);
260962306a36Sopenharmony_ci
261062306a36Sopenharmony_civoid tcp_seq_stop(struct seq_file *seq, void *v)
261162306a36Sopenharmony_ci{
261262306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
261362306a36Sopenharmony_ci	struct tcp_iter_state *st = seq->private;
261462306a36Sopenharmony_ci
261562306a36Sopenharmony_ci	switch (st->state) {
261662306a36Sopenharmony_ci	case TCP_SEQ_STATE_LISTENING:
261762306a36Sopenharmony_ci		if (v != SEQ_START_TOKEN)
261862306a36Sopenharmony_ci			spin_unlock(&hinfo->lhash2[st->bucket].lock);
261962306a36Sopenharmony_ci		break;
262062306a36Sopenharmony_ci	case TCP_SEQ_STATE_ESTABLISHED:
262162306a36Sopenharmony_ci		if (v)
262262306a36Sopenharmony_ci			spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
262362306a36Sopenharmony_ci		break;
262462306a36Sopenharmony_ci	}
262562306a36Sopenharmony_ci}
262662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_stop);
262762306a36Sopenharmony_ci
262862306a36Sopenharmony_cistatic void get_openreq4(const struct request_sock *req,
262962306a36Sopenharmony_ci			 struct seq_file *f, int i)
263062306a36Sopenharmony_ci{
263162306a36Sopenharmony_ci	const struct inet_request_sock *ireq = inet_rsk(req);
263262306a36Sopenharmony_ci	long delta = req->rsk_timer.expires - jiffies;
263362306a36Sopenharmony_ci
263462306a36Sopenharmony_ci	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
263562306a36Sopenharmony_ci		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
263662306a36Sopenharmony_ci		i,
263762306a36Sopenharmony_ci		ireq->ir_loc_addr,
263862306a36Sopenharmony_ci		ireq->ir_num,
263962306a36Sopenharmony_ci		ireq->ir_rmt_addr,
264062306a36Sopenharmony_ci		ntohs(ireq->ir_rmt_port),
264162306a36Sopenharmony_ci		TCP_SYN_RECV,
264262306a36Sopenharmony_ci		0, 0, /* could print option size, but that is af dependent. */
264362306a36Sopenharmony_ci		1,    /* timers active (only the expire timer) */
264462306a36Sopenharmony_ci		jiffies_delta_to_clock_t(delta),
264562306a36Sopenharmony_ci		req->num_timeout,
264662306a36Sopenharmony_ci		from_kuid_munged(seq_user_ns(f),
264762306a36Sopenharmony_ci				 sock_i_uid(req->rsk_listener)),
264862306a36Sopenharmony_ci		0,  /* non standard timer */
264962306a36Sopenharmony_ci		0, /* open_requests have no inode */
265062306a36Sopenharmony_ci		0,
265162306a36Sopenharmony_ci		req);
265262306a36Sopenharmony_ci}
265362306a36Sopenharmony_ci
265462306a36Sopenharmony_cistatic void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
265562306a36Sopenharmony_ci{
265662306a36Sopenharmony_ci	int timer_active;
265762306a36Sopenharmony_ci	unsigned long timer_expires;
265862306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
265962306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
266062306a36Sopenharmony_ci	const struct inet_sock *inet = inet_sk(sk);
266162306a36Sopenharmony_ci	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
266262306a36Sopenharmony_ci	__be32 dest = inet->inet_daddr;
266362306a36Sopenharmony_ci	__be32 src = inet->inet_rcv_saddr;
266462306a36Sopenharmony_ci	__u16 destp = ntohs(inet->inet_dport);
266562306a36Sopenharmony_ci	__u16 srcp = ntohs(inet->inet_sport);
266662306a36Sopenharmony_ci	int rx_queue;
266762306a36Sopenharmony_ci	int state;
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
267062306a36Sopenharmony_ci	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
267162306a36Sopenharmony_ci	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
267262306a36Sopenharmony_ci		timer_active	= 1;
267362306a36Sopenharmony_ci		timer_expires	= icsk->icsk_timeout;
267462306a36Sopenharmony_ci	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
267562306a36Sopenharmony_ci		timer_active	= 4;
267662306a36Sopenharmony_ci		timer_expires	= icsk->icsk_timeout;
267762306a36Sopenharmony_ci	} else if (timer_pending(&sk->sk_timer)) {
267862306a36Sopenharmony_ci		timer_active	= 2;
267962306a36Sopenharmony_ci		timer_expires	= sk->sk_timer.expires;
268062306a36Sopenharmony_ci	} else {
268162306a36Sopenharmony_ci		timer_active	= 0;
268262306a36Sopenharmony_ci		timer_expires = jiffies;
268362306a36Sopenharmony_ci	}
268462306a36Sopenharmony_ci
268562306a36Sopenharmony_ci	state = inet_sk_state_load(sk);
268662306a36Sopenharmony_ci	if (state == TCP_LISTEN)
268762306a36Sopenharmony_ci		rx_queue = READ_ONCE(sk->sk_ack_backlog);
268862306a36Sopenharmony_ci	else
268962306a36Sopenharmony_ci		/* Because we don't lock the socket,
269062306a36Sopenharmony_ci		 * we might find a transient negative value.
269162306a36Sopenharmony_ci		 */
269262306a36Sopenharmony_ci		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
269362306a36Sopenharmony_ci				      READ_ONCE(tp->copied_seq), 0);
269462306a36Sopenharmony_ci
269562306a36Sopenharmony_ci	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
269662306a36Sopenharmony_ci			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
269762306a36Sopenharmony_ci		i, src, srcp, dest, destp, state,
269862306a36Sopenharmony_ci		READ_ONCE(tp->write_seq) - tp->snd_una,
269962306a36Sopenharmony_ci		rx_queue,
270062306a36Sopenharmony_ci		timer_active,
270162306a36Sopenharmony_ci		jiffies_delta_to_clock_t(timer_expires - jiffies),
270262306a36Sopenharmony_ci		icsk->icsk_retransmits,
270362306a36Sopenharmony_ci		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
270462306a36Sopenharmony_ci		icsk->icsk_probes_out,
270562306a36Sopenharmony_ci		sock_i_ino(sk),
270662306a36Sopenharmony_ci		refcount_read(&sk->sk_refcnt), sk,
270762306a36Sopenharmony_ci		jiffies_to_clock_t(icsk->icsk_rto),
270862306a36Sopenharmony_ci		jiffies_to_clock_t(icsk->icsk_ack.ato),
270962306a36Sopenharmony_ci		(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
271062306a36Sopenharmony_ci		tcp_snd_cwnd(tp),
271162306a36Sopenharmony_ci		state == TCP_LISTEN ?
271262306a36Sopenharmony_ci		    fastopenq->max_qlen :
271362306a36Sopenharmony_ci		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
271462306a36Sopenharmony_ci}
271562306a36Sopenharmony_ci
271662306a36Sopenharmony_cistatic void get_timewait4_sock(const struct inet_timewait_sock *tw,
271762306a36Sopenharmony_ci			       struct seq_file *f, int i)
271862306a36Sopenharmony_ci{
271962306a36Sopenharmony_ci	long delta = tw->tw_timer.expires - jiffies;
272062306a36Sopenharmony_ci	__be32 dest, src;
272162306a36Sopenharmony_ci	__u16 destp, srcp;
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci	dest  = tw->tw_daddr;
272462306a36Sopenharmony_ci	src   = tw->tw_rcv_saddr;
272562306a36Sopenharmony_ci	destp = ntohs(tw->tw_dport);
272662306a36Sopenharmony_ci	srcp  = ntohs(tw->tw_sport);
272762306a36Sopenharmony_ci
272862306a36Sopenharmony_ci	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
272962306a36Sopenharmony_ci		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
273062306a36Sopenharmony_ci		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
273162306a36Sopenharmony_ci		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
273262306a36Sopenharmony_ci		refcount_read(&tw->tw_refcnt), tw);
273362306a36Sopenharmony_ci}
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci#define TMPSZ 150
273662306a36Sopenharmony_ci
273762306a36Sopenharmony_cistatic int tcp4_seq_show(struct seq_file *seq, void *v)
273862306a36Sopenharmony_ci{
273962306a36Sopenharmony_ci	struct tcp_iter_state *st;
274062306a36Sopenharmony_ci	struct sock *sk = v;
274162306a36Sopenharmony_ci
274262306a36Sopenharmony_ci	seq_setwidth(seq, TMPSZ - 1);
274362306a36Sopenharmony_ci	if (v == SEQ_START_TOKEN) {
274462306a36Sopenharmony_ci		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
274562306a36Sopenharmony_ci			   "rx_queue tr tm->when retrnsmt   uid  timeout "
274662306a36Sopenharmony_ci			   "inode");
274762306a36Sopenharmony_ci		goto out;
274862306a36Sopenharmony_ci	}
274962306a36Sopenharmony_ci	st = seq->private;
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci	if (sk->sk_state == TCP_TIME_WAIT)
275262306a36Sopenharmony_ci		get_timewait4_sock(v, seq, st->num);
275362306a36Sopenharmony_ci	else if (sk->sk_state == TCP_NEW_SYN_RECV)
275462306a36Sopenharmony_ci		get_openreq4(v, seq, st->num);
275562306a36Sopenharmony_ci	else
275662306a36Sopenharmony_ci		get_tcp4_sock(v, seq, st->num);
275762306a36Sopenharmony_ciout:
275862306a36Sopenharmony_ci	seq_pad(seq, '\n');
275962306a36Sopenharmony_ci	return 0;
276062306a36Sopenharmony_ci}
276162306a36Sopenharmony_ci
276262306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL
/* Per-seq_file state of the BPF TCP iterator.  Sockets of one hash bucket
 * are collected into "batch" with a reference held on each, so they can be
 * shown after the bucket lock has been dropped.
 */
struct bpf_tcp_iter_state {
	/* Generic walk position (state/bucket/offset/num/last_pos); the
	 * tcp_seek_last_pos() helper operates on it.
	 */
	struct tcp_iter_state state;
	/* Index in batch[] of the entry currently being shown */
	unsigned int cur_sk;
	/* Number of entries stored in batch[] */
	unsigned int end_sk;
	/* Capacity of batch[] in entries */
	unsigned int max_sk;
	/* Array of sockets collected for the current bucket (refs held) */
	struct sock **batch;
	/* True when batch[] captured every matching socket of the bucket, so
	 * the next batch can start at the following bucket.
	 */
	bool st_bucket_done;
};
277162306a36Sopenharmony_ci
/* Context handed to the BPF program by tcp_prog_seq_show(). */
struct bpf_iter__tcp {
	/* Iterator metadata (seq file, sequence number) */
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	/* Socket being visited; NULL on the final call made from ->stop() */
	__bpf_md_ptr(struct sock_common *, sk_common);
	/* Owner uid as computed by the caller (0 for TIME_WAIT entries) */
	uid_t uid __aligned(8);
};
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_cistatic int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
277962306a36Sopenharmony_ci			     struct sock_common *sk_common, uid_t uid)
278062306a36Sopenharmony_ci{
278162306a36Sopenharmony_ci	struct bpf_iter__tcp ctx;
278262306a36Sopenharmony_ci
278362306a36Sopenharmony_ci	meta->seq_num--;  /* skip SEQ_START_TOKEN */
278462306a36Sopenharmony_ci	ctx.meta = meta;
278562306a36Sopenharmony_ci	ctx.sk_common = sk_common;
278662306a36Sopenharmony_ci	ctx.uid = uid;
278762306a36Sopenharmony_ci	return bpf_iter_run_prog(prog, &ctx);
278862306a36Sopenharmony_ci}
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_cistatic void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
279162306a36Sopenharmony_ci{
279262306a36Sopenharmony_ci	while (iter->cur_sk < iter->end_sk)
279362306a36Sopenharmony_ci		sock_gen_put(iter->batch[iter->cur_sk++]);
279462306a36Sopenharmony_ci}
279562306a36Sopenharmony_ci
279662306a36Sopenharmony_cistatic int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
279762306a36Sopenharmony_ci				      unsigned int new_batch_sz)
279862306a36Sopenharmony_ci{
279962306a36Sopenharmony_ci	struct sock **new_batch;
280062306a36Sopenharmony_ci
280162306a36Sopenharmony_ci	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
280262306a36Sopenharmony_ci			     GFP_USER | __GFP_NOWARN);
280362306a36Sopenharmony_ci	if (!new_batch)
280462306a36Sopenharmony_ci		return -ENOMEM;
280562306a36Sopenharmony_ci
280662306a36Sopenharmony_ci	bpf_iter_tcp_put_batch(iter);
280762306a36Sopenharmony_ci	kvfree(iter->batch);
280862306a36Sopenharmony_ci	iter->batch = new_batch;
280962306a36Sopenharmony_ci	iter->max_sk = new_batch_sz;
281062306a36Sopenharmony_ci
281162306a36Sopenharmony_ci	return 0;
281262306a36Sopenharmony_ci}
281362306a36Sopenharmony_ci
281462306a36Sopenharmony_cistatic unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
281562306a36Sopenharmony_ci						 struct sock *start_sk)
281662306a36Sopenharmony_ci{
281762306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
281862306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = seq->private;
281962306a36Sopenharmony_ci	struct tcp_iter_state *st = &iter->state;
282062306a36Sopenharmony_ci	struct hlist_nulls_node *node;
282162306a36Sopenharmony_ci	unsigned int expected = 1;
282262306a36Sopenharmony_ci	struct sock *sk;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci	sock_hold(start_sk);
282562306a36Sopenharmony_ci	iter->batch[iter->end_sk++] = start_sk;
282662306a36Sopenharmony_ci
282762306a36Sopenharmony_ci	sk = sk_nulls_next(start_sk);
282862306a36Sopenharmony_ci	sk_nulls_for_each_from(sk, node) {
282962306a36Sopenharmony_ci		if (seq_sk_match(seq, sk)) {
283062306a36Sopenharmony_ci			if (iter->end_sk < iter->max_sk) {
283162306a36Sopenharmony_ci				sock_hold(sk);
283262306a36Sopenharmony_ci				iter->batch[iter->end_sk++] = sk;
283362306a36Sopenharmony_ci			}
283462306a36Sopenharmony_ci			expected++;
283562306a36Sopenharmony_ci		}
283662306a36Sopenharmony_ci	}
283762306a36Sopenharmony_ci	spin_unlock(&hinfo->lhash2[st->bucket].lock);
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_ci	return expected;
284062306a36Sopenharmony_ci}
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_cistatic unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
284362306a36Sopenharmony_ci						   struct sock *start_sk)
284462306a36Sopenharmony_ci{
284562306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
284662306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = seq->private;
284762306a36Sopenharmony_ci	struct tcp_iter_state *st = &iter->state;
284862306a36Sopenharmony_ci	struct hlist_nulls_node *node;
284962306a36Sopenharmony_ci	unsigned int expected = 1;
285062306a36Sopenharmony_ci	struct sock *sk;
285162306a36Sopenharmony_ci
285262306a36Sopenharmony_ci	sock_hold(start_sk);
285362306a36Sopenharmony_ci	iter->batch[iter->end_sk++] = start_sk;
285462306a36Sopenharmony_ci
285562306a36Sopenharmony_ci	sk = sk_nulls_next(start_sk);
285662306a36Sopenharmony_ci	sk_nulls_for_each_from(sk, node) {
285762306a36Sopenharmony_ci		if (seq_sk_match(seq, sk)) {
285862306a36Sopenharmony_ci			if (iter->end_sk < iter->max_sk) {
285962306a36Sopenharmony_ci				sock_hold(sk);
286062306a36Sopenharmony_ci				iter->batch[iter->end_sk++] = sk;
286162306a36Sopenharmony_ci			}
286262306a36Sopenharmony_ci			expected++;
286362306a36Sopenharmony_ci		}
286462306a36Sopenharmony_ci	}
286562306a36Sopenharmony_ci	spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
286662306a36Sopenharmony_ci
286762306a36Sopenharmony_ci	return expected;
286862306a36Sopenharmony_ci}
286962306a36Sopenharmony_ci
287062306a36Sopenharmony_cistatic struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
287162306a36Sopenharmony_ci{
287262306a36Sopenharmony_ci	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
287362306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = seq->private;
287462306a36Sopenharmony_ci	struct tcp_iter_state *st = &iter->state;
287562306a36Sopenharmony_ci	unsigned int expected;
287662306a36Sopenharmony_ci	bool resized = false;
287762306a36Sopenharmony_ci	struct sock *sk;
287862306a36Sopenharmony_ci
287962306a36Sopenharmony_ci	/* The st->bucket is done.  Directly advance to the next
288062306a36Sopenharmony_ci	 * bucket instead of having the tcp_seek_last_pos() to skip
288162306a36Sopenharmony_ci	 * one by one in the current bucket and eventually find out
288262306a36Sopenharmony_ci	 * it has to advance to the next bucket.
288362306a36Sopenharmony_ci	 */
288462306a36Sopenharmony_ci	if (iter->st_bucket_done) {
288562306a36Sopenharmony_ci		st->offset = 0;
288662306a36Sopenharmony_ci		st->bucket++;
288762306a36Sopenharmony_ci		if (st->state == TCP_SEQ_STATE_LISTENING &&
288862306a36Sopenharmony_ci		    st->bucket > hinfo->lhash2_mask) {
288962306a36Sopenharmony_ci			st->state = TCP_SEQ_STATE_ESTABLISHED;
289062306a36Sopenharmony_ci			st->bucket = 0;
289162306a36Sopenharmony_ci		}
289262306a36Sopenharmony_ci	}
289362306a36Sopenharmony_ci
289462306a36Sopenharmony_ciagain:
289562306a36Sopenharmony_ci	/* Get a new batch */
289662306a36Sopenharmony_ci	iter->cur_sk = 0;
289762306a36Sopenharmony_ci	iter->end_sk = 0;
289862306a36Sopenharmony_ci	iter->st_bucket_done = false;
289962306a36Sopenharmony_ci
290062306a36Sopenharmony_ci	sk = tcp_seek_last_pos(seq);
290162306a36Sopenharmony_ci	if (!sk)
290262306a36Sopenharmony_ci		return NULL; /* Done */
290362306a36Sopenharmony_ci
290462306a36Sopenharmony_ci	if (st->state == TCP_SEQ_STATE_LISTENING)
290562306a36Sopenharmony_ci		expected = bpf_iter_tcp_listening_batch(seq, sk);
290662306a36Sopenharmony_ci	else
290762306a36Sopenharmony_ci		expected = bpf_iter_tcp_established_batch(seq, sk);
290862306a36Sopenharmony_ci
290962306a36Sopenharmony_ci	if (iter->end_sk == expected) {
291062306a36Sopenharmony_ci		iter->st_bucket_done = true;
291162306a36Sopenharmony_ci		return sk;
291262306a36Sopenharmony_ci	}
291362306a36Sopenharmony_ci
291462306a36Sopenharmony_ci	if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
291562306a36Sopenharmony_ci		resized = true;
291662306a36Sopenharmony_ci		goto again;
291762306a36Sopenharmony_ci	}
291862306a36Sopenharmony_ci
291962306a36Sopenharmony_ci	return sk;
292062306a36Sopenharmony_ci}
292162306a36Sopenharmony_ci
292262306a36Sopenharmony_cistatic void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
292362306a36Sopenharmony_ci{
292462306a36Sopenharmony_ci	/* bpf iter does not support lseek, so it always
292562306a36Sopenharmony_ci	 * continue from where it was stop()-ped.
292662306a36Sopenharmony_ci	 */
292762306a36Sopenharmony_ci	if (*pos)
292862306a36Sopenharmony_ci		return bpf_iter_tcp_batch(seq);
292962306a36Sopenharmony_ci
293062306a36Sopenharmony_ci	return SEQ_START_TOKEN;
293162306a36Sopenharmony_ci}
293262306a36Sopenharmony_ci
293362306a36Sopenharmony_cistatic void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
293462306a36Sopenharmony_ci{
293562306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = seq->private;
293662306a36Sopenharmony_ci	struct tcp_iter_state *st = &iter->state;
293762306a36Sopenharmony_ci	struct sock *sk;
293862306a36Sopenharmony_ci
293962306a36Sopenharmony_ci	/* Whenever seq_next() is called, the iter->cur_sk is
294062306a36Sopenharmony_ci	 * done with seq_show(), so advance to the next sk in
294162306a36Sopenharmony_ci	 * the batch.
294262306a36Sopenharmony_ci	 */
294362306a36Sopenharmony_ci	if (iter->cur_sk < iter->end_sk) {
294462306a36Sopenharmony_ci		/* Keeping st->num consistent in tcp_iter_state.
294562306a36Sopenharmony_ci		 * bpf_iter_tcp does not use st->num.
294662306a36Sopenharmony_ci		 * meta.seq_num is used instead.
294762306a36Sopenharmony_ci		 */
294862306a36Sopenharmony_ci		st->num++;
294962306a36Sopenharmony_ci		/* Move st->offset to the next sk in the bucket such that
295062306a36Sopenharmony_ci		 * the future start() will resume at st->offset in
295162306a36Sopenharmony_ci		 * st->bucket.  See tcp_seek_last_pos().
295262306a36Sopenharmony_ci		 */
295362306a36Sopenharmony_ci		st->offset++;
295462306a36Sopenharmony_ci		sock_gen_put(iter->batch[iter->cur_sk++]);
295562306a36Sopenharmony_ci	}
295662306a36Sopenharmony_ci
295762306a36Sopenharmony_ci	if (iter->cur_sk < iter->end_sk)
295862306a36Sopenharmony_ci		sk = iter->batch[iter->cur_sk];
295962306a36Sopenharmony_ci	else
296062306a36Sopenharmony_ci		sk = bpf_iter_tcp_batch(seq);
296162306a36Sopenharmony_ci
296262306a36Sopenharmony_ci	++*pos;
296362306a36Sopenharmony_ci	/* Keeping st->last_pos consistent in tcp_iter_state.
296462306a36Sopenharmony_ci	 * bpf iter does not do lseek, so st->last_pos always equals to *pos.
296562306a36Sopenharmony_ci	 */
296662306a36Sopenharmony_ci	st->last_pos = *pos;
296762306a36Sopenharmony_ci	return sk;
296862306a36Sopenharmony_ci}
296962306a36Sopenharmony_ci
297062306a36Sopenharmony_cistatic int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
297162306a36Sopenharmony_ci{
297262306a36Sopenharmony_ci	struct bpf_iter_meta meta;
297362306a36Sopenharmony_ci	struct bpf_prog *prog;
297462306a36Sopenharmony_ci	struct sock *sk = v;
297562306a36Sopenharmony_ci	uid_t uid;
297662306a36Sopenharmony_ci	int ret;
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci	if (v == SEQ_START_TOKEN)
297962306a36Sopenharmony_ci		return 0;
298062306a36Sopenharmony_ci
298162306a36Sopenharmony_ci	if (sk_fullsock(sk))
298262306a36Sopenharmony_ci		lock_sock(sk);
298362306a36Sopenharmony_ci
298462306a36Sopenharmony_ci	if (unlikely(sk_unhashed(sk))) {
298562306a36Sopenharmony_ci		ret = SEQ_SKIP;
298662306a36Sopenharmony_ci		goto unlock;
298762306a36Sopenharmony_ci	}
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci	if (sk->sk_state == TCP_TIME_WAIT) {
299062306a36Sopenharmony_ci		uid = 0;
299162306a36Sopenharmony_ci	} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
299262306a36Sopenharmony_ci		const struct request_sock *req = v;
299362306a36Sopenharmony_ci
299462306a36Sopenharmony_ci		uid = from_kuid_munged(seq_user_ns(seq),
299562306a36Sopenharmony_ci				       sock_i_uid(req->rsk_listener));
299662306a36Sopenharmony_ci	} else {
299762306a36Sopenharmony_ci		uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
299862306a36Sopenharmony_ci	}
299962306a36Sopenharmony_ci
300062306a36Sopenharmony_ci	meta.seq = seq;
300162306a36Sopenharmony_ci	prog = bpf_iter_get_info(&meta, false);
300262306a36Sopenharmony_ci	ret = tcp_prog_seq_show(prog, &meta, v, uid);
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_ciunlock:
300562306a36Sopenharmony_ci	if (sk_fullsock(sk))
300662306a36Sopenharmony_ci		release_sock(sk);
300762306a36Sopenharmony_ci	return ret;
300862306a36Sopenharmony_ci
300962306a36Sopenharmony_ci}
301062306a36Sopenharmony_ci
301162306a36Sopenharmony_cistatic void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
301262306a36Sopenharmony_ci{
301362306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = seq->private;
301462306a36Sopenharmony_ci	struct bpf_iter_meta meta;
301562306a36Sopenharmony_ci	struct bpf_prog *prog;
301662306a36Sopenharmony_ci
301762306a36Sopenharmony_ci	if (!v) {
301862306a36Sopenharmony_ci		meta.seq = seq;
301962306a36Sopenharmony_ci		prog = bpf_iter_get_info(&meta, true);
302062306a36Sopenharmony_ci		if (prog)
302162306a36Sopenharmony_ci			(void)tcp_prog_seq_show(prog, &meta, v, 0);
302262306a36Sopenharmony_ci	}
302362306a36Sopenharmony_ci
302462306a36Sopenharmony_ci	if (iter->cur_sk < iter->end_sk) {
302562306a36Sopenharmony_ci		bpf_iter_tcp_put_batch(iter);
302662306a36Sopenharmony_ci		iter->st_bucket_done = false;
302762306a36Sopenharmony_ci	}
302862306a36Sopenharmony_ci}
302962306a36Sopenharmony_ci
303062306a36Sopenharmony_cistatic const struct seq_operations bpf_iter_tcp_seq_ops = {
303162306a36Sopenharmony_ci	.show		= bpf_iter_tcp_seq_show,
303262306a36Sopenharmony_ci	.start		= bpf_iter_tcp_seq_start,
303362306a36Sopenharmony_ci	.next		= bpf_iter_tcp_seq_next,
303462306a36Sopenharmony_ci	.stop		= bpf_iter_tcp_seq_stop,
303562306a36Sopenharmony_ci};
303662306a36Sopenharmony_ci#endif
303762306a36Sopenharmony_cistatic unsigned short seq_file_family(const struct seq_file *seq)
303862306a36Sopenharmony_ci{
303962306a36Sopenharmony_ci	const struct tcp_seq_afinfo *afinfo;
304062306a36Sopenharmony_ci
304162306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL
304262306a36Sopenharmony_ci	/* Iterated from bpf_iter.  Let the bpf prog to filter instead. */
304362306a36Sopenharmony_ci	if (seq->op == &bpf_iter_tcp_seq_ops)
304462306a36Sopenharmony_ci		return AF_UNSPEC;
304562306a36Sopenharmony_ci#endif
304662306a36Sopenharmony_ci
304762306a36Sopenharmony_ci	/* Iterated from proc fs */
304862306a36Sopenharmony_ci	afinfo = pde_data(file_inode(seq->file));
304962306a36Sopenharmony_ci	return afinfo->family;
305062306a36Sopenharmony_ci}
305162306a36Sopenharmony_ci
305262306a36Sopenharmony_cistatic const struct seq_operations tcp4_seq_ops = {
305362306a36Sopenharmony_ci	.show		= tcp4_seq_show,
305462306a36Sopenharmony_ci	.start		= tcp_seq_start,
305562306a36Sopenharmony_ci	.next		= tcp_seq_next,
305662306a36Sopenharmony_ci	.stop		= tcp_seq_stop,
305762306a36Sopenharmony_ci};
305862306a36Sopenharmony_ci
305962306a36Sopenharmony_cistatic struct tcp_seq_afinfo tcp4_seq_afinfo = {
306062306a36Sopenharmony_ci	.family		= AF_INET,
306162306a36Sopenharmony_ci};
306262306a36Sopenharmony_ci
306362306a36Sopenharmony_cistatic int __net_init tcp4_proc_init_net(struct net *net)
306462306a36Sopenharmony_ci{
306562306a36Sopenharmony_ci	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
306662306a36Sopenharmony_ci			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
306762306a36Sopenharmony_ci		return -ENOMEM;
306862306a36Sopenharmony_ci	return 0;
306962306a36Sopenharmony_ci}
307062306a36Sopenharmony_ci
307162306a36Sopenharmony_cistatic void __net_exit tcp4_proc_exit_net(struct net *net)
307262306a36Sopenharmony_ci{
307362306a36Sopenharmony_ci	remove_proc_entry("tcp", net->proc_net);
307462306a36Sopenharmony_ci}
307562306a36Sopenharmony_ci
307662306a36Sopenharmony_cistatic struct pernet_operations tcp4_net_ops = {
307762306a36Sopenharmony_ci	.init = tcp4_proc_init_net,
307862306a36Sopenharmony_ci	.exit = tcp4_proc_exit_net,
307962306a36Sopenharmony_ci};
308062306a36Sopenharmony_ci
308162306a36Sopenharmony_ciint __init tcp4_proc_init(void)
308262306a36Sopenharmony_ci{
308362306a36Sopenharmony_ci	return register_pernet_subsys(&tcp4_net_ops);
308462306a36Sopenharmony_ci}
308562306a36Sopenharmony_ci
308662306a36Sopenharmony_civoid tcp4_proc_exit(void)
308762306a36Sopenharmony_ci{
308862306a36Sopenharmony_ci	unregister_pernet_subsys(&tcp4_net_ops);
308962306a36Sopenharmony_ci}
309062306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */
309162306a36Sopenharmony_ci
309262306a36Sopenharmony_ci/* @wake is one when sk_stream_write_space() calls us.
309362306a36Sopenharmony_ci * This sends EPOLLOUT only if notsent_bytes is half the limit.
309462306a36Sopenharmony_ci * This mimics the strategy used in sock_def_write_space().
309562306a36Sopenharmony_ci */
309662306a36Sopenharmony_cibool tcp_stream_memory_free(const struct sock *sk, int wake)
309762306a36Sopenharmony_ci{
309862306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
309962306a36Sopenharmony_ci	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
310062306a36Sopenharmony_ci			    READ_ONCE(tp->snd_nxt);
310162306a36Sopenharmony_ci
310262306a36Sopenharmony_ci	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
310362306a36Sopenharmony_ci}
310462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_stream_memory_free);
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_cistruct proto tcp_prot = {
310762306a36Sopenharmony_ci	.name			= "TCP",
310862306a36Sopenharmony_ci	.owner			= THIS_MODULE,
310962306a36Sopenharmony_ci	.close			= tcp_close,
311062306a36Sopenharmony_ci	.pre_connect		= tcp_v4_pre_connect,
311162306a36Sopenharmony_ci	.connect		= tcp_v4_connect,
311262306a36Sopenharmony_ci	.disconnect		= tcp_disconnect,
311362306a36Sopenharmony_ci	.accept			= inet_csk_accept,
311462306a36Sopenharmony_ci	.ioctl			= tcp_ioctl,
311562306a36Sopenharmony_ci	.init			= tcp_v4_init_sock,
311662306a36Sopenharmony_ci	.destroy		= tcp_v4_destroy_sock,
311762306a36Sopenharmony_ci	.shutdown		= tcp_shutdown,
311862306a36Sopenharmony_ci	.setsockopt		= tcp_setsockopt,
311962306a36Sopenharmony_ci	.getsockopt		= tcp_getsockopt,
312062306a36Sopenharmony_ci	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
312162306a36Sopenharmony_ci	.keepalive		= tcp_set_keepalive,
312262306a36Sopenharmony_ci	.recvmsg		= tcp_recvmsg,
312362306a36Sopenharmony_ci	.sendmsg		= tcp_sendmsg,
312462306a36Sopenharmony_ci	.splice_eof		= tcp_splice_eof,
312562306a36Sopenharmony_ci	.backlog_rcv		= tcp_v4_do_rcv,
312662306a36Sopenharmony_ci	.release_cb		= tcp_release_cb,
312762306a36Sopenharmony_ci	.hash			= inet_hash,
312862306a36Sopenharmony_ci	.unhash			= inet_unhash,
312962306a36Sopenharmony_ci	.get_port		= inet_csk_get_port,
313062306a36Sopenharmony_ci	.put_port		= inet_put_port,
313162306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL
313262306a36Sopenharmony_ci	.psock_update_sk_prot	= tcp_bpf_update_proto,
313362306a36Sopenharmony_ci#endif
313462306a36Sopenharmony_ci	.enter_memory_pressure	= tcp_enter_memory_pressure,
313562306a36Sopenharmony_ci	.leave_memory_pressure	= tcp_leave_memory_pressure,
313662306a36Sopenharmony_ci	.stream_memory_free	= tcp_stream_memory_free,
313762306a36Sopenharmony_ci	.sockets_allocated	= &tcp_sockets_allocated,
313862306a36Sopenharmony_ci	.orphan_count		= &tcp_orphan_count,
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_ci	.memory_allocated	= &tcp_memory_allocated,
314162306a36Sopenharmony_ci	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
314262306a36Sopenharmony_ci
314362306a36Sopenharmony_ci	.memory_pressure	= &tcp_memory_pressure,
314462306a36Sopenharmony_ci	.sysctl_mem		= sysctl_tcp_mem,
314562306a36Sopenharmony_ci	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
314662306a36Sopenharmony_ci	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
314762306a36Sopenharmony_ci	.max_header		= MAX_TCP_HEADER,
314862306a36Sopenharmony_ci	.obj_size		= sizeof(struct tcp_sock),
314962306a36Sopenharmony_ci	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
315062306a36Sopenharmony_ci	.twsk_prot		= &tcp_timewait_sock_ops,
315162306a36Sopenharmony_ci	.rsk_prot		= &tcp_request_sock_ops,
315262306a36Sopenharmony_ci	.h.hashinfo		= NULL,
315362306a36Sopenharmony_ci	.no_autobind		= true,
315462306a36Sopenharmony_ci	.diag_destroy		= tcp_abort,
315562306a36Sopenharmony_ci};
315662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_prot);
315762306a36Sopenharmony_ci
315862306a36Sopenharmony_cistatic void __net_exit tcp_sk_exit(struct net *net)
315962306a36Sopenharmony_ci{
316062306a36Sopenharmony_ci	if (net->ipv4.tcp_congestion_control)
316162306a36Sopenharmony_ci		bpf_module_put(net->ipv4.tcp_congestion_control,
316262306a36Sopenharmony_ci			       net->ipv4.tcp_congestion_control->owner);
316362306a36Sopenharmony_ci}
316462306a36Sopenharmony_ci
316562306a36Sopenharmony_cistatic void __net_init tcp_set_hashinfo(struct net *net)
316662306a36Sopenharmony_ci{
316762306a36Sopenharmony_ci	struct inet_hashinfo *hinfo;
316862306a36Sopenharmony_ci	unsigned int ehash_entries;
316962306a36Sopenharmony_ci	struct net *old_net;
317062306a36Sopenharmony_ci
317162306a36Sopenharmony_ci	if (net_eq(net, &init_net))
317262306a36Sopenharmony_ci		goto fallback;
317362306a36Sopenharmony_ci
317462306a36Sopenharmony_ci	old_net = current->nsproxy->net_ns;
317562306a36Sopenharmony_ci	ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries);
317662306a36Sopenharmony_ci	if (!ehash_entries)
317762306a36Sopenharmony_ci		goto fallback;
317862306a36Sopenharmony_ci
317962306a36Sopenharmony_ci	ehash_entries = roundup_pow_of_two(ehash_entries);
318062306a36Sopenharmony_ci	hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries);
318162306a36Sopenharmony_ci	if (!hinfo) {
318262306a36Sopenharmony_ci		pr_warn("Failed to allocate TCP ehash (entries: %u) "
318362306a36Sopenharmony_ci			"for a netns, fallback to the global one\n",
318462306a36Sopenharmony_ci			ehash_entries);
318562306a36Sopenharmony_cifallback:
318662306a36Sopenharmony_ci		hinfo = &tcp_hashinfo;
318762306a36Sopenharmony_ci		ehash_entries = tcp_hashinfo.ehash_mask + 1;
318862306a36Sopenharmony_ci	}
318962306a36Sopenharmony_ci
319062306a36Sopenharmony_ci	net->ipv4.tcp_death_row.hashinfo = hinfo;
319162306a36Sopenharmony_ci	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2;
319262306a36Sopenharmony_ci	net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128);
319362306a36Sopenharmony_ci}
319462306a36Sopenharmony_ci
319562306a36Sopenharmony_cistatic int __net_init tcp_sk_init(struct net *net)
319662306a36Sopenharmony_ci{
319762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_ecn = 2;
319862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_ecn_fallback = 1;
319962306a36Sopenharmony_ci
320062306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
320162306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
320262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
320362306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
320462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
320562306a36Sopenharmony_ci
320662306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
320762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
320862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
320962306a36Sopenharmony_ci
321062306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
321162306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
321262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_syncookies = 1;
321362306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
321462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
321562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
321662306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_orphan_retries = 0;
321762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
321862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
321962306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_tw_reuse = 2;
322062306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
322162306a36Sopenharmony_ci
322262306a36Sopenharmony_ci	refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1);
322362306a36Sopenharmony_ci	tcp_set_hashinfo(net);
322462306a36Sopenharmony_ci
322562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_sack = 1;
322662306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_window_scaling = 1;
322762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_timestamps = 1;
322862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_early_retrans = 3;
322962306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
323062306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
323162306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_retrans_collapse = 1;
323262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_max_reordering = 300;
323362306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_dsack = 1;
323462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_app_win = 31;
323562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_adv_win_scale = 1;
323662306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_frto = 2;
323762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
323862306a36Sopenharmony_ci	/* This limits the percentage of the congestion window which we
323962306a36Sopenharmony_ci	 * will allow a single TSO frame to consume.  Building TSO frames
324062306a36Sopenharmony_ci	 * which are too large can cause TCP streams to be bursty.
324162306a36Sopenharmony_ci	 */
324262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
324362306a36Sopenharmony_ci	/* Default TSQ limit of 16 TSO segments */
324462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
324562306a36Sopenharmony_ci
324662306a36Sopenharmony_ci	/* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. */
324762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX;
324862306a36Sopenharmony_ci
324962306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_min_tso_segs = 2;
325062306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_tso_rtt_log = 9;  /* 2^9 = 512 usec */
325162306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
325262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_autocorking = 1;
325362306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
325462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
325562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
325662306a36Sopenharmony_ci	if (net != &init_net) {
325762306a36Sopenharmony_ci		memcpy(net->ipv4.sysctl_tcp_rmem,
325862306a36Sopenharmony_ci		       init_net.ipv4.sysctl_tcp_rmem,
325962306a36Sopenharmony_ci		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
326062306a36Sopenharmony_ci		memcpy(net->ipv4.sysctl_tcp_wmem,
326162306a36Sopenharmony_ci		       init_net.ipv4.sysctl_tcp_wmem,
326262306a36Sopenharmony_ci		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
326362306a36Sopenharmony_ci	}
326462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
326562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
326662306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
326762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
326862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
326962306a36Sopenharmony_ci	atomic_set(&net->ipv4.tfo_active_disable_times, 0);
327062306a36Sopenharmony_ci
327162306a36Sopenharmony_ci	/* Set default values for PLB */
327262306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */
327362306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3;
327462306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
327562306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
327662306a36Sopenharmony_ci	/* Default congestion threshold for PLB to mark a round is 50% */
327762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_plb_cong_thresh = (1 << TCP_PLB_SCALE) / 2;
327862306a36Sopenharmony_ci
327962306a36Sopenharmony_ci	/* Reno is always built in */
328062306a36Sopenharmony_ci	if (!net_eq(net, &init_net) &&
328162306a36Sopenharmony_ci	    bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
328262306a36Sopenharmony_ci			       init_net.ipv4.tcp_congestion_control->owner))
328362306a36Sopenharmony_ci		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
328462306a36Sopenharmony_ci	else
328562306a36Sopenharmony_ci		net->ipv4.tcp_congestion_control = &tcp_reno;
328662306a36Sopenharmony_ci
328762306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
328862306a36Sopenharmony_ci	net->ipv4.sysctl_tcp_shrink_window = 0;
328962306a36Sopenharmony_ci
329062306a36Sopenharmony_ci	return 0;
329162306a36Sopenharmony_ci}
329262306a36Sopenharmony_ci
329362306a36Sopenharmony_cistatic void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
329462306a36Sopenharmony_ci{
329562306a36Sopenharmony_ci	struct net *net;
329662306a36Sopenharmony_ci
329762306a36Sopenharmony_ci	tcp_twsk_purge(net_exit_list, AF_INET);
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci	list_for_each_entry(net, net_exit_list, exit_list) {
330062306a36Sopenharmony_ci		inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
330162306a36Sopenharmony_ci		WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
330262306a36Sopenharmony_ci		tcp_fastopen_ctx_destroy(net);
330362306a36Sopenharmony_ci	}
330462306a36Sopenharmony_ci}
330562306a36Sopenharmony_ci
330662306a36Sopenharmony_cistatic struct pernet_operations __net_initdata tcp_sk_ops = {
330762306a36Sopenharmony_ci       .init	   = tcp_sk_init,
330862306a36Sopenharmony_ci       .exit	   = tcp_sk_exit,
330962306a36Sopenharmony_ci       .exit_batch = tcp_sk_exit_batch,
331062306a36Sopenharmony_ci};
331162306a36Sopenharmony_ci
331262306a36Sopenharmony_ci#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
331362306a36Sopenharmony_ciDEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
331462306a36Sopenharmony_ci		     struct sock_common *sk_common, uid_t uid)
331562306a36Sopenharmony_ci
331662306a36Sopenharmony_ci#define INIT_BATCH_SZ 16
331762306a36Sopenharmony_ci
331862306a36Sopenharmony_cistatic int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
331962306a36Sopenharmony_ci{
332062306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = priv_data;
332162306a36Sopenharmony_ci	int err;
332262306a36Sopenharmony_ci
332362306a36Sopenharmony_ci	err = bpf_iter_init_seq_net(priv_data, aux);
332462306a36Sopenharmony_ci	if (err)
332562306a36Sopenharmony_ci		return err;
332662306a36Sopenharmony_ci
332762306a36Sopenharmony_ci	err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
332862306a36Sopenharmony_ci	if (err) {
332962306a36Sopenharmony_ci		bpf_iter_fini_seq_net(priv_data);
333062306a36Sopenharmony_ci		return err;
333162306a36Sopenharmony_ci	}
333262306a36Sopenharmony_ci
333362306a36Sopenharmony_ci	return 0;
333462306a36Sopenharmony_ci}
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_cistatic void bpf_iter_fini_tcp(void *priv_data)
333762306a36Sopenharmony_ci{
333862306a36Sopenharmony_ci	struct bpf_tcp_iter_state *iter = priv_data;
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci	bpf_iter_fini_seq_net(priv_data);
334162306a36Sopenharmony_ci	kvfree(iter->batch);
334262306a36Sopenharmony_ci}
334362306a36Sopenharmony_ci
334462306a36Sopenharmony_cistatic const struct bpf_iter_seq_info tcp_seq_info = {
334562306a36Sopenharmony_ci	.seq_ops		= &bpf_iter_tcp_seq_ops,
334662306a36Sopenharmony_ci	.init_seq_private	= bpf_iter_init_tcp,
334762306a36Sopenharmony_ci	.fini_seq_private	= bpf_iter_fini_tcp,
334862306a36Sopenharmony_ci	.seq_priv_size		= sizeof(struct bpf_tcp_iter_state),
334962306a36Sopenharmony_ci};
335062306a36Sopenharmony_ci
335162306a36Sopenharmony_cistatic const struct bpf_func_proto *
335262306a36Sopenharmony_cibpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
335362306a36Sopenharmony_ci			    const struct bpf_prog *prog)
335462306a36Sopenharmony_ci{
335562306a36Sopenharmony_ci	switch (func_id) {
335662306a36Sopenharmony_ci	case BPF_FUNC_setsockopt:
335762306a36Sopenharmony_ci		return &bpf_sk_setsockopt_proto;
335862306a36Sopenharmony_ci	case BPF_FUNC_getsockopt:
335962306a36Sopenharmony_ci		return &bpf_sk_getsockopt_proto;
336062306a36Sopenharmony_ci	default:
336162306a36Sopenharmony_ci		return NULL;
336262306a36Sopenharmony_ci	}
336362306a36Sopenharmony_ci}
336462306a36Sopenharmony_ci
336562306a36Sopenharmony_cistatic struct bpf_iter_reg tcp_reg_info = {
336662306a36Sopenharmony_ci	.target			= "tcp",
336762306a36Sopenharmony_ci	.ctx_arg_info_size	= 1,
336862306a36Sopenharmony_ci	.ctx_arg_info		= {
336962306a36Sopenharmony_ci		{ offsetof(struct bpf_iter__tcp, sk_common),
337062306a36Sopenharmony_ci		  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
337162306a36Sopenharmony_ci	},
337262306a36Sopenharmony_ci	.get_func_proto		= bpf_iter_tcp_get_func_proto,
337362306a36Sopenharmony_ci	.seq_info		= &tcp_seq_info,
337462306a36Sopenharmony_ci};
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_cistatic void __init bpf_iter_register(void)
337762306a36Sopenharmony_ci{
337862306a36Sopenharmony_ci	tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON];
337962306a36Sopenharmony_ci	if (bpf_iter_reg_target(&tcp_reg_info))
338062306a36Sopenharmony_ci		pr_warn("Warning: could not register bpf iterator tcp\n");
338162306a36Sopenharmony_ci}
338262306a36Sopenharmony_ci
338362306a36Sopenharmony_ci#endif
338462306a36Sopenharmony_ci
338562306a36Sopenharmony_civoid __init tcp_v4_init(void)
338662306a36Sopenharmony_ci{
338762306a36Sopenharmony_ci	int cpu, res;
338862306a36Sopenharmony_ci
338962306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
339062306a36Sopenharmony_ci		struct sock *sk;
339162306a36Sopenharmony_ci
339262306a36Sopenharmony_ci		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
339362306a36Sopenharmony_ci					   IPPROTO_TCP, &init_net);
339462306a36Sopenharmony_ci		if (res)
339562306a36Sopenharmony_ci			panic("Failed to create the TCP control socket.\n");
339662306a36Sopenharmony_ci		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci		/* Please enforce IP_DF and IPID==0 for RST and
339962306a36Sopenharmony_ci		 * ACK sent in SYN-RECV and TIME-WAIT state.
340062306a36Sopenharmony_ci		 */
340162306a36Sopenharmony_ci		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
340262306a36Sopenharmony_ci
340362306a36Sopenharmony_ci		per_cpu(ipv4_tcp_sk, cpu) = sk;
340462306a36Sopenharmony_ci	}
340562306a36Sopenharmony_ci	if (register_pernet_subsys(&tcp_sk_ops))
340662306a36Sopenharmony_ci		panic("Failed to create the TCP control socket.\n");
340762306a36Sopenharmony_ci
340862306a36Sopenharmony_ci#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
340962306a36Sopenharmony_ci	bpf_iter_register();
341062306a36Sopenharmony_ci#endif
341162306a36Sopenharmony_ci}
3412