// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
3662306a36Sopenharmony_ci * Mike McLagan : Routing by source 3762306a36Sopenharmony_ci * Juan Jose Ciarlante: ip_dynaddr bits 3862306a36Sopenharmony_ci * Andi Kleen: various fixes. 3962306a36Sopenharmony_ci * Vitaly E. Lavrov : Transparent proxy revived after year 4062306a36Sopenharmony_ci * coma. 4162306a36Sopenharmony_ci * Andi Kleen : Fix new listen. 4262306a36Sopenharmony_ci * Andi Kleen : Fix accept error reporting. 4362306a36Sopenharmony_ci * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 4462306a36Sopenharmony_ci * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 4562306a36Sopenharmony_ci * a single port at the same time. 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#define pr_fmt(fmt) "TCP: " fmt 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci#include <linux/bottom_half.h> 5162306a36Sopenharmony_ci#include <linux/types.h> 5262306a36Sopenharmony_ci#include <linux/fcntl.h> 5362306a36Sopenharmony_ci#include <linux/module.h> 5462306a36Sopenharmony_ci#include <linux/random.h> 5562306a36Sopenharmony_ci#include <linux/cache.h> 5662306a36Sopenharmony_ci#include <linux/jhash.h> 5762306a36Sopenharmony_ci#include <linux/init.h> 5862306a36Sopenharmony_ci#include <linux/times.h> 5962306a36Sopenharmony_ci#include <linux/slab.h> 6062306a36Sopenharmony_ci#include <linux/sched.h> 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci#include <net/net_namespace.h> 6362306a36Sopenharmony_ci#include <net/icmp.h> 6462306a36Sopenharmony_ci#include <net/inet_hashtables.h> 6562306a36Sopenharmony_ci#include <net/tcp.h> 6662306a36Sopenharmony_ci#include <net/transp_v6.h> 6762306a36Sopenharmony_ci#include <net/ipv6.h> 6862306a36Sopenharmony_ci#include <net/inet_common.h> 6962306a36Sopenharmony_ci#include <net/timewait_sock.h> 7062306a36Sopenharmony_ci#include <net/xfrm.h> 7162306a36Sopenharmony_ci#include <net/secure_seq.h> 7262306a36Sopenharmony_ci#include <net/busy_poll.h> 7362306a36Sopenharmony_ci 
7462306a36Sopenharmony_ci#include <linux/inet.h> 7562306a36Sopenharmony_ci#include <linux/ipv6.h> 7662306a36Sopenharmony_ci#include <linux/stddef.h> 7762306a36Sopenharmony_ci#include <linux/proc_fs.h> 7862306a36Sopenharmony_ci#include <linux/seq_file.h> 7962306a36Sopenharmony_ci#include <linux/inetdevice.h> 8062306a36Sopenharmony_ci#include <linux/btf_ids.h> 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci#include <crypto/hash.h> 8362306a36Sopenharmony_ci#include <linux/scatterlist.h> 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#include <trace/events/tcp.h> 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 8862306a36Sopenharmony_cistatic int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 8962306a36Sopenharmony_ci __be32 daddr, __be32 saddr, const struct tcphdr *th); 9062306a36Sopenharmony_ci#endif 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_cistruct inet_hashinfo tcp_hashinfo; 9362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_hashinfo); 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_cistatic u32 tcp_v4_init_seq(const struct sk_buff *skb) 9862306a36Sopenharmony_ci{ 9962306a36Sopenharmony_ci return secure_tcp_seq(ip_hdr(skb)->daddr, 10062306a36Sopenharmony_ci ip_hdr(skb)->saddr, 10162306a36Sopenharmony_ci tcp_hdr(skb)->dest, 10262306a36Sopenharmony_ci tcp_hdr(skb)->source); 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_cistatic u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ciint tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 11162306a36Sopenharmony_ci{ 11262306a36Sopenharmony_ci int reuse = 
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); 11362306a36Sopenharmony_ci const struct inet_timewait_sock *tw = inet_twsk(sktw); 11462306a36Sopenharmony_ci const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 11562306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci if (reuse == 2) { 11862306a36Sopenharmony_ci /* Still does not detect *everything* that goes through 11962306a36Sopenharmony_ci * lo, since we require a loopback src or dst address 12062306a36Sopenharmony_ci * or direct binding to 'lo' interface. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci bool loopback = false; 12362306a36Sopenharmony_ci if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX) 12462306a36Sopenharmony_ci loopback = true; 12562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 12662306a36Sopenharmony_ci if (tw->tw_family == AF_INET6) { 12762306a36Sopenharmony_ci if (ipv6_addr_loopback(&tw->tw_v6_daddr) || 12862306a36Sopenharmony_ci ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) || 12962306a36Sopenharmony_ci ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) || 13062306a36Sopenharmony_ci ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr)) 13162306a36Sopenharmony_ci loopback = true; 13262306a36Sopenharmony_ci } else 13362306a36Sopenharmony_ci#endif 13462306a36Sopenharmony_ci { 13562306a36Sopenharmony_ci if (ipv4_is_loopback(tw->tw_daddr) || 13662306a36Sopenharmony_ci ipv4_is_loopback(tw->tw_rcv_saddr)) 13762306a36Sopenharmony_ci loopback = true; 13862306a36Sopenharmony_ci } 13962306a36Sopenharmony_ci if (!loopback) 14062306a36Sopenharmony_ci reuse = 0; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci /* With PAWS, it is safe from the viewpoint 14462306a36Sopenharmony_ci of data integrity. Even without PAWS it is safe provided sequence 14562306a36Sopenharmony_ci spaces do not overlap i.e. at data rates <= 80Mbit/sec. 
14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci Actually, the idea is close to VJ's one, only timestamp cache is 14862306a36Sopenharmony_ci held not per host, but per port pair and TW bucket is used as state 14962306a36Sopenharmony_ci holder. 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci If TW bucket has been already destroyed we fall back to VJ's scheme 15262306a36Sopenharmony_ci and use initial timestamp retrieved from peer table. 15362306a36Sopenharmony_ci */ 15462306a36Sopenharmony_ci if (tcptw->tw_ts_recent_stamp && 15562306a36Sopenharmony_ci (!twp || (reuse && time_after32(ktime_get_seconds(), 15662306a36Sopenharmony_ci tcptw->tw_ts_recent_stamp)))) { 15762306a36Sopenharmony_ci /* In case of repair and re-using TIME-WAIT sockets we still 15862306a36Sopenharmony_ci * want to be sure that it is safe as above but honor the 15962306a36Sopenharmony_ci * sequence numbers and time stamps set as part of the repair 16062306a36Sopenharmony_ci * process. 16162306a36Sopenharmony_ci * 16262306a36Sopenharmony_ci * Without this check re-using a TIME-WAIT socket with TCP 16362306a36Sopenharmony_ci * repair would accumulate a -1 on the repair assigned 16462306a36Sopenharmony_ci * sequence number. The first time it is reused the sequence 16562306a36Sopenharmony_ci * is -1, the second time -2, etc. This fixes that issue 16662306a36Sopenharmony_ci * without appearing to create any others. 
16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_ci if (likely(!tp->repair)) { 16962306a36Sopenharmony_ci u32 seq = tcptw->tw_snd_nxt + 65535 + 2; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (!seq) 17262306a36Sopenharmony_ci seq = 1; 17362306a36Sopenharmony_ci WRITE_ONCE(tp->write_seq, seq); 17462306a36Sopenharmony_ci tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 17562306a36Sopenharmony_ci tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 17662306a36Sopenharmony_ci } 17762306a36Sopenharmony_ci sock_hold(sktw); 17862306a36Sopenharmony_ci return 1; 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci return 0; 18262306a36Sopenharmony_ci} 18362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_twsk_unique); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistatic int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, 18662306a36Sopenharmony_ci int addr_len) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci /* This check is replicated from tcp_v4_connect() and intended to 18962306a36Sopenharmony_ci * prevent BPF program called below from accessing bytes that are out 19062306a36Sopenharmony_ci * of the bound specified by user in addr_len. 19162306a36Sopenharmony_ci */ 19262306a36Sopenharmony_ci if (addr_len < sizeof(struct sockaddr_in)) 19362306a36Sopenharmony_ci return -EINVAL; 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci sock_owned_by_me(sk); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci/* This will initiate an outgoing connection. 
*/ 20162306a36Sopenharmony_ciint tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 20462306a36Sopenharmony_ci struct inet_timewait_death_row *tcp_death_row; 20562306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 20662306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 20762306a36Sopenharmony_ci struct ip_options_rcu *inet_opt; 20862306a36Sopenharmony_ci struct net *net = sock_net(sk); 20962306a36Sopenharmony_ci __be16 orig_sport, orig_dport; 21062306a36Sopenharmony_ci __be32 daddr, nexthop; 21162306a36Sopenharmony_ci struct flowi4 *fl4; 21262306a36Sopenharmony_ci struct rtable *rt; 21362306a36Sopenharmony_ci int err; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (addr_len < sizeof(struct sockaddr_in)) 21662306a36Sopenharmony_ci return -EINVAL; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci if (usin->sin_family != AF_INET) 21962306a36Sopenharmony_ci return -EAFNOSUPPORT; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci nexthop = daddr = usin->sin_addr.s_addr; 22262306a36Sopenharmony_ci inet_opt = rcu_dereference_protected(inet->inet_opt, 22362306a36Sopenharmony_ci lockdep_sock_is_held(sk)); 22462306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.srr) { 22562306a36Sopenharmony_ci if (!daddr) 22662306a36Sopenharmony_ci return -EINVAL; 22762306a36Sopenharmony_ci nexthop = inet_opt->opt.faddr; 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci orig_sport = inet->inet_sport; 23162306a36Sopenharmony_ci orig_dport = usin->sin_port; 23262306a36Sopenharmony_ci fl4 = &inet->cork.fl.u.ip4; 23362306a36Sopenharmony_ci rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 23462306a36Sopenharmony_ci sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, 23562306a36Sopenharmony_ci orig_dport, sk); 23662306a36Sopenharmony_ci if (IS_ERR(rt)) { 23762306a36Sopenharmony_ci err = PTR_ERR(rt); 
23862306a36Sopenharmony_ci if (err == -ENETUNREACH) 23962306a36Sopenharmony_ci IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 24062306a36Sopenharmony_ci return err; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 24462306a36Sopenharmony_ci ip_rt_put(rt); 24562306a36Sopenharmony_ci return -ENETUNREACH; 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci if (!inet_opt || !inet_opt->opt.srr) 24962306a36Sopenharmony_ci daddr = fl4->daddr; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (!inet->inet_saddr) { 25462306a36Sopenharmony_ci err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); 25562306a36Sopenharmony_ci if (err) { 25662306a36Sopenharmony_ci ip_rt_put(rt); 25762306a36Sopenharmony_ci return err; 25862306a36Sopenharmony_ci } 25962306a36Sopenharmony_ci } else { 26062306a36Sopenharmony_ci sk_rcv_saddr_set(sk, inet->inet_saddr); 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 26462306a36Sopenharmony_ci /* Reset inherited state */ 26562306a36Sopenharmony_ci tp->rx_opt.ts_recent = 0; 26662306a36Sopenharmony_ci tp->rx_opt.ts_recent_stamp = 0; 26762306a36Sopenharmony_ci if (likely(!tp->repair)) 26862306a36Sopenharmony_ci WRITE_ONCE(tp->write_seq, 0); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci inet->inet_dport = usin->sin_port; 27262306a36Sopenharmony_ci sk_daddr_set(sk, daddr); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci inet_csk(sk)->icsk_ext_hdr_len = 0; 27562306a36Sopenharmony_ci if (inet_opt) 27662306a36Sopenharmony_ci inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 
27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci /* Socket identity is still unknown (sport may be zero). 28162306a36Sopenharmony_ci * However we set state to SYN-SENT and not releasing socket 28262306a36Sopenharmony_ci * lock select source port, enter ourselves into the hash tables and 28362306a36Sopenharmony_ci * complete initialization after this. 28462306a36Sopenharmony_ci */ 28562306a36Sopenharmony_ci tcp_set_state(sk, TCP_SYN_SENT); 28662306a36Sopenharmony_ci err = inet_hash_connect(tcp_death_row, sk); 28762306a36Sopenharmony_ci if (err) 28862306a36Sopenharmony_ci goto failure; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci sk_set_txhash(sk); 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 29362306a36Sopenharmony_ci inet->inet_sport, inet->inet_dport, sk); 29462306a36Sopenharmony_ci if (IS_ERR(rt)) { 29562306a36Sopenharmony_ci err = PTR_ERR(rt); 29662306a36Sopenharmony_ci rt = NULL; 29762306a36Sopenharmony_ci goto failure; 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci /* OK, now commit destination to socket. 
*/ 30062306a36Sopenharmony_ci sk->sk_gso_type = SKB_GSO_TCPV4; 30162306a36Sopenharmony_ci sk_setup_caps(sk, &rt->dst); 30262306a36Sopenharmony_ci rt = NULL; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci if (likely(!tp->repair)) { 30562306a36Sopenharmony_ci if (!tp->write_seq) 30662306a36Sopenharmony_ci WRITE_ONCE(tp->write_seq, 30762306a36Sopenharmony_ci secure_tcp_seq(inet->inet_saddr, 30862306a36Sopenharmony_ci inet->inet_daddr, 30962306a36Sopenharmony_ci inet->inet_sport, 31062306a36Sopenharmony_ci usin->sin_port)); 31162306a36Sopenharmony_ci WRITE_ONCE(tp->tsoffset, 31262306a36Sopenharmony_ci secure_tcp_ts_off(net, inet->inet_saddr, 31362306a36Sopenharmony_ci inet->inet_daddr)); 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci atomic_set(&inet->inet_id, get_random_u16()); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci if (tcp_fastopen_defer_connect(sk, &err)) 31962306a36Sopenharmony_ci return err; 32062306a36Sopenharmony_ci if (err) 32162306a36Sopenharmony_ci goto failure; 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci err = tcp_connect(sk); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (err) 32662306a36Sopenharmony_ci goto failure; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci return 0; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_cifailure: 33162306a36Sopenharmony_ci /* 33262306a36Sopenharmony_ci * This unhashes the socket and releases the local port, 33362306a36Sopenharmony_ci * if necessary. 
33462306a36Sopenharmony_ci */ 33562306a36Sopenharmony_ci tcp_set_state(sk, TCP_CLOSE); 33662306a36Sopenharmony_ci inet_bhash2_reset_saddr(sk); 33762306a36Sopenharmony_ci ip_rt_put(rt); 33862306a36Sopenharmony_ci sk->sk_route_caps = 0; 33962306a36Sopenharmony_ci inet->inet_dport = 0; 34062306a36Sopenharmony_ci return err; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_connect); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci/* 34562306a36Sopenharmony_ci * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 34662306a36Sopenharmony_ci * It can be called through tcp_release_cb() if socket was owned by user 34762306a36Sopenharmony_ci * at the time tcp_v4_err() was called to handle ICMP message. 34862306a36Sopenharmony_ci */ 34962306a36Sopenharmony_civoid tcp_v4_mtu_reduced(struct sock *sk) 35062306a36Sopenharmony_ci{ 35162306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 35262306a36Sopenharmony_ci struct dst_entry *dst; 35362306a36Sopenharmony_ci u32 mtu; 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 35662306a36Sopenharmony_ci return; 35762306a36Sopenharmony_ci mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 35862306a36Sopenharmony_ci dst = inet_csk_update_pmtu(sk, mtu); 35962306a36Sopenharmony_ci if (!dst) 36062306a36Sopenharmony_ci return; 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci /* Something is about to be wrong... Remember soft error 36362306a36Sopenharmony_ci * for the case, if this connection will not able to recover. 
36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 36662306a36Sopenharmony_ci WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci mtu = dst_mtu(dst); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci if (inet->pmtudisc != IP_PMTUDISC_DONT && 37162306a36Sopenharmony_ci ip_sk_accept_pmtu(sk) && 37262306a36Sopenharmony_ci inet_csk(sk)->icsk_pmtu_cookie > mtu) { 37362306a36Sopenharmony_ci tcp_sync_mss(sk, mtu); 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci /* Resend the TCP packet because it's 37662306a36Sopenharmony_ci * clear that the old packet has been 37762306a36Sopenharmony_ci * dropped. This is the new "fast" path mtu 37862306a36Sopenharmony_ci * discovery. 37962306a36Sopenharmony_ci */ 38062306a36Sopenharmony_ci tcp_simple_retransmit(sk); 38162306a36Sopenharmony_ci } /* else let the usual retransmit timer handle it */ 38262306a36Sopenharmony_ci} 38362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_mtu_reduced); 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_cistatic void do_redirect(struct sk_buff *skb, struct sock *sk) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci struct dst_entry *dst = __sk_dst_check(sk, 0); 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci if (dst) 39062306a36Sopenharmony_ci dst->ops->redirect(dst, sk, skb); 39162306a36Sopenharmony_ci} 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ 39562306a36Sopenharmony_civoid tcp_req_err(struct sock *sk, u32 seq, bool abort) 39662306a36Sopenharmony_ci{ 39762306a36Sopenharmony_ci struct request_sock *req = inet_reqsk(sk); 39862306a36Sopenharmony_ci struct net *net = sock_net(sk); 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci /* ICMPs are not backlogged, hence we cannot get 40162306a36Sopenharmony_ci * an established socket here. 
40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ci if (seq != tcp_rsk(req)->snt_isn) { 40462306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 40562306a36Sopenharmony_ci } else if (abort) { 40662306a36Sopenharmony_ci /* 40762306a36Sopenharmony_ci * Still in SYN_RECV, just remove it silently. 40862306a36Sopenharmony_ci * There is no good way to pass the error to the newly 40962306a36Sopenharmony_ci * created socket, and POSIX does not want network 41062306a36Sopenharmony_ci * errors returned from accept(). 41162306a36Sopenharmony_ci */ 41262306a36Sopenharmony_ci inet_csk_reqsk_queue_drop(req->rsk_listener, req); 41362306a36Sopenharmony_ci tcp_listendrop(req->rsk_listener); 41462306a36Sopenharmony_ci } 41562306a36Sopenharmony_ci reqsk_put(req); 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_req_err); 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci/* TCP-LD (RFC 6069) logic */ 42062306a36Sopenharmony_civoid tcp_ld_RTO_revert(struct sock *sk, u32 seq) 42162306a36Sopenharmony_ci{ 42262306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 42362306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 42462306a36Sopenharmony_ci struct sk_buff *skb; 42562306a36Sopenharmony_ci s32 remaining; 42662306a36Sopenharmony_ci u32 delta_us; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci if (sock_owned_by_user(sk)) 42962306a36Sopenharmony_ci return; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (seq != tp->snd_una || !icsk->icsk_retransmits || 43262306a36Sopenharmony_ci !icsk->icsk_backoff) 43362306a36Sopenharmony_ci return; 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci skb = tcp_rtx_queue_head(sk); 43662306a36Sopenharmony_ci if (WARN_ON_ONCE(!skb)) 43762306a36Sopenharmony_ci return; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci icsk->icsk_backoff--; 44062306a36Sopenharmony_ci icsk->icsk_rto = tp->srtt_us ? 
__tcp_set_rto(tp) : TCP_TIMEOUT_INIT; 44162306a36Sopenharmony_ci icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci tcp_mstamp_refresh(tp); 44462306a36Sopenharmony_ci delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); 44562306a36Sopenharmony_ci remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci if (remaining > 0) { 44862306a36Sopenharmony_ci inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 44962306a36Sopenharmony_ci remaining, TCP_RTO_MAX); 45062306a36Sopenharmony_ci } else { 45162306a36Sopenharmony_ci /* RTO revert clocked out retransmission. 45262306a36Sopenharmony_ci * Will retransmit now. 45362306a36Sopenharmony_ci */ 45462306a36Sopenharmony_ci tcp_retransmit_timer(sk); 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_ld_RTO_revert); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci/* 46062306a36Sopenharmony_ci * This routine is called by the ICMP module when it gets some 46162306a36Sopenharmony_ci * sort of error condition. If err < 0 then the socket should 46262306a36Sopenharmony_ci * be closed and the error returned to the user. If err > 0 46362306a36Sopenharmony_ci * it's just the icmp type << 8 | icmp code. After adjustment 46462306a36Sopenharmony_ci * header points to the first 8 bytes of the tcp header. We need 46562306a36Sopenharmony_ci * to find the appropriate port. 46662306a36Sopenharmony_ci * 46762306a36Sopenharmony_ci * The locking strategy used here is very "optimistic". When 46862306a36Sopenharmony_ci * someone else accesses the socket the ICMP is just dropped 46962306a36Sopenharmony_ci * and for some paths there is no check at all. 47062306a36Sopenharmony_ci * A more general error queue to queue errors for later handling 47162306a36Sopenharmony_ci * is probably better. 
47262306a36Sopenharmony_ci * 47362306a36Sopenharmony_ci */ 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ciint tcp_v4_err(struct sk_buff *skb, u32 info) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci const struct iphdr *iph = (const struct iphdr *)skb->data; 47862306a36Sopenharmony_ci struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 47962306a36Sopenharmony_ci struct tcp_sock *tp; 48062306a36Sopenharmony_ci const int type = icmp_hdr(skb)->type; 48162306a36Sopenharmony_ci const int code = icmp_hdr(skb)->code; 48262306a36Sopenharmony_ci struct sock *sk; 48362306a36Sopenharmony_ci struct request_sock *fastopen; 48462306a36Sopenharmony_ci u32 seq, snd_una; 48562306a36Sopenharmony_ci int err; 48662306a36Sopenharmony_ci struct net *net = dev_net(skb->dev); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 48962306a36Sopenharmony_ci iph->daddr, th->dest, iph->saddr, 49062306a36Sopenharmony_ci ntohs(th->source), inet_iif(skb), 0); 49162306a36Sopenharmony_ci if (!sk) { 49262306a36Sopenharmony_ci __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); 49362306a36Sopenharmony_ci return -ENOENT; 49462306a36Sopenharmony_ci } 49562306a36Sopenharmony_ci if (sk->sk_state == TCP_TIME_WAIT) { 49662306a36Sopenharmony_ci inet_twsk_put(inet_twsk(sk)); 49762306a36Sopenharmony_ci return 0; 49862306a36Sopenharmony_ci } 49962306a36Sopenharmony_ci seq = ntohl(th->seq); 50062306a36Sopenharmony_ci if (sk->sk_state == TCP_NEW_SYN_RECV) { 50162306a36Sopenharmony_ci tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB || 50262306a36Sopenharmony_ci type == ICMP_TIME_EXCEEDED || 50362306a36Sopenharmony_ci (type == ICMP_DEST_UNREACH && 50462306a36Sopenharmony_ci (code == ICMP_NET_UNREACH || 50562306a36Sopenharmony_ci code == ICMP_HOST_UNREACH))); 50662306a36Sopenharmony_ci return 0; 50762306a36Sopenharmony_ci } 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci bh_lock_sock(sk); 51062306a36Sopenharmony_ci /* If 
too many ICMPs get dropped on busy 51162306a36Sopenharmony_ci * servers this needs to be solved differently. 51262306a36Sopenharmony_ci * We do take care of PMTU discovery (RFC1191) special case : 51362306a36Sopenharmony_ci * we can receive locally generated ICMP messages while socket is held. 51462306a36Sopenharmony_ci */ 51562306a36Sopenharmony_ci if (sock_owned_by_user(sk)) { 51662306a36Sopenharmony_ci if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 51762306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci if (sk->sk_state == TCP_CLOSE) 52062306a36Sopenharmony_ci goto out; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci if (static_branch_unlikely(&ip4_min_ttl)) { 52362306a36Sopenharmony_ci /* min_ttl can be changed concurrently from do_ip_setsockopt() */ 52462306a36Sopenharmony_ci if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { 52562306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 52662306a36Sopenharmony_ci goto out; 52762306a36Sopenharmony_ci } 52862306a36Sopenharmony_ci } 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci tp = tcp_sk(sk); 53162306a36Sopenharmony_ci /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 53262306a36Sopenharmony_ci fastopen = rcu_dereference(tp->fastopen_rsk); 53362306a36Sopenharmony_ci snd_una = fastopen ? 
tcp_rsk(fastopen)->snt_isn : tp->snd_una; 53462306a36Sopenharmony_ci if (sk->sk_state != TCP_LISTEN && 53562306a36Sopenharmony_ci !between(seq, snd_una, tp->snd_nxt)) { 53662306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 53762306a36Sopenharmony_ci goto out; 53862306a36Sopenharmony_ci } 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci switch (type) { 54162306a36Sopenharmony_ci case ICMP_REDIRECT: 54262306a36Sopenharmony_ci if (!sock_owned_by_user(sk)) 54362306a36Sopenharmony_ci do_redirect(skb, sk); 54462306a36Sopenharmony_ci goto out; 54562306a36Sopenharmony_ci case ICMP_SOURCE_QUENCH: 54662306a36Sopenharmony_ci /* Just silently ignore these. */ 54762306a36Sopenharmony_ci goto out; 54862306a36Sopenharmony_ci case ICMP_PARAMETERPROB: 54962306a36Sopenharmony_ci err = EPROTO; 55062306a36Sopenharmony_ci break; 55162306a36Sopenharmony_ci case ICMP_DEST_UNREACH: 55262306a36Sopenharmony_ci if (code > NR_ICMP_UNREACH) 55362306a36Sopenharmony_ci goto out; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 55662306a36Sopenharmony_ci /* We are not interested in TCP_LISTEN and open_requests 55762306a36Sopenharmony_ci * (SYN-ACKs send out by Linux are always <576bytes so 55862306a36Sopenharmony_ci * they should go through unfragmented). 
55962306a36Sopenharmony_ci */ 56062306a36Sopenharmony_ci if (sk->sk_state == TCP_LISTEN) 56162306a36Sopenharmony_ci goto out; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci WRITE_ONCE(tp->mtu_info, info); 56462306a36Sopenharmony_ci if (!sock_owned_by_user(sk)) { 56562306a36Sopenharmony_ci tcp_v4_mtu_reduced(sk); 56662306a36Sopenharmony_ci } else { 56762306a36Sopenharmony_ci if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) 56862306a36Sopenharmony_ci sock_hold(sk); 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci goto out; 57162306a36Sopenharmony_ci } 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci err = icmp_err_convert[code].errno; 57462306a36Sopenharmony_ci /* check if this ICMP message allows revert of backoff. 57562306a36Sopenharmony_ci * (see RFC 6069) 57662306a36Sopenharmony_ci */ 57762306a36Sopenharmony_ci if (!fastopen && 57862306a36Sopenharmony_ci (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH)) 57962306a36Sopenharmony_ci tcp_ld_RTO_revert(sk, seq); 58062306a36Sopenharmony_ci break; 58162306a36Sopenharmony_ci case ICMP_TIME_EXCEEDED: 58262306a36Sopenharmony_ci err = EHOSTUNREACH; 58362306a36Sopenharmony_ci break; 58462306a36Sopenharmony_ci default: 58562306a36Sopenharmony_ci goto out; 58662306a36Sopenharmony_ci } 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci switch (sk->sk_state) { 58962306a36Sopenharmony_ci case TCP_SYN_SENT: 59062306a36Sopenharmony_ci case TCP_SYN_RECV: 59162306a36Sopenharmony_ci /* Only in fast or simultaneous open. If a fast open socket is 59262306a36Sopenharmony_ci * already accepted it is treated as a connected one below. 
59362306a36Sopenharmony_ci */ 59462306a36Sopenharmony_ci if (fastopen && !fastopen->sk) 59562306a36Sopenharmony_ci break; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci if (!sock_owned_by_user(sk)) { 60062306a36Sopenharmony_ci WRITE_ONCE(sk->sk_err, err); 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci sk_error_report(sk); 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci tcp_done(sk); 60562306a36Sopenharmony_ci } else { 60662306a36Sopenharmony_ci WRITE_ONCE(sk->sk_err_soft, err); 60762306a36Sopenharmony_ci } 60862306a36Sopenharmony_ci goto out; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci /* If we've already connected we will keep trying 61262306a36Sopenharmony_ci * until we time out, or the user gives up. 61362306a36Sopenharmony_ci * 61462306a36Sopenharmony_ci * rfc1122 4.2.3.9 allows to consider as hard errors 61562306a36Sopenharmony_ci * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 61662306a36Sopenharmony_ci * but it is obsoleted by pmtu discovery). 61762306a36Sopenharmony_ci * 61862306a36Sopenharmony_ci * Note, that in modern internet, where routing is unreliable 61962306a36Sopenharmony_ci * and in each dark corner broken firewalls sit, sending random 62062306a36Sopenharmony_ci * errors ordered by their masters even this two messages finally lose 62162306a36Sopenharmony_ci * their original sense (even Linux sends invalid PORT_UNREACHs) 62262306a36Sopenharmony_ci * 62362306a36Sopenharmony_ci * Now we are in compliance with RFCs. 
62462306a36Sopenharmony_ci * --ANK (980905) 62562306a36Sopenharmony_ci */ 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci if (!sock_owned_by_user(sk) && 62862306a36Sopenharmony_ci inet_test_bit(RECVERR, sk)) { 62962306a36Sopenharmony_ci WRITE_ONCE(sk->sk_err, err); 63062306a36Sopenharmony_ci sk_error_report(sk); 63162306a36Sopenharmony_ci } else { /* Only an error on timeout */ 63262306a36Sopenharmony_ci WRITE_ONCE(sk->sk_err_soft, err); 63362306a36Sopenharmony_ci } 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ciout: 63662306a36Sopenharmony_ci bh_unlock_sock(sk); 63762306a36Sopenharmony_ci sock_put(sk); 63862306a36Sopenharmony_ci return 0; 63962306a36Sopenharmony_ci} 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_civoid __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) 64262306a36Sopenharmony_ci{ 64362306a36Sopenharmony_ci struct tcphdr *th = tcp_hdr(skb); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 64662306a36Sopenharmony_ci skb->csum_start = skb_transport_header(skb) - skb->head; 64762306a36Sopenharmony_ci skb->csum_offset = offsetof(struct tcphdr, check); 64862306a36Sopenharmony_ci} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci/* This routine computes an IPv4 TCP checksum. */ 65162306a36Sopenharmony_civoid tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 65262306a36Sopenharmony_ci{ 65362306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_send_check); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci/* 66062306a36Sopenharmony_ci * This routine will send an RST to the other tcp. 66162306a36Sopenharmony_ci * 66262306a36Sopenharmony_ci * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 66362306a36Sopenharmony_ci * for reset. 
66462306a36Sopenharmony_ci * Answer: if a packet caused RST, it is not for a socket 66562306a36Sopenharmony_ci * existing in our system, if it is matched to a socket, 66662306a36Sopenharmony_ci * it is just duplicate segment or bug in other side's TCP. 66762306a36Sopenharmony_ci * So that we build reply only basing on parameters 66862306a36Sopenharmony_ci * arrived with segment. 66962306a36Sopenharmony_ci * Exception: precedence violation. We do not implement it in any case. 67062306a36Sopenharmony_ci */ 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 67362306a36Sopenharmony_ci#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED 67462306a36Sopenharmony_ci#else 67562306a36Sopenharmony_ci#define OPTION_BYTES sizeof(__be32) 67662306a36Sopenharmony_ci#endif 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_cistatic void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) 67962306a36Sopenharmony_ci{ 68062306a36Sopenharmony_ci const struct tcphdr *th = tcp_hdr(skb); 68162306a36Sopenharmony_ci struct { 68262306a36Sopenharmony_ci struct tcphdr th; 68362306a36Sopenharmony_ci __be32 opt[OPTION_BYTES / sizeof(__be32)]; 68462306a36Sopenharmony_ci } rep; 68562306a36Sopenharmony_ci struct ip_reply_arg arg; 68662306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 68762306a36Sopenharmony_ci struct tcp_md5sig_key *key = NULL; 68862306a36Sopenharmony_ci const __u8 *hash_location = NULL; 68962306a36Sopenharmony_ci unsigned char newhash[16]; 69062306a36Sopenharmony_ci int genhash; 69162306a36Sopenharmony_ci struct sock *sk1 = NULL; 69262306a36Sopenharmony_ci#endif 69362306a36Sopenharmony_ci u64 transmit_time = 0; 69462306a36Sopenharmony_ci struct sock *ctl_sk; 69562306a36Sopenharmony_ci struct net *net; 69662306a36Sopenharmony_ci u32 txhash = 0; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci /* Never send a reset in response to a reset. 
*/ 69962306a36Sopenharmony_ci if (th->rst) 70062306a36Sopenharmony_ci return; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci /* If sk not NULL, it means we did a successful lookup and incoming 70362306a36Sopenharmony_ci * route had to be correct. prequeue might have dropped our dst. 70462306a36Sopenharmony_ci */ 70562306a36Sopenharmony_ci if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) 70662306a36Sopenharmony_ci return; 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci /* Swap the send and the receive. */ 70962306a36Sopenharmony_ci memset(&rep, 0, sizeof(rep)); 71062306a36Sopenharmony_ci rep.th.dest = th->source; 71162306a36Sopenharmony_ci rep.th.source = th->dest; 71262306a36Sopenharmony_ci rep.th.doff = sizeof(struct tcphdr) / 4; 71362306a36Sopenharmony_ci rep.th.rst = 1; 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci if (th->ack) { 71662306a36Sopenharmony_ci rep.th.seq = th->ack_seq; 71762306a36Sopenharmony_ci } else { 71862306a36Sopenharmony_ci rep.th.ack = 1; 71962306a36Sopenharmony_ci rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 72062306a36Sopenharmony_ci skb->len - (th->doff << 2)); 72162306a36Sopenharmony_ci } 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci memset(&arg, 0, sizeof(arg)); 72462306a36Sopenharmony_ci arg.iov[0].iov_base = (unsigned char *)&rep; 72562306a36Sopenharmony_ci arg.iov[0].iov_len = sizeof(rep.th); 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 72862306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 72962306a36Sopenharmony_ci rcu_read_lock(); 73062306a36Sopenharmony_ci hash_location = tcp_parse_md5sig_option(th); 73162306a36Sopenharmony_ci if (sk && sk_fullsock(sk)) { 73262306a36Sopenharmony_ci const union tcp_md5_addr *addr; 73362306a36Sopenharmony_ci int l3index; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci /* sdif set, means packet ingressed via a device 73662306a36Sopenharmony_ci * in an L3 domain and inet_iif is set to it. 
73762306a36Sopenharmony_ci */ 73862306a36Sopenharmony_ci l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; 73962306a36Sopenharmony_ci addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; 74062306a36Sopenharmony_ci key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); 74162306a36Sopenharmony_ci } else if (hash_location) { 74262306a36Sopenharmony_ci const union tcp_md5_addr *addr; 74362306a36Sopenharmony_ci int sdif = tcp_v4_sdif(skb); 74462306a36Sopenharmony_ci int dif = inet_iif(skb); 74562306a36Sopenharmony_ci int l3index; 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * active side is lost. Try to find listening socket through 74962306a36Sopenharmony_ci * source port, and then find md5 key through listening socket. 75062306a36Sopenharmony_ci * we are not loose security here: 75162306a36Sopenharmony_ci * Incoming packet is checked with md5 hash with finding key, 75262306a36Sopenharmony_ci * no RST generated if md5 hash doesn't match. 75362306a36Sopenharmony_ci */ 75462306a36Sopenharmony_ci sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, 75562306a36Sopenharmony_ci NULL, 0, ip_hdr(skb)->saddr, 75662306a36Sopenharmony_ci th->source, ip_hdr(skb)->daddr, 75762306a36Sopenharmony_ci ntohs(th->source), dif, sdif); 75862306a36Sopenharmony_ci /* don't send rst if it can't find key */ 75962306a36Sopenharmony_ci if (!sk1) 76062306a36Sopenharmony_ci goto out; 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci /* sdif set, means packet ingressed via a device 76362306a36Sopenharmony_ci * in an L3 domain and dif is set to it. 76462306a36Sopenharmony_ci */ 76562306a36Sopenharmony_ci l3index = sdif ? 
dif : 0; 76662306a36Sopenharmony_ci addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; 76762306a36Sopenharmony_ci key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET); 76862306a36Sopenharmony_ci if (!key) 76962306a36Sopenharmony_ci goto out; 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); 77362306a36Sopenharmony_ci if (genhash || memcmp(hash_location, newhash, 16) != 0) 77462306a36Sopenharmony_ci goto out; 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci } 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci if (key) { 77962306a36Sopenharmony_ci rep.opt[0] = htonl((TCPOPT_NOP << 24) | 78062306a36Sopenharmony_ci (TCPOPT_NOP << 16) | 78162306a36Sopenharmony_ci (TCPOPT_MD5SIG << 8) | 78262306a36Sopenharmony_ci TCPOLEN_MD5SIG); 78362306a36Sopenharmony_ci /* Update length and the length the header thinks exists */ 78462306a36Sopenharmony_ci arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 78562306a36Sopenharmony_ci rep.th.doff = arg.iov[0].iov_len / 4; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 78862306a36Sopenharmony_ci key, ip_hdr(skb)->saddr, 78962306a36Sopenharmony_ci ip_hdr(skb)->daddr, &rep.th); 79062306a36Sopenharmony_ci } 79162306a36Sopenharmony_ci#endif 79262306a36Sopenharmony_ci /* Can't co-exist with TCPMD5, hence check rep.opt[0] */ 79362306a36Sopenharmony_ci if (rep.opt[0] == 0) { 79462306a36Sopenharmony_ci __be32 mrst = mptcp_reset_option(skb); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci if (mrst) { 79762306a36Sopenharmony_ci rep.opt[0] = mrst; 79862306a36Sopenharmony_ci arg.iov[0].iov_len += sizeof(mrst); 79962306a36Sopenharmony_ci rep.th.doff = arg.iov[0].iov_len / 4; 80062306a36Sopenharmony_ci } 80162306a36Sopenharmony_ci } 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 80462306a36Sopenharmony_ci ip_hdr(skb)->saddr, /* XXX */ 
80562306a36Sopenharmony_ci arg.iov[0].iov_len, IPPROTO_TCP, 0); 80662306a36Sopenharmony_ci arg.csumoffset = offsetof(struct tcphdr, check) / 2; 80762306a36Sopenharmony_ci arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci /* When socket is gone, all binding information is lost. 81062306a36Sopenharmony_ci * routing might fail in this case. No choice here, if we choose to force 81162306a36Sopenharmony_ci * input interface, we will misroute in case of asymmetric route. 81262306a36Sopenharmony_ci */ 81362306a36Sopenharmony_ci if (sk) { 81462306a36Sopenharmony_ci arg.bound_dev_if = sk->sk_bound_dev_if; 81562306a36Sopenharmony_ci if (sk_fullsock(sk)) 81662306a36Sopenharmony_ci trace_tcp_send_reset(sk, skb); 81762306a36Sopenharmony_ci } 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != 82062306a36Sopenharmony_ci offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci arg.tos = ip_hdr(skb)->tos; 82362306a36Sopenharmony_ci arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 82462306a36Sopenharmony_ci local_bh_disable(); 82562306a36Sopenharmony_ci ctl_sk = this_cpu_read(ipv4_tcp_sk); 82662306a36Sopenharmony_ci sock_net_set(ctl_sk, net); 82762306a36Sopenharmony_ci if (sk) { 82862306a36Sopenharmony_ci ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? 82962306a36Sopenharmony_ci inet_twsk(sk)->tw_mark : sk->sk_mark; 83062306a36Sopenharmony_ci ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? 83162306a36Sopenharmony_ci inet_twsk(sk)->tw_priority : sk->sk_priority; 83262306a36Sopenharmony_ci transmit_time = tcp_transmit_time(sk); 83362306a36Sopenharmony_ci xfrm_sk_clone_policy(ctl_sk, sk); 83462306a36Sopenharmony_ci txhash = (sk->sk_state == TCP_TIME_WAIT) ? 
83562306a36Sopenharmony_ci inet_twsk(sk)->tw_txhash : sk->sk_txhash; 83662306a36Sopenharmony_ci } else { 83762306a36Sopenharmony_ci ctl_sk->sk_mark = 0; 83862306a36Sopenharmony_ci ctl_sk->sk_priority = 0; 83962306a36Sopenharmony_ci } 84062306a36Sopenharmony_ci ip_send_unicast_reply(ctl_sk, 84162306a36Sopenharmony_ci skb, &TCP_SKB_CB(skb)->header.h4.opt, 84262306a36Sopenharmony_ci ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 84362306a36Sopenharmony_ci &arg, arg.iov[0].iov_len, 84462306a36Sopenharmony_ci transmit_time, txhash); 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci xfrm_sk_free_policy(ctl_sk); 84762306a36Sopenharmony_ci sock_net_set(ctl_sk, &init_net); 84862306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 84962306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 85062306a36Sopenharmony_ci local_bh_enable(); 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 85362306a36Sopenharmony_ciout: 85462306a36Sopenharmony_ci rcu_read_unlock(); 85562306a36Sopenharmony_ci#endif 85662306a36Sopenharmony_ci} 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 85962306a36Sopenharmony_ci outside socket context is ugly, certainly. What can I do? 
86062306a36Sopenharmony_ci */ 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_cistatic void tcp_v4_send_ack(const struct sock *sk, 86362306a36Sopenharmony_ci struct sk_buff *skb, u32 seq, u32 ack, 86462306a36Sopenharmony_ci u32 win, u32 tsval, u32 tsecr, int oif, 86562306a36Sopenharmony_ci struct tcp_md5sig_key *key, 86662306a36Sopenharmony_ci int reply_flags, u8 tos, u32 txhash) 86762306a36Sopenharmony_ci{ 86862306a36Sopenharmony_ci const struct tcphdr *th = tcp_hdr(skb); 86962306a36Sopenharmony_ci struct { 87062306a36Sopenharmony_ci struct tcphdr th; 87162306a36Sopenharmony_ci __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 87262306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 87362306a36Sopenharmony_ci + (TCPOLEN_MD5SIG_ALIGNED >> 2) 87462306a36Sopenharmony_ci#endif 87562306a36Sopenharmony_ci ]; 87662306a36Sopenharmony_ci } rep; 87762306a36Sopenharmony_ci struct net *net = sock_net(sk); 87862306a36Sopenharmony_ci struct ip_reply_arg arg; 87962306a36Sopenharmony_ci struct sock *ctl_sk; 88062306a36Sopenharmony_ci u64 transmit_time; 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci memset(&rep.th, 0, sizeof(struct tcphdr)); 88362306a36Sopenharmony_ci memset(&arg, 0, sizeof(arg)); 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci arg.iov[0].iov_base = (unsigned char *)&rep; 88662306a36Sopenharmony_ci arg.iov[0].iov_len = sizeof(rep.th); 88762306a36Sopenharmony_ci if (tsecr) { 88862306a36Sopenharmony_ci rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 88962306a36Sopenharmony_ci (TCPOPT_TIMESTAMP << 8) | 89062306a36Sopenharmony_ci TCPOLEN_TIMESTAMP); 89162306a36Sopenharmony_ci rep.opt[1] = htonl(tsval); 89262306a36Sopenharmony_ci rep.opt[2] = htonl(tsecr); 89362306a36Sopenharmony_ci arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 89462306a36Sopenharmony_ci } 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci /* Swap the send and the receive. 
*/ 89762306a36Sopenharmony_ci rep.th.dest = th->source; 89862306a36Sopenharmony_ci rep.th.source = th->dest; 89962306a36Sopenharmony_ci rep.th.doff = arg.iov[0].iov_len / 4; 90062306a36Sopenharmony_ci rep.th.seq = htonl(seq); 90162306a36Sopenharmony_ci rep.th.ack_seq = htonl(ack); 90262306a36Sopenharmony_ci rep.th.ack = 1; 90362306a36Sopenharmony_ci rep.th.window = htons(win); 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 90662306a36Sopenharmony_ci if (key) { 90762306a36Sopenharmony_ci int offset = (tsecr) ? 3 : 0; 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 91062306a36Sopenharmony_ci (TCPOPT_NOP << 16) | 91162306a36Sopenharmony_ci (TCPOPT_MD5SIG << 8) | 91262306a36Sopenharmony_ci TCPOLEN_MD5SIG); 91362306a36Sopenharmony_ci arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 91462306a36Sopenharmony_ci rep.th.doff = arg.iov[0].iov_len/4; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 91762306a36Sopenharmony_ci key, ip_hdr(skb)->saddr, 91862306a36Sopenharmony_ci ip_hdr(skb)->daddr, &rep.th); 91962306a36Sopenharmony_ci } 92062306a36Sopenharmony_ci#endif 92162306a36Sopenharmony_ci arg.flags = reply_flags; 92262306a36Sopenharmony_ci arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 92362306a36Sopenharmony_ci ip_hdr(skb)->saddr, /* XXX */ 92462306a36Sopenharmony_ci arg.iov[0].iov_len, IPPROTO_TCP, 0); 92562306a36Sopenharmony_ci arg.csumoffset = offsetof(struct tcphdr, check) / 2; 92662306a36Sopenharmony_ci if (oif) 92762306a36Sopenharmony_ci arg.bound_dev_if = oif; 92862306a36Sopenharmony_ci arg.tos = tos; 92962306a36Sopenharmony_ci arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); 93062306a36Sopenharmony_ci local_bh_disable(); 93162306a36Sopenharmony_ci ctl_sk = this_cpu_read(ipv4_tcp_sk); 93262306a36Sopenharmony_ci sock_net_set(ctl_sk, net); 93362306a36Sopenharmony_ci ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? 
93462306a36Sopenharmony_ci inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); 93562306a36Sopenharmony_ci ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? 93662306a36Sopenharmony_ci inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); 93762306a36Sopenharmony_ci transmit_time = tcp_transmit_time(sk); 93862306a36Sopenharmony_ci ip_send_unicast_reply(ctl_sk, 93962306a36Sopenharmony_ci skb, &TCP_SKB_CB(skb)->header.h4.opt, 94062306a36Sopenharmony_ci ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 94162306a36Sopenharmony_ci &arg, arg.iov[0].iov_len, 94262306a36Sopenharmony_ci transmit_time, txhash); 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci sock_net_set(ctl_sk, &init_net); 94562306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 94662306a36Sopenharmony_ci local_bh_enable(); 94762306a36Sopenharmony_ci} 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_cistatic void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 95062306a36Sopenharmony_ci{ 95162306a36Sopenharmony_ci struct inet_timewait_sock *tw = inet_twsk(sk); 95262306a36Sopenharmony_ci struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci tcp_v4_send_ack(sk, skb, 95562306a36Sopenharmony_ci tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 95662306a36Sopenharmony_ci tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 95762306a36Sopenharmony_ci tcp_time_stamp_raw() + tcptw->tw_ts_offset, 95862306a36Sopenharmony_ci tcptw->tw_ts_recent, 95962306a36Sopenharmony_ci tw->tw_bound_dev_if, 96062306a36Sopenharmony_ci tcp_twsk_md5_key(tcptw), 96162306a36Sopenharmony_ci tw->tw_transparent ? 
IP_REPLY_ARG_NOSRCCHECK : 0, 96262306a36Sopenharmony_ci tw->tw_tos, 96362306a36Sopenharmony_ci tw->tw_txhash 96462306a36Sopenharmony_ci ); 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci inet_twsk_put(tw); 96762306a36Sopenharmony_ci} 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_cistatic void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 97062306a36Sopenharmony_ci struct request_sock *req) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci const union tcp_md5_addr *addr; 97362306a36Sopenharmony_ci int l3index; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 97662306a36Sopenharmony_ci * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 97762306a36Sopenharmony_ci */ 97862306a36Sopenharmony_ci u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : 97962306a36Sopenharmony_ci tcp_sk(sk)->snd_nxt; 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci /* RFC 7323 2.3 98262306a36Sopenharmony_ci * The window field (SEG.WND) of every outgoing segment, with the 98362306a36Sopenharmony_ci * exception of <SYN> segments, MUST be right-shifted by 98462306a36Sopenharmony_ci * Rcv.Wind.Shift bits: 98562306a36Sopenharmony_ci */ 98662306a36Sopenharmony_ci addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; 98762306a36Sopenharmony_ci l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; 98862306a36Sopenharmony_ci tcp_v4_send_ack(sk, skb, seq, 98962306a36Sopenharmony_ci tcp_rsk(req)->rcv_nxt, 99062306a36Sopenharmony_ci req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 99162306a36Sopenharmony_ci tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 99262306a36Sopenharmony_ci READ_ONCE(req->ts_recent), 99362306a36Sopenharmony_ci 0, 99462306a36Sopenharmony_ci tcp_md5_do_lookup(sk, l3index, addr, AF_INET), 99562306a36Sopenharmony_ci inet_rsk(req)->no_srccheck ? 
IP_REPLY_ARG_NOSRCCHECK : 0, 99662306a36Sopenharmony_ci ip_hdr(skb)->tos, 99762306a36Sopenharmony_ci READ_ONCE(tcp_rsk(req)->txhash)); 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci/* 100162306a36Sopenharmony_ci * Send a SYN-ACK after having received a SYN. 100262306a36Sopenharmony_ci * This still operates on a request_sock only, not on a big 100362306a36Sopenharmony_ci * socket. 100462306a36Sopenharmony_ci */ 100562306a36Sopenharmony_cistatic int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, 100662306a36Sopenharmony_ci struct flowi *fl, 100762306a36Sopenharmony_ci struct request_sock *req, 100862306a36Sopenharmony_ci struct tcp_fastopen_cookie *foc, 100962306a36Sopenharmony_ci enum tcp_synack_type synack_type, 101062306a36Sopenharmony_ci struct sk_buff *syn_skb) 101162306a36Sopenharmony_ci{ 101262306a36Sopenharmony_ci const struct inet_request_sock *ireq = inet_rsk(req); 101362306a36Sopenharmony_ci struct flowi4 fl4; 101462306a36Sopenharmony_ci int err = -1; 101562306a36Sopenharmony_ci struct sk_buff *skb; 101662306a36Sopenharmony_ci u8 tos; 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci /* First, grab a route. */ 101962306a36Sopenharmony_ci if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 102062306a36Sopenharmony_ci return -1; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci if (skb) { 102562306a36Sopenharmony_ci __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
102862306a36Sopenharmony_ci (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 102962306a36Sopenharmony_ci (inet_sk(sk)->tos & INET_ECN_MASK) : 103062306a36Sopenharmony_ci inet_sk(sk)->tos; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci if (!INET_ECN_is_capable(tos) && 103362306a36Sopenharmony_ci tcp_bpf_ca_needs_ecn((struct sock *)req)) 103462306a36Sopenharmony_ci tos |= INET_ECN_ECT_0; 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci rcu_read_lock(); 103762306a36Sopenharmony_ci err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 103862306a36Sopenharmony_ci ireq->ir_rmt_addr, 103962306a36Sopenharmony_ci rcu_dereference(ireq->ireq_opt), 104062306a36Sopenharmony_ci tos); 104162306a36Sopenharmony_ci rcu_read_unlock(); 104262306a36Sopenharmony_ci err = net_xmit_eval(err); 104362306a36Sopenharmony_ci } 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci return err; 104662306a36Sopenharmony_ci} 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci/* 104962306a36Sopenharmony_ci * IPv4 request_sock destructor. 105062306a36Sopenharmony_ci */ 105162306a36Sopenharmony_cistatic void tcp_v4_reqsk_destructor(struct request_sock *req) 105262306a36Sopenharmony_ci{ 105362306a36Sopenharmony_ci kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); 105462306a36Sopenharmony_ci} 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 105762306a36Sopenharmony_ci/* 105862306a36Sopenharmony_ci * RFC2385 MD5 checksumming requires a mapping of 105962306a36Sopenharmony_ci * IP address->MD5 Key. 106062306a36Sopenharmony_ci * We need to maintain these in the sk structure. 
106162306a36Sopenharmony_ci */ 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ciDEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); 106462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_needed); 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_cistatic bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) 106762306a36Sopenharmony_ci{ 106862306a36Sopenharmony_ci if (!old) 106962306a36Sopenharmony_ci return true; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci /* l3index always overrides non-l3index */ 107262306a36Sopenharmony_ci if (old->l3index && new->l3index == 0) 107362306a36Sopenharmony_ci return false; 107462306a36Sopenharmony_ci if (old->l3index == 0 && new->l3index) 107562306a36Sopenharmony_ci return true; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci return old->prefixlen < new->prefixlen; 107862306a36Sopenharmony_ci} 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci/* Find the Key structure for an address. */ 108162306a36Sopenharmony_cistruct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, 108262306a36Sopenharmony_ci const union tcp_md5_addr *addr, 108362306a36Sopenharmony_ci int family) 108462306a36Sopenharmony_ci{ 108562306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 108662306a36Sopenharmony_ci struct tcp_md5sig_key *key; 108762306a36Sopenharmony_ci const struct tcp_md5sig_info *md5sig; 108862306a36Sopenharmony_ci __be32 mask; 108962306a36Sopenharmony_ci struct tcp_md5sig_key *best_match = NULL; 109062306a36Sopenharmony_ci bool match; 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci /* caller either holds rcu_read_lock() or socket lock */ 109362306a36Sopenharmony_ci md5sig = rcu_dereference_check(tp->md5sig_info, 109462306a36Sopenharmony_ci lockdep_sock_is_held(sk)); 109562306a36Sopenharmony_ci if (!md5sig) 109662306a36Sopenharmony_ci return NULL; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci hlist_for_each_entry_rcu(key, &md5sig->head, node, 
109962306a36Sopenharmony_ci lockdep_sock_is_held(sk)) { 110062306a36Sopenharmony_ci if (key->family != family) 110162306a36Sopenharmony_ci continue; 110262306a36Sopenharmony_ci if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index) 110362306a36Sopenharmony_ci continue; 110462306a36Sopenharmony_ci if (family == AF_INET) { 110562306a36Sopenharmony_ci mask = inet_make_mask(key->prefixlen); 110662306a36Sopenharmony_ci match = (key->addr.a4.s_addr & mask) == 110762306a36Sopenharmony_ci (addr->a4.s_addr & mask); 110862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 110962306a36Sopenharmony_ci } else if (family == AF_INET6) { 111062306a36Sopenharmony_ci match = ipv6_prefix_equal(&key->addr.a6, &addr->a6, 111162306a36Sopenharmony_ci key->prefixlen); 111262306a36Sopenharmony_ci#endif 111362306a36Sopenharmony_ci } else { 111462306a36Sopenharmony_ci match = false; 111562306a36Sopenharmony_ci } 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci if (match && better_md5_match(best_match, key)) 111862306a36Sopenharmony_ci best_match = key; 111962306a36Sopenharmony_ci } 112062306a36Sopenharmony_ci return best_match; 112162306a36Sopenharmony_ci} 112262306a36Sopenharmony_ciEXPORT_SYMBOL(__tcp_md5_do_lookup); 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_cistatic struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, 112562306a36Sopenharmony_ci const union tcp_md5_addr *addr, 112662306a36Sopenharmony_ci int family, u8 prefixlen, 112762306a36Sopenharmony_ci int l3index, u8 flags) 112862306a36Sopenharmony_ci{ 112962306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 113062306a36Sopenharmony_ci struct tcp_md5sig_key *key; 113162306a36Sopenharmony_ci unsigned int size = sizeof(struct in_addr); 113262306a36Sopenharmony_ci const struct tcp_md5sig_info *md5sig; 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci /* caller either holds rcu_read_lock() or socket lock */ 113562306a36Sopenharmony_ci md5sig = 
rcu_dereference_check(tp->md5sig_info, 113662306a36Sopenharmony_ci lockdep_sock_is_held(sk)); 113762306a36Sopenharmony_ci if (!md5sig) 113862306a36Sopenharmony_ci return NULL; 113962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 114062306a36Sopenharmony_ci if (family == AF_INET6) 114162306a36Sopenharmony_ci size = sizeof(struct in6_addr); 114262306a36Sopenharmony_ci#endif 114362306a36Sopenharmony_ci hlist_for_each_entry_rcu(key, &md5sig->head, node, 114462306a36Sopenharmony_ci lockdep_sock_is_held(sk)) { 114562306a36Sopenharmony_ci if (key->family != family) 114662306a36Sopenharmony_ci continue; 114762306a36Sopenharmony_ci if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX)) 114862306a36Sopenharmony_ci continue; 114962306a36Sopenharmony_ci if (key->l3index != l3index) 115062306a36Sopenharmony_ci continue; 115162306a36Sopenharmony_ci if (!memcmp(&key->addr, addr, size) && 115262306a36Sopenharmony_ci key->prefixlen == prefixlen) 115362306a36Sopenharmony_ci return key; 115462306a36Sopenharmony_ci } 115562306a36Sopenharmony_ci return NULL; 115662306a36Sopenharmony_ci} 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_cistruct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, 115962306a36Sopenharmony_ci const struct sock *addr_sk) 116062306a36Sopenharmony_ci{ 116162306a36Sopenharmony_ci const union tcp_md5_addr *addr; 116262306a36Sopenharmony_ci int l3index; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 116562306a36Sopenharmony_ci addr_sk->sk_bound_dev_if); 116662306a36Sopenharmony_ci addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; 116762306a36Sopenharmony_ci return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); 116862306a36Sopenharmony_ci} 116962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_md5_lookup); 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_cistatic int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) 117262306a36Sopenharmony_ci{ 
117362306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 117462306a36Sopenharmony_ci struct tcp_md5sig_info *md5sig; 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci md5sig = kmalloc(sizeof(*md5sig), gfp); 117762306a36Sopenharmony_ci if (!md5sig) 117862306a36Sopenharmony_ci return -ENOMEM; 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci sk_gso_disable(sk); 118162306a36Sopenharmony_ci INIT_HLIST_HEAD(&md5sig->head); 118262306a36Sopenharmony_ci rcu_assign_pointer(tp->md5sig_info, md5sig); 118362306a36Sopenharmony_ci return 0; 118462306a36Sopenharmony_ci} 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci/* This can be called on a newly created socket, from other files */ 118762306a36Sopenharmony_cistatic int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 118862306a36Sopenharmony_ci int family, u8 prefixlen, int l3index, u8 flags, 118962306a36Sopenharmony_ci const u8 *newkey, u8 newkeylen, gfp_t gfp) 119062306a36Sopenharmony_ci{ 119162306a36Sopenharmony_ci /* Add Key to the list */ 119262306a36Sopenharmony_ci struct tcp_md5sig_key *key; 119362306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 119462306a36Sopenharmony_ci struct tcp_md5sig_info *md5sig; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags); 119762306a36Sopenharmony_ci if (key) { 119862306a36Sopenharmony_ci /* Pre-existing entry - just update that one. 119962306a36Sopenharmony_ci * Note that the key might be used concurrently. 120062306a36Sopenharmony_ci * data_race() is telling kcsan that we do not care of 120162306a36Sopenharmony_ci * key mismatches, since changing MD5 key on live flows 120262306a36Sopenharmony_ci * can lead to packet drops. 120362306a36Sopenharmony_ci */ 120462306a36Sopenharmony_ci data_race(memcpy(key->key, newkey, newkeylen)); 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci /* Pairs with READ_ONCE() in tcp_md5_hash_key(). 
120762306a36Sopenharmony_ci * Also note that a reader could catch new key->keylen value 120862306a36Sopenharmony_ci * but old key->key[], this is the reason we use __GFP_ZERO 120962306a36Sopenharmony_ci * at sock_kmalloc() time below these lines. 121062306a36Sopenharmony_ci */ 121162306a36Sopenharmony_ci WRITE_ONCE(key->keylen, newkeylen); 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci return 0; 121462306a36Sopenharmony_ci } 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci md5sig = rcu_dereference_protected(tp->md5sig_info, 121762306a36Sopenharmony_ci lockdep_sock_is_held(sk)); 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); 122062306a36Sopenharmony_ci if (!key) 122162306a36Sopenharmony_ci return -ENOMEM; 122262306a36Sopenharmony_ci if (!tcp_alloc_md5sig_pool()) { 122362306a36Sopenharmony_ci sock_kfree_s(sk, key, sizeof(*key)); 122462306a36Sopenharmony_ci return -ENOMEM; 122562306a36Sopenharmony_ci } 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci memcpy(key->key, newkey, newkeylen); 122862306a36Sopenharmony_ci key->keylen = newkeylen; 122962306a36Sopenharmony_ci key->family = family; 123062306a36Sopenharmony_ci key->prefixlen = prefixlen; 123162306a36Sopenharmony_ci key->l3index = l3index; 123262306a36Sopenharmony_ci key->flags = flags; 123362306a36Sopenharmony_ci memcpy(&key->addr, addr, 123462306a36Sopenharmony_ci (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? 
sizeof(struct in6_addr) : 123562306a36Sopenharmony_ci sizeof(struct in_addr)); 123662306a36Sopenharmony_ci hlist_add_head_rcu(&key->node, &md5sig->head); 123762306a36Sopenharmony_ci return 0; 123862306a36Sopenharmony_ci} 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ciint tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 124162306a36Sopenharmony_ci int family, u8 prefixlen, int l3index, u8 flags, 124262306a36Sopenharmony_ci const u8 *newkey, u8 newkeylen) 124362306a36Sopenharmony_ci{ 124462306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { 124762306a36Sopenharmony_ci if (tcp_md5sig_info_add(sk, GFP_KERNEL)) 124862306a36Sopenharmony_ci return -ENOMEM; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci if (!static_branch_inc(&tcp_md5_needed.key)) { 125162306a36Sopenharmony_ci struct tcp_md5sig_info *md5sig; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); 125462306a36Sopenharmony_ci rcu_assign_pointer(tp->md5sig_info, NULL); 125562306a36Sopenharmony_ci kfree_rcu(md5sig, rcu); 125662306a36Sopenharmony_ci return -EUSERS; 125762306a36Sopenharmony_ci } 125862306a36Sopenharmony_ci } 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, 126162306a36Sopenharmony_ci newkey, newkeylen, GFP_KERNEL); 126262306a36Sopenharmony_ci} 126362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_do_add); 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ciint tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, 126662306a36Sopenharmony_ci int family, u8 prefixlen, int l3index, 126762306a36Sopenharmony_ci struct tcp_md5sig_key *key) 126862306a36Sopenharmony_ci{ 126962306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 127062306a36Sopenharmony_ci 
127162306a36Sopenharmony_ci if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { 127262306a36Sopenharmony_ci if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) 127362306a36Sopenharmony_ci return -ENOMEM; 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { 127662306a36Sopenharmony_ci struct tcp_md5sig_info *md5sig; 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); 127962306a36Sopenharmony_ci net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); 128062306a36Sopenharmony_ci rcu_assign_pointer(tp->md5sig_info, NULL); 128162306a36Sopenharmony_ci kfree_rcu(md5sig, rcu); 128262306a36Sopenharmony_ci return -EUSERS; 128362306a36Sopenharmony_ci } 128462306a36Sopenharmony_ci } 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, 128762306a36Sopenharmony_ci key->flags, key->key, key->keylen, 128862306a36Sopenharmony_ci sk_gfp_mask(sk, GFP_ATOMIC)); 128962306a36Sopenharmony_ci} 129062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_key_copy); 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ciint tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, 129362306a36Sopenharmony_ci u8 prefixlen, int l3index, u8 flags) 129462306a36Sopenharmony_ci{ 129562306a36Sopenharmony_ci struct tcp_md5sig_key *key; 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags); 129862306a36Sopenharmony_ci if (!key) 129962306a36Sopenharmony_ci return -ENOENT; 130062306a36Sopenharmony_ci hlist_del_rcu(&key->node); 130162306a36Sopenharmony_ci atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 130262306a36Sopenharmony_ci kfree_rcu(key, rcu); 130362306a36Sopenharmony_ci return 0; 130462306a36Sopenharmony_ci} 130562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_do_del); 
130662306a36Sopenharmony_ci 130762306a36Sopenharmony_cistatic void tcp_clear_md5_list(struct sock *sk) 130862306a36Sopenharmony_ci{ 130962306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 131062306a36Sopenharmony_ci struct tcp_md5sig_key *key; 131162306a36Sopenharmony_ci struct hlist_node *n; 131262306a36Sopenharmony_ci struct tcp_md5sig_info *md5sig; 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci hlist_for_each_entry_safe(key, n, &md5sig->head, node) { 131762306a36Sopenharmony_ci hlist_del_rcu(&key->node); 131862306a36Sopenharmony_ci atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 131962306a36Sopenharmony_ci kfree_rcu(key, rcu); 132062306a36Sopenharmony_ci } 132162306a36Sopenharmony_ci} 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_cistatic int tcp_v4_parse_md5_keys(struct sock *sk, int optname, 132462306a36Sopenharmony_ci sockptr_t optval, int optlen) 132562306a36Sopenharmony_ci{ 132662306a36Sopenharmony_ci struct tcp_md5sig cmd; 132762306a36Sopenharmony_ci struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 132862306a36Sopenharmony_ci const union tcp_md5_addr *addr; 132962306a36Sopenharmony_ci u8 prefixlen = 32; 133062306a36Sopenharmony_ci int l3index = 0; 133162306a36Sopenharmony_ci u8 flags; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci if (optlen < sizeof(cmd)) 133462306a36Sopenharmony_ci return -EINVAL; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 133762306a36Sopenharmony_ci return -EFAULT; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci if (sin->sin_family != AF_INET) 134062306a36Sopenharmony_ci return -EINVAL; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci if (optname == TCP_MD5SIG_EXT && 134562306a36Sopenharmony_ci 
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 134662306a36Sopenharmony_ci prefixlen = cmd.tcpm_prefixlen; 134762306a36Sopenharmony_ci if (prefixlen > 32) 134862306a36Sopenharmony_ci return -EINVAL; 134962306a36Sopenharmony_ci } 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 135262306a36Sopenharmony_ci cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 135362306a36Sopenharmony_ci struct net_device *dev; 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci rcu_read_lock(); 135662306a36Sopenharmony_ci dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 135762306a36Sopenharmony_ci if (dev && netif_is_l3_master(dev)) 135862306a36Sopenharmony_ci l3index = dev->ifindex; 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci rcu_read_unlock(); 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci /* ok to reference set/not set outside of rcu; 136362306a36Sopenharmony_ci * right now device MUST be an L3 master 136462306a36Sopenharmony_ci */ 136562306a36Sopenharmony_ci if (!dev || !l3index) 136662306a36Sopenharmony_ci return -EINVAL; 136762306a36Sopenharmony_ci } 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci if (!cmd.tcpm_keylen) 137262306a36Sopenharmony_ci return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags); 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 137562306a36Sopenharmony_ci return -EINVAL; 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, 137862306a36Sopenharmony_ci cmd.tcpm_key, cmd.tcpm_keylen); 137962306a36Sopenharmony_ci} 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_cistatic int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, 138262306a36Sopenharmony_ci __be32 daddr, __be32 saddr, 138362306a36Sopenharmony_ci const struct tcphdr 
*th, int nbytes) 138462306a36Sopenharmony_ci{ 138562306a36Sopenharmony_ci struct tcp4_pseudohdr *bp; 138662306a36Sopenharmony_ci struct scatterlist sg; 138762306a36Sopenharmony_ci struct tcphdr *_th; 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci bp = hp->scratch; 139062306a36Sopenharmony_ci bp->saddr = saddr; 139162306a36Sopenharmony_ci bp->daddr = daddr; 139262306a36Sopenharmony_ci bp->pad = 0; 139362306a36Sopenharmony_ci bp->protocol = IPPROTO_TCP; 139462306a36Sopenharmony_ci bp->len = cpu_to_be16(nbytes); 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_ci _th = (struct tcphdr *)(bp + 1); 139762306a36Sopenharmony_ci memcpy(_th, th, sizeof(*th)); 139862306a36Sopenharmony_ci _th->check = 0; 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 140162306a36Sopenharmony_ci ahash_request_set_crypt(hp->md5_req, &sg, NULL, 140262306a36Sopenharmony_ci sizeof(*bp) + sizeof(*th)); 140362306a36Sopenharmony_ci return crypto_ahash_update(hp->md5_req); 140462306a36Sopenharmony_ci} 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_cistatic int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 140762306a36Sopenharmony_ci __be32 daddr, __be32 saddr, const struct tcphdr *th) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci struct tcp_md5sig_pool *hp; 141062306a36Sopenharmony_ci struct ahash_request *req; 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci hp = tcp_get_md5sig_pool(); 141362306a36Sopenharmony_ci if (!hp) 141462306a36Sopenharmony_ci goto clear_hash_noput; 141562306a36Sopenharmony_ci req = hp->md5_req; 141662306a36Sopenharmony_ci 141762306a36Sopenharmony_ci if (crypto_ahash_init(req)) 141862306a36Sopenharmony_ci goto clear_hash; 141962306a36Sopenharmony_ci if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 142062306a36Sopenharmony_ci goto clear_hash; 142162306a36Sopenharmony_ci if (tcp_md5_hash_key(hp, key)) 142262306a36Sopenharmony_ci goto clear_hash; 
142362306a36Sopenharmony_ci ahash_request_set_crypt(req, NULL, md5_hash, 0); 142462306a36Sopenharmony_ci if (crypto_ahash_final(req)) 142562306a36Sopenharmony_ci goto clear_hash; 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci tcp_put_md5sig_pool(); 142862306a36Sopenharmony_ci return 0; 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ciclear_hash: 143162306a36Sopenharmony_ci tcp_put_md5sig_pool(); 143262306a36Sopenharmony_ciclear_hash_noput: 143362306a36Sopenharmony_ci memset(md5_hash, 0, 16); 143462306a36Sopenharmony_ci return 1; 143562306a36Sopenharmony_ci} 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ciint tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 143862306a36Sopenharmony_ci const struct sock *sk, 143962306a36Sopenharmony_ci const struct sk_buff *skb) 144062306a36Sopenharmony_ci{ 144162306a36Sopenharmony_ci struct tcp_md5sig_pool *hp; 144262306a36Sopenharmony_ci struct ahash_request *req; 144362306a36Sopenharmony_ci const struct tcphdr *th = tcp_hdr(skb); 144462306a36Sopenharmony_ci __be32 saddr, daddr; 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci if (sk) { /* valid for establish/request sockets */ 144762306a36Sopenharmony_ci saddr = sk->sk_rcv_saddr; 144862306a36Sopenharmony_ci daddr = sk->sk_daddr; 144962306a36Sopenharmony_ci } else { 145062306a36Sopenharmony_ci const struct iphdr *iph = ip_hdr(skb); 145162306a36Sopenharmony_ci saddr = iph->saddr; 145262306a36Sopenharmony_ci daddr = iph->daddr; 145362306a36Sopenharmony_ci } 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci hp = tcp_get_md5sig_pool(); 145662306a36Sopenharmony_ci if (!hp) 145762306a36Sopenharmony_ci goto clear_hash_noput; 145862306a36Sopenharmony_ci req = hp->md5_req; 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci if (crypto_ahash_init(req)) 146162306a36Sopenharmony_ci goto clear_hash; 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 
146462306a36Sopenharmony_ci goto clear_hash; 146562306a36Sopenharmony_ci if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 146662306a36Sopenharmony_ci goto clear_hash; 146762306a36Sopenharmony_ci if (tcp_md5_hash_key(hp, key)) 146862306a36Sopenharmony_ci goto clear_hash; 146962306a36Sopenharmony_ci ahash_request_set_crypt(req, NULL, md5_hash, 0); 147062306a36Sopenharmony_ci if (crypto_ahash_final(req)) 147162306a36Sopenharmony_ci goto clear_hash; 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci tcp_put_md5sig_pool(); 147462306a36Sopenharmony_ci return 0; 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ciclear_hash: 147762306a36Sopenharmony_ci tcp_put_md5sig_pool(); 147862306a36Sopenharmony_ciclear_hash_noput: 147962306a36Sopenharmony_ci memset(md5_hash, 0, 16); 148062306a36Sopenharmony_ci return 1; 148162306a36Sopenharmony_ci} 148262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_md5_hash_skb); 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci#endif 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_cistatic void tcp_v4_init_req(struct request_sock *req, 148762306a36Sopenharmony_ci const struct sock *sk_listener, 148862306a36Sopenharmony_ci struct sk_buff *skb) 148962306a36Sopenharmony_ci{ 149062306a36Sopenharmony_ci struct inet_request_sock *ireq = inet_rsk(req); 149162306a36Sopenharmony_ci struct net *net = sock_net(sk_listener); 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 149462306a36Sopenharmony_ci sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); 149562306a36Sopenharmony_ci RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); 149662306a36Sopenharmony_ci} 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_cistatic struct dst_entry *tcp_v4_route_req(const struct sock *sk, 149962306a36Sopenharmony_ci struct sk_buff *skb, 150062306a36Sopenharmony_ci struct flowi *fl, 150162306a36Sopenharmony_ci struct request_sock *req) 150262306a36Sopenharmony_ci{ 
150362306a36Sopenharmony_ci tcp_v4_init_req(req, sk, skb); 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci if (security_inet_conn_request(sk, skb, req)) 150662306a36Sopenharmony_ci return NULL; 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci return inet_csk_route_req(sk, &fl->u.ip4, req); 150962306a36Sopenharmony_ci} 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_cistruct request_sock_ops tcp_request_sock_ops __read_mostly = { 151262306a36Sopenharmony_ci .family = PF_INET, 151362306a36Sopenharmony_ci .obj_size = sizeof(struct tcp_request_sock), 151462306a36Sopenharmony_ci .rtx_syn_ack = tcp_rtx_synack, 151562306a36Sopenharmony_ci .send_ack = tcp_v4_reqsk_send_ack, 151662306a36Sopenharmony_ci .destructor = tcp_v4_reqsk_destructor, 151762306a36Sopenharmony_ci .send_reset = tcp_v4_send_reset, 151862306a36Sopenharmony_ci .syn_ack_timeout = tcp_syn_ack_timeout, 151962306a36Sopenharmony_ci}; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ciconst struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 152262306a36Sopenharmony_ci .mss_clamp = TCP_MSS_DEFAULT, 152362306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 152462306a36Sopenharmony_ci .req_md5_lookup = tcp_v4_md5_lookup, 152562306a36Sopenharmony_ci .calc_md5_hash = tcp_v4_md5_hash_skb, 152662306a36Sopenharmony_ci#endif 152762306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES 152862306a36Sopenharmony_ci .cookie_init_seq = cookie_v4_init_sequence, 152962306a36Sopenharmony_ci#endif 153062306a36Sopenharmony_ci .route_req = tcp_v4_route_req, 153162306a36Sopenharmony_ci .init_seq = tcp_v4_init_seq, 153262306a36Sopenharmony_ci .init_ts_off = tcp_v4_init_ts_off, 153362306a36Sopenharmony_ci .send_synack = tcp_v4_send_synack, 153462306a36Sopenharmony_ci}; 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ciint tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 153762306a36Sopenharmony_ci{ 153862306a36Sopenharmony_ci /* Never answer to SYNs send to broadcast or multicast */ 
153962306a36Sopenharmony_ci if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 154062306a36Sopenharmony_ci goto drop; 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci return tcp_conn_request(&tcp_request_sock_ops, 154362306a36Sopenharmony_ci &tcp_request_sock_ipv4_ops, sk, skb); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_cidrop: 154662306a36Sopenharmony_ci tcp_listendrop(sk); 154762306a36Sopenharmony_ci return 0; 154862306a36Sopenharmony_ci} 154962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_conn_request); 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci/* 155362306a36Sopenharmony_ci * The three way handshake has completed - we got a valid synack - 155462306a36Sopenharmony_ci * now create the new socket. 155562306a36Sopenharmony_ci */ 155662306a36Sopenharmony_cistruct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 155762306a36Sopenharmony_ci struct request_sock *req, 155862306a36Sopenharmony_ci struct dst_entry *dst, 155962306a36Sopenharmony_ci struct request_sock *req_unhash, 156062306a36Sopenharmony_ci bool *own_req) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci struct inet_request_sock *ireq; 156362306a36Sopenharmony_ci bool found_dup_sk = false; 156462306a36Sopenharmony_ci struct inet_sock *newinet; 156562306a36Sopenharmony_ci struct tcp_sock *newtp; 156662306a36Sopenharmony_ci struct sock *newsk; 156762306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 156862306a36Sopenharmony_ci const union tcp_md5_addr *addr; 156962306a36Sopenharmony_ci struct tcp_md5sig_key *key; 157062306a36Sopenharmony_ci int l3index; 157162306a36Sopenharmony_ci#endif 157262306a36Sopenharmony_ci struct ip_options_rcu *inet_opt; 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci if (sk_acceptq_is_full(sk)) 157562306a36Sopenharmony_ci goto exit_overflow; 157662306a36Sopenharmony_ci 157762306a36Sopenharmony_ci newsk = tcp_create_openreq_child(sk, req, skb); 157862306a36Sopenharmony_ci if 
(!newsk) 157962306a36Sopenharmony_ci goto exit_nonewsk; 158062306a36Sopenharmony_ci 158162306a36Sopenharmony_ci newsk->sk_gso_type = SKB_GSO_TCPV4; 158262306a36Sopenharmony_ci inet_sk_rx_dst_set(newsk, skb); 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci newtp = tcp_sk(newsk); 158562306a36Sopenharmony_ci newinet = inet_sk(newsk); 158662306a36Sopenharmony_ci ireq = inet_rsk(req); 158762306a36Sopenharmony_ci sk_daddr_set(newsk, ireq->ir_rmt_addr); 158862306a36Sopenharmony_ci sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 158962306a36Sopenharmony_ci newsk->sk_bound_dev_if = ireq->ir_iif; 159062306a36Sopenharmony_ci newinet->inet_saddr = ireq->ir_loc_addr; 159162306a36Sopenharmony_ci inet_opt = rcu_dereference(ireq->ireq_opt); 159262306a36Sopenharmony_ci RCU_INIT_POINTER(newinet->inet_opt, inet_opt); 159362306a36Sopenharmony_ci newinet->mc_index = inet_iif(skb); 159462306a36Sopenharmony_ci newinet->mc_ttl = ip_hdr(skb)->ttl; 159562306a36Sopenharmony_ci newinet->rcv_tos = ip_hdr(skb)->tos; 159662306a36Sopenharmony_ci inet_csk(newsk)->icsk_ext_hdr_len = 0; 159762306a36Sopenharmony_ci if (inet_opt) 159862306a36Sopenharmony_ci inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 159962306a36Sopenharmony_ci atomic_set(&newinet->inet_id, get_random_u16()); 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci /* Set ToS of the new socket based upon the value of incoming SYN. 160262306a36Sopenharmony_ci * ECT bits are set later in tcp_init_transfer(). 
160362306a36Sopenharmony_ci */ 160462306a36Sopenharmony_ci if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 160562306a36Sopenharmony_ci newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci if (!dst) { 160862306a36Sopenharmony_ci dst = inet_csk_route_child_sock(sk, newsk, req); 160962306a36Sopenharmony_ci if (!dst) 161062306a36Sopenharmony_ci goto put_and_exit; 161162306a36Sopenharmony_ci } else { 161262306a36Sopenharmony_ci /* syncookie case : see end of cookie_v4_check() */ 161362306a36Sopenharmony_ci } 161462306a36Sopenharmony_ci sk_setup_caps(newsk, dst); 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci tcp_ca_openreq_child(newsk, dst); 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci tcp_sync_mss(newsk, dst_mtu(dst)); 161962306a36Sopenharmony_ci newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci tcp_initialize_rcv_mss(newsk); 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 162462306a36Sopenharmony_ci l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 162562306a36Sopenharmony_ci /* Copy over the MD5 key from the original socket */ 162662306a36Sopenharmony_ci addr = (union tcp_md5_addr *)&newinet->inet_daddr; 162762306a36Sopenharmony_ci key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); 162862306a36Sopenharmony_ci if (key) { 162962306a36Sopenharmony_ci if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key)) 163062306a36Sopenharmony_ci goto put_and_exit; 163162306a36Sopenharmony_ci sk_gso_disable(newsk); 163262306a36Sopenharmony_ci } 163362306a36Sopenharmony_ci#endif 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci if (__inet_inherit_port(sk, newsk) < 0) 163662306a36Sopenharmony_ci goto put_and_exit; 163762306a36Sopenharmony_ci *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 163862306a36Sopenharmony_ci &found_dup_sk); 
163962306a36Sopenharmony_ci if (likely(*own_req)) { 164062306a36Sopenharmony_ci tcp_move_syn(newtp, req); 164162306a36Sopenharmony_ci ireq->ireq_opt = NULL; 164262306a36Sopenharmony_ci } else { 164362306a36Sopenharmony_ci newinet->inet_opt = NULL; 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci if (!req_unhash && found_dup_sk) { 164662306a36Sopenharmony_ci /* This code path should only be executed in the 164762306a36Sopenharmony_ci * syncookie case only 164862306a36Sopenharmony_ci */ 164962306a36Sopenharmony_ci bh_unlock_sock(newsk); 165062306a36Sopenharmony_ci sock_put(newsk); 165162306a36Sopenharmony_ci newsk = NULL; 165262306a36Sopenharmony_ci } 165362306a36Sopenharmony_ci } 165462306a36Sopenharmony_ci return newsk; 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ciexit_overflow: 165762306a36Sopenharmony_ci NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 165862306a36Sopenharmony_ciexit_nonewsk: 165962306a36Sopenharmony_ci dst_release(dst); 166062306a36Sopenharmony_ciexit: 166162306a36Sopenharmony_ci tcp_listendrop(sk); 166262306a36Sopenharmony_ci return NULL; 166362306a36Sopenharmony_ciput_and_exit: 166462306a36Sopenharmony_ci newinet->inet_opt = NULL; 166562306a36Sopenharmony_ci inet_csk_prepare_forced_close(newsk); 166662306a36Sopenharmony_ci tcp_done(newsk); 166762306a36Sopenharmony_ci goto exit; 166862306a36Sopenharmony_ci} 166962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_syn_recv_sock); 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_cistatic struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) 167262306a36Sopenharmony_ci{ 167362306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES 167462306a36Sopenharmony_ci const struct tcphdr *th = tcp_hdr(skb); 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci if (!th->syn) 167762306a36Sopenharmony_ci sk = cookie_v4_check(sk, skb); 167862306a36Sopenharmony_ci#endif 167962306a36Sopenharmony_ci return sk; 168062306a36Sopenharmony_ci} 168162306a36Sopenharmony_ci 
168262306a36Sopenharmony_ciu16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, 168362306a36Sopenharmony_ci struct tcphdr *th, u32 *cookie) 168462306a36Sopenharmony_ci{ 168562306a36Sopenharmony_ci u16 mss = 0; 168662306a36Sopenharmony_ci#ifdef CONFIG_SYN_COOKIES 168762306a36Sopenharmony_ci mss = tcp_get_syncookie_mss(&tcp_request_sock_ops, 168862306a36Sopenharmony_ci &tcp_request_sock_ipv4_ops, sk, th); 168962306a36Sopenharmony_ci if (mss) { 169062306a36Sopenharmony_ci *cookie = __cookie_v4_init_sequence(iph, th, &mss); 169162306a36Sopenharmony_ci tcp_synq_overflow(sk); 169262306a36Sopenharmony_ci } 169362306a36Sopenharmony_ci#endif 169462306a36Sopenharmony_ci return mss; 169562306a36Sopenharmony_ci} 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ciINDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 169862306a36Sopenharmony_ci u32)); 169962306a36Sopenharmony_ci/* The socket must have it's spinlock held when we get 170062306a36Sopenharmony_ci * here, unless it is a TCP_LISTEN socket. 170162306a36Sopenharmony_ci * 170262306a36Sopenharmony_ci * We have a potential double-lock case here, so even when 170362306a36Sopenharmony_ci * doing backlog processing we use the BH locking scheme. 170462306a36Sopenharmony_ci * This is because we cannot sleep with the original spinlock 170562306a36Sopenharmony_ci * held. 
170662306a36Sopenharmony_ci */ 170762306a36Sopenharmony_ciint tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 170862306a36Sopenharmony_ci{ 170962306a36Sopenharmony_ci enum skb_drop_reason reason; 171062306a36Sopenharmony_ci struct sock *rsk; 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 171362306a36Sopenharmony_ci struct dst_entry *dst; 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci dst = rcu_dereference_protected(sk->sk_rx_dst, 171662306a36Sopenharmony_ci lockdep_sock_is_held(sk)); 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_ci sock_rps_save_rxhash(sk, skb); 171962306a36Sopenharmony_ci sk_mark_napi_id(sk, skb); 172062306a36Sopenharmony_ci if (dst) { 172162306a36Sopenharmony_ci if (sk->sk_rx_dst_ifindex != skb->skb_iif || 172262306a36Sopenharmony_ci !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, 172362306a36Sopenharmony_ci dst, 0)) { 172462306a36Sopenharmony_ci RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 172562306a36Sopenharmony_ci dst_release(dst); 172662306a36Sopenharmony_ci } 172762306a36Sopenharmony_ci } 172862306a36Sopenharmony_ci tcp_rcv_established(sk, skb); 172962306a36Sopenharmony_ci return 0; 173062306a36Sopenharmony_ci } 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci reason = SKB_DROP_REASON_NOT_SPECIFIED; 173362306a36Sopenharmony_ci if (tcp_checksum_complete(skb)) 173462306a36Sopenharmony_ci goto csum_err; 173562306a36Sopenharmony_ci 173662306a36Sopenharmony_ci if (sk->sk_state == TCP_LISTEN) { 173762306a36Sopenharmony_ci struct sock *nsk = tcp_v4_cookie_check(sk, skb); 173862306a36Sopenharmony_ci 173962306a36Sopenharmony_ci if (!nsk) 174062306a36Sopenharmony_ci goto discard; 174162306a36Sopenharmony_ci if (nsk != sk) { 174262306a36Sopenharmony_ci if (tcp_child_process(sk, nsk, skb)) { 174362306a36Sopenharmony_ci rsk = nsk; 174462306a36Sopenharmony_ci goto reset; 174562306a36Sopenharmony_ci } 174662306a36Sopenharmony_ci return 0; 174762306a36Sopenharmony_ci } 
174862306a36Sopenharmony_ci } else 174962306a36Sopenharmony_ci sock_rps_save_rxhash(sk, skb); 175062306a36Sopenharmony_ci 175162306a36Sopenharmony_ci if (tcp_rcv_state_process(sk, skb)) { 175262306a36Sopenharmony_ci rsk = sk; 175362306a36Sopenharmony_ci goto reset; 175462306a36Sopenharmony_ci } 175562306a36Sopenharmony_ci return 0; 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_cireset: 175862306a36Sopenharmony_ci tcp_v4_send_reset(rsk, skb); 175962306a36Sopenharmony_cidiscard: 176062306a36Sopenharmony_ci kfree_skb_reason(skb, reason); 176162306a36Sopenharmony_ci /* Be careful here. If this function gets more complicated and 176262306a36Sopenharmony_ci * gcc suffers from register pressure on the x86, sk (in %ebx) 176362306a36Sopenharmony_ci * might be destroyed here. This current version compiles correctly, 176462306a36Sopenharmony_ci * but you have been warned. 176562306a36Sopenharmony_ci */ 176662306a36Sopenharmony_ci return 0; 176762306a36Sopenharmony_ci 176862306a36Sopenharmony_cicsum_err: 176962306a36Sopenharmony_ci reason = SKB_DROP_REASON_TCP_CSUM; 177062306a36Sopenharmony_ci trace_tcp_bad_csum(skb); 177162306a36Sopenharmony_ci TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 177262306a36Sopenharmony_ci TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 177362306a36Sopenharmony_ci goto discard; 177462306a36Sopenharmony_ci} 177562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_do_rcv); 177662306a36Sopenharmony_ci 177762306a36Sopenharmony_ciint tcp_v4_early_demux(struct sk_buff *skb) 177862306a36Sopenharmony_ci{ 177962306a36Sopenharmony_ci struct net *net = dev_net(skb->dev); 178062306a36Sopenharmony_ci const struct iphdr *iph; 178162306a36Sopenharmony_ci const struct tcphdr *th; 178262306a36Sopenharmony_ci struct sock *sk; 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci if (skb->pkt_type != PACKET_HOST) 178562306a36Sopenharmony_ci return 0; 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_ci if (!pskb_may_pull(skb, skb_transport_offset(skb) + 
sizeof(struct tcphdr))) 178862306a36Sopenharmony_ci return 0; 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ci iph = ip_hdr(skb); 179162306a36Sopenharmony_ci th = tcp_hdr(skb); 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci if (th->doff < sizeof(struct tcphdr) / 4) 179462306a36Sopenharmony_ci return 0; 179562306a36Sopenharmony_ci 179662306a36Sopenharmony_ci sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, 179762306a36Sopenharmony_ci iph->saddr, th->source, 179862306a36Sopenharmony_ci iph->daddr, ntohs(th->dest), 179962306a36Sopenharmony_ci skb->skb_iif, inet_sdif(skb)); 180062306a36Sopenharmony_ci if (sk) { 180162306a36Sopenharmony_ci skb->sk = sk; 180262306a36Sopenharmony_ci skb->destructor = sock_edemux; 180362306a36Sopenharmony_ci if (sk_fullsock(sk)) { 180462306a36Sopenharmony_ci struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci if (dst) 180762306a36Sopenharmony_ci dst = dst_check(dst, 0); 180862306a36Sopenharmony_ci if (dst && 180962306a36Sopenharmony_ci sk->sk_rx_dst_ifindex == skb->skb_iif) 181062306a36Sopenharmony_ci skb_dst_set_noref(skb, dst); 181162306a36Sopenharmony_ci } 181262306a36Sopenharmony_ci } 181362306a36Sopenharmony_ci return 0; 181462306a36Sopenharmony_ci} 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_cibool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, 181762306a36Sopenharmony_ci enum skb_drop_reason *reason) 181862306a36Sopenharmony_ci{ 181962306a36Sopenharmony_ci u32 limit, tail_gso_size, tail_gso_segs; 182062306a36Sopenharmony_ci struct skb_shared_info *shinfo; 182162306a36Sopenharmony_ci const struct tcphdr *th; 182262306a36Sopenharmony_ci struct tcphdr *thtail; 182362306a36Sopenharmony_ci struct sk_buff *tail; 182462306a36Sopenharmony_ci unsigned int hdrlen; 182562306a36Sopenharmony_ci bool fragstolen; 182662306a36Sopenharmony_ci u32 gso_segs; 182762306a36Sopenharmony_ci u32 gso_size; 182862306a36Sopenharmony_ci int delta; 
182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci /* In case all data was pulled from skb frags (in __pskb_pull_tail()), 183162306a36Sopenharmony_ci * we can fix skb->truesize to its real value to avoid future drops. 183262306a36Sopenharmony_ci * This is valid because skb is not yet charged to the socket. 183362306a36Sopenharmony_ci * It has been noticed pure SACK packets were sometimes dropped 183462306a36Sopenharmony_ci * (if cooked by drivers without copybreak feature). 183562306a36Sopenharmony_ci */ 183662306a36Sopenharmony_ci skb_condense(skb); 183762306a36Sopenharmony_ci 183862306a36Sopenharmony_ci skb_dst_drop(skb); 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci if (unlikely(tcp_checksum_complete(skb))) { 184162306a36Sopenharmony_ci bh_unlock_sock(sk); 184262306a36Sopenharmony_ci trace_tcp_bad_csum(skb); 184362306a36Sopenharmony_ci *reason = SKB_DROP_REASON_TCP_CSUM; 184462306a36Sopenharmony_ci __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 184562306a36Sopenharmony_ci __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 184662306a36Sopenharmony_ci return true; 184762306a36Sopenharmony_ci } 184862306a36Sopenharmony_ci 184962306a36Sopenharmony_ci /* Attempt coalescing to last skb in backlog, even if we are 185062306a36Sopenharmony_ci * above the limits. 185162306a36Sopenharmony_ci * This is okay because skb capacity is limited to MAX_SKB_FRAGS. 
185262306a36Sopenharmony_ci */ 185362306a36Sopenharmony_ci th = (const struct tcphdr *)skb->data; 185462306a36Sopenharmony_ci hdrlen = th->doff * 4; 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci tail = sk->sk_backlog.tail; 185762306a36Sopenharmony_ci if (!tail) 185862306a36Sopenharmony_ci goto no_coalesce; 185962306a36Sopenharmony_ci thtail = (struct tcphdr *)tail->data; 186062306a36Sopenharmony_ci 186162306a36Sopenharmony_ci if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || 186262306a36Sopenharmony_ci TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || 186362306a36Sopenharmony_ci ((TCP_SKB_CB(tail)->tcp_flags | 186462306a36Sopenharmony_ci TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || 186562306a36Sopenharmony_ci !((TCP_SKB_CB(tail)->tcp_flags & 186662306a36Sopenharmony_ci TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || 186762306a36Sopenharmony_ci ((TCP_SKB_CB(tail)->tcp_flags ^ 186862306a36Sopenharmony_ci TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || 186962306a36Sopenharmony_ci#ifdef CONFIG_TLS_DEVICE 187062306a36Sopenharmony_ci tail->decrypted != skb->decrypted || 187162306a36Sopenharmony_ci#endif 187262306a36Sopenharmony_ci !mptcp_skb_can_collapse(tail, skb) || 187362306a36Sopenharmony_ci thtail->doff != th->doff || 187462306a36Sopenharmony_ci memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) 187562306a36Sopenharmony_ci goto no_coalesce; 187662306a36Sopenharmony_ci 187762306a36Sopenharmony_ci __skb_pull(skb, hdrlen); 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci shinfo = skb_shinfo(skb); 188062306a36Sopenharmony_ci gso_size = shinfo->gso_size ?: skb->len; 188162306a36Sopenharmony_ci gso_segs = shinfo->gso_segs ?: 1; 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci shinfo = skb_shinfo(tail); 188462306a36Sopenharmony_ci tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen); 188562306a36Sopenharmony_ci tail_gso_segs = shinfo->gso_segs ?: 1; 188662306a36Sopenharmony_ci 
188762306a36Sopenharmony_ci if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { 188862306a36Sopenharmony_ci TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; 188962306a36Sopenharmony_ci 189062306a36Sopenharmony_ci if (likely(!before(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))) { 189162306a36Sopenharmony_ci TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; 189262306a36Sopenharmony_ci thtail->window = th->window; 189362306a36Sopenharmony_ci } 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci /* We have to update both TCP_SKB_CB(tail)->tcp_flags and 189662306a36Sopenharmony_ci * thtail->fin, so that the fast path in tcp_rcv_established() 189762306a36Sopenharmony_ci * is not entered if we append a packet with a FIN. 189862306a36Sopenharmony_ci * SYN, RST, URG are not present. 189962306a36Sopenharmony_ci * ACK is set on both packets. 190062306a36Sopenharmony_ci * PSH : we do not really care in TCP stack, 190162306a36Sopenharmony_ci * at least for 'GRO' packets. 190262306a36Sopenharmony_ci */ 190362306a36Sopenharmony_ci thtail->fin |= th->fin; 190462306a36Sopenharmony_ci TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_ci if (TCP_SKB_CB(skb)->has_rxtstamp) { 190762306a36Sopenharmony_ci TCP_SKB_CB(tail)->has_rxtstamp = true; 190862306a36Sopenharmony_ci tail->tstamp = skb->tstamp; 190962306a36Sopenharmony_ci skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp; 191062306a36Sopenharmony_ci } 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci /* Not as strict as GRO. 
We only need to carry mss max value */ 191362306a36Sopenharmony_ci shinfo->gso_size = max(gso_size, tail_gso_size); 191462306a36Sopenharmony_ci shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF); 191562306a36Sopenharmony_ci 191662306a36Sopenharmony_ci sk->sk_backlog.len += delta; 191762306a36Sopenharmony_ci __NET_INC_STATS(sock_net(sk), 191862306a36Sopenharmony_ci LINUX_MIB_TCPBACKLOGCOALESCE); 191962306a36Sopenharmony_ci kfree_skb_partial(skb, fragstolen); 192062306a36Sopenharmony_ci return false; 192162306a36Sopenharmony_ci } 192262306a36Sopenharmony_ci __skb_push(skb, hdrlen); 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_cino_coalesce: 192562306a36Sopenharmony_ci limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci /* Only socket owner can try to collapse/prune rx queues 192862306a36Sopenharmony_ci * to reduce memory overhead, so add a little headroom here. 192962306a36Sopenharmony_ci * Few sockets backlog are possibly concurrently non empty. 
193062306a36Sopenharmony_ci */ 193162306a36Sopenharmony_ci limit += 64 * 1024; 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci if (unlikely(sk_add_backlog(sk, skb, limit))) { 193462306a36Sopenharmony_ci bh_unlock_sock(sk); 193562306a36Sopenharmony_ci *reason = SKB_DROP_REASON_SOCKET_BACKLOG; 193662306a36Sopenharmony_ci __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); 193762306a36Sopenharmony_ci return true; 193862306a36Sopenharmony_ci } 193962306a36Sopenharmony_ci return false; 194062306a36Sopenharmony_ci} 194162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_add_backlog); 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ciint tcp_filter(struct sock *sk, struct sk_buff *skb) 194462306a36Sopenharmony_ci{ 194562306a36Sopenharmony_ci struct tcphdr *th = (struct tcphdr *)skb->data; 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci return sk_filter_trim_cap(sk, skb, th->doff * 4); 194862306a36Sopenharmony_ci} 194962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_filter); 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_cistatic void tcp_v4_restore_cb(struct sk_buff *skb) 195262306a36Sopenharmony_ci{ 195362306a36Sopenharmony_ci memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4, 195462306a36Sopenharmony_ci sizeof(struct inet_skb_parm)); 195562306a36Sopenharmony_ci} 195662306a36Sopenharmony_ci 195762306a36Sopenharmony_cistatic void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, 195862306a36Sopenharmony_ci const struct tcphdr *th) 195962306a36Sopenharmony_ci{ 196062306a36Sopenharmony_ci /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 196162306a36Sopenharmony_ci * barrier() makes sure compiler wont play fool^Waliasing games. 
196262306a36Sopenharmony_ci */ 196362306a36Sopenharmony_ci memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 196462306a36Sopenharmony_ci sizeof(struct inet_skb_parm)); 196562306a36Sopenharmony_ci barrier(); 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_ci TCP_SKB_CB(skb)->seq = ntohl(th->seq); 196862306a36Sopenharmony_ci TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 196962306a36Sopenharmony_ci skb->len - th->doff * 4); 197062306a36Sopenharmony_ci TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 197162306a36Sopenharmony_ci TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 197262306a36Sopenharmony_ci TCP_SKB_CB(skb)->tcp_tw_isn = 0; 197362306a36Sopenharmony_ci TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 197462306a36Sopenharmony_ci TCP_SKB_CB(skb)->sacked = 0; 197562306a36Sopenharmony_ci TCP_SKB_CB(skb)->has_rxtstamp = 197662306a36Sopenharmony_ci skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 197762306a36Sopenharmony_ci} 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci/* 198062306a36Sopenharmony_ci * From tcp_input.c 198162306a36Sopenharmony_ci */ 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_ciint tcp_v4_rcv(struct sk_buff *skb) 198462306a36Sopenharmony_ci{ 198562306a36Sopenharmony_ci struct net *net = dev_net(skb->dev); 198662306a36Sopenharmony_ci enum skb_drop_reason drop_reason; 198762306a36Sopenharmony_ci int sdif = inet_sdif(skb); 198862306a36Sopenharmony_ci int dif = inet_iif(skb); 198962306a36Sopenharmony_ci const struct iphdr *iph; 199062306a36Sopenharmony_ci const struct tcphdr *th; 199162306a36Sopenharmony_ci bool refcounted; 199262306a36Sopenharmony_ci struct sock *sk; 199362306a36Sopenharmony_ci int ret; 199462306a36Sopenharmony_ci 199562306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 199662306a36Sopenharmony_ci if (skb->pkt_type != PACKET_HOST) 199762306a36Sopenharmony_ci goto discard_it; 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci /* Count it even if it's bad */ 
200062306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_INSEGS); 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 200362306a36Sopenharmony_ci goto discard_it; 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci th = (const struct tcphdr *)skb->data; 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 200862306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 200962306a36Sopenharmony_ci goto bad_packet; 201062306a36Sopenharmony_ci } 201162306a36Sopenharmony_ci if (!pskb_may_pull(skb, th->doff * 4)) 201262306a36Sopenharmony_ci goto discard_it; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci /* An explanation is required here, I think. 201562306a36Sopenharmony_ci * Packet length and doff are validated by header prediction, 201662306a36Sopenharmony_ci * provided case of th->doff==0 is eliminated. 201762306a36Sopenharmony_ci * So, we defer the checks. */ 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 202062306a36Sopenharmony_ci goto csum_error; 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci th = (const struct tcphdr *)skb->data; 202362306a36Sopenharmony_ci iph = ip_hdr(skb); 202462306a36Sopenharmony_cilookup: 202562306a36Sopenharmony_ci sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo, 202662306a36Sopenharmony_ci skb, __tcp_hdrlen(th), th->source, 202762306a36Sopenharmony_ci th->dest, sdif, &refcounted); 202862306a36Sopenharmony_ci if (!sk) 202962306a36Sopenharmony_ci goto no_tcp_socket; 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ciprocess: 203262306a36Sopenharmony_ci if (sk->sk_state == TCP_TIME_WAIT) 203362306a36Sopenharmony_ci goto do_time_wait; 203462306a36Sopenharmony_ci 203562306a36Sopenharmony_ci if (sk->sk_state == TCP_NEW_SYN_RECV) { 203662306a36Sopenharmony_ci struct request_sock *req = inet_reqsk(sk); 
203762306a36Sopenharmony_ci bool req_stolen = false; 203862306a36Sopenharmony_ci struct sock *nsk; 203962306a36Sopenharmony_ci 204062306a36Sopenharmony_ci sk = req->rsk_listener; 204162306a36Sopenharmony_ci if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 204262306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_XFRM_POLICY; 204362306a36Sopenharmony_ci else 204462306a36Sopenharmony_ci drop_reason = tcp_inbound_md5_hash(sk, skb, 204562306a36Sopenharmony_ci &iph->saddr, &iph->daddr, 204662306a36Sopenharmony_ci AF_INET, dif, sdif); 204762306a36Sopenharmony_ci if (unlikely(drop_reason)) { 204862306a36Sopenharmony_ci sk_drops_add(sk, skb); 204962306a36Sopenharmony_ci reqsk_put(req); 205062306a36Sopenharmony_ci goto discard_it; 205162306a36Sopenharmony_ci } 205262306a36Sopenharmony_ci if (tcp_checksum_complete(skb)) { 205362306a36Sopenharmony_ci reqsk_put(req); 205462306a36Sopenharmony_ci goto csum_error; 205562306a36Sopenharmony_ci } 205662306a36Sopenharmony_ci if (unlikely(sk->sk_state != TCP_LISTEN)) { 205762306a36Sopenharmony_ci nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 205862306a36Sopenharmony_ci if (!nsk) { 205962306a36Sopenharmony_ci inet_csk_reqsk_queue_drop_and_put(sk, req); 206062306a36Sopenharmony_ci goto lookup; 206162306a36Sopenharmony_ci } 206262306a36Sopenharmony_ci sk = nsk; 206362306a36Sopenharmony_ci /* reuseport_migrate_sock() has already held one sk_refcnt 206462306a36Sopenharmony_ci * before returning. 206562306a36Sopenharmony_ci */ 206662306a36Sopenharmony_ci } else { 206762306a36Sopenharmony_ci /* We own a reference on the listener, increase it again 206862306a36Sopenharmony_ci * as we might lose it too soon. 
206962306a36Sopenharmony_ci */ 207062306a36Sopenharmony_ci sock_hold(sk); 207162306a36Sopenharmony_ci } 207262306a36Sopenharmony_ci refcounted = true; 207362306a36Sopenharmony_ci nsk = NULL; 207462306a36Sopenharmony_ci if (!tcp_filter(sk, skb)) { 207562306a36Sopenharmony_ci th = (const struct tcphdr *)skb->data; 207662306a36Sopenharmony_ci iph = ip_hdr(skb); 207762306a36Sopenharmony_ci tcp_v4_fill_cb(skb, iph, th); 207862306a36Sopenharmony_ci nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 207962306a36Sopenharmony_ci } else { 208062306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 208162306a36Sopenharmony_ci } 208262306a36Sopenharmony_ci if (!nsk) { 208362306a36Sopenharmony_ci reqsk_put(req); 208462306a36Sopenharmony_ci if (req_stolen) { 208562306a36Sopenharmony_ci /* Another cpu got exclusive access to req 208662306a36Sopenharmony_ci * and created a full blown socket. 208762306a36Sopenharmony_ci * Try to feed this packet to this socket 208862306a36Sopenharmony_ci * instead of discarding it. 
208962306a36Sopenharmony_ci */ 209062306a36Sopenharmony_ci tcp_v4_restore_cb(skb); 209162306a36Sopenharmony_ci sock_put(sk); 209262306a36Sopenharmony_ci goto lookup; 209362306a36Sopenharmony_ci } 209462306a36Sopenharmony_ci goto discard_and_relse; 209562306a36Sopenharmony_ci } 209662306a36Sopenharmony_ci nf_reset_ct(skb); 209762306a36Sopenharmony_ci if (nsk == sk) { 209862306a36Sopenharmony_ci reqsk_put(req); 209962306a36Sopenharmony_ci tcp_v4_restore_cb(skb); 210062306a36Sopenharmony_ci } else if (tcp_child_process(sk, nsk, skb)) { 210162306a36Sopenharmony_ci tcp_v4_send_reset(nsk, skb); 210262306a36Sopenharmony_ci goto discard_and_relse; 210362306a36Sopenharmony_ci } else { 210462306a36Sopenharmony_ci sock_put(sk); 210562306a36Sopenharmony_ci return 0; 210662306a36Sopenharmony_ci } 210762306a36Sopenharmony_ci } 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci if (static_branch_unlikely(&ip4_min_ttl)) { 211062306a36Sopenharmony_ci /* min_ttl can be changed concurrently from do_ip_setsockopt() */ 211162306a36Sopenharmony_ci if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { 211262306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 211362306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_TCP_MINTTL; 211462306a36Sopenharmony_ci goto discard_and_relse; 211562306a36Sopenharmony_ci } 211662306a36Sopenharmony_ci } 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 211962306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_XFRM_POLICY; 212062306a36Sopenharmony_ci goto discard_and_relse; 212162306a36Sopenharmony_ci } 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_ci drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, 212462306a36Sopenharmony_ci &iph->daddr, AF_INET, dif, sdif); 212562306a36Sopenharmony_ci if (drop_reason) 212662306a36Sopenharmony_ci goto discard_and_relse; 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci nf_reset_ct(skb); 212962306a36Sopenharmony_ci 
213062306a36Sopenharmony_ci if (tcp_filter(sk, skb)) { 213162306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 213262306a36Sopenharmony_ci goto discard_and_relse; 213362306a36Sopenharmony_ci } 213462306a36Sopenharmony_ci th = (const struct tcphdr *)skb->data; 213562306a36Sopenharmony_ci iph = ip_hdr(skb); 213662306a36Sopenharmony_ci tcp_v4_fill_cb(skb, iph, th); 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci skb->dev = NULL; 213962306a36Sopenharmony_ci 214062306a36Sopenharmony_ci if (sk->sk_state == TCP_LISTEN) { 214162306a36Sopenharmony_ci ret = tcp_v4_do_rcv(sk, skb); 214262306a36Sopenharmony_ci goto put_and_return; 214362306a36Sopenharmony_ci } 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci sk_incoming_cpu_update(sk); 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_ci bh_lock_sock_nested(sk); 214862306a36Sopenharmony_ci tcp_segs_in(tcp_sk(sk), skb); 214962306a36Sopenharmony_ci ret = 0; 215062306a36Sopenharmony_ci if (!sock_owned_by_user(sk)) { 215162306a36Sopenharmony_ci ret = tcp_v4_do_rcv(sk, skb); 215262306a36Sopenharmony_ci } else { 215362306a36Sopenharmony_ci if (tcp_add_backlog(sk, skb, &drop_reason)) 215462306a36Sopenharmony_ci goto discard_and_relse; 215562306a36Sopenharmony_ci } 215662306a36Sopenharmony_ci bh_unlock_sock(sk); 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ciput_and_return: 215962306a36Sopenharmony_ci if (refcounted) 216062306a36Sopenharmony_ci sock_put(sk); 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci return ret; 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_cino_tcp_socket: 216562306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_NO_SOCKET; 216662306a36Sopenharmony_ci if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 216762306a36Sopenharmony_ci goto discard_it; 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci tcp_v4_fill_cb(skb, iph, th); 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci if (tcp_checksum_complete(skb)) { 
217262306a36Sopenharmony_cicsum_error: 217362306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_TCP_CSUM; 217462306a36Sopenharmony_ci trace_tcp_bad_csum(skb); 217562306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 217662306a36Sopenharmony_cibad_packet: 217762306a36Sopenharmony_ci __TCP_INC_STATS(net, TCP_MIB_INERRS); 217862306a36Sopenharmony_ci } else { 217962306a36Sopenharmony_ci tcp_v4_send_reset(NULL, skb); 218062306a36Sopenharmony_ci } 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_cidiscard_it: 218362306a36Sopenharmony_ci SKB_DR_OR(drop_reason, NOT_SPECIFIED); 218462306a36Sopenharmony_ci /* Discard frame. */ 218562306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 218662306a36Sopenharmony_ci return 0; 218762306a36Sopenharmony_ci 218862306a36Sopenharmony_cidiscard_and_relse: 218962306a36Sopenharmony_ci sk_drops_add(sk, skb); 219062306a36Sopenharmony_ci if (refcounted) 219162306a36Sopenharmony_ci sock_put(sk); 219262306a36Sopenharmony_ci goto discard_it; 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_cido_time_wait: 219562306a36Sopenharmony_ci if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 219662306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_XFRM_POLICY; 219762306a36Sopenharmony_ci inet_twsk_put(inet_twsk(sk)); 219862306a36Sopenharmony_ci goto discard_it; 219962306a36Sopenharmony_ci } 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci tcp_v4_fill_cb(skb, iph, th); 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_ci if (tcp_checksum_complete(skb)) { 220462306a36Sopenharmony_ci inet_twsk_put(inet_twsk(sk)); 220562306a36Sopenharmony_ci goto csum_error; 220662306a36Sopenharmony_ci } 220762306a36Sopenharmony_ci switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 220862306a36Sopenharmony_ci case TCP_TW_SYN: { 220962306a36Sopenharmony_ci struct sock *sk2 = inet_lookup_listener(net, 221062306a36Sopenharmony_ci net->ipv4.tcp_death_row.hashinfo, 221162306a36Sopenharmony_ci skb, __tcp_hdrlen(th), 
221262306a36Sopenharmony_ci iph->saddr, th->source, 221362306a36Sopenharmony_ci iph->daddr, th->dest, 221462306a36Sopenharmony_ci inet_iif(skb), 221562306a36Sopenharmony_ci sdif); 221662306a36Sopenharmony_ci if (sk2) { 221762306a36Sopenharmony_ci inet_twsk_deschedule_put(inet_twsk(sk)); 221862306a36Sopenharmony_ci sk = sk2; 221962306a36Sopenharmony_ci tcp_v4_restore_cb(skb); 222062306a36Sopenharmony_ci refcounted = false; 222162306a36Sopenharmony_ci goto process; 222262306a36Sopenharmony_ci } 222362306a36Sopenharmony_ci } 222462306a36Sopenharmony_ci /* to ACK */ 222562306a36Sopenharmony_ci fallthrough; 222662306a36Sopenharmony_ci case TCP_TW_ACK: 222762306a36Sopenharmony_ci tcp_v4_timewait_ack(sk, skb); 222862306a36Sopenharmony_ci break; 222962306a36Sopenharmony_ci case TCP_TW_RST: 223062306a36Sopenharmony_ci tcp_v4_send_reset(sk, skb); 223162306a36Sopenharmony_ci inet_twsk_deschedule_put(inet_twsk(sk)); 223262306a36Sopenharmony_ci goto discard_it; 223362306a36Sopenharmony_ci case TCP_TW_SUCCESS:; 223462306a36Sopenharmony_ci } 223562306a36Sopenharmony_ci goto discard_it; 223662306a36Sopenharmony_ci} 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_cistatic struct timewait_sock_ops tcp_timewait_sock_ops = { 223962306a36Sopenharmony_ci .twsk_obj_size = sizeof(struct tcp_timewait_sock), 224062306a36Sopenharmony_ci .twsk_unique = tcp_twsk_unique, 224162306a36Sopenharmony_ci .twsk_destructor= tcp_twsk_destructor, 224262306a36Sopenharmony_ci}; 224362306a36Sopenharmony_ci 224462306a36Sopenharmony_civoid inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 224562306a36Sopenharmony_ci{ 224662306a36Sopenharmony_ci struct dst_entry *dst = skb_dst(skb); 224762306a36Sopenharmony_ci 224862306a36Sopenharmony_ci if (dst && dst_hold_safe(dst)) { 224962306a36Sopenharmony_ci rcu_assign_pointer(sk->sk_rx_dst, dst); 225062306a36Sopenharmony_ci sk->sk_rx_dst_ifindex = skb->skb_iif; 225162306a36Sopenharmony_ci } 225262306a36Sopenharmony_ci} 
225362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_sk_rx_dst_set); 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ciconst struct inet_connection_sock_af_ops ipv4_specific = { 225662306a36Sopenharmony_ci .queue_xmit = ip_queue_xmit, 225762306a36Sopenharmony_ci .send_check = tcp_v4_send_check, 225862306a36Sopenharmony_ci .rebuild_header = inet_sk_rebuild_header, 225962306a36Sopenharmony_ci .sk_rx_dst_set = inet_sk_rx_dst_set, 226062306a36Sopenharmony_ci .conn_request = tcp_v4_conn_request, 226162306a36Sopenharmony_ci .syn_recv_sock = tcp_v4_syn_recv_sock, 226262306a36Sopenharmony_ci .net_header_len = sizeof(struct iphdr), 226362306a36Sopenharmony_ci .setsockopt = ip_setsockopt, 226462306a36Sopenharmony_ci .getsockopt = ip_getsockopt, 226562306a36Sopenharmony_ci .addr2sockaddr = inet_csk_addr2sockaddr, 226662306a36Sopenharmony_ci .sockaddr_len = sizeof(struct sockaddr_in), 226762306a36Sopenharmony_ci .mtu_reduced = tcp_v4_mtu_reduced, 226862306a36Sopenharmony_ci}; 226962306a36Sopenharmony_ciEXPORT_SYMBOL(ipv4_specific); 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 227262306a36Sopenharmony_cistatic const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 227362306a36Sopenharmony_ci .md5_lookup = tcp_v4_md5_lookup, 227462306a36Sopenharmony_ci .calc_md5_hash = tcp_v4_md5_hash_skb, 227562306a36Sopenharmony_ci .md5_parse = tcp_v4_parse_md5_keys, 227662306a36Sopenharmony_ci}; 227762306a36Sopenharmony_ci#endif 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci/* NOTE: A lot of things set to zero explicitly by call to 228062306a36Sopenharmony_ci * sk_alloc() so need not be done here. 
228162306a36Sopenharmony_ci */ 228262306a36Sopenharmony_cistatic int tcp_v4_init_sock(struct sock *sk) 228362306a36Sopenharmony_ci{ 228462306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci tcp_init_sock(sk); 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci icsk->icsk_af_ops = &ipv4_specific; 228962306a36Sopenharmony_ci 229062306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 229162306a36Sopenharmony_ci tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 229262306a36Sopenharmony_ci#endif 229362306a36Sopenharmony_ci 229462306a36Sopenharmony_ci return 0; 229562306a36Sopenharmony_ci} 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_civoid tcp_v4_destroy_sock(struct sock *sk) 229862306a36Sopenharmony_ci{ 229962306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 230062306a36Sopenharmony_ci 230162306a36Sopenharmony_ci trace_tcp_destroy_sock(sk); 230262306a36Sopenharmony_ci 230362306a36Sopenharmony_ci tcp_clear_xmit_timers(sk); 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci tcp_cleanup_congestion_control(sk); 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_ci tcp_cleanup_ulp(sk); 230862306a36Sopenharmony_ci 230962306a36Sopenharmony_ci /* Cleanup up the write buffer. */ 231062306a36Sopenharmony_ci tcp_write_queue_purge(sk); 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci /* Check if we want to disable active TFO */ 231362306a36Sopenharmony_ci tcp_fastopen_active_disable_ofo_check(sk); 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci /* Cleans up our, hopefully empty, out_of_order_queue. 
*/ 231662306a36Sopenharmony_ci skb_rbtree_purge(&tp->out_of_order_queue); 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG 231962306a36Sopenharmony_ci /* Clean up the MD5 key list, if any */ 232062306a36Sopenharmony_ci if (tp->md5sig_info) { 232162306a36Sopenharmony_ci tcp_clear_md5_list(sk); 232262306a36Sopenharmony_ci kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); 232362306a36Sopenharmony_ci tp->md5sig_info = NULL; 232462306a36Sopenharmony_ci static_branch_slow_dec_deferred(&tcp_md5_needed); 232562306a36Sopenharmony_ci } 232662306a36Sopenharmony_ci#endif 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci /* Clean up a referenced TCP bind bucket. */ 232962306a36Sopenharmony_ci if (inet_csk(sk)->icsk_bind_hash) 233062306a36Sopenharmony_ci inet_put_port(sk); 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_ci BUG_ON(rcu_access_pointer(tp->fastopen_rsk)); 233362306a36Sopenharmony_ci 233462306a36Sopenharmony_ci /* If socket is aborted during connect operation */ 233562306a36Sopenharmony_ci tcp_free_fastopen_req(tp); 233662306a36Sopenharmony_ci tcp_fastopen_destroy_cipher(sk); 233762306a36Sopenharmony_ci tcp_saved_syn_free(tp); 233862306a36Sopenharmony_ci 233962306a36Sopenharmony_ci sk_sockets_allocated_dec(sk); 234062306a36Sopenharmony_ci} 234162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_v4_destroy_sock); 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS 234462306a36Sopenharmony_ci/* Proc filesystem TCP sock list dumping. 
*/ 234562306a36Sopenharmony_ci 234662306a36Sopenharmony_cistatic unsigned short seq_file_family(const struct seq_file *seq); 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_cistatic bool seq_sk_match(struct seq_file *seq, const struct sock *sk) 234962306a36Sopenharmony_ci{ 235062306a36Sopenharmony_ci unsigned short family = seq_file_family(seq); 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci /* AF_UNSPEC is used as a match all */ 235362306a36Sopenharmony_ci return ((family == AF_UNSPEC || family == sk->sk_family) && 235462306a36Sopenharmony_ci net_eq(sock_net(sk), seq_file_net(seq))); 235562306a36Sopenharmony_ci} 235662306a36Sopenharmony_ci 235762306a36Sopenharmony_ci/* Find a non empty bucket (starting from st->bucket) 235862306a36Sopenharmony_ci * and return the first sk from it. 235962306a36Sopenharmony_ci */ 236062306a36Sopenharmony_cistatic void *listening_get_first(struct seq_file *seq) 236162306a36Sopenharmony_ci{ 236262306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 236362306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci st->offset = 0; 236662306a36Sopenharmony_ci for (; st->bucket <= hinfo->lhash2_mask; st->bucket++) { 236762306a36Sopenharmony_ci struct inet_listen_hashbucket *ilb2; 236862306a36Sopenharmony_ci struct hlist_nulls_node *node; 236962306a36Sopenharmony_ci struct sock *sk; 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci ilb2 = &hinfo->lhash2[st->bucket]; 237262306a36Sopenharmony_ci if (hlist_nulls_empty(&ilb2->nulls_head)) 237362306a36Sopenharmony_ci continue; 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_ci spin_lock(&ilb2->lock); 237662306a36Sopenharmony_ci sk_nulls_for_each(sk, node, &ilb2->nulls_head) { 237762306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) 237862306a36Sopenharmony_ci return sk; 237962306a36Sopenharmony_ci } 238062306a36Sopenharmony_ci spin_unlock(&ilb2->lock); 
238162306a36Sopenharmony_ci } 238262306a36Sopenharmony_ci 238362306a36Sopenharmony_ci return NULL; 238462306a36Sopenharmony_ci} 238562306a36Sopenharmony_ci 238662306a36Sopenharmony_ci/* Find the next sk of "cur" within the same bucket (i.e. st->bucket). 238762306a36Sopenharmony_ci * If "cur" is the last one in the st->bucket, 238862306a36Sopenharmony_ci * call listening_get_first() to return the first sk of the next 238962306a36Sopenharmony_ci * non empty bucket. 239062306a36Sopenharmony_ci */ 239162306a36Sopenharmony_cistatic void *listening_get_next(struct seq_file *seq, void *cur) 239262306a36Sopenharmony_ci{ 239362306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 239462306a36Sopenharmony_ci struct inet_listen_hashbucket *ilb2; 239562306a36Sopenharmony_ci struct hlist_nulls_node *node; 239662306a36Sopenharmony_ci struct inet_hashinfo *hinfo; 239762306a36Sopenharmony_ci struct sock *sk = cur; 239862306a36Sopenharmony_ci 239962306a36Sopenharmony_ci ++st->num; 240062306a36Sopenharmony_ci ++st->offset; 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci sk = sk_nulls_next(sk); 240362306a36Sopenharmony_ci sk_nulls_for_each_from(sk, node) { 240462306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) 240562306a36Sopenharmony_ci return sk; 240662306a36Sopenharmony_ci } 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 240962306a36Sopenharmony_ci ilb2 = &hinfo->lhash2[st->bucket]; 241062306a36Sopenharmony_ci spin_unlock(&ilb2->lock); 241162306a36Sopenharmony_ci ++st->bucket; 241262306a36Sopenharmony_ci return listening_get_first(seq); 241362306a36Sopenharmony_ci} 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_cistatic void *listening_get_idx(struct seq_file *seq, loff_t *pos) 241662306a36Sopenharmony_ci{ 241762306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 241862306a36Sopenharmony_ci void *rc; 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci st->bucket = 0; 
242162306a36Sopenharmony_ci st->offset = 0; 242262306a36Sopenharmony_ci rc = listening_get_first(seq); 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ci while (rc && *pos) { 242562306a36Sopenharmony_ci rc = listening_get_next(seq, rc); 242662306a36Sopenharmony_ci --*pos; 242762306a36Sopenharmony_ci } 242862306a36Sopenharmony_ci return rc; 242962306a36Sopenharmony_ci} 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_cistatic inline bool empty_bucket(struct inet_hashinfo *hinfo, 243262306a36Sopenharmony_ci const struct tcp_iter_state *st) 243362306a36Sopenharmony_ci{ 243462306a36Sopenharmony_ci return hlist_nulls_empty(&hinfo->ehash[st->bucket].chain); 243562306a36Sopenharmony_ci} 243662306a36Sopenharmony_ci 243762306a36Sopenharmony_ci/* 243862306a36Sopenharmony_ci * Get first established socket starting from bucket given in st->bucket. 243962306a36Sopenharmony_ci * If st->bucket is zero, the very first socket in the hash is returned. 244062306a36Sopenharmony_ci */ 244162306a36Sopenharmony_cistatic void *established_get_first(struct seq_file *seq) 244262306a36Sopenharmony_ci{ 244362306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 244462306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 244562306a36Sopenharmony_ci 244662306a36Sopenharmony_ci st->offset = 0; 244762306a36Sopenharmony_ci for (; st->bucket <= hinfo->ehash_mask; ++st->bucket) { 244862306a36Sopenharmony_ci struct sock *sk; 244962306a36Sopenharmony_ci struct hlist_nulls_node *node; 245062306a36Sopenharmony_ci spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket); 245162306a36Sopenharmony_ci 245262306a36Sopenharmony_ci cond_resched(); 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci /* Lockless fast path for the common case of empty buckets */ 245562306a36Sopenharmony_ci if (empty_bucket(hinfo, st)) 245662306a36Sopenharmony_ci continue; 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci spin_lock_bh(lock); 
245962306a36Sopenharmony_ci sk_nulls_for_each(sk, node, &hinfo->ehash[st->bucket].chain) { 246062306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) 246162306a36Sopenharmony_ci return sk; 246262306a36Sopenharmony_ci } 246362306a36Sopenharmony_ci spin_unlock_bh(lock); 246462306a36Sopenharmony_ci } 246562306a36Sopenharmony_ci 246662306a36Sopenharmony_ci return NULL; 246762306a36Sopenharmony_ci} 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_cistatic void *established_get_next(struct seq_file *seq, void *cur) 247062306a36Sopenharmony_ci{ 247162306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 247262306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 247362306a36Sopenharmony_ci struct hlist_nulls_node *node; 247462306a36Sopenharmony_ci struct sock *sk = cur; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci ++st->num; 247762306a36Sopenharmony_ci ++st->offset; 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_ci sk = sk_nulls_next(sk); 248062306a36Sopenharmony_ci 248162306a36Sopenharmony_ci sk_nulls_for_each_from(sk, node) { 248262306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) 248362306a36Sopenharmony_ci return sk; 248462306a36Sopenharmony_ci } 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); 248762306a36Sopenharmony_ci ++st->bucket; 248862306a36Sopenharmony_ci return established_get_first(seq); 248962306a36Sopenharmony_ci} 249062306a36Sopenharmony_ci 249162306a36Sopenharmony_cistatic void *established_get_idx(struct seq_file *seq, loff_t pos) 249262306a36Sopenharmony_ci{ 249362306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 249462306a36Sopenharmony_ci void *rc; 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci st->bucket = 0; 249762306a36Sopenharmony_ci rc = established_get_first(seq); 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_ci while (rc && pos) { 250062306a36Sopenharmony_ci rc = 
established_get_next(seq, rc); 250162306a36Sopenharmony_ci --pos; 250262306a36Sopenharmony_ci } 250362306a36Sopenharmony_ci return rc; 250462306a36Sopenharmony_ci} 250562306a36Sopenharmony_ci 250662306a36Sopenharmony_cistatic void *tcp_get_idx(struct seq_file *seq, loff_t pos) 250762306a36Sopenharmony_ci{ 250862306a36Sopenharmony_ci void *rc; 250962306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 251062306a36Sopenharmony_ci 251162306a36Sopenharmony_ci st->state = TCP_SEQ_STATE_LISTENING; 251262306a36Sopenharmony_ci rc = listening_get_idx(seq, &pos); 251362306a36Sopenharmony_ci 251462306a36Sopenharmony_ci if (!rc) { 251562306a36Sopenharmony_ci st->state = TCP_SEQ_STATE_ESTABLISHED; 251662306a36Sopenharmony_ci rc = established_get_idx(seq, pos); 251762306a36Sopenharmony_ci } 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_ci return rc; 252062306a36Sopenharmony_ci} 252162306a36Sopenharmony_ci 252262306a36Sopenharmony_cistatic void *tcp_seek_last_pos(struct seq_file *seq) 252362306a36Sopenharmony_ci{ 252462306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 252562306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 252662306a36Sopenharmony_ci int bucket = st->bucket; 252762306a36Sopenharmony_ci int offset = st->offset; 252862306a36Sopenharmony_ci int orig_num = st->num; 252962306a36Sopenharmony_ci void *rc = NULL; 253062306a36Sopenharmony_ci 253162306a36Sopenharmony_ci switch (st->state) { 253262306a36Sopenharmony_ci case TCP_SEQ_STATE_LISTENING: 253362306a36Sopenharmony_ci if (st->bucket > hinfo->lhash2_mask) 253462306a36Sopenharmony_ci break; 253562306a36Sopenharmony_ci rc = listening_get_first(seq); 253662306a36Sopenharmony_ci while (offset-- && rc && bucket == st->bucket) 253762306a36Sopenharmony_ci rc = listening_get_next(seq, rc); 253862306a36Sopenharmony_ci if (rc) 253962306a36Sopenharmony_ci break; 254062306a36Sopenharmony_ci st->bucket = 0; 254162306a36Sopenharmony_ci st->state = 
TCP_SEQ_STATE_ESTABLISHED; 254262306a36Sopenharmony_ci fallthrough; 254362306a36Sopenharmony_ci case TCP_SEQ_STATE_ESTABLISHED: 254462306a36Sopenharmony_ci if (st->bucket > hinfo->ehash_mask) 254562306a36Sopenharmony_ci break; 254662306a36Sopenharmony_ci rc = established_get_first(seq); 254762306a36Sopenharmony_ci while (offset-- && rc && bucket == st->bucket) 254862306a36Sopenharmony_ci rc = established_get_next(seq, rc); 254962306a36Sopenharmony_ci } 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci st->num = orig_num; 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_ci return rc; 255462306a36Sopenharmony_ci} 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_civoid *tcp_seq_start(struct seq_file *seq, loff_t *pos) 255762306a36Sopenharmony_ci{ 255862306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 255962306a36Sopenharmony_ci void *rc; 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci if (*pos && *pos == st->last_pos) { 256262306a36Sopenharmony_ci rc = tcp_seek_last_pos(seq); 256362306a36Sopenharmony_ci if (rc) 256462306a36Sopenharmony_ci goto out; 256562306a36Sopenharmony_ci } 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_ci st->state = TCP_SEQ_STATE_LISTENING; 256862306a36Sopenharmony_ci st->num = 0; 256962306a36Sopenharmony_ci st->bucket = 0; 257062306a36Sopenharmony_ci st->offset = 0; 257162306a36Sopenharmony_ci rc = *pos ? 
tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 257262306a36Sopenharmony_ci 257362306a36Sopenharmony_ciout: 257462306a36Sopenharmony_ci st->last_pos = *pos; 257562306a36Sopenharmony_ci return rc; 257662306a36Sopenharmony_ci} 257762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_start); 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_civoid *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 258062306a36Sopenharmony_ci{ 258162306a36Sopenharmony_ci struct tcp_iter_state *st = seq->private; 258262306a36Sopenharmony_ci void *rc = NULL; 258362306a36Sopenharmony_ci 258462306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) { 258562306a36Sopenharmony_ci rc = tcp_get_idx(seq, 0); 258662306a36Sopenharmony_ci goto out; 258762306a36Sopenharmony_ci } 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_ci switch (st->state) { 259062306a36Sopenharmony_ci case TCP_SEQ_STATE_LISTENING: 259162306a36Sopenharmony_ci rc = listening_get_next(seq, v); 259262306a36Sopenharmony_ci if (!rc) { 259362306a36Sopenharmony_ci st->state = TCP_SEQ_STATE_ESTABLISHED; 259462306a36Sopenharmony_ci st->bucket = 0; 259562306a36Sopenharmony_ci st->offset = 0; 259662306a36Sopenharmony_ci rc = established_get_first(seq); 259762306a36Sopenharmony_ci } 259862306a36Sopenharmony_ci break; 259962306a36Sopenharmony_ci case TCP_SEQ_STATE_ESTABLISHED: 260062306a36Sopenharmony_ci rc = established_get_next(seq, v); 260162306a36Sopenharmony_ci break; 260262306a36Sopenharmony_ci } 260362306a36Sopenharmony_ciout: 260462306a36Sopenharmony_ci ++*pos; 260562306a36Sopenharmony_ci st->last_pos = *pos; 260662306a36Sopenharmony_ci return rc; 260762306a36Sopenharmony_ci} 260862306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_next); 260962306a36Sopenharmony_ci 261062306a36Sopenharmony_civoid tcp_seq_stop(struct seq_file *seq, void *v) 261162306a36Sopenharmony_ci{ 261262306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 261362306a36Sopenharmony_ci struct tcp_iter_state *st = 
seq->private; 261462306a36Sopenharmony_ci 261562306a36Sopenharmony_ci switch (st->state) { 261662306a36Sopenharmony_ci case TCP_SEQ_STATE_LISTENING: 261762306a36Sopenharmony_ci if (v != SEQ_START_TOKEN) 261862306a36Sopenharmony_ci spin_unlock(&hinfo->lhash2[st->bucket].lock); 261962306a36Sopenharmony_ci break; 262062306a36Sopenharmony_ci case TCP_SEQ_STATE_ESTABLISHED: 262162306a36Sopenharmony_ci if (v) 262262306a36Sopenharmony_ci spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); 262362306a36Sopenharmony_ci break; 262462306a36Sopenharmony_ci } 262562306a36Sopenharmony_ci} 262662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_seq_stop); 262762306a36Sopenharmony_ci 262862306a36Sopenharmony_cistatic void get_openreq4(const struct request_sock *req, 262962306a36Sopenharmony_ci struct seq_file *f, int i) 263062306a36Sopenharmony_ci{ 263162306a36Sopenharmony_ci const struct inet_request_sock *ireq = inet_rsk(req); 263262306a36Sopenharmony_ci long delta = req->rsk_timer.expires - jiffies; 263362306a36Sopenharmony_ci 263462306a36Sopenharmony_ci seq_printf(f, "%4d: %08X:%04X %08X:%04X" 263562306a36Sopenharmony_ci " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 263662306a36Sopenharmony_ci i, 263762306a36Sopenharmony_ci ireq->ir_loc_addr, 263862306a36Sopenharmony_ci ireq->ir_num, 263962306a36Sopenharmony_ci ireq->ir_rmt_addr, 264062306a36Sopenharmony_ci ntohs(ireq->ir_rmt_port), 264162306a36Sopenharmony_ci TCP_SYN_RECV, 264262306a36Sopenharmony_ci 0, 0, /* could print option size, but that is af dependent. 
*/ 264362306a36Sopenharmony_ci 1, /* timers active (only the expire timer) */ 264462306a36Sopenharmony_ci jiffies_delta_to_clock_t(delta), 264562306a36Sopenharmony_ci req->num_timeout, 264662306a36Sopenharmony_ci from_kuid_munged(seq_user_ns(f), 264762306a36Sopenharmony_ci sock_i_uid(req->rsk_listener)), 264862306a36Sopenharmony_ci 0, /* non standard timer */ 264962306a36Sopenharmony_ci 0, /* open_requests have no inode */ 265062306a36Sopenharmony_ci 0, 265162306a36Sopenharmony_ci req); 265262306a36Sopenharmony_ci} 265362306a36Sopenharmony_ci 265462306a36Sopenharmony_cistatic void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) 265562306a36Sopenharmony_ci{ 265662306a36Sopenharmony_ci int timer_active; 265762306a36Sopenharmony_ci unsigned long timer_expires; 265862306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 265962306a36Sopenharmony_ci const struct inet_connection_sock *icsk = inet_csk(sk); 266062306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 266162306a36Sopenharmony_ci const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 266262306a36Sopenharmony_ci __be32 dest = inet->inet_daddr; 266362306a36Sopenharmony_ci __be32 src = inet->inet_rcv_saddr; 266462306a36Sopenharmony_ci __u16 destp = ntohs(inet->inet_dport); 266562306a36Sopenharmony_ci __u16 srcp = ntohs(inet->inet_sport); 266662306a36Sopenharmony_ci int rx_queue; 266762306a36Sopenharmony_ci int state; 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci if (icsk->icsk_pending == ICSK_TIME_RETRANS || 267062306a36Sopenharmony_ci icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 267162306a36Sopenharmony_ci icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 267262306a36Sopenharmony_ci timer_active = 1; 267362306a36Sopenharmony_ci timer_expires = icsk->icsk_timeout; 267462306a36Sopenharmony_ci } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 267562306a36Sopenharmony_ci timer_active = 4; 267662306a36Sopenharmony_ci timer_expires = icsk->icsk_timeout; 
267762306a36Sopenharmony_ci } else if (timer_pending(&sk->sk_timer)) { 267862306a36Sopenharmony_ci timer_active = 2; 267962306a36Sopenharmony_ci timer_expires = sk->sk_timer.expires; 268062306a36Sopenharmony_ci } else { 268162306a36Sopenharmony_ci timer_active = 0; 268262306a36Sopenharmony_ci timer_expires = jiffies; 268362306a36Sopenharmony_ci } 268462306a36Sopenharmony_ci 268562306a36Sopenharmony_ci state = inet_sk_state_load(sk); 268662306a36Sopenharmony_ci if (state == TCP_LISTEN) 268762306a36Sopenharmony_ci rx_queue = READ_ONCE(sk->sk_ack_backlog); 268862306a36Sopenharmony_ci else 268962306a36Sopenharmony_ci /* Because we don't lock the socket, 269062306a36Sopenharmony_ci * we might find a transient negative value. 269162306a36Sopenharmony_ci */ 269262306a36Sopenharmony_ci rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 269362306a36Sopenharmony_ci READ_ONCE(tp->copied_seq), 0); 269462306a36Sopenharmony_ci 269562306a36Sopenharmony_ci seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 269662306a36Sopenharmony_ci "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 269762306a36Sopenharmony_ci i, src, srcp, dest, destp, state, 269862306a36Sopenharmony_ci READ_ONCE(tp->write_seq) - tp->snd_una, 269962306a36Sopenharmony_ci rx_queue, 270062306a36Sopenharmony_ci timer_active, 270162306a36Sopenharmony_ci jiffies_delta_to_clock_t(timer_expires - jiffies), 270262306a36Sopenharmony_ci icsk->icsk_retransmits, 270362306a36Sopenharmony_ci from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 270462306a36Sopenharmony_ci icsk->icsk_probes_out, 270562306a36Sopenharmony_ci sock_i_ino(sk), 270662306a36Sopenharmony_ci refcount_read(&sk->sk_refcnt), sk, 270762306a36Sopenharmony_ci jiffies_to_clock_t(icsk->icsk_rto), 270862306a36Sopenharmony_ci jiffies_to_clock_t(icsk->icsk_ack.ato), 270962306a36Sopenharmony_ci (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), 271062306a36Sopenharmony_ci tcp_snd_cwnd(tp), 271162306a36Sopenharmony_ci state == TCP_LISTEN ? 
271262306a36Sopenharmony_ci fastopenq->max_qlen : 271362306a36Sopenharmony_ci (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 271462306a36Sopenharmony_ci} 271562306a36Sopenharmony_ci 271662306a36Sopenharmony_cistatic void get_timewait4_sock(const struct inet_timewait_sock *tw, 271762306a36Sopenharmony_ci struct seq_file *f, int i) 271862306a36Sopenharmony_ci{ 271962306a36Sopenharmony_ci long delta = tw->tw_timer.expires - jiffies; 272062306a36Sopenharmony_ci __be32 dest, src; 272162306a36Sopenharmony_ci __u16 destp, srcp; 272262306a36Sopenharmony_ci 272362306a36Sopenharmony_ci dest = tw->tw_daddr; 272462306a36Sopenharmony_ci src = tw->tw_rcv_saddr; 272562306a36Sopenharmony_ci destp = ntohs(tw->tw_dport); 272662306a36Sopenharmony_ci srcp = ntohs(tw->tw_sport); 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci seq_printf(f, "%4d: %08X:%04X %08X:%04X" 272962306a36Sopenharmony_ci " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", 273062306a36Sopenharmony_ci i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 273162306a36Sopenharmony_ci 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 273262306a36Sopenharmony_ci refcount_read(&tw->tw_refcnt), tw); 273362306a36Sopenharmony_ci} 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci#define TMPSZ 150 273662306a36Sopenharmony_ci 273762306a36Sopenharmony_cistatic int tcp4_seq_show(struct seq_file *seq, void *v) 273862306a36Sopenharmony_ci{ 273962306a36Sopenharmony_ci struct tcp_iter_state *st; 274062306a36Sopenharmony_ci struct sock *sk = v; 274162306a36Sopenharmony_ci 274262306a36Sopenharmony_ci seq_setwidth(seq, TMPSZ - 1); 274362306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) { 274462306a36Sopenharmony_ci seq_puts(seq, " sl local_address rem_address st tx_queue " 274562306a36Sopenharmony_ci "rx_queue tr tm->when retrnsmt uid timeout " 274662306a36Sopenharmony_ci "inode"); 274762306a36Sopenharmony_ci goto out; 274862306a36Sopenharmony_ci } 274962306a36Sopenharmony_ci st = seq->private; 
275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci if (sk->sk_state == TCP_TIME_WAIT) 275262306a36Sopenharmony_ci get_timewait4_sock(v, seq, st->num); 275362306a36Sopenharmony_ci else if (sk->sk_state == TCP_NEW_SYN_RECV) 275462306a36Sopenharmony_ci get_openreq4(v, seq, st->num); 275562306a36Sopenharmony_ci else 275662306a36Sopenharmony_ci get_tcp4_sock(v, seq, st->num); 275762306a36Sopenharmony_ciout: 275862306a36Sopenharmony_ci seq_pad(seq, '\n'); 275962306a36Sopenharmony_ci return 0; 276062306a36Sopenharmony_ci} 276162306a36Sopenharmony_ci 276262306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL 276362306a36Sopenharmony_cistruct bpf_tcp_iter_state { 276462306a36Sopenharmony_ci struct tcp_iter_state state; 276562306a36Sopenharmony_ci unsigned int cur_sk; 276662306a36Sopenharmony_ci unsigned int end_sk; 276762306a36Sopenharmony_ci unsigned int max_sk; 276862306a36Sopenharmony_ci struct sock **batch; 276962306a36Sopenharmony_ci bool st_bucket_done; 277062306a36Sopenharmony_ci}; 277162306a36Sopenharmony_ci 277262306a36Sopenharmony_cistruct bpf_iter__tcp { 277362306a36Sopenharmony_ci __bpf_md_ptr(struct bpf_iter_meta *, meta); 277462306a36Sopenharmony_ci __bpf_md_ptr(struct sock_common *, sk_common); 277562306a36Sopenharmony_ci uid_t uid __aligned(8); 277662306a36Sopenharmony_ci}; 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_cistatic int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 277962306a36Sopenharmony_ci struct sock_common *sk_common, uid_t uid) 278062306a36Sopenharmony_ci{ 278162306a36Sopenharmony_ci struct bpf_iter__tcp ctx; 278262306a36Sopenharmony_ci 278362306a36Sopenharmony_ci meta->seq_num--; /* skip SEQ_START_TOKEN */ 278462306a36Sopenharmony_ci ctx.meta = meta; 278562306a36Sopenharmony_ci ctx.sk_common = sk_common; 278662306a36Sopenharmony_ci ctx.uid = uid; 278762306a36Sopenharmony_ci return bpf_iter_run_prog(prog, &ctx); 278862306a36Sopenharmony_ci} 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_cistatic void 
bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) 279162306a36Sopenharmony_ci{ 279262306a36Sopenharmony_ci while (iter->cur_sk < iter->end_sk) 279362306a36Sopenharmony_ci sock_gen_put(iter->batch[iter->cur_sk++]); 279462306a36Sopenharmony_ci} 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_cistatic int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, 279762306a36Sopenharmony_ci unsigned int new_batch_sz) 279862306a36Sopenharmony_ci{ 279962306a36Sopenharmony_ci struct sock **new_batch; 280062306a36Sopenharmony_ci 280162306a36Sopenharmony_ci new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 280262306a36Sopenharmony_ci GFP_USER | __GFP_NOWARN); 280362306a36Sopenharmony_ci if (!new_batch) 280462306a36Sopenharmony_ci return -ENOMEM; 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_ci bpf_iter_tcp_put_batch(iter); 280762306a36Sopenharmony_ci kvfree(iter->batch); 280862306a36Sopenharmony_ci iter->batch = new_batch; 280962306a36Sopenharmony_ci iter->max_sk = new_batch_sz; 281062306a36Sopenharmony_ci 281162306a36Sopenharmony_ci return 0; 281262306a36Sopenharmony_ci} 281362306a36Sopenharmony_ci 281462306a36Sopenharmony_cistatic unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, 281562306a36Sopenharmony_ci struct sock *start_sk) 281662306a36Sopenharmony_ci{ 281762306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 281862306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = seq->private; 281962306a36Sopenharmony_ci struct tcp_iter_state *st = &iter->state; 282062306a36Sopenharmony_ci struct hlist_nulls_node *node; 282162306a36Sopenharmony_ci unsigned int expected = 1; 282262306a36Sopenharmony_ci struct sock *sk; 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci sock_hold(start_sk); 282562306a36Sopenharmony_ci iter->batch[iter->end_sk++] = start_sk; 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_ci sk = sk_nulls_next(start_sk); 282862306a36Sopenharmony_ci 
sk_nulls_for_each_from(sk, node) { 282962306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) { 283062306a36Sopenharmony_ci if (iter->end_sk < iter->max_sk) { 283162306a36Sopenharmony_ci sock_hold(sk); 283262306a36Sopenharmony_ci iter->batch[iter->end_sk++] = sk; 283362306a36Sopenharmony_ci } 283462306a36Sopenharmony_ci expected++; 283562306a36Sopenharmony_ci } 283662306a36Sopenharmony_ci } 283762306a36Sopenharmony_ci spin_unlock(&hinfo->lhash2[st->bucket].lock); 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci return expected; 284062306a36Sopenharmony_ci} 284162306a36Sopenharmony_ci 284262306a36Sopenharmony_cistatic unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, 284362306a36Sopenharmony_ci struct sock *start_sk) 284462306a36Sopenharmony_ci{ 284562306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 284662306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = seq->private; 284762306a36Sopenharmony_ci struct tcp_iter_state *st = &iter->state; 284862306a36Sopenharmony_ci struct hlist_nulls_node *node; 284962306a36Sopenharmony_ci unsigned int expected = 1; 285062306a36Sopenharmony_ci struct sock *sk; 285162306a36Sopenharmony_ci 285262306a36Sopenharmony_ci sock_hold(start_sk); 285362306a36Sopenharmony_ci iter->batch[iter->end_sk++] = start_sk; 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_ci sk = sk_nulls_next(start_sk); 285662306a36Sopenharmony_ci sk_nulls_for_each_from(sk, node) { 285762306a36Sopenharmony_ci if (seq_sk_match(seq, sk)) { 285862306a36Sopenharmony_ci if (iter->end_sk < iter->max_sk) { 285962306a36Sopenharmony_ci sock_hold(sk); 286062306a36Sopenharmony_ci iter->batch[iter->end_sk++] = sk; 286162306a36Sopenharmony_ci } 286262306a36Sopenharmony_ci expected++; 286362306a36Sopenharmony_ci } 286462306a36Sopenharmony_ci } 286562306a36Sopenharmony_ci spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci return expected; 
286862306a36Sopenharmony_ci} 286962306a36Sopenharmony_ci 287062306a36Sopenharmony_cistatic struct sock *bpf_iter_tcp_batch(struct seq_file *seq) 287162306a36Sopenharmony_ci{ 287262306a36Sopenharmony_ci struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; 287362306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = seq->private; 287462306a36Sopenharmony_ci struct tcp_iter_state *st = &iter->state; 287562306a36Sopenharmony_ci unsigned int expected; 287662306a36Sopenharmony_ci bool resized = false; 287762306a36Sopenharmony_ci struct sock *sk; 287862306a36Sopenharmony_ci 287962306a36Sopenharmony_ci /* The st->bucket is done. Directly advance to the next 288062306a36Sopenharmony_ci * bucket instead of having the tcp_seek_last_pos() to skip 288162306a36Sopenharmony_ci * one by one in the current bucket and eventually find out 288262306a36Sopenharmony_ci * it has to advance to the next bucket. 288362306a36Sopenharmony_ci */ 288462306a36Sopenharmony_ci if (iter->st_bucket_done) { 288562306a36Sopenharmony_ci st->offset = 0; 288662306a36Sopenharmony_ci st->bucket++; 288762306a36Sopenharmony_ci if (st->state == TCP_SEQ_STATE_LISTENING && 288862306a36Sopenharmony_ci st->bucket > hinfo->lhash2_mask) { 288962306a36Sopenharmony_ci st->state = TCP_SEQ_STATE_ESTABLISHED; 289062306a36Sopenharmony_ci st->bucket = 0; 289162306a36Sopenharmony_ci } 289262306a36Sopenharmony_ci } 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ciagain: 289562306a36Sopenharmony_ci /* Get a new batch */ 289662306a36Sopenharmony_ci iter->cur_sk = 0; 289762306a36Sopenharmony_ci iter->end_sk = 0; 289862306a36Sopenharmony_ci iter->st_bucket_done = false; 289962306a36Sopenharmony_ci 290062306a36Sopenharmony_ci sk = tcp_seek_last_pos(seq); 290162306a36Sopenharmony_ci if (!sk) 290262306a36Sopenharmony_ci return NULL; /* Done */ 290362306a36Sopenharmony_ci 290462306a36Sopenharmony_ci if (st->state == TCP_SEQ_STATE_LISTENING) 290562306a36Sopenharmony_ci expected = 
bpf_iter_tcp_listening_batch(seq, sk); 290662306a36Sopenharmony_ci else 290762306a36Sopenharmony_ci expected = bpf_iter_tcp_established_batch(seq, sk); 290862306a36Sopenharmony_ci 290962306a36Sopenharmony_ci if (iter->end_sk == expected) { 291062306a36Sopenharmony_ci iter->st_bucket_done = true; 291162306a36Sopenharmony_ci return sk; 291262306a36Sopenharmony_ci } 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) { 291562306a36Sopenharmony_ci resized = true; 291662306a36Sopenharmony_ci goto again; 291762306a36Sopenharmony_ci } 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_ci return sk; 292062306a36Sopenharmony_ci} 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_cistatic void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos) 292362306a36Sopenharmony_ci{ 292462306a36Sopenharmony_ci /* bpf iter does not support lseek, so it always 292562306a36Sopenharmony_ci * continue from where it was stop()-ped. 292662306a36Sopenharmony_ci */ 292762306a36Sopenharmony_ci if (*pos) 292862306a36Sopenharmony_ci return bpf_iter_tcp_batch(seq); 292962306a36Sopenharmony_ci 293062306a36Sopenharmony_ci return SEQ_START_TOKEN; 293162306a36Sopenharmony_ci} 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_cistatic void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 293462306a36Sopenharmony_ci{ 293562306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = seq->private; 293662306a36Sopenharmony_ci struct tcp_iter_state *st = &iter->state; 293762306a36Sopenharmony_ci struct sock *sk; 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci /* Whenever seq_next() is called, the iter->cur_sk is 294062306a36Sopenharmony_ci * done with seq_show(), so advance to the next sk in 294162306a36Sopenharmony_ci * the batch. 294262306a36Sopenharmony_ci */ 294362306a36Sopenharmony_ci if (iter->cur_sk < iter->end_sk) { 294462306a36Sopenharmony_ci /* Keeping st->num consistent in tcp_iter_state. 
294562306a36Sopenharmony_ci * bpf_iter_tcp does not use st->num. 294662306a36Sopenharmony_ci * meta.seq_num is used instead. 294762306a36Sopenharmony_ci */ 294862306a36Sopenharmony_ci st->num++; 294962306a36Sopenharmony_ci /* Move st->offset to the next sk in the bucket such that 295062306a36Sopenharmony_ci * the future start() will resume at st->offset in 295162306a36Sopenharmony_ci * st->bucket. See tcp_seek_last_pos(). 295262306a36Sopenharmony_ci */ 295362306a36Sopenharmony_ci st->offset++; 295462306a36Sopenharmony_ci sock_gen_put(iter->batch[iter->cur_sk++]); 295562306a36Sopenharmony_ci } 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci if (iter->cur_sk < iter->end_sk) 295862306a36Sopenharmony_ci sk = iter->batch[iter->cur_sk]; 295962306a36Sopenharmony_ci else 296062306a36Sopenharmony_ci sk = bpf_iter_tcp_batch(seq); 296162306a36Sopenharmony_ci 296262306a36Sopenharmony_ci ++*pos; 296362306a36Sopenharmony_ci /* Keeping st->last_pos consistent in tcp_iter_state. 296462306a36Sopenharmony_ci * bpf iter does not do lseek, so st->last_pos always equals to *pos. 
296562306a36Sopenharmony_ci */ 296662306a36Sopenharmony_ci st->last_pos = *pos; 296762306a36Sopenharmony_ci return sk; 296862306a36Sopenharmony_ci} 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_cistatic int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) 297162306a36Sopenharmony_ci{ 297262306a36Sopenharmony_ci struct bpf_iter_meta meta; 297362306a36Sopenharmony_ci struct bpf_prog *prog; 297462306a36Sopenharmony_ci struct sock *sk = v; 297562306a36Sopenharmony_ci uid_t uid; 297662306a36Sopenharmony_ci int ret; 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) 297962306a36Sopenharmony_ci return 0; 298062306a36Sopenharmony_ci 298162306a36Sopenharmony_ci if (sk_fullsock(sk)) 298262306a36Sopenharmony_ci lock_sock(sk); 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci if (unlikely(sk_unhashed(sk))) { 298562306a36Sopenharmony_ci ret = SEQ_SKIP; 298662306a36Sopenharmony_ci goto unlock; 298762306a36Sopenharmony_ci } 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_ci if (sk->sk_state == TCP_TIME_WAIT) { 299062306a36Sopenharmony_ci uid = 0; 299162306a36Sopenharmony_ci } else if (sk->sk_state == TCP_NEW_SYN_RECV) { 299262306a36Sopenharmony_ci const struct request_sock *req = v; 299362306a36Sopenharmony_ci 299462306a36Sopenharmony_ci uid = from_kuid_munged(seq_user_ns(seq), 299562306a36Sopenharmony_ci sock_i_uid(req->rsk_listener)); 299662306a36Sopenharmony_ci } else { 299762306a36Sopenharmony_ci uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 299862306a36Sopenharmony_ci } 299962306a36Sopenharmony_ci 300062306a36Sopenharmony_ci meta.seq = seq; 300162306a36Sopenharmony_ci prog = bpf_iter_get_info(&meta, false); 300262306a36Sopenharmony_ci ret = tcp_prog_seq_show(prog, &meta, v, uid); 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ciunlock: 300562306a36Sopenharmony_ci if (sk_fullsock(sk)) 300662306a36Sopenharmony_ci release_sock(sk); 300762306a36Sopenharmony_ci return ret; 300862306a36Sopenharmony_ci 
300962306a36Sopenharmony_ci} 301062306a36Sopenharmony_ci 301162306a36Sopenharmony_cistatic void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) 301262306a36Sopenharmony_ci{ 301362306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = seq->private; 301462306a36Sopenharmony_ci struct bpf_iter_meta meta; 301562306a36Sopenharmony_ci struct bpf_prog *prog; 301662306a36Sopenharmony_ci 301762306a36Sopenharmony_ci if (!v) { 301862306a36Sopenharmony_ci meta.seq = seq; 301962306a36Sopenharmony_ci prog = bpf_iter_get_info(&meta, true); 302062306a36Sopenharmony_ci if (prog) 302162306a36Sopenharmony_ci (void)tcp_prog_seq_show(prog, &meta, v, 0); 302262306a36Sopenharmony_ci } 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci if (iter->cur_sk < iter->end_sk) { 302562306a36Sopenharmony_ci bpf_iter_tcp_put_batch(iter); 302662306a36Sopenharmony_ci iter->st_bucket_done = false; 302762306a36Sopenharmony_ci } 302862306a36Sopenharmony_ci} 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_cistatic const struct seq_operations bpf_iter_tcp_seq_ops = { 303162306a36Sopenharmony_ci .show = bpf_iter_tcp_seq_show, 303262306a36Sopenharmony_ci .start = bpf_iter_tcp_seq_start, 303362306a36Sopenharmony_ci .next = bpf_iter_tcp_seq_next, 303462306a36Sopenharmony_ci .stop = bpf_iter_tcp_seq_stop, 303562306a36Sopenharmony_ci}; 303662306a36Sopenharmony_ci#endif 303762306a36Sopenharmony_cistatic unsigned short seq_file_family(const struct seq_file *seq) 303862306a36Sopenharmony_ci{ 303962306a36Sopenharmony_ci const struct tcp_seq_afinfo *afinfo; 304062306a36Sopenharmony_ci 304162306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL 304262306a36Sopenharmony_ci /* Iterated from bpf_iter. Let the bpf prog to filter instead. 
*/ 304362306a36Sopenharmony_ci if (seq->op == &bpf_iter_tcp_seq_ops) 304462306a36Sopenharmony_ci return AF_UNSPEC; 304562306a36Sopenharmony_ci#endif 304662306a36Sopenharmony_ci 304762306a36Sopenharmony_ci /* Iterated from proc fs */ 304862306a36Sopenharmony_ci afinfo = pde_data(file_inode(seq->file)); 304962306a36Sopenharmony_ci return afinfo->family; 305062306a36Sopenharmony_ci} 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_cistatic const struct seq_operations tcp4_seq_ops = { 305362306a36Sopenharmony_ci .show = tcp4_seq_show, 305462306a36Sopenharmony_ci .start = tcp_seq_start, 305562306a36Sopenharmony_ci .next = tcp_seq_next, 305662306a36Sopenharmony_ci .stop = tcp_seq_stop, 305762306a36Sopenharmony_ci}; 305862306a36Sopenharmony_ci 305962306a36Sopenharmony_cistatic struct tcp_seq_afinfo tcp4_seq_afinfo = { 306062306a36Sopenharmony_ci .family = AF_INET, 306162306a36Sopenharmony_ci}; 306262306a36Sopenharmony_ci 306362306a36Sopenharmony_cistatic int __net_init tcp4_proc_init_net(struct net *net) 306462306a36Sopenharmony_ci{ 306562306a36Sopenharmony_ci if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops, 306662306a36Sopenharmony_ci sizeof(struct tcp_iter_state), &tcp4_seq_afinfo)) 306762306a36Sopenharmony_ci return -ENOMEM; 306862306a36Sopenharmony_ci return 0; 306962306a36Sopenharmony_ci} 307062306a36Sopenharmony_ci 307162306a36Sopenharmony_cistatic void __net_exit tcp4_proc_exit_net(struct net *net) 307262306a36Sopenharmony_ci{ 307362306a36Sopenharmony_ci remove_proc_entry("tcp", net->proc_net); 307462306a36Sopenharmony_ci} 307562306a36Sopenharmony_ci 307662306a36Sopenharmony_cistatic struct pernet_operations tcp4_net_ops = { 307762306a36Sopenharmony_ci .init = tcp4_proc_init_net, 307862306a36Sopenharmony_ci .exit = tcp4_proc_exit_net, 307962306a36Sopenharmony_ci}; 308062306a36Sopenharmony_ci 308162306a36Sopenharmony_ciint __init tcp4_proc_init(void) 308262306a36Sopenharmony_ci{ 308362306a36Sopenharmony_ci return 
register_pernet_subsys(&tcp4_net_ops); 308462306a36Sopenharmony_ci} 308562306a36Sopenharmony_ci 308662306a36Sopenharmony_civoid tcp4_proc_exit(void) 308762306a36Sopenharmony_ci{ 308862306a36Sopenharmony_ci unregister_pernet_subsys(&tcp4_net_ops); 308962306a36Sopenharmony_ci} 309062306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */ 309162306a36Sopenharmony_ci 309262306a36Sopenharmony_ci/* @wake is one when sk_stream_write_space() calls us. 309362306a36Sopenharmony_ci * This sends EPOLLOUT only if notsent_bytes is half the limit. 309462306a36Sopenharmony_ci * This mimics the strategy used in sock_def_write_space(). 309562306a36Sopenharmony_ci */ 309662306a36Sopenharmony_cibool tcp_stream_memory_free(const struct sock *sk, int wake) 309762306a36Sopenharmony_ci{ 309862306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 309962306a36Sopenharmony_ci u32 notsent_bytes = READ_ONCE(tp->write_seq) - 310062306a36Sopenharmony_ci READ_ONCE(tp->snd_nxt); 310162306a36Sopenharmony_ci 310262306a36Sopenharmony_ci return (notsent_bytes << wake) < tcp_notsent_lowat(tp); 310362306a36Sopenharmony_ci} 310462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_stream_memory_free); 310562306a36Sopenharmony_ci 310662306a36Sopenharmony_cistruct proto tcp_prot = { 310762306a36Sopenharmony_ci .name = "TCP", 310862306a36Sopenharmony_ci .owner = THIS_MODULE, 310962306a36Sopenharmony_ci .close = tcp_close, 311062306a36Sopenharmony_ci .pre_connect = tcp_v4_pre_connect, 311162306a36Sopenharmony_ci .connect = tcp_v4_connect, 311262306a36Sopenharmony_ci .disconnect = tcp_disconnect, 311362306a36Sopenharmony_ci .accept = inet_csk_accept, 311462306a36Sopenharmony_ci .ioctl = tcp_ioctl, 311562306a36Sopenharmony_ci .init = tcp_v4_init_sock, 311662306a36Sopenharmony_ci .destroy = tcp_v4_destroy_sock, 311762306a36Sopenharmony_ci .shutdown = tcp_shutdown, 311862306a36Sopenharmony_ci .setsockopt = tcp_setsockopt, 311962306a36Sopenharmony_ci .getsockopt = tcp_getsockopt, 312062306a36Sopenharmony_ci 
.bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 312162306a36Sopenharmony_ci .keepalive = tcp_set_keepalive, 312262306a36Sopenharmony_ci .recvmsg = tcp_recvmsg, 312362306a36Sopenharmony_ci .sendmsg = tcp_sendmsg, 312462306a36Sopenharmony_ci .splice_eof = tcp_splice_eof, 312562306a36Sopenharmony_ci .backlog_rcv = tcp_v4_do_rcv, 312662306a36Sopenharmony_ci .release_cb = tcp_release_cb, 312762306a36Sopenharmony_ci .hash = inet_hash, 312862306a36Sopenharmony_ci .unhash = inet_unhash, 312962306a36Sopenharmony_ci .get_port = inet_csk_get_port, 313062306a36Sopenharmony_ci .put_port = inet_put_port, 313162306a36Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL 313262306a36Sopenharmony_ci .psock_update_sk_prot = tcp_bpf_update_proto, 313362306a36Sopenharmony_ci#endif 313462306a36Sopenharmony_ci .enter_memory_pressure = tcp_enter_memory_pressure, 313562306a36Sopenharmony_ci .leave_memory_pressure = tcp_leave_memory_pressure, 313662306a36Sopenharmony_ci .stream_memory_free = tcp_stream_memory_free, 313762306a36Sopenharmony_ci .sockets_allocated = &tcp_sockets_allocated, 313862306a36Sopenharmony_ci .orphan_count = &tcp_orphan_count, 313962306a36Sopenharmony_ci 314062306a36Sopenharmony_ci .memory_allocated = &tcp_memory_allocated, 314162306a36Sopenharmony_ci .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci .memory_pressure = &tcp_memory_pressure, 314462306a36Sopenharmony_ci .sysctl_mem = sysctl_tcp_mem, 314562306a36Sopenharmony_ci .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 314662306a36Sopenharmony_ci .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 314762306a36Sopenharmony_ci .max_header = MAX_TCP_HEADER, 314862306a36Sopenharmony_ci .obj_size = sizeof(struct tcp_sock), 314962306a36Sopenharmony_ci .slab_flags = SLAB_TYPESAFE_BY_RCU, 315062306a36Sopenharmony_ci .twsk_prot = &tcp_timewait_sock_ops, 315162306a36Sopenharmony_ci .rsk_prot = &tcp_request_sock_ops, 315262306a36Sopenharmony_ci 
.h.hashinfo = NULL, 315362306a36Sopenharmony_ci .no_autobind = true, 315462306a36Sopenharmony_ci .diag_destroy = tcp_abort, 315562306a36Sopenharmony_ci}; 315662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_prot); 315762306a36Sopenharmony_ci 315862306a36Sopenharmony_cistatic void __net_exit tcp_sk_exit(struct net *net) 315962306a36Sopenharmony_ci{ 316062306a36Sopenharmony_ci if (net->ipv4.tcp_congestion_control) 316162306a36Sopenharmony_ci bpf_module_put(net->ipv4.tcp_congestion_control, 316262306a36Sopenharmony_ci net->ipv4.tcp_congestion_control->owner); 316362306a36Sopenharmony_ci} 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_cistatic void __net_init tcp_set_hashinfo(struct net *net) 316662306a36Sopenharmony_ci{ 316762306a36Sopenharmony_ci struct inet_hashinfo *hinfo; 316862306a36Sopenharmony_ci unsigned int ehash_entries; 316962306a36Sopenharmony_ci struct net *old_net; 317062306a36Sopenharmony_ci 317162306a36Sopenharmony_ci if (net_eq(net, &init_net)) 317262306a36Sopenharmony_ci goto fallback; 317362306a36Sopenharmony_ci 317462306a36Sopenharmony_ci old_net = current->nsproxy->net_ns; 317562306a36Sopenharmony_ci ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries); 317662306a36Sopenharmony_ci if (!ehash_entries) 317762306a36Sopenharmony_ci goto fallback; 317862306a36Sopenharmony_ci 317962306a36Sopenharmony_ci ehash_entries = roundup_pow_of_two(ehash_entries); 318062306a36Sopenharmony_ci hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries); 318162306a36Sopenharmony_ci if (!hinfo) { 318262306a36Sopenharmony_ci pr_warn("Failed to allocate TCP ehash (entries: %u) " 318362306a36Sopenharmony_ci "for a netns, fallback to the global one\n", 318462306a36Sopenharmony_ci ehash_entries); 318562306a36Sopenharmony_cifallback: 318662306a36Sopenharmony_ci hinfo = &tcp_hashinfo; 318762306a36Sopenharmony_ci ehash_entries = tcp_hashinfo.ehash_mask + 1; 318862306a36Sopenharmony_ci } 318962306a36Sopenharmony_ci 319062306a36Sopenharmony_ci 
net->ipv4.tcp_death_row.hashinfo = hinfo; 319162306a36Sopenharmony_ci net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2; 319262306a36Sopenharmony_ci net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128); 319362306a36Sopenharmony_ci} 319462306a36Sopenharmony_ci 319562306a36Sopenharmony_cistatic int __net_init tcp_sk_init(struct net *net) 319662306a36Sopenharmony_ci{ 319762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_ecn = 2; 319862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_ecn_fallback = 1; 319962306a36Sopenharmony_ci 320062306a36Sopenharmony_ci net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 320162306a36Sopenharmony_ci net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; 320262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 320362306a36Sopenharmony_ci net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 320462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS; 320562306a36Sopenharmony_ci 320662306a36Sopenharmony_ci net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; 320762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 320862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 320962306a36Sopenharmony_ci 321062306a36Sopenharmony_ci net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 321162306a36Sopenharmony_ci net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 321262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_syncookies = 1; 321362306a36Sopenharmony_ci net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; 321462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; 321562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; 321662306a36Sopenharmony_ci net->ipv4.sysctl_tcp_orphan_retries = 0; 321762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 321862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; 321962306a36Sopenharmony_ci 
net->ipv4.sysctl_tcp_tw_reuse = 2; 322062306a36Sopenharmony_ci net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; 322162306a36Sopenharmony_ci 322262306a36Sopenharmony_ci refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); 322362306a36Sopenharmony_ci tcp_set_hashinfo(net); 322462306a36Sopenharmony_ci 322562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_sack = 1; 322662306a36Sopenharmony_ci net->ipv4.sysctl_tcp_window_scaling = 1; 322762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_timestamps = 1; 322862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_early_retrans = 3; 322962306a36Sopenharmony_ci net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; 323062306a36Sopenharmony_ci net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ 323162306a36Sopenharmony_ci net->ipv4.sysctl_tcp_retrans_collapse = 1; 323262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_max_reordering = 300; 323362306a36Sopenharmony_ci net->ipv4.sysctl_tcp_dsack = 1; 323462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_app_win = 31; 323562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_adv_win_scale = 1; 323662306a36Sopenharmony_ci net->ipv4.sysctl_tcp_frto = 2; 323762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; 323862306a36Sopenharmony_ci /* This limits the percentage of the congestion window which we 323962306a36Sopenharmony_ci * will allow a single TSO frame to consume. Building TSO frames 324062306a36Sopenharmony_ci * which are too large can cause TCP streams to be bursty. 324162306a36Sopenharmony_ci */ 324262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_tso_win_divisor = 3; 324362306a36Sopenharmony_ci /* Default TSQ limit of 16 TSO segments */ 324462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536; 324562306a36Sopenharmony_ci 324662306a36Sopenharmony_ci /* rfc5961 challenge ack rate limiting, per net-ns, disabled by default. 
*/ 324762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_challenge_ack_limit = INT_MAX; 324862306a36Sopenharmony_ci 324962306a36Sopenharmony_ci net->ipv4.sysctl_tcp_min_tso_segs = 2; 325062306a36Sopenharmony_ci net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */ 325162306a36Sopenharmony_ci net->ipv4.sysctl_tcp_min_rtt_wlen = 300; 325262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_autocorking = 1; 325362306a36Sopenharmony_ci net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; 325462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; 325562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; 325662306a36Sopenharmony_ci if (net != &init_net) { 325762306a36Sopenharmony_ci memcpy(net->ipv4.sysctl_tcp_rmem, 325862306a36Sopenharmony_ci init_net.ipv4.sysctl_tcp_rmem, 325962306a36Sopenharmony_ci sizeof(init_net.ipv4.sysctl_tcp_rmem)); 326062306a36Sopenharmony_ci memcpy(net->ipv4.sysctl_tcp_wmem, 326162306a36Sopenharmony_ci init_net.ipv4.sysctl_tcp_wmem, 326262306a36Sopenharmony_ci sizeof(init_net.ipv4.sysctl_tcp_wmem)); 326362306a36Sopenharmony_ci } 326462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; 326562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; 326662306a36Sopenharmony_ci net->ipv4.sysctl_tcp_comp_sack_nr = 44; 326762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; 326862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; 326962306a36Sopenharmony_ci atomic_set(&net->ipv4.tfo_active_disable_times, 0); 327062306a36Sopenharmony_ci 327162306a36Sopenharmony_ci /* Set default values for PLB */ 327262306a36Sopenharmony_ci net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */ 327362306a36Sopenharmony_ci net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3; 327462306a36Sopenharmony_ci net->ipv4.sysctl_tcp_plb_rehash_rounds = 12; 327562306a36Sopenharmony_ci net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60; 327662306a36Sopenharmony_ci /* 
Default congestion threshold for PLB to mark a round is 50% */ 327762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_plb_cong_thresh = (1 << TCP_PLB_SCALE) / 2; 327862306a36Sopenharmony_ci 327962306a36Sopenharmony_ci /* Reno is always built in */ 328062306a36Sopenharmony_ci if (!net_eq(net, &init_net) && 328162306a36Sopenharmony_ci bpf_try_module_get(init_net.ipv4.tcp_congestion_control, 328262306a36Sopenharmony_ci init_net.ipv4.tcp_congestion_control->owner)) 328362306a36Sopenharmony_ci net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control; 328462306a36Sopenharmony_ci else 328562306a36Sopenharmony_ci net->ipv4.tcp_congestion_control = &tcp_reno; 328662306a36Sopenharmony_ci 328762306a36Sopenharmony_ci net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; 328862306a36Sopenharmony_ci net->ipv4.sysctl_tcp_shrink_window = 0; 328962306a36Sopenharmony_ci 329062306a36Sopenharmony_ci return 0; 329162306a36Sopenharmony_ci} 329262306a36Sopenharmony_ci 329362306a36Sopenharmony_cistatic void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 329462306a36Sopenharmony_ci{ 329562306a36Sopenharmony_ci struct net *net; 329662306a36Sopenharmony_ci 329762306a36Sopenharmony_ci tcp_twsk_purge(net_exit_list, AF_INET); 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci list_for_each_entry(net, net_exit_list, exit_list) { 330062306a36Sopenharmony_ci inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo); 330162306a36Sopenharmony_ci WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); 330262306a36Sopenharmony_ci tcp_fastopen_ctx_destroy(net); 330362306a36Sopenharmony_ci } 330462306a36Sopenharmony_ci} 330562306a36Sopenharmony_ci 330662306a36Sopenharmony_cistatic struct pernet_operations __net_initdata tcp_sk_ops = { 330762306a36Sopenharmony_ci .init = tcp_sk_init, 330862306a36Sopenharmony_ci .exit = tcp_sk_exit, 330962306a36Sopenharmony_ci .exit_batch = tcp_sk_exit_batch, 331062306a36Sopenharmony_ci}; 331162306a36Sopenharmony_ci 
331262306a36Sopenharmony_ci#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 331362306a36Sopenharmony_ciDEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, 331462306a36Sopenharmony_ci struct sock_common *sk_common, uid_t uid) 331562306a36Sopenharmony_ci 331662306a36Sopenharmony_ci#define INIT_BATCH_SZ 16 331762306a36Sopenharmony_ci 331862306a36Sopenharmony_cistatic int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) 331962306a36Sopenharmony_ci{ 332062306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = priv_data; 332162306a36Sopenharmony_ci int err; 332262306a36Sopenharmony_ci 332362306a36Sopenharmony_ci err = bpf_iter_init_seq_net(priv_data, aux); 332462306a36Sopenharmony_ci if (err) 332562306a36Sopenharmony_ci return err; 332662306a36Sopenharmony_ci 332762306a36Sopenharmony_ci err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ); 332862306a36Sopenharmony_ci if (err) { 332962306a36Sopenharmony_ci bpf_iter_fini_seq_net(priv_data); 333062306a36Sopenharmony_ci return err; 333162306a36Sopenharmony_ci } 333262306a36Sopenharmony_ci 333362306a36Sopenharmony_ci return 0; 333462306a36Sopenharmony_ci} 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_cistatic void bpf_iter_fini_tcp(void *priv_data) 333762306a36Sopenharmony_ci{ 333862306a36Sopenharmony_ci struct bpf_tcp_iter_state *iter = priv_data; 333962306a36Sopenharmony_ci 334062306a36Sopenharmony_ci bpf_iter_fini_seq_net(priv_data); 334162306a36Sopenharmony_ci kvfree(iter->batch); 334262306a36Sopenharmony_ci} 334362306a36Sopenharmony_ci 334462306a36Sopenharmony_cistatic const struct bpf_iter_seq_info tcp_seq_info = { 334562306a36Sopenharmony_ci .seq_ops = &bpf_iter_tcp_seq_ops, 334662306a36Sopenharmony_ci .init_seq_private = bpf_iter_init_tcp, 334762306a36Sopenharmony_ci .fini_seq_private = bpf_iter_fini_tcp, 334862306a36Sopenharmony_ci .seq_priv_size = sizeof(struct bpf_tcp_iter_state), 334962306a36Sopenharmony_ci}; 335062306a36Sopenharmony_ci 335162306a36Sopenharmony_cistatic const 
struct bpf_func_proto * 335262306a36Sopenharmony_cibpf_iter_tcp_get_func_proto(enum bpf_func_id func_id, 335362306a36Sopenharmony_ci const struct bpf_prog *prog) 335462306a36Sopenharmony_ci{ 335562306a36Sopenharmony_ci switch (func_id) { 335662306a36Sopenharmony_ci case BPF_FUNC_setsockopt: 335762306a36Sopenharmony_ci return &bpf_sk_setsockopt_proto; 335862306a36Sopenharmony_ci case BPF_FUNC_getsockopt: 335962306a36Sopenharmony_ci return &bpf_sk_getsockopt_proto; 336062306a36Sopenharmony_ci default: 336162306a36Sopenharmony_ci return NULL; 336262306a36Sopenharmony_ci } 336362306a36Sopenharmony_ci} 336462306a36Sopenharmony_ci 336562306a36Sopenharmony_cistatic struct bpf_iter_reg tcp_reg_info = { 336662306a36Sopenharmony_ci .target = "tcp", 336762306a36Sopenharmony_ci .ctx_arg_info_size = 1, 336862306a36Sopenharmony_ci .ctx_arg_info = { 336962306a36Sopenharmony_ci { offsetof(struct bpf_iter__tcp, sk_common), 337062306a36Sopenharmony_ci PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, 337162306a36Sopenharmony_ci }, 337262306a36Sopenharmony_ci .get_func_proto = bpf_iter_tcp_get_func_proto, 337362306a36Sopenharmony_ci .seq_info = &tcp_seq_info, 337462306a36Sopenharmony_ci}; 337562306a36Sopenharmony_ci 337662306a36Sopenharmony_cistatic void __init bpf_iter_register(void) 337762306a36Sopenharmony_ci{ 337862306a36Sopenharmony_ci tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]; 337962306a36Sopenharmony_ci if (bpf_iter_reg_target(&tcp_reg_info)) 338062306a36Sopenharmony_ci pr_warn("Warning: could not register bpf iterator tcp\n"); 338162306a36Sopenharmony_ci} 338262306a36Sopenharmony_ci 338362306a36Sopenharmony_ci#endif 338462306a36Sopenharmony_ci 338562306a36Sopenharmony_civoid __init tcp_v4_init(void) 338662306a36Sopenharmony_ci{ 338762306a36Sopenharmony_ci int cpu, res; 338862306a36Sopenharmony_ci 338962306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 339062306a36Sopenharmony_ci struct sock *sk; 339162306a36Sopenharmony_ci 339262306a36Sopenharmony_ci 
res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, 339362306a36Sopenharmony_ci IPPROTO_TCP, &init_net); 339462306a36Sopenharmony_ci if (res) 339562306a36Sopenharmony_ci panic("Failed to create the TCP control socket.\n"); 339662306a36Sopenharmony_ci sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci /* Please enforce IP_DF and IPID==0 for RST and 339962306a36Sopenharmony_ci * ACK sent in SYN-RECV and TIME-WAIT state. 340062306a36Sopenharmony_ci */ 340162306a36Sopenharmony_ci inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; 340262306a36Sopenharmony_ci 340362306a36Sopenharmony_ci per_cpu(ipv4_tcp_sk, cpu) = sk; 340462306a36Sopenharmony_ci } 340562306a36Sopenharmony_ci if (register_pernet_subsys(&tcp_sk_ops)) 340662306a36Sopenharmony_ci panic("Failed to create the TCP control socket.\n"); 340762306a36Sopenharmony_ci 340862306a36Sopenharmony_ci#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 340962306a36Sopenharmony_ci bpf_iter_register(); 341062306a36Sopenharmony_ci#endif 341162306a36Sopenharmony_ci} 3412