162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * INET An implementation of the TCP/IP protocol suite for the LINUX 462306a36Sopenharmony_ci * operating system. INET is implemented using the BSD Socket 562306a36Sopenharmony_ci * interface as the means of communication with the user level. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Support for INET connection oriented protocols. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Authors: See the TCP sources 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/jhash.h> 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include <net/inet_connection_sock.h> 1662306a36Sopenharmony_ci#include <net/inet_hashtables.h> 1762306a36Sopenharmony_ci#include <net/inet_timewait_sock.h> 1862306a36Sopenharmony_ci#include <net/ip.h> 1962306a36Sopenharmony_ci#include <net/route.h> 2062306a36Sopenharmony_ci#include <net/tcp_states.h> 2162306a36Sopenharmony_ci#include <net/xfrm.h> 2262306a36Sopenharmony_ci#include <net/tcp.h> 2362306a36Sopenharmony_ci#include <net/sock_reuseport.h> 2462306a36Sopenharmony_ci#include <net/addrconf.h> 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 2762306a36Sopenharmony_ci/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses 2862306a36Sopenharmony_ci * if IPv6 only, and any IPv4 addresses 2962306a36Sopenharmony_ci * if not IPv6 only 3062306a36Sopenharmony_ci * match_sk*_wildcard == false: addresses must be exactly the same, i.e. 3162306a36Sopenharmony_ci * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, 3262306a36Sopenharmony_ci * and 0.0.0.0 equals to 0.0.0.0 only 3362306a36Sopenharmony_ci */ 3462306a36Sopenharmony_cistatic bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, 3562306a36Sopenharmony_ci const struct in6_addr *sk2_rcv_saddr6, 3662306a36Sopenharmony_ci __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 3762306a36Sopenharmony_ci bool sk1_ipv6only, bool sk2_ipv6only, 3862306a36Sopenharmony_ci bool match_sk1_wildcard, 3962306a36Sopenharmony_ci bool match_sk2_wildcard) 4062306a36Sopenharmony_ci{ 4162306a36Sopenharmony_ci int addr_type = ipv6_addr_type(sk1_rcv_saddr6); 4262306a36Sopenharmony_ci int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci /* if both are mapped, treat as IPv4 */ 4562306a36Sopenharmony_ci if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { 4662306a36Sopenharmony_ci if (!sk2_ipv6only) { 4762306a36Sopenharmony_ci if (sk1_rcv_saddr == sk2_rcv_saddr) 4862306a36Sopenharmony_ci return true; 4962306a36Sopenharmony_ci return (match_sk1_wildcard && !sk1_rcv_saddr) || 5062306a36Sopenharmony_ci (match_sk2_wildcard && !sk2_rcv_saddr); 5162306a36Sopenharmony_ci } 5262306a36Sopenharmony_ci return false; 5362306a36Sopenharmony_ci } 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) 5662306a36Sopenharmony_ci return true; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && 5962306a36Sopenharmony_ci !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) 6062306a36Sopenharmony_ci return true; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && 6362306a36Sopenharmony_ci !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) 6462306a36Sopenharmony_ci return true; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (sk2_rcv_saddr6 && 6762306a36Sopenharmony_ci ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) 6862306a36Sopenharmony_ci return true; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci return false; 7162306a36Sopenharmony_ci} 7262306a36Sopenharmony_ci#endif 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses 7562306a36Sopenharmony_ci * match_sk*_wildcard == false: addresses must be exactly the same, i.e. 7662306a36Sopenharmony_ci * 0.0.0.0 only equals to 0.0.0.0 7762306a36Sopenharmony_ci */ 7862306a36Sopenharmony_cistatic bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, 7962306a36Sopenharmony_ci bool sk2_ipv6only, bool match_sk1_wildcard, 8062306a36Sopenharmony_ci bool match_sk2_wildcard) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci if (!sk2_ipv6only) { 8362306a36Sopenharmony_ci if (sk1_rcv_saddr == sk2_rcv_saddr) 8462306a36Sopenharmony_ci return true; 8562306a36Sopenharmony_ci return (match_sk1_wildcard && !sk1_rcv_saddr) || 8662306a36Sopenharmony_ci (match_sk2_wildcard && !sk2_rcv_saddr); 8762306a36Sopenharmony_ci } 8862306a36Sopenharmony_ci return false; 8962306a36Sopenharmony_ci} 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_cibool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, 9262306a36Sopenharmony_ci bool match_wildcard) 9362306a36Sopenharmony_ci{ 9462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 9562306a36Sopenharmony_ci if (sk->sk_family == AF_INET6) 9662306a36Sopenharmony_ci return ipv6_rcv_saddr_equal(&sk->sk_v6_rcv_saddr, 9762306a36Sopenharmony_ci inet6_rcv_saddr(sk2), 9862306a36Sopenharmony_ci sk->sk_rcv_saddr, 9962306a36Sopenharmony_ci sk2->sk_rcv_saddr, 10062306a36Sopenharmony_ci ipv6_only_sock(sk), 10162306a36Sopenharmony_ci ipv6_only_sock(sk2), 10262306a36Sopenharmony_ci match_wildcard, 10362306a36Sopenharmony_ci match_wildcard); 10462306a36Sopenharmony_ci#endif 10562306a36Sopenharmony_ci return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, 10662306a36Sopenharmony_ci ipv6_only_sock(sk2), match_wildcard, 10762306a36Sopenharmony_ci match_wildcard); 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ciEXPORT_SYMBOL(inet_rcv_saddr_equal); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cibool inet_rcv_saddr_any(const struct sock *sk) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 11462306a36Sopenharmony_ci if (sk->sk_family == AF_INET6) 11562306a36Sopenharmony_ci return ipv6_addr_any(&sk->sk_v6_rcv_saddr); 11662306a36Sopenharmony_ci#endif 11762306a36Sopenharmony_ci return !sk->sk_rcv_saddr; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_civoid inet_get_local_port_range(const struct net *net, int *low, int *high) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci unsigned int seq; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci do { 12562306a36Sopenharmony_ci seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci *low = net->ipv4.ip_local_ports.range[0]; 12862306a36Sopenharmony_ci *high = net->ipv4.ip_local_ports.range[1]; 12962306a36Sopenharmony_ci } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); 13062306a36Sopenharmony_ci} 13162306a36Sopenharmony_ciEXPORT_SYMBOL(inet_get_local_port_range); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_civoid inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) 13462306a36Sopenharmony_ci{ 13562306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 13662306a36Sopenharmony_ci const struct net *net = sock_net(sk); 13762306a36Sopenharmony_ci int lo, hi, sk_lo, sk_hi; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci inet_get_local_port_range(net, &lo, &hi); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci sk_lo = inet->local_port_range.lo; 14262306a36Sopenharmony_ci sk_hi = inet->local_port_range.hi; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (unlikely(lo <= sk_lo && sk_lo <= hi)) 14562306a36Sopenharmony_ci lo = sk_lo; 14662306a36Sopenharmony_ci if (unlikely(lo <= sk_hi && sk_hi <= hi)) 14762306a36Sopenharmony_ci hi = sk_hi; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci *low = lo; 15062306a36Sopenharmony_ci *high = hi; 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ciEXPORT_SYMBOL(inet_sk_get_local_port_range); 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic bool inet_use_bhash2_on_bind(const struct sock *sk) 15562306a36Sopenharmony_ci{ 15662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 15762306a36Sopenharmony_ci if (sk->sk_family == AF_INET6) { 15862306a36Sopenharmony_ci int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci return addr_type != IPV6_ADDR_ANY && 16162306a36Sopenharmony_ci addr_type != IPV6_ADDR_MAPPED; 16262306a36Sopenharmony_ci } 16362306a36Sopenharmony_ci#endif 16462306a36Sopenharmony_ci return sk->sk_rcv_saddr != htonl(INADDR_ANY); 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_cistatic bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, 16862306a36Sopenharmony_ci kuid_t sk_uid, bool relax, 16962306a36Sopenharmony_ci bool reuseport_cb_ok, bool reuseport_ok) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci int bound_dev_if2; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (sk == sk2) 17462306a36Sopenharmony_ci return false; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci if (!sk->sk_bound_dev_if || !bound_dev_if2 || 17962306a36Sopenharmony_ci sk->sk_bound_dev_if == bound_dev_if2) { 18062306a36Sopenharmony_ci if (sk->sk_reuse && sk2->sk_reuse && 18162306a36Sopenharmony_ci sk2->sk_state != TCP_LISTEN) { 18262306a36Sopenharmony_ci if (!relax || (!reuseport_ok && sk->sk_reuseport && 18362306a36Sopenharmony_ci sk2->sk_reuseport && reuseport_cb_ok && 18462306a36Sopenharmony_ci (sk2->sk_state == TCP_TIME_WAIT || 18562306a36Sopenharmony_ci uid_eq(sk_uid, sock_i_uid(sk2))))) 18662306a36Sopenharmony_ci return true; 18762306a36Sopenharmony_ci } else if (!reuseport_ok || !sk->sk_reuseport || 18862306a36Sopenharmony_ci !sk2->sk_reuseport || !reuseport_cb_ok || 18962306a36Sopenharmony_ci (sk2->sk_state != TCP_TIME_WAIT && 19062306a36Sopenharmony_ci !uid_eq(sk_uid, sock_i_uid(sk2)))) { 19162306a36Sopenharmony_ci return true; 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci } 19462306a36Sopenharmony_ci return false; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistatic bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, 19862306a36Sopenharmony_ci kuid_t sk_uid, bool relax, 19962306a36Sopenharmony_ci bool reuseport_cb_ok, bool reuseport_ok) 20062306a36Sopenharmony_ci{ 20162306a36Sopenharmony_ci if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) 20262306a36Sopenharmony_ci return false; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci return inet_bind_conflict(sk, sk2, sk_uid, relax, 20562306a36Sopenharmony_ci reuseport_cb_ok, reuseport_ok); 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cistatic bool inet_bhash2_conflict(const struct sock *sk, 20962306a36Sopenharmony_ci const struct inet_bind2_bucket *tb2, 21062306a36Sopenharmony_ci kuid_t sk_uid, 21162306a36Sopenharmony_ci bool relax, bool reuseport_cb_ok, 21262306a36Sopenharmony_ci bool reuseport_ok) 21362306a36Sopenharmony_ci{ 21462306a36Sopenharmony_ci struct inet_timewait_sock *tw2; 21562306a36Sopenharmony_ci struct sock *sk2; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci sk_for_each_bound_bhash2(sk2, &tb2->owners) { 21862306a36Sopenharmony_ci if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 21962306a36Sopenharmony_ci reuseport_cb_ok, reuseport_ok)) 22062306a36Sopenharmony_ci return true; 22162306a36Sopenharmony_ci } 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { 22462306a36Sopenharmony_ci sk2 = (struct sock *)tw2; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, 22762306a36Sopenharmony_ci reuseport_cb_ok, reuseport_ok)) 22862306a36Sopenharmony_ci return true; 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci return false; 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci/* This should be called only when the tb and tb2 hashbuckets' locks are held */ 23562306a36Sopenharmony_cistatic int inet_csk_bind_conflict(const struct sock *sk, 23662306a36Sopenharmony_ci const struct inet_bind_bucket *tb, 23762306a36Sopenharmony_ci const struct inet_bind2_bucket *tb2, /* may be null */ 23862306a36Sopenharmony_ci bool relax, bool reuseport_ok) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci bool reuseport_cb_ok; 24162306a36Sopenharmony_ci struct sock_reuseport *reuseport_cb; 24262306a36Sopenharmony_ci kuid_t uid = sock_i_uid((struct sock *)sk); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci rcu_read_lock(); 24562306a36Sopenharmony_ci reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); 24662306a36Sopenharmony_ci /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */ 24762306a36Sopenharmony_ci reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); 24862306a36Sopenharmony_ci rcu_read_unlock(); 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci /* 25162306a36Sopenharmony_ci * Unlike other sk lookup places we do not check 25262306a36Sopenharmony_ci * for sk_net here, since _all_ the socks listed 25362306a36Sopenharmony_ci * in tb->owners and tb2->owners list belong 25462306a36Sopenharmony_ci * to the same net - the one this bucket belongs to. 25562306a36Sopenharmony_ci */ 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci if (!inet_use_bhash2_on_bind(sk)) { 25862306a36Sopenharmony_ci struct sock *sk2; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci sk_for_each_bound(sk2, &tb->owners) 26162306a36Sopenharmony_ci if (inet_bind_conflict(sk, sk2, uid, relax, 26262306a36Sopenharmony_ci reuseport_cb_ok, reuseport_ok) && 26362306a36Sopenharmony_ci inet_rcv_saddr_equal(sk, sk2, true)) 26462306a36Sopenharmony_ci return true; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci return false; 26762306a36Sopenharmony_ci } 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if 27062306a36Sopenharmony_ci * ipv4) should have been checked already. We need to do these two 27162306a36Sopenharmony_ci * checks separately because their spinlocks have to be acquired/released 27262306a36Sopenharmony_ci * independently of each other, to prevent possible deadlocks 27362306a36Sopenharmony_ci */ 27462306a36Sopenharmony_ci return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 27562306a36Sopenharmony_ci reuseport_ok); 27662306a36Sopenharmony_ci} 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or 27962306a36Sopenharmony_ci * INADDR_ANY (if ipv4) socket. 28062306a36Sopenharmony_ci * 28162306a36Sopenharmony_ci * Caller must hold bhash hashbucket lock with local bh disabled, to protect 28262306a36Sopenharmony_ci * against concurrent binds on the port for addr any 28362306a36Sopenharmony_ci */ 28462306a36Sopenharmony_cistatic bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev, 28562306a36Sopenharmony_ci bool relax, bool reuseport_ok) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci kuid_t uid = sock_i_uid((struct sock *)sk); 28862306a36Sopenharmony_ci const struct net *net = sock_net(sk); 28962306a36Sopenharmony_ci struct sock_reuseport *reuseport_cb; 29062306a36Sopenharmony_ci struct inet_bind_hashbucket *head2; 29162306a36Sopenharmony_ci struct inet_bind2_bucket *tb2; 29262306a36Sopenharmony_ci bool reuseport_cb_ok; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci rcu_read_lock(); 29562306a36Sopenharmony_ci reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); 29662306a36Sopenharmony_ci /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */ 29762306a36Sopenharmony_ci reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); 29862306a36Sopenharmony_ci rcu_read_unlock(); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci head2 = inet_bhash2_addr_any_hashbucket(sk, net, port); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci spin_lock(&head2->lock); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci inet_bind_bucket_for_each(tb2, &head2->chain) 30562306a36Sopenharmony_ci if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk)) 30662306a36Sopenharmony_ci break; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 30962306a36Sopenharmony_ci reuseport_ok)) { 31062306a36Sopenharmony_ci spin_unlock(&head2->lock); 31162306a36Sopenharmony_ci return true; 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci spin_unlock(&head2->lock); 31562306a36Sopenharmony_ci return false; 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci/* 31962306a36Sopenharmony_ci * Find an open port number for the socket. Returns with the 32062306a36Sopenharmony_ci * inet_bind_hashbucket locks held if successful. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_cistatic struct inet_bind_hashbucket * 32362306a36Sopenharmony_ciinet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, 32462306a36Sopenharmony_ci struct inet_bind2_bucket **tb2_ret, 32562306a36Sopenharmony_ci struct inet_bind_hashbucket **head2_ret, int *port_ret) 32662306a36Sopenharmony_ci{ 32762306a36Sopenharmony_ci struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); 32862306a36Sopenharmony_ci int i, low, high, attempt_half, port, l3mdev; 32962306a36Sopenharmony_ci struct inet_bind_hashbucket *head, *head2; 33062306a36Sopenharmony_ci struct net *net = sock_net(sk); 33162306a36Sopenharmony_ci struct inet_bind2_bucket *tb2; 33262306a36Sopenharmony_ci struct inet_bind_bucket *tb; 33362306a36Sopenharmony_ci u32 remaining, offset; 33462306a36Sopenharmony_ci bool relax = false; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci l3mdev = inet_sk_bound_l3mdev(sk); 33762306a36Sopenharmony_ciports_exhausted: 33862306a36Sopenharmony_ci attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 33962306a36Sopenharmony_ciother_half_scan: 34062306a36Sopenharmony_ci inet_sk_get_local_port_range(sk, &low, &high); 34162306a36Sopenharmony_ci high++; /* [32768, 60999] -> [32768, 61000[ */ 34262306a36Sopenharmony_ci if (high - low < 4) 34362306a36Sopenharmony_ci attempt_half = 0; 34462306a36Sopenharmony_ci if (attempt_half) { 34562306a36Sopenharmony_ci int half = low + (((high - low) >> 2) << 1); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci if (attempt_half == 1) 34862306a36Sopenharmony_ci high = half; 34962306a36Sopenharmony_ci else 35062306a36Sopenharmony_ci low = half; 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci remaining = high - low; 35362306a36Sopenharmony_ci if (likely(remaining > 1)) 35462306a36Sopenharmony_ci remaining &= ~1U; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci offset = get_random_u32_below(remaining); 35762306a36Sopenharmony_ci /* __inet_hash_connect() favors ports having @low parity 35862306a36Sopenharmony_ci * We do the opposite to not pollute connect() users. 35962306a36Sopenharmony_ci */ 36062306a36Sopenharmony_ci offset |= 1U; 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ciother_parity_scan: 36362306a36Sopenharmony_ci port = low + offset; 36462306a36Sopenharmony_ci for (i = 0; i < remaining; i += 2, port += 2) { 36562306a36Sopenharmony_ci if (unlikely(port >= high)) 36662306a36Sopenharmony_ci port -= remaining; 36762306a36Sopenharmony_ci if (inet_is_local_reserved_port(net, port)) 36862306a36Sopenharmony_ci continue; 36962306a36Sopenharmony_ci head = &hinfo->bhash[inet_bhashfn(net, port, 37062306a36Sopenharmony_ci hinfo->bhash_size)]; 37162306a36Sopenharmony_ci spin_lock_bh(&head->lock); 37262306a36Sopenharmony_ci if (inet_use_bhash2_on_bind(sk)) { 37362306a36Sopenharmony_ci if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false)) 37462306a36Sopenharmony_ci goto next_port; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); 37862306a36Sopenharmony_ci spin_lock(&head2->lock); 37962306a36Sopenharmony_ci tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); 38062306a36Sopenharmony_ci inet_bind_bucket_for_each(tb, &head->chain) 38162306a36Sopenharmony_ci if (inet_bind_bucket_match(tb, net, port, l3mdev)) { 38262306a36Sopenharmony_ci if (!inet_csk_bind_conflict(sk, tb, tb2, 38362306a36Sopenharmony_ci relax, false)) 38462306a36Sopenharmony_ci goto success; 38562306a36Sopenharmony_ci spin_unlock(&head2->lock); 38662306a36Sopenharmony_ci goto next_port; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci tb = NULL; 38962306a36Sopenharmony_ci goto success; 39062306a36Sopenharmony_cinext_port: 39162306a36Sopenharmony_ci spin_unlock_bh(&head->lock); 39262306a36Sopenharmony_ci cond_resched(); 39362306a36Sopenharmony_ci } 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci offset--; 39662306a36Sopenharmony_ci if (!(offset & 1)) 39762306a36Sopenharmony_ci goto other_parity_scan; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci if (attempt_half == 1) { 40062306a36Sopenharmony_ci /* OK we now try the upper half of the range */ 40162306a36Sopenharmony_ci attempt_half = 2; 40262306a36Sopenharmony_ci goto other_half_scan; 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { 40662306a36Sopenharmony_ci /* We still have a chance to connect to different destinations */ 40762306a36Sopenharmony_ci relax = true; 40862306a36Sopenharmony_ci goto ports_exhausted; 40962306a36Sopenharmony_ci } 41062306a36Sopenharmony_ci return NULL; 41162306a36Sopenharmony_cisuccess: 41262306a36Sopenharmony_ci *port_ret = port; 41362306a36Sopenharmony_ci *tb_ret = tb; 41462306a36Sopenharmony_ci *tb2_ret = tb2; 41562306a36Sopenharmony_ci *head2_ret = head2; 41662306a36Sopenharmony_ci return head; 41762306a36Sopenharmony_ci} 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_cistatic inline int sk_reuseport_match(struct inet_bind_bucket *tb, 42062306a36Sopenharmony_ci struct sock *sk) 42162306a36Sopenharmony_ci{ 42262306a36Sopenharmony_ci kuid_t uid = sock_i_uid(sk); 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci if (tb->fastreuseport <= 0) 42562306a36Sopenharmony_ci return 0; 42662306a36Sopenharmony_ci if (!sk->sk_reuseport) 42762306a36Sopenharmony_ci return 0; 42862306a36Sopenharmony_ci if (rcu_access_pointer(sk->sk_reuseport_cb)) 42962306a36Sopenharmony_ci return 0; 43062306a36Sopenharmony_ci if (!uid_eq(tb->fastuid, uid)) 43162306a36Sopenharmony_ci return 0; 43262306a36Sopenharmony_ci /* We only need to check the rcv_saddr if this tb was once marked 43362306a36Sopenharmony_ci * without fastreuseport and then was reset, as we can only know that 43462306a36Sopenharmony_ci * the fast_*rcv_saddr doesn't have any conflicts with the socks on the 43562306a36Sopenharmony_ci * owners list. 43662306a36Sopenharmony_ci */ 43762306a36Sopenharmony_ci if (tb->fastreuseport == FASTREUSEPORT_ANY) 43862306a36Sopenharmony_ci return 1; 43962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 44062306a36Sopenharmony_ci if (tb->fast_sk_family == AF_INET6) 44162306a36Sopenharmony_ci return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, 44262306a36Sopenharmony_ci inet6_rcv_saddr(sk), 44362306a36Sopenharmony_ci tb->fast_rcv_saddr, 44462306a36Sopenharmony_ci sk->sk_rcv_saddr, 44562306a36Sopenharmony_ci tb->fast_ipv6_only, 44662306a36Sopenharmony_ci ipv6_only_sock(sk), true, false); 44762306a36Sopenharmony_ci#endif 44862306a36Sopenharmony_ci return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, 44962306a36Sopenharmony_ci ipv6_only_sock(sk), true, false); 45062306a36Sopenharmony_ci} 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_civoid inet_csk_update_fastreuse(struct inet_bind_bucket *tb, 45362306a36Sopenharmony_ci struct sock *sk) 45462306a36Sopenharmony_ci{ 45562306a36Sopenharmony_ci kuid_t uid = sock_i_uid(sk); 45662306a36Sopenharmony_ci bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci if (hlist_empty(&tb->owners)) { 45962306a36Sopenharmony_ci tb->fastreuse = reuse; 46062306a36Sopenharmony_ci if (sk->sk_reuseport) { 46162306a36Sopenharmony_ci tb->fastreuseport = FASTREUSEPORT_ANY; 46262306a36Sopenharmony_ci tb->fastuid = uid; 46362306a36Sopenharmony_ci tb->fast_rcv_saddr = sk->sk_rcv_saddr; 46462306a36Sopenharmony_ci tb->fast_ipv6_only = ipv6_only_sock(sk); 46562306a36Sopenharmony_ci tb->fast_sk_family = sk->sk_family; 46662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 46762306a36Sopenharmony_ci tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 46862306a36Sopenharmony_ci#endif 46962306a36Sopenharmony_ci } else { 47062306a36Sopenharmony_ci tb->fastreuseport = 0; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci } else { 47362306a36Sopenharmony_ci if (!reuse) 47462306a36Sopenharmony_ci tb->fastreuse = 0; 47562306a36Sopenharmony_ci if (sk->sk_reuseport) { 47662306a36Sopenharmony_ci /* We didn't match or we don't have fastreuseport set on 47762306a36Sopenharmony_ci * the tb, but we have sk_reuseport set on this socket 47862306a36Sopenharmony_ci * and we know that there are no bind conflicts with 47962306a36Sopenharmony_ci * this socket in this tb, so reset our tb's reuseport 48062306a36Sopenharmony_ci * settings so that any subsequent sockets that match 48162306a36Sopenharmony_ci * our current socket will be put on the fast path. 48262306a36Sopenharmony_ci * 48362306a36Sopenharmony_ci * If we reset we need to set FASTREUSEPORT_STRICT so we 48462306a36Sopenharmony_ci * do extra checking for all subsequent sk_reuseport 48562306a36Sopenharmony_ci * socks. 48662306a36Sopenharmony_ci */ 48762306a36Sopenharmony_ci if (!sk_reuseport_match(tb, sk)) { 48862306a36Sopenharmony_ci tb->fastreuseport = FASTREUSEPORT_STRICT; 48962306a36Sopenharmony_ci tb->fastuid = uid; 49062306a36Sopenharmony_ci tb->fast_rcv_saddr = sk->sk_rcv_saddr; 49162306a36Sopenharmony_ci tb->fast_ipv6_only = ipv6_only_sock(sk); 49262306a36Sopenharmony_ci tb->fast_sk_family = sk->sk_family; 49362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 49462306a36Sopenharmony_ci tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 49562306a36Sopenharmony_ci#endif 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci } else { 49862306a36Sopenharmony_ci tb->fastreuseport = 0; 49962306a36Sopenharmony_ci } 50062306a36Sopenharmony_ci } 50162306a36Sopenharmony_ci} 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci/* Obtain a reference to a local port for the given sock, 50462306a36Sopenharmony_ci * if snum is zero it means select any available local port. 50562306a36Sopenharmony_ci * We try to allocate an odd port (and leave even ports for connect()) 50662306a36Sopenharmony_ci */ 50762306a36Sopenharmony_ciint inet_csk_get_port(struct sock *sk, unsigned short snum) 50862306a36Sopenharmony_ci{ 50962306a36Sopenharmony_ci struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); 51062306a36Sopenharmony_ci bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 51162306a36Sopenharmony_ci bool found_port = false, check_bind_conflict = true; 51262306a36Sopenharmony_ci bool bhash_created = false, bhash2_created = false; 51362306a36Sopenharmony_ci int ret = -EADDRINUSE, port = snum, l3mdev; 51462306a36Sopenharmony_ci struct inet_bind_hashbucket *head, *head2; 51562306a36Sopenharmony_ci struct inet_bind2_bucket *tb2 = NULL; 51662306a36Sopenharmony_ci struct inet_bind_bucket *tb = NULL; 51762306a36Sopenharmony_ci bool head2_lock_acquired = false; 51862306a36Sopenharmony_ci struct net *net = sock_net(sk); 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci l3mdev = inet_sk_bound_l3mdev(sk); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci if (!port) { 52362306a36Sopenharmony_ci head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port); 52462306a36Sopenharmony_ci if (!head) 52562306a36Sopenharmony_ci return ret; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci head2_lock_acquired = true; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci if (tb && tb2) 53062306a36Sopenharmony_ci goto success; 53162306a36Sopenharmony_ci found_port = true; 53262306a36Sopenharmony_ci } else { 53362306a36Sopenharmony_ci head = &hinfo->bhash[inet_bhashfn(net, port, 53462306a36Sopenharmony_ci hinfo->bhash_size)]; 53562306a36Sopenharmony_ci spin_lock_bh(&head->lock); 53662306a36Sopenharmony_ci inet_bind_bucket_for_each(tb, &head->chain) 53762306a36Sopenharmony_ci if (inet_bind_bucket_match(tb, net, port, l3mdev)) 53862306a36Sopenharmony_ci break; 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci if (!tb) { 54262306a36Sopenharmony_ci tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, 54362306a36Sopenharmony_ci head, port, l3mdev); 54462306a36Sopenharmony_ci if (!tb) 54562306a36Sopenharmony_ci goto fail_unlock; 54662306a36Sopenharmony_ci bhash_created = true; 54762306a36Sopenharmony_ci } 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci if (!found_port) { 55062306a36Sopenharmony_ci if (!hlist_empty(&tb->owners)) { 55162306a36Sopenharmony_ci if (sk->sk_reuse == SK_FORCE_REUSE || 55262306a36Sopenharmony_ci (tb->fastreuse > 0 && reuse) || 55362306a36Sopenharmony_ci sk_reuseport_match(tb, sk)) 55462306a36Sopenharmony_ci check_bind_conflict = false; 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) { 55862306a36Sopenharmony_ci if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true)) 55962306a36Sopenharmony_ci goto fail_unlock; 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); 56362306a36Sopenharmony_ci spin_lock(&head2->lock); 56462306a36Sopenharmony_ci head2_lock_acquired = true; 56562306a36Sopenharmony_ci tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); 56662306a36Sopenharmony_ci } 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci if (!tb2) { 56962306a36Sopenharmony_ci tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, 57062306a36Sopenharmony_ci net, head2, port, l3mdev, sk); 57162306a36Sopenharmony_ci if (!tb2) 57262306a36Sopenharmony_ci goto fail_unlock; 57362306a36Sopenharmony_ci bhash2_created = true; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci if (!found_port && check_bind_conflict) { 57762306a36Sopenharmony_ci if (inet_csk_bind_conflict(sk, tb, tb2, true, true)) 57862306a36Sopenharmony_ci goto fail_unlock; 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_cisuccess: 58262306a36Sopenharmony_ci inet_csk_update_fastreuse(tb, sk); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if (!inet_csk(sk)->icsk_bind_hash) 58562306a36Sopenharmony_ci inet_bind_hash(sk, tb, tb2, port); 58662306a36Sopenharmony_ci WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 58762306a36Sopenharmony_ci WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); 58862306a36Sopenharmony_ci ret = 0; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_cifail_unlock: 59162306a36Sopenharmony_ci if (ret) { 59262306a36Sopenharmony_ci if (bhash_created) 59362306a36Sopenharmony_ci inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); 59462306a36Sopenharmony_ci if (bhash2_created) 59562306a36Sopenharmony_ci inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, 59662306a36Sopenharmony_ci tb2); 59762306a36Sopenharmony_ci } 59862306a36Sopenharmony_ci if (head2_lock_acquired) 59962306a36Sopenharmony_ci spin_unlock(&head2->lock); 60062306a36Sopenharmony_ci spin_unlock_bh(&head->lock); 60162306a36Sopenharmony_ci return ret; 60262306a36Sopenharmony_ci} 60362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_get_port); 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci/* 60662306a36Sopenharmony_ci * Wait for an incoming connection, avoid race conditions. This must be called 60762306a36Sopenharmony_ci * with the socket locked. 60862306a36Sopenharmony_ci */ 60962306a36Sopenharmony_cistatic int inet_csk_wait_for_connect(struct sock *sk, long timeo) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 61262306a36Sopenharmony_ci DEFINE_WAIT(wait); 61362306a36Sopenharmony_ci int err; 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci /* 61662306a36Sopenharmony_ci * True wake-one mechanism for incoming connections: only 61762306a36Sopenharmony_ci * one process gets woken up, not the 'whole herd'. 61862306a36Sopenharmony_ci * Since we do not 'race & poll' for established sockets 61962306a36Sopenharmony_ci * anymore, the common case will execute the loop only once. 62062306a36Sopenharmony_ci * 62162306a36Sopenharmony_ci * Subtle issue: "add_wait_queue_exclusive()" will be added 62262306a36Sopenharmony_ci * after any current non-exclusive waiters, and we know that 62362306a36Sopenharmony_ci * it will always _stay_ after any new non-exclusive waiters 62462306a36Sopenharmony_ci * because all non-exclusive waiters are added at the 62562306a36Sopenharmony_ci * beginning of the wait-queue. As such, it's ok to "drop" 62662306a36Sopenharmony_ci * our exclusiveness temporarily when we get woken up without 62762306a36Sopenharmony_ci * having to remove and re-insert us on the wait queue. 62862306a36Sopenharmony_ci */ 62962306a36Sopenharmony_ci for (;;) { 63062306a36Sopenharmony_ci prepare_to_wait_exclusive(sk_sleep(sk), &wait, 63162306a36Sopenharmony_ci TASK_INTERRUPTIBLE); 63262306a36Sopenharmony_ci release_sock(sk); 63362306a36Sopenharmony_ci if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 63462306a36Sopenharmony_ci timeo = schedule_timeout(timeo); 63562306a36Sopenharmony_ci sched_annotate_sleep(); 63662306a36Sopenharmony_ci lock_sock(sk); 63762306a36Sopenharmony_ci err = 0; 63862306a36Sopenharmony_ci if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) 63962306a36Sopenharmony_ci break; 64062306a36Sopenharmony_ci err = -EINVAL; 64162306a36Sopenharmony_ci if (sk->sk_state != TCP_LISTEN) 64262306a36Sopenharmony_ci break; 64362306a36Sopenharmony_ci err = sock_intr_errno(timeo); 64462306a36Sopenharmony_ci if (signal_pending(current)) 64562306a36Sopenharmony_ci break; 64662306a36Sopenharmony_ci err = -EAGAIN; 64762306a36Sopenharmony_ci if (!timeo) 64862306a36Sopenharmony_ci break; 64962306a36Sopenharmony_ci } 65062306a36Sopenharmony_ci finish_wait(sk_sleep(sk), &wait); 65162306a36Sopenharmony_ci return err; 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci/* 65562306a36Sopenharmony_ci * This will accept the next outstanding connection. 65662306a36Sopenharmony_ci */ 65762306a36Sopenharmony_cistruct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) 65862306a36Sopenharmony_ci{ 65962306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 66062306a36Sopenharmony_ci struct request_sock_queue *queue = &icsk->icsk_accept_queue; 66162306a36Sopenharmony_ci struct request_sock *req; 66262306a36Sopenharmony_ci struct sock *newsk; 66362306a36Sopenharmony_ci int error; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci lock_sock(sk); 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci /* We need to make sure that this socket is listening, 66862306a36Sopenharmony_ci * and that it has something pending. 66962306a36Sopenharmony_ci */ 67062306a36Sopenharmony_ci error = -EINVAL; 67162306a36Sopenharmony_ci if (sk->sk_state != TCP_LISTEN) 67262306a36Sopenharmony_ci goto out_err; 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci /* Find already established connection */ 67562306a36Sopenharmony_ci if (reqsk_queue_empty(queue)) { 67662306a36Sopenharmony_ci long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci /* If this is a non blocking socket don't sleep */ 67962306a36Sopenharmony_ci error = -EAGAIN; 68062306a36Sopenharmony_ci if (!timeo) 68162306a36Sopenharmony_ci goto out_err; 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci error = inet_csk_wait_for_connect(sk, timeo); 68462306a36Sopenharmony_ci if (error) 68562306a36Sopenharmony_ci goto out_err; 68662306a36Sopenharmony_ci } 68762306a36Sopenharmony_ci req = reqsk_queue_remove(queue, sk); 68862306a36Sopenharmony_ci newsk = req->sk; 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci if (sk->sk_protocol == IPPROTO_TCP && 69162306a36Sopenharmony_ci tcp_rsk(req)->tfo_listener) { 69262306a36Sopenharmony_ci spin_lock_bh(&queue->fastopenq.lock); 69362306a36Sopenharmony_ci if (tcp_rsk(req)->tfo_listener) { 69462306a36Sopenharmony_ci /* We are still waiting for the final ACK from 3WHS 69562306a36Sopenharmony_ci * so can't free req now. Instead, we set req->sk to 69662306a36Sopenharmony_ci * NULL to signify that the child socket is taken 69762306a36Sopenharmony_ci * so reqsk_fastopen_remove() will free the req 69862306a36Sopenharmony_ci * when 3WHS finishes (or is aborted). 69962306a36Sopenharmony_ci */ 70062306a36Sopenharmony_ci req->sk = NULL; 70162306a36Sopenharmony_ci req = NULL; 70262306a36Sopenharmony_ci } 70362306a36Sopenharmony_ci spin_unlock_bh(&queue->fastopenq.lock); 70462306a36Sopenharmony_ci } 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ciout: 70762306a36Sopenharmony_ci release_sock(sk); 70862306a36Sopenharmony_ci if (newsk && mem_cgroup_sockets_enabled) { 70962306a36Sopenharmony_ci int amt = 0; 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci /* atomically get the memory usage, set and charge the 71262306a36Sopenharmony_ci * newsk->sk_memcg. 71362306a36Sopenharmony_ci */ 71462306a36Sopenharmony_ci lock_sock(newsk); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci mem_cgroup_sk_alloc(newsk); 71762306a36Sopenharmony_ci if (newsk->sk_memcg) { 71862306a36Sopenharmony_ci /* The socket has not been accepted yet, no need 71962306a36Sopenharmony_ci * to look at newsk->sk_wmem_queued. 72062306a36Sopenharmony_ci */ 72162306a36Sopenharmony_ci amt = sk_mem_pages(newsk->sk_forward_alloc + 72262306a36Sopenharmony_ci atomic_read(&newsk->sk_rmem_alloc)); 72362306a36Sopenharmony_ci } 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci if (amt) 72662306a36Sopenharmony_ci mem_cgroup_charge_skmem(newsk->sk_memcg, amt, 72762306a36Sopenharmony_ci GFP_KERNEL | __GFP_NOFAIL); 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci release_sock(newsk); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci if (req) 73262306a36Sopenharmony_ci reqsk_put(req); 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci if (newsk) 73562306a36Sopenharmony_ci inet_init_csk_locks(newsk); 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci return newsk; 73862306a36Sopenharmony_ciout_err: 73962306a36Sopenharmony_ci newsk = NULL; 74062306a36Sopenharmony_ci req = NULL; 74162306a36Sopenharmony_ci *err = error; 74262306a36Sopenharmony_ci goto out; 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_accept); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci/* 74762306a36Sopenharmony_ci * Using different timers for retransmit, delayed acks and probes 74862306a36Sopenharmony_ci * We may wish use just one timer maintaining a list of expire jiffies 74962306a36Sopenharmony_ci * to optimize. 75062306a36Sopenharmony_ci */ 75162306a36Sopenharmony_civoid inet_csk_init_xmit_timers(struct sock *sk, 75262306a36Sopenharmony_ci void (*retransmit_handler)(struct timer_list *t), 75362306a36Sopenharmony_ci void (*delack_handler)(struct timer_list *t), 75462306a36Sopenharmony_ci void (*keepalive_handler)(struct timer_list *t)) 75562306a36Sopenharmony_ci{ 75662306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ci timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); 75962306a36Sopenharmony_ci timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); 76062306a36Sopenharmony_ci timer_setup(&sk->sk_timer, keepalive_handler, 0); 76162306a36Sopenharmony_ci icsk->icsk_pending = icsk->icsk_ack.pending = 0; 76262306a36Sopenharmony_ci} 76362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_init_xmit_timers); 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_civoid inet_csk_clear_xmit_timers(struct sock *sk) 76662306a36Sopenharmony_ci{ 76762306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci icsk->icsk_pending = icsk->icsk_ack.pending = 0; 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci sk_stop_timer(sk, &icsk->icsk_retransmit_timer); 77262306a36Sopenharmony_ci sk_stop_timer(sk, &icsk->icsk_delack_timer); 77362306a36Sopenharmony_ci sk_stop_timer(sk, &sk->sk_timer); 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_clear_xmit_timers); 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_civoid inet_csk_delete_keepalive_timer(struct sock *sk) 77862306a36Sopenharmony_ci{ 77962306a36Sopenharmony_ci sk_stop_timer(sk, &sk->sk_timer); 78062306a36Sopenharmony_ci} 78162306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_civoid inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 78462306a36Sopenharmony_ci{ 78562306a36Sopenharmony_ci sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_cistruct dst_entry *inet_csk_route_req(const struct sock *sk, 79062306a36Sopenharmony_ci struct flowi4 *fl4, 79162306a36Sopenharmony_ci const struct request_sock *req) 79262306a36Sopenharmony_ci{ 79362306a36Sopenharmony_ci const struct inet_request_sock *ireq = inet_rsk(req); 79462306a36Sopenharmony_ci struct net *net = read_pnet(&ireq->ireq_net); 79562306a36Sopenharmony_ci struct ip_options_rcu *opt; 79662306a36Sopenharmony_ci struct rtable *rt; 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci rcu_read_lock(); 79962306a36Sopenharmony_ci opt = rcu_dereference(ireq->ireq_opt); 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 80262306a36Sopenharmony_ci ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), 80362306a36Sopenharmony_ci sk->sk_protocol, inet_sk_flowi_flags(sk), 80462306a36Sopenharmony_ci (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 80562306a36Sopenharmony_ci ireq->ir_loc_addr, ireq->ir_rmt_port, 80662306a36Sopenharmony_ci htons(ireq->ir_num), sk->sk_uid); 80762306a36Sopenharmony_ci security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); 80862306a36Sopenharmony_ci rt = ip_route_output_flow(net, fl4, sk); 80962306a36Sopenharmony_ci if (IS_ERR(rt)) 81062306a36Sopenharmony_ci goto no_route; 81162306a36Sopenharmony_ci if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 81262306a36Sopenharmony_ci goto route_err; 81362306a36Sopenharmony_ci rcu_read_unlock(); 81462306a36Sopenharmony_ci return &rt->dst; 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ciroute_err: 81762306a36Sopenharmony_ci ip_rt_put(rt); 81862306a36Sopenharmony_cino_route: 81962306a36Sopenharmony_ci rcu_read_unlock(); 82062306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 82162306a36Sopenharmony_ci return NULL; 82262306a36Sopenharmony_ci} 82362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_route_req); 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_cistruct dst_entry *inet_csk_route_child_sock(const struct sock *sk, 82662306a36Sopenharmony_ci struct sock *newsk, 82762306a36Sopenharmony_ci const struct request_sock *req) 82862306a36Sopenharmony_ci{ 82962306a36Sopenharmony_ci const struct inet_request_sock *ireq = inet_rsk(req); 83062306a36Sopenharmony_ci struct net *net = read_pnet(&ireq->ireq_net); 83162306a36Sopenharmony_ci struct inet_sock *newinet = inet_sk(newsk); 83262306a36Sopenharmony_ci struct ip_options_rcu *opt; 83362306a36Sopenharmony_ci struct flowi4 *fl4; 83462306a36Sopenharmony_ci struct rtable *rt; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci opt = rcu_dereference(ireq->ireq_opt); 83762306a36Sopenharmony_ci fl4 = &newinet->cork.fl.u.ip4; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, 84062306a36Sopenharmony_ci ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), 84162306a36Sopenharmony_ci sk->sk_protocol, inet_sk_flowi_flags(sk), 84262306a36Sopenharmony_ci (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 84362306a36Sopenharmony_ci ireq->ir_loc_addr, ireq->ir_rmt_port, 84462306a36Sopenharmony_ci htons(ireq->ir_num), sk->sk_uid); 84562306a36Sopenharmony_ci security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); 84662306a36Sopenharmony_ci rt = ip_route_output_flow(net, fl4, sk); 84762306a36Sopenharmony_ci if (IS_ERR(rt)) 84862306a36Sopenharmony_ci goto no_route; 84962306a36Sopenharmony_ci if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 85062306a36Sopenharmony_ci goto route_err; 85162306a36Sopenharmony_ci return &rt->dst; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ciroute_err: 85462306a36Sopenharmony_ci ip_rt_put(rt); 85562306a36Sopenharmony_cino_route: 85662306a36Sopenharmony_ci __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 85762306a36Sopenharmony_ci return NULL; 85862306a36Sopenharmony_ci} 85962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci/* Decide when to expire the request and when to resend SYN-ACK */ 86262306a36Sopenharmony_cistatic void syn_ack_recalc(struct request_sock *req, 86362306a36Sopenharmony_ci const int max_syn_ack_retries, 86462306a36Sopenharmony_ci const u8 rskq_defer_accept, 86562306a36Sopenharmony_ci int *expire, int *resend) 86662306a36Sopenharmony_ci{ 86762306a36Sopenharmony_ci if (!rskq_defer_accept) { 86862306a36Sopenharmony_ci *expire = req->num_timeout >= max_syn_ack_retries; 86962306a36Sopenharmony_ci *resend = 1; 87062306a36Sopenharmony_ci return; 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci *expire = req->num_timeout >= max_syn_ack_retries && 87362306a36Sopenharmony_ci (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept); 87462306a36Sopenharmony_ci /* Do not resend while waiting for data after ACK, 87562306a36Sopenharmony_ci * start to resend on end of deferring period to give 87662306a36Sopenharmony_ci * last chance for data or ACK to create established socket. 87762306a36Sopenharmony_ci */ 87862306a36Sopenharmony_ci *resend = !inet_rsk(req)->acked || 87962306a36Sopenharmony_ci req->num_timeout >= rskq_defer_accept - 1; 88062306a36Sopenharmony_ci} 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ciint inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) 88362306a36Sopenharmony_ci{ 88462306a36Sopenharmony_ci int err = req->rsk_ops->rtx_syn_ack(parent, req); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci if (!err) 88762306a36Sopenharmony_ci req->num_retrans++; 88862306a36Sopenharmony_ci return err; 88962306a36Sopenharmony_ci} 89062306a36Sopenharmony_ciEXPORT_SYMBOL(inet_rtx_syn_ack); 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_cistatic struct request_sock *inet_reqsk_clone(struct request_sock *req, 89362306a36Sopenharmony_ci struct sock *sk) 89462306a36Sopenharmony_ci{ 89562306a36Sopenharmony_ci struct sock *req_sk, *nreq_sk; 89662306a36Sopenharmony_ci struct request_sock *nreq; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN); 89962306a36Sopenharmony_ci if (!nreq) { 90062306a36Sopenharmony_ci __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */ 90362306a36Sopenharmony_ci sock_put(sk); 90462306a36Sopenharmony_ci return NULL; 90562306a36Sopenharmony_ci } 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci req_sk = req_to_sk(req); 90862306a36Sopenharmony_ci nreq_sk = req_to_sk(nreq); 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci memcpy(nreq_sk, req_sk, 91162306a36Sopenharmony_ci offsetof(struct sock, sk_dontcopy_begin)); 91262306a36Sopenharmony_ci memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, 91362306a36Sopenharmony_ci req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end)); 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci sk_node_init(&nreq_sk->sk_node); 91662306a36Sopenharmony_ci nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; 91762306a36Sopenharmony_ci#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING 91862306a36Sopenharmony_ci nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; 91962306a36Sopenharmony_ci#endif 92062306a36Sopenharmony_ci nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci nreq->rsk_listener = sk; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci /* We need not acquire fastopenq->lock 92562306a36Sopenharmony_ci * because the child socket is locked in inet_csk_listen_stop(). 92662306a36Sopenharmony_ci */ 92762306a36Sopenharmony_ci if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener) 92862306a36Sopenharmony_ci rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq); 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci return nreq; 93162306a36Sopenharmony_ci} 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_cistatic void reqsk_queue_migrated(struct request_sock_queue *queue, 93462306a36Sopenharmony_ci const struct request_sock *req) 93562306a36Sopenharmony_ci{ 93662306a36Sopenharmony_ci if (req->num_timeout == 0) 93762306a36Sopenharmony_ci atomic_inc(&queue->young); 93862306a36Sopenharmony_ci atomic_inc(&queue->qlen); 93962306a36Sopenharmony_ci} 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_cistatic void reqsk_migrate_reset(struct request_sock *req) 94262306a36Sopenharmony_ci{ 94362306a36Sopenharmony_ci req->saved_syn = NULL; 94462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 94562306a36Sopenharmony_ci inet_rsk(req)->ipv6_opt = NULL; 94662306a36Sopenharmony_ci inet_rsk(req)->pktopts = NULL; 94762306a36Sopenharmony_ci#else 94862306a36Sopenharmony_ci inet_rsk(req)->ireq_opt = NULL; 94962306a36Sopenharmony_ci#endif 95062306a36Sopenharmony_ci} 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci/* return true if req was found in the ehash table */ 95362306a36Sopenharmony_cistatic bool reqsk_queue_unlink(struct request_sock *req) 95462306a36Sopenharmony_ci{ 95562306a36Sopenharmony_ci struct sock *sk = req_to_sk(req); 95662306a36Sopenharmony_ci bool found = false; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci if (sk_hashed(sk)) { 95962306a36Sopenharmony_ci struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); 96062306a36Sopenharmony_ci spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci spin_lock(lock); 96362306a36Sopenharmony_ci found = __sk_nulls_del_node_init_rcu(sk); 96462306a36Sopenharmony_ci spin_unlock(lock); 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 96762306a36Sopenharmony_ci reqsk_put(req); 96862306a36Sopenharmony_ci return found; 96962306a36Sopenharmony_ci} 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_cibool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) 97262306a36Sopenharmony_ci{ 97362306a36Sopenharmony_ci bool unlinked = reqsk_queue_unlink(req); 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci if (unlinked) { 97662306a36Sopenharmony_ci reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 97762306a36Sopenharmony_ci reqsk_put(req); 97862306a36Sopenharmony_ci } 97962306a36Sopenharmony_ci return unlinked; 98062306a36Sopenharmony_ci} 98162306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_reqsk_queue_drop); 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_civoid inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) 98462306a36Sopenharmony_ci{ 98562306a36Sopenharmony_ci inet_csk_reqsk_queue_drop(sk, req); 98662306a36Sopenharmony_ci reqsk_put(req); 98762306a36Sopenharmony_ci} 98862306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_cistatic void reqsk_timer_handler(struct timer_list *t) 99162306a36Sopenharmony_ci{ 99262306a36Sopenharmony_ci struct request_sock *req = from_timer(req, t, rsk_timer); 99362306a36Sopenharmony_ci struct request_sock *nreq = NULL, *oreq = req; 99462306a36Sopenharmony_ci struct sock *sk_listener = req->rsk_listener; 99562306a36Sopenharmony_ci struct inet_connection_sock *icsk; 99662306a36Sopenharmony_ci struct request_sock_queue *queue; 99762306a36Sopenharmony_ci struct net *net; 99862306a36Sopenharmony_ci int max_syn_ack_retries, qlen, expire = 0, resend = 0; 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci if (inet_sk_state_load(sk_listener) != TCP_LISTEN) { 100162306a36Sopenharmony_ci struct sock *nsk; 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL); 100462306a36Sopenharmony_ci if (!nsk) 100562306a36Sopenharmony_ci goto drop; 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci nreq = inet_reqsk_clone(req, nsk); 100862306a36Sopenharmony_ci if (!nreq) 100962306a36Sopenharmony_ci goto drop; 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci /* The new timer for the cloned req can decrease the 2 101262306a36Sopenharmony_ci * by calling inet_csk_reqsk_queue_drop_and_put(), so 101362306a36Sopenharmony_ci * hold another count to prevent use-after-free and 101462306a36Sopenharmony_ci * call reqsk_put() just before return. 101562306a36Sopenharmony_ci */ 101662306a36Sopenharmony_ci refcount_set(&nreq->rsk_refcnt, 2 + 1); 101762306a36Sopenharmony_ci timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED); 101862306a36Sopenharmony_ci reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req); 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci req = nreq; 102162306a36Sopenharmony_ci sk_listener = nsk; 102262306a36Sopenharmony_ci } 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci icsk = inet_csk(sk_listener); 102562306a36Sopenharmony_ci net = sock_net(sk_listener); 102662306a36Sopenharmony_ci max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? : 102762306a36Sopenharmony_ci READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); 102862306a36Sopenharmony_ci /* Normally all the openreqs are young and become mature 102962306a36Sopenharmony_ci * (i.e. converted to established socket) for first timeout. 103062306a36Sopenharmony_ci * If synack was not acknowledged for 1 second, it means 103162306a36Sopenharmony_ci * one of the following things: synack was lost, ack was lost, 103262306a36Sopenharmony_ci * rtt is high or nobody planned to ack (i.e. synflood). 103362306a36Sopenharmony_ci * When server is a bit loaded, queue is populated with old 103462306a36Sopenharmony_ci * open requests, reducing effective size of queue. 103562306a36Sopenharmony_ci * When server is well loaded, queue size reduces to zero 103662306a36Sopenharmony_ci * after several minutes of work. It is not synflood, 103762306a36Sopenharmony_ci * it is normal operation. The solution is pruning 103862306a36Sopenharmony_ci * too old entries overriding normal timeout, when 103962306a36Sopenharmony_ci * situation becomes dangerous. 104062306a36Sopenharmony_ci * 104162306a36Sopenharmony_ci * Essentially, we reserve half of room for young 104262306a36Sopenharmony_ci * embrions; and abort old ones without pity, if old 104362306a36Sopenharmony_ci * ones are about to clog our table. 104462306a36Sopenharmony_ci */ 104562306a36Sopenharmony_ci queue = &icsk->icsk_accept_queue; 104662306a36Sopenharmony_ci qlen = reqsk_queue_len(queue); 104762306a36Sopenharmony_ci if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) { 104862306a36Sopenharmony_ci int young = reqsk_queue_len_young(queue) << 1; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci while (max_syn_ack_retries > 2) { 105162306a36Sopenharmony_ci if (qlen < young) 105262306a36Sopenharmony_ci break; 105362306a36Sopenharmony_ci max_syn_ack_retries--; 105462306a36Sopenharmony_ci young <<= 1; 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci } 105762306a36Sopenharmony_ci syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept), 105862306a36Sopenharmony_ci &expire, &resend); 105962306a36Sopenharmony_ci req->rsk_ops->syn_ack_timeout(req); 106062306a36Sopenharmony_ci if (!expire && 106162306a36Sopenharmony_ci (!resend || 106262306a36Sopenharmony_ci !inet_rtx_syn_ack(sk_listener, req) || 106362306a36Sopenharmony_ci inet_rsk(req)->acked)) { 106462306a36Sopenharmony_ci if (req->num_timeout++ == 0) 106562306a36Sopenharmony_ci atomic_dec(&queue->young); 106662306a36Sopenharmony_ci mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci if (!nreq) 106962306a36Sopenharmony_ci return; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { 107262306a36Sopenharmony_ci /* delete timer */ 107362306a36Sopenharmony_ci inet_csk_reqsk_queue_drop(sk_listener, nreq); 107462306a36Sopenharmony_ci goto no_ownership; 107562306a36Sopenharmony_ci } 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS); 107862306a36Sopenharmony_ci reqsk_migrate_reset(oreq); 107962306a36Sopenharmony_ci reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq); 108062306a36Sopenharmony_ci reqsk_put(oreq); 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci reqsk_put(nreq); 108362306a36Sopenharmony_ci return; 108462306a36Sopenharmony_ci } 108562306a36Sopenharmony_ci 108662306a36Sopenharmony_ci /* Even if we can clone the req, we may need not retransmit any more 108762306a36Sopenharmony_ci * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another 108862306a36Sopenharmony_ci * CPU may win the "own_req" race so that inet_ehash_insert() fails. 108962306a36Sopenharmony_ci */ 109062306a36Sopenharmony_ci if (nreq) { 109162306a36Sopenharmony_ci __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE); 109262306a36Sopenharmony_cino_ownership: 109362306a36Sopenharmony_ci reqsk_migrate_reset(nreq); 109462306a36Sopenharmony_ci reqsk_queue_removed(queue, nreq); 109562306a36Sopenharmony_ci __reqsk_free(nreq); 109662306a36Sopenharmony_ci } 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_cidrop: 109962306a36Sopenharmony_ci inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); 110062306a36Sopenharmony_ci} 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_cistatic void reqsk_queue_hash_req(struct request_sock *req, 110362306a36Sopenharmony_ci unsigned long timeout) 110462306a36Sopenharmony_ci{ 110562306a36Sopenharmony_ci timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); 110662306a36Sopenharmony_ci mod_timer(&req->rsk_timer, jiffies + timeout); 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci inet_ehash_insert(req_to_sk(req), NULL, NULL); 110962306a36Sopenharmony_ci /* before letting lookups find us, make sure all req fields 111062306a36Sopenharmony_ci * are committed to memory and refcnt initialized. 111162306a36Sopenharmony_ci */ 111262306a36Sopenharmony_ci smp_wmb(); 111362306a36Sopenharmony_ci refcount_set(&req->rsk_refcnt, 2 + 1); 111462306a36Sopenharmony_ci} 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_civoid inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 111762306a36Sopenharmony_ci unsigned long timeout) 111862306a36Sopenharmony_ci{ 111962306a36Sopenharmony_ci reqsk_queue_hash_req(req, timeout); 112062306a36Sopenharmony_ci inet_csk_reqsk_queue_added(sk); 112162306a36Sopenharmony_ci} 112262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_cistatic void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, 112562306a36Sopenharmony_ci const gfp_t priority) 112662306a36Sopenharmony_ci{ 112762306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(newsk); 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci if (!icsk->icsk_ulp_ops) 113062306a36Sopenharmony_ci return; 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci icsk->icsk_ulp_ops->clone(req, newsk, priority); 113362306a36Sopenharmony_ci} 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci/** 113662306a36Sopenharmony_ci * inet_csk_clone_lock - clone an inet socket, and lock its clone 113762306a36Sopenharmony_ci * @sk: the socket to clone 113862306a36Sopenharmony_ci * @req: request_sock 113962306a36Sopenharmony_ci * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 114062306a36Sopenharmony_ci * 114162306a36Sopenharmony_ci * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 114262306a36Sopenharmony_ci */ 114362306a36Sopenharmony_cistruct sock *inet_csk_clone_lock(const struct sock *sk, 114462306a36Sopenharmony_ci const struct request_sock *req, 114562306a36Sopenharmony_ci const gfp_t priority) 114662306a36Sopenharmony_ci{ 114762306a36Sopenharmony_ci struct sock *newsk = sk_clone_lock(sk, priority); 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci if (newsk) { 115062306a36Sopenharmony_ci struct inet_connection_sock *newicsk = inet_csk(newsk); 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci inet_sk_set_state(newsk, TCP_SYN_RECV); 115362306a36Sopenharmony_ci newicsk->icsk_bind_hash = NULL; 115462306a36Sopenharmony_ci newicsk->icsk_bind2_hash = NULL; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; 115762306a36Sopenharmony_ci inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; 115862306a36Sopenharmony_ci inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci /* listeners have SOCK_RCU_FREE, not the children */ 116162306a36Sopenharmony_ci sock_reset_flag(newsk, SOCK_RCU_FREE); 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ci inet_sk(newsk)->mc_list = NULL; 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci newsk->sk_mark = inet_rsk(req)->ir_mark; 116662306a36Sopenharmony_ci atomic64_set(&newsk->sk_cookie, 116762306a36Sopenharmony_ci atomic64_read(&inet_rsk(req)->ir_cookie)); 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci newicsk->icsk_retransmits = 0; 117062306a36Sopenharmony_ci newicsk->icsk_backoff = 0; 117162306a36Sopenharmony_ci newicsk->icsk_probes_out = 0; 117262306a36Sopenharmony_ci newicsk->icsk_probes_tstamp = 0; 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci /* Deinitialize accept_queue to trap illegal accesses. */ 117562306a36Sopenharmony_ci memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci inet_clone_ulp(req, newsk, priority); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci security_inet_csk_clone(newsk, req); 118062306a36Sopenharmony_ci } 118162306a36Sopenharmony_ci return newsk; 118262306a36Sopenharmony_ci} 118362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_clone_lock); 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci/* 118662306a36Sopenharmony_ci * At this point, there should be no process reference to this 118762306a36Sopenharmony_ci * socket, and thus no user references at all. Therefore we 118862306a36Sopenharmony_ci * can assume the socket waitqueue is inactive and nobody will 118962306a36Sopenharmony_ci * try to jump onto it. 119062306a36Sopenharmony_ci */ 119162306a36Sopenharmony_civoid inet_csk_destroy_sock(struct sock *sk) 119262306a36Sopenharmony_ci{ 119362306a36Sopenharmony_ci WARN_ON(sk->sk_state != TCP_CLOSE); 119462306a36Sopenharmony_ci WARN_ON(!sock_flag(sk, SOCK_DEAD)); 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci /* It cannot be in hash table! */ 119762306a36Sopenharmony_ci WARN_ON(!sk_unhashed(sk)); 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ 120062306a36Sopenharmony_ci WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci sk->sk_prot->destroy(sk); 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci sk_stream_kill_queues(sk); 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci xfrm_sk_free_policy(sk); 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci this_cpu_dec(*sk->sk_prot->orphan_count); 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci sock_put(sk); 121162306a36Sopenharmony_ci} 121262306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_destroy_sock); 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci/* This function allows to force a closure of a socket after the call to 121562306a36Sopenharmony_ci * tcp/dccp_create_openreq_child(). 121662306a36Sopenharmony_ci */ 121762306a36Sopenharmony_civoid inet_csk_prepare_forced_close(struct sock *sk) 121862306a36Sopenharmony_ci __releases(&sk->sk_lock.slock) 121962306a36Sopenharmony_ci{ 122062306a36Sopenharmony_ci /* sk_clone_lock locked the socket and set refcnt to 2 */ 122162306a36Sopenharmony_ci bh_unlock_sock(sk); 122262306a36Sopenharmony_ci sock_put(sk); 122362306a36Sopenharmony_ci inet_csk_prepare_for_destroy_sock(sk); 122462306a36Sopenharmony_ci inet_sk(sk)->inet_num = 0; 122562306a36Sopenharmony_ci} 122662306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_prepare_forced_close); 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_cistatic int inet_ulp_can_listen(const struct sock *sk) 122962306a36Sopenharmony_ci{ 123062306a36Sopenharmony_ci const struct inet_connection_sock *icsk = inet_csk(sk); 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) 123362306a36Sopenharmony_ci return -EINVAL; 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci return 0; 123662306a36Sopenharmony_ci} 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ciint inet_csk_listen_start(struct sock *sk) 123962306a36Sopenharmony_ci{ 124062306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 124162306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 124262306a36Sopenharmony_ci int err; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci err = inet_ulp_can_listen(sk); 124562306a36Sopenharmony_ci if (unlikely(err)) 124662306a36Sopenharmony_ci return err; 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci reqsk_queue_alloc(&icsk->icsk_accept_queue); 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci sk->sk_ack_backlog = 0; 125162306a36Sopenharmony_ci inet_csk_delack_init(sk); 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci /* There is race window here: we announce ourselves listening, 125462306a36Sopenharmony_ci * but this transition is still not validated by get_port(). 125562306a36Sopenharmony_ci * It is OK, because this socket enters to hash table only 125662306a36Sopenharmony_ci * after validation is complete. 125762306a36Sopenharmony_ci */ 125862306a36Sopenharmony_ci inet_sk_state_store(sk, TCP_LISTEN); 125962306a36Sopenharmony_ci err = sk->sk_prot->get_port(sk, inet->inet_num); 126062306a36Sopenharmony_ci if (!err) { 126162306a36Sopenharmony_ci inet->inet_sport = htons(inet->inet_num); 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci sk_dst_reset(sk); 126462306a36Sopenharmony_ci err = sk->sk_prot->hash(sk); 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci if (likely(!err)) 126762306a36Sopenharmony_ci return 0; 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci inet_sk_set_state(sk, TCP_CLOSE); 127162306a36Sopenharmony_ci return err; 127262306a36Sopenharmony_ci} 127362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_listen_start); 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_cistatic void inet_child_forget(struct sock *sk, struct request_sock *req, 127662306a36Sopenharmony_ci struct sock *child) 127762306a36Sopenharmony_ci{ 127862306a36Sopenharmony_ci sk->sk_prot->disconnect(child, O_NONBLOCK); 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci sock_orphan(child); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci this_cpu_inc(*sk->sk_prot->orphan_count); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { 128562306a36Sopenharmony_ci BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); 128662306a36Sopenharmony_ci BUG_ON(sk != req->rsk_listener); 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci /* Paranoid, to prevent race condition if 128962306a36Sopenharmony_ci * an inbound pkt destined for child is 129062306a36Sopenharmony_ci * blocked by sock lock in tcp_v4_rcv(). 129162306a36Sopenharmony_ci * Also to satisfy an assertion in 129262306a36Sopenharmony_ci * tcp_v4_destroy_sock(). 129362306a36Sopenharmony_ci */ 129462306a36Sopenharmony_ci RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL); 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci inet_csk_destroy_sock(child); 129762306a36Sopenharmony_ci} 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_cistruct sock *inet_csk_reqsk_queue_add(struct sock *sk, 130062306a36Sopenharmony_ci struct request_sock *req, 130162306a36Sopenharmony_ci struct sock *child) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci spin_lock(&queue->rskq_lock); 130662306a36Sopenharmony_ci if (unlikely(sk->sk_state != TCP_LISTEN)) { 130762306a36Sopenharmony_ci inet_child_forget(sk, req, child); 130862306a36Sopenharmony_ci child = NULL; 130962306a36Sopenharmony_ci } else { 131062306a36Sopenharmony_ci req->sk = child; 131162306a36Sopenharmony_ci req->dl_next = NULL; 131262306a36Sopenharmony_ci if (queue->rskq_accept_head == NULL) 131362306a36Sopenharmony_ci WRITE_ONCE(queue->rskq_accept_head, req); 131462306a36Sopenharmony_ci else 131562306a36Sopenharmony_ci queue->rskq_accept_tail->dl_next = req; 131662306a36Sopenharmony_ci queue->rskq_accept_tail = req; 131762306a36Sopenharmony_ci sk_acceptq_added(sk); 131862306a36Sopenharmony_ci } 131962306a36Sopenharmony_ci spin_unlock(&queue->rskq_lock); 132062306a36Sopenharmony_ci return child; 132162306a36Sopenharmony_ci} 132262306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_reqsk_queue_add); 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_cistruct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, 132562306a36Sopenharmony_ci struct request_sock *req, bool own_req) 132662306a36Sopenharmony_ci{ 132762306a36Sopenharmony_ci if (own_req) { 132862306a36Sopenharmony_ci inet_csk_reqsk_queue_drop(req->rsk_listener, req); 132962306a36Sopenharmony_ci reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci if (sk != req->rsk_listener) { 133262306a36Sopenharmony_ci /* another listening sk has been selected, 133362306a36Sopenharmony_ci * migrate the req to it. 133462306a36Sopenharmony_ci */ 133562306a36Sopenharmony_ci struct request_sock *nreq; 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci /* hold a refcnt for the nreq->rsk_listener 133862306a36Sopenharmony_ci * which is assigned in inet_reqsk_clone() 133962306a36Sopenharmony_ci */ 134062306a36Sopenharmony_ci sock_hold(sk); 134162306a36Sopenharmony_ci nreq = inet_reqsk_clone(req, sk); 134262306a36Sopenharmony_ci if (!nreq) { 134362306a36Sopenharmony_ci inet_child_forget(sk, req, child); 134462306a36Sopenharmony_ci goto child_put; 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci refcount_set(&nreq->rsk_refcnt, 1); 134862306a36Sopenharmony_ci if (inet_csk_reqsk_queue_add(sk, nreq, child)) { 134962306a36Sopenharmony_ci __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS); 135062306a36Sopenharmony_ci reqsk_migrate_reset(req); 135162306a36Sopenharmony_ci reqsk_put(req); 135262306a36Sopenharmony_ci return child; 135362306a36Sopenharmony_ci } 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 135662306a36Sopenharmony_ci reqsk_migrate_reset(nreq); 135762306a36Sopenharmony_ci __reqsk_free(nreq); 135862306a36Sopenharmony_ci } else if (inet_csk_reqsk_queue_add(sk, req, child)) { 135962306a36Sopenharmony_ci return child; 136062306a36Sopenharmony_ci } 136162306a36Sopenharmony_ci } 136262306a36Sopenharmony_ci /* Too bad, another child took ownership of the request, undo. */ 136362306a36Sopenharmony_cichild_put: 136462306a36Sopenharmony_ci bh_unlock_sock(child); 136562306a36Sopenharmony_ci sock_put(child); 136662306a36Sopenharmony_ci return NULL; 136762306a36Sopenharmony_ci} 136862306a36Sopenharmony_ciEXPORT_SYMBOL(inet_csk_complete_hashdance); 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci/* 137162306a36Sopenharmony_ci * This routine closes sockets which have been at least partially 137262306a36Sopenharmony_ci * opened, but not yet accepted. 137362306a36Sopenharmony_ci */ 137462306a36Sopenharmony_civoid inet_csk_listen_stop(struct sock *sk) 137562306a36Sopenharmony_ci{ 137662306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 137762306a36Sopenharmony_ci struct request_sock_queue *queue = &icsk->icsk_accept_queue; 137862306a36Sopenharmony_ci struct request_sock *next, *req; 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci /* Following specs, it would be better either to send FIN 138162306a36Sopenharmony_ci * (and enter FIN-WAIT-1, it is normal close) 138262306a36Sopenharmony_ci * or to send active reset (abort). 138362306a36Sopenharmony_ci * Certainly, it is pretty dangerous while synflood, but it is 138462306a36Sopenharmony_ci * bad justification for our negligence 8) 138562306a36Sopenharmony_ci * To be honest, we are not able to make either 138662306a36Sopenharmony_ci * of the variants now. --ANK 138762306a36Sopenharmony_ci */ 138862306a36Sopenharmony_ci while ((req = reqsk_queue_remove(queue, sk)) != NULL) { 138962306a36Sopenharmony_ci struct sock *child = req->sk, *nsk; 139062306a36Sopenharmony_ci struct request_sock *nreq; 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci local_bh_disable(); 139362306a36Sopenharmony_ci bh_lock_sock(child); 139462306a36Sopenharmony_ci WARN_ON(sock_owned_by_user(child)); 139562306a36Sopenharmony_ci sock_hold(child); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci nsk = reuseport_migrate_sock(sk, child, NULL); 139862306a36Sopenharmony_ci if (nsk) { 139962306a36Sopenharmony_ci nreq = inet_reqsk_clone(req, nsk); 140062306a36Sopenharmony_ci if (nreq) { 140162306a36Sopenharmony_ci refcount_set(&nreq->rsk_refcnt, 1); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { 140462306a36Sopenharmony_ci __NET_INC_STATS(sock_net(nsk), 140562306a36Sopenharmony_ci LINUX_MIB_TCPMIGRATEREQSUCCESS); 140662306a36Sopenharmony_ci reqsk_migrate_reset(req); 140762306a36Sopenharmony_ci } else { 140862306a36Sopenharmony_ci __NET_INC_STATS(sock_net(nsk), 140962306a36Sopenharmony_ci LINUX_MIB_TCPMIGRATEREQFAILURE); 141062306a36Sopenharmony_ci reqsk_migrate_reset(nreq); 141162306a36Sopenharmony_ci __reqsk_free(nreq); 141262306a36Sopenharmony_ci } 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci /* inet_csk_reqsk_queue_add() has already 141562306a36Sopenharmony_ci * called inet_child_forget() on failure case. 141662306a36Sopenharmony_ci */ 141762306a36Sopenharmony_ci goto skip_child_forget; 141862306a36Sopenharmony_ci } 141962306a36Sopenharmony_ci } 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci inet_child_forget(sk, req, child); 142262306a36Sopenharmony_ciskip_child_forget: 142362306a36Sopenharmony_ci reqsk_put(req); 142462306a36Sopenharmony_ci bh_unlock_sock(child); 142562306a36Sopenharmony_ci local_bh_enable(); 142662306a36Sopenharmony_ci sock_put(child); 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci cond_resched(); 142962306a36Sopenharmony_ci } 143062306a36Sopenharmony_ci if (queue->fastopenq.rskq_rst_head) { 143162306a36Sopenharmony_ci /* Free all the reqs queued in rskq_rst_head. */ 143262306a36Sopenharmony_ci spin_lock_bh(&queue->fastopenq.lock); 143362306a36Sopenharmony_ci req = queue->fastopenq.rskq_rst_head; 143462306a36Sopenharmony_ci queue->fastopenq.rskq_rst_head = NULL; 143562306a36Sopenharmony_ci spin_unlock_bh(&queue->fastopenq.lock); 143662306a36Sopenharmony_ci while (req != NULL) { 143762306a36Sopenharmony_ci next = req->dl_next; 143862306a36Sopenharmony_ci reqsk_put(req); 143962306a36Sopenharmony_ci req = next; 144062306a36Sopenharmony_ci } 144162306a36Sopenharmony_ci } 144262306a36Sopenharmony_ci WARN_ON_ONCE(sk->sk_ack_backlog); 144362306a36Sopenharmony_ci} 144462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_listen_stop); 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_civoid inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 144762306a36Sopenharmony_ci{ 144862306a36Sopenharmony_ci struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 144962306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci sin->sin_family = AF_INET; 145262306a36Sopenharmony_ci sin->sin_addr.s_addr = inet->inet_daddr; 145362306a36Sopenharmony_ci sin->sin_port = inet->inet_dport; 145462306a36Sopenharmony_ci} 145562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_cistatic struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) 145862306a36Sopenharmony_ci{ 145962306a36Sopenharmony_ci const struct inet_sock *inet = inet_sk(sk); 146062306a36Sopenharmony_ci const struct ip_options_rcu *inet_opt; 146162306a36Sopenharmony_ci __be32 daddr = inet->inet_daddr; 146262306a36Sopenharmony_ci struct flowi4 *fl4; 146362306a36Sopenharmony_ci struct rtable *rt; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci rcu_read_lock(); 146662306a36Sopenharmony_ci inet_opt = rcu_dereference(inet->inet_opt); 146762306a36Sopenharmony_ci if (inet_opt && inet_opt->opt.srr) 146862306a36Sopenharmony_ci daddr = inet_opt->opt.faddr; 146962306a36Sopenharmony_ci fl4 = &fl->u.ip4; 147062306a36Sopenharmony_ci rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, 147162306a36Sopenharmony_ci inet->inet_saddr, inet->inet_dport, 147262306a36Sopenharmony_ci inet->inet_sport, sk->sk_protocol, 147362306a36Sopenharmony_ci RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); 147462306a36Sopenharmony_ci if (IS_ERR(rt)) 147562306a36Sopenharmony_ci rt = NULL; 147662306a36Sopenharmony_ci if (rt) 147762306a36Sopenharmony_ci sk_setup_caps(sk, &rt->dst); 147862306a36Sopenharmony_ci rcu_read_unlock(); 147962306a36Sopenharmony_ci 148062306a36Sopenharmony_ci return &rt->dst; 148162306a36Sopenharmony_ci} 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_cistruct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) 148462306a36Sopenharmony_ci{ 148562306a36Sopenharmony_ci struct dst_entry *dst = __sk_dst_check(sk, 0); 148662306a36Sopenharmony_ci struct inet_sock *inet = inet_sk(sk); 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ci if (!dst) { 148962306a36Sopenharmony_ci dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 149062306a36Sopenharmony_ci if (!dst) 149162306a36Sopenharmony_ci goto out; 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci dst->ops->update_pmtu(dst, sk, NULL, mtu, true); 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci dst = __sk_dst_check(sk, 0); 149662306a36Sopenharmony_ci if (!dst) 149762306a36Sopenharmony_ci dst = inet_csk_rebuild_route(sk, &inet->cork.fl); 149862306a36Sopenharmony_ciout: 149962306a36Sopenharmony_ci return dst; 150062306a36Sopenharmony_ci} 150162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(inet_csk_update_pmtu); 1502