// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>

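/* Install a randomly generated primary key for this netns, but only if no
 * key has been configured yet (e.g. via the tcp_fastopen_key sysctl).
 */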
void tcp_fastopen_init_key_once(struct net *net)
{
	u8 key[TCP_FASTOPEN_KEY_LENGTH];
	struct tcp_fastopen_context *ctxt;

	rcu_read_lock();
	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctxt) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	/* tcp_fastopen_reset_cipher publishes the new context
	 * atomically, so we allow this race to happen here.
	 *
	 * All call sites of tcp_fastopen_cookie_gen also check
	 * for a valid cookie, so this is an acceptable risk.
	 */
	get_random_bytes(key, sizeof(key));
	tcp_fastopen_reset_cipher(net, NULL, key, NULL);
}

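/* RCU callback: scrub and free a retired key context. kfree_sensitive()
 * zeroes the memory so stale key material does not linger on the heap.
 */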
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
	struct tcp_fastopen_context *ctx =
	    container_of(head, struct tcp_fastopen_context, rcu);

	kfree_sensitive(ctx);
}

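/* Release a listener's private key context, if any. Runs during socket
 * teardown, hence the rcu_dereference_protected(..., 1).
 */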
void tcp_fastopen_destroy_cipher(struct sock *sk)
{
	struct tcp_fastopen_context *ctx;

	ctx = rcu_dereference_protected(
			inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
	if (ctx)
		call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
}

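/* Drop the per-netns key context (e.g. on netns teardown). xchg() detaches
 * it atomically, so concurrent readers see either the old context or NULL.
 */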
void tcp_fastopen_ctx_destroy(struct net *net)
{
	struct tcp_fastopen_context *ctxt;

	ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);

	if (ctxt)
		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}

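/* Install a new primary (and optional backup) 128-bit key, either for one
 * listener socket (sk != NULL) or for the whole netns (sk == NULL). The
 * previous context, if any, is retired via RCU.
 *
 * Illustrative call (hypothetical caller, not part of this file):
 *
 *	u8 key[TCP_FASTOPEN_KEY_LENGTH];
 *
 *	get_random_bytes(key, sizeof(key));
 *	err = tcp_fastopen_reset_cipher(net, NULL, key, NULL);
 */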
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
			      void *primary_key, void *backup_key)
{
	struct tcp_fastopen_context *ctx, *octx;
	struct fastopen_queue *q;
	int err = 0;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	ctx->key[0].key[0] = get_unaligned_le64(primary_key);
	ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8);
	if (backup_key) {
		ctx->key[1].key[0] = get_unaligned_le64(backup_key);
		ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8);
		ctx->num = 2;
	} else {
		ctx->num = 1;
	}

	if (sk) {
		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
		octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
	} else {
		octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
	}

	if (octx)
		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
out:
	return err;
}

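/* Copy the installed key(s) into @key as little-endian u64 words (two per
 * key) and return the number of keys present (0, 1 or 2), e.g. for
 * getsockopt(TCP_FASTOPEN_KEY) or the tcp_fastopen_key sysctl readers.
 */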
int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
			    u64 *key)
{
	struct tcp_fastopen_context *ctx;
	int n_keys = 0, i;

	rcu_read_lock();
	if (icsk)
		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
	else
		ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctx) {
		n_keys = tcp_fastopen_context_len(ctx);
		for (i = 0; i < n_keys; i++) {
			put_unaligned_le64(ctx->key[i].key[0], key + (i * 2));
			put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1);
		}
	}
	rcu_read_unlock();

	return n_keys;
}

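/* Compute the cookie for one key: SipHash over the source and destination
 * addresses of the SYN (IPv4 or IPv6). Returns false for other families.
 */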
static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
					     struct sk_buff *syn,
					     const siphash_key_t *key,
					     struct tcp_fastopen_cookie *foc)
{
	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));

	if (req->rsk_ops->family == AF_INET) {
		const struct iphdr *iph = ip_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
					  sizeof(iph->saddr) +
					  sizeof(iph->daddr),
					  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (req->rsk_ops->family == AF_INET6) {
		const struct ipv6hdr *ip6h = ipv6_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr,
					  sizeof(ip6h->saddr) +
					  sizeof(ip6h->daddr),
					  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#endif
	return false;
}

/* Generate the fastopen cookie by applying SipHash to both the source and
 * destination addresses.
 */
static void tcp_fastopen_cookie_gen(struct sock *sk,
				    struct request_sock *req,
				    struct sk_buff *syn,
				    struct tcp_fastopen_cookie *foc)
{
	struct tcp_fastopen_context *ctx;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (ctx)
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
	rcu_read_unlock();
}

/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
 * queue this additional data / FIN.
 */
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
		return;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	skb_dst_drop(skb);
	/* segs_in has been initialized to 1 in tcp_create_openreq_child().
	 * Hence, reset segs_in to 0 before calling tcp_segs_in()
	 * to avoid double counting.  Also, tcp_segs_in() expects
	 * skb->len to include the tcp_hdrlen.  Hence, it should
	 * be called before __skb_pull().
	 */
	tp->segs_in = 0;
	tcp_segs_in(tp, skb);
	__skb_pull(skb, tcp_hdrlen(skb));
	sk_forced_mem_schedule(sk, skb->truesize);
	skb_set_owner_r(skb, sk);

	TCP_SKB_CB(skb)->seq++;
	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;

	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	tp->syn_data_acked = 1;

	/* u64_stats_update_begin(&tp->syncp) not needed here,
	 * as we certainly are not changing upper 32bit value (0)
	 */
	tp->bytes_received = skb->len;

	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		tcp_fin(sk);
}

/* returns 0 - no key match, 1 for primary, 2 for backup */
static int tcp_fastopen_cookie_gen_check(struct sock *sk,
					 struct request_sock *req,
					 struct sk_buff *syn,
					 struct tcp_fastopen_cookie *orig,
					 struct tcp_fastopen_cookie *valid_foc)
{
	struct tcp_fastopen_cookie search_foc = { .len = -1 };
	struct tcp_fastopen_cookie *foc = valid_foc;
	struct tcp_fastopen_context *ctx;
	int i, ret = 0;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (!ctx)
		goto out;
	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc);
		if (tcp_fastopen_cookie_match(foc, orig)) {
			ret = i + 1;
			goto out;
		}
		foc = &search_foc;
	}
out:
	rcu_read_unlock();
	return ret;
}

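/* Build a full child socket directly from the SYN (no 3WHS completion yet),
 * charge it to the listener's fastopen queue, queue any data carried in the
 * SYN and arm the retransmit timer for the SYN-ACK.
 */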
static struct sock *tcp_fastopen_create_child(struct sock *sk,
					      struct sk_buff *skb,
					      struct request_sock *req)
{
	struct tcp_sock *tp;
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	struct sock *child;
	bool own_req;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
							 NULL, &own_req);
	if (!child)
		return NULL;

	spin_lock(&queue->fastopenq.lock);
	queue->fastopenq.qlen++;
	spin_unlock(&queue->fastopenq.lock);

	/* Initialize the child socket. Some values have to be fixed up to
	 * take into account that the child is a Fast Open socket and is
	 * created only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	rcu_assign_pointer(tp->fastopen_rsk, req);
	tcp_rsk(req)->tfo_listener = true;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
	tp->max_window = tp->snd_wnd;

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the ehash
	 * because it's been added to the accept queue directly.
	 */
	req->timeout = tcp_timeout_init(child);
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  req->timeout, TCP_RTO_MAX);

	refcount_set(&req->rsk_refcnt, 2);

	/* Now finish processing the fastopen child socket. */
	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);

	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

	tcp_fastopen_add_skb(child, skb);

	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
	tp->rcv_wup = tp->rcv_nxt;
	/* tcp_conn_request() sends the SYNACK and queues the child
	 * into the listener's accept queue.
	 */
	return child;
}

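/* Check whether the listener can take another TFO request: fastopen must be
 * enabled (max_qlen > 0) and the pending-TFO queue must have room, possibly
 * after reaping one expired RST-ed request.
 */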
static bool tcp_fastopen_queue_check(struct sock *sk)
{
	struct fastopen_queue *fastopenq;
	int max_qlen;

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
	max_qlen = READ_ONCE(fastopenq->max_qlen);
	if (max_qlen == 0)
		return false;

	if (fastopenq->qlen >= max_qlen) {
		struct request_sock *req1;

		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			spin_unlock(&fastopenq->lock);
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_put(req1);
	}
	return true;
}

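/* True if no cookie is required: the matching sysctl_tcp_fastopen flag is
 * set, the socket opted out via TCP_FASTOPEN_NO_COOKIE, or the route carries
 * the fastopen_no_cookie metric.
 */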
static bool tcp_fastopen_no_cookie(const struct sock *sk,
				   const struct dst_entry *dst,
				   int flag)
{
	return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
	       tcp_sk(sk)->fastopen_no_cookie ||
	       (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}

/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
 * may be updated and returned to the client in the SYN-ACK later, e.g. for
 * a Fast Open cookie request (foc->len == 0).
 */
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      const struct dst_entry *dst)
{
	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
	int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
	struct sock *child;
	int ret = 0;

	if (foc->len == 0) /* Client requests a cookie */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
	      (syn_data || foc->len >= 0) &&
	      tcp_fastopen_queue_check(sk))) {
		foc->len = -1;
		return NULL;
	}

	if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
		goto fastopen;

	if (foc->len == 0) {
		/* Client requests a cookie. */
		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
	} else if (foc->len > 0) {
		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
						    &valid_foc);
		if (!ret) {
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		} else {
			/* Cookie is valid. Create a (full) child socket to
			 * accept the data in SYN before returning a SYN-ACK to
			 * ack the data. If we fail to create the socket, fall
			 * back and ack the ISN only, but include the same
			 * cookie.
			 *
			 * Note: Data-less SYN with valid cookie is allowed to
			 * send data in SYN_RECV state.
			 */
fastopen:
			child = tcp_fastopen_create_child(sk, skb, req);
			if (child) {
				if (ret == 2) {
					valid_foc.exp = foc->exp;
					*foc = valid_foc;
					NET_INC_STATS(sock_net(sk),
						      LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
				} else {
					foc->len = -1;
				}
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPFASTOPENPASSIVE);
				return child;
			}
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		}
	}
	valid_foc.exp = foc->exp;
	*foc = valid_foc;
	return NULL;
}

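/* Client side: decide whether Fast Open can be attempted on this connect.
 * Fetch the cached cookie/MSS for the destination; return true if we hold a
 * cookie or none is required, false otherwise.
 */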
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie)
{
	const struct dst_entry *dst;

	tcp_fastopen_cache_get(sk, mss, cookie);

	/* Firewall blackhole issue check */
	if (tcp_fastopen_active_should_disable(sk)) {
		cookie->len = -1;
		return false;
	}

	dst = __sk_dst_get(sk);

	if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
		cookie->len = -1;
		return true;
	}
	if (cookie->len > 0)
		return true;
	tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE;
	return false;
}

/* This function checks if we want to defer sending SYN until the first
 * write().  We defer under the following conditions:
 * 1. fastopen_connect sockopt is set
 * 2. we have a valid cookie
 * Return value: return true if we want to defer until application writes data
 *               return false if we want to send out SYN immediately
 */
bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
{
	struct tcp_fastopen_cookie cookie = { .len = 0 };
	struct tcp_sock *tp = tcp_sk(sk);
	u16 mss;

	if (tp->fastopen_connect && !tp->fastopen_req) {
		if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
			inet_set_bit(DEFER_CONNECT, sk);
			return true;
		}

		/* Alloc fastopen_req in order for FO option to be included
		 * in SYN
		 */
		tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
					   sk->sk_allocation);
		if (tp->fastopen_req)
			tp->fastopen_req->cookie = cookie;
		else
			*err = -ENOBUFS;
	}
	return false;
}
EXPORT_SYMBOL(tcp_fastopen_defer_connect);

/*
 * The following code block deals with middlebox issues with TFO:
 * Middlebox firewall issues can potentially cause the server's data to be
 * blackholed after a successful 3WHS using TFO.
 * The proposed solution is to disable active TFO globally under the
 * following circumstances:
 *   1. client side TFO socket receives out of order FIN
 *   2. client side TFO socket receives out of order RST
 *   3. client side TFO socket has timed out three times consecutively during
 *      or after handshake
 * We disable active side TFO globally for 1hr at first. Then if it
 * happens again, we disable it for 2h, then 4h, 8h, ...
 * And we reset the timeout back to 1hr when we see a successful active
 * TFO connection with data exchanges.
 */

/* Disable active TFO and record current jiffies and
 * tfo_active_disable_times
 */
void tcp_fastopen_active_disable(struct sock *sk)
{
	struct net *net = sock_net(sk);

	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
		return;

	/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
	WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies);

	/* Paired with smp_rmb() in tcp_fastopen_active_should_disable().
	 * We want net->ipv4.tfo_active_disable_stamp to be updated first.
	 */
	smp_mb__before_atomic();
	atomic_inc(&net->ipv4.tfo_active_disable_times);

	NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}

/* Calculate timeout for tfo active disable
 * Return true if we are still in the active TFO disable period
 * Return false if timeout already expired and we should use active TFO
 */
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
	unsigned int tfo_bh_timeout =
		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
	unsigned long timeout;
	int tfo_da_times;
	int multiplier;

	if (!tfo_bh_timeout)
		return false;

	tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
	if (!tfo_da_times)
		return false;

	/* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */
	smp_rmb();

	/* Limit timeout to max: 2^6 * initial timeout */
	multiplier = 1 << min(tfo_da_times - 1, 6);

	/* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */
	timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) +
		  multiplier * tfo_bh_timeout * HZ;
	if (time_before(jiffies, timeout))
		return true;

	/* Mark check bit so we can check for successful active TFO
	 * condition and reset tfo_active_disable_times
	 */
	tcp_sk(sk)->syn_fastopen_ch = 1;
	return false;
}

/* Disable active TFO if FIN is the only packet in the ofo queue
 * and no data is received.
 * Also check if we can reset tfo_active_disable_times if data is
 * received successfully on a marked active TFO socket opened on
 * a non-loopback interface.
 */
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst;
	struct sk_buff *skb;

	if (!tp->syn_fastopen)
		return;

	if (!tp->data_segs_in) {
		skb = skb_rb_first(&tp->out_of_order_queue);
		if (skb && !skb_rb_next(skb)) {
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
				tcp_fastopen_active_disable(sk);
				return;
			}
		}
	} else if (tp->syn_fastopen_ch &&
		   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
		dst = sk_dst_get(sk);
		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
			atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
		dst_release(dst);
	}
}

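/* Called from the retransmit timer path: if a Fast Open connection keeps
 * timing out during or right after the handshake, assume a middlebox is
 * blackholing it and pause active TFO globally.
 */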
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
{
	u32 timeouts = inet_csk(sk)->icsk_retransmits;
	struct tcp_sock *tp = tcp_sk(sk);

	/* Broken middle-boxes may black-hole Fast Open connection during or
	 * even after the handshake. Be extremely conservative and pause
	 * Fast Open globally after hitting the third consecutive timeout or
	 * exceeding the configured timeout limit.
	 */
	if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
	    (timeouts == 2 || (timeouts < 2 && expired))) {
		tcp_fastopen_active_disable(sk);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
	}
}