162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
462306a36Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
562306a36Sopenharmony_ci *		interface as the means of communication with the user level.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci *		Implementation of the Transmission Control Protocol(TCP).
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * Authors:	Ross Biro
1062306a36Sopenharmony_ci *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
1162306a36Sopenharmony_ci *		Mark Evans, <evansmp@uhura.aston.ac.uk>
1262306a36Sopenharmony_ci *		Corey Minyard <wf-rch!minyard@relay.EU.net>
1362306a36Sopenharmony_ci *		Florian La Roche, <flla@stud.uni-sb.de>
1462306a36Sopenharmony_ci *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
1562306a36Sopenharmony_ci *		Linus Torvalds, <torvalds@cs.helsinki.fi>
1662306a36Sopenharmony_ci *		Alan Cox, <gw4pts@gw4pts.ampr.org>
1762306a36Sopenharmony_ci *		Matthew Dillon, <dillon@apollo.west.oic.com>
1862306a36Sopenharmony_ci *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
1962306a36Sopenharmony_ci *		Jorge Cwik, <jorge@laser.satlink.net>
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci * Fixes:
2262306a36Sopenharmony_ci *		Alan Cox	:	Numerous verify_area() calls
2362306a36Sopenharmony_ci *		Alan Cox	:	Set the ACK bit on a reset
2462306a36Sopenharmony_ci *		Alan Cox	:	Stopped it crashing if it closed while
2562306a36Sopenharmony_ci *					sk->inuse=1 and was trying to connect
2662306a36Sopenharmony_ci *					(tcp_err()).
2762306a36Sopenharmony_ci *		Alan Cox	:	All icmp error handling was broken
 *					pointers passed were wrong and the
2962306a36Sopenharmony_ci *					socket was looked up backwards. Nobody
3062306a36Sopenharmony_ci *					tested any icmp error code obviously.
3162306a36Sopenharmony_ci *		Alan Cox	:	tcp_err() now handled properly. It
3262306a36Sopenharmony_ci *					wakes people on errors. poll
3362306a36Sopenharmony_ci *					behaves and the icmp error race
3462306a36Sopenharmony_ci *					has gone by moving it into sock.c
3562306a36Sopenharmony_ci *		Alan Cox	:	tcp_send_reset() fixed to work for
3662306a36Sopenharmony_ci *					everything not just packets for
3762306a36Sopenharmony_ci *					unknown sockets.
3862306a36Sopenharmony_ci *		Alan Cox	:	tcp option processing.
3962306a36Sopenharmony_ci *		Alan Cox	:	Reset tweaked (still not 100%) [Had
4062306a36Sopenharmony_ci *					syn rule wrong]
4162306a36Sopenharmony_ci *		Herp Rosmanith  :	More reset fixes
4262306a36Sopenharmony_ci *		Alan Cox	:	No longer acks invalid rst frames.
4362306a36Sopenharmony_ci *					Acking any kind of RST is right out.
4462306a36Sopenharmony_ci *		Alan Cox	:	Sets an ignore me flag on an rst
4562306a36Sopenharmony_ci *					receive otherwise odd bits of prattle
4662306a36Sopenharmony_ci *					escape still
4762306a36Sopenharmony_ci *		Alan Cox	:	Fixed another acking RST frame bug.
4862306a36Sopenharmony_ci *					Should stop LAN workplace lockups.
4962306a36Sopenharmony_ci *		Alan Cox	: 	Some tidyups using the new skb list
5062306a36Sopenharmony_ci *					facilities
5162306a36Sopenharmony_ci *		Alan Cox	:	sk->keepopen now seems to work
5262306a36Sopenharmony_ci *		Alan Cox	:	Pulls options out correctly on accepts
5362306a36Sopenharmony_ci *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
5462306a36Sopenharmony_ci *		Alan Cox	:	PSH doesn't end a TCP read. Switched a
5562306a36Sopenharmony_ci *					bit to skb ops.
5662306a36Sopenharmony_ci *		Alan Cox	:	Tidied tcp_data to avoid a potential
5762306a36Sopenharmony_ci *					nasty.
5862306a36Sopenharmony_ci *		Alan Cox	:	Added some better commenting, as the
5962306a36Sopenharmony_ci *					tcp is hard to follow
6062306a36Sopenharmony_ci *		Alan Cox	:	Removed incorrect check for 20 * psh
6162306a36Sopenharmony_ci *	Michael O'Reilly	:	ack < copied bug fix.
6262306a36Sopenharmony_ci *	Johannes Stille		:	Misc tcp fixes (not all in yet).
6362306a36Sopenharmony_ci *		Alan Cox	:	FIN with no memory -> CRASH
6462306a36Sopenharmony_ci *		Alan Cox	:	Added socket option proto entries.
6562306a36Sopenharmony_ci *					Also added awareness of them to accept.
6662306a36Sopenharmony_ci *		Alan Cox	:	Added TCP options (SOL_TCP)
6762306a36Sopenharmony_ci *		Alan Cox	:	Switched wakeup calls to callbacks,
6862306a36Sopenharmony_ci *					so the kernel can layer network
6962306a36Sopenharmony_ci *					sockets.
7062306a36Sopenharmony_ci *		Alan Cox	:	Use ip_tos/ip_ttl settings.
7162306a36Sopenharmony_ci *		Alan Cox	:	Handle FIN (more) properly (we hope).
7262306a36Sopenharmony_ci *		Alan Cox	:	RST frames sent on unsynchronised
7362306a36Sopenharmony_ci *					state ack error.
7462306a36Sopenharmony_ci *		Alan Cox	:	Put in missing check for SYN bit.
7562306a36Sopenharmony_ci *		Alan Cox	:	Added tcp_select_window() aka NET2E
7662306a36Sopenharmony_ci *					window non shrink trick.
7762306a36Sopenharmony_ci *		Alan Cox	:	Added a couple of small NET2E timer
7862306a36Sopenharmony_ci *					fixes
7962306a36Sopenharmony_ci *		Charles Hedrick :	TCP fixes
8062306a36Sopenharmony_ci *		Toomas Tamm	:	TCP window fixes
8162306a36Sopenharmony_ci *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
8262306a36Sopenharmony_ci *		Charles Hedrick	:	Rewrote most of it to actually work
8362306a36Sopenharmony_ci *		Linus		:	Rewrote tcp_read() and URG handling
8462306a36Sopenharmony_ci *					completely
8562306a36Sopenharmony_ci *		Gerhard Koerting:	Fixed some missing timer handling
8662306a36Sopenharmony_ci *		Matthew Dillon  :	Reworked TCP machine states as per RFC
8762306a36Sopenharmony_ci *		Gerhard Koerting:	PC/TCP workarounds
8862306a36Sopenharmony_ci *		Adam Caldwell	:	Assorted timer/timing errors
8962306a36Sopenharmony_ci *		Matthew Dillon	:	Fixed another RST bug
9062306a36Sopenharmony_ci *		Alan Cox	:	Move to kernel side addressing changes.
9162306a36Sopenharmony_ci *		Alan Cox	:	Beginning work on TCP fastpathing
9262306a36Sopenharmony_ci *					(not yet usable)
9362306a36Sopenharmony_ci *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
9462306a36Sopenharmony_ci *		Alan Cox	:	TCP fast path debugging
9562306a36Sopenharmony_ci *		Alan Cox	:	Window clamping
9662306a36Sopenharmony_ci *		Michael Riepe	:	Bug in tcp_check()
9762306a36Sopenharmony_ci *		Matt Dillon	:	More TCP improvements and RST bug fixes
9862306a36Sopenharmony_ci *		Matt Dillon	:	Yet more small nasties remove from the
9962306a36Sopenharmony_ci *					TCP code (Be very nice to this man if
10062306a36Sopenharmony_ci *					tcp finally works 100%) 8)
10162306a36Sopenharmony_ci *		Alan Cox	:	BSD accept semantics.
10262306a36Sopenharmony_ci *		Alan Cox	:	Reset on closedown bug.
10362306a36Sopenharmony_ci *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendto().
10462306a36Sopenharmony_ci *		Michael Pall	:	Handle poll() after URG properly in
10562306a36Sopenharmony_ci *					all cases.
10662306a36Sopenharmony_ci *		Michael Pall	:	Undo the last fix in tcp_read_urg()
10762306a36Sopenharmony_ci *					(multi URG PUSH broke rlogin).
10862306a36Sopenharmony_ci *		Michael Pall	:	Fix the multi URG PUSH problem in
10962306a36Sopenharmony_ci *					tcp_readable(), poll() after URG
11062306a36Sopenharmony_ci *					works now.
11162306a36Sopenharmony_ci *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the
11262306a36Sopenharmony_ci *					BSD api.
11362306a36Sopenharmony_ci *		Alan Cox	:	Changed the semantics of sk->socket to
11462306a36Sopenharmony_ci *					fix a race and a signal problem with
11562306a36Sopenharmony_ci *					accept() and async I/O.
11662306a36Sopenharmony_ci *		Alan Cox	:	Relaxed the rules on tcp_sendto().
11762306a36Sopenharmony_ci *		Yury Shevchuk	:	Really fixed accept() blocking problem.
11862306a36Sopenharmony_ci *		Craig I. Hagan  :	Allow for BSD compatible TIME_WAIT for
11962306a36Sopenharmony_ci *					clients/servers which listen in on
12062306a36Sopenharmony_ci *					fixed ports.
12162306a36Sopenharmony_ci *		Alan Cox	:	Cleaned the above up and shrank it to
12262306a36Sopenharmony_ci *					a sensible code size.
12362306a36Sopenharmony_ci *		Alan Cox	:	Self connect lockup fix.
12462306a36Sopenharmony_ci *		Alan Cox	:	No connect to multicast.
12562306a36Sopenharmony_ci *		Ross Biro	:	Close unaccepted children on master
12662306a36Sopenharmony_ci *					socket close.
12762306a36Sopenharmony_ci *		Alan Cox	:	Reset tracing code.
12862306a36Sopenharmony_ci *		Alan Cox	:	Spurious resets on shutdown.
12962306a36Sopenharmony_ci *		Alan Cox	:	Giant 15 minute/60 second timer error
13062306a36Sopenharmony_ci *		Alan Cox	:	Small whoops in polling before an
13162306a36Sopenharmony_ci *					accept.
13262306a36Sopenharmony_ci *		Alan Cox	:	Kept the state trace facility since
13362306a36Sopenharmony_ci *					it's handy for debugging.
13462306a36Sopenharmony_ci *		Alan Cox	:	More reset handler fixes.
13562306a36Sopenharmony_ci *		Alan Cox	:	Started rewriting the code based on
13662306a36Sopenharmony_ci *					the RFC's for other useful protocol
13762306a36Sopenharmony_ci *					references see: Comer, KA9Q NOS, and
13862306a36Sopenharmony_ci *					for a reference on the difference
13962306a36Sopenharmony_ci *					between specifications and how BSD
14062306a36Sopenharmony_ci *					works see the 4.4lite source.
14162306a36Sopenharmony_ci *		A.N.Kuznetsov	:	Don't time wait on completion of tidy
14262306a36Sopenharmony_ci *					close.
14362306a36Sopenharmony_ci *		Linus Torvalds	:	Fin/Shutdown & copied_seq changes.
14462306a36Sopenharmony_ci *		Linus Torvalds	:	Fixed BSD port reuse to work first syn
14562306a36Sopenharmony_ci *		Alan Cox	:	Reimplemented timers as per the RFC
14662306a36Sopenharmony_ci *					and using multiple timers for sanity.
14762306a36Sopenharmony_ci *		Alan Cox	:	Small bug fixes, and a lot of new
14862306a36Sopenharmony_ci *					comments.
14962306a36Sopenharmony_ci *		Alan Cox	:	Fixed dual reader crash by locking
15062306a36Sopenharmony_ci *					the buffers (much like datagram.c)
15162306a36Sopenharmony_ci *		Alan Cox	:	Fixed stuck sockets in probe. A probe
15262306a36Sopenharmony_ci *					now gets fed up of retrying without
15362306a36Sopenharmony_ci *					(even a no space) answer.
15462306a36Sopenharmony_ci *		Alan Cox	:	Extracted closing code better
15562306a36Sopenharmony_ci *		Alan Cox	:	Fixed the closing state machine to
15662306a36Sopenharmony_ci *					resemble the RFC.
15762306a36Sopenharmony_ci *		Alan Cox	:	More 'per spec' fixes.
15862306a36Sopenharmony_ci *		Jorge Cwik	:	Even faster checksumming.
15962306a36Sopenharmony_ci *		Alan Cox	:	tcp_data() doesn't ack illegal PSH
16062306a36Sopenharmony_ci *					only frames. At least one pc tcp stack
16162306a36Sopenharmony_ci *					generates them.
16262306a36Sopenharmony_ci *		Alan Cox	:	Cache last socket.
16362306a36Sopenharmony_ci *		Alan Cox	:	Per route irtt.
16462306a36Sopenharmony_ci *		Matt Day	:	poll()->select() match BSD precisely on error
16562306a36Sopenharmony_ci *		Alan Cox	:	New buffers
16662306a36Sopenharmony_ci *		Marc Tamsky	:	Various sk->prot->retransmits and
16762306a36Sopenharmony_ci *					sk->retransmits misupdating fixed.
16862306a36Sopenharmony_ci *					Fixed tcp_write_timeout: stuck close,
16962306a36Sopenharmony_ci *					and TCP syn retries gets used now.
17062306a36Sopenharmony_ci *		Mark Yarvis	:	In tcp_read_wakeup(), don't send an
17162306a36Sopenharmony_ci *					ack if state is TCP_CLOSED.
17262306a36Sopenharmony_ci *		Alan Cox	:	Look up device on a retransmit - routes may
17362306a36Sopenharmony_ci *					change. Doesn't yet cope with MSS shrink right
17462306a36Sopenharmony_ci *					but it's a start!
17562306a36Sopenharmony_ci *		Marc Tamsky	:	Closing in closing fixes.
17662306a36Sopenharmony_ci *		Mike Shaver	:	RFC1122 verifications.
17762306a36Sopenharmony_ci *		Alan Cox	:	rcv_saddr errors.
17862306a36Sopenharmony_ci *		Alan Cox	:	Block double connect().
17962306a36Sopenharmony_ci *		Alan Cox	:	Small hooks for enSKIP.
18062306a36Sopenharmony_ci *		Alexey Kuznetsov:	Path MTU discovery.
18162306a36Sopenharmony_ci *		Alan Cox	:	Support soft errors.
18262306a36Sopenharmony_ci *		Alan Cox	:	Fix MTU discovery pathological case
18362306a36Sopenharmony_ci *					when the remote claims no mtu!
18462306a36Sopenharmony_ci *		Marc Tamsky	:	TCP_CLOSE fix.
18562306a36Sopenharmony_ci *		Colin (G3TNE)	:	Send a reset on syn ack replies in
18662306a36Sopenharmony_ci *					window but wrong (fixes NT lpd problems)
18762306a36Sopenharmony_ci *		Pedro Roque	:	Better TCP window handling, delayed ack.
18862306a36Sopenharmony_ci *		Joerg Reuter	:	No modification of locked buffers in
18962306a36Sopenharmony_ci *					tcp_do_retransmit()
19062306a36Sopenharmony_ci *		Eric Schenk	:	Changed receiver side silly window
19162306a36Sopenharmony_ci *					avoidance algorithm to BSD style
19262306a36Sopenharmony_ci *					algorithm. This doubles throughput
19362306a36Sopenharmony_ci *					against machines running Solaris,
19462306a36Sopenharmony_ci *					and seems to result in general
19562306a36Sopenharmony_ci *					improvement.
19662306a36Sopenharmony_ci *	Stefan Magdalinski	:	adjusted tcp_readable() to fix FIONREAD
19762306a36Sopenharmony_ci *	Willy Konynenberg	:	Transparent proxying support.
19862306a36Sopenharmony_ci *	Mike McLagan		:	Routing by source
19962306a36Sopenharmony_ci *		Keith Owens	:	Do proper merging with partial SKB's in
20062306a36Sopenharmony_ci *					tcp_do_sendmsg to avoid burstiness.
20162306a36Sopenharmony_ci *		Eric Schenk	:	Fix fast close down bug with
20262306a36Sopenharmony_ci *					shutdown() followed by close().
20362306a36Sopenharmony_ci *		Andi Kleen 	:	Make poll agree with SIGIO
20462306a36Sopenharmony_ci *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
20562306a36Sopenharmony_ci *					lingertime == 0 (RFC 793 ABORT Call)
20662306a36Sopenharmony_ci *	Hirokazu Takahashi	:	Use copy_from_user() instead of
20762306a36Sopenharmony_ci *					csum_and_copy_from_user() if possible.
20862306a36Sopenharmony_ci *
20962306a36Sopenharmony_ci * Description of States:
21062306a36Sopenharmony_ci *
21162306a36Sopenharmony_ci *	TCP_SYN_SENT		sent a connection request, waiting for ack
21262306a36Sopenharmony_ci *
21362306a36Sopenharmony_ci *	TCP_SYN_RECV		received a connection request, sent ack,
21462306a36Sopenharmony_ci *				waiting for final ack in three-way handshake.
21562306a36Sopenharmony_ci *
21662306a36Sopenharmony_ci *	TCP_ESTABLISHED		connection established
21762306a36Sopenharmony_ci *
21862306a36Sopenharmony_ci *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
21962306a36Sopenharmony_ci *				transmission of remaining buffered data
22062306a36Sopenharmony_ci *
22162306a36Sopenharmony_ci *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
22262306a36Sopenharmony_ci *				to shutdown
22362306a36Sopenharmony_ci *
22462306a36Sopenharmony_ci *	TCP_CLOSING		both sides have shutdown but we still have
22562306a36Sopenharmony_ci *				data we have to finish sending
22662306a36Sopenharmony_ci *
22762306a36Sopenharmony_ci *	TCP_TIME_WAIT		timeout to catch resent junk before entering
22862306a36Sopenharmony_ci *				closed, can only be entered from FIN_WAIT2
22962306a36Sopenharmony_ci *				or CLOSING.  Required because the other end
23062306a36Sopenharmony_ci *				may not have gotten our last ACK causing it
23162306a36Sopenharmony_ci *				to retransmit the data packet (which we ignore)
23262306a36Sopenharmony_ci *
23362306a36Sopenharmony_ci *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
23462306a36Sopenharmony_ci *				us to finish writing our data and to shutdown
23562306a36Sopenharmony_ci *				(we have to close() to move on to LAST_ACK)
23662306a36Sopenharmony_ci *
 *	TCP_LAST_ACK		our side has shutdown after remote has
23862306a36Sopenharmony_ci *				shutdown.  There may still be data in our
23962306a36Sopenharmony_ci *				buffer that we have to finish sending
24062306a36Sopenharmony_ci *
24162306a36Sopenharmony_ci *	TCP_CLOSE		socket is finished
24262306a36Sopenharmony_ci */
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci#define pr_fmt(fmt) "TCP: " fmt
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci#include <crypto/hash.h>
24762306a36Sopenharmony_ci#include <linux/kernel.h>
24862306a36Sopenharmony_ci#include <linux/module.h>
24962306a36Sopenharmony_ci#include <linux/types.h>
25062306a36Sopenharmony_ci#include <linux/fcntl.h>
25162306a36Sopenharmony_ci#include <linux/poll.h>
25262306a36Sopenharmony_ci#include <linux/inet_diag.h>
25362306a36Sopenharmony_ci#include <linux/init.h>
25462306a36Sopenharmony_ci#include <linux/fs.h>
25562306a36Sopenharmony_ci#include <linux/skbuff.h>
25662306a36Sopenharmony_ci#include <linux/scatterlist.h>
25762306a36Sopenharmony_ci#include <linux/splice.h>
25862306a36Sopenharmony_ci#include <linux/net.h>
25962306a36Sopenharmony_ci#include <linux/socket.h>
26062306a36Sopenharmony_ci#include <linux/random.h>
26162306a36Sopenharmony_ci#include <linux/memblock.h>
26262306a36Sopenharmony_ci#include <linux/highmem.h>
26362306a36Sopenharmony_ci#include <linux/cache.h>
26462306a36Sopenharmony_ci#include <linux/err.h>
26562306a36Sopenharmony_ci#include <linux/time.h>
26662306a36Sopenharmony_ci#include <linux/slab.h>
26762306a36Sopenharmony_ci#include <linux/errqueue.h>
26862306a36Sopenharmony_ci#include <linux/static_key.h>
26962306a36Sopenharmony_ci#include <linux/btf.h>
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci#include <net/icmp.h>
27262306a36Sopenharmony_ci#include <net/inet_common.h>
27362306a36Sopenharmony_ci#include <net/tcp.h>
27462306a36Sopenharmony_ci#include <net/mptcp.h>
27562306a36Sopenharmony_ci#include <net/xfrm.h>
27662306a36Sopenharmony_ci#include <net/ip.h>
27762306a36Sopenharmony_ci#include <net/sock.h>
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci#include <linux/uaccess.h>
28062306a36Sopenharmony_ci#include <asm/ioctls.h>
28162306a36Sopenharmony_ci#include <net/busy_poll.h>
28262306a36Sopenharmony_ci
/* Pending control-message kinds, one distinct bit per kind so that
 * several can be tracked in a single word.
 */
enum {
	TCP_CMSG_INQ	= 1 << 0,
	TCP_CMSG_TS	= 1 << 1,
};
28862306a36Sopenharmony_ci
/* Per-CPU unsigned counter, exported to GPL modules.
 * NOTE(review): presumably the number of orphaned TCP sockets seen on each
 * CPU -- confirm against the users of this symbol.
 */
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);

/* Three-element, read-mostly sysctl array.
 * NOTE(review): presumably the low / pressure / high thresholds of the
 * tcp_mem sysctl -- confirm against the sysctl table.
 */
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);

atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp;	/* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/* Per-CPU signed companion to tcp_memory_allocated, exported to GPL modules.
 * NOTE(review): the name suggests a per-CPU forward-allocation reserve --
 * confirm against the memory accounting code.
 */
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
29962306a36Sopenharmony_ci
/* Static branch key, initially false; only defined and exported when
 * CONFIG_SMC is enabled (built-in or module).
 * NOTE(review): presumably switched on once SMC is in use -- confirm.
 */
#if IS_ENABLED(CONFIG_SMC)
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
EXPORT_SYMBOL(tcp_have_smc);
#endif
30462306a36Sopenharmony_ci
/*
 * Current number of TCP sockets.
 *
 * Kept as a percpu_counter and cacheline-aligned on SMP builds.
 */
struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp;
EXPORT_SYMBOL(tcp_sockets_allocated);
31062306a36Sopenharmony_ci
/*
 * TCP splice context
 *
 * Bundles the arguments of one splice operation so they can be handed
 * around as a single pointer.
 */
struct tcp_splice_state {
	struct pipe_inode_info *pipe;	/* pipe involved in the splice */
	size_t len;			/* NOTE(review): presumably bytes still to splice -- confirm */
	unsigned int flags;		/* NOTE(review): presumably the caller's SPLICE_F_* flags -- confirm */
};
31962306a36Sopenharmony_ci
/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
 *
 * Encoding: 0 means "not under pressure". Any other value is the jiffies
 * stamp stored by tcp_enter_memory_pressure() (which never stores 0);
 * tcp_leave_memory_pressure() swaps it back to 0 and uses the old stamp
 * to account how long the pressure period lasted.
 */
unsigned long tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL_GPL(tcp_memory_pressure);
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_civoid tcp_enter_memory_pressure(struct sock *sk)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	unsigned long val;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	if (READ_ONCE(tcp_memory_pressure))
33462306a36Sopenharmony_ci		return;
33562306a36Sopenharmony_ci	val = jiffies;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	if (!val)
33862306a36Sopenharmony_ci		val--;
33962306a36Sopenharmony_ci	if (!cmpxchg(&tcp_memory_pressure, 0, val))
34062306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_civoid tcp_leave_memory_pressure(struct sock *sk)
34562306a36Sopenharmony_ci{
34662306a36Sopenharmony_ci	unsigned long val;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	if (!READ_ONCE(tcp_memory_pressure))
34962306a36Sopenharmony_ci		return;
35062306a36Sopenharmony_ci	val = xchg(&tcp_memory_pressure, 0);
35162306a36Sopenharmony_ci	if (val)
35262306a36Sopenharmony_ci		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
35362306a36Sopenharmony_ci			      jiffies_to_msecs(jiffies - val));
35462306a36Sopenharmony_ci}
35562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
35662306a36Sopenharmony_ci
/* Translate a duration in seconds into a number of retransmissions.
 *
 * @seconds: duration to cover; values <= 0 yield 0
 * @timeout: first retransmission timeout, doubled after every step
 * @rto_max: upper bound applied to the doubled timeout
 *
 * Returns the smallest count (at least 1, at most 255) whose accumulated
 * back-off time is not less than @seconds.
 */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	int covered = timeout;
	u8 count;

	if (seconds <= 0)
		return 0;

	for (count = 1; count < 255 && seconds > covered; count++) {
		timeout <<= 1;
		timeout = timeout > rto_max ? rto_max : timeout;
		covered += timeout;
	}

	return count;
}
37662306a36Sopenharmony_ci
/* Translate a number of retransmissions into the seconds they span.
 *
 * @retrans: number of retransmissions; 0 yields 0
 * @timeout: first retransmission timeout, doubled after every step
 * @rto_max: upper bound applied to the doubled timeout
 *
 * Returns the sum of the first @retrans back-off intervals.
 */
static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int total;
	u8 step;

	if (!retrans)
		return 0;

	/* The first interval is the initial timeout itself. */
	total = timeout;
	for (step = 1; step < retrans; step++) {
		timeout <<= 1;
		timeout = timeout > rto_max ? rto_max : timeout;
		total += timeout;
	}

	return total;
}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_cistatic u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
39562306a36Sopenharmony_ci{
39662306a36Sopenharmony_ci	u32 rate = READ_ONCE(tp->rate_delivered);
39762306a36Sopenharmony_ci	u32 intv = READ_ONCE(tp->rate_interval_us);
39862306a36Sopenharmony_ci	u64 rate64 = 0;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	if (rate && intv) {
40162306a36Sopenharmony_ci		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
40262306a36Sopenharmony_ci		do_div(rate64, intv);
40362306a36Sopenharmony_ci	}
40462306a36Sopenharmony_ci	return rate64;
40562306a36Sopenharmony_ci}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci/* Address-family independent initialization for a tcp_sock.
40862306a36Sopenharmony_ci *
40962306a36Sopenharmony_ci * NOTE: A lot of things set to zero explicitly by call to
41062306a36Sopenharmony_ci *       sk_alloc() so need not be done here.
41162306a36Sopenharmony_ci */
41262306a36Sopenharmony_civoid tcp_init_sock(struct sock *sk)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
41562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	tp->out_of_order_queue = RB_ROOT;
41862306a36Sopenharmony_ci	sk->tcp_rtx_queue = RB_ROOT;
41962306a36Sopenharmony_ci	tcp_init_xmit_timers(sk);
42062306a36Sopenharmony_ci	INIT_LIST_HEAD(&tp->tsq_node);
42162306a36Sopenharmony_ci	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	icsk->icsk_rto = TCP_TIMEOUT_INIT;
42462306a36Sopenharmony_ci	icsk->icsk_rto_min = TCP_RTO_MIN;
42562306a36Sopenharmony_ci	icsk->icsk_delack_max = TCP_DELACK_MAX;
42662306a36Sopenharmony_ci	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
42762306a36Sopenharmony_ci	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	/* So many TCP implementations out there (incorrectly) count the
43062306a36Sopenharmony_ci	 * initial SYN frame in their delayed-ACK and congestion control
43162306a36Sopenharmony_ci	 * algorithms that we must have the following bandaid to talk
43262306a36Sopenharmony_ci	 * efficiently to them.  -DaveM
43362306a36Sopenharmony_ci	 */
43462306a36Sopenharmony_ci	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	/* There's a bubble in the pipe until at least the first ACK. */
43762306a36Sopenharmony_ci	tp->app_limited = ~0U;
43862306a36Sopenharmony_ci	tp->rate_app_limited = 1;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	/* See draft-stevens-tcpca-spec-01 for discussion of the
44162306a36Sopenharmony_ci	 * initialization of these values.
44262306a36Sopenharmony_ci	 */
44362306a36Sopenharmony_ci	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
44462306a36Sopenharmony_ci	tp->snd_cwnd_clamp = ~0;
44562306a36Sopenharmony_ci	tp->mss_cache = TCP_MSS_DEFAULT;
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
44862306a36Sopenharmony_ci	tcp_assign_congestion_control(sk);
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	tp->tsoffset = 0;
45162306a36Sopenharmony_ci	tp->rack.reo_wnd_steps = 1;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	sk->sk_write_space = sk_stream_write_space;
45462306a36Sopenharmony_ci	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	icsk->icsk_sync_mss = tcp_sync_mss;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
45962306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
46062306a36Sopenharmony_ci	tcp_scaling_ratio_init(sk);
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
46362306a36Sopenharmony_ci	sk_sockets_allocated_inc(sk);
46462306a36Sopenharmony_ci}
46562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_init_sock);
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_cistatic void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
46862306a36Sopenharmony_ci{
46962306a36Sopenharmony_ci	struct sk_buff *skb = tcp_write_queue_tail(sk);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	if (tsflags && skb) {
47262306a36Sopenharmony_ci		struct skb_shared_info *shinfo = skb_shinfo(skb);
47362306a36Sopenharmony_ci		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci		sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
47662306a36Sopenharmony_ci		if (tsflags & SOF_TIMESTAMPING_TX_ACK)
47762306a36Sopenharmony_ci			tcb->txstamp_ack = 1;
47862306a36Sopenharmony_ci		if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
47962306a36Sopenharmony_ci			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
48062306a36Sopenharmony_ci	}
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistatic bool tcp_stream_is_readable(struct sock *sk, int target)
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci	if (tcp_epollin_ready(sk, target))
48662306a36Sopenharmony_ci		return true;
48762306a36Sopenharmony_ci	return sk_is_readable(sk);
48862306a36Sopenharmony_ci}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci/*
49162306a36Sopenharmony_ci *	Wait for a TCP event.
49262306a36Sopenharmony_ci *
49362306a36Sopenharmony_ci *	Note that we don't need to lock the socket, as the upper poll layers
49462306a36Sopenharmony_ci *	take care of normal races (between the test and the event) and we don't
49562306a36Sopenharmony_ci *	go look at any of the socket buffers directly.
49662306a36Sopenharmony_ci */
49762306a36Sopenharmony_ci__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	__poll_t mask;
50062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
50162306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
50262306a36Sopenharmony_ci	u8 shutdown;
50362306a36Sopenharmony_ci	int state;
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	sock_poll_wait(file, sock, wait);
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	state = inet_sk_state_load(sk);
50862306a36Sopenharmony_ci	if (state == TCP_LISTEN)
50962306a36Sopenharmony_ci		return inet_csk_listen_poll(sk);
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	/* Socket is not locked. We are protected from async events
51262306a36Sopenharmony_ci	 * by poll logic and correct handling of state changes
51362306a36Sopenharmony_ci	 * made by other threads is impossible in any case.
51462306a36Sopenharmony_ci	 */
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci	mask = 0;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	/*
51962306a36Sopenharmony_ci	 * EPOLLHUP is certainly not done right. But poll() doesn't
52062306a36Sopenharmony_ci	 * have a notion of HUP in just one direction, and for a
52162306a36Sopenharmony_ci	 * socket the read side is more interesting.
52262306a36Sopenharmony_ci	 *
52362306a36Sopenharmony_ci	 * Some poll() documentation says that EPOLLHUP is incompatible
52462306a36Sopenharmony_ci	 * with the EPOLLOUT/POLLWR flags, so somebody should check this
52562306a36Sopenharmony_ci	 * all. But careful, it tends to be safer to return too many
52662306a36Sopenharmony_ci	 * bits than too few, and you can easily break real applications
52762306a36Sopenharmony_ci	 * if you don't tell them that something has hung up!
52862306a36Sopenharmony_ci	 *
52962306a36Sopenharmony_ci	 * Check-me.
53062306a36Sopenharmony_ci	 *
53162306a36Sopenharmony_ci	 * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
53262306a36Sopenharmony_ci	 * our fs/select.c). It means that after we received EOF,
53362306a36Sopenharmony_ci	 * poll always returns immediately, making impossible poll() on write()
53462306a36Sopenharmony_ci	 * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
53562306a36Sopenharmony_ci	 * if and only if shutdown has been made in both directions.
53662306a36Sopenharmony_ci	 * Actually, it is interesting to look how Solaris and DUX
53762306a36Sopenharmony_ci	 * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
53862306a36Sopenharmony_ci	 * then we could set it on SND_SHUTDOWN. BTW examples given
53962306a36Sopenharmony_ci	 * in Stevens' books assume exactly this behaviour, it explains
54062306a36Sopenharmony_ci	 * why EPOLLHUP is incompatible with EPOLLOUT.	--ANK
54162306a36Sopenharmony_ci	 *
54262306a36Sopenharmony_ci	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
54362306a36Sopenharmony_ci	 * blocking on fresh not-connected or disconnected socket. --ANK
54462306a36Sopenharmony_ci	 */
54562306a36Sopenharmony_ci	shutdown = READ_ONCE(sk->sk_shutdown);
54662306a36Sopenharmony_ci	if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
54762306a36Sopenharmony_ci		mask |= EPOLLHUP;
54862306a36Sopenharmony_ci	if (shutdown & RCV_SHUTDOWN)
54962306a36Sopenharmony_ci		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	/* Connected or passive Fast Open socket? */
55262306a36Sopenharmony_ci	if (state != TCP_SYN_SENT &&
55362306a36Sopenharmony_ci	    (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
55462306a36Sopenharmony_ci		int target = sock_rcvlowat(sk, 0, INT_MAX);
55562306a36Sopenharmony_ci		u16 urg_data = READ_ONCE(tp->urg_data);
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci		if (unlikely(urg_data) &&
55862306a36Sopenharmony_ci		    READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
55962306a36Sopenharmony_ci		    !sock_flag(sk, SOCK_URGINLINE))
56062306a36Sopenharmony_ci			target++;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci		if (tcp_stream_is_readable(sk, target))
56362306a36Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci		if (!(shutdown & SEND_SHUTDOWN)) {
56662306a36Sopenharmony_ci			if (__sk_stream_is_writeable(sk, 1)) {
56762306a36Sopenharmony_ci				mask |= EPOLLOUT | EPOLLWRNORM;
56862306a36Sopenharmony_ci			} else {  /* send SIGIO later */
56962306a36Sopenharmony_ci				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
57062306a36Sopenharmony_ci				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci				/* Race breaker. If space is freed after
57362306a36Sopenharmony_ci				 * wspace test but before the flags are set,
57462306a36Sopenharmony_ci				 * IO signal will be lost. Memory barrier
57562306a36Sopenharmony_ci				 * pairs with the input side.
57662306a36Sopenharmony_ci				 */
57762306a36Sopenharmony_ci				smp_mb__after_atomic();
57862306a36Sopenharmony_ci				if (__sk_stream_is_writeable(sk, 1))
57962306a36Sopenharmony_ci					mask |= EPOLLOUT | EPOLLWRNORM;
58062306a36Sopenharmony_ci			}
58162306a36Sopenharmony_ci		} else
58262306a36Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci		if (urg_data & TCP_URG_VALID)
58562306a36Sopenharmony_ci			mask |= EPOLLPRI;
58662306a36Sopenharmony_ci	} else if (state == TCP_SYN_SENT &&
58762306a36Sopenharmony_ci		   inet_test_bit(DEFER_CONNECT, sk)) {
58862306a36Sopenharmony_ci		/* Active TCP fastopen socket with defer_connect
58962306a36Sopenharmony_ci		 * Return EPOLLOUT so application can call write()
59062306a36Sopenharmony_ci		 * in order for kernel to generate SYN+data
59162306a36Sopenharmony_ci		 */
59262306a36Sopenharmony_ci		mask |= EPOLLOUT | EPOLLWRNORM;
59362306a36Sopenharmony_ci	}
59462306a36Sopenharmony_ci	/* This barrier is coupled with smp_wmb() in tcp_reset() */
59562306a36Sopenharmony_ci	smp_rmb();
59662306a36Sopenharmony_ci	if (READ_ONCE(sk->sk_err) ||
59762306a36Sopenharmony_ci	    !skb_queue_empty_lockless(&sk->sk_error_queue))
59862306a36Sopenharmony_ci		mask |= EPOLLERR;
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	return mask;
60162306a36Sopenharmony_ci}
60262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_poll);
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ciint tcp_ioctl(struct sock *sk, int cmd, int *karg)
60562306a36Sopenharmony_ci{
60662306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
60762306a36Sopenharmony_ci	int answ;
60862306a36Sopenharmony_ci	bool slow;
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	switch (cmd) {
61162306a36Sopenharmony_ci	case SIOCINQ:
61262306a36Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
61362306a36Sopenharmony_ci			return -EINVAL;
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci		slow = lock_sock_fast(sk);
61662306a36Sopenharmony_ci		answ = tcp_inq(sk);
61762306a36Sopenharmony_ci		unlock_sock_fast(sk, slow);
61862306a36Sopenharmony_ci		break;
61962306a36Sopenharmony_ci	case SIOCATMARK:
62062306a36Sopenharmony_ci		answ = READ_ONCE(tp->urg_data) &&
62162306a36Sopenharmony_ci		       READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
62262306a36Sopenharmony_ci		break;
62362306a36Sopenharmony_ci	case SIOCOUTQ:
62462306a36Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
62562306a36Sopenharmony_ci			return -EINVAL;
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
62862306a36Sopenharmony_ci			answ = 0;
62962306a36Sopenharmony_ci		else
63062306a36Sopenharmony_ci			answ = READ_ONCE(tp->write_seq) - tp->snd_una;
63162306a36Sopenharmony_ci		break;
63262306a36Sopenharmony_ci	case SIOCOUTQNSD:
63362306a36Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
63462306a36Sopenharmony_ci			return -EINVAL;
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
63762306a36Sopenharmony_ci			answ = 0;
63862306a36Sopenharmony_ci		else
63962306a36Sopenharmony_ci			answ = READ_ONCE(tp->write_seq) -
64062306a36Sopenharmony_ci			       READ_ONCE(tp->snd_nxt);
64162306a36Sopenharmony_ci		break;
64262306a36Sopenharmony_ci	default:
64362306a36Sopenharmony_ci		return -ENOIOCTLCMD;
64462306a36Sopenharmony_ci	}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	*karg = answ;
64762306a36Sopenharmony_ci	return 0;
64862306a36Sopenharmony_ci}
64962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_ioctl);
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_civoid tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
65462306a36Sopenharmony_ci	tp->pushed_seq = tp->write_seq;
65562306a36Sopenharmony_ci}
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_cistatic inline bool forced_push(const struct tcp_sock *tp)
65862306a36Sopenharmony_ci{
65962306a36Sopenharmony_ci	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
66062306a36Sopenharmony_ci}
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_civoid tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
66362306a36Sopenharmony_ci{
66462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
66562306a36Sopenharmony_ci	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	tcb->seq     = tcb->end_seq = tp->write_seq;
66862306a36Sopenharmony_ci	tcb->tcp_flags = TCPHDR_ACK;
66962306a36Sopenharmony_ci	__skb_header_release(skb);
67062306a36Sopenharmony_ci	tcp_add_write_queue_tail(sk, skb);
67162306a36Sopenharmony_ci	sk_wmem_queued_add(sk, skb->truesize);
67262306a36Sopenharmony_ci	sk_mem_charge(sk, skb->truesize);
67362306a36Sopenharmony_ci	if (tp->nonagle & TCP_NAGLE_PUSH)
67462306a36Sopenharmony_ci		tp->nonagle &= ~TCP_NAGLE_PUSH;
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	tcp_slow_start_after_idle_check(sk);
67762306a36Sopenharmony_ci}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cistatic inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
68062306a36Sopenharmony_ci{
68162306a36Sopenharmony_ci	if (flags & MSG_OOB)
68262306a36Sopenharmony_ci		tp->snd_up = tp->write_seq;
68362306a36Sopenharmony_ci}
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci/* If a not yet filled skb is pushed, do not send it if
68662306a36Sopenharmony_ci * we have data packets in Qdisc or NIC queues :
68762306a36Sopenharmony_ci * Because TX completion will happen shortly, it gives a chance
68862306a36Sopenharmony_ci * to coalesce future sendmsg() payload into this skb, without
68962306a36Sopenharmony_ci * need for a timer, and with no latency trade off.
69062306a36Sopenharmony_ci * As packets containing data payload have a bigger truesize
69162306a36Sopenharmony_ci * than pure acks (dataless) packets, the last checks prevent
69262306a36Sopenharmony_ci * autocorking if we only have an ACK in Qdisc/NIC queues,
69362306a36Sopenharmony_ci * or if TX completion was delayed after we processed ACK packet.
69462306a36Sopenharmony_ci */
69562306a36Sopenharmony_cistatic bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
69662306a36Sopenharmony_ci				int size_goal)
69762306a36Sopenharmony_ci{
69862306a36Sopenharmony_ci	return skb->len < size_goal &&
69962306a36Sopenharmony_ci	       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
70062306a36Sopenharmony_ci	       !tcp_rtx_queue_empty(sk) &&
70162306a36Sopenharmony_ci	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
70262306a36Sopenharmony_ci	       tcp_skb_can_collapse_to(skb);
70362306a36Sopenharmony_ci}
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_civoid tcp_push(struct sock *sk, int flags, int mss_now,
70662306a36Sopenharmony_ci	      int nonagle, int size_goal)
70762306a36Sopenharmony_ci{
70862306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
70962306a36Sopenharmony_ci	struct sk_buff *skb;
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	skb = tcp_write_queue_tail(sk);
71262306a36Sopenharmony_ci	if (!skb)
71362306a36Sopenharmony_ci		return;
71462306a36Sopenharmony_ci	if (!(flags & MSG_MORE) || forced_push(tp))
71562306a36Sopenharmony_ci		tcp_mark_push(tp, skb);
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	tcp_mark_urg(tp, flags);
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci	if (tcp_should_autocork(sk, skb, size_goal)) {
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci		/* avoid atomic op if TSQ_THROTTLED bit is already set */
72262306a36Sopenharmony_ci		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
72362306a36Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
72462306a36Sopenharmony_ci			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
72562306a36Sopenharmony_ci			smp_mb__after_atomic();
72662306a36Sopenharmony_ci		}
72762306a36Sopenharmony_ci		/* It is possible TX completion already happened
72862306a36Sopenharmony_ci		 * before we set TSQ_THROTTLED.
72962306a36Sopenharmony_ci		 */
73062306a36Sopenharmony_ci		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
73162306a36Sopenharmony_ci			return;
73262306a36Sopenharmony_ci	}
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	if (flags & MSG_MORE)
73562306a36Sopenharmony_ci		nonagle = TCP_NAGLE_CORK;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	__tcp_push_pending_frames(sk, mss_now, nonagle);
73862306a36Sopenharmony_ci}
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_cistatic int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
74162306a36Sopenharmony_ci				unsigned int offset, size_t len)
74262306a36Sopenharmony_ci{
74362306a36Sopenharmony_ci	struct tcp_splice_state *tss = rd_desc->arg.data;
74462306a36Sopenharmony_ci	int ret;
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
74762306a36Sopenharmony_ci			      min(rd_desc->count, len), tss->flags);
74862306a36Sopenharmony_ci	if (ret > 0)
74962306a36Sopenharmony_ci		rd_desc->count -= ret;
75062306a36Sopenharmony_ci	return ret;
75162306a36Sopenharmony_ci}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_cistatic int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
75462306a36Sopenharmony_ci{
75562306a36Sopenharmony_ci	/* Store TCP splice context information in read_descriptor_t. */
75662306a36Sopenharmony_ci	read_descriptor_t rd_desc = {
75762306a36Sopenharmony_ci		.arg.data = tss,
75862306a36Sopenharmony_ci		.count	  = tss->len,
75962306a36Sopenharmony_ci	};
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
76262306a36Sopenharmony_ci}
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci/**
76562306a36Sopenharmony_ci *  tcp_splice_read - splice data from TCP socket to a pipe
76662306a36Sopenharmony_ci * @sock:	socket to splice from
76762306a36Sopenharmony_ci * @ppos:	position (not valid)
76862306a36Sopenharmony_ci * @pipe:	pipe to splice to
76962306a36Sopenharmony_ci * @len:	number of bytes to splice
77062306a36Sopenharmony_ci * @flags:	splice modifier flags
77162306a36Sopenharmony_ci *
77262306a36Sopenharmony_ci * Description:
77362306a36Sopenharmony_ci *    Will read pages from given socket and fill them into a pipe.
77462306a36Sopenharmony_ci *
77562306a36Sopenharmony_ci **/
77662306a36Sopenharmony_cissize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
77762306a36Sopenharmony_ci			struct pipe_inode_info *pipe, size_t len,
77862306a36Sopenharmony_ci			unsigned int flags)
77962306a36Sopenharmony_ci{
78062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
78162306a36Sopenharmony_ci	struct tcp_splice_state tss = {
78262306a36Sopenharmony_ci		.pipe = pipe,
78362306a36Sopenharmony_ci		.len = len,
78462306a36Sopenharmony_ci		.flags = flags,
78562306a36Sopenharmony_ci	};
78662306a36Sopenharmony_ci	long timeo;
78762306a36Sopenharmony_ci	ssize_t spliced;
78862306a36Sopenharmony_ci	int ret;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	sock_rps_record_flow(sk);
79162306a36Sopenharmony_ci	/*
79262306a36Sopenharmony_ci	 * We can't seek on a socket input
79362306a36Sopenharmony_ci	 */
79462306a36Sopenharmony_ci	if (unlikely(*ppos))
79562306a36Sopenharmony_ci		return -ESPIPE;
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	ret = spliced = 0;
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	lock_sock(sk);
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
80262306a36Sopenharmony_ci	while (tss.len) {
80362306a36Sopenharmony_ci		ret = __tcp_splice_read(sk, &tss);
80462306a36Sopenharmony_ci		if (ret < 0)
80562306a36Sopenharmony_ci			break;
80662306a36Sopenharmony_ci		else if (!ret) {
80762306a36Sopenharmony_ci			if (spliced)
80862306a36Sopenharmony_ci				break;
80962306a36Sopenharmony_ci			if (sock_flag(sk, SOCK_DONE))
81062306a36Sopenharmony_ci				break;
81162306a36Sopenharmony_ci			if (sk->sk_err) {
81262306a36Sopenharmony_ci				ret = sock_error(sk);
81362306a36Sopenharmony_ci				break;
81462306a36Sopenharmony_ci			}
81562306a36Sopenharmony_ci			if (sk->sk_shutdown & RCV_SHUTDOWN)
81662306a36Sopenharmony_ci				break;
81762306a36Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE) {
81862306a36Sopenharmony_ci				/*
81962306a36Sopenharmony_ci				 * This occurs when user tries to read
82062306a36Sopenharmony_ci				 * from never connected socket.
82162306a36Sopenharmony_ci				 */
82262306a36Sopenharmony_ci				ret = -ENOTCONN;
82362306a36Sopenharmony_ci				break;
82462306a36Sopenharmony_ci			}
82562306a36Sopenharmony_ci			if (!timeo) {
82662306a36Sopenharmony_ci				ret = -EAGAIN;
82762306a36Sopenharmony_ci				break;
82862306a36Sopenharmony_ci			}
82962306a36Sopenharmony_ci			/* if __tcp_splice_read() got nothing while we have
83062306a36Sopenharmony_ci			 * an skb in receive queue, we do not want to loop.
83162306a36Sopenharmony_ci			 * This might happen with URG data.
83262306a36Sopenharmony_ci			 */
83362306a36Sopenharmony_ci			if (!skb_queue_empty(&sk->sk_receive_queue))
83462306a36Sopenharmony_ci				break;
83562306a36Sopenharmony_ci			ret = sk_wait_data(sk, &timeo, NULL);
83662306a36Sopenharmony_ci			if (ret < 0)
83762306a36Sopenharmony_ci				break;
83862306a36Sopenharmony_ci			if (signal_pending(current)) {
83962306a36Sopenharmony_ci				ret = sock_intr_errno(timeo);
84062306a36Sopenharmony_ci				break;
84162306a36Sopenharmony_ci			}
84262306a36Sopenharmony_ci			continue;
84362306a36Sopenharmony_ci		}
84462306a36Sopenharmony_ci		tss.len -= ret;
84562306a36Sopenharmony_ci		spliced += ret;
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci		if (!tss.len || !timeo)
84862306a36Sopenharmony_ci			break;
84962306a36Sopenharmony_ci		release_sock(sk);
85062306a36Sopenharmony_ci		lock_sock(sk);
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
85362306a36Sopenharmony_ci		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
85462306a36Sopenharmony_ci		    signal_pending(current))
85562306a36Sopenharmony_ci			break;
85662306a36Sopenharmony_ci	}
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	release_sock(sk);
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	if (spliced)
86162306a36Sopenharmony_ci		return spliced;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	return ret;
86462306a36Sopenharmony_ci}
86562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_splice_read);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_cistruct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
86862306a36Sopenharmony_ci				     bool force_schedule)
86962306a36Sopenharmony_ci{
87062306a36Sopenharmony_ci	struct sk_buff *skb;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp);
87362306a36Sopenharmony_ci	if (likely(skb)) {
87462306a36Sopenharmony_ci		bool mem_scheduled;
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
87762306a36Sopenharmony_ci		if (force_schedule) {
87862306a36Sopenharmony_ci			mem_scheduled = true;
87962306a36Sopenharmony_ci			sk_forced_mem_schedule(sk, skb->truesize);
88062306a36Sopenharmony_ci		} else {
88162306a36Sopenharmony_ci			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
88262306a36Sopenharmony_ci		}
88362306a36Sopenharmony_ci		if (likely(mem_scheduled)) {
88462306a36Sopenharmony_ci			skb_reserve(skb, MAX_TCP_HEADER);
88562306a36Sopenharmony_ci			skb->ip_summed = CHECKSUM_PARTIAL;
88662306a36Sopenharmony_ci			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
88762306a36Sopenharmony_ci			return skb;
88862306a36Sopenharmony_ci		}
88962306a36Sopenharmony_ci		__kfree_skb(skb);
89062306a36Sopenharmony_ci	} else {
89162306a36Sopenharmony_ci		sk->sk_prot->enter_memory_pressure(sk);
89262306a36Sopenharmony_ci		sk_stream_moderate_sndbuf(sk);
89362306a36Sopenharmony_ci	}
89462306a36Sopenharmony_ci	return NULL;
89562306a36Sopenharmony_ci}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cistatic unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
89862306a36Sopenharmony_ci				       int large_allowed)
89962306a36Sopenharmony_ci{
90062306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
90162306a36Sopenharmony_ci	u32 new_size_goal, size_goal;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	if (!large_allowed)
90462306a36Sopenharmony_ci		return mss_now;
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	/* Note : tcp_tso_autosize() will eventually split this later */
90762306a36Sopenharmony_ci	new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	/* We try hard to avoid divides here */
91062306a36Sopenharmony_ci	size_goal = tp->gso_segs * mss_now;
91162306a36Sopenharmony_ci	if (unlikely(new_size_goal < size_goal ||
91262306a36Sopenharmony_ci		     new_size_goal >= size_goal + mss_now)) {
91362306a36Sopenharmony_ci		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
91462306a36Sopenharmony_ci				     sk->sk_gso_max_segs);
91562306a36Sopenharmony_ci		size_goal = tp->gso_segs * mss_now;
91662306a36Sopenharmony_ci	}
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_ci	return max(size_goal, mss_now);
91962306a36Sopenharmony_ci}
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ciint tcp_send_mss(struct sock *sk, int *size_goal, int flags)
92262306a36Sopenharmony_ci{
92362306a36Sopenharmony_ci	int mss_now;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	mss_now = tcp_current_mss(sk);
92662306a36Sopenharmony_ci	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	return mss_now;
92962306a36Sopenharmony_ci}
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci/* In some cases, sendmsg() could have added an skb to the write queue,
93262306a36Sopenharmony_ci * but failed adding payload on it. We need to remove it to consume less
93362306a36Sopenharmony_ci * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger
93462306a36Sopenharmony_ci * epoll() users. Another reason is that tcp_write_xmit() does not like
93562306a36Sopenharmony_ci * finding an empty skb in the write queue.
93662306a36Sopenharmony_ci */
93762306a36Sopenharmony_civoid tcp_remove_empty_skb(struct sock *sk)
93862306a36Sopenharmony_ci{
93962306a36Sopenharmony_ci	struct sk_buff *skb = tcp_write_queue_tail(sk);
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci	if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
94262306a36Sopenharmony_ci		tcp_unlink_write_queue(skb, sk);
94362306a36Sopenharmony_ci		if (tcp_write_queue_empty(sk))
94462306a36Sopenharmony_ci			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
94562306a36Sopenharmony_ci		tcp_wmem_free_skb(sk, skb);
94662306a36Sopenharmony_ci	}
94762306a36Sopenharmony_ci}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci/* skb changing from pure zc to mixed, must charge zc */
95062306a36Sopenharmony_cistatic int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
95162306a36Sopenharmony_ci{
95262306a36Sopenharmony_ci	if (unlikely(skb_zcopy_pure(skb))) {
95362306a36Sopenharmony_ci		u32 extra = skb->truesize -
95462306a36Sopenharmony_ci			    SKB_TRUESIZE(skb_end_offset(skb));
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci		if (!sk_wmem_schedule(sk, extra))
95762306a36Sopenharmony_ci			return -ENOMEM;
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci		sk_mem_charge(sk, extra);
96062306a36Sopenharmony_ci		skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
96162306a36Sopenharmony_ci	}
96262306a36Sopenharmony_ci	return 0;
96362306a36Sopenharmony_ci}
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ciint tcp_wmem_schedule(struct sock *sk, int copy)
96762306a36Sopenharmony_ci{
96862306a36Sopenharmony_ci	int left;
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	if (likely(sk_wmem_schedule(sk, copy)))
97162306a36Sopenharmony_ci		return copy;
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	/* We could be in trouble if we have nothing queued.
97462306a36Sopenharmony_ci	 * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0]
97562306a36Sopenharmony_ci	 * to guarantee some progress.
97662306a36Sopenharmony_ci	 */
97762306a36Sopenharmony_ci	left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued;
97862306a36Sopenharmony_ci	if (left > 0)
97962306a36Sopenharmony_ci		sk_forced_mem_schedule(sk, min(left, copy));
98062306a36Sopenharmony_ci	return min(copy, sk->sk_forward_alloc);
98162306a36Sopenharmony_ci}
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_civoid tcp_free_fastopen_req(struct tcp_sock *tp)
98462306a36Sopenharmony_ci{
98562306a36Sopenharmony_ci	if (tp->fastopen_req) {
98662306a36Sopenharmony_ci		kfree(tp->fastopen_req);
98762306a36Sopenharmony_ci		tp->fastopen_req = NULL;
98862306a36Sopenharmony_ci	}
98962306a36Sopenharmony_ci}
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ciint tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
99262306a36Sopenharmony_ci			 size_t size, struct ubuf_info *uarg)
99362306a36Sopenharmony_ci{
99462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
99562306a36Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
99662306a36Sopenharmony_ci	struct sockaddr *uaddr = msg->msg_name;
99762306a36Sopenharmony_ci	int err, flags;
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
100062306a36Sopenharmony_ci	      TFO_CLIENT_ENABLE) ||
100162306a36Sopenharmony_ci	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
100262306a36Sopenharmony_ci	     uaddr->sa_family == AF_UNSPEC))
100362306a36Sopenharmony_ci		return -EOPNOTSUPP;
100462306a36Sopenharmony_ci	if (tp->fastopen_req)
100562306a36Sopenharmony_ci		return -EALREADY; /* Another Fast Open is in progress */
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci	tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
100862306a36Sopenharmony_ci				   sk->sk_allocation);
100962306a36Sopenharmony_ci	if (unlikely(!tp->fastopen_req))
101062306a36Sopenharmony_ci		return -ENOBUFS;
101162306a36Sopenharmony_ci	tp->fastopen_req->data = msg;
101262306a36Sopenharmony_ci	tp->fastopen_req->size = size;
101362306a36Sopenharmony_ci	tp->fastopen_req->uarg = uarg;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci	if (inet_test_bit(DEFER_CONNECT, sk)) {
101662306a36Sopenharmony_ci		err = tcp_connect(sk);
101762306a36Sopenharmony_ci		/* Same failure procedure as in tcp_v4/6_connect */
101862306a36Sopenharmony_ci		if (err) {
101962306a36Sopenharmony_ci			tcp_set_state(sk, TCP_CLOSE);
102062306a36Sopenharmony_ci			inet->inet_dport = 0;
102162306a36Sopenharmony_ci			sk->sk_route_caps = 0;
102262306a36Sopenharmony_ci		}
102362306a36Sopenharmony_ci	}
102462306a36Sopenharmony_ci	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
102562306a36Sopenharmony_ci	err = __inet_stream_connect(sk->sk_socket, uaddr,
102662306a36Sopenharmony_ci				    msg->msg_namelen, flags, 1);
102762306a36Sopenharmony_ci	/* fastopen_req could already be freed in __inet_stream_connect
102862306a36Sopenharmony_ci	 * if the connection times out or gets rst
102962306a36Sopenharmony_ci	 */
103062306a36Sopenharmony_ci	if (tp->fastopen_req) {
103162306a36Sopenharmony_ci		*copied = tp->fastopen_req->copied;
103262306a36Sopenharmony_ci		tcp_free_fastopen_req(tp);
103362306a36Sopenharmony_ci		inet_clear_bit(DEFER_CONNECT, sk);
103462306a36Sopenharmony_ci	}
103562306a36Sopenharmony_ci	return err;
103662306a36Sopenharmony_ci}
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ciint tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
103962306a36Sopenharmony_ci{
104062306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
104162306a36Sopenharmony_ci	struct ubuf_info *uarg = NULL;
104262306a36Sopenharmony_ci	struct sk_buff *skb;
104362306a36Sopenharmony_ci	struct sockcm_cookie sockc;
104462306a36Sopenharmony_ci	int flags, err, copied = 0;
104562306a36Sopenharmony_ci	int mss_now = 0, size_goal, copied_syn = 0;
104662306a36Sopenharmony_ci	int process_backlog = 0;
104762306a36Sopenharmony_ci	int zc = 0;
104862306a36Sopenharmony_ci	long timeo;
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci	flags = msg->msg_flags;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	if ((flags & MSG_ZEROCOPY) && size) {
105362306a36Sopenharmony_ci		if (msg->msg_ubuf) {
105462306a36Sopenharmony_ci			uarg = msg->msg_ubuf;
105562306a36Sopenharmony_ci			if (sk->sk_route_caps & NETIF_F_SG)
105662306a36Sopenharmony_ci				zc = MSG_ZEROCOPY;
105762306a36Sopenharmony_ci		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
105862306a36Sopenharmony_ci			skb = tcp_write_queue_tail(sk);
105962306a36Sopenharmony_ci			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
106062306a36Sopenharmony_ci			if (!uarg) {
106162306a36Sopenharmony_ci				err = -ENOBUFS;
106262306a36Sopenharmony_ci				goto out_err;
106362306a36Sopenharmony_ci			}
106462306a36Sopenharmony_ci			if (sk->sk_route_caps & NETIF_F_SG)
106562306a36Sopenharmony_ci				zc = MSG_ZEROCOPY;
106662306a36Sopenharmony_ci			else
106762306a36Sopenharmony_ci				uarg_to_msgzc(uarg)->zerocopy = 0;
106862306a36Sopenharmony_ci		}
106962306a36Sopenharmony_ci	} else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
107062306a36Sopenharmony_ci		if (sk->sk_route_caps & NETIF_F_SG)
107162306a36Sopenharmony_ci			zc = MSG_SPLICE_PAGES;
107262306a36Sopenharmony_ci	}
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	if (unlikely(flags & MSG_FASTOPEN ||
107562306a36Sopenharmony_ci		     inet_test_bit(DEFER_CONNECT, sk)) &&
107662306a36Sopenharmony_ci	    !tp->repair) {
107762306a36Sopenharmony_ci		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
107862306a36Sopenharmony_ci		if (err == -EINPROGRESS && copied_syn > 0)
107962306a36Sopenharmony_ci			goto out;
108062306a36Sopenharmony_ci		else if (err)
108162306a36Sopenharmony_ci			goto out_err;
108262306a36Sopenharmony_ci	}
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
108762306a36Sopenharmony_ci
108862306a36Sopenharmony_ci	/* Wait for a connection to finish. One exception is TCP Fast Open
108962306a36Sopenharmony_ci	 * (passive side) where data is allowed to be sent before a connection
109062306a36Sopenharmony_ci	 * is fully established.
109162306a36Sopenharmony_ci	 */
109262306a36Sopenharmony_ci	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
109362306a36Sopenharmony_ci	    !tcp_passive_fastopen(sk)) {
109462306a36Sopenharmony_ci		err = sk_stream_wait_connect(sk, &timeo);
109562306a36Sopenharmony_ci		if (err != 0)
109662306a36Sopenharmony_ci			goto do_error;
109762306a36Sopenharmony_ci	}
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	if (unlikely(tp->repair)) {
110062306a36Sopenharmony_ci		if (tp->repair_queue == TCP_RECV_QUEUE) {
110162306a36Sopenharmony_ci			copied = tcp_send_rcvq(sk, msg, size);
110262306a36Sopenharmony_ci			goto out_nopush;
110362306a36Sopenharmony_ci		}
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci		err = -EINVAL;
110662306a36Sopenharmony_ci		if (tp->repair_queue == TCP_NO_QUEUE)
110762306a36Sopenharmony_ci			goto out_err;
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci		/* 'common' sending to sendq */
111062306a36Sopenharmony_ci	}
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci	sockcm_init(&sockc, sk);
111362306a36Sopenharmony_ci	if (msg->msg_controllen) {
111462306a36Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
111562306a36Sopenharmony_ci		if (unlikely(err)) {
111662306a36Sopenharmony_ci			err = -EINVAL;
111762306a36Sopenharmony_ci			goto out_err;
111862306a36Sopenharmony_ci		}
111962306a36Sopenharmony_ci	}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_ci	/* This should be in poll */
112262306a36Sopenharmony_ci	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_ci	/* Ok commence sending. */
112562306a36Sopenharmony_ci	copied = 0;
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_cirestart:
112862306a36Sopenharmony_ci	mss_now = tcp_send_mss(sk, &size_goal, flags);
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci	err = -EPIPE;
113162306a36Sopenharmony_ci	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
113262306a36Sopenharmony_ci		goto do_error;
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci	while (msg_data_left(msg)) {
113562306a36Sopenharmony_ci		ssize_t copy = 0;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci		skb = tcp_write_queue_tail(sk);
113862306a36Sopenharmony_ci		if (skb)
113962306a36Sopenharmony_ci			copy = size_goal - skb->len;
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
114262306a36Sopenharmony_ci			bool first_skb;
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_cinew_segment:
114562306a36Sopenharmony_ci			if (!sk_stream_memory_free(sk))
114662306a36Sopenharmony_ci				goto wait_for_space;
114762306a36Sopenharmony_ci
114862306a36Sopenharmony_ci			if (unlikely(process_backlog >= 16)) {
114962306a36Sopenharmony_ci				process_backlog = 0;
115062306a36Sopenharmony_ci				if (sk_flush_backlog(sk))
115162306a36Sopenharmony_ci					goto restart;
115262306a36Sopenharmony_ci			}
115362306a36Sopenharmony_ci			first_skb = tcp_rtx_and_write_queues_empty(sk);
115462306a36Sopenharmony_ci			skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
115562306a36Sopenharmony_ci						   first_skb);
115662306a36Sopenharmony_ci			if (!skb)
115762306a36Sopenharmony_ci				goto wait_for_space;
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ci			process_backlog++;
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci			tcp_skb_entail(sk, skb);
116262306a36Sopenharmony_ci			copy = size_goal;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci			/* All packets are restored as if they have
116562306a36Sopenharmony_ci			 * already been sent. skb_mstamp_ns isn't set to
116662306a36Sopenharmony_ci			 * avoid wrong rtt estimation.
116762306a36Sopenharmony_ci			 */
116862306a36Sopenharmony_ci			if (tp->repair)
116962306a36Sopenharmony_ci				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
117062306a36Sopenharmony_ci		}
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci		/* Try to append data to the end of skb. */
117362306a36Sopenharmony_ci		if (copy > msg_data_left(msg))
117462306a36Sopenharmony_ci			copy = msg_data_left(msg);
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci		if (zc == 0) {
117762306a36Sopenharmony_ci			bool merge = true;
117862306a36Sopenharmony_ci			int i = skb_shinfo(skb)->nr_frags;
117962306a36Sopenharmony_ci			struct page_frag *pfrag = sk_page_frag(sk);
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci			if (!sk_page_frag_refill(sk, pfrag))
118262306a36Sopenharmony_ci				goto wait_for_space;
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci			if (!skb_can_coalesce(skb, i, pfrag->page,
118562306a36Sopenharmony_ci					      pfrag->offset)) {
118662306a36Sopenharmony_ci				if (i >= READ_ONCE(sysctl_max_skb_frags)) {
118762306a36Sopenharmony_ci					tcp_mark_push(tp, skb);
118862306a36Sopenharmony_ci					goto new_segment;
118962306a36Sopenharmony_ci				}
119062306a36Sopenharmony_ci				merge = false;
119162306a36Sopenharmony_ci			}
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_ci			copy = min_t(int, copy, pfrag->size - pfrag->offset);
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_ci			if (unlikely(skb_zcopy_pure(skb) || skb_zcopy_managed(skb))) {
119662306a36Sopenharmony_ci				if (tcp_downgrade_zcopy_pure(sk, skb))
119762306a36Sopenharmony_ci					goto wait_for_space;
119862306a36Sopenharmony_ci				skb_zcopy_downgrade_managed(skb);
119962306a36Sopenharmony_ci			}
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_ci			copy = tcp_wmem_schedule(sk, copy);
120262306a36Sopenharmony_ci			if (!copy)
120362306a36Sopenharmony_ci				goto wait_for_space;
120462306a36Sopenharmony_ci
120562306a36Sopenharmony_ci			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
120662306a36Sopenharmony_ci						       pfrag->page,
120762306a36Sopenharmony_ci						       pfrag->offset,
120862306a36Sopenharmony_ci						       copy);
120962306a36Sopenharmony_ci			if (err)
121062306a36Sopenharmony_ci				goto do_error;
121162306a36Sopenharmony_ci
121262306a36Sopenharmony_ci			/* Update the skb. */
121362306a36Sopenharmony_ci			if (merge) {
121462306a36Sopenharmony_ci				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
121562306a36Sopenharmony_ci			} else {
121662306a36Sopenharmony_ci				skb_fill_page_desc(skb, i, pfrag->page,
121762306a36Sopenharmony_ci						   pfrag->offset, copy);
121862306a36Sopenharmony_ci				page_ref_inc(pfrag->page);
121962306a36Sopenharmony_ci			}
122062306a36Sopenharmony_ci			pfrag->offset += copy;
122162306a36Sopenharmony_ci		} else if (zc == MSG_ZEROCOPY)  {
122262306a36Sopenharmony_ci			/* First append to a fragless skb builds initial
122362306a36Sopenharmony_ci			 * pure zerocopy skb
122462306a36Sopenharmony_ci			 */
122562306a36Sopenharmony_ci			if (!skb->len)
122662306a36Sopenharmony_ci				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci			if (!skb_zcopy_pure(skb)) {
122962306a36Sopenharmony_ci				copy = tcp_wmem_schedule(sk, copy);
123062306a36Sopenharmony_ci				if (!copy)
123162306a36Sopenharmony_ci					goto wait_for_space;
123262306a36Sopenharmony_ci			}
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
123562306a36Sopenharmony_ci			if (err == -EMSGSIZE || err == -EEXIST) {
123662306a36Sopenharmony_ci				tcp_mark_push(tp, skb);
123762306a36Sopenharmony_ci				goto new_segment;
123862306a36Sopenharmony_ci			}
123962306a36Sopenharmony_ci			if (err < 0)
124062306a36Sopenharmony_ci				goto do_error;
124162306a36Sopenharmony_ci			copy = err;
124262306a36Sopenharmony_ci		} else if (zc == MSG_SPLICE_PAGES) {
124362306a36Sopenharmony_ci			/* Splice in data if we can; copy if we can't. */
124462306a36Sopenharmony_ci			if (tcp_downgrade_zcopy_pure(sk, skb))
124562306a36Sopenharmony_ci				goto wait_for_space;
124662306a36Sopenharmony_ci			copy = tcp_wmem_schedule(sk, copy);
124762306a36Sopenharmony_ci			if (!copy)
124862306a36Sopenharmony_ci				goto wait_for_space;
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_ci			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
125162306a36Sopenharmony_ci						   sk->sk_allocation);
125262306a36Sopenharmony_ci			if (err < 0) {
125362306a36Sopenharmony_ci				if (err == -EMSGSIZE) {
125462306a36Sopenharmony_ci					tcp_mark_push(tp, skb);
125562306a36Sopenharmony_ci					goto new_segment;
125662306a36Sopenharmony_ci				}
125762306a36Sopenharmony_ci				goto do_error;
125862306a36Sopenharmony_ci			}
125962306a36Sopenharmony_ci			copy = err;
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci			if (!(flags & MSG_NO_SHARED_FRAGS))
126262306a36Sopenharmony_ci				skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_ci			sk_wmem_queued_add(sk, copy);
126562306a36Sopenharmony_ci			sk_mem_charge(sk, copy);
126662306a36Sopenharmony_ci		}
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci		if (!copied)
126962306a36Sopenharmony_ci			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
127262306a36Sopenharmony_ci		TCP_SKB_CB(skb)->end_seq += copy;
127362306a36Sopenharmony_ci		tcp_skb_pcount_set(skb, 0);
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci		copied += copy;
127662306a36Sopenharmony_ci		if (!msg_data_left(msg)) {
127762306a36Sopenharmony_ci			if (unlikely(flags & MSG_EOR))
127862306a36Sopenharmony_ci				TCP_SKB_CB(skb)->eor = 1;
127962306a36Sopenharmony_ci			goto out;
128062306a36Sopenharmony_ci		}
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_ci		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
128362306a36Sopenharmony_ci			continue;
128462306a36Sopenharmony_ci
128562306a36Sopenharmony_ci		if (forced_push(tp)) {
128662306a36Sopenharmony_ci			tcp_mark_push(tp, skb);
128762306a36Sopenharmony_ci			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
128862306a36Sopenharmony_ci		} else if (skb == tcp_send_head(sk))
128962306a36Sopenharmony_ci			tcp_push_one(sk, mss_now);
129062306a36Sopenharmony_ci		continue;
129162306a36Sopenharmony_ci
129262306a36Sopenharmony_ciwait_for_space:
129362306a36Sopenharmony_ci		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
129462306a36Sopenharmony_ci		tcp_remove_empty_skb(sk);
129562306a36Sopenharmony_ci		if (copied)
129662306a36Sopenharmony_ci			tcp_push(sk, flags & ~MSG_MORE, mss_now,
129762306a36Sopenharmony_ci				 TCP_NAGLE_PUSH, size_goal);
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci		err = sk_stream_wait_memory(sk, &timeo);
130062306a36Sopenharmony_ci		if (err != 0)
130162306a36Sopenharmony_ci			goto do_error;
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci		mss_now = tcp_send_mss(sk, &size_goal, flags);
130462306a36Sopenharmony_ci	}
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ciout:
130762306a36Sopenharmony_ci	if (copied) {
130862306a36Sopenharmony_ci		tcp_tx_timestamp(sk, sockc.tsflags);
130962306a36Sopenharmony_ci		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
131062306a36Sopenharmony_ci	}
131162306a36Sopenharmony_ciout_nopush:
131262306a36Sopenharmony_ci	/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
131362306a36Sopenharmony_ci	if (uarg && !msg->msg_ubuf)
131462306a36Sopenharmony_ci		net_zcopy_put(uarg);
131562306a36Sopenharmony_ci	return copied + copied_syn;
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_cido_error:
131862306a36Sopenharmony_ci	tcp_remove_empty_skb(sk);
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	if (copied + copied_syn)
132162306a36Sopenharmony_ci		goto out;
132262306a36Sopenharmony_ciout_err:
132362306a36Sopenharmony_ci	/* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
132462306a36Sopenharmony_ci	if (uarg && !msg->msg_ubuf)
132562306a36Sopenharmony_ci		net_zcopy_put_abort(uarg, true);
132662306a36Sopenharmony_ci	err = sk_stream_error(sk, flags, err);
132762306a36Sopenharmony_ci	/* make sure we wake any epoll edge trigger waiter */
132862306a36Sopenharmony_ci	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
132962306a36Sopenharmony_ci		sk->sk_write_space(sk);
133062306a36Sopenharmony_ci		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
133162306a36Sopenharmony_ci	}
133262306a36Sopenharmony_ci	return err;
133362306a36Sopenharmony_ci}
133462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ciint tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
133762306a36Sopenharmony_ci{
133862306a36Sopenharmony_ci	int ret;
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	lock_sock(sk);
134162306a36Sopenharmony_ci	ret = tcp_sendmsg_locked(sk, msg, size);
134262306a36Sopenharmony_ci	release_sock(sk);
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci	return ret;
134562306a36Sopenharmony_ci}
134662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sendmsg);
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_civoid tcp_splice_eof(struct socket *sock)
134962306a36Sopenharmony_ci{
135062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
135162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
135262306a36Sopenharmony_ci	int mss_now, size_goal;
135362306a36Sopenharmony_ci
135462306a36Sopenharmony_ci	if (!tcp_write_queue_tail(sk))
135562306a36Sopenharmony_ci		return;
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_ci	lock_sock(sk);
135862306a36Sopenharmony_ci	mss_now = tcp_send_mss(sk, &size_goal, 0);
135962306a36Sopenharmony_ci	tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
136062306a36Sopenharmony_ci	release_sock(sk);
136162306a36Sopenharmony_ci}
136262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_splice_eof);
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_ci/*
136562306a36Sopenharmony_ci *	Handle reading urgent data. BSD has very simple semantics for
136662306a36Sopenharmony_ci *	this, no blocking and very strange errors 8)
136762306a36Sopenharmony_ci */
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_cistatic int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
137062306a36Sopenharmony_ci{
137162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci	/* No URG data to read. */
137462306a36Sopenharmony_ci	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
137562306a36Sopenharmony_ci	    tp->urg_data == TCP_URG_READ)
137662306a36Sopenharmony_ci		return -EINVAL;	/* Yes this is right ! */
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
137962306a36Sopenharmony_ci		return -ENOTCONN;
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci	if (tp->urg_data & TCP_URG_VALID) {
138262306a36Sopenharmony_ci		int err = 0;
138362306a36Sopenharmony_ci		char c = tp->urg_data;
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci		if (!(flags & MSG_PEEK))
138662306a36Sopenharmony_ci			WRITE_ONCE(tp->urg_data, TCP_URG_READ);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci		/* Read urgent data. */
138962306a36Sopenharmony_ci		msg->msg_flags |= MSG_OOB;
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci		if (len > 0) {
139262306a36Sopenharmony_ci			if (!(flags & MSG_TRUNC))
139362306a36Sopenharmony_ci				err = memcpy_to_msg(msg, &c, 1);
139462306a36Sopenharmony_ci			len = 1;
139562306a36Sopenharmony_ci		} else
139662306a36Sopenharmony_ci			msg->msg_flags |= MSG_TRUNC;
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci		return err ? -EFAULT : len;
139962306a36Sopenharmony_ci	}
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
140262306a36Sopenharmony_ci		return 0;
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
140562306a36Sopenharmony_ci	 * the available implementations agree in this case:
140662306a36Sopenharmony_ci	 * this call should never block, independent of the
140762306a36Sopenharmony_ci	 * blocking state of the socket.
140862306a36Sopenharmony_ci	 * Mike <pall@rz.uni-karlsruhe.de>
140962306a36Sopenharmony_ci	 */
141062306a36Sopenharmony_ci	return -EAGAIN;
141162306a36Sopenharmony_ci}
141262306a36Sopenharmony_ci
141362306a36Sopenharmony_cistatic int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
141462306a36Sopenharmony_ci{
141562306a36Sopenharmony_ci	struct sk_buff *skb;
141662306a36Sopenharmony_ci	int copied = 0, err = 0;
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci	/* XXX -- need to support SO_PEEK_OFF */
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
142162306a36Sopenharmony_ci		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
142262306a36Sopenharmony_ci		if (err)
142362306a36Sopenharmony_ci			return err;
142462306a36Sopenharmony_ci		copied += skb->len;
142562306a36Sopenharmony_ci	}
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci	skb_queue_walk(&sk->sk_write_queue, skb) {
142862306a36Sopenharmony_ci		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
142962306a36Sopenharmony_ci		if (err)
143062306a36Sopenharmony_ci			break;
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci		copied += skb->len;
143362306a36Sopenharmony_ci	}
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci	return err ?: copied;
143662306a36Sopenharmony_ci}
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci/* Clean up the receive buffer for full frames taken by the user,
143962306a36Sopenharmony_ci * then send an ACK if necessary.  COPIED is the number of bytes
144062306a36Sopenharmony_ci * tcp_recvmsg has given to the user so far, it speeds up the
144162306a36Sopenharmony_ci * calculation of whether or not we must ACK for the sake of
144262306a36Sopenharmony_ci * a window update.
144362306a36Sopenharmony_ci */
144462306a36Sopenharmony_civoid __tcp_cleanup_rbuf(struct sock *sk, int copied)
144562306a36Sopenharmony_ci{
144662306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
144762306a36Sopenharmony_ci	bool time_to_ack = false;
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_ci	if (inet_csk_ack_scheduled(sk)) {
145062306a36Sopenharmony_ci		const struct inet_connection_sock *icsk = inet_csk(sk);
145162306a36Sopenharmony_ci
145262306a36Sopenharmony_ci		if (/* Once-per-two-segments ACK was not sent by tcp_input.c */
145362306a36Sopenharmony_ci		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
145462306a36Sopenharmony_ci		    /*
145562306a36Sopenharmony_ci		     * If this read emptied read buffer, we send ACK, if
145662306a36Sopenharmony_ci		     * connection is not bidirectional, user drained
145762306a36Sopenharmony_ci		     * receive buffer and there was a small segment
145862306a36Sopenharmony_ci		     * in queue.
145962306a36Sopenharmony_ci		     */
146062306a36Sopenharmony_ci		    (copied > 0 &&
146162306a36Sopenharmony_ci		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
146262306a36Sopenharmony_ci		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
146362306a36Sopenharmony_ci		       !inet_csk_in_pingpong_mode(sk))) &&
146462306a36Sopenharmony_ci		      !atomic_read(&sk->sk_rmem_alloc)))
146562306a36Sopenharmony_ci			time_to_ack = true;
146662306a36Sopenharmony_ci	}
146762306a36Sopenharmony_ci
146862306a36Sopenharmony_ci	/* We send an ACK if we can now advertise a non-zero window
146962306a36Sopenharmony_ci	 * which has been raised "significantly".
147062306a36Sopenharmony_ci	 *
147162306a36Sopenharmony_ci	 * Even if window raised up to infinity, do not send window open ACK
147262306a36Sopenharmony_ci	 * in states, where we will not receive more. It is useless.
147362306a36Sopenharmony_ci	 */
147462306a36Sopenharmony_ci	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
147562306a36Sopenharmony_ci		__u32 rcv_window_now = tcp_receive_window(tp);
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci		/* Optimize, __tcp_select_window() is not cheap. */
147862306a36Sopenharmony_ci		if (2*rcv_window_now <= tp->window_clamp) {
147962306a36Sopenharmony_ci			__u32 new_window = __tcp_select_window(sk);
148062306a36Sopenharmony_ci
148162306a36Sopenharmony_ci			/* Send ACK now, if this read freed lots of space
148262306a36Sopenharmony_ci			 * in our buffer. Certainly, new_window is new window.
148362306a36Sopenharmony_ci			 * We can advertise it now, if it is not less than current one.
148462306a36Sopenharmony_ci			 * "Lots" means "at least twice" here.
148562306a36Sopenharmony_ci			 */
148662306a36Sopenharmony_ci			if (new_window && new_window >= 2 * rcv_window_now)
148762306a36Sopenharmony_ci				time_to_ack = true;
148862306a36Sopenharmony_ci		}
148962306a36Sopenharmony_ci	}
149062306a36Sopenharmony_ci	if (time_to_ack)
149162306a36Sopenharmony_ci		tcp_send_ack(sk);
149262306a36Sopenharmony_ci}
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_civoid tcp_cleanup_rbuf(struct sock *sk, int copied)
149562306a36Sopenharmony_ci{
149662306a36Sopenharmony_ci	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
149762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
150062306a36Sopenharmony_ci	     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
150162306a36Sopenharmony_ci	     tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
150262306a36Sopenharmony_ci	__tcp_cleanup_rbuf(sk, copied);
150362306a36Sopenharmony_ci}
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_cistatic void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
150662306a36Sopenharmony_ci{
150762306a36Sopenharmony_ci	__skb_unlink(skb, &sk->sk_receive_queue);
150862306a36Sopenharmony_ci	if (likely(skb->destructor == sock_rfree)) {
150962306a36Sopenharmony_ci		sock_rfree(skb);
151062306a36Sopenharmony_ci		skb->destructor = NULL;
151162306a36Sopenharmony_ci		skb->sk = NULL;
151262306a36Sopenharmony_ci		return skb_attempt_defer_free(skb);
151362306a36Sopenharmony_ci	}
151462306a36Sopenharmony_ci	__kfree_skb(skb);
151562306a36Sopenharmony_ci}
151662306a36Sopenharmony_ci
151762306a36Sopenharmony_cistruct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
151862306a36Sopenharmony_ci{
151962306a36Sopenharmony_ci	struct sk_buff *skb;
152062306a36Sopenharmony_ci	u32 offset;
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
152362306a36Sopenharmony_ci		offset = seq - TCP_SKB_CB(skb)->seq;
152462306a36Sopenharmony_ci		if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
152562306a36Sopenharmony_ci			pr_err_once("%s: found a SYN, please report !\n", __func__);
152662306a36Sopenharmony_ci			offset--;
152762306a36Sopenharmony_ci		}
152862306a36Sopenharmony_ci		if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
152962306a36Sopenharmony_ci			*off = offset;
153062306a36Sopenharmony_ci			return skb;
153162306a36Sopenharmony_ci		}
153262306a36Sopenharmony_ci		/* This looks weird, but this can happen if TCP collapsing
153362306a36Sopenharmony_ci		 * splitted a fat GRO packet, while we released socket lock
153462306a36Sopenharmony_ci		 * in skb_splice_bits()
153562306a36Sopenharmony_ci		 */
153662306a36Sopenharmony_ci		tcp_eat_recv_skb(sk, skb);
153762306a36Sopenharmony_ci	}
153862306a36Sopenharmony_ci	return NULL;
153962306a36Sopenharmony_ci}
154062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_recv_skb);
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci/*
154362306a36Sopenharmony_ci * This routine provides an alternative to tcp_recvmsg() for routines
154462306a36Sopenharmony_ci * that would like to handle copying from skbuffs directly in 'sendfile'
154562306a36Sopenharmony_ci * fashion.
154662306a36Sopenharmony_ci * Note:
154762306a36Sopenharmony_ci *	- It is assumed that the socket was locked by the caller.
154862306a36Sopenharmony_ci *	- The routine does not block.
154962306a36Sopenharmony_ci *	- At present, there is no support for reading OOB data
155062306a36Sopenharmony_ci *	  or for 'peeking' the socket using this routine
155162306a36Sopenharmony_ci *	  (although both would be easy to implement).
155262306a36Sopenharmony_ci */
155362306a36Sopenharmony_ciint tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
155462306a36Sopenharmony_ci		  sk_read_actor_t recv_actor)
155562306a36Sopenharmony_ci{
155662306a36Sopenharmony_ci	struct sk_buff *skb;
155762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
155862306a36Sopenharmony_ci	u32 seq = tp->copied_seq;
155962306a36Sopenharmony_ci	u32 offset;
156062306a36Sopenharmony_ci	int copied = 0;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
156362306a36Sopenharmony_ci		return -ENOTCONN;
156462306a36Sopenharmony_ci	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
156562306a36Sopenharmony_ci		if (offset < skb->len) {
156662306a36Sopenharmony_ci			int used;
156762306a36Sopenharmony_ci			size_t len;
156862306a36Sopenharmony_ci
156962306a36Sopenharmony_ci			len = skb->len - offset;
157062306a36Sopenharmony_ci			/* Stop reading if we hit a patch of urgent data */
157162306a36Sopenharmony_ci			if (unlikely(tp->urg_data)) {
157262306a36Sopenharmony_ci				u32 urg_offset = tp->urg_seq - seq;
157362306a36Sopenharmony_ci				if (urg_offset < len)
157462306a36Sopenharmony_ci					len = urg_offset;
157562306a36Sopenharmony_ci				if (!len)
157662306a36Sopenharmony_ci					break;
157762306a36Sopenharmony_ci			}
157862306a36Sopenharmony_ci			used = recv_actor(desc, skb, offset, len);
157962306a36Sopenharmony_ci			if (used <= 0) {
158062306a36Sopenharmony_ci				if (!copied)
158162306a36Sopenharmony_ci					copied = used;
158262306a36Sopenharmony_ci				break;
158362306a36Sopenharmony_ci			}
158462306a36Sopenharmony_ci			if (WARN_ON_ONCE(used > len))
158562306a36Sopenharmony_ci				used = len;
158662306a36Sopenharmony_ci			seq += used;
158762306a36Sopenharmony_ci			copied += used;
158862306a36Sopenharmony_ci			offset += used;
158962306a36Sopenharmony_ci
159062306a36Sopenharmony_ci			/* If recv_actor drops the lock (e.g. TCP splice
159162306a36Sopenharmony_ci			 * receive) the skb pointer might be invalid when
159262306a36Sopenharmony_ci			 * getting here: tcp_collapse might have deleted it
159362306a36Sopenharmony_ci			 * while aggregating skbs from the socket queue.
159462306a36Sopenharmony_ci			 */
159562306a36Sopenharmony_ci			skb = tcp_recv_skb(sk, seq - 1, &offset);
159662306a36Sopenharmony_ci			if (!skb)
159762306a36Sopenharmony_ci				break;
159862306a36Sopenharmony_ci			/* TCP coalescing might have appended data to the skb.
159962306a36Sopenharmony_ci			 * Try to splice more frags
160062306a36Sopenharmony_ci			 */
160162306a36Sopenharmony_ci			if (offset + 1 != skb->len)
160262306a36Sopenharmony_ci				continue;
160362306a36Sopenharmony_ci		}
160462306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
160562306a36Sopenharmony_ci			tcp_eat_recv_skb(sk, skb);
160662306a36Sopenharmony_ci			++seq;
160762306a36Sopenharmony_ci			break;
160862306a36Sopenharmony_ci		}
160962306a36Sopenharmony_ci		tcp_eat_recv_skb(sk, skb);
161062306a36Sopenharmony_ci		if (!desc->count)
161162306a36Sopenharmony_ci			break;
161262306a36Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, seq);
161362306a36Sopenharmony_ci	}
161462306a36Sopenharmony_ci	WRITE_ONCE(tp->copied_seq, seq);
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	tcp_rcv_space_adjust(sk);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	/* Clean up data we have read: This will do ACK frames. */
161962306a36Sopenharmony_ci	if (copied > 0) {
162062306a36Sopenharmony_ci		tcp_recv_skb(sk, seq, &offset);
162162306a36Sopenharmony_ci		tcp_cleanup_rbuf(sk, copied);
162262306a36Sopenharmony_ci	}
162362306a36Sopenharmony_ci	return copied;
162462306a36Sopenharmony_ci}
162562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_read_sock);
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ciint tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
162862306a36Sopenharmony_ci{
162962306a36Sopenharmony_ci	struct sk_buff *skb;
163062306a36Sopenharmony_ci	int copied = 0;
163162306a36Sopenharmony_ci
163262306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
163362306a36Sopenharmony_ci		return -ENOTCONN;
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
163662306a36Sopenharmony_ci		u8 tcp_flags;
163762306a36Sopenharmony_ci		int used;
163862306a36Sopenharmony_ci
163962306a36Sopenharmony_ci		__skb_unlink(skb, &sk->sk_receive_queue);
164062306a36Sopenharmony_ci		WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
164162306a36Sopenharmony_ci		tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
164262306a36Sopenharmony_ci		used = recv_actor(sk, skb);
164362306a36Sopenharmony_ci		if (used < 0) {
164462306a36Sopenharmony_ci			if (!copied)
164562306a36Sopenharmony_ci				copied = used;
164662306a36Sopenharmony_ci			break;
164762306a36Sopenharmony_ci		}
164862306a36Sopenharmony_ci		copied += used;
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci		if (tcp_flags & TCPHDR_FIN)
165162306a36Sopenharmony_ci			break;
165262306a36Sopenharmony_ci	}
165362306a36Sopenharmony_ci	return copied;
165462306a36Sopenharmony_ci}
165562306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_read_skb);
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_civoid tcp_read_done(struct sock *sk, size_t len)
165862306a36Sopenharmony_ci{
165962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
166062306a36Sopenharmony_ci	u32 seq = tp->copied_seq;
166162306a36Sopenharmony_ci	struct sk_buff *skb;
166262306a36Sopenharmony_ci	size_t left;
166362306a36Sopenharmony_ci	u32 offset;
166462306a36Sopenharmony_ci
166562306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
166662306a36Sopenharmony_ci		return;
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_ci	left = len;
166962306a36Sopenharmony_ci	while (left && (skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
167062306a36Sopenharmony_ci		int used;
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci		used = min_t(size_t, skb->len - offset, left);
167362306a36Sopenharmony_ci		seq += used;
167462306a36Sopenharmony_ci		left -= used;
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci		if (skb->len > offset + used)
167762306a36Sopenharmony_ci			break;
167862306a36Sopenharmony_ci
167962306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
168062306a36Sopenharmony_ci			tcp_eat_recv_skb(sk, skb);
168162306a36Sopenharmony_ci			++seq;
168262306a36Sopenharmony_ci			break;
168362306a36Sopenharmony_ci		}
168462306a36Sopenharmony_ci		tcp_eat_recv_skb(sk, skb);
168562306a36Sopenharmony_ci	}
168662306a36Sopenharmony_ci	WRITE_ONCE(tp->copied_seq, seq);
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ci	tcp_rcv_space_adjust(sk);
168962306a36Sopenharmony_ci
169062306a36Sopenharmony_ci	/* Clean up data we have read: This will do ACK frames. */
169162306a36Sopenharmony_ci	if (left != len)
169262306a36Sopenharmony_ci		tcp_cleanup_rbuf(sk, len - left);
169362306a36Sopenharmony_ci}
169462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_read_done);
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ciint tcp_peek_len(struct socket *sock)
169762306a36Sopenharmony_ci{
169862306a36Sopenharmony_ci	return tcp_inq(sock->sk);
169962306a36Sopenharmony_ci}
170062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_peek_len);
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
170362306a36Sopenharmony_ciint tcp_set_rcvlowat(struct sock *sk, int val)
170462306a36Sopenharmony_ci{
170562306a36Sopenharmony_ci	int space, cap;
170662306a36Sopenharmony_ci
170762306a36Sopenharmony_ci	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
170862306a36Sopenharmony_ci		cap = sk->sk_rcvbuf >> 1;
170962306a36Sopenharmony_ci	else
171062306a36Sopenharmony_ci		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
171162306a36Sopenharmony_ci	val = min(val, cap);
171262306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci	/* Check if we need to signal EPOLLIN right now */
171562306a36Sopenharmony_ci	tcp_data_ready(sk);
171662306a36Sopenharmony_ci
171762306a36Sopenharmony_ci	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
171862306a36Sopenharmony_ci		return 0;
171962306a36Sopenharmony_ci
172062306a36Sopenharmony_ci	space = tcp_space_from_win(sk, val);
172162306a36Sopenharmony_ci	if (space > sk->sk_rcvbuf) {
172262306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_rcvbuf, space);
172362306a36Sopenharmony_ci		tcp_sk(sk)->window_clamp = val;
172462306a36Sopenharmony_ci	}
172562306a36Sopenharmony_ci	return 0;
172662306a36Sopenharmony_ci}
172762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_set_rcvlowat);
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_civoid tcp_update_recv_tstamps(struct sk_buff *skb,
173062306a36Sopenharmony_ci			     struct scm_timestamping_internal *tss)
173162306a36Sopenharmony_ci{
173262306a36Sopenharmony_ci	if (skb->tstamp)
173362306a36Sopenharmony_ci		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
173462306a36Sopenharmony_ci	else
173562306a36Sopenharmony_ci		tss->ts[0] = (struct timespec64) {0};
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci	if (skb_hwtstamps(skb)->hwtstamp)
173862306a36Sopenharmony_ci		tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
173962306a36Sopenharmony_ci	else
174062306a36Sopenharmony_ci		tss->ts[2] = (struct timespec64) {0};
174162306a36Sopenharmony_ci}
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci#ifdef CONFIG_MMU
174462306a36Sopenharmony_cistatic const struct vm_operations_struct tcp_vm_ops = {
174562306a36Sopenharmony_ci};
174662306a36Sopenharmony_ci
174762306a36Sopenharmony_ciint tcp_mmap(struct file *file, struct socket *sock,
174862306a36Sopenharmony_ci	     struct vm_area_struct *vma)
174962306a36Sopenharmony_ci{
175062306a36Sopenharmony_ci	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
175162306a36Sopenharmony_ci		return -EPERM;
175262306a36Sopenharmony_ci	vm_flags_clear(vma, VM_MAYWRITE | VM_MAYEXEC);
175362306a36Sopenharmony_ci
175462306a36Sopenharmony_ci	/* Instruct vm_insert_page() to not mmap_read_lock(mm) */
175562306a36Sopenharmony_ci	vm_flags_set(vma, VM_MIXEDMAP);
175662306a36Sopenharmony_ci
175762306a36Sopenharmony_ci	vma->vm_ops = &tcp_vm_ops;
175862306a36Sopenharmony_ci	return 0;
175962306a36Sopenharmony_ci}
176062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_mmap);
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_cistatic skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
176362306a36Sopenharmony_ci				       u32 *offset_frag)
176462306a36Sopenharmony_ci{
176562306a36Sopenharmony_ci	skb_frag_t *frag;
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci	if (unlikely(offset_skb >= skb->len))
176862306a36Sopenharmony_ci		return NULL;
176962306a36Sopenharmony_ci
177062306a36Sopenharmony_ci	offset_skb -= skb_headlen(skb);
177162306a36Sopenharmony_ci	if ((int)offset_skb < 0 || skb_has_frag_list(skb))
177262306a36Sopenharmony_ci		return NULL;
177362306a36Sopenharmony_ci
177462306a36Sopenharmony_ci	frag = skb_shinfo(skb)->frags;
177562306a36Sopenharmony_ci	while (offset_skb) {
177662306a36Sopenharmony_ci		if (skb_frag_size(frag) > offset_skb) {
177762306a36Sopenharmony_ci			*offset_frag = offset_skb;
177862306a36Sopenharmony_ci			return frag;
177962306a36Sopenharmony_ci		}
178062306a36Sopenharmony_ci		offset_skb -= skb_frag_size(frag);
178162306a36Sopenharmony_ci		++frag;
178262306a36Sopenharmony_ci	}
178362306a36Sopenharmony_ci	*offset_frag = 0;
178462306a36Sopenharmony_ci	return frag;
178562306a36Sopenharmony_ci}
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_cistatic bool can_map_frag(const skb_frag_t *frag)
178862306a36Sopenharmony_ci{
178962306a36Sopenharmony_ci	struct page *page;
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci	if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
179262306a36Sopenharmony_ci		return false;
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci	page = skb_frag_page(frag);
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci	if (PageCompound(page) || page->mapping)
179762306a36Sopenharmony_ci		return false;
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci	return true;
180062306a36Sopenharmony_ci}
180162306a36Sopenharmony_ci
180262306a36Sopenharmony_cistatic int find_next_mappable_frag(const skb_frag_t *frag,
180362306a36Sopenharmony_ci				   int remaining_in_skb)
180462306a36Sopenharmony_ci{
180562306a36Sopenharmony_ci	int offset = 0;
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	if (likely(can_map_frag(frag)))
180862306a36Sopenharmony_ci		return 0;
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci	while (offset < remaining_in_skb && !can_map_frag(frag)) {
181162306a36Sopenharmony_ci		offset += skb_frag_size(frag);
181262306a36Sopenharmony_ci		++frag;
181362306a36Sopenharmony_ci	}
181462306a36Sopenharmony_ci	return offset;
181562306a36Sopenharmony_ci}
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_cistatic void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
181862306a36Sopenharmony_ci					  struct tcp_zerocopy_receive *zc,
181962306a36Sopenharmony_ci					  struct sk_buff *skb, u32 offset)
182062306a36Sopenharmony_ci{
182162306a36Sopenharmony_ci	u32 frag_offset, partial_frag_remainder = 0;
182262306a36Sopenharmony_ci	int mappable_offset;
182362306a36Sopenharmony_ci	skb_frag_t *frag;
182462306a36Sopenharmony_ci
182562306a36Sopenharmony_ci	/* worst case: skip to next skb. try to improve on this case below */
182662306a36Sopenharmony_ci	zc->recv_skip_hint = skb->len - offset;
182762306a36Sopenharmony_ci
182862306a36Sopenharmony_ci	/* Find the frag containing this offset (and how far into that frag) */
182962306a36Sopenharmony_ci	frag = skb_advance_to_frag(skb, offset, &frag_offset);
183062306a36Sopenharmony_ci	if (!frag)
183162306a36Sopenharmony_ci		return;
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci	if (frag_offset) {
183462306a36Sopenharmony_ci		struct skb_shared_info *info = skb_shinfo(skb);
183562306a36Sopenharmony_ci
183662306a36Sopenharmony_ci		/* We read part of the last frag, must recvmsg() rest of skb. */
183762306a36Sopenharmony_ci		if (frag == &info->frags[info->nr_frags - 1])
183862306a36Sopenharmony_ci			return;
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_ci		/* Else, we must at least read the remainder in this frag. */
184162306a36Sopenharmony_ci		partial_frag_remainder = skb_frag_size(frag) - frag_offset;
184262306a36Sopenharmony_ci		zc->recv_skip_hint -= partial_frag_remainder;
184362306a36Sopenharmony_ci		++frag;
184462306a36Sopenharmony_ci	}
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci	/* partial_frag_remainder: If part way through a frag, must read rest.
184762306a36Sopenharmony_ci	 * mappable_offset: Bytes till next mappable frag, *not* counting bytes
184862306a36Sopenharmony_ci	 * in partial_frag_remainder.
184962306a36Sopenharmony_ci	 */
185062306a36Sopenharmony_ci	mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint);
185162306a36Sopenharmony_ci	zc->recv_skip_hint = mappable_offset + partial_frag_remainder;
185262306a36Sopenharmony_ci}
185362306a36Sopenharmony_ci
185462306a36Sopenharmony_cistatic int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
185562306a36Sopenharmony_ci			      int flags, struct scm_timestamping_internal *tss,
185662306a36Sopenharmony_ci			      int *cmsg_flags);
185762306a36Sopenharmony_cistatic int receive_fallback_to_copy(struct sock *sk,
185862306a36Sopenharmony_ci				    struct tcp_zerocopy_receive *zc, int inq,
185962306a36Sopenharmony_ci				    struct scm_timestamping_internal *tss)
186062306a36Sopenharmony_ci{
186162306a36Sopenharmony_ci	unsigned long copy_address = (unsigned long)zc->copybuf_address;
186262306a36Sopenharmony_ci	struct msghdr msg = {};
186362306a36Sopenharmony_ci	struct iovec iov;
186462306a36Sopenharmony_ci	int err;
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci	zc->length = 0;
186762306a36Sopenharmony_ci	zc->recv_skip_hint = 0;
186862306a36Sopenharmony_ci
186962306a36Sopenharmony_ci	if (copy_address != zc->copybuf_address)
187062306a36Sopenharmony_ci		return -EINVAL;
187162306a36Sopenharmony_ci
187262306a36Sopenharmony_ci	err = import_single_range(ITER_DEST, (void __user *)copy_address,
187362306a36Sopenharmony_ci				  inq, &iov, &msg.msg_iter);
187462306a36Sopenharmony_ci	if (err)
187562306a36Sopenharmony_ci		return err;
187662306a36Sopenharmony_ci
187762306a36Sopenharmony_ci	err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT,
187862306a36Sopenharmony_ci				 tss, &zc->msg_flags);
187962306a36Sopenharmony_ci	if (err < 0)
188062306a36Sopenharmony_ci		return err;
188162306a36Sopenharmony_ci
188262306a36Sopenharmony_ci	zc->copybuf_len = err;
188362306a36Sopenharmony_ci	if (likely(zc->copybuf_len)) {
188462306a36Sopenharmony_ci		struct sk_buff *skb;
188562306a36Sopenharmony_ci		u32 offset;
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci		skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset);
188862306a36Sopenharmony_ci		if (skb)
188962306a36Sopenharmony_ci			tcp_zerocopy_set_hint_for_skb(sk, zc, skb, offset);
189062306a36Sopenharmony_ci	}
189162306a36Sopenharmony_ci	return 0;
189262306a36Sopenharmony_ci}
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_cistatic int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
189562306a36Sopenharmony_ci				   struct sk_buff *skb, u32 copylen,
189662306a36Sopenharmony_ci				   u32 *offset, u32 *seq)
189762306a36Sopenharmony_ci{
189862306a36Sopenharmony_ci	unsigned long copy_address = (unsigned long)zc->copybuf_address;
189962306a36Sopenharmony_ci	struct msghdr msg = {};
190062306a36Sopenharmony_ci	struct iovec iov;
190162306a36Sopenharmony_ci	int err;
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci	if (copy_address != zc->copybuf_address)
190462306a36Sopenharmony_ci		return -EINVAL;
190562306a36Sopenharmony_ci
190662306a36Sopenharmony_ci	err = import_single_range(ITER_DEST, (void __user *)copy_address,
190762306a36Sopenharmony_ci				  copylen, &iov, &msg.msg_iter);
190862306a36Sopenharmony_ci	if (err)
190962306a36Sopenharmony_ci		return err;
191062306a36Sopenharmony_ci	err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
191162306a36Sopenharmony_ci	if (err)
191262306a36Sopenharmony_ci		return err;
191362306a36Sopenharmony_ci	zc->recv_skip_hint -= copylen;
191462306a36Sopenharmony_ci	*offset += copylen;
191562306a36Sopenharmony_ci	*seq += copylen;
191662306a36Sopenharmony_ci	return (__s32)copylen;
191762306a36Sopenharmony_ci}
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_cistatic int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc,
192062306a36Sopenharmony_ci				  struct sock *sk,
192162306a36Sopenharmony_ci				  struct sk_buff *skb,
192262306a36Sopenharmony_ci				  u32 *seq,
192362306a36Sopenharmony_ci				  s32 copybuf_len,
192462306a36Sopenharmony_ci				  struct scm_timestamping_internal *tss)
192562306a36Sopenharmony_ci{
192662306a36Sopenharmony_ci	u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	if (!copylen)
192962306a36Sopenharmony_ci		return 0;
193062306a36Sopenharmony_ci	/* skb is null if inq < PAGE_SIZE. */
193162306a36Sopenharmony_ci	if (skb) {
193262306a36Sopenharmony_ci		offset = *seq - TCP_SKB_CB(skb)->seq;
193362306a36Sopenharmony_ci	} else {
193462306a36Sopenharmony_ci		skb = tcp_recv_skb(sk, *seq, &offset);
193562306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->has_rxtstamp) {
193662306a36Sopenharmony_ci			tcp_update_recv_tstamps(skb, tss);
193762306a36Sopenharmony_ci			zc->msg_flags |= TCP_CMSG_TS;
193862306a36Sopenharmony_ci		}
193962306a36Sopenharmony_ci	}
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
194262306a36Sopenharmony_ci						  seq);
194362306a36Sopenharmony_ci	return zc->copybuf_len < 0 ? 0 : copylen;
194462306a36Sopenharmony_ci}
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_cistatic int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
194762306a36Sopenharmony_ci					      struct page **pending_pages,
194862306a36Sopenharmony_ci					      unsigned long pages_remaining,
194962306a36Sopenharmony_ci					      unsigned long *address,
195062306a36Sopenharmony_ci					      u32 *length,
195162306a36Sopenharmony_ci					      u32 *seq,
195262306a36Sopenharmony_ci					      struct tcp_zerocopy_receive *zc,
195362306a36Sopenharmony_ci					      u32 total_bytes_to_map,
195462306a36Sopenharmony_ci					      int err)
195562306a36Sopenharmony_ci{
195662306a36Sopenharmony_ci	/* At least one page did not map. Try zapping if we skipped earlier. */
195762306a36Sopenharmony_ci	if (err == -EBUSY &&
195862306a36Sopenharmony_ci	    zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT) {
195962306a36Sopenharmony_ci		u32 maybe_zap_len;
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci		maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
196262306a36Sopenharmony_ci				*length + /* Mapped or pending */
196362306a36Sopenharmony_ci				(pages_remaining * PAGE_SIZE); /* Failed map. */
196462306a36Sopenharmony_ci		zap_page_range_single(vma, *address, maybe_zap_len, NULL);
196562306a36Sopenharmony_ci		err = 0;
196662306a36Sopenharmony_ci	}
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	if (!err) {
196962306a36Sopenharmony_ci		unsigned long leftover_pages = pages_remaining;
197062306a36Sopenharmony_ci		int bytes_mapped;
197162306a36Sopenharmony_ci
197262306a36Sopenharmony_ci		/* We called zap_page_range_single, try to reinsert. */
197362306a36Sopenharmony_ci		err = vm_insert_pages(vma, *address,
197462306a36Sopenharmony_ci				      pending_pages,
197562306a36Sopenharmony_ci				      &pages_remaining);
197662306a36Sopenharmony_ci		bytes_mapped = PAGE_SIZE * (leftover_pages - pages_remaining);
197762306a36Sopenharmony_ci		*seq += bytes_mapped;
197862306a36Sopenharmony_ci		*address += bytes_mapped;
197962306a36Sopenharmony_ci	}
198062306a36Sopenharmony_ci	if (err) {
198162306a36Sopenharmony_ci		/* Either we were unable to zap, OR we zapped, retried an
198262306a36Sopenharmony_ci		 * insert, and still had an issue. Either ways, pages_remaining
198362306a36Sopenharmony_ci		 * is the number of pages we were unable to map, and we unroll
198462306a36Sopenharmony_ci		 * some state we speculatively touched before.
198562306a36Sopenharmony_ci		 */
198662306a36Sopenharmony_ci		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
198762306a36Sopenharmony_ci
198862306a36Sopenharmony_ci		*length -= bytes_not_mapped;
198962306a36Sopenharmony_ci		zc->recv_skip_hint += bytes_not_mapped;
199062306a36Sopenharmony_ci	}
199162306a36Sopenharmony_ci	return err;
199262306a36Sopenharmony_ci}
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_cistatic int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
199562306a36Sopenharmony_ci					struct page **pages,
199662306a36Sopenharmony_ci					unsigned int pages_to_map,
199762306a36Sopenharmony_ci					unsigned long *address,
199862306a36Sopenharmony_ci					u32 *length,
199962306a36Sopenharmony_ci					u32 *seq,
200062306a36Sopenharmony_ci					struct tcp_zerocopy_receive *zc,
200162306a36Sopenharmony_ci					u32 total_bytes_to_map)
200262306a36Sopenharmony_ci{
200362306a36Sopenharmony_ci	unsigned long pages_remaining = pages_to_map;
200462306a36Sopenharmony_ci	unsigned int pages_mapped;
200562306a36Sopenharmony_ci	unsigned int bytes_mapped;
200662306a36Sopenharmony_ci	int err;
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci	err = vm_insert_pages(vma, *address, pages, &pages_remaining);
200962306a36Sopenharmony_ci	pages_mapped = pages_to_map - (unsigned int)pages_remaining;
201062306a36Sopenharmony_ci	bytes_mapped = PAGE_SIZE * pages_mapped;
201162306a36Sopenharmony_ci	/* Even if vm_insert_pages fails, it may have partially succeeded in
201262306a36Sopenharmony_ci	 * mapping (some but not all of the pages).
201362306a36Sopenharmony_ci	 */
201462306a36Sopenharmony_ci	*seq += bytes_mapped;
201562306a36Sopenharmony_ci	*address += bytes_mapped;
201662306a36Sopenharmony_ci
201762306a36Sopenharmony_ci	if (likely(!err))
201862306a36Sopenharmony_ci		return 0;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci	/* Error: maybe zap and retry + rollback state for failed inserts. */
202162306a36Sopenharmony_ci	return tcp_zerocopy_vm_insert_batch_error(vma, pages + pages_mapped,
202262306a36Sopenharmony_ci		pages_remaining, address, length, seq, zc, total_bytes_to_map,
202362306a36Sopenharmony_ci		err);
202462306a36Sopenharmony_ci}
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci#define TCP_VALID_ZC_MSG_FLAGS   (TCP_CMSG_TS)
202762306a36Sopenharmony_cistatic void tcp_zc_finalize_rx_tstamp(struct sock *sk,
202862306a36Sopenharmony_ci				      struct tcp_zerocopy_receive *zc,
202962306a36Sopenharmony_ci				      struct scm_timestamping_internal *tss)
203062306a36Sopenharmony_ci{
203162306a36Sopenharmony_ci	unsigned long msg_control_addr;
203262306a36Sopenharmony_ci	struct msghdr cmsg_dummy;
203362306a36Sopenharmony_ci
203462306a36Sopenharmony_ci	msg_control_addr = (unsigned long)zc->msg_control;
203562306a36Sopenharmony_ci	cmsg_dummy.msg_control_user = (void __user *)msg_control_addr;
203662306a36Sopenharmony_ci	cmsg_dummy.msg_controllen =
203762306a36Sopenharmony_ci		(__kernel_size_t)zc->msg_controllen;
203862306a36Sopenharmony_ci	cmsg_dummy.msg_flags = in_compat_syscall()
203962306a36Sopenharmony_ci		? MSG_CMSG_COMPAT : 0;
204062306a36Sopenharmony_ci	cmsg_dummy.msg_control_is_user = true;
204162306a36Sopenharmony_ci	zc->msg_flags = 0;
204262306a36Sopenharmony_ci	if (zc->msg_control == msg_control_addr &&
204362306a36Sopenharmony_ci	    zc->msg_controllen == cmsg_dummy.msg_controllen) {
204462306a36Sopenharmony_ci		tcp_recv_timestamp(&cmsg_dummy, sk, tss);
204562306a36Sopenharmony_ci		zc->msg_control = (__u64)
204662306a36Sopenharmony_ci			((uintptr_t)cmsg_dummy.msg_control_user);
204762306a36Sopenharmony_ci		zc->msg_controllen =
204862306a36Sopenharmony_ci			(__u64)cmsg_dummy.msg_controllen;
204962306a36Sopenharmony_ci		zc->msg_flags = (__u32)cmsg_dummy.msg_flags;
205062306a36Sopenharmony_ci	}
205162306a36Sopenharmony_ci}
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_cistatic struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
205462306a36Sopenharmony_ci					   unsigned long address,
205562306a36Sopenharmony_ci					   bool *mmap_locked)
205662306a36Sopenharmony_ci{
205762306a36Sopenharmony_ci	struct vm_area_struct *vma = lock_vma_under_rcu(mm, address);
205862306a36Sopenharmony_ci
205962306a36Sopenharmony_ci	if (vma) {
206062306a36Sopenharmony_ci		if (vma->vm_ops != &tcp_vm_ops) {
206162306a36Sopenharmony_ci			vma_end_read(vma);
206262306a36Sopenharmony_ci			return NULL;
206362306a36Sopenharmony_ci		}
206462306a36Sopenharmony_ci		*mmap_locked = false;
206562306a36Sopenharmony_ci		return vma;
206662306a36Sopenharmony_ci	}
206762306a36Sopenharmony_ci
206862306a36Sopenharmony_ci	mmap_read_lock(mm);
206962306a36Sopenharmony_ci	vma = vma_lookup(mm, address);
207062306a36Sopenharmony_ci	if (!vma || vma->vm_ops != &tcp_vm_ops) {
207162306a36Sopenharmony_ci		mmap_read_unlock(mm);
207262306a36Sopenharmony_ci		return NULL;
207362306a36Sopenharmony_ci	}
207462306a36Sopenharmony_ci	*mmap_locked = true;
207562306a36Sopenharmony_ci	return vma;
207662306a36Sopenharmony_ci}
207762306a36Sopenharmony_ci
207862306a36Sopenharmony_ci#define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
207962306a36Sopenharmony_cistatic int tcp_zerocopy_receive(struct sock *sk,
208062306a36Sopenharmony_ci				struct tcp_zerocopy_receive *zc,
208162306a36Sopenharmony_ci				struct scm_timestamping_internal *tss)
208262306a36Sopenharmony_ci{
208362306a36Sopenharmony_ci	u32 length = 0, offset, vma_len, avail_len, copylen = 0;
208462306a36Sopenharmony_ci	unsigned long address = (unsigned long)zc->address;
208562306a36Sopenharmony_ci	struct page *pages[TCP_ZEROCOPY_PAGE_BATCH_SIZE];
208662306a36Sopenharmony_ci	s32 copybuf_len = zc->copybuf_len;
208762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
208862306a36Sopenharmony_ci	const skb_frag_t *frags = NULL;
208962306a36Sopenharmony_ci	unsigned int pages_to_map = 0;
209062306a36Sopenharmony_ci	struct vm_area_struct *vma;
209162306a36Sopenharmony_ci	struct sk_buff *skb = NULL;
209262306a36Sopenharmony_ci	u32 seq = tp->copied_seq;
209362306a36Sopenharmony_ci	u32 total_bytes_to_map;
209462306a36Sopenharmony_ci	int inq = tcp_inq(sk);
209562306a36Sopenharmony_ci	bool mmap_locked;
209662306a36Sopenharmony_ci	int ret;
209762306a36Sopenharmony_ci
209862306a36Sopenharmony_ci	zc->copybuf_len = 0;
209962306a36Sopenharmony_ci	zc->msg_flags = 0;
210062306a36Sopenharmony_ci
210162306a36Sopenharmony_ci	if (address & (PAGE_SIZE - 1) || address != zc->address)
210262306a36Sopenharmony_ci		return -EINVAL;
210362306a36Sopenharmony_ci
210462306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
210562306a36Sopenharmony_ci		return -ENOTCONN;
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_ci	sock_rps_record_flow(sk);
210862306a36Sopenharmony_ci
210962306a36Sopenharmony_ci	if (inq && inq <= copybuf_len)
211062306a36Sopenharmony_ci		return receive_fallback_to_copy(sk, zc, inq, tss);
211162306a36Sopenharmony_ci
211262306a36Sopenharmony_ci	if (inq < PAGE_SIZE) {
211362306a36Sopenharmony_ci		zc->length = 0;
211462306a36Sopenharmony_ci		zc->recv_skip_hint = inq;
211562306a36Sopenharmony_ci		if (!inq && sock_flag(sk, SOCK_DONE))
211662306a36Sopenharmony_ci			return -EIO;
211762306a36Sopenharmony_ci		return 0;
211862306a36Sopenharmony_ci	}
211962306a36Sopenharmony_ci
212062306a36Sopenharmony_ci	vma = find_tcp_vma(current->mm, address, &mmap_locked);
212162306a36Sopenharmony_ci	if (!vma)
212262306a36Sopenharmony_ci		return -EINVAL;
212362306a36Sopenharmony_ci
212462306a36Sopenharmony_ci	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
212562306a36Sopenharmony_ci	avail_len = min_t(u32, vma_len, inq);
212662306a36Sopenharmony_ci	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
212762306a36Sopenharmony_ci	if (total_bytes_to_map) {
212862306a36Sopenharmony_ci		if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
212962306a36Sopenharmony_ci			zap_page_range_single(vma, address, total_bytes_to_map,
213062306a36Sopenharmony_ci					      NULL);
213162306a36Sopenharmony_ci		zc->length = total_bytes_to_map;
213262306a36Sopenharmony_ci		zc->recv_skip_hint = 0;
213362306a36Sopenharmony_ci	} else {
213462306a36Sopenharmony_ci		zc->length = avail_len;
213562306a36Sopenharmony_ci		zc->recv_skip_hint = avail_len;
213662306a36Sopenharmony_ci	}
213762306a36Sopenharmony_ci	ret = 0;
213862306a36Sopenharmony_ci	while (length + PAGE_SIZE <= zc->length) {
213962306a36Sopenharmony_ci		int mappable_offset;
214062306a36Sopenharmony_ci		struct page *page;
214162306a36Sopenharmony_ci
214262306a36Sopenharmony_ci		if (zc->recv_skip_hint < PAGE_SIZE) {
214362306a36Sopenharmony_ci			u32 offset_frag;
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci			if (skb) {
214662306a36Sopenharmony_ci				if (zc->recv_skip_hint > 0)
214762306a36Sopenharmony_ci					break;
214862306a36Sopenharmony_ci				skb = skb->next;
214962306a36Sopenharmony_ci				offset = seq - TCP_SKB_CB(skb)->seq;
215062306a36Sopenharmony_ci			} else {
215162306a36Sopenharmony_ci				skb = tcp_recv_skb(sk, seq, &offset);
215262306a36Sopenharmony_ci			}
215362306a36Sopenharmony_ci
215462306a36Sopenharmony_ci			if (TCP_SKB_CB(skb)->has_rxtstamp) {
215562306a36Sopenharmony_ci				tcp_update_recv_tstamps(skb, tss);
215662306a36Sopenharmony_ci				zc->msg_flags |= TCP_CMSG_TS;
215762306a36Sopenharmony_ci			}
215862306a36Sopenharmony_ci			zc->recv_skip_hint = skb->len - offset;
215962306a36Sopenharmony_ci			frags = skb_advance_to_frag(skb, offset, &offset_frag);
216062306a36Sopenharmony_ci			if (!frags || offset_frag)
216162306a36Sopenharmony_ci				break;
216262306a36Sopenharmony_ci		}
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_ci		mappable_offset = find_next_mappable_frag(frags,
216562306a36Sopenharmony_ci							  zc->recv_skip_hint);
216662306a36Sopenharmony_ci		if (mappable_offset) {
216762306a36Sopenharmony_ci			zc->recv_skip_hint = mappable_offset;
216862306a36Sopenharmony_ci			break;
216962306a36Sopenharmony_ci		}
217062306a36Sopenharmony_ci		page = skb_frag_page(frags);
217162306a36Sopenharmony_ci		prefetchw(page);
217262306a36Sopenharmony_ci		pages[pages_to_map++] = page;
217362306a36Sopenharmony_ci		length += PAGE_SIZE;
217462306a36Sopenharmony_ci		zc->recv_skip_hint -= PAGE_SIZE;
217562306a36Sopenharmony_ci		frags++;
217662306a36Sopenharmony_ci		if (pages_to_map == TCP_ZEROCOPY_PAGE_BATCH_SIZE ||
217762306a36Sopenharmony_ci		    zc->recv_skip_hint < PAGE_SIZE) {
217862306a36Sopenharmony_ci			/* Either full batch, or we're about to go to next skb
217962306a36Sopenharmony_ci			 * (and we cannot unroll failed ops across skbs).
218062306a36Sopenharmony_ci			 */
218162306a36Sopenharmony_ci			ret = tcp_zerocopy_vm_insert_batch(vma, pages,
218262306a36Sopenharmony_ci							   pages_to_map,
218362306a36Sopenharmony_ci							   &address, &length,
218462306a36Sopenharmony_ci							   &seq, zc,
218562306a36Sopenharmony_ci							   total_bytes_to_map);
218662306a36Sopenharmony_ci			if (ret)
218762306a36Sopenharmony_ci				goto out;
218862306a36Sopenharmony_ci			pages_to_map = 0;
218962306a36Sopenharmony_ci		}
219062306a36Sopenharmony_ci	}
219162306a36Sopenharmony_ci	if (pages_to_map) {
219262306a36Sopenharmony_ci		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pages_to_map,
219362306a36Sopenharmony_ci						   &address, &length, &seq,
219462306a36Sopenharmony_ci						   zc, total_bytes_to_map);
219562306a36Sopenharmony_ci	}
219662306a36Sopenharmony_ciout:
219762306a36Sopenharmony_ci	if (mmap_locked)
219862306a36Sopenharmony_ci		mmap_read_unlock(current->mm);
219962306a36Sopenharmony_ci	else
220062306a36Sopenharmony_ci		vma_end_read(vma);
220162306a36Sopenharmony_ci	/* Try to copy straggler data. */
220262306a36Sopenharmony_ci	if (!ret)
220362306a36Sopenharmony_ci		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);
220462306a36Sopenharmony_ci
220562306a36Sopenharmony_ci	if (length + copylen) {
220662306a36Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, seq);
220762306a36Sopenharmony_ci		tcp_rcv_space_adjust(sk);
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci		/* Clean up data we have read: This will do ACK frames. */
221062306a36Sopenharmony_ci		tcp_recv_skb(sk, seq, &offset);
221162306a36Sopenharmony_ci		tcp_cleanup_rbuf(sk, length + copylen);
221262306a36Sopenharmony_ci		ret = 0;
221362306a36Sopenharmony_ci		if (length == zc->length)
221462306a36Sopenharmony_ci			zc->recv_skip_hint = 0;
221562306a36Sopenharmony_ci	} else {
221662306a36Sopenharmony_ci		if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
221762306a36Sopenharmony_ci			ret = -EIO;
221862306a36Sopenharmony_ci	}
221962306a36Sopenharmony_ci	zc->length = length;
222062306a36Sopenharmony_ci	return ret;
222162306a36Sopenharmony_ci}
222262306a36Sopenharmony_ci#endif
222362306a36Sopenharmony_ci
222462306a36Sopenharmony_ci/* Similar to __sock_recv_timestamp, but does not require an skb */
222562306a36Sopenharmony_civoid tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
222662306a36Sopenharmony_ci			struct scm_timestamping_internal *tss)
222762306a36Sopenharmony_ci{
222862306a36Sopenharmony_ci	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
222962306a36Sopenharmony_ci	bool has_timestamping = false;
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
223262306a36Sopenharmony_ci		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
223362306a36Sopenharmony_ci			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
223462306a36Sopenharmony_ci				if (new_tstamp) {
223562306a36Sopenharmony_ci					struct __kernel_timespec kts = {
223662306a36Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
223762306a36Sopenharmony_ci						.tv_nsec = tss->ts[0].tv_nsec,
223862306a36Sopenharmony_ci					};
223962306a36Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
224062306a36Sopenharmony_ci						 sizeof(kts), &kts);
224162306a36Sopenharmony_ci				} else {
224262306a36Sopenharmony_ci					struct __kernel_old_timespec ts_old = {
224362306a36Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
224462306a36Sopenharmony_ci						.tv_nsec = tss->ts[0].tv_nsec,
224562306a36Sopenharmony_ci					};
224662306a36Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
224762306a36Sopenharmony_ci						 sizeof(ts_old), &ts_old);
224862306a36Sopenharmony_ci				}
224962306a36Sopenharmony_ci			} else {
225062306a36Sopenharmony_ci				if (new_tstamp) {
225162306a36Sopenharmony_ci					struct __kernel_sock_timeval stv = {
225262306a36Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
225362306a36Sopenharmony_ci						.tv_usec = tss->ts[0].tv_nsec / 1000,
225462306a36Sopenharmony_ci					};
225562306a36Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
225662306a36Sopenharmony_ci						 sizeof(stv), &stv);
225762306a36Sopenharmony_ci				} else {
225862306a36Sopenharmony_ci					struct __kernel_old_timeval tv = {
225962306a36Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
226062306a36Sopenharmony_ci						.tv_usec = tss->ts[0].tv_nsec / 1000,
226162306a36Sopenharmony_ci					};
226262306a36Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
226362306a36Sopenharmony_ci						 sizeof(tv), &tv);
226462306a36Sopenharmony_ci				}
226562306a36Sopenharmony_ci			}
226662306a36Sopenharmony_ci		}
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci		if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
226962306a36Sopenharmony_ci			has_timestamping = true;
227062306a36Sopenharmony_ci		else
227162306a36Sopenharmony_ci			tss->ts[0] = (struct timespec64) {0};
227262306a36Sopenharmony_ci	}
227362306a36Sopenharmony_ci
227462306a36Sopenharmony_ci	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
227562306a36Sopenharmony_ci		if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
227662306a36Sopenharmony_ci			has_timestamping = true;
227762306a36Sopenharmony_ci		else
227862306a36Sopenharmony_ci			tss->ts[2] = (struct timespec64) {0};
227962306a36Sopenharmony_ci	}
228062306a36Sopenharmony_ci
228162306a36Sopenharmony_ci	if (has_timestamping) {
228262306a36Sopenharmony_ci		tss->ts[1] = (struct timespec64) {0};
228362306a36Sopenharmony_ci		if (sock_flag(sk, SOCK_TSTAMP_NEW))
228462306a36Sopenharmony_ci			put_cmsg_scm_timestamping64(msg, tss);
228562306a36Sopenharmony_ci		else
228662306a36Sopenharmony_ci			put_cmsg_scm_timestamping(msg, tss);
228762306a36Sopenharmony_ci	}
228862306a36Sopenharmony_ci}
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_cistatic int tcp_inq_hint(struct sock *sk)
229162306a36Sopenharmony_ci{
229262306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
229362306a36Sopenharmony_ci	u32 copied_seq = READ_ONCE(tp->copied_seq);
229462306a36Sopenharmony_ci	u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
229562306a36Sopenharmony_ci	int inq;
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci	inq = rcv_nxt - copied_seq;
229862306a36Sopenharmony_ci	if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
229962306a36Sopenharmony_ci		lock_sock(sk);
230062306a36Sopenharmony_ci		inq = tp->rcv_nxt - tp->copied_seq;
230162306a36Sopenharmony_ci		release_sock(sk);
230262306a36Sopenharmony_ci	}
230362306a36Sopenharmony_ci	/* After receiving a FIN, tell the user-space to continue reading
230462306a36Sopenharmony_ci	 * by returning a non-zero inq.
230562306a36Sopenharmony_ci	 */
230662306a36Sopenharmony_ci	if (inq == 0 && sock_flag(sk, SOCK_DONE))
230762306a36Sopenharmony_ci		inq = 1;
230862306a36Sopenharmony_ci	return inq;
230962306a36Sopenharmony_ci}
231062306a36Sopenharmony_ci
231162306a36Sopenharmony_ci/*
231262306a36Sopenharmony_ci *	This routine copies from a sock struct into the user buffer.
231362306a36Sopenharmony_ci *
231462306a36Sopenharmony_ci *	Technical note: in 2.3 we work on _locked_ socket, so that
231562306a36Sopenharmony_ci *	tricks with *seq access order and skb->users are not required.
231662306a36Sopenharmony_ci *	Probably, code can be easily improved even more.
231762306a36Sopenharmony_ci */
231862306a36Sopenharmony_ci
231962306a36Sopenharmony_cistatic int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
232062306a36Sopenharmony_ci			      int flags, struct scm_timestamping_internal *tss,
232162306a36Sopenharmony_ci			      int *cmsg_flags)
232262306a36Sopenharmony_ci{
232362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
232462306a36Sopenharmony_ci	int copied = 0;
232562306a36Sopenharmony_ci	u32 peek_seq;
232662306a36Sopenharmony_ci	u32 *seq;
232762306a36Sopenharmony_ci	unsigned long used;
232862306a36Sopenharmony_ci	int err;
232962306a36Sopenharmony_ci	int target;		/* Read at least this many bytes */
233062306a36Sopenharmony_ci	long timeo;
233162306a36Sopenharmony_ci	struct sk_buff *skb, *last;
233262306a36Sopenharmony_ci	u32 urg_hole = 0;
233362306a36Sopenharmony_ci
233462306a36Sopenharmony_ci	err = -ENOTCONN;
233562306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
233662306a36Sopenharmony_ci		goto out;
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_ci	if (tp->recvmsg_inq) {
233962306a36Sopenharmony_ci		*cmsg_flags = TCP_CMSG_INQ;
234062306a36Sopenharmony_ci		msg->msg_get_inq = 1;
234162306a36Sopenharmony_ci	}
234262306a36Sopenharmony_ci	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci	/* Urgent data needs to be handled specially. */
234562306a36Sopenharmony_ci	if (flags & MSG_OOB)
234662306a36Sopenharmony_ci		goto recv_urg;
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_ci	if (unlikely(tp->repair)) {
234962306a36Sopenharmony_ci		err = -EPERM;
235062306a36Sopenharmony_ci		if (!(flags & MSG_PEEK))
235162306a36Sopenharmony_ci			goto out;
235262306a36Sopenharmony_ci
235362306a36Sopenharmony_ci		if (tp->repair_queue == TCP_SEND_QUEUE)
235462306a36Sopenharmony_ci			goto recv_sndq;
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci		err = -EINVAL;
235762306a36Sopenharmony_ci		if (tp->repair_queue == TCP_NO_QUEUE)
235862306a36Sopenharmony_ci			goto out;
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci		/* 'common' recv queue MSG_PEEK-ing */
236162306a36Sopenharmony_ci	}
236262306a36Sopenharmony_ci
236362306a36Sopenharmony_ci	seq = &tp->copied_seq;
236462306a36Sopenharmony_ci	if (flags & MSG_PEEK) {
236562306a36Sopenharmony_ci		peek_seq = tp->copied_seq;
236662306a36Sopenharmony_ci		seq = &peek_seq;
236762306a36Sopenharmony_ci	}
236862306a36Sopenharmony_ci
236962306a36Sopenharmony_ci	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_ci	do {
237262306a36Sopenharmony_ci		u32 offset;
237362306a36Sopenharmony_ci
237462306a36Sopenharmony_ci		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
237562306a36Sopenharmony_ci		if (unlikely(tp->urg_data) && tp->urg_seq == *seq) {
237662306a36Sopenharmony_ci			if (copied)
237762306a36Sopenharmony_ci				break;
237862306a36Sopenharmony_ci			if (signal_pending(current)) {
237962306a36Sopenharmony_ci				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
238062306a36Sopenharmony_ci				break;
238162306a36Sopenharmony_ci			}
238262306a36Sopenharmony_ci		}
238362306a36Sopenharmony_ci
238462306a36Sopenharmony_ci		/* Next get a buffer. */
238562306a36Sopenharmony_ci
238662306a36Sopenharmony_ci		last = skb_peek_tail(&sk->sk_receive_queue);
238762306a36Sopenharmony_ci		skb_queue_walk(&sk->sk_receive_queue, skb) {
238862306a36Sopenharmony_ci			last = skb;
238962306a36Sopenharmony_ci			/* Now that we have two receive queues this
239062306a36Sopenharmony_ci			 * shouldn't happen.
239162306a36Sopenharmony_ci			 */
239262306a36Sopenharmony_ci			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
239362306a36Sopenharmony_ci				 "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
239462306a36Sopenharmony_ci				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
239562306a36Sopenharmony_ci				 flags))
239662306a36Sopenharmony_ci				break;
239762306a36Sopenharmony_ci
239862306a36Sopenharmony_ci			offset = *seq - TCP_SKB_CB(skb)->seq;
239962306a36Sopenharmony_ci			if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
240062306a36Sopenharmony_ci				pr_err_once("%s: found a SYN, please report !\n", __func__);
240162306a36Sopenharmony_ci				offset--;
240262306a36Sopenharmony_ci			}
240362306a36Sopenharmony_ci			if (offset < skb->len)
240462306a36Sopenharmony_ci				goto found_ok_skb;
240562306a36Sopenharmony_ci			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
240662306a36Sopenharmony_ci				goto found_fin_ok;
240762306a36Sopenharmony_ci			WARN(!(flags & MSG_PEEK),
240862306a36Sopenharmony_ci			     "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
240962306a36Sopenharmony_ci			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
241062306a36Sopenharmony_ci		}
241162306a36Sopenharmony_ci
241262306a36Sopenharmony_ci		/* Well, if we have backlog, try to process it now yet. */
241362306a36Sopenharmony_ci
241462306a36Sopenharmony_ci		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
241562306a36Sopenharmony_ci			break;
241662306a36Sopenharmony_ci
241762306a36Sopenharmony_ci		if (copied) {
241862306a36Sopenharmony_ci			if (!timeo ||
241962306a36Sopenharmony_ci			    sk->sk_err ||
242062306a36Sopenharmony_ci			    sk->sk_state == TCP_CLOSE ||
242162306a36Sopenharmony_ci			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
242262306a36Sopenharmony_ci			    signal_pending(current))
242362306a36Sopenharmony_ci				break;
242462306a36Sopenharmony_ci		} else {
242562306a36Sopenharmony_ci			if (sock_flag(sk, SOCK_DONE))
242662306a36Sopenharmony_ci				break;
242762306a36Sopenharmony_ci
242862306a36Sopenharmony_ci			if (sk->sk_err) {
242962306a36Sopenharmony_ci				copied = sock_error(sk);
243062306a36Sopenharmony_ci				break;
243162306a36Sopenharmony_ci			}
243262306a36Sopenharmony_ci
243362306a36Sopenharmony_ci			if (sk->sk_shutdown & RCV_SHUTDOWN)
243462306a36Sopenharmony_ci				break;
243562306a36Sopenharmony_ci
243662306a36Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE) {
243762306a36Sopenharmony_ci				/* This occurs when user tries to read
243862306a36Sopenharmony_ci				 * from never connected socket.
243962306a36Sopenharmony_ci				 */
244062306a36Sopenharmony_ci				copied = -ENOTCONN;
244162306a36Sopenharmony_ci				break;
244262306a36Sopenharmony_ci			}
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci			if (!timeo) {
244562306a36Sopenharmony_ci				copied = -EAGAIN;
244662306a36Sopenharmony_ci				break;
244762306a36Sopenharmony_ci			}
244862306a36Sopenharmony_ci
244962306a36Sopenharmony_ci			if (signal_pending(current)) {
245062306a36Sopenharmony_ci				copied = sock_intr_errno(timeo);
245162306a36Sopenharmony_ci				break;
245262306a36Sopenharmony_ci			}
245362306a36Sopenharmony_ci		}
245462306a36Sopenharmony_ci
245562306a36Sopenharmony_ci		if (copied >= target) {
245662306a36Sopenharmony_ci			/* Do not sleep, just process backlog. */
245762306a36Sopenharmony_ci			__sk_flush_backlog(sk);
245862306a36Sopenharmony_ci		} else {
245962306a36Sopenharmony_ci			tcp_cleanup_rbuf(sk, copied);
246062306a36Sopenharmony_ci			err = sk_wait_data(sk, &timeo, last);
246162306a36Sopenharmony_ci			if (err < 0) {
246262306a36Sopenharmony_ci				err = copied ? : err;
246362306a36Sopenharmony_ci				goto out;
246462306a36Sopenharmony_ci			}
246562306a36Sopenharmony_ci		}
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_ci		if ((flags & MSG_PEEK) &&
246862306a36Sopenharmony_ci		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
246962306a36Sopenharmony_ci			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
247062306a36Sopenharmony_ci					    current->comm,
247162306a36Sopenharmony_ci					    task_pid_nr(current));
247262306a36Sopenharmony_ci			peek_seq = tp->copied_seq;
247362306a36Sopenharmony_ci		}
247462306a36Sopenharmony_ci		continue;
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_cifound_ok_skb:
247762306a36Sopenharmony_ci		/* Ok so how much can we use? */
247862306a36Sopenharmony_ci		used = skb->len - offset;
247962306a36Sopenharmony_ci		if (len < used)
248062306a36Sopenharmony_ci			used = len;
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci		/* Do we have urgent data here? */
248362306a36Sopenharmony_ci		if (unlikely(tp->urg_data)) {
248462306a36Sopenharmony_ci			u32 urg_offset = tp->urg_seq - *seq;
248562306a36Sopenharmony_ci			if (urg_offset < used) {
248662306a36Sopenharmony_ci				if (!urg_offset) {
248762306a36Sopenharmony_ci					if (!sock_flag(sk, SOCK_URGINLINE)) {
248862306a36Sopenharmony_ci						WRITE_ONCE(*seq, *seq + 1);
248962306a36Sopenharmony_ci						urg_hole++;
249062306a36Sopenharmony_ci						offset++;
249162306a36Sopenharmony_ci						used--;
249262306a36Sopenharmony_ci						if (!used)
249362306a36Sopenharmony_ci							goto skip_copy;
249462306a36Sopenharmony_ci					}
249562306a36Sopenharmony_ci				} else
249662306a36Sopenharmony_ci					used = urg_offset;
249762306a36Sopenharmony_ci			}
249862306a36Sopenharmony_ci		}
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci		if (!(flags & MSG_TRUNC)) {
250162306a36Sopenharmony_ci			err = skb_copy_datagram_msg(skb, offset, msg, used);
250262306a36Sopenharmony_ci			if (err) {
250362306a36Sopenharmony_ci				/* Exception. Bailout! */
250462306a36Sopenharmony_ci				if (!copied)
250562306a36Sopenharmony_ci					copied = -EFAULT;
250662306a36Sopenharmony_ci				break;
250762306a36Sopenharmony_ci			}
250862306a36Sopenharmony_ci		}
250962306a36Sopenharmony_ci
251062306a36Sopenharmony_ci		WRITE_ONCE(*seq, *seq + used);
251162306a36Sopenharmony_ci		copied += used;
251262306a36Sopenharmony_ci		len -= used;
251362306a36Sopenharmony_ci
251462306a36Sopenharmony_ci		tcp_rcv_space_adjust(sk);
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_ciskip_copy:
251762306a36Sopenharmony_ci		if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) {
251862306a36Sopenharmony_ci			WRITE_ONCE(tp->urg_data, 0);
251962306a36Sopenharmony_ci			tcp_fast_path_check(sk);
252062306a36Sopenharmony_ci		}
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->has_rxtstamp) {
252362306a36Sopenharmony_ci			tcp_update_recv_tstamps(skb, tss);
252462306a36Sopenharmony_ci			*cmsg_flags |= TCP_CMSG_TS;
252562306a36Sopenharmony_ci		}
252662306a36Sopenharmony_ci
252762306a36Sopenharmony_ci		if (used + offset < skb->len)
252862306a36Sopenharmony_ci			continue;
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
253162306a36Sopenharmony_ci			goto found_fin_ok;
253262306a36Sopenharmony_ci		if (!(flags & MSG_PEEK))
253362306a36Sopenharmony_ci			tcp_eat_recv_skb(sk, skb);
253462306a36Sopenharmony_ci		continue;
253562306a36Sopenharmony_ci
253662306a36Sopenharmony_cifound_fin_ok:
253762306a36Sopenharmony_ci		/* Process the FIN. */
253862306a36Sopenharmony_ci		WRITE_ONCE(*seq, *seq + 1);
253962306a36Sopenharmony_ci		if (!(flags & MSG_PEEK))
254062306a36Sopenharmony_ci			tcp_eat_recv_skb(sk, skb);
254162306a36Sopenharmony_ci		break;
254262306a36Sopenharmony_ci	} while (len > 0);
254362306a36Sopenharmony_ci
254462306a36Sopenharmony_ci	/* According to UNIX98, msg_name/msg_namelen are ignored
254562306a36Sopenharmony_ci	 * on connected socket. I was just happy when found this 8) --ANK
254662306a36Sopenharmony_ci	 */
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci	/* Clean up data we have read: This will do ACK frames. */
254962306a36Sopenharmony_ci	tcp_cleanup_rbuf(sk, copied);
255062306a36Sopenharmony_ci	return copied;
255162306a36Sopenharmony_ci
255262306a36Sopenharmony_ciout:
255362306a36Sopenharmony_ci	return err;
255462306a36Sopenharmony_ci
255562306a36Sopenharmony_cirecv_urg:
255662306a36Sopenharmony_ci	err = tcp_recv_urg(sk, msg, len, flags);
255762306a36Sopenharmony_ci	goto out;
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_cirecv_sndq:
256062306a36Sopenharmony_ci	err = tcp_peek_sndq(sk, msg, len);
256162306a36Sopenharmony_ci	goto out;
256262306a36Sopenharmony_ci}
256362306a36Sopenharmony_ci
256462306a36Sopenharmony_ciint tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
256562306a36Sopenharmony_ci		int *addr_len)
256662306a36Sopenharmony_ci{
256762306a36Sopenharmony_ci	int cmsg_flags = 0, ret;
256862306a36Sopenharmony_ci	struct scm_timestamping_internal tss;
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci	if (unlikely(flags & MSG_ERRQUEUE))
257162306a36Sopenharmony_ci		return inet_recv_error(sk, msg, len, addr_len);
257262306a36Sopenharmony_ci
257362306a36Sopenharmony_ci	if (sk_can_busy_loop(sk) &&
257462306a36Sopenharmony_ci	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
257562306a36Sopenharmony_ci	    sk->sk_state == TCP_ESTABLISHED)
257662306a36Sopenharmony_ci		sk_busy_loop(sk, flags & MSG_DONTWAIT);
257762306a36Sopenharmony_ci
257862306a36Sopenharmony_ci	lock_sock(sk);
257962306a36Sopenharmony_ci	ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
258062306a36Sopenharmony_ci	release_sock(sk);
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci	if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
258362306a36Sopenharmony_ci		if (cmsg_flags & TCP_CMSG_TS)
258462306a36Sopenharmony_ci			tcp_recv_timestamp(msg, sk, &tss);
258562306a36Sopenharmony_ci		if (msg->msg_get_inq) {
258662306a36Sopenharmony_ci			msg->msg_inq = tcp_inq_hint(sk);
258762306a36Sopenharmony_ci			if (cmsg_flags & TCP_CMSG_INQ)
258862306a36Sopenharmony_ci				put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
258962306a36Sopenharmony_ci					 sizeof(msg->msg_inq), &msg->msg_inq);
259062306a36Sopenharmony_ci		}
259162306a36Sopenharmony_ci	}
259262306a36Sopenharmony_ci	return ret;
259362306a36Sopenharmony_ci}
259462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_recvmsg);
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_civoid tcp_set_state(struct sock *sk, int state)
259762306a36Sopenharmony_ci{
259862306a36Sopenharmony_ci	int oldstate = sk->sk_state;
259962306a36Sopenharmony_ci
260062306a36Sopenharmony_ci	/* We defined a new enum for TCP states that are exported in BPF
260162306a36Sopenharmony_ci	 * so as not force the internal TCP states to be frozen. The
260262306a36Sopenharmony_ci	 * following checks will detect if an internal state value ever
260362306a36Sopenharmony_ci	 * differs from the BPF value. If this ever happens, then we will
260462306a36Sopenharmony_ci	 * need to remap the internal value to the BPF value before calling
260562306a36Sopenharmony_ci	 * tcp_call_bpf_2arg.
260662306a36Sopenharmony_ci	 */
260762306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
260862306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
260962306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
261062306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
261162306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
261262306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
261362306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
261462306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
261562306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
261662306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
261762306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
261862306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
261962306a36Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
262062306a36Sopenharmony_ci
262162306a36Sopenharmony_ci	/* bpf uapi header bpf.h defines an anonymous enum with values
262262306a36Sopenharmony_ci	 * BPF_TCP_* used by bpf programs. Currently gcc built vmlinux
262362306a36Sopenharmony_ci	 * is able to emit this enum in DWARF due to the above BUILD_BUG_ON.
262462306a36Sopenharmony_ci	 * But clang built vmlinux does not have this enum in DWARF
262562306a36Sopenharmony_ci	 * since clang removes the above code before generating IR/debuginfo.
262662306a36Sopenharmony_ci	 * Let us explicitly emit the type debuginfo to ensure the
262762306a36Sopenharmony_ci	 * above-mentioned anonymous enum in the vmlinux DWARF and hence BTF
262862306a36Sopenharmony_ci	 * regardless of which compiler is used.
262962306a36Sopenharmony_ci	 */
263062306a36Sopenharmony_ci	BTF_TYPE_EMIT_ENUM(BPF_TCP_ESTABLISHED);
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
263362306a36Sopenharmony_ci		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci	switch (state) {
263662306a36Sopenharmony_ci	case TCP_ESTABLISHED:
263762306a36Sopenharmony_ci		if (oldstate != TCP_ESTABLISHED)
263862306a36Sopenharmony_ci			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
263962306a36Sopenharmony_ci		break;
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci	case TCP_CLOSE:
264262306a36Sopenharmony_ci		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
264362306a36Sopenharmony_ci			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
264462306a36Sopenharmony_ci
264562306a36Sopenharmony_ci		sk->sk_prot->unhash(sk);
264662306a36Sopenharmony_ci		if (inet_csk(sk)->icsk_bind_hash &&
264762306a36Sopenharmony_ci		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
264862306a36Sopenharmony_ci			inet_put_port(sk);
264962306a36Sopenharmony_ci		fallthrough;
265062306a36Sopenharmony_ci	default:
265162306a36Sopenharmony_ci		if (oldstate == TCP_ESTABLISHED)
265262306a36Sopenharmony_ci			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
265362306a36Sopenharmony_ci	}
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_ci	/* Change state AFTER socket is unhashed to avoid closed
265662306a36Sopenharmony_ci	 * socket sitting in hash tables.
265762306a36Sopenharmony_ci	 */
265862306a36Sopenharmony_ci	inet_sk_state_store(sk, state);
265962306a36Sopenharmony_ci}
266062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_set_state);
266162306a36Sopenharmony_ci
266262306a36Sopenharmony_ci/*
266362306a36Sopenharmony_ci *	State processing on a close. This implements the state shift for
266462306a36Sopenharmony_ci *	sending our FIN frame. Note that we only send a FIN for some
266562306a36Sopenharmony_ci *	states. A shutdown() may have already sent the FIN, or we may be
266662306a36Sopenharmony_ci *	closed.
266762306a36Sopenharmony_ci */
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_cistatic const unsigned char new_state[16] = {
267062306a36Sopenharmony_ci  /* current state:        new state:      action:	*/
267162306a36Sopenharmony_ci  [0 /* (Invalid) */]	= TCP_CLOSE,
267262306a36Sopenharmony_ci  [TCP_ESTABLISHED]	= TCP_FIN_WAIT1 | TCP_ACTION_FIN,
267362306a36Sopenharmony_ci  [TCP_SYN_SENT]	= TCP_CLOSE,
267462306a36Sopenharmony_ci  [TCP_SYN_RECV]	= TCP_FIN_WAIT1 | TCP_ACTION_FIN,
267562306a36Sopenharmony_ci  [TCP_FIN_WAIT1]	= TCP_FIN_WAIT1,
267662306a36Sopenharmony_ci  [TCP_FIN_WAIT2]	= TCP_FIN_WAIT2,
267762306a36Sopenharmony_ci  [TCP_TIME_WAIT]	= TCP_CLOSE,
267862306a36Sopenharmony_ci  [TCP_CLOSE]		= TCP_CLOSE,
267962306a36Sopenharmony_ci  [TCP_CLOSE_WAIT]	= TCP_LAST_ACK  | TCP_ACTION_FIN,
268062306a36Sopenharmony_ci  [TCP_LAST_ACK]	= TCP_LAST_ACK,
268162306a36Sopenharmony_ci  [TCP_LISTEN]		= TCP_CLOSE,
268262306a36Sopenharmony_ci  [TCP_CLOSING]		= TCP_CLOSING,
268362306a36Sopenharmony_ci  [TCP_NEW_SYN_RECV]	= TCP_CLOSE,	/* should not happen ! */
268462306a36Sopenharmony_ci};
268562306a36Sopenharmony_ci
268662306a36Sopenharmony_cistatic int tcp_close_state(struct sock *sk)
268762306a36Sopenharmony_ci{
268862306a36Sopenharmony_ci	int next = (int)new_state[sk->sk_state];
268962306a36Sopenharmony_ci	int ns = next & TCP_STATE_MASK;
269062306a36Sopenharmony_ci
269162306a36Sopenharmony_ci	tcp_set_state(sk, ns);
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci	return next & TCP_ACTION_FIN;
269462306a36Sopenharmony_ci}
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_ci/*
269762306a36Sopenharmony_ci *	Shutdown the sending side of a connection. Much like close except
269862306a36Sopenharmony_ci *	that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
269962306a36Sopenharmony_ci */
270062306a36Sopenharmony_ci
270162306a36Sopenharmony_civoid tcp_shutdown(struct sock *sk, int how)
270262306a36Sopenharmony_ci{
270362306a36Sopenharmony_ci	/*	We need to grab some memory, and put together a FIN,
270462306a36Sopenharmony_ci	 *	and then put it into the queue to be sent.
270562306a36Sopenharmony_ci	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
270662306a36Sopenharmony_ci	 */
270762306a36Sopenharmony_ci	if (!(how & SEND_SHUTDOWN))
270862306a36Sopenharmony_ci		return;
270962306a36Sopenharmony_ci
271062306a36Sopenharmony_ci	/* If we've already sent a FIN, or it's a closed state, skip this. */
271162306a36Sopenharmony_ci	if ((1 << sk->sk_state) &
271262306a36Sopenharmony_ci	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
271362306a36Sopenharmony_ci	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
271462306a36Sopenharmony_ci		/* Clear out any half completed packets.  FIN if needed. */
271562306a36Sopenharmony_ci		if (tcp_close_state(sk))
271662306a36Sopenharmony_ci			tcp_send_fin(sk);
271762306a36Sopenharmony_ci	}
271862306a36Sopenharmony_ci}
271962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_shutdown);
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ciint tcp_orphan_count_sum(void)
272262306a36Sopenharmony_ci{
272362306a36Sopenharmony_ci	int i, total = 0;
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ci	for_each_possible_cpu(i)
272662306a36Sopenharmony_ci		total += per_cpu(tcp_orphan_count, i);
272762306a36Sopenharmony_ci
272862306a36Sopenharmony_ci	return max(total, 0);
272962306a36Sopenharmony_ci}
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_cistatic int tcp_orphan_cache;
273262306a36Sopenharmony_cistatic struct timer_list tcp_orphan_timer;
273362306a36Sopenharmony_ci#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_cistatic void tcp_orphan_update(struct timer_list *unused)
273662306a36Sopenharmony_ci{
273762306a36Sopenharmony_ci	WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
273862306a36Sopenharmony_ci	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
273962306a36Sopenharmony_ci}
274062306a36Sopenharmony_ci
274162306a36Sopenharmony_cistatic bool tcp_too_many_orphans(int shift)
274262306a36Sopenharmony_ci{
274362306a36Sopenharmony_ci	return READ_ONCE(tcp_orphan_cache) << shift >
274462306a36Sopenharmony_ci		READ_ONCE(sysctl_tcp_max_orphans);
274562306a36Sopenharmony_ci}
274662306a36Sopenharmony_ci
274762306a36Sopenharmony_cibool tcp_check_oom(struct sock *sk, int shift)
274862306a36Sopenharmony_ci{
274962306a36Sopenharmony_ci	bool too_many_orphans, out_of_socket_memory;
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci	too_many_orphans = tcp_too_many_orphans(shift);
275262306a36Sopenharmony_ci	out_of_socket_memory = tcp_out_of_memory(sk);
275362306a36Sopenharmony_ci
275462306a36Sopenharmony_ci	if (too_many_orphans)
275562306a36Sopenharmony_ci		net_info_ratelimited("too many orphaned sockets\n");
275662306a36Sopenharmony_ci	if (out_of_socket_memory)
275762306a36Sopenharmony_ci		net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
275862306a36Sopenharmony_ci	return too_many_orphans || out_of_socket_memory;
275962306a36Sopenharmony_ci}
276062306a36Sopenharmony_ci
276162306a36Sopenharmony_civoid __tcp_close(struct sock *sk, long timeout)
276262306a36Sopenharmony_ci{
276362306a36Sopenharmony_ci	struct sk_buff *skb;
276462306a36Sopenharmony_ci	int data_was_unread = 0;
276562306a36Sopenharmony_ci	int state;
276662306a36Sopenharmony_ci
276762306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
276862306a36Sopenharmony_ci
276962306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN) {
277062306a36Sopenharmony_ci		tcp_set_state(sk, TCP_CLOSE);
277162306a36Sopenharmony_ci
277262306a36Sopenharmony_ci		/* Special case. */
277362306a36Sopenharmony_ci		inet_csk_listen_stop(sk);
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci		goto adjudge_to_death;
277662306a36Sopenharmony_ci	}
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ci	/*  We need to flush the recv. buffs.  We do this only on the
277962306a36Sopenharmony_ci	 *  descriptor close, not protocol-sourced closes, because the
278062306a36Sopenharmony_ci	 *  reader process may not have drained the data yet!
278162306a36Sopenharmony_ci	 */
278262306a36Sopenharmony_ci	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
278362306a36Sopenharmony_ci		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
278662306a36Sopenharmony_ci			len--;
278762306a36Sopenharmony_ci		data_was_unread += len;
278862306a36Sopenharmony_ci		__kfree_skb(skb);
278962306a36Sopenharmony_ci	}
279062306a36Sopenharmony_ci
279162306a36Sopenharmony_ci	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
279262306a36Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE)
279362306a36Sopenharmony_ci		goto adjudge_to_death;
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci	/* As outlined in RFC 2525, section 2.17, we send a RST here because
279662306a36Sopenharmony_ci	 * data was lost. To witness the awful effects of the old behavior of
279762306a36Sopenharmony_ci	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
279862306a36Sopenharmony_ci	 * GET in an FTP client, suspend the process, wait for the client to
279962306a36Sopenharmony_ci	 * advertise a zero window, then kill -9 the FTP client, wheee...
280062306a36Sopenharmony_ci	 * Note: timeout is always zero in such a case.
280162306a36Sopenharmony_ci	 */
280262306a36Sopenharmony_ci	if (unlikely(tcp_sk(sk)->repair)) {
280362306a36Sopenharmony_ci		sk->sk_prot->disconnect(sk, 0);
280462306a36Sopenharmony_ci	} else if (data_was_unread) {
280562306a36Sopenharmony_ci		/* Unread data was tossed, zap the connection. */
280662306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
280762306a36Sopenharmony_ci		tcp_set_state(sk, TCP_CLOSE);
280862306a36Sopenharmony_ci		tcp_send_active_reset(sk, sk->sk_allocation);
280962306a36Sopenharmony_ci	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
281062306a36Sopenharmony_ci		/* Check zero linger _after_ checking for unread data. */
281162306a36Sopenharmony_ci		sk->sk_prot->disconnect(sk, 0);
281262306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
281362306a36Sopenharmony_ci	} else if (tcp_close_state(sk)) {
281462306a36Sopenharmony_ci		/* We FIN if the application ate all the data before
281562306a36Sopenharmony_ci		 * zapping the connection.
281662306a36Sopenharmony_ci		 */
281762306a36Sopenharmony_ci
281862306a36Sopenharmony_ci		/* RED-PEN. Formally speaking, we have broken TCP state
281962306a36Sopenharmony_ci		 * machine. State transitions:
282062306a36Sopenharmony_ci		 *
282162306a36Sopenharmony_ci		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
282262306a36Sopenharmony_ci		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
282362306a36Sopenharmony_ci		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
282462306a36Sopenharmony_ci		 *
282562306a36Sopenharmony_ci		 * are legal only when FIN has been sent (i.e. in window),
282662306a36Sopenharmony_ci		 * rather than queued out of window. Purists blame.
282762306a36Sopenharmony_ci		 *
282862306a36Sopenharmony_ci		 * F.e. "RFC state" is ESTABLISHED,
282962306a36Sopenharmony_ci		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
283062306a36Sopenharmony_ci		 *
283162306a36Sopenharmony_ci		 * The visible declinations are that sometimes
283262306a36Sopenharmony_ci		 * we enter time-wait state, when it is not required really
283362306a36Sopenharmony_ci		 * (harmless), do not send active resets, when they are
283462306a36Sopenharmony_ci		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
283562306a36Sopenharmony_ci		 * they look as CLOSING or LAST_ACK for Linux)
283662306a36Sopenharmony_ci		 * Probably, I missed some more holelets.
283762306a36Sopenharmony_ci		 * 						--ANK
283862306a36Sopenharmony_ci		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
283962306a36Sopenharmony_ci		 * in a single packet! (May consider it later but will
284062306a36Sopenharmony_ci		 * probably need API support or TCP_CORK SYN-ACK until
284162306a36Sopenharmony_ci		 * data is written and socket is closed.)
284262306a36Sopenharmony_ci		 */
284362306a36Sopenharmony_ci		tcp_send_fin(sk);
284462306a36Sopenharmony_ci	}
284562306a36Sopenharmony_ci
284662306a36Sopenharmony_ci	sk_stream_wait_close(sk, timeout);
284762306a36Sopenharmony_ci
284862306a36Sopenharmony_ciadjudge_to_death:
284962306a36Sopenharmony_ci	state = sk->sk_state;
285062306a36Sopenharmony_ci	sock_hold(sk);
285162306a36Sopenharmony_ci	sock_orphan(sk);
285262306a36Sopenharmony_ci
285362306a36Sopenharmony_ci	local_bh_disable();
285462306a36Sopenharmony_ci	bh_lock_sock(sk);
285562306a36Sopenharmony_ci	/* remove backlog if any, without releasing ownership. */
285662306a36Sopenharmony_ci	__release_sock(sk);
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_ci	this_cpu_inc(tcp_orphan_count);
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ci	/* Have we already been destroyed by a softirq or backlog? */
286162306a36Sopenharmony_ci	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
286262306a36Sopenharmony_ci		goto out;
286362306a36Sopenharmony_ci
286462306a36Sopenharmony_ci	/*	This is a (useful) BSD violating of the RFC. There is a
286562306a36Sopenharmony_ci	 *	problem with TCP as specified in that the other end could
286662306a36Sopenharmony_ci	 *	keep a socket open forever with no application left this end.
286762306a36Sopenharmony_ci	 *	We use a 1 minute timeout (about the same as BSD) then kill
286862306a36Sopenharmony_ci	 *	our end. If they send after that then tough - BUT: long enough
286962306a36Sopenharmony_ci	 *	that we won't make the old 4*rto = almost no time - whoops
287062306a36Sopenharmony_ci	 *	reset mistake.
287162306a36Sopenharmony_ci	 *
287262306a36Sopenharmony_ci	 *	Nope, it was not mistake. It is really desired behaviour
287362306a36Sopenharmony_ci	 *	f.e. on http servers, when such sockets are useless, but
287462306a36Sopenharmony_ci	 *	consume significant resources. Let's do it with special
287562306a36Sopenharmony_ci	 *	linger2	option.					--ANK
287662306a36Sopenharmony_ci	 */
287762306a36Sopenharmony_ci
287862306a36Sopenharmony_ci	if (sk->sk_state == TCP_FIN_WAIT2) {
287962306a36Sopenharmony_ci		struct tcp_sock *tp = tcp_sk(sk);
288062306a36Sopenharmony_ci		if (READ_ONCE(tp->linger2) < 0) {
288162306a36Sopenharmony_ci			tcp_set_state(sk, TCP_CLOSE);
288262306a36Sopenharmony_ci			tcp_send_active_reset(sk, GFP_ATOMIC);
288362306a36Sopenharmony_ci			__NET_INC_STATS(sock_net(sk),
288462306a36Sopenharmony_ci					LINUX_MIB_TCPABORTONLINGER);
288562306a36Sopenharmony_ci		} else {
288662306a36Sopenharmony_ci			const int tmo = tcp_fin_time(sk);
288762306a36Sopenharmony_ci
288862306a36Sopenharmony_ci			if (tmo > TCP_TIMEWAIT_LEN) {
288962306a36Sopenharmony_ci				inet_csk_reset_keepalive_timer(sk,
289062306a36Sopenharmony_ci						tmo - TCP_TIMEWAIT_LEN);
289162306a36Sopenharmony_ci			} else {
289262306a36Sopenharmony_ci				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
289362306a36Sopenharmony_ci				goto out;
289462306a36Sopenharmony_ci			}
289562306a36Sopenharmony_ci		}
289662306a36Sopenharmony_ci	}
289762306a36Sopenharmony_ci	if (sk->sk_state != TCP_CLOSE) {
289862306a36Sopenharmony_ci		if (tcp_check_oom(sk, 0)) {
289962306a36Sopenharmony_ci			tcp_set_state(sk, TCP_CLOSE);
290062306a36Sopenharmony_ci			tcp_send_active_reset(sk, GFP_ATOMIC);
290162306a36Sopenharmony_ci			__NET_INC_STATS(sock_net(sk),
290262306a36Sopenharmony_ci					LINUX_MIB_TCPABORTONMEMORY);
290362306a36Sopenharmony_ci		} else if (!check_net(sock_net(sk))) {
290462306a36Sopenharmony_ci			/* Not possible to send reset; just close */
290562306a36Sopenharmony_ci			tcp_set_state(sk, TCP_CLOSE);
290662306a36Sopenharmony_ci		}
290762306a36Sopenharmony_ci	}
290862306a36Sopenharmony_ci
290962306a36Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE) {
291062306a36Sopenharmony_ci		struct request_sock *req;
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_ci		req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
291362306a36Sopenharmony_ci						lockdep_sock_is_held(sk));
291462306a36Sopenharmony_ci		/* We could get here with a non-NULL req if the socket is
291562306a36Sopenharmony_ci		 * aborted (e.g., closed with unread data) before 3WHS
291662306a36Sopenharmony_ci		 * finishes.
291762306a36Sopenharmony_ci		 */
291862306a36Sopenharmony_ci		if (req)
291962306a36Sopenharmony_ci			reqsk_fastopen_remove(sk, req, false);
292062306a36Sopenharmony_ci		inet_csk_destroy_sock(sk);
292162306a36Sopenharmony_ci	}
292262306a36Sopenharmony_ci	/* Otherwise, socket is reprieved until protocol close. */
292362306a36Sopenharmony_ci
292462306a36Sopenharmony_ciout:
292562306a36Sopenharmony_ci	bh_unlock_sock(sk);
292662306a36Sopenharmony_ci	local_bh_enable();
292762306a36Sopenharmony_ci}
292862306a36Sopenharmony_ci
/* Close entry point for a TCP socket: run __tcp_close() with the socket
 * locked, then drop one reference on @sk.
 *
 * @timeout is forwarded unchanged to __tcp_close().
 */
void tcp_close(struct sock *sk, long timeout)
{
	lock_sock(sk);
	__tcp_close(sk, timeout);
	release_sock(sk);

	/* NOTE(review): presumably pairs with the sock_hold() taken in
	 * __tcp_close() before the socket is orphaned - confirm.
	 */
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
293762306a36Sopenharmony_ci
293862306a36Sopenharmony_ci/* These states need RST on ABORT according to RFC793 */
293962306a36Sopenharmony_ci
294062306a36Sopenharmony_cistatic inline bool tcp_need_reset(int state)
294162306a36Sopenharmony_ci{
294262306a36Sopenharmony_ci	return (1 << state) &
294362306a36Sopenharmony_ci	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
294462306a36Sopenharmony_ci		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
294562306a36Sopenharmony_ci}
294662306a36Sopenharmony_ci
294762306a36Sopenharmony_cistatic void tcp_rtx_queue_purge(struct sock *sk)
294862306a36Sopenharmony_ci{
294962306a36Sopenharmony_ci	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
295062306a36Sopenharmony_ci
295162306a36Sopenharmony_ci	tcp_sk(sk)->highest_sack = NULL;
295262306a36Sopenharmony_ci	while (p) {
295362306a36Sopenharmony_ci		struct sk_buff *skb = rb_to_skb(p);
295462306a36Sopenharmony_ci
295562306a36Sopenharmony_ci		p = rb_next(p);
295662306a36Sopenharmony_ci		/* Since we are deleting whole queue, no need to
295762306a36Sopenharmony_ci		 * list_del(&skb->tcp_tsorted_anchor)
295862306a36Sopenharmony_ci		 */
295962306a36Sopenharmony_ci		tcp_rtx_queue_unlink(skb, sk);
296062306a36Sopenharmony_ci		tcp_wmem_free_skb(sk, skb);
296162306a36Sopenharmony_ci	}
296262306a36Sopenharmony_ci}
296362306a36Sopenharmony_ci
296462306a36Sopenharmony_civoid tcp_write_queue_purge(struct sock *sk)
296562306a36Sopenharmony_ci{
296662306a36Sopenharmony_ci	struct sk_buff *skb;
296762306a36Sopenharmony_ci
296862306a36Sopenharmony_ci	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
296962306a36Sopenharmony_ci	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
297062306a36Sopenharmony_ci		tcp_skb_tsorted_anchor_cleanup(skb);
297162306a36Sopenharmony_ci		tcp_wmem_free_skb(sk, skb);
297262306a36Sopenharmony_ci	}
297362306a36Sopenharmony_ci	tcp_rtx_queue_purge(sk);
297462306a36Sopenharmony_ci	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
297562306a36Sopenharmony_ci	tcp_clear_all_retrans_hints(tcp_sk(sk));
297662306a36Sopenharmony_ci	tcp_sk(sk)->packets_out = 0;
297762306a36Sopenharmony_ci	inet_csk(sk)->icsk_backoff = 0;
297862306a36Sopenharmony_ci}
297962306a36Sopenharmony_ci
298062306a36Sopenharmony_ciint tcp_disconnect(struct sock *sk, int flags)
298162306a36Sopenharmony_ci{
298262306a36Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
298362306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
298462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
298562306a36Sopenharmony_ci	int old_state = sk->sk_state;
298662306a36Sopenharmony_ci	u32 seq;
298762306a36Sopenharmony_ci
298862306a36Sopenharmony_ci	if (old_state != TCP_CLOSE)
298962306a36Sopenharmony_ci		tcp_set_state(sk, TCP_CLOSE);
299062306a36Sopenharmony_ci
299162306a36Sopenharmony_ci	/* ABORT function of RFC793 */
299262306a36Sopenharmony_ci	if (old_state == TCP_LISTEN) {
299362306a36Sopenharmony_ci		inet_csk_listen_stop(sk);
299462306a36Sopenharmony_ci	} else if (unlikely(tp->repair)) {
299562306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err, ECONNABORTED);
299662306a36Sopenharmony_ci	} else if (tcp_need_reset(old_state) ||
299762306a36Sopenharmony_ci		   (tp->snd_nxt != tp->write_seq &&
299862306a36Sopenharmony_ci		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
299962306a36Sopenharmony_ci		/* The last check adjusts for discrepancy of Linux wrt. RFC
300062306a36Sopenharmony_ci		 * states
300162306a36Sopenharmony_ci		 */
300262306a36Sopenharmony_ci		tcp_send_active_reset(sk, gfp_any());
300362306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err, ECONNRESET);
300462306a36Sopenharmony_ci	} else if (old_state == TCP_SYN_SENT)
300562306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err, ECONNRESET);
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ci	tcp_clear_xmit_timers(sk);
300862306a36Sopenharmony_ci	__skb_queue_purge(&sk->sk_receive_queue);
300962306a36Sopenharmony_ci	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
301062306a36Sopenharmony_ci	WRITE_ONCE(tp->urg_data, 0);
301162306a36Sopenharmony_ci	tcp_write_queue_purge(sk);
301262306a36Sopenharmony_ci	tcp_fastopen_active_disable_ofo_check(sk);
301362306a36Sopenharmony_ci	skb_rbtree_purge(&tp->out_of_order_queue);
301462306a36Sopenharmony_ci
301562306a36Sopenharmony_ci	inet->inet_dport = 0;
301662306a36Sopenharmony_ci
301762306a36Sopenharmony_ci	inet_bhash2_reset_saddr(sk);
301862306a36Sopenharmony_ci
301962306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_shutdown, 0);
302062306a36Sopenharmony_ci	sock_reset_flag(sk, SOCK_DONE);
302162306a36Sopenharmony_ci	tp->srtt_us = 0;
302262306a36Sopenharmony_ci	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
302362306a36Sopenharmony_ci	tp->rcv_rtt_last_tsecr = 0;
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	seq = tp->write_seq + tp->max_window + 2;
302662306a36Sopenharmony_ci	if (!seq)
302762306a36Sopenharmony_ci		seq = 1;
302862306a36Sopenharmony_ci	WRITE_ONCE(tp->write_seq, seq);
302962306a36Sopenharmony_ci
303062306a36Sopenharmony_ci	icsk->icsk_backoff = 0;
303162306a36Sopenharmony_ci	icsk->icsk_probes_out = 0;
303262306a36Sopenharmony_ci	icsk->icsk_probes_tstamp = 0;
303362306a36Sopenharmony_ci	icsk->icsk_rto = TCP_TIMEOUT_INIT;
303462306a36Sopenharmony_ci	icsk->icsk_rto_min = TCP_RTO_MIN;
303562306a36Sopenharmony_ci	icsk->icsk_delack_max = TCP_DELACK_MAX;
303662306a36Sopenharmony_ci	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
303762306a36Sopenharmony_ci	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
303862306a36Sopenharmony_ci	tp->snd_cwnd_cnt = 0;
303962306a36Sopenharmony_ci	tp->is_cwnd_limited = 0;
304062306a36Sopenharmony_ci	tp->max_packets_out = 0;
304162306a36Sopenharmony_ci	tp->window_clamp = 0;
304262306a36Sopenharmony_ci	tp->delivered = 0;
304362306a36Sopenharmony_ci	tp->delivered_ce = 0;
304462306a36Sopenharmony_ci	if (icsk->icsk_ca_ops->release)
304562306a36Sopenharmony_ci		icsk->icsk_ca_ops->release(sk);
304662306a36Sopenharmony_ci	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
304762306a36Sopenharmony_ci	icsk->icsk_ca_initialized = 0;
304862306a36Sopenharmony_ci	tcp_set_ca_state(sk, TCP_CA_Open);
304962306a36Sopenharmony_ci	tp->is_sack_reneg = 0;
305062306a36Sopenharmony_ci	tcp_clear_retrans(tp);
305162306a36Sopenharmony_ci	tp->total_retrans = 0;
305262306a36Sopenharmony_ci	inet_csk_delack_init(sk);
305362306a36Sopenharmony_ci	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
305462306a36Sopenharmony_ci	 * issue in __tcp_select_window()
305562306a36Sopenharmony_ci	 */
305662306a36Sopenharmony_ci	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
305762306a36Sopenharmony_ci	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
305862306a36Sopenharmony_ci	__sk_dst_reset(sk);
305962306a36Sopenharmony_ci	dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
306062306a36Sopenharmony_ci	tcp_saved_syn_free(tp);
306162306a36Sopenharmony_ci	tp->compressed_ack = 0;
306262306a36Sopenharmony_ci	tp->segs_in = 0;
306362306a36Sopenharmony_ci	tp->segs_out = 0;
306462306a36Sopenharmony_ci	tp->bytes_sent = 0;
306562306a36Sopenharmony_ci	tp->bytes_acked = 0;
306662306a36Sopenharmony_ci	tp->bytes_received = 0;
306762306a36Sopenharmony_ci	tp->bytes_retrans = 0;
306862306a36Sopenharmony_ci	tp->data_segs_in = 0;
306962306a36Sopenharmony_ci	tp->data_segs_out = 0;
307062306a36Sopenharmony_ci	tp->duplicate_sack[0].start_seq = 0;
307162306a36Sopenharmony_ci	tp->duplicate_sack[0].end_seq = 0;
307262306a36Sopenharmony_ci	tp->dsack_dups = 0;
307362306a36Sopenharmony_ci	tp->reord_seen = 0;
307462306a36Sopenharmony_ci	tp->retrans_out = 0;
307562306a36Sopenharmony_ci	tp->sacked_out = 0;
307662306a36Sopenharmony_ci	tp->tlp_high_seq = 0;
307762306a36Sopenharmony_ci	tp->last_oow_ack_time = 0;
307862306a36Sopenharmony_ci	tp->plb_rehash = 0;
307962306a36Sopenharmony_ci	/* There's a bubble in the pipe until at least the first ACK. */
308062306a36Sopenharmony_ci	tp->app_limited = ~0U;
308162306a36Sopenharmony_ci	tp->rate_app_limited = 1;
308262306a36Sopenharmony_ci	tp->rack.mstamp = 0;
308362306a36Sopenharmony_ci	tp->rack.advanced = 0;
308462306a36Sopenharmony_ci	tp->rack.reo_wnd_steps = 1;
308562306a36Sopenharmony_ci	tp->rack.last_delivered = 0;
308662306a36Sopenharmony_ci	tp->rack.reo_wnd_persist = 0;
308762306a36Sopenharmony_ci	tp->rack.dsack_seen = 0;
308862306a36Sopenharmony_ci	tp->syn_data_acked = 0;
308962306a36Sopenharmony_ci	tp->rx_opt.saw_tstamp = 0;
309062306a36Sopenharmony_ci	tp->rx_opt.dsack = 0;
309162306a36Sopenharmony_ci	tp->rx_opt.num_sacks = 0;
309262306a36Sopenharmony_ci	tp->rcv_ooopack = 0;
309362306a36Sopenharmony_ci
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_ci	/* Clean up fastopen related fields */
309662306a36Sopenharmony_ci	tcp_free_fastopen_req(tp);
309762306a36Sopenharmony_ci	inet_clear_bit(DEFER_CONNECT, sk);
309862306a36Sopenharmony_ci	tp->fastopen_client_fail = 0;
309962306a36Sopenharmony_ci
310062306a36Sopenharmony_ci	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
310162306a36Sopenharmony_ci
310262306a36Sopenharmony_ci	if (sk->sk_frag.page) {
310362306a36Sopenharmony_ci		put_page(sk->sk_frag.page);
310462306a36Sopenharmony_ci		sk->sk_frag.page = NULL;
310562306a36Sopenharmony_ci		sk->sk_frag.offset = 0;
310662306a36Sopenharmony_ci	}
310762306a36Sopenharmony_ci	sk_error_report(sk);
310862306a36Sopenharmony_ci	return 0;
310962306a36Sopenharmony_ci}
311062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_disconnect);
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_cistatic inline bool tcp_can_repair_sock(const struct sock *sk)
311362306a36Sopenharmony_ci{
311462306a36Sopenharmony_ci	return sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
311562306a36Sopenharmony_ci		(sk->sk_state != TCP_LISTEN);
311662306a36Sopenharmony_ci}
311762306a36Sopenharmony_ci
311862306a36Sopenharmony_cistatic int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
311962306a36Sopenharmony_ci{
312062306a36Sopenharmony_ci	struct tcp_repair_window opt;
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ci	if (!tp->repair)
312362306a36Sopenharmony_ci		return -EPERM;
312462306a36Sopenharmony_ci
312562306a36Sopenharmony_ci	if (len != sizeof(opt))
312662306a36Sopenharmony_ci		return -EINVAL;
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_ci	if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
312962306a36Sopenharmony_ci		return -EFAULT;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci	if (opt.max_window < opt.snd_wnd)
313262306a36Sopenharmony_ci		return -EINVAL;
313362306a36Sopenharmony_ci
313462306a36Sopenharmony_ci	if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
313562306a36Sopenharmony_ci		return -EINVAL;
313662306a36Sopenharmony_ci
313762306a36Sopenharmony_ci	if (after(opt.rcv_wup, tp->rcv_nxt))
313862306a36Sopenharmony_ci		return -EINVAL;
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_ci	tp->snd_wl1	= opt.snd_wl1;
314162306a36Sopenharmony_ci	tp->snd_wnd	= opt.snd_wnd;
314262306a36Sopenharmony_ci	tp->max_window	= opt.max_window;
314362306a36Sopenharmony_ci
314462306a36Sopenharmony_ci	tp->rcv_wnd	= opt.rcv_wnd;
314562306a36Sopenharmony_ci	tp->rcv_wup	= opt.rcv_wup;
314662306a36Sopenharmony_ci
314762306a36Sopenharmony_ci	return 0;
314862306a36Sopenharmony_ci}
314962306a36Sopenharmony_ci
315062306a36Sopenharmony_cistatic int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
315162306a36Sopenharmony_ci		unsigned int len)
315262306a36Sopenharmony_ci{
315362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
315462306a36Sopenharmony_ci	struct tcp_repair_opt opt;
315562306a36Sopenharmony_ci	size_t offset = 0;
315662306a36Sopenharmony_ci
315762306a36Sopenharmony_ci	while (len >= sizeof(opt)) {
315862306a36Sopenharmony_ci		if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt)))
315962306a36Sopenharmony_ci			return -EFAULT;
316062306a36Sopenharmony_ci
316162306a36Sopenharmony_ci		offset += sizeof(opt);
316262306a36Sopenharmony_ci		len -= sizeof(opt);
316362306a36Sopenharmony_ci
316462306a36Sopenharmony_ci		switch (opt.opt_code) {
316562306a36Sopenharmony_ci		case TCPOPT_MSS:
316662306a36Sopenharmony_ci			tp->rx_opt.mss_clamp = opt.opt_val;
316762306a36Sopenharmony_ci			tcp_mtup_init(sk);
316862306a36Sopenharmony_ci			break;
316962306a36Sopenharmony_ci		case TCPOPT_WINDOW:
317062306a36Sopenharmony_ci			{
317162306a36Sopenharmony_ci				u16 snd_wscale = opt.opt_val & 0xFFFF;
317262306a36Sopenharmony_ci				u16 rcv_wscale = opt.opt_val >> 16;
317362306a36Sopenharmony_ci
317462306a36Sopenharmony_ci				if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
317562306a36Sopenharmony_ci					return -EFBIG;
317662306a36Sopenharmony_ci
317762306a36Sopenharmony_ci				tp->rx_opt.snd_wscale = snd_wscale;
317862306a36Sopenharmony_ci				tp->rx_opt.rcv_wscale = rcv_wscale;
317962306a36Sopenharmony_ci				tp->rx_opt.wscale_ok = 1;
318062306a36Sopenharmony_ci			}
318162306a36Sopenharmony_ci			break;
318262306a36Sopenharmony_ci		case TCPOPT_SACK_PERM:
318362306a36Sopenharmony_ci			if (opt.opt_val != 0)
318462306a36Sopenharmony_ci				return -EINVAL;
318562306a36Sopenharmony_ci
318662306a36Sopenharmony_ci			tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
318762306a36Sopenharmony_ci			break;
318862306a36Sopenharmony_ci		case TCPOPT_TIMESTAMP:
318962306a36Sopenharmony_ci			if (opt.opt_val != 0)
319062306a36Sopenharmony_ci				return -EINVAL;
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_ci			tp->rx_opt.tstamp_ok = 1;
319362306a36Sopenharmony_ci			break;
319462306a36Sopenharmony_ci		}
319562306a36Sopenharmony_ci	}
319662306a36Sopenharmony_ci
319762306a36Sopenharmony_ci	return 0;
319862306a36Sopenharmony_ci}
319962306a36Sopenharmony_ci
320062306a36Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
320162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_tx_delay_enabled);
320262306a36Sopenharmony_ci
320362306a36Sopenharmony_cistatic void tcp_enable_tx_delay(void)
320462306a36Sopenharmony_ci{
320562306a36Sopenharmony_ci	if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
320662306a36Sopenharmony_ci		static int __tcp_tx_delay_enabled = 0;
320762306a36Sopenharmony_ci
320862306a36Sopenharmony_ci		if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
320962306a36Sopenharmony_ci			static_branch_enable(&tcp_tx_delay_enabled);
321062306a36Sopenharmony_ci			pr_info("TCP_TX_DELAY enabled\n");
321162306a36Sopenharmony_ci		}
321262306a36Sopenharmony_ci	}
321362306a36Sopenharmony_ci}
321462306a36Sopenharmony_ci
321562306a36Sopenharmony_ci/* When set indicates to always queue non-full frames.  Later the user clears
321662306a36Sopenharmony_ci * this option and we transmit any pending partial frames in the queue.  This is
321762306a36Sopenharmony_ci * meant to be used alongside sendfile() to get properly filled frames when the
321862306a36Sopenharmony_ci * user (for example) must write out headers with a write() call first and then
321962306a36Sopenharmony_ci * use sendfile to send out the data parts.
322062306a36Sopenharmony_ci *
322162306a36Sopenharmony_ci * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
322262306a36Sopenharmony_ci * TCP_NODELAY.
322362306a36Sopenharmony_ci */
322462306a36Sopenharmony_civoid __tcp_sock_set_cork(struct sock *sk, bool on)
322562306a36Sopenharmony_ci{
322662306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
322762306a36Sopenharmony_ci
322862306a36Sopenharmony_ci	if (on) {
322962306a36Sopenharmony_ci		tp->nonagle |= TCP_NAGLE_CORK;
323062306a36Sopenharmony_ci	} else {
323162306a36Sopenharmony_ci		tp->nonagle &= ~TCP_NAGLE_CORK;
323262306a36Sopenharmony_ci		if (tp->nonagle & TCP_NAGLE_OFF)
323362306a36Sopenharmony_ci			tp->nonagle |= TCP_NAGLE_PUSH;
323462306a36Sopenharmony_ci		tcp_push_pending_frames(sk);
323562306a36Sopenharmony_ci	}
323662306a36Sopenharmony_ci}
323762306a36Sopenharmony_ci
323862306a36Sopenharmony_civoid tcp_sock_set_cork(struct sock *sk, bool on)
323962306a36Sopenharmony_ci{
324062306a36Sopenharmony_ci	lock_sock(sk);
324162306a36Sopenharmony_ci	__tcp_sock_set_cork(sk, on);
324262306a36Sopenharmony_ci	release_sock(sk);
324362306a36Sopenharmony_ci}
324462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_cork);
324562306a36Sopenharmony_ci
324662306a36Sopenharmony_ci/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
324762306a36Sopenharmony_ci * remembered, but it is not activated until cork is cleared.
324862306a36Sopenharmony_ci *
324962306a36Sopenharmony_ci * However, when TCP_NODELAY is set we make an explicit push, which overrides
325062306a36Sopenharmony_ci * even TCP_CORK for currently queued segments.
325162306a36Sopenharmony_ci */
325262306a36Sopenharmony_civoid __tcp_sock_set_nodelay(struct sock *sk, bool on)
325362306a36Sopenharmony_ci{
325462306a36Sopenharmony_ci	if (on) {
325562306a36Sopenharmony_ci		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
325662306a36Sopenharmony_ci		tcp_push_pending_frames(sk);
325762306a36Sopenharmony_ci	} else {
325862306a36Sopenharmony_ci		tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
325962306a36Sopenharmony_ci	}
326062306a36Sopenharmony_ci}
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_civoid tcp_sock_set_nodelay(struct sock *sk)
326362306a36Sopenharmony_ci{
326462306a36Sopenharmony_ci	lock_sock(sk);
326562306a36Sopenharmony_ci	__tcp_sock_set_nodelay(sk, true);
326662306a36Sopenharmony_ci	release_sock(sk);
326762306a36Sopenharmony_ci}
326862306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_nodelay);
326962306a36Sopenharmony_ci
327062306a36Sopenharmony_cistatic void __tcp_sock_set_quickack(struct sock *sk, int val)
327162306a36Sopenharmony_ci{
327262306a36Sopenharmony_ci	if (!val) {
327362306a36Sopenharmony_ci		inet_csk_enter_pingpong_mode(sk);
327462306a36Sopenharmony_ci		return;
327562306a36Sopenharmony_ci	}
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci	inet_csk_exit_pingpong_mode(sk);
327862306a36Sopenharmony_ci	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
327962306a36Sopenharmony_ci	    inet_csk_ack_scheduled(sk)) {
328062306a36Sopenharmony_ci		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
328162306a36Sopenharmony_ci		tcp_cleanup_rbuf(sk, 1);
328262306a36Sopenharmony_ci		if (!(val & 1))
328362306a36Sopenharmony_ci			inet_csk_enter_pingpong_mode(sk);
328462306a36Sopenharmony_ci	}
328562306a36Sopenharmony_ci}
328662306a36Sopenharmony_ci
/* Same as __tcp_sock_set_quickack(), but takes and releases the socket
 * lock around the call.
 */
void tcp_sock_set_quickack(struct sock *sk, int val)
{
	lock_sock(sk);
	__tcp_sock_set_quickack(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_quickack);
329462306a36Sopenharmony_ci
329562306a36Sopenharmony_ciint tcp_sock_set_syncnt(struct sock *sk, int val)
329662306a36Sopenharmony_ci{
329762306a36Sopenharmony_ci	if (val < 1 || val > MAX_TCP_SYNCNT)
329862306a36Sopenharmony_ci		return -EINVAL;
329962306a36Sopenharmony_ci
330062306a36Sopenharmony_ci	WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
330162306a36Sopenharmony_ci	return 0;
330262306a36Sopenharmony_ci}
330362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_syncnt);
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_ciint tcp_sock_set_user_timeout(struct sock *sk, int val)
330662306a36Sopenharmony_ci{
330762306a36Sopenharmony_ci	/* Cap the max time in ms TCP will retry or probe the window
330862306a36Sopenharmony_ci	 * before giving up and aborting (ETIMEDOUT) a connection.
330962306a36Sopenharmony_ci	 */
331062306a36Sopenharmony_ci	if (val < 0)
331162306a36Sopenharmony_ci		return -EINVAL;
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci	WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
331462306a36Sopenharmony_ci	return 0;
331562306a36Sopenharmony_ci}
331662306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_user_timeout);
331762306a36Sopenharmony_ci
331862306a36Sopenharmony_ciint tcp_sock_set_keepidle_locked(struct sock *sk, int val)
331962306a36Sopenharmony_ci{
332062306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
332162306a36Sopenharmony_ci
332262306a36Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPIDLE)
332362306a36Sopenharmony_ci		return -EINVAL;
332462306a36Sopenharmony_ci
332562306a36Sopenharmony_ci	/* Paired with WRITE_ONCE() in keepalive_time_when() */
332662306a36Sopenharmony_ci	WRITE_ONCE(tp->keepalive_time, val * HZ);
332762306a36Sopenharmony_ci	if (sock_flag(sk, SOCK_KEEPOPEN) &&
332862306a36Sopenharmony_ci	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
332962306a36Sopenharmony_ci		u32 elapsed = keepalive_time_elapsed(tp);
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci		if (tp->keepalive_time > elapsed)
333262306a36Sopenharmony_ci			elapsed = tp->keepalive_time - elapsed;
333362306a36Sopenharmony_ci		else
333462306a36Sopenharmony_ci			elapsed = 0;
333562306a36Sopenharmony_ci		inet_csk_reset_keepalive_timer(sk, elapsed);
333662306a36Sopenharmony_ci	}
333762306a36Sopenharmony_ci
333862306a36Sopenharmony_ci	return 0;
333962306a36Sopenharmony_ci}
334062306a36Sopenharmony_ci
/* Same as tcp_sock_set_keepidle_locked(), but takes and releases the
 * socket lock around the call.  Returns that function's result.
 */
int tcp_sock_set_keepidle(struct sock *sk, int val)
{
	int ret;

	lock_sock(sk);
	ret = tcp_sock_set_keepidle_locked(sk, val);
	release_sock(sk);

	return ret;
}
EXPORT_SYMBOL(tcp_sock_set_keepidle);
335162306a36Sopenharmony_ci
335262306a36Sopenharmony_ciint tcp_sock_set_keepintvl(struct sock *sk, int val)
335362306a36Sopenharmony_ci{
335462306a36Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPINTVL)
335562306a36Sopenharmony_ci		return -EINVAL;
335662306a36Sopenharmony_ci
335762306a36Sopenharmony_ci	WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
335862306a36Sopenharmony_ci	return 0;
335962306a36Sopenharmony_ci}
336062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_keepintvl);
336162306a36Sopenharmony_ci
336262306a36Sopenharmony_ciint tcp_sock_set_keepcnt(struct sock *sk, int val)
336362306a36Sopenharmony_ci{
336462306a36Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPCNT)
336562306a36Sopenharmony_ci		return -EINVAL;
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci	/* Paired with READ_ONCE() in keepalive_probes() */
336862306a36Sopenharmony_ci	WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
336962306a36Sopenharmony_ci	return 0;
337062306a36Sopenharmony_ci}
337162306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_keepcnt);
337262306a36Sopenharmony_ci
337362306a36Sopenharmony_ciint tcp_set_window_clamp(struct sock *sk, int val)
337462306a36Sopenharmony_ci{
337562306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
337662306a36Sopenharmony_ci
337762306a36Sopenharmony_ci	if (!val) {
337862306a36Sopenharmony_ci		if (sk->sk_state != TCP_CLOSE)
337962306a36Sopenharmony_ci			return -EINVAL;
338062306a36Sopenharmony_ci		tp->window_clamp = 0;
338162306a36Sopenharmony_ci	} else {
338262306a36Sopenharmony_ci		u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
338362306a36Sopenharmony_ci		u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
338462306a36Sopenharmony_ci						SOCK_MIN_RCVBUF / 2 : val;
338562306a36Sopenharmony_ci
338662306a36Sopenharmony_ci		if (new_window_clamp == old_window_clamp)
338762306a36Sopenharmony_ci			return 0;
338862306a36Sopenharmony_ci
338962306a36Sopenharmony_ci		tp->window_clamp = new_window_clamp;
339062306a36Sopenharmony_ci		if (new_window_clamp < old_window_clamp) {
339162306a36Sopenharmony_ci			/* need to apply the reserved mem provisioning only
339262306a36Sopenharmony_ci			 * when shrinking the window clamp
339362306a36Sopenharmony_ci			 */
339462306a36Sopenharmony_ci			__tcp_adjust_rcv_ssthresh(sk, tp->window_clamp);
339562306a36Sopenharmony_ci
339662306a36Sopenharmony_ci		} else {
339762306a36Sopenharmony_ci			new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
339862306a36Sopenharmony_ci			tp->rcv_ssthresh = max(new_rcv_ssthresh,
339962306a36Sopenharmony_ci					       tp->rcv_ssthresh);
340062306a36Sopenharmony_ci		}
340162306a36Sopenharmony_ci	}
340262306a36Sopenharmony_ci	return 0;
340362306a36Sopenharmony_ci}
340462306a36Sopenharmony_ci
340562306a36Sopenharmony_ci/*
340662306a36Sopenharmony_ci *	Socket option code for TCP.
340762306a36Sopenharmony_ci */
340862306a36Sopenharmony_ciint do_tcp_setsockopt(struct sock *sk, int level, int optname,
340962306a36Sopenharmony_ci		      sockptr_t optval, unsigned int optlen)
341062306a36Sopenharmony_ci{
341162306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
341262306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
341362306a36Sopenharmony_ci	struct net *net = sock_net(sk);
341462306a36Sopenharmony_ci	int val;
341562306a36Sopenharmony_ci	int err = 0;
341662306a36Sopenharmony_ci
341762306a36Sopenharmony_ci	/* These are data/string values, all the others are ints */
341862306a36Sopenharmony_ci	switch (optname) {
341962306a36Sopenharmony_ci	case TCP_CONGESTION: {
342062306a36Sopenharmony_ci		char name[TCP_CA_NAME_MAX];
342162306a36Sopenharmony_ci
342262306a36Sopenharmony_ci		if (optlen < 1)
342362306a36Sopenharmony_ci			return -EINVAL;
342462306a36Sopenharmony_ci
342562306a36Sopenharmony_ci		val = strncpy_from_sockptr(name, optval,
342662306a36Sopenharmony_ci					min_t(long, TCP_CA_NAME_MAX-1, optlen));
342762306a36Sopenharmony_ci		if (val < 0)
342862306a36Sopenharmony_ci			return -EFAULT;
342962306a36Sopenharmony_ci		name[val] = 0;
343062306a36Sopenharmony_ci
343162306a36Sopenharmony_ci		sockopt_lock_sock(sk);
343262306a36Sopenharmony_ci		err = tcp_set_congestion_control(sk, name, !has_current_bpf_ctx(),
343362306a36Sopenharmony_ci						 sockopt_ns_capable(sock_net(sk)->user_ns,
343462306a36Sopenharmony_ci								    CAP_NET_ADMIN));
343562306a36Sopenharmony_ci		sockopt_release_sock(sk);
343662306a36Sopenharmony_ci		return err;
343762306a36Sopenharmony_ci	}
343862306a36Sopenharmony_ci	case TCP_ULP: {
343962306a36Sopenharmony_ci		char name[TCP_ULP_NAME_MAX];
344062306a36Sopenharmony_ci
344162306a36Sopenharmony_ci		if (optlen < 1)
344262306a36Sopenharmony_ci			return -EINVAL;
344362306a36Sopenharmony_ci
344462306a36Sopenharmony_ci		val = strncpy_from_sockptr(name, optval,
344562306a36Sopenharmony_ci					min_t(long, TCP_ULP_NAME_MAX - 1,
344662306a36Sopenharmony_ci					      optlen));
344762306a36Sopenharmony_ci		if (val < 0)
344862306a36Sopenharmony_ci			return -EFAULT;
344962306a36Sopenharmony_ci		name[val] = 0;
345062306a36Sopenharmony_ci
345162306a36Sopenharmony_ci		sockopt_lock_sock(sk);
345262306a36Sopenharmony_ci		err = tcp_set_ulp(sk, name);
345362306a36Sopenharmony_ci		sockopt_release_sock(sk);
345462306a36Sopenharmony_ci		return err;
345562306a36Sopenharmony_ci	}
345662306a36Sopenharmony_ci	case TCP_FASTOPEN_KEY: {
345762306a36Sopenharmony_ci		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
345862306a36Sopenharmony_ci		__u8 *backup_key = NULL;
345962306a36Sopenharmony_ci
346062306a36Sopenharmony_ci		/* Allow a backup key as well to facilitate key rotation
346162306a36Sopenharmony_ci		 * First key is the active one.
346262306a36Sopenharmony_ci		 */
346362306a36Sopenharmony_ci		if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
346462306a36Sopenharmony_ci		    optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
346562306a36Sopenharmony_ci			return -EINVAL;
346662306a36Sopenharmony_ci
346762306a36Sopenharmony_ci		if (copy_from_sockptr(key, optval, optlen))
346862306a36Sopenharmony_ci			return -EFAULT;
346962306a36Sopenharmony_ci
347062306a36Sopenharmony_ci		if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
347162306a36Sopenharmony_ci			backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ci		return tcp_fastopen_reset_cipher(net, sk, key, backup_key);
347462306a36Sopenharmony_ci	}
347562306a36Sopenharmony_ci	default:
347662306a36Sopenharmony_ci		/* fallthru */
347762306a36Sopenharmony_ci		break;
347862306a36Sopenharmony_ci	}
347962306a36Sopenharmony_ci
348062306a36Sopenharmony_ci	if (optlen < sizeof(int))
348162306a36Sopenharmony_ci		return -EINVAL;
348262306a36Sopenharmony_ci
348362306a36Sopenharmony_ci	if (copy_from_sockptr(&val, optval, sizeof(val)))
348462306a36Sopenharmony_ci		return -EFAULT;
348562306a36Sopenharmony_ci
348662306a36Sopenharmony_ci	/* Handle options that can be set without locking the socket. */
348762306a36Sopenharmony_ci	switch (optname) {
348862306a36Sopenharmony_ci	case TCP_SYNCNT:
348962306a36Sopenharmony_ci		return tcp_sock_set_syncnt(sk, val);
349062306a36Sopenharmony_ci	case TCP_USER_TIMEOUT:
349162306a36Sopenharmony_ci		return tcp_sock_set_user_timeout(sk, val);
349262306a36Sopenharmony_ci	case TCP_KEEPINTVL:
349362306a36Sopenharmony_ci		return tcp_sock_set_keepintvl(sk, val);
349462306a36Sopenharmony_ci	case TCP_KEEPCNT:
349562306a36Sopenharmony_ci		return tcp_sock_set_keepcnt(sk, val);
349662306a36Sopenharmony_ci	case TCP_LINGER2:
349762306a36Sopenharmony_ci		if (val < 0)
349862306a36Sopenharmony_ci			WRITE_ONCE(tp->linger2, -1);
349962306a36Sopenharmony_ci		else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
350062306a36Sopenharmony_ci			WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
350162306a36Sopenharmony_ci		else
350262306a36Sopenharmony_ci			WRITE_ONCE(tp->linger2, val * HZ);
350362306a36Sopenharmony_ci		return 0;
350462306a36Sopenharmony_ci	case TCP_DEFER_ACCEPT:
350562306a36Sopenharmony_ci		/* Translate value in seconds to number of retransmits */
350662306a36Sopenharmony_ci		WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
350762306a36Sopenharmony_ci			   secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
350862306a36Sopenharmony_ci					   TCP_RTO_MAX / HZ));
350962306a36Sopenharmony_ci		return 0;
351062306a36Sopenharmony_ci	}
351162306a36Sopenharmony_ci
351262306a36Sopenharmony_ci	sockopt_lock_sock(sk);
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ci	switch (optname) {
351562306a36Sopenharmony_ci	case TCP_MAXSEG:
351662306a36Sopenharmony_ci		/* Values greater than interface MTU won't take effect. However
351762306a36Sopenharmony_ci		 * at the point when this call is done we typically don't yet
351862306a36Sopenharmony_ci		 * know which interface is going to be used
351962306a36Sopenharmony_ci		 */
352062306a36Sopenharmony_ci		if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
352162306a36Sopenharmony_ci			err = -EINVAL;
352262306a36Sopenharmony_ci			break;
352362306a36Sopenharmony_ci		}
352462306a36Sopenharmony_ci		tp->rx_opt.user_mss = val;
352562306a36Sopenharmony_ci		break;
352662306a36Sopenharmony_ci
352762306a36Sopenharmony_ci	case TCP_NODELAY:
352862306a36Sopenharmony_ci		__tcp_sock_set_nodelay(sk, val);
352962306a36Sopenharmony_ci		break;
353062306a36Sopenharmony_ci
353162306a36Sopenharmony_ci	case TCP_THIN_LINEAR_TIMEOUTS:
353262306a36Sopenharmony_ci		if (val < 0 || val > 1)
353362306a36Sopenharmony_ci			err = -EINVAL;
353462306a36Sopenharmony_ci		else
353562306a36Sopenharmony_ci			tp->thin_lto = val;
353662306a36Sopenharmony_ci		break;
353762306a36Sopenharmony_ci
353862306a36Sopenharmony_ci	case TCP_THIN_DUPACK:
353962306a36Sopenharmony_ci		if (val < 0 || val > 1)
354062306a36Sopenharmony_ci			err = -EINVAL;
354162306a36Sopenharmony_ci		break;
354262306a36Sopenharmony_ci
354362306a36Sopenharmony_ci	case TCP_REPAIR:
354462306a36Sopenharmony_ci		if (!tcp_can_repair_sock(sk))
354562306a36Sopenharmony_ci			err = -EPERM;
354662306a36Sopenharmony_ci		else if (val == TCP_REPAIR_ON) {
354762306a36Sopenharmony_ci			tp->repair = 1;
354862306a36Sopenharmony_ci			sk->sk_reuse = SK_FORCE_REUSE;
354962306a36Sopenharmony_ci			tp->repair_queue = TCP_NO_QUEUE;
355062306a36Sopenharmony_ci		} else if (val == TCP_REPAIR_OFF) {
355162306a36Sopenharmony_ci			tp->repair = 0;
355262306a36Sopenharmony_ci			sk->sk_reuse = SK_NO_REUSE;
355362306a36Sopenharmony_ci			tcp_send_window_probe(sk);
355462306a36Sopenharmony_ci		} else if (val == TCP_REPAIR_OFF_NO_WP) {
355562306a36Sopenharmony_ci			tp->repair = 0;
355662306a36Sopenharmony_ci			sk->sk_reuse = SK_NO_REUSE;
355762306a36Sopenharmony_ci		} else
355862306a36Sopenharmony_ci			err = -EINVAL;
355962306a36Sopenharmony_ci
356062306a36Sopenharmony_ci		break;
356162306a36Sopenharmony_ci
356262306a36Sopenharmony_ci	case TCP_REPAIR_QUEUE:
356362306a36Sopenharmony_ci		if (!tp->repair)
356462306a36Sopenharmony_ci			err = -EPERM;
356562306a36Sopenharmony_ci		else if ((unsigned int)val < TCP_QUEUES_NR)
356662306a36Sopenharmony_ci			tp->repair_queue = val;
356762306a36Sopenharmony_ci		else
356862306a36Sopenharmony_ci			err = -EINVAL;
356962306a36Sopenharmony_ci		break;
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci	case TCP_QUEUE_SEQ:
357262306a36Sopenharmony_ci		if (sk->sk_state != TCP_CLOSE) {
357362306a36Sopenharmony_ci			err = -EPERM;
357462306a36Sopenharmony_ci		} else if (tp->repair_queue == TCP_SEND_QUEUE) {
357562306a36Sopenharmony_ci			if (!tcp_rtx_queue_empty(sk))
357662306a36Sopenharmony_ci				err = -EPERM;
357762306a36Sopenharmony_ci			else
357862306a36Sopenharmony_ci				WRITE_ONCE(tp->write_seq, val);
357962306a36Sopenharmony_ci		} else if (tp->repair_queue == TCP_RECV_QUEUE) {
358062306a36Sopenharmony_ci			if (tp->rcv_nxt != tp->copied_seq) {
358162306a36Sopenharmony_ci				err = -EPERM;
358262306a36Sopenharmony_ci			} else {
358362306a36Sopenharmony_ci				WRITE_ONCE(tp->rcv_nxt, val);
358462306a36Sopenharmony_ci				WRITE_ONCE(tp->copied_seq, val);
358562306a36Sopenharmony_ci			}
358662306a36Sopenharmony_ci		} else {
358762306a36Sopenharmony_ci			err = -EINVAL;
358862306a36Sopenharmony_ci		}
358962306a36Sopenharmony_ci		break;
359062306a36Sopenharmony_ci
359162306a36Sopenharmony_ci	case TCP_REPAIR_OPTIONS:
359262306a36Sopenharmony_ci		if (!tp->repair)
359362306a36Sopenharmony_ci			err = -EINVAL;
359462306a36Sopenharmony_ci		else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent)
359562306a36Sopenharmony_ci			err = tcp_repair_options_est(sk, optval, optlen);
359662306a36Sopenharmony_ci		else
359762306a36Sopenharmony_ci			err = -EPERM;
359862306a36Sopenharmony_ci		break;
359962306a36Sopenharmony_ci
360062306a36Sopenharmony_ci	case TCP_CORK:
360162306a36Sopenharmony_ci		__tcp_sock_set_cork(sk, val);
360262306a36Sopenharmony_ci		break;
360362306a36Sopenharmony_ci
360462306a36Sopenharmony_ci	case TCP_KEEPIDLE:
360562306a36Sopenharmony_ci		err = tcp_sock_set_keepidle_locked(sk, val);
360662306a36Sopenharmony_ci		break;
360762306a36Sopenharmony_ci	case TCP_SAVE_SYN:
360862306a36Sopenharmony_ci		/* 0: disable, 1: enable, 2: start from ether_header */
360962306a36Sopenharmony_ci		if (val < 0 || val > 2)
361062306a36Sopenharmony_ci			err = -EINVAL;
361162306a36Sopenharmony_ci		else
361262306a36Sopenharmony_ci			tp->save_syn = val;
361362306a36Sopenharmony_ci		break;
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_ci	case TCP_WINDOW_CLAMP:
361662306a36Sopenharmony_ci		err = tcp_set_window_clamp(sk, val);
361762306a36Sopenharmony_ci		break;
361862306a36Sopenharmony_ci
361962306a36Sopenharmony_ci	case TCP_QUICKACK:
362062306a36Sopenharmony_ci		__tcp_sock_set_quickack(sk, val);
362162306a36Sopenharmony_ci		break;
362262306a36Sopenharmony_ci
362362306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
362462306a36Sopenharmony_ci	case TCP_MD5SIG:
362562306a36Sopenharmony_ci	case TCP_MD5SIG_EXT:
362662306a36Sopenharmony_ci		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
362762306a36Sopenharmony_ci		break;
362862306a36Sopenharmony_ci#endif
362962306a36Sopenharmony_ci	case TCP_FASTOPEN:
363062306a36Sopenharmony_ci		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
363162306a36Sopenharmony_ci		    TCPF_LISTEN))) {
363262306a36Sopenharmony_ci			tcp_fastopen_init_key_once(net);
363362306a36Sopenharmony_ci
363462306a36Sopenharmony_ci			fastopen_queue_tune(sk, val);
363562306a36Sopenharmony_ci		} else {
363662306a36Sopenharmony_ci			err = -EINVAL;
363762306a36Sopenharmony_ci		}
363862306a36Sopenharmony_ci		break;
363962306a36Sopenharmony_ci	case TCP_FASTOPEN_CONNECT:
364062306a36Sopenharmony_ci		if (val > 1 || val < 0) {
364162306a36Sopenharmony_ci			err = -EINVAL;
364262306a36Sopenharmony_ci		} else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
364362306a36Sopenharmony_ci			   TFO_CLIENT_ENABLE) {
364462306a36Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE)
364562306a36Sopenharmony_ci				tp->fastopen_connect = val;
364662306a36Sopenharmony_ci			else
364762306a36Sopenharmony_ci				err = -EINVAL;
364862306a36Sopenharmony_ci		} else {
364962306a36Sopenharmony_ci			err = -EOPNOTSUPP;
365062306a36Sopenharmony_ci		}
365162306a36Sopenharmony_ci		break;
365262306a36Sopenharmony_ci	case TCP_FASTOPEN_NO_COOKIE:
365362306a36Sopenharmony_ci		if (val > 1 || val < 0)
365462306a36Sopenharmony_ci			err = -EINVAL;
365562306a36Sopenharmony_ci		else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
365662306a36Sopenharmony_ci			err = -EINVAL;
365762306a36Sopenharmony_ci		else
365862306a36Sopenharmony_ci			tp->fastopen_no_cookie = val;
365962306a36Sopenharmony_ci		break;
366062306a36Sopenharmony_ci	case TCP_TIMESTAMP:
366162306a36Sopenharmony_ci		if (!tp->repair)
366262306a36Sopenharmony_ci			err = -EPERM;
366362306a36Sopenharmony_ci		else
366462306a36Sopenharmony_ci			WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
366562306a36Sopenharmony_ci		break;
366662306a36Sopenharmony_ci	case TCP_REPAIR_WINDOW:
366762306a36Sopenharmony_ci		err = tcp_repair_set_window(tp, optval, optlen);
366862306a36Sopenharmony_ci		break;
366962306a36Sopenharmony_ci	case TCP_NOTSENT_LOWAT:
367062306a36Sopenharmony_ci		WRITE_ONCE(tp->notsent_lowat, val);
367162306a36Sopenharmony_ci		sk->sk_write_space(sk);
367262306a36Sopenharmony_ci		break;
367362306a36Sopenharmony_ci	case TCP_INQ:
367462306a36Sopenharmony_ci		if (val > 1 || val < 0)
367562306a36Sopenharmony_ci			err = -EINVAL;
367662306a36Sopenharmony_ci		else
367762306a36Sopenharmony_ci			tp->recvmsg_inq = val;
367862306a36Sopenharmony_ci		break;
367962306a36Sopenharmony_ci	case TCP_TX_DELAY:
368062306a36Sopenharmony_ci		if (val)
368162306a36Sopenharmony_ci			tcp_enable_tx_delay();
368262306a36Sopenharmony_ci		WRITE_ONCE(tp->tcp_tx_delay, val);
368362306a36Sopenharmony_ci		break;
368462306a36Sopenharmony_ci	default:
368562306a36Sopenharmony_ci		err = -ENOPROTOOPT;
368662306a36Sopenharmony_ci		break;
368762306a36Sopenharmony_ci	}
368862306a36Sopenharmony_ci
368962306a36Sopenharmony_ci	sockopt_release_sock(sk);
369062306a36Sopenharmony_ci	return err;
369162306a36Sopenharmony_ci}
369262306a36Sopenharmony_ci
369362306a36Sopenharmony_ciint tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
369462306a36Sopenharmony_ci		   unsigned int optlen)
369562306a36Sopenharmony_ci{
369662306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
369762306a36Sopenharmony_ci
369862306a36Sopenharmony_ci	if (level != SOL_TCP)
369962306a36Sopenharmony_ci		/* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
370062306a36Sopenharmony_ci		return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname,
370162306a36Sopenharmony_ci								optval, optlen);
370262306a36Sopenharmony_ci	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
370362306a36Sopenharmony_ci}
370462306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_setsockopt);
370562306a36Sopenharmony_ci
370662306a36Sopenharmony_cistatic void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
370762306a36Sopenharmony_ci				      struct tcp_info *info)
370862306a36Sopenharmony_ci{
370962306a36Sopenharmony_ci	u64 stats[__TCP_CHRONO_MAX], total = 0;
371062306a36Sopenharmony_ci	enum tcp_chrono i;
371162306a36Sopenharmony_ci
371262306a36Sopenharmony_ci	for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
371362306a36Sopenharmony_ci		stats[i] = tp->chrono_stat[i - 1];
371462306a36Sopenharmony_ci		if (i == tp->chrono_type)
371562306a36Sopenharmony_ci			stats[i] += tcp_jiffies32 - tp->chrono_start;
371662306a36Sopenharmony_ci		stats[i] *= USEC_PER_SEC / HZ;
371762306a36Sopenharmony_ci		total += stats[i];
371862306a36Sopenharmony_ci	}
371962306a36Sopenharmony_ci
372062306a36Sopenharmony_ci	info->tcpi_busy_time = total;
372162306a36Sopenharmony_ci	info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
372262306a36Sopenharmony_ci	info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
372362306a36Sopenharmony_ci}
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_ci/* Return information about state of tcp endpoint in API format. */
372662306a36Sopenharmony_civoid tcp_get_info(struct sock *sk, struct tcp_info *info)
372762306a36Sopenharmony_ci{
372862306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
372962306a36Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
373062306a36Sopenharmony_ci	unsigned long rate;
373162306a36Sopenharmony_ci	u32 now;
373262306a36Sopenharmony_ci	u64 rate64;
373362306a36Sopenharmony_ci	bool slow;
373462306a36Sopenharmony_ci
373562306a36Sopenharmony_ci	memset(info, 0, sizeof(*info));
373662306a36Sopenharmony_ci	if (sk->sk_type != SOCK_STREAM)
373762306a36Sopenharmony_ci		return;
373862306a36Sopenharmony_ci
373962306a36Sopenharmony_ci	info->tcpi_state = inet_sk_state_load(sk);
374062306a36Sopenharmony_ci
374162306a36Sopenharmony_ci	/* Report meaningful fields for all TCP states, including listeners */
374262306a36Sopenharmony_ci	rate = READ_ONCE(sk->sk_pacing_rate);
374362306a36Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
374462306a36Sopenharmony_ci	info->tcpi_pacing_rate = rate64;
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	rate = READ_ONCE(sk->sk_max_pacing_rate);
374762306a36Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
374862306a36Sopenharmony_ci	info->tcpi_max_pacing_rate = rate64;
374962306a36Sopenharmony_ci
375062306a36Sopenharmony_ci	info->tcpi_reordering = tp->reordering;
375162306a36Sopenharmony_ci	info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci	if (info->tcpi_state == TCP_LISTEN) {
375462306a36Sopenharmony_ci		/* listeners aliased fields :
375562306a36Sopenharmony_ci		 * tcpi_unacked -> Number of children ready for accept()
375662306a36Sopenharmony_ci		 * tcpi_sacked  -> max backlog
375762306a36Sopenharmony_ci		 */
375862306a36Sopenharmony_ci		info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
375962306a36Sopenharmony_ci		info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
376062306a36Sopenharmony_ci		return;
376162306a36Sopenharmony_ci	}
376262306a36Sopenharmony_ci
376362306a36Sopenharmony_ci	slow = lock_sock_fast(sk);
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci	info->tcpi_ca_state = icsk->icsk_ca_state;
376662306a36Sopenharmony_ci	info->tcpi_retransmits = icsk->icsk_retransmits;
376762306a36Sopenharmony_ci	info->tcpi_probes = icsk->icsk_probes_out;
376862306a36Sopenharmony_ci	info->tcpi_backoff = icsk->icsk_backoff;
376962306a36Sopenharmony_ci
377062306a36Sopenharmony_ci	if (tp->rx_opt.tstamp_ok)
377162306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
377262306a36Sopenharmony_ci	if (tcp_is_sack(tp))
377362306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_SACK;
377462306a36Sopenharmony_ci	if (tp->rx_opt.wscale_ok) {
377562306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_WSCALE;
377662306a36Sopenharmony_ci		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
377762306a36Sopenharmony_ci		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
377862306a36Sopenharmony_ci	}
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci	if (tp->ecn_flags & TCP_ECN_OK)
378162306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_ECN;
378262306a36Sopenharmony_ci	if (tp->ecn_flags & TCP_ECN_SEEN)
378362306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_ECN_SEEN;
378462306a36Sopenharmony_ci	if (tp->syn_data_acked)
378562306a36Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_SYN_DATA;
378662306a36Sopenharmony_ci
378762306a36Sopenharmony_ci	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
378862306a36Sopenharmony_ci	info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato,
378962306a36Sopenharmony_ci					      tcp_delack_max(sk)));
379062306a36Sopenharmony_ci	info->tcpi_snd_mss = tp->mss_cache;
379162306a36Sopenharmony_ci	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
379262306a36Sopenharmony_ci
379362306a36Sopenharmony_ci	info->tcpi_unacked = tp->packets_out;
379462306a36Sopenharmony_ci	info->tcpi_sacked = tp->sacked_out;
379562306a36Sopenharmony_ci
379662306a36Sopenharmony_ci	info->tcpi_lost = tp->lost_out;
379762306a36Sopenharmony_ci	info->tcpi_retrans = tp->retrans_out;
379862306a36Sopenharmony_ci
379962306a36Sopenharmony_ci	now = tcp_jiffies32;
380062306a36Sopenharmony_ci	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
380162306a36Sopenharmony_ci	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
380262306a36Sopenharmony_ci	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
380362306a36Sopenharmony_ci
380462306a36Sopenharmony_ci	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
380562306a36Sopenharmony_ci	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
380662306a36Sopenharmony_ci	info->tcpi_rtt = tp->srtt_us >> 3;
380762306a36Sopenharmony_ci	info->tcpi_rttvar = tp->mdev_us >> 2;
380862306a36Sopenharmony_ci	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
380962306a36Sopenharmony_ci	info->tcpi_advmss = tp->advmss;
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_ci	info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
381262306a36Sopenharmony_ci	info->tcpi_rcv_space = tp->rcvq_space.space;
381362306a36Sopenharmony_ci
381462306a36Sopenharmony_ci	info->tcpi_total_retrans = tp->total_retrans;
381562306a36Sopenharmony_ci
381662306a36Sopenharmony_ci	info->tcpi_bytes_acked = tp->bytes_acked;
381762306a36Sopenharmony_ci	info->tcpi_bytes_received = tp->bytes_received;
381862306a36Sopenharmony_ci	info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
381962306a36Sopenharmony_ci	tcp_get_info_chrono_stats(tp, info);
382062306a36Sopenharmony_ci
382162306a36Sopenharmony_ci	info->tcpi_segs_out = tp->segs_out;
382262306a36Sopenharmony_ci
382362306a36Sopenharmony_ci	/* segs_in and data_segs_in can be updated from tcp_segs_in() from BH */
382462306a36Sopenharmony_ci	info->tcpi_segs_in = READ_ONCE(tp->segs_in);
382562306a36Sopenharmony_ci	info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in);
382662306a36Sopenharmony_ci
382762306a36Sopenharmony_ci	info->tcpi_min_rtt = tcp_min_rtt(tp);
382862306a36Sopenharmony_ci	info->tcpi_data_segs_out = tp->data_segs_out;
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
383162306a36Sopenharmony_ci	rate64 = tcp_compute_delivery_rate(tp);
383262306a36Sopenharmony_ci	if (rate64)
383362306a36Sopenharmony_ci		info->tcpi_delivery_rate = rate64;
383462306a36Sopenharmony_ci	info->tcpi_delivered = tp->delivered;
383562306a36Sopenharmony_ci	info->tcpi_delivered_ce = tp->delivered_ce;
383662306a36Sopenharmony_ci	info->tcpi_bytes_sent = tp->bytes_sent;
383762306a36Sopenharmony_ci	info->tcpi_bytes_retrans = tp->bytes_retrans;
383862306a36Sopenharmony_ci	info->tcpi_dsack_dups = tp->dsack_dups;
383962306a36Sopenharmony_ci	info->tcpi_reord_seen = tp->reord_seen;
384062306a36Sopenharmony_ci	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
384162306a36Sopenharmony_ci	info->tcpi_snd_wnd = tp->snd_wnd;
384262306a36Sopenharmony_ci	info->tcpi_rcv_wnd = tp->rcv_wnd;
384362306a36Sopenharmony_ci	info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
384462306a36Sopenharmony_ci	info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
384562306a36Sopenharmony_ci	unlock_sock_fast(sk, slow);
384662306a36Sopenharmony_ci}
384762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_get_info);
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_cistatic size_t tcp_opt_stats_get_size(void)
385062306a36Sopenharmony_ci{
385162306a36Sopenharmony_ci	return
385262306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
385362306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
385462306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
385562306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
385662306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
385762306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
385862306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
385962306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
386062306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
386162306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
386262306a36Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
386362306a36Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
386462306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
386562306a36Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
386662306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
386762306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
386862306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
386962306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
387062306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
387162306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
387262306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
387362306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
387462306a36Sopenharmony_ci		nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
387562306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
387662306a36Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
387762306a36Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
387862306a36Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_REHASH */
387962306a36Sopenharmony_ci		0;
388062306a36Sopenharmony_ci}
388162306a36Sopenharmony_ci
388262306a36Sopenharmony_ci/* Returns TTL or hop limit of an incoming packet from skb. */
388362306a36Sopenharmony_cistatic u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb)
388462306a36Sopenharmony_ci{
388562306a36Sopenharmony_ci	if (skb->protocol == htons(ETH_P_IP))
388662306a36Sopenharmony_ci		return ip_hdr(skb)->ttl;
388762306a36Sopenharmony_ci	else if (skb->protocol == htons(ETH_P_IPV6))
388862306a36Sopenharmony_ci		return ipv6_hdr(skb)->hop_limit;
388962306a36Sopenharmony_ci	else
389062306a36Sopenharmony_ci		return 0;
389162306a36Sopenharmony_ci}
389262306a36Sopenharmony_ci
389362306a36Sopenharmony_cistruct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
389462306a36Sopenharmony_ci					       const struct sk_buff *orig_skb,
389562306a36Sopenharmony_ci					       const struct sk_buff *ack_skb)
389662306a36Sopenharmony_ci{
389762306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
389862306a36Sopenharmony_ci	struct sk_buff *stats;
389962306a36Sopenharmony_ci	struct tcp_info info;
390062306a36Sopenharmony_ci	unsigned long rate;
390162306a36Sopenharmony_ci	u64 rate64;
390262306a36Sopenharmony_ci
390362306a36Sopenharmony_ci	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
390462306a36Sopenharmony_ci	if (!stats)
390562306a36Sopenharmony_ci		return NULL;
390662306a36Sopenharmony_ci
390762306a36Sopenharmony_ci	tcp_get_info_chrono_stats(tp, &info);
390862306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BUSY,
390962306a36Sopenharmony_ci			  info.tcpi_busy_time, TCP_NLA_PAD);
391062306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
391162306a36Sopenharmony_ci			  info.tcpi_rwnd_limited, TCP_NLA_PAD);
391262306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
391362306a36Sopenharmony_ci			  info.tcpi_sndbuf_limited, TCP_NLA_PAD);
391462306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT,
391562306a36Sopenharmony_ci			  tp->data_segs_out, TCP_NLA_PAD);
391662306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
391762306a36Sopenharmony_ci			  tp->total_retrans, TCP_NLA_PAD);
391862306a36Sopenharmony_ci
391962306a36Sopenharmony_ci	rate = READ_ONCE(sk->sk_pacing_rate);
392062306a36Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
392162306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
392262306a36Sopenharmony_ci
392362306a36Sopenharmony_ci	rate64 = tcp_compute_delivery_rate(tp);
392462306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
392562306a36Sopenharmony_ci
392662306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
392762306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
392862306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
392962306a36Sopenharmony_ci
393062306a36Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
393162306a36Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
393262306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
393362306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
393462306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
393562306a36Sopenharmony_ci
393662306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
393762306a36Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
393862306a36Sopenharmony_ci
393962306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
394062306a36Sopenharmony_ci			  TCP_NLA_PAD);
394162306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
394262306a36Sopenharmony_ci			  TCP_NLA_PAD);
394362306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
394462306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
394562306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
394662306a36Sopenharmony_ci	nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
394762306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT,
394862306a36Sopenharmony_ci		    max_t(int, 0, tp->write_seq - tp->snd_nxt));
394962306a36Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
395062306a36Sopenharmony_ci			  TCP_NLA_PAD);
395162306a36Sopenharmony_ci	if (ack_skb)
395262306a36Sopenharmony_ci		nla_put_u8(stats, TCP_NLA_TTL,
395362306a36Sopenharmony_ci			   tcp_skb_ttl_or_hop_limit(ack_skb));
395462306a36Sopenharmony_ci
395562306a36Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash);
395662306a36Sopenharmony_ci	return stats;
395762306a36Sopenharmony_ci}
395862306a36Sopenharmony_ci
395962306a36Sopenharmony_ciint do_tcp_getsockopt(struct sock *sk, int level,
396062306a36Sopenharmony_ci		      int optname, sockptr_t optval, sockptr_t optlen)
396162306a36Sopenharmony_ci{
396262306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
396362306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
396462306a36Sopenharmony_ci	struct net *net = sock_net(sk);
396562306a36Sopenharmony_ci	int val, len;
396662306a36Sopenharmony_ci
396762306a36Sopenharmony_ci	if (copy_from_sockptr(&len, optlen, sizeof(int)))
396862306a36Sopenharmony_ci		return -EFAULT;
396962306a36Sopenharmony_ci
397062306a36Sopenharmony_ci	if (len < 0)
397162306a36Sopenharmony_ci		return -EINVAL;
397262306a36Sopenharmony_ci
397362306a36Sopenharmony_ci	len = min_t(unsigned int, len, sizeof(int));
397462306a36Sopenharmony_ci
397562306a36Sopenharmony_ci	switch (optname) {
397662306a36Sopenharmony_ci	case TCP_MAXSEG:
397762306a36Sopenharmony_ci		val = tp->mss_cache;
397862306a36Sopenharmony_ci		if (tp->rx_opt.user_mss &&
397962306a36Sopenharmony_ci		    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
398062306a36Sopenharmony_ci			val = tp->rx_opt.user_mss;
398162306a36Sopenharmony_ci		if (tp->repair)
398262306a36Sopenharmony_ci			val = tp->rx_opt.mss_clamp;
398362306a36Sopenharmony_ci		break;
398462306a36Sopenharmony_ci	case TCP_NODELAY:
398562306a36Sopenharmony_ci		val = !!(tp->nonagle&TCP_NAGLE_OFF);
398662306a36Sopenharmony_ci		break;
398762306a36Sopenharmony_ci	case TCP_CORK:
398862306a36Sopenharmony_ci		val = !!(tp->nonagle&TCP_NAGLE_CORK);
398962306a36Sopenharmony_ci		break;
399062306a36Sopenharmony_ci	case TCP_KEEPIDLE:
399162306a36Sopenharmony_ci		val = keepalive_time_when(tp) / HZ;
399262306a36Sopenharmony_ci		break;
399362306a36Sopenharmony_ci	case TCP_KEEPINTVL:
399462306a36Sopenharmony_ci		val = keepalive_intvl_when(tp) / HZ;
399562306a36Sopenharmony_ci		break;
399662306a36Sopenharmony_ci	case TCP_KEEPCNT:
399762306a36Sopenharmony_ci		val = keepalive_probes(tp);
399862306a36Sopenharmony_ci		break;
399962306a36Sopenharmony_ci	case TCP_SYNCNT:
400062306a36Sopenharmony_ci		val = READ_ONCE(icsk->icsk_syn_retries) ? :
400162306a36Sopenharmony_ci			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
400262306a36Sopenharmony_ci		break;
400362306a36Sopenharmony_ci	case TCP_LINGER2:
400462306a36Sopenharmony_ci		val = READ_ONCE(tp->linger2);
400562306a36Sopenharmony_ci		if (val >= 0)
400662306a36Sopenharmony_ci			val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
400762306a36Sopenharmony_ci		break;
400862306a36Sopenharmony_ci	case TCP_DEFER_ACCEPT:
400962306a36Sopenharmony_ci		val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
401062306a36Sopenharmony_ci		val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
401162306a36Sopenharmony_ci				      TCP_RTO_MAX / HZ);
401262306a36Sopenharmony_ci		break;
401362306a36Sopenharmony_ci	case TCP_WINDOW_CLAMP:
401462306a36Sopenharmony_ci		val = tp->window_clamp;
401562306a36Sopenharmony_ci		break;
401662306a36Sopenharmony_ci	case TCP_INFO: {
401762306a36Sopenharmony_ci		struct tcp_info info;
401862306a36Sopenharmony_ci
401962306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
402062306a36Sopenharmony_ci			return -EFAULT;
402162306a36Sopenharmony_ci
402262306a36Sopenharmony_ci		tcp_get_info(sk, &info);
402362306a36Sopenharmony_ci
402462306a36Sopenharmony_ci		len = min_t(unsigned int, len, sizeof(info));
402562306a36Sopenharmony_ci		if (copy_to_sockptr(optlen, &len, sizeof(int)))
402662306a36Sopenharmony_ci			return -EFAULT;
402762306a36Sopenharmony_ci		if (copy_to_sockptr(optval, &info, len))
402862306a36Sopenharmony_ci			return -EFAULT;
402962306a36Sopenharmony_ci		return 0;
403062306a36Sopenharmony_ci	}
403162306a36Sopenharmony_ci	case TCP_CC_INFO: {
403262306a36Sopenharmony_ci		const struct tcp_congestion_ops *ca_ops;
403362306a36Sopenharmony_ci		union tcp_cc_info info;
403462306a36Sopenharmony_ci		size_t sz = 0;
403562306a36Sopenharmony_ci		int attr;
403662306a36Sopenharmony_ci
403762306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
403862306a36Sopenharmony_ci			return -EFAULT;
403962306a36Sopenharmony_ci
404062306a36Sopenharmony_ci		ca_ops = icsk->icsk_ca_ops;
404162306a36Sopenharmony_ci		if (ca_ops && ca_ops->get_info)
404262306a36Sopenharmony_ci			sz = ca_ops->get_info(sk, ~0U, &attr, &info);
404362306a36Sopenharmony_ci
404462306a36Sopenharmony_ci		len = min_t(unsigned int, len, sz);
404562306a36Sopenharmony_ci		if (copy_to_sockptr(optlen, &len, sizeof(int)))
404662306a36Sopenharmony_ci			return -EFAULT;
404762306a36Sopenharmony_ci		if (copy_to_sockptr(optval, &info, len))
404862306a36Sopenharmony_ci			return -EFAULT;
404962306a36Sopenharmony_ci		return 0;
405062306a36Sopenharmony_ci	}
405162306a36Sopenharmony_ci	case TCP_QUICKACK:
405262306a36Sopenharmony_ci		val = !inet_csk_in_pingpong_mode(sk);
405362306a36Sopenharmony_ci		break;
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci	case TCP_CONGESTION:
405662306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
405762306a36Sopenharmony_ci			return -EFAULT;
405862306a36Sopenharmony_ci		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
405962306a36Sopenharmony_ci		if (copy_to_sockptr(optlen, &len, sizeof(int)))
406062306a36Sopenharmony_ci			return -EFAULT;
406162306a36Sopenharmony_ci		if (copy_to_sockptr(optval, icsk->icsk_ca_ops->name, len))
406262306a36Sopenharmony_ci			return -EFAULT;
406362306a36Sopenharmony_ci		return 0;
406462306a36Sopenharmony_ci
406562306a36Sopenharmony_ci	case TCP_ULP:
406662306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
406762306a36Sopenharmony_ci			return -EFAULT;
406862306a36Sopenharmony_ci		len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
406962306a36Sopenharmony_ci		if (!icsk->icsk_ulp_ops) {
407062306a36Sopenharmony_ci			len = 0;
407162306a36Sopenharmony_ci			if (copy_to_sockptr(optlen, &len, sizeof(int)))
407262306a36Sopenharmony_ci				return -EFAULT;
407362306a36Sopenharmony_ci			return 0;
407462306a36Sopenharmony_ci		}
407562306a36Sopenharmony_ci		if (copy_to_sockptr(optlen, &len, sizeof(int)))
407662306a36Sopenharmony_ci			return -EFAULT;
407762306a36Sopenharmony_ci		if (copy_to_sockptr(optval, icsk->icsk_ulp_ops->name, len))
407862306a36Sopenharmony_ci			return -EFAULT;
407962306a36Sopenharmony_ci		return 0;
408062306a36Sopenharmony_ci
408162306a36Sopenharmony_ci	case TCP_FASTOPEN_KEY: {
408262306a36Sopenharmony_ci		u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
408362306a36Sopenharmony_ci		unsigned int key_len;
408462306a36Sopenharmony_ci
408562306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
408662306a36Sopenharmony_ci			return -EFAULT;
408762306a36Sopenharmony_ci
408862306a36Sopenharmony_ci		key_len = tcp_fastopen_get_cipher(net, icsk, key) *
408962306a36Sopenharmony_ci				TCP_FASTOPEN_KEY_LENGTH;
409062306a36Sopenharmony_ci		len = min_t(unsigned int, len, key_len);
409162306a36Sopenharmony_ci		if (copy_to_sockptr(optlen, &len, sizeof(int)))
409262306a36Sopenharmony_ci			return -EFAULT;
409362306a36Sopenharmony_ci		if (copy_to_sockptr(optval, key, len))
409462306a36Sopenharmony_ci			return -EFAULT;
409562306a36Sopenharmony_ci		return 0;
409662306a36Sopenharmony_ci	}
409762306a36Sopenharmony_ci	case TCP_THIN_LINEAR_TIMEOUTS:
409862306a36Sopenharmony_ci		val = tp->thin_lto;
409962306a36Sopenharmony_ci		break;
410062306a36Sopenharmony_ci
410162306a36Sopenharmony_ci	case TCP_THIN_DUPACK:
410262306a36Sopenharmony_ci		val = 0;
410362306a36Sopenharmony_ci		break;
410462306a36Sopenharmony_ci
410562306a36Sopenharmony_ci	case TCP_REPAIR:
410662306a36Sopenharmony_ci		val = tp->repair;
410762306a36Sopenharmony_ci		break;
410862306a36Sopenharmony_ci
410962306a36Sopenharmony_ci	case TCP_REPAIR_QUEUE:
411062306a36Sopenharmony_ci		if (tp->repair)
411162306a36Sopenharmony_ci			val = tp->repair_queue;
411262306a36Sopenharmony_ci		else
411362306a36Sopenharmony_ci			return -EINVAL;
411462306a36Sopenharmony_ci		break;
411562306a36Sopenharmony_ci
411662306a36Sopenharmony_ci	case TCP_REPAIR_WINDOW: {
411762306a36Sopenharmony_ci		struct tcp_repair_window opt;
411862306a36Sopenharmony_ci
411962306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
412062306a36Sopenharmony_ci			return -EFAULT;
412162306a36Sopenharmony_ci
412262306a36Sopenharmony_ci		if (len != sizeof(opt))
412362306a36Sopenharmony_ci			return -EINVAL;
412462306a36Sopenharmony_ci
412562306a36Sopenharmony_ci		if (!tp->repair)
412662306a36Sopenharmony_ci			return -EPERM;
412762306a36Sopenharmony_ci
412862306a36Sopenharmony_ci		opt.snd_wl1	= tp->snd_wl1;
412962306a36Sopenharmony_ci		opt.snd_wnd	= tp->snd_wnd;
413062306a36Sopenharmony_ci		opt.max_window	= tp->max_window;
413162306a36Sopenharmony_ci		opt.rcv_wnd	= tp->rcv_wnd;
413262306a36Sopenharmony_ci		opt.rcv_wup	= tp->rcv_wup;
413362306a36Sopenharmony_ci
413462306a36Sopenharmony_ci		if (copy_to_sockptr(optval, &opt, len))
413562306a36Sopenharmony_ci			return -EFAULT;
413662306a36Sopenharmony_ci		return 0;
413762306a36Sopenharmony_ci	}
413862306a36Sopenharmony_ci	case TCP_QUEUE_SEQ:
413962306a36Sopenharmony_ci		if (tp->repair_queue == TCP_SEND_QUEUE)
414062306a36Sopenharmony_ci			val = tp->write_seq;
414162306a36Sopenharmony_ci		else if (tp->repair_queue == TCP_RECV_QUEUE)
414262306a36Sopenharmony_ci			val = tp->rcv_nxt;
414362306a36Sopenharmony_ci		else
414462306a36Sopenharmony_ci			return -EINVAL;
414562306a36Sopenharmony_ci		break;
414662306a36Sopenharmony_ci
414762306a36Sopenharmony_ci	case TCP_USER_TIMEOUT:
414862306a36Sopenharmony_ci		val = READ_ONCE(icsk->icsk_user_timeout);
414962306a36Sopenharmony_ci		break;
415062306a36Sopenharmony_ci
415162306a36Sopenharmony_ci	case TCP_FASTOPEN:
415262306a36Sopenharmony_ci		val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
415362306a36Sopenharmony_ci		break;
415462306a36Sopenharmony_ci
415562306a36Sopenharmony_ci	case TCP_FASTOPEN_CONNECT:
415662306a36Sopenharmony_ci		val = tp->fastopen_connect;
415762306a36Sopenharmony_ci		break;
415862306a36Sopenharmony_ci
415962306a36Sopenharmony_ci	case TCP_FASTOPEN_NO_COOKIE:
416062306a36Sopenharmony_ci		val = tp->fastopen_no_cookie;
416162306a36Sopenharmony_ci		break;
416262306a36Sopenharmony_ci
416362306a36Sopenharmony_ci	case TCP_TX_DELAY:
416462306a36Sopenharmony_ci		val = READ_ONCE(tp->tcp_tx_delay);
416562306a36Sopenharmony_ci		break;
416662306a36Sopenharmony_ci
416762306a36Sopenharmony_ci	case TCP_TIMESTAMP:
416862306a36Sopenharmony_ci		val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
416962306a36Sopenharmony_ci		break;
417062306a36Sopenharmony_ci	case TCP_NOTSENT_LOWAT:
417162306a36Sopenharmony_ci		val = READ_ONCE(tp->notsent_lowat);
417262306a36Sopenharmony_ci		break;
417362306a36Sopenharmony_ci	case TCP_INQ:
417462306a36Sopenharmony_ci		val = tp->recvmsg_inq;
417562306a36Sopenharmony_ci		break;
417662306a36Sopenharmony_ci	case TCP_SAVE_SYN:
417762306a36Sopenharmony_ci		val = tp->save_syn;
417862306a36Sopenharmony_ci		break;
417962306a36Sopenharmony_ci	case TCP_SAVED_SYN: {
418062306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
418162306a36Sopenharmony_ci			return -EFAULT;
418262306a36Sopenharmony_ci
418362306a36Sopenharmony_ci		sockopt_lock_sock(sk);
418462306a36Sopenharmony_ci		if (tp->saved_syn) {
418562306a36Sopenharmony_ci			if (len < tcp_saved_syn_len(tp->saved_syn)) {
418662306a36Sopenharmony_ci				len = tcp_saved_syn_len(tp->saved_syn);
418762306a36Sopenharmony_ci				if (copy_to_sockptr(optlen, &len, sizeof(int))) {
418862306a36Sopenharmony_ci					sockopt_release_sock(sk);
418962306a36Sopenharmony_ci					return -EFAULT;
419062306a36Sopenharmony_ci				}
419162306a36Sopenharmony_ci				sockopt_release_sock(sk);
419262306a36Sopenharmony_ci				return -EINVAL;
419362306a36Sopenharmony_ci			}
419462306a36Sopenharmony_ci			len = tcp_saved_syn_len(tp->saved_syn);
419562306a36Sopenharmony_ci			if (copy_to_sockptr(optlen, &len, sizeof(int))) {
419662306a36Sopenharmony_ci				sockopt_release_sock(sk);
419762306a36Sopenharmony_ci				return -EFAULT;
419862306a36Sopenharmony_ci			}
419962306a36Sopenharmony_ci			if (copy_to_sockptr(optval, tp->saved_syn->data, len)) {
420062306a36Sopenharmony_ci				sockopt_release_sock(sk);
420162306a36Sopenharmony_ci				return -EFAULT;
420262306a36Sopenharmony_ci			}
420362306a36Sopenharmony_ci			tcp_saved_syn_free(tp);
420462306a36Sopenharmony_ci			sockopt_release_sock(sk);
420562306a36Sopenharmony_ci		} else {
420662306a36Sopenharmony_ci			sockopt_release_sock(sk);
420762306a36Sopenharmony_ci			len = 0;
420862306a36Sopenharmony_ci			if (copy_to_sockptr(optlen, &len, sizeof(int)))
420962306a36Sopenharmony_ci				return -EFAULT;
421062306a36Sopenharmony_ci		}
421162306a36Sopenharmony_ci		return 0;
421262306a36Sopenharmony_ci	}
421362306a36Sopenharmony_ci#ifdef CONFIG_MMU
421462306a36Sopenharmony_ci	case TCP_ZEROCOPY_RECEIVE: {
421562306a36Sopenharmony_ci		struct scm_timestamping_internal tss;
421662306a36Sopenharmony_ci		struct tcp_zerocopy_receive zc = {};
421762306a36Sopenharmony_ci		int err;
421862306a36Sopenharmony_ci
421962306a36Sopenharmony_ci		if (copy_from_sockptr(&len, optlen, sizeof(int)))
422062306a36Sopenharmony_ci			return -EFAULT;
422162306a36Sopenharmony_ci		if (len < 0 ||
422262306a36Sopenharmony_ci		    len < offsetofend(struct tcp_zerocopy_receive, length))
422362306a36Sopenharmony_ci			return -EINVAL;
422462306a36Sopenharmony_ci		if (unlikely(len > sizeof(zc))) {
422562306a36Sopenharmony_ci			err = check_zeroed_sockptr(optval, sizeof(zc),
422662306a36Sopenharmony_ci						   len - sizeof(zc));
422762306a36Sopenharmony_ci			if (err < 1)
422862306a36Sopenharmony_ci				return err == 0 ? -EINVAL : err;
422962306a36Sopenharmony_ci			len = sizeof(zc);
423062306a36Sopenharmony_ci			if (copy_to_sockptr(optlen, &len, sizeof(int)))
423162306a36Sopenharmony_ci				return -EFAULT;
423262306a36Sopenharmony_ci		}
423362306a36Sopenharmony_ci		if (copy_from_sockptr(&zc, optval, len))
423462306a36Sopenharmony_ci			return -EFAULT;
423562306a36Sopenharmony_ci		if (zc.reserved)
423662306a36Sopenharmony_ci			return -EINVAL;
423762306a36Sopenharmony_ci		if (zc.msg_flags &  ~(TCP_VALID_ZC_MSG_FLAGS))
423862306a36Sopenharmony_ci			return -EINVAL;
423962306a36Sopenharmony_ci		sockopt_lock_sock(sk);
424062306a36Sopenharmony_ci		err = tcp_zerocopy_receive(sk, &zc, &tss);
424162306a36Sopenharmony_ci		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
424262306a36Sopenharmony_ci							  &zc, &len, err);
424362306a36Sopenharmony_ci		sockopt_release_sock(sk);
424462306a36Sopenharmony_ci		if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
424562306a36Sopenharmony_ci			goto zerocopy_rcv_cmsg;
424662306a36Sopenharmony_ci		switch (len) {
424762306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, msg_flags):
424862306a36Sopenharmony_ci			goto zerocopy_rcv_cmsg;
424962306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, msg_controllen):
425062306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, msg_control):
425162306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, flags):
425262306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, copybuf_len):
425362306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, copybuf_address):
425462306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, err):
425562306a36Sopenharmony_ci			goto zerocopy_rcv_sk_err;
425662306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, inq):
425762306a36Sopenharmony_ci			goto zerocopy_rcv_inq;
425862306a36Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, length):
425962306a36Sopenharmony_ci		default:
426062306a36Sopenharmony_ci			goto zerocopy_rcv_out;
426162306a36Sopenharmony_ci		}
426262306a36Sopenharmony_cizerocopy_rcv_cmsg:
426362306a36Sopenharmony_ci		if (zc.msg_flags & TCP_CMSG_TS)
426462306a36Sopenharmony_ci			tcp_zc_finalize_rx_tstamp(sk, &zc, &tss);
426562306a36Sopenharmony_ci		else
426662306a36Sopenharmony_ci			zc.msg_flags = 0;
426762306a36Sopenharmony_cizerocopy_rcv_sk_err:
426862306a36Sopenharmony_ci		if (!err)
426962306a36Sopenharmony_ci			zc.err = sock_error(sk);
427062306a36Sopenharmony_cizerocopy_rcv_inq:
427162306a36Sopenharmony_ci		zc.inq = tcp_inq_hint(sk);
427262306a36Sopenharmony_cizerocopy_rcv_out:
427362306a36Sopenharmony_ci		if (!err && copy_to_sockptr(optval, &zc, len))
427462306a36Sopenharmony_ci			err = -EFAULT;
427562306a36Sopenharmony_ci		return err;
427662306a36Sopenharmony_ci	}
427762306a36Sopenharmony_ci#endif
427862306a36Sopenharmony_ci	default:
427962306a36Sopenharmony_ci		return -ENOPROTOOPT;
428062306a36Sopenharmony_ci	}
428162306a36Sopenharmony_ci
428262306a36Sopenharmony_ci	if (copy_to_sockptr(optlen, &len, sizeof(int)))
428362306a36Sopenharmony_ci		return -EFAULT;
428462306a36Sopenharmony_ci	if (copy_to_sockptr(optval, &val, len))
428562306a36Sopenharmony_ci		return -EFAULT;
428662306a36Sopenharmony_ci	return 0;
428762306a36Sopenharmony_ci}
428862306a36Sopenharmony_ci
/* Tell the caller whether the cgroup-BPF getsockopt hook should be bypassed
 * for this (level, optname) pair.
 *
 * Returns true only for SOL_TCP / TCP_ZEROCOPY_RECEIVE: do_tcp_getsockopt()
 * has an optimized implementation for that option which avoids taking the
 * socket lock an extra time. Every other combination returns false.
 */
bool tcp_bpf_bypass_getsockopt(int level, int optname)
{
	return level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE;
}
429962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
430062306a36Sopenharmony_ci
430162306a36Sopenharmony_ciint tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
430262306a36Sopenharmony_ci		   int __user *optlen)
430362306a36Sopenharmony_ci{
430462306a36Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
430562306a36Sopenharmony_ci
430662306a36Sopenharmony_ci	if (level != SOL_TCP)
430762306a36Sopenharmony_ci		/* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
430862306a36Sopenharmony_ci		return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname,
430962306a36Sopenharmony_ci								optval, optlen);
431062306a36Sopenharmony_ci	return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
431162306a36Sopenharmony_ci				 USER_SOCKPTR(optlen));
431262306a36Sopenharmony_ci}
431362306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_getsockopt);
431462306a36Sopenharmony_ci
431562306a36Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
431662306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
431762306a36Sopenharmony_cistatic DEFINE_MUTEX(tcp_md5sig_mutex);
431862306a36Sopenharmony_cistatic bool tcp_md5sig_pool_populated = false;
431962306a36Sopenharmony_ci
432062306a36Sopenharmony_cistatic void __tcp_alloc_md5sig_pool(void)
432162306a36Sopenharmony_ci{
432262306a36Sopenharmony_ci	struct crypto_ahash *hash;
432362306a36Sopenharmony_ci	int cpu;
432462306a36Sopenharmony_ci
432562306a36Sopenharmony_ci	hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
432662306a36Sopenharmony_ci	if (IS_ERR(hash))
432762306a36Sopenharmony_ci		return;
432862306a36Sopenharmony_ci
432962306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
433062306a36Sopenharmony_ci		void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
433162306a36Sopenharmony_ci		struct ahash_request *req;
433262306a36Sopenharmony_ci
433362306a36Sopenharmony_ci		if (!scratch) {
433462306a36Sopenharmony_ci			scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
433562306a36Sopenharmony_ci					       sizeof(struct tcphdr),
433662306a36Sopenharmony_ci					       GFP_KERNEL,
433762306a36Sopenharmony_ci					       cpu_to_node(cpu));
433862306a36Sopenharmony_ci			if (!scratch)
433962306a36Sopenharmony_ci				return;
434062306a36Sopenharmony_ci			per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
434162306a36Sopenharmony_ci		}
434262306a36Sopenharmony_ci		if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
434362306a36Sopenharmony_ci			continue;
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci		req = ahash_request_alloc(hash, GFP_KERNEL);
434662306a36Sopenharmony_ci		if (!req)
434762306a36Sopenharmony_ci			return;
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci		ahash_request_set_callback(req, 0, NULL, NULL);
435062306a36Sopenharmony_ci
435162306a36Sopenharmony_ci		per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
435262306a36Sopenharmony_ci	}
435362306a36Sopenharmony_ci	/* before setting tcp_md5sig_pool_populated, we must commit all writes
435462306a36Sopenharmony_ci	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
435562306a36Sopenharmony_ci	 */
435662306a36Sopenharmony_ci	smp_wmb();
435762306a36Sopenharmony_ci	/* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
435862306a36Sopenharmony_ci	 * and tcp_get_md5sig_pool().
435962306a36Sopenharmony_ci	*/
436062306a36Sopenharmony_ci	WRITE_ONCE(tcp_md5sig_pool_populated, true);
436162306a36Sopenharmony_ci}
436262306a36Sopenharmony_ci
436362306a36Sopenharmony_cibool tcp_alloc_md5sig_pool(void)
436462306a36Sopenharmony_ci{
436562306a36Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
436662306a36Sopenharmony_ci	if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
436762306a36Sopenharmony_ci		mutex_lock(&tcp_md5sig_mutex);
436862306a36Sopenharmony_ci
436962306a36Sopenharmony_ci		if (!tcp_md5sig_pool_populated)
437062306a36Sopenharmony_ci			__tcp_alloc_md5sig_pool();
437162306a36Sopenharmony_ci
437262306a36Sopenharmony_ci		mutex_unlock(&tcp_md5sig_mutex);
437362306a36Sopenharmony_ci	}
437462306a36Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
437562306a36Sopenharmony_ci	return READ_ONCE(tcp_md5sig_pool_populated);
437662306a36Sopenharmony_ci}
437762306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_alloc_md5sig_pool);
437862306a36Sopenharmony_ci
437962306a36Sopenharmony_ci
438062306a36Sopenharmony_ci/**
438162306a36Sopenharmony_ci *	tcp_get_md5sig_pool - get md5sig_pool for this user
438262306a36Sopenharmony_ci *
438362306a36Sopenharmony_ci *	We use percpu structure, so if we succeed, we exit with preemption
438462306a36Sopenharmony_ci *	and BH disabled, to make sure another thread or softirq handling
438562306a36Sopenharmony_ci *	wont try to get same context.
438662306a36Sopenharmony_ci */
438762306a36Sopenharmony_cistruct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
438862306a36Sopenharmony_ci{
438962306a36Sopenharmony_ci	local_bh_disable();
439062306a36Sopenharmony_ci
439162306a36Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
439262306a36Sopenharmony_ci	if (READ_ONCE(tcp_md5sig_pool_populated)) {
439362306a36Sopenharmony_ci		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
439462306a36Sopenharmony_ci		smp_rmb();
439562306a36Sopenharmony_ci		return this_cpu_ptr(&tcp_md5sig_pool);
439662306a36Sopenharmony_ci	}
439762306a36Sopenharmony_ci	local_bh_enable();
439862306a36Sopenharmony_ci	return NULL;
439962306a36Sopenharmony_ci}
440062306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_get_md5sig_pool);
440162306a36Sopenharmony_ci
440262306a36Sopenharmony_ciint tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
440362306a36Sopenharmony_ci			  const struct sk_buff *skb, unsigned int header_len)
440462306a36Sopenharmony_ci{
440562306a36Sopenharmony_ci	struct scatterlist sg;
440662306a36Sopenharmony_ci	const struct tcphdr *tp = tcp_hdr(skb);
440762306a36Sopenharmony_ci	struct ahash_request *req = hp->md5_req;
440862306a36Sopenharmony_ci	unsigned int i;
440962306a36Sopenharmony_ci	const unsigned int head_data_len = skb_headlen(skb) > header_len ?
441062306a36Sopenharmony_ci					   skb_headlen(skb) - header_len : 0;
441162306a36Sopenharmony_ci	const struct skb_shared_info *shi = skb_shinfo(skb);
441262306a36Sopenharmony_ci	struct sk_buff *frag_iter;
441362306a36Sopenharmony_ci
441462306a36Sopenharmony_ci	sg_init_table(&sg, 1);
441562306a36Sopenharmony_ci
441662306a36Sopenharmony_ci	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
441762306a36Sopenharmony_ci	ahash_request_set_crypt(req, &sg, NULL, head_data_len);
441862306a36Sopenharmony_ci	if (crypto_ahash_update(req))
441962306a36Sopenharmony_ci		return 1;
442062306a36Sopenharmony_ci
442162306a36Sopenharmony_ci	for (i = 0; i < shi->nr_frags; ++i) {
442262306a36Sopenharmony_ci		const skb_frag_t *f = &shi->frags[i];
442362306a36Sopenharmony_ci		unsigned int offset = skb_frag_off(f);
442462306a36Sopenharmony_ci		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
442562306a36Sopenharmony_ci
442662306a36Sopenharmony_ci		sg_set_page(&sg, page, skb_frag_size(f),
442762306a36Sopenharmony_ci			    offset_in_page(offset));
442862306a36Sopenharmony_ci		ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
442962306a36Sopenharmony_ci		if (crypto_ahash_update(req))
443062306a36Sopenharmony_ci			return 1;
443162306a36Sopenharmony_ci	}
443262306a36Sopenharmony_ci
443362306a36Sopenharmony_ci	skb_walk_frags(skb, frag_iter)
443462306a36Sopenharmony_ci		if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
443562306a36Sopenharmony_ci			return 1;
443662306a36Sopenharmony_ci
443762306a36Sopenharmony_ci	return 0;
443862306a36Sopenharmony_ci}
443962306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_hash_skb_data);
444062306a36Sopenharmony_ci
444162306a36Sopenharmony_ciint tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
444262306a36Sopenharmony_ci{
444362306a36Sopenharmony_ci	u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
444462306a36Sopenharmony_ci	struct scatterlist sg;
444562306a36Sopenharmony_ci
444662306a36Sopenharmony_ci	sg_init_one(&sg, key->key, keylen);
444762306a36Sopenharmony_ci	ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
444862306a36Sopenharmony_ci
444962306a36Sopenharmony_ci	/* We use data_race() because tcp_md5_do_add() might change key->key under us */
445062306a36Sopenharmony_ci	return data_race(crypto_ahash_update(hp->md5_req));
445162306a36Sopenharmony_ci}
445262306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_hash_key);
445362306a36Sopenharmony_ci
445462306a36Sopenharmony_ci/* Called with rcu_read_lock() */
445562306a36Sopenharmony_cienum skb_drop_reason
445662306a36Sopenharmony_citcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
445762306a36Sopenharmony_ci		     const void *saddr, const void *daddr,
445862306a36Sopenharmony_ci		     int family, int dif, int sdif)
445962306a36Sopenharmony_ci{
446062306a36Sopenharmony_ci	/*
446162306a36Sopenharmony_ci	 * This gets called for each TCP segment that arrives
446262306a36Sopenharmony_ci	 * so we want to be efficient.
446362306a36Sopenharmony_ci	 * We have 3 drop cases:
446462306a36Sopenharmony_ci	 * o No MD5 hash and one expected.
446562306a36Sopenharmony_ci	 * o MD5 hash and we're not expecting one.
446662306a36Sopenharmony_ci	 * o MD5 hash and its wrong.
446762306a36Sopenharmony_ci	 */
446862306a36Sopenharmony_ci	const __u8 *hash_location = NULL;
446962306a36Sopenharmony_ci	struct tcp_md5sig_key *hash_expected;
447062306a36Sopenharmony_ci	const struct tcphdr *th = tcp_hdr(skb);
447162306a36Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
447262306a36Sopenharmony_ci	int genhash, l3index;
447362306a36Sopenharmony_ci	u8 newhash[16];
447462306a36Sopenharmony_ci
447562306a36Sopenharmony_ci	/* sdif set, means packet ingressed via a device
447662306a36Sopenharmony_ci	 * in an L3 domain and dif is set to the l3mdev
447762306a36Sopenharmony_ci	 */
447862306a36Sopenharmony_ci	l3index = sdif ? dif : 0;
447962306a36Sopenharmony_ci
448062306a36Sopenharmony_ci	hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family);
448162306a36Sopenharmony_ci	hash_location = tcp_parse_md5sig_option(th);
448262306a36Sopenharmony_ci
448362306a36Sopenharmony_ci	/* We've parsed the options - do we have a hash? */
448462306a36Sopenharmony_ci	if (!hash_expected && !hash_location)
448562306a36Sopenharmony_ci		return SKB_NOT_DROPPED_YET;
448662306a36Sopenharmony_ci
448762306a36Sopenharmony_ci	if (hash_expected && !hash_location) {
448862306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
448962306a36Sopenharmony_ci		return SKB_DROP_REASON_TCP_MD5NOTFOUND;
449062306a36Sopenharmony_ci	}
449162306a36Sopenharmony_ci
449262306a36Sopenharmony_ci	if (!hash_expected && hash_location) {
449362306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
449462306a36Sopenharmony_ci		return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
449562306a36Sopenharmony_ci	}
449662306a36Sopenharmony_ci
449762306a36Sopenharmony_ci	/* Check the signature.
449862306a36Sopenharmony_ci	 * To support dual stack listeners, we need to handle
449962306a36Sopenharmony_ci	 * IPv4-mapped case.
450062306a36Sopenharmony_ci	 */
450162306a36Sopenharmony_ci	if (family == AF_INET)
450262306a36Sopenharmony_ci		genhash = tcp_v4_md5_hash_skb(newhash,
450362306a36Sopenharmony_ci					      hash_expected,
450462306a36Sopenharmony_ci					      NULL, skb);
450562306a36Sopenharmony_ci	else
450662306a36Sopenharmony_ci		genhash = tp->af_specific->calc_md5_hash(newhash,
450762306a36Sopenharmony_ci							 hash_expected,
450862306a36Sopenharmony_ci							 NULL, skb);
450962306a36Sopenharmony_ci
451062306a36Sopenharmony_ci	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
451162306a36Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
451262306a36Sopenharmony_ci		if (family == AF_INET) {
451362306a36Sopenharmony_ci			net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
451462306a36Sopenharmony_ci					saddr, ntohs(th->source),
451562306a36Sopenharmony_ci					daddr, ntohs(th->dest),
451662306a36Sopenharmony_ci					genhash ? " tcp_v4_calc_md5_hash failed"
451762306a36Sopenharmony_ci					: "", l3index);
451862306a36Sopenharmony_ci		} else {
451962306a36Sopenharmony_ci			net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
452062306a36Sopenharmony_ci					genhash ? "failed" : "mismatch",
452162306a36Sopenharmony_ci					saddr, ntohs(th->source),
452262306a36Sopenharmony_ci					daddr, ntohs(th->dest), l3index);
452362306a36Sopenharmony_ci		}
452462306a36Sopenharmony_ci		return SKB_DROP_REASON_TCP_MD5FAILURE;
452562306a36Sopenharmony_ci	}
452662306a36Sopenharmony_ci	return SKB_NOT_DROPPED_YET;
452762306a36Sopenharmony_ci}
452862306a36Sopenharmony_ciEXPORT_SYMBOL(tcp_inbound_md5_hash);
452962306a36Sopenharmony_ci
453062306a36Sopenharmony_ci#endif
453162306a36Sopenharmony_ci
453262306a36Sopenharmony_civoid tcp_done(struct sock *sk)
453362306a36Sopenharmony_ci{
453462306a36Sopenharmony_ci	struct request_sock *req;
453562306a36Sopenharmony_ci
453662306a36Sopenharmony_ci	/* We might be called with a new socket, after
453762306a36Sopenharmony_ci	 * inet_csk_prepare_forced_close() has been called
453862306a36Sopenharmony_ci	 * so we can not use lockdep_sock_is_held(sk)
453962306a36Sopenharmony_ci	 */
454062306a36Sopenharmony_ci	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
454162306a36Sopenharmony_ci
454262306a36Sopenharmony_ci	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
454362306a36Sopenharmony_ci		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
454462306a36Sopenharmony_ci
454562306a36Sopenharmony_ci	tcp_set_state(sk, TCP_CLOSE);
454662306a36Sopenharmony_ci	tcp_clear_xmit_timers(sk);
454762306a36Sopenharmony_ci	if (req)
454862306a36Sopenharmony_ci		reqsk_fastopen_remove(sk, req, false);
454962306a36Sopenharmony_ci
455062306a36Sopenharmony_ci	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
455162306a36Sopenharmony_ci
455262306a36Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD))
455362306a36Sopenharmony_ci		sk->sk_state_change(sk);
455462306a36Sopenharmony_ci	else
455562306a36Sopenharmony_ci		inet_csk_destroy_sock(sk);
455662306a36Sopenharmony_ci}
455762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_done);
455862306a36Sopenharmony_ci
455962306a36Sopenharmony_ciint tcp_abort(struct sock *sk, int err)
456062306a36Sopenharmony_ci{
456162306a36Sopenharmony_ci	int state = inet_sk_state_load(sk);
456262306a36Sopenharmony_ci
456362306a36Sopenharmony_ci	if (state == TCP_NEW_SYN_RECV) {
456462306a36Sopenharmony_ci		struct request_sock *req = inet_reqsk(sk);
456562306a36Sopenharmony_ci
456662306a36Sopenharmony_ci		local_bh_disable();
456762306a36Sopenharmony_ci		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
456862306a36Sopenharmony_ci		local_bh_enable();
456962306a36Sopenharmony_ci		return 0;
457062306a36Sopenharmony_ci	}
457162306a36Sopenharmony_ci	if (state == TCP_TIME_WAIT) {
457262306a36Sopenharmony_ci		struct inet_timewait_sock *tw = inet_twsk(sk);
457362306a36Sopenharmony_ci
457462306a36Sopenharmony_ci		refcount_inc(&tw->tw_refcnt);
457562306a36Sopenharmony_ci		local_bh_disable();
457662306a36Sopenharmony_ci		inet_twsk_deschedule_put(tw);
457762306a36Sopenharmony_ci		local_bh_enable();
457862306a36Sopenharmony_ci		return 0;
457962306a36Sopenharmony_ci	}
458062306a36Sopenharmony_ci
458162306a36Sopenharmony_ci	/* BPF context ensures sock locking. */
458262306a36Sopenharmony_ci	if (!has_current_bpf_ctx())
458362306a36Sopenharmony_ci		/* Don't race with userspace socket closes such as tcp_close. */
458462306a36Sopenharmony_ci		lock_sock(sk);
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN) {
458762306a36Sopenharmony_ci		tcp_set_state(sk, TCP_CLOSE);
458862306a36Sopenharmony_ci		inet_csk_listen_stop(sk);
458962306a36Sopenharmony_ci	}
459062306a36Sopenharmony_ci
459162306a36Sopenharmony_ci	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
459262306a36Sopenharmony_ci	local_bh_disable();
459362306a36Sopenharmony_ci	bh_lock_sock(sk);
459462306a36Sopenharmony_ci
459562306a36Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD)) {
459662306a36Sopenharmony_ci		WRITE_ONCE(sk->sk_err, err);
459762306a36Sopenharmony_ci		/* This barrier is coupled with smp_rmb() in tcp_poll() */
459862306a36Sopenharmony_ci		smp_wmb();
459962306a36Sopenharmony_ci		sk_error_report(sk);
460062306a36Sopenharmony_ci		if (tcp_need_reset(sk->sk_state))
460162306a36Sopenharmony_ci			tcp_send_active_reset(sk, GFP_ATOMIC);
460262306a36Sopenharmony_ci		tcp_done(sk);
460362306a36Sopenharmony_ci	}
460462306a36Sopenharmony_ci
460562306a36Sopenharmony_ci	bh_unlock_sock(sk);
460662306a36Sopenharmony_ci	local_bh_enable();
460762306a36Sopenharmony_ci	tcp_write_queue_purge(sk);
460862306a36Sopenharmony_ci	if (!has_current_bpf_ctx())
460962306a36Sopenharmony_ci		release_sock(sk);
461062306a36Sopenharmony_ci	return 0;
461162306a36Sopenharmony_ci}
461262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_abort);
461362306a36Sopenharmony_ci
461462306a36Sopenharmony_ciextern struct tcp_congestion_ops tcp_reno;
461562306a36Sopenharmony_ci
461662306a36Sopenharmony_cistatic __initdata unsigned long thash_entries;
461762306a36Sopenharmony_cistatic int __init set_thash_entries(char *str)
461862306a36Sopenharmony_ci{
461962306a36Sopenharmony_ci	ssize_t ret;
462062306a36Sopenharmony_ci
462162306a36Sopenharmony_ci	if (!str)
462262306a36Sopenharmony_ci		return 0;
462362306a36Sopenharmony_ci
462462306a36Sopenharmony_ci	ret = kstrtoul(str, 0, &thash_entries);
462562306a36Sopenharmony_ci	if (ret)
462662306a36Sopenharmony_ci		return 0;
462762306a36Sopenharmony_ci
462862306a36Sopenharmony_ci	return 1;
462962306a36Sopenharmony_ci}
463062306a36Sopenharmony_ci__setup("thash_entries=", set_thash_entries);
463162306a36Sopenharmony_ci
463262306a36Sopenharmony_cistatic void __init tcp_init_mem(void)
463362306a36Sopenharmony_ci{
463462306a36Sopenharmony_ci	unsigned long limit = nr_free_buffer_pages() / 16;
463562306a36Sopenharmony_ci
463662306a36Sopenharmony_ci	limit = max(limit, 128UL);
463762306a36Sopenharmony_ci	sysctl_tcp_mem[0] = limit / 4 * 3;		/* 4.68 % */
463862306a36Sopenharmony_ci	sysctl_tcp_mem[1] = limit;			/* 6.25 % */
463962306a36Sopenharmony_ci	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */
464062306a36Sopenharmony_ci}
464162306a36Sopenharmony_ci
464262306a36Sopenharmony_civoid __init tcp_init(void)
464362306a36Sopenharmony_ci{
464462306a36Sopenharmony_ci	int max_rshare, max_wshare, cnt;
464562306a36Sopenharmony_ci	unsigned long limit;
464662306a36Sopenharmony_ci	unsigned int i;
464762306a36Sopenharmony_ci
464862306a36Sopenharmony_ci	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
464962306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
465062306a36Sopenharmony_ci		     sizeof_field(struct sk_buff, cb));
465162306a36Sopenharmony_ci
465262306a36Sopenharmony_ci	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
465362306a36Sopenharmony_ci
465462306a36Sopenharmony_ci	timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
465562306a36Sopenharmony_ci	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
465662306a36Sopenharmony_ci
465762306a36Sopenharmony_ci	inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
465862306a36Sopenharmony_ci			    thash_entries, 21,  /* one slot per 2 MB*/
465962306a36Sopenharmony_ci			    0, 64 * 1024);
466062306a36Sopenharmony_ci	tcp_hashinfo.bind_bucket_cachep =
466162306a36Sopenharmony_ci		kmem_cache_create("tcp_bind_bucket",
466262306a36Sopenharmony_ci				  sizeof(struct inet_bind_bucket), 0,
466362306a36Sopenharmony_ci				  SLAB_HWCACHE_ALIGN | SLAB_PANIC |
466462306a36Sopenharmony_ci				  SLAB_ACCOUNT,
466562306a36Sopenharmony_ci				  NULL);
466662306a36Sopenharmony_ci	tcp_hashinfo.bind2_bucket_cachep =
466762306a36Sopenharmony_ci		kmem_cache_create("tcp_bind2_bucket",
466862306a36Sopenharmony_ci				  sizeof(struct inet_bind2_bucket), 0,
466962306a36Sopenharmony_ci				  SLAB_HWCACHE_ALIGN | SLAB_PANIC |
467062306a36Sopenharmony_ci				  SLAB_ACCOUNT,
467162306a36Sopenharmony_ci				  NULL);
467262306a36Sopenharmony_ci
467362306a36Sopenharmony_ci	/* Size and allocate the main established and bind bucket
467462306a36Sopenharmony_ci	 * hash tables.
467562306a36Sopenharmony_ci	 *
467662306a36Sopenharmony_ci	 * The methodology is similar to that of the buffer cache.
467762306a36Sopenharmony_ci	 */
467862306a36Sopenharmony_ci	tcp_hashinfo.ehash =
467962306a36Sopenharmony_ci		alloc_large_system_hash("TCP established",
468062306a36Sopenharmony_ci					sizeof(struct inet_ehash_bucket),
468162306a36Sopenharmony_ci					thash_entries,
468262306a36Sopenharmony_ci					17, /* one slot per 128 KB of memory */
468362306a36Sopenharmony_ci					0,
468462306a36Sopenharmony_ci					NULL,
468562306a36Sopenharmony_ci					&tcp_hashinfo.ehash_mask,
468662306a36Sopenharmony_ci					0,
468762306a36Sopenharmony_ci					thash_entries ? 0 : 512 * 1024);
468862306a36Sopenharmony_ci	for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
468962306a36Sopenharmony_ci		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
469062306a36Sopenharmony_ci
469162306a36Sopenharmony_ci	if (inet_ehash_locks_alloc(&tcp_hashinfo))
469262306a36Sopenharmony_ci		panic("TCP: failed to alloc ehash_locks");
469362306a36Sopenharmony_ci	tcp_hashinfo.bhash =
469462306a36Sopenharmony_ci		alloc_large_system_hash("TCP bind",
469562306a36Sopenharmony_ci					2 * sizeof(struct inet_bind_hashbucket),
469662306a36Sopenharmony_ci					tcp_hashinfo.ehash_mask + 1,
469762306a36Sopenharmony_ci					17, /* one slot per 128 KB of memory */
469862306a36Sopenharmony_ci					0,
469962306a36Sopenharmony_ci					&tcp_hashinfo.bhash_size,
470062306a36Sopenharmony_ci					NULL,
470162306a36Sopenharmony_ci					0,
470262306a36Sopenharmony_ci					64 * 1024);
470362306a36Sopenharmony_ci	tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
470462306a36Sopenharmony_ci	tcp_hashinfo.bhash2 = tcp_hashinfo.bhash + tcp_hashinfo.bhash_size;
470562306a36Sopenharmony_ci	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
470662306a36Sopenharmony_ci		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
470762306a36Sopenharmony_ci		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
470862306a36Sopenharmony_ci		spin_lock_init(&tcp_hashinfo.bhash2[i].lock);
470962306a36Sopenharmony_ci		INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
471062306a36Sopenharmony_ci	}
471162306a36Sopenharmony_ci
471262306a36Sopenharmony_ci	tcp_hashinfo.pernet = false;
471362306a36Sopenharmony_ci
471462306a36Sopenharmony_ci	cnt = tcp_hashinfo.ehash_mask + 1;
471562306a36Sopenharmony_ci	sysctl_tcp_max_orphans = cnt / 2;
471662306a36Sopenharmony_ci
471762306a36Sopenharmony_ci	tcp_init_mem();
471862306a36Sopenharmony_ci	/* Set per-socket limits to no more than 1/128 the pressure threshold */
471962306a36Sopenharmony_ci	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
472062306a36Sopenharmony_ci	max_wshare = min(4UL*1024*1024, limit);
472162306a36Sopenharmony_ci	max_rshare = min(6UL*1024*1024, limit);
472262306a36Sopenharmony_ci
472362306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
472462306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
472562306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
472662306a36Sopenharmony_ci
472762306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[0] = PAGE_SIZE;
472862306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
472962306a36Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
473062306a36Sopenharmony_ci
473162306a36Sopenharmony_ci	pr_info("Hash tables configured (established %u bind %u)\n",
473262306a36Sopenharmony_ci		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
473362306a36Sopenharmony_ci
473462306a36Sopenharmony_ci	tcp_v4_init();
473562306a36Sopenharmony_ci	tcp_metrics_init();
473662306a36Sopenharmony_ci	BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
473762306a36Sopenharmony_ci	tcp_tasklet_init();
473862306a36Sopenharmony_ci	mptcp_init();
473962306a36Sopenharmony_ci}
4740